# Source code for tpot_classification

import cv2
from src.cascade_classifcation import find_face, create_folder
import pandas as pd
import os
from tpot import TPOTRegressor
import numpy as np
from sklearn.utils import shuffle
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.externals import joblib

class TpotClassifier():
    """
    Trains a machine learning model with tpot,
    which can then be used to predict attractiveness of pictures of humans.

    The face data set is built separately with :meth:`create_data`, which takes
    the images from the data folder, finds faces in them and transforms those
    into 64x64 grayscale images. Making the data set takes a long time.

    :param data_path: Path to the folder that has the image folder and users.csv
    :param gabor: This determines if gabor filter is used in training.
    :param reduction_method: Which method to use for dimensionality reduction. \
    Supported types are 'pca' and 'lda'; any other value disables reduction.
    """

    # (width, height) of the face crops written by create_data() and fed to the model.
    FACE_SIZE = (64, 64)

    def __init__(self, data_path, gabor=False, reduction_method=None):
        self.regressor = TPOTRegressor(generations=5, population_size=20, verbosity=2, config_dict='TPOT light')
        self.gabor = gabor
        self.data_path = data_path
        # The filter bank is only built when it is actually going to be used.
        self.filters = self.build_filters() if self.gabor else None
        self.reduction_method = reduction_method
        # Both reducers are created up front; train() fits only the one that
        # `reduction_method` selects.
        # NOTE: scikit-learn requires n_components <= min(n_samples, n_features),
        # so the 'pca' option needs at least 2048 training images.
        self.pca = PCA(n_components=2048)
        self.lda = LDA()

    @staticmethod
    def _extract_face(gray):
        """Crops the detected face out of a grayscale image and resizes it to FACE_SIZE.

        :param gray: Grayscale image to search for a face.
        :return: The resized face crop, or None when ``find_face`` reports no face.
        """
        x, y, w, h = find_face(gray)
        if x is None:
            return None
        return cv2.resize(gray[y:y + h, x:x + w], TpotClassifier.FACE_SIZE)

    def _reduce(self, data):
        """Applies the already fitted reducer selected by ``reduction_method``.

        :param data: 2-D array with one flattened image per row.
        :return: The transformed data, or ``data`` unchanged when no supported
                 reduction method is selected.
        """
        if self.reduction_method == 'pca':
            return self.pca.transform(data)
        if self.reduction_method == 'lda':
            return self.lda.transform(data)
        return data

    @staticmethod
    def create_data(data_path):
        """Makes a 64x64 grayscale image dataset of faces.

        Reads ``users.csv``, keeps only the rows whose ``gender`` is ``'M'``,
        writes one face crop per usable image into ``<data_path>/faces`` and
        stores the crop paths together with their scores in ``faces.csv``.
        Images that cannot be read, or in which no face is found, are skipped.

        :param data_path: Path to the folder that has the image folder and users.csv
        """
        faces_dir = os.path.join(data_path, 'faces')
        create_folder(faces_dir)
        scores = []
        image_paths = []
        df = pd.read_csv(os.path.join(data_path, 'users.csv'))
        for _, row in df.iterrows():
            if row.gender != 'M':
                continue

            filename = os.path.split(row.image_path)[1]
            im = cv2.imread(os.path.join(data_path, 'images', filename))
            if im is None:
                # cv2.imread reports a missing or undecodable file by returning
                # None instead of raising; treat it like an image without a face.
                continue

            face = TpotClassifier._extract_face(cv2.cvtColor(im, cv2.COLOR_BGR2GRAY))
            if face is None:
                continue

            path = os.path.join(faces_dir, filename)
            cv2.imwrite(path, face)

            image_paths.append(path)
            scores.append(row.score)

        new_df = pd.DataFrame({'image_path': image_paths, 'score': scores})
        new_df.to_csv(os.path.join(data_path, 'faces.csv'))

    def train(self):
        """
        Uses tpot to train a machine learning model to predict attractiveness of pictures of human faces.

        The images listed in ``faces.csv`` are shuffled and split 80/20 into a
        training and a test part. The fitted reducer (if any) and the fitted
        tpot pipeline are dumped into ``data_path`` so that :meth:`load` can
        restore them later.

        :return: Tuple ``(accuracy, within_1_accuracy, mean_sqrt_error)``: the share of
                 test predictions that round to the same value as the label, the share
                 that is at most 1 away from the label, and the value returned by the
                 regressor's ``score`` on the test part (presumably the negated mean
                 squared error with tpot's default scoring -- confirm against the tpot docs).
        :raises IOError: If an image listed in ``faces.csv`` cannot be read.
        """
        df = pd.read_csv(os.path.join(self.data_path, 'faces.csv'))

        imgs = []
        labels = []

        for _, row in df.iterrows():
            img = cv2.imread(row.image_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # Fail with the offending path rather than with an
                # AttributeError on None further down.
                raise IOError('Could not read face image: {}'.format(row.image_path))

            if self.gabor:
                img = self.process(img, self.filters)

            imgs.append(img.flatten())
            labels.append(row.score)

        imgs, labels = shuffle(np.array(imgs), np.array(labels))

        train_len = int(len(imgs) * 0.8)
        train_data, test_data = imgs[:train_len], imgs[train_len:]
        train_labels, test_labels = labels[:train_len], labels[train_len:]

        # Fit the selected reducer on the training part only and persist it.
        if self.reduction_method == 'pca':
            self.pca.fit(train_data)
            joblib.dump(self.pca, os.path.join(self.data_path, 'fitted_pca.pkl'))
        elif self.reduction_method == 'lda':
            # LDA needs class labels, so the scores are rounded for fitting.
            self.lda.fit(train_data, np.round(train_labels))
            joblib.dump(self.lda, os.path.join(self.data_path, 'fitted_lda.pkl'))
        train_data = self._reduce(train_data)
        test_data = self._reduce(test_data)

        self.regressor.fit(train_data, train_labels)
        joblib.dump(self.regressor.fitted_pipeline_, os.path.join(self.data_path, 'fitted_tpot.pkl'))

        preds = self.regressor.predict(test_data)

        # Share of predictions that round to the same value as the label.
        accuracy = np.mean(np.round(test_labels) == np.round(preds))

        # Share of predictions that are at most 1 away from the label.
        within_1_accuracy = np.mean(np.abs(test_labels - preds) <= 1)

        mean_sqrt_error = self.regressor.score(test_data, test_labels)

        return accuracy, within_1_accuracy, mean_sqrt_error

    def predict(self, image):
        """
        Predicts the attractiveness of a picture with a human in it.

        :param image: The image with the human in it (converted with ``COLOR_BGR2GRAY``).
        :return: The prediction, or None when no face is found in the image.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        face = self._extract_face(gray)
        if face is None:
            return None

        if self.gabor:
            # train() filters the stored 64x64 face crops, so the filter bank has
            # to be applied after cropping and resizing here as well; filtering
            # the full-size frame first would produce different features.
            face = self.process(face, self.filters)

        features = self._reduce(np.array([face.flatten()]))
        return self.regressor.predict(features)[0]

    def build_filters(self):
        """ returns a list of kernels in several orientations

        :return: List of ``(kernel, params)`` tuples, one per orientation, where
                 ``params`` are the keyword arguments the kernel was created with.
        """
        filters = []
        ksize = 31
        # 32 orientations, evenly spaced over [0, pi).
        for theta in np.arange(0, np.pi, np.pi / 32):
            params = {'ksize': (ksize, ksize), 'sigma': 1.0, 'theta': theta, 'lambd': 15.0,
                      'gamma': 0.02, 'psi': 0, 'ktype': cv2.CV_32F}
            kern = cv2.getGaborKernel(**params)
            kern /= 1.5 * kern.sum()
            filters.append((kern, params))
        return filters

    def process(self, img, filters):
        """
        Returns the img filtered by the filter list.

        Every kernel is applied to ``img`` and the element-wise maximum of all
        responses is kept. With an empty filter list the result is all zeros.

        :param img: Image to be filtered.
        :param filters: The gabor filters to be used, as returned by ``build_filters``.
        :return: Array with the same shape and dtype as ``img``.
        """
        accum = np.zeros_like(img)
        for kern, _ in filters:
            fimg = cv2.filter2D(img, cv2.CV_8UC3, kern)
            # Third argument is the output array: accumulate in place.
            np.maximum(accum, fimg, accum)
        return accum

    def load(self):
        """
        Loads the fitted tpot models into usage

        Looks for ``fitted_tpot.pkl``, ``fitted_pca.pkl`` and ``fitted_lda.pkl``
        in ``data_path``; files that are not present are skipped silently.

        :return: None
        :raises Exception: If one of the existing files cannot be loaded; the
                           original error is attached as the cause.
        """
        tpot_path = os.path.join(self.data_path, 'fitted_tpot.pkl')
        pca_path = os.path.join(self.data_path, 'fitted_pca.pkl')
        lda_path = os.path.join(self.data_path, 'fitted_lda.pkl')
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code -- only point data_path at trusted model files.
        try:
            if os.path.exists(tpot_path):
                self.regressor.fitted_pipeline_ = joblib.load(tpot_path)
            if os.path.exists(pca_path):
                self.pca = joblib.load(pca_path)
            if os.path.exists(lda_path):
                self.lda = joblib.load(lda_path)
        except Exception as err:
            # `except Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit through; chaining keeps the real failure visible.
            raise Exception('Fitted model does not exist under Rateme/src') from err