# Source code for tpot_classification

import cv2
from src.cascade_classifcation import find_face, create_folder
import pandas as pd
import os
from tpot import TPOTRegressor
import numpy as np
from sklearn.utils import shuffle
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.externals import joblib

class TpotClassifier():
    """
    Trains a machine learning model with tpot, which can then be used to
    predict attractiveness of pictures of humans.

    Despite the class name, the underlying estimator is a ``TPOTRegressor``:
    scores are predicted as continuous values and only rounded when the
    accuracy figures are computed in :meth:`train`.

    The face data set itself is built separately with :meth:`create_data`
    (which takes a long time); :meth:`train` expects ``faces.csv`` to exist
    already under ``data_path``.

    :param data_path: Path to the folder that has the image folder and
        users.csv. ``faces.csv`` and the fitted ``*.pkl`` models are read from
        and written to this folder as well.
    :param gabor: This determines if gabor filter is used in training (and,
        consistently, in prediction).
    :param reduction_method: Which method to use for dimensionality
        reduction. Supported types are 'pca' and 'lda'. Any other value
        (including the default ``None``) disables the reduction step.
    """

    def __init__(self, data_path, gabor=False, reduction_method=None):
        self.regressor = TPOTRegressor(generations=5, population_size=20,
                                       verbosity=2, config_dict='TPOT light')
        self.gabor = gabor
        self.data_path = data_path
        # The filter bank is only built when it will actually be used.
        self.filters = self.build_filters() if self.gabor else None
        self.reduction_method = reduction_method
        self.pca = PCA(n_components=2048)
        self.lda = LDA()

    @staticmethod
    def create_data(data_path):
        """Makes a 64x64 grayscale image dataset of faces.

        Reads ``users.csv``, keeps only the rows whose ``gender`` is ``'M'``,
        crops the face found by ``find_face`` from every such image, writes
        the resized crop to ``<data_path>/faces`` and finally stores the
        resulting paths and scores in ``<data_path>/faces.csv``.

        Images that cannot be read, or in which no face is found, are skipped.

        :param data_path: Path to the folder that has the image folder and
            users.csv
        """
        faces_dir = os.path.join(data_path, 'faces')
        create_folder(faces_dir)
        scores = []
        image_paths = []
        df = pd.read_csv(os.path.join(data_path, 'users.csv'))
        for _, row in df.iterrows():
            if row.gender != 'M':
                continue
            filename = os.path.split(row.image_path)[1]
            im = cv2.imread(os.path.join(data_path, 'images', filename))
            if im is None:
                # cv2.imread reports a missing or undecodable file by
                # returning None instead of raising; skip such rows rather
                # than crashing in cvtColor half way through the data set.
                continue
            gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
            x, y, w, h = find_face(gray)
            if x is None:
                # No face detected in this picture.
                continue
            roi_gray = gray[y:y + h, x:x + w]
            resized = cv2.resize(roi_gray, (64, 64))
            path = os.path.join(faces_dir, filename)
            cv2.imwrite(path, resized)
            image_paths.append(path)
            scores.append(row.score)
        new_df = pd.DataFrame({'image_path': image_paths, 'score': scores})
        new_df.to_csv(os.path.join(data_path, 'faces.csv'))

    def train(self):
        """
        Uses tpot to train a machine learning model to predict attractiveness
        of pictures of human faces.

        The images listed in ``faces.csv`` are shuffled and split 80/20 into a
        training and a test part. The optional dimensionality reduction is
        fitted on the training part only. The fitted reducer and the fitted
        tpot pipeline are dumped into ``data_path`` so that :meth:`load` can
        restore them later.

        :return: A tuple ``(accuracy, within_1_accuracy, mean_sqrt_error)``
            measured on the test part: the share of predictions that equal the
            label after rounding both, the share of predictions at most 1 away
            from the label, and the value of ``self.regressor.score``
            (NOTE(review): with TPOT's default regression scoring this is
            presumably the negative mean squared error - confirm against the
            installed TPOT version).
        :raises ValueError: If no image listed in ``faces.csv`` could be read.
        """
        df = pd.read_csv(os.path.join(self.data_path, 'faces.csv'))
        imgs = []
        labels = []
        for _, row in df.iterrows():
            img = cv2.imread(row.image_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # Unreadable file: cv2.imread returns None instead of raising.
                continue
            if self.gabor:
                img = self.process(img, self.filters)
            imgs.append(np.array(img.flatten()))
            labels.append(row.score)
        if not imgs:
            raise ValueError('No readable face images listed in faces.csv')

        imgs = np.array(imgs)
        labels = np.array(labels)
        imgs, labels = shuffle(imgs, labels)

        # 80/20 train/test split of the shuffled data.
        train_len = int(len(imgs) * 0.8)
        train_data = imgs[:train_len]
        train_labels = labels[:train_len]
        test_data = imgs[train_len:]
        test_labels = labels[train_len:]

        if self.reduction_method == 'pca':
            # PCA cannot extract more components than
            # min(n_samples, n_features); shrink the configured number for
            # small data sets instead of failing inside fit().
            max_components = min(train_data.shape)
            if self.pca.n_components > max_components:
                self.pca.set_params(n_components=max_components)
            self.pca.fit(train_data)
            joblib.dump(self.pca,
                        os.path.join(self.data_path, 'fitted_pca.pkl'))
            train_data = self.pca.transform(train_data)
            test_data = self.pca.transform(test_data)
        elif self.reduction_method == 'lda':
            # LDA needs discrete classes, so the scores are rounded for
            # fitting the reducer only; the regressor still sees raw scores.
            self.lda.fit(train_data, np.round(train_labels))
            joblib.dump(self.lda,
                        os.path.join(self.data_path, 'fitted_lda.pkl'))
            train_data = self.lda.transform(train_data)
            test_data = self.lda.transform(test_data)

        self.regressor.fit(train_data, train_labels)
        joblib.dump(self.regressor.fitted_pipeline_,
                    os.path.join(self.data_path, 'fitted_tpot.pkl'))

        # Calculate test accuracy
        rounded_labels = np.round(test_labels)
        preds = self.regressor.predict(test_data)
        rounded_preds = np.round(preds)
        accuracy = np.sum(rounded_labels == rounded_preds) / len(rounded_labels)

        # Calculate within 1 accuracy
        dists = np.abs(test_labels - preds)
        close = np.sum(dists <= 1)
        within_1_accuracy = close / len(dists)

        mean_sqrt_error = self.regressor.score(test_data, test_labels)
        return accuracy, within_1_accuracy, mean_sqrt_error

    def predict(self, image):
        """
        Predicts the attractiveness of a picture with a human in it.

        The face is cropped and resized to 64x64 first and only then (if
        enabled) gabor filtered, which is the same order of operations the
        training data goes through in :meth:`create_data` and :meth:`train`.

        :param image: The image with the human in it. It is converted with
            ``cv2.COLOR_BGR2GRAY``, so a BGR colour image is expected.
        :return: The prediction, or ``None`` if no face was found.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        x, y, w, h = find_face(gray)
        if x is None:
            return None
        face = cv2.resize(gray[y:y + h, x:x + w], (64, 64))
        if self.gabor:
            # Filter the 64x64 crop, not the full picture: the model was
            # trained on filtered crops, so filtering before cropping would
            # feed it differently distributed features.
            face = self.process(face, self.filters)
        features = face.flatten()
        if self.reduction_method == 'pca':
            features = self.pca.transform([features])[0]
        elif self.reduction_method == 'lda':
            features = self.lda.transform([features])[0]
        return self.regressor.predict(np.array([features]))[0]

    def build_filters(self):
        """
        Returns a list of gabor kernels in several orientations.

        :return: A list of ``(kernel, params)`` tuples, one per orientation
            ``theta`` in ``[0, pi)`` with a step of ``pi / 32``. ``params`` is
            the keyword dictionary the kernel was created with.
        """
        filters = []
        ksize = 31
        for theta in np.arange(0, np.pi, np.pi / 32):
            params = {'ksize': (ksize, ksize), 'sigma': 1.0, 'theta': theta,
                      'lambd': 15.0, 'gamma': 0.02, 'psi': 0,
                      'ktype': cv2.CV_32F}
            kern = cv2.getGaborKernel(**params)
            # Scale the kernel so that its coefficients sum to 1 / 1.5.
            kern /= 1.5 * kern.sum()
            filters.append((kern, params))
        return filters

    def process(self, img, filters):
        """
        Returns the img filtered by the filter list.

        Every kernel is applied to ``img`` and the element-wise maximum over
        all responses is returned. With an empty filter list the result is an
        all-zero array shaped like ``img``.

        :param img: Image to be filtered.
        :param filters: The gabor filters to be used, as returned by
            :meth:`build_filters`.
        :return: The accumulated maximum response, same shape and dtype as
            ``img``.
        """
        accum = np.zeros_like(img)
        for kern, _ in filters:
            fimg = cv2.filter2D(img, cv2.CV_8UC3, kern)
            # In-place maximum: accum keeps the strongest response so far.
            np.maximum(accum, fimg, accum)
        return accum

    def load(self):
        """
        Loads the fitted tpot models into usage

        Each of ``fitted_tpot.pkl``, ``fitted_pca.pkl`` and ``fitted_lda.pkl``
        is loaded from ``data_path`` if it exists; files that are missing are
        silently skipped and the corresponding attribute is left untouched.

        NOTE: the files are unpickled via joblib, which can execute arbitrary
        code - only load model files from a trusted source.

        :return: None
        :raises Exception: If an existing model file cannot be loaded.
        """
        tpot_path = os.path.join(self.data_path, 'fitted_tpot.pkl')
        pca_path = os.path.join(self.data_path, 'fitted_pca.pkl')
        lda_path = os.path.join(self.data_path, 'fitted_lda.pkl')
        try:
            if os.path.exists(tpot_path):
                self.regressor.fitted_pipeline_ = joblib.load(tpot_path)
            if os.path.exists(pca_path):
                self.pca = joblib.load(pca_path)
            if os.path.exists(lda_path):
                self.lda = joblib.load(lda_path)
        except Exception as err:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate,
            # and the original cause is kept for debugging.
            raise Exception('Could not load fitted model from {}'.format(
                self.data_path)) from err