You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

64 lines
2.0 KiB

from PIL import Image, ImageOps
import numpy, os
from sklearn.feature_extraction import image
from sklearn.model_selection import KFold, cross_val_score
import numpy as np
import pandas as pd
from time import time
import pickle
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
path="dataset/"
Xlist=[]
Ylist=[]
size = 100, 100
#load images from dataset
for directory in os.listdir(path):
for file in os.listdir(path+directory):
print(path+directory+"/"+file)
img=Image.open(path+directory+"/"+file)
#resize
thumb = ImageOps.fit(img, size, Image.ANTIALIAS)
image_data = np.array(thumb).flatten()[:100]
Xlist.append(image_data)
Ylist.append(directory)
from sklearn.ensemble import RandomForestClassifier
pipe = Pipeline([
('clf', RandomForestClassifier()),
])
param_grid = dict(clf__n_estimators=[100])
grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=-1, verbose=1, cv=3)
# Utility function to report best scores
def report(results, n_top=10):
for i in range(1, n_top + 1):
candidates = np.flatnonzero(results['rank_test_score'] == i)
for candidate in candidates:
print("Model with rank: {0}".format(i))
print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
results['mean_test_score'][candidate],
results['std_test_score'][candidate]))
print("Parameters: {0}".format(results['params'][candidate]))
print("")
start = time()
grid_search = GridSearchCV(pipe, param_grid, n_jobs=-1, verbose=1, cv=3)
grid_search.fit(Xlist, Ylist)
print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
% (time() - start, len(grid_search.cv_results_['params'])))
print("finished GridSearch")
report(grid_search.cv_results_)
pickle.dump(grid_search, open('model.pkl', 'wb'))
print("pipeline model saved to model.pkl")