@@ -1 +0,0 @@
serverGo

@@ -1,48 +0,0 @@
from os import walk
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageOps


#pixels, pixels of the output resizing images
size = 100, 100

def imgFileToData(path):
    image = Image.open(path)
    #resize the image
    thumb = ImageOps.fit(image, size, Image.ANTIALIAS)
    image_data = np.asarray(thumb).flatten()
    '''
    plt.plot(111)
    plt.imshow(thumb)
    plt.show()
    '''
    if len(image_data)!=30000:
        print "possible future ERROR!"
        print "len: " + str(len(image_data))
        print "please, delete: " + path
    return image_data


def getDirectoryFiles(path, imgClass):
    images = []
    for (dirpath, dirnames, filenames) in walk(path):
        for filename in filenames:
            #print filename
            image_data = imgFileToData(path + "/" + filename)
            images.append([image_data, imgClass])
            print path + "/" + filename
    return images


def asdf():
    for index, (image, prediction) in enumerate(images_and_predictions[:4]):
        plt.subplot(2, 4, index + 5)
        plt.axis('off')
        plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
        plt.title('Prediction: %i' % prediction)


objects = getDirectoryFiles("object", 1)
noobjects = getDirectoryFiles("noobject", 0)

dataset = np.concatenate((objects, noobjects), axis=0)

np.save('dataset.npy', dataset)

@@ -1 +0,0 @@
trainBACKUP.py

@@ -1,24 +0,0 @@
# serverImgPredictor
Need the file dataset.data

### install Flask
http://flask.pocoo.org/docs/0.12/quickstart/#a-minimal-application
(sudo) pip install Flask

pip install flask_restful
pip install flask-jsonpify

### install scikit-neuralnetwork
https://scikit-neuralnetwork.readthedocs.io/en/latest/guide_installation.html
pip install scikit-neuralnetwork

also need to upgrade the Lasagne library:
(sudo) pip install --upgrade https://github.com/Lasagne/Lasagne/archive/master.zip


## Run
python train.py

will generate nn.pkl

copy nn.pkl to the serverPredictor directory

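Editor's sketch (not part of the diff): the deleted README never shows the serving side it refers to. A minimal Flask predictor around nn.pkl could look like the following; the /predict route, port 3045, and upload field name `file` come from the curl test script added later in this diff, everything else is assumed rather than taken from the repo's actual server code.

import pickle
import numpy as np
from PIL import Image, ImageOps
from flask import Flask, request, jsonify

app = Flask(__name__)
nn = pickle.load(open('nn.pkl', 'rb'))  #network trained by train.py
size = 100, 100

@app.route('/predict', methods=['POST'])
def predict():
    #the test scripts upload the image as multipart field 'file'
    img = Image.open(request.files['file'])
    #same preprocessing as the dataset script: fit to 100x100, flatten
    thumb = ImageOps.fit(img, size, Image.ANTIALIAS)
    data = np.asarray(thumb).flatten().reshape(1, -1)
    return jsonify({'prediction': int(nn.predict(data)[0])})

if __name__ == '__main__':
    app.run(port=3045)
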
@@ -1,32 +0,0 @@
from sklearn.neural_network import MLPClassifier
from skimage import io

img1 = io.imread("imgs/25.png")
img2 = io.imread("imgs/24.png")
img3 = io.imread("imgs/104.png")

img4 = io.imread("otherimgs/image_0008.jpg")


data_train = [img1, img2, img3, img4]
data_labels = [1, 1, 1, 0]
data_test = [img4, img3]
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(5,2), random_state=1)
clf.fit(data_train, data_labels)

clf.predict(data_test)

print "MPLClassifier values:"
[coef.shape for coef in clf.coefs_]


'''
images_and_predictions = list(zip(digits.images[n_samples // 2:], predicted))
for index, (image, prediction) in enumerate(images_and_predictions[:4]):
    plt.subplot(2, 4, index + 5)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Prediction: %i' % prediction)
'''

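Editor's note on this deleted experiment: scikit-learn estimators expect a 2-D (n_samples, n_features) array, while skimage's io.imread returns each image as an H x W x C array, so clf.fit(data_train, data_labels) as written would be rejected. A hedged fix, assuming the images were first brought to a common size, is to flatten each one before fitting:

data_train = [img.flatten() for img in (img1, img2, img3, img4)]
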
@@ -1,62 +0,0 @@
import matplotlib.pyplot as plt
import numpy as np
from random import randint
import pickle
from sknn.mlp import Classifier, Layer, Convolution

def datasetToTrainAndTestData(dataset, numtest):
    np.random.shuffle(dataset)
    print "length total data:" + str(len(dataset))

    traindata = np.copy(dataset)
    testdata = []
    for i in range(numtest):
        #get random integer between 0 and the total amount of images in the dataset
        n = randint(0, len(traindata))
        testdata.append(dataset[n])

        #delete the n image (dataset[n]) of the traindata
        traindata = np.delete(traindata, n, axis=0)
    testdataNP = np.array(testdata)
    return traindata, testdataNP


#read the dataset made with the 'imagesToDataset' repository
dataset = np.load('dataset.npy')

traindata, testdata = datasetToTrainAndTestData(dataset, 10)
print "length traindata: " + str(len(traindata))
print "length testdata: " + str(len(testdata))

#traindataAttributes contains all the pixels of each image
traindataAttributes = traindata[:,0]
traindataAttributes = np.array([[row] for row in traindataAttributes])

#traindataLabels contains each label of each image
traindataLabels = traindata[:,1]
traindataLabels = traindataLabels.astype('int')

#testdataAttributes contains the pixels of the test images
testdataAttributes = testdata[:,0]
testdataAttributes = np.array([[row] for row in testdataAttributes])

#testdataLabels contains each label of each image
testdataLabels = testdata[:,1]
testdataLabels = testdataLabels.astype('int')

#default: units=100, learning_rate=0.001, n_iter=25
nn = Classifier(
    layers=[
        Layer("Sigmoid", units=10),
        Layer("Softmax")],
    learning_rate=0.001,
    n_iter=20,
    verbose=True)

nn.fit(traindataAttributes, traindataLabels)

print('\nTRAIN SCORE', nn.score(traindataAttributes, traindataLabels))
print('TEST SCORE', nn.score(testdataAttributes, testdataLabels))

#save the neural network configuration
pickle.dump(nn, open('nn.pkl', 'wb'))

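Editor's note: two bugs lurk in the deleted train/test sampler above. randint is inclusive on both ends, so n = randint(0, len(traindata)) can index one past the end, and testdata is read from `dataset` while rows are deleted from `traindata`, so after the first deletion the two indices no longer point at the same row and test images can remain in the training set. A corrected sketch (the fix is the editor's, not code from the repo):

def datasetToTrainAndTestData(dataset, numtest):
    np.random.shuffle(dataset)
    traindata = np.copy(dataset)
    testdata = []
    for i in range(numtest):
        n = randint(0, len(traindata) - 1)  #randint includes the upper bound
        testdata.append(traindata[n])       #take the row that is actually removed
        traindata = np.delete(traindata, n, axis=0)
    return traindata, np.array(testdata)
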
@@ -0,0 +1,54 @@
from os import walk
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageOps
import pandas as pd


#pixels, pixels of the output resized images
size = 100, 100

def imgFileToData(path):
    image = Image.open(path)
    #resize the image
    thumb = ImageOps.fit(image, size, Image.ANTIALIAS)
    image_data = np.asarray(thumb)
    #.flatten()

    #check that the image was resized to 100x100: len() of the array is its height,
    #so it should be 100 (flattened, 3 channels * 100 width * 100 height = 30000 values)
    if len(image_data)!=100:
        print("possible future ERROR!")
        print("len: " + str(len(image_data)))
        print("please, delete: " + path)
    return np.array(list(image_data))


def getDirectoryFiles(path, imgClass):
    images = []
    for (dirpath, dirnames, filenames) in walk(path):
        for filename in filenames:
            #print(filename)
            image_data = imgFileToData(path + "/" + filename)
            images.append([image_data, imgClass])
            print(path + "/" + filename)
    return images


objects = getDirectoryFiles("object", 1)
noobjects = getDirectoryFiles("noobject", 0)

dataset = np.concatenate((objects, noobjects), axis=0)
#print(dataset[0])

np.save('dataset.npy', dataset)
'''
print(dataset)
np.savetxt('dataset.csv', dataset, delimiter=",", fmt='%d')

pd.set_option('display.max_colwidth', -1)
df = pd.DataFrame(dataset)
print(df.head())
print("aaa")
print(df[0][0])
print("aaa")
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', None)
df.to_csv("dataset.csv", encoding='utf-8', index=False, header=False)
'''

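Editor's sketch (not part of the diff): because each row of the saved array pairs a 100x100x3 image array with an integer label, dataset.npy holds a 2-column object array. Reading it back into feature and label arrays could look like this (allow_pickle is only needed on NumPy >= 1.16.3, where it defaults to False):

import numpy as np
dataset = np.load('dataset.npy', allow_pickle=True)     #shape (n_images, 2)
X = np.array([img.flatten() for img in dataset[:, 0]])  #one flat pixel vector per image
y = dataset[:, 1].astype('int')                         #class labels (1/0)
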
@@ -0,0 +1 @@
currentimage.png

@@ -0,0 +1,10 @@
echo "sending img1 to server"
echo "server response:"
curl -F file=@./test1.png http://127.0.0.1:3045/predict
echo ""


echo "sending img2 to server"
echo "server response:"
curl -F file=@./test2.png http://127.0.0.1:3045/predict
echo ""

@@ -1 +1,2 @@
currentimage.png
log
.ipynb_checkpoints

@@ -0,0 +1,946 @@
(Jupyter notebook, Python 3.6 kernel, nbformat 4; its JSON is rendered here as numbered cells with their outputs)

In [1]:
from PIL import Image, ImageOps
import numpy, os
from sklearn.ensemble import AdaBoostClassifier
from sklearn.cross_validation import cross_val_score
import numpy as np
import pandas as pd
stderr: DeprecationWarning: the cross_validation module was deprecated in version 0.18 in favor of the model_selection module and will be removed in 0.20.

In [2]:
path="dataset/"
Xlist=[]
Ylist=[]
size = 100, 100

In [3]:
for directory in os.listdir(path):
    for file in os.listdir(path+directory):
        print(path+directory+"/"+file)
        img=Image.open(path+directory+"/"+file)
        #resize
        thumb = ImageOps.fit(img, size, Image.ANTIALIAS)
        image_data = np.array(thumb).flatten()[:100]
        #image_data=numpy.array(img).flatten()[:50] #in my case the images dont have the same dimensions, so [:50] only takes the first 50 values
        Xlist.append(image_data)
        Ylist.append(directory)
output: one path per image, dataset/object/27.png through dataset/noobject/image_0180.jpg

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(Xlist, Ylist, test_size=0.2)

### AdaBoostClassifier
In [5]: clf=AdaBoostClassifier(n_estimators=100)
In [6]: scores = cross_val_score(clf, X_train, y_train, cv=3)
In [7]: print(scores.mean())
0.77037037037

### GaussianNB
In [8]: from sklearn.naive_bayes import GaussianNB
In [9]: clf = GaussianNB()
In [10]: scores = cross_val_score(clf, Xlist, Ylist)
In [11]: print(scores.mean())
0.721908939014

### KNeighborsClassifier
In [12]: from sklearn.neighbors import KNeighborsClassifier
In [13]: clf = KNeighborsClassifier(n_neighbors=10)
In [14]: scores = cross_val_score(clf, Xlist, Ylist)
In [15]: print(scores.mean())
0.751357560568

### LinearSVC
In [16]: from sklearn.svm import LinearSVC
In [17]: clf = LinearSVC()
In [18]: scores = cross_val_score(clf, Xlist, Ylist)
In [19]: print(scores.mean())
0.638575605681

### SVC
In [20]: from sklearn.svm import SVC
In [21]: clf = SVC()
In [22]: scores = cross_val_score(clf, Xlist, Ylist)
In [23]: print(scores.mean())
0.668650793651

### GaussianProcessClassifier
In [24]: from sklearn.gaussian_process import GaussianProcessClassifier
In [25]: clf = GaussianProcessClassifier()
In [26]: scores = cross_val_score(clf, Xlist, Ylist)
In [27]: print(scores.mean())
0.491228070175

### RandomForestClassifier
In [28]: from sklearn.ensemble import RandomForestClassifier
In [29]: clf = RandomForestClassifier()
In [30]: scores = cross_val_score(clf, Xlist, Ylist)
In [31]: print(scores.mean())
0.710317460317

# Hyperparameters Tuning using sklearn pipeline and gridsearch
In [32]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [33]:
pipe = [Pipeline([
            ('clf', RandomForestClassifier()),
        ]),
        Pipeline([
            ('clf', KNeighborsClassifier()),
        ]),
        Pipeline([
            ('clf', GaussianProcessClassifier()),
        ]),
        Pipeline([
            ('clf', AdaBoostClassifier()),
        ]),
        Pipeline([
            ('clf', SVC()),
        ]),
]

In [34]:
param_grid = [dict(clf__n_estimators=[3, 10, 100]),
              dict(clf__n_neighbors=[3,10]),
              dict(clf__n_restarts_optimizer=[0,1]),
              dict(clf__n_estimators=[3, 10, 100]),
              dict(clf__C=[3, 10, 100]),
             ]

In [35]: grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=-1, verbose=1, cv=3)

In [36]: #grid_search.fit(Xlist, Ylist)

In [37]:
# Utility function to report best scores
def report(results, n_top=10):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")

In [38]:
from time import time

for i in range(len(pipe)):
    start = time()
    print("-----")
    print("classifier:")
    print(pipe[i].named_steps['clf'])
    grid_search = GridSearchCV(pipe[i], param_grid[i], n_jobs=-1, verbose=1, cv=3)
    grid_search.fit(X_train, y_train)
    print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
          % (time() - start, len(grid_search.cv_results_['params'])))
    print("finished GridSearch")
    report(grid_search.cv_results_)
output (best mean validation scores per classifier):
RandomForestClassifier:    0.815 (std: 0.073) with clf__n_estimators=100; then 0.763 (10) and 0.756 (3)
KNeighborsClassifier:      0.778 (std: 0.048) with clf__n_neighbors=3; then 0.704 (10)
GaussianProcessClassifier: 0.489 (std: 0.000) for both clf__n_restarts_optimizer=0 and 1
AdaBoostClassifier:        0.807 (std: 0.093) with clf__n_estimators=3; then 0.756 (100) and 0.733 (10)
SVC:                       0.689 (std: 0.031) for clf__C in {3, 10, 100}

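Editor's note on the feature extraction above: np.array(thumb).flatten()[:100] keeps only the first 100 of the 30000 values of each 100x100 RGB thumbnail (roughly the first 33 pixels of the top row), so every score in the notebook, and in the training script below, is computed on that thin slice. Using the full image only means dropping the slice:

image_data = np.array(thumb).flatten()   #all 100*100*3 = 30000 features
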
@@ -0,0 +1,64 @@
from PIL import Image, ImageOps
import numpy, os

from sklearn.feature_extraction import image
from sklearn.model_selection import KFold, cross_val_score

import numpy as np
import pandas as pd
from time import time
import pickle

from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV


path="dataset/"
Xlist=[]
Ylist=[]
size = 100, 100

#load images from dataset
for directory in os.listdir(path):
    for file in os.listdir(path+directory):
        print(path+directory+"/"+file)
        img=Image.open(path+directory+"/"+file)
        #resize
        thumb = ImageOps.fit(img, size, Image.ANTIALIAS)
        image_data = np.array(thumb).flatten()[:100]
        Xlist.append(image_data)
        Ylist.append(directory)

from sklearn.ensemble import RandomForestClassifier

pipe = Pipeline([
    ('clf', RandomForestClassifier()),
])

param_grid = dict(clf__n_estimators=[100])

grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=-1, verbose=1, cv=3)

# Utility function to report best scores
def report(results, n_top=10):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")

start = time()
grid_search = GridSearchCV(pipe, param_grid, n_jobs=-1, verbose=1, cv=3)
grid_search.fit(Xlist, Ylist)
print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
      % (time() - start, len(grid_search.cv_results_['params'])))
print("finished GridSearch")
report(grid_search.cv_results_)

pickle.dump(grid_search, open('model.pkl', 'wb'))
print("pipeline model saved to model.pkl")