pipeline model chooser working, server predictor working

arnaucode
2017-11-29 18:22:09 +01:00
parent 26f61c02f5
commit 950c6b4c57
207 changed files with 1168 additions and 449 deletions

.gitignore vendored

@@ -1 +0,0 @@
serverGo


@@ -10,3 +10,16 @@
- Take a photo and upload it to the server to get the response (object or no object)
![hotdognohotdog](https://raw.githubusercontent.com/arnaucode/objectImageIdentifierAI/master/hotdognohotdog.png "hotdognohotdog")
## Real steps
- Download the images
  - for example, this can be done with https://github.com/arnaucode/imgDownloader.git
- In the /serverPredictor directory, run
```
python classifierChooser.py
```
This will generate model.pkl. Then run serverPredictor.py:
```
python serverPredictor.py
```
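Once serverPredictor.py is running, it can be queried by uploading an image to the /predict endpoint on port 3045 (the same endpoint used by test.sh). A minimal client sketch, assuming the `requests` package is installed (it is not among the dependencies listed above):
```
import requests

# upload the image as the multipart field "file", like `curl -F file=@./test1.png` in test.sh
with open('test1.png', 'rb') as f:
    response = requests.post('http://127.0.0.1:3045/predict', files={'file': f})

# the server answers with the predicted class (object / noobject)
print(response.text)
```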


@@ -2,23 +2,18 @@ timestamp() {
date +"%T"
}
cd serverPredictor
timestamp
echo "Starting imagesToDataset"
cd imagesToDataset && python main.py
cd ..
cp ./imagesToDataset/dataset.npy ./nnTrain/dataset.npy
timestamp
echo "Starting nnTrain"
cd nnTrain && python train.py
cd ..
cp ./nnTrain/nn.pkl ./serverPredictor/nn.pkl
echo "Starting training the model: classifierChooser.py"
python classifierChooser.py
timestamp
echo "Starting serverPredictor"
cd serverPredictor && python main.py
xterm -hold -e 'python serverPredictor.py' &
sleep 4
echo "----- deploy.sh finished -----"
timestamp
echo "Starting test.sh"
xterm -hold -e 'bash test.sh' &


@@ -1,48 +0,0 @@
from os import walk
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageOps
#pixels, pixels of the output resizing images
size = 100, 100
def imgFileToData(path):
image = Image.open(path)
#resize the image
thumb = ImageOps.fit(image, size, Image.ANTIALIAS)
image_data = np.asarray(thumb).flatten()
'''
plt.plot(111)
plt.imshow(thumb)
plt.show()
'''
if len(image_data)!=30000:
print "possible future ERROR!"
print "len: " + str(len(image_data))
print "please, delete: " + path
return image_data
def getDirectoryFiles(path, imgClass):
images = []
for (dirpath, dirnames, filenames) in walk(path):
for filename in filenames:
#print filename
image_data = imgFileToData(path + "/" + filename)
images.append([image_data, imgClass])
print path + "/" + filename
return images
def asdf():
for index, (image, prediction) in enumerate(images_and_predictions[:4]):
plt.subplot(2, 4, index + 5)
plt.axis('off')
plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
plt.title('Prediction: %i' % prediction)
objects = getDirectoryFiles("object", 1)
noobjects = getDirectoryFiles("noobject", 0)
dataset = np.concatenate((objects, noobjects), axis=0)
np.save('dataset.npy', dataset)

nnTrain/.gitignore vendored

@@ -1 +0,0 @@
trainBACKUP.py


@@ -1,24 +0,0 @@
# serverImgPredictor
Need the file dataset.data
### install Flask
http://flask.pocoo.org/docs/0.12/quickstart/#a-minimal-application
(sudo) pip install Flask
pip install flask_restful
pip install flask-jsonpify
### install scikit-neuralnetwork
https://scikit-neuralnetwork.readthedocs.io/en/latest/guide_installation.html
pip install scikit-neuralnetwork
also need to upgrade the Lasagne library:
(sudo) pip install --upgrade https://github.com/Lasagne/Lasagne/archive/master.zip
## Run
python train.py
will generate nn.pkl
copy nn.pkl to the serverPredictor directory


@@ -1,32 +0,0 @@
from sklearn.neural_network import MLPClassifier
from skimage import io
img1 = io.imread("imgs/25.png")
img2 = io.imread("imgs/24.png")
img3 = io.imread("imgs/104.png")
img4 = io.imread("otherimgs/image_0008.jpg")
data_train = [img1, img2, img3, img4]
data_labels = [1, 1, 1, 0]
data_test = [img4, img3]
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
hidden_layer_sizes=(5,2), random_state=1)
clf.fit(data_train, data_labels)
clf.predict(data_test)
print "MPLClassifier values:"
[coef.shape for coef in clf.coefs_]
'''
images_and_predictions = list(zip(digits.images[n_samples // 2:], predicted))
for index, (image, prediction) in enumerate(images_and_predictions[:4]):
plt.subplot(2, 4, index + 5)
plt.axis('off')
plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
plt.title('Prediction: %i' % prediction)
'''


@@ -1,62 +0,0 @@
import matplotlib.pyplot as plt
import numpy as np
from random import randint
import pickle
from sknn.mlp import Classifier, Layer, Convolution
def datasetToTrainAndTestData(dataset, numtest):
np.random.shuffle(dataset)
print "length total data:" + str(len(dataset))
traindata = np.copy(dataset)
testdata = []
for i in range(numtest):
#get random integer between 0 and the total amount of images in the dataset
n = randint(0, len(traindata))
testdata.append(dataset[n])
#delete the n image (dataset[n]) of the traindata
traindata = np.delete(traindata, n, axis=0)
testdataNP = np.array(testdata)
return traindata, testdataNP
#read the dataset made with the 'imagesToDataset' repository
dataset = np.load('dataset.npy')
traindata, testdata = datasetToTrainAndTestData(dataset, 10)
print "length traindata: " + str(len(traindata))
print "length testdata: " + str(len(testdata))
#traindataAttributes contains all the pixels of each image
traindataAttributes = traindata[:,0]
traindataAttributes = np.array([[row] for row in traindataAttributes])
#traindataLabels contains each label of each image
traindataLabels = traindata[:,1]
traindataLabels = traindataLabels.astype('int')
#testdataAttributes contains the pixels of the test images
testdataAttributes = testdata[:,0]
testdataAttributes = np.array([[row] for row in testdataAttributes])
#testdataLabels contains each label of each image
testdataLabels = testdata[:,1]
testdataLabels = testdataLabels.astype('int')
#default: units=100, learning_rate=0.001, n_iter=25
nn = Classifier(
layers=[
Layer("Sigmoid", units=10),
Layer("Softmax")],
learning_rate=0.001,
n_iter=20,
verbose=True)
nn.fit(traindataAttributes, traindataLabels)
print('\nTRAIN SCORE', nn.score(traindataAttributes, traindataLabels))
print('TEST SCORE', nn.score(testdataAttributes, testdataLabels))
#save the neural network configuration
pickle.dump(nn, open('nn.pkl', 'wb'))


@@ -0,0 +1,54 @@
from os import walk
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageOps
import pandas as pd
#width, height in pixels of the resized output images
size = 100, 100
def imgFileToData(path):
image = Image.open(path)
#resize the image
thumb = ImageOps.fit(image, size, Image.ANTIALIAS)
image_data = np.asarray(thumb)
#.flatten()
#check that the image was resized to 100x100 (image_data is not flattened here, so it should have 100 rows; flattened RGB would be 3 * 100 * 100 = 30000 values)
if len(image_data)!=100:
print("possible future ERROR!")
print("len: " + str(len(image_data)))
print("please, delete: " + path)
return np.array(list(image_data))
def getDirectoryFiles(path, imgClass):
images = []
for (dirpath, dirnames, filenames) in walk(path):
for filename in filenames:
#print(filename)
image_data = imgFileToData(path + "/" + filename)
images.append([image_data, imgClass])
print(path + "/" + filename)
return images
objects = getDirectoryFiles("object", 1)
noobjects = getDirectoryFiles("noobject", 0)
dataset = np.concatenate((objects, noobjects), axis=0)
#print(dataset[0])
np.save('dataset.npy', dataset)
'''
print(dataset)
np.savetxt('dataset.csv', dataset, delimiter=",", fmt='%d')
pd.set_option('display.max_colwidth', -1)
df = pd.DataFrame(dataset)
print(df.head())
print("aaa")
print(df[0][0])
print("aaa")
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', None)
df.to_csv("dataset.csv", encoding='utf-8', index=False, header=False)
'''

other/serverPredictorOLD/.gitignore vendored

@@ -0,0 +1 @@
currentimage.png


@@ -0,0 +1,10 @@
echo "sending img1 to server"
echo "server response:"
curl -F file=@./test1.png http://127.0.0.1:3045/predict
echo ""
echo "sending img2 to server"
echo "server response:"
curl -F file=@./test2.png http://127.0.0.1:3045/predict
echo ""

(Two binary image files added; previews not shown: 71 KiB and 152 KiB.)


@@ -1 +1,2 @@
currentimage.png
log
.ipynb_checkpoints


@@ -0,0 +1,946 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/asus/anaconda3/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
" \"This module will be removed in 0.20.\", DeprecationWarning)\n"
]
}
],
"source": [
"from PIL import Image, ImageOps\n",
"import numpy, os\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"from sklearn.cross_validation import cross_val_score\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"path=\"dataset/\"\n",
"Xlist=[]\n",
"Ylist=[]\n",
"size = 100, 100"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dataset/object/27.png\n",
"dataset/object/82.png\n",
"dataset/object/83.png\n",
"dataset/object/100.png\n",
"dataset/object/0.png\n",
"dataset/object/13.png\n",
"dataset/object/45.png\n",
"dataset/object/64.png\n",
"dataset/object/19.png\n",
"dataset/object/101.png\n",
"dataset/object/40.png\n",
"dataset/object/97.png\n",
"dataset/object/41.png\n",
"dataset/object/7.png\n",
"dataset/object/66.png\n",
"dataset/object/55.png\n",
"dataset/object/56.png\n",
"dataset/object/65.png\n",
"dataset/object/18.png\n",
"dataset/object/24.png\n",
"dataset/object/105.png\n",
"dataset/object/116.png\n",
"dataset/object/117.png\n",
"dataset/object/104.png\n",
"dataset/object/63.png\n",
"dataset/object/38.png\n",
"dataset/object/58.png\n",
"dataset/object/103.png\n",
"dataset/object/112.png\n",
"dataset/object/33.png\n",
"dataset/object/76.png\n",
"dataset/object/59.png\n",
"dataset/object/96.png\n",
"dataset/object/91.png\n",
"dataset/object/57.png\n",
"dataset/object/2.png\n",
"dataset/object/75.png\n",
"dataset/object/107.png\n",
"dataset/object/50.png\n",
"dataset/object/16.png\n",
"dataset/object/32.png\n",
"dataset/object/15.png\n",
"dataset/object/5.png\n",
"dataset/object/72.png\n",
"dataset/object/52.png\n",
"dataset/object/4.png\n",
"dataset/object/28.png\n",
"dataset/object/43.png\n",
"dataset/object/87.png\n",
"dataset/object/98.png\n",
"dataset/object/71.png\n",
"dataset/object/102.png\n",
"dataset/object/62.png\n",
"dataset/object/9.png\n",
"dataset/object/6.png\n",
"dataset/object/85.png\n",
"dataset/object/70.png\n",
"dataset/object/42.png\n",
"dataset/object/34.png\n",
"dataset/object/81.png\n",
"dataset/object/94.png\n",
"dataset/object/26.png\n",
"dataset/object/90.png\n",
"dataset/object/44.png\n",
"dataset/object/60.png\n",
"dataset/object/17.png\n",
"dataset/object/10.png\n",
"dataset/object/53.png\n",
"dataset/object/25.png\n",
"dataset/object/21.png\n",
"dataset/object/22.png\n",
"dataset/object/30.png\n",
"dataset/object/78.png\n",
"dataset/object/118.png\n",
"dataset/object/110.png\n",
"dataset/object/79.png\n",
"dataset/object/77.png\n",
"dataset/object/12.png\n",
"dataset/object/115.png\n",
"dataset/object/67.png\n",
"dataset/object/84.png\n",
"dataset/object/11.png\n",
"dataset/object/86.png\n",
"dataset/object/89.png\n",
"dataset/object/113.png\n",
"dataset/noobject/image_0056.jpg\n",
"dataset/noobject/image_0181.jpg\n",
"dataset/noobject/image_0127.jpg\n",
"dataset/noobject/image_0142.jpg\n",
"dataset/noobject/image_0025.jpg\n",
"dataset/noobject/image_0065.jpg\n",
"dataset/noobject/image_0174.jpg\n",
"dataset/noobject/image_0091.jpg\n",
"dataset/noobject/image_0124.jpg\n",
"dataset/noobject/image_0086.jpg\n",
"dataset/noobject/image_0079.jpg\n",
"dataset/noobject/image_0058.jpg\n",
"dataset/noobject/image_0060.jpg\n",
"dataset/noobject/image_0119.jpg\n",
"dataset/noobject/image_0023.jpg\n",
"dataset/noobject/image_0075.jpg\n",
"dataset/noobject/image_0020.jpg\n",
"dataset/noobject/image_0013.jpg\n",
"dataset/noobject/image_0126.jpg\n",
"dataset/noobject/image_0012.jpg\n",
"dataset/noobject/image_0055.jpg\n",
"dataset/noobject/image_0176.jpg\n",
"dataset/noobject/image_0144.jpg\n",
"dataset/noobject/image_0048.jpg\n",
"dataset/noobject/image_0121.jpg\n",
"dataset/noobject/image_0070.jpg\n",
"dataset/noobject/image_0082.jpg\n",
"dataset/noobject/image_0095.jpg\n",
"dataset/noobject/image_0022.jpg\n",
"dataset/noobject/image_0120.jpg\n",
"dataset/noobject/image_0139.jpg\n",
"dataset/noobject/image_0073.jpg\n",
"dataset/noobject/image_0090.jpg\n",
"dataset/noobject/image_0145.jpg\n",
"dataset/noobject/image_0173.jpg\n",
"dataset/noobject/image_0078.jpg\n",
"dataset/noobject/image_0085.jpg\n",
"dataset/noobject/image_0083.jpg\n",
"dataset/noobject/image_0179.jpg\n",
"dataset/noobject/image_0050.jpg\n",
"dataset/noobject/image_0076.jpg\n",
"dataset/noobject/image_0014.jpg\n",
"dataset/noobject/image_0054.jpg\n",
"dataset/noobject/image_0066.jpg\n",
"dataset/noobject/image_0001.jpg\n",
"dataset/noobject/image_0047.jpg\n",
"dataset/noobject/image_0077.jpg\n",
"dataset/noobject/image_0122.jpg\n",
"dataset/noobject/image_0068.jpg\n",
"dataset/noobject/image_0049.jpg\n",
"dataset/noobject/image_0092.jpg\n",
"dataset/noobject/image_0138.jpg\n",
"dataset/noobject/image_0072.jpg\n",
"dataset/noobject/image_0146.jpg\n",
"dataset/noobject/image_0061.jpg\n",
"dataset/noobject/image_0011.jpg\n",
"dataset/noobject/image_0002.jpg\n",
"dataset/noobject/image_0143.jpg\n",
"dataset/noobject/image_0088.jpg\n",
"dataset/noobject/image_0062.jpg\n",
"dataset/noobject/image_0089.jpg\n",
"dataset/noobject/image_0018.jpg\n",
"dataset/noobject/image_0024.jpg\n",
"dataset/noobject/image_0064.jpg\n",
"dataset/noobject/image_0074.jpg\n",
"dataset/noobject/image_0052.jpg\n",
"dataset/noobject/image_0096.jpg\n",
"dataset/noobject/image_0178.jpg\n",
"dataset/noobject/image_0067.jpg\n",
"dataset/noobject/image_0140.jpg\n",
"dataset/noobject/image_0084.jpg\n",
"dataset/noobject/image_0010.jpg\n",
"dataset/noobject/image_0081.jpg\n",
"dataset/noobject/image_0059.jpg\n",
"dataset/noobject/image_0016.jpg\n",
"dataset/noobject/image_0175.jpg\n",
"dataset/noobject/image_0094.jpg\n",
"dataset/noobject/image_0071.jpg\n",
"dataset/noobject/image_0080.jpg\n",
"dataset/noobject/image_0125.jpg\n",
"dataset/noobject/image_0008.jpg\n",
"dataset/noobject/image_0019.jpg\n",
"dataset/noobject/image_0017.jpg\n",
"dataset/noobject/image_0180.jpg\n"
]
}
],
"source": [
"for directory in os.listdir(path):\n",
" for file in os.listdir(path+directory):\n",
" print(path+directory+\"/\"+file)\n",
" img=Image.open(path+directory+\"/\"+file)\n",
" #resize\n",
" thumb = ImageOps.fit(img, size, Image.ANTIALIAS)\n",
" image_data = np.array(thumb).flatten()[:100]\n",
" #image_data=numpy.array(img).flatten()[:50] #in my case the images dont have the same dimensions, so [:50] only takes the first 50 values\n",
" Xlist.append(image_data)\n",
" Ylist.append(directory)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(Xlist, Ylist, test_size=0.2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### AdaBoostClassifier"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf=AdaBoostClassifier(n_estimators=100)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, X_train, y_train, cv=3)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.77037037037\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GaussianNB"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.naive_bayes import GaussianNB"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = GaussianNB()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.721908939014\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### KNeighborsClassifier"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.neighbors import KNeighborsClassifier"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = KNeighborsClassifier(n_neighbors=10)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.751357560568\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### LinearSVC"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.svm import LinearSVC"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = LinearSVC()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.638575605681\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### SVC"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.svm import SVC"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = SVC()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.668650793651\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GaussianProcessClassifier"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.gaussian_process import GaussianProcessClassifier"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = GaussianProcessClassifier()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.491228070175\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### RandomForestClassifier"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestClassifier"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = RandomForestClassifier()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.710317460317\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Hyperparameters Tuning using sklearn pipeline and gridsearch"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.pipeline import Pipeline\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.model_selection import RandomizedSearchCV"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pipe = [Pipeline([\n",
" ('clf', RandomForestClassifier()),\n",
" ]),\n",
" Pipeline([\n",
" ('clf', KNeighborsClassifier()),\n",
" ]),\n",
" Pipeline([\n",
" ('clf', GaussianProcessClassifier()),\n",
" ]),\n",
" Pipeline([\n",
" ('clf', AdaBoostClassifier()),\n",
" ]),\n",
" Pipeline([\n",
" ('clf', SVC()),\n",
" ]),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"param_grid = [dict(clf__n_estimators=[3, 10, 100]),\n",
" dict(clf__n_neighbors=[3,10]),\n",
" dict(clf__n_restarts_optimizer=[0,1]),\n",
" dict(clf__n_estimators=[3, 10, 100]),\n",
" dict(clf__C=[3, 10, 100]),\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=-1, verbose=1, cv=3)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#grid_search.fit(Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Utility function to report best scores\n",
"def report(results, n_top=10):\n",
" for i in range(1, n_top + 1):\n",
" candidates = np.flatnonzero(results['rank_test_score'] == i)\n",
" for candidate in candidates:\n",
" print(\"Model with rank: {0}\".format(i))\n",
" print(\"Mean validation score: {0:.3f} (std: {1:.3f})\".format(\n",
" results['mean_test_score'][candidate],\n",
" results['std_test_score'][candidate]))\n",
" print(\"Parameters: {0}\".format(results['params'][candidate]))\n",
" print(\"\")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----\n",
"classifier:\n",
"RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n",
" oob_score=False, random_state=None, verbose=0,\n",
" warm_start=False)\n",
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 1.5s finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"GridSearchCV took 2.38 seconds for 3 candidate parameter settings.\n",
"finished GridSearch\n",
"Model with rank: 1\n",
"Mean validation score: 0.815 (std: 0.073)\n",
"Parameters: {'clf__n_estimators': 100}\n",
"\n",
"Model with rank: 2\n",
"Mean validation score: 0.763 (std: 0.093)\n",
"Parameters: {'clf__n_estimators': 10}\n",
"\n",
"Model with rank: 3\n",
"Mean validation score: 0.756 (std: 0.110)\n",
"Parameters: {'clf__n_estimators': 3}\n",
"\n",
"-----\n",
"classifier:\n",
"KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
" metric_params=None, n_jobs=1, n_neighbors=5, p=2,\n",
" weights='uniform')\n",
"Fitting 3 folds for each of 2 candidates, totalling 6 fits\n",
"GridSearchCV took 0.23 seconds for 2 candidate parameter settings.\n",
"finished GridSearch\n",
"Model with rank: 1\n",
"Mean validation score: 0.778 (std: 0.048)\n",
"Parameters: {'clf__n_neighbors': 3}\n",
"\n",
"Model with rank: 2\n",
"Mean validation score: 0.704 (std: 0.010)\n",
"Parameters: {'clf__n_neighbors': 10}\n",
"\n",
"-----\n",
"classifier:\n",
"GaussianProcessClassifier(copy_X_train=True, kernel=None,\n",
" max_iter_predict=100, multi_class='one_vs_rest', n_jobs=1,\n",
" n_restarts_optimizer=0, optimizer='fmin_l_bfgs_b',\n",
" random_state=None, warm_start=False)\n",
"Fitting 3 folds for each of 2 candidates, totalling 6 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.1s remaining: 0.0s\n",
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.1s finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"GridSearchCV took 0.36 seconds for 2 candidate parameter settings.\n",
"finished GridSearch\n",
"Model with rank: 1\n",
"Mean validation score: 0.489 (std: 0.000)\n",
"Parameters: {'clf__n_restarts_optimizer': 0}\n",
"\n",
"Model with rank: 1\n",
"Mean validation score: 0.489 (std: 0.000)\n",
"Parameters: {'clf__n_restarts_optimizer': 1}\n",
"\n",
"-----\n",
"classifier:\n",
"AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,\n",
" learning_rate=1.0, n_estimators=50, random_state=None)\n",
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.2s remaining: 0.0s\n",
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.2s finished\n",
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 0.9s finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"GridSearchCV took 1.16 seconds for 3 candidate parameter settings.\n",
"finished GridSearch\n",
"Model with rank: 1\n",
"Mean validation score: 0.807 (std: 0.093)\n",
"Parameters: {'clf__n_estimators': 3}\n",
"\n",
"Model with rank: 2\n",
"Mean validation score: 0.756 (std: 0.048)\n",
"Parameters: {'clf__n_estimators': 100}\n",
"\n",
"Model with rank: 3\n",
"Mean validation score: 0.733 (std: 0.054)\n",
"Parameters: {'clf__n_estimators': 10}\n",
"\n",
"-----\n",
"classifier:\n",
"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
" decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n",
" max_iter=-1, probability=False, random_state=None, shrinking=True,\n",
" tol=0.001, verbose=False)\n",
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n",
"GridSearchCV took 0.35 seconds for 3 candidate parameter settings.\n",
"finished GridSearch\n",
"Model with rank: 1\n",
"Mean validation score: 0.689 (std: 0.031)\n",
"Parameters: {'clf__C': 3}\n",
"\n",
"Model with rank: 1\n",
"Mean validation score: 0.689 (std: 0.031)\n",
"Parameters: {'clf__C': 10}\n",
"\n",
"Model with rank: 1\n",
"Mean validation score: 0.689 (std: 0.031)\n",
"Parameters: {'clf__C': 100}\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 0.1s finished\n"
]
}
],
"source": [
"from time import time\n",
"\n",
"for i in range(len(pipe)):\n",
" start = time()\n",
" print(\"-----\")\n",
" print(\"classifier:\")\n",
" print(pipe[i].named_steps['clf'])\n",
" grid_search = GridSearchCV(pipe[i], param_grid[i], n_jobs=-1, verbose=1, cv=3)\n",
" grid_search.fit(X_train, y_train)\n",
" print(\"GridSearchCV took %.2f seconds for %d candidate parameter settings.\"\n",
" % (time() - start, len(grid_search.cv_results_['params'])))\n",
" print(\"finished GridSearch\")\n",
" report(grid_search.cv_results_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}


@@ -0,0 +1,64 @@
from PIL import Image, ImageOps
import numpy, os
from sklearn.feature_extraction import image
from sklearn.model_selection import KFold, cross_val_score
import numpy as np
import pandas as pd
from time import time
import pickle
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
path="dataset/"
Xlist=[]
Ylist=[]
size = 100, 100
#load images from dataset
for directory in os.listdir(path):
for file in os.listdir(path+directory):
print(path+directory+"/"+file)
img=Image.open(path+directory+"/"+file)
#resize
thumb = ImageOps.fit(img, size, Image.ANTIALIAS)
image_data = np.array(thumb).flatten()[:100]
Xlist.append(image_data)
Ylist.append(directory)
from sklearn.ensemble import RandomForestClassifier
pipe = Pipeline([
('clf', RandomForestClassifier()),
])
param_grid = dict(clf__n_estimators=[100])
grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=-1, verbose=1, cv=3)
# Utility function to report best scores
def report(results, n_top=10):
for i in range(1, n_top + 1):
candidates = np.flatnonzero(results['rank_test_score'] == i)
for candidate in candidates:
print("Model with rank: {0}".format(i))
print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
results['mean_test_score'][candidate],
results['std_test_score'][candidate]))
print("Parameters: {0}".format(results['params'][candidate]))
print("")
start = time()
grid_search = GridSearchCV(pipe, param_grid, n_jobs=-1, verbose=1, cv=3)
grid_search.fit(Xlist, Ylist)
print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
% (time() - start, len(grid_search.cv_results_['params'])))
print("finished GridSearch")
report(grid_search.cv_results_)
pickle.dump(grid_search, open('model.pkl', 'wb'))
print("pipeline model saved to model.pkl")
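For reference, a minimal sketch of how a serverPredictor.py-style service could load model.pkl and classify an uploaded image. This is an illustration, not the actual serverPredictor.py: it assumes the same 100x100 resize and first-100-values flattening used above, and the Flask /predict route on port 3045 exercised by test.sh.
```
# hypothetical predictor sketch, not the committed serverPredictor.py
import pickle
import numpy as np
from PIL import Image, ImageOps
from flask import Flask, request

app = Flask(__name__)
model = pickle.load(open('model.pkl', 'rb'))  # fitted GridSearchCV pipeline saved above
size = 100, 100

@app.route('/predict', methods=['POST'])
def predict():
    # same preprocessing as training: resize to 100x100, flatten, keep the first 100 values
    img = Image.open(request.files['file'])
    thumb = ImageOps.fit(img, size, Image.ANTIALIAS)
    features = np.array(thumb).flatten()[:100]
    # labels are the dataset directory names ('object' / 'noobject')
    return str(model.predict([features])[0])

if __name__ == '__main__':
    app.run(port=3045)
```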

(Additional binary image files added; previews not shown.)
Some files were not shown because too many files have changed in this diff.