pipeline model chooser working, server predictor working

This commit is contained in:
arnaucode
2017-11-29 18:22:09 +01:00
parent 26f61c02f5
commit 950c6b4c57
207 changed files with 1168 additions and 449 deletions

View File

@@ -1 +1,2 @@
currentimage.png
log
.ipynb_checkpoints

View File

@@ -0,0 +1,946 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/asus/anaconda3/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
" \"This module will be removed in 0.20.\", DeprecationWarning)\n"
]
}
],
"source": [
"from PIL import Image, ImageOps\n",
"import numpy, os\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"# cross_val_score lives in sklearn.model_selection; the old\n",
"# sklearn.cross_validation module was deprecated in 0.18 and removed in 0.20.\n",
"from sklearn.model_selection import cross_val_score\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"path=\"dataset/\"\n",
"Xlist=[]\n",
"Ylist=[]\n",
"size = 100, 100"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dataset/object/27.png\n",
"dataset/object/82.png\n",
"dataset/object/83.png\n",
"dataset/object/100.png\n",
"dataset/object/0.png\n",
"dataset/object/13.png\n",
"dataset/object/45.png\n",
"dataset/object/64.png\n",
"dataset/object/19.png\n",
"dataset/object/101.png\n",
"dataset/object/40.png\n",
"dataset/object/97.png\n",
"dataset/object/41.png\n",
"dataset/object/7.png\n",
"dataset/object/66.png\n",
"dataset/object/55.png\n",
"dataset/object/56.png\n",
"dataset/object/65.png\n",
"dataset/object/18.png\n",
"dataset/object/24.png\n",
"dataset/object/105.png\n",
"dataset/object/116.png\n",
"dataset/object/117.png\n",
"dataset/object/104.png\n",
"dataset/object/63.png\n",
"dataset/object/38.png\n",
"dataset/object/58.png\n",
"dataset/object/103.png\n",
"dataset/object/112.png\n",
"dataset/object/33.png\n",
"dataset/object/76.png\n",
"dataset/object/59.png\n",
"dataset/object/96.png\n",
"dataset/object/91.png\n",
"dataset/object/57.png\n",
"dataset/object/2.png\n",
"dataset/object/75.png\n",
"dataset/object/107.png\n",
"dataset/object/50.png\n",
"dataset/object/16.png\n",
"dataset/object/32.png\n",
"dataset/object/15.png\n",
"dataset/object/5.png\n",
"dataset/object/72.png\n",
"dataset/object/52.png\n",
"dataset/object/4.png\n",
"dataset/object/28.png\n",
"dataset/object/43.png\n",
"dataset/object/87.png\n",
"dataset/object/98.png\n",
"dataset/object/71.png\n",
"dataset/object/102.png\n",
"dataset/object/62.png\n",
"dataset/object/9.png\n",
"dataset/object/6.png\n",
"dataset/object/85.png\n",
"dataset/object/70.png\n",
"dataset/object/42.png\n",
"dataset/object/34.png\n",
"dataset/object/81.png\n",
"dataset/object/94.png\n",
"dataset/object/26.png\n",
"dataset/object/90.png\n",
"dataset/object/44.png\n",
"dataset/object/60.png\n",
"dataset/object/17.png\n",
"dataset/object/10.png\n",
"dataset/object/53.png\n",
"dataset/object/25.png\n",
"dataset/object/21.png\n",
"dataset/object/22.png\n",
"dataset/object/30.png\n",
"dataset/object/78.png\n",
"dataset/object/118.png\n",
"dataset/object/110.png\n",
"dataset/object/79.png\n",
"dataset/object/77.png\n",
"dataset/object/12.png\n",
"dataset/object/115.png\n",
"dataset/object/67.png\n",
"dataset/object/84.png\n",
"dataset/object/11.png\n",
"dataset/object/86.png\n",
"dataset/object/89.png\n",
"dataset/object/113.png\n",
"dataset/noobject/image_0056.jpg\n",
"dataset/noobject/image_0181.jpg\n",
"dataset/noobject/image_0127.jpg\n",
"dataset/noobject/image_0142.jpg\n",
"dataset/noobject/image_0025.jpg\n",
"dataset/noobject/image_0065.jpg\n",
"dataset/noobject/image_0174.jpg\n",
"dataset/noobject/image_0091.jpg\n",
"dataset/noobject/image_0124.jpg\n",
"dataset/noobject/image_0086.jpg\n",
"dataset/noobject/image_0079.jpg\n",
"dataset/noobject/image_0058.jpg\n",
"dataset/noobject/image_0060.jpg\n",
"dataset/noobject/image_0119.jpg\n",
"dataset/noobject/image_0023.jpg\n",
"dataset/noobject/image_0075.jpg\n",
"dataset/noobject/image_0020.jpg\n",
"dataset/noobject/image_0013.jpg\n",
"dataset/noobject/image_0126.jpg\n",
"dataset/noobject/image_0012.jpg\n",
"dataset/noobject/image_0055.jpg\n",
"dataset/noobject/image_0176.jpg\n",
"dataset/noobject/image_0144.jpg\n",
"dataset/noobject/image_0048.jpg\n",
"dataset/noobject/image_0121.jpg\n",
"dataset/noobject/image_0070.jpg\n",
"dataset/noobject/image_0082.jpg\n",
"dataset/noobject/image_0095.jpg\n",
"dataset/noobject/image_0022.jpg\n",
"dataset/noobject/image_0120.jpg\n",
"dataset/noobject/image_0139.jpg\n",
"dataset/noobject/image_0073.jpg\n",
"dataset/noobject/image_0090.jpg\n",
"dataset/noobject/image_0145.jpg\n",
"dataset/noobject/image_0173.jpg\n",
"dataset/noobject/image_0078.jpg\n",
"dataset/noobject/image_0085.jpg\n",
"dataset/noobject/image_0083.jpg\n",
"dataset/noobject/image_0179.jpg\n",
"dataset/noobject/image_0050.jpg\n",
"dataset/noobject/image_0076.jpg\n",
"dataset/noobject/image_0014.jpg\n",
"dataset/noobject/image_0054.jpg\n",
"dataset/noobject/image_0066.jpg\n",
"dataset/noobject/image_0001.jpg\n",
"dataset/noobject/image_0047.jpg\n",
"dataset/noobject/image_0077.jpg\n",
"dataset/noobject/image_0122.jpg\n",
"dataset/noobject/image_0068.jpg\n",
"dataset/noobject/image_0049.jpg\n",
"dataset/noobject/image_0092.jpg\n",
"dataset/noobject/image_0138.jpg\n",
"dataset/noobject/image_0072.jpg\n",
"dataset/noobject/image_0146.jpg\n",
"dataset/noobject/image_0061.jpg\n",
"dataset/noobject/image_0011.jpg\n",
"dataset/noobject/image_0002.jpg\n",
"dataset/noobject/image_0143.jpg\n",
"dataset/noobject/image_0088.jpg\n",
"dataset/noobject/image_0062.jpg\n",
"dataset/noobject/image_0089.jpg\n",
"dataset/noobject/image_0018.jpg\n",
"dataset/noobject/image_0024.jpg\n",
"dataset/noobject/image_0064.jpg\n",
"dataset/noobject/image_0074.jpg\n",
"dataset/noobject/image_0052.jpg\n",
"dataset/noobject/image_0096.jpg\n",
"dataset/noobject/image_0178.jpg\n",
"dataset/noobject/image_0067.jpg\n",
"dataset/noobject/image_0140.jpg\n",
"dataset/noobject/image_0084.jpg\n",
"dataset/noobject/image_0010.jpg\n",
"dataset/noobject/image_0081.jpg\n",
"dataset/noobject/image_0059.jpg\n",
"dataset/noobject/image_0016.jpg\n",
"dataset/noobject/image_0175.jpg\n",
"dataset/noobject/image_0094.jpg\n",
"dataset/noobject/image_0071.jpg\n",
"dataset/noobject/image_0080.jpg\n",
"dataset/noobject/image_0125.jpg\n",
"dataset/noobject/image_0008.jpg\n",
"dataset/noobject/image_0019.jpg\n",
"dataset/noobject/image_0017.jpg\n",
"dataset/noobject/image_0180.jpg\n"
]
}
],
"source": [
"# Every sub-directory of `path` is treated as one class; its name is the label.\n",
"for directory in os.listdir(path):\n",
"    class_dir = os.path.join(path, directory)\n",
"    if not os.path.isdir(class_dir):\n",
"        # a stray file next to the class folders would make os.listdir() raise\n",
"        continue\n",
"    for file in os.listdir(class_dir):\n",
"        image_path = path+directory+\"/\"+file\n",
"        print(image_path)\n",
"        # the context manager closes the image file handle after each iteration\n",
"        with Image.open(image_path) as img:\n",
"            #resize\n",
"            # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS\n",
"            thumb = ImageOps.fit(img, size, Image.LANCZOS)\n",
"            # only the first 100 values of the flattened thumbnail are kept as features\n",
"            image_data = np.array(thumb).flatten()[:100]\n",
"        #image_data=numpy.array(img).flatten()[:50] #in my case the images dont have the same dimensions, so [:50] only takes the first 50 values\n",
"        Xlist.append(image_data)\n",
"        Ylist.append(directory)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(Xlist, Ylist, test_size=0.2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### AdaBoostClassifier"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf=AdaBoostClassifier(n_estimators=100)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, X_train, y_train, cv=3)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.77037037037\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GaussianNB"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.naive_bayes import GaussianNB"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = GaussianNB()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.721908939014\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### KNeighborsClassifier"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.neighbors import KNeighborsClassifier"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = KNeighborsClassifier(n_neighbors=10)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.751357560568\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### LinearSVC"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.svm import LinearSVC"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = LinearSVC()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.638575605681\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### SVC"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.svm import SVC"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = SVC()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.668650793651\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### GaussianProcessClassifier"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.gaussian_process import GaussianProcessClassifier"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = GaussianProcessClassifier()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.491228070175\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### RandomForestClassifier"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestClassifier"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = RandomForestClassifier()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"scores = cross_val_score(clf, Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.710317460317\n"
]
}
],
"source": [
"print(scores.mean())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Hyperparameter tuning with scikit-learn Pipeline and GridSearchCV"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.pipeline import Pipeline\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.model_selection import RandomizedSearchCV"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pipe = [Pipeline([\n",
" ('clf', RandomForestClassifier()),\n",
" ]),\n",
" Pipeline([\n",
" ('clf', KNeighborsClassifier()),\n",
" ]),\n",
" Pipeline([\n",
" ('clf', GaussianProcessClassifier()),\n",
" ]),\n",
" Pipeline([\n",
" ('clf', AdaBoostClassifier()),\n",
" ]),\n",
" Pipeline([\n",
" ('clf', SVC()),\n",
" ]),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"param_grid = [dict(clf__n_estimators=[3, 10, 100]),\n",
" dict(clf__n_neighbors=[3,10]),\n",
" dict(clf__n_restarts_optimizer=[0,1]),\n",
" dict(clf__n_estimators=[3, 10, 100]),\n",
" dict(clf__C=[3, 10, 100]),\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=-1, verbose=1, cv=3)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#grid_search.fit(Xlist, Ylist)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Utility function to report best scores\n",
"def report(results, n_top=10):\n",
"    \"\"\"Print the best-ranked candidates of a parameter search.\n",
"\n",
"    results: mapping indexed by 'rank_test_score', 'mean_test_score',\n",
"        'std_test_score' and 'params'.\n",
"    n_top: highest rank to print; candidates tied on a rank are all printed.\n",
"    \"\"\"\n",
"    for rank in range(1, n_top + 1):\n",
"        for idx in np.flatnonzero(results['rank_test_score'] == rank):\n",
"            print(\"Model with rank: {0}\".format(rank))\n",
"            mean_score = results['mean_test_score'][idx]\n",
"            std_score = results['std_test_score'][idx]\n",
"            print(\"Mean validation score: {0:.3f} (std: {1:.3f})\".format(mean_score, std_score))\n",
"            print(\"Parameters: {0}\".format(results['params'][idx]))\n",
"            print(\"\")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----\n",
"classifier:\n",
"RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n",
" oob_score=False, random_state=None, verbose=0,\n",
" warm_start=False)\n",
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 1.5s finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"GridSearchCV took 2.38 seconds for 3 candidate parameter settings.\n",
"finished GridSearch\n",
"Model with rank: 1\n",
"Mean validation score: 0.815 (std: 0.073)\n",
"Parameters: {'clf__n_estimators': 100}\n",
"\n",
"Model with rank: 2\n",
"Mean validation score: 0.763 (std: 0.093)\n",
"Parameters: {'clf__n_estimators': 10}\n",
"\n",
"Model with rank: 3\n",
"Mean validation score: 0.756 (std: 0.110)\n",
"Parameters: {'clf__n_estimators': 3}\n",
"\n",
"-----\n",
"classifier:\n",
"KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
" metric_params=None, n_jobs=1, n_neighbors=5, p=2,\n",
" weights='uniform')\n",
"Fitting 3 folds for each of 2 candidates, totalling 6 fits\n",
"GridSearchCV took 0.23 seconds for 2 candidate parameter settings.\n",
"finished GridSearch\n",
"Model with rank: 1\n",
"Mean validation score: 0.778 (std: 0.048)\n",
"Parameters: {'clf__n_neighbors': 3}\n",
"\n",
"Model with rank: 2\n",
"Mean validation score: 0.704 (std: 0.010)\n",
"Parameters: {'clf__n_neighbors': 10}\n",
"\n",
"-----\n",
"classifier:\n",
"GaussianProcessClassifier(copy_X_train=True, kernel=None,\n",
" max_iter_predict=100, multi_class='one_vs_rest', n_jobs=1,\n",
" n_restarts_optimizer=0, optimizer='fmin_l_bfgs_b',\n",
" random_state=None, warm_start=False)\n",
"Fitting 3 folds for each of 2 candidates, totalling 6 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.1s remaining: 0.0s\n",
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.1s finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"GridSearchCV took 0.36 seconds for 2 candidate parameter settings.\n",
"finished GridSearch\n",
"Model with rank: 1\n",
"Mean validation score: 0.489 (std: 0.000)\n",
"Parameters: {'clf__n_restarts_optimizer': 0}\n",
"\n",
"Model with rank: 1\n",
"Mean validation score: 0.489 (std: 0.000)\n",
"Parameters: {'clf__n_restarts_optimizer': 1}\n",
"\n",
"-----\n",
"classifier:\n",
"AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,\n",
" learning_rate=1.0, n_estimators=50, random_state=None)\n",
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.2s remaining: 0.0s\n",
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.2s finished\n",
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 0.9s finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"GridSearchCV took 1.16 seconds for 3 candidate parameter settings.\n",
"finished GridSearch\n",
"Model with rank: 1\n",
"Mean validation score: 0.807 (std: 0.093)\n",
"Parameters: {'clf__n_estimators': 3}\n",
"\n",
"Model with rank: 2\n",
"Mean validation score: 0.756 (std: 0.048)\n",
"Parameters: {'clf__n_estimators': 100}\n",
"\n",
"Model with rank: 3\n",
"Mean validation score: 0.733 (std: 0.054)\n",
"Parameters: {'clf__n_estimators': 10}\n",
"\n",
"-----\n",
"classifier:\n",
"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
" decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n",
" max_iter=-1, probability=False, random_state=None, shrinking=True,\n",
" tol=0.001, verbose=False)\n",
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n",
"GridSearchCV took 0.35 seconds for 3 candidate parameter settings.\n",
"finished GridSearch\n",
"Model with rank: 1\n",
"Mean validation score: 0.689 (std: 0.031)\n",
"Parameters: {'clf__C': 3}\n",
"\n",
"Model with rank: 1\n",
"Mean validation score: 0.689 (std: 0.031)\n",
"Parameters: {'clf__C': 10}\n",
"\n",
"Model with rank: 1\n",
"Mean validation score: 0.689 (std: 0.031)\n",
"Parameters: {'clf__C': 100}\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 0.1s finished\n"
]
}
],
"source": [
"from time import time\n",
"\n",
"# Run one grid search per candidate pipeline, pairing pipe[i] with param_grid[i].\n",
"for i, candidate in enumerate(pipe):\n",
"    start = time()\n",
"    print(\"-----\")\n",
"    print(\"classifier:\")\n",
"    print(candidate.named_steps['clf'])\n",
"    grid_search = GridSearchCV(candidate, param_grid[i], n_jobs=-1, verbose=1, cv=3)\n",
"    grid_search.fit(X_train, y_train)\n",
"    elapsed = time() - start\n",
"    n_candidates = len(grid_search.cv_results_['params'])\n",
"    print(\"GridSearchCV took %.2f seconds for %d candidate parameter settings.\"\n",
"          % (elapsed, n_candidates))\n",
"    print(\"finished GridSearch\")\n",
"    report(grid_search.cv_results_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,64 @@
from PIL import Image, ImageOps
import numpy, os
from sklearn.feature_extraction import image
from sklearn.model_selection import KFold, cross_val_score
import numpy as np
import pandas as pd
from time import time
import pickle
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
path="dataset/"
Xlist=[]  # one feature vector per image
Ylist=[]  # label of each image: the name of the sub-directory it was found in
size = 100, 100  # target size handed to ImageOps.fit
#load images from dataset
for directory in os.listdir(path):
    class_dir = os.path.join(path, directory)
    if not os.path.isdir(class_dir):
        # a stray file next to the class folders would make os.listdir() raise
        continue
    for file in os.listdir(class_dir):
        image_path = path+directory+"/"+file
        print(image_path)
        # the context manager closes the image file handle after each iteration
        with Image.open(image_path) as img:
            #resize
            # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS
            thumb = ImageOps.fit(img, size, Image.LANCZOS)
            # only the first 100 values of the flattened thumbnail are kept as features
            # NOTE(review): any code that feeds this model must build features the same way
            image_data = np.array(thumb).flatten()[:100]
        Xlist.append(image_data)
        Ylist.append(directory)
from sklearn.ensemble import RandomForestClassifier
# Single-step pipeline; the step name 'clf' is the prefix used by the grid keys.
pipe = Pipeline(steps=[('clf', RandomForestClassifier())])
# The grid holds exactly one candidate: n_estimators=100.
param_grid = {'clf__n_estimators': [100]}
grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    n_jobs=-1,
    verbose=1,
    cv=3,
)
# Utility function to report best scores
def report(results, n_top=10):
    """Print the best-ranked candidates of a parameter search.

    Args:
        results: mapping indexed by 'rank_test_score', 'mean_test_score',
            'std_test_score' and 'params'.
        n_top: highest rank to print; candidates tied on a rank are all printed.
    """
    for rank in range(1, n_top + 1):
        for idx in np.flatnonzero(results['rank_test_score'] == rank):
            print("Model with rank: {0}".format(rank))
            mean_score = results['mean_test_score'][idx]
            std_score = results['std_test_score'][idx]
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(mean_score, std_score))
            print("Parameters: {0}".format(results['params'][idx]))
            print("")
# Time the whole search, including construction of the GridSearchCV object.
start = time()
grid_search = GridSearchCV(pipe, param_grid, n_jobs=-1, verbose=1, cv=3)
grid_search.fit(Xlist, Ylist)
print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
      % (time() - start, len(grid_search.cv_results_['params'])))
print("finished GridSearch")
report(grid_search.cv_results_)
# Persist the fitted GridSearchCV object. The `with` block guarantees that
# model.pkl is flushed and closed even if pickling raises.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(grid_search, model_file)
print("pipeline model saved to model.pkl")

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 148 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 105 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 119 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Some files were not shown because too many files have changed in this diff Show More