|
|
@ -0,0 +1,946 @@ |
|
|
|
{ |
|
|
|
"cells": [ |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 1, |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stderr", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"/home/asus/anaconda3/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", |
|
|
|
" \"This module will be removed in 0.20.\", DeprecationWarning)\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"from PIL import Image, ImageOps\n", |
|
|
|
"import numpy, os\n", |
|
|
|
"from sklearn.ensemble import AdaBoostClassifier\n", |
|
|
|
"from sklearn.cross_validation import cross_val_score\n", |
|
|
|
"import numpy as np\n", |
|
|
|
"import pandas as pd" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 2, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"path=\"dataset/\"\n", |
|
|
|
"Xlist=[]\n", |
|
|
|
"Ylist=[]\n", |
|
|
|
"size = 100, 100" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 3, |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"dataset/object/27.png\n", |
|
|
|
"dataset/object/82.png\n", |
|
|
|
"dataset/object/83.png\n", |
|
|
|
"dataset/object/100.png\n", |
|
|
|
"dataset/object/0.png\n", |
|
|
|
"dataset/object/13.png\n", |
|
|
|
"dataset/object/45.png\n", |
|
|
|
"dataset/object/64.png\n", |
|
|
|
"dataset/object/19.png\n", |
|
|
|
"dataset/object/101.png\n", |
|
|
|
"dataset/object/40.png\n", |
|
|
|
"dataset/object/97.png\n", |
|
|
|
"dataset/object/41.png\n", |
|
|
|
"dataset/object/7.png\n", |
|
|
|
"dataset/object/66.png\n", |
|
|
|
"dataset/object/55.png\n", |
|
|
|
"dataset/object/56.png\n", |
|
|
|
"dataset/object/65.png\n", |
|
|
|
"dataset/object/18.png\n", |
|
|
|
"dataset/object/24.png\n", |
|
|
|
"dataset/object/105.png\n", |
|
|
|
"dataset/object/116.png\n", |
|
|
|
"dataset/object/117.png\n", |
|
|
|
"dataset/object/104.png\n", |
|
|
|
"dataset/object/63.png\n", |
|
|
|
"dataset/object/38.png\n", |
|
|
|
"dataset/object/58.png\n", |
|
|
|
"dataset/object/103.png\n", |
|
|
|
"dataset/object/112.png\n", |
|
|
|
"dataset/object/33.png\n", |
|
|
|
"dataset/object/76.png\n", |
|
|
|
"dataset/object/59.png\n", |
|
|
|
"dataset/object/96.png\n", |
|
|
|
"dataset/object/91.png\n", |
|
|
|
"dataset/object/57.png\n", |
|
|
|
"dataset/object/2.png\n", |
|
|
|
"dataset/object/75.png\n", |
|
|
|
"dataset/object/107.png\n", |
|
|
|
"dataset/object/50.png\n", |
|
|
|
"dataset/object/16.png\n", |
|
|
|
"dataset/object/32.png\n", |
|
|
|
"dataset/object/15.png\n", |
|
|
|
"dataset/object/5.png\n", |
|
|
|
"dataset/object/72.png\n", |
|
|
|
"dataset/object/52.png\n", |
|
|
|
"dataset/object/4.png\n", |
|
|
|
"dataset/object/28.png\n", |
|
|
|
"dataset/object/43.png\n", |
|
|
|
"dataset/object/87.png\n", |
|
|
|
"dataset/object/98.png\n", |
|
|
|
"dataset/object/71.png\n", |
|
|
|
"dataset/object/102.png\n", |
|
|
|
"dataset/object/62.png\n", |
|
|
|
"dataset/object/9.png\n", |
|
|
|
"dataset/object/6.png\n", |
|
|
|
"dataset/object/85.png\n", |
|
|
|
"dataset/object/70.png\n", |
|
|
|
"dataset/object/42.png\n", |
|
|
|
"dataset/object/34.png\n", |
|
|
|
"dataset/object/81.png\n", |
|
|
|
"dataset/object/94.png\n", |
|
|
|
"dataset/object/26.png\n", |
|
|
|
"dataset/object/90.png\n", |
|
|
|
"dataset/object/44.png\n", |
|
|
|
"dataset/object/60.png\n", |
|
|
|
"dataset/object/17.png\n", |
|
|
|
"dataset/object/10.png\n", |
|
|
|
"dataset/object/53.png\n", |
|
|
|
"dataset/object/25.png\n", |
|
|
|
"dataset/object/21.png\n", |
|
|
|
"dataset/object/22.png\n", |
|
|
|
"dataset/object/30.png\n", |
|
|
|
"dataset/object/78.png\n", |
|
|
|
"dataset/object/118.png\n", |
|
|
|
"dataset/object/110.png\n", |
|
|
|
"dataset/object/79.png\n", |
|
|
|
"dataset/object/77.png\n", |
|
|
|
"dataset/object/12.png\n", |
|
|
|
"dataset/object/115.png\n", |
|
|
|
"dataset/object/67.png\n", |
|
|
|
"dataset/object/84.png\n", |
|
|
|
"dataset/object/11.png\n", |
|
|
|
"dataset/object/86.png\n", |
|
|
|
"dataset/object/89.png\n", |
|
|
|
"dataset/object/113.png\n", |
|
|
|
"dataset/noobject/image_0056.jpg\n", |
|
|
|
"dataset/noobject/image_0181.jpg\n", |
|
|
|
"dataset/noobject/image_0127.jpg\n", |
|
|
|
"dataset/noobject/image_0142.jpg\n", |
|
|
|
"dataset/noobject/image_0025.jpg\n", |
|
|
|
"dataset/noobject/image_0065.jpg\n", |
|
|
|
"dataset/noobject/image_0174.jpg\n", |
|
|
|
"dataset/noobject/image_0091.jpg\n", |
|
|
|
"dataset/noobject/image_0124.jpg\n", |
|
|
|
"dataset/noobject/image_0086.jpg\n", |
|
|
|
"dataset/noobject/image_0079.jpg\n", |
|
|
|
"dataset/noobject/image_0058.jpg\n", |
|
|
|
"dataset/noobject/image_0060.jpg\n", |
|
|
|
"dataset/noobject/image_0119.jpg\n", |
|
|
|
"dataset/noobject/image_0023.jpg\n", |
|
|
|
"dataset/noobject/image_0075.jpg\n", |
|
|
|
"dataset/noobject/image_0020.jpg\n", |
|
|
|
"dataset/noobject/image_0013.jpg\n", |
|
|
|
"dataset/noobject/image_0126.jpg\n", |
|
|
|
"dataset/noobject/image_0012.jpg\n", |
|
|
|
"dataset/noobject/image_0055.jpg\n", |
|
|
|
"dataset/noobject/image_0176.jpg\n", |
|
|
|
"dataset/noobject/image_0144.jpg\n", |
|
|
|
"dataset/noobject/image_0048.jpg\n", |
|
|
|
"dataset/noobject/image_0121.jpg\n", |
|
|
|
"dataset/noobject/image_0070.jpg\n", |
|
|
|
"dataset/noobject/image_0082.jpg\n", |
|
|
|
"dataset/noobject/image_0095.jpg\n", |
|
|
|
"dataset/noobject/image_0022.jpg\n", |
|
|
|
"dataset/noobject/image_0120.jpg\n", |
|
|
|
"dataset/noobject/image_0139.jpg\n", |
|
|
|
"dataset/noobject/image_0073.jpg\n", |
|
|
|
"dataset/noobject/image_0090.jpg\n", |
|
|
|
"dataset/noobject/image_0145.jpg\n", |
|
|
|
"dataset/noobject/image_0173.jpg\n", |
|
|
|
"dataset/noobject/image_0078.jpg\n", |
|
|
|
"dataset/noobject/image_0085.jpg\n", |
|
|
|
"dataset/noobject/image_0083.jpg\n", |
|
|
|
"dataset/noobject/image_0179.jpg\n", |
|
|
|
"dataset/noobject/image_0050.jpg\n", |
|
|
|
"dataset/noobject/image_0076.jpg\n", |
|
|
|
"dataset/noobject/image_0014.jpg\n", |
|
|
|
"dataset/noobject/image_0054.jpg\n", |
|
|
|
"dataset/noobject/image_0066.jpg\n", |
|
|
|
"dataset/noobject/image_0001.jpg\n", |
|
|
|
"dataset/noobject/image_0047.jpg\n", |
|
|
|
"dataset/noobject/image_0077.jpg\n", |
|
|
|
"dataset/noobject/image_0122.jpg\n", |
|
|
|
"dataset/noobject/image_0068.jpg\n", |
|
|
|
"dataset/noobject/image_0049.jpg\n", |
|
|
|
"dataset/noobject/image_0092.jpg\n", |
|
|
|
"dataset/noobject/image_0138.jpg\n", |
|
|
|
"dataset/noobject/image_0072.jpg\n", |
|
|
|
"dataset/noobject/image_0146.jpg\n", |
|
|
|
"dataset/noobject/image_0061.jpg\n", |
|
|
|
"dataset/noobject/image_0011.jpg\n", |
|
|
|
"dataset/noobject/image_0002.jpg\n", |
|
|
|
"dataset/noobject/image_0143.jpg\n", |
|
|
|
"dataset/noobject/image_0088.jpg\n", |
|
|
|
"dataset/noobject/image_0062.jpg\n", |
|
|
|
"dataset/noobject/image_0089.jpg\n", |
|
|
|
"dataset/noobject/image_0018.jpg\n", |
|
|
|
"dataset/noobject/image_0024.jpg\n", |
|
|
|
"dataset/noobject/image_0064.jpg\n", |
|
|
|
"dataset/noobject/image_0074.jpg\n", |
|
|
|
"dataset/noobject/image_0052.jpg\n", |
|
|
|
"dataset/noobject/image_0096.jpg\n", |
|
|
|
"dataset/noobject/image_0178.jpg\n", |
|
|
|
"dataset/noobject/image_0067.jpg\n", |
|
|
|
"dataset/noobject/image_0140.jpg\n", |
|
|
|
"dataset/noobject/image_0084.jpg\n", |
|
|
|
"dataset/noobject/image_0010.jpg\n", |
|
|
|
"dataset/noobject/image_0081.jpg\n", |
|
|
|
"dataset/noobject/image_0059.jpg\n", |
|
|
|
"dataset/noobject/image_0016.jpg\n", |
|
|
|
"dataset/noobject/image_0175.jpg\n", |
|
|
|
"dataset/noobject/image_0094.jpg\n", |
|
|
|
"dataset/noobject/image_0071.jpg\n", |
|
|
|
"dataset/noobject/image_0080.jpg\n", |
|
|
|
"dataset/noobject/image_0125.jpg\n", |
|
|
|
"dataset/noobject/image_0008.jpg\n", |
|
|
|
"dataset/noobject/image_0019.jpg\n", |
|
|
|
"dataset/noobject/image_0017.jpg\n", |
|
|
|
"dataset/noobject/image_0180.jpg\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"for directory in os.listdir(path):\n", |
|
|
|
" for file in os.listdir(path+directory):\n", |
|
|
|
" print(path+directory+\"/\"+file)\n", |
|
|
|
" img=Image.open(path+directory+\"/\"+file)\n", |
|
|
|
" #resize\n", |
|
|
|
" thumb = ImageOps.fit(img, size, Image.ANTIALIAS)\n", |
|
|
|
" image_data = np.array(thumb).flatten()[:100]\n", |
|
|
|
" #image_data=numpy.array(img).flatten()[:50] #in my case the images dont have the same dimensions, so [:50] only takes the first 50 values\n", |
|
|
|
" Xlist.append(image_data)\n", |
|
|
|
" Ylist.append(directory)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 4, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"from sklearn.model_selection import train_test_split\n", |
|
|
|
"X_train, X_test, y_train, y_test = train_test_split(Xlist, Ylist, test_size=0.2)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "markdown", |
|
|
|
"metadata": {}, |
|
|
|
"source": [ |
|
|
|
"### AdaBoostClassifier" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 5, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"clf=AdaBoostClassifier(n_estimators=100)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 6, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"scores = cross_val_score(clf, X_train, y_train, cv=3)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 7, |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"0.77037037037\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"print(scores.mean())" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "markdown", |
|
|
|
"metadata": {}, |
|
|
|
"source": [ |
|
|
|
"### GaussianNB" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 8, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"from sklearn.naive_bayes import GaussianNB" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 9, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"clf = GaussianNB()" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 10, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"scores = cross_val_score(clf, Xlist, Ylist)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 11, |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"0.721908939014\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"print(scores.mean())" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "markdown", |
|
|
|
"metadata": {}, |
|
|
|
"source": [ |
|
|
|
"### KNeighborsClassifier" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 12, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"from sklearn.neighbors import KNeighborsClassifier" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 13, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"clf = KNeighborsClassifier(n_neighbors=10)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 14, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"scores = cross_val_score(clf, Xlist, Ylist)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 15, |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"0.751357560568\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"print(scores.mean())" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "markdown", |
|
|
|
"metadata": {}, |
|
|
|
"source": [ |
|
|
|
"### LinearSVC" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 16, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"from sklearn.svm import LinearSVC" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 17, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"clf = LinearSVC()" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 18, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"scores = cross_val_score(clf, Xlist, Ylist)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 19, |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"0.638575605681\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"print(scores.mean())" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "markdown", |
|
|
|
"metadata": {}, |
|
|
|
"source": [ |
|
|
|
"### SVC" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 20, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"from sklearn.svm import SVC" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 21, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"clf = SVC()" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 22, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"scores = cross_val_score(clf, Xlist, Ylist)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 23, |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"0.668650793651\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"print(scores.mean())" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "markdown", |
|
|
|
"metadata": {}, |
|
|
|
"source": [ |
|
|
|
"### GaussianProcessClassifier" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 24, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"from sklearn.gaussian_process import GaussianProcessClassifier" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 25, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"clf = GaussianProcessClassifier()" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 26, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"scores = cross_val_score(clf, Xlist, Ylist)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 27, |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"0.491228070175\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"print(scores.mean())" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "markdown", |
|
|
|
"metadata": {}, |
|
|
|
"source": [ |
|
|
|
"### RandomForestClassifier" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 28, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"from sklearn.ensemble import RandomForestClassifier" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 29, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"clf = RandomForestClassifier()" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 30, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"scores = cross_val_score(clf, Xlist, Ylist)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 31, |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"0.710317460317\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"print(scores.mean())" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "markdown", |
|
|
|
"metadata": {}, |
|
|
|
"source": [ |
|
|
|
"# Hyperparameters Tuning using sklearn pipeline and gridsearch" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 32, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"from sklearn.pipeline import Pipeline\n", |
|
|
|
"from sklearn.model_selection import GridSearchCV\n", |
|
|
|
"from sklearn.model_selection import RandomizedSearchCV" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 33, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"pipe = [Pipeline([\n", |
|
|
|
" ('clf', RandomForestClassifier()),\n", |
|
|
|
" ]),\n", |
|
|
|
" Pipeline([\n", |
|
|
|
" ('clf', KNeighborsClassifier()),\n", |
|
|
|
" ]),\n", |
|
|
|
" Pipeline([\n", |
|
|
|
" ('clf', GaussianProcessClassifier()),\n", |
|
|
|
" ]),\n", |
|
|
|
" Pipeline([\n", |
|
|
|
" ('clf', AdaBoostClassifier()),\n", |
|
|
|
" ]),\n", |
|
|
|
" Pipeline([\n", |
|
|
|
" ('clf', SVC()),\n", |
|
|
|
" ]),\n", |
|
|
|
"]" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 34, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"param_grid = [dict(clf__n_estimators=[3, 10, 100]),\n", |
|
|
|
" dict(clf__n_neighbors=[3,10]),\n", |
|
|
|
" dict(clf__n_restarts_optimizer=[0,1]),\n", |
|
|
|
" dict(clf__n_estimators=[3, 10, 100]),\n", |
|
|
|
" dict(clf__C=[3, 10, 100]),\n", |
|
|
|
" ]" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 35, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=-1, verbose=1, cv=3)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 36, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"#grid_search.fit(Xlist, Ylist)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 37, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"# Utility function to report best scores\n", |
|
|
|
"def report(results, n_top=10):\n", |
|
|
|
" for i in range(1, n_top + 1):\n", |
|
|
|
" candidates = np.flatnonzero(results['rank_test_score'] == i)\n", |
|
|
|
" for candidate in candidates:\n", |
|
|
|
" print(\"Model with rank: {0}\".format(i))\n", |
|
|
|
" print(\"Mean validation score: {0:.3f} (std: {1:.3f})\".format(\n", |
|
|
|
" results['mean_test_score'][candidate],\n", |
|
|
|
" results['std_test_score'][candidate]))\n", |
|
|
|
" print(\"Parameters: {0}\".format(results['params'][candidate]))\n", |
|
|
|
" print(\"\")" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 38, |
|
|
|
"metadata": { |
|
|
|
"scrolled": false |
|
|
|
}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"-----\n", |
|
|
|
"classifier:\n", |
|
|
|
"RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", |
|
|
|
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n", |
|
|
|
" min_impurity_decrease=0.0, min_impurity_split=None,\n", |
|
|
|
" min_samples_leaf=1, min_samples_split=2,\n", |
|
|
|
" min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n", |
|
|
|
" oob_score=False, random_state=None, verbose=0,\n", |
|
|
|
" warm_start=False)\n", |
|
|
|
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"name": "stderr", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 1.5s finished\n" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"GridSearchCV took 2.38 seconds for 3 candidate parameter settings.\n", |
|
|
|
"finished GridSearch\n", |
|
|
|
"Model with rank: 1\n", |
|
|
|
"Mean validation score: 0.815 (std: 0.073)\n", |
|
|
|
"Parameters: {'clf__n_estimators': 100}\n", |
|
|
|
"\n", |
|
|
|
"Model with rank: 2\n", |
|
|
|
"Mean validation score: 0.763 (std: 0.093)\n", |
|
|
|
"Parameters: {'clf__n_estimators': 10}\n", |
|
|
|
"\n", |
|
|
|
"Model with rank: 3\n", |
|
|
|
"Mean validation score: 0.756 (std: 0.110)\n", |
|
|
|
"Parameters: {'clf__n_estimators': 3}\n", |
|
|
|
"\n", |
|
|
|
"-----\n", |
|
|
|
"classifier:\n", |
|
|
|
"KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n", |
|
|
|
" metric_params=None, n_jobs=1, n_neighbors=5, p=2,\n", |
|
|
|
" weights='uniform')\n", |
|
|
|
"Fitting 3 folds for each of 2 candidates, totalling 6 fits\n", |
|
|
|
"GridSearchCV took 0.23 seconds for 2 candidate parameter settings.\n", |
|
|
|
"finished GridSearch\n", |
|
|
|
"Model with rank: 1\n", |
|
|
|
"Mean validation score: 0.778 (std: 0.048)\n", |
|
|
|
"Parameters: {'clf__n_neighbors': 3}\n", |
|
|
|
"\n", |
|
|
|
"Model with rank: 2\n", |
|
|
|
"Mean validation score: 0.704 (std: 0.010)\n", |
|
|
|
"Parameters: {'clf__n_neighbors': 10}\n", |
|
|
|
"\n", |
|
|
|
"-----\n", |
|
|
|
"classifier:\n", |
|
|
|
"GaussianProcessClassifier(copy_X_train=True, kernel=None,\n", |
|
|
|
" max_iter_predict=100, multi_class='one_vs_rest', n_jobs=1,\n", |
|
|
|
" n_restarts_optimizer=0, optimizer='fmin_l_bfgs_b',\n", |
|
|
|
" random_state=None, warm_start=False)\n", |
|
|
|
"Fitting 3 folds for each of 2 candidates, totalling 6 fits\n" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"name": "stderr", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.1s remaining: 0.0s\n", |
|
|
|
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.1s finished\n" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"GridSearchCV took 0.36 seconds for 2 candidate parameter settings.\n", |
|
|
|
"finished GridSearch\n", |
|
|
|
"Model with rank: 1\n", |
|
|
|
"Mean validation score: 0.489 (std: 0.000)\n", |
|
|
|
"Parameters: {'clf__n_restarts_optimizer': 0}\n", |
|
|
|
"\n", |
|
|
|
"Model with rank: 1\n", |
|
|
|
"Mean validation score: 0.489 (std: 0.000)\n", |
|
|
|
"Parameters: {'clf__n_restarts_optimizer': 1}\n", |
|
|
|
"\n", |
|
|
|
"-----\n", |
|
|
|
"classifier:\n", |
|
|
|
"AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,\n", |
|
|
|
" learning_rate=1.0, n_estimators=50, random_state=None)\n", |
|
|
|
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"name": "stderr", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.2s remaining: 0.0s\n", |
|
|
|
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.2s finished\n", |
|
|
|
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 0.9s finished\n" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"GridSearchCV took 1.16 seconds for 3 candidate parameter settings.\n", |
|
|
|
"finished GridSearch\n", |
|
|
|
"Model with rank: 1\n", |
|
|
|
"Mean validation score: 0.807 (std: 0.093)\n", |
|
|
|
"Parameters: {'clf__n_estimators': 3}\n", |
|
|
|
"\n", |
|
|
|
"Model with rank: 2\n", |
|
|
|
"Mean validation score: 0.756 (std: 0.048)\n", |
|
|
|
"Parameters: {'clf__n_estimators': 100}\n", |
|
|
|
"\n", |
|
|
|
"Model with rank: 3\n", |
|
|
|
"Mean validation score: 0.733 (std: 0.054)\n", |
|
|
|
"Parameters: {'clf__n_estimators': 10}\n", |
|
|
|
"\n", |
|
|
|
"-----\n", |
|
|
|
"classifier:\n", |
|
|
|
"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", |
|
|
|
" decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n", |
|
|
|
" max_iter=-1, probability=False, random_state=None, shrinking=True,\n", |
|
|
|
" tol=0.001, verbose=False)\n", |
|
|
|
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n", |
|
|
|
"GridSearchCV took 0.35 seconds for 3 candidate parameter settings.\n", |
|
|
|
"finished GridSearch\n", |
|
|
|
"Model with rank: 1\n", |
|
|
|
"Mean validation score: 0.689 (std: 0.031)\n", |
|
|
|
"Parameters: {'clf__C': 3}\n", |
|
|
|
"\n", |
|
|
|
"Model with rank: 1\n", |
|
|
|
"Mean validation score: 0.689 (std: 0.031)\n", |
|
|
|
"Parameters: {'clf__C': 10}\n", |
|
|
|
"\n", |
|
|
|
"Model with rank: 1\n", |
|
|
|
"Mean validation score: 0.689 (std: 0.031)\n", |
|
|
|
"Parameters: {'clf__C': 100}\n", |
|
|
|
"\n" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"name": "stderr", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 0.1s finished\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"from time import time\n", |
|
|
|
"\n", |
|
|
|
"for i in range(len(pipe)):\n", |
|
|
|
" start = time()\n", |
|
|
|
" print(\"-----\")\n", |
|
|
|
" print(\"classifier:\")\n", |
|
|
|
" print(pipe[i].named_steps['clf'])\n", |
|
|
|
" grid_search = GridSearchCV(pipe[i], param_grid[i], n_jobs=-1, verbose=1, cv=3)\n", |
|
|
|
" grid_search.fit(X_train, y_train)\n", |
|
|
|
" print(\"GridSearchCV took %.2f seconds for %d candidate parameter settings.\"\n", |
|
|
|
" % (time() - start, len(grid_search.cv_results_['params'])))\n", |
|
|
|
" print(\"finished GridSearch\")\n", |
|
|
|
" report(grid_search.cv_results_)" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": null, |
|
|
|
"metadata": { |
|
|
|
"collapsed": true |
|
|
|
}, |
|
|
|
"outputs": [], |
|
|
|
"source": [] |
|
|
|
} |
|
|
|
], |
|
|
|
"metadata": { |
|
|
|
"kernelspec": { |
|
|
|
"display_name": "Python 3", |
|
|
|
"language": "python", |
|
|
|
"name": "python3" |
|
|
|
}, |
|
|
|
"language_info": { |
|
|
|
"codemirror_mode": { |
|
|
|
"name": "ipython", |
|
|
|
"version": 3 |
|
|
|
}, |
|
|
|
"file_extension": ".py", |
|
|
|
"mimetype": "text/x-python", |
|
|
|
"name": "python", |
|
|
|
"nbconvert_exporter": "python", |
|
|
|
"pygments_lexer": "ipython3", |
|
|
|
"version": "3.6.3" |
|
|
|
} |
|
|
|
}, |
|
|
|
"nbformat": 4, |
|
|
|
"nbformat_minor": 2 |
|
|
|
} |