{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/asus/anaconda3/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
|
|
" \"This module will be removed in 0.20.\", DeprecationWarning)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from PIL import Image, ImageOps\n",
|
|
"import numpy, os\n",
|
|
"from sklearn.ensemble import AdaBoostClassifier\n",
|
|
"from sklearn.cross_validation import cross_val_score\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"path=\"dataset/\"\n",
|
|
"Xlist=[]\n",
|
|
"Ylist=[]\n",
|
|
"size = 100, 100"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"dataset/object/27.png\n",
|
|
"dataset/object/82.png\n",
|
|
"dataset/object/83.png\n",
|
|
"dataset/object/100.png\n",
|
|
"dataset/object/0.png\n",
|
|
"dataset/object/13.png\n",
|
|
"dataset/object/45.png\n",
|
|
"dataset/object/64.png\n",
|
|
"dataset/object/19.png\n",
|
|
"dataset/object/101.png\n",
|
|
"dataset/object/40.png\n",
|
|
"dataset/object/97.png\n",
|
|
"dataset/object/41.png\n",
|
|
"dataset/object/7.png\n",
|
|
"dataset/object/66.png\n",
|
|
"dataset/object/55.png\n",
|
|
"dataset/object/56.png\n",
|
|
"dataset/object/65.png\n",
|
|
"dataset/object/18.png\n",
|
|
"dataset/object/24.png\n",
|
|
"dataset/object/105.png\n",
|
|
"dataset/object/116.png\n",
|
|
"dataset/object/117.png\n",
|
|
"dataset/object/104.png\n",
|
|
"dataset/object/63.png\n",
|
|
"dataset/object/38.png\n",
|
|
"dataset/object/58.png\n",
|
|
"dataset/object/103.png\n",
|
|
"dataset/object/112.png\n",
|
|
"dataset/object/33.png\n",
|
|
"dataset/object/76.png\n",
|
|
"dataset/object/59.png\n",
|
|
"dataset/object/96.png\n",
|
|
"dataset/object/91.png\n",
|
|
"dataset/object/57.png\n",
|
|
"dataset/object/2.png\n",
|
|
"dataset/object/75.png\n",
|
|
"dataset/object/107.png\n",
|
|
"dataset/object/50.png\n",
|
|
"dataset/object/16.png\n",
|
|
"dataset/object/32.png\n",
|
|
"dataset/object/15.png\n",
|
|
"dataset/object/5.png\n",
|
|
"dataset/object/72.png\n",
|
|
"dataset/object/52.png\n",
|
|
"dataset/object/4.png\n",
|
|
"dataset/object/28.png\n",
|
|
"dataset/object/43.png\n",
|
|
"dataset/object/87.png\n",
|
|
"dataset/object/98.png\n",
|
|
"dataset/object/71.png\n",
|
|
"dataset/object/102.png\n",
|
|
"dataset/object/62.png\n",
|
|
"dataset/object/9.png\n",
|
|
"dataset/object/6.png\n",
|
|
"dataset/object/85.png\n",
|
|
"dataset/object/70.png\n",
|
|
"dataset/object/42.png\n",
|
|
"dataset/object/34.png\n",
|
|
"dataset/object/81.png\n",
|
|
"dataset/object/94.png\n",
|
|
"dataset/object/26.png\n",
|
|
"dataset/object/90.png\n",
|
|
"dataset/object/44.png\n",
|
|
"dataset/object/60.png\n",
|
|
"dataset/object/17.png\n",
|
|
"dataset/object/10.png\n",
|
|
"dataset/object/53.png\n",
|
|
"dataset/object/25.png\n",
|
|
"dataset/object/21.png\n",
|
|
"dataset/object/22.png\n",
|
|
"dataset/object/30.png\n",
|
|
"dataset/object/78.png\n",
|
|
"dataset/object/118.png\n",
|
|
"dataset/object/110.png\n",
|
|
"dataset/object/79.png\n",
|
|
"dataset/object/77.png\n",
|
|
"dataset/object/12.png\n",
|
|
"dataset/object/115.png\n",
|
|
"dataset/object/67.png\n",
|
|
"dataset/object/84.png\n",
|
|
"dataset/object/11.png\n",
|
|
"dataset/object/86.png\n",
|
|
"dataset/object/89.png\n",
|
|
"dataset/object/113.png\n",
|
|
"dataset/noobject/image_0056.jpg\n",
|
|
"dataset/noobject/image_0181.jpg\n",
|
|
"dataset/noobject/image_0127.jpg\n",
|
|
"dataset/noobject/image_0142.jpg\n",
|
|
"dataset/noobject/image_0025.jpg\n",
|
|
"dataset/noobject/image_0065.jpg\n",
|
|
"dataset/noobject/image_0174.jpg\n",
|
|
"dataset/noobject/image_0091.jpg\n",
|
|
"dataset/noobject/image_0124.jpg\n",
|
|
"dataset/noobject/image_0086.jpg\n",
|
|
"dataset/noobject/image_0079.jpg\n",
|
|
"dataset/noobject/image_0058.jpg\n",
|
|
"dataset/noobject/image_0060.jpg\n",
|
|
"dataset/noobject/image_0119.jpg\n",
|
|
"dataset/noobject/image_0023.jpg\n",
|
|
"dataset/noobject/image_0075.jpg\n",
|
|
"dataset/noobject/image_0020.jpg\n",
|
|
"dataset/noobject/image_0013.jpg\n",
|
|
"dataset/noobject/image_0126.jpg\n",
|
|
"dataset/noobject/image_0012.jpg\n",
|
|
"dataset/noobject/image_0055.jpg\n",
|
|
"dataset/noobject/image_0176.jpg\n",
|
|
"dataset/noobject/image_0144.jpg\n",
|
|
"dataset/noobject/image_0048.jpg\n",
|
|
"dataset/noobject/image_0121.jpg\n",
|
|
"dataset/noobject/image_0070.jpg\n",
|
|
"dataset/noobject/image_0082.jpg\n",
|
|
"dataset/noobject/image_0095.jpg\n",
|
|
"dataset/noobject/image_0022.jpg\n",
|
|
"dataset/noobject/image_0120.jpg\n",
|
|
"dataset/noobject/image_0139.jpg\n",
|
|
"dataset/noobject/image_0073.jpg\n",
|
|
"dataset/noobject/image_0090.jpg\n",
|
|
"dataset/noobject/image_0145.jpg\n",
|
|
"dataset/noobject/image_0173.jpg\n",
|
|
"dataset/noobject/image_0078.jpg\n",
|
|
"dataset/noobject/image_0085.jpg\n",
|
|
"dataset/noobject/image_0083.jpg\n",
|
|
"dataset/noobject/image_0179.jpg\n",
|
|
"dataset/noobject/image_0050.jpg\n",
|
|
"dataset/noobject/image_0076.jpg\n",
|
|
"dataset/noobject/image_0014.jpg\n",
|
|
"dataset/noobject/image_0054.jpg\n",
|
|
"dataset/noobject/image_0066.jpg\n",
|
|
"dataset/noobject/image_0001.jpg\n",
|
|
"dataset/noobject/image_0047.jpg\n",
|
|
"dataset/noobject/image_0077.jpg\n",
|
|
"dataset/noobject/image_0122.jpg\n",
|
|
"dataset/noobject/image_0068.jpg\n",
|
|
"dataset/noobject/image_0049.jpg\n",
|
|
"dataset/noobject/image_0092.jpg\n",
|
|
"dataset/noobject/image_0138.jpg\n",
|
|
"dataset/noobject/image_0072.jpg\n",
|
|
"dataset/noobject/image_0146.jpg\n",
|
|
"dataset/noobject/image_0061.jpg\n",
|
|
"dataset/noobject/image_0011.jpg\n",
|
|
"dataset/noobject/image_0002.jpg\n",
|
|
"dataset/noobject/image_0143.jpg\n",
|
|
"dataset/noobject/image_0088.jpg\n",
|
|
"dataset/noobject/image_0062.jpg\n",
|
|
"dataset/noobject/image_0089.jpg\n",
|
|
"dataset/noobject/image_0018.jpg\n",
|
|
"dataset/noobject/image_0024.jpg\n",
|
|
"dataset/noobject/image_0064.jpg\n",
|
|
"dataset/noobject/image_0074.jpg\n",
|
|
"dataset/noobject/image_0052.jpg\n",
|
|
"dataset/noobject/image_0096.jpg\n",
|
|
"dataset/noobject/image_0178.jpg\n",
|
|
"dataset/noobject/image_0067.jpg\n",
|
|
"dataset/noobject/image_0140.jpg\n",
|
|
"dataset/noobject/image_0084.jpg\n",
|
|
"dataset/noobject/image_0010.jpg\n",
|
|
"dataset/noobject/image_0081.jpg\n",
|
|
"dataset/noobject/image_0059.jpg\n",
|
|
"dataset/noobject/image_0016.jpg\n",
|
|
"dataset/noobject/image_0175.jpg\n",
|
|
"dataset/noobject/image_0094.jpg\n",
|
|
"dataset/noobject/image_0071.jpg\n",
|
|
"dataset/noobject/image_0080.jpg\n",
|
|
"dataset/noobject/image_0125.jpg\n",
|
|
"dataset/noobject/image_0008.jpg\n",
|
|
"dataset/noobject/image_0019.jpg\n",
|
|
"dataset/noobject/image_0017.jpg\n",
|
|
"dataset/noobject/image_0180.jpg\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"for directory in os.listdir(path):\n",
|
|
" for file in os.listdir(path+directory):\n",
|
|
" print(path+directory+\"/\"+file)\n",
|
|
" img=Image.open(path+directory+\"/\"+file)\n",
|
|
" #resize\n",
|
|
" thumb = ImageOps.fit(img, size, Image.ANTIALIAS)\n",
|
|
" image_data = np.array(thumb).flatten()[:100]\n",
|
|
" #image_data=numpy.array(img).flatten()[:50] #in my case the images dont have the same dimensions, so [:50] only takes the first 50 values\n",
|
|
" Xlist.append(image_data)\n",
|
|
" Ylist.append(directory)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(Xlist, Ylist, test_size=0.2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### AdaBoostClassifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"clf=AdaBoostClassifier(n_estimators=100)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"scores = cross_val_score(clf, X_train, y_train, cv=3)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.77037037037\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(scores.mean())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### GaussianNB"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.naive_bayes import GaussianNB"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"clf = GaussianNB()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"scores = cross_val_score(clf, Xlist, Ylist)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.721908939014\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(scores.mean())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### KNeighborsClassifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.neighbors import KNeighborsClassifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"clf = KNeighborsClassifier(n_neighbors=10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"scores = cross_val_score(clf, Xlist, Ylist)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.751357560568\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(scores.mean())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### LinearSVC"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.svm import LinearSVC"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"clf = LinearSVC()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"scores = cross_val_score(clf, Xlist, Ylist)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.638575605681\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(scores.mean())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### SVC"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.svm import SVC"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"clf = SVC()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"scores = cross_val_score(clf, Xlist, Ylist)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.668650793651\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(scores.mean())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### GaussianProcessClassifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.gaussian_process import GaussianProcessClassifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"clf = GaussianProcessClassifier()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"scores = cross_val_score(clf, Xlist, Ylist)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 27,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.491228070175\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(scores.mean())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### RandomForestClassifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.ensemble import RandomForestClassifier"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 29,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"clf = RandomForestClassifier()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 30,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"scores = cross_val_score(clf, Xlist, Ylist)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 31,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.710317460317\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(scores.mean())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Hyperparameters Tuning using sklearn pipeline and gridsearch"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.pipeline import Pipeline\n",
|
|
"from sklearn.model_selection import GridSearchCV\n",
|
|
"from sklearn.model_selection import RandomizedSearchCV"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"pipe = [Pipeline([\n",
|
|
" ('clf', RandomForestClassifier()),\n",
|
|
" ]),\n",
|
|
" Pipeline([\n",
|
|
" ('clf', KNeighborsClassifier()),\n",
|
|
" ]),\n",
|
|
" Pipeline([\n",
|
|
" ('clf', GaussianProcessClassifier()),\n",
|
|
" ]),\n",
|
|
" Pipeline([\n",
|
|
" ('clf', AdaBoostClassifier()),\n",
|
|
" ]),\n",
|
|
" Pipeline([\n",
|
|
" ('clf', SVC()),\n",
|
|
" ]),\n",
|
|
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"param_grid = [dict(clf__n_estimators=[3, 10, 100]),\n",
|
|
" dict(clf__n_neighbors=[3,10]),\n",
|
|
" dict(clf__n_restarts_optimizer=[0,1]),\n",
|
|
" dict(clf__n_estimators=[3, 10, 100]),\n",
|
|
" dict(clf__C=[3, 10, 100]),\n",
|
|
" ]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 35,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=-1, verbose=1, cv=3)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"#grid_search.fit(Xlist, Ylist)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Utility function to report best scores\n",
|
|
"def report(results, n_top=10):\n",
|
|
" for i in range(1, n_top + 1):\n",
|
|
" candidates = np.flatnonzero(results['rank_test_score'] == i)\n",
|
|
" for candidate in candidates:\n",
|
|
" print(\"Model with rank: {0}\".format(i))\n",
|
|
" print(\"Mean validation score: {0:.3f} (std: {1:.3f})\".format(\n",
|
|
" results['mean_test_score'][candidate],\n",
|
|
" results['std_test_score'][candidate]))\n",
|
|
" print(\"Parameters: {0}\".format(results['params'][candidate]))\n",
|
|
" print(\"\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"-----\n",
|
|
"classifier:\n",
|
|
"RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
|
|
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
|
|
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
|
|
" min_samples_leaf=1, min_samples_split=2,\n",
|
|
" min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n",
|
|
" oob_score=False, random_state=None, verbose=0,\n",
|
|
" warm_start=False)\n",
|
|
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 1.5s finished\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"GridSearchCV took 2.38 seconds for 3 candidate parameter settings.\n",
|
|
"finished GridSearch\n",
|
|
"Model with rank: 1\n",
|
|
"Mean validation score: 0.815 (std: 0.073)\n",
|
|
"Parameters: {'clf__n_estimators': 100}\n",
|
|
"\n",
|
|
"Model with rank: 2\n",
|
|
"Mean validation score: 0.763 (std: 0.093)\n",
|
|
"Parameters: {'clf__n_estimators': 10}\n",
|
|
"\n",
|
|
"Model with rank: 3\n",
|
|
"Mean validation score: 0.756 (std: 0.110)\n",
|
|
"Parameters: {'clf__n_estimators': 3}\n",
|
|
"\n",
|
|
"-----\n",
|
|
"classifier:\n",
|
|
"KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
|
|
" metric_params=None, n_jobs=1, n_neighbors=5, p=2,\n",
|
|
" weights='uniform')\n",
|
|
"Fitting 3 folds for each of 2 candidates, totalling 6 fits\n",
|
|
"GridSearchCV took 0.23 seconds for 2 candidate parameter settings.\n",
|
|
"finished GridSearch\n",
|
|
"Model with rank: 1\n",
|
|
"Mean validation score: 0.778 (std: 0.048)\n",
|
|
"Parameters: {'clf__n_neighbors': 3}\n",
|
|
"\n",
|
|
"Model with rank: 2\n",
|
|
"Mean validation score: 0.704 (std: 0.010)\n",
|
|
"Parameters: {'clf__n_neighbors': 10}\n",
|
|
"\n",
|
|
"-----\n",
|
|
"classifier:\n",
|
|
"GaussianProcessClassifier(copy_X_train=True, kernel=None,\n",
|
|
" max_iter_predict=100, multi_class='one_vs_rest', n_jobs=1,\n",
|
|
" n_restarts_optimizer=0, optimizer='fmin_l_bfgs_b',\n",
|
|
" random_state=None, warm_start=False)\n",
|
|
"Fitting 3 folds for each of 2 candidates, totalling 6 fits\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.1s remaining: 0.0s\n",
|
|
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.1s finished\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"GridSearchCV took 0.36 seconds for 2 candidate parameter settings.\n",
|
|
"finished GridSearch\n",
|
|
"Model with rank: 1\n",
|
|
"Mean validation score: 0.489 (std: 0.000)\n",
|
|
"Parameters: {'clf__n_restarts_optimizer': 0}\n",
|
|
"\n",
|
|
"Model with rank: 1\n",
|
|
"Mean validation score: 0.489 (std: 0.000)\n",
|
|
"Parameters: {'clf__n_restarts_optimizer': 1}\n",
|
|
"\n",
|
|
"-----\n",
|
|
"classifier:\n",
|
|
"AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,\n",
|
|
" learning_rate=1.0, n_estimators=50, random_state=None)\n",
|
|
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.2s remaining: 0.0s\n",
|
|
"[Parallel(n_jobs=-1)]: Done 6 out of 6 | elapsed: 0.2s finished\n",
|
|
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 0.9s finished\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"GridSearchCV took 1.16 seconds for 3 candidate parameter settings.\n",
|
|
"finished GridSearch\n",
|
|
"Model with rank: 1\n",
|
|
"Mean validation score: 0.807 (std: 0.093)\n",
|
|
"Parameters: {'clf__n_estimators': 3}\n",
|
|
"\n",
|
|
"Model with rank: 2\n",
|
|
"Mean validation score: 0.756 (std: 0.048)\n",
|
|
"Parameters: {'clf__n_estimators': 100}\n",
|
|
"\n",
|
|
"Model with rank: 3\n",
|
|
"Mean validation score: 0.733 (std: 0.054)\n",
|
|
"Parameters: {'clf__n_estimators': 10}\n",
|
|
"\n",
|
|
"-----\n",
|
|
"classifier:\n",
|
|
"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
|
|
" decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n",
|
|
" max_iter=-1, probability=False, random_state=None, shrinking=True,\n",
|
|
" tol=0.001, verbose=False)\n",
|
|
"Fitting 3 folds for each of 3 candidates, totalling 9 fits\n",
|
|
"GridSearchCV took 0.35 seconds for 3 candidate parameter settings.\n",
|
|
"finished GridSearch\n",
|
|
"Model with rank: 1\n",
|
|
"Mean validation score: 0.689 (std: 0.031)\n",
|
|
"Parameters: {'clf__C': 3}\n",
|
|
"\n",
|
|
"Model with rank: 1\n",
|
|
"Mean validation score: 0.689 (std: 0.031)\n",
|
|
"Parameters: {'clf__C': 10}\n",
|
|
"\n",
|
|
"Model with rank: 1\n",
|
|
"Mean validation score: 0.689 (std: 0.031)\n",
|
|
"Parameters: {'clf__C': 100}\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[Parallel(n_jobs=-1)]: Done 9 out of 9 | elapsed: 0.1s finished\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from time import time\n",
|
|
"\n",
|
|
"for i in range(len(pipe)):\n",
|
|
" start = time()\n",
|
|
" print(\"-----\")\n",
|
|
" print(\"classifier:\")\n",
|
|
" print(pipe[i].named_steps['clf'])\n",
|
|
" grid_search = GridSearchCV(pipe[i], param_grid[i], n_jobs=-1, verbose=1, cv=3)\n",
|
|
" grid_search.fit(X_train, y_train)\n",
|
|
" print(\"GridSearchCV took %.2f seconds for %d candidate parameter settings.\"\n",
|
|
" % (time() - start, len(grid_search.cv_results_['params'])))\n",
|
|
" print(\"finished GridSearch\")\n",
|
|
" report(grid_search.cv_results_)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|