回顾
1.将数据集分成训练数据集合测试数据集 2.将训练数据集进行归一化 3.使用训练数据集的均值和方差将测试数据集归一化 4.使用训练数集训练处模型 5.使用归一化后的测试数据集测试分类的准确度(accuracy) 6.使用网格搜索寻找最好的超参数,然后回到1-5
1
2
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y)
from sklearn.preprocessing import StandardScaler
standardScaler = StandardScaler()
# 存放了均值方差归一化所对应的信息
standardScaler.fit(X_train)
X_train = standardScaler.transform(X_train)
X_test = standardScaler.transform(X_test)
from sklearn.neighbors import KNeighborsClassifier
sklearn_knn_clf = KNeighborsClassifier(n_neighbors=6)
sklearn_knn_clf.fit(X_train,y_train)
sklearn_knn_clf.score(X_test,y_test)
y_predict = sklearn_knn_clf.predict(X_test)
# array<dict<参数名:参数可能的取值>>
param_grid =[
{
'weights':['uniform'],
'n_neighbors': [i for i in range(1,11)]
},
{
'weights':['distance'],
'n_neighbors': [i for i in range(1,11)],
'p': [i for i in range(1,6)]
}
]
# 先new一个默认的Classifier对象
knn_clf = KNeighborsClassifier()
# 调用GridSearchCV创建网格搜索对象,传入参数为Classifier对象以及参数列表
from sklearn.model_selection import GridSearchCV
grid_search = GridSearchCV(knn_clf,param_grid)
# 调用fit方法执行网格搜索
%%time
grid_search.fit(X_train,y_train)
# 获得最好的评估结果,返回的是KNeighborsClassifier对象,可以直接拿来做机器学习预测了
grid_search.best_estimator_
# 最好的分数
grid_search.best_score_
# 最好的参数
grid_search.best_params_