TVP

# 数学推导＋纯Python实现机器学习算法3：k近邻

k 近邻基本理论

knn 的 Python 实现

knn 算法的函数化封装

# Imports and global plotting configuration for the kNN demo.
import numpy as np
from collections import Counter
import random
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.utils import shuffle

# Default figure appearance for any exploratory plots.
plt.rcParams['figure.figsize'] = (10.0, 8.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

class KNearestNeighbor(object):
    """k-nearest-neighbour classifier with a vectorised distance matrix.

    Workflow: ``train`` memorises the training set, ``compute_distances``
    builds the full test-vs-train Euclidean distance matrix in one matrix
    product, and ``predict_labels`` takes a majority vote over the k
    closest training points.
    """

    def __init__(self):
        pass

    def train(self, X, y):
        """Memorise the training data (kNN has no real fitting step).

        X: 2-D array of training samples, one row per sample.
        y: column vector (n, 1) of integer class labels.
        """
        self.X_train = X
        self.y_train = y

    def compute_distances(self, X):
        """Return the (num_test, num_train) Euclidean distance matrix.

        Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b so the
        whole matrix comes from a single dot product instead of a loop.
        """
        M = np.dot(X, self.X_train.T)
        te = np.square(X).sum(axis=1)
        tr = np.square(self.X_train).sum(axis=1)
        # te reshaped to a column so it broadcasts across the train axis
        # (the original used the deprecated np.matrix for this).
        # Clamp at 0 to guard against tiny negative rounding residues
        # before taking the square root.
        sq_dists = np.maximum(-2 * M + tr + te.reshape(-1, 1), 0.0)
        return np.sqrt(sq_dists)

    def predict_labels(self, dists, k=1):
        """Majority vote among the k nearest training points per test row.

        dists: (num_test, num_train) distance matrix from compute_distances.
        k: number of neighbours to vote (default 1).
        Returns a 1-D array of predicted labels, one per test row.
        """
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # Training labels ordered by increasing distance to test row i.
            labels = self.y_train[np.argsort(dists[i, :])].flatten()
            closest_y = labels[:k]
            c = Counter(closest_y)
            # most_common(1) -> [(label, count)]; take the winning label.
            y_pred[i] = c.most_common(1)[0][0]
        return y_pred

    def cross_validation(self, X_train, y_train):
        """Pick the best k from a fixed candidate list via 5-fold CV.

        Prints per-fold accuracies and the winning k, then returns it.
        NOTE: leaves the classifier trained on the last fold's subset;
        callers should retrain on the full data before final scoring.
        """
        num_folds = 5
        k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
        X_train_folds = np.array_split(X_train, num_folds)
        y_train_folds = np.array_split(y_train, num_folds)
        k_to_accuracies = {}
        for k in k_choices:
            for fold in range(num_folds):
                validation_X_test = X_train_folds[fold]
                validation_y_test = y_train_folds[fold]
                # All folds except `fold` become the temporary training set.
                temp_X_train = np.concatenate(
                    X_train_folds[:fold] + X_train_folds[fold + 1:])
                temp_y_train = np.concatenate(
                    y_train_folds[:fold] + y_train_folds[fold + 1:])
                self.train(temp_X_train, temp_y_train)
                temp_dists = self.compute_distances(validation_X_test)
                temp_y_test_pred = self.predict_labels(temp_dists, k=k)
                temp_y_test_pred = temp_y_test_pred.reshape((-1, 1))
                # Checking accuracies.
                num_correct = np.sum(temp_y_test_pred == validation_y_test)
                accuracy = float(num_correct) / validation_X_test.shape[0]
                k_to_accuracies.setdefault(k, []).append(accuracy)
        # Print out the computed accuracies.
        for k in sorted(k_to_accuracies):
            for accuracy in k_to_accuracies[k]:
                print('k = %d, accuracy = %f' % (k, accuracy))
        accuracies_mean = np.array(
            [np.mean(v) for k, v in sorted(k_to_accuracies.items())])
        best_k = k_choices[np.argmax(accuracies_mean)]
        print('最佳k值为{}'.format(best_k))
        return best_k

    def create_train_test(self):
        """Load the iris dataset and return a shuffled 70/30 split.

        Returns (X_train, y_train, X_test, y_test); labels are (n, 1)
        column vectors to match what predict_labels compares against.
        """
        # The original relied on an undefined global `iris`; load it here
        # so this method is self-contained.
        iris = datasets.load_iris()
        X, y = shuffle(iris.data, iris.target, random_state=13)
        X = X.astype(np.float32)
        y = y.reshape((-1, 1))
        offset = int(X.shape[0] * 0.7)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]
        return X_train, y_train, X_test, y_test

if__name__ =='__main__': knn_classifier = KNearestNeighbor() X_train, y_train, X_test, y_test = knn_classifier.create_train_test() best_k = knn_classifier.cross_validation(X_train, y_train) dists = knn_classifier.compute_distances(X_test) y_test_pred = knn_classifier.predict_labels(dists, k=best_k) y_test_pred = y_test_pred.reshape((-1,1)) num_correct = np.sum(y_test_pred == y_test) accuracy = float(num_correct) / X_test.shape[] print('Got %d / %d correct => accuracy: %f'% (num_correct, X_test.shape[], accuracy))

cs231n lecture1

• 发表于:
• 原文链接https://kuaibao.qq.com/s/20181001B1GRRP00?refer=cp_1026
• 腾讯「腾讯云开发者社区」是腾讯内容开放平台帐号（企鹅号）传播渠道之一，根据《腾讯内容开放平台服务协议》转载发布内容。
• 如有侵权，请联系 cloudcommunity@tencent.com 删除。

2018-09-02

2018-01-26

2023-05-06

2018-05-23

2018-01-27

2018-05-30

2018-01-29

2018-05-18

2018-08-11

2018-09-01

2019-06-10

2018-03-06

2018-01-28

2018-04-18

2020-12-01

2018-08-09

2018-06-13

2023-06-05

2018-03-08

2018-07-16