def erzhianglecos(x_train, y_train, sample):
    """Classify a sample by binary (thresholded) cosine similarity.

    Every feature is binarised against one global threshold derived from the
    training set. The cosine of the angle between the binarised sample and
    each binarised training row is then computed, and the label of the most
    similar training row is returned.

    :param x_train: training set, M x N (M samples, N features)
    :param y_train: training labels, length M
    :param sample: sample to classify, N features
    :return: label of the training row with the largest cosine similarity
    """
    x_train = np.atleast_2d(np.asarray(x_train))
    sample = np.asarray(sample).ravel()

    # Binarise around the midpoint of the training value range. Using
    # low + half the range (rather than half the range alone) keeps the
    # threshold inside the data even when the minimum is not zero.
    low, high = np.min(x_train), np.max(x_train)
    threshold = low + 0.5 * (high - low)
    train_bin = (x_train > threshold).astype(np.int64)
    sample_bin = (sample > threshold).astype(np.int64)

    # Cosine similarity of the sample against every training row separately:
    # sums run along the feature axis so that one score per row is produced.
    # `**` is used for squaring; `^` would be a bitwise XOR.
    dots = train_bin @ sample_bin
    norms = np.sqrt(np.sum(train_bin ** 2, axis=1) * np.sum(sample_bin ** 2))

    # Rows (or a sample) that binarise to all zeros have no direction; give
    # them similarity 0 instead of dividing by zero.
    similarity = np.zeros(dots.shape, dtype=float)
    np.divide(dots, norms, out=similarity, where=norms > 0)

    # Larger cosine means more similar.
    best = int(np.argmax(similarity))
    return y_train[best]
# Test code
from sklearn import datasets
from Include.chapter3 import function
import numpy as np
# Load the handwritten-digits data set
digits = datasets.load_digits()
x, y = digits.data, digits.target
# Split into training and test parts.
# NOTE(review): the unpacking order relies on the project's own
# function.train_test_split, which is not visible here — confirm it returns
# (x_train, y_train, x_test, y_test).
x_train, y_train, x_test, y_test = function.train_test_split(x, y)
# Pick one random test sample
testId = np.random.randint(0, x_test.shape[0])
sample = x_test[testId, :]
# Exercise the binary cosine classifier; the previous call went to
# function.anglecos, so erzhianglecos was never run by this script.
# NOTE(review): assumes erzhianglecos is exposed by Include.chapter3.function — confirm.
ans = function.erzhianglecos(x_train, y_train, sample)
print(ans == y_test[testId])
# Sample output: True