
import numpy as np
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import matplotlib.cm as cm
# PCA demo: project the handwritten-digits dataset onto its leading principal
# components, report the explained variance, and scatter-plot the samples in
# the plane of the first two components, coloured by digit class.

digits = load_digits()
data = digits.data
n_samples, n_features = data.shape
labels = digits.target
classes = np.unique(labels)
n_digits = len(classes)

# Keep the 10 leading principal components of the data.
n_components = 10
pca = PCA(n_components=n_components)
data_r = pca.fit(data).transform(data)

print('explained variance ratio (first components): %s'
      % str(pca.explained_variance_ratio_))
# The sum runs over every retained component, so the label reports that count
# (the original text said "first two components", which was wrong).
print('sum of explained variance (first %d components): %s'
      % (n_components, str(sum(pca.explained_variance_ratio_))))

plt.figure()
# One distinct colour per digit class.
colors = cm.rainbow(np.linspace(0, 1, n_digits))
for color, digit in zip(colors, classes):
    # Select only the samples of the current class; the original always
    # selected `labels == 1`, drawing the same points ten times.
    mask = labels == digit
    # `color=` is used instead of `c=` because a single RGBA row passed as
    # `c` is ambiguous (it can be read as a sequence of scalar values).
    plt.scatter(data_r[mask, 0], data_r[mask, 1],
                color=color, alpha=0.4, label=str(digit))
plt.legend(title='digit')
# Only components 0 and 1 are drawn, so the title says 2 rather than 10.
plt.title('Scatterplot of Points plotted in first \n'
          '2 Principal Components')
plt.show()

# Output recorded from a run of this script (the sum covers all 10 retained
# components, i.e. about 73.8% of the total variance):
# explained variance ratio (first components): [0.14890594 0.13618771 0.11794594 0.08409979 0.05782414 0.04916908 0.04315977 0.0366137 0.03353239 0.03078768]
# sum of explained variance (first 10 components): 0.7382261453429998

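算法:主成分分析(PCA)通过逐一找出数据集中方差最大的方向(即主成分),并将数据投影到这些方向上,从而实现降维。步骤如下:(1) 对数据做中心化(必要时标准化);(2) 计算协方差矩阵;(3) 求协方差矩阵的特征值和特征向量;(4) 按特征值从大到小选取前 k 个特征向量作为主成分;(5) 将原始数据投影到这 k 个主成分上。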
数据集链接(UCI 手写数字光学识别数据集):https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/
本文分享自 图像处理与模式识别研究所 微信公众号,前往查看
如有侵权,请联系 cloudcommunity@tencent.com 删除。
本文参与 腾讯云自媒体同步曝光计划 ,欢迎热爱写作的你一起参与!