# 【Python数据挖掘课程】PCA降维操作及subplot子图绘制

## 二. Python中Sklearn的PCA扩展包

`from sklearn.decomposition import PCA`

`pca = PCA(n_components=2)`

```python
import numpy as np
from sklearn.decomposition import PCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca = PCA(n_components=2)
print pca
pca.fit(X)
print(pca.explained_variance_ratio_)
```

```
PCA(copy=True, n_components=2, whiten=False)
[ 0.99244291  0.00755711]
```

```python
#载入数据集
x = d.data
y = d.target
print x[:10]
print u'形状:', x.shape
#降维
import numpy as np
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
newData = pca.fit_transform(x)
print u'降维后数据:'
print newData[:4]
print u'形状:', newData.shape
```

```
[[  6.32000000e-03   1.80000000e+01   2.31000000e+00   0.00000000e+00
    5.38000000e-01   6.57500000e+00   6.52000000e+01   4.09000000e+00
    1.00000000e+00   2.96000000e+02   1.53000000e+01   3.96900000e+02
    4.98000000e+00]
 [  2.73100000e-02   0.00000000e+00   7.07000000e+00   0.00000000e+00
    4.69000000e-01   6.42100000e+00   7.89000000e+01   4.96710000e+00
    2.00000000e+00   2.42000000e+02   1.78000000e+01   3.96900000e+02
    9.14000000e+00]
 [  2.72900000e-02   0.00000000e+00   7.07000000e+00   0.00000000e+00
    4.69000000e-01   7.18500000e+00   6.11000000e+01   4.96710000e+00
    2.00000000e+00   2.42000000e+02   1.78000000e+01   3.92830000e+02
    4.03000000e+00]
 [  3.23700000e-02   0.00000000e+00   2.18000000e+00   0.00000000e+00
    4.58000000e-01   6.99800000e+00   4.58000000e+01   6.06220000e+00
    3.00000000e+00   2.22000000e+02   1.87000000e+01   3.94630000e+02
    2.94000000e+00]]

[[-119.81821283    5.56072403]
 [-168.88993091  -10.11419701]
 [-169.31150637  -14.07855395]
 [-190.2305986   -18.29993274]]
```

## 三. Kmeans聚类糖尿病及降维subplot绘制子图

```python
import numpy as np
import matplotlib.pyplot as plt
plt.figure(1) # 创建图表1
plt.figure(2) # 创建图表2
ax1 = plt.subplot(211) # 在图表2中创建子图1
ax2 = plt.subplot(212) # 在图表2中创建子图2
x = np.linspace(0, 3, 100)
for i in xrange(5):
    plt.figure(1)    # 选择图表1
    plt.plot(x, np.exp(i*x/3))
    plt.sca(ax1)    # 选择图表2的子图1
    plt.plot(x, np.sin(i*x))
    plt.sca(ax2)    # 选择图表2的子图2
    plt.plot(x, np.cos(i*x))
plt.show()
```

```python
# -*- coding: utf-8 -*-

#糖尿病数据集
x = data.data
print x[:4]
y = data.target
print y[:4]

#KMeans聚类算法
from sklearn.cluster import KMeans

#训练
clf = KMeans(n_clusters=2)
print clf
clf.fit(x)

#预测
pre = clf.predict(x)
print pre[:10]

#使用PCA降维操作
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
newData = pca.fit_transform(x)
print newData[:4]
L1 = [n[0] for n in newData]
L2 = [n[1] for n in newData]

#绘图
import numpy as np
import matplotlib.pyplot as plt

#用来正常显示中文标签
plt.rc('font', family='SimHei', size=8)
#plt.rcParams['font.sans-serif']=['SimHei']

#用来正常显示负号
plt.rcParams['axes.unicode_minus']=False

p1 = plt.subplot(221)
plt.title(u"Kmeans聚类 n=2")
plt.scatter(L1,L2,c=pre,marker="s")
plt.sca(p1)

###################################
# 聚类 类簇数=3
clf = KMeans(n_clusters=3)
clf.fit(x)
pre = clf.predict(x)

p2 = plt.subplot(222)
plt.title("Kmeans n=3")
plt.scatter(L1,L2,c=pre,marker="s")
plt.sca(p2)

###################################
# 聚类 类簇数=4
clf = KMeans(n_clusters=4)
clf.fit(x)
pre = clf.predict(x)

p3 = plt.subplot(223)
plt.title("Kmeans n=4")
plt.scatter(L1,L2,c=pre,marker="+")
plt.sca(p3)

###################################
# 聚类 类簇数=5
clf = KMeans(n_clusters=5)
clf.fit(x)
pre = clf.predict(x)

p4 = plt.subplot(224)
plt.title("Kmeans n=5")
plt.scatter(L1,L2,c=pre,marker="+")
plt.sca(p4)

#保存图片本地
plt.savefig('power.png', dpi=300)
plt.show()
```

553 篇文章 · 58 人订阅

0 条评论

## 相关文章

11040

19320

17810

### 媒体炒作掩盖研究价值？OpenAI的GPT-2不只是代码开放问题

2 月 14 日，OpenAI 发布大型无监督语言模型 GPT-2，它能够产生连贯的文本段落，在许多语言建模基准上取得了 SOTA 表现。而且该模型在没有任务特...

8710

10630

11710

18820

### 使用IsolationForest 与Meanshift算法进行异常检测

https://blog.csdn.net/App_12062011/article/details/84797641

22110

6940

15930