# 机器学习：Python测试线性可分性的方法

,

，那么两组集合

• 领域和专业知识
• 数据可视化
• 计算几何学(凸包)
• 机器学习:
• 感知器
• 支持向量机

### 领域和专业知识

• Iris数据集：https://archive.ics.uci.edu/ml/datasets/iris

Iris以鸢尾花的特征作为数据来源，数据集包含150个数据集，分为3类，每类50个数据，每个数据包含4个属性，是在数据挖掘、数据分类中非常常用的测试集、训练集。

```import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import datasets

#create a DataFrame
df = pd.DataFrame(data.data, columns=data.feature_names)
df['Target'] = pd.DataFrame(data.target)

```from pandas.tools.plotting import scatter_matrix
scatter_matrix(df.iloc[:,0:4], figsize=(15,11))```

```plt.clf()
plt.figure(figsize=(10,6))
plt.scatter(df.iloc[:,2], df.iloc[:,3])
plt.title('Petal Width vs Petal Length')
plt.xlabel(data.feature_names[2])
plt.ylabel(data.feature_names[3])
plt.show()```

```plt.clf()
plt.figure(figsize = (10, 6))
names = data.target_names
colors = ['b','r','g']
label = (data.target).astype(np.int)
plt.title('Petal Width vs Petal Length')
plt.xlabel(data.feature_names[2])
plt.ylabel(data.feature_names[3])
for i in range(len(names)):
bucket = df[df['Target'] == i]
bucket = bucket.iloc[:,[2,3]].values
plt.scatter(bucket[:, 0], bucket[:, 1], label=names[i])
plt.legend()
plt.show()```

### 计算几何学

• 地址：https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.ConvexHull.html

```from scipy.spatial import ConvexHull

plt.clf()
plt.figure(figsize = (10, 6))
names = data.target_names
label = (data.target).astype(np.int)
colors = ['b','r','g']
plt.title('Petal Width vs Petal Length')
plt.xlabel(data.feature_names[2])
plt.ylabel(data.feature_names[3])
for i in range(len(names)):
bucket = df[df['Target'] == i]
bucket = bucket.iloc[:,[2,3]].values
hull = ConvexHull(bucket)
plt.scatter(bucket[:, 0], bucket[:, 1], label=names[i])
for j in hull.simplices:
plt.plot(bucket[j,0], bucket[j,1], colors[i])
plt.legend()
plt.show()```

### 机器学习

```# Data Preprocessing
x = df.iloc[:, [2,3]].values
# we are picking Setosa to be 1 and all other classes will be 0
y = (data.target == 0).astype(np.int)

#Perform feature scaling
from sklearn.preprocessing import StandardScaler
sc= StandardScaler()
x = sc.fit_transform(x)```

```from sklearn.linear_model import Perceptron
perceptron = Perceptron(random_state = 0)
perceptron.fit(x, y)
predicted = perceptron.predict(x)```

```from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y, predicted)

plt.clf()
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)
classNames = ['Negative','Positive']
plt.title('Perceptron Confusion Matrix - Entire Data')
plt.ylabel('True label')
plt.xlabel('Predicted label')
tick_marks = np.arange(len(classNames))
plt.xticks(tick_marks, classNames, rotation=45)
plt.yticks(tick_marks, classNames)
s = [['TN','FP'], ['FN', 'TP']]

for i in range(2):
for j in range(2):
plt.text(j,i, str(s[i][j])+" = "+str(cm[i][j]))
plt.show()```

```from matplotlib.colors import ListedColormap
plt.clf()
X_set, y_set = x, y
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:,
0].max() + 1, step = 0.01),
np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step =
0.01))
plt.contourf(X1, X2, perceptron.predict(np.array([X1.ravel(), X2.ravel()]).T).
reshape(X1.shape),
alpha = 0.75, cmap = ListedColormap(('navajowhite', 'darkkhaki')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Perceptron Classifier (Decision boundary for Setosa vs the rest)')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.legend()
plt.show()```

```from sklearn.svm import SVC
svm = SVC(C=1.0, kernel='linear', random_state=0)
svm.fit(x, y)

predicted = svm.predict(x)

cm = confusion_matrix(y, predicted)

plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)
classNames = ['Negative','Positive']
plt.title('SVM Linear Kernel Confusion Matrix - Setosa')
plt.ylabel('True label')
plt.xlabel('Predicted label')
tick_marks = np.arange(len(classNames))
plt.xticks(tick_marks, classNames, rotation=45)
plt.yticks(tick_marks, classNames)
s = [['TN','FP'], ['FN', 'TP']]

for i in range(2):
for j in range(2):
plt.text(j,i, str(s[i][j])+" = "+str(cm[i][j]))```

```x = df.iloc[:, [2,3]].values
y = (data.target == 1).astype(np.int) # we are picking Versicolor to be 1
and all other classes will be 0

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x = sc.fit_transform(x)

from sklearn.svm import SVC
svm = SVC(kernel='rbf', random_state=0)
svm.fit(x, y)

predicted = svm.predict(x)

cm = confusion_matrix(y, predicted)

plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Wistia)
classNames = ['Negative','Positive']
plt.title('SVM RBF Confusion Matrix - Versicolor')
plt.ylabel('True label')
plt.xlabel('Predicted label')
tick_marks = np.arange(len(classNames))
plt.xticks(tick_marks, classNames, rotation=45)
plt.yticks(tick_marks, classNames)
s = [['TN','FP'], ['FN', 'TP']]

for i in range(2):
for j in range(2):
plt.text(j,i, str(s[i][j])+" = "+str(cm[i][j]))```
• Python源代码：https://github.com/tatwan/Linear-Separability/blob/master/linear_separability.py

1848 篇文章92 人订阅

0 条评论

## 相关文章

34260

1.3K20

39950

44180

### 如何做特征选择

1.数据挖掘与聚类分析概述 数据挖掘一般由以下几个步骤： (l)分析问题:源数据数据库必须经过评估确认其是否符合数据挖掘标准。以决定预期结果，也就选择了这项工作...

44850

### 终于，Geoffrey Hinton那篇备受关注的Capsule论文公开了

Geoffrey Hinton 等人备受关注的 NIPS 2017 论文《Dynamic Routing Between Capsules》已于数小时前公开。

9720

### 中国团队夺得MegaFace百万人脸识别冠军，精度98%再创记录，论文代码+数据全开源

MegaFace数据集 网络结构 首先，我们尝试在人脸识别的任务上找到一个优秀的网络结构。 3.1 网络输入设定 在我们所有的实验当中，都根据人脸的 5 个...

1.2K100

40560

53760

92960