前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >python实现支持向量机之具体实现

python实现支持向量机之具体实现

作者头像
西西嘛呦
发布2020-08-26 14:22:43
8720
发布2020-08-26 14:22:43
举报

代码来源: https://github.com/eriklindernoren/ML-From-Scratch

支持向量机代码:

代码语言:javascript
复制
from __future__ import division, print_function
import numpy as np
import cvxopt
from mlfromscratch.utils import train_test_split, normalize, accuracy_score
from mlfromscratch.utils.kernels import *
from mlfromscratch.utils import Plot

# Hide cvxopt output
cvxopt.solvers.options['show_progress'] = False

class SupportVectorMachine(object):
    """The Support Vector Machine classifier.
    Uses cvxopt to solve the quadratic optimization problem.

    Parameters:
    -----------
    C: float
        Penalty term.
    kernel: function
        Kernel function. Can be either polynomial, rbf or linear.
    power: int
        The degree of the polynomial kernel. Will be ignored by the other
        kernel functions.
    gamma: float
        Used in the rbf kernel function.
    coef: float
        Bias term used in the polynomial kernel function.
    """
    def __init__(self, C=1, kernel=rbf_kernel, power=4, gamma=None, coef=4):
        #系数C
        self.C = C
        #核函数
        self.kernel = kernel
        self.power = power
        self.gamma = gamma
        self.coef = coef
        self.lagr_multipliers = None
        self.support_vectors = None
        self.support_vector_labels = None
        self.intercept = None

    def fit(self, X, y):
        #样本数,特征数
        n_samples, n_features = np.shape(X)
        
        # Set gamma to 1/n_features by default
        if not self.gamma:
            self.gamma = 1 / n_features

        # Initialize kernel method with parameters
        #初始化核函数的一些参数
        self.kernel = self.kernel(
            power=self.power,
            gamma=self.gamma,
            coef=self.coef)

        # Calculate kernel matrix
        #计算核矩阵
        kernel_matrix = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                kernel_matrix[i, j] = self.kernel(X[i], X[j])

        # Define the quadratic optimization problem
        #cvxopt是凸优化库
        #cvxopt.matrix(array,dims)用于将array按dims进行重新排列
        #np.outer用于计算两个向量的外积
        P = cvxopt.matrix(np.outer(y, y) * kernel_matrix, tc='d')
        q = cvxopt.matrix(np.ones(n_samples) * -1)
        A = cvxopt.matrix(y, (1, n_samples), tc='d')
        b = cvxopt.matrix(0, tc='d')

        if not self.C:
            #Numpy.identity()输入n为行数或列数,返回一个n*n的对角阵,
            #对角线元素为1,其余为0。dtype可选,默认为float格式。
            G = cvxopt.matrix(np.identity(n_samples) * -1)
            h = cvxopt.matrix(np.zeros(n_samples))
        else:
            G_max = np.identity(n_samples) * -1
            G_min = np.identity(n_samples)
            G = cvxopt.matrix(np.vstack((G_max, G_min)))
            h_max = cvxopt.matrix(np.zeros(n_samples))
            h_min = cvxopt.matrix(np.ones(n_samples) * self.C)
            h = cvxopt.matrix(np.vstack((h_max, h_min)))

        # Solve the quadratic optimization problem using cvxopt
        minimization = cvxopt.solvers.qp(P, q, G, h, A, b)

        # Lagrange multipliers
        #np.ravel用于降维
        lagr_mult = np.ravel(minimization['x'])

        # Extract support vectors
        # Get indexes of non-zero lagr. multipiers
        idx = lagr_mult > 1e-7
        # Get the corresponding lagr. multipliers
        self.lagr_multipliers = lagr_mult[idx]
        # Get the samples that will act as support vectors
        self.support_vectors = X[idx]
        # Get the corresponding labels
        self.support_vector_labels = y[idx]

        # Calculate intercept with first support vector
        self.intercept = self.support_vector_labels[0]
        for i in range(len(self.lagr_multipliers)):
            self.intercept -= self.lagr_multipliers[i] * self.support_vector_labels[
                i] * self.kernel(self.support_vectors[i], self.support_vectors[0])

    def predict(self, X):
        y_pred = []
        # Iterate through list of samples and make predictions
        for sample in X:
            prediction = 0
            # Determine the label of the sample by the support vectors
            for i in range(len(self.lagr_multipliers)):
                prediction += self.lagr_multipliers[i] * self.support_vector_labels[
                    i] * self.kernel(self.support_vectors[i], sample)
            prediction += self.intercept
            y_pred.append(np.sign(prediction))
        return np.array(y_pred)

其中使用到的部分函数:

Cvxopt.solvers.qp(P,q,G,h,A,b)

标准形式:

核函数定义:

代码语言:javascript
复制
import numpy as np


def linear_kernel(**kwargs):
    def f(x1, x2):
        return np.inner(x1, x2)
    return f


def polynomial_kernel(power, coef, **kwargs):
    def f(x1, x2):
        return (np.inner(x1, x2) + coef)**power
    return f


def rbf_kernel(gamma, **kwargs):
    def f(x1, x2):
        distance = np.linalg.norm(x1 - x2) ** 2
        return np.exp(-gamma * distance)
    return f

np.inner()用于返回两个向量的内积。np.linalg.norm()用于求范数,ord参数指定使用的范数,如果没有指定,则是求整体矩阵元素平方和再开根号。

运行主代码:

代码语言:javascript
复制
from __future__ import division, print_function
import numpy as np
from sklearn import datasets

# Import helper functions
import sys
sys.path.append("/content/drive/My Drive/learn/ML-From-Scratch/")
from mlfromscratch.utils import train_test_split, normalize, accuracy_score, Plot
from mlfromscratch.utils.kernels import *
from mlfromscratch.supervised_learning import SupportVectorMachine

def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)


if __name__ == "__main__":
    main()

使用的数据集是sklearn自带的鸢尾花数据集,划分后的训练集大小为:

(67, 4) (67,)

需要注意的是鸢尾花数据集中有三类,分别是0,1,2类,这里只提取了其中的第1,2类,并且对样本进行了标准化,对标签重新进行编号,记为+1和-1.

部分数据及对应标签如下:

代码语言:javascript
复制
[[0.69198788 0.34599394 0.58626751 0.24027357]
 [0.70779525 0.31850786 0.60162596 0.1887454 ]
 [0.73239618 0.38547167 0.53966034 0.15418867]
 [0.69299099 0.34199555 0.60299216 0.19799743]
 [0.72712585 0.26661281 0.60593821 0.18178146]]
[ 1 -1 -1  1  1]

这里使用的核函数是多项式核函数,上面已经给出了其具体代码,这里设定power=4, coef=1。

以该输入为例,我们一步步探索整个支持向量机运行的过程:

(1)fit()函数传入X_train和y_train,n_samples=67,n_features=4

(2)这里的gamma用不上,定义了多项式核函数:(x1⋅x2+1)**4

(3)计算核矩阵,其大小是(67,67)

(4)计算P,q,A,b,大小分别是

代码语言:javascript
复制
P: (67, 67)
q: (67, 1)
A: (1, 67)
b: (1, 1)

(5)计算G,h

代码语言:javascript
复制
G: (134, 67)
h: (134, 1)

(6)计算minimization

代码语言:javascript
复制
{
'x': <67x1 matrix, tc='d'>, 
'y': <1x1 matrix, tc='d'>, 
's': <134x1 matrix, tc='d'>, 
'z': <134x1 matrix, tc='d'>, 
'status': 'optimal', 
'gap': 6.025067170026107e-06, 
'relative gap': 2.658291994898124e-07, 
'primal objective': -22.66518193482733, 
'dual objective': -22.6651879598945, 
'primal infeasibility': 5.244963534670188e-16, 
'dual infeasibility': 1.4242235222101717e-14, 
'primal slack': 1.992950796227594e-08, 
'dual slack': 2.0187091588623967e-08, 
'iterations': 8
}

接着取出键为x的那一项<67x1 matrix, tc='d'>,再分别计算以下项:

代码语言:javascript
复制
lagr_mult: [9.99999993e-01 3.11335122e-07 5.76626923e-08 9.99999986e-01
 9.99999960e-01 8.40647046e-09 1.62550393e-08 9.99999990e-01
 8.80788872e-09 7.47600294e-09 9.99999993e-01 1.38140431e-08
 9.99999841e-01 1.84408877e-08 4.45365428e-08 9.99999993e-01
 9.99999994e-01 7.31294539e-09 9.99999981e-01 2.81302345e-01
 4.29885349e-08 9.99999993e-01 6.50674166e-08 1.53345935e-08
 9.99999997e-01 9.99999980e-01 1.60572220e-08 4.62959213e-09
 1.74974625e-08 9.99999994e-01 6.71173485e-09 1.57489685e-08
 9.99999965e-01 9.99999988e-01 1.01875501e-08 3.19854125e-01
 1.93465392e-08 9.99999991e-01 9.99999969e-01 1.58210493e-08
 9.99999992e-01 6.53854743e-08 9.49484746e-09 9.99999991e-01
 9.99999975e-01 1.33182181e-08 2.74386270e-07 9.99999985e-01
 9.58193356e-09 1.42607893e-08 1.44826324e-08 9.99999988e-01
 9.99999991e-01 1.05018808e-08 9.99999983e-01 8.02961322e-09
 6.01155709e-01 3.54164542e-07 9.99999994e-01 9.99999972e-01
 3.49899863e-08 1.56995796e-08 9.99999986e-01 9.99999949e-01
 1.95124926e-08 1.13750234e-07 3.24069998e-09]
idx: [ True  True False  True  True False False  True False False  True False
  True False False  True  True False  True  True False  True False False
  True  True False False False  True False False  True  True False  True
 False  True  True False  True False False  True  True False  True  True
 False False False  True  True False  True False  True  True  True  True
 False False  True  True False  True False]
self.lagr_multipliers: [9.99999993e-01 3.11335122e-07 9.99999986e-01 9.99999960e-01
 9.99999990e-01 9.99999993e-01 9.99999841e-01 9.99999993e-01
 9.99999994e-01 9.99999981e-01 2.81302345e-01 9.99999993e-01
 9.99999997e-01 9.99999980e-01 9.99999994e-01 9.99999965e-01
 9.99999988e-01 3.19854125e-01 9.99999991e-01 9.99999969e-01
 9.99999992e-01 9.99999991e-01 9.99999975e-01 2.74386270e-07
 9.99999985e-01 9.99999988e-01 9.99999991e-01 9.99999983e-01
 6.01155709e-01 3.54164542e-07 9.99999994e-01 9.99999972e-01
 9.99999986e-01 9.99999949e-01 1.13750234e-07]
self.support_vectors: [[0.72785195 0.32870733 0.56349829 0.21131186]
 [0.71491405 0.30207636 0.59408351 0.21145345]
 [0.72460233 0.37623583 0.54345175 0.19508524]
 [0.76467269 0.31486523 0.53976896 0.15743261]
 [0.73122464 0.31338199 0.56873028 0.20892133]
 [0.71562645 0.3523084  0.56149152 0.22019275]
 [0.71366557 0.28351098 0.61590317 0.17597233]
 [0.71066905 0.35533453 0.56853524 0.21320072]
 [0.73089855 0.30454106 0.58877939 0.1624219 ]
 [0.73544284 0.35458851 0.55158213 0.1707278 ]
 [0.71524936 0.40530797 0.53643702 0.19073316]
 [0.71171214 0.35002236 0.57170319 0.21001342]
 [0.70779525 0.31850786 0.60162596 0.1887454 ]
 [0.73659895 0.33811099 0.56754345 0.14490471]
 [0.72366005 0.32162669 0.58582004 0.17230001]
 [0.72634846 0.38046824 0.54187901 0.18446945]
 [0.73081412 0.34743622 0.56308629 0.16772783]
 [0.75519285 0.33928954 0.53629637 0.16417236]
 [0.75384916 0.31524601 0.54825394 0.17818253]
 [0.72155725 0.32308533 0.56001458 0.24769876]
 [0.70631892 0.37838513 0.5675777  0.18919257]
 [0.71529453 0.31790868 0.59607878 0.17882363]
 [0.73260391 0.36029701 0.55245541 0.1681386 ]
 [0.70558934 0.32722984 0.58287815 0.23519645]
 [0.69299099 0.34199555 0.60299216 0.19799743]
 [0.73350949 0.35452959 0.55013212 0.18337737]
 [0.73337886 0.32948905 0.54206264 0.24445962]
 [0.74714194 0.33960997 0.54337595 0.17659719]
 [0.71576546 0.30196356 0.59274328 0.21249287]
 [0.69589887 0.34794944 0.57629125 0.25008866]
 [0.69333409 0.38518561 0.57777841 0.1925928 ]
 [0.69595601 0.3427843  0.59208198 0.21813547]
 [0.70610474 0.3258945  0.59747324 0.1955367 ]
 [0.76923077 0.30769231 0.53846154 0.15384615]
 [0.73923462 0.37588201 0.52623481 0.187941  ]]
self.support_vector_labels: [ 1  1 -1 -1  1  1  1  1  1 -1 -1  1 -1 -1  1 -1 -1 -1 -1  1 -1  1 -1  1
  1 -1  1 -1  1  1 -1  1  1 -1 -1]

最后一步:

代码语言:javascript
复制
  # Calculate intercept with first support vector
        self.intercept = self.support_vector_labels[0]
        for i in range(len(self.lagr_multipliers)):
            self.intercept -= self.lagr_multipliers[i] * self.support_vector_labels[
                i] * self.kernel(self.support_vectors[i], self.support_vectors[0])

需要好多数学知识以及矩阵之间的运算,还是有很多不理解其这么计算的原因,再慢慢摸索了。

结果:

Accuracy: 0.8787878787878788

本文参与 腾讯云自媒体分享计划,分享自作者个人站点/博客。
原始发表:2020-05-03 ,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档