# 逻辑回归模型及Python实现

1.模型

2.评价

3.优化

4.python代码实现

1 # -*- coding: utf-8 -*-

2 """

3 Created on Wed Feb 24 11:04:11 2016

4

5 @author: SumaiWong

6 """

7

8 import numpy as np

9 import pandas as pd

10 from numpy import dot

11 from numpy.linalg import inv

12

# Load the iris data set and prepare a binary-classification sample.
# NOTE(review): the path is machine-specific — adjust to your environment.
# Raw string avoids relying on '\i' not being a recognized escape
# (plain 'D:\iris.csv' raises a SyntaxWarning on modern Python).
iris = pd.read_csv(r'D:\iris.csv')
dummy = pd.get_dummies(iris['Species'])  # one-hot (dummy) encode the Species column
iris = pd.concat([iris, dummy], axis=1)
iris = iris.iloc[0:100, :]  # keep the first 100 rows (assumed to cover two species — verify)

17

18 # 构建Logistic Regression , 对Species是否为setosa进行分类 setosa ~ Sepal.Length

19 # Y = g(BX) = 1/(1+exp(-BX))

def logit(x):
    """Logistic sigmoid g(x) = 1 / (1 + exp(-x)); maps the reals into (0, 1)."""
    return np.reciprocal(1.0 + np.exp(np.negative(x)))

22

# Assemble the design matrix X = [x0, feature] and the target vector Y.
temp = pd.DataFrame(iris.iloc[:, 0])  # first column (presumably Sepal.Length — verify)
temp['x0'] = 1.                       # intercept column of ones
X = temp.iloc[:, [1, 0]]              # column order: [x0, feature]
# .values.reshape: Series.reshape was deprecated in pandas 0.19 and later removed,
# so reshape the underlying ndarray into an (m, 1) column vector instead.
Y = iris['setosa'].values.reshape(len(iris), 1)

27

28 # 批量梯度下降法

# ---- Batch gradient descent ----
m, n = X.shape              # number of samples, number of features
alpha = 0.0065              # learning rate
theta_g = np.zeros((n, 1))  # parameter vector, initialised to zero
maxCycles = 3000            # iteration count
J = pd.Series(np.arange(maxCycles, dtype=float))  # per-iteration loss values

for i in range(maxCycles):
    h = logit(dot(X, theta_g))  # predicted probabilities
    # Average cross-entropy loss; 1/m (instead of the original hard-coded 1/100,
    # identical here since m == 100) generalizes to any sample size.
    J[i] = -(1. / m) * np.sum(Y * np.log(h) + (1 - Y) * np.log(1 - h))
    error = h - Y               # residuals
    grad = dot(X.T, error)      # gradient of the loss w.r.t. theta
    theta_g -= alpha * grad
# print() calls: the original Python 2 print statements do not parse on Python 3.
print(theta_g)
print(J.plot())

43

# ---- Newton's method ----
theta_n = np.zeros((n, 1))  # parameter vector, initialised to zero
maxCycles = 10              # Newton's method needs far fewer iterations
C = pd.Series(np.arange(maxCycles, dtype=float))  # per-iteration loss values
for i in range(maxCycles):
    h = logit(dot(X, theta_n))  # predicted probabilities
    # Average cross-entropy loss; 1/m (instead of the original hard-coded 1/100,
    # identical here since m == 100) generalizes to any sample size.
    C[i] = -(1. / m) * np.sum(Y * np.log(h) + (1 - Y) * np.log(1 - h))
    error = h - Y               # residuals
    grad = dot(X.T, error)      # gradient of the loss w.r.t. theta
    # Diagonal weight matrix W with W_ii = h_i * (1 - h_i); equivalent to the
    # original h*(1-h)*np.eye(len(X)) broadcasting trick, but explicit.
    A = np.diagflat(h * (1 - h))
    # Hessian H = X' A X, using dot() instead of the deprecated np.mat class.
    H = dot(X.T, dot(A, X))
    theta_n -= dot(inv(H), grad)  # Newton update step
# print() calls: the original Python 2 print statements do not parse on Python 3.
print(theta_n)
print(C.plot())

3901 篇文章234 人订阅

0 条评论

2783

5299

5829

1635

3178

1072

2793

2502

66210

5822