# 机器学习算法：从头开始构建逻辑回归模型

Sigmoid函数（Logistic函数）：$\sigma(x) = \dfrac{1}{1 + e^{-x}}$

sigmoid函数图形

import pandas as pd

# NOTE(review): the statement that loads the dataset was lost when this
# listing was extracted (`df` was used without ever being defined).  The path
# below is a placeholder -- point it at your copy of the Iris CSV, which must
# provide the 'Id' and 'Species' columns used below.
df = pd.read_csv('Iris.csv')

# The row identifier carries no predictive information.
df = df.drop(['Id'], axis=1)

# Drop the rows with target value Iris-virginica so that only two classes
# remain.  Assumes the file is ordered by species, with Iris-virginica in
# rows 100-149 -- TODO confirm for your copy of the data.
rows = list(range(100, 150))
df = df.drop(df.index[rows])

# Encode the labels: Iris-setosa -> 0, every other remaining species -> 1.
target = df['Species']
Y = [0 if val == 'Iris-setosa' else 1 for val in target]

# Whatever columns are left after dropping the label become the feature
# matrix, as a plain list of rows.
df = df.drop(['Species'], axis=1)
X = df.values.tolist()

IRIS数据集有三个目标值，分别是弗吉尼亚鸢尾、山鸢尾和变色鸢尾。由于这里要实现的是二分类算法，因此先把弗吉尼亚鸢尾剔除。

from sklearn.utils import shuffle

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Very old scikit-learn releases only ship the legacy module.
    from sklearn.cross_validation import train_test_split

import numpy as np

# Shuffle features and labels together so rows and targets stay aligned.
X, Y = shuffle(X, Y)

# Hold out 10% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.9)

x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)

# One column vector per feature.  reshape(-1, 1) lets NumPy infer the number
# of training samples instead of hard-coding it.
x_1 = x_train[:, 0].reshape(-1, 1)
x_2 = x_train[:, 1].reshape(-1, 1)
x_3 = x_train[:, 2].reshape(-1, 1)
x_4 = x_train[:, 3].reshape(-1, 1)

# Labels as a column vector so they line up with the feature columns.
y_train = y_train.reshape(-1, 1)

## Logistic Regression
import numpy as np


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of *x*.

    Evaluated with NumPy, so *x* may be a scalar or an array; arrays are
    transformed element-wise.
    """
    return 1 / (1 + np.exp(-x))

6

# Batch gradient descent for logistic regression on four features.
# Uses names defined earlier in the file: np, sigmoid, the feature columns
# x_1..x_4 and the label column y_train (all reshaped to (m, 1) there).

m = y_train.shape[0]  # number of training samples
alpha = 0.0001        # learning rate

# NOTE(review): the loop bound was truncated in the extracted listing
# ("while(epochs"); reconstructed as 10000 iterations -- confirm.
n_epochs = 10000

# Each parameter is kept as an (m, 1) column so that it multiplies the
# matching feature column element-wise.
theta_0 = np.zeros((m, 1))
theta_1 = np.zeros((m, 1))
theta_2 = np.zeros((m, 1))
theta_3 = np.zeros((m, 1))
theta_4 = np.zeros((m, 1))

epochs = 0
cost_func = []
while epochs < n_epochs:
    # Forward pass: linear combination followed by the sigmoid.
    y = theta_0 + theta_1 * x_1 + theta_2 * x_2 + theta_3 * x_3 + theta_4 * x_4
    y = sigmoid(y)

    # Mean cross-entropy over the training set.
    cost = (- np.dot(np.transpose(y_train), np.log(y))
            - np.dot(np.transpose(1 - y_train), np.log(1 - y))) / m

    # NOTE(review): the five gradient assignments were missing from the
    # extracted listing; reconstructed as the partial derivatives of the cost
    # above, (1/m) * sum((y - y_train) * x_j) -- confirm against the original.
    error = y - y_train
    theta_0_grad = np.dot(np.ones((1, m)), error) / m
    theta_1_grad = np.dot(np.transpose(x_1), error) / m
    theta_2_grad = np.dot(np.transpose(x_2), error) / m
    theta_3_grad = np.dot(np.transpose(x_3), error) / m
    theta_4_grad = np.dot(np.transpose(x_4), error) / m

    # Each gradient has shape (1, 1) and broadcasts over the (m, 1) column,
    # so every row receives the same update.
    theta_0 = theta_0 - alpha * theta_0_grad
    theta_1 = theta_1 - alpha * theta_1_grad
    theta_2 = theta_2 - alpha * theta_2_grad
    theta_3 = theta_3 - alpha * theta_3_grad
    theta_4 = theta_4 - alpha * theta_4_grad

    cost_func.append(cost)
    epochs += 1

from sklearn.metrics import accuracy_score

# Score the hand-written model on the held-out samples.
# Uses np, sigmoid, x_test, y_test and the trained theta_0..theta_4 defined
# earlier in the file.

n_test = x_test.shape[0]

# One column vector per feature, mirroring the layout of the training columns.
test_x_1 = x_test[:, 0].reshape(n_test, 1)
test_x_2 = x_test[:, 1].reshape(n_test, 1)
test_x_3 = x_test[:, 2].reshape(n_test, 1)
test_x_4 = x_test[:, 3].reshape(n_test, 1)

# The parameters have one entry per training sample; keep the first n_test
# entries so they line up with the test columns.  Presumably all entries of a
# given theta are equal, so which ones are kept does not matter -- confirm.
theta_0 = np.ravel(theta_0)[:n_test].reshape(n_test, 1)
theta_1 = np.ravel(theta_1)[:n_test].reshape(n_test, 1)
theta_2 = np.ravel(theta_2)[:n_test].reshape(n_test, 1)
theta_3 = np.ravel(theta_3)[:n_test].reshape(n_test, 1)
theta_4 = np.ravel(theta_4)[:n_test].reshape(n_test, 1)

y_pred = theta_0 + theta_1 * test_x_1 + theta_2 * test_x_2 + theta_3 * test_x_3 + theta_4 * test_x_4
y_pred = sigmoid(y_pred)

# Threshold the predicted probabilities at 0.5 to obtain class labels.
new_y_pred = [1 if val >= 0.5 else 0 for val in y_pred.ravel()]

print(accuracy_score(y_test, new_y_pred))

import matplotlib.pyplot as plt

# Plot the recorded training cost against the iteration number.
# reshape(-1, 1) flattens each recorded cost to one row regardless of how
# many iterations were run.
cost_func = np.array(cost_func).reshape(-1, 1)
plt.plot(range(len(cost_func)), cost_func)

from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

# Reference result: scikit-learn's LogisticRegression on the same split.
clf = LogisticRegression()
# y_train was reshaped to a column vector earlier in the file; flatten it
# because fit() expects a 1-D label array.
clf.fit(x_train, y_train.ravel())
y_pred = clf.predict(x_test)
print(accuracy_score(y_test, y_pred))

https://hackernoon.com/introduction-to-machine-learning-algorithms-logistic-regression-cbdd82d81a36

• 发表于:
• 原文链接https://kuaibao.qq.com/s/20180530A090UR00?refer=cp_1026
• 腾讯「云+社区」是腾讯内容开放平台帐号（企鹅号）传播渠道之一，根据《腾讯内容开放平台服务协议》转载发布内容。
• 如有侵权，请联系 yunjia_community@tencent.com 删除。

2018-05-16

2021-11-29

2021-11-29

2021-11-29

2021-11-29

2021-11-29

2021-11-29