# Python机器学习的练习三：逻辑回归

## 逻辑回归

```import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import os
path= os.getcwd()+ '\data\ex2data1.txt'

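|   | Exam 1 | Exam 2 | Admitted |
|---|-----------|-----------|---|
| 0 | 34.623660 | 78.024693 | 0 |
| 1 | 30.286711 | 43.894998 | 0 |
| 2 | 35.847409 | 72.902198 | 0 |
| 3 | 60.182599 | 86.308552 | 1 |
| 4 | 79.032736 | 75.344376 | 1 |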

```positive= data[data['Admitted'].isin([1])]

fig, ax= plt.subplots(figsize=(12,8))
ax.scatter(positive['Exam 1'], positive['Exam 2'], s=50, c='b', marker='o', label='Admitted')
ax.scatter(negative['Exam 1'], negative['Exam 2'], s=50, c='r', marker='x', label='Not Admitted')
ax.legend()
ax.set_xlabel('Exam 1 Score')
ax.set_ylabel('Exam 2 Score')```

```def sigmoid(z):
return 1 / (1 + np.exp(-z))```

```nums= np.arange(-10,10, step=1)

fig, ax= plt.subplots(figsize=(12,8))
ax.plot(nums, sigmoid(nums),'r')```

```def cost(theta, X, y):
theta= np.matrix(theta)
X= np.matrix(X)
y= np.matrix(y)
first= np.multiply(-y, np.log(sigmoid(X* theta.T)))
second= np.multiply((1 - y), np.log(1 - sigmoid(X* theta.T)))
return np.sum(first- second)/ (len(X))```

```# add a ones column - this makes the matrix multiplication work out easier
data.insert(0,'Ones',1)

# set X (training data) and y (target variable)
cols= data.shape[1]
X= data.iloc[:,0:cols-1]
y= data.iloc[:,cols-1:cols]

# convert to numpy arrays and initalize the parameter array theta
X= np.array(X.values)
y= np.array(y.values)
theta= np.zeros(3)```

`X.shape, theta.shape, y.shape`

((100L, 3L), (3L,), (100L, 1L))

`cost(theta, X, y)`

0.69314718055994529

```def gradient(theta, X, y):
theta= np.matrix(theta)
X= np.matrix(X)
y= np.matrix(y)

parameters= int(theta.ravel().shape[1])

error= sigmoid(X* theta.T)- y

for iin range(parameters):
term= np.multiply(error, X[:,i])

```import scipy.optimize as opt
result= opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(X, y))
cost(result[0], X, y)```

0.20357134412164668

```def predict(theta, X):
probability= sigmoid(X* theta.T)
return [1 if x >= 0.5 else 0 for xin probability]

theta_min= np.matrix(result[0])
predictions= predict(theta_min, X)
correct= [1 if ((a== 1 and b== 1)or (a== 0 and b== 0))else 0 for (a, b)in zip(predictions, y)]
accuracy= (sum(map(int, correct))% len(correct))
print 'accuracy = {0}%'.format(accuracy)
accuracy = 89%```

## 正则化逻辑回归

```path= os.getcwd()+ '\data\ex2data2.txt'

positive= data2[data2['Accepted'].isin([1])]
negative= data2[data2['Accepted'].isin([0])]

fig, ax= plt.subplots(figsize=(12,8))
ax.scatter(positive['Test 1'], positive['Test 2'], s=50, c='b', marker='o', label='Accepted')
ax.scatter(negative['Test 1'], negative['Test 2'], s=50, c='r', marker='x', label='Rejected')
ax.legend()
ax.set_xlabel('Test 1 Score')
ax.set_ylabel('Test 2 Score')```

```degree= 5
x1= data2['Test 1']
x2= data2['Test 2']

data2.insert(3,'Ones',1)

for iin range(1, degree):
for jin range(0, i):
data2['F' + str(i)+ str(j)]= np.power(x1, i-j)* np.power(x2, j)

data2.drop('Test 1', axis=1, inplace=True)
data2.drop('Test 2', axis=1, inplace=True)

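|   | Accepted | Ones | F10 | F20 | F21 | F30 | F31 | F32 |
|---|---|---|-----------|----------|-----------|-----------|----------|-----------|
| 0 | 1 | 1 | 0.051267 | 0.002628 | 0.035864 | 0.000135 | 0.001839 | 0.025089 |
| 1 | 1 | 1 | -0.092742 | 0.008601 | -0.063523 | -0.000798 | 0.005891 | -0.043509 |
| 2 | 1 | 1 | -0.213710 | 0.045672 | -0.147941 | -0.009761 | 0.031616 | -0.102412 |
| 3 | 1 | 1 | -0.375000 | 0.140625 | -0.188321 | -0.052734 | 0.070620 | -0.094573 |
| 4 | 1 | 1 | -0.513250 | 0.263426 | -0.238990 | -0.135203 | 0.122661 | -0.111283 |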

```def costReg(theta, X, y, learningRate):
theta= np.matrix(theta)
X= np.matrix(X)
y= np.matrix(y)
first= np.multiply(-y, np.log(sigmoid(X* theta.T)))
second= np.multiply((1 - y), np.log(1 - sigmoid(X* theta.T)))
reg= (learningRate/ 2 * len(X))* np.sum(np.power(theta[:,1:theta.shape[1]],2))
return np.sum(first- second)/ (len(X))+ reg```

```def gradientReg(theta, X, y, learningRate):
theta= np.matrix(theta)
X= np.matrix(X)
y= np.matrix(y)

parameters= int(theta.ravel().shape[1])

error= sigmoid(X* theta.T)- y

for iin range(parameters):
term= np.multiply(error, X[:,i])

if (i== 0):
else:
grad[i]= (np.sum(term)/ len(X))+ ((learningRate/ len(X))* theta[:,i])

```# set X and y (remember from above that we moved the label to column 0)
cols= data2.shape[1]
X2= data2.iloc[:,1:cols]
y2= data2.iloc[:,0:1]

# convert to numpy arrays and initalize the parameter array theta
X2= np.array(X2.values)
y2= np.array(y2.values)
theta2= np.zeros(11)

learningRate= 1

costReg(theta2, X2, y2, learningRate)
0.6931471805599454```

```result2= opt.fmin_tnc(func=costReg, x0=theta2, fprime=gradientReg, args=(X2, y2, learningRate))
result2```

(array([ 0.35872309, -3.22200653, 18.97106363, -4.25297831, 18.23053189, 20.36386672, 8.94114455, -43.77439015, -17.93440473, -50.75071857, -2.84162964]), 110, 1)

```theta_min= np.matrix(result2[0])
predictions= predict(theta_min, X2)
correct= [1 if ((a== 1 and b== 1)or (a== 0 and b== 0))else 0 for (a, b)in zip(predictions, y2)]
accuracy= (sum(map(int, correct))% len(correct))
print 'accuracy = {0}%'.format(accuracy)```

http://www.johnwittenauer.net/machine-learning-exercises-in-python-part-3/

