# Python机器学习的练习五：神经网络

```import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from scipy.io import loadmat
data = loadmat('ex3data1.mat')
data
{'X': array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]),
'__globals__': [],
'__header__':'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
'__version__':'1.0',
'y': array([[10],
[10],
[10],
...,
[9],
[9],
[9]], dtype=uint8)}```

```X= data['X']
y= data['y']
X.shape, y.shape

((5000L,400L), (5000L,1L))```

from sklearn.preprocessing import OneHotEncoder
encoder= OneHotEncoder(sparse=False)
y_onehot= encoder.fit_transform(y)
y_onehot.shape

(5000L,10L)
y[0], y_onehot[0,:]

(array([10], dtype=uint8),
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]))```

```def sigmoid(z):
return 1 / (1 + np.exp(-z))
def forward_propagate(X, theta1, theta2):
m= X.shape[0]

a1= np.insert(X,0, values=np.ones(m), axis=1)
z2= a1* theta1.T
a2= np.insert(sigmoid(z2),0, values=np.ones(m), axis=1)
z3= a2* theta2.T
h= sigmoid(z3)

return a1, z2, a2, z3, h
def cost(params, input_size, hidden_size, num_labels, X, y, learning_rate):
m= X.shape[0]
X= np.matrix(X)
y= np.matrix(y)

# reshape the parameter array into parameter matrices for each layer
theta1= np.matrix(np.reshape(params[:hidden_size* (input_size+ 1)], (hidden_size, (input_size+ 1))))
theta2= np.matrix(np.reshape(params[hidden_size* (input_size+ 1):], (num_labels, (hidden_size+ 1))))

# run the feed-forward pass
a1, z2, a2, z3, h= forward_propagate(X, theta1, theta2)

# compute the cost
J= 0
for iin range(m):
first_term= np.multiply(-y[i,:], np.log(h[i,:]))
second_term= np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
J+= np.sum(first_term- second_term)

J= J/ m

return J```

```# initial setup
input_size= 400
hidden_size= 25
num_labels= 10
learning_rate= 1

# randomly initialize a parameter array of the size of the full network's parameters
params= (np.random.random(size=hidden_size* (input_size+ 1)+ num_labels* (hidden_size+ 1))- 0.5)* 0.25

m= X.shape[0]
X= np.matrix(X)
y= np.matrix(y)

# unravel the parameter array into parameter matrices for each layer
theta1= np.matrix(np.reshape(params[:hidden_size* (input_size+ 1)], (hidden_size, (input_size+ 1))))
theta2= np.matrix(np.reshape(params[hidden_size* (input_size+ 1):], (num_labels, (hidden_size+ 1))))

theta1.shape, theta2.shape

((25L,401L), (10L,26L))
a1, z2, a2, z3, h= forward_propagate(X, theta1, theta2)
a1.shape, z2.shape, a2.shape, z3.shape, h.shape

((5000L,401L), (5000L,25L), (5000L,26L), (5000L,10L), (5000L,10L))```

```cost(params, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)

6.8228086634127862```

`J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))`

```def sigmoid_gradient(z):
return np.multiply(sigmoid(z), (1 - sigmoid(z)))```

```def backprop(params, input_size, hidden_size, num_labels, X, y, learning_rate):
##### this section is identical to the cost function logic we already saw #####
m= X.shape[0]
X= np.matrix(X)
y= np.matrix(y)

# reshape the parameter array into parameter matrices for each layer
theta1= np.matrix(np.reshape(params[:hidden_size* (input_size+ 1)], (hidden_size, (input_size+ 1))))
theta2= np.matrix(np.reshape(params[hidden_size* (input_size+ 1):], (num_labels, (hidden_size+ 1))))

# run the feed-forward pass
a1, z2, a2, z3, h= forward_propagate(X, theta1, theta2)

# initializations
J= 0
delta1= np.zeros(theta1.shape) # (25, 401)
delta2= np.zeros(theta2.shape) # (10, 26)

# compute the cost
for iin range(m):
first_term= np.multiply(-y[i,:], np.log(h[i,:]))
second_term= np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
J+= np.sum(first_term- second_term)

J= J/ m

# add the cost regularization term
J+= (float(learning_rate)/ (2 * m))* (np.sum(np.power(theta1[:,1:],2))+ np.sum(np.power(theta2[:,1:],2)))

##### end of cost function logic, below is the new part #####

# perform backpropagation
for tin range(m):
a1t= a1[t,:] # (1, 401)
z2t= z2[t,:] # (1, 25)
a2t= a2[t,:] # (1, 26)
ht= h[t,:] # (1, 10)
yt= y[t,:] # (1, 10)

d3t= ht- yt # (1, 10)

z2t= np.insert(z2t,0, values=np.ones(1)) # (1, 26)
d2t= np.multiply((theta2.T* d3t.T).T, sigmoid_gradient(z2t)) # (1, 26)

delta1= delta1+ (d2t[:,1:]).T* a1t
delta2= delta2+ d3t.T* a2t

delta1= delta1/ m
delta2= delta2/ m

delta1[:,1:]= delta1[:,1:]+ (theta1[:,1:]* learning_rate)/ m
delta2[:,1:]= delta2[:,1:]+ (theta2[:,1:]* learning_rate)/ m

# unravel the gradient matrices into a single array

backprop计算中最难的部分是获取矩阵维度。顺便说一下，不是只有你对使用 A * B和 np.multiply(A, B)感到疑惑。

```J, grad= backprop(params, input_size, hidden_size, num_labels, X, y_onehot, learning_rate)

(6.8281541822949299, (10285L,))```

```from scipy.optimize import minimize

# minimize the objective function
fmin= minimize(fun=backprop, x0=params, args=(input_size, hidden_size, num_labels, X, y_onehot, learning_rate),
method='TNC', jac=True, options={'maxiter':250})
fmin
status:3
success:False
nfev:250
fun:0.33900736818312283
x: array([-8.85740564e-01,  2.57420350e-04, -4.09396202e-04, ...,
1.44634791e+00,  1.68974302e+00,  7.10121593e-01])
message:'Max. number of function evaluations reach'
jac: array([-5.11463703e-04,  5.14840700e-08, -8.18792403e-08, ...,
-2.48297749e-04, -3.17870911e-04, -3.31404592e-04])
nit:21```

```X= np.matrix(X)
theta1= np.matrix(np.reshape(fmin.x[:hidden_size* (input_size+ 1)], (hidden_size, (input_size+ 1))))
theta2= np.matrix(np.reshape(fmin.x[hidden_size* (input_size+ 1):], (num_labels, (hidden_size+ 1))))

a1, z2, a2, z3, h= forward_propagate(X, theta1, theta2)
y_pred= np.array(np.argmax(h, axis=1)+ 1)
y_pred
array([[10],
[10],
[10],
...,
[9],
[9],
[9]], dtype=int64)```

```correct = [1 if a == b else 0 for (a, b) in zip(y_pred, y)]
accuracy= (sum(map(int, correct))/ float(len(correct)))
print 'accuracy = {0}%'.format(accuracy* 100)

accuracy= 99.22%```

http://www.johnwittenauer.net/machine-learning-exercises-in-python-part-5/

0 条评论

• ### Python机器学习的练习八：异常检测和推荐系统

在这篇文章中，将会涉及两个话题——异常检测和推荐系统，我们将使用高斯模型实现异常检测算法并且应用它检测网络上的故障服务器。我们还将看到如何使用协同过滤创建推荐系...

• ### Python机器学习的练习四：多元逻辑回归

在本系列的第3部分中，我们实现了简单的和正则化的逻辑回归。但我们的解决方法有一个限制—它只适用于二进制分类。在本文中，我们将在之前的练习中扩展我们的解决方案，以...

• ### Python可视化解析MCMC

马尔可夫链可以定义为一个随机过程Y，其中t时刻各点的值只取决于t-1时刻的值。这意味着随机过程在t时刻有状态x的概率，给定它所有的过去状态，等于在t时刻有状态x...

• ### Numpy

You cannot protect yourself from sadness without protecting yourself from happin...

• ### Python应用 | 三行代码告诉你快速去水印

日常学习和工作中，经常会遇到上面的情况，好好的PDF文档里面却多了很多烦人的、极其影响阅读效果的水印。如何快速去掉呢？

• ### 指针和引用

注：参考自bilibili系列视频，指针和引用视频https://www.bilibili.com/video/BV1kT4y177WE

• ### 《Java编程思想》第一章：对象导论 原

本人的笔记风格是，将书中的重要知识点摘抄出来，如果是特别转弯的话，本人会用自己的理解进行说明。从现在开始讲读书笔记一篇一篇的抛出来，供大家参阅学习讨论，...