TVP

# 只用NumPy实现神经网络

Keras、TensorFlow、PyTorch等高层框架让我们可以快速搭建复杂模型。然而，花一点时间了解下底层概念是值得的。前不久我发过一篇文章，以简单的方式解释了神经网络是如何工作的。但这是一篇高度理论性的文章，主要以数学为主（数学是神经网络超能力的来源）。我打算以实践的方式继续这一主题，在这篇文章中，我们将尝试仅仅基于NumPy创建一个可以工作的神经网络。最后，我们将测试一下创建的模型——用它来解决简单的分类问题，并和基于Keras搭建的神经网络比较一下。

[l]

input_dim

output_dim

activation

nn_architecture = [ {"input_dim": 2, "output_dim": 4, "activation": "relu"}, {"input_dim": 4, "output_dim": 6, "activation": "relu"}, {"input_dim": 6, "output_dim": 6, "activation": "relu"}, {"input_dim": 6, "output_dim": 4, "activation": "relu"}, {"input_dim": 4, "output_dim": 1, "activation": "sigmoid"},]

def init_layers(nn_architecture, seed = 99):

np.random.seed(seed)

number_of_layers = len(nn_architecture)

params_values = {}

for idx, layer in enumerate(nn_architecture):

layer_idx = idx + 1

layer_input_size = layer["input_dim"]

layer_output_size = layer["output_dim"]

params_values['W' + str(layer_idx)] = np.random.randn(

layer_output_size, layer_input_size) * 0.1

params_values['b' + str(layer_idx)] = np.random.randn(

layer_output_size, 1) * 0.1

return params_values

def sigmoid(Z):

def relu(Z):

return np.maximum(0,Z)

def sigmoid_backward(dA, Z):

sig = sigmoid(Z)

return dA * sig * (1 - sig)

def relu_backward(dA, Z):

dZ = np.array(dA, copy = True)

dZ[Z

return dZ;

def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):

Z_curr = np.dot(W_curr, A_prev) + b_curr

if activation is "relu":

activation_func = relu

elif activation is "sigmoid":

activation_func = sigmoid

else:

raise Exception('Non-supported activation function')

return activation_func(Z_curr), Z_curr

def full_forward_propagation(X, params_values, nn_architecture):

memory = {}

A_curr = X

for idx, layer in enumerate(nn_architecture):

layer_idx = idx + 1

A_prev = A_curr

activ_function_curr = layer["activation"]

W_curr = params_values["W" + str(layer_idx)]

b_curr = params_values["b" + str(layer_idx)]

A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activ_function_curr)

memory["A" + str(idx)] = A_prev

memory["Z" + str(layer_idx)] = Z_curr

return A_curr, memory

def get_cost_value(Y_hat, Y):

m = Y_hat.shape[1]

cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))

return np.squeeze(cost)

def get_accuracy_value(Y_hat, Y):

Y_hat_ = convert_prob_into_class(Y_hat)

return (Y_hat_ == Y).all(axis=0).mean()

def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):

m = A_prev.shape[1]

if activation is "relu":

backward_activation_func = relu_backward

elif activation is "sigmoid":

backward_activation_func = sigmoid_backward

else:

raise Exception('Non-supported activation function')

dZ_curr = backward_activation_func(dA_curr, Z_curr)

dW_curr = np.dot(dZ_curr, A_prev.T) / m

db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m

dA_prev = np.dot(W_curr.T, dZ_curr)

return dA_prev, dW_curr, db_curr

def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):

m = Y.shape[1]

Y = Y.reshape(Y_hat.shape)

dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat));

for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):

layer_idx_curr = layer_idx_prev + 1

activ_function_curr = layer["activation"]

dA_curr = dA_prev

A_prev = memory["A" + str(layer_idx_prev)]

Z_curr = memory["Z" + str(layer_idx_curr)]

W_curr = params_values["W" + str(layer_idx_curr)]

b_curr = params_values["b" + str(layer_idx_curr)]

dA_prev, dW_curr, db_curr = single_layer_backward_propagation(

dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)

params_values

，其中保存了当前参数值；

，其中保存了用于更新参数值所需的梯度信息。现在我们只需在每个网络层上应用以下等式即可。这是一个非常简单的优化算法，但我决定使用它作为更高级的优化算法的起点（大概会是我下一篇文章的主题）。

for idx, layer in enumerate(nn_architecture):

layer_idx = idx + 1

params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]

params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]

return params_values;

def train(X, Y, nn_architecture, epochs, learning_rate):

params_values = init_layers(nn_architecture, 2)

cost_history = []

accuracy_history = []

for i in range(epochs):

Y_hat, cashe = full_forward_propagation(X, params_values, nn_architecture)

cost = get_cost_value(Y_hat, Y)

cost_history.append(cost)

accuracy = get_accuracy_value(Y_hat, Y)

accuracy_history.append(accuracy)

grads_values = full_backward_propagation(Y_hat, Y, cashe, params_values, nn_architecture)

params_values = update(params_values, grads_values, nn_architecture, learning_rate)

return params_values, cost_history, accuracy_history

• 发表于:
• 原文链接https://kuaibao.qq.com/s/20181019A1UJ4K00?refer=cp_1026
• 腾讯「腾讯云开发者社区」是腾讯内容开放平台帐号（企鹅号）传播渠道之一，根据《腾讯内容开放平台服务协议》转载发布内容。
• 如有侵权，请联系 cloudcommunity@tencent.com 删除。

2020-11-18

2018-06-19

2018-06-14

2018-10-28

2019-08-16

2018-02-28

2019-07-26

2018-01-28

2020-08-28

2018-07-03

2020-06-24

2018-03-09

2023-06-08

2018-01-27

2018-01-26

2018-01-27

2018-02-08

2018-01-27

2023-02-11

2023-12-21