# 独家 | 菜鸟必备的循环神经网络指南（附链接）

https://victorzhou.com/blog/intro-to-neural-networks/

1. 为什么要用RNNs?

• 情感分析（例如，这是一个积极的还是负面的评论？）通常是使用“多对一”RNN。将要分析的文本送入RNN，然后RNN产生单个输出分类（例如，这是一个积极的评论）。

2. 如何使用RNNs

1. 基于之前的隐藏状态 h_{t-1} 和下一个输入 x_t，我们可以得到下一个隐藏状态：h_t = tanh(W_{xh} x_t + W_{hh} h_{t-1} + b_h)。
2. 通过计算 y_t = W_{hy} h_t + b_y，我们可以得到下一个输出。

3. 问题

https://github.com/vzhou842/rnn-from-scratch/blob/master/data.py

Text

Positive?

i am good

this is very good

i am not at all happy

this was good earlier

4. 计划

5. 预处理

data.py

# Toy sentiment dataset: each key is a sentence, each value is its label.
train_data = {
    'good': True,
    # ... more data
}

test_data = {
    'this is happy': True,
    'i am good': True,
    # ... more data
}

True=积极，False=消极

##### main.py

from data import train_data, test_data

# Create the vocabulary.

# Collect every distinct word that appears in any training sentence.
vocab = list({word for sentence in train_data.keys() for word in sentence.split(' ')})
vocab_size = len(vocab)
print('%d unique words found' % vocab_size) # 18 unique words found

##### main.py

# Give each word a unique integer index, plus the reverse mapping.
idx_to_word = dict(enumerate(vocab))
word_to_idx = {word: idx for idx, word in idx_to_word.items()}

print(word_to_idx['good']) # 16 (this may change)
print(idx_to_word[0]) # sad (this may change)

##### main.py

import numpy as np

def createInputs(text):
    '''
    Return a list of one-hot column vectors representing the words
    in the input text string.

    - text is a string of space-separated words (each must be in the vocab).
    - Each one-hot vector has shape (vocab_size, 1).
    '''
    def one_hot(word):
        # A zero column vector with a single 1 at the word's index.
        vec = np.zeros((vocab_size, 1))
        vec[word_to_idx[word]] = 1
        return vec

    return [one_hot(w) for w in text.split(' ')]

6. 前向传播阶段

##### rnn.py

import numpy as np

from numpy.random import randn

class RNN:
    # A vanilla (Elman) recurrent neural network.

    def __init__(self, input_size, output_size, hidden_size=64):
        # Weight matrices — small random values keep the early tanh
        # activations near the linear regime.
        self.Wxh = randn(hidden_size, input_size) / 1000
        self.Whh = randn(hidden_size, hidden_size) / 1000
        self.Why = randn(output_size, hidden_size) / 1000

        # Biases start at zero.
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

##### rnn.py

class RNN:
    # ...

    def forward(self, inputs):
        '''
        Run the RNN over a sequence and return (output, final hidden state).

        - inputs is a list of one-hot column vectors, each of shape
          (input_size, 1).
        '''
        hidden_size = self.Whh.shape[0]
        h = np.zeros((hidden_size, 1))

        # Walk the sequence, folding each input into the hidden state.
        for x in inputs:
            h = np.tanh(self.Wxh @ x + self.Whh @ h + self.bh)

        # Project the final hidden state to the output.
        return self.Why @ h + self.by, h

##### main.py

# ...

def softmax(xs):
    # Exponentiate every entry, then normalize so they sum to 1.
    exps = np.exp(xs)
    return exps / sum(exps)

# Build the network: one input unit per vocabulary word, two output classes.
rnn = RNN(vocab_size, 2)

# Run a single forward pass on a sample sentence and inspect the class
# probabilities (untrained, so they hover around 50/50).
inputs = createInputs('i am very good')
out, h = rnn.forward(inputs)
probs = softmax(out)
print(probs) # [[0.50000095], [0.49999905]]

https://victorzhou.com/blog/softmax/

7. 反向传播阶段

https://github.com/vzhou842/rnn-from-scratch

7.1定义

7.2 准备

##### rnn.py

class RNN:
    # ...

    def forward(self, inputs):
        '''
        Perform a forward pass of the RNN using the given inputs.
        Returns the final output and hidden state.

        - inputs is an array of one-hot vectors with shape (input_size, 1).
        '''
        h = np.zeros((self.Whh.shape[0], 1))

        # Cache the inputs and every intermediate hidden state —
        # backprop() needs them to compute gradients.
        # (The source fused these two statements onto one line, which is
        # a syntax error; they are split here.)
        self.last_inputs = inputs
        self.last_hs = { 0: h }

        # Perform each step of the RNN
        for i, x in enumerate(inputs):
            h = np.tanh(self.Wxh @ x + self.Whh @ h + self.bh)
            self.last_hs[i + 1] = h

        # Compute the output
        y = self.Why @ h + self.by

        return y, h

    def backprop(self, d_y, learn_rate=2e-2):
        '''
        Perform a backward pass of the RNN.

        - d_y (dL/dy) has shape (output_size, 1).
        - learn_rate is a float.
        '''
        # Placeholder — the gradient computation is filled in further below.
        pass

7.3 梯度

##### main.py

# Loop over each training example
for x, y in train_data.items():
    inputs = createInputs(x)
    target = int(y)

    # Forward
    out, _ = rnn.forward(inputs)
    probs = softmax(out)

    # Build dL/dy: for softmax + cross-entropy, the gradient is the
    # probabilities with 1 subtracted at the correct class.
    # (The source fused these two statements onto one line, which is a
    # syntax error; they are split here.)
    d_L_d_y = probs
    d_L_d_y[target] -= 1

    # Backward
    rnn.backprop(d_L_d_y)

##### rnn.py

class RNN:
    # ...

    def backprop(self, d_y, learn_rate=2e-2):
        '''
        Perform a backward pass of the RNN (only the output-layer
        gradients so far — the remaining steps are filled in below).

        - d_y (dL/dy) has shape (output_size, 1).
        - learn_rate is a float.
        '''
        steps = len(self.last_inputs)

        # Gradients for the output layer: dL/dWhy and dL/dby.
        d_Why = d_y @ self.last_hs[steps].T
        d_by = d_y

##### rnn.py

class RNN:
    # ...

    def backprop(self, d_y, learn_rate=2e-2):
        '''
        Perform a backward pass of the RNN (backpropagation through time)
        and update all weights and biases via gradient descent.

        The source text was garbled (curly quotes, en-dashes inside code,
        `For`/`Self`/`D_Why`/`D_Whh` capitalization); this restores the
        working code.

        - d_y (dL/dy) has shape (output_size, 1).
        - learn_rate is a float.
        '''
        n = len(self.last_inputs)

        # Calculate dL/dWhy and dL/dby.
        d_Why = d_y @ self.last_hs[n].T
        # NOTE(review): d_by aliases d_y, so the in-place clip below also
        # mutates the caller's array — same as the original tutorial.
        d_by = d_y

        # Initialize dL/dWhh, dL/dWxh, and dL/dbh to zero.
        d_Whh = np.zeros(self.Whh.shape)
        d_Wxh = np.zeros(self.Wxh.shape)
        d_bh = np.zeros(self.bh.shape)

        # Calculate dL/dh for the last h.
        d_h = self.Why.T @ d_y

        # Backpropagate through time.
        for t in reversed(range(n)):
            # An intermediate value: dL/dh * (1 - h^2)  (tanh' = 1 - tanh^2)
            temp = ((1 - self.last_hs[t + 1] ** 2) * d_h)

            # dL/db = dL/dh * (1 - h^2)
            d_bh += temp

            # dL/dWhh = dL/dh * (1 - h^2) * h_{t-1}
            d_Whh += temp @ self.last_hs[t].T

            # dL/dWxh = dL/dh * (1 - h^2) * x
            d_Wxh += temp @ self.last_inputs[t].T

            # Next dL/dh = dL/dh * (1 - h^2) * Whh
            d_h = self.Whh @ temp

        # Clip to prevent exploding gradients.
        for d in [d_Wxh, d_Whh, d_Why, d_bh, d_by]:
            np.clip(d, -1, 1, out=d)

        # Update weights and biases using gradient descent.
        self.Whh -= learn_rate * d_Whh
        self.Wxh -= learn_rate * d_Wxh
        self.Why -= learn_rate * d_Why
        self.bh -= learn_rate * d_bh
        self.by -= learn_rate * d_by

8. 高潮

##### main.py

import random

def processData(data, backprop=True):
    '''
    Returns the RNN's loss and accuracy for the given data.

    - data is a dictionary mapping text to True or False.
    - backprop determines if the backward phase should be run.
    '''
    items = list(data.items())
    random.shuffle(items)  # visit examples in a fresh order each epoch

    loss, num_correct = 0, 0

    for text, label in items:
        inputs = createInputs(text)
        target = int(label)

        # Forward
        out, _ = rnn.forward(inputs)
        probs = softmax(out)

        # Cross-entropy loss plus accuracy bookkeeping.
        loss -= np.log(probs[target])
        num_correct += int(np.argmax(probs) == target)

        if backprop:
            # Build dL/dy and run the backward phase.
            d_L_d_y = probs
            d_L_d_y[target] -= 1
            rnn.backprop(d_L_d_y)

    return loss / len(data), num_correct / len(data)

##### main.py

# Training loop
for epoch in range(1000):
    train_loss, train_acc = processData(train_data)

    # Report progress (and test-set metrics) every 100 epochs.
    if epoch % 100 == 99:
        print('--- Epoch %d' % (epoch + 1))
        print('Train:\tLoss %.3f | Accuracy: %.3f' % (train_loss, train_acc))

        test_loss, test_acc = processData(test_data, backprop=False)
        print('Test:\tLoss %.3f | Accuracy: %.3f' % (test_loss, test_acc))

--- Epoch 100

Train: Loss 0.688 | Accuracy: 0.517

Test: Loss 0.700 | Accuracy: 0.500

--- Epoch 200

Train: Loss 0.680 | Accuracy: 0.552

Test: Loss 0.717 | Accuracy: 0.450

--- Epoch 300

Train: Loss 0.593 | Accuracy: 0.655

Test: Loss 0.657 | Accuracy: 0.650

--- Epoch 400

Train: Loss 0.401 | Accuracy: 0.810

Test: Loss 0.689 | Accuracy: 0.650

--- Epoch 500

Train: Loss 0.312 | Accuracy: 0.862

Test: Loss 0.693 | Accuracy: 0.550

--- Epoch 600

Train: Loss 0.148 | Accuracy: 0.914

Test: Loss 0.404 | Accuracy: 0.800

--- Epoch 700

Train: Loss 0.008 | Accuracy: 1.000

Test: Loss 0.016 | Accuracy: 1.000

--- Epoch 800

Train: Loss 0.004 | Accuracy: 1.000

Test: Loss 0.007 | Accuracy: 1.000

--- Epoch 900

Train: Loss 0.002 | Accuracy: 1.000

Test: Loss 0.004 | Accuracy: 1.000

--- Epoch 1000

Train: Loss 0.002 | Accuracy: 1.000

Test: Loss 0.003 | Accuracy: 1.000

https://github.com/vzhou842/rnn-from-scratch

9. 总结

• 了解长短期记忆网络（LSTM），这是一个更强大和更受欢迎的RNN架构，或关于LSTM的著名的变体--门控循环单元（GRU）。
• 通过恰当的ML库（如Tensorflow，Keras或PyTorch），你可以尝试更大/更好的RNN。
• 了解双向RNN，它可以处理前向和后向序列，因此输出层可以获得更多信息。
• 尝试像GloVe或Word2Vec这样的Word嵌入，可用于将单词转换为更有用的矢量表示。
• 查看自然语言工具包（NLTK），这是一个用于处理人类语言数据的Python库

An Introduction to Recurrent Neural Networks for Beginners

https://victorzhou.com/blog/intro-to-rnns/

0 条评论

• ### 吴恩达deeplearning.ai五项课程完整笔记了解一下？

来源：机器之心 通过本文为大家解读如何构建自然语言、音频和其他序列数据的模型。 自吴恩达发布 deeplearning.ai 课程以来，很多学习者陆续完成了所...

• ### 综述 | 近年来深度学习的重要研究成果（附PDF）

本文列举出了近年来深度学习的重要研究成果，从方法、架构，以及正则化、优化技术方面进行概述。

• ### 干货 | 纽约大学陈溪： AlphaGo Zero技术演进的必然性（附PPT）

本讲座选自纽约大学助理教授陈溪近日在2018第二届杉数科技AI大师圆桌会上所做的题为《 AlphaGo Zero技术演进的必然性-机器学习与决策的有机结合》的演...

• ### 菜鸟必备的循环神经网络指南

An Introduction to Recurrent Neural Networks for Beginners

• ### 深度学习中的参数梯度推导（四）：vanilla RNN的前传与反传

在前面我们讲到了DNN，以及DNN的特例CNN的模型和前向反向传播算法，这些算法都是前向反馈的，模型的输出和模型本身没有关联关系。今天我们就讨论另一类输出和模型...

• ### 一步一步学lucene——（第三步：索引篇）

在前面概要的了解了lucene的内容下面就深入一下lucene的各个模块。这里我们主要深入一下lucene的索引，就是如何构建索引的过程及概念。 lucene与...

• ### 啪啪，打脸了！领导说：try-catch必须放在循环体外！

哈喽，亲爱的小伙伴们，技术学磊哥，进步没得说！欢迎来到新一期的性能解读系列，我是磊哥。

• ### 前端跨了个域

在使用上来说，iframe 跨域是比较麻烦的一种（创建新元素 -> 处理跨域交互），但是伟大的邓小平同志说过：

• ### python dict 与list比较

Python内置了字典：dict的支持，dict全称dictionary，在其他语言中也称为map，使用键-值（key-value）存储，具有极快的查找速度

• ### "网络安全卓越人才" 可破格录取不受高考分数限制

从6月10日开始，省内四川大学，西南交大等高校陆续开始自主招生考试。尽管7、8日的全国统考已经落下帷幕，但是对于很多要参加自主招生的学生来说，高考还在继续。...