# TensorFlow 学前班

1. normalization
2. learning hyperparameters
3. initializing weights
4. forward propagation
5. calculate error
6. backpropagation

1. Define the graph of nodes and edges.
2. Propagate（传播） values through the graph.

## 1. node

```python
class Node(object):
    """Base class for every node in the computation graph.

    Args:
        inbound_nodes: Nodes whose values feed into this node. Omit it (or
            pass None) for a node without inputs.
    """

    def __init__(self, inbound_nodes=None):
        # Build a fresh list per instance; a `[]` default would be one list
        # object shared by every node created without inputs.
        self.inbound_nodes = [] if inbound_nodes is None else inbound_nodes
        # Nodes that consume this node's value; filled in by their constructors.
        self.outbound_nodes = []
        # Register this node as a consumer of each of its inputs.
        for n in self.inbound_nodes:
            n.outbound_nodes.append(self)
        # No value until one is assigned.
        self.value = None
```

## Forward propagation

```python
def topological_sort(feed_dict):
    """Order the graph's nodes so each node comes after all of its inputs.

    Uses Kahn's algorithm, starting from the keys of `feed_dict`.

    Args:
        feed_dict: Maps each starting node to the value it should hold.

    Returns:
        A list of every node reachable from the keys of `feed_dict`, in an
        order where a node appears only after all nodes feeding into it.
    """
    input_nodes = list(feed_dict)

    # Pass 1: walk the graph and record each node's incoming/outgoing edges.
    G = {}
    pending = list(input_nodes)
    while pending:
        n = pending.pop()
        if n not in G:
            G[n] = {'in': set(), 'out': set()}
        for m in n.outbound_nodes:
            if m not in G:
                G[m] = {'in': set(), 'out': set()}
                # Only newly discovered nodes still need their edges walked.
                pending.append(m)
            G[n]['out'].add(m)
            G[m]['in'].add(n)

    # Pass 2: repeatedly emit a node with no unprocessed incoming edges.
    L = []
    S = set(input_nodes)
    while S:
        n = S.pop()

        if isinstance(n, Input):
            n.value = feed_dict[n]

        L.append(n)
        for m in n.outbound_nodes:
            # discard() tolerates a node listed twice in outbound_nodes.
            G[n]['out'].discard(m)
            G[m]['in'].discard(n)
            # if no other incoming edges add to S
            if not G[m]['in']:
                S.add(m)
    return L

def forward_pass(output_node, sorted_nodes):
    """Run forward() on each node in order and return the output node's value.

    Args:
        output_node: The node whose `value` is returned after the pass.
        sorted_nodes: Nodes in the order their forward() should be called.

    Returns:
        `output_node.value` as it stands once every node has been run.
    """
    for node in sorted_nodes:
        node.forward()
    return output_node.value
```

```python
class Input(Node):
    """A node with no inbound nodes; its value is supplied from outside."""

    def __init__(self):
        # No inbound nodes are passed to the base constructor.
        Node.__init__(self)

    def forward(self, value=None):
        """Store `value` when one is given; otherwise leave self.value alone."""
        if value is None:
            return
        self.value = value
```

```python
class Mul(Node):
    """Node whose value is the product of all of its inbound nodes' values."""

    def __init__(self, *inputs):
        # `inputs` arrives as a tuple and is stored as the inbound nodes.
        Node.__init__(self, inputs)

    def forward(self):
        """Multiply the inbound values together, starting from 1.0."""
        product = 1.0
        for node in self.inbound_nodes:
            product *= node.value
        self.value = product
```

```python
# Three inputs feeding a single multiplication node.
x, y, z = Input(), Input(), Input()

f = Mul(x, y, z)

feed_dict = {x: 4, y: 5, z: 10}

# Sort the graph, then evaluate it and read the value of `f`.
graph = topological_sort(feed_dict)
output = forward_pass(f, graph)

# should output 200.0 (4 * 5 * 10, accumulated from the float 1.0)
print("{} * {} * {} = {} (according to miniflow)".format(feed_dict[x], feed_dict[y], feed_dict[z], output))
```
`4 * 5 * 10 = 200.0 (according to miniflow)`

```python
class Linear(Node):
    """Node computing the weighted sum of its inputs plus a bias."""

    def __init__(self, inputs, weights, bias):
        # Inbound order is [inputs, weights, bias]; forward() indexes by it.
        Node.__init__(self, [inputs, weights, bias])

    def forward(self):
        """Set self.value to sum(inputs[i] * weights[i]) + bias."""
        xs = self.inbound_nodes[0].value
        ws = self.inbound_nodes[1].value
        offset = self.inbound_nodes[2].value

        total = 0
        for i, x in enumerate(xs):
            total += x * ws[i]

        self.value = total + offset
```

```python
# One Linear node fed by three Input nodes.
inputs, weights, bias = Input(), Input(), Input()
f = Linear(inputs, weights, bias)

# 6*0.5 + 20*0.25 + 4*1.5 + 2
feed_dict = {inputs: [6, 20, 4], weights: [0.5, 0.25, 1.5], bias: 2}

graph = topological_sort(feed_dict)
output = forward_pass(f, graph)
print(output)
```
`16.0`

```python
class Sigmoid(Node):
    """Node applying the logistic sigmoid to the value of its single input."""

    def __init__(self, node):
        Node.__init__(self, [node])

    def _sigmoid(self, x):
        """Return 1 / (1 + exp(-x))."""
        denominator = 1. + np.exp(-x)
        return 1. / denominator

    def forward(self):
        """Set self.value to the sigmoid of the inbound node's value."""
        self.value = self._sigmoid(self.inbound_nodes[0].value)
```

## 2. 定义cost函数

```python
class MSE(Node):
    """Mean squared error between the values of two inbound nodes."""

    def __init__(self, y, a):
        Node.__init__(self, [y, a])

    def forward(self):
        """Set self.value to the mean of the squared differences y - a."""
        # Reshape both to column vectors so the pairs line up row by row.
        targets = self.inbound_nodes[0].value.reshape(-1, 1)
        outputs = self.inbound_nodes[1].value.reshape(-1, 1)
        count = len(targets)
        total = 0.
        for target, output in zip(targets, outputs):
            total += np.square(target - output)
        self.value = total / count
```

## 3. 定义反向传播

```python
import numpy as np

class Node(object):
    """Base class for every node in the computation graph.

    Subclasses implement forward() and backward().

    Args:
        inbound_nodes: Nodes whose values feed into this node. Omit it (or
            pass None) for a node without inputs.
    """

    def __init__(self, inbound_nodes=None):
        # Build a fresh list per instance; a `[]` default would be one list
        # object shared by every node created without inputs.
        self.inbound_nodes = [] if inbound_nodes is None else inbound_nodes
        # No value until one is assigned.
        self.value = None
        # Nodes that consume this node's value; filled in by their constructors.
        self.outbound_nodes = []
        # Gradient storage, present from construction so that subclasses can
        # assign into it inside backward().
        self.gradients = {}
        # Register this node as a consumer of each of its inputs.
        for node in self.inbound_nodes:
            node.outbound_nodes.append(self)

    def forward(self):
        """Compute self.value; must be overridden by subclasses."""
        raise NotImplementedError

    def backward(self):
        """Compute self.gradients; must be overridden by subclasses."""
        raise NotImplementedError

class Input(Node):
    """A node with no inbound nodes; its value is assigned from outside."""

    def __init__(self):
        # No inbound nodes are passed to the base constructor.
        Node.__init__(self)

    def forward(self):
        """Do nothing: the value is assigned externally, not computed."""
        pass

    def backward(self):
        """Accumulate the gradient of this node from all of its consumers."""
        # The gradient of an input node is the sum of the gradients reported
        # by every node it feeds into.
        self.gradients = {self: 0}
        for n in self.outbound_nodes:
            self.gradients[self] += n.gradients[self]

class Linear(Node):
    """Node computing y = XW + b from its three inbound nodes."""

    def __init__(self, X, W, b):
        # Inbound order is [X, W, b]; forward() and backward() index by it.
        Node.__init__(self, [X, W, b])

    def forward(self):
        """Set self.value to np.dot(X, W) + b."""
        X = self.inbound_nodes[0].value
        W = self.inbound_nodes[1].value
        b = self.inbound_nodes[2].value

        self.value = np.dot(X, W) + b

    def backward(self):
        """Compute the gradient of the cost with respect to X, W and b."""
        X_node, W_node, b_node = self.inbound_nodes
        # Start every gradient at zero, shaped like the value it belongs to.
        self.gradients = {n: np.zeros_like(n.value) for n in self.inbound_nodes}

        # y = XW + b
        # Compute the gradient of y with respect to each input node:
        #   dy/dX = W
        #   dy/dW = X
        #   dy/db = 1
        # Each is chained with the gradient reported by a consumer and summed
        # over all consumers.
        for n in self.outbound_nodes:
            grad_cost = n.gradients[self]
            self.gradients[X_node] += np.dot(grad_cost, W_node.value.T)
            self.gradients[W_node] += np.dot(X_node.value.T, grad_cost)
            # Collapse along axis 0 so the result is shaped like b.
            self.gradients[b_node] += np.sum(grad_cost, axis=0, keepdims=False)

class Sigmoid(Node):
    """Node applying the logistic sigmoid to the value of its single input."""

    def __init__(self, node):
        # The base class constructor.
        Node.__init__(self, [node])

    def _sigmoid(self, x):
        """Return 1 / (1 + exp(-x))."""
        return 1. / (1. + np.exp(-x))

    def forward(self):
        """Set self.value to the sigmoid of the inbound node's value."""
        input_value = self.inbound_nodes[0].value
        self.value = self._sigmoid(input_value)

    def backward(self):
        """Compute the gradient of the cost with respect to the input."""
        # Initialize the gradients to 0.
        self.gradients = {n: np.zeros_like(n.value) for n in self.inbound_nodes}

        for n in self.outbound_nodes:
            # Get the partial of the cost with respect to this node.
            grad_cost = n.gradients[self]
            # forward() stored sigmoid(x) in self.value; the derivative of the
            # sigmoid is sigmoid * (1 - sigmoid).
            sigmoid = self.value
            self.gradients[self.inbound_nodes[0]] += sigmoid * (1 - sigmoid) * grad_cost

class MSE(Node):
    """Mean squared error between the values of two inbound nodes."""

    def __init__(self, y, a):
        # Call the base class' constructor.
        Node.__init__(self, [y, a])

    def forward(self):
        """Set self.value to mean((y - a) ** 2), caching m and the difference."""
        # Reshape both to column vectors so the subtraction is element-wise
        # rather than broadcast.
        y = self.inbound_nodes[0].value.reshape(-1, 1)
        a = self.inbound_nodes[1].value.reshape(-1, 1)

        # Size of y's first axis; reused by backward().
        self.m = self.inbound_nodes[0].value.shape[0]

        self.diff = y - a
        self.value = np.mean(self.diff**2)

    def backward(self):
        """Compute the gradient of the cost with respect to y and a.

        Relies on `self.m` and `self.diff` cached by forward().
        """
        y_node, a_node = self.inbound_nodes[0], self.inbound_nodes[1]
        # Build the dict here instead of assuming the attribute already exists.
        # The 2.0 literal keeps the division a float division.
        self.gradients = {}
        self.gradients[y_node] = (2.0 / self.m) * self.diff
        self.gradients[a_node] = (-2.0 / self.m) * self.diff

def topological_sort(feed_dict):
    """Order the graph's nodes so each node comes after all of its inputs.

    Uses Kahn's algorithm, starting from the keys of `feed_dict`.

    Args:
        feed_dict: Maps each starting node to the value it should hold.

    Returns:
        A list of every node reachable from the keys of `feed_dict`, in an
        order where a node appears only after all nodes feeding into it.
    """
    input_nodes = list(feed_dict)

    # Pass 1: walk the graph and record each node's incoming/outgoing edges.
    G = {}
    pending = list(input_nodes)
    while pending:
        n = pending.pop()
        if n not in G:
            G[n] = {'in': set(), 'out': set()}
        for m in n.outbound_nodes:
            if m not in G:
                G[m] = {'in': set(), 'out': set()}
                # Only newly discovered nodes still need their edges walked.
                pending.append(m)
            G[n]['out'].add(m)
            G[m]['in'].add(n)

    # Pass 2: repeatedly emit a node with no unprocessed incoming edges.
    L = []
    S = set(input_nodes)
    while S:
        n = S.pop()

        if isinstance(n, Input):
            n.value = feed_dict[n]

        L.append(n)
        for m in n.outbound_nodes:
            # discard() tolerates a node listed twice in outbound_nodes.
            G[n]['out'].discard(m)
            G[m]['in'].discard(n)
            # if no other incoming edges add to S
            if not G[m]['in']:
                S.add(m)
    return L

def forward_and_backward(graph):
    """Run a forward pass over `graph`, then a backward pass in reverse order.

    Args:
        graph: List of nodes in the order their forward() should run.
    """
    # Forward pass
    for node in graph:
        node.forward()

    # Backward pass: visit the same nodes from last to first.
    for node in reversed(graph):
        node.backward()
```

```python
# Graph: cost = MSE(y, Sigmoid(Linear(X, W, b)))
X, W, b = Input(), Input(), Input()
y = Input()
f = Linear(X, W, b)
a = Sigmoid(f)
cost = MSE(y, a)

X_ = np.array([[-1., -2.], [-1, -2]])
W_ = np.array([[2.], [3.]])
b_ = np.array([-3.])
y_ = np.array([1, 2])

feed_dict = {
    X: X_,
    y: y_,
    W: W_,
    b: b_,
}

graph = topological_sort(feed_dict)
forward_and_backward(graph)
# return the gradients for each Input
gradients = [t.gradients[t] for t in [X, y, W, b]]
print(gradients)
```

```
[array([[ -3.34017280e-05,  -5.01025919e-05],
[ -6.68040138e-05,  -1.00206021e-04]]), array([[ 0.9999833],
[ 1.9999833]]), array([[  5.01028709e-05],
[  1.00205742e-04]]), array([ -5.01028709e-05])]
```

## 4. 随机梯度下降（Stochastic Gradient Descent）

```python
def sgd_update(trainables, learning_rate=1e-2):
    """Apply one gradient-descent step to every trainable node.

    Args:
        trainables: Nodes to update. Each must expose `value` and a
            `gradients` dict holding its own gradient under the key of the
            node itself.
        learning_rate: Step size multiplied into each gradient.
    """
    for n in trainables:
        # Move the value against its own gradient.
        partial = n.gradients[n]
        n.value -= learning_rate * partial

from sklearn.utils import shuffle, resample

# `data` was never defined in this snippet. The run recorded in this document
# reports 506 examples and reads the 'data'/'target' keys, which matches the
# Boston housing dataset.
# NOTE: load_boston was removed in scikit-learn 1.2; use an older release or
# substitute another regression dataset exposing the same two keys.
from sklearn.datasets import load_boston

data = load_boston()
X_ = data['data']
y_ = data['target']

# Normalize data
X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

n_features = X_.shape[1]
n_hidden = 10
W1_ = np.random.randn(n_features, n_hidden)
b1_ = np.zeros(n_hidden)
W2_ = np.random.randn(n_hidden, 1)
b2_ = np.zeros(1)

# Neural network
X, y = Input(), Input()
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = MSE(y, l2)

feed_dict = {
    X: X_,
    y: y_,
    W1: W1_,
    b1: b1_,
    W2: W2_,
    b2: b2_
}

epochs = 10
# Total number of examples
m = X_.shape[0]
batch_size = 11
steps_per_epoch = m // batch_size

graph = topological_sort(feed_dict)
trainables = [W1, b1, W2, b2]

print("Total number of examples = {}".format(m))

# Step 4
for i in range(epochs):
    loss = 0
    for j in range(steps_per_epoch):
        # Step 1
        # Randomly sample a batch of examples
        X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

        # Reset value of X and y Inputs
        X.value = X_batch
        y.value = y_batch

        # Step 2
        forward_and_backward(graph)

        # Step 3
        sgd_update(trainables)

        # The last node in the sorted graph is read as the cost of this batch.
        loss += graph[-1].value

    print("Epoch: {}, Loss: {:.3f}".format(i+1, loss/steps_per_epoch))
```
```
Total number of examples = 506
Epoch: 1, Loss: 133.910
Epoch: 2, Loss: 36.332
Epoch: 3, Loss: 22.353
Epoch: 4, Loss: 26.704
Epoch: 5, Loss: 23.121
Epoch: 6, Loss: 23.491
Epoch: 7, Loss: 21.393
Epoch: 8, Loss: 15.300
Epoch: 9, Loss: 13.391
Epoch: 10, Loss: 15.651
```

## 总结

