# 深度学习笔记-深层神经网络

## 深层神经网络符号表示

• 用L=n表示一个n层的神经网络。

## 核对矩阵的维数

### 感知机算法

```python
from mxnet import gluon
from mxnet import nd
from mxnet import image
from mxnet.gluon import nn
import mxnet as mx
import numpy as np

class DataLoader:
    """Batch iterator similar to gluon.data.DataLoader, but possibly faster.

    The main difference is that this loader tries to read more examples at
    a time. It comes with two limitations: 1) all examples in the dataset
    must have the same shape, and 2) the data transformer needs to process
    multiple examples per call.

    NOTE(review): the ``class`` line was missing from the original snippet;
    the name ``DataLoader`` is reconstructed from this docstring -- confirm.
    """

    def __init__(self, dataset, batch_size, shuffle):
        """Store the dataset and the batching options.

        Args:
            dataset: Object supporting ``dataset[:]`` and ``len(dataset)``.
            batch_size: Number of examples per yielded batch.
            shuffle: If truthy, examples are shuffled on every iteration pass.
        """
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __iter__(self):
        """Yield ``(X, y)`` pairs, each holding ``batch_size`` examples."""
        # Fetch the whole dataset with a single slice.
        data = self.dataset[:]
        X = data[0]
        y = nd.array(data[1])
        n = X.shape[0]
        if self.shuffle:
            # Apply one shared permutation so features and labels stay aligned.
            idx = np.arange(n)
            np.random.shuffle(idx)
            X = nd.array(X.asnumpy()[idx])
            y = nd.array(y.asnumpy()[idx])

        # Floor division: trailing examples that do not fill a whole batch
        # are dropped, which keeps __iter__ consistent with __len__.
        for i in range(n // self.batch_size):
            start = i * self.batch_size
            stop = start + self.batch_size
            yield (X[start:stop], y[start:stop])

    def __len__(self):
        """Return the number of full batches produced per pass."""
        return len(self.dataset) // self.batch_size

"""下载 fashion mnist 单色图片，并后续加载到内存里面。"""
def transform_mnist(data, label):
"""对数据进行转换为浮点数和resize"""
if resize:
n = data.shape[0]
new_data = nd.zeros((n, resize, resize, data.shape[3]))
for i in range(n):
new_data[i] = image.imresize(data[i], resize, resize)
data = new_data
return nd.transpose(data.astype('float32'), (0,3,1,2))/255, label.astype('float32')
# 训练数据的加载，train代表是否为训练数据集。
mnist_train = gluon.data.vision.FashionMNIST(root=root, train=True, transform=transform_mnist)
# 测试数据集的加载
mnist_test = gluon.data.vision.FashionMNIST(root=root, train=False, transform=transform_mnist)
# 将得到的数据进行数据处理，可以提高数据处理速度。
return (train_data, test_data)```

1. 首先，我们需要准备训练数据集和测试数据集，并初始化模型参数。

```from mxnet import ndarray as nd
# Fashion MNIST 是单色图像(灰度图像)height：28px,width:28px.
num_inputs = 28*28
#我们的输出为0-9手写字，输出结果分类。
num_outputs = 10
#隐藏层的神经元节点
num_hidden = 256
#权重scale
weight_scale = .01
#对W1权重进行初始化
W1 = nd.random_normal(shape=(num_inputs, num_hidden), scale=weight_scale)
#对b1进行初始化
b1 = nd.zeros(num_hidden)
#对W2进行初始化
W2 = nd.random_normal(shape=(num_hidden, num_outputs), scale=weight_scale)
b2 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2]
#对参数进行求导时分配内存空间
for param in params:

2. 我们使用 ReLU 非线性函数作为神经网络的激活函数。

```def relu(X):
return nd.maximum(X, 0)```

3.定义神经网络模型。

```def net(X):
X = X.reshape((-1, num_inputs))
h1 = relu(nd.dot(X, W1) + b1)
output = nd.dot(h1, W2) + b2
return output```

4. 定义损失函数。在多类 Logistic 回归里我们提到，分开实现 Softmax 和交叉熵损失函数可能导致数值不稳定，因此这里直接使用 Gluon 提供的函数。

```from mxnet import gluon
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()```

5. 定义随机梯度下降（SGD）算法。

```def SGD(params, lr):
for param in params:
param[:] = param - lr * param.grad```

6.进行训练和测试

```from mxnet import autograd as autograd
learning_rate = .5
for epoch in range(5):
train_loss = 0.
train_acc = 0.
for data, label in train_data:
output = net(data)
loss = softmax_cross_entropy(output, label)
loss.backward()
SGD(params, learning_rate/batch_size)
train_loss += nd.mean(loss).asscalar()
train_acc += utils.accuracy(output, label)
test_acc = utils.evaluate_accuracy(test_data, net)
print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
epoch, train_loss/len(train_data),
train_acc/len(train_data), test_acc))
#output
Epoch 0. Loss: 0.825024, Train acc 0.695813, Test acc 0.792167
Epoch 1. Loss: 0.497659, Train acc 0.815288, Test acc 0.821314
Epoch 2. Loss: 0.436145, Train acc 0.838325, Test acc 0.852264
Epoch 3. Loss: 0.396678, Train acc 0.853582, Test acc 0.864083
Epoch 4. Loss: 0.375521, Train acc 0.862213, Test acc 0.837540```

## 使用 Gluon 实现多层感知机多分类算法

```from mxnet import gluon
net = gluon.nn.Sequential()
with net.name_scope():
net.initialize()```

```from mxnet import ndarray as nd

batch_size = 256
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})

for epoch in range(5):
train_loss = 0.
train_acc = 0.
for data, label in train_data:
output = net(data)
loss = softmax_cross_entropy(output, label)
loss.backward()
trainer.step(batch_size)

train_loss += nd.mean(loss).asscalar()
train_acc += accuracy(output, label)

test_acc = evaluate_accuracy(test_data, net)
print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (
epoch, train_loss/len(train_data), train_acc/len(train_data), test_acc))
#output
Epoch 0. Loss: 0.734776, Train acc 0.730536, Test acc 0.801182
Epoch 1. Loss: 0.468885, Train acc 0.827741, Test acc 0.798878
Epoch 2. Loss: 0.423539, Train acc 0.843249, Test acc 0.860677
Epoch 3. Loss: 0.379952, Train acc 0.860911, Test acc 0.839844
Epoch 4. Loss: 0.361828, Train acc 0.866403, Test acc 0.855168```

