Usage is much like PyTorch's RNN. First, let's look at the signature of MXNet's RNN:
mxnet.gluon.rnn.RNN(hidden_size, num_layers=1, activation='relu', layout='TNC', dropout=0, bidirectional=False, i2h_weight_initializer=None, h2h_weight_initializer=None, i2h_bias_initializer='zeros', h2h_bias_initializer='zeros', input_size=0, **kwargs)
Parameters:

- **hidden_size** (int) – The number of features in the hidden state h.
- **num_layers** (int, default 1) – Number of recurrent layers.
- **activation** ({'relu' or 'tanh'}, default 'relu') – The activation function to use.
- **layout** (str, default 'TNC') – The format of input and output tensors. T, N and C stand for sequence length, batch size, and feature dimensions respectively.
- **dropout** (float, default 0) – If non-zero, introduces a dropout layer on the outputs of each RNN layer except the last layer.
- **bidirectional** (bool, default False) – If True, becomes a bidirectional RNN.
- **i2h_weight_initializer** (str or Initializer) – Initializer for the input weights matrix, used for the linear transformation of the inputs.
- **h2h_weight_initializer** (str or Initializer) – Initializer for the recurrent weights matrix, used for the linear transformation of the recurrent state.
- **i2h_bias_initializer** (str or Initializer, default 'zeros') – Initializer for the input-to-hidden bias vector.
- **h2h_bias_initializer** (str or Initializer, default 'zeros') – Initializer for the hidden-to-hidden bias vector.
- **input_size** (int, default 0) – The number of expected features in the input x. If not specified, it will be inferred from the input.
- **prefix** (str or None) – Prefix of this Block.
- **params** (ParameterDict or None) – Shared Parameters for this Block.
The parameters to pay attention to are hidden_size (int), num_layers (int, default 1), and layout (str, default 'TNC'); they are the basis on which we build the network. Because our data loader yields batch-first tensors, we pass layout="NTC". A quick layout check comes first, then the model class (the full runnable script, imports included, follows further down).
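Since the default layout is 'TNC' while Fashion-MNIST batches come out batch-first, here is a quick check of how layout changes the expected input shape (a minimal sketch with illustrative shapes, not part of the original script):

```python
from mxnet import nd
from mxnet.gluon import rnn

net_tnc = rnn.RNN(128, 2)                # default layout='TNC'
net_ntc = rnn.RNN(128, 2, layout='NTC')  # batch-first, as used below
net_tnc.initialize()
net_ntc.initialize()

x_tnc = nd.random.normal(shape=(14, 100, 56))  # (seq_len, batch, feature)
x_ntc = nd.random.normal(shape=(100, 14, 56))  # (batch, seq_len, feature)
print(net_tnc(x_tnc).shape)  # (14, 100, 128)
print(net_ntc(x_ntc).shape)  # (100, 14, 128)
```

With the layout settled, here is the model class: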
class Model(gl.nn.Block):
    def __init__(self, **kwargs):
        super(Model, self).__init__(**kwargs)
        # Two stacked RNN layers with 128 hidden units each; layout="NTC"
        # means the input is (batch, seq_len, feature).
        self.rnn = gl.rnn.RNN(128, 2, layout="NTC")
        self.out = gl.nn.Dense(10)

    def forward(self, x):
        # Keep only the last time step's hidden state: (batch, 128).
        f1 = self.rnn(x)[:, -1, :]
        return self.out(f1)

    def init(self):
        self.initialize(mx.init.Normal(sigma=0.01), force_reinit=True, ctx=ctx)
        self.loss = gl.loss.SoftmaxCrossEntropyLoss()
        self.opt = gl.Trainer(self.collect_params(), "adam", {"learning_rate": 0.01})
        return self.loss, self.opt
from mxnet import gluon as gl
import mxnet as mx
import numpy as np
import sys
from tqdm import tqdm
mnist_train = gl.data.vision.FashionMNIST(root="L3/fashion-mnist/", train=True)
mnist_test = gl.data.vision.FashionMNIST(root="L3/fashion-mnist/", train=False)
batch_size = 100
transformer = gl.data.vision.transforms.ToTensor()
if sys.platform.startswith('win'):
    num_workers = 0  # 0 means no extra worker processes are used to load data
else:
    num_workers = 4
train_iter = gl.data.DataLoader(mnist_train.transform_first(transformer),
                                batch_size, shuffle=True,
                                num_workers=num_workers)
test_iter = gl.data.DataLoader(mnist_test.transform_first(transformer),
                               1000, shuffle=False,
                               num_workers=num_workers)
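To see what the loader yields (and why the training loop later reshapes to (-1, 14, 56)), you can peek at one batch; a small optional check, assuming the code above has run:

```python
x, y = next(iter(train_iter))
print(x.shape, y.shape)               # (100, 1, 28, 28) (100,)
print(x.reshape((-1, 14, 56)).shape)  # (100, 14, 56): 14 steps of 56 features each
```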
def try_gpu():
    # Use the GPU if one is available, otherwise fall back to the CPU.
    try:
        ctx = mx.gpu()
        _ = mx.nd.zeros((1,), ctx=ctx)
    except mx.base.MXNetError:
        ctx = mx.cpu()
    return ctx

ctx = mx.cpu()  # pinned to the CPU here; see the note below for using try_gpu()
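Note that the script pins ctx to the CPU even though try_gpu() is defined. If you switch to try_gpu(), each batch must also be copied to the same device, because the training loop below feeds CPU-resident NDArrays; a hedged sketch of that variant:

```python
ctx = try_gpu()  # mx.gpu() if usable, otherwise mx.cpu()
x, y = next(iter(train_iter))
x = x.reshape((-1, 14, 56)).as_in_context(ctx)  # move the batch to the device
y = y.as_in_context(ctx)
```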
def accuracy(y_hat, y):
    # Fraction of rows whose argmax matches the label.
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()
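A tiny sanity check of the helper (illustrative values):

```python
y_hat = mx.nd.array([[0.1, 0.9], [0.8, 0.2]])  # scores for 2 samples, 2 classes
y = mx.nd.array([1, 1])                        # true labels
print(accuracy(y_hat, y))                      # 0.5: only the first row matches
```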
class Model(gl.nn.Block):
    def __init__(self, **kwargs):
        super(Model, self).__init__(**kwargs)
        # Two stacked RNN layers with 128 hidden units each; layout="NTC"
        # means the input is (batch, seq_len, feature).
        self.rnn = gl.rnn.RNN(128, 2, layout="NTC")
        self.out = gl.nn.Dense(10)

    def forward(self, x):
        # Keep only the last time step's hidden state: (batch, 128).
        f1 = self.rnn(x)[:, -1, :]
        return self.out(f1)

    def init(self):
        self.initialize(mx.init.Normal(sigma=0.01), force_reinit=True, ctx=ctx)
        self.loss = gl.loss.SoftmaxCrossEntropyLoss()
        self.opt = gl.Trainer(self.collect_params(), "adam", {"learning_rate": 0.01})
        return self.loss, self.opt
model = Model()
loss, opt = model.init()
num = 5
for e in range(1, num + 1):
    losses = []
    for x, y in train_iter:
        # Treat each 28x28 image as a sequence of 14 steps,
        # each step a 56-dimensional feature vector (two image rows).
        x = x.reshape((-1, 14, 56))
        with mx.autograd.record():
            l = loss(model(x), y)
        losses.append(l.asnumpy())
        l.backward()
        opt.step(batch_size)
    loss_val = np.mean(losses)
    for Xt, yt in test_iter:
        Xt = Xt.reshape((-1, 14, 56))
        # Only the first test batch (1000 images) is used for a quick estimate.
        accy = accuracy(model(Xt), yt)
        break
    print('epoch %d, loss: %f, acc: %f' % (e, loss_val, accy))
print(model)
print(model.collect_params())
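To make the [:, -1, :] slice in forward concrete, here is a shape walk-through on a fake batch (a sketch you could run after the script above; the shapes match the printed model below):

```python
x = mx.nd.random.normal(shape=(100, 14, 56))  # one fake batch: (N, T, C)
seq = model.rnn(x)            # (100, 14, 128): a hidden state for every time step
last = seq[:, -1, :]          # (100, 128): the final step summarizes the sequence
print(model.out(last).shape)  # (100, 10): class scores
```

Running the original script prints: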
(MX_GPU) C:\Files\DATAs\prjs\python\mxnet\dl021>C:/Files/APPs/RuanJian/Miniconda3/envs/MX_GPU/python.exe c:/Files/DATAs/prjs/python/mxnet/dl021/L3/l031.py
epoch 1, loss: 1.012638, acc: 0.754000
epoch 2, loss: 0.597370, acc: 0.808000
epoch 3, loss: 0.509288, acc: 0.820000
epoch 4, loss: 0.489378, acc: 0.827000
epoch 5, loss: 0.466955, acc: 0.850000
Model(
(rnn): RNN(56 -> 128, NTC, num_layers=2)
(out): Dense(128 -> 10, linear)
(loss): SoftmaxCrossEntropyLoss(batch_axis=0, w=None)
)
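Note that SoftmaxCrossEntropyLoss appears as a child of Model: assigning a Block to an attribute inside init() registers it with the parent, so print(model) lists it alongside rnn and out. The RNN(56 -> 128, ...) line also confirms that input_size was inferred as 56 from the first forward pass, since we never specified it.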
model0_ (
Parameter rnn0_l0_i2h_weight (shape=(128, 56), dtype=<class 'numpy.float32'>)
Parameter rnn0_l0_h2h_weight (shape=(128, 128), dtype=<class 'numpy.float32'>)
Parameter rnn0_l0_i2h_bias (shape=(128,), dtype=<class 'numpy.float32'>)
Parameter rnn0_l0_h2h_bias (shape=(128,), dtype=<class 'numpy.float32'>)
Parameter rnn0_l1_i2h_weight (shape=(128, 128), dtype=<class 'numpy.float32'>)
Parameter rnn0_l1_h2h_weight (shape=(128, 128), dtype=<class 'numpy.float32'>)
Parameter rnn0_l1_i2h_bias (shape=(128,), dtype=<class 'numpy.float32'>)
Parameter rnn0_l1_h2h_bias (shape=(128,), dtype=<class 'numpy.float32'>)
Parameter dense0_weight (shape=(10, 128), dtype=float32)
Parameter dense0_bias (shape=(10,), dtype=float32)
)
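These shapes follow directly from the constructor arguments: with hidden_size=128 and input_size inferred as 56, layer 0's i2h weight is (128, 56) and its h2h weight is (128, 128); layer 1 consumes layer 0's 128-dimensional output, so both of its weight matrices are (128, 128). A quick tally of the total parameter count (plain arithmetic over the printed shapes, reusing np from the script):

```python
shapes = [(128, 56), (128, 128), (128,), (128,),   # layer 0: i2h_w, h2h_w, i2h_b, h2h_b
          (128, 128), (128, 128), (128,), (128,),  # layer 1
          (10, 128), (10,)]                        # dense: weight, bias
print(sum(int(np.prod(s)) for s in shapes))        # 58122 parameters in total
```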