Copyright notice: this is an original article by the blogger, licensed under CC 4.0 BY-SA. Please include the original source link and this notice when reposting.
Original link: https://blog.csdn.net/github_39655029/article/details/86628097
Mini-batch stochastic gradient descent (mini-batch SGD): first choose initial values for the model parameters, then update them over many iterations so that each iteration is likely to lower the value of the loss function. In each iteration, first uniformly sample at random a mini-batch B consisting of a fixed number of training examples; then compute the gradient (derivative) of the average loss over the mini-batch with respect to the model parameters; finally, multiply this gradient by a preset positive number (the learning rate) and subtract the product from the parameters. Note: the batch size and the learning rate are set manually rather than learned during training, so they are called hyperparameters; what we usually mean by "tuning" is precisely adjusting these hyperparameters.
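Concretely, writing $\eta$ for the learning rate and $\mathcal{B}$ for the sampled mini-batch, each iteration applies the standard textbook update (stated here for reference):

$$\theta \leftarrow \theta - \frac{\eta}{|\mathcal{B}|} \sum_{i \in \mathcal{B}} \nabla_{\theta}\, \ell^{(i)}(\theta)$$

where $\ell^{(i)}(\theta)$ is the loss on example $i$. This is exactly what the sgd function in the first script below implements, as param[:] = param - lr * param.grad / batch_size.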
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/1/15 23:04
# @Author : Cunyu
# @Site : cunyu1943.github.io
# @File : linearRegression.py
# @Software: PyCharm

from IPython import display
from matplotlib import pyplot as plt
from mxnet import autograd, nd
import random

"""Linear regression implemented from scratch"""

# Generate a synthetic dataset
num_input = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = nd.random.normal(scale=1, shape=(num_examples, num_input))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
print(features[0], labels[0])

def use_svg_display():
    # Render plots as vector graphics (SVG)
    display.set_matplotlib_formats('svg')

def set_figsize(figsize=(3.5, 2.5)):
    use_svg_display()
    # Set the figure size
    plt.rcParams['figure.figsize'] = figsize

set_figsize()
plt.scatter(features[:, 1].asnumpy(), labels.asnumpy(), 1)
plt.show()

# Read the data in mini-batches
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = nd.array(indices[i:min(i + batch_size, num_examples)])
        yield features.take(j), labels.take(j)  # take() returns elements by index

batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, y)
    break

# Initialize model parameters
w = nd.random.normal(scale=0.01, shape=(num_input, 1))
b = nd.zeros(shape=(1,))
w.attach_grad()
b.attach_grad()

# Define the model
def linreg(X, w, b):
    return nd.dot(X, w) + b

# Define the loss function
def squared_loss(y_hat, y):
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2

# Define the optimization algorithm
def sgd(params, lr, batch_size):
    for param in params:
        param[:] = param - lr * param.grad / batch_size

# Train the model
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss
print('Averaging the loss over each mini-batch:')
for epoch in range(num_epochs):  # train for a total of num_epochs epochs
    # In each epoch, every sample in the training set is used once
    # (assuming the number of samples is divisible by the batch size);
    # X and y are the features and labels of one mini-batch
    for X, y in data_iter(batch_size, features, labels):
        with autograd.record():
            l = loss(net(X, w, b), y)  # l is the loss on mini-batch X and y
        l.backward()  # gradient of the mini-batch loss w.r.t. the model parameters
        sgd([w, b], lr, batch_size)  # update the parameters with mini-batch SGD
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss: %f' % (epoch + 1, train_l.mean().asnumpy()))
print('Weights:\n', true_w, w)
print('Bias:\n', true_b, b)
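For this synthetic dataset, the SGD result can also be cross-checked against the closed-form least-squares solution. A minimal sketch of that check (my addition, not part of the original script), assuming features, labels, and num_examples as defined above:

import numpy as np

# Hypothetical sanity check: solve the normal equations directly
# and compare with the parameters learned by SGD.
X = features.asnumpy()
y = labels.asnumpy()
Xb = np.hstack([X, np.ones((num_examples, 1))])  # append a column of ones for the bias
theta, _, _, _ = np.linalg.lstsq(Xb, y, rcond=None)
print('closed-form w:', theta[:2])  # should be close to true_w = [2, -3.4]
print('closed-form b:', theta[2])   # should be close to true_b = 4.2

The same model can be written far more concisely with Gluon, as the second script shows.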
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/4/10 10:34
# @Author : cunyu
# @Site : cunyu1943.github.io
# @File : linearRegressionSimple.py
# @Software: PyCharm

"""Concise implementation of linear regression with Gluon"""

from mxnet import autograd, nd

# Generate the dataset
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)

# Read the data
from mxnet.gluon import data as gdata

batch_size = 10
# Combine the features and labels of the training data
dataset = gdata.ArrayDataset(features, labels)
# Read mini-batches in random order
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

# Read and print the first mini-batch
for X, y in data_iter:
    print(X, y)
    break

# Define the model
from mxnet.gluon import nn

net = nn.Sequential()
net.add(nn.Dense(1))

# Initialize model parameters
from mxnet import init

net.initialize(init.Normal(sigma=0.01))

# Define the loss function
from mxnet.gluon import loss as gloss

loss = gloss.L2Loss()  # the squared loss, also known as the L2-norm loss

# Define the optimization algorithm
from mxnet import gluon

trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})

# Train the model
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        trainer.step(batch_size)
    l = loss(net(features), labels)
    print('epoch: %d, loss: %f' % (epoch, l.mean().asnumpy()))

dense = net[0]
print(true_w, dense.weight.data())
print(true_b, dense.bias.data())
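Once trained, the Gluon model can be used directly for prediction. A minimal usage sketch (my addition), assuming net has been trained as in the script above; X_new is a hypothetical input:

# Predict on a new example; with true_w = [2, -3.4] and true_b = 4.2,
# the output should be close to 2*1.0 - 3.4*2.0 + 4.2 = -0.6
X_new = nd.array([[1.0, 2.0]])
print(net(X_new))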