神经网络python实现

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/haluoluo211/article/details/81158209

本文主要内容是使用python实现神经网络。neural-networks-and-deep-learning中的神经网络实现,虽然在原理上易懂,但是不够模块化,layer、net、loss、optimizer间的耦合性太高。通用的深度学习框架(例如caffe)都是将每个模块分离实现,这样提升了代码的可阅读性和扩展性。


整体上网络的构建以及运行结果展示

下面先给出整体的代码以及运行结果:

def test_net():
    """Build and train a two-layer MLP (784 -> 60 ReLU -> 10 SoftMax) on MNIST."""
    from load_data import load_mnist
    (X_train, y_train), (X_test, y_test) = load_mnist()

    net = Net()
    net.add_layer(FCLayer(28 * 28, 60, activation=ReLU))
    net.add_layer(FCLayer(60, 10, activation=SoftMax))
    net.compile()

    # Train for 100 epochs, evaluating on the test split after each epoch.
    net.train(X_train, y_train, X_test, y_test, 100)

运行结果如下:

下面给出实现的每一个模块:

激活函数
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + e^-z)."""
    return 1.0 / (1.0 + np.exp(-z))

def softmax(z):
    """Row-wise softmax; subtracts each row's max for numerical stability."""
    shifted = np.exp(z - np.max(z, axis=1, keepdims=True))
    return shifted / shifted.sum(axis=1, keepdims=True)

class Sigmoid:
    """Sigmoid activation; caches the last forward output for the derivative."""

    def __init__(self):
        self.last_forward = None

    def forward(self, in_data):
        """Apply the logistic function and cache the result."""
        self.last_forward = sigmoid(in_data)
        return self.last_forward

    def derivative(self, in_data=None):
        """Return sigma'(z) = sigma(z) * (1 - sigma(z)).

        Uses the cached forward output unless fresh input is supplied.
        Bug fix: `if in_data` raises ValueError for multi-element numpy
        arrays ("truth value of an array ... is ambiguous"); the check
        must be against None.
        """
        if in_data is not None:
            self.forward(in_data)
        return self.last_forward * (1 - self.last_forward)

class SoftMax:
    """Softmax activation intended for the output layer."""

    def __init__(self):
        self.last_forward = None

    def forward(self, in_data):
        """Apply row-wise softmax and cache the result."""
        self.last_forward = softmax(in_data)
        return self.last_forward

    def derivative(self, in_data=None):
        """Return an all-ones array of the activation's shape.

        The true softmax Jacobian is folded into the cross-entropy
        backward pass (a - y), so only the shape matters here.
        Bug fix: `in_data if in_data else ...` raises ValueError for
        multi-element numpy arrays; compare with None instead.
        """
        last_forward = in_data if in_data is not None else self.last_forward
        return np.ones(last_forward.shape)

class ReLU:
    """Rectified linear activation; caches the pre-activation input."""

    def __init__(self):
        self.last_forward = None

    def forward(self, in_data):
        # Cache the raw input (not the output): the derivative depends on
        # the sign of the input z, not on max(0, z).
        self.last_forward = in_data
        return np.maximum(0.0, in_data)

    def derivative(self, in_data=None):
        """Return 1 where the cached (or freshly given) input is positive, else 0.

        Consistency fix: the sibling activations honor an optional fresh
        `in_data`; previously ReLU silently ignored it.
        """
        if in_data is not None:
            self.forward(in_data)
        res = np.zeros(self.last_forward.shape, dtype='float32')
        res[self.last_forward > 0] = 1.
        return res
cost函数
class CrossEntropyCost:
    """Categorical cross-entropy loss for one-hot targets."""

    def __init__(self):
        pass

    @staticmethod
    def forward(y, a):
        """Mean over the batch of -sum_k y_k * log(a_k)."""
        per_sample = -np.sum(y * np.log(a), axis=1)
        return np.mean(per_sample)

    @staticmethod
    def backward(y, a):
        """Combined softmax + cross-entropy gradient: a - y."""
        return a - y


class QuadraticCost:
    """Quadratic (sum-of-squares) loss: 0.5 * ||a - y||^2.

    Consistency fix: `forward` now takes (y, a) in the same order as
    CrossEntropyCost, because Net.get_cost calls cost.forward(y, a).
    The value is symmetric in its arguments, so existing positional
    callers get identical results.
    """

    def __init__(self):
        pass

    @staticmethod
    def forward(y, a):
        """Return half the squared Euclidean norm of the residual a - y."""
        return 0.5 * np.linalg.norm(a - y) ** 2

    @staticmethod
    def backward(y, a):
        """Gradient of the loss with respect to the prediction a."""
        return a - y
Optimizer参数的优化方法例如SGD
class Optimizer(object):
    """Base optimizer holding learning-rate schedule state.

    Parameters:
        lr: initial learning rate.
        clip: gradient clipping threshold (<= 0 disables clipping).
        decay: time-based learning-rate decay coefficient.
        lr_min, lr_max: bounds the learning rate is clipped to.
    """

    def __init__(self, lr=0.001, clip=-1, decay=0., lr_min=0., lr_max=np.inf):
        self.lr = lr
        self.clip = clip
        self.decay = decay
        self.lr_min = lr_min
        self.lr_max = lr_max

        self.iterations = 0

    def update(self, params, grads):
        """Advance the iteration counter and apply time-based lr decay.

        Bug fix: the original computed `1. / 1 + decay * t`, which by
        operator precedence equals `1 + decay * t` and *grows* the
        learning rate each call. Standard time-based decay is
        lr *= 1 / (1 + decay * t).
        """
        self.iterations += 1

        self.lr *= 1. / (1. + self.decay * self.iterations)
        self.lr = np.clip(self.lr, self.lr_min, self.lr_max)


class SGD(Optimizer):
    """Vanilla stochastic gradient descent: p -= lr * g, updating in place."""

    def __init__(self, *args, **kwargs):
        # Bug fix: the original dropped *args/**kwargs on the floor, so a
        # user-supplied lr/clip/decay was silently ignored; forward them.
        Optimizer.__init__(self, *args, **kwargs)

    def update(self, params, grads):
        """Apply one SGD step to each (param, grad) pair, mutating params in place."""
        for p, g in zip(params, grads):
            # Bug fix: `npdl_clip` is not defined anywhere in this file
            # (NameError at runtime); inline the clipping it stood for —
            # clip only when a positive threshold is configured.
            if self.clip > 0:
                g = np.clip(g, -self.clip, self.clip)
            p -= self.lr * g

        # Let the base class advance the iteration count and decay lr.
        super(SGD, self).update(params, grads)
Layer全连接层
class FCLayer:
    """Fully-connected layer with a pluggable activation class."""

    def __init__(self, n_in, n_out, activation=Sigmoid):
        # Biases start at zero; weights are Gaussian scaled by 1/sqrt(fan-in)
        # to keep the initial pre-activations small.
        self.b = np.zeros((1, n_out), dtype='float32')
        self.w = np.random.randn(n_in, n_out) / np.sqrt(n_in)

        self.ac_fn = activation()

        self.d_w, self.d_b = None, None
        self.last_input = None

        # The first layer has no upstream layer to propagate a gradient to.
        self.b_first_layer = False

    def forward(self, in_data):
        """Return activation(in_data . w + b), caching the input for backward."""
        self.last_input = in_data
        return self.ac_fn.forward(np.dot(in_data, self.w) + self.b)

    def backward(self, pre_grad):
        """Store d_w / d_b and return the gradient for the layer below."""
        delta = pre_grad * self.ac_fn.derivative()
        self.d_w = np.dot(self.last_input.T, delta)
        # NOTE(review): d_b averages over the batch while d_w sums —
        # presumably intentional here, but worth confirming.
        self.d_b = np.mean(delta, axis=0)

        if self.b_first_layer:
            return None
        return np.dot(delta, self.w.T)

    # The two properties below expose (w, b) / (d_w, d_b) to the optimizer.

    @property
    def params(self):
        """Parameter pair consumed by the optimizer (updated in place)."""
        return self.w, self.b

    @property
    def grads(self):
        """Gradient pair aligned one-to-one with `params`."""
        return self.d_w, self.d_b
Net网络
class Net:
    """A minimal feed-forward network: an ordered stack of layers plus a
    cost function and an optimizer (Python 2 code: print statements, xrange)."""

    def __init__(self):
        self.layers = []
        self.loss = None
        self.optimizer = None

    def add_layer(self, layer):
        # Layers are applied in insertion order during forward().
        self.layers.append(layer)

    def compile(self, cost=CrossEntropyCost, optimizer=SGD):
        """Instantiate the cost and optimizer (pass classes, not instances)."""
        self.cost = cost()
        self.optimizer = optimizer()

    def train(self, X_train, y_train, X_test, y_test,
              epochs=100, lr=0.5, batch_size=100):
        """Mini-batch gradient-descent training loop.

        Runs forward/backward per batch, updates the parameters in place,
        then logs training cost and test accuracy once per epoch and
        finally plots both curves.
        NOTE(review): the `lr` argument is never used — the learning rate
        actually applied is the optimizer's own `lr`.
        """

        method_name = self.optimizer.__class__.__name__
        print "using %s method to train" % method_name

        n = len(X_train)

        # Per-epoch history used for the final plot.
        lst_iter, lst_loss, lst_acc = [], [], []

        for ep in xrange(epochs):
            # Re-seeding each epoch makes the shuffle reproducible.
            np.random.seed(ep)
            arr_idx = np.arange(n)
            np.random.shuffle(arr_idx)
            # NOTE(review): arr_idx is shuffled but never applied below —
            # batches are still taken in the original data order.

            for k in xrange(0, n, batch_size):

                # forward propagation
                y_pred = self.forward(X_train[k:k+batch_size])

                # backward propagation: seed with dC/da from the cost, then
                # walk the layers in reverse, each returning the gradient
                # for the layer below it
                next_grad = self.cost.backward(y_train[k:k+batch_size], y_pred)
                for layer in self.layers[::-1]:
                    next_grad = layer.backward(next_grad)

                # get parameter and gradients as flat, index-aligned lists;
                # the optimizer mutates the parameter arrays in place
                params = []
                grads = []
                for layer in self.layers:
                    params += layer.params
                    grads += layer.grads

                # update parameter
                self.optimizer.update(params, grads)

            # print info
            print "============== epoch %s complete =============" % ep
            cost = self.get_cost(X_train, y_train)
            print "training cost is %s" % cost

            right_num = self.get_accuracy(X_test, y_test)
            print "accuracy on test data %s / %s" % (right_num, len(y_test))

            lst_iter.append(ep)
            lst_acc.append(1.0 * right_num / len(y_test))
            lst_loss.append(cost)

        draw_result(lst_iter, lst_loss, lst_acc, method_name)

    def forward(self, x_batch):
        """Feed x_batch through every layer in order; return the final output."""
        x_in = x_batch
        for layer in self.layers:
            x_in = layer.forward(x_in)

        y_pred = x_in
        return y_pred

    def get_accuracy(self, X_batch, y_batch):
        """Count samples whose argmax prediction matches the one-hot label."""
        rets = [(np.argmax(self.forward(x)), np.argmax(y))
                for (x, y) in zip(X_batch, y_batch)]

        return sum(a == y for (a, y) in rets)

    def get_cost(self, X_train, y_train):
        """Return the cost over the full training set in one forward pass."""
        a = self.forward(X_train)
        return self.cost.forward(y_train, a)
loss、cost的曲线绘制

曲线的绘制可见stackoverflow

def draw_result(lst_iter, lst_loss, lst_acc, title):
    """Plot loss (blue) and accuracy (red) against iteration number,
    save the figure to '<title>.png', then display it."""
    plt.plot(lst_iter, lst_loss, '-b', label='loss')
    plt.plot(lst_iter, lst_acc, '-r', label='accuracy')

    plt.title(title)
    plt.xlabel("n iteration")
    plt.legend(loc='upper left')

    # Save before show(): show() clears the figure, leaving savefig a blank canvas.
    plt.savefig(title + ".png")
    plt.show()

#对应的测试代码:

def test_draw():
    """Smoke-test draw_result with two synthetic quadratic curves."""
    xs = range(100)
    fake_loss = [0.01 * i - 0.01 * i ** 2 for i in xrange(100)]
    fake_acc = [0.01 * i + 0.01 * i ** 2 for i in xrange(100)]
    draw_result(xs, fake_loss, fake_acc, "sgd_method")
参考:

http://neuralnetworksanddeeplearning.com/

github

stackoverflow

本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。

发表于

我来说两句

0 条评论
登录 后参与评论

相关文章

来自专栏marsggbo

Python数据增强(data augmentation)库--Augmentor 使用介绍

Augmentor 使用介绍 原图 ? 1.random_distortion(probability, grid_height, grid_width, ma...

4798
来自专栏杨熹的专栏

TensorFlow -2: 用 CNN 识别数字

昨天只是用了简单的 softmax 做数字识别,准确率为 92%,这个太低了,今天用 CNN 来提高一下准确率。关于 CNN,可以看这篇:图解何为CNN简单看一...

1.6K0
来自专栏MelonTeam专栏

全卷积神经网络 fcn 学习笔记

导语: 前段时间学习了一下全卷积神经网络fcn,现以笔记的形式总结学习的过程。主要包括四个部分: (1)caffe框架的搭建;(2)fcn原理介绍;(3)分析具...

7606
来自专栏CNN

MobileNet V1官方预训练模型的使用

MobileNet V1的网络结构可以直接从官方Github库中下载定义网络结构的文件,地址为:https://raw.githubusercontent.co...

6932
来自专栏WOLFRAM

三维图形中指定绘图的区域,想知道这个区域上最大值是多少?

1594
来自专栏计算机视觉与深度学习基础

【深度学习】使用tensorflow实现VGG19网络

转载注明出处:http://blog.csdn.net/accepthjp/article/details/70170217 接上一篇AlexNet,本文讲...

7519
来自专栏Petrichor的专栏

深度学习: 卷积核 为什么都是 奇数size

3281
来自专栏计算机视觉与深度学习基础

【深度学习】使用tensorflow实现VGG19网络

接上一篇AlexNet,本文讲述使用tensorflow实现VGG19网络。 VGG网络与AlexNet类似,也是一种CNN,VGG在2014年的 ILSV...

5344
来自专栏ATYUN订阅号

【教程】利用Tensorflow目标检测API确定图像中目标的位置

深度学习提供了另一种解决“Wally在哪儿”(美国漫画)问题的方法。与传统的图像处理计算机视觉方法不同的是,它只使用了少量的标记出Wally位置的示例。 在我的...

7026
来自专栏YoungGy

ML基石_12_NonLinearTransformation

retro quadratic hypothesis nonlinear transform price on nonlinear transform stru...

2028

扫码关注云+社区

领取腾讯云代金券