# 《neural network and deep learning》题解——ch03 再看手写识别问题题解与源码分析

http://blog.csdn.net/u011239443/article/details/77649026

# 交叉熵代价函数

class QuadraticCost(object):
    """Quadratic (mean-squared-error) cost: C = 0.5 * ||a - y||^2."""

    @staticmethod
    def fn(a, y):
        """Return the cost for network output ``a`` and target ``y``."""
        diff = a - y
        return 0.5 * np.linalg.norm(diff) ** 2

    @staticmethod
    def delta(z, a, y):
        """Output-layer error delta for the quadratic cost.

        Keeps the sigmoid' factor, so learning slows when the output
        neuron saturates.
        """
        return sigmoid_prime(z) * (a - y)

class CrossEntropyCost(object):
    """Cross-entropy cost: C = -sum(y*ln(a) + (1-y)*ln(1-a))."""

    @staticmethod
    def fn(a, y):
        """Return the cost for network output ``a`` and target ``y``.

        ``nan_to_num`` maps the 0*log(0) = nan case to 0 so a saturated
        but correct output contributes zero cost.
        """
        per_neuron = -y * np.log(a) - (1 - y) * np.log(1 - a)
        return np.sum(np.nan_to_num(per_neuron))

    @staticmethod
    def delta(z, a, y):
        """Output-layer error delta; ``z`` is unused because the
        sigmoid' factor cancels for the cross-entropy cost."""
        return (a - y)

np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))

return (a - y)

# 初始化

    def __init__(self, sizes, cost=CrossEntropyCost):
        """Build a network with one layer per entry of ``sizes``.

        ``sizes`` -- list of layer widths, e.g. [784, 30, 10].
        ``cost``  -- cost class providing ``fn`` and ``delta`` static
                     methods (defaults to cross-entropy).
        """
        self.sizes = sizes
        self.num_layers = len(sizes)
        self.cost = cost
        # Weights/biases depend on self.sizes, so set that first.
        self.default_weight_initializer()

def default_weight_initializer(self):
    """Initialize biases N(0,1) and weights N(0, 1/sqrt(n_in)).

    Bug fix: the original used ``np.random.rand`` (uniform on [0,1)),
    which yields all-positive parameters and defeats the intended
    zero-mean Gaussian initialization scaled by 1/sqrt(fan-in);
    ``np.random.randn`` draws from the standard normal as required.
    """
    self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
    self.weights = [np.random.randn(y, x) / np.sqrt(x)
                    for x, y in zip(self.sizes[:-1], self.sizes[1:])]

# 随机梯度下降

    def SGD(self, training_data, epochs, mini_batch_size, eta,
lmbda=0.0,
evaluation_data=None,
monitor_evaluation_cost=False,
monitor_evaluation_accuracy=False,
monitor_training_cost=False,
monitor_training_accuray=False):
if evaluation_data:
n_data = len(evaluation_data)
n = len(training_data)
evaluation_cost, evaluation_accurary = [], []
training_cost, training_accuray = [], []
for j in xrange(epochs):
random.shuffle(training_data)
mini_batches = [training_data[k:k + mini_batch_size] for k in range(0, n, mini_batch_size)]
for mini_batch in mini_batches:
self.update_mini_batch(mini_batch, eta, lmbda, len(training_data))

print "Epoch %s training complete" %(j+1)

if monitor_training_cost:
cost = self.total_cost(training_data, lmbda)
training_cost.append(cost)
print "Cost on train: {}".format(cost)

if monitor_training_accuray:
acc = self.accuracy(training_data,covert=True)
training_accuray.append(acc)
print "Acc on train: {} / {}".format(acc,n)

if monitor_evaluation_cost:
cost = self.total_cost(evaluation_data, lmbda,convert=True)
evaluation_cost.append(cost)
print "Cost on evaluation: {}".format(cost)

if monitor_evaluation_accuracy:
acc = self.accuracy(evaluation_data)
evaluation_accurary.append(acc)
print "Acc on evaluation: {} / {}".format(acc, n_data)

print

return evaluation_cost,evaluation_accurary,training_cost,training_accuray

# 反向传播

    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)``: per-layer gradients of the cost
        for the single training example ``(x, y)``.

        The lists are shaped like ``self.biases`` / ``self.weights``.
        """
        grad_b = [np.zeros(b.shape) for b in self.biases]
        grad_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass: record every weighted input z and activation.
        activations = [x]
        zs = []
        a = x
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, a) + b
            zs.append(z)
            a = sigmoid(z)
            activations.append(a)

        # Output-layer error, delegated to the cost's delta rule.
        delta = (self.cost).delta(zs[-1], activations[-1], y)
        grad_b[-1] = delta
        grad_w[-1] = np.dot(delta, activations[-2].transpose())

        # Propagate the error backwards through the hidden layers.
        for layer in xrange(2, self.num_layers):
            delta = np.dot(self.weights[-layer + 1].transpose(), delta) \
                    * sigmoid_prime(zs[-layer])
            grad_b[-layer] = delta
            grad_w[-layer] = np.dot(delta, activations[-layer - 1].transpose())
        return (grad_b, grad_w)

def update_mini_batch(self, mini_batch, eta, lmbda, n):
    """Apply one gradient-descent step from ``mini_batch``.

    ``eta`` -- learning rate; ``lmbda`` -- L2 regularization strength;
    ``n`` -- total training-set size (the regularization is scaled by
    lmbda/n, not by the mini-batch size).
    """
    sum_b = [np.zeros(b.shape) for b in self.biases]
    sum_w = [np.zeros(w.shape) for w in self.weights]

    # Accumulate the gradient over every example in the batch.
    for x, y in mini_batch:
        db, dw = self.backprop(x, y)
        sum_b = [acc + d for acc, d in zip(sum_b, db)]
        sum_w = [acc + d for acc, d in zip(sum_w, dw)]

    m = len(mini_batch)
    # L2 weight decay: w <- (1 - eta*lmbda/n) * w - (eta/m) * grad.
    decay = 1 - eta * (lmbda / n)
    self.weights = [decay * w - (eta / m) * sw
                    for w, sw in zip(self.weights, sum_w)]
    self.biases = [b - (eta / m) * sb for b, sb in zip(self.biases, sum_b)]

## L1规范化

  # L1-regularized update: w <- w - (eta*lmbda/n)*sgn(w) - (eta/m)*nabla_w.
  # Bug fix: the original wrote (1 - eta*(lmbda/n)*np.sign(w)) * w, which
  # multiplies the penalty into w (an L2-style rescaling); the L1 rule
  # subtracts a constant-magnitude term eta*(lmbda/n)*sgn(w) instead.
  self.weights = [w - eta * (lmbda / n) * np.sign(w) - (eta / len(mini_batch)) * nw
                  for w, nw in zip(self.weights, nabla_w)]
  self.biases = [b - (eta / len(mini_batch)) * nb for b, nb in zip(self.biases, nabla_b)]

# 测评

def vectorized_result(j):
    """Return a (10, 1) one-hot column vector with 1.0 at index ``j``.

    Used to turn a digit label 0..9 into the network's target output.
    """
    onehot = np.zeros((10, 1))
    onehot[j] = 1.0
    return onehot

## 计算损失（总代价 cost）

    def total_cost(self, data, lmbda, convert=False):
        """Return the average cost over ``data`` plus the L2 penalty.

        ``convert`` -- when True, targets are integer labels and are
        one-hot encoded via ``vectorized_result`` before scoring
        (evaluation/test data); when False they are already vectors
        (training data).
        """
        size = len(data)
        cost = 0.0
        for x, y in data:
            output = self.feedforward(x)
            target = vectorized_result(y) if convert else y
            cost += self.cost.fn(output, target) / size
        # L2 regularization term: 0.5 * (lmbda/n) * sum ||w||^2.
        cost += 0.5 * (lmbda / size) * sum(np.linalg.norm(w) ** 2
                                           for w in self.weights)
        return cost

cost +=  (lmbda / len(data)) * sum(np.linalg.norm(w) for w in self.weights)

## 计算准确率

    def accuracy(self, data, covert=False):
        """Return how many inputs in ``data`` are classified correctly.

        The prediction is the index of the strongest output neuron.
        ``covert`` (sic; presumably a misspelling of "convert", kept for
        callers) -- when True, targets are one-hot vectors reduced with
        argmax; when False they are plain integer labels.
        """
        if covert:
            pairs = [(np.argmax(self.feedforward(x)), np.argmax(y))
                     for (x, y) in data]
        else:
            pairs = [(np.argmax(self.feedforward(x)), y) for (x, y) in data]
        return sum(int(pred == truth) for (pred, truth) in pairs)

0 条评论

• ### 《neural network and deep learning》题解——ch03 其他技术（momentun，tanh）

http://blog.csdn.net/u011239443/article/details/77848503

• ### 《neural network and deep learning》题解——ch02 Network源码分析

http://blog.csdn.net/u011239443/article/details/75008380

• ### 《机器学习实战》（十）——k-means、k-means++、二分K-means

可参阅：http://blog.csdn.net/u011239443/article/details/51707802#t0

• ### iOS 开发之路（登陆页键盘遮挡输入框问题）一

首先是swift 3.0 中，NotificationCenter 设置 selector 如下：

• ### 使用beanstalkd实现定制化持续集成过程中pipeline

持续集成是一种项目管理和流程模型，依赖于团队中各个角色的配合。各个角色的意识和配合不是一朝一夕能练就的，我们的工作只是提供一种方案和能力，这就是持续集成能力的服...

• ### 如何清除手机拍照的图片 exif 防止泄露你的隐私

然后【腾讯微信团队】对此回应：无论用微信、邮件或是其他传输工具发原图，都会附带位置、时间、拍摄机型等Exif信息。但朋友圈发的照片都经过系统自动压缩，不带位置等...

• ### D3常用API说明，含代码示例

这两个选择元素的API方法的参数是选择器，即指定应当选择文档中的哪些元素。这个选择器参数可以是CSS选择器，也可以是已经被DOM API选择的元素(如docum...