# 实现与优化深度神经网络

Linear Model

def reformat(dataset, labels, image_size=None, num_labels=None):
    """Flatten image data and one-hot encode integer labels.

    Args:
        dataset: array of images; reshaped to (n, image_size * image_size).
        labels: 1-D array of integer class ids.
        image_size: side length of each square image. Defaults to the
            module-level ``image_size`` (original behavior).
        num_labels: number of classes. Defaults to the module-level
            ``num_labels`` (original behavior).

    Returns:
        Tuple ``(dataset, labels)`` — float32 flattened images and
        float32 one-hot label matrix of shape (n, num_labels).
    """
    # Backward-compatible: fall back to the module globals the original
    # version read implicitly.
    if image_size is None:
        image_size = globals()["image_size"]
    if num_labels is None:
        num_labels = globals()["num_labels"]
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels

TensorFlow Graph

TensorFlow Session

Session用来执行Graph里规定的计算，就好像给一个个门电路通上电，我们在Session里，给计算单元充上数据，That’s Flow.

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()

for step in range(num_steps):
    _, l, predictions = session.run([optimizer, loss, train_prediction])

valid_prediction.eval()

SGD

offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
batch_data = train_dataset[offset:(offset + batch_size), :]
batch_labels = train_labels[offset:(offset + batch_size), :]

tf_train_dataset = tf.placeholder(tf.float32,
                                  shape=(batch_size, image_size * image_size))
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))

Y = W2 * RELU(W1*X + b1) + b2

[n * 10] = RELU([n * 784] · [784 * N] + [n * N]) · [N * 10] + [n * 10]

weights1 = tf.Variable(
    tf.truncated_normal([image_size * image_size, hidden_node_count]))
biases1 = tf.Variable(tf.zeros([hidden_node_count]))
weights2 = tf.Variable(
    tf.truncated_normal([hidden_node_count, num_labels]))
biases2 = tf.Variable(tf.zeros([num_labels]))

ys = tf.matmul(tf_train_dataset, weights1) + biases1
hidden = tf.nn.relu(ys)
logits = tf.matmul(hidden, weights2) + biases2

（https://github.com/ahangchen/GDLnotes/blob/master/src/neural/nn_overfit.py）

### Regularization

（https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/image/mnist/convolutional.py）

l2_loss = tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels)) + 0.001 * l2_loss

OverFit问题

offset_range = 1000
offset = (step * batch_size) % offset_range

DropOut

keep_prob = tf.placeholder(tf.float32)
if drop_out:
    hidden_drop = tf.nn.dropout(hidden, keep_prob)
    h_fc = hidden_drop

if drop_out:
    hidden_drop = tf.nn.dropout(hidden, 0.5)
    h_fc = hidden_drop

Learning Rate Decay

Deep Network

# middle layer
for i in range(layer_cnt - 2):
    y1 = tf.matmul(hidden_drop, weights[i]) + biases[i]
    hidden_drop = tf.nn.relu(y1)
    if drop_out:
        keep_prob += 0.5 * i / (layer_cnt + 1)

hidden_drop = tf.nn.dropout(hidden_drop, keep_prob)

for i in range(layer_cnt - 2):
    if hidden_cur_cnt > 2:
        hidden_next_cnt = int(hidden_cur_cnt / 2)
    else:
        hidden_next_cnt = 2
    hidden_stddev = np.sqrt(2.0 / hidden_cur_cnt)
    weights.append(tf.Variable(
        tf.truncated_normal([hidden_cur_cnt, hidden_next_cnt], stddev=hidden_stddev)))
    biases.append(tf.Variable(tf.zeros([hidden_next_cnt])))
    hidden_cur_cnt = hidden_next_cnt

stddev = np.sqrt(2.0 / n)

DropOut时，因为后面的layer得到的信息越重要，需要动态调整丢弃的比例，到后面的layer，丢弃的比例要减小。

keep_prob += 0.5 * i / (layer_cnt + 1)

424 篇文章81 人订阅

0 条评论

## 相关文章

3064

1674

1662

20610

1893

4644

1.3K10

2.5K12

### 人群密度估计--Crowd Counting Via Scale-adaptive Convolutional Neural Network

Crowd Counting Via Scale-adaptive Convolutional Neural Network https://arxiv....

2305

2426