# 入门 | 请注意，我们要谈谈神经网络的注意机制和使用方法

注意机制最简单的形式就是直接从输入图像 I 中裁剪出一个 glimpse：g = I[y:y+h, x:x+w]（用 Matlab 的表示方法）。注意机制还会改变输出的维度：若输入是 k 维向量，注意后的输出是 m 维向量，其中 m≤k。

def gaussian_mask(u, s, d, R, C):
    """Builds a Gaussian attention mask with one Gaussian per mask row.

    :param u: tf.Tensor, centre of the first Gaussian.
    :param s: tf.Tensor, standard deviation of Gaussians.
    :param d: tf.Tensor, shift between Gaussian centres.
    :param R: int, number of rows in the mask, there is one Gaussian per row.
    :param C: int, number of columns in the mask.
    :return: tf.Tensor, mask normalised along the column axis.
    """
    # indices to create centres
    R = tf.to_float(tf.reshape(tf.range(R), (1, 1, R)))
    C = tf.to_float(tf.reshape(tf.range(C), (1, C, 1)))
    # centre of the i-th Gaussian is u + i * d
    centres = u[np.newaxis, :, np.newaxis] + R * d
    column_centres = C - centres
    # unnormalised Gaussian density at every column position
    mask = tf.exp(-.5 * tf.square(column_centres / s))
    # we add eps for numerical stability: without it an all-(near-)zero
    # column would divide by zero. NOTE: this line was missing in the
    # pasted source, which returned an undefined name (NameError).
    normalised_mask = mask / (tf.reduce_sum(mask, 1, keep_dims=True) + 1e-8)
    return normalised_mask

def gaussian_glimpse(img_tensor, transform_params, crop_size):
    """Extracts a glimpse from an image batch using Gaussian attention.

    :param img_tensor: tf.Tensor of size (batch_size, Height, Width, channels)
    :param transform_params: tf.Tensor of size (batch_size, 6), where params are
        (mean_y, std_y, d_y, mean_x, std_x, d_x) specified in pixels.
    :param crop_size: tuple of 2 ints, size of the resulting crop
    """
    # unpack the requested crop size and the input spatial dims
    crop_h, crop_w = crop_size
    img_h, img_w = img_tensor.shape.as_list()[1:3]
    # split the six attention parameters along the last axis
    last_axis = transform_params.shape.ndims - 1
    uy, sy, dy, ux, sx, dx = tf.split(transform_params, 6, last_axis)
    # one separable Gaussian mask per spatial axis
    mask_y = gaussian_mask(uy, sy, dy, crop_h, img_h)
    mask_x = gaussian_mask(ux, sx, dx, crop_w, img_w)
    # apply both masks via matrix products to obtain the glimpse
    return tf.matmul(tf.matmul(mask_y, img_tensor, adjoint_a=True), mask_x)

def spatial_transformer(img_tensor, transform_params, crop_size):
    """Extracts a glimpse with a Spatial Transformer (affine grid + resampling).

    :param img_tensor: tf.Tensor of size (batch_size, Height, Width, channels)
    :param transform_params: tf.Tensor of size (batch_size, 4), where params are
        (scale_y, shift_y, scale_x, shift_x)
    :param crop_size: tuple of 2 ints, size of the resulting crop
    """
    input_size = img_tensor.shape.as_list()[1:]
    # affine warp restricted to scale + shift (no shear allowed)
    warper = snt.AffineGridWarper(input_size, crop_size,
                                  snt.AffineWarpConstraints.no_shear_2d())
    sample_coords = warper(transform_params)
    # resample the image (with a trailing channel axis added) at the warped grid
    return snt.resampler(img_tensor[..., tf.newaxis], sample_coords)

• 高斯注意是一种过度参数化的裁剪机制：需要 6 个参数，但却只有 4 个自由度（y、x、高度、宽度）。STN 只需要 4 个参数。
• 我还没运行过任何测试，但 STN 应该更快。它依赖于在采样点上的线性插值法，而高斯注意则必须执行两个巨大的矩阵乘法运算。STN 应该可以快上一个数量级（在输入图像中的像素方面）。
• 高斯注意应该更容易训练（没有测试运行）。这是因为结果得到的 glimpse 中的每个像素都可以是源图像的相对大批量的像素的凸组合，这使得我们能更容易找到任何错误的原因。而 STN 依赖于线性插值法，这意味着每个采样点的梯度仅相对其最近的两个像素是非 0 的。

import tensorflow as tf
import sonnet as snt
import numpy as np
import matplotlib.pyplot as plt

# Demo: compare a Gaussian-attention glimpse and an STN glimpse on a toy
# image containing a bright square.
img_size = 10, 10
glimpse_size = 5, 5

# Random dim background with a bright square; `crop` is the reference patch.
x = abs(np.random.randn(1, *img_size)) * .3
x[0, 3:6, 3:6] = 1
crop = x[0, 2:7, 2:7]  # contains the square

tf.reset_default_graph()

# Placeholders for the image and the attention parameters.
tx = tf.placeholder(tf.float32, x.shape, 'image')
tu = tf.placeholder(tf.float32, [1], 'u')
ts = tf.placeholder(tf.float32, [1], 's')
td = tf.placeholder(tf.float32, [1], 'd')
stn_params = tf.placeholder(tf.float32, [1, 4], 'stn_params')

# Gaussian attention reuses the same (u, s, d) for both axes.
gaussian_att_params = tf.concat([tu, ts, td, tu, ts, td], -1)
gaussian_glimpse_expr = gaussian_glimpse(tx, gaussian_att_params, glimpse_size)

# Spatial Transformer glimpse.
stn_glimpse_expr = spatial_transformer(tx, stn_params, glimpse_size)

sess = tf.Session()

# Extract a Gaussian glimpse centred near the square.
u, s, d = np.asarray([2]), np.asarray([.5]), np.asarray([1])
gaussian_feed = {tx: x, tu: u, ts: s, td: d}
gaussian_crop = sess.run(gaussian_glimpse_expr, feed_dict=gaussian_feed)

# Extract an STN glimpse with a scale-and-shift transform.
transform = np.asarray([.4, -.1, .4, -.1]).reshape((1, 4))
stn_crop = sess.run(stn_glimpse_expr, {tx: x, stn_params: transform})

# Plot the input, the reference crop, and both glimpses side by side.
fig, axes = plt.subplots(1, 4, figsize=(12, 3))
panels = zip(axes,
             ['Input Image', 'Crop', 'Gaussian Att', 'STN'],
             [x, crop, gaussian_crop, stn_crop])
for axis, label, image in panels:
    axis.imshow(image.squeeze(), cmap='gray', vmin=0., vmax=1.)
    axis.set_title(label)
    axis.xaxis.set_visible(False)
    axis.yaxis.set_visible(False)

0 条评论

## 相关文章

40370

39650

37450

56540

### 机器学习：提升树（boosting tree）算法的思想

《实例》阐述算法，通俗易懂，助您对算法的理解达到一个新高度。包含但不限于：经典算法，机器学习，深度学习，LeetCode 题解，Kaggle 实战。期待您的到来...

40280

34350

13030

21340

37650

### TLD跟踪算法介绍

TLD跟踪算法介绍 ? TLD(Tracking-Learning-Detection)是一种长时视频对象跟踪算法，首先要在视频一帧中指明对象位置，Tracki...

46650