# 风格迁移 Neural Transfer

## 主要程序

# desired size of the output image
imsize = 512 if use_cuda else 128  # use small size if no gpu

transforms.Scale(imsize),  # scale imported image
transforms.ToTensor()])  # transform it into a torch tensor

transforms.Scale(imsize),
transforms.RandomCrop(imsize),
transforms.ToTensor()])

image = Image.open(image_name)
# fake batch dimension required to fit network's input dimensions
image = image.unsqueeze(0)
return image

assert style_img.size() == content_img.size(), \
"we need to import style and content images of the same size"

### 内容损失

class ContentLoss(nn.Module):
    """Pass-through layer that records a weighted content loss.

    The module returns its input unchanged, so it can be inserted anywhere
    in a network. As a side effect of ``forward`` it stores, in
    ``self.loss``, the MSE between the weighted input and the weighted
    target captured at construction time.

    Args:
        target: Reference feature tensor the input is compared against.
        weight: Scalar applied to both the target and the input before the
            MSE is computed.
    """

    def __init__(self, target, weight):
        super(ContentLoss, self).__init__()
        # Detach the target from the graph that produced it: only its value
        # is needed, as a constant. Detaching means that computing gradients
        # in backward() has no effect on the graph the target came from;
        # per the original note, the criterion's forward would otherwise
        # throw an error.
        self.target = target.detach() * weight
        self.weight = weight
        self.criterion = nn.MSELoss()

    def forward(self, input):
        """Store the loss for ``input`` and return ``input`` itself."""
        self.loss = self.criterion(input * self.weight, self.target)
        self.output = input
        return self.output

    def backward(self, retain_graph=True):
        """Backpropagate the stored loss and return it.

        ``forward`` must have been called first so that ``self.loss`` exists.
        """
        self.loss.backward(retain_graph=retain_graph)
        return self.loss

GramMatrix 模块：Gram 矩阵即相关矩阵的简化版，用于更快速、更方便地进行计算。

class GramMatrix(nn.Module):
    """Compute the normalized Gram matrix of a 4-D feature tensor."""

    def forward(self, input):
        """Return the Gram matrix of ``input`` divided by its element count.

        Args:
            input: Tensor of size ``(a, b, c, d)`` where ``a`` is the batch
                size (=1), ``b`` the number of feature maps and ``(c, d)``
                the dimensions of one feature map (N = c * d).

        Returns:
            Tensor of size ``(a * b, a * b)``.
        """
        a, b, c, d = input.size()

        # Flatten every feature map into one row: F_XL -> \hat F_XL.
        features = input.view(a * b, c * d)

        # Gram product: inner products between all pairs of rows.
        G = torch.mm(features, features.t())

        # 'Normalize' the values of the Gram matrix by dividing by the
        # total number of elements in the input.
        return G.div(a * b * c * d)
定义风格损失函数
######################################################################
#
# The larger the feature-map dimension :math:N, the larger the
# values of the gram matrix. Therefore, if we don't normalize by :math:N,
# the loss computed at the first layers (before pooling layers) will have
# much more importance during the gradient descent. We don't want that,
# since the most interesting style features are in the deepest layers!
#
# 风格损失模块和内容模块几乎是一样的，但我们需要将gramMatrix加到类中
#

class StyleLoss(nn.Module):
    """Pass-through layer that records a weighted style loss.

    Works like a content-loss layer, except that the quantity compared with
    the target is the Gram matrix of the input rather than the input itself.

    Args:
        target: Gram matrix of the style features to match.
        weight: Scalar applied to both the target and the input's Gram
            matrix before the MSE is computed.
    """

    def __init__(self, target, weight):
        super(StyleLoss, self).__init__()
        # The target is a constant: detach it from the graph that built it.
        self.target = target.detach() * weight
        self.weight = weight
        self.gram = GramMatrix()
        self.criterion = nn.MSELoss()

    def forward(self, input):
        """Store the style loss for ``input`` and return a copy of ``input``."""
        # A clone is returned rather than the input itself, presumably so
        # that later layers cannot modify the tensor the Gram matrix was
        # computed from -- TODO confirm.
        self.output = input.clone()
        self.G = self.gram(input)
        self.G.mul_(self.weight)
        self.loss = self.criterion(self.G, self.target)
        return self.output

    def backward(self, retain_graph=True):
        """Backpropagate the stored loss and return it.

        ``forward`` must have been called first so that ``self.loss`` exists.
        """
        self.loss.backward(retain_graph=retain_graph)
        return self.loss

### 定义神经网络

######################################################################
# A "Sequential" module contains an ordered list of child modules. For
# instance, "vgg19.features" contains a sequence (Conv2d, ReLU,
# Maxpool2d, Conv2d, ReLU...) aligned in the right order of depth. As we
# said in *Content loss* section, we want to add our style and content
# loss modules as additive 'transparent' layers in our network, at desired
# depths. For that, we construct a new "Sequential" module, in which we
# are going to add modules from "vgg19" and our loss modules in the
# right order:
# 根据VGG19构造一个和VGG19结构类似的神经网络，其中包括设计好的内容损失层和风格损失层。
# 这两种层是“透明”的，不会改变网络的前向输出；我们需要的只是图像经过它们时产生的损失值。
#
# desired depth layers to compute style/content losses :
content_layers_default = ['conv_4']
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']


def _append_loss_layers(model, gram, name, style_img, content_img,
                        style_weight, content_weight,
                        content_layers, style_layers,
                        content_losses, style_losses):
    """Insert the loss modules requested for layer ``name`` at the end of ``model``."""
    if name in content_layers:
        # Target = content features produced by the network built so far.
        target = model(content_img).clone()
        content_loss = ContentLoss(target, content_weight)
        # Loss modules are keyed by the layer they follow, so a conv and a
        # relu carrying the same index can never overwrite each other.
        model.add_module("content_loss_" + name, content_loss)
        content_losses.append(content_loss)

    if name in style_layers:
        # Target = Gram matrix of the style features at this depth.
        target_feature = model(style_img).clone()
        target_feature_gram = gram(target_feature)
        style_loss = StyleLoss(target_feature_gram, style_weight)
        model.add_module("style_loss_" + name, style_loss)
        style_losses.append(style_loss)


def get_style_model_and_losses(cnn, style_img, content_img,
                               style_weight=1000, content_weight=1,
                               content_layers=content_layers_default,
                               style_layers=style_layers_default):
    """Build a network from ``cnn`` with content/style loss layers inserted.

    The layers of a deep copy of ``cnn`` are appended one by one to a new
    ``nn.Sequential``. Conv2d, ReLU and MaxPool2d layers are named
    ``conv_i``, ``relu_i`` and ``pool_i``; the index ``i`` starts at 1 and
    is incremented after every ReLU. Layers of any other type are not
    copied. After each conv/relu layer whose name is listed in
    ``content_layers`` / ``style_layers``, the matching loss module is
    appended as well.

    Args:
        cnn: Iterable sequence of layers (e.g. ``vgg19.features``).
        style_img: Style image fed through the partial network to compute
            the style targets.
        content_img: Content image fed through the partial network to
            compute the content targets.
        style_weight: Weight given to every style loss.
        content_weight: Weight given to every content loss.
        content_layers: Names of the layers after which a content loss is
            inserted.
        style_layers: Names of the layers after which a style loss is
            inserted.

    Returns:
        Tuple ``(model, style_losses, content_losses)``: the new network and
        the lists of loss modules that were inserted into it.
    """
    cnn = copy.deepcopy(cnn)

    # Lists giving iterable access to the content/style loss modules.
    content_losses = []
    style_losses = []

    model = nn.Sequential()  # the new Sequential module network
    gram = GramMatrix()  # gram module used to compute the style targets

    # move these modules to the GPU if possible:
    if use_cuda:
        model = model.cuda()
        gram = gram.cuda()

    i = 1
    for layer in cnn:
        if isinstance(layer, nn.Conv2d):
            name = "conv_" + str(i)
            # The layer must be part of the model before the targets are
            # computed, otherwise they would be taken at the wrong depth.
            model.add_module(name, layer)
            _append_loss_layers(model, gram, name, style_img, content_img,
                                style_weight, content_weight,
                                content_layers, style_layers,
                                content_losses, style_losses)

        if isinstance(layer, nn.ReLU):
            name = "relu_" + str(i)
            model.add_module(name, layer)
            _append_loss_layers(model, gram, name, style_img, content_img,
                                style_weight, content_weight,
                                content_layers, style_layers,
                                content_losses, style_losses)

            i += 1

        if isinstance(layer, nn.MaxPool2d):
            name = "pool_" + str(i)
            model.add_module(name, layer)

    return model, style_losses, content_losses

### 输入图像

######################################################################
# 输入图像
# ~~~~~~~~~~~
# 为了方便，输入图像为内容图像的copy，也可以创造一个白噪声图片

input_img = content_img.clone()
# if you want to use a white noise instead uncomment the below line:
# input_img = Variable(torch.randn(content_img.data.size())).type(dtype)

# add the original input image to the figure:
plt.figure()
imshow(input_img.data, title='Input Image')

######################################################################
# 梯度下降
# ~~~~~~~~~~~~~~~~
#
# 这里我们使用L-BFGS算法来进行梯度下降，不同于训练一个网络，我们想要训练这个输入图片以降低内容/风格损失。我们就简单
# 创建一个python 的L-BFGS优化器，将输入图像当做变量来进行优化。但是optim.LBFGS()接受的第一个参数是一个Pytorch中包含需要进行梯度更新的Variable列表
# 我们的输入图像是一个Variable类型但不是计算树中的一部分。为了让这个函数知道输入图像这个Variable需要进行梯度计算。
# 一种可能的方法就是从输入图像中构造一个Parameter对象。然后我们只需要将其给了优化器的构造器即可。

def get_input_param_optimizer(input_img):
    """Wrap the input image in a Parameter and build an L-BFGS optimizer for it.

    Args:
        input_img: Image whose ``.data`` becomes the optimized parameter.

    Returns:
        Tuple ``(input_param, optimizer)`` where ``input_param`` is the
        ``nn.Parameter`` built from ``input_img.data`` and ``optimizer`` is
        an ``optim.LBFGS`` instance optimizing only that parameter.
    """
    # Wrapping the data in a Parameter marks the image as something that
    # requires a gradient, which is what the optimizer needs.
    input_param = nn.Parameter(input_img.data)
    optimizer = optim.LBFGS([input_param])
    return input_param, optimizer

### 定义执行函数

######################################################################
# **Last step**: the loop of gradient descent. At each step, we must feed
# the network with the updated input in order to compute the new losses,
# we must run the "backward" methods of each loss to dynamically compute
# their gradients and perform the step of gradient descent. The optimizer
# requires as argument a "closure": a function that reevaluates the model
# and returns the loss.
# 最后一步：进行梯度下降的循环。每一步我们必须将更新后的输入图像送入网络以计算新的损失，
# 对每个损失调用backward方法来计算它们的梯度，然后执行一步梯度下降。优化器需要一个
# “闭包”（closure）函数作为参数：这个函数会重新运行模型并返回损失。
#
#
# However, there's a small catch. The optimized image may take its values
# between :math:-\infty and :math:+\infty instead of staying between 0
# and 1. In other words, the image might be well optimized and still have absurd
# values. In fact, we must perform an optimization under constraints in
# order to keep valid values in our input image. There is a simple
# solution: at each step, correct the image so that its values stay within
# the 0-1 interval.
#

def run_style_transfer(cnn, content_img, style_img, input_img, num_steps=300,
                       style_weight=1000, content_weight=1):
    """Run the style transfer.

    Builds the loss-instrumented model, then optimizes the input image with
    the optimizer returned by ``get_input_param_optimizer`` until the
    closure has been evaluated more than ``num_steps`` times.

    Args:
        cnn: Network the style model is built from.
        content_img: Image providing the content targets.
        style_img: Image providing the style targets.
        input_img: Starting image; its data is what gets optimized.
        num_steps: Closure-evaluation budget. The loop stops once the
            evaluation counter exceeds this value.
        style_weight: Weight of every style loss.
        content_weight: Weight of every content loss.

    Returns:
        The optimized image data, clamped to the range [0, 1].
    """
    print('Building the style transfer model..')
    model, style_losses, content_losses = get_style_model_and_losses(
        cnn, style_img, content_img, style_weight, content_weight)
    input_param, optimizer = get_input_param_optimizer(input_img)

    print('Optimizing..')
    # One-element list so the nested closure can update the counter.
    run = [0]
    since = time.time()
    while run[0] <= num_steps:

        def closure():
            # correct the values of updated input image
            input_param.data.clamp_(0, 1)

            # backward() accumulates into .grad, so the gradients left by
            # the previous evaluation must be cleared first.
            optimizer.zero_grad()
            # Note: this forward pass only computes the losses stored inside
            # the loss layers; its output is not used.
            model(input_param)
            style_score = 0
            content_score = 0

            # Backpropagate every stored loss; the resulting gradients with
            # respect to the image are what the optimizer uses to update it.
            for sl in style_losses:
                style_score += sl.backward()
            for cl in content_losses:
                content_score += cl.backward()

            run[0] += 1
            if run[0] % 50 == 0:
                print("run {}:".format(run[0]))
                # float() handles one-element tensors as well as the plain
                # int 0 left over when a loss list is empty.
                print('Style Loss : {:4f} Content Loss: {:4f}'.format(
                    float(style_score), float(content_score)))
                print()

            return style_score + content_score

        optimizer.step(closure)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    # a last correction...
    input_param.data.clamp_(0, 1)

    return input_param.data

## 参考资料：

1、A Neural Algorithm of Artistic Style https://arxiv.org/abs/1508.06576

2、http://pytorch.org/tutorials/advanced/neural_style_tutorial.html

0 条评论

## 相关文章

5538

2194

1747

2023

4252

50910

### 常用的像素操作算法：Resize、Flip、Rotate

Resize 图像缩放是把原图像按照目标尺寸放大或者缩小，是图像处理的一种。 图像缩放有多种算法。最为简单的是最临近插值算法，它是根据原图像和目标图像的尺寸，计...

44310

1660

### Pytorch实现Logistic回归二分类

? 摘要：本文主要介绍使用深度学习框架Pytorch实现简单的Logistic回归模型，进而实现简单的分类问题。 一．逻辑回归简述 逻辑回归实质上是线性回...

1.3K14

2450