# 4. 中部小结

# 5. 训练模拟量化

`weight scale = max(abs(weight)) / 127`

`moving_max = moving_max * momentum + max(abs(activation)) * (1 - momentum)`

`activation scale = moving_max /128`

QAT反向传播阶段求梯度的公式

```class Quantizer(nn.Module):
# Base fake-quantization module. For bit-widths other than 32 and 1, forward()
# refreshes the tracked range, then runs quantize -> round -> clamp -> dequantize.
# Subclasses must implement update_params().
# NOTE(review): `min_val` / `max_val` (read by clamp) and `Round` (used by round)
# are not defined in this class -- presumably provided by subclasses / elsewhere
# in the module; confirm.
def __init__(self, bits, range_tracker):
super().__init__()
self.bits = bits
self.range_tracker = range_tracker
self.register_buffer('scale', None)      # quantization scale factor
self.register_buffer('zero_point', None) # quantization zero point

# Hook for subclasses; called by forward() right after the range tracker has
# seen the input. Presumably recomputes `scale` / `zero_point` -- confirm in subclasses.
def update_params(self):
raise NotImplementedError

# Quantize: multiply by the scale and subtract the zero point
def quantize(self, input):
output = input * self.scale - self.zero_point
return output

# Round via the externally defined `Round` autograd function
def round(self, input):
output = Round.apply(input)
return output

# Clamp to [self.min_val, self.max_val]
def clamp(self, input):
output = torch.clamp(input, self.min_val, self.max_val)
return output

# Dequantize: inverse of quantize() (add the zero point, divide by the scale)
def dequantize(self, input):
output = (input + self.zero_point) / self.scale
return output

def forward(self, input):
# 32 bits: pass the input through unchanged
if self.bits == 32:
output = input
# 1 bit: not supported -- prints a message and fails the assertion
elif self.bits == 1:
print('！Binary quantization is not supported ！')
assert self.bits != 1
else:
# Feed the input to the range tracker, then refresh the quantization parameters
self.range_tracker(input)
self.update_params()
output = self.quantize(input)   # quantize
output = self.round(output)
output = self.clamp(output)     # clamp
output = self.dequantize(output)# dequantize
return output
```

# 6. 代码实现

```# ********************* range_trackers(范围统计器，统计量化前范围) *********************
class RangeTracker(nn.Module):
    """Base class that measures the min/max of a tensor before quantization.

    ``forward`` reduces the input to a ``(min_val, max_val)`` pair and hands it
    to ``update_range``, which subclasses implement to decide how observations
    are accumulated.

    Args:
        q_level: granularity of the statistics.
            ``'L'`` -- one min/max over the whole tensor (layer level).
            ``'C'`` -- one min/max per index of dim 0, reducing dims 3, 2 and 1
            with ``keepdim=True`` (channel level; expects a 4-D input).
    """

    def __init__(self, q_level):
        super().__init__()
        self.q_level = q_level

    def update_range(self, min_val, max_val):
        """Fold a freshly observed range into the tracked one (subclass hook)."""
        raise NotImplementedError

    # Range statistics are bookkeeping, not part of the differentiable graph:
    # measuring them under no_grad keeps whatever `update_range` stores free of
    # autograd history from the current iteration.
    @torch.no_grad()
    def forward(self, input):
        """Measure the range of ``input`` and pass it to ``update_range``.

        Raises:
            ValueError: if ``q_level`` is neither ``'L'`` nor ``'C'``.
        """
        if self.q_level == 'L':
            min_val = torch.min(input)
            max_val = torch.max(input)
        elif self.q_level == 'C':
            min_val = max_val = input
            # Reduce every dim except dim 0, innermost first; result is (N, 1, 1, 1).
            for dim in (3, 2, 1):
                min_val = torch.min(min_val, dim, keepdim=True)[0]
                max_val = torch.max(max_val, dim, keepdim=True)[0]
        else:
            # Previously this fell through and failed on an unbound local.
            raise ValueError(
                "q_level must be 'L' or 'C', got {!r}".format(self.q_level)
            )

        self.update_range(min_val, max_val)
class GlobalRangeTracker(RangeTracker):
    """Tracker that keeps the smallest min and largest max observed so far.

    Buffers ``min_val`` / ``max_val`` have shape ``(out_channels, 1, 1, 1)``,
    matching the channel-level (``q_level='C'``) statistics used for weights.
    ``first_w`` is a 0/1 flag recording whether an observation has been stored.
    """

    def __init__(self, q_level, out_channels):
        super().__init__(q_level)
        self.register_buffer('min_val', torch.zeros(out_channels, 1, 1, 1))
        self.register_buffer('max_val', torch.zeros(out_channels, 1, 1, 1))
        self.register_buffer('first_w', torch.zeros(1))

    # Buffers are updated in place; do it without recording autograd history.
    @torch.no_grad()
    def update_range(self, min_val, max_val):
        """Merge a new observation into the tracked global range.

        The first observation initialises the buffers (the zero-initialised
        buffers must not take part in the comparison); every later one widens
        the range via an element-wise min / max against the stored values.
        """
        if self.first_w == 0:
            self.first_w.fill_(1)
            self.min_val.copy_(min_val)
            self.max_val.copy_(max_val)
        else:
            self.min_val.copy_(torch.min(self.min_val, min_val))
            self.max_val.copy_(torch.max(self.max_val, max_val))
class AveragedRangeTracker(RangeTracker):
    """Tracker that keeps an exponential moving average of the observed range.

    Buffers ``min_val`` / ``max_val`` have shape ``(1,)``, matching the
    layer-level (``q_level='L'``) statistics used for activations.
    ``first_a`` is a 0/1 flag recording whether an observation has been stored.

    Args:
        q_level: statistics granularity, forwarded to ``RangeTracker``.
        momentum: weight of the newest observation in the running average.
    """

    def __init__(self, q_level, momentum=0.1):
        super().__init__(q_level)
        self.momentum = momentum
        self.register_buffer('min_val', torch.zeros(1))
        self.register_buffer('max_val', torch.zeros(1))
        self.register_buffer('first_a', torch.zeros(1))

    # Buffers are updated in place; do it without recording autograd history.
    @torch.no_grad()
    def update_range(self, min_val, max_val):
        """Merge a new observation into the running range.

        The first observation initialises the buffers directly so the average
        is not biased towards the zero initial value; afterwards
        ``tracked = tracked * (1 - momentum) + observed * momentum``.
        """
        if self.first_a == 0:
            self.first_a.fill_(1)
            self.min_val.copy_(min_val)
            self.max_val.copy_(max_val)
        else:
            self.min_val.mul_(1 - self.momentum).add_(min_val * self.momentum)
            self.max_val.mul_(1 - self.momentum).add_(max_val * self.momentum)
```

❝PyTorch 通常把网络中的参数保存为 OrderedDict。这些参数其实分为两种：一种是模型中各个 module 所含的参数，即 nn.Parameter（当然也可以在网络中自行定义其他 nn.Parameter）；另一种是 buffer。前者会在每次 optim.step() 时被更新，后者则不会。❞

```# ********************* 量化卷积（同时量化A/W，并做卷积） *********************
class Conv2d_Q(nn.Conv2d):
    """2-D convolution that quantizes its input activation and its weight.

    Args:
        in_channels, out_channels, kernel_size, stride, dilation, groups, bias:
            forwarded to ``nn.Conv2d``.
        a_bits: bit-width handed to the activation quantizer.
        w_bits: bit-width handed to the weight quantizer.
        q_type: ``0`` selects ``SymmetricQuantizer``; any other value selects
            ``AsymmetricQuantizer``.
        first_layer: when truthy the input activation is NOT quantized.
        padding: forwarded to ``nn.Conv2d`` / ``F.conv2d``. Appended after the
            existing parameters so positional callers are unaffected.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        dilation=1,
        groups=1,
        bias=True,
        a_bits=8,
        w_bits=8,
        q_type=1,
        first_layer=0,
        padding=0,
    ):
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        # Activations use layer-level running statistics, weights use
        # channel-level global statistics.
        quantizer_cls = SymmetricQuantizer if q_type == 0 else AsymmetricQuantizer
        self.activation_quantizer = quantizer_cls(
            bits=a_bits,
            range_tracker=AveragedRangeTracker(q_level='L'),
        )
        self.weight_quantizer = quantizer_cls(
            bits=w_bits,
            range_tracker=GlobalRangeTracker(q_level='C', out_channels=out_channels),
        )
        self.first_layer = first_layer

    def forward(self, input):
        """Quantize activation (unless first layer) and weight, then convolve."""
        if not self.first_layer:
            input = self.activation_quantizer(input)
        q_input = input
        q_weight = self.weight_quantizer(self.weight)
        return F.conv2d(
            input=q_input,
            weight=q_weight,
            bias=self.bias,
            stride=self.stride,
            # Without this the layer silently ran with zero padding.
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups,
        )
```

```def reshape_to_activation(input):
# Reshape `input` to (1, -1, 1, 1), i.e. all elements along dim 1, so it can
# broadcast against a 4-D tensor over dims 0, 2 and 3.
return input.reshape(1, -1, 1, 1)
def reshape_to_weight(input):
    """Reshape ``input`` to ``(-1, 1, 1, 1)``.

    All elements end up along dim 0, so the result broadcasts against a 4-D
    tensor over dims 1, 2 and 3.
    """
    return input.reshape(-1, 1, 1, 1)
def reshape_to_bias(input):
    """Flatten ``input`` to a 1-D tensor."""
    return input.reshape(-1)
# ********************* bn融合_量化卷积（bn融合后，同时量化A/W，并做卷积） *********************
class BNFold_Conv2d_Q(Conv2d_Q):
    """Quantized convolution with batch normalization folded into it.

    The BN scale is folded into the weight before the weight is quantized, and
    the BN shift is applied as the convolution bias.

    Args:
        in_channels, out_channels, kernel_size, stride, dilation, groups, bias:
            convolution configuration.
        eps: value added to the variance before the square root.
        momentum: weight of the current batch in the running mean / variance.
            Lowered from 0.1 to 0.01 to damp the jitter introduced by
            quantization (the original note reports roughly +1% accuracy).
        a_bits, w_bits: bit-widths of the activation / weight quantizers.
        q_type: ``0`` selects ``SymmetricQuantizer``; any other value selects
            ``AsymmetricQuantizer``.
        first_layer: when truthy the input activation is NOT quantized.
        padding: convolution padding. Appended after the existing parameters so
            positional callers are unaffected.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        dilation=1,
        groups=1,
        bias=False,
        eps=1e-5,
        momentum=0.01,
        a_bits=8,
        w_bits=8,
        q_type=1,
        first_layer=0,
        padding=0,
    ):
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            dilation=dilation,
            groups=groups,
            bias=bias
        )
        # Set the padding on the instance instead of relying on the parent's
        # signature; forward() passes self.padding to F.conv2d explicitly.
        self.padding = (padding, padding) if isinstance(padding, int) else padding
        self.eps = eps
        self.momentum = momentum
        self.gamma = Parameter(torch.Tensor(out_channels))
        self.beta = Parameter(torch.Tensor(out_channels))
        self.register_buffer('running_mean', torch.zeros(out_channels))
        self.register_buffer('running_var', torch.ones(out_channels))
        self.register_buffer('first_bn', torch.zeros(1))
        init.uniform_(self.gamma)
        init.zeros_(self.beta)

        # Build the quantizers here so the requested bit-widths / type take
        # effect (activation: layer level, weight: channel level).
        quantizer_cls = SymmetricQuantizer if q_type == 0 else AsymmetricQuantizer
        self.activation_quantizer = quantizer_cls(
            bits=a_bits,
            range_tracker=AveragedRangeTracker(q_level='L'),
        )
        self.weight_quantizer = quantizer_cls(
            bits=w_bits,
            range_tracker=GlobalRangeTracker(q_level='C', out_channels=out_channels),
        )
        self.first_layer = first_layer

    def forward(self, input):
        """Run the BN-folded quantized convolution.

        Training: a float convolution yields the batch statistics, the running
        statistics are updated, the weight is folded with the running variance,
        and the output is rescaled so the result matches folding with the batch
        variance. Evaluation: weight and bias are folded with the running
        statistics and a single convolution is run.
        """
        if self.training:
            # Float convolution, used only to obtain the BN batch statistics.
            output = F.conv2d(
                input=input,
                weight=self.weight,
                bias=self.bias,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups
            )
            dims = [dim for dim in range(4) if dim != 1]
            batch_mean = torch.mean(output, dim=dims)
            batch_var = torch.var(output, dim=dims)
            # Running statistics are buffers: update them without autograd so
            # they do not accumulate graph history across iterations.
            with torch.no_grad():
                if self.first_bn == 0:
                    # First batch: take its statistics as the starting point.
                    self.first_bn.fill_(1)
                    self.running_mean.copy_(batch_mean)
                    self.running_var.copy_(batch_var)
                else:
                    self.running_mean.mul_(1 - self.momentum).add_(batch_mean * self.momentum)
                    self.running_var.mul_(1 - self.momentum).add_(batch_var * self.momentum)
            # Bias is folded with the batch statistics.
            batch_scale = self.gamma / torch.sqrt(batch_var + self.eps)
            if self.bias is not None:
                bias = reshape_to_bias(self.beta + (self.bias - batch_mean) * batch_scale)
            else:
                bias = reshape_to_bias(self.beta - batch_mean * batch_scale)
            # Weight is folded with the running statistics.
            weight = self.weight * reshape_to_weight(self.gamma / torch.sqrt(self.running_var + self.eps))
        else:
            # Bias and weight are both folded with the running statistics.
            running_scale = self.gamma / torch.sqrt(self.running_var + self.eps)
            if self.bias is not None:
                bias = reshape_to_bias(self.beta + (self.bias - self.running_mean) * running_scale)
            else:
                bias = reshape_to_bias(self.beta - self.running_mean * running_scale)
            weight = self.weight * reshape_to_weight(running_scale)

        # Quantize the activation (unless first layer) and the folded weight.
        if not self.first_layer:
            input = self.activation_quantizer(input)
        q_input = input
        q_weight = self.weight_quantizer(weight)

        if self.training:
            output = F.conv2d(
                input=q_input,
                weight=q_weight,
                # The folded `bias` already contains self.bias; passing
                # self.bias here as well would count it twice when bias=True.
                bias=None,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups
            )
            # Convert "weight folded with running var" into "folded with batch var".
            output *= reshape_to_activation(torch.sqrt(self.running_var + self.eps) / torch.sqrt(batch_var + self.eps))
            output += reshape_to_activation(bias)
        else:
            # Full conv + BN in one call: the folded bias is added here.
            output = F.conv2d(
                input=q_input,
                weight=q_weight,
                bias=bias,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups
            )
        return output
```

# 7. 实验结果

```import torch
import torch.nn as nn
import torch.nn.functional as F
from .util_wqaq import Conv2d_Q, BNFold_Conv2d_Q

class QuanConv2d(nn.Module):
    """(ReLU ->) quantized convolution -> BN (-> ReLU) building block.

    Args:
        input_channels, output_channels, kernel_size, stride, padding, groups:
            convolution configuration, forwarded to the quantized conv layer.
        last_relu: when truthy a ReLU is applied to the block's output.
        abits, wbits: activation / weight bit-widths for the quantized conv.
        bn_fold: ``1`` uses ``BNFold_Conv2d_Q`` (BN folded into the conv);
            otherwise ``Conv2d_Q`` followed by ``nn.BatchNorm2d``.
        q_type: quantizer type forwarded to the quantized conv.
        first_layer: when truthy the leading ReLU is skipped; also forwarded to
            the quantized conv.
    """

    def __init__(self, input_channels, output_channels,
                 kernel_size=-1, stride=-1, padding=-1, groups=1, last_relu=0,
                 abits=8, wbits=8, bn_fold=0, q_type=1, first_layer=0):
        super(QuanConv2d, self).__init__()
        self.last_relu = last_relu
        self.bn_fold = bn_fold
        self.first_layer = first_layer

        conv_kwargs = dict(
            kernel_size=kernel_size, stride=stride, padding=padding, groups=groups,
            a_bits=abits, w_bits=wbits, q_type=q_type, first_layer=first_layer,
        )
        if self.bn_fold == 1:
            self.bn_q_conv = BNFold_Conv2d_Q(input_channels, output_channels, **conv_kwargs)
        else:
            self.q_conv = Conv2d_Q(input_channels, output_channels, **conv_kwargs)
            # momentum lowered from 0.1 to 0.01 to damp the jitter introduced by
            # quantization (the original note reports roughly +1% accuracy).
            self.bn = nn.BatchNorm2d(output_channels, momentum=0.01)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the block to ``x`` and return the result."""
        if not self.first_layer:
            x = self.relu(x)
        if self.bn_fold == 1:
            x = self.bn_q_conv(x)
        else:
            x = self.q_conv(x)
            x = self.bn(x)
        if self.last_relu:
            x = self.relu(x)
        return x

class Net(nn.Module):
    """Stack of nine ``QuanConv2d`` blocks ending in 10 output channels.

    Args:
        cfg: channel widths of the first eight blocks; defaults to
            ``[192, 160, 96, 192, 192, 192, 192, 192]``.
        abits, wbits: activation / weight bit-widths used by every block.
        bn_fold: forwarded to every block (``1`` folds BN into the conv).
        q_type: quantizer type forwarded to every block.

    Raises:
        ValueError: if ``cfg`` has fewer than eight entries.
    """

    def __init__(self, cfg = None, abits=8, wbits=8, bn_fold=0, q_type=1):
        super(Net, self).__init__()
        if cfg is None:
            cfg = [192, 160, 96, 192, 192, 192, 192, 192]
        if len(cfg) < 8:
            raise ValueError(
                "cfg must provide at least 8 channel widths, got {}".format(len(cfg))
            )
        # Quantization settings shared by every block.
        quant = dict(abits=abits, wbits=wbits, bn_fold=bn_fold, q_type=q_type)
        # Activations and weights are quantized everywhere except the network
        # input (first_layer=1 on the first block).
        self.quan_model = nn.Sequential(
            QuanConv2d(3, cfg[0], kernel_size=5, stride=1, padding=2, first_layer=1, **quant),
            QuanConv2d(cfg[0], cfg[1], kernel_size=1, stride=1, padding=0, **quant),
            QuanConv2d(cfg[1], cfg[2], kernel_size=1, stride=1, padding=0, **quant),

            QuanConv2d(cfg[2], cfg[3], kernel_size=5, stride=1, padding=2, **quant),
            QuanConv2d(cfg[3], cfg[4], kernel_size=1, stride=1, padding=0, **quant),
            QuanConv2d(cfg[4], cfg[5], kernel_size=1, stride=1, padding=0, **quant),

            QuanConv2d(cfg[5], cfg[6], kernel_size=3, stride=1, padding=1, **quant),
            QuanConv2d(cfg[6], cfg[7], kernel_size=1, stride=1, padding=0, **quant),
            QuanConv2d(cfg[7], 10, kernel_size=1, stride=1, padding=0, last_relu=1, **quant),
        )

    def forward(self, x):
        """Run the block stack and flatten everything but the batch dimension."""
        x = self.quan_model(x)
        x = x.view(x.size(0), -1)
        return x
```

```def adjust_learning_rate(optimizer, epoch):
# Step-decay schedule: when `epoch` is one of the milestone epochs, multiply the
# learning rate of every param group in `optimizer` by 0.1. The milestone list
# is chosen from the module-level `args` (bn_fold / model_type), which is not
# defined in this snippet.
if args.bn_fold == 1:
if args.model_type == 0:
update_list = [12, 15, 25]
else:
update_list = [8, 12, 20, 25]
else:
update_list = [15, 17, 20]
if epoch in update_list:
for param_group in optimizer.param_groups:
param_group['lr'] = param_group['lr'] * 0.1
return
```

Acc

91.01%

88.88%

INT8

86.66%

INT8

88.89%

INT8

87.30%

INT8

QAT 方式的精度明显好于训练后量化（Post-Training Quantization）。

# 8. 总结

0 条评论

• ### 【CV中的Attention机制】ECCV 2018 Convolutional Block Attention Module

这是【CV中的Attention机制】系列的第三篇文章。目前cv领域借鉴了nlp领域的attention机制以后生产出了很多有用的基于attention机制的论...

• ### 【CV中的Attention机制】BiSeNet中的FFM模块与ARM模块

语义分割需要丰富的空间信息和相关大的感受野，目前很多语义分割方法为了达到实时推理的速度选择牺牲空间分辨率，这可能会导致比较差的模型表现。

• ### 使用关键点进行小目标检测

【GiantPandaCV导语】本文是笔者出于兴趣搞了一个小的库，主要是用于定位红外小目标。由于其具有尺度很小的特点，所以可以尝试用点的方式代表其位置。本文主要...

• ### iOS 自定义分段控制器

最近做项目时遇到一些问题，就是项目里原有分段控制器的适用范围有些局限，虽然网上也有很多分段控制器的demo，但自己写的，可控性和项目适用性自己能很明白，所以我专...

• ### 基于深度学习和经典方法的文本分类

文本分类应该是自然语言处理中最普遍的一个应用，例如文章自动分类、邮件自动分类、垃圾邮件识别、用户情感分类等等，在生活中有很多例子，这篇文章主要从传统和深度学习两...

• ### 手把手教你用Python开发“剪刀石头布”小游戏【附源码】

最近在学习PyQt5可视化界面，这是一个内容非常丰富的gui库，相对于tkinter库，功能更加强大，界面更加美观，操作也不难。于是我开始小试牛刀，用PyQt...

• ### 6.wxPython防止窗体重画棋子消失的机制

可以画图的类中wx.ClientDC不必依赖窗体绘画事件，可以随时实例化，随时画图。但是窗体最小化之后再恢复，重画的窗体上通过wx.ClientDC绘制的棋子会...

• ### 10.带人机对战的五子棋程序

今天我们带来一个带人机对战功能的五子棋程序。程序基于前面文章中的框架搭建，新增人机对战的策略。程序基于规则进行决策，不考虑禁手，玩家执黑子先行。棋盘规模采用15...

• ### 我的小工具，用C和python实现远程读卡器，远程读写消费卡片

这个远程读卡器就是一普通usb口或串口的读卡器，只不过配合一个电脑软件作为tcp服务器。这样，程序员可以在公司电脑上运行程序连到服务器上。服务器端操作控制现场...

• ### 我的小工具-远程读卡器web客户端（nodejs+websocket实现实时指令交互）

之前的小工具，远程读卡器web客户端，实现原理是把读写卡服务装在远程（现场）的电脑上，这样有一些缺点，比如现场电脑必须开启端口映射，让客户端能否访问到。只能写好...