Caffe2 - (三十一) Detectron 之 modeling - FPN 与 optimizer

Caffe2 - (三十一) Detectron 之 modeling - FPN 与 optimizer

1. FPN.py

FPN 模块.

"""
Feature Pyramid Network (FPN) 使用的相关函数.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import collections
import numpy as np

from core.config import cfg
from modeling.generate_anchors import generate_anchors ## 
from utils.c2 import const_fill
from utils.c2 import gauss_fill
import modeling.ResNet as ResNet ## 
import utils.blob as blob_utils
import utils.boxes as box_utils

"""
Backbone 骨干网络的最低(lowest) 和最高(highest) 金字塔(pyramid) 层(level).
对于 FPN, 这里假设所有的网络都有 5 个空间维度的减少(spatial reductions),
每一个减少的因子都是 2.
Level 1 对应于输入图片,此时使用时是没有意义的.
"""
# Lowest and highest pyramid levels available in the backbone network.
# For FPN, every network is assumed to perform 5 spatial reductions, each by a
# factor of 2. Level 1 corresponds to the input image, so using it makes no
# sense.
LOWEST_BACKBONE_LVL = 2   # E.g., "conv2"-like level
HIGHEST_BACKBONE_LVL = 5  # E.g., "conv5"-like level


# ---------------------------------------------------------------------------- #
# FPN with ResNet
# ---------------------------------------------------------------------------- #

def add_fpn_ResNet50_conv5_body(model):
    """Add a ResNet-50 conv5 body to `model` and build FPN on top of it.

    Returns whatever `add_fpn_onto_conv_body` returns with its default
    `P2only` setting.
    """
    body_builder = ResNet.add_ResNet50_conv5_body
    level_info_builder = fpn_level_info_ResNet50_conv5
    return add_fpn_onto_conv_body(model, body_builder, level_info_builder)


def add_fpn_ResNet50_conv5_P2only_body(model):
    """Add a ResNet-50 conv5 body plus FPN to `model`, requesting `P2only=True`.

    Returns whatever `add_fpn_onto_conv_body` returns for `P2only=True`.
    """
    body_builder = ResNet.add_ResNet50_conv5_body
    level_info_builder = fpn_level_info_ResNet50_conv5
    return add_fpn_onto_conv_body(
        model, body_builder, level_info_builder, P2only=True
    )


def add_fpn_ResNet101_conv5_body(model):
    """Add a ResNet-101 conv5 body to `model` and build FPN on top of it.

    Returns whatever `add_fpn_onto_conv_body` returns with its default
    `P2only` setting.
    """
    body_builder = ResNet.add_ResNet101_conv5_body
    level_info_builder = fpn_level_info_ResNet101_conv5
    return add_fpn_onto_conv_body(model, body_builder, level_info_builder)


def add_fpn_ResNet101_conv5_P2only_body(model):
    """Add a ResNet-101 conv5 body plus FPN to `model`, requesting `P2only=True`.

    Returns whatever `add_fpn_onto_conv_body` returns for `P2only=True`.
    """
    body_builder = ResNet.add_ResNet101_conv5_body
    level_info_builder = fpn_level_info_ResNet101_conv5
    return add_fpn_onto_conv_body(
        model, body_builder, level_info_builder, P2only=True
    )


def add_fpn_ResNet152_conv5_body(model):
    """Add a ResNet-152 conv5 body to `model` and build FPN on top of it.

    Returns whatever `add_fpn_onto_conv_body` returns with its default
    `P2only` setting.
    """
    body_builder = ResNet.add_ResNet152_conv5_body
    level_info_builder = fpn_level_info_ResNet152_conv5
    return add_fpn_onto_conv_body(model, body_builder, level_info_builder)


def add_fpn_ResNet152_conv5_P2only_body(model):
    """Add a ResNet-152 conv5 body plus FPN to `model`, requesting `P2only=True`.

    Returns whatever `add_fpn_onto_conv_body` returns for `P2only=True`.
    """
    body_builder = ResNet.add_ResNet152_conv5_body
    level_info_builder = fpn_level_info_ResNet152_conv5
    return add_fpn_onto_conv_body(
        model, body_builder, level_info_builder, P2only=True
    )


# ---------------------------------------------------------------------------- #
# 添加 FPN 到骨干backbone 网络结构的相关函数
# ---------------------------------------------------------------------------- #

def add_fpn_onto_conv_body(model, conv_body_func, fpn_level_info_func, P2only=False):
    """Add the given conv body to `model`, then add FPN levels on top of it.

    Args:
        model: model object passed through to `conv_body_func` and `add_fpn`.
        conv_body_func: callable invoked as `conv_body_func(model)` to build
            the backbone.
        fpn_level_info_func: zero-argument callable whose result is handed to
            `add_fpn` as the level description.
        P2only: when true, only the last (finest) FPN blob and its spatial
            scale are returned instead of the full lists.

    Returns:
        A `(blobs, dim, spatial_scales)` triple. With `P2only` the first and
        third items are single elements; otherwise they are the full
        sequences produced by `add_fpn`.

    Note:
        The FPN sequences are in reversed (coarsest-first) order, e.g. blobs
        like [fpn5, fpn4, fpn3, fpn2], backbone dims like
        [2048, 1024, 512, 256] and spatial scales like
        [1/32, 1/16, 1/8, 1/4].
    """
    conv_body_func(model)
    level_info = fpn_level_info_func()
    fpn_blobs, fpn_dim, fpn_scales = add_fpn(model, level_info)

    if not P2only:
        # Use every pyramid level.
        return fpn_blobs, fpn_dim, fpn_scales

    # Use only the finest level, which sits at the end of the sequences.
    return fpn_blobs[-1], fpn_dim, fpn_scales[-1]


def add_fpn(model, fpn_level_info):
    """Add FPN connections to `model`, following the model described in the FPN paper.

    FPN levels are built starting from the highest/coarsest backbone level
    (usually "conv5"). First the lower/finer resolution levels are built
    top-down, then the levels that are higher/coarser than the starting level
    are built upward.

    Args:
        model: model object providing `Conv`, `MaxPool`, `Relu` and `net`.
        fpn_level_info: object with `blobs`, `dims` and `spatial_scales`
            sequences, indexed coarsest-first.

    Returns:
        `(blobs_fpn, fpn_dim, spatial_scales)`: the output blobs
        (coarsest-first), the FPN channel dimension (`cfg.FPN.DIM`), and the
        spatial scale for each output blob.
    """
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()

    # Number of backbone stages that receive an FPN level, counted from the
    # coarsest stage (usually the "conv5"-like level). E.g., if the backbone
    # level info defines 4 stages ("conv5", "conv4", "conv3", "conv2") and
    # min_level=2, then 4 - (2 - 2) = 4 stages get FPN levels.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: a 1x1 conv only, which seeds the
    # top-down recursion.
    model.Conv(
        lateral_input_blobs[0],
        output_blobs[0],
        dim_in=fpn_dim_lateral[0],
        dim_out=fpn_dim,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=xavier_fill,
        bias_init=const_fill(0.0)
    )

    #
    # Step 1: starting from the coarsest backbone level, build FPN levels
    # downward.
    #

    # For the remaining levels, add top-down and lateral connections.
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        fpn_blob = model.Conv(
            output_blobs[i],
            # Fix: the opening quote of this format string was missing,
            # which made the whole module a syntax error.
            'fpn_{}'.format(fpn_level_info.blobs[i]),
            dim_in=fpn_dim,
            dim_out=fpn_dim,
            kernel=3,
            pad=1,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        blobs_fpn.append(fpn_blob)
        spatial_scales.append(fpn_level_info.spatial_scales[i])

    #
    # Step 2: starting from the coarsest backbone level, build FPN levels
    # upward.
    #

    # Check whether a P6 feature map is needed.
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from the CVPR'17 FPN paper.
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride-2 subsampling.
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels as introduced by RetinaNet.
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                # Every extra level after the first is fed through a ReLU.
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            # Only the first extra conv reads the backbone dimension; later
            # ones read the previous extra level, which has fpn_dim channels.
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales


def add_topdown_lateral_module(model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral):
    """Add one top-down + lateral merge module.

    A 1x1 conv is applied to `fpn_lateral`, `fpn_top` is upsampled 2x with
    nearest-neighbor upsampling, and the two results are summed into the blob
    named `fpn_bottom`.
    """
    # Lateral weights are zero-initialized when cfg.FPN.ZERO_INIT_LATERAL is
    # set; otherwise Xavier initialization is used.
    if cfg.FPN.ZERO_INIT_LATERAL:
        lateral_weight_init = const_fill(0.0)
    else:
        lateral_weight_init = ('XavierFill', {})

    # Lateral 1x1 conv
    lateral_out = model.Conv(
        fpn_lateral,
        fpn_bottom + '_lateral',
        dim_in=dim_lateral,
        dim_out=dim_top,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=lateral_weight_init,
        bias_init=const_fill(0.0)
    )
    # Top-down 2x upsampling
    topdown_out = model.net.UpsampleNearest(
        fpn_top, fpn_bottom + '_topdown', scale=2
    )
    # Sum the lateral and top-down paths
    model.net.Sum([lateral_out, topdown_out], fpn_bottom)


def get_min_max_levels():
    """Return the `(min_level, max_level)` FPN levels needed.

    The range depends on whether RPN and/or RoI transforms operate on
    multiple FPN levels:
      - neither: the backbone's lowest and highest levels;
      - RPN only: the configured RPN range;
      - RoIs only: the configured RoI range;
      - both: the union of the two configured ranges.
    """
    multilevel_rpn = cfg.FPN.MULTILEVEL_RPN
    multilevel_rois = cfg.FPN.MULTILEVEL_ROIS

    if multilevel_rpn and multilevel_rois:
        lowest = min(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.ROI_MIN_LEVEL)
        highest = max(cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.ROI_MAX_LEVEL)
        return lowest, highest
    if multilevel_rpn:
        return cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
    if multilevel_rois:
        return cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL
    return LOWEST_BACKBONE_LVL, HIGHEST_BACKBONE_LVL


# ---------------------------------------------------------------------------- #
# RPN with an FPN backbone
# ---------------------------------------------------------------------------- #

def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN outputs on top of the FPN levels.

    Args:
        model: model object providing `Conv`, `ConvShared`, `Relu`,
            `GenerateProposals`, `net` and a `train` flag.
        blobs_in: FPN blobs in reversed (coarsest-first) order; must hold one
            entry per level in [RPN_MIN_LEVEL, RPN_MAX_LEVEL].
        dim_in: channel dimension of the input blobs.
        spatial_scales: spatial scales, in the same order as `blobs_in`.
    """
    anchors_per_location = len(cfg.FPN.RPN_ASPECT_RATIOS)
    hidden_dim = dim_in

    coarsest_lvl = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    finest_lvl = cfg.FPN.RPN_MIN_LEVEL    # finest level of pyramid
    assert len(blobs_in) == coarsest_lvl - finest_lvl + 1

    for level in range(finest_lvl, coarsest_lvl + 1):
        # Inputs are in reversed order, so index from the coarse end.
        level_input = blobs_in[coarsest_lvl - level]
        level_scale = spatial_scales[coarsest_lvl - level]
        suffix = str(level)

        if level != finest_lvl:
            # Every other level shares weights and biases with the finest
            # level's conv ops.
            shared = str(finest_lvl)
            # RPN hidden representation
            hidden = model.ConvShared(
                level_input,
                'conv_rpn_fpn' + suffix,
                dim_in,
                hidden_dim,
                kernel=3,
                pad=1,
                stride=1,
                weight='conv_rpn_fpn' + shared + '_w',
                bias='conv_rpn_fpn' + shared + '_b'
            )
            model.Relu(hidden, hidden)
            # Proposal classification scores
            cls_logits = model.ConvShared(
                hidden,
                'rpn_cls_logits_fpn' + suffix,
                dim_in,
                anchors_per_location,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_cls_logits_fpn' + shared + '_w',
                bias='rpn_cls_logits_fpn' + shared + '_b'
            )
            # Proposal bbox regression deltas
            bbox_deltas = model.ConvShared(
                hidden,
                'rpn_bbox_pred_fpn' + suffix,
                dim_in,
                4 * anchors_per_location,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_bbox_pred_fpn' + shared + '_w',
                bias='rpn_bbox_pred_fpn' + shared + '_b'
            )
        else:
            # First (finest) FPN level: create the conv ops with Gaussian
            # initialized weights and zeroed biases. All other levels reuse
            # these ops' parameters.
            # RPN hidden representation
            hidden = model.Conv(
                level_input,
                'conv_rpn_fpn' + suffix,
                dim_in,
                hidden_dim,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(hidden, hidden)
            # Proposal classification scores
            cls_logits = model.Conv(
                hidden,
                'rpn_cls_logits_fpn' + suffix,
                dim_in,
                anchors_per_location,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas
            bbox_deltas = model.Conv(
                hidden,
                'rpn_bbox_pred_fpn' + suffix,
                dim_in,
                4 * anchors_per_location,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )

        # Proposals are needed:
        #  1. during inference (== not model.train), for both RPN-only and
        #     Faster R-CNN;
        #  2. during training, for Faster R-CNN.
        # In the remaining case (training an RPN-only model) they are not.
        if model.train and not cfg.MODEL.FASTER_RCNN:
            continue

        level_anchors = generate_anchors(
            stride=2.**level,
            sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(level - finest_lvl), ),
            aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
        )
        cls_probs = model.net.Sigmoid(cls_logits, 'rpn_cls_probs_fpn' + suffix)
        model.GenerateProposals(
            [cls_probs, bbox_deltas, 'im_info'],
            ['rpn_rois_fpn' + suffix, 'rpn_roi_probs_fpn' + suffix],
            anchors=level_anchors,
            spatial_scale=level_scale
        )


def add_fpn_rpn_losses(model):
    """Add RPN losses for every FPN level used by the RPN.

    Returns:
        A dict accumulating the result of `blob_utils.get_loss_gradients`
        for the classification and bbox losses of every level.
    """
    gradients = {}
    for level in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        suffix = str(level)
        cls_logits_name = 'rpn_cls_logits_fpn' + suffix
        bbox_pred_name = 'rpn_bbox_pred_fpn' + suffix
        labels_name = 'rpn_labels_int32_fpn' + suffix

        # Spatially narrow the full-sized RPN label arrays to match the
        # feature map shape.
        model.net.SpatialNarrowAs(
            ['rpn_labels_int32_wide_fpn' + suffix, cls_logits_name],
            labels_name
        )
        for key in ('targets', 'inside_weights', 'outside_weights'):
            model.net.SpatialNarrowAs(
                ['rpn_bbox_' + key + '_wide_fpn' + suffix, bbox_pred_name],
                'rpn_bbox_' + key + '_fpn' + suffix
            )

        cls_loss = model.net.SigmoidCrossEntropyLoss(
            [cls_logits_name, labels_name],
            'loss_rpn_cls_fpn' + suffix,
            normalize=0,
            scale=(
                1. / cfg.NUM_GPUS / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM /
                cfg.TRAIN.IMS_PER_BATCH
            )
        )
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH.
        bbox_loss = model.net.SmoothL1Loss(
            [
                bbox_pred_name,
                'rpn_bbox_targets_fpn' + suffix,
                'rpn_bbox_inside_weights_fpn' + suffix,
                'rpn_bbox_outside_weights_fpn' + suffix
            ],
            'loss_rpn_bbox_fpn' + suffix,
            beta=1. / 9.,
            scale=1. / cfg.NUM_GPUS
        )

        level_gradients = blob_utils.get_loss_gradients(
            model, [cls_loss, bbox_loss]
        )
        gradients.update(level_gradients)
        model.AddLosses(
            ['loss_rpn_cls_fpn' + suffix, 'loss_rpn_bbox_fpn' + suffix]
        )
    return gradients


# ---------------------------------------------------------------------------- #
# multilevel FPN RoIs 相关的辅助函数Helper functions
# ---------------------------------------------------------------------------- #

def map_rois_to_fpn_levels(rois, k_min, k_max):
    """Determine which FPN level each RoI in `rois` should map to.

    Uses the heuristic from the FPN paper (Eqn. 1): the level is derived from
    the square root of the RoI area relative to a canonical scale, then
    clipped to `[k_min, k_max]`.
    """
    canonical_scale = cfg.FPN.ROI_CANONICAL_SCALE  # default: 224
    canonical_level = cfg.FPN.ROI_CANONICAL_LEVEL  # default: 4

    roi_sizes = np.sqrt(box_utils.boxes_area(rois))
    # The small epsilon keeps log2 finite for zero-area RoIs.
    raw_levels = np.floor(
        canonical_level + np.log2(roi_sizes / canonical_scale + 1e-6)
    )
    return np.clip(raw_levels, k_min, k_max)


def add_multilevel_roi_blobs(blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max):
    """Add RoI blobs for multiple FPN levels to the `blobs` dict.

    Args:
        blobs: dict mapping blob names to numpy ndarrays; modified in place.
        blob_prefix: name prefix to use for the FPN blobs.
        rois: source RoIs, a 2D numpy array of shape (N, 5) where each row is
            an RoI and the columns encode (batch_idx, x1, y1, x2, y2).
        target_lvls: numpy array of shape (N, ) giving the FPN level each RoI
            in `rois` should be assigned to.
        lvl_min: the finest (highest resolution) FPN level (e.g., 2).
        lvl_max: the coarsest (lowest resolution) FPN level (e.g., 6).

    Besides one `<prefix>_fpn<level>` entry per level, an
    `<prefix>_idx_restore_int32` entry is written holding the permutation
    that restores the level-stacked RoIs to their original order.
    """
    # Seeded with an empty array so concatenation works even if no level is
    # visited.
    index_chunks = [np.empty((0, ))]
    stacked_rois = np.zeros((0, 5), dtype=np.float32)  # only for the assert
    for level in range(lvl_min, lvl_max + 1):
        blob_name = blob_prefix + '_fpn' + str(level)
        members = np.where(target_lvls == level)[0]
        blobs[blob_name] = rois[members, :]
        index_chunks.append(members)
        stacked_rois = np.vstack([stacked_rois, blobs[blob_name]])

    stacked_order = np.concatenate(index_chunks)
    restore_order = np.argsort(stacked_order).astype(np.int32, copy=False)
    blobs[blob_prefix + '_idx_restore_int32'] = restore_order
    # Sanity check that the restore order is correct.
    assert (stacked_rois[restore_order] == rois).all()


# ---------------------------------------------------------------------------- #
# FPN level info for stages 5, 4, 3, 2 for select models (more can be added)
# ---------------------------------------------------------------------------- #

# Per-backbone FPN description: the backbone blob names to tap, their channel
# dimensions, and their spatial scales.
FpnLevelInfo = collections.namedtuple(
    'FpnLevelInfo', 'blobs dims spatial_scales'
)


def fpn_level_info_ResNet50_conv5():
    """Return the FPN level info (stages 5, 4, 3, 2) for ResNet-50 conv5."""
    stage_blobs = ('res5_2_sum', 'res4_5_sum', 'res3_3_sum', 'res2_2_sum')
    stage_dims = (2048, 1024, 512, 256)
    stage_scales = (1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    return FpnLevelInfo(
        blobs=stage_blobs, dims=stage_dims, spatial_scales=stage_scales
    )


def fpn_level_info_ResNet101_conv5():
    """Return the FPN level info (stages 5, 4, 3, 2) for ResNet-101 conv5."""
    stage_blobs = ('res5_2_sum', 'res4_22_sum', 'res3_3_sum', 'res2_2_sum')
    stage_dims = (2048, 1024, 512, 256)
    stage_scales = (1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    return FpnLevelInfo(
        blobs=stage_blobs, dims=stage_dims, spatial_scales=stage_scales
    )


def fpn_level_info_ResNet152_conv5():
    """Return the FPN level info (stages 5, 4, 3, 2) for ResNet-152 conv5."""
    stage_blobs = ('res5_2_sum', 'res4_35_sum', 'res3_7_sum', 'res2_2_sum')
    stage_dims = (2048, 1024, 512, 256)
    stage_scales = (1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    return FpnLevelInfo(
        blobs=stage_blobs, dims=stage_dims, spatial_scales=stage_scales
    )

2. optimizer.py

"""
优化 op 图构建.
Optimization operator graph construction.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging

from caffe2.python import muji

from core.config import cfg
import utils.c2 as c2_utils

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def build_data_parallel_model(model, single_gpu_build_func):
    """Build a data-parallel model from a single-GPU model building function.

    Args:
        model: model object; its `train` flag selects between the training
            and test-time graphs.
        single_gpu_build_func: callable invoked as
            `single_gpu_build_func(model)` to build the graph for one GPU.
    """
    if not model.train:
        # At test time the network ops run on a single GPU; test-time
        # parallelism is achieved through multiprocessing instead.
        with c2_utils.NamedCudaScope(0):
            single_gpu_build_func(model)
        return

    # Build the forward graph on every GPU.
    loss_gradients = _build_forward_graph(model, single_gpu_build_func)
    # Add the backward pass on all GPUs.
    model.AddGradientOperators(loss_gradients)
    if cfg.NUM_GPUS > 1:
        # Build the graph that all-reduces the gradients.
        _add_allreduce_graph(model)
    # After the all-reduce, every GPU applies its own SGD parameter update.
    for device in range(cfg.NUM_GPUS):
        _add_parameter_update_ops(model, device)


def _build_forward_graph(model, single_gpu_build_func):
    """Build the forward graph on every GPU.

    Returns:
        A dict merging the loss gradients returned by
        `single_gpu_build_func(model)` for each GPU.
    """
    merged_gradients = {}
    # Build the model on each GPU under the matching name and device scope.
    for device in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(device):
            device_gradients = single_gpu_build_func(model)
            merged_gradients.update(device_gradients)
    return merged_gradients


def _add_allreduce_graph(model):
    """Build the graph that all-reduces the per-GPU gradients.

    Needed when training with more than one GPU.
    """
    trainable = model.TrainableParams()
    assert len(trainable) % cfg.NUM_GPUS == 0
    # Parameters are replicated on every GPU, so this is the number of
    # distinct parameter blobs (i.e. the number of blobs per GPU).
    blobs_per_gpu = int(len(trainable) / cfg.NUM_GPUS)
    with c2_utils.CudaScope(0):
        # Visit each distinct parameter blob once.
        for offset in range(blobs_per_gpu):
            # Striding by blobs_per_gpu picks this parameter's copy on every
            # GPU; collect the gradient of each copy.
            replicas = trainable[offset::blobs_per_gpu]
            gradients = [model.param_to_grad[p] for p in replicas]
            if not gradients:
                continue
            if cfg.USE_NCCL:
                model.net.NCCLAllreduce(gradients, gradients)
            else:
                muji.Allreduce(model.net, gradients, reduced_affix='')


def _add_parameter_update_ops(model, gpu_id):
    """Build the optimizer update op graph for the parameters on `gpu_id`."""
    with c2_utils.NamedCudaScope(gpu_id):
        init_net = model.param_init_net
        # The learning rate starts at 0 purely as a dummy value.
        lr = init_net.ConstantFill([], 'lr', shape=[1], value=0.0)
        one = init_net.ConstantFill([], 'one', shape=[1], value=1.0)
        wd = init_net.ConstantFill(
            [], 'wd', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY
        )

        for param in model.TrainableParams(gpu_id=gpu_id):
            logger.info('param ' + str(param) + ' will be updated')
            grad = model.param_to_grad[param]
            # Initialize the momentum vector to zeros.
            momentum = init_net.ConstantFill(
                [param], param + '_momentum', value=0.0
            )
            if param in model.biases:
                # Biases get special treatment (mostly for historical
                # reasons):
                #   (1) Do not apply weight decay
                #   (2) Use a 2x higher learning rate
                model.Scale(grad, grad, scale=2.0)
            elif cfg.SOLVER.WEIGHT_DECAY > 0:
                # Apply weight decay to non-bias weights.
                model.WeightedSum([grad, one, param, wd], grad)
            # Update the gradient, momentum and parameter blobs in place.
            model.net.MomentumSGDUpdate(
                [grad, momentum, lr, param],
                [grad, momentum, param],
                momentum=cfg.SOLVER.MOMENTUM
            )

3. generate_anchors.py

import numpy as np

# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
#    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
#    >> anchors
#
#    anchors =
#
#       -83   -39   100    56
#      -175   -87   192   104
#      -359  -183   376   200
#       -55   -55    72    72
#      -119  -119   136   136
#      -247  -247   264   264
#       -35   -79    52    96
#       -79  -167    96   184
#      -167  -343   184   360

# array([[ -83.,  -39.,  100.,   56.],
#        [-175.,  -87.,  192.,  104.],
#        [-359., -183.,  376.,  200.],
#        [ -55.,  -55.,   72.,   72.],
#        [-119., -119.,  136.,  136.],
#        [-247., -247.,  264.,  264.],
#        [ -35.,  -79.,   52.,   96.],
#        [ -79., -167.,   96.,  184.],
#        [-167., -343.,  184.,  360.]])


def generate_anchors(stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)):
    """Generate a matrix of anchor boxes in (x1, y1, x2, y2) format.

    Anchors are centered on stride / 2, have (approximate) sqrt areas of the
    specified sizes, and aspect ratios as given.

    Args:
        stride: size of the reference window; also used to turn `sizes` into
            scales relative to that window.
        sizes: sequence of target anchor sizes.
        aspect_ratios: sequence of aspect ratios to enumerate.

    Returns:
        The array produced by `_generate_anchors`, one anchor per row.
    """
    # np.float64 replaces the `np.float` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; the resulting dtype is unchanged.
    return _generate_anchors(
        stride,
        np.array(sizes, dtype=np.float64) / stride,
        np.array(aspect_ratios, dtype=np.float64)
    )


def _generate_anchors(base_size, scales, aspect_ratios):
    """Generate anchor (reference) windows.

    Enumerates aspect ratios X scales with respect to the reference window
    (0, 0, base_size - 1, base_size - 1).
    """
    # np.float64 replaces the removed `np.float` alias (same dtype).
    anchor = np.array([1, 1, base_size, base_size], dtype=np.float64) - 1
    ratio_anchors = _ratio_enum(anchor, aspect_ratios)
    # Expand each ratio anchor over all scales and stack the results row-wise.
    return np.vstack(
        [_scale_enum(ratio_anchor, scales) for ratio_anchor in ratio_anchors]
    )


def _whctrs(anchor):
    """Return width, height, x center, and y center of an anchor window.

    `anchor` is indexed as (x1, y1, x2, y2) with inclusive corner
    coordinates, hence the +1 in the width and height.
    """
    x1, y1 = anchor[0], anchor[1]
    width = anchor[2] - x1 + 1
    height = anchor[3] - y1 + 1
    center_x = x1 + 0.5 * (width - 1)
    center_y = y1 + 0.5 * (height - 1)
    return width, height, center_x, center_y


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Build a set of anchor windows around a center.

    Given vectors of widths (`ws`) and heights (`hs`) and a center
    (`x_ctr`, `y_ctr`), return an array with one (x1, y1, x2, y2) row per
    width/height pair.
    """
    # Column vectors, so that hstack yields one row per anchor.
    half_widths = 0.5 * (ws[:, np.newaxis] - 1)
    half_heights = 0.5 * (hs[:, np.newaxis] - 1)
    corners = (
        x_ctr - half_widths,
        y_ctr - half_heights,
        x_ctr + half_widths,
        y_ctr + half_heights,
    )
    return np.hstack(corners)


def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor.

    The area of `anchor` is kept approximately constant: for each ratio the
    width is the rounded sqrt of area / ratio, and the height is the rounded
    width * ratio.
    """
    width, height, center_x, center_y = _whctrs(anchor)
    area = width * height
    widths = np.round(np.sqrt(area / ratios))
    heights = np.round(widths * ratios)
    return _mkanchors(widths, heights, center_x, center_y)


def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor.

    Width and height of `anchor` are both multiplied by every scale while
    the center stays fixed.
    """
    width, height, center_x, center_y = _whctrs(anchor)
    scaled_widths = width * scales
    scaled_heights = height * scales
    return _mkanchors(scaled_widths, scaled_heights, center_x, center_y)

本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。

发表于

我来说两句

0 条评论
登录 后参与评论

相关文章

来自专栏AlgorithmDog的专栏

超快的 fastText

Word2Vec 作者、脸书科学家 Mikolov 文本分类新作 fastText:方法简单,号称并不需要深度学习那样几小时或者几天的训练时间,在普...

320100
来自专栏机器学习算法原理与实践

用scikit-learn研究局部线性嵌入(LLE)

    在局部线性嵌入(LLE)原理总结中,我们对流形学习中的局部线性嵌入(LLE)算法做了原理总结。这里我们就对scikit-learn中流形学习的一些算法做...

13920
来自专栏用户2442861的专栏

OCR material

End-to-End Text Recognition with Convolutional Neural Networks

24330
来自专栏大学生计算机视觉学习DeepLearning

python实现gabor滤波器提取纹理特征 提取指静脉纹理特征 指静脉切割代码

32750
来自专栏深度学习之tensorflow实战篇

文本分类算法带监督的FastText

FastText是Facebook开发的一款快速文本分类器,提供简单而高效的文本分类和表征学习的方法;其由两部分组成,在文末有连接以及github代码源与文本分...

48390
来自专栏AI研习社

基于深度学习的医疗影像论文汇总(Deep Learning Papers on Medical Image Analysis)

看到好东西,怎么能不分享呢。 第一次在知乎翻译,由于水平有限(不是谦虚的那种有限,是真的有限),有不准确的地方还望包涵,最重要的是,还望大佬们多多指正! B...

93580
来自专栏Petrichor的专栏

深度学习: 从新视角 综述 Detection算法

Continually updated,Constantly record my new summary of the Detection Algorithm。

32040
来自专栏专知

【论文推荐】最新5篇目标检测相关论文——显著目标检测、弱监督One-Shot检测、多框检测器、携带物体检测、假彩色图像检测

【导读】专知内容组整理了最近目标检测相关文章,为大家进行介绍,欢迎查看! 1. MSDNN: Multi-Scale Deep Neural Network f...

50970
来自专栏ATYUN订阅号

使用生成式对抗网络进行图像去模糊

AiTechYun 编辑:yuxiangyu 本文主要讨论使用生成式对抗网络实现图像去模糊。 代码:https://github.com/RaphaelMeu...

3.3K90
来自专栏技术随笔

【译】用于肺部CT肺结节分类的深度特征学习摘要

52380

扫码关注云+社区

领取腾讯云代金券