Caffe2 - (二十四) Detectron 之 utils 函数(2)

Caffe2 - (二十四) Detectron 之 utils 函数(2)

1. env.py

"""Environment helper functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import imp
import os
import sys


def get_runtime_dir():
    """
    寻找运行时的工作路径.
    """
    return sys.path[0]


def get_py_bin_ext():
    """
    寻找 python 的二进制扩展文件.
    """
    return '.py'


def set_up_matplotlib():
    """ 
    matplotlib 设置.
    """
    import matplotlib
    # 采用非交互的后端 non-interactive backend
    matplotlib.use('Agg')


def exit_on_error():
    """
    出现错误时,退出 detectron tool.
    """
    sys.exit(1)


def import_nccl_ops():
    """
    导入 NCCL ops.
    由于 NCCL 依赖已经在 Caffe2 gpu lib 中编译,不需要再加载 NCCL ops.
    """
    pass


def get_caffe2_dir():
    """
    寻找 Caffe2 所在路径.
    """
    _fp, c2_path, _desc = imp.find_module('caffe2')
    assert os.path.exists(c2_path), \
        'Caffe2 not found at \'{}\''.format(c2_path)
    c2_dir = os.path.dirname(os.path.abspath(c2_path))
    return c2_dir


def get_detectron_ops_lib():
    """
    寻找 Detectron ops library 库路径.
    """
    c2_dir = get_caffe2_dir()
    detectron_ops_lib = os.path.join(c2_dir, 'lib/libcaffe2_detectron_ops_gpu.so')
    assert os.path.exists(detectron_ops_lib), \
        ('Detectron ops lib not found at \'{}\'; make sure that your Caffe2 '
         'version includes Detectron module').format(detectron_ops_lib)
    return detectron_ops_lib


def get_custom_ops_lib():
    """
    寻找自定义的 osp library 库路径.
    """
    lib_dir, _utils = os.path.split(os.path.dirname(__file__))
    custom_ops_lib = os.path.join(lib_dir, 'build/libcaffe2_detectron_custom_ops_gpu.so')
    assert os.path.exists(custom_ops_lib), \
        'Custom ops lib not found at \'{}\''.format(custom_ops_lib)
    return custom_ops_lib

2. c2.py

"""Helpful utilities for working with Caffe2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from six import string_types
import contextlib

from caffe2.proto import caffe2_pb2
from caffe2.python import core
from caffe2.python import dyndep
from caffe2.python import scope

import utils.env as envu


def import_contrib_ops():
    """
    导入 Detectron 所需的 contrib ops.
    """
    envu.import_nccl_ops()


def import_detectron_ops():
    """
    导入 Detectron ops.
    """
    detectron_ops_lib = envu.get_detectron_ops_lib()
    dyndep.InitOpsLibrary(detectron_ops_lib)


def import_custom_ops():
    """
    导入自定义的 ops.
    """
    custom_ops_lib = envu.get_custom_ops_lib()
    dyndep.InitOpsLibrary(custom_ops_lib)


def SuffixNet(name, net, prefix_len, outputs):
    """
    从给定的 net 返回新 Net. 新 Net 只包括移除 first `prefix_len` 个 ops 后的 ops.
    新 Net 是 net 的一个后缀suffix.

    在 outputs 中的 Blobs 作为 external outpout blobs 被注册.
    """
    outputs = BlobReferenceList(outputs)
    for output in outputs:
        assert net.BlobIsDefined(output)
    new_net = net.Clone(name)

    del new_net.Proto().op[:]
    del new_net.Proto().external_input[:]
    del new_net.Proto().external_output[:]

    # 添加 suffix ops
    new_net.Proto().op.extend(net.Proto().op[prefix_len:])
    # 添加 external input blobs
    # 将任何未定义的 blobs 作为 external inputs
    input_names = [
        i for op in new_net.Proto().op for i in op.input
        if not new_net.BlobIsDefined(i)]
    new_net.Proto().external_input.extend(input_names)
    # 添加 external output blobs
    output_names = [str(o) for o in outputs]
    new_net.Proto().external_output.extend(output_names)
    return new_net, [new_net.GetBlobRef(o) for o in output_names]


def BlobReferenceList(blob_ref_or_list):
    """
    将参数以 BlobReferences 列表的形式返回.
    """
    if isinstance(blob_ref_or_list, core.BlobReference):
        return [blob_ref_or_list]
    elif type(blob_ref_or_list) in (list, tuple):
        for b in blob_ref_or_list:
            assert isinstance(b, core.BlobReference)
        return blob_ref_or_list
    else:
        raise TypeError('blob_ref_or_list must be a BlobReference or a list/tuple of BlobReferences')


def UnscopeName(possibly_scoped_name):
    """
    从一个(可能的)作用域名字中移除任何名字. Remove any name scoping from a (possibly) scoped name
    如将名字 'gpu_0/foo' 转化为 'foo'.
    """
    assert isinstance(possibly_scoped_name, string_types)
    return possibly_scoped_name[
        possibly_scoped_name.rfind(scope._NAMESCOPE_SEPARATOR) + 1:]


@contextlib.contextmanager
def NamedCudaScope(gpu_id):
    """
    创建 GPU name scope 和 CUDA device scope.
    用于 reduce `with ...` nesting levels.
    """
    with GpuNameScope(gpu_id):
        with CudaScope(gpu_id):
            yield


@contextlib.contextmanager
def GpuNameScope(gpu_id):
    """
    创建 GPU device `gpu_id` 的名字域 name scope.
    """
    with core.NameScope('gpu_{:d}'.format(gpu_id)):
        yield


@contextlib.contextmanager
def CudaScope(gpu_id):
    """
    创建 GPU device `gpu_id` 的 CUDA device scope.
    """
    gpu_dev = CudaDevice(gpu_id)
    with core.DeviceScope(gpu_dev):
        yield


@contextlib.contextmanager
def CpuScope():
    """
    创建 CPU device scope.
    """
    cpu_dev = core.DeviceOption(caffe2_pb2.CPU)
    with core.DeviceScope(cpu_dev):
        yield


def CudaDevice(gpu_id):
    """
    创建 Cuda device.
    选择 gpud_id 设备.
    """
    return core.DeviceOption(caffe2_pb2.CUDA, gpu_id)


def gauss_fill(std):
    """
    减少冗余.
    Gaussian fill helper
    """
    return ('GaussianFill', {'std': std})


def const_fill(value):
    """
    减少冗余.
    Constant fill helper.
    """
    return ('ConstantFill', {'value': value})

3. io.py

"""IO utilities."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cPickle as pickle
import hashlib
import logging
import os
import re
import sys
import urllib2

logger = logging.getLogger(__name__)

# 下载 Detectron 模型的 base url.
_DETECTRON_S3_BASE_URL = 'https://s3-us-west-2.amazonaws.com/detectron'


def save_object(obj, file_name):
    """
    序列化 Python 对象object,进行存储.
    """
    file_name = os.path.abspath(file_name)
    with open(file_name, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)


def cache_url(url_or_file, cache_dir):
    """
    根据 URL 下载文件到 cache_dir,并返回下载缓存文件的路径.
    如果参数不是 URL,则直接返回.
    """
    is_url = re.match(r'^(?:http)s?://', url_or_file, re.IGNORECASE) is not None

    if not is_url:
        return url_or_file

    url = url_or_file
    assert url.startswith(_DETECTRON_S3_BASE_URL), \
        ('Detectron only automatically caches URLs in the Detectron S3 '
         'bucket: {}').format(_DETECTRON_S3_BASE_URL)

    cache_file_path = url.replace(_DETECTRON_S3_BASE_URL, cache_dir)
    if os.path.exists(cache_file_path):
        assert_cache_file_is_ok(url, cache_file_path) # 检验 cache file 的hash 值.
        return cache_file_path

    cache_file_dir = os.path.dirname(cache_file_path)
    if not os.path.exists(cache_file_dir):
        os.makedirs(cache_file_dir)

    logger.info('Downloading remote file {} to {}'.format(url, cache_file_path))
    download_url(url, cache_file_path)
    assert_cache_file_is_ok(url, cache_file_path)
    return cache_file_path


def assert_cache_file_is_ok(url, file_path):
    """
    检验 cache file 的 hash 值是否正确.
    文件已经缓存,验证其 md3sum 是否匹配,并返回其 local path.
    """
    cache_file_md5sum = _get_file_md5sum(file_path)
    ref_md5sum = _get_reference_md5sum(url)
    assert cache_file_md5sum == ref_md5sum, \
        ('Target URL {} appears to be downloaded to the local cache file '
         '{}, but the md5 hash of the local file does not match the '
         'reference (actual: {} vs. expected: {}). You may wish to delete '
         'the cached file and try again to trigger automatic '
         'download.').format(url, file_path, cache_file_md5sum, ref_md5sum)


def _progress_bar(count, total):
    """
    显示下载进度.
    参考:
    https://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113
    """
    bar_len = 60
    filled_len = int(round(bar_len * count / float(total)))

    percents = round(100.0 * count / float(total), 1)
    bar = '=' * filled_len + '-' * (bar_len - filled_len)

    sys.stdout.write('  [{}] {}% of {:.1f}MB file  \r'.format(bar, percents, total / 1024 / 1024) )
    sys.stdout.flush()
    if count >= total:
        sys.stdout.write('\n')


def download_url(url, dst_file_path, chunk_size=8192, progress_hook=_progress_bar):
    """
    下载 URL,并写入 dst_file_path.
    参考:
    https://stackoverflow.com/questions/2028517/python-urllib2-progress-hook
    """
    response = urllib2.urlopen(url)
    total_size = response.info().getheader('Content-Length').strip()
    total_size = int(total_size)
    bytes_so_far = 0

    with open(dst_file_path, 'wb') as f:
        while 1:
            chunk = response.read(chunk_size)
            bytes_so_far += len(chunk)
            if not chunk:
                break
            if progress_hook:
                progress_hook(bytes_so_far, total_size)
            f.write(chunk)

    return bytes_so_far


def _get_file_md5sum(file_name):
    """
    计算文件的 md5 hash 值.
    """
    hash_obj = hashlib.md5()
    with open(file_name, 'r') as f:
        hash_obj.update(f.read())
    return hash_obj.hexdigest()


def _get_reference_md5sum(url):
    """
    根据惯例,url 的 md5 hash 值保存在 url + '.md5sum'.
    """
    url_md5sum = url + '.md5sum'
    md5sum = urllib2.urlopen(url_md5sum).read().strip()
    return md5sum

4. keypoints.py

"""Keypoint utilities (somewhat specific to COCO keypoints)."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import numpy as np

from core.config import cfg
import utils.blob as blob_utils


def get_keypoints():
    """
    获取 COCO kyepoints 以及其 left/right 翻转的对应映射.
    test 中 keypoints 不包括在 COCO json 中,这里进行提供.
    """
    keypoints = [ # 18 joints
        'nose',
        'left_eye',
        'right_eye',
        'left_ear',
        'right_ear',
        'left_shoulder',
        'right_shoulder',
        'left_elbow',
        'right_elbow',
        'left_wrist',
        'right_wrist',
        'left_hip',
        'right_hip',
        'left_knee',
        'right_knee',
        'left_ankle',
        'right_ankle'
    ]
    keypoint_flip_map = {
        'left_eye': 'right_eye',
        'left_ear': 'right_ear',
        'left_shoulder': 'right_shoulder',
        'left_elbow': 'right_elbow',
        'left_wrist': 'right_wrist',
        'left_hip': 'right_hip',
        'left_knee': 'right_knee',
        'left_ankle': 'right_ankle'
    }
    return keypoints, keypoint_flip_map


def get_person_class_index():
    """
    COCO 中 person class 的索引值 - 1.
    """
    return 1


def flip_keypoints(keypoints, keypoint_flip_map, keypoint_coords, width):
    """
    left/right 翻转 keypoints 坐标.
    keypoints 和 keypoints_flip_map 由 get_keypoints() 得到.
    """
    flipped_kps = keypoint_coords.copy()
    for lkp, rkp in keypoint_flip_map.items():
        lid = keypoints.index(lkp)
        rid = keypoints.index(rkp)
        flipped_kps[:, :, lid] = keypoint_coords[:, :, rid]
        flipped_kps[:, :, rid] = keypoint_coords[:, :, lid]

    # 翻转 x 坐标
    flipped_kps[:, 0, :] = width - flipped_kps[:, 0, :] - 1
    # 保持 COCO 格式,即,if visibility == 0, then x, y = 0
    inds = np.where(flipped_kps[:, 2, :] == 0)
    flipped_kps[inds[0], 0, inds[1]] = 0
    return flipped_kps


def flip_heatmaps(heatmaps):
    """
    水平翻转 heatmaps.
    """
    keypoints, flip_map = get_keypoints()
    heatmaps_flipped = heatmaps.copy()
    for lkp, rkp in flip_map.items():
        lid = keypoints.index(lkp)
        rid = keypoints.index(rkp)
        heatmaps_flipped[:, rid, :, :] = heatmaps[:, lid, :, :]
        heatmaps_flipped[:, lid, :, :] = heatmaps[:, rid, :, :]
    heatmaps_flipped = heatmaps_flipped[:, :, :, ::-1]
    return heatmaps_flipped


def heatmaps_to_keypoints(maps, rois):
    """
    从 heatmaps 得到预测的 keypoints 位置.
    输出格式为:(#rois, 4, #keypoints) 
        - 4 rows 对应于每个 keypoints 的 (x, y, logit, prob).

    该函数将 HEATMAP_SIZE x HEATMAP_SIZE image 中的离散坐标转换为连续的 keypoints 坐标.
    采用 Heckbert 1990: c = d + 0.5(其中,d 是离散坐标,c 是连续坐标.) 的变换,来保持 keypoints_to_heatmap_labels 的一致性.
    """
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    widths = np.maximum(widths, 1)
    heights = np.maximum(heights, 1)
    widths_ceil = np.ceil(widths)
    heights_ceil = np.ceil(heights)

    # NCHW to NHWC for use with OpenCV
    maps = np.transpose(maps, [0, 2, 3, 1])
    min_size = cfg.KRCNN.INFERENCE_MIN_SIZE
    xy_preds = np.zeros((len(rois), 4, cfg.KRCNN.NUM_KEYPOINTS), dtype=np.float32)
    for i in range(len(rois)):
        if min_size > 0:
            roi_map_width = int(np.maximum(widths_ceil[i], min_size))
            roi_map_height = int(np.maximum(heights_ceil[i], min_size))
        else:
            roi_map_width = widths_ceil[i]
            roi_map_height = heights_ceil[i]
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = cv2.resize(maps[i], (roi_map_width, roi_map_height),
            interpolation=cv2.INTER_CUBIC)
        # Bring back to CHW
        roi_map = np.transpose(roi_map, [2, 0, 1])
        roi_map_probs = scores_to_probs(roi_map.copy())
        w = roi_map.shape[2]
        for k in range(cfg.KRCNN.NUM_KEYPOINTS):
            pos = roi_map[k, :, :].argmax()
            x_int = pos % w
            y_int = (pos - x_int) // w
            assert (roi_map_probs[k, y_int, x_int] == roi_map_probs[k, :, :].max())
            x = (x_int + 0.5) * width_correction
            y = (y_int + 0.5) * height_correction
            xy_preds[i, 0, k] = x + offset_x[i]
            xy_preds[i, 1, k] = y + offset_y[i]
            xy_preds[i, 2, k] = roi_map[k, y_int, x_int]
            xy_preds[i, 3, k] = roi_map_probs[k, y_int, x_int]

    return xy_preds


def keypoints_to_heatmap_labels(keypoints, rois):
    """
    对 target heatmap 中的 keypoints 位置进行编码,以用于 SoftmaxWithLoss.

    将 keypoints 从连续图片坐标的半开区间 [x1, x2),映射到离散图片坐标的闭区间 [0, HEATMAP_SIZE - 1].
    采用  Heckbert 1990 ("What is the coordinate of a pixel?") 的变换方案: d = floor(c) 和 c = d + 0.5(其中,d 是离散坐标,c 是连续坐标.)
    """
    assert keypoints.shape[2] == cfg.KRCNN.NUM_KEYPOINTS

    shape = (len(rois), cfg.KRCNN.NUM_KEYPOINTS)
    heatmaps = blob_utils.zeros(shape)
    weights = blob_utils.zeros(shape)

    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 2] - rois[:, 0])
    scale_y = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 3] - rois[:, 1])

    for kp in range(keypoints.shape[2]):
        vis = keypoints[:, 2, kp] > 0
        x = keypoints[:, 0, kp].astype(np.float32)
        y = keypoints[:, 1, kp].astype(np.float32)
        # 由于使用了 floor 处理,如果 keypoints 正好位于 roi 的right 或 bottom 边界上,则将对其平移 eps,以保证其位于 groundtruth hetmap 上.
        x_boundary_inds = np.where(x == rois[:, 2])[0]
        y_boundary_inds = np.where(y == rois[:, 3])[0]
        x = (x - offset_x) * scale_x
        x = np.floor(x)
        if len(x_boundary_inds) > 0:
            x[x_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1

        y = (y - offset_y) * scale_y
        y = np.floor(y)
        if len(y_boundary_inds) > 0:
            y[y_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1

        valid_loc = np.logical_and(np.logical_and(x >= 0, y >= 0),
                                   np.logical_and(x < cfg.KRCNN.HEATMAP_SIZE, y < cfg.KRCNN.HEATMAP_SIZE))

        valid = np.logical_and(valid_loc, vis)
        valid = valid.astype(np.int32)

        lin_ind = y * cfg.KRCNN.HEATMAP_SIZE + x
        heatmaps[:, kp] = lin_ind * valid
        weights[:, kp] = valid

    return heatmaps, weights


def scores_to_probs(scores):
    """
    将 CxHxW 的 scores 转换为空间概率."""
    channels = scores.shape[0]
    for c in range(channels):
        temp = scores[c, :, :]
        max_score = temp.max()
        temp = np.exp(temp - max_score) / np.sum(np.exp(temp - max_score))
        scores[c, :, :] = temp
    return scores


def nms_oks(kp_predictions, rois, thresh):
    """
    基于 kp predictions 进行 NMS.
    """
    scores = np.mean(kp_predictions[:, 2, :], axis=1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ovr = compute_oks(kp_predictions[i], rois[i], kp_predictions[order[1:]], rois[order[1:]])
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


def compute_oks(src_keypoints, src_roi, dst_keypoints, dst_roi):
    """
    计算预测的 keypoints 关于 gt_keypoints 的 OKS.
    src_keypoints: 4xK
    src_roi: 4x1
    dst_keypoints: Nx4xK
    dst_roi: Nx4
    """

    sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
    vars = (sigmas * 2)**2

    # 面积area
    src_area = (src_roi[2] - src_roi[0] + 1) * (src_roi[3] - src_roi[1] + 1)

    # 如果 keypoints visible, 计算 per-keypoints 间的距离.
    dx = dst_keypoints[:, 0, :] - src_keypoints[0, :]
    dy = dst_keypoints[:, 1, :] - src_keypoints[1, :]

    e = (dx**2 + dy**2) / vars / (src_area + np.spacing(1)) / 2
    e = np.sum(np.exp(-e), axis=1) / e.shape[1]

    return e

5. segm.py

"""
处理 COCO 格式的 segmentation masks 的函数.
所使用到的项有:
    - mask: 2D numpy array 格式的二值 mask. a binary mask encoded as a 2D numpy array
    - segm: segmentation mask 格式,(COCO 有两种 segmentation mask 格式:polygon or RLE)
        - polygon: COCO 的多边形格式(polygon format)
        - RLE: COCO's run length encoding format
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

import pycocotools.mask as mask_util


def flip_segms(segms, height, width):
    """
    对 masks 列表中的各 mask 进行 left/right 翻转.
    """
    def _flip_poly(poly, width):
        flipped_poly = np.array(poly)
        flipped_poly[0::2] = width - np.array(poly[0::2]) - 1
        return flipped_poly.tolist()

    def _flip_rle(rle, height, width):
        if 'counts' in rle and type(rle['counts']) == list:
            # Magic RLE 格式,利用 COCO API 的 showAnns 函数来处理.
            rle = mask_util.frPyObjects([rle], height, width)
        mask = mask_util.decode(rle)
        mask = mask[:, ::-1, :]
        rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
        return rle

    flipped_segms = []
    for segm in segms:
        if type(segm) == list:
            # 多边形格式 Polygon format
            flipped_segms.append([_flip_poly(poly, width) for poly in segm])
        else:
            # RLE format
            assert type(segm) == dict
            flipped_segms.append(_flip_rle(segm, height, width))
    return flipped_segms


def polys_to_mask(polygons, height, width):
    """
    将 COCO 的多边形分割(polygon segmentation) 格式转换为数据类型为 np.float32 的 2D numpy array 的二值 mask.
    多边形分割(polygon segmentation) 被理解为在 height x width 图片中的封闭区域.
    得到的 mask shape 是 (height, width).
    """
    rle = mask_util.frPyObjects(polygons, height, width)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask


def mask_to_bbox(mask):
    """
    计算二值 mask 的边界框 bounding box.
    """
    xs = np.where(np.sum(mask, axis=0) > 0)[0]
    ys = np.where(np.sum(mask, axis=1) > 0)[0]

    if len(xs) == 0 or len(ys) == 0:
        return None

    x0 = xs[0]
    x1 = xs[-1]
    y0 = ys[0]
    y1 = ys[-1]
    return np.array((x0, y0, x1, y1), dtype=np.float32)


def polys_to_mask_wrt_box(polygons, box, M):
    """
    将 COCO 多边形分割(polygon segmentation)格式转换为数据类型为 np.float32 的 2D numpy array 的二值 mask.
    多边形分割(polygon segmentation) 被理解为在给定 box 的封闭区域,大小为 M x M 的 mask.
    得到的 mask shape 是 (M, M).
    """
    w = box[2] - box[0]
    h = box[3] - box[1]

    w = np.maximum(w, 1)
    h = np.maximum(h, 1)

    polygons_norm = []
    for poly in polygons:
        p = np.array(poly, dtype=np.float32)
        p[0::2] = (p[0::2] - box[0]) * M / w
        p[1::2] = (p[1::2] - box[1]) * M / h
        polygons_norm.append(p)

    rle = mask_util.frPyObjects(polygons_norm, M, M)
    mask = np.array(mask_util.decode(rle), dtype=np.float32)
    # Flatten in case polygons was a list
    mask = np.sum(mask, axis=2)
    mask = np.array(mask > 0, dtype=np.float32)
    return mask


def polys_to_boxes(polys):
    """
    将多边形列表(polygons list) 转换为边界框 bounding boxes array.
    """
    boxes_from_polys = np.zeros((len(polys), 4), dtype=np.float32)
    for i in range(len(polys)):
        poly = polys[i]
        x0 = min(min(p[::2]) for p in poly)
        x1 = max(max(p[::2]) for p in poly)
        y0 = min(min(p[1::2]) for p in poly)
        y1 = max(max(p[1::2]) for p in poly)
        boxes_from_polys[i, :] = [x0, y0, x1, y1]

    return boxes_from_polys


def rle_mask_voting(top_masks, all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG'):
    """
    组合 all_masks 中多个重叠 masks 来返回新的 mask(对应于 top_masks).
    支持两种 masks 组合方法:
        - AVG - 使用重叠 mask 像素的加权平均值(weighted average of overlapping mask pixels)
        - UNION - 所有 mask 像素的并集(union of all mask pixels)s.
    """
    if len(top_masks) == 0:
        return

    all_not_crowd = [False] * len(all_masks)
    top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd)
    decoded_all_masks = [np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks]
    decoded_top_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks
    ]
    all_boxes = all_dets[:, :4].astype(np.int32)
    all_scores = all_dets[:, 4]

    # Fill box support with weights
    mask_shape = decoded_all_masks[0].shape
    mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1]))
    for k in range(len(all_masks)):
        ref_box = all_boxes[k]
        x_0 = max(ref_box[0], 0)
        x_1 = min(ref_box[2] + 1, mask_shape[1])
        y_0 = max(ref_box[1], 0)
        y_1 = min(ref_box[3] + 1, mask_shape[0])
        mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k]
    mask_weights = np.maximum(mask_weights, 1e-5)

    top_segms_out = []
    for k in range(len(top_masks)):
        # 空 mask 的极端情况 Corner case of empty mask
        if decoded_top_masks[k].sum() == 0:
            top_segms_out.append(top_masks[k])
            continue

        inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]
        # Only matches itself
        if len(inds_to_vote) == 1:
            top_segms_out.append(top_masks[k])
            continue

        masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote]
        if method == 'AVG':
            ws = mask_weights[inds_to_vote]
            soft_mask = np.average(masks_to_vote, axis=0, weights=ws)
            mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)
        elif method == 'UNION':
            # Any pixel that's on joins the mask
            soft_mask = np.sum(masks_to_vote, axis=0)
            mask = np.array(soft_mask > 1e-5, dtype=np.uint8)
        else:
            raise NotImplementedError('Method {} is unknown'.format(method))
        rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
        top_segms_out.append(rle)

    return top_segms_out


def rle_mask_nms(masks, dets, thresh, mode='IOU'):
    """
    基于 masks 间的重叠度量(overlap measurement) 进行贪婪 NMS 处理.
    度量类型有 mode 来定义,有:
        - 标准 IoU(standard intersection over union) 
        - IOMA (intersection over mininum area)
    """
    if len(masks) == 0:
        return []
    if len(masks) == 1:
        return [0]

    if mode == 'IOU':
        # 计算 ious[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2))
        all_not_crowds = [False] * len(masks)
        ious = mask_util.iou(masks, masks, all_not_crowds)
    elif mode == 'IOMA':
        # 计算 ious[m1, m2] = area(intersect(m1, m2)) / min(area(m1), area(m2))
        all_crowds = [True] * len(masks)
        # ious[m1, m2] = area(intersect(m1, m2)) / area(m2)
        ious = mask_util.iou(masks, masks, all_crowds)
        # ... = max(area(intersect(m1, m2)) / area(m2),
        #           area(intersect(m2, m1)) / area(m1))
        ious = np.maximum(ious, ious.transpose())
    elif mode == 'CONTAINMENT':
        # 计算 ious[m1, m2] = area(intersect(m1, m2)) / area(m2)
        # 度量了 m2 在 m1 中的部分(Which measures how much m2 is contained inside m1)
        all_crowds = [True] * len(masks)
        ious = mask_util.iou(masks, masks, all_crowds)
    else:
        raise NotImplementedError('Mode {} is unknown'.format(mode))

    scores = dets[:, 4]
    order = np.argsort(-scores)

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ovr = ious[i, order[1:]]
        inds_to_keep = np.where(ovr <= thresh)[0]
        order = order[inds_to_keep + 1]

    return keep


def rle_masks_to_boxes(masks):
    """
    计算在 RLE 编码的 masks 列表中各 mask 的边界框bounding box.
    """
    if len(masks) == 0:
        return []

    decoded_masks = [np.array(mask_util.decode(rle), dtype=np.float32) for rle in masks]

    def get_bounds(flat_mask):
        inds = np.where(flat_mask > 0)[0]
        return inds.min(), inds.max()

    boxes = np.zeros((len(decoded_masks), 4))
    keep = [True] * len(decoded_masks)
    for i, mask in enumerate(decoded_masks):
        if mask.sum() == 0:
            keep[i] = False
            continue
        flat_mask = mask.sum(axis=0)
        x0, x1 = get_bounds(flat_mask)
        flat_mask = mask.sum(axis=1)
        y0, y1 = get_bounds(flat_mask)
        boxes[i, :] = (x0, y0, x1, y1)

    return boxes, np.where(keep)[0]

本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。

发表于

我来说两句

0 条评论
登录 后参与评论

相关文章

来自专栏跟着阿笨一起玩NET

UML建模系列文章总结

本文转载:http://www.cnblogs.com/ywqu/tag/UML建模/

731
来自专栏前端儿

鸡兔同笼

已知鸡和兔的总数量为n,总腿数为m。输入n和m,依次输出鸡和兔的数目,如果无解,则输出“No answer”(不要引号)。

1221
来自专栏二进制文集

Caffe MNIST 简要分析

MNIST database,一个手写数字的图片数据库,每一张图片都是0到9中的单个数字。每一张都是抗锯齿(Anti-aliasing)的灰度图,图片大小282...

1222
来自专栏HT

电信网络拓扑图自动布局之总线

在前面《电信网络拓扑图自动布局》一文中,我们大体介绍了 HT for Web 电信网络拓扑图自动布局的相关知识,但是都没有深入地描述各种自动布局的用法,我们今天...

2698
来自专栏hightopo

电信网络拓扑图自动布局之总线

2184
来自专栏有趣的Python

13- 深度学习之神经网络核心原理与算法-TensorFlow介绍与框架挑选

2858
来自专栏瓜大三哥

基于FPGA的非线性滤波器(四)

基于FPGA的非线性滤波器(四) 之并行全比较排序模块设计 2.sort_2d模块设计 对于二维运算,采用同样的思路来处理,整个计算步骤如下: (1)计算一维行...

2239
来自专栏数值分析与有限元编程

有限元 | 梁单元有限元程序算例

之前发过一个梁单元有限元分析程序。在好友测试时发现一个问题,就是程序中的real型变量默认为kind=4,我们姑且称为单精度型。这样限制了程序的使用,在一些问题...

3038
来自专栏编程

隐马尔科夫模型 python 实现简单拼音输入法

关键时刻,第一时间送达! ? 在网上看到一篇关于隐马尔科夫模型的介绍,觉得简直不能再神奇,又在网上找到大神的一篇关于如何用隐马尔可夫模型实现中文拼音输入的博客(...

3480
来自专栏漫漫深度学习路

MXNET学习笔记(一):Module类(1)

Module 是 mxnet 提供给用户的一个高级封装的类。有了它,我们可以很容易的来训练模型。 Module 包含以下单元的一个 wraper symbol ...

2735

扫码关注云+社区