前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >OCR -- 训练数据扩增的方法

OCR -- 训练数据扩增的方法

作者头像
MachineLP
发布2019-05-26 15:30:13
1.1K0
发布2019-05-26 15:30:13
举报
文章被收录于专栏:小鹏的专栏小鹏的专栏

结交了一些大神级的人物, 每次交流都有很多收获,感谢一路相伴,感谢带我一起成长。

(1)透视变换

(2)gauss_blur

(3)norm_blur

(4)模糊图像,模拟小图片放大的效果

(5)颜色翻转、滤波等等

具体代码实现如下:

(1)透视变换 (具体原理可查看:仿射变换,透视变换:二维坐标到二维坐标之间的线性变换,可用于landmark人脸矫正)

代码语言:python
复制
#!/usr/bin/env python3
from functools import reduce
import numpy as np
import cv2
import math
import random


# http://planning.cs.uiuc.edu/node102.html
def get_rotate_matrix(x, y, z):
    """
    Build a 4x4 homogeneous rotation matrix from three angles in degrees.

    The result is the product ``M_x * M_y * M_z``, so when it is applied to
    a column vector the Z rotation acts first, then Y, then X.

    :param x: rotation angle around the X-axis, in degrees
    :param y: rotation angle around the Y-axis, in degrees
    :param z: rotation angle around the Z-axis, in degrees
    :return: 4x4 ``np.matrix`` combining the three rotations
    """
    rad_x, rad_y, rad_z = (math.radians(angle) for angle in (x, y, z))

    cos_x, sin_x = math.cos(rad_x), math.sin(rad_x)
    cos_y, sin_y = math.cos(rad_y), math.sin(rad_y)
    cos_z, sin_z = math.cos(rad_z), math.sin(rad_z)

    rot_x = np.matrix([
        [1., 0., 0., 0.],
        [0., cos_x, -sin_x, 0.],
        [0., sin_x, cos_x, 0.],
        [0., 0., 0., 1.],
    ])
    rot_y = np.matrix([
        [cos_y, 0., sin_y, 0.],
        [0., 1., 0., 0.],
        [-sin_y, 0., cos_y, 0.],
        [0., 0., 0., 1.],
    ])
    rot_z = np.matrix([
        [cos_z, -sin_z, 0., 0.],
        [sin_z, cos_z, 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.],
    ])

    # np.matrix overloads ``*`` as matrix multiplication.
    return rot_x * rot_y * rot_z


def cliped_rand_norm(mu=0, sigma3=1):
    """
    Draw one sample from a normal distribution, clipped to ``mu +/- sigma3``.

    :param mu: mean of the distribution
    :param sigma3: three standard deviations; the sample is clipped to
        ``[mu - sigma3, mu + sigma3]``
    :return: the clipped sample as a NumPy float scalar
    """
    # Standard deviation is one third of the requested spread.
    sigma = sigma3 / 3
    sample = sigma * np.random.randn() + mu
    # Clip around the mean (not around zero) so a non-zero ``mu`` is honored;
    # for mu == 0 this is the same interval as before.
    return np.clip(sample, mu - sigma3, mu + sigma3)


def warpPerspective(src, M33, sl, gpu):
    """
    Warp ``src`` with the 3x3 matrix ``M33`` into an ``sl`` x ``sl`` output.

    :param src: source image as a numpy array
    :param M33: 3x3 perspective matrix
    :param sl: side length of the square output image
    :param gpu: when truthy, use the project's CUDA wrapper instead of OpenCV
    :return: warped image
    """
    out_size = (sl, sl)

    if not gpu:
        return cv2.warpPerspective(src, M33, out_size, flags=cv2.INTER_CUBIC)

    # Imported lazily so the CPU path does not need the GPU module.
    from libs.gpu.GpuWrapper import cudaWarpPerspectiveWrapper
    return cudaWarpPerspectiveWrapper(src.astype(np.uint8), M33, out_size, cv2.INTER_CUBIC)


# https://stackoverflow.com/questions/17087446/how-to-calculate-perspective-transform-for-opencv-from-rotation-angles
# https://nbviewer.jupyter.org/github/manisoftwartist/perspectiveproj/blob/master/perspective.ipynb
# http://planning.cs.uiuc.edu/node102.html
class PerspectiveTransform(object):
    """
    Perspective warp described by rotation angles around the X/Y/Z axes
    (degrees), an output scale factor and a vertical field of view (degrees).
    """

    def __init__(self, x, y, z, scale, fovy):
        self.x, self.y, self.z = x, y, z
        self.scale = scale
        self.fovy = fovy

    def transform_image(self, src, gpu=False):
        """
        Warp ``src`` with the transform configured on this instance.

        :param src: 2D or 3D numpy image
        :param gpu: forwarded to ``warpPerspective``
        :return: (warped image, 3x3 warp matrix, warped corner points)
        """
        if len(src.shape) > 2:
            height, width, _channels = src.shape
        else:
            height, width = src.shape

        matrix, side, _, corners_out = self.get_warp_matrix(
            width, height, self.x, self.y, self.z, self.scale, self.fovy)

        # The output canvas is a square whose side is truncated to an int.
        warped = warpPerspective(src, matrix, int(side), gpu)

        return warped, matrix, corners_out

    def transform_pnts(self, pnts, M33):
        """
        :param pnts: 2D pnts, left-top, right-top, right-bottom, left-bottom
        :param M33: output from transform_image()
        :return: 2D pnts apply perspective transform
        """
        # cv2.perspectiveTransform is fed a leading batch axis here, which is
        # stripped again from the result.
        batched = np.array([np.asarray(pnts, dtype=np.float32)])
        return cv2.perspectiveTransform(batched, M33)[0]

    def get_warped_pnts(self, ptsIn, ptsOut, W, H, sidelength):
        """
        Turn the first four 3D points of ``ptsIn`` / ``ptsOut`` into float32
        2D point arrays.

        Input points are shifted by (W/2, H/2); output points are shifted by
        (1, 1) and scaled by ``sidelength / 2``.

        :return: (pin, pout), both float32 arrays of shape (4, 2)
        """
        first_in = ptsIn[0, :]
        first_out = ptsOut[0, :]

        # Keep only the x and y components of each of the four corners.
        corners_in = np.array([[first_in[i, 0], first_in[i, 1]] for i in range(4)])
        corners_out = np.array([[first_out[i, 0], first_out[i, 1]] for i in range(4)])

        pin = (corners_in + [W / 2., H / 2.]).astype(np.float32)
        pout = ((corners_out + [1., 1.]) * (0.5 * sidelength)).astype(np.float32)

        return pin, pout

    def get_warp_matrix(self, W, H, x, y, z, scale, fV):
        """
        Compute the 3x3 perspective matrix for a W x H image.

        :param W: image width
        :param H: image height
        :param x: rotation around the X-axis, degrees
        :param y: rotation around the Y-axis, degrees
        :param z: rotation around the Z-axis, degrees
        :param scale: multiplier applied to the output side length
        :param fV: field of view, degrees
        :return: (M33, side length of the output square,
                  input corner points, output corner points)
        """
        half_fov = np.deg2rad(fV / 2.)
        diag = np.sqrt(W * W + H * H)
        side_length = scale * diag / np.cos(half_fov)
        dist = diag / (2.0 * np.sin(half_fov))
        near = dist - (diag / 2.0)
        far = dist + (diag / 2.0)

        # Translation along the Z-axis by -dist.
        translate = np.eye(4, 4)
        translate[2, 3] = -dist

        # Rotation around x, y, z.
        rotate = get_rotate_matrix(x, y, z)

        # Projection matrix.
        # NOTE(review): project[3, 3] keeps the 1.0 from np.eye; confirm this
        # is intended for the projection being modelled.
        focal = 1.0 / np.tan(half_fov)
        project = np.eye(4, 4)
        project[0, 0] = focal
        project[1, 1] = focal
        project[2, 2] = -(far + near) / (far - near)
        project[2, 3] = -(2.0 * far * near) / (far - near)
        project[3, 2] = -1.0

        # Full 4x4 transform: projection * translation * rotation.
        M44 = np.matmul(np.matmul(project, translate), rotate)

        # Shape is (1, 4, 3) because that is the layout passed to
        # cv2.perspectiveTransform(); the image corners are centered on the
        # origin with z = 0.
        half_w, half_h = W / 2., H / 2.
        ptsIn = np.array([[
            [-half_w, half_h, 0.],
            [half_w, half_h, 0.],
            [half_w, -half_h, 0.],
            [-half_w, -half_h, 0.]
        ]])
        ptsOut = cv2.perspectiveTransform(ptsIn, M44)

        pin_2d, pout_2d = self.get_warped_pnts(ptsIn, ptsOut, W, H, side_length)

        # check float32 otherwise OpenCV throws an error
        assert (pin_2d.dtype == np.float32)
        assert (pout_2d.dtype == np.float32)
        M33 = cv2.getPerspectiveTransform(pin_2d, pout_2d).astype(np.float32)

        return M33, side_length, pin_2d, pout_2d

def apply_perspective_transform(img, text_box_pnts, max_x, max_y, max_z, gpu=False):
    """
    Apply a random perspective transform to an image and its text box.

    One rotation angle per axis is sampled with ``cliped_rand_norm`` (mean 0,
    clipped to ``[-max, max]``) and fed to ``PerspectiveTransform`` with
    ``scale=1.0`` and ``fovy=50``.

    :param img: origin numpy image
    :param text_box_pnts: four corner points of text
    :param max_x: max rotate angle around X-axis, in degrees
    :param max_y: max rotate angle around Y-axis, in degrees
    :param max_z: max rotate angle around Z-axis, in degrees
    :param gpu: forwarded to ``PerspectiveTransform.transform_image``
    :return:
        dst_img: warped image
        dst_img_pnts: points of whole word image after apply perspective transform
        dst_text_pnts: points of text after apply perspective transform
    """
    # cliped_rand_norm is defined in this module; the previous
    # ``math_utils.`` prefix referred to a module that is never imported here.
    x, y, z = (cliped_rand_norm(0, limit) for limit in (max_x, max_y, max_z))

    transformer = PerspectiveTransform(x, y, z, scale=1.0, fovy=50)

    dst_img, M33, dst_img_pnts = transformer.transform_image(img, gpu)
    dst_text_pnts = transformer.transform_pnts(text_box_pnts, M33)

    return dst_img, dst_img_pnts, dst_text_pnts

(2)gauss_blur

代码语言:python
复制
def apply_gauss_blur(img, ks=None):
    """
    Blur ``img`` with a Gaussian kernel whose size is picked at random.

    :param img: source image as a numpy array
    :param ks: candidate kernel sizes; defaults to ``[7, 9, 11, 13]``
    :return: blurred image
    """
    kernel_sizes = [7, 9, 11, 13] if ks is None else ks
    ksize = random.choice(kernel_sizes)

    # Only small kernels (<= 3) get a random sigma; otherwise sigma is 0,
    # which per the OpenCV docs means it is derived from the kernel size.
    sigma = random.choice([0, 1, 2, 3, 4, 5, 6, 7]) if ksize <= 3 else 0

    return cv2.GaussianBlur(img, (ksize, ksize), sigma)

(3)norm_blur

代码语言:python
复制
def apply_norm_blur(img, ks=None):
    """
    Blur ``img`` with a square box filter whose size is picked at random.

    :param img: source image as a numpy array
    :param ks: candidate kernel sizes; defaults to ``[2, 3]``
    :return: blurred image
    """
    # A kernel size of 1 would leave the image unchanged, hence the defaults.
    candidates = [2, 3] if ks is None else ks
    size = random.choice(candidates)
    return cv2.blur(img, (size, size))

(4)模糊图像,模拟小图片放大的效果

代码语言:python
复制
def apply_prydown(img, max_scale=2.0):
    """
    Blur an image by shrinking it and resizing it back to its original size,
    imitating a small picture that has been enlarged.

    :param img: source image as a numpy array (height, width[, channels])
    :param max_scale: upper bound of the shrink factor, which is drawn
        uniformly from ``[1, max_scale]``.
        NOTE(review): this used to be read from ``self.cfg.prydown.max_scale``,
        which does not exist in a module-level function; 2.0 is a placeholder
        default -- confirm against the project configuration.
    :return: image of the same width and height as ``img``
    """
    scale = random.uniform(1, max_scale)
    height, width = img.shape[:2]

    # Never ask OpenCV for a zero-sized image when the source is tiny.
    small_size = (max(1, int(width / scale)), max(1, int(height / scale)))

    small = cv2.resize(img, small_size, interpolation=cv2.INTER_AREA)
    return cv2.resize(small, (width, height), interpolation=cv2.INTER_AREA)

(5)颜色翻转、滤波等等

代码语言:python
复制
def reverse_img(word_img):
    """
    Invert the intensities of an image around a randomly jittered white level.

    The white level is ``255 + offset`` with ``offset`` drawn from
    ``np.random.randint(-10, 10)`` (i.e. -10..9).

    :param word_img: image as a numpy array (or array-like)
    :return: inverted image. For integer inputs the result is clipped to the
        0..255 range (further limited by the dtype) and has the same dtype as
        the input; other dtypes are returned as ``255 + offset - word_img``.
    """
    img = np.asarray(word_img)
    offset = np.random.randint(-10, 10)

    if not np.issubdtype(img.dtype, np.integer):
        return 255 + offset - img

    # Subtract in a wide integer type: doing it directly on uint8 either wraps
    # around, silently changes dtype, or raises, depending on the NumPy
    # version and on whether 255 + offset exceeds 255.
    limits = np.iinfo(img.dtype)
    flipped = (255 + offset) - img.astype(np.int64)
    clipped = np.clip(flipped, max(0, limits.min), min(255, limits.max))
    return clipped.astype(img.dtype)


def apply_emboss(word_img):
    """
    Filter ``word_img`` with a fixed 3x3 emboss kernel.

    :param word_img: source image as a numpy array
    :return: filtered image (``ddepth=-1`` is passed to ``cv2.filter2D``)
    """
    kernel = np.array([[-2, -1, 0], [-1, 1, 1], [0, 1, 2]])
    return cv2.filter2D(word_img, -1, kernel)

def apply_sharp(word_img):
    """
    Filter ``word_img`` with a fixed 3x3 sharpening kernel
    (9 at the center, -1 everywhere else).

    :param word_img: source image as a numpy array
    :return: filtered image (``ddepth=-1`` is passed to ``cv2.filter2D``)
    """
    kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
    return cv2.filter2D(word_img, -1, kernel)
本文参与 腾讯云自媒体分享计划,分享自作者个人站点/博客。
原始发表:2018年10月27日,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档