NV12图像的最近邻插值缩放与双线性插值缩放

Abalone

修改于 2022-07-15 02:32:44

2K0

修改于 2022-07-15 02:32:44

文章被收录于专栏：影像技术栈影像技术栈

导言

本文是一个优化的NV12图像缩放程序。

图像缩放算法有多种类型。算法越复杂，图像质量损失越小，但性能开销也越大。我决定选择最简单的“最近邻插值”和双线性插值来调整NV12图像的大小。

背景

NV12是一种YUV系列格式。在阅读本文之前，你需要对该格式有一些基本的概念，并且知道什么是插值缩放算法。

如果您之前尝试过RGBA格式的图像缩放，您会更容易理解我的程序是如何工作的。

NV12格式

内存中的NV12格式图像数组类似于：YYYYYYYY…UVUV…。NV12是一种平面格式，也被称为YUV420sp。它有三个平面：

内存中Y平面的长度为“宽度*高度”。
内存中U或V平面的长度为“宽度*高度/4”。
U和V是交错的。
如果丢弃U和V平面，Y平面就是灰度值。

因此“宽度 * 高度 * 3 / 2”是图像的总内存长度。以下是更清晰的8*4分辨率示例：逻辑视图：

显然，宽度 = 8，高度 = 4

ylen = 8*4，ulen = 8*4/4，vlen = 8*4/4。
total_length = ylen + ulen + vlen = ylen * 3 / 2

每四个Y值匹配相同的U值和V值。

例如：

Y00 Y01 Y10 Y11共享U00和V00
Y20 Y21 Y30 Y31共享U10和V10

算法

最近邻插值

复制代码

srcX = dstX * (srcWidth / dstWidth), srcY = dstY * (srcHeight / dstHeight)

这个比例通常带有小数部分。该算法只需使用“四舍五入”，将源图像中最近的像素值存储到目标图像数组中。因此，效果不会很好，通常会有一些严重的马赛克。

双线性插值

双线性插值同时使用小数部分和整数部分，根据四个相邻像素计算最终像素值，小数部分用作权重。它可以消除锯齿和马赛克。

复制代码

input: src_nv12_array, src_width, src_height,dest_width,dest_height
output: dst_nv12_array

代码

这是一个C版优化程序。

使用restrict和register关键字
使用移位操作（定点数运算）来代替浮点除法
将不相关的代码移出内部循环
最好使用循环中的逻辑运算，而不是算术运算

C++

收缩▲复制代码

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>

/* Byte type used for raw NV12 samples; declared locally (no <stdint.h> include is visible in this file). */
typedef unsigned char uint8_t;

/**
 * Scales an NV12 frame with nearest-neighbour sampling.
 *
 * The source position of every destination sample is computed in 16.16
 * fixed point, so no floating-point division is needed per pixel.
 *
 * Layout expected for both buffers: a width*height luma (Y) plane followed
 * by an interleaved UVUV... chroma plane with one U/V pair per 2x2 luma
 * block and a row stride equal to the image width. Dimensions are expected
 * to be even; for odd sizes only complete 2x2 blocks receive chroma.
 *
 * @param src input nv12 raw data array (srcWidth * srcHeight * 3 / 2 bytes)
 * @param dst output nv12 raw data result (dstWidth * dstHeight * 3 / 2 bytes),
 * the memory need to be allocated outside of the function
 * @param srcWidth width of the input nv12 image
 * @param srcHeight height of the input nv12 image
 * @param dstWidth width of the output nv12 image
 * @param dstHeight height of the output nv12 image
 *
 * The function returns without touching dst when a pointer is NULL or any
 * dimension is not positive.
 */
void nv12_nearest_scale(uint8_t* __restrict src, uint8_t* __restrict dst,
                        int srcWidth, int srcHeight, int dstWidth, int
dstHeight)//restrict keyword is for compiler to optimize program
{
    if (!src || !dst || srcWidth <= 0 || srcHeight <= 0 ||
        dstWidth <= 0 || dstHeight <= 0)
    {
        return;
    }

    const size_t sw = (size_t)srcWidth;
    const size_t sh = (size_t)srcHeight;
    const size_t dw = (size_t)dstWidth;
    const size_t dh = (size_t)dstHeight;

    /* 16.16 fixed-point source step per destination sample. The shift is done
     * in 64 bits so large widths cannot overflow; the +1 compensates for the
     * truncating division. */
    const unsigned long long xStep = (((unsigned long long)sw) << 16) / dw + 1;
    const unsigned long long yStep = (((unsigned long long)sh) << 16) / dh + 1;

    const uint8_t *srcUV = src + sh * sw; /* start of the source UV plane */
    uint8_t *dstUV = dst + dh * dw;       /* start of the destination UV plane */

    /* Luma plane: every destination sample copies its nearest source sample. */
    for (size_t y = 0; y < dh; ++y)
    {
        size_t srcy = (size_t)((y * yStep) >> 16);
        if (srcy >= sh) /* the +1 bias can step past the last row on big upscales */
            srcy = sh - 1;

        const uint8_t *srcRow = src + srcy * sw;
        uint8_t *dstRow = dst + y * dw;

        for (size_t x = 0; x < dw; ++x)
        {
            size_t srcx = (size_t)((x * xStep) >> 16);
            if (srcx >= sw)
                srcx = sw - 1;
            dstRow[x] = srcRow[srcx];
        }
    }

    /* Chroma plane: one U/V pair per 2x2 luma block, taken from the source
     * pair that covers the block's top-left luma sample. */
    const size_t srcPairs = sw / 2;      /* U/V pairs per source chroma row */
    const size_t srcChromaRows = sh / 2; /* rows in the source chroma plane */
    if (srcPairs == 0 || srcChromaRows == 0)
        return;

    for (size_t y = 0; y + 1 < dh; y += 2)
    {
        size_t cy = (size_t)((y * yStep) >> 16) / 2;
        if (cy >= srcChromaRows)
            cy = srcChromaRows - 1;

        const uint8_t *srcRow = srcUV + cy * sw;
        uint8_t *dstRow = dstUV + (y / 2) * dw;

        for (size_t x = 0; x + 1 < dw; x += 2)
        {
            size_t cx = (size_t)((x * xStep) >> 16) / 2;
            if (cx >= srcPairs)
                cx = srcPairs - 1;

            dstRow[x] = srcRow[2 * cx];         /* U */
            dstRow[x + 1] = srcRow[2 * cx + 1]; /* V */
        }
    }
}

/* Blends four neighbouring samples using 8-bit fractional weights
 * (fx, fy in 0..255); p00/p10 are the upper pair, p01/p11 the lower pair.
 * The weights sum to 1 << 16, so the result stays within 0..255. */
static int nv12_blend4(int p00, int p10, int p01, int p11, int fx, int fy)
{
    return ((0x100 - fx) * (0x100 - fy) * p00
            + fx * (0x100 - fy) * p10
            + (0x100 - fx) * fy * p01
            + fx * fy * p11) >> 16;
}

/**
 * Scales an NV12 frame with bilinear interpolation.
 *
 * Each output sample is a weighted mix of the four nearest source samples.
 * Source positions are tracked in 16.16 fixed point and the top 8 fraction
 * bits are used as blend weights. Neighbour indices are clamped at the right
 * and bottom edges so no read leaves the plane being sampled.
 *
 * The luma plane is interpolated at full resolution. The interleaved UV
 * plane is interpolated at its own half resolution (one U/V pair per 2x2
 * luma block, row stride equal to the image width), with U and V blended
 * independently. Dimensions are expected to be even; for odd sizes only
 * complete 2x2 blocks receive chroma.
 *
 * @param src input nv12 raw data array (srcWidth * srcHeight * 3 / 2 bytes)
 * @param dst output nv12 raw data, allocated by the caller
 *            (dstWidth * dstHeight * 3 / 2 bytes)
 * @param srcWidth width of the input nv12 image
 * @param srcHeight height of the input nv12 image
 * @param dstWidth width of the output nv12 image
 * @param dstHeight height of the output nv12 image
 *
 * The function returns without touching dst when a pointer is NULL or any
 * dimension is not positive.
 */
void nv12_bilinear_scale (uint8_t* src, uint8_t* dst,
        int srcWidth, int srcHeight, int dstWidth,int dstHeight)
{
    if (!src || !dst || srcWidth <= 0 || srcHeight <= 0 ||
        dstWidth <= 0 || dstHeight <= 0)
    {
        return;
    }

    const size_t sw = (size_t)srcWidth;
    const size_t sh = (size_t)srcHeight;
    const size_t dw = (size_t)dstWidth;
    const size_t dh = (size_t)dstHeight;

    const uint8_t *src_uv = src + sh * sw; /* start of the source UV plane */
    uint8_t *dst_uv = dst + dh * dw;       /* start of the destination UV plane */

    /* ---- luma plane ---- */
    const unsigned long long xStep = (((unsigned long long)sw) << 16) / dw;
    const unsigned long long yStep = (((unsigned long long)sh) << 16) / dh;

    for (size_t j = 0; j < dh; ++j)
    {
        const unsigned long long posY = j * yStep;
        const size_t oy = (size_t)(posY >> 16);
        const size_t oy1 = (oy + 1 < sh) ? oy + 1 : oy; /* clamp at bottom edge */
        const int fy = (int)((posY >> 8) & 0xFF);
        const uint8_t *row0 = src + oy * sw;
        const uint8_t *row1 = src + oy1 * sw;
        uint8_t *out = dst + j * dw;

        for (size_t i = 0; i < dw; ++i)
        {
            const unsigned long long posX = i * xStep;
            const size_t ox = (size_t)(posX >> 16);
            const size_t ox1 = (ox + 1 < sw) ? ox + 1 : ox; /* clamp at right edge */
            const int fx = (int)((posX >> 8) & 0xFF);

            out[i] = (uint8_t)nv12_blend4(row0[ox], row0[ox1],
                                          row1[ox], row1[ox1], fx, fy);
        }
    }

    /* ---- chroma plane (half resolution, interleaved U/V) ---- */
    const size_t csw = sw / 2; /* U/V pairs per source chroma row */
    const size_t csh = sh / 2; /* source chroma rows */
    const size_t cdw = dw / 2; /* U/V pairs per destination chroma row */
    const size_t cdh = dh / 2; /* destination chroma rows */
    if (csw == 0 || csh == 0 || cdw == 0 || cdh == 0)
        return;

    const unsigned long long cxStep = (((unsigned long long)csw) << 16) / cdw;
    const unsigned long long cyStep = (((unsigned long long)csh) << 16) / cdh;

    for (size_t cj = 0; cj < cdh; ++cj)
    {
        const unsigned long long posY = cj * cyStep;
        const size_t cy = (size_t)(posY >> 16);
        const size_t cy1 = (cy + 1 < csh) ? cy + 1 : cy;
        const int fy = (int)((posY >> 8) & 0xFF);
        const uint8_t *row0 = src_uv + cy * sw;
        const uint8_t *row1 = src_uv + cy1 * sw;
        uint8_t *out = dst_uv + cj * dw;

        for (size_t ci = 0; ci < cdw; ++ci)
        {
            const unsigned long long posX = ci * cxStep;
            const size_t cx = (size_t)(posX >> 16);
            const size_t cx1 = (cx + 1 < csw) ? cx + 1 : cx;
            const int fx = (int)((posX >> 8) & 0xFF);
            const size_t a = 2 * cx;  /* byte offset of the left U/V pair */
            const size_t b = 2 * cx1; /* byte offset of the right U/V pair */

            out[2 * ci] = (uint8_t)nv12_blend4(row0[a], row0[b],
                                               row1[a], row1[b], fx, fy);
            out[2 * ci + 1] = (uint8_t)nv12_blend4(row0[a + 1], row0[b + 1],
                                                   row1[a + 1], row1[b + 1], fx, fy);
        }
    }
}

/**
 * Validates the arguments and scales one NV12 frame from sw x sh to dw x dh.
 *
 * @param src source NV12 frame
 * @param dst destination NV12 buffer, allocated by the caller
 * @param sw  source width
 * @param sh  source height
 * @param dw  destination width
 * @param dh  destination height
 * @return 0 on success, -1 (after printing "params error") when a pointer is
 *         NULL or any dimension is zero or negative
 */
int ImageResize(uint8_t * src, uint8_t* dst, int sw,
        int sh,int dw,int dh)
{
    /* Negative sizes are rejected as well as zero: the scalers shift and
     * divide by these values. */
    if( (src == NULL) || (dst == NULL) || (dw <= 0) || (dh <= 0) ||
            (sw <= 0) || (sh <= 0))
    {
        printf("params error\n");
        return -1;
    }

    /* Nearest-neighbour is the active scaler; call nv12_bilinear_scale with
     * the same arguments instead for smoother output. */
    nv12_nearest_scale(src, dst, sw, sh, dw, dh);
    return 0;
}

/* Parses a strictly positive decimal int from text; returns 0 when the text
 * is not a number, has trailing characters, or is outside 1..INT_MAX. */
static int parse_dimension(const char *text)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value <= 0 || value > INT_MAX)
    {
        return 0;
    }
    return (int)value;
}

/**
 * Command-line driver: reads one NV12 frame, scales it, reports throughput
 * and writes the scaled frame.
 *
 * Usage: <Input NV12file> <Output NV12file> <sw> <sh> <dw> <dh>
 *
 * @return 0 on success, -1 on bad arguments or any I/O / allocation failure
 */
int main(int argc,char**argv)
{
    enum { BENCH_ITERATIONS = 1000 };

    int rc = -1;
    int sw, sh, dw, dh;
    int i;
    size_t inBytes, outBytes;
    const size_t sizeLimit = (size_t)-1 / 3; /* keeps w * h * 3 from wrapping */
    FILE *inputfp = NULL;
    FILE *outputfp = NULL;
    uint8_t *pInBuffer = NULL;
    uint8_t *pOutBuffer = NULL;
    clock_t start, finish;
    float duration, fps;

    if(argc!=7)
    {
        printf("Input Error!\n");
        printf("Usage :  <Input NV12file> <Output NV12file> <sw> <sh> <dw> <dh>\n");
        return -1;
    }

    /* Validate the sizes before touching any file so a bad command line
     * cannot truncate an existing output file. */
    sw = parse_dimension(argv[3]);
    sh = parse_dimension(argv[4]);
    dw = parse_dimension(argv[5]);
    dh = parse_dimension(argv[6]);

    if(sw <= 0 || sh <= 0 || dw <= 0 || dh <=0)
    {
        fprintf(stderr, "parameter error [sw= %d,sh= %d,dw= %d,dh= %d]\n",sw,sh,dw,dh);
        return -1;
    }

    if ((size_t)sw > sizeLimit / (size_t)sh || (size_t)dw > sizeLimit / (size_t)dh)
    {
        fprintf(stderr, "image dimensions too large\n");
        return -1;
    }

    /* NV12 frame size: full-resolution Y plane plus half-size interleaved UV. */
    inBytes = (size_t)sw * (size_t)sh * 3 / 2;
    outBytes = (size_t)dw * (size_t)dh * 3 / 2;

    inputfp = fopen(argv[1], "rb");
    if (!inputfp)
    {
        fprintf(stderr, "fopen failed for input file[%s]\n",argv[1]);
        goto cleanup;
    }

    outputfp = fopen(argv[2], "wb");
    if (!outputfp)
    {
        fprintf(stderr, "fopen failed for output file[%s]\n",argv[2]);
        goto cleanup;
    }

    pInBuffer = malloc(inBytes);
    pOutBuffer = malloc(outBytes);
    if (!pInBuffer || !pOutBuffer)
    {
        fprintf(stderr, "out of memory\n");
        goto cleanup;
    }

    if (fread(pInBuffer, 1, inBytes, inputfp) != inBytes)
    {
        fprintf(stderr, "input file[%s] is shorter than one %d*%d NV12 frame\n",
                argv[1], sw, sh);
        goto cleanup;
    }

    if (ImageResize(pInBuffer, pOutBuffer, sw, sh, dw, dh) != 0)
    {
        goto cleanup;
    }

    //compute frame per second
    /* The benchmark uses the same geometry the buffers were sized for. */
    start = clock();
    for (i = 0; i < BENCH_ITERATIONS; ++i)
    {
        ImageResize(pInBuffer, pOutBuffer, sw, sh, dw, dh);
    }
    finish = clock();

    duration = (float)(finish-start)/CLOCKS_PER_SEC;
    /* clock() may not advance for tiny images; avoid dividing by zero. */
    fps = (duration > 0.0f) ? BENCH_ITERATIONS / duration : 0.0f;
    printf("nv12Scaling:%d*%d-->%d*%d,time cost:%6.2ffps\n",sw,sh,dw,dh,fps);

    if (fwrite(pOutBuffer, 1, outBytes, outputfp) != outBytes)
    {
        fprintf(stderr, "fwrite failed for output file[%s]\n", argv[2]);
        goto cleanup;
    }

    rc = 0;

cleanup:
    free(pOutBuffer);
    free(pInBuffer);
    /* fclose flushes buffered output, so a failure here means data was lost. */
    if (outputfp && fclose(outputfp) != 0)
    {
        fprintf(stderr, "fclose failed for output file[%s]\n", argv[2]);
        rc = -1;
    }
    if (inputfp)
    {
        fclose(inputfp);
    }
    return rc;
}

此外，我想使用ARM汇编语言来优化我的程序。也许是Android项目的NEON矢量化程序集。或者只是使用不同CPU的不同属性。但有时它需要巨大的更改（包括重新设计算法），同时将C代码转换为汇编代码。这取决于CPU的功能。

Resizing NV12 image using Nearest Neighbor Interpolation and Bilinear Interpolation algorithms

本文参与腾讯云自媒体同步曝光计划，分享自作者个人站点/博客。

原始发表：2022-06-19，如有侵权请联系 cloudcommunity@tencent.com 删除

编程算法