本文是一个优化的NV12图像缩放程序。
图像缩放算法有多种类型。图像缩放算法的复杂度与图像质量损失和性能开销有关。我决定选择最简单的“最近邻插值”和双线性插值来调整NV12图像的大小。
NV12是YUV系列格式中的一种。在阅读本文之前,你需要对该格式有一些基本的了解,并且知道什么是插值缩放算法。
如果您之前尝试过RGBA格式的图像缩放,您会更容易理解我的程序是如何工作的。
内存中的NV12格式图像数组类似于:YYYYYYYY…UVUV…。NV12是一种平面格式,也被称为YUV420sp。它包含Y、U、V三个分量(其中U和V交错存放在同一个平面中):
因此,`宽度 * 高度 * 3 / 2` 是图像的总内存长度(字节)。以下是更清晰的8*4分辨率示例(逻辑视图):
显然,宽度 = 8,高度 = 4
每四个Y值匹配相同的U值和V值。
例如:
复制代码
srcX = dstX * (srcWidth / dstWidth), srcY = dstY * (srcHeight / dstHeight)
这个比例通常带有小数部分。该算法只是通过取整,将源图像中最近的像素值存入目标图像数组。因此效果不会很好,通常会出现比较严重的马赛克。
双线性插值同时使用小数部分和整数部分,根据四个相邻像素计算最终像素值,其中小数部分用作权重。它消除了锯齿和马赛克。
复制代码
input: src_nv12_array, src_width, src_height,dest_width,dest_height
output: dst_nv12_array
这是一个C版优化程序。
C++
收缩▲复制代码
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>
typedef unsigned char uint8_t;
/* Map a destination coordinate to a source coordinate with a 16.16
 * fixed-point step, clamped to the last valid source index (limit - 1). */
static unsigned long long nv12_nearest_coord(unsigned long long pos,
                                             unsigned long long step,
                                             unsigned long long limit)
{
    unsigned long long mapped = (pos * step) >> 16;
    return (mapped < limit) ? mapped : limit - 1;
}

/**
 * Scale an NV12 image with nearest-neighbour sampling.
 *
 * Both buffers are laid out as a full-resolution Y plane followed by an
 * interleaved UV plane; every row (luma and chroma) has a stride equal to
 * the image width. Every destination row and column is written, not only
 * those below the largest multiple of 8.
 *
 * The function returns without writing anything when a pointer is null or a
 * dimension is not positive. Chroma is copied as whole U/V pairs for
 * floor(width / 2) x floor(height / 2) positions, so odd dimensions never
 * cause an access outside width * height * 3 / 2 bytes.
 *
 * @param src input nv12 raw data array
 * @param dst output nv12 raw data result,
 *            the memory need to be allocated outside of the function
 *            (dstWidth * dstHeight * 3 / 2 bytes)
 * @param srcWidth width of the input nv12 image
 * @param srcHeight height of the input nv12 image
 * @param dstWidth width of the output nv12 image
 * @param dstHeight height of the output nv12 image
 */
void nv12_nearest_scale(uint8_t* __restrict src, uint8_t* __restrict dst,
                        int srcWidth, int srcHeight, int dstWidth, int dstHeight)
{
    if (!src || !dst || srcWidth <= 0 || srcHeight <= 0 ||
        dstWidth <= 0 || dstHeight <= 0)
    {
        return;
    }

    /* 64-bit arithmetic: `width << 16` overflows int for widths >= 32768. */
    const unsigned long long sw = (unsigned long long)srcWidth;
    const unsigned long long sh = (unsigned long long)srcHeight;
    const unsigned long long dw = (unsigned long long)dstWidth;
    const unsigned long long dh = (unsigned long long)dstHeight;

    /* 16.16 fixed-point source step per destination pixel. The +1 bias can
     * push the last samples past the source edge when upscaling, which is
     * why nv12_nearest_coord() clamps. */
    const unsigned long long x_step = (sw << 16) / dw + 1;
    const unsigned long long y_step = (sh << 16) / dh + 1;

    /* Luma plane: one sample per pixel. */
    for (unsigned long long y = 0; y < dh; ++y)
    {
        const unsigned long long sy = nv12_nearest_coord(y, y_step, sh);
        const uint8_t *src_row = src + sy * sw;
        uint8_t *dst_row = dst + y * dw;

        for (unsigned long long x = 0; x < dw; ++x)
        {
            dst_row[x] = src_row[nv12_nearest_coord(x, x_step, sw)];
        }
    }

    /* Chroma plane: one interleaved U/V pair per 2x2 luma block. */
    const unsigned long long src_cw = sw / 2;
    const unsigned long long src_ch = sh / 2;
    const unsigned long long dst_cw = dw / 2;
    const unsigned long long dst_ch = dh / 2;

    if (src_cw == 0 || src_ch == 0)
    {
        return; /* the source holds no complete U/V pair */
    }

    const uint8_t *src_uv = src + sh * sw;
    uint8_t *dst_uv = dst + dh * dw;

    for (unsigned long long cy = 0; cy < dst_ch; ++cy)
    {
        /* Sample at the luma position of the even row 2*cy, then halve. */
        unsigned long long scy = nv12_nearest_coord(2 * cy, y_step, sh) / 2;
        if (scy >= src_ch)
        {
            scy = src_ch - 1;
        }

        const uint8_t *src_row = src_uv + scy * sw;
        uint8_t *dst_row = dst_uv + cy * dw;

        for (unsigned long long cx = 0; cx < dst_cw; ++cx)
        {
            unsigned long long scx = nv12_nearest_coord(2 * cx, x_step, sw) / 2;
            if (scx >= src_cw)
            {
                scx = src_cw - 1;
            }

            dst_row[2 * cx] = src_row[2 * scx];         /* U */
            dst_row[2 * cx + 1] = src_row[2 * scx + 1]; /* V */
        }
    }
}
/* Bilinearly resample one 8-bit plane. Each pixel holds `channels`
 * interleaved samples (1 for Y, 2 for the NV12 UV plane); widths and heights
 * are counted in pixels, strides in bytes. */
static void nv12_bilinear_plane(const uint8_t *src, int src_w, int src_h,
                                int src_stride,
                                uint8_t *dst, int dst_w, int dst_h,
                                int dst_stride, int channels)
{
    /* 8.8 fixed-point source step per destination pixel. */
    const long long x_step = ((long long)src_w << 8) / dst_w;
    const long long y_step = ((long long)src_h << 8) / dst_h;
    long long pos_y = 0;

    for (int j = 0; j < dst_h; ++j, pos_y += y_step)
    {
        const long long oy = pos_y >> 8;
        const int fy = (int)(pos_y & 0xFF);
        /* Clamp the lower neighbour so the last row never reads past the plane. */
        const long long oy1 = (oy + 1 < src_h) ? oy + 1 : oy;
        const uint8_t *row0 = src + oy * src_stride;
        const uint8_t *row1 = src + oy1 * src_stride;
        uint8_t *out = dst + (long long)j * dst_stride;
        long long pos_x = 0;

        for (int i = 0; i < dst_w; ++i, pos_x += x_step)
        {
            const long long ox = pos_x >> 8;
            const int fx = (int)(pos_x & 0xFF);
            /* Clamp the right neighbour at the last column. */
            const long long ox1 = (ox + 1 < src_w) ? ox + 1 : ox;

            /* The four weights sum to 0x10000, so the blended value stays
             * within 0..255 after the >> 16 and needs no saturation. */
            const int w00 = (0x100 - fx) * (0x100 - fy);
            const int w10 = fx * (0x100 - fy);
            const int w01 = (0x100 - fx) * fy;
            const int w11 = fx * fy;

            for (int c = 0; c < channels; ++c)
            {
                const int p00 = row0[ox * channels + c];
                const int p10 = row0[ox1 * channels + c];
                const int p01 = row1[ox * channels + c];
                const int p11 = row1[ox1 * channels + c];
                const int blended = w00 * p00 + w10 * p10 + w01 * p01 + w11 * p11;

                out[(long long)i * channels + c] = (uint8_t)(blended >> 16);
            }
        }
    }
}

/**
 * Scale an NV12 image with bilinear interpolation.
 *
 * Both buffers are laid out as a full-resolution Y plane followed by an
 * interleaved UV plane; every row has a stride equal to the image width.
 * The Y plane is interpolated at full resolution. The UV plane is
 * interpolated as a floor(width / 2) x floor(height / 2) image of U/V pairs,
 * so U is only blended with U and V with V.
 *
 * The function returns without writing anything when a pointer is null or a
 * dimension is not positive. Chroma is skipped when the source or the
 * destination holds no complete U/V pair.
 *
 * @param src input nv12 raw data array
 * @param dst output nv12 raw data result, allocated by the caller
 *            (dstWidth * dstHeight * 3 / 2 bytes)
 * @param srcWidth width of the input nv12 image
 * @param srcHeight height of the input nv12 image
 * @param dstWidth width of the output nv12 image
 * @param dstHeight height of the output nv12 image
 */
void nv12_bilinear_scale (uint8_t* src, uint8_t* dst,
                          int srcWidth, int srcHeight, int dstWidth,int dstHeight)
{
    if (!src || !dst || srcWidth <= 0 || srcHeight <= 0 ||
        dstWidth <= 0 || dstHeight <= 0)
    {
        return;
    }

    /* Luma plane. */
    nv12_bilinear_plane(src, srcWidth, srcHeight, srcWidth,
                        dst, dstWidth, dstHeight, dstWidth, 1);

    /* Chroma plane, measured in U/V pairs. */
    const int src_cw = srcWidth / 2;
    const int src_ch = srcHeight / 2;
    const int dst_cw = dstWidth / 2;
    const int dst_ch = dstHeight / 2;

    if (src_cw == 0 || src_ch == 0 || dst_cw == 0 || dst_ch == 0)
    {
        return;
    }

    nv12_bilinear_plane(src + (long long)srcHeight * srcWidth,
                        src_cw, src_ch, srcWidth,
                        dst + (long long)dstHeight * dstWidth,
                        dst_cw, dst_ch, dstWidth, 2);
}
/**
 * Validate the arguments and scale an NV12 image with
 * nv12_nearest_scale().
 *
 * @param src input nv12 raw data array
 * @param dst output nv12 raw data buffer, allocated by the caller
 * @param sw width of the input image
 * @param sh height of the input image
 * @param dw width of the output image
 * @param dh height of the output image
 * @return 0 on success, -1 (after printing "params error") when a pointer is
 *         NULL or any dimension is zero or negative
 */
int ImageResize(uint8_t * src, uint8_t* dst, int sw,
                int sh,int dw,int dh)
{
    /* Negative sizes are rejected too: the scalers divide by them and use
     * them to compute buffer offsets. */
    if( (src == NULL) || (dst == NULL) || (dw <= 0) || (dh <= 0) ||
        (sw <= 0) || (sh <= 0))
    {
        printf("params error\n");
        return -1;
    }
    /* Swap in nv12_bilinear_scale(src, dst, sw, sh, dw, dh) here for
     * bilinear interpolation. */
    nv12_nearest_scale(src, dst, sw, sh, dw, dh);
    return 0;
}
/* Parse a decimal int from `text` into `*out`. Returns 0 when the whole
 * string is a strictly positive int, -1 otherwise. */
static int parse_positive_int(const char *text, int *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (errno == ERANGE || end == text || *end != '\0' ||
        value < INT_MIN || value > INT_MAX)
    {
        return -1;
    }
    *out = (int)value;
    return (value > 0) ? 0 : -1;
}

/* Compute width * height * 3 / 2 in size_t. Returns -1 on overflow. */
static int nv12_frame_bytes(int width, int height, size_t *out)
{
    const size_t max = (size_t)-1;
    const size_t w = (size_t)width;
    const size_t h = (size_t)height;
    size_t luma;

    if (w > max / h)
    {
        return -1;
    }
    luma = w * h;
    if (luma / 2 > max - luma)
    {
        return -1;
    }
    *out = luma + luma / 2;
    return 0;
}

/**
 * Command-line driver: read one NV12 frame, scale it with ImageResize(),
 * time 1000 repeated scalings and write the scaled frame to the output file.
 *
 * Usage: <Input NV12file> <Output NV12file> <sw> <sh> <dw> <dh>
 *
 * @return 0 on success, -1 on a usage, parameter, allocation or I/O error
 */
int main(int argc,char**argv)
{
    enum { BENCH_ITERATIONS = 1000 };

    /* Everything is declared up front so no `goto cleanup` jumps over an
     * initialisation. */
    FILE *inputfp = NULL;
    FILE *outputfp = NULL;
    uint8_t *pInBuffer = NULL;
    uint8_t *pOutBuffer = NULL;
    size_t inBytes = 0;
    size_t outBytes = 0;
    int sw = 0;
    int sh = 0;
    int dw = 0;
    int dh = 0;
    int bad = 0;
    int i = 0;
    int rc = -1;
    clock_t start;
    clock_t finish;
    double duration;
    double fps;

    if(argc!=7)
    {
        printf("Input Error!\n");
        printf("Usage : <Input NV12file> <Output NV12file> <sw> <sh> <dw> <dh>\n");
        return -1;
    }

    /* Validate the sizes before opening the output file, so bad arguments
     * do not truncate an existing file. */
    bad |= parse_positive_int(argv[3], &sw);
    bad |= parse_positive_int(argv[4], &sh);
    bad |= parse_positive_int(argv[5], &dw);
    bad |= parse_positive_int(argv[6], &dh);
    if(bad)
    {
        fprintf(stderr, "parameter error [sw= %d,sh= %d,dw= %d,dh= %d]\n",sw,sh,dw,dh);
        return -1;
    }
    if (nv12_frame_bytes(sw, sh, &inBytes) != 0 ||
        nv12_frame_bytes(dw, dh, &outBytes) != 0)
    {
        fprintf(stderr, "image dimensions too large\n");
        return -1;
    }

    inputfp = fopen(argv[1], "rb");
    if (!inputfp)
    {
        fprintf(stderr, "fopen failed for input file[%s]\n",argv[1]);
        goto cleanup;
    }
    outputfp = fopen(argv[2], "wb");
    if (!outputfp)
    {
        fprintf(stderr, "fopen failed for output file[%s]\n",argv[2]);
        goto cleanup;
    }

    pInBuffer = (uint8_t*)malloc(inBytes);
    pOutBuffer = (uint8_t*)malloc(outBytes);
    if (!pInBuffer || !pOutBuffer)
    {
        fprintf(stderr, "out of memory\n");
        goto cleanup;
    }

    if (fread(pInBuffer, 1, inBytes, inputfp) != inBytes)
    {
        fprintf(stderr, "input file[%s] is shorter than one %d*%d NV12 frame\n",
                argv[1], sw, sh);
        goto cleanup;
    }

    if (ImageResize(pInBuffer, pOutBuffer, sw, sh, dw, dh) != 0)
    {
        goto cleanup;
    }

    //compute frame per second
    /* The benchmark must use the same sizes the buffers were allocated for;
     * any other resolution reads and writes outside them. */
    start = clock();
    for(i = 0; i < BENCH_ITERATIONS; ++i)
    {
        ImageResize(pInBuffer, pOutBuffer, sw, sh, dw, dh);
    }
    finish = clock();
    duration = (double)(finish - start) / CLOCKS_PER_SEC;
    /* The run can take less than one clock tick; avoid dividing by zero. */
    fps = (duration > 0.0) ? BENCH_ITERATIONS / duration : 0.0;
    printf("nv12Scaling:%d*%d-->%d*%d,time cost:%6.2ffps\n",sw,sh,dw,dh,fps);

    if (fwrite(pOutBuffer, 1, outBytes, outputfp) != outBytes)
    {
        fprintf(stderr, "fwrite failed for output file[%s]\n", argv[2]);
        goto cleanup;
    }
    rc = 0;

cleanup:
    free(pInBuffer);
    free(pOutBuffer);
    if (inputfp)
    {
        fclose(inputfp);
    }
    /* fclose flushes buffered output, so a failure here is a write error. */
    if (outputfp && fclose(outputfp) != 0 && rc == 0)
    {
        fprintf(stderr, "fclose failed for output file[%s]\n", argv[2]);
        rc = -1;
    }
    return rc;
}
此外,我想使用ARM汇编语言来优化我的程序,比如面向Android项目的NEON向量化汇编,或者利用不同CPU各自的特性。但在把C代码转换为汇编代码时,有时需要巨大的改动(包括重新设计算法),这取决于CPU的功能。
Resizing NV12 image using Nearest Neighbor Interpolation and Bilinear Interpolation algorithms