DSP双线性插值代码优化

来源:本站
导读:目前正在解读《DSP双线性插值代码优化》的相关信息,《DSP双线性插值代码优化》是由用户自行发布的知识型内容!下面请观看由(电工技术网 - www.9ddd.net)用户发布的《DSP双线性插值代码优化》的详细说明。
简介:本文给出DSP双线性插值代码优化前后的两版程序。

<code>

代码原型,不是我写的。只是拿来copy。

/*
 * Table-driven bilinear remap of the destination rectangle stRect.
 *
 * For each destination pixel (i, j) with stRect.top <= j < stRect.bottom and
 * stRect.left <= i < stRect.right:
 *
 *   - table1 is read as an array of int, one word per destination pixel at
 *     index j * dstWidth + i. Bits 0..15 are the source column (sx) and
 *     bits 16..31 the source row (sy).
 *   - table2 is read as an array of int, two words per destination pixel at
 *     index (j * dstWidth + i) * 2. Word 0 carries cof00 (low half) and
 *     cof01 (high half); word 1 carries cof10 (low half) and cof11 (high half).
 *   - Four source pixels are fetched: the pixel at (sx, sy), the 3 bytes that
 *     follow it, and the same two positions one source row (Width * nchanner
 *     bytes) further on.
 *   - Each of the 3 output bytes is
 *       (p00 * cof00 + p10 * cof01 + p01 * cof10 + p11 * cof11) >> BITOFF
 *     where p01 is the neighbour in the same row and p10 the one in the next
 *     row.
 *
 * The destination is written with a fixed 3 bytes per pixel and a row stride
 * of dstWidth * 3 bytes. The neighbour pixel is likewise taken 3 bytes after
 * the first one, independent of nchanner.
 * NOTE(review): nchanner only affects source addressing; confirm callers
 * always pass 3.
 *
 * Height and dstHeight are not used. No bounds checking is done on the table
 * contents. Always returns 1.
 *
 * NOTE(review): table1/table2 are declared short* but accessed as int*; this
 * relies on the target's alignment, endianness and aliasing rules exactly as
 * the original code did.
 */
static LA_bool xxx(unsigned char *src, int Width, int Height, short *table1, short *table2,

int dstWidth, int dstHeight, int nchanner, unsigned char *dst, Rect_S stRect)
{
    const int stepSrc = Width * nchanner;       /* bytes per source row */
    const int *xy_tab = (const int *)table1;    /* packed (sy << 16 | sx) words */
    const int *cof_tab = (const int *)table2;   /* packed weight pairs */
    int i, j;

    for (j = stRect.top; j < stRect.bottom; ++j)
    {
        /* Row bases are hoisted; the column index is added per pixel. */
        const int *xy_row = xy_tab + j * dstWidth;
        const int *cof_row = cof_tab + j * (dstWidth * 2);
        unsigned char *pDtmp = dst + j * dstWidth * 3 + stRect.left * 3;

        for (i = stRect.left; i < stRect.right; ++i)
        {
            const int xyval = xy_row[i];
            const int sy = (xyval >> 16) & 0x0000ffff;
            const int sx = xyval & 0x0000ffff;

            const int cofval1 = cof_row[i << 1];
            const int cofval2 = cof_row[(i << 1) + 1];
            const short cof01 = (cofval1 >> 16) & 0x0000ffff;
            const short cof00 = cofval1 & 0x0000ffff;
            const short cof11 = (cofval2 >> 16) & 0x0000ffff;
            const short cof10 = cofval2 & 0x0000ffff;

            /* Bytes are read as unsigned so the values never depend on the
             * signedness of plain char. */
            const unsigned char *p1 = src + (sy * stepSrc + sx * nchanner);
            const unsigned char *p2 = p1 + stepSrc;

            /* All twelve source bytes are fetched before anything is stored,
             * matching the original read/write order. */
            const unsigned char r00 = p1[0], g00 = p1[1], b00 = p1[2];
            const unsigned char r01 = p1[3], g01 = p1[4], b01 = p1[5];
            const unsigned char r10 = p2[0], g10 = p2[1], b10 = p2[2];
            const unsigned char r11 = p2[3], g11 = p2[4], b11 = p2[5];

            *(pDtmp++) = (unsigned char)((r00 * cof00 + r10 * cof01 + r01 * cof10 + r11 * cof11) >> BITOFF);
            *(pDtmp++) = (unsigned char)((g00 * cof00 + g10 * cof01 + g01 * cof10 + g11 * cof11) >> BITOFF);
            *(pDtmp++) = (unsigned char)((b00 * cof00 + b10 * cof01 + b01 * cof10 + b11 * cof11) >> BITOFF);
        }
    }

    return 1;
}

</code>

==== 优化后代码 ====

<code>

/*
 * Image geometry used by Load_Dot16_YUV422.
 *
 * ImgIn_w * PixelSize is the byte distance between the two source rows that
 * the routine samples. ImgIn_h is not referenced by that routine.
 */
static int ImgIn_w = 800, ImgIn_h = 600;

/*
 * Img_w and Img_h size the per-image lookup tables and the output image;
 * Img_w is also the per-row stride, in pixels, of those buffers.
 */
static int Img_w = 600, Img_h = 800;

/* Bytes per source pixel. */
static int PixelSize = 2;

/*
 * Table-driven remap of a rectangle of a 2-bytes-per-pixel image, written
 * with TI C6000-style intrinsics and processing two output pixels per
 * inner-loop iteration.
 *
 * Parameters
 *   stRect_left/right/top/bottom  destination rectangle; rows top..bottom-1,
 *                                 (right - left) >> 1 pixel pairs per row
 *                                 (an odd width drops the last column).
 *   ImageIndex                    selects which per-image slice of the offset
 *                                 and coefficient tables is used.
 *   ImageOutIndex                 selects which output image is written.
 *
 * Buffers (all taken from names defined outside this function)
 *   pImgS1In          source image base address.
 *   pImgS1Out         output base address, 2 bytes per pixel.
 *   pParam_Loadx4XY   offset table, 4 bytes per output pixel; each 64-bit
 *                     entry holds two 32-bit source byte offsets (low word for
 *                     the first pixel of the pair, high word for the second).
 *   pParam_Dot16Cof   coefficient table, 8 bytes per output pixel.
 *
 * The inner loop is pipelined by hand:
 *   - the source loads for pair k+1 are issued while pair k is unpacked, and
 *   - the dot product of pair k is shifted, packed and stored in iteration
 *     k+1. That is why the loop runs i_LoopNum + 1 times and skips the store
 *     when Index_i == 0.
 *
 * NOTE(review): because of that look-ahead, each row reads offset-table
 * entries up to index i_LoopNum + 2 and coefficient entries up to pair
 * i_LoopNum, and performs source loads at the offsets found there. The
 * tables therefore need readable slack past the rectangle; confirm.
 * NOTE(review): pDtmp is not declared in this function; it must be a
 * file-scope pointer to 32-bit words declared elsewhere. Verify.
 * NOTE(review): the comments on individual intrinsics follow the TI C6000
 * compiler naming; lane-level details (which bytes are luma or chroma) are
 * inferred from the variable names and should be checked against the
 * intrinsic reference.
 */
static void Load_Dot16_YUV422(int stRect_left, int stRect_right, int stRect_top, int stRect_bottom, int ImageIndex, int ImageOutIndex)
{
    /* Buffer base addresses, handled as 32-bit integers. */
    unsigned int src0 = pImgS1In;
    unsigned int dst0 = pImgS1Out;
    unsigned int xy_tab0 = pParam_Loadx4XY;
    unsigned int cof_tab0 = pParam_Dot16Cof;
    int Index_i, Index_j;
    unsigned char* src;     /* upper source row base */
    unsigned char * src1;   /* same position one source row further on */
    register tu32 XY_tab_Addr;
    unsigned long long *pXY_tab;
    int i_LoopNum;
    int j_offset;
    unsigned long long offset_12;   /* two packed source byte offsets */
    int offset_load_1;
    int offset_load_1_is;           /* bit 1 of offset_load_1 */
    unsigned long long data_load_A_1;
    unsigned long long data_load_B_1;
    int offset_load_2;
    int offset_load_2_is;           /* bit 1 of offset_load_2 */
    unsigned long long data_load_A_2;
    unsigned long long data_load_B_2;
    unsigned long long *pCof_tab;
    unsigned long long *pCof_tab1;
    unsigned long long Cof_ABCD_1;
    unsigned long long Cof_ABCD_2;
    unsigned long long Data64_AND_00FF = 0x00FF00FF00FF00FF;   /* keeps the low byte of each 16-bit lane */
    unsigned int Data32_7654_A_1;
    unsigned int Data32_7654_B_1;
    unsigned int Data32_3210_A_2;
    unsigned int Data32_3210_B_2;
    unsigned long long Data64_DP2_76765454_1;
    unsigned long long Data64_DP2_32321010_2;
    unsigned long long Data64_DPH4_75753131_1;
    unsigned long long Data64_DPH4_75753131_2;
    unsigned long long Data64_MV55_75753131_1;
    unsigned long long Data64_MV33_75753131_2;
    unsigned long long Data64_SHFU_75753131_1;
    unsigned long long Data64_SHFU_75753131_2;
    unsigned int Data32_7575_1;
    unsigned int Data32_3131_1;
    unsigned int Data32_7575_2;
    unsigned int Data32_3131_2;
    int Shift_Num_1;
    int Shift_Num_2;
    /* Presumably the 2x2 neighbourhood labels used in the names below:
     *   A B
     *   C D
     */
    unsigned long long Data64_Y0_0B0D0A0C;
    unsigned long long Data64_Y1_0B0D0A0C;
    unsigned long long Data64_U0_0B0D0A0C;
    unsigned long long Data64_V1_0B0D0A0C;
    __x128_t D128_Y1Y0_0B0D0A0C;
    __x128_t D128_V1U0_0B0D0A0C;
    __x128_t D128_C1C0_0B0D0A0C;
    /* Zero-initialised: the first loop iteration reads these before the first
     * dot product has been computed (that result is then discarded). */
    unsigned long long D64_Y1Y0_Dot16 = 0;
    unsigned long long D64_V1U0_Dot16 = 0;
    unsigned long long D64_Y1Y0_SHRU;
    unsigned long long D64_V1U0_SHRU;
    unsigned long long D64_V1_Y1_U0_Y0;
    unsigned int D32_V1_Y1;
    unsigned int D32_U0_Y0;
    unsigned int D32_V1Y1U0Y0;

    src = (unsigned char *)src0;
    src1 = (unsigned char *)(src0 + ImgIn_w * PixelSize);

    /* Per-image offset: 4 bytes/pixel of offsets, 8 bytes/pixel of
     * coefficients, 2 bytes/pixel of output. */
    xy_tab0+=Img_h*Img_w*4*ImageIndex;
    cof_tab0+=Img_h*Img_w*(1<<3)*ImageIndex;
    dst0 +=Img_h*Img_w*2*ImageOutIndex;

    /* Window offset: skip the rows above the rectangle. */
    {
        j_offset = stRect_top*Img_w;
        xy_tab0 = xy_tab0 + (j_offset<<2);
        cof_tab0= cof_tab0 + (j_offset<<3);
        dst0 = dst0 + (j_offset<<1);
    }

    /* Line offset: skip the columns left of the rectangle. */
    {
        j_offset = stRect_left;
        xy_tab0 = xy_tab0 + (j_offset<<2);
        cof_tab0= cof_tab0 + (j_offset<<3);
        dst0 = dst0 + (j_offset<<1);
    }

    /* Row loop; each inner iteration handles one pair of output pixels. */
    i_LoopNum = stRect_right-stRect_left;
    i_LoopNum = i_LoopNum>>1;

    for (Index_j = stRect_top; Index_j <stRect_bottom; Index_j++)
    {
        /* pre_init: latch this row's cursors, then advance the bases by one
         * full row of Img_w pixels. */
        XY_tab_Addr = xy_tab0;
        pXY_tab = (unsigned long long *)XY_tab_Addr;
        xy_tab0+=(Img_w<<2);
        pCof_tab = (unsigned long long *)cof_tab0;
        cof_tab0+=(Img_w<<3);
        pDtmp = dst0;
        dst0+=(Img_w<<1);

        /* pre_Loop: prime the pipeline with the loads for the first pair (C1)
         * and fetch the offsets of the second pair (C2). */
        {
            pCof_tab1 = &pCof_tab[1];
            offset_12 = *pXY_tab++;//C1
            offset_load_1 = _loll(offset_12);//C1
            offset_load_2 = _hill(offset_12);//C1
            data_load_A_1 = _mem8(src+offset_load_1);//C1
            data_load_B_1 = _mem8(src1+offset_load_1);//C1
            data_load_A_2 = _mem8(src+offset_load_2);//C1
            data_load_B_2 = _mem8(src1+offset_load_2);//C1
            offset_load_1_is = offset_load_1&2;//C1
            offset_load_2_is = offset_load_2&2;//C1
            offset_12 = *pXY_tab++;//C2
        }

        for (Index_i =0; Index_i <i_LoopNum+1; Index_i++)
        {
            /* First pixel: take the high 32 bits of both row loads. */
            Data32_7654_A_1 = _hill(data_load_A_1);
            Data32_7654_B_1 = _hill(data_load_B_1);
            Data64_DP2_76765454_1 = _dpack2(Data32_7654_B_1, Data32_7654_A_1);

            /* Second pixel: take the low 32 bits of both row loads. */
            Data32_3210_A_2 = _loll(data_load_A_2);
            Data32_3210_B_2 = _loll(data_load_B_2);
            Data64_DP2_32321010_2 = _dpack2(Data32_3210_B_2, Data32_3210_A_2);

            Data64_DPH4_75753131_1 = _dpackh4(data_load_B_1, data_load_A_1); //76543210 76543210>7575 3131
            Data64_DPH4_75753131_2 = _dpackh4(data_load_B_2, data_load_A_2); //76543210 76543210>7575 3131

            /* First pixel: if bit 1 of its byte offset is clear, the high
             * word is duplicated into the low word and no shift is applied;
             * otherwise both words are kept and shifted right by 8. */
            Data32_7575_1 = _hill(Data64_DPH4_75753131_1);
            Data32_3131_1 = _loll(Data64_DPH4_75753131_1);
            Shift_Num_1 = 8;
            if(offset_load_1_is==0)
            {
                Data32_3131_1 = Data32_7575_1;
                Shift_Num_1 = 0;
            }
            Data64_MV55_75753131_1 = _itoll(Data32_7575_1, Data32_3131_1);
            Data64_SHFU_75753131_1 = _dshru(Data64_MV55_75753131_1, Shift_Num_1);

            /* Second pixel: if bit 1 of its byte offset is clear, the low
             * word is duplicated into the high word and the result is shifted
             * right by 8; otherwise both words are kept unshifted. */
            Data32_7575_2 = _hill(Data64_DPH4_75753131_2);
            Data32_3131_2 = _loll(Data64_DPH4_75753131_2);
            Shift_Num_2 = 0;
            if(offset_load_2_is==0)
            {
                Data32_7575_2 = Data32_3131_2;
                Shift_Num_2 = 8;
            }
            Data64_MV33_75753131_2 = _itoll(Data32_7575_2, Data32_3131_2);
            Data64_SHFU_75753131_2 = _dshru(Data64_MV33_75753131_2, Shift_Num_2);

            /* Keep only the low byte of every 16-bit lane. */
            Data64_Y0_0B0D0A0C = Data64_DP2_76765454_1 & Data64_AND_00FF;
            Data64_Y1_0B0D0A0C = Data64_DP2_32321010_2 & Data64_AND_00FF;
            Data64_U0_0B0D0A0C = Data64_SHFU_75753131_1 & Data64_AND_00FF;
            Data64_V1_0B0D0A0C = Data64_SHFU_75753131_2 & Data64_AND_00FF;

            /* Loop optimisation begin: issue the loads for the NEXT pair now
             * and fetch the offsets of the pair after that. */
            offset_load_1 = _loll(offset_12);//C2
            offset_load_2 = _hill(offset_12);//C2
            data_load_A_1 = _mem8(src+offset_load_1);//C2
            data_load_B_1 = _mem8(src1+offset_load_1);//C2
            data_load_A_2 = _mem8(src+offset_load_2);//C2
            data_load_B_2 = _mem8(src1+offset_load_2);//C2
            offset_load_1_is = offset_load_1&2;//C2
            offset_load_2_is = offset_load_2&2;//C2
            offset_12 = *pXY_tab++;//C3
            /* Loop optimisation end. */

            //-----------------
            /* Coefficients for the current pair: one 64-bit word per pixel. */
            Cof_ABCD_1 = *pCof_tab;pCof_tab+=2;
            Cof_ABCD_2 = *pCof_tab1;pCof_tab1+=2;
            D128_C1C0_0B0D0A0C = _llto128(Cof_ABCD_2, Cof_ABCD_1);
            D128_Y1Y0_0B0D0A0C = _llto128(Data64_Y1_0B0D0A0C, Data64_Y0_0B0D0A0C);
            D128_V1U0_0B0D0A0C = _llto128(Data64_V1_0B0D0A0C, Data64_U0_0B0D0A0C);

            /* Dot-product latency: these shifts consume the result of the
             * PREVIOUS iteration, so the output runs one iteration behind. */
            D64_Y1Y0_SHRU = _dshr(D64_Y1Y0_Dot16, BITOFF);
            D64_V1U0_SHRU = _dshr(D64_V1U0_Dot16, BITOFF);
            D64_Y1Y0_Dot16 = _ddotpsu4h(D128_Y1Y0_0B0D0A0C, D128_C1C0_0B0D0A0C);
            D64_V1U0_Dot16 = _ddotpsu4h(D128_V1U0_0B0D0A0C, D128_C1C0_0B0D0A0C);

            /* Pack the previous pair's four results into one 32-bit word. */
            D64_V1_Y1_U0_Y0 = _dpackl4(D64_V1U0_SHRU, D64_Y1Y0_SHRU);
            D32_V1_Y1 = _hill(D64_V1_Y1_U0_Y0);
            D32_U0_Y0 = _loll(D64_V1_Y1_U0_Y0);
            D32_V1Y1U0Y0 = _packl4(D32_V1_Y1, D32_U0_Y0);

            /* Iteration 0 has no previous result to store. */
            if(Index_i)
            {
                *pDtmp++ = D32_V1Y1U0Y0;
            }
            //-----------------
        }
    }

    return ;
}

</code>

提醒:《DSP双线性插值代码优化》最后刷新时间 2024-03-14 00:56:30,本站为公益型个人网站,仅供个人学习和记录信息,不进行任何商业性质的盈利。如果内容、图片资源失效或内容涉及侵权,请反馈给本站,我们会及时处理。本站只保证内容的可读性,无法保证真实性,《DSP双线性插值代码优化》该内容的真实性请自行鉴别。