diff options
author | dcommander <dcommander@632fc199-4ca6-4c93-a231-07263d6284db> | 2015-01-13 10:00:12 +0000 |
---|---|---|
committer | dcommander <dcommander@632fc199-4ca6-4c93-a231-07263d6284db> | 2015-01-13 10:00:12 +0000 |
commit | 8f2bcc838c38e45c25a32e8d0323ff27d52fd065 (patch) | |
tree | e922ebe810d3854223260b2ae84ee68be85295bb | |
parent | 405be457966a4b8e08166ff2f8d99e703f007a0f (diff) |
Make the formatting and naming of variables and constants more consistent
git-svn-id: svn://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1496 632fc199-4ca6-4c93-a231-07263d6284db
-rw-r--r-- | simd/jccolext-altivec.c | 50 | ||||
-rw-r--r-- | simd/jcgryext-altivec.c | 30 | ||||
-rw-r--r-- | simd/jcsample-altivec.c | 104 | ||||
-rw-r--r-- | simd/jdcolext-altivec.c | 35 | ||||
-rw-r--r-- | simd/jdsample-altivec.c | 133 | ||||
-rw-r--r-- | simd/jfdctfst-altivec.c | 8 | ||||
-rw-r--r-- | simd/jidctfst-altivec.c | 31 | ||||
-rw-r--r-- | simd/jidctint-altivec.c | 41 | ||||
-rw-r--r-- | simd/jquanti-altivec.c | 34 |
9 files changed, 235 insertions, 231 deletions
diff --git a/simd/jccolext-altivec.c b/simd/jccolext-altivec.c index 0d50e55..84d314d 100644 --- a/simd/jccolext-altivec.c +++ b/simd/jccolext-altivec.c @@ -38,7 +38,7 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf, __vector unsigned char rgb4 = {0}; #endif __vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3; - __vector unsigned short y01, y23, cr01, cr23, cb01, cb23; + __vector unsigned short yl, yh, crl, crh, cbl, cbh; __vector int y0, y1, y2, y3, cr0, cr1, cr2, cr3, cb0, cb1, cb2, cb3; /* Constants */ @@ -49,7 +49,7 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf, __vector unsigned short pw_f050_f000 = { __4X2(F_0_500, 0) }; __vector int pd_onehalf = { __4X(ONE_HALF) }, pd_onehalfm1_cj = { __4X(ONE_HALF - 1 + (CENTERJSAMPLE << SCALEBITS)) }; - __vector unsigned char zero = { __16X(0) }, + __vector unsigned char pb_zero = { __16X(0) }, shift_pack_index = { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; @@ -168,14 +168,14 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf, * NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't * support unsigned vectors. */ - rg0 = (__vector signed short)vec_mergeh(zero, rgbg0); - bg0 = (__vector signed short)vec_mergel(zero, rgbg0); - rg1 = (__vector signed short)vec_mergeh(zero, rgbg1); - bg1 = (__vector signed short)vec_mergel(zero, rgbg1); - rg2 = (__vector signed short)vec_mergeh(zero, rgbg2); - bg2 = (__vector signed short)vec_mergel(zero, rgbg2); - rg3 = (__vector signed short)vec_mergeh(zero, rgbg3); - bg3 = (__vector signed short)vec_mergel(zero, rgbg3); + rg0 = (__vector signed short)vec_mergeh(pb_zero, rgbg0); + bg0 = (__vector signed short)vec_mergel(pb_zero, rgbg0); + rg1 = (__vector signed short)vec_mergeh(pb_zero, rgbg1); + bg1 = (__vector signed short)vec_mergel(pb_zero, rgbg1); + rg2 = (__vector signed short)vec_mergeh(pb_zero, rgbg2); + bg2 = (__vector signed short)vec_mergel(pb_zero, rgbg2); + rg3 = (__vector signed short)vec_mergeh(pb_zero, rgbg3); + bg3 = (__vector signed short)vec_mergel(pb_zero, rgbg3); /* (Original) * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B @@ -203,11 +203,11 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf, * descaling the 32-bit results (right-shifting by 16 bits) and then * packing them. */ - y01 = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1, - shift_pack_index); - y23 = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3, - shift_pack_index); - y = vec_pack(y01, y23); + yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1, + shift_pack_index); + yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3, + shift_pack_index); + y = vec_pack(yl, yh); vec_st(y, 0, outptr0); /* Calculate Cb values */ @@ -223,11 +223,11 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf, (__vector unsigned int)cb2); cb3 = (__vector int)vec_msum((__vector unsigned short)bg3, pw_f050_f000, (__vector unsigned int)cb3); - cb01 = vec_perm((__vector unsigned short)cb0, - (__vector unsigned short)cb1, shift_pack_index); - cb23 = vec_perm((__vector unsigned short)cb2, - (__vector unsigned short)cb3, shift_pack_index); - cb = vec_pack(cb01, cb23); + cbl = vec_perm((__vector unsigned short)cb0, + (__vector unsigned short)cb1, shift_pack_index); + cbh = vec_perm((__vector unsigned short)cb2, + (__vector unsigned short)cb3, shift_pack_index); + cb = vec_pack(cbl, cbh); vec_st(cb, 0, outptr1); /* Calculate Cr values */ @@ -243,11 +243,11 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf, (__vector unsigned int)cr2); cr3 = (__vector int)vec_msum((__vector unsigned short)rg3, pw_f050_f000, (__vector unsigned int)cr3); - cr01 = vec_perm((__vector unsigned short)cr0, - (__vector unsigned short)cr1, shift_pack_index); - cr23 = vec_perm((__vector unsigned short)cr2, - (__vector unsigned short)cr3, shift_pack_index); - cr = vec_pack(cr01, cr23); + crl = vec_perm((__vector unsigned short)cr0, + (__vector unsigned short)cr1, shift_pack_index); + crh = vec_perm((__vector unsigned short)cr2, + (__vector unsigned short)cr3, shift_pack_index); + cr = vec_pack(crl, crh); vec_st(cr, 0, outptr2); } } diff --git a/simd/jcgryext-altivec.c b/simd/jcgryext-altivec.c index f198e00..46f5b73 100644 --- a/simd/jcgryext-altivec.c +++ b/simd/jcgryext-altivec.c @@ -39,14 +39,14 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width, __vector unsigned char rgb4 = {0}; #endif __vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3; - __vector unsigned short y01, y23; + __vector unsigned short yl, yh; __vector int y0, y1, y2, y3; /* Constants */ __vector short pw_f0299_f0337 = { __4X2(F_0_299, F_0_337) }, pw_f0114_f0250 = { __4X2(F_0_114, F_0_250) }; __vector int pd_onehalf = { __4X(ONE_HALF) }; - __vector unsigned char zero = { __16X(0) }, + __vector unsigned char pb_zero = { __16X(0) }, shift_pack_index = { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; @@ -163,14 +163,14 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width, * NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't * support unsigned vectors. */ - rg0 = (__vector signed short)vec_mergeh(zero, rgbg0); - bg0 = (__vector signed short)vec_mergel(zero, rgbg0); - rg1 = (__vector signed short)vec_mergeh(zero, rgbg1); - bg1 = (__vector signed short)vec_mergel(zero, rgbg1); - rg2 = (__vector signed short)vec_mergeh(zero, rgbg2); - bg2 = (__vector signed short)vec_mergel(zero, rgbg2); - rg3 = (__vector signed short)vec_mergeh(zero, rgbg3); - bg3 = (__vector signed short)vec_mergel(zero, rgbg3); + rg0 = (__vector signed short)vec_mergeh(pb_zero, rgbg0); + bg0 = (__vector signed short)vec_mergel(pb_zero, rgbg0); + rg1 = (__vector signed short)vec_mergeh(pb_zero, rgbg1); + bg1 = (__vector signed short)vec_mergel(pb_zero, rgbg1); + rg2 = (__vector signed short)vec_mergeh(pb_zero, rgbg2); + bg2 = (__vector signed short)vec_mergel(pb_zero, rgbg2); + rg3 = (__vector signed short)vec_mergeh(pb_zero, rgbg3); + bg3 = (__vector signed short)vec_mergel(pb_zero, rgbg3); /* (Original) * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B @@ -194,11 +194,11 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width, * descaling the 32-bit results (right-shifting by 16 bits) and then * packing them. */ - y01 = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1, - shift_pack_index); - y23 = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3, - shift_pack_index); - y = vec_pack(y01, y23); + yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1, + shift_pack_index); + yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3, + shift_pack_index); + y = vec_pack(yl, yh); vec_st(y, 0, outptr); } } diff --git a/simd/jcsample-altivec.c b/simd/jcsample-altivec.c index f312870..62f72be 100644 --- a/simd/jcsample-altivec.c +++ b/simd/jcsample-altivec.c @@ -35,15 +35,16 @@ jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, int outrow, outcol; JDIMENSION output_cols = width_blocks * DCTSIZE; JSAMPROW inptr, outptr; - __vector unsigned char tmpa, tmpb, out; - __vector unsigned short tmpae, tmpao, tmpbe, tmpbo, outl, outh; + + __vector unsigned char this0, next0, out; + __vector unsigned short this0e, this0o, next0e, next0o, outl, outh; /* Constants */ - __vector unsigned short bias = { __4X2(0, 1) }, - one = { __8X(1) }; + __vector unsigned short pw_bias = { __4X2(0, 1) }, + pw_one = { __8X(1) }; __vector unsigned char even_odd_index = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 }, - zero = { __16X(0) }; + pb_zero = { __16X(0) }; expand_right_edge(input_data, max_v_samp_factor, image_width, output_cols * 2); @@ -55,22 +56,22 @@ jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, for (outcol = output_cols; outcol > 0; outcol -= 16, inptr += 32, outptr += 16) { - tmpa = vec_ld(0, inptr); - tmpa = vec_perm(tmpa, tmpa, even_odd_index); - tmpae = (__vector unsigned short)vec_mergeh(zero, tmpa); - tmpao = (__vector unsigned short)vec_mergel(zero, tmpa); - outl = vec_add(tmpae, tmpao); - outl = vec_add(outl, bias); - outl = vec_sr(outl, one); + this0 = vec_ld(0, inptr); + this0 = vec_perm(this0, this0, even_odd_index); + this0e = (__vector unsigned short)vec_mergeh(pb_zero, this0); + this0o = (__vector unsigned short)vec_mergel(pb_zero, this0); + outl = vec_add(this0e, this0o); + outl = vec_add(outl, pw_bias); + outl = vec_sr(outl, pw_one); if (outcol > 16) { - tmpb = vec_ld(16, inptr); - tmpb = vec_perm(tmpb, tmpb, even_odd_index); - tmpbe = (__vector unsigned short)vec_mergeh(zero, tmpb); - tmpbo = (__vector unsigned short)vec_mergel(zero, tmpb); - outh = vec_add(tmpbe, tmpbo); - outh = vec_add(outh, bias); - outh = vec_sr(outh, one); + next0 = vec_ld(16, inptr); + next0 = vec_perm(next0, next0, even_odd_index); + next0e = (__vector unsigned short)vec_mergeh(pb_zero, next0); + next0o = (__vector unsigned short)vec_mergel(pb_zero, next0); + outh = vec_add(next0e, next0o); + outh = vec_add(outh, pw_bias); + outh = vec_sr(outh, pw_one); } else outh = vec_splat_u16(0); @@ -90,16 +91,17 @@ jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, int inrow, outrow, outcol; JDIMENSION output_cols = width_blocks * DCTSIZE; JSAMPROW inptr0, inptr1, outptr; - __vector unsigned char tmp0a, tmp0b, tmp1a, tmp1b, out; - __vector unsigned short tmp0ae, tmp0ao, tmp0be, tmp0bo, tmp1ae, tmp1ao, - tmp1be, tmp1bo, out0l, out0h, out1l, out1h, outl, outh; + + __vector unsigned char this0, next0, this1, next1, out; + __vector unsigned short this0e, this0o, next0e, next0o, this1e, this1o, + next1e, next1o, out0l, out0h, out1l, out1h, outl, outh; /* Constants */ - __vector unsigned short bias = { __4X2(1, 2) }, - two = { __8X(2) }; + __vector unsigned short pw_bias = { __4X2(1, 2) }, + pw_two = { __8X(2) }; __vector unsigned char even_odd_index = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 }, - zero = { __16X(0) }; + pb_zero = { __16X(0) }; expand_right_edge(input_data, max_v_samp_factor, image_width, output_cols * 2); @@ -107,45 +109,45 @@ jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, for (inrow = 0, outrow = 0; outrow < v_samp_factor; inrow += 2, outrow++) { - outptr = output_data[outrow]; inptr0 = input_data[inrow]; inptr1 = input_data[inrow + 1]; + outptr = output_data[outrow]; for (outcol = output_cols; outcol > 0; outcol -= 16, inptr0 += 32, inptr1 += 32, outptr += 16) { - tmp0a = vec_ld(0, inptr0); - tmp0a = vec_perm(tmp0a, tmp0a, even_odd_index); - tmp0ae = (__vector unsigned short)vec_mergeh(zero, tmp0a); - tmp0ao = (__vector unsigned short)vec_mergel(zero, tmp0a); - out0l = vec_add(tmp0ae, tmp0ao); + this0 = vec_ld(0, inptr0); + this0 = vec_perm(this0, this0, even_odd_index); + this0e = (__vector unsigned short)vec_mergeh(pb_zero, this0); + this0o = (__vector unsigned short)vec_mergel(pb_zero, this0); + out0l = vec_add(this0e, this0o); - tmp1a = vec_ld(0, inptr1); - tmp1a = vec_perm(tmp1a, tmp1a, even_odd_index); - tmp1ae = (__vector unsigned short)vec_mergeh(zero, tmp1a); - tmp1ao = (__vector unsigned short)vec_mergel(zero, tmp1a); - out1l = vec_add(tmp1ae, tmp1ao); + this1 = vec_ld(0, inptr1); + this1 = vec_perm(this1, this1, even_odd_index); + this1e = (__vector unsigned short)vec_mergeh(pb_zero, this1); + this1o = (__vector unsigned short)vec_mergel(pb_zero, this1); + out1l = vec_add(this1e, this1o); outl = vec_add(out0l, out1l); - outl = vec_add(outl, bias); - outl = vec_sr(outl, two); + outl = vec_add(outl, pw_bias); + outl = vec_sr(outl, pw_two); if (outcol > 16) { - tmp0b = vec_ld(16, inptr0); - tmp0b = vec_perm(tmp0b, tmp0b, even_odd_index); - tmp0be = (__vector unsigned short)vec_mergeh(zero, tmp0b); - tmp0bo = (__vector unsigned short)vec_mergel(zero, tmp0b); - out0h = vec_add(tmp0be, tmp0bo); - - tmp1b = vec_ld(16, inptr1); - tmp1b = vec_perm(tmp1b, tmp1b, even_odd_index); - tmp1be = (__vector unsigned short)vec_mergeh(zero, tmp1b); - tmp1bo = (__vector unsigned short)vec_mergel(zero, tmp1b); - out1h = vec_add(tmp1be, tmp1bo); + next0 = vec_ld(16, inptr0); + next0 = vec_perm(next0, next0, even_odd_index); + next0e = (__vector unsigned short)vec_mergeh(pb_zero, next0); + next0o = (__vector unsigned short)vec_mergel(pb_zero, next0); + out0h = vec_add(next0e, next0o); + + next1 = vec_ld(16, inptr1); + next1 = vec_perm(next1, next1, even_odd_index); + next1e = (__vector unsigned short)vec_mergeh(pb_zero, next1); + next1o = (__vector unsigned short)vec_mergel(pb_zero, next1); + out1h = vec_add(next1e, next1o); outh = vec_add(out0h, out1h); - outh = vec_add(outh, bias); - outh = vec_sr(outh, two); + outh = vec_add(outh, pw_bias); + outh = vec_sr(outh, pw_two); } else outh = vec_splat_u16(0); diff --git a/simd/jdcolext-altivec.c b/simd/jdcolext-altivec.c index 17c2e20..9cdcd02 100644 --- a/simd/jdcolext-altivec.c +++ b/simd/jdcolext-altivec.c @@ -37,7 +37,7 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf, __vector unsigned char rgb3, out4; #endif __vector short rg0, rg1, rg2, rg3, bx0, bx1, bx2, bx3, yl, yh, cbl, cbh, - crl, crh, rl, rh, gl, gh, bl, bh, g0s, g1s, g2s, g3s; + crl, crh, rl, rh, gl, gh, bl, bh, g0w, g1w, g2w, g3w; __vector int g0, g1, g2, g3; /* Constants @@ -47,11 +47,10 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf, __vector short pw_f0402 = { __8X(F_0_402 >> 1) }, pw_mf0228 = { __8X(-F_0_228 >> 1) }, pw_mf0344_f0285 = { __4X2(-F_0_344, F_0_285) }, - pw_one = { __8X(1) }, - pw_255 = { __8X(255) }, + pw_one = { __8X(1) }, pw_255 = { __8X(255) }, pw_cj = { __8X(CENTERJSAMPLE) }; __vector int pd_onehalf = { __4X(ONE_HALF) }; - __vector unsigned char zero = { __16X(0) }, + __vector unsigned char pb_zero = { __16X(0) }, shift_pack_index = { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; @@ -70,18 +69,18 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf, /* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't * support unsigned vectors. */ - yl = (__vector signed short)vec_mergeh(zero, y); - yh = (__vector signed short)vec_mergel(zero, y); + yl = (__vector signed short)vec_mergeh(pb_zero, y); + yh = (__vector signed short)vec_mergel(pb_zero, y); cb = vec_ld(0, inptr1); - cbl = (__vector signed short)vec_mergeh(zero, cb); - cbh = (__vector signed short)vec_mergel(zero, cb); + cbl = (__vector signed short)vec_mergeh(pb_zero, cb); + cbh = (__vector signed short)vec_mergel(pb_zero, cb); cbl = vec_sub(cbl, pw_cj); cbh = vec_sub(cbh, pw_cj); cr = vec_ld(0, inptr2); - crl = (__vector signed short)vec_mergeh(zero, cr); - crh = (__vector signed short)vec_mergel(zero, cr); + crl = (__vector signed short)vec_mergeh(pb_zero, cr); + crh = (__vector signed short)vec_mergel(pb_zero, cr); crl = vec_sub(crl, pw_cj); crh = vec_sub(crh, pw_cj); @@ -119,14 +118,14 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf, rl = vec_add(rl, yl); rh = vec_add(rh, yh); - g0s = vec_mergeh(cbl, crl); - g1s = vec_mergel(cbl, crl); - g0 = vec_msums(g0s, pw_mf0344_f0285, pd_onehalf); - g1 = vec_msums(g1s, pw_mf0344_f0285, pd_onehalf); - g2s = vec_mergeh(cbh, crh); - g3s = vec_mergel(cbh, crh); - g2 = vec_msums(g2s, pw_mf0344_f0285, pd_onehalf); - g3 = vec_msums(g3s, pw_mf0344_f0285, pd_onehalf); + g0w = vec_mergeh(cbl, crl); + g1w = vec_mergel(cbl, crl); + g0 = vec_msums(g0w, pw_mf0344_f0285, pd_onehalf); + g1 = vec_msums(g1w, pw_mf0344_f0285, pd_onehalf); + g2w = vec_mergeh(cbh, crh); + g3w = vec_mergel(cbh, crh); + g2 = vec_msums(g2w, pw_mf0344_f0285, pd_onehalf); + g3 = vec_msums(g3w, pw_mf0344_f0285, pd_onehalf); /* Clever way to avoid 4 shifts + 2 packs. This packs the high word from * each dword into a new 16-bit vector, which is the equivalent of * descaling the 32-bit results (right-shifting by 16 bits) and then diff --git a/simd/jdsample-altivec.c b/simd/jdsample-altivec.c index be3bbe0..6b8cecf 100644 --- a/simd/jdsample-altivec.c +++ b/simd/jdsample-altivec.c @@ -35,12 +35,13 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor, JSAMPROW inptr, outptr; int inrow, col; - __vector unsigned char block, last, next, lastblock, nextblock = {0}, out; - __vector short blocke, blocko, blockl, blockh, lastl, lasth, nextl, nexth, - outle, outhe, outlo, outho; + __vector unsigned char this0, last0, p_last0, next0 = {0}, p_next0, + out; + __vector short this0e, this0o, this0l, this0h, last0l, last0h, + next0l, next0h, outle, outhe, outlo, outho; /* Constants */ - __vector unsigned char pb_three = { __16X(3) }, pb_zero = { __16X(0) }, + __vector unsigned char pb_zero = { __16X(0) }, pb_three = { __16X(3) }, last_index_col0 = {0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14}, last_index = {15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30}, next_index = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, @@ -52,44 +53,44 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor, inptr = input_data[inrow]; outptr = output_data[inrow]; - block = vec_ld(0, inptr); - last = vec_perm(block, block, last_index_col0); - lastblock = block; + this0 = vec_ld(0, inptr); + p_last0 = vec_perm(this0, this0, last_index_col0); + last0 = this0; for (col = 0; col < downsampled_width; col += 16, inptr += 16, outptr += 32) { if (col > 0) { - last = vec_perm(lastblock, block, last_index); - lastblock = block; + p_last0 = vec_perm(last0, this0, last_index); + last0 = this0; } if (downsampled_width - col <= 16) - next = vec_perm(block, block, next_index_lastcol); + p_next0 = vec_perm(this0, this0, next_index_lastcol); else { - nextblock = vec_ld(16, inptr); - next = vec_perm(block, nextblock, next_index); + next0 = vec_ld(16, inptr); + p_next0 = vec_perm(this0, next0, next_index); } - blocke = (__vector short)vec_mule(block, pb_three); - blocko = (__vector short)vec_mulo(block, pb_three); - blockl = vec_mergeh(blocke, blocko); - blockh = vec_mergel(blocke, blocko); - - lastl = (__vector short)vec_mergeh(pb_zero, last); - lasth = (__vector short)vec_mergel(pb_zero, last); - lastl = vec_add(lastl, pw_one); - lasth = vec_add(lasth, pw_one); - - nextl = (__vector short)vec_mergeh(pb_zero, next); - nexth = (__vector short)vec_mergel(pb_zero, next); - nextl = vec_add(nextl, pw_two); - nexth = vec_add(nexth, pw_two); - - outle = vec_add(blockl, lastl); - outhe = vec_add(blockh, lasth); - outlo = vec_add(blockl, nextl); - outho = vec_add(blockh, nexth); + this0e = (__vector short)vec_mule(this0, pb_three); + this0o = (__vector short)vec_mulo(this0, pb_three); + this0l = vec_mergeh(this0e, this0o); + this0h = vec_mergel(this0e, this0o); + + last0l = (__vector short)vec_mergeh(pb_zero, p_last0); + last0h = (__vector short)vec_mergel(pb_zero, p_last0); + last0l = vec_add(last0l, pw_one); + last0h = vec_add(last0h, pw_one); + + next0l = (__vector short)vec_mergeh(pb_zero, p_next0); + next0h = (__vector short)vec_mergel(pb_zero, p_next0); + next0l = vec_add(next0l, pw_two); + next0h = vec_add(next0h, pw_two); + + outle = vec_add(this0l, last0l); + outhe = vec_add(this0h, last0h); + outlo = vec_add(this0l, next0l); + outho = vec_add(this0h, next0h); outle = vec_sr(outle, (__vector unsigned short)pw_two); outhe = vec_sr(outhe, (__vector unsigned short)pw_two); outlo = vec_sr(outlo, (__vector unsigned short)pw_two); @@ -102,7 +103,7 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor, (__vector unsigned char)outho, merge_pack_index); vec_st(out, 16, outptr); - block = nextblock; + this0 = next0; } } } @@ -118,8 +119,8 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor, JSAMPROW inptr_1, inptr0, inptr1, outptr0, outptr1; int inrow, outrow, col; - __vector unsigned char block_1, block0, block1, out; - __vector short block_1l, block_1h, block0l, block0h, block1l, block1h, + __vector unsigned char this_1, this0, this1, out; + __vector short this_1l, this_1h, this0l, this0h, this1l, this1h, lastcolsum_1h, lastcolsum1h, p_lastcolsum_1l, p_lastcolsum_1h, p_lastcolsum1l, p_lastcolsum1h, thiscolsum_1l, thiscolsum_1h, thiscolsum1l, thiscolsum1h, @@ -147,26 +148,26 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor, outptr0 = output_data[outrow++]; outptr1 = output_data[outrow++]; - block0 = vec_ld(0, inptr0); - block0l = (__vector short)vec_mergeh(pb_zero, block0); - block0h = (__vector short)vec_mergel(pb_zero, block0); - block0l = vec_mladd(block0l, pw_three, pw_zero); - block0h = vec_mladd(block0h, pw_three, pw_zero); - - block_1 = vec_ld(0, inptr_1); - block_1l = (__vector short)vec_mergeh(pb_zero, block_1); - block_1h = (__vector short)vec_mergel(pb_zero, block_1); - thiscolsum_1l = vec_add(block0l, block_1l); - thiscolsum_1h = vec_add(block0h, block_1h); + this0 = vec_ld(0, inptr0); + this0l = (__vector short)vec_mergeh(pb_zero, this0); + this0h = (__vector short)vec_mergel(pb_zero, this0); + this0l = vec_mladd(this0l, pw_three, pw_zero); + this0h = vec_mladd(this0h, pw_three, pw_zero); + + this_1 = vec_ld(0, inptr_1); + this_1l = (__vector short)vec_mergeh(pb_zero, this_1); + this_1h = (__vector short)vec_mergel(pb_zero, this_1); + thiscolsum_1l = vec_add(this0l, this_1l); + thiscolsum_1h = vec_add(this0h, this_1h); lastcolsum_1h = thiscolsum_1h; p_lastcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1l, last_index_col0); p_lastcolsum_1h = vec_perm(thiscolsum_1l, thiscolsum_1h, last_index); - block1 = vec_ld(0, inptr1); - block1l = (__vector short)vec_mergeh(pb_zero, block1); - block1h = (__vector short)vec_mergel(pb_zero, block1); - thiscolsum1l = vec_add(block0l, block1l); - thiscolsum1h = vec_add(block0h, block1h); + this1 = vec_ld(0, inptr1); + this1l = (__vector short)vec_mergeh(pb_zero, this1); + this1h = (__vector short)vec_mergel(pb_zero, this1); + thiscolsum1l = vec_add(this0l, this1l); + thiscolsum1h = vec_add(this0h, this1h); lastcolsum1h = thiscolsum1h; p_lastcolsum1l = vec_perm(thiscolsum1l, thiscolsum1l, last_index_col0); p_lastcolsum1h = vec_perm(thiscolsum1l, thiscolsum1h, last_index); @@ -191,25 +192,25 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor, p_nextcolsum1h = vec_perm(thiscolsum1h, thiscolsum1h, next_index_lastcol); } else { - block0 = vec_ld(16, inptr0); - block0l = (__vector short)vec_mergeh(pb_zero, block0); - block0h = (__vector short)vec_mergel(pb_zero, block0); - block0l = vec_mladd(block0l, pw_three, pw_zero); - block0h = vec_mladd(block0h, pw_three, pw_zero); - - block_1 = vec_ld(16, inptr_1); - block_1l = (__vector short)vec_mergeh(pb_zero, block_1); - block_1h = (__vector short)vec_mergel(pb_zero, block_1); - nextcolsum_1l = vec_add(block0l, block_1l); - nextcolsum_1h = vec_add(block0h, block_1h); + this0 = vec_ld(16, inptr0); + this0l = (__vector short)vec_mergeh(pb_zero, this0); + this0h = (__vector short)vec_mergel(pb_zero, this0); + this0l = vec_mladd(this0l, pw_three, pw_zero); + this0h = vec_mladd(this0h, pw_three, pw_zero); + + this_1 = vec_ld(16, inptr_1); + this_1l = (__vector short)vec_mergeh(pb_zero, this_1); + this_1h = (__vector short)vec_mergel(pb_zero, this_1); + nextcolsum_1l = vec_add(this0l, this_1l); + nextcolsum_1h = vec_add(this0h, this_1h); p_nextcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1h, next_index); p_nextcolsum_1h = vec_perm(thiscolsum_1h, nextcolsum_1l, next_index); - block1 = vec_ld(16, inptr1); - block1l = (__vector short)vec_mergeh(pb_zero, block1); - block1h = (__vector short)vec_mergel(pb_zero, block1); - nextcolsum1l = vec_add(block0l, block1l); - nextcolsum1h = vec_add(block0h, block1h); + this1 = vec_ld(16, inptr1); + this1l = (__vector short)vec_mergeh(pb_zero, this1); + this1h = (__vector short)vec_mergel(pb_zero, this1); + nextcolsum1l = vec_add(this0l, this1l); + nextcolsum1h = vec_add(this0h, this1h); p_nextcolsum1l = vec_perm(thiscolsum1l, thiscolsum1h, next_index); p_nextcolsum1h = vec_perm(thiscolsum1h, nextcolsum1l, next_index); } diff --git a/simd/jfdctfst-altivec.c b/simd/jfdctfst-altivec.c index 16a52df..c4cc26e 100644 --- a/simd/jfdctfst-altivec.c +++ b/simd/jfdctfst-altivec.c @@ -56,7 +56,7 @@ \ z1 = vec_add(tmp12, tmp13); \ z1 = vec_sl(z1, pre_multiply_scale_bits); \ - z1 = vec_madds(z1, pw_0707, zero); \ + z1 = vec_madds(z1, pw_0707, pw_zero); \ \ out2 = vec_add(tmp13, z1); \ out6 = vec_sub(tmp13, z1); \ @@ -70,13 +70,13 @@ tmp10 = vec_sl(tmp10, pre_multiply_scale_bits); \ tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \ z5 = vec_sub(tmp10, tmp12); \ - z5 = vec_madds(z5, pw_0382, zero); \ + z5 = vec_madds(z5, pw_0382, pw_zero); \ \ z2 = vec_madds(tmp10, pw_0541, z5); \ z4 = vec_madds(tmp12, pw_1306, z5); \ \ tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \ - z3 = vec_madds(tmp11, pw_0707, zero); \ + z3 = vec_madds(tmp11, pw_0707, pw_zero); \ \ z11 = vec_add(tmp7, z3); \ z13 = vec_sub(tmp7, z3); \ @@ -98,7 +98,7 @@ jsimd_fdct_ifast_altivec (DCTELEM *data) out0, out1, out2, out3, out4, out5, out6, out7; /* Constants */ - __vector short zero = vec_splat_s16(0), + __vector short pw_zero = { __8X(0) }, pw_0382 = { __8X(F_0_382 << CONST_SHIFT) }, pw_0541 = { __8X(F_0_541 << CONST_SHIFT) }, pw_0707 = { __8X(F_0_707 << CONST_SHIFT) }, diff --git a/simd/jidctfst-altivec.c b/simd/jidctfst-altivec.c index aa25fe0..fd7a2a3 100644 --- a/simd/jidctfst-altivec.c +++ b/simd/jidctfst-altivec.c @@ -54,7 +54,7 @@ \ tmp12 = vec_sub(in##2, in##6); \ tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \ - tmp12 = vec_madds(tmp12, pw_F1414, zero); \ + tmp12 = vec_madds(tmp12, pw_F1414, pw_zero); \ tmp12 = vec_sub(tmp12, tmp13); \ \ tmp0 = vec_add(tmp10, tmp13); \ @@ -73,7 +73,7 @@ \ tmp11 = vec_sub(z11, z13); \ tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \ - tmp11 = vec_madds(tmp11, pw_F1414, zero); \ + tmp11 = vec_madds(tmp11, pw_F1414, pw_zero); \ \ tmp7 = vec_add(z11, z13); \ \ @@ -88,9 +88,9 @@ */ \ \ z5 = vec_add(z10s, z12s); \ - z5 = vec_madds(z5, pw_F1847, zero); \ + z5 = vec_madds(z5, pw_F1847, pw_zero); \ \ - tmp10 = vec_madds(z12s, pw_F1082, zero); \ + tmp10 = vec_madds(z12s, pw_F1082, pw_zero); \ tmp10 = vec_sub(tmp10, z5); \ tmp12 = vec_madds(z10s, pw_MF1613, z5); \ tmp12 = vec_sub(tmp12, z10); \ @@ -115,6 +115,8 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { short *dct_table = (short *)dct_table_; + int *outptr; + __vector short row0, row1, row2, row3, row4, row5, row6, row7, col0, col1, col2, col3, col4, col5, col6, col7, quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7, @@ -122,10 +124,9 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block, z5, z10, z10s, z11, z12s, z13, out0, out1, out2, out3, out4, out5, out6, out7; __vector signed char outb; - int *outptr; /* Constants */ - __vector short zero = { __8X(0) }, + __vector short pw_zero = { __8X(0) }, pw_F1414 = { __8X(F_1_414 << CONST_SHIFT) }, pw_F1847 = { __8X(F_1_847 << CONST_SHIFT) }, pw_MF1613 = { __8X(-F_1_613 << CONST_SHIFT) }, @@ -154,9 +155,9 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block, tmp1 = vec_or(tmp1, tmp3); quant0 = vec_ld(0, dct_table); - col0 = vec_mladd(col0, quant0, zero); + col0 = vec_mladd(col0, quant0, pw_zero); - if (vec_all_eq(tmp1, zero)) { + if (vec_all_eq(tmp1, pw_zero)) { /* AC terms all zero */ row0 = vec_splat(col0, 0); @@ -178,13 +179,13 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block, quant6 = vec_ld(96, dct_table); quant7 = vec_ld(112, dct_table); - col1 = vec_mladd(col1, quant1, zero); - col2 = vec_mladd(col2, quant2, zero); - col3 = vec_mladd(col3, quant3, zero); - col4 = vec_mladd(col4, quant4, zero); - col5 = vec_mladd(col5, quant5, zero); - col6 = vec_mladd(col6, quant6, zero); - col7 = vec_mladd(col7, quant7, zero); + col1 = vec_mladd(col1, quant1, pw_zero); + col2 = vec_mladd(col2, quant2, pw_zero); + col3 = vec_mladd(col3, quant3, pw_zero); + col4 = vec_mladd(col4, quant4, pw_zero); + col5 = vec_mladd(col5, quant5, pw_zero); + col6 = vec_mladd(col6, quant6, pw_zero); + col7 = vec_mladd(col7, quant7, pw_zero); DO_IDCT(col); diff --git a/simd/jidctint-altivec.c b/simd/jidctint-altivec.c index 37cae75..7f0f8d0 100644 --- a/simd/jidctint-altivec.c +++ b/simd/jidctint-altivec.c @@ -61,10 +61,10 @@ in##26l = vec_mergeh(in##2, in##6); \ in##26h = vec_mergel(in##2, in##6); \ \ - tmp3l = vec_msums(in##26l, pw_f130_f054, zero32); \ - tmp3h = vec_msums(in##26h, pw_f130_f054, zero32); \ - tmp2l = vec_msums(in##26l, pw_f054_mf130, zero32); \ - tmp2h = vec_msums(in##26h, pw_f054_mf130, zero32); \ + tmp3l = vec_msums(in##26l, pw_f130_f054, pd_zero); \ + tmp3h = vec_msums(in##26h, pw_f130_f054, pd_zero); \ + tmp2l = vec_msums(in##26l, pw_f054_mf130, pd_zero); \ + tmp2h = vec_msums(in##26h, pw_f054_mf130, pd_zero); \ \ tmp0 = vec_add(in##0, in##4); \ tmp1 = vec_sub(in##0, in##4); \ @@ -111,10 +111,10 @@ z34l = vec_mergeh(z3, z4); \ z34h = vec_mergel(z3, z4); \ \ - z3l = vec_msums(z34l, pw_mf078_f117, zero32); \ - z3h = vec_msums(z34h, pw_mf078_f117, zero32); \ - z4l = vec_msums(z34l, pw_f117_f078, zero32); \ - z4h = vec_msums(z34h, pw_f117_f078, zero32); \ + z3l = vec_msums(z34l, pw_mf078_f117, pd_zero); \ + z3h = vec_msums(z34h, pw_mf078_f117, pd_zero); \ + z4l = vec_msums(z34l, pw_f117_f078, pd_zero); \ + z4h = vec_msums(z34h, pw_f117_f078, pd_zero); \ \ /* (Original) \ * z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; \ @@ -210,6 +210,8 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col) { short *dct_table = (short *)dct_table_; + int *outptr; + __vector short row0, row1, row2, row3, row4, row5, row6, row7, col0, col1, col2, col3, col4, col5, col6, col7, quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7, @@ -223,10 +225,9 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block, out0l, out0h, out1l, out1h, out2l, out2h, out3l, out3h, out4l, out4h, out5l, out5h, out6l, out6h, out7l, out7h; __vector signed char outb; - int *outptr; /* Constants */ - __vector short zero16 = { __8X(0) }, + __vector short pw_zero = { __8X(0) }, pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) }, pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) }, pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) }, @@ -236,7 +237,7 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block, pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) }, pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) }; __vector unsigned short pass1_bits = { __8X(PASS1_BITS) }; - __vector int zero32 = { __4X(0) }, + __vector int pd_zero = { __4X(0) }, pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) }, pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) }; __vector unsigned int descale_p1 = { __4X(DESCALE_P1) }, @@ -263,9 +264,9 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block, tmp1 = vec_or(tmp1, tmp3); quant0 = vec_ld(0, dct_table); - col0 = vec_mladd(col0, quant0, zero16); + col0 = vec_mladd(col0, quant0, pw_zero); - if (vec_all_eq(tmp1, zero16)) { + if (vec_all_eq(tmp1, pw_zero)) { /* AC terms all zero */ col0 = vec_sl(col0, pass1_bits); @@ -289,13 +290,13 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block, quant6 = vec_ld(96, dct_table); quant7 = vec_ld(112, dct_table); - col1 = vec_mladd(col1, quant1, zero16); - col2 = vec_mladd(col2, quant2, zero16); - col3 = vec_mladd(col3, quant3, zero16); - col4 = vec_mladd(col4, quant4, zero16); - col5 = vec_mladd(col5, quant5, zero16); - col6 = vec_mladd(col6, quant6, zero16); - col7 = vec_mladd(col7, quant7, zero16); + col1 = vec_mladd(col1, quant1, pw_zero); + col2 = vec_mladd(col2, quant2, pw_zero); + col3 = vec_mladd(col3, quant3, pw_zero); + col4 = vec_mladd(col4, quant4, pw_zero); + col5 = vec_mladd(col5, quant5, pw_zero); + col6 = vec_mladd(col6, quant6, pw_zero); + col7 = vec_mladd(col7, quant7, pw_zero); DO_IDCT(col, 1); diff --git a/simd/jquanti-altivec.c b/simd/jquanti-altivec.c index 12c97ee..2ac01a2 100644 --- a/simd/jquanti-altivec.c +++ b/simd/jquanti-altivec.c @@ -42,12 +42,13 @@ jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace) { JSAMPROW elemptr; + __vector unsigned char in0, in1, in2, in3, in4, in5, in6, in7; __vector short out0, out1, out2, out3, out4, out5, out6, out7; /* Constants */ __vector short pw_centerjsamp = { __8X(CENTERJSAMPLE) }; - __vector unsigned char zero = { __16X(0) }; + __vector unsigned char pb_zero = { __16X(0) }; LOAD_ROW(0); LOAD_ROW(1); @@ -58,14 +59,14 @@ jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col, LOAD_ROW(6); LOAD_ROW(7); - out0 = (__vector short)vec_mergeh(zero, in0); - out1 = (__vector short)vec_mergeh(zero, in1); - out2 = (__vector short)vec_mergeh(zero, in2); - out3 = (__vector short)vec_mergeh(zero, in3); - out4 = (__vector short)vec_mergeh(zero, in4); - out5 = (__vector short)vec_mergeh(zero, in5); - out6 = (__vector short)vec_mergeh(zero, in6); - out7 = (__vector short)vec_mergeh(zero, in7); + out0 = (__vector short)vec_mergeh(pb_zero, in0); + out1 = (__vector short)vec_mergeh(pb_zero, in1); + out2 = (__vector short)vec_mergeh(pb_zero, in2); + out3 = (__vector short)vec_mergeh(pb_zero, in3); + out4 = (__vector short)vec_mergeh(pb_zero, in4); + out5 = (__vector short)vec_mergeh(pb_zero, in5); + out6 = (__vector short)vec_mergeh(pb_zero, in6); + out7 = (__vector short)vec_mergeh(pb_zero, in7); out0 = vec_sub(out0, pw_centerjsamp); out1 = vec_sub(out1, pw_centerjsamp); @@ -89,7 +90,8 @@ jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col, #define WORD_BIT 16 -/* There is no AltiVec unsigned multiply instruction, hence this. */ +/* There is no AltiVec 16-bit unsigned multiply instruction, hence this. + We basically need an unsigned equivalent of vec_madds(). */ #define MULTIPLY(vs0, vs1, out) { \ tmpe = vec_mule((__vector unsigned short)vs0, \ @@ -105,13 +107,11 @@ void jsimd_quantize_altivec (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace) { - __vector short row0, row1, row2, row3, row4, row5, row6, row7; - __vector short row0s, row1s, row2s, row3s, row4s, row5s, row6s, row7s; - __vector short corr0, corr1, corr2, corr3, corr4, corr5, corr6, corr7; - __vector short recip0, recip1, recip2, recip3, recip4, recip5, recip6, - recip7; - __vector short scale0, scale1, scale2, scale3, scale4, scale5, scale6, - scale7; + __vector short row0, row1, row2, row3, row4, row5, row6, row7, + row0s, row1s, row2s, row3s, row4s, row5s, row6s, row7s, + corr0, corr1, corr2, corr3, corr4, corr5, corr6, corr7, + recip0, recip1, recip2, recip3, recip4, recip5, recip6, recip7, + scale0, scale1, scale2, scale3, scale4, scale5, scale6, scale7; __vector unsigned int tmpe, tmpo; /* Constants */ |