diff options
Diffstat (limited to 'simd/jdsample-altivec.c')
-rw-r--r-- | simd/jdsample-altivec.c | 133 |
1 files changed, 67 insertions, 66 deletions
diff --git a/simd/jdsample-altivec.c b/simd/jdsample-altivec.c index be3bbe0..6b8cecf 100644 --- a/simd/jdsample-altivec.c +++ b/simd/jdsample-altivec.c @@ -35,12 +35,13 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor, JSAMPROW inptr, outptr; int inrow, col; - __vector unsigned char block, last, next, lastblock, nextblock = {0}, out; - __vector short blocke, blocko, blockl, blockh, lastl, lasth, nextl, nexth, - outle, outhe, outlo, outho; + __vector unsigned char this0, last0, p_last0, next0 = {0}, p_next0, + out; + __vector short this0e, this0o, this0l, this0h, last0l, last0h, + next0l, next0h, outle, outhe, outlo, outho; /* Constants */ - __vector unsigned char pb_three = { __16X(3) }, pb_zero = { __16X(0) }, + __vector unsigned char pb_zero = { __16X(0) }, pb_three = { __16X(3) }, last_index_col0 = {0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14}, last_index = {15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30}, next_index = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, @@ -52,44 +53,44 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor, inptr = input_data[inrow]; outptr = output_data[inrow]; - block = vec_ld(0, inptr); - last = vec_perm(block, block, last_index_col0); - lastblock = block; + this0 = vec_ld(0, inptr); + p_last0 = vec_perm(this0, this0, last_index_col0); + last0 = this0; for (col = 0; col < downsampled_width; col += 16, inptr += 16, outptr += 32) { if (col > 0) { - last = vec_perm(lastblock, block, last_index); - lastblock = block; + p_last0 = vec_perm(last0, this0, last_index); + last0 = this0; } if (downsampled_width - col <= 16) - next = vec_perm(block, block, next_index_lastcol); + p_next0 = vec_perm(this0, this0, next_index_lastcol); else { - nextblock = vec_ld(16, inptr); - next = vec_perm(block, nextblock, next_index); + next0 = vec_ld(16, inptr); + p_next0 = vec_perm(this0, next0, next_index); } - blocke = (__vector short)vec_mule(block, pb_three); - blocko = (__vector short)vec_mulo(block, pb_three); - blockl = vec_mergeh(blocke, blocko); - blockh = vec_mergel(blocke, blocko); - - lastl = (__vector short)vec_mergeh(pb_zero, last); - lasth = (__vector short)vec_mergel(pb_zero, last); - lastl = vec_add(lastl, pw_one); - lasth = vec_add(lasth, pw_one); - - nextl = (__vector short)vec_mergeh(pb_zero, next); - nexth = (__vector short)vec_mergel(pb_zero, next); - nextl = vec_add(nextl, pw_two); - nexth = vec_add(nexth, pw_two); - - outle = vec_add(blockl, lastl); - outhe = vec_add(blockh, lasth); - outlo = vec_add(blockl, nextl); - outho = vec_add(blockh, nexth); + this0e = (__vector short)vec_mule(this0, pb_three); + this0o = (__vector short)vec_mulo(this0, pb_three); + this0l = vec_mergeh(this0e, this0o); + this0h = vec_mergel(this0e, this0o); + + last0l = (__vector short)vec_mergeh(pb_zero, p_last0); + last0h = (__vector short)vec_mergel(pb_zero, p_last0); + last0l = vec_add(last0l, pw_one); + last0h = vec_add(last0h, pw_one); + + next0l = (__vector short)vec_mergeh(pb_zero, p_next0); + next0h = (__vector short)vec_mergel(pb_zero, p_next0); + next0l = vec_add(next0l, pw_two); + next0h = vec_add(next0h, pw_two); + + outle = vec_add(this0l, last0l); + outhe = vec_add(this0h, last0h); + outlo = vec_add(this0l, next0l); + outho = vec_add(this0h, next0h); outle = vec_sr(outle, (__vector unsigned short)pw_two); outhe = vec_sr(outhe, (__vector unsigned short)pw_two); outlo = vec_sr(outlo, (__vector unsigned short)pw_two); @@ -102,7 +103,7 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor, (__vector unsigned char)outho, merge_pack_index); vec_st(out, 16, outptr); - block = nextblock; + this0 = next0; } } } @@ -118,8 +119,8 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor, JSAMPROW inptr_1, inptr0, inptr1, outptr0, outptr1; int inrow, outrow, col; - __vector unsigned char block_1, block0, block1, out; - __vector short block_1l, block_1h, block0l, block0h, block1l, block1h, + __vector unsigned char this_1, this0, this1, out; + __vector short this_1l, this_1h, this0l, this0h, this1l, this1h, lastcolsum_1h, lastcolsum1h, p_lastcolsum_1l, p_lastcolsum_1h, p_lastcolsum1l, p_lastcolsum1h, thiscolsum_1l, thiscolsum_1h, thiscolsum1l, thiscolsum1h, @@ -147,26 +148,26 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor, outptr0 = output_data[outrow++]; outptr1 = output_data[outrow++]; - block0 = vec_ld(0, inptr0); - block0l = (__vector short)vec_mergeh(pb_zero, block0); - block0h = (__vector short)vec_mergel(pb_zero, block0); - block0l = vec_mladd(block0l, pw_three, pw_zero); - block0h = vec_mladd(block0h, pw_three, pw_zero); - - block_1 = vec_ld(0, inptr_1); - block_1l = (__vector short)vec_mergeh(pb_zero, block_1); - block_1h = (__vector short)vec_mergel(pb_zero, block_1); - thiscolsum_1l = vec_add(block0l, block_1l); - thiscolsum_1h = vec_add(block0h, block_1h); + this0 = vec_ld(0, inptr0); + this0l = (__vector short)vec_mergeh(pb_zero, this0); + this0h = (__vector short)vec_mergel(pb_zero, this0); + this0l = vec_mladd(this0l, pw_three, pw_zero); + this0h = vec_mladd(this0h, pw_three, pw_zero); + + this_1 = vec_ld(0, inptr_1); + this_1l = (__vector short)vec_mergeh(pb_zero, this_1); + this_1h = (__vector short)vec_mergel(pb_zero, this_1); + thiscolsum_1l = vec_add(this0l, this_1l); + thiscolsum_1h = vec_add(this0h, this_1h); lastcolsum_1h = thiscolsum_1h; p_lastcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1l, last_index_col0); p_lastcolsum_1h = vec_perm(thiscolsum_1l, thiscolsum_1h, last_index); - block1 = vec_ld(0, inptr1); - block1l = (__vector short)vec_mergeh(pb_zero, block1); - block1h = (__vector short)vec_mergel(pb_zero, block1); - thiscolsum1l = vec_add(block0l, block1l); - thiscolsum1h = vec_add(block0h, block1h); + this1 = vec_ld(0, inptr1); + this1l = (__vector short)vec_mergeh(pb_zero, this1); + this1h = (__vector short)vec_mergel(pb_zero, this1); + thiscolsum1l = vec_add(this0l, this1l); + thiscolsum1h = vec_add(this0h, this1h); lastcolsum1h = thiscolsum1h; p_lastcolsum1l = vec_perm(thiscolsum1l, thiscolsum1l, last_index_col0); p_lastcolsum1h = vec_perm(thiscolsum1l, thiscolsum1h, last_index); @@ -191,25 +192,25 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor, p_nextcolsum1h = vec_perm(thiscolsum1h, thiscolsum1h, next_index_lastcol); } else { - block0 = vec_ld(16, inptr0); - block0l = (__vector short)vec_mergeh(pb_zero, block0); - block0h = (__vector short)vec_mergel(pb_zero, block0); - block0l = vec_mladd(block0l, pw_three, pw_zero); - block0h = vec_mladd(block0h, pw_three, pw_zero); - - block_1 = vec_ld(16, inptr_1); - block_1l = (__vector short)vec_mergeh(pb_zero, block_1); - block_1h = (__vector short)vec_mergel(pb_zero, block_1); - nextcolsum_1l = vec_add(block0l, block_1l); - nextcolsum_1h = vec_add(block0h, block_1h); + this0 = vec_ld(16, inptr0); + this0l = (__vector short)vec_mergeh(pb_zero, this0); + this0h = (__vector short)vec_mergel(pb_zero, this0); + this0l = vec_mladd(this0l, pw_three, pw_zero); + this0h = vec_mladd(this0h, pw_three, pw_zero); + + this_1 = vec_ld(16, inptr_1); + this_1l = (__vector short)vec_mergeh(pb_zero, this_1); + this_1h = (__vector short)vec_mergel(pb_zero, this_1); + nextcolsum_1l = vec_add(this0l, this_1l); + nextcolsum_1h = vec_add(this0h, this_1h); p_nextcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1h, next_index); p_nextcolsum_1h = vec_perm(thiscolsum_1h, nextcolsum_1l, next_index); - block1 = vec_ld(16, inptr1); - block1l = (__vector short)vec_mergeh(pb_zero, block1); - block1h = (__vector short)vec_mergel(pb_zero, block1); - nextcolsum1l = vec_add(block0l, block1l); - nextcolsum1h = vec_add(block0h, block1h); + this1 = vec_ld(16, inptr1); + this1l = (__vector short)vec_mergeh(pb_zero, this1); + this1h = (__vector short)vec_mergel(pb_zero, this1); + nextcolsum1l = vec_add(this0l, this1l); + nextcolsum1h = vec_add(this0h, this1h); p_nextcolsum1l = vec_perm(thiscolsum1l, thiscolsum1h, next_index); p_nextcolsum1h = vec_perm(thiscolsum1h, nextcolsum1l, next_index); } |