diff options
Diffstat (limited to 'simd/jcsample-altivec.c')
-rw-r--r-- | simd/jcsample-altivec.c | 104 |
1 files changed, 53 insertions, 51 deletions
diff --git a/simd/jcsample-altivec.c b/simd/jcsample-altivec.c index f312870..62f72be 100644 --- a/simd/jcsample-altivec.c +++ b/simd/jcsample-altivec.c @@ -35,15 +35,16 @@ jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, int outrow, outcol; JDIMENSION output_cols = width_blocks * DCTSIZE; JSAMPROW inptr, outptr; - __vector unsigned char tmpa, tmpb, out; - __vector unsigned short tmpae, tmpao, tmpbe, tmpbo, outl, outh; + + __vector unsigned char this0, next0, out; + __vector unsigned short this0e, this0o, next0e, next0o, outl, outh; /* Constants */ - __vector unsigned short bias = { __4X2(0, 1) }, - one = { __8X(1) }; + __vector unsigned short pw_bias = { __4X2(0, 1) }, + pw_one = { __8X(1) }; __vector unsigned char even_odd_index = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 }, - zero = { __16X(0) }; + pb_zero = { __16X(0) }; expand_right_edge(input_data, max_v_samp_factor, image_width, output_cols * 2); @@ -55,22 +56,22 @@ jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, for (outcol = output_cols; outcol > 0; outcol -= 16, inptr += 32, outptr += 16) { - tmpa = vec_ld(0, inptr); - tmpa = vec_perm(tmpa, tmpa, even_odd_index); - tmpae = (__vector unsigned short)vec_mergeh(zero, tmpa); - tmpao = (__vector unsigned short)vec_mergel(zero, tmpa); - outl = vec_add(tmpae, tmpao); - outl = vec_add(outl, bias); - outl = vec_sr(outl, one); + this0 = vec_ld(0, inptr); + this0 = vec_perm(this0, this0, even_odd_index); + this0e = (__vector unsigned short)vec_mergeh(pb_zero, this0); + this0o = (__vector unsigned short)vec_mergel(pb_zero, this0); + outl = vec_add(this0e, this0o); + outl = vec_add(outl, pw_bias); + outl = vec_sr(outl, pw_one); if (outcol > 16) { - tmpb = vec_ld(16, inptr); - tmpb = vec_perm(tmpb, tmpb, even_odd_index); - tmpbe = (__vector unsigned short)vec_mergeh(zero, tmpb); - tmpbo = (__vector unsigned short)vec_mergel(zero, tmpb); - outh = vec_add(tmpbe, tmpbo); - outh = vec_add(outh, bias); - outh = vec_sr(outh, one); + next0 = vec_ld(16, inptr); + next0 = vec_perm(next0, next0, even_odd_index); + next0e = (__vector unsigned short)vec_mergeh(pb_zero, next0); + next0o = (__vector unsigned short)vec_mergel(pb_zero, next0); + outh = vec_add(next0e, next0o); + outh = vec_add(outh, pw_bias); + outh = vec_sr(outh, pw_one); } else outh = vec_splat_u16(0); @@ -90,16 +91,17 @@ jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, int inrow, outrow, outcol; JDIMENSION output_cols = width_blocks * DCTSIZE; JSAMPROW inptr0, inptr1, outptr; - __vector unsigned char tmp0a, tmp0b, tmp1a, tmp1b, out; - __vector unsigned short tmp0ae, tmp0ao, tmp0be, tmp0bo, tmp1ae, tmp1ao, - tmp1be, tmp1bo, out0l, out0h, out1l, out1h, outl, outh; + + __vector unsigned char this0, next0, this1, next1, out; + __vector unsigned short this0e, this0o, next0e, next0o, this1e, this1o, + next1e, next1o, out0l, out0h, out1l, out1h, outl, outh; /* Constants */ - __vector unsigned short bias = { __4X2(1, 2) }, - two = { __8X(2) }; + __vector unsigned short pw_bias = { __4X2(1, 2) }, + pw_two = { __8X(2) }; __vector unsigned char even_odd_index = { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 }, - zero = { __16X(0) }; + pb_zero = { __16X(0) }; expand_right_edge(input_data, max_v_samp_factor, image_width, output_cols * 2); @@ -107,45 +109,45 @@ jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, for (inrow = 0, outrow = 0; outrow < v_samp_factor; inrow += 2, outrow++) { - outptr = output_data[outrow]; inptr0 = input_data[inrow]; inptr1 = input_data[inrow + 1]; + outptr = output_data[outrow]; for (outcol = output_cols; outcol > 0; outcol -= 16, inptr0 += 32, inptr1 += 32, outptr += 16) { - tmp0a = vec_ld(0, inptr0); - tmp0a = vec_perm(tmp0a, tmp0a, even_odd_index); - tmp0ae = (__vector unsigned short)vec_mergeh(zero, tmp0a); - tmp0ao = (__vector unsigned short)vec_mergel(zero, tmp0a); - out0l = vec_add(tmp0ae, tmp0ao); + this0 = vec_ld(0, inptr0); + this0 = vec_perm(this0, this0, even_odd_index); + this0e = (__vector unsigned short)vec_mergeh(pb_zero, this0); + this0o = (__vector unsigned short)vec_mergel(pb_zero, this0); + out0l = vec_add(this0e, this0o); - tmp1a = vec_ld(0, inptr1); - tmp1a = vec_perm(tmp1a, tmp1a, even_odd_index); - tmp1ae = (__vector unsigned short)vec_mergeh(zero, tmp1a); - tmp1ao = (__vector unsigned short)vec_mergel(zero, tmp1a); - out1l = vec_add(tmp1ae, tmp1ao); + this1 = vec_ld(0, inptr1); + this1 = vec_perm(this1, this1, even_odd_index); + this1e = (__vector unsigned short)vec_mergeh(pb_zero, this1); + this1o = (__vector unsigned short)vec_mergel(pb_zero, this1); + out1l = vec_add(this1e, this1o); outl = vec_add(out0l, out1l); - outl = vec_add(outl, bias); - outl = vec_sr(outl, two); + outl = vec_add(outl, pw_bias); + outl = vec_sr(outl, pw_two); if (outcol > 16) { - tmp0b = vec_ld(16, inptr0); - tmp0b = vec_perm(tmp0b, tmp0b, even_odd_index); - tmp0be = (__vector unsigned short)vec_mergeh(zero, tmp0b); - tmp0bo = (__vector unsigned short)vec_mergel(zero, tmp0b); - out0h = vec_add(tmp0be, tmp0bo); - - tmp1b = vec_ld(16, inptr1); - tmp1b = vec_perm(tmp1b, tmp1b, even_odd_index); - tmp1be = (__vector unsigned short)vec_mergeh(zero, tmp1b); - tmp1bo = (__vector unsigned short)vec_mergel(zero, tmp1b); - out1h = vec_add(tmp1be, tmp1bo); + next0 = vec_ld(16, inptr0); + next0 = vec_perm(next0, next0, even_odd_index); + next0e = (__vector unsigned short)vec_mergeh(pb_zero, next0); + next0o = (__vector unsigned short)vec_mergel(pb_zero, next0); + out0h = vec_add(next0e, next0o); + + next1 = vec_ld(16, inptr1); + next1 = vec_perm(next1, next1, even_odd_index); + next1e = (__vector unsigned short)vec_mergeh(pb_zero, next1); + next1o = (__vector unsigned short)vec_mergel(pb_zero, next1); + out1h = vec_add(next1e, next1o); outh = vec_add(out0h, out1h); - outh = vec_add(outh, bias); - outh = vec_sr(outh, two); + outh = vec_add(outh, pw_bias); + outh = vec_sr(outh, pw_two); } else outh = vec_splat_u16(0); |