diff options
author | dcommander <dcommander@632fc199-4ca6-4c93-a231-07263d6284db> | 2014-12-23 02:42:59 +0000 |
---|---|---|
committer | dcommander <dcommander@632fc199-4ca6-4c93-a231-07263d6284db> | 2014-12-23 02:42:59 +0000 |
commit | 13e937c7f49172e7415915a057c52f76c899f780 (patch) | |
tree | 4d22ac6d26773aa382e80bfd3130377c571b3d27 | |
parent | 18c3256ba0149ef24e1900ebcf7fe291a19d3aca (diff) |
Document the fact that the AltiVec implementation uses the same modified algorithms as the SSE2 implementation
git-svn-id: svn://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1473 632fc199-4ca6-4c93-a231-07263d6284db
-rw-r--r-- | simd/jfdctint-altivec.c | 37 | ||||
-rw-r--r-- | simd/jidctfst-altivec.c | 10 | ||||
-rw-r--r-- | simd/jidctint-altivec.c | 39 |
3 files changed, 85 insertions, 1 deletion
diff --git a/simd/jfdctint-altivec.c b/simd/jfdctint-altivec.c index 548ab96..1ddf261 100644 --- a/simd/jfdctint-altivec.c +++ b/simd/jfdctint-altivec.c @@ -46,6 +46,16 @@ #define DO_FDCT_COMMON(PASS) \ { \ + /* (Original) \ + * z1 = (tmp12 + tmp13) * 0.541196100; \ + * data2 = z1 + tmp13 * 0.765366865; \ + * data6 = z1 + tmp12 * -1.847759065; \ + * \ + * (This implementation) \ + * data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; \ + * data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); \ + */ \ + \ tmp1312l = vec_mergeh(tmp13, tmp12); \ tmp1312h = vec_mergel(tmp13, tmp12); \ \ @@ -67,6 +77,16 @@ z3 = vec_add(tmp4, tmp6); \ z4 = vec_add(tmp5, tmp7); \ \ + /* (Original) \ + * z5 = (z3 + z4) * 1.175875602; \ + * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ + * z3 += z5; z4 += z5; \ + * \ + * (This implementation) \ + * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ + * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ + */ \ + \ z34l = vec_mergeh(z3, z4); \ z34h = vec_mergel(z3, z4); \ \ @@ -75,6 +95,23 @@ z4l = vec_msums(z34l, pw_f117_f078, pd_descale_p##PASS); \ z4h = vec_msums(z34h, pw_f117_f078, pd_descale_p##PASS); \ \ + /* (Original) \ + * z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; \ + * tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; \ + * tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; \ + * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ + * data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; \ + * data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; \ + * \ + * (This implementation) \ + * tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; \ + * tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; \ + * tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); \ + * tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); \ + * data7 = tmp4 + z3; data5 = tmp5 + z4; \ + * data3 = tmp6 + z3; data1 = tmp7 + z4; \ + */ \ + \ tmp47l = vec_mergeh(tmp4, tmp7); \ 
tmp47h = vec_mergel(tmp4, tmp7); \ \ diff --git a/simd/jidctfst-altivec.c b/simd/jidctfst-altivec.c index 37a2f4e..282a97e 100644 --- a/simd/jidctfst-altivec.c +++ b/simd/jidctfst-altivec.c @@ -77,6 +77,16 @@ \ tmp7 = vec_add(z11, z13); \ \ + /* To avoid overflow... \ + * \ + * (Original) \ + * tmp12 = -2.613125930 * z10 + z5; \ + * \ + * (This implementation) \ + * tmp12 = (-1.613125930 - 1) * z10 + z5; \ + * = -1.613125930 * z10 - z10 + z5; \ + */ \ + \ z5 = vec_add(z10s, z12s); \ z5 = vec_madds(z5, pw_F1847, zero); \ \ diff --git a/simd/jidctint-altivec.c b/simd/jidctint-altivec.c index a354fcc..c30c885 100644 --- a/simd/jidctint-altivec.c +++ b/simd/jidctint-altivec.c @@ -46,7 +46,17 @@ #define DO_IDCT(in, PASS) \ { \ - /* Even part */ \ + /* Even part \ + * \ + * (Original) \ + * z1 = (z2 + z3) * 0.541196100; \ + * tmp2 = z1 + z3 * -1.847759065; \ + * tmp3 = z1 + z2 * 0.765366865; \ + * \ + * (This implementation) \ + * tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); \ + * tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; \ + */ \ \ in##26l = vec_mergeh(in##2, in##6); \ in##26h = vec_mergel(in##2, in##6); \ @@ -88,6 +98,16 @@ z3 = vec_add(in##3, in##7); \ z4 = vec_add(in##1, in##5); \ \ + /* (Original) \ + * z5 = (z3 + z4) * 1.175875602; \ + * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \ + * z3 += z5; z4 += z5; \ + * \ + * (This implementation) \ + * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \ + * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \ + */ \ + \ z34l = vec_mergeh(z3, z4); \ z34h = vec_mergel(z3, z4); \ \ @@ -96,6 +116,23 @@ z4l = vec_msums(z34l, pw_f117_f078, zero32); \ z4h = vec_msums(z34h, pw_f117_f078, zero32); \ \ + /* (Original) \ + * z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; \ + * tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; \ + * tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; \ + * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \ + * tmp0 += z1 + z3; tmp1 += z2 + z4; \ + * tmp2 
+= z2 + z3; tmp3 += z1 + z4; \ + * \ + * (This implementation) \ + * tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; \ + * tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; \ + * tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); \ + * tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); \ + * tmp0 += z3; tmp1 += z4; \ + * tmp2 += z3; tmp3 += z4; \ + */ \ + \ in##71l = vec_mergeh(in##7, in##1); \ in##71h = vec_mergel(in##7, in##1); \ \ |