about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author dcommander <dcommander@632fc199-4ca6-4c93-a231-07263d6284db> 2014-12-23 02:42:59 +0000
committer dcommander <dcommander@632fc199-4ca6-4c93-a231-07263d6284db> 2014-12-23 02:42:59 +0000
commit 13e937c7f49172e7415915a057c52f76c899f780 (patch)
tree 4d22ac6d26773aa382e80bfd3130377c571b3d27
parent 18c3256ba0149ef24e1900ebcf7fe291a19d3aca (diff)
Document the fact that the AltiVec implementation uses the same modified algorithms as the SSE2 implementation
git-svn-id: svn://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1473 632fc199-4ca6-4c93-a231-07263d6284db
-rw-r--r-- simd/jfdctint-altivec.c 37
-rw-r--r-- simd/jidctfst-altivec.c 10
-rw-r--r-- simd/jidctint-altivec.c 39
3 files changed, 85 insertions, 1 deletion
diff --git a/simd/jfdctint-altivec.c b/simd/jfdctint-altivec.c
index 548ab96..1ddf261 100644
--- a/simd/jfdctint-altivec.c
+++ b/simd/jfdctint-altivec.c
@@ -46,6 +46,16 @@
#define DO_FDCT_COMMON(PASS) \
{ \
+ /* (Original) \
+ * z1 = (tmp12 + tmp13) * 0.541196100; \
+ * data2 = z1 + tmp13 * 0.765366865; \
+ * data6 = z1 + tmp12 * -1.847759065; \
+ * \
+ * (This implementation) \
+ * data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; \
+ * data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); \
+ */ \
+ \
tmp1312l = vec_mergeh(tmp13, tmp12); \
tmp1312h = vec_mergel(tmp13, tmp12); \
\
@@ -67,6 +77,16 @@
z3 = vec_add(tmp4, tmp6); \
z4 = vec_add(tmp5, tmp7); \
\
+ /* (Original) \
+ * z5 = (z3 + z4) * 1.175875602; \
+ * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \
+ * z3 += z5; z4 += z5; \
+ * \
+ * (This implementation) \
+ * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \
+ * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \
+ */ \
+ \
z34l = vec_mergeh(z3, z4); \
z34h = vec_mergel(z3, z4); \
\
@@ -75,6 +95,23 @@
z4l = vec_msums(z34l, pw_f117_f078, pd_descale_p##PASS); \
z4h = vec_msums(z34h, pw_f117_f078, pd_descale_p##PASS); \
\
+ /* (Original) \
+ * z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; \
+ * tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; \
+ * tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; \
+ * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \
+ * data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; \
+ * data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; \
+ * \
+ * (This implementation) \
+ * tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; \
+ * tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; \
+ * tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); \
+ * tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); \
+ * data7 = tmp4 + z3; data5 = tmp5 + z4; \
+ * data3 = tmp6 + z3; data1 = tmp7 + z4; \
+ */ \
+ \
tmp47l = vec_mergeh(tmp4, tmp7); \
tmp47h = vec_mergel(tmp4, tmp7); \
\
diff --git a/simd/jidctfst-altivec.c b/simd/jidctfst-altivec.c
index 37a2f4e..282a97e 100644
--- a/simd/jidctfst-altivec.c
+++ b/simd/jidctfst-altivec.c
@@ -77,6 +77,16 @@
\
tmp7 = vec_add(z11, z13); \
\
+ /* To avoid overflow... \
+ * \
+ * (Original) \
+ * tmp12 = -2.613125930 * z10 + z5; \
+ * \
+ * (This implementation) \
+ * tmp12 = (-1.613125930 - 1) * z10 + z5; \
+ * = -1.613125930 * z10 - z10 + z5; \
+ */ \
+ \
z5 = vec_add(z10s, z12s); \
z5 = vec_madds(z5, pw_F1847, zero); \
\
diff --git a/simd/jidctint-altivec.c b/simd/jidctint-altivec.c
index a354fcc..c30c885 100644
--- a/simd/jidctint-altivec.c
+++ b/simd/jidctint-altivec.c
@@ -46,7 +46,17 @@
#define DO_IDCT(in, PASS) \
{ \
- /* Even part */ \
+ /* Even part \
+ * \
+ * (Original) \
+ * z1 = (z2 + z3) * 0.541196100; \
+ * tmp2 = z1 + z3 * -1.847759065; \
+ * tmp3 = z1 + z2 * 0.765366865; \
+ * \
+ * (This implementation) \
+ * tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); \
+ * tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; \
+ */ \
\
in##26l = vec_mergeh(in##2, in##6); \
in##26h = vec_mergel(in##2, in##6); \
@@ -88,6 +98,16 @@
z3 = vec_add(in##3, in##7); \
z4 = vec_add(in##1, in##5); \
\
+ /* (Original) \
+ * z5 = (z3 + z4) * 1.175875602; \
+ * z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \
+ * z3 += z5; z4 += z5; \
+ * \
+ * (This implementation) \
+ * z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \
+ * z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \
+ */ \
+ \
z34l = vec_mergeh(z3, z4); \
z34h = vec_mergel(z3, z4); \
\
@@ -96,6 +116,23 @@
z4l = vec_msums(z34l, pw_f117_f078, zero32); \
z4h = vec_msums(z34h, pw_f117_f078, zero32); \
\
+ /* (Original) \
+ * z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; \
+ * tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; \
+ * tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; \
+ * z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \
+ * tmp0 += z1 + z3; tmp1 += z2 + z4; \
+ * tmp2 += z2 + z3; tmp3 += z1 + z4; \
+ * \
+ * (This implementation) \
+ * tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; \
+ * tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; \
+ * tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); \
+ * tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); \
+ * tmp0 += z3; tmp1 += z4; \
+ * tmp2 += z3; tmp3 += z4; \
+ */ \
+ \
in##71l = vec_mergeh(in##7, in##1); \
in##71h = vec_mergel(in##7, in##1); \
\