aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Gall <tom.gall@linaro.org> 2011-08-29 10:08:46 -0500
committer Tom Gall <tom.gall@linaro.org> 2011-08-29 10:08:46 -0500
commit ae8c4e596b2ac0a4a1e906e26b2750195add8cf8 (patch)
tree 6c5f332a663b5e786ef6f7803763c8cbfe6694d7
parent 80a9edd69dd458905189babf83630b35c4b3e3b5 (diff)
restore
-rw-r--r-- simd/jsimd_arm.c 30
-rw-r--r-- simd/jsimd_arm_neon.S 205
2 files changed, 0 insertions, 235 deletions
diff --git a/simd/jsimd_arm.c b/simd/jsimd_arm.c
index 9ed49fd..a9d920c 100644
--- a/simd/jsimd_arm.c
+++ b/simd/jsimd_arm.c
@@ -136,7 +136,6 @@ jsimd_can_rgb_ycc (void)
{
init_simd();
-<<<<<<< HEAD
/* The code is optimised for these values only */
if (BITS_IN_JSAMPLE != 8)
return 0;
@@ -148,8 +147,6 @@ jsimd_can_rgb_ycc (void)
if (simd_support & JSIMD_ARM_NEON)
return 1;
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
return 0;
}
@@ -184,7 +181,6 @@ jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
JDIMENSION output_row, int num_rows)
{
-<<<<<<< HEAD
void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
switch(cinfo->in_color_space)
@@ -215,8 +211,6 @@ jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
if (simd_support & JSIMD_ARM_NEON)
neonfct(cinfo->image_width, input_buf,
output_buf, output_row, num_rows);
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
}
GLOBAL(void)
@@ -392,7 +386,6 @@ jsimd_can_convsamp (void)
{
init_simd();
-<<<<<<< HEAD
/* The code is optimised for these values only */
if (DCTSIZE != 8)
return 0;
@@ -406,8 +399,6 @@ jsimd_can_convsamp (void)
if (simd_support & JSIMD_ARM_NEON)
return 1;
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
return 0;
}
@@ -423,11 +414,8 @@ GLOBAL(void)
jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
DCTELEM * workspace)
{
-<<<<<<< HEAD
if (simd_support & JSIMD_ARM_NEON)
jsimd_convsamp_neon(sample_data, start_col, workspace);
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
}
GLOBAL(void)
@@ -449,7 +437,6 @@ jsimd_can_fdct_ifast (void)
{
init_simd();
-<<<<<<< HEAD
/* The code is optimised for these values only */
if (DCTSIZE != 8)
return 0;
@@ -459,8 +446,6 @@ jsimd_can_fdct_ifast (void)
if (simd_support & JSIMD_ARM_NEON)
return 1;
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
return 0;
}
@@ -480,11 +465,8 @@ jsimd_fdct_islow (DCTELEM * data)
GLOBAL(void)
jsimd_fdct_ifast (DCTELEM * data)
{
-<<<<<<< HEAD
if (simd_support & JSIMD_ARM_NEON)
jsimd_fdct_ifast_neon(data);
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
}
GLOBAL(void)
@@ -497,7 +479,6 @@ jsimd_can_quantize (void)
{
init_simd();
-<<<<<<< HEAD
/* The code is optimised for these values only */
if (DCTSIZE != 8)
return 0;
@@ -509,8 +490,6 @@ jsimd_can_quantize (void)
if (simd_support & JSIMD_ARM_NEON)
return 1;
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
return 0;
}
@@ -526,11 +505,8 @@ GLOBAL(void)
jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
DCTELEM * workspace)
{
-<<<<<<< HEAD
if (simd_support & JSIMD_ARM_NEON)
jsimd_quantize_neon(coef_block, divisors, workspace);
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
}
GLOBAL(void)
@@ -608,7 +584,6 @@ jsimd_can_idct_islow (void)
{
init_simd();
-<<<<<<< HEAD
/* The code is optimised for these values only */
if (DCTSIZE != 8)
return 0;
@@ -624,8 +599,6 @@ jsimd_can_idct_islow (void)
if (simd_support & JSIMD_ARM_NEON)
return 1;
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
return 0;
}
@@ -667,11 +640,8 @@ jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
JDIMENSION output_col)
{
-<<<<<<< HEAD
if ((simd_support & JSIMD_ARM_NEON))
jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col);
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
}
GLOBAL(void)
diff --git a/simd/jsimd_arm_neon.S b/simd/jsimd_arm_neon.S
index 3f1cf9e..9ef6efc 100644
--- a/simd/jsimd_arm_neon.S
+++ b/simd/jsimd_arm_neon.S
@@ -62,7 +62,6 @@ _\fname:
vtrn.32 \x1, \x3
.endm
-<<<<<<< HEAD
#define CENTERJSAMPLE 128
/*****************************************************************************/
@@ -537,8 +536,6 @@ asm_function jsimd_idct_islow_neon
.unreq ROW7R
.endfunc
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
/*****************************************************************************/
/*
@@ -546,7 +543,6 @@ asm_function jsimd_idct_islow_neon
*
* This function contains a fast, not so accurate integer implementation of
* the inverse DCT (Discrete Cosine Transform). It uses the same calculations
-<<<<<<< HEAD
* and produces exactly the same output as IJG's original 'jpeg_idct_ifast'
* function from jidctfst.c
*
@@ -556,12 +552,6 @@ asm_function jsimd_idct_islow_neon
* like "x * 1.082392200" have to be converted to "x * 0.082392200 + x",
* which introduces an extra addition. Overall, there are 6 extra additions
* per 1-D IDCT pass, totalling to 5 VQDMULH and 35 VADD/VSUB instructions.
-=======
- * and produces exactly the same output as IJG's original 'jpeg_idct_fast'
- * function from jidctfst.c
- *
- * TODO: a bit better instructions scheduling is needed.
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
*/
#define XFIX_1_082392200 d0[0]
@@ -576,70 +566,12 @@ jsimd_idct_ifast_neon_consts:
.short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */
.short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */
-<<<<<<< HEAD
-=======
-/* 1-D IDCT helper macro */
-
-.macro idct_helper x0, x1, x2, x3, x4, x5, x6, x7, \
- t10, t11, t12, t13, t14
-
- vsub.s16 \t10, \x0, \x4
- vadd.s16 \x4, \x0, \x4
- vswp.s16 \t10, \x0
- vsub.s16 \t11, \x2, \x6
- vadd.s16 \x6, \x2, \x6
- vswp.s16 \t11, \x2
- vsub.s16 \t10, \x3, \x5
- vadd.s16 \x5, \x3, \x5
- vswp.s16 \t10, \x3
- vsub.s16 \t11, \x1, \x7
- vadd.s16 \x7, \x1, \x7
- vswp.s16 \t11, \x1
-
- vqdmulh.s16 \t13, \x2, d0[1]
- vadd.s16 \t12, \x3, \x3
- vadd.s16 \x2, \x2, \t13
- vqdmulh.s16 \t13, \x3, d0[3]
- vsub.s16 \t10, \x1, \x3
- vadd.s16 \t12, \t12, \t13
- vqdmulh.s16 \t13, \t10, d0[2]
- vsub.s16 \t11, \x7, \x5
- vadd.s16 \t10, \t10, \t13
- vqdmulh.s16 \t13, \t11, d0[1]
- vadd.s16 \t11, \t11, \t13
-
- vqdmulh.s16 \t13, \x1, d0[0]
- vsub.s16 \x2, \x6, \x2
- vsub.s16 \t14, \x0, \x2
- vadd.s16 \x2, \x0, \x2
- vadd.s16 \x0, \x4, \x6
- vsub.s16 \x4, \x4, \x6
- vadd.s16 \x1, \x1, \t13
- vadd.s16 \t13, \x7, \x5
- vsub.s16 \t12, \t13, \t12
- vsub.s16 \t12, \t12, \t10
- vadd.s16 \t11, \t12, \t11
- vsub.s16 \t10, \x1, \t10
- vadd.s16 \t10, \t10, \t11
-
- vsub.s16 \x7, \x0, \t13
- vadd.s16 \x0, \x0, \t13
- vadd.s16 \x6, \t14, \t12
- vsub.s16 \x1, \t14, \t12
- vsub.s16 \x5, \x2, \t11
- vadd.s16 \x2, \x2, \t11
- vsub.s16 \x3, \x4, \t10
- vadd.s16 \x4, \x4, \t10
-.endm
-
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
asm_function jsimd_idct_ifast_neon
DCT_TABLE .req r0
COEF_BLOCK .req r1
OUTPUT_BUF .req r2
OUTPUT_COL .req r3
-<<<<<<< HEAD
TMP1 .req r0
TMP2 .req r1
TMP3 .req r2
@@ -824,117 +756,18 @@ asm_function jsimd_idct_ifast_neon
vst1.8 {d21}, [TMP2]
vst1.8 {d22}, [TMP3]
vst1.8 {d23}, [TMP4]
-=======
- TMP .req ip
-
- vpush {d8-d15}
-
- /* Load constants */
- adr TMP, jsimd_idct_ifast_neon_consts
- vld1.16 {d0}, [TMP, :64]
-
- /* Load all COEF_BLOCK into NEON registers with the following allocation:
- * 0 1 2 3 | 4 5 6 7
- * ---------+--------
- * 0 | d4 | d5
- * 1 | d6 | d7
- * 2 | d8 | d9
- * 3 | d10 | d11
- * 4 | d12 | d13
- * 5 | d14 | d15
- * 6 | d16 | d17
- * 7 | d18 | d19
- */
- vld1.16 {d4, d5, d6, d7}, [COEF_BLOCK]!
- vld1.16 {d8, d9, d10, d11}, [COEF_BLOCK]!
- vld1.16 {d12, d13, d14, d15}, [COEF_BLOCK]!
- vld1.16 {d16, d17, d18, d19}, [COEF_BLOCK]!
- /* Dequantize */
- vld1.16 {d20, d21, d22, d23}, [DCT_TABLE]!
- vmul.s16 q2, q2, q10
- vld1.16 {d24, d25, d26, d27}, [DCT_TABLE]!
- vmul.s16 q3, q3, q11
- vmul.s16 q4, q4, q12
- vld1.16 {d28, d29, d30, d31}, [DCT_TABLE]!
- vmul.s16 q5, q5, q13
- vmul.s16 q6, q6, q14
- vld1.16 {d20, d21, d22, d23}, [DCT_TABLE]!
- vmul.s16 q7, q7, q15
- vmul.s16 q8, q8, q10
- vmul.s16 q9, q9, q11
-
- /* Pass 1 */
- idct_helper q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, q13, q14
- /* Transpose */
- transpose_4x4 d4, d6, d8, d10
- transpose_4x4 d5, d7, d9, d11
- transpose_4x4 d12, d14, d16, d18
- transpose_4x4 d13, d15, d17, d19
- vswp d12, d5
- vswp d14, d7
- vswp d16, d9
- vswp d18, d11
-
- /* Pass 2 */
- idct_helper q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, q13, q14
- /* Transpose */
- transpose_4x4 d4, d6, d8, d10
- transpose_4x4 d5, d7, d9, d11
- transpose_4x4 d12, d14, d16, d18
- transpose_4x4 d13, d15, d17, d19
- vswp d12, d5
- vswp d14, d7
- vswp d16, d9
- vswp d18, d11
-
- /* Descale and range limit */
- vmov.s16 q15, #(0x80 << 5)
- vqadd.s16 q2, q2, q15
- vqadd.s16 q3, q3, q15
- vqadd.s16 q4, q4, q15
- vqadd.s16 q5, q5, q15
- vqadd.s16 q6, q6, q15
- vqadd.s16 q7, q7, q15
- vqadd.s16 q8, q8, q15
- vqadd.s16 q9, q9, q15
- vqshrun.s16 d4, q2, #5
- vqshrun.s16 d6, q3, #5
- vqshrun.s16 d8, q4, #5
- vqshrun.s16 d10, q5, #5
- vqshrun.s16 d12, q6, #5
- vqshrun.s16 d14, q7, #5
- vqshrun.s16 d16, q8, #5
- vqshrun.s16 d18, q9, #5
-
- /* Store results to the output buffer */
- .irp x, d4, d6, d8, d10, d12, d14, d16, d18
- ldr TMP, [OUTPUT_BUF], #4
- add TMP, TMP, OUTPUT_COL
- vst1.8 {\x}, [TMP]!
- .endr
-
- vpop {d8-d15}
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
bx lr
.unreq DCT_TABLE
.unreq COEF_BLOCK
.unreq OUTPUT_BUF
.unreq OUTPUT_COL
-<<<<<<< HEAD
.unreq TMP1
.unreq TMP2
.unreq TMP3
.unreq TMP4
.endfunc
-=======
- .unreq TMP
-.endfunc
-
-.purgem idct_helper
-
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
/*****************************************************************************/
/*
@@ -1319,21 +1152,12 @@ asm_function jsimd_idct_2x2_neon
.macro do_load size
.if \size == 8
-<<<<<<< HEAD
vld1.8 {d4}, [U, :64]!
vld1.8 {d5}, [V, :64]!
vld1.8 {d0}, [Y, :64]!
pld [U, #64]
pld [V, #64]
pld [Y, #64]
-=======
- vld1.8 {d4}, [U]!
- vld1.8 {d5}, [V]!
- vld1.8 {d0}, [Y]!
- pld [Y, #64]
- pld [U, #64]
- pld [V, #64]
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
.elseif \size == 4
vld1.8 {d4[0]}, [U]!
vld1.8 {d4[1]}, [U]!
@@ -1403,15 +1227,11 @@ asm_function jsimd_idct_2x2_neon
.macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, g_offs, b_offs
-<<<<<<< HEAD
/*
* 2 stage pipelined YCbCr->RGB conversion
*/
.macro do_yuv_to_rgb_stage1
-=======
-.macro do_yuv_to_rgb
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
vaddw.u8 q3, q1, d4 /* q3 = u - 128 */
vaddw.u8 q4, q1, d5 /* q4 = v - 128 */
vmull.s16 q10, d6, d1[1] /* multiply by -11277 */
@@ -1422,12 +1242,9 @@ asm_function jsimd_idct_2x2_neon
vmull.s16 q13, d9, d1[0] /* multiply by 22971 */
vmull.s16 q14, d6, d1[3] /* multiply by 29033 */
vmull.s16 q15, d7, d1[3] /* multiply by 29033 */
-<<<<<<< HEAD
.endm
.macro do_yuv_to_rgb_stage2
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
vrshrn.s32 d20, q10, #15
vrshrn.s32 d21, q11, #15
vrshrn.s32 d24, q12, #14
@@ -1442,7 +1259,6 @@ asm_function jsimd_idct_2x2_neon
vqmovun.s16 d1\b_offs, q14
.endm
-<<<<<<< HEAD
.macro do_yuv_to_rgb_stage2_store_load_stage1
vld1.8 {d4}, [U, :64]!
vrshrn.s32 d20, q10, #15
@@ -1480,8 +1296,6 @@ asm_function jsimd_idct_2x2_neon
do_yuv_to_rgb_stage2
.endm
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
/* Apple gas crashes on adrl, work around that by using adr.
* But this requires a copy of these constants for each function.
*/
@@ -1542,7 +1356,6 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
/* Inner loop over pixels */
subs N, N, #8
-<<<<<<< HEAD
blt 3f
do_load 8
do_yuv_to_rgb_stage1
@@ -1558,18 +1371,6 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
tst N, #7
beq 8f
3:
-=======
- blt 2f
-1:
- do_load 8
- do_yuv_to_rgb
- do_store \bpp, 8
- subs N, N, #8
- bge 1b
- tst N, #7
- beq 8f
-2:
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
tst N, #4
beq 3f
do_load 4
@@ -1617,12 +1418,9 @@ asm_function jsimd_ycc_\colorid\()_convert_neon
.endfunc
.purgem do_yuv_to_rgb
-<<<<<<< HEAD
.purgem do_yuv_to_rgb_stage1
.purgem do_yuv_to_rgb_stage2
.purgem do_yuv_to_rgb_stage2_store_load_stage1
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8
.endm
@@ -1638,7 +1436,6 @@ generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, 2, 3
.purgem do_store
/*****************************************************************************/
-<<<<<<< HEAD
/*
* jsimd_extrgb_ycc_convert_neon
@@ -2234,5 +2031,3 @@ asm_function jsimd_quantize_neon
.unreq SHIFT
.unreq LOOP_COUNT
.endfunc
-=======
->>>>>>> 4a72099711359606b1fc10c1744057a6c568d5d8