diff options
author | dcommander <dcommander@632fc199-4ca6-4c93-a231-07263d6284db> | 2013-10-12 21:39:20 +0000 |
---|---|---|
committer | dcommander <dcommander@632fc199-4ca6-4c93-a231-07263d6284db> | 2013-10-12 21:39:20 +0000 |
commit | 7b635af66e4a4addb70604e24aa7ff9cbb926f79 (patch) | |
tree | 55984b661eaf70a0430fe415a72a9ce024c743c2 /simd | |
parent | 57346ead7f083755eb2bd13eeed4f79f97b4685c (diff) |
SIMD-accelerated integer convsamp routine for MIPS DSPr2
git-svn-id: svn://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1059 632fc199-4ca6-4c93-a231-07263d6284db
Diffstat (limited to 'simd')
-rw-r--r-- | simd/jsimd.h | 4 | ||||
-rw-r--r-- | simd/jsimd_mips.c | 17 | ||||
-rw-r--r-- | simd/jsimd_mips_dspr2.S | 149 |
3 files changed, 170 insertions, 0 deletions
diff --git a/simd/jsimd.h b/simd/jsimd.h index e5c5d44..f75aa9b 100644 --- a/simd/jsimd.h +++ b/simd/jsimd.h @@ -651,6 +651,10 @@ EXTERN(void) jsimd_convsamp_neon JPP((JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)); +EXTERN(void) jsimd_convsamp_mips_dspr2 JPP((JSAMPARRAY sample_data, + JDIMENSION start_col, + DCTELEM * workspace)); + EXTERN(void) jsimd_convsamp_float_3dnow JPP((JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)); diff --git a/simd/jsimd_mips.c b/simd/jsimd_mips.c index d8d6b19..5b4f893 100644 --- a/simd/jsimd_mips.c +++ b/simd/jsimd_mips.c @@ -453,6 +453,21 @@ jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, GLOBAL(int) jsimd_can_convsamp (void) { + init_simd(); + + /* The code is optimised for these values only */ + if (DCTSIZE != 8) + return 0; + if (BITS_IN_JSAMPLE != 8) + return 0; + if (sizeof(JDIMENSION) != 4) + return 0; + if (sizeof(DCTELEM) != 2) + return 0; + + if (simd_support & JSIMD_MIPS_DSPR2) + return 1; + return 0; } @@ -483,6 +498,8 @@ GLOBAL(void) jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace) { + if (simd_support & JSIMD_MIPS_DSPR2) + jsimd_convsamp_mips_dspr2(sample_data, start_col, workspace); } GLOBAL(void) diff --git a/simd/jsimd_mips_dspr2.S b/simd/jsimd_mips_dspr2.S index d478a6d..198c349 100644 --- a/simd/jsimd_mips_dspr2.S +++ b/simd/jsimd_mips_dspr2.S @@ -2813,6 +2813,155 @@ LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass2_mips_dspr2) END(jsimd_idct_12x12_pass2_mips_dspr2) /*****************************************************************************/ +LEAF_MIPS_DSPR2(jsimd_convsamp_mips_dspr2) +/* + * a0 - sample_data + * a1 - start_col + * a2 - workspace + */ + + lw t0, 0(a0) + li t7, 0xff80ff80 + addu t0, t0, a1 + ulw t1, 0(t0) + ulw t2, 4(t0) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + lw t0, 4(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 0(a2) + usw t4, 4(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 8(a2) + usw t6, 12(a2) + + lw t0, 8(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 16(a2) + usw t4, 20(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 24(a2) + usw t6, 28(a2) + + lw t0, 12(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 32(a2) + usw t4, 36(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 40(a2) + usw t6, 44(a2) + + lw t0, 16(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 48(a2) + usw t4, 52(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 56(a2) + usw t6, 60(a2) + + lw t0, 20(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 64(a2) + usw t4, 68(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 72(a2) + usw t6, 76(a2) + + lw t0, 24(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 80(a2) + usw t4, 84(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 88(a2) + usw t6, 92(a2) + + lw t0, 28(a0) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu t0, t0, a1 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + ulw t1, 0(t0) + ulw t2, 4(t0) + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 96(a2) + usw t4, 100(a2) + preceu.ph.qbr t3, t1 + preceu.ph.qbl t4, t1 + usw t5, 104(a2) + usw t6, 108(a2) + preceu.ph.qbr t5, t2 + preceu.ph.qbl t6, t2 + addu.ph t3, t3, t7 + addu.ph t4, t4, t7 + addu.ph t5, t5, t7 + addu.ph t6, t6, t7 + usw t3, 112(a2) + usw t4, 116(a2) + usw t5, 120(a2) + usw t6, 124(a2) + + j ra + nop + +END(jsimd_convsamp_mips_dspr2) + +/*****************************************************************************/ LEAF_MIPS_DSPR2(jsimd_convsamp_float_mips_dspr2) /* * a0 - sample_data |