aboutsummaryrefslogtreecommitdiff
path: root/simd
diff options
context:
space:
mode:
authordcommander <dcommander@632fc199-4ca6-4c93-a231-07263d6284db>2013-10-12 21:39:20 +0000
committerdcommander <dcommander@632fc199-4ca6-4c93-a231-07263d6284db>2013-10-12 21:39:20 +0000
commit7b635af66e4a4addb70604e24aa7ff9cbb926f79 (patch)
tree55984b661eaf70a0430fe415a72a9ce024c743c2 /simd
parent57346ead7f083755eb2bd13eeed4f79f97b4685c (diff)
SIMD-accelerated integer convsamp routine for MIPS DSPr2
git-svn-id: svn://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1059 632fc199-4ca6-4c93-a231-07263d6284db
Diffstat (limited to 'simd')
-rw-r--r--simd/jsimd.h4
-rw-r--r--simd/jsimd_mips.c17
-rw-r--r--simd/jsimd_mips_dspr2.S149
3 files changed, 170 insertions, 0 deletions
diff --git a/simd/jsimd.h b/simd/jsimd.h
index e5c5d44..f75aa9b 100644
--- a/simd/jsimd.h
+++ b/simd/jsimd.h
@@ -651,6 +651,10 @@ EXTERN(void) jsimd_convsamp_neon JPP((JSAMPARRAY sample_data,
JDIMENSION start_col,
DCTELEM * workspace));
+EXTERN(void) jsimd_convsamp_mips_dspr2 JPP((JSAMPARRAY sample_data,
+ JDIMENSION start_col,
+ DCTELEM * workspace));
+
EXTERN(void) jsimd_convsamp_float_3dnow JPP((JSAMPARRAY sample_data,
JDIMENSION start_col,
FAST_FLOAT * workspace));
diff --git a/simd/jsimd_mips.c b/simd/jsimd_mips.c
index d8d6b19..5b4f893 100644
--- a/simd/jsimd_mips.c
+++ b/simd/jsimd_mips.c
@@ -453,6 +453,21 @@ jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
GLOBAL(int)
jsimd_can_convsamp (void)
{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (DCTSIZE != 8)
+ return 0;
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (sizeof(DCTELEM) != 2)
+ return 0;
+
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ return 1;
+
return 0;
}
@@ -483,6 +498,8 @@ GLOBAL(void)
jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
DCTELEM * workspace)
{
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ jsimd_convsamp_mips_dspr2(sample_data, start_col, workspace);
}
GLOBAL(void)
diff --git a/simd/jsimd_mips_dspr2.S b/simd/jsimd_mips_dspr2.S
index d478a6d..198c349 100644
--- a/simd/jsimd_mips_dspr2.S
+++ b/simd/jsimd_mips_dspr2.S
@@ -2813,6 +2813,155 @@ LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass2_mips_dspr2)
END(jsimd_idct_12x12_pass2_mips_dspr2)
/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_convsamp_mips_dspr2)
+/*
+ * a0 - sample_data
+ * a1 - start_col
+ * a2 - workspace
+ */
+
+ lw t0, 0(a0)
+ li t7, 0xff80ff80
+ addu t0, t0, a1
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ lw t0, 4(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 0(a2)
+ usw t4, 4(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 8(a2)
+ usw t6, 12(a2)
+
+ lw t0, 8(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 16(a2)
+ usw t4, 20(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 24(a2)
+ usw t6, 28(a2)
+
+ lw t0, 12(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 32(a2)
+ usw t4, 36(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 40(a2)
+ usw t6, 44(a2)
+
+ lw t0, 16(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 48(a2)
+ usw t4, 52(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 56(a2)
+ usw t6, 60(a2)
+
+ lw t0, 20(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 64(a2)
+ usw t4, 68(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 72(a2)
+ usw t6, 76(a2)
+
+ lw t0, 24(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 80(a2)
+ usw t4, 84(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 88(a2)
+ usw t6, 92(a2)
+
+ lw t0, 28(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 96(a2)
+ usw t4, 100(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 104(a2)
+ usw t6, 108(a2)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 112(a2)
+ usw t4, 116(a2)
+ usw t5, 120(a2)
+ usw t6, 124(a2)
+
+ j ra
+ nop
+
+END(jsimd_convsamp_mips_dspr2)
+
+/*****************************************************************************/
LEAF_MIPS_DSPR2(jsimd_convsamp_float_mips_dspr2)
/*
* a0 - sample_data