summaryrefslogtreecommitdiff
path: root/trunk/simd
diff options
context:
space:
mode:
authordcommander <dcommander@3789f03b-4d11-0410-bbf8-ca57d06f2519>2011-12-01 10:58:36 +0000
committerdcommander <dcommander@3789f03b-4d11-0410-bbf8-ca57d06f2519>2011-12-01 10:58:36 +0000
commitae1fe535192eb0320b54ee93678b7db137b9994b (patch)
tree0d5faa5cd11624a584bf62a05f71766b3ce65623 /trunk/simd
parent0848d4643e873096ffd2df5270a15228c9e30bd2 (diff)
Fixed non-fatal out-of-bounds read in SSE2 SIMD code reported by valgrind when decompressing a JPEG image to a bitmap buffer whose size was not a multiple of 16 bytes.
git-svn-id: https://libjpeg-turbo.svn.sourceforge.net/svnroot/libjpeg-turbo@727 3789f03b-4d11-0410-bbf8-ca57d06f2519
Diffstat (limited to 'trunk/simd')
-rw-r--r--trunk/simd/jdclrss2-64.asm53
-rw-r--r--trunk/simd/jdclrss2.asm53
-rw-r--r--trunk/simd/jdmrgss2-64.asm53
-rw-r--r--trunk/simd/jdmrgss2.asm53
-rw-r--r--trunk/simd/jsimdext.inc2
5 files changed, 214 insertions, 0 deletions
diff --git a/trunk/simd/jdclrss2-64.asm b/trunk/simd/jdclrss2-64.asm
index fdb33a3..696a383 100644
--- a/trunk/simd/jdclrss2-64.asm
+++ b/trunk/simd/jdclrss2-64.asm
@@ -290,6 +290,41 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
movdqa xmmA,xmmD
sub rcx, byte SIZEOF_XMMWORD
.column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+ ; Store the lower 8 bytes of xmmA to the output when it has enough
+ ; space.
+ cmp rcx, byte SIZEOF_MMWORD
+ jb short .column_st7
+ movq MMWORD [rdi], xmmA
+ add rdi, byte SIZEOF_MMWORD
+ sub rcx, byte SIZEOF_MMWORD
+ psrldq xmmA, SIZEOF_MMWORD
+.column_st7:
+ ; Store the lower 4 bytes of xmmA to the output when it has enough
+ ; space.
+ cmp rcx, byte SIZEOF_DWORD
+ jb short .column_st3
+ movd DWORD [rdi], xmmA
+ add rdi, byte SIZEOF_DWORD
+ sub rcx, byte SIZEOF_DWORD
+ psrldq xmmA, SIZEOF_DWORD
+.column_st3:
+ ; Store the lower 2 bytes of rax to the output when it has enough
+ ; space.
+ movd eax, xmmA
+ cmp rcx, byte SIZEOF_WORD
+ jb short .column_st1
+ mov WORD [rdi], ax
+ add rdi, byte SIZEOF_WORD
+ sub rcx, byte SIZEOF_WORD
+ shr rax, 16
+.column_st1:
+ ; Store the lower 1 byte of rax to the output when it has enough
+ ; space.
+ test rcx, rcx
+ jz short .nextrow
+ mov BYTE [rdi], al
+%else
mov rax,rcx
xor rcx, byte 0x0F
shl rcx, 2
@@ -329,6 +364,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
por xmmE,xmmC
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------
%else ; RGB_PIXELSIZE == 4 ; -----------
@@ -413,6 +449,22 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
movdqa xmmA,xmmD
sub rcx, byte SIZEOF_XMMWORD/4
.column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+ ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+ ; space.
+ cmp rcx, byte SIZEOF_XMMWORD/8
+ jb short .column_st7
+ movq MMWORD [rdi], xmmA
+ add rdi, byte SIZEOF_XMMWORD/8*4
+ sub rcx, byte SIZEOF_XMMWORD/8
+ psrldq xmmA, SIZEOF_XMMWORD/8*4
+.column_st7:
+ ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+ ; space.
+ test rcx, rcx
+ jz short .nextrow
+ movd DWORD [rdi], xmmA
+%else
cmp rcx, byte SIZEOF_XMMWORD/16
jb near .nextrow
mov rax,rcx
@@ -452,6 +504,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
por xmmE,xmmG
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------
%endif ; RGB_PIXELSIZE ; ---------------
diff --git a/trunk/simd/jdclrss2.asm b/trunk/simd/jdclrss2.asm
index 3059d7d..7f519e6 100644
--- a/trunk/simd/jdclrss2.asm
+++ b/trunk/simd/jdclrss2.asm
@@ -302,6 +302,41 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
movdqa xmmA,xmmD
sub ecx, byte SIZEOF_XMMWORD
.column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+ ; Store the lower 8 bytes of xmmA to the output when it has enough
+ ; space.
+ cmp ecx, byte SIZEOF_MMWORD
+ jb short .column_st7
+ movq MMWORD [edi], xmmA
+ add edi, byte SIZEOF_MMWORD
+ sub ecx, byte SIZEOF_MMWORD
+ psrldq xmmA, SIZEOF_MMWORD
+.column_st7:
+ ; Store the lower 4 bytes of xmmA to the output when it has enough
+ ; space.
+ cmp ecx, byte SIZEOF_DWORD
+ jb short .column_st3
+ movd DWORD [edi], xmmA
+ add edi, byte SIZEOF_DWORD
+ sub ecx, byte SIZEOF_DWORD
+ psrldq xmmA, SIZEOF_DWORD
+.column_st3:
+ ; Store the lower 2 bytes of eax to the output when it has enough
+ ; space.
+ movd eax, xmmA
+ cmp ecx, byte SIZEOF_WORD
+ jb short .column_st1
+ mov WORD [edi], ax
+ add edi, byte SIZEOF_WORD
+ sub ecx, byte SIZEOF_WORD
+ shr eax, 16
+.column_st1:
+ ; Store the lower 1 byte of eax to the output when it has enough
+ ; space.
+ test ecx, ecx
+ jz short .nextrow
+ mov BYTE [edi], al
+%else
mov eax,ecx
xor ecx, byte 0x0F
shl ecx, 2
@@ -341,6 +376,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
por xmmE,xmmC
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------
%else ; RGB_PIXELSIZE == 4 ; -----------
@@ -426,6 +462,22 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
movdqa xmmA,xmmD
sub ecx, byte SIZEOF_XMMWORD/4
.column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+ ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+ ; space.
+ cmp ecx, byte SIZEOF_XMMWORD/8
+ jb short .column_st7
+ movq MMWORD [edi], xmmA
+ add edi, byte SIZEOF_XMMWORD/8*4
+ sub ecx, byte SIZEOF_XMMWORD/8
+ psrldq xmmA, SIZEOF_XMMWORD/8*4
+.column_st7:
+ ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+ ; space.
+ test ecx, ecx
+ jz short .nextrow
+ movd DWORD [edi], xmmA
+%else
cmp ecx, byte SIZEOF_XMMWORD/16
jb short .nextrow
mov eax,ecx
@@ -465,6 +517,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
por xmmE,xmmG
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------
%endif ; RGB_PIXELSIZE ; ---------------
diff --git a/trunk/simd/jdmrgss2-64.asm b/trunk/simd/jdmrgss2-64.asm
index 0c2503f..a64a6b3 100644
--- a/trunk/simd/jdmrgss2-64.asm
+++ b/trunk/simd/jdmrgss2-64.asm
@@ -294,6 +294,41 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
movdqa xmmA,xmmD
sub rcx, byte SIZEOF_XMMWORD
.column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+ ; Store the lower 8 bytes of xmmA to the output when it has enough
+ ; space.
+ cmp rcx, byte SIZEOF_MMWORD
+ jb short .column_st7
+ movq MMWORD [rdi], xmmA
+ add rdi, byte SIZEOF_MMWORD
+ sub rcx, byte SIZEOF_MMWORD
+ psrldq xmmA, SIZEOF_MMWORD
+.column_st7:
+ ; Store the lower 4 bytes of xmmA to the output when it has enough
+ ; space.
+ cmp rcx, byte SIZEOF_DWORD
+ jb short .column_st3
+ movd DWORD [rdi], xmmA
+ add rdi, byte SIZEOF_DWORD
+ sub rcx, byte SIZEOF_DWORD
+ psrldq xmmA, SIZEOF_DWORD
+.column_st3:
+ ; Store the lower 2 bytes of rax to the output when it has enough
+ ; space.
+ movd eax, xmmA
+ cmp rcx, byte SIZEOF_WORD
+ jb short .column_st1
+ mov WORD [rdi], ax
+ add rdi, byte SIZEOF_WORD
+ sub rcx, byte SIZEOF_WORD
+ shr rax, 16
+.column_st1:
+ ; Store the lower 1 byte of rax to the output when it has enough
+ ; space.
+ test rcx, rcx
+ jz short .endcolumn
+ mov BYTE [rdi], al
+%else
mov rax,rcx
xor rcx, byte 0x0F
shl rcx, 2
@@ -333,6 +368,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
por xmmE,xmmC
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------
%else ; RGB_PIXELSIZE == 4 ; -----------
@@ -420,6 +456,22 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
movdqa xmmA,xmmD
sub rcx, byte SIZEOF_XMMWORD/4
.column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+ ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+ ; space.
+ cmp rcx, byte SIZEOF_XMMWORD/8
+ jb short .column_st7
+ movq MMWORD [rdi], xmmA
+ add rdi, byte SIZEOF_XMMWORD/8*4
+ sub rcx, byte SIZEOF_XMMWORD/8
+ psrldq xmmA, SIZEOF_XMMWORD/8*4
+.column_st7:
+ ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+ ; space.
+ test rcx, rcx
+ jz short .endcolumn
+ movd DWORD [rdi], xmmA
+%else
cmp rcx, byte SIZEOF_XMMWORD/16
jb near .endcolumn
mov rax,rcx
@@ -459,6 +511,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
por xmmE,xmmG
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------
%endif ; RGB_PIXELSIZE ; ---------------
diff --git a/trunk/simd/jdmrgss2.asm b/trunk/simd/jdmrgss2.asm
index 368ac3c..04089aa 100644
--- a/trunk/simd/jdmrgss2.asm
+++ b/trunk/simd/jdmrgss2.asm
@@ -307,6 +307,41 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
movdqa xmmA,xmmD
sub ecx, byte SIZEOF_XMMWORD
.column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+ ; Store the lower 8 bytes of xmmA to the output when it has enough
+ ; space.
+ cmp ecx, byte SIZEOF_MMWORD
+ jb short .column_st7
+ movq MMWORD [edi], xmmA
+ add edi, byte SIZEOF_MMWORD
+ sub ecx, byte SIZEOF_MMWORD
+ psrldq xmmA, SIZEOF_MMWORD
+.column_st7:
+ ; Store the lower 4 bytes of xmmA to the output when it has enough
+ ; space.
+ cmp ecx, byte SIZEOF_DWORD
+ jb short .column_st3
+ movd DWORD [edi], xmmA
+ add edi, byte SIZEOF_DWORD
+ sub ecx, byte SIZEOF_DWORD
+ psrldq xmmA, SIZEOF_DWORD
+.column_st3:
+ ; Store the lower 2 bytes of eax to the output when it has enough
+ ; space.
+ movd eax, xmmA
+ cmp ecx, byte SIZEOF_WORD
+ jb short .column_st1
+ mov WORD [edi], ax
+ add edi, byte SIZEOF_WORD
+ sub ecx, byte SIZEOF_WORD
+ shr eax, 16
+.column_st1:
+ ; Store the lower 1 byte of eax to the output when it has enough
+ ; space.
+ test ecx, ecx
+ jz short .endcolumn
+ mov BYTE [edi], al
+%else
mov eax,ecx
xor ecx, byte 0x0F
shl ecx, 2
@@ -346,6 +381,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
por xmmE,xmmC
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------
%else ; RGB_PIXELSIZE == 4 ; -----------
@@ -434,6 +470,22 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
movdqa xmmA,xmmD
sub ecx, byte SIZEOF_XMMWORD/4
.column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+ ; Store two pixels (8 bytes) of xmmA to the output when at least two
+ ; pixels remain in ecx, then move the following pixel down to the low dword.
+ cmp ecx, byte SIZEOF_XMMWORD/8
+ jb short .column_st7
+ movq MMWORD [edi], xmmA
+ add edi, byte SIZEOF_XMMWORD/8*4 ; advance output by 2 pixels * 4 bytes
+ sub ecx, byte SIZEOF_XMMWORD/8 ; two fewer pixels left to store
+ psrldq xmmA, SIZEOF_XMMWORD/8*4 ; byte shift (not bits): drop the 8 bytes just stored
+.column_st7:
+ ; Store one pixel (4 bytes) of xmmA to the output when one pixel is
+ ; still left; otherwise skip straight to .endcolumn.
+ test ecx, ecx
+ jz short .endcolumn
+ movd DWORD [edi], xmmA
+%else
cmp ecx, byte SIZEOF_XMMWORD/16
jb short .endcolumn
mov eax,ecx
@@ -473,6 +525,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
por xmmE,xmmG
.adj0: ; ----------------
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------
%endif ; RGB_PIXELSIZE ; ---------------
diff --git a/trunk/simd/jsimdext.inc b/trunk/simd/jsimdext.inc
index 635a931..4ab9bc0 100644
--- a/trunk/simd/jsimdext.inc
+++ b/trunk/simd/jsimdext.inc
@@ -86,6 +86,8 @@ section .note.GNU-stack noalloc noexec nowrite progbits
%define SEG_CONST .rodata progbits alloc noexec nowrite align=16
%endif
+%define STRICT_MEMORY_ACCESS 1
+
; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC