From 816ca5ad0395080597699f860e9fbfab4d70d312 Mon Sep 17 00:00:00 2001 From: dcommander Date: Thu, 1 Dec 2011 10:58:36 +0000 Subject: Fixed non-fatal out-of-bounds read in SSE2 SIMD code reported by valgrind when decompressing a JPEG image to a bitmap buffer whose size was not a multiple of 16 bytes. git-svn-id: svn://svn.code.sf.net/p/libjpeg-turbo/code/trunk@727 632fc199-4ca6-4c93-a231-07263d6284db --- ChangeLog.txt | 6 ++++++ simd/jdclrss2-64.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ simd/jdclrss2.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ simd/jdmrgss2-64.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ simd/jdmrgss2.asm | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ simd/jsimdext.inc | 2 ++ 6 files changed, 220 insertions(+) diff --git a/ChangeLog.txt b/ChangeLog.txt index ddd7bd5..be091dd 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -5,6 +5,12 @@ was not adding the current directory to the assembler include path, so YASM was not able to find jsimdcfg.inc.) +[2] Fixed out-of-bounds read in SSE2 SIMD code that occurred when decompressing +a JPEG image to a bitmap buffer whose size was not a multiple of 16 bytes. +This was more of an annoyance than an actual bug, since it did not cause any +actual run-time problems, but the issue showed up when running libjpeg-turbo in +valgrind. See http://crbug.com/72399 for more information. + 1.1.90 (1.2 beta1) ================== diff --git a/simd/jdclrss2-64.asm b/simd/jdclrss2-64.asm index fdb33a3..696a383 100644 --- a/simd/jdclrss2-64.asm +++ b/simd/jdclrss2-64.asm @@ -290,6 +290,41 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + mov BYTE [rdi], al +%else mov rax,rcx xor rcx, byte 0x0F shl rcx, 2 @@ -329,6 +364,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -413,6 +449,22 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test rcx, rcx + jz short .nextrow + movd DWORD [rdi], xmmA +%else cmp rcx, byte SIZEOF_XMMWORD/16 jb near .nextrow mov rax,rcx @@ -452,6 +504,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdclrss2.asm b/simd/jdclrss2.asm index 3059d7d..7f519e6 100644 --- a/simd/jdclrss2.asm +++ b/simd/jdclrss2.asm @@ -302,6 +302,41 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + movd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + mov BYTE [edi], al +%else mov eax,ecx xor ecx, byte 0x0F shl ecx, 2 @@ -341,6 +376,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -426,6 +462,22 @@ EXTN(jsimd_ycc_rgb_convert_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD/8*4 + sub ecx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test ecx, ecx + jz short .nextrow + movd DWORD [edi], xmmA +%else cmp ecx, byte SIZEOF_XMMWORD/16 jb short .nextrow mov eax,ecx @@ -465,6 +517,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdmrgss2-64.asm b/simd/jdmrgss2-64.asm index 0c2503f..a64a6b3 100644 --- a/simd/jdmrgss2-64.asm +++ b/simd/jdmrgss2-64.asm @@ -294,6 +294,41 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_MMWORD + sub rcx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [rdi], xmmA + add rdi, byte SIZEOF_DWORD + sub rcx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of rax to the output when it has enough + ; space. + movd eax, xmmA + cmp rcx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [rdi], ax + add rdi, byte SIZEOF_WORD + sub rcx, byte SIZEOF_WORD + shr rax, 16 +.column_st1: + ; Store the lower 1 byte of rax to the output when it has enough + ; space. + test rcx, rcx + jz short .endcolumn + mov BYTE [rdi], al +%else mov rax,rcx xor rcx, byte 0x0F shl rcx, 2 @@ -333,6 +368,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -420,6 +456,22 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub rcx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp rcx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [rdi], xmmA + add rdi, byte SIZEOF_XMMWORD/8*4 + sub rcx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, SIZEOF_XMMWORD/8*4 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test rcx, rcx + jz short .endcolumn + movd DWORD [rdi], xmmA +%else cmp rcx, byte SIZEOF_XMMWORD/16 jb near .endcolumn mov rax,rcx @@ -459,6 +511,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jdmrgss2.asm b/simd/jdmrgss2.asm index 368ac3c..04089aa 100644 --- a/simd/jdmrgss2.asm +++ b/simd/jdmrgss2.asm @@ -307,6 +307,41 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store the lower 8 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_MMWORD + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_MMWORD + sub ecx, byte SIZEOF_MMWORD + psrldq xmmA, SIZEOF_MMWORD +.column_st7: + ; Store the lower 4 bytes of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_DWORD + jb short .column_st3 + movd DWORD [edi], xmmA + add edi, byte SIZEOF_DWORD + sub ecx, byte SIZEOF_DWORD + psrldq xmmA, SIZEOF_DWORD +.column_st3: + ; Store the lower 2 bytes of eax to the output when it has enough + ; space. + movd eax, xmmA + cmp ecx, byte SIZEOF_WORD + jb short .column_st1 + mov WORD [edi], ax + add edi, byte SIZEOF_WORD + sub ecx, byte SIZEOF_WORD + shr eax, 16 +.column_st1: + ; Store the lower 1 byte of eax to the output when it has enough + ; space. + test ecx, ecx + jz short .endcolumn + mov BYTE [edi], al +%else mov eax,ecx xor ecx, byte 0x0F shl ecx, 2 @@ -346,6 +381,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmC .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %else ; RGB_PIXELSIZE == 4 ; ----------- @@ -434,6 +470,22 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): movdqa xmmA,xmmD sub ecx, byte SIZEOF_XMMWORD/4 .column_st15: +%ifdef STRICT_MEMORY_ACCESS + ; Store two pixels (8 bytes) of xmmA to the output when it has enough + ; space. + cmp ecx, byte SIZEOF_XMMWORD/8 + jb short .column_st7 + movq MMWORD [edi], xmmA + add edi, byte SIZEOF_XMMWORD/2 + sub ecx, byte SIZEOF_XMMWORD/8 + psrldq xmmA, 64 +.column_st7: + ; Store one pixel (4 bytes) of xmmA to the output when it has enough + ; space. + test ecx, ecx + jz short .endcolumn + movd DWORD [edi], xmmA +%else cmp ecx, byte SIZEOF_XMMWORD/16 jb short .endcolumn mov eax,ecx @@ -473,6 +525,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): por xmmE,xmmG .adj0: ; ---------------- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA +%endif ; STRICT_MEMORY_ACCESS ; --------------- %endif ; RGB_PIXELSIZE ; --------------- diff --git a/simd/jsimdext.inc b/simd/jsimdext.inc index 635a931..4ab9bc0 100644 --- a/simd/jsimdext.inc +++ b/simd/jsimdext.inc @@ -86,6 +86,8 @@ section .note.GNU-stack noalloc noexec nowrite progbits %define SEG_CONST .rodata progbits alloc noexec nowrite align=16 %endif +%define STRICT_MEMORY_ACCESS 1 + ; To make the code position-independent, append -DPIC to the commandline ; %define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC -- cgit v1.2.3