aboutsummaryrefslogtreecommitdiff
path: root/simd
diff options
context:
space:
mode:
author ossman_ <ossman_@632fc199-4ca6-4c93-a231-07263d6284db> 2009-06-29 12:58:48 +0000
committer ossman_ <ossman_@632fc199-4ca6-4c93-a231-07263d6284db> 2009-06-29 12:58:48 +0000
commit c317ab0f3286f1014a0dae41ff356a96bba15405 (patch)
tree ca83ba7c502f5c11c189e7ec0f00d932871a0c62 /simd
parent 90a51a655bf579de7ec8eff9383421ac3e696eba (diff)
Make x86_64 SIMD code PIC friendly
Use RIP-relative addressing, as that works in both PIC and non-PIC mode.

git-svn-id: svn://svn.code.sf.net/p/libjpeg-turbo/code/trunk@51 632fc199-4ca6-4c93-a231-07263d6284db
Diffstat (limited to 'simd')
-rw-r--r-- simd/jcclrss2-64.asm 44
-rw-r--r-- simd/jdclrss2-64.asm 32
-rw-r--r-- simd/jdmrgss2-64.asm 32
-rw-r--r-- simd/jdsamss2-64.asm 44
-rw-r--r-- simd/jfss2fst-64.asm 20
-rw-r--r-- simd/jfss2int-64.asm 116
-rw-r--r-- simd/jfsseflt-64.asm 20
-rw-r--r-- simd/jiss2flt-64.asm 26
-rw-r--r-- simd/jiss2fst-64.asm 22
-rw-r--r-- simd/jiss2int-64.asm 82
-rw-r--r-- simd/jiss2red-64.asm 56
11 files changed, 247 insertions, 247 deletions
diff --git a/simd/jcclrss2-64.asm b/simd/jcclrss2-64.asm
index 9900edd..ca7e24a 100644
--- a/simd/jcclrss2-64.asm
+++ b/simd/jcclrss2-64.asm
@@ -302,10 +302,10 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
punpckhwd xmm6,xmm3
movdqa xmm7,xmm1
movdqa xmm4,xmm6
- pmaddwd xmm1,[PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
- pmaddwd xmm6,[PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
- pmaddwd xmm7,[PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
- pmaddwd xmm4,[PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
+ pmaddwd xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+ pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+ pmaddwd xmm7,[rel PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
+ pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
@@ -317,7 +317,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
psrld xmm1,1 ; xmm1=BOL*FIX(0.500)
psrld xmm6,1 ; xmm6=BOH*FIX(0.500)
- movdqa xmm5,[PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ]
+ movdqa xmm5,[rel PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ]
paddd xmm7,xmm1
paddd xmm4,xmm6
@@ -334,10 +334,10 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
punpckhwd xmm6,xmm2
movdqa xmm5,xmm0
movdqa xmm4,xmm6
- pmaddwd xmm0,[PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
- pmaddwd xmm6,[PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
- pmaddwd xmm5,[PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
- pmaddwd xmm4,[PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
+ pmaddwd xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+ pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+ pmaddwd xmm5,[rel PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
+ pmaddwd xmm4,[rel PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
@@ -349,7 +349,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
psrld xmm0,1 ; xmm0=BEL*FIX(0.500)
psrld xmm6,1 ; xmm6=BEH*FIX(0.500)
- movdqa xmm1,[PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ]
+ movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ]
paddd xmm5,xmm0
paddd xmm4,xmm6
@@ -372,12 +372,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
punpckhwd xmm4,xmm3
movdqa xmm7,xmm0
movdqa xmm5,xmm4
- pmaddwd xmm0,[PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
- pmaddwd xmm4,[PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
- pmaddwd xmm7,[PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
- pmaddwd xmm5,[PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
+ pmaddwd xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+ pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+ pmaddwd xmm7,[rel PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
+ pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
- movdqa xmm3,[PD_ONEHALF] ; xmm3=[PD_ONEHALF]
+ movdqa xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF]
paddd xmm0, XMMWORD [wk(4)]
paddd xmm4, XMMWORD [wk(5)]
@@ -394,7 +394,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
psrld xmm3,1 ; xmm3=ROL*FIX(0.500)
psrld xmm4,1 ; xmm4=ROH*FIX(0.500)
- movdqa xmm1,[PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ]
+ movdqa xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ]
paddd xmm7,xmm3
paddd xmm5,xmm4
@@ -411,12 +411,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
punpckhwd xmm4,xmm2
movdqa xmm1,xmm6
movdqa xmm5,xmm4
- pmaddwd xmm6,[PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
- pmaddwd xmm4,[PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
- pmaddwd xmm1,[PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
- pmaddwd xmm5,[PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
+ pmaddwd xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+ pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+ pmaddwd xmm1,[rel PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
+ pmaddwd xmm5,[rel PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
- movdqa xmm2,[PD_ONEHALF] ; xmm2=[PD_ONEHALF]
+ movdqa xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF]
paddd xmm6, XMMWORD [wk(6)]
paddd xmm4, XMMWORD [wk(7)]
@@ -437,7 +437,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
psrld xmm2,1 ; xmm2=REL*FIX(0.500)
psrld xmm4,1 ; xmm4=REH*FIX(0.500)
- movdqa xmm0,[PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ]
+ movdqa xmm0,[rel PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ]
paddd xmm1,xmm2
paddd xmm5,xmm4
diff --git a/simd/jdclrss2-64.asm b/simd/jdclrss2-64.asm
index 0808eca..c3c1b07 100644
--- a/simd/jdclrss2-64.asm
+++ b/simd/jdclrss2-64.asm
@@ -127,17 +127,17 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
paddw xmm0,xmm0 ; xmm0=2*CrE
paddw xmm1,xmm1 ; xmm1=2*CrO
- pmulhw xmm4,[PW_MF0228] ; xmm4=(2*CbE * -FIX(0.22800))
- pmulhw xmm5,[PW_MF0228] ; xmm5=(2*CbO * -FIX(0.22800))
- pmulhw xmm0,[PW_F0402] ; xmm0=(2*CrE * FIX(0.40200))
- pmulhw xmm1,[PW_F0402] ; xmm1=(2*CrO * FIX(0.40200))
+ pmulhw xmm4,[rel PW_MF0228] ; xmm4=(2*CbE * -FIX(0.22800))
+ pmulhw xmm5,[rel PW_MF0228] ; xmm5=(2*CbO * -FIX(0.22800))
+ pmulhw xmm0,[rel PW_F0402] ; xmm0=(2*CrE * FIX(0.40200))
+ pmulhw xmm1,[rel PW_F0402] ; xmm1=(2*CrO * FIX(0.40200))
- paddw xmm4,[PW_ONE]
- paddw xmm5,[PW_ONE]
+ paddw xmm4,[rel PW_ONE]
+ paddw xmm5,[rel PW_ONE]
psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800))
psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800))
- paddw xmm0,[PW_ONE]
- paddw xmm1,[PW_ONE]
+ paddw xmm0,[rel PW_ONE]
+ paddw xmm1,[rel PW_ONE]
psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200))
psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200))
@@ -155,19 +155,19 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
movdqa xmm5,xmm3
punpcklwd xmm2,xmm6
punpckhwd xmm4,xmm6
- pmaddwd xmm2,[PW_MF0344_F0285]
- pmaddwd xmm4,[PW_MF0344_F0285]
+ pmaddwd xmm2,[rel PW_MF0344_F0285]
+ pmaddwd xmm4,[rel PW_MF0344_F0285]
punpcklwd xmm3,xmm7
punpckhwd xmm5,xmm7
- pmaddwd xmm3,[PW_MF0344_F0285]
- pmaddwd xmm5,[PW_MF0344_F0285]
+ pmaddwd xmm3,[rel PW_MF0344_F0285]
+ pmaddwd xmm5,[rel PW_MF0344_F0285]
- paddd xmm2,[PD_ONEHALF]
- paddd xmm4,[PD_ONEHALF]
+ paddd xmm2,[rel PD_ONEHALF]
+ paddd xmm4,[rel PD_ONEHALF]
psrad xmm2,SCALEBITS
psrad xmm4,SCALEBITS
- paddd xmm3,[PD_ONEHALF]
- paddd xmm5,[PD_ONEHALF]
+ paddd xmm3,[rel PD_ONEHALF]
+ paddd xmm5,[rel PD_ONEHALF]
psrad xmm3,SCALEBITS
psrad xmm5,SCALEBITS
diff --git a/simd/jdmrgss2-64.asm b/simd/jdmrgss2-64.asm
index 6474f43..6329f95 100644
--- a/simd/jdmrgss2-64.asm
+++ b/simd/jdmrgss2-64.asm
@@ -113,17 +113,17 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
paddw xmm7,xmm7 ; xmm7=2*CrH
paddw xmm0,xmm0 ; xmm0=2*CrL
- pmulhw xmm6,[PW_MF0228] ; xmm6=(2*CbH * -FIX(0.22800))
- pmulhw xmm4,[PW_MF0228] ; xmm4=(2*CbL * -FIX(0.22800))
- pmulhw xmm7,[PW_F0402] ; xmm7=(2*CrH * FIX(0.40200))
- pmulhw xmm0,[PW_F0402] ; xmm0=(2*CrL * FIX(0.40200))
+ pmulhw xmm6,[rel PW_MF0228] ; xmm6=(2*CbH * -FIX(0.22800))
+ pmulhw xmm4,[rel PW_MF0228] ; xmm4=(2*CbL * -FIX(0.22800))
+ pmulhw xmm7,[rel PW_F0402] ; xmm7=(2*CrH * FIX(0.40200))
+ pmulhw xmm0,[rel PW_F0402] ; xmm0=(2*CrL * FIX(0.40200))
- paddw xmm6,[PW_ONE]
- paddw xmm4,[PW_ONE]
+ paddw xmm6,[rel PW_ONE]
+ paddw xmm4,[rel PW_ONE]
psraw xmm6,1 ; xmm6=(CbH * -FIX(0.22800))
psraw xmm4,1 ; xmm4=(CbL * -FIX(0.22800))
- paddw xmm7,[PW_ONE]
- paddw xmm0,[PW_ONE]
+ paddw xmm7,[rel PW_ONE]
+ paddw xmm0,[rel PW_ONE]
psraw xmm7,1 ; xmm7=(CrH * FIX(0.40200))
psraw xmm0,1 ; xmm0=(CrL * FIX(0.40200))
@@ -141,19 +141,19 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
movdqa xmm7,xmm2
punpcklwd xmm5,xmm1
punpckhwd xmm6,xmm1
- pmaddwd xmm5,[PW_MF0344_F0285]
- pmaddwd xmm6,[PW_MF0344_F0285]
+ pmaddwd xmm5,[rel PW_MF0344_F0285]
+ pmaddwd xmm6,[rel PW_MF0344_F0285]
punpcklwd xmm2,xmm3
punpckhwd xmm7,xmm3
- pmaddwd xmm2,[PW_MF0344_F0285]
- pmaddwd xmm7,[PW_MF0344_F0285]
+ pmaddwd xmm2,[rel PW_MF0344_F0285]
+ pmaddwd xmm7,[rel PW_MF0344_F0285]
- paddd xmm5,[PD_ONEHALF]
- paddd xmm6,[PD_ONEHALF]
+ paddd xmm5,[rel PD_ONEHALF]
+ paddd xmm6,[rel PD_ONEHALF]
psrad xmm5,SCALEBITS
psrad xmm6,SCALEBITS
- paddd xmm2,[PD_ONEHALF]
- paddd xmm7,[PD_ONEHALF]
+ paddd xmm2,[rel PD_ONEHALF]
+ paddd xmm7,[rel PD_ONEHALF]
psrad xmm2,SCALEBITS
psrad xmm7,SCALEBITS
diff --git a/simd/jdsamss2-64.asm b/simd/jdsamss2-64.asm
index 1f7b1f5..7df283e 100644
--- a/simd/jdsamss2-64.asm
+++ b/simd/jdsamss2-64.asm
@@ -133,12 +133,12 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
punpcklbw xmm3,xmm0 ; xmm3=( 1 2 3 4 5 6 7 8)
punpckhbw xmm6,xmm0 ; xmm6=( 9 10 11 12 13 14 15 16)
- pmullw xmm1,[PW_THREE]
- pmullw xmm4,[PW_THREE]
- paddw xmm2,[PW_ONE]
- paddw xmm5,[PW_ONE]
- paddw xmm3,[PW_TWO]
- paddw xmm6,[PW_TWO]
+ pmullw xmm1,[rel PW_THREE]
+ pmullw xmm4,[rel PW_THREE]
+ paddw xmm2,[rel PW_ONE]
+ paddw xmm5,[rel PW_ONE]
+ paddw xmm3,[rel PW_TWO]
+ paddw xmm6,[rel PW_TWO]
paddw xmm2,xmm1
paddw xmm5,xmm4
@@ -264,8 +264,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7)
punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15)
- pmullw xmm0,[PW_THREE]
- pmullw xmm4,[PW_THREE]
+ pmullw xmm0,[rel PW_THREE]
+ pmullw xmm4,[rel PW_THREE]
pcmpeqb xmm7,xmm7
psrldq xmm7,(SIZEOF_XMMWORD-2)
@@ -324,8 +324,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
punpcklbw xmm2,xmm3 ; xmm2=row[+1]( 0 1 2 3 4 5 6 7)
punpckhbw xmm6,xmm3 ; xmm6=row[+1]( 8 9 10 11 12 13 14 15)
- pmullw xmm0,[PW_THREE]
- pmullw xmm4,[PW_THREE]
+ pmullw xmm0,[rel PW_THREE]
+ pmullw xmm4,[rel PW_THREE]
paddw xmm1,xmm0 ; xmm1=Int0L=( 0 1 2 3 4 5 6 7)
paddw xmm5,xmm4 ; xmm5=Int0H=( 8 9 10 11 12 13 14 15)
@@ -373,12 +373,12 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa XMMWORD [wk(0)], xmm4
- pmullw xmm7,[PW_THREE]
- pmullw xmm3,[PW_THREE]
- paddw xmm1,[PW_EIGHT]
- paddw xmm5,[PW_EIGHT]
- paddw xmm0,[PW_SEVEN]
- paddw xmm2,[PW_SEVEN]
+ pmullw xmm7,[rel PW_THREE]
+ pmullw xmm3,[rel PW_THREE]
+ paddw xmm1,[rel PW_EIGHT]
+ paddw xmm5,[rel PW_EIGHT]
+ paddw xmm0,[rel PW_SEVEN]
+ paddw xmm2,[rel PW_SEVEN]
paddw xmm1,xmm7
paddw xmm5,xmm3
@@ -426,12 +426,12 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa XMMWORD [wk(1)], xmm3
- pmullw xmm6,[PW_THREE]
- pmullw xmm4,[PW_THREE]
- paddw xmm1,[PW_EIGHT]
- paddw xmm0,[PW_EIGHT]
- paddw xmm7,[PW_SEVEN]
- paddw xmm5,[PW_SEVEN]
+ pmullw xmm6,[rel PW_THREE]
+ pmullw xmm4,[rel PW_THREE]
+ paddw xmm1,[rel PW_EIGHT]
+ paddw xmm0,[rel PW_EIGHT]
+ paddw xmm7,[rel PW_SEVEN]
+ paddw xmm5,[rel PW_SEVEN]
paddw xmm1,xmm6
paddw xmm0,xmm4
diff --git a/simd/jfss2fst-64.asm b/simd/jfss2fst-64.asm
index 9303156..b6a06bc 100644
--- a/simd/jfss2fst-64.asm
+++ b/simd/jfss2fst-64.asm
@@ -192,7 +192,7 @@ EXTN(jsimd_fdct_ifast_sse2):
paddw xmm6,xmm3
psllw xmm6,PRE_MULTIPLY_SCALE_BITS
- pmulhw xmm6,[PW_F0707] ; xmm6=z1
+ pmulhw xmm6,[rel PW_F0707] ; xmm6=z1
movdqa xmm1,xmm4
movdqa xmm7,xmm3
@@ -216,13 +216,13 @@ EXTN(jsimd_fdct_ifast_sse2):
psllw xmm0,PRE_MULTIPLY_SCALE_BITS
psllw xmm5,PRE_MULTIPLY_SCALE_BITS
- pmulhw xmm5,[PW_F0707] ; xmm5=z3
+ pmulhw xmm5,[rel PW_F0707] ; xmm5=z3
movdqa xmm4,xmm2 ; xmm4=tmp10
psubw xmm2,xmm0
- pmulhw xmm2,[PW_F0382] ; xmm2=z5
- pmulhw xmm4,[PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
- pmulhw xmm0,[PW_F1306] ; xmm0=MULTIPLY(tmp12,FIX_1_306562)
+ pmulhw xmm2,[rel PW_F0382] ; xmm2=z5
+ pmulhw xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
+ pmulhw xmm0,[rel PW_F1306] ; xmm0=MULTIPLY(tmp12,FIX_1_306562)
paddw xmm4,xmm2 ; xmm4=z2
paddw xmm0,xmm2 ; xmm0=z4
@@ -328,7 +328,7 @@ EXTN(jsimd_fdct_ifast_sse2):
paddw xmm5,xmm3
psllw xmm5,PRE_MULTIPLY_SCALE_BITS
- pmulhw xmm5,[PW_F0707] ; xmm5=z1
+ pmulhw xmm5,[rel PW_F0707] ; xmm5=z1
movdqa xmm6,xmm4
movdqa xmm2,xmm3
@@ -355,13 +355,13 @@ EXTN(jsimd_fdct_ifast_sse2):
psllw xmm1,PRE_MULTIPLY_SCALE_BITS
psllw xmm0,PRE_MULTIPLY_SCALE_BITS
- pmulhw xmm0,[PW_F0707] ; xmm0=z3
+ pmulhw xmm0,[rel PW_F0707] ; xmm0=z3
movdqa xmm4,xmm7 ; xmm4=tmp10
psubw xmm7,xmm1
- pmulhw xmm7,[PW_F0382] ; xmm7=z5
- pmulhw xmm4,[PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
- pmulhw xmm1,[PW_F1306] ; xmm1=MULTIPLY(tmp12,FIX_1_306562)
+ pmulhw xmm7,[rel PW_F0382] ; xmm7=z5
+ pmulhw xmm4,[rel PW_F0541] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
+ pmulhw xmm1,[rel PW_F1306] ; xmm1=MULTIPLY(tmp12,FIX_1_306562)
paddw xmm4,xmm7 ; xmm4=z2
paddw xmm1,xmm7 ; xmm1=z4
diff --git a/simd/jfss2int-64.asm b/simd/jfss2int-64.asm
index f787921..5fc4ac8 100644
--- a/simd/jfss2int-64.asm
+++ b/simd/jfss2int-64.asm
@@ -236,17 +236,17 @@ EXTN(jsimd_fdct_islow_sse2):
punpckhwd xmm6,xmm0
movdqa xmm4,xmm7
movdqa xmm0,xmm6
- pmaddwd xmm7,[PW_F130_F054] ; xmm7=data2L
- pmaddwd xmm6,[PW_F130_F054] ; xmm6=data2H
- pmaddwd xmm4,[PW_F054_MF130] ; xmm4=data6L
- pmaddwd xmm0,[PW_F054_MF130] ; xmm0=data6H
+ pmaddwd xmm7,[rel PW_F130_F054] ; xmm7=data2L
+ pmaddwd xmm6,[rel PW_F130_F054] ; xmm6=data2H
+ pmaddwd xmm4,[rel PW_F054_MF130] ; xmm4=data6L
+ pmaddwd xmm0,[rel PW_F054_MF130] ; xmm0=data6H
- paddd xmm7,[PD_DESCALE_P1]
- paddd xmm6,[PD_DESCALE_P1]
+ paddd xmm7,[rel PD_DESCALE_P1]
+ paddd xmm6,[rel PD_DESCALE_P1]
psrad xmm7,DESCALE_P1
psrad xmm6,DESCALE_P1
- paddd xmm4,[PD_DESCALE_P1]
- paddd xmm0,[PD_DESCALE_P1]
+ paddd xmm4,[rel PD_DESCALE_P1]
+ paddd xmm0,[rel PD_DESCALE_P1]
psrad xmm4,DESCALE_P1
psrad xmm0,DESCALE_P1
@@ -281,10 +281,10 @@ EXTN(jsimd_fdct_islow_sse2):
punpckhwd xmm4,xmm0
movdqa xmm6,xmm7
movdqa xmm0,xmm4
- pmaddwd xmm7,[PW_MF078_F117] ; xmm7=z3L
- pmaddwd xmm4,[PW_MF078_F117] ; xmm4=z3H
- pmaddwd xmm6,[PW_F117_F078] ; xmm6=z4L
- pmaddwd xmm0,[PW_F117_F078] ; xmm0=z4H
+ pmaddwd xmm7,[rel PW_MF078_F117] ; xmm7=z3L
+ pmaddwd xmm4,[rel PW_MF078_F117] ; xmm4=z3H
+ pmaddwd xmm6,[rel PW_F117_F078] ; xmm6=z4L
+ pmaddwd xmm0,[rel PW_F117_F078] ; xmm0=z4H
movdqa XMMWORD [wk(0)], xmm7 ; wk(0)=z3L
movdqa XMMWORD [wk(1)], xmm4 ; wk(1)=z3H
@@ -311,22 +311,22 @@ EXTN(jsimd_fdct_islow_sse2):
punpckhwd xmm4,xmm1
movdqa xmm2,xmm7
movdqa xmm1,xmm4
- pmaddwd xmm7,[PW_MF060_MF089] ; xmm7=tmp4L
- pmaddwd xmm4,[PW_MF060_MF089] ; xmm4=tmp4H
- pmaddwd xmm2,[PW_MF089_F060] ; xmm2=tmp7L
- pmaddwd xmm1,[PW_MF089_F060] ; xmm1=tmp7H
+ pmaddwd xmm7,[rel PW_MF060_MF089] ; xmm7=tmp4L
+ pmaddwd xmm4,[rel PW_MF060_MF089] ; xmm4=tmp4H
+ pmaddwd xmm2,[rel PW_MF089_F060] ; xmm2=tmp7L
+ pmaddwd xmm1,[rel PW_MF089_F060] ; xmm1=tmp7H
paddd xmm7, XMMWORD [wk(0)] ; xmm7=data7L
paddd xmm4, XMMWORD [wk(1)] ; xmm4=data7H
paddd xmm2,xmm6 ; xmm2=data1L
paddd xmm1,xmm0 ; xmm1=data1H
- paddd xmm7,[PD_DESCALE_P1]
- paddd xmm4,[PD_DESCALE_P1]
+ paddd xmm7,[rel PD_DESCALE_P1]
+ paddd xmm4,[rel PD_DESCALE_P1]
psrad xmm7,DESCALE_P1
psrad xmm4,DESCALE_P1
- paddd xmm2,[PD_DESCALE_P1]
- paddd xmm1,[PD_DESCALE_P1]
+ paddd xmm2,[rel PD_DESCALE_P1]
+ paddd xmm1,[rel PD_DESCALE_P1]
psrad xmm2,DESCALE_P1
psrad xmm1,DESCALE_P1
@@ -339,22 +339,22 @@ EXTN(jsimd_fdct_islow_sse2):
punpckhwd xmm1,xmm3
movdqa xmm5,xmm4
movdqa xmm3,xmm1
- pmaddwd xmm4,[PW_MF050_MF256] ; xmm4=tmp5L
- pmaddwd xmm1,[PW_MF050_MF256] ; xmm1=tmp5H
- pmaddwd xmm5,[PW_MF256_F050] ; xmm5=tmp6L
- pmaddwd xmm3,[PW_MF256_F050] ; xmm3=tmp6H
+ pmaddwd xmm4,[rel PW_MF050_MF256] ; xmm4=tmp5L
+ pmaddwd xmm1,[rel PW_MF050_MF256] ; xmm1=tmp5H
+ pmaddwd xmm5,[rel PW_MF256_F050] ; xmm5=tmp6L
+ pmaddwd xmm3,[rel PW_MF256_F050] ; xmm3=tmp6H
paddd xmm4,xmm6 ; xmm4=data5L
paddd xmm1,xmm0 ; xmm1=data5H
paddd xmm5, XMMWORD [wk(0)] ; xmm5=data3L
paddd xmm3, XMMWORD [wk(1)] ; xmm3=data3H
- paddd xmm4,[PD_DESCALE_P1]
- paddd xmm1,[PD_DESCALE_P1]
+ paddd xmm4,[rel PD_DESCALE_P1]
+ paddd xmm1,[rel PD_DESCALE_P1]
psrad xmm4,DESCALE_P1
psrad xmm1,DESCALE_P1
- paddd xmm5,[PD_DESCALE_P1]
- paddd xmm3,[PD_DESCALE_P1]
+ paddd xmm5,[rel PD_DESCALE_P1]
+ paddd xmm3,[rel PD_DESCALE_P1]
psrad xmm5,DESCALE_P1
psrad xmm3,DESCALE_P1
@@ -457,8 +457,8 @@ EXTN(jsimd_fdct_islow_sse2):
paddw xmm7,xmm2 ; xmm7=tmp10+tmp11
psubw xmm5,xmm2 ; xmm5=tmp10-tmp11
- paddw xmm7,[PW_DESCALE_P2X]
- paddw xmm5,[PW_DESCALE_P2X]
+ paddw xmm7,[rel PW_DESCALE_P2X]
+ paddw xmm5,[rel PW_DESCALE_P2X]
psraw xmm7,PASS1_BITS ; xmm7=data0
psraw xmm5,PASS1_BITS ; xmm5=data4
@@ -480,17 +480,17 @@ EXTN(jsimd_fdct_islow_sse2):
punpckhwd xmm2,xmm6
movdqa xmm1,xmm4
movdqa xmm6,xmm2
- pmaddwd xmm4,[PW_F130_F054] ; xmm4=data2L
- pmaddwd xmm2,[PW_F130_F054] ; xmm2=data2H
- pmaddwd xmm1,[PW_F054_MF130] ; xmm1=data6L
- pmaddwd xmm6,[PW_F054_MF130] ; xmm6=data6H
+ pmaddwd xmm4,[rel PW_F130_F054] ; xmm4=data2L
+ pmaddwd xmm2,[rel PW_F130_F054] ; xmm2=data2H
+ pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=data6L
+ pmaddwd xmm6,[rel PW_F054_MF130] ; xmm6=data6H
- paddd xmm4,[PD_DESCALE_P2]
- paddd xmm2,[PD_DESCALE_P2]
+ paddd xmm4,[rel PD_DESCALE_P2]
+ paddd xmm2,[rel PD_DESCALE_P2]
psrad xmm4,DESCALE_P2
psrad xmm2,DESCALE_P2
- paddd xmm1,[PD_DESCALE_P2]
- paddd xmm6,[PD_DESCALE_P2]
+ paddd xmm1,[rel PD_DESCALE_P2]
+ paddd xmm6,[rel PD_DESCALE_P2]
psrad xmm1,DESCALE_P2
psrad xmm6,DESCALE_P2
@@ -525,10 +525,10 @@ EXTN(jsimd_fdct_islow_sse2):
punpckhwd xmm1,xmm6
movdqa xmm2,xmm4
movdqa xmm6,xmm1
- pmaddwd xmm4,[PW_MF078_F117] ; xmm4=z3L
- pmaddwd xmm1,[PW_MF078_F117] ; xmm1=z3H
- pmaddwd xmm2,[PW_F117_F078] ; xmm2=z4L
- pmaddwd xmm6,[PW_F117_F078] ; xmm6=z4H
+ pmaddwd xmm4,[rel PW_MF078_F117] ; xmm4=z3L
+ pmaddwd xmm1,[rel PW_MF078_F117] ; xmm1=z3H
+ pmaddwd xmm2,[rel PW_F117_F078] ; xmm2=z4L
+ pmaddwd xmm6,[rel PW_F117_F078] ; xmm6=z4H
movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=z3L
movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=z3H
@@ -555,22 +555,22 @@ EXTN(jsimd_fdct_islow_sse2):
punpckhwd xmm1,xmm5
movdqa xmm0,xmm4
movdqa xmm5,xmm1
- pmaddwd xmm4,[PW_MF060_MF089] ; xmm4=tmp4L
- pmaddwd xmm1,[PW_MF060_MF089] ; xmm1=tmp4H
- pmaddwd xmm0,[PW_MF089_F060] ; xmm0=tmp7L
- pmaddwd xmm5,[PW_MF089_F060] ; xmm5=tmp7H
+ pmaddwd xmm4,[rel PW_MF060_MF089] ; xmm4=tmp4L
+ pmaddwd xmm1,[rel PW_MF060_MF089] ; xmm1=tmp4H
+ pmaddwd xmm0,[rel PW_MF089_F060] ; xmm0=tmp7L
+ pmaddwd xmm5,[rel PW_MF089_F060] ; xmm5=tmp7H
paddd xmm4, XMMWORD [wk(0)] ; xmm4=data7L
paddd xmm1, XMMWORD [wk(1)] ; xmm1=data7H
paddd xmm0,xmm2 ; xmm0=data1L
paddd xmm5,xmm6 ; xmm5=data1H
- paddd xmm4,[PD_DESCALE_P2]
- paddd xmm1,[PD_DESCALE_P2]
+ paddd xmm4,[rel PD_DESCALE_P2]
+ paddd xmm1,[rel PD_DESCALE_P2]
psrad xmm4,DESCALE_P2
psrad xmm1,DESCALE_P2
- paddd xmm0,[PD_DESCALE_P2]
- paddd xmm5,[PD_DESCALE_P2]
+ paddd xmm0,[rel PD_DESCALE_P2]
+ paddd xmm5,[rel PD_DESCALE_P2]
psrad xmm0,DESCALE_P2
psrad xmm5,DESCALE_P2
@@ -586,22 +586,22 @@ EXTN(jsimd_fdct_islow_sse2):
punpckhwd xmm5,xmm7
movdqa xmm3,xmm1
movdqa xmm7,xmm5
- pmaddwd xmm1,[PW_MF050_MF256] ; xmm1=tmp5L
- pmaddwd xmm5,[PW_MF050_MF256] ; xmm5=tmp5H
- pmaddwd xmm3,[PW_MF256_F050] ; xmm3=tmp6L
- pmaddwd xmm7,[PW_MF256_F050] ; xmm7=tmp6H
+ pmaddwd xmm1,[rel PW_MF050_MF256] ; xmm1=tmp5L
+ pmaddwd xmm5,[rel PW_MF050_MF256] ; xmm5=tmp5H
+ pmaddwd xmm3,[rel PW_MF256_F050] ; xmm3=tmp6L
+ pmaddwd xmm7,[rel PW_MF256_F050] ; xmm7=tmp6H
paddd xmm1,xmm2 ; xmm1=data5L
paddd xmm5,xmm6 ; xmm5=data5H
paddd xmm3, XMMWORD [wk(0)] ; xmm3=data3L
paddd xmm7, XMMWORD [wk(1)] ; xmm7=data3H
- paddd xmm1,[PD_DESCALE_P2]
- paddd xmm5,[PD_DESCALE_P2]
+ paddd xmm1,[rel PD_DESCALE_P2]
+ paddd xmm5,[rel PD_DESCALE_P2]
psrad xmm1,DESCALE_P2
psrad xmm5,DESCALE_P2
- paddd xmm3,[PD_DESCALE_P2]
- paddd xmm7,[PD_DESCALE_P2]
+ paddd xmm3,[rel PD_DESCALE_P2]
+ paddd xmm7,[rel PD_DESCALE_P2]
psrad xmm3,DESCALE_P2
psrad xmm7,DESCALE_P2
diff --git a/simd/jfsseflt-64.asm b/simd/jfsseflt-64.asm
index d8f7246..859ff54 100644
--- a/simd/jfsseflt-64.asm
+++ b/simd/jfsseflt-64.asm
@@ -159,7 +159,7 @@ EXTN(jsimd_fdct_float_sse):
addps xmm6,xmm4 ; xmm6=tmp11
addps xmm0,xmm5
- mulps xmm0,[PD_0_707] ; xmm0=z1
+ mulps xmm0,[rel PD_0_707] ; xmm0=z1
movaps xmm7,xmm1
movaps xmm4,xmm5
@@ -182,13 +182,13 @@ EXTN(jsimd_fdct_float_sse):
addps xmm3,xmm6 ; xmm3=tmp11
addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7
- mulps xmm3,[PD_0_707] ; xmm3=z3
+ mulps xmm3,[rel PD_0_707] ; xmm3=z3
movaps xmm1,xmm2 ; xmm1=tmp10
subps xmm2,xmm6
- mulps xmm2,[PD_0_382] ; xmm2=z5
- mulps xmm1,[PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
- mulps xmm6,[PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+ mulps xmm2,[rel PD_0_382] ; xmm2=z5
+ mulps xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+ mulps xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
addps xmm1,xmm2 ; xmm1=z2
addps xmm6,xmm2 ; xmm6=z4
@@ -294,7 +294,7 @@ EXTN(jsimd_fdct_float_sse):
addps xmm6,xmm4 ; xmm6=tmp11
addps xmm0,xmm5
- mulps xmm0,[PD_0_707] ; xmm0=z1
+ mulps xmm0,[rel PD_0_707] ; xmm0=z1
movaps xmm7,xmm1
movaps xmm4,xmm5
@@ -317,13 +317,13 @@ EXTN(jsimd_fdct_float_sse):
addps xmm3,xmm6 ; xmm3=tmp11
addps xmm6,xmm0 ; xmm6=tmp12, xmm0=tmp7
- mulps xmm3,[PD_0_707] ; xmm3=z3
+ mulps xmm3,[rel PD_0_707] ; xmm3=z3
movaps xmm1,xmm2 ; xmm1=tmp10
subps xmm2,xmm6
- mulps xmm2,[PD_0_382] ; xmm2=z5
- mulps xmm1,[PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
- mulps xmm6,[PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+ mulps xmm2,[rel PD_0_382] ; xmm2=z5
+ mulps xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+ mulps xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
addps xmm1,xmm2 ; xmm1=z2
addps xmm6,xmm2 ; xmm6=z4
diff --git a/simd/jiss2flt-64.asm b/simd/jiss2flt-64.asm
index 572909d..a3d4a54 100644
--- a/simd/jiss2flt-64.asm
+++ b/simd/jiss2flt-64.asm
@@ -181,7 +181,7 @@ EXTN(jsimd_idct_float_sse2):
addps xmm4,xmm2 ; xmm4=tmp10
addps xmm5,xmm3 ; xmm5=tmp13
- mulps xmm1,[PD_1_414]
+ mulps xmm1,[rel PD_1_414]
subps xmm1,xmm5 ; xmm1=tmp12
movaps xmm6,xmm4
@@ -231,13 +231,13 @@ EXTN(jsimd_idct_float_sse2):
subps xmm2,xmm5
addps xmm1,xmm5 ; xmm1=tmp7
- mulps xmm2,[PD_1_414] ; xmm2=tmp11
+ mulps xmm2,[rel PD_1_414] ; xmm2=tmp11
movaps xmm3,xmm0
addps xmm0,xmm4
- mulps xmm0,[PD_1_847] ; xmm0=z5
- mulps xmm3,[PD_M2_613] ; xmm3=(z10 * -2.613125930)
- mulps xmm4,[PD_1_082] ; xmm4=(z12 * 1.082392200)
+ mulps xmm0,[rel PD_1_847] ; xmm0=z5
+ mulps xmm3,[rel PD_M2_613] ; xmm3=(z10 * -2.613125930)
+ mulps xmm4,[rel PD_1_082] ; xmm4=(z12 * 1.082392200)
addps xmm3,xmm0 ; xmm3=tmp12
subps xmm4,xmm0 ; xmm4=tmp10
@@ -344,7 +344,7 @@ EXTN(jsimd_idct_float_sse2):
addps xmm4,xmm2 ; xmm4=tmp10
addps xmm5,xmm3 ; xmm5=tmp13
- mulps xmm1,[PD_1_414]
+ mulps xmm1,[rel PD_1_414]
subps xmm1,xmm5 ; xmm1=tmp12
movaps xmm6,xmm4
@@ -375,13 +375,13 @@ EXTN(jsimd_idct_float_sse2):
subps xmm2,xmm5
addps xmm1,xmm5 ; xmm1=tmp7
- mulps xmm2,[PD_1_414] ; xmm2=tmp11
+ mulps xmm2,[rel PD_1_414] ; xmm2=tmp11
movaps xmm3,xmm0
addps xmm0,xmm4
- mulps xmm0,[PD_1_847] ; xmm0=z5
- mulps xmm3,[PD_M2_613] ; xmm3=(z10 * -2.613125930)
- mulps xmm4,[PD_1_082] ; xmm4=(z12 * 1.082392200)
+ mulps xmm0,[rel PD_1_847] ; xmm0=z5
+ mulps xmm3,[rel PD_M2_613] ; xmm3=(z10 * -2.613125930)
+ mulps xmm4,[rel PD_1_082] ; xmm4=(z12 * 1.082392200)
addps xmm3,xmm0 ; xmm3=tmp12
subps xmm4,xmm0 ; xmm4=tmp10
@@ -396,7 +396,7 @@ EXTN(jsimd_idct_float_sse2):
subps xmm0,xmm3 ; xmm0=data6=(06 16 26 36)
subps xmm2,xmm3 ; xmm2=tmp5
- movaps xmm1,[PD_RNDINT_MAGIC] ; xmm1=[PD_RNDINT_MAGIC]
+ movaps xmm1,[rel PD_RNDINT_MAGIC] ; xmm1=[rel PD_RNDINT_MAGIC]
pcmpeqd xmm3,xmm3
psrld xmm3,WORD_BIT ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..}
@@ -423,7 +423,7 @@ EXTN(jsimd_idct_float_sse2):
subps xmm7,xmm2 ; xmm7=data5=(05 15 25 35)
subps xmm5,xmm4 ; xmm5=data3=(03 13 23 33)
- movaps xmm2,[PD_RNDINT_MAGIC] ; xmm2=[PD_RNDINT_MAGIC]
+ movaps xmm2,[rel PD_RNDINT_MAGIC] ; xmm2=[rel PD_RNDINT_MAGIC]
pcmpeqd xmm4,xmm4
psrld xmm4,WORD_BIT ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..}
@@ -439,7 +439,7 @@ EXTN(jsimd_idct_float_sse2):
por xmm3,xmm7 ; xmm3=(04 05 14 15 24 25 34 35)
por xmm1,xmm5 ; xmm1=(02 03 12 13 22 23 32 33)
- movdqa xmm2,[PB_CENTERJSAMP] ; xmm2=[PB_CENTERJSAMP]
+ movdqa xmm2,[rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP]
packsswb xmm6,xmm3 ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35)
packsswb xmm1,xmm0 ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37)
diff --git a/simd/jiss2fst-64.asm b/simd/jiss2fst-64.asm
index 97dfa76..ab6af50 100644
--- a/simd/jiss2fst-64.asm
+++ b/simd/jiss2fst-64.asm
@@ -172,7 +172,7 @@ EXTN(jsimd_idct_ifast_sse2):
paddw xmm5,xmm3 ; xmm5=tmp13
psllw xmm1,PRE_MULTIPLY_SCALE_BITS
- pmulhw xmm1,[PW_F1414]
+ pmulhw xmm1,[rel PW_F1414]
psubw xmm1,xmm5 ; xmm1=tmp12
movdqa xmm6,xmm4
@@ -212,7 +212,7 @@ EXTN(jsimd_idct_ifast_sse2):
paddw xmm3,xmm0 ; xmm3=tmp7
psllw xmm4,PRE_MULTIPLY_SCALE_BITS
- pmulhw xmm4,[PW_F1414] ; xmm4=tmp11
+ pmulhw xmm4,[rel PW_F1414] ; xmm4=tmp11
; To avoid overflow...
;
@@ -225,9 +225,9 @@ EXTN(jsimd_idct_ifast_sse2):
movdqa xmm0,xmm5
paddw xmm5,xmm2
- pmulhw xmm5,[PW_F1847] ; xmm5=z5
- pmulhw xmm0,[PW_MF1613]
- pmulhw xmm2,[PW_F1082]
+ pmulhw xmm5,[rel PW_F1847] ; xmm5=z5
+ pmulhw xmm0,[rel PW_MF1613]
+ pmulhw xmm2,[rel PW_F1082]
psubw xmm0,xmm1
psubw xmm2,xmm5 ; xmm2=tmp10
paddw xmm0,xmm5 ; xmm0=tmp12
@@ -337,7 +337,7 @@ EXTN(jsimd_idct_ifast_sse2):
paddw xmm0,xmm3 ; xmm0=tmp13
psllw xmm5,PRE_MULTIPLY_SCALE_BITS
- pmulhw xmm5,[PW_F1414]
+ pmulhw xmm5,[rel PW_F1414]
psubw xmm5,xmm0 ; xmm5=tmp12
movdqa xmm1,xmm2
@@ -373,7 +373,7 @@ EXTN(jsimd_idct_ifast_sse2):
paddw xmm5,xmm6 ; xmm5=tmp7
psllw xmm2,PRE_MULTIPLY_SCALE_BITS
- pmulhw xmm2,[PW_F1414] ; xmm2=tmp11
+ pmulhw xmm2,[rel PW_F1414] ; xmm2=tmp11
; To avoid overflow...
;
@@ -386,9 +386,9 @@ EXTN(jsimd_idct_ifast_sse2):
movdqa xmm6,xmm4
paddw xmm4,xmm0
- pmulhw xmm4,[PW_F1847] ; xmm4=z5
- pmulhw xmm6,[PW_MF1613]
- pmulhw xmm0,[PW_F1082]
+ pmulhw xmm4,[rel PW_F1847] ; xmm4=z5
+ pmulhw xmm6,[rel PW_MF1613]
+ pmulhw xmm0,[rel PW_F1082]
psubw xmm6,xmm7
psubw xmm0,xmm4 ; xmm0=tmp10
paddw xmm6,xmm4 ; xmm6=tmp12
@@ -426,7 +426,7 @@ EXTN(jsimd_idct_ifast_sse2):
psraw xmm4,(PASS1_BITS+3) ; descale
psraw xmm7,(PASS1_BITS+3) ; descale
- movdqa xmm2,[PB_CENTERJSAMP] ; xmm2=[PB_CENTERJSAMP]
+ movdqa xmm2,[rel PB_CENTERJSAMP] ; xmm2=[rel PB_CENTERJSAMP]
packsswb xmm5,xmm6 ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
packsswb xmm7,xmm4 ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
diff --git a/simd/jiss2int-64.asm b/simd/jiss2int-64.asm
index cfeb42d..97317af 100644
--- a/simd/jiss2int-64.asm
+++ b/simd/jiss2int-64.asm
@@ -196,10 +196,10 @@ EXTN(jsimd_idct_islow_sse2):
punpckhwd xmm5,xmm3
movdqa xmm1,xmm4
movdqa xmm3,xmm5
- pmaddwd xmm4,[PW_F130_F054] ; xmm4=tmp3L
- pmaddwd xmm5,[PW_F130_F054] ; xmm5=tmp3H
- pmaddwd xmm1,[PW_F054_MF130] ; xmm1=tmp2L
- pmaddwd xmm3,[PW_F054_MF130] ; xmm3=tmp2H
+ pmaddwd xmm4,[rel PW_F130_F054] ; xmm4=tmp3L
+ pmaddwd xmm5,[rel PW_F130_F054] ; xmm5=tmp3H
+ pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=tmp2L
+ pmaddwd xmm3,[rel PW_F054_MF130] ; xmm3=tmp2H
movdqa xmm6,xmm0
paddw xmm0,xmm2 ; xmm0=in0+in4
@@ -274,10 +274,10 @@ EXTN(jsimd_idct_islow_sse2):
punpckhwd xmm0,xmm7
movdqa xmm5,xmm2
movdqa xmm7,xmm0
- pmaddwd xmm2,[PW_MF078_F117] ; xmm2=z3L
- pmaddwd xmm0,[PW_MF078_F117] ; xmm0=z3H
- pmaddwd xmm5,[PW_F117_F078] ; xmm5=z4L
- pmaddwd xmm7,[PW_F117_F078] ; xmm7=z4H
+ pmaddwd xmm2,[rel PW_MF078_F117] ; xmm2=z3L
+ pmaddwd xmm0,[rel PW_MF078_F117] ; xmm0=z3H
+ pmaddwd xmm5,[rel PW_F117_F078] ; xmm5=z4L
+ pmaddwd xmm7,[rel PW_F117_F078] ; xmm7=z4H
movdqa XMMWORD [wk(10)], xmm2 ; wk(10)=z3L
movdqa XMMWORD [wk(11)], xmm0 ; wk(11)=z3H
@@ -304,10 +304,10 @@ EXTN(jsimd_idct_islow_sse2):
punpckhwd xmm0,xmm4
movdqa xmm3,xmm2
movdqa xmm4,xmm0
- pmaddwd xmm2,[PW_MF060_MF089] ; xmm2=tmp0L
- pmaddwd xmm0,[PW_MF060_MF089] ; xmm0=tmp0H
- pmaddwd xmm3,[PW_MF089_F060] ; xmm3=tmp3L
- pmaddwd xmm4,[PW_MF089_F060] ; xmm4=tmp3H
+ pmaddwd xmm2,[rel PW_MF060_MF089] ; xmm2=tmp0L
+ pmaddwd xmm0,[rel PW_MF060_MF089] ; xmm0=tmp0H
+ pmaddwd xmm3,[rel PW_MF089_F060] ; xmm3=tmp3L
+ pmaddwd xmm4,[rel PW_MF089_F060] ; xmm4=tmp3H
paddd xmm2, XMMWORD [wk(10)] ; xmm2=tmp0L
paddd xmm0, XMMWORD [wk(11)] ; xmm0=tmp0H
@@ -323,10 +323,10 @@ EXTN(jsimd_idct_islow_sse2):
punpckhwd xmm0,xmm6
movdqa xmm1,xmm2
movdqa xmm6,xmm0
- pmaddwd xmm2,[PW_MF050_MF256] ; xmm2=tmp1L
- pmaddwd xmm0,[PW_MF050_MF256] ; xmm0=tmp1H
- pmaddwd xmm1,[PW_MF256_F050] ; xmm1=tmp2L
- pmaddwd xmm6,[PW_MF256_F050] ; xmm6=tmp2H
+ pmaddwd xmm2,[rel PW_MF050_MF256] ; xmm2=tmp1L
+ pmaddwd xmm0,[rel PW_MF050_MF256] ; xmm0=tmp1H
+ pmaddwd xmm1,[rel PW_MF256_F050] ; xmm1=tmp2L
+ pmaddwd xmm6,[rel PW_MF256_F050] ; xmm6=tmp2H
paddd xmm2,xmm5 ; xmm2=tmp1L
paddd xmm0,xmm7 ; xmm0=tmp1H
@@ -348,7 +348,7 @@ EXTN(jsimd_idct_islow_sse2):
psubd xmm2,xmm3 ; xmm2=data7L
psubd xmm0,xmm4 ; xmm0=data7H
- movdqa xmm3,[PD_DESCALE_P1] ; xmm3=[PD_DESCALE_P1]
+ movdqa xmm3,[rel PD_DESCALE_P1] ; xmm3=[rel PD_DESCALE_P1]
paddd xmm5,xmm3
paddd xmm7,xmm3
@@ -372,7 +372,7 @@ EXTN(jsimd_idct_islow_sse2):
psubd xmm7,xmm1 ; xmm7=data6L
psubd xmm0,xmm6 ; xmm0=data6H
- movdqa xmm1,[PD_DESCALE_P1] ; xmm1=[PD_DESCALE_P1]
+ movdqa xmm1,[rel PD_DESCALE_P1] ; xmm1=[rel PD_DESCALE_P1]
paddd xmm4,xmm1
paddd xmm3,xmm1
@@ -410,7 +410,7 @@ EXTN(jsimd_idct_islow_sse2):
psubd xmm5,xmm4 ; xmm5=data5L
psubd xmm6,xmm2 ; xmm6=data5H
- movdqa xmm7,[PD_DESCALE_P1] ; xmm7=[PD_DESCALE_P1]
+ movdqa xmm7,[rel PD_DESCALE_P1] ; xmm7=[rel PD_DESCALE_P1]
paddd xmm3,xmm7
paddd xmm0,xmm7
@@ -436,7 +436,7 @@ EXTN(jsimd_idct_islow_sse2):
psubd xmm0,xmm2 ; xmm0=data4L
psubd xmm6,xmm7 ; xmm6=data4H
- movdqa xmm2,[PD_DESCALE_P1] ; xmm2=[PD_DESCALE_P1]
+ movdqa xmm2,[rel PD_DESCALE_P1] ; xmm2=[rel PD_DESCALE_P1]
paddd xmm1,xmm2
paddd xmm4,xmm2
@@ -536,10 +536,10 @@ EXTN(jsimd_idct_islow_sse2):
punpckhwd xmm5,xmm2
movdqa xmm1,xmm6
movdqa xmm2,xmm5
- pmaddwd xmm6,[PW_F130_F054] ; xmm6=tmp3L
- pmaddwd xmm5,[PW_F130_F054] ; xmm5=tmp3H
- pmaddwd xmm1,[PW_F054_MF130] ; xmm1=tmp2L
- pmaddwd xmm2,[PW_F054_MF130] ; xmm2=tmp2H
+ pmaddwd xmm6,[rel PW_F130_F054] ; xmm6=tmp3L
+ pmaddwd xmm5,[rel PW_F130_F054] ; xmm5=tmp3H
+ pmaddwd xmm1,[rel PW_F054_MF130] ; xmm1=tmp2L
+ pmaddwd xmm2,[rel PW_F054_MF130] ; xmm2=tmp2H
movdqa xmm3,xmm7
paddw xmm7,xmm0 ; xmm7=in0+in4
@@ -610,10 +610,10 @@ EXTN(jsimd_idct_islow_sse2):
punpckhwd xmm7,xmm4
movdqa xmm5,xmm0
movdqa xmm4,xmm7
- pmaddwd xmm0,[PW_MF078_F117] ; xmm0=z3L
- pmaddwd xmm7,[PW_MF078_F117] ; xmm7=z3H
- pmaddwd xmm5,[PW_F117_F078] ; xmm5=z4L
- pmaddwd xmm4,[PW_F117_F078] ; xmm4=z4H
+ pmaddwd xmm0,[rel PW_MF078_F117] ; xmm0=z3L
+ pmaddwd xmm7,[rel PW_MF078_F117] ; xmm7=z3H
+ pmaddwd xmm5,[rel PW_F117_F078] ; xmm5=z4L
+ pmaddwd xmm4,[rel PW_F117_F078] ; xmm4=z4H
movdqa XMMWORD [wk(10)], xmm0 ; wk(10)=z3L
movdqa XMMWORD [wk(11)], xmm7 ; wk(11)=z3H
@@ -640,10 +640,10 @@ EXTN(jsimd_idct_islow_sse2):
punpckhwd xmm7,xmm3
movdqa xmm1,xmm0
movdqa xmm3,xmm7
- pmaddwd xmm0,[PW_MF060_MF089] ; xmm0=tmp0L
- pmaddwd xmm7,[PW_MF060_MF089] ; xmm7=tmp0H
- pmaddwd xmm1,[PW_MF089_F060] ; xmm1=tmp3L
- pmaddwd xmm3,[PW_MF089_F060] ; xmm3=tmp3H
+ pmaddwd xmm0,[rel PW_MF060_MF089] ; xmm0=tmp0L
+ pmaddwd xmm7,[rel PW_MF060_MF089] ; xmm7=tmp0H
+ pmaddwd xmm1,[rel PW_MF089_F060] ; xmm1=tmp3L
+ pmaddwd xmm3,[rel PW_MF089_F060] ; xmm3=tmp3H
paddd xmm0, XMMWORD [wk(10)] ; xmm0=tmp0L
paddd xmm7, XMMWORD [wk(11)] ; xmm7=tmp0H
@@ -659,10 +659,10 @@ EXTN(jsimd_idct_islow_sse2):
punpckhwd xmm7,xmm6
movdqa xmm2,xmm0
movdqa xmm6,xmm7
- pmaddwd xmm0,[PW_MF050_MF256] ; xmm0=tmp1L
- pmaddwd xmm7,[PW_MF050_MF256] ; xmm7=tmp1H
- pmaddwd xmm2,[PW_MF256_F050] ; xmm2=tmp2L
- pmaddwd xmm6,[PW_MF256_F050] ; xmm6=tmp2H
+ pmaddwd xmm0,[rel PW_MF050_MF256] ; xmm0=tmp1L
+ pmaddwd xmm7,[rel PW_MF050_MF256] ; xmm7=tmp1H
+ pmaddwd xmm2,[rel PW_MF256_F050] ; xmm2=tmp2L
+ pmaddwd xmm6,[rel PW_MF256_F050] ; xmm6=tmp2H
paddd xmm0,xmm5 ; xmm0=tmp1L
paddd xmm7,xmm4 ; xmm7=tmp1H
@@ -684,7 +684,7 @@ EXTN(jsimd_idct_islow_sse2):
psubd xmm0,xmm1 ; xmm0=data7L
psubd xmm7,xmm3 ; xmm7=data7H
- movdqa xmm1,[PD_DESCALE_P2] ; xmm1=[PD_DESCALE_P2]
+ movdqa xmm1,[rel PD_DESCALE_P2] ; xmm1=[rel PD_DESCALE_P2]
paddd xmm5,xmm1
paddd xmm4,xmm1
@@ -708,7 +708,7 @@ EXTN(jsimd_idct_islow_sse2):
psubd xmm4,xmm2 ; xmm4=data6L
psubd xmm7,xmm6 ; xmm7=data6H
- movdqa xmm2,[PD_DESCALE_P2] ; xmm2=[PD_DESCALE_P2]
+ movdqa xmm2,[rel PD_DESCALE_P2] ; xmm2=[rel PD_DESCALE_P2]
paddd xmm3,xmm2
paddd xmm1,xmm2
@@ -740,7 +740,7 @@ EXTN(jsimd_idct_islow_sse2):
psubd xmm4,xmm1 ; xmm4=data5L
psubd xmm0,xmm7 ; xmm0=data5H
- movdqa xmm5,[PD_DESCALE_P2] ; xmm5=[PD_DESCALE_P2]
+ movdqa xmm5,[rel PD_DESCALE_P2] ; xmm5=[rel PD_DESCALE_P2]
paddd xmm6,xmm5
paddd xmm2,xmm5
@@ -766,7 +766,7 @@ EXTN(jsimd_idct_islow_sse2):
psubd xmm2,xmm7 ; xmm2=data4L
psubd xmm0,xmm5 ; xmm0=data4H
- movdqa xmm7,[PD_DESCALE_P2] ; xmm7=[PD_DESCALE_P2]
+ movdqa xmm7,[rel PD_DESCALE_P2] ; xmm7=[rel PD_DESCALE_P2]
paddd xmm3,xmm7
paddd xmm1,xmm7
@@ -777,7 +777,7 @@ EXTN(jsimd_idct_islow_sse2):
psrad xmm2,DESCALE_P2
psrad xmm0,DESCALE_P2
- movdqa xmm5,[PB_CENTERJSAMP] ; xmm5=[PB_CENTERJSAMP]
+ movdqa xmm5,[rel PB_CENTERJSAMP] ; xmm5=[rel PB_CENTERJSAMP]
packssdw xmm3,xmm1 ; xmm3=data3=(03 13 23 33 43 53 63 73)
packssdw xmm2,xmm0 ; xmm2=data4=(04 14 24 34 44 54 64 74)
diff --git a/simd/jiss2red-64.asm b/simd/jiss2red-64.asm
index 381180c..dcf745b 100644
--- a/simd/jiss2red-64.asm
+++ b/simd/jiss2red-64.asm
@@ -186,10 +186,10 @@ EXTN(jsimd_idct_4x4_sse2):
punpckhwd xmm5,xmm1
movdqa xmm0,xmm4
movdqa xmm1,xmm5
- pmaddwd xmm4,[PW_F256_F089] ; xmm4=(tmp2L)
- pmaddwd xmm5,[PW_F256_F089] ; xmm5=(tmp2H)
- pmaddwd xmm0,[PW_F106_MF217] ; xmm0=(tmp0L)
- pmaddwd xmm1,[PW_F106_MF217] ; xmm1=(tmp0H)
+ pmaddwd xmm4,[rel PW_F256_F089] ; xmm4=(tmp2L)
+ pmaddwd xmm5,[rel PW_F256_F089] ; xmm5=(tmp2H)
+ pmaddwd xmm0,[rel PW_F106_MF217] ; xmm0=(tmp0L)
+ pmaddwd xmm1,[rel PW_F106_MF217] ; xmm1=(tmp0H)
movdqa xmm6,xmm2
movdqa xmm7,xmm2
@@ -197,10 +197,10 @@ EXTN(jsimd_idct_4x4_sse2):
punpckhwd xmm7,xmm3
movdqa xmm2,xmm6
movdqa xmm3,xmm7
- pmaddwd xmm6,[PW_MF060_MF050] ; xmm6=(tmp2L)
- pmaddwd xmm7,[PW_MF060_MF050] ; xmm7=(tmp2H)
- pmaddwd xmm2,[PW_F145_MF021] ; xmm2=(tmp0L)
- pmaddwd xmm3,[PW_F145_MF021] ; xmm3=(tmp0H)
+ pmaddwd xmm6,[rel PW_MF060_MF050] ; xmm6=(tmp2L)
+ pmaddwd xmm7,[rel PW_MF060_MF050] ; xmm7=(tmp2H)
+ pmaddwd xmm2,[rel PW_F145_MF021] ; xmm2=(tmp0L)
+ pmaddwd xmm3,[rel PW_F145_MF021] ; xmm3=(tmp0H)
paddd xmm6,xmm4 ; xmm6=tmp2L
paddd xmm7,xmm5 ; xmm7=tmp2H
@@ -229,8 +229,8 @@ EXTN(jsimd_idct_4x4_sse2):
movdqa xmm3,xmm5 ; xmm5=in2=z2
punpcklwd xmm5,xmm0 ; xmm0=in6=z3
punpckhwd xmm3,xmm0
- pmaddwd xmm5,[PW_F184_MF076] ; xmm5=tmp2L
- pmaddwd xmm3,[PW_F184_MF076] ; xmm3=tmp2H
+ pmaddwd xmm5,[rel PW_F184_MF076] ; xmm5=tmp2L
+ pmaddwd xmm3,[rel PW_F184_MF076] ; xmm3=tmp2H
movdqa xmm4,xmm1
movdqa xmm0,xmm2
@@ -248,7 +248,7 @@ EXTN(jsimd_idct_4x4_sse2):
psubd xmm5,xmm6 ; xmm5=data3L
psubd xmm3,xmm7 ; xmm3=data3H
- movdqa xmm6,[PD_DESCALE_P1_4] ; xmm6=[PD_DESCALE_P1_4]
+ movdqa xmm6,[rel PD_DESCALE_P1_4] ; xmm6=[rel PD_DESCALE_P1_4]
paddd xmm1,xmm6
paddd xmm2,xmm6
@@ -272,7 +272,7 @@ EXTN(jsimd_idct_4x4_sse2):
psubd xmm2,xmm7 ; xmm2=data2L
psubd xmm3,xmm6 ; xmm3=data2H
- movdqa xmm7,[PD_DESCALE_P1_4] ; xmm7=[PD_DESCALE_P1_4]
+ movdqa xmm7,[rel PD_DESCALE_P1_4] ; xmm7=[rel PD_DESCALE_P1_4]
paddd xmm4,xmm7
paddd xmm0,xmm7
@@ -326,10 +326,10 @@ EXTN(jsimd_idct_4x4_sse2):
punpckhwd xmm6,xmm3
movdqa xmm5,xmm1
movdqa xmm2,xmm6
- pmaddwd xmm1,[PW_F256_F089] ; xmm1=(tmp2)
- pmaddwd xmm6,[PW_MF060_MF050] ; xmm6=(tmp2)
- pmaddwd xmm5,[PW_F106_MF217] ; xmm5=(tmp0)
- pmaddwd xmm2,[PW_F145_MF021] ; xmm2=(tmp0)
+ pmaddwd xmm1,[rel PW_F256_F089] ; xmm1=(tmp2)
+ pmaddwd xmm6,[rel PW_MF060_MF050] ; xmm6=(tmp2)
+ pmaddwd xmm5,[rel PW_F106_MF217] ; xmm5=(tmp0)
+ pmaddwd xmm2,[rel PW_F145_MF021] ; xmm2=(tmp0)
paddd xmm6,xmm1 ; xmm6=tmp2
paddd xmm2,xmm5 ; xmm2=tmp0
@@ -337,7 +337,7 @@ EXTN(jsimd_idct_4x4_sse2):
; -- Even part
punpcklwd xmm0,xmm3
- pmaddwd xmm0,[PW_F184_MF076] ; xmm0=tmp2
+ pmaddwd xmm0,[rel PW_F184_MF076] ; xmm0=tmp2
movdqa xmm7,xmm4
paddd xmm4,xmm0 ; xmm4=tmp10
@@ -345,7 +345,7 @@ EXTN(jsimd_idct_4x4_sse2):
; -- Final output stage
- movdqa xmm1,[PD_DESCALE_P2_4] ; xmm1=[PD_DESCALE_P2_4]
+ movdqa xmm1,[rel PD_DESCALE_P2_4] ; xmm1=[rel PD_DESCALE_P2_4]
movdqa xmm5,xmm4
movdqa xmm3,xmm7
@@ -375,7 +375,7 @@ EXTN(jsimd_idct_4x4_sse2):
punpckhdq xmm6,xmm0 ; xmm6=(20 21 22 23 30 31 32 33)
packsswb xmm4,xmm6 ; xmm4=(00 01 02 03 10 11 12 13 20 ..)
- paddb xmm4,[PB_CENTERJSAMP]
+ paddb xmm4,[rel PB_CENTERJSAMP]
pshufd xmm2,xmm4,0x39 ; xmm2=(10 11 12 13 20 21 22 23 30 ..)
pshufd xmm1,xmm4,0x4E ; xmm1=(20 21 22 23 30 31 32 33 00 ..)
@@ -457,8 +457,8 @@ EXTN(jsimd_idct_2x2_sse2):
movdqa xmm5,xmm2 ; xmm5=(50 51 ** 53 ** 55 ** 57)
punpcklwd xmm4,xmm1 ; xmm4=(10 30 11 31 ** ** 13 33)
punpcklwd xmm5,xmm3 ; xmm5=(50 70 51 71 ** ** 53 73)
- pmaddwd xmm4,[PW_F362_MF127]
- pmaddwd xmm5,[PW_F085_MF072]
+ pmaddwd xmm4,[rel PW_F362_MF127]
+ pmaddwd xmm5,[rel PW_F085_MF072]
psrld xmm0,WORD_BIT ; xmm0=(11 -- 13 -- 15 -- 17 --)
pand xmm1,xmm7 ; xmm1=(-- 31 -- 33 -- 35 -- 37)
@@ -466,8 +466,8 @@ EXTN(jsimd_idct_2x2_sse2):
pand xmm3,xmm7 ; xmm3=(-- 71 -- 73 -- 75 -- 77)
por xmm0,xmm1 ; xmm0=(11 31 13 33 15 35 17 37)
por xmm2,xmm3 ; xmm2=(51 71 53 73 55 75 57 77)
- pmaddwd xmm0,[PW_F362_MF127]
- pmaddwd xmm2,[PW_F085_MF072]
+ pmaddwd xmm0,[rel PW_F362_MF127]
+ pmaddwd xmm2,[rel PW_F085_MF072]
paddd xmm4,xmm5 ; xmm4=tmp0[col0 col1 **** col3]
paddd xmm0,xmm2 ; xmm0=tmp0[col1 col3 col5 col7]
@@ -494,7 +494,7 @@ EXTN(jsimd_idct_2x2_sse2):
psubd xmm3,xmm4 ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **)
psubd xmm5,xmm0 ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7)
- movdqa xmm2,[PD_DESCALE_P1_2] ; xmm2=[PD_DESCALE_P1_2]
+ movdqa xmm2,[rel PD_DESCALE_P1_2] ; xmm2=[rel PD_DESCALE_P1_2]
punpckldq xmm6,xmm3 ; xmm6=(A0 B0 ** **)
@@ -533,8 +533,8 @@ EXTN(jsimd_idct_2x2_sse2):
packssdw xmm1,xmm1 ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3)
packssdw xmm7,xmm7 ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7)
- pmaddwd xmm1,[PW_F362_MF127]
- pmaddwd xmm7,[PW_F085_MF072]
+ pmaddwd xmm1,[rel PW_F362_MF127]
+ pmaddwd xmm7,[rel PW_F085_MF072]
paddd xmm1,xmm7 ; xmm1=tmp0[row0 row1 row0 row1]
@@ -550,12 +550,12 @@ EXTN(jsimd_idct_2x2_sse2):
punpckldq xmm6,xmm4 ; xmm6=(C0 D0 C1 D1)
- paddd xmm6,[PD_DESCALE_P2_2]
+ paddd xmm6,[rel PD_DESCALE_P2_2]
psrad xmm6,DESCALE_P2_2
packssdw xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1)
packsswb xmm6,xmm6 ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..)
- paddb xmm6,[PB_CENTERJSAMP]
+ paddb xmm6,[rel PB_CENTERJSAMP]
pextrw ebx,xmm6,0x00 ; ebx=(C0 D0 -- --)
pextrw ecx,xmm6,0x01 ; ecx=(C1 D1 -- --)