aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/x86/vm
diff options
context:
space:
mode:
author kvn <none@none> 2012-09-17 19:39:07 -0700
committer kvn <none@none> 2012-09-17 19:39:07 -0700
commit e604b84931ae96f21ac2a3a731ff248259eb1612 (patch)
tree f66062e5f3752060ea0ad1af50a0d98827f7f880 /src/cpu/x86/vm
parent ba2e7da26db22ac7848424ac517176982ab390a2 (diff)
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
Summary: Save whole XMM/YMM registers in safepoint interrupt handler.
Reviewed-by: roland, twisti
Diffstat (limited to 'src/cpu/x86/vm')
-rw-r--r-- src/cpu/x86/vm/assembler_x86.cpp 179
-rw-r--r-- src/cpu/x86/vm/assembler_x86.hpp 6
-rw-r--r-- src/cpu/x86/vm/sharedRuntime_x86_32.cpp 156
-rw-r--r-- src/cpu/x86/vm/sharedRuntime_x86_64.cpp 237
-rw-r--r-- src/cpu/x86/vm/x86.ad 1
5 files changed, 392 insertions, 187 deletions
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index 611d7ab50..0f6b99dd5 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -3496,6 +3496,33 @@ void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
emit_byte(0x01);
}
+void Assembler::vinsertf128h(XMMRegister dst, Address src) { // Emits VINSERTF128 with a memory source: loads 128 bits from src into the upper half of dst, leaving the other half intact.
+ assert(VM_Version::supports_avx(), ""); // only valid when the CPU reports AVX support
+ InstructionMark im(this);
+ bool vector256 = true; // passed as the last vex_prefix argument; presumably selects the 256-bit form - confirm in vex_prefix
+ assert(dst != xnoreg, "sanity");
+ int dst_enc = dst->encoding();
+ // swap src<->dst for encoding; dst_enc is passed for both register slots, presumably so the untouched lower half comes from dst itself (confirm against vex_prefix's parameters)
+ vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+ emit_byte(0x18); // opcode byte that follows the prefix (VINSERTF128 per the Intel SDM)
+ emit_operand(dst, src); // register/memory operand bytes for dst and the address src
+ // 0x01 - trailing immediate byte: insert into upper 128 bits
+ emit_byte(0x01);
+}
+
+void Assembler::vextractf128h(Address dst, XMMRegister src) { // Emits VEXTRACTF128 with a memory destination: stores the upper 128 bits of src to dst.
+ assert(VM_Version::supports_avx(), ""); // only valid when the CPU reports AVX support
+ InstructionMark im(this);
+ bool vector256 = true; // passed as the last vex_prefix argument; presumably selects the 256-bit form - confirm in vex_prefix
+ assert(src != xnoreg, "sanity");
+ int src_enc = src->encoding();
+ vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); // middle argument is 0 here, unlike the insert form; NOTE(review): presumably "no second source register" - confirm in vex_prefix
+ emit_byte(0x19); // opcode byte that follows the prefix (VEXTRACTF128 per the Intel SDM)
+ emit_operand(src, dst); // register/memory operand bytes for src and the address dst
+ // 0x01 - trailing immediate byte: extract from upper 128 bits
+ emit_byte(0x01);
+}
+
void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx2(), "");
bool vector256 = true;
@@ -3507,6 +3534,33 @@ void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src)
emit_byte(0x01);
}
+void Assembler::vinserti128h(XMMRegister dst, Address src) { // Emits VINSERTI128 with a memory source: loads 128 bits from src into the upper half of dst, leaving the other half intact.
+ assert(VM_Version::supports_avx2(), ""); // unlike the "f128" form, this one asserts AVX2 support
+ InstructionMark im(this);
+ bool vector256 = true; // passed as the last vex_prefix argument; presumably selects the 256-bit form - confirm in vex_prefix
+ assert(dst != xnoreg, "sanity");
+ int dst_enc = dst->encoding();
+ // swap src<->dst for encoding; dst_enc is passed for both register slots, presumably so the untouched lower half comes from dst itself (confirm against vex_prefix's parameters)
+ vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+ emit_byte(0x38); // opcode byte that follows the prefix (VINSERTI128 per the Intel SDM)
+ emit_operand(dst, src); // register/memory operand bytes for dst and the address src
+ // 0x01 - trailing immediate byte: insert into upper 128 bits
+ emit_byte(0x01);
+}
+
+void Assembler::vextracti128h(Address dst, XMMRegister src) { // Emits VEXTRACTI128 with a memory destination: stores the upper 128 bits of src to dst.
+ assert(VM_Version::supports_avx2(), ""); // unlike the "f128" form, this one asserts AVX2 support
+ InstructionMark im(this);
+ bool vector256 = true; // passed as the last vex_prefix argument; presumably selects the 256-bit form - confirm in vex_prefix
+ assert(src != xnoreg, "sanity");
+ int src_enc = src->encoding();
+ vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); // middle argument is 0 here, unlike the insert form; NOTE(review): presumably "no second source register" - confirm in vex_prefix
+ emit_byte(0x39); // opcode byte that follows the prefix (VEXTRACTI128 per the Intel SDM)
+ emit_operand(src, dst); // register/memory operand bytes for src and the address dst
+ // 0x01 - trailing immediate byte: extract from upper 128 bits
+ emit_byte(0x01);
+}
+
void Assembler::vzeroupper() {
assert(VM_Version::supports_avx(), "");
(void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
@@ -8907,11 +8961,9 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
pusha();
// if we are coming from c1, xmm registers may be live
- if (UseSSE >= 1) {
- subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
- }
int off = 0;
if (UseSSE == 1) {
+ subptr(rsp, sizeof(jdouble)*8);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
@@ -8921,23 +8973,50 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
} else if (UseSSE >= 2) {
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7);
+#ifdef COMPILER2
+ if (MaxVectorSize > 16) {
+ assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+ // Save upper half of YMM registers
+ subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+ vextractf128h(Address(rsp, 0),xmm0);
+ vextractf128h(Address(rsp, 16),xmm1);
+ vextractf128h(Address(rsp, 32),xmm2);
+ vextractf128h(Address(rsp, 48),xmm3);
+ vextractf128h(Address(rsp, 64),xmm4);
+ vextractf128h(Address(rsp, 80),xmm5);
+ vextractf128h(Address(rsp, 96),xmm6);
+ vextractf128h(Address(rsp,112),xmm7);
+#ifdef _LP64
+ vextractf128h(Address(rsp,128),xmm8);
+ vextractf128h(Address(rsp,144),xmm9);
+ vextractf128h(Address(rsp,160),xmm10);
+ vextractf128h(Address(rsp,176),xmm11);
+ vextractf128h(Address(rsp,192),xmm12);
+ vextractf128h(Address(rsp,208),xmm13);
+ vextractf128h(Address(rsp,224),xmm14);
+ vextractf128h(Address(rsp,240),xmm15);
+#endif
+ }
+#endif
+ // Save whole 128bit (16 bytes) XMM registers
+ subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+ movdqu(Address(rsp,off++*16),xmm0);
+ movdqu(Address(rsp,off++*16),xmm1);
+ movdqu(Address(rsp,off++*16),xmm2);
+ movdqu(Address(rsp,off++*16),xmm3);
+ movdqu(Address(rsp,off++*16),xmm4);
+ movdqu(Address(rsp,off++*16),xmm5);
+ movdqu(Address(rsp,off++*16),xmm6);
+ movdqu(Address(rsp,off++*16),xmm7);
#ifdef _LP64
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14);
- movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15);
+ movdqu(Address(rsp,off++*16),xmm8);
+ movdqu(Address(rsp,off++*16),xmm9);
+ movdqu(Address(rsp,off++*16),xmm10);
+ movdqu(Address(rsp,off++*16),xmm11);
+ movdqu(Address(rsp,off++*16),xmm12);
+ movdqu(Address(rsp,off++*16),xmm13);
+ movdqu(Address(rsp,off++*16),xmm14);
+ movdqu(Address(rsp,off++*16),xmm15);
#endif
}
@@ -9015,28 +9094,52 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
+ addptr(rsp, sizeof(jdouble)*8);
} else if (UseSSE >= 2) {
- movdbl(xmm0, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm1, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm2, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm3, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm4, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm5, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm6, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm7, Address(rsp,off++*sizeof(jdouble)));
+ // Restore whole 128bit (16 bytes) XMM registers
+ movdqu(xmm0, Address(rsp,off++*16));
+ movdqu(xmm1, Address(rsp,off++*16));
+ movdqu(xmm2, Address(rsp,off++*16));
+ movdqu(xmm3, Address(rsp,off++*16));
+ movdqu(xmm4, Address(rsp,off++*16));
+ movdqu(xmm5, Address(rsp,off++*16));
+ movdqu(xmm6, Address(rsp,off++*16));
+ movdqu(xmm7, Address(rsp,off++*16));
#ifdef _LP64
- movdbl(xmm8, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm9, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm10, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm11, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm12, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm13, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm14, Address(rsp,off++*sizeof(jdouble)));
- movdbl(xmm15, Address(rsp,off++*sizeof(jdouble)));
+ movdqu(xmm8, Address(rsp,off++*16));
+ movdqu(xmm9, Address(rsp,off++*16));
+ movdqu(xmm10, Address(rsp,off++*16));
+ movdqu(xmm11, Address(rsp,off++*16));
+ movdqu(xmm12, Address(rsp,off++*16));
+ movdqu(xmm13, Address(rsp,off++*16));
+ movdqu(xmm14, Address(rsp,off++*16));
+ movdqu(xmm15, Address(rsp,off++*16));
+#endif
+ addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+#ifdef COMPILER2
+ if (MaxVectorSize > 16) {
+ // Restore upper half of YMM registers.
+ vinsertf128h(xmm0, Address(rsp, 0));
+ vinsertf128h(xmm1, Address(rsp, 16));
+ vinsertf128h(xmm2, Address(rsp, 32));
+ vinsertf128h(xmm3, Address(rsp, 48));
+ vinsertf128h(xmm4, Address(rsp, 64));
+ vinsertf128h(xmm5, Address(rsp, 80));
+ vinsertf128h(xmm6, Address(rsp, 96));
+ vinsertf128h(xmm7, Address(rsp,112));
+#ifdef _LP64
+ vinsertf128h(xmm8, Address(rsp,128));
+ vinsertf128h(xmm9, Address(rsp,144));
+ vinsertf128h(xmm10, Address(rsp,160));
+ vinsertf128h(xmm11, Address(rsp,176));
+ vinsertf128h(xmm12, Address(rsp,192));
+ vinsertf128h(xmm13, Address(rsp,208));
+ vinsertf128h(xmm14, Address(rsp,224));
+ vinsertf128h(xmm15, Address(rsp,240));
+#endif
+ addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
+ }
#endif
- }
- if (UseSSE >= 1) {
- addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
}
popa();
}
diff --git a/src/cpu/x86/vm/assembler_x86.hpp b/src/cpu/x86/vm/assembler_x86.hpp
index 368f5c438..dc98fa8f6 100644
--- a/src/cpu/x86/vm/assembler_x86.hpp
+++ b/src/cpu/x86/vm/assembler_x86.hpp
@@ -1743,6 +1743,12 @@ private:
void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+ // Load/store the high 128 bits of a YMM register without destroying the other half.
+ void vinsertf128h(XMMRegister dst, Address src);
+ void vinserti128h(XMMRegister dst, Address src);
+ void vextractf128h(Address dst, XMMRegister src);
+ void vextracti128h(Address dst, XMMRegister src);
+
// AVX instruction which is used to clear upper 128 bits of YMM registers and
// to avoid transaction penalty between AVX and SSE states. There is no
// penalty if legacy SSE instructions are encoded using VEX prefix because
diff --git a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
index 0b157f254..7cb2bb81e 100644
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
@@ -46,11 +46,11 @@
const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
class RegisterSaver {
- enum { FPU_regs_live = 8 /*for the FPU stack*/+8/*eight more for XMM registers*/ };
// Capture info about frame layout
+#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
enum layout {
fpu_state_off = 0,
- fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
+ fpu_state_end = fpu_state_off+FPUStateSizeInWords,
st0_off, st0H_off,
st1_off, st1H_off,
st2_off, st2H_off,
@@ -59,16 +59,16 @@ class RegisterSaver {
st5_off, st5H_off,
st6_off, st6H_off,
st7_off, st7H_off,
-
- xmm0_off, xmm0H_off,
- xmm1_off, xmm1H_off,
- xmm2_off, xmm2H_off,
- xmm3_off, xmm3H_off,
- xmm4_off, xmm4H_off,
- xmm5_off, xmm5H_off,
- xmm6_off, xmm6H_off,
- xmm7_off, xmm7H_off,
- flags_off,
+ xmm_off,
+ DEF_XMM_OFFS(0),
+ DEF_XMM_OFFS(1),
+ DEF_XMM_OFFS(2),
+ DEF_XMM_OFFS(3),
+ DEF_XMM_OFFS(4),
+ DEF_XMM_OFFS(5),
+ DEF_XMM_OFFS(6),
+ DEF_XMM_OFFS(7),
+ flags_off = xmm7_off + 16/BytesPerInt + 1, // 16-byte stack alignment fill word
rdi_off,
rsi_off,
ignore_off, // extra copy of rbp,
@@ -83,13 +83,13 @@ class RegisterSaver {
rbp_off,
return_off, // slot for return address
reg_save_size };
-
+ enum { FPU_regs_live = flags_off - fpu_state_end };
public:
static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words,
- int* total_frame_words, bool verify_fpu = true);
- static void restore_live_registers(MacroAssembler* masm);
+ int* total_frame_words, bool verify_fpu = true, bool save_vectors = false);
+ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
static int rax_offset() { return rax_off; }
static int rbx_offset() { return rbx_off; }
@@ -113,9 +113,20 @@ class RegisterSaver {
};
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
- int* total_frame_words, bool verify_fpu) {
-
- int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize;
+ int* total_frame_words, bool verify_fpu, bool save_vectors) {
+ int vect_words = 0;
+#ifdef COMPILER2
+ if (save_vectors) {
+ assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+ assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+ // Save upper half of YMM registers
+ vect_words = 8 * 16 / wordSize;
+ additional_frame_words += vect_words;
+ }
+#else
+ assert(!save_vectors, "vectors are generated only by C2");
+#endif
+ int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize;
int frame_words = frame_size_in_bytes / wordSize;
*total_frame_words = frame_words;
@@ -129,7 +140,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
__ enter();
__ pusha();
__ pushf();
- __ subptr(rsp,FPU_regs_live*sizeof(jdouble)); // Push FPU registers space
+ __ subptr(rsp,FPU_regs_live*wordSize); // Push FPU registers space
__ push_FPU_state(); // Save FPU state & init
if (verify_fpu) {
@@ -183,14 +194,28 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
__ movflt(Address(rsp,xmm6_off*wordSize),xmm6);
__ movflt(Address(rsp,xmm7_off*wordSize),xmm7);
} else if( UseSSE >= 2 ) {
- __ movdbl(Address(rsp,xmm0_off*wordSize),xmm0);
- __ movdbl(Address(rsp,xmm1_off*wordSize),xmm1);
- __ movdbl(Address(rsp,xmm2_off*wordSize),xmm2);
- __ movdbl(Address(rsp,xmm3_off*wordSize),xmm3);
- __ movdbl(Address(rsp,xmm4_off*wordSize),xmm4);
- __ movdbl(Address(rsp,xmm5_off*wordSize),xmm5);
- __ movdbl(Address(rsp,xmm6_off*wordSize),xmm6);
- __ movdbl(Address(rsp,xmm7_off*wordSize),xmm7);
+ // Save whole 128bit (16 bytes) XMM registers
+ __ movdqu(Address(rsp,xmm0_off*wordSize),xmm0);
+ __ movdqu(Address(rsp,xmm1_off*wordSize),xmm1);
+ __ movdqu(Address(rsp,xmm2_off*wordSize),xmm2);
+ __ movdqu(Address(rsp,xmm3_off*wordSize),xmm3);
+ __ movdqu(Address(rsp,xmm4_off*wordSize),xmm4);
+ __ movdqu(Address(rsp,xmm5_off*wordSize),xmm5);
+ __ movdqu(Address(rsp,xmm6_off*wordSize),xmm6);
+ __ movdqu(Address(rsp,xmm7_off*wordSize),xmm7);
+ }
+
+ if (vect_words > 0) {
+ assert(vect_words*wordSize == 128, "");
+ __ subptr(rsp, 128); // Save upper half of YMM registes
+ __ vextractf128h(Address(rsp, 0),xmm0);
+ __ vextractf128h(Address(rsp, 16),xmm1);
+ __ vextractf128h(Address(rsp, 32),xmm2);
+ __ vextractf128h(Address(rsp, 48),xmm3);
+ __ vextractf128h(Address(rsp, 64),xmm4);
+ __ vextractf128h(Address(rsp, 80),xmm5);
+ __ vextractf128h(Address(rsp, 96),xmm6);
+ __ vextractf128h(Address(rsp,112),xmm7);
}
// Set an oopmap for the call site. This oopmap will map all
@@ -253,10 +278,20 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}
-void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
-
+void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
// Recover XMM & FPU state
- if( UseSSE == 1 ) {
+ int additional_frame_bytes = 0;
+#ifdef COMPILER2
+ if (restore_vectors) {
+ assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+ assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+ additional_frame_bytes = 128;
+ }
+#else
+ assert(!restore_vectors, "vectors are generated only by C2");
+#endif
+ if (UseSSE == 1) {
+ assert(additional_frame_bytes == 0, "");
__ movflt(xmm0,Address(rsp,xmm0_off*wordSize));
__ movflt(xmm1,Address(rsp,xmm1_off*wordSize));
__ movflt(xmm2,Address(rsp,xmm2_off*wordSize));
@@ -265,18 +300,33 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
__ movflt(xmm5,Address(rsp,xmm5_off*wordSize));
__ movflt(xmm6,Address(rsp,xmm6_off*wordSize));
__ movflt(xmm7,Address(rsp,xmm7_off*wordSize));
- } else if( UseSSE >= 2 ) {
- __ movdbl(xmm0,Address(rsp,xmm0_off*wordSize));
- __ movdbl(xmm1,Address(rsp,xmm1_off*wordSize));
- __ movdbl(xmm2,Address(rsp,xmm2_off*wordSize));
- __ movdbl(xmm3,Address(rsp,xmm3_off*wordSize));
- __ movdbl(xmm4,Address(rsp,xmm4_off*wordSize));
- __ movdbl(xmm5,Address(rsp,xmm5_off*wordSize));
- __ movdbl(xmm6,Address(rsp,xmm6_off*wordSize));
- __ movdbl(xmm7,Address(rsp,xmm7_off*wordSize));
+ } else if (UseSSE >= 2) {
+#define STACK_ADDRESS(x) Address(rsp,(x)*wordSize + additional_frame_bytes)
+ __ movdqu(xmm0,STACK_ADDRESS(xmm0_off));
+ __ movdqu(xmm1,STACK_ADDRESS(xmm1_off));
+ __ movdqu(xmm2,STACK_ADDRESS(xmm2_off));
+ __ movdqu(xmm3,STACK_ADDRESS(xmm3_off));
+ __ movdqu(xmm4,STACK_ADDRESS(xmm4_off));
+ __ movdqu(xmm5,STACK_ADDRESS(xmm5_off));
+ __ movdqu(xmm6,STACK_ADDRESS(xmm6_off));
+ __ movdqu(xmm7,STACK_ADDRESS(xmm7_off));
+#undef STACK_ADDRESS
+ }
+ if (restore_vectors) {
+ // Restore upper half of YMM registers.
+ assert(additional_frame_bytes == 128, "");
+ __ vinsertf128h(xmm0, Address(rsp, 0));
+ __ vinsertf128h(xmm1, Address(rsp, 16));
+ __ vinsertf128h(xmm2, Address(rsp, 32));
+ __ vinsertf128h(xmm3, Address(rsp, 48));
+ __ vinsertf128h(xmm4, Address(rsp, 64));
+ __ vinsertf128h(xmm5, Address(rsp, 80));
+ __ vinsertf128h(xmm6, Address(rsp, 96));
+ __ vinsertf128h(xmm7, Address(rsp,112));
+ __ addptr(rsp, additional_frame_bytes);
}
__ pop_FPU_state();
- __ addptr(rsp, FPU_regs_live*sizeof(jdouble)); // Pop FPU registers
+ __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers
__ popf();
__ popa();
@@ -308,6 +358,13 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
__ addptr(rsp, return_off * wordSize);
}
+// Returns true when a vector of 'size' bytes is bigger than the size saved by default.
+// 16-byte XMM registers are saved by default using SSE2 movdqu instructions.
+// Note: MaxVectorSize == 0 with UseSSE < 2, and vectors are not generated then.
+bool SharedRuntime::is_wide_vector(int size) {
+ return size > 16; // 16 == the XMM register size in bytes that is saved by default
+}
+
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
@@ -2732,7 +2789,6 @@ uint SharedRuntime::out_preserve_stack_slots() {
return 0;
}
-
//------------------------------generate_deopt_blob----------------------------
void SharedRuntime::generate_deopt_blob() {
// allocate space for the code
@@ -3270,7 +3326,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
// setup oopmap, and calls safepoint code to stop the compiled code for
// a safepoint.
//
-SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
// Account for thread arg in our frame
const int additional_words = 1;
@@ -3290,17 +3346,18 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
const Register java_thread = rdi; // callee-saved for VC++
address start = __ pc();
address call_pc = NULL;
-
+ bool cause_return = (poll_type == POLL_AT_RETURN);
+ bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
// If cause_return is true we are at a poll_return and there is
// the return address on the stack to the caller on the nmethod
// that is safepoint. We can leave this return on the stack and
// effectively complete the return and safepoint in the caller.
// Otherwise we push space for a return address that the safepoint
// handler will install later to make the stack walking sensible.
- if( !cause_return )
- __ push(rbx); // Make room for return address (or push it again)
+ if (!cause_return)
+ __ push(rbx); // Make room for return address (or push it again)
- map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
+ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false, save_vectors);
// The following is basically a call_VM. However, we need the precise
// address of the call in order to generate an oopmap. Hence, we do all the
@@ -3312,7 +3369,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
__ set_last_Java_frame(java_thread, noreg, noreg, NULL);
// if this was not a poll_return then we need to correct the return address now.
- if( !cause_return ) {
+ if (!cause_return) {
__ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset()));
__ movptr(Address(rbp, wordSize), rax);
}
@@ -3340,15 +3397,14 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
__ jcc(Assembler::equal, noException);
// Exception pending
-
- RegisterSaver::restore_live_registers(masm);
+ RegisterSaver::restore_live_registers(masm, save_vectors);
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
__ bind(noException);
// Normal exit, register restoring and exit
- RegisterSaver::restore_live_registers(masm);
+ RegisterSaver::restore_live_registers(masm, save_vectors);
__ ret(0);
diff --git a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
index 8fd50e278..49ddcb8f2 100644
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
@@ -116,8 +116,8 @@ class RegisterSaver {
};
public:
- static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
- static void restore_live_registers(MacroAssembler* masm);
+ static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
+ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
// Offsets into the register save area
// Used by deoptimization when it is managing result register
@@ -134,7 +134,19 @@ class RegisterSaver {
static void restore_result_registers(MacroAssembler* masm);
};
-OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
+ int vect_words = 0;
+#ifdef COMPILER2
+ if (save_vectors) {
+ assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+ assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+ // Save upper half of YMM registers
+ vect_words = 16 * 16 / wordSize;
+ additional_frame_words += vect_words;
+ }
+#else
+ assert(!save_vectors, "vectors are generated only by C2");
+#endif
// Always make the frame size 16-byte aligned
int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
@@ -155,6 +167,27 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
__ enter(); // rsp becomes 16-byte aligned here
__ push_CPU_state(); // Push a multiple of 16 bytes
+
+ if (vect_words > 0) {
+ assert(vect_words*wordSize == 256, "");
+ __ subptr(rsp, 256); // Save upper half of YMM registes
+ __ vextractf128h(Address(rsp, 0),xmm0);
+ __ vextractf128h(Address(rsp, 16),xmm1);
+ __ vextractf128h(Address(rsp, 32),xmm2);
+ __ vextractf128h(Address(rsp, 48),xmm3);
+ __ vextractf128h(Address(rsp, 64),xmm4);
+ __ vextractf128h(Address(rsp, 80),xmm5);
+ __ vextractf128h(Address(rsp, 96),xmm6);
+ __ vextractf128h(Address(rsp,112),xmm7);
+ __ vextractf128h(Address(rsp,128),xmm8);
+ __ vextractf128h(Address(rsp,144),xmm9);
+ __ vextractf128h(Address(rsp,160),xmm10);
+ __ vextractf128h(Address(rsp,176),xmm11);
+ __ vextractf128h(Address(rsp,192),xmm12);
+ __ vextractf128h(Address(rsp,208),xmm13);
+ __ vextractf128h(Address(rsp,224),xmm14);
+ __ vextractf128h(Address(rsp,240),xmm15);
+ }
if (frame::arg_reg_save_area_bytes != 0) {
// Allocate argument register save area
__ subptr(rsp, frame::arg_reg_save_area_bytes);
@@ -167,112 +200,111 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
OopMapSet *oop_maps = new OopMapSet();
OopMap* map = new OopMap(frame_size_in_slots, 0);
- map->set_callee_saved(VMRegImpl::stack2reg( rax_off + additional_frame_slots), rax->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( rcx_off + additional_frame_slots), rcx->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( rdx_off + additional_frame_slots), rdx->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( rbx_off + additional_frame_slots), rbx->as_VMReg());
+
+#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
+
+ map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
// rbp location is known implicitly by the frame sender code, needs no oopmap
// and the location where rbp was saved by is ignored
- map->set_callee_saved(VMRegImpl::stack2reg( rsi_off + additional_frame_slots), rsi->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( rdi_off + additional_frame_slots), rdi->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( r8_off + additional_frame_slots), r8->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( r9_off + additional_frame_slots), r9->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( r10_off + additional_frame_slots), r10->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( r11_off + additional_frame_slots), r11->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( r12_off + additional_frame_slots), r12->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( r13_off + additional_frame_slots), r13->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( r14_off + additional_frame_slots), r14->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg( r15_off + additional_frame_slots), r15->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm0_off + additional_frame_slots), xmm0->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm1_off + additional_frame_slots), xmm1->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm2_off + additional_frame_slots), xmm2->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm3_off + additional_frame_slots), xmm3->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm4_off + additional_frame_slots), xmm4->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm5_off + additional_frame_slots), xmm5->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm6_off + additional_frame_slots), xmm6->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm7_off + additional_frame_slots), xmm7->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm8_off + additional_frame_slots), xmm8->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm9_off + additional_frame_slots), xmm9->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm10_off + additional_frame_slots), xmm10->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm11_off + additional_frame_slots), xmm11->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm12_off + additional_frame_slots), xmm12->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm13_off + additional_frame_slots), xmm13->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm14_off + additional_frame_slots), xmm14->as_VMReg());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm15_off + additional_frame_slots), xmm15->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm0_off ), xmm0->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm1_off ), xmm1->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm2_off ), xmm2->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm3_off ), xmm3->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm4_off ), xmm4->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm5_off ), xmm5->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm6_off ), xmm6->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm7_off ), xmm7->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm8_off ), xmm8->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm9_off ), xmm9->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm10_off), xmm10->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm11_off), xmm11->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm12_off), xmm12->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm13_off), xmm13->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
+ map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
// %%% These should all be a waste but we'll keep things as they were for now
if (true) {
- map->set_callee_saved(VMRegImpl::stack2reg( raxH_off + additional_frame_slots),
- rax->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( rcxH_off + additional_frame_slots),
- rcx->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( rdxH_off + additional_frame_slots),
- rdx->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( rbxH_off + additional_frame_slots),
- rbx->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
// rbp location is known implicitly by the frame sender code, needs no oopmap
- map->set_callee_saved(VMRegImpl::stack2reg( rsiH_off + additional_frame_slots),
- rsi->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( rdiH_off + additional_frame_slots),
- rdi->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( r8H_off + additional_frame_slots),
- r8->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( r9H_off + additional_frame_slots),
- r9->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( r10H_off + additional_frame_slots),
- r10->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( r11H_off + additional_frame_slots),
- r11->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( r12H_off + additional_frame_slots),
- r12->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( r13H_off + additional_frame_slots),
- r13->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( r14H_off + additional_frame_slots),
- r14->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg( r15H_off + additional_frame_slots),
- r15->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm0H_off + additional_frame_slots),
- xmm0->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm1H_off + additional_frame_slots),
- xmm1->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm2H_off + additional_frame_slots),
- xmm2->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm3H_off + additional_frame_slots),
- xmm3->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm4H_off + additional_frame_slots),
- xmm4->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm5H_off + additional_frame_slots),
- xmm5->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm6H_off + additional_frame_slots),
- xmm6->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm7H_off + additional_frame_slots),
- xmm7->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm8H_off + additional_frame_slots),
- xmm8->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm9H_off + additional_frame_slots),
- xmm9->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm10H_off + additional_frame_slots),
- xmm10->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm11H_off + additional_frame_slots),
- xmm11->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm12H_off + additional_frame_slots),
- xmm12->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm13H_off + additional_frame_slots),
- xmm13->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm14H_off + additional_frame_slots),
- xmm14->as_VMReg()->next());
- map->set_callee_saved(VMRegImpl::stack2reg(xmm15H_off + additional_frame_slots),
- xmm15->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm0H_off ), xmm0->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm1H_off ), xmm1->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm2H_off ), xmm2->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm3H_off ), xmm3->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm4H_off ), xmm4->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm5H_off ), xmm5->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm6H_off ), xmm6->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm7H_off ), xmm7->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm8H_off ), xmm8->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm9H_off ), xmm9->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm10H_off), xmm10->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm11H_off), xmm11->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm12H_off), xmm12->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm13H_off), xmm13->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next());
+ map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next());
}
return map;
}
-void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
+void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
if (frame::arg_reg_save_area_bytes != 0) {
// Pop arg register save area
__ addptr(rsp, frame::arg_reg_save_area_bytes);
}
+#ifdef COMPILER2
+ if (restore_vectors) {
+ // Restore upper half of YMM registers.
+ assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+ assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+ __ vinsertf128h(xmm0, Address(rsp, 0));
+ __ vinsertf128h(xmm1, Address(rsp, 16));
+ __ vinsertf128h(xmm2, Address(rsp, 32));
+ __ vinsertf128h(xmm3, Address(rsp, 48));
+ __ vinsertf128h(xmm4, Address(rsp, 64));
+ __ vinsertf128h(xmm5, Address(rsp, 80));
+ __ vinsertf128h(xmm6, Address(rsp, 96));
+ __ vinsertf128h(xmm7, Address(rsp,112));
+ __ vinsertf128h(xmm8, Address(rsp,128));
+ __ vinsertf128h(xmm9, Address(rsp,144));
+ __ vinsertf128h(xmm10, Address(rsp,160));
+ __ vinsertf128h(xmm11, Address(rsp,176));
+ __ vinsertf128h(xmm12, Address(rsp,192));
+ __ vinsertf128h(xmm13, Address(rsp,208));
+ __ vinsertf128h(xmm14, Address(rsp,224));
+ __ vinsertf128h(xmm15, Address(rsp,240));
+ __ addptr(rsp, 256);
+ }
+#else
+ assert(!restore_vectors, "vectors are generated only by C2");
+#endif
// Recover CPU state
__ pop_CPU_state();
// Get the rbp described implicitly by the calling convention (no oopMap)
@@ -297,6 +329,12 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
__ addptr(rsp, return_offset_in_bytes());
}
+// Returns true when a vector of 'size' bytes is wider than what is saved by default.
+// The 16-byte XMM registers are saved by default using fxsave/fxrstor instructions.
+bool SharedRuntime::is_wide_vector(int size) {
+ return size > 16;
+}
+
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
@@ -3235,7 +3273,6 @@ uint SharedRuntime::out_preserve_stack_slots() {
return 0;
}
-
//------------------------------generate_deopt_blob----------------------------
void SharedRuntime::generate_deopt_blob() {
// Allocate space for the code
@@ -3740,7 +3777,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
// Generate a special Compile2Runtime blob that saves all registers,
// and setup oopmap.
//
-SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
assert(StubRoutines::forward_exception_entry() != NULL,
"must be generated before");
@@ -3755,6 +3792,8 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
address start = __ pc();
address call_pc = NULL;
int frame_size_in_words;
+ bool cause_return = (poll_type == POLL_AT_RETURN);
+ bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
// Make room for return address (or push it again)
if (!cause_return) {
@@ -3762,7 +3801,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
}
// Save registers, fpu state, and flags
- map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
// The following is basically a call_VM. However, we need the precise
// address of the call in order to generate an oopmap. Hence, we do all the
@@ -3799,7 +3838,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
// Exception pending
- RegisterSaver::restore_live_registers(masm);
+ RegisterSaver::restore_live_registers(masm, save_vectors);
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
@@ -3807,7 +3846,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause
__ bind(noException);
// Normal exit, restore registers and exit.
- RegisterSaver::restore_live_registers(masm);
+ RegisterSaver::restore_live_registers(masm, save_vectors);
__ ret(0);
diff --git a/src/cpu/x86/vm/x86.ad b/src/cpu/x86/vm/x86.ad
index 71e26fb20..9a057a47f 100644
--- a/src/cpu/x86/vm/x86.ad
+++ b/src/cpu/x86/vm/x86.ad
@@ -498,6 +498,7 @@ const bool Matcher::match_rule_supported(int opcode) {
case Op_PopCountL:
if (!UsePopCountInstruction)
return false;
+ break;
case Op_MulVI:
if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
return false;