aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornever <none@none>2010-08-19 14:51:47 -0700
committernever <none@none>2010-08-19 14:51:47 -0700
commit7cda8aba0cc1ae3404e2f37eb211eea01bc39905 (patch)
treef383cc9333cae34ef71c63484b6fcc5b2bafc3a7
parentfdfe6b7f0b223b07d1634f928b90466295a67653 (diff)
6978249: spill between cpu and fpu registers when those moves are fast
Reviewed-by: kvn
-rw-r--r--src/cpu/sparc/vm/vm_version_sparc.cpp5
-rw-r--r--src/cpu/x86/vm/vm_version_x86.cpp14
-rw-r--r--src/cpu/x86/vm/x86_32.ad46
-rw-r--r--src/cpu/x86/vm/x86_64.ad8
-rw-r--r--src/share/vm/opto/c2_globals.hpp3
-rw-r--r--src/share/vm/opto/coalesce.cpp8
-rw-r--r--src/share/vm/opto/matcher.cpp17
-rw-r--r--src/share/vm/opto/reg_split.cpp13
-rw-r--r--src/share/vm/runtime/arguments.cpp4
-rw-r--r--src/share/vm/runtime/init.cpp6
10 files changed, 116 insertions, 8 deletions
diff --git a/src/cpu/sparc/vm/vm_version_sparc.cpp b/src/cpu/sparc/vm/vm_version_sparc.cpp
index ba44fe6a5..ca8a0ddaf 100644
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp
@@ -112,6 +112,11 @@ void VM_Version::initialize() {
}
}
+#ifdef COMPILER2
+ // Currently not supported anywhere.
+ FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+#endif
+
char buf[512];
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
(has_v8() ? ", has_v8" : ""),
diff --git a/src/cpu/x86/vm/vm_version_x86.cpp b/src/cpu/x86/vm/vm_version_x86.cpp
index d6968a4f8..9fb37169a 100644
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -482,6 +482,15 @@ void VM_Version::get_processor_features() {
}
}
+#ifdef COMPILER2
+ if (UseFPUForSpilling) {
+ if (UseSSE < 2) {
+ // Only supported with SSE2+
+ FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+ }
+ }
+#endif
+
assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
@@ -520,6 +529,11 @@ void VM_Version::get_processor_features() {
if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
AllocatePrefetchDistance = 192;
AllocatePrefetchLines = 4;
+#ifdef COMPILER2
+ if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+ FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+ }
+#endif
}
}
assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad
index b4c57056f..a9cdabf8f 100644
--- a/src/cpu/x86/vm/x86_32.ad
+++ b/src/cpu/x86/vm/x86_32.ad
@@ -852,6 +852,39 @@ static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst
}
}
+static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+ int src_hi, int dst_hi, int size, outputStream* st ) {
+ // 32-bit
+ if (cbuf) {
+ emit_opcode(*cbuf, 0x66);
+ emit_opcode(*cbuf, 0x0F);
+ emit_opcode(*cbuf, 0x6E);
+ emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
+#ifndef PRODUCT
+ } else if (!do_size) {
+ st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+#endif
+ }
+ return 4;
+}
+
+
+static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+ int src_hi, int dst_hi, int size, outputStream* st ) {
+ // 32-bit
+ if (cbuf) {
+ emit_opcode(*cbuf, 0x66);
+ emit_opcode(*cbuf, 0x0F);
+ emit_opcode(*cbuf, 0x7E);
+ emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
+#ifndef PRODUCT
+ } else if (!do_size) {
+ st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
+#endif
+ }
+ return 4;
+}
+
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
if( cbuf ) {
emit_opcode(*cbuf, 0x8B );
@@ -947,6 +980,12 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
if( dst_first_rc == rc_int && src_first_rc == rc_stack )
size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
+ // Check for integer reg-xmm reg copy
+ if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
+ assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
+ "no 64 bit integer-float reg moves" );
+ return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
+ }
// --------------------------------------
// Check for float reg-reg copy
if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
@@ -1018,6 +1057,13 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
}
+ // Check for xmm reg-integer reg copy
+ if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
+ assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
+ "no 64 bit float-integer reg moves" );
+ return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
+ }
+
// Check for xmm store
if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad
index ba91cb799..ba24322a9 100644
--- a/src/cpu/x86/vm/x86_64.ad
+++ b/src/cpu/x86/vm/x86_64.ad
@@ -1607,8 +1607,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, 0x7E);
emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ Matcher::_regEncode[src_first] & 7,
+ Matcher::_regEncode[dst_first] & 7);
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdq %s, %s\t# spill",
@@ -1637,8 +1637,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
emit_opcode(*cbuf, 0x0F);
emit_opcode(*cbuf, 0x7E);
emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ Matcher::_regEncode[src_first] & 7,
+ Matcher::_regEncode[dst_first] & 7);
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdl %s, %s\t# spill",
diff --git a/src/share/vm/opto/c2_globals.hpp b/src/share/vm/opto/c2_globals.hpp
index 3fdaf8427..e07e7fdec 100644
--- a/src/share/vm/opto/c2_globals.hpp
+++ b/src/share/vm/opto/c2_globals.hpp
@@ -178,6 +178,9 @@
product(bool, ReduceBulkZeroing, true, \
"When bulk-initializing, try to avoid needless zeroing") \
\
+ product(bool, UseFPUForSpilling, false, \
+ "Spill integer registers to FPU instead of stack when possible") \
+ \
develop_pd(intx, RegisterCostAreaRatio, \
"Spill selection in reg allocator: scale area by (X/64K) before " \
"adding cost") \
diff --git a/src/share/vm/opto/coalesce.cpp b/src/share/vm/opto/coalesce.cpp
index 311b55a71..2144b59ec 100644
--- a/src/share/vm/opto/coalesce.cpp
+++ b/src/share/vm/opto/coalesce.cpp
@@ -780,6 +780,14 @@ bool PhaseConservativeCoalesce::copy_copy( Node *dst_copy, Node *src_copy, Block
// Number of bits free
uint rm_size = rm.Size();
+ if (UseFPUForSpilling && rm.is_AllStack() ) {
+ // Don't coalesce when frequency difference is large
+ Block *dst_b = _phc._cfg._bbs[dst_copy->_idx];
+ Block *src_def_b = _phc._cfg._bbs[src_def->_idx];
+ if (src_def_b->_freq > 10*dst_b->_freq )
+ return false;
+ }
+
// If we can use any stack slot, then effective size is infinite
if( rm.is_AllStack() ) rm_size += 1000000;
// Incompatible masks, no way to coalesce
diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp
index 78931baa4..94178813e 100644
--- a/src/share/vm/opto/matcher.cpp
+++ b/src/share/vm/opto/matcher.cpp
@@ -456,6 +456,23 @@ void Matcher::init_first_stack_mask() {
*idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
+ if (UseFPUForSpilling) {
+ // This mask logic assumes that the spill operations are
+ // symmetric and that the registers involved are the same size.
+ // On sparc for instance we may have to use 64 bit moves will
+ // kill 2 registers when used with F0-F31.
+ idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
+ idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
+#ifdef _LP64
+ idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
+ idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
+ idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
+ idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
+#else
+ idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
+#endif
+ }
+
// Make up debug masks. Any spill slot plus callee-save registers.
// Caller-save registers are assumed to be trashable by the various
// inline-cache fixup routines.
diff --git a/src/share/vm/opto/reg_split.cpp b/src/share/vm/opto/reg_split.cpp
index a4fee6f7c..9e3967e36 100644
--- a/src/share/vm/opto/reg_split.cpp
+++ b/src/share/vm/opto/reg_split.cpp
@@ -975,6 +975,19 @@ uint PhaseChaitin::Split( uint maxlrg ) {
insidx++; // Reset iterator to skip USE side split
continue;
}
+
+ if (UseFPUForSpilling && n->is_Call() && !uup && !dup ) {
+ // The use at the call can force the def down so insert
+ // a split before the use to allow the def more freedom.
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++; // Reset iterator to skip USE side split
+ continue;
+ }
+
// Here is the logic chart which describes USE Splitting:
// 0 = false or DOWN, 1 = true or UP
//
diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp
index 48b612171..9db182f4d 100644
--- a/src/share/vm/runtime/arguments.cpp
+++ b/src/share/vm/runtime/arguments.cpp
@@ -3003,10 +3003,6 @@ jint Arguments::parse(const JavaVMInitArgs* args) {
CommandLineFlags::printSetFlags();
}
- if (PrintFlagsFinal) {
- CommandLineFlags::printFlags();
- }
-
// Apply CPU specific policy for the BiasedLocking
if (UseBiasedLocking) {
if (!VM_Version::use_biased_locking() &&
diff --git a/src/share/vm/runtime/init.cpp b/src/share/vm/runtime/init.cpp
index df3a8bd46..9eac44347 100644
--- a/src/share/vm/runtime/init.cpp
+++ b/src/share/vm/runtime/init.cpp
@@ -128,6 +128,12 @@ jint init_globals() {
Universe::verify(); // make sure we're starting with a clean slate
}
+ // All the flags that get adjusted by VM_Version_init and os::init_2
+ // have been set so dump the flags now.
+ if (PrintFlagsFinal) {
+ CommandLineFlags::printFlags();
+ }
+
return JNI_OK;
}