author     amurillo <none@none>  2014-03-13 09:57:31 -0700
committer  amurillo <none@none>  2014-03-13 09:57:31 -0700
commit     4d03eb68452aecf7eaf400a8d91dd839396e9c80 (patch)
tree       3bc8f0d21e3e4e8201b6b63e1509e2f0ace7d209
parent     13b7a8057e5aeddba8ce52ea966c09222c72c996 (diff)
parent     cee6e5eb989f7333212608860e4cf49d57c6c4ba (diff)
-rw-r--r--  make/hotspot_version | 2
-rw-r--r--  src/cpu/x86/vm/macroAssembler_x86.cpp | 1225
-rw-r--r--  src/cpu/x86/vm/macroAssembler_x86.hpp | 7
-rw-r--r--  src/cpu/x86/vm/x86_32.ad | 561
-rw-r--r--  src/cpu/x86/vm/x86_64.ad | 243
-rw-r--r--  src/os/bsd/vm/os_bsd.cpp | 18
-rw-r--r--  src/os/linux/vm/os_linux.cpp | 30
-rw-r--r--  src/os/solaris/vm/os_solaris.cpp | 36
-rw-r--r--  src/os/solaris/vm/perfMemory_solaris.cpp | 20
-rw-r--r--  src/os/windows/vm/os_windows.cpp | 15
-rw-r--r--  src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp | 11
-rw-r--r--  src/share/vm/classfile/altHashing.cpp | 78
-rw-r--r--  src/share/vm/classfile/altHashing.hpp | 20
-rw-r--r--  src/share/vm/oops/instanceKlass.hpp | 3
-rw-r--r--  src/share/vm/oops/metadata.hpp | 4
-rw-r--r--  src/share/vm/oops/oop.cpp | 4
-rw-r--r--  src/share/vm/oops/oop.hpp | 4
-rw-r--r--  src/share/vm/oops/symbol.cpp | 4
-rw-r--r--  src/share/vm/oops/symbol.hpp | 4
-rw-r--r--  src/share/vm/opto/graphKit.cpp | 38
-rw-r--r--  src/share/vm/opto/library_call.cpp | 8
-rw-r--r--  src/share/vm/prims/jni.cpp | 15
-rw-r--r--  src/share/vm/prims/unsafe.cpp | 5
-rw-r--r--  src/share/vm/prims/whitebox.cpp | 25
-rw-r--r--  src/share/vm/prims/whitebox.hpp | 18
-rw-r--r--  src/share/vm/runtime/os.hpp | 5
-rw-r--r--  src/share/vm/runtime/park.cpp | 129
-rw-r--r--  src/share/vm/runtime/thread.cpp | 4
-rw-r--r--  src/share/vm/utilities/hashtable.cpp | 4
-rw-r--r--  src/share/vm/utilities/hashtable.hpp | 6
-rw-r--r--  src/share/vm/utilities/vmError.cpp | 23
-rw-r--r--  test/TEST.groups | 4
-rw-r--r--  test/compiler/ciReplay/TestVM.sh | 2
-rw-r--r--  test/compiler/ciReplay/common.sh | 3
-rw-r--r--  test/compiler/tiered/NonTieredLevelsTest.java | 5
-rw-r--r--  test/compiler/tiered/TieredLevelsTest.java | 5
-rw-r--r--  test/compiler/whitebox/CompilerWhiteBoxTest.java | 16
-rw-r--r--  test/compiler/whitebox/DeoptimizeAllTest.java | 9
-rw-r--r--  test/compiler/whitebox/DeoptimizeMethodTest.java | 9
-rw-r--r--  test/compiler/whitebox/IsMethodCompilableTest.java | 7
-rw-r--r--  test/compiler/whitebox/MakeMethodNotCompilableTest.java | 9
41 files changed, 1227 insertions, 1411 deletions
diff --git a/make/hotspot_version b/make/hotspot_version
index b6fe79757..b9a29e006 100644
--- a/make/hotspot_version
+++ b/make/hotspot_version
@@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2014
HS_MAJOR_VER=25
HS_MINOR_VER=20
-HS_BUILD_NUMBER=05
+HS_BUILD_NUMBER=06
JDK_MAJOR_VER=1
JDK_MINOR_VER=8
diff --git a/src/cpu/x86/vm/macroAssembler_x86.cpp b/src/cpu/x86/vm/macroAssembler_x86.cpp
index b4797bf3a..74fa1b298 100644
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp
@@ -98,217 +98,6 @@ Address MacroAssembler::as_Address(ArrayAddress adr) {
return Address::make_array(adr);
}
-int MacroAssembler::biased_locking_enter(Register lock_reg,
- Register obj_reg,
- Register swap_reg,
- Register tmp_reg,
- bool swap_reg_contains_mark,
- Label& done,
- Label* slow_case,
- BiasedLockingCounters* counters) {
- assert(UseBiasedLocking, "why call this otherwise?");
- assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
- assert_different_registers(lock_reg, obj_reg, swap_reg);
-
- if (PrintBiasedLockingStatistics && counters == NULL)
- counters = BiasedLocking::counters();
-
- bool need_tmp_reg = false;
- if (tmp_reg == noreg) {
- need_tmp_reg = true;
- tmp_reg = lock_reg;
- } else {
- assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
- }
- assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
- Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
- Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
- Address saved_mark_addr(lock_reg, 0);
-
- // Biased locking
- // See whether the lock is currently biased toward our thread and
- // whether the epoch is still valid
- // Note that the runtime guarantees sufficient alignment of JavaThread
- // pointers to allow age to be placed into low bits
- // First check to see whether biasing is even enabled for this object
- Label cas_label;
- int null_check_offset = -1;
- if (!swap_reg_contains_mark) {
- null_check_offset = offset();
- movl(swap_reg, mark_addr);
- }
- if (need_tmp_reg) {
- push(tmp_reg);
- }
- movl(tmp_reg, swap_reg);
- andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
- cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
- if (need_tmp_reg) {
- pop(tmp_reg);
- }
- jcc(Assembler::notEqual, cas_label);
- // The bias pattern is present in the object's header. Need to check
- // whether the bias owner and the epoch are both still current.
- // Note that because there is no current thread register on x86 we
- // need to store off the mark word we read out of the object to
- // avoid reloading it and needing to recheck invariants below. This
- // store is unfortunate but it makes the overall code shorter and
- // simpler.
- movl(saved_mark_addr, swap_reg);
- if (need_tmp_reg) {
- push(tmp_reg);
- }
- get_thread(tmp_reg);
- xorl(swap_reg, tmp_reg);
- if (swap_reg_contains_mark) {
- null_check_offset = offset();
- }
- movl(tmp_reg, klass_addr);
- xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
- andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
- if (need_tmp_reg) {
- pop(tmp_reg);
- }
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address)counters->biased_lock_entry_count_addr()));
- }
- jcc(Assembler::equal, done);
-
- Label try_revoke_bias;
- Label try_rebias;
-
- // At this point we know that the header has the bias pattern and
- // that we are not the bias owner in the current epoch. We need to
- // figure out more details about the state of the header in order to
- // know what operations can be legally performed on the object's
- // header.
-
- // If the low three bits in the xor result aren't clear, that means
- // the prototype header is no longer biased and we have to revoke
- // the bias on this object.
- testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
- jcc(Assembler::notZero, try_revoke_bias);
-
- // Biasing is still enabled for this data type. See whether the
- // epoch of the current bias is still valid, meaning that the epoch
- // bits of the mark word are equal to the epoch bits of the
- // prototype header. (Note that the prototype header's epoch bits
- // only change at a safepoint.) If not, attempt to rebias the object
- // toward the current thread. Note that we must be absolutely sure
- // that the current epoch is invalid in order to do this because
- // otherwise the manipulations it performs on the mark word are
- // illegal.
- testl(swap_reg, markOopDesc::epoch_mask_in_place);
- jcc(Assembler::notZero, try_rebias);
-
- // The epoch of the current bias is still valid but we know nothing
- // about the owner; it might be set or it might be clear. Try to
- // acquire the bias of the object using an atomic operation. If this
- // fails we will go in to the runtime to revoke the object's bias.
- // Note that we first construct the presumed unbiased header so we
- // don't accidentally blow away another thread's valid bias.
- movl(swap_reg, saved_mark_addr);
- andl(swap_reg,
- markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
- if (need_tmp_reg) {
- push(tmp_reg);
- }
- get_thread(tmp_reg);
- orl(tmp_reg, swap_reg);
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(tmp_reg, Address(obj_reg, 0));
- if (need_tmp_reg) {
- pop(tmp_reg);
- }
- // If the biasing toward our thread failed, this means that
- // another thread succeeded in biasing it toward itself and we
- // need to revoke that bias. The revocation will occur in the
- // interpreter runtime in the slow case.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
- }
- if (slow_case != NULL) {
- jcc(Assembler::notZero, *slow_case);
- }
- jmp(done);
-
- bind(try_rebias);
- // At this point we know the epoch has expired, meaning that the
- // current "bias owner", if any, is actually invalid. Under these
- // circumstances _only_, we are allowed to use the current header's
- // value as the comparison value when doing the cas to acquire the
- // bias in the current epoch. In other words, we allow transfer of
- // the bias from one thread to another directly in this situation.
- //
- // FIXME: due to a lack of registers we currently blow away the age
- // bits in this situation. Should attempt to preserve them.
- if (need_tmp_reg) {
- push(tmp_reg);
- }
- get_thread(tmp_reg);
- movl(swap_reg, klass_addr);
- orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
- movl(swap_reg, saved_mark_addr);
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(tmp_reg, Address(obj_reg, 0));
- if (need_tmp_reg) {
- pop(tmp_reg);
- }
- // If the biasing toward our thread failed, then another thread
- // succeeded in biasing it toward itself and we need to revoke that
- // bias. The revocation will occur in the runtime in the slow case.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
- }
- if (slow_case != NULL) {
- jcc(Assembler::notZero, *slow_case);
- }
- jmp(done);
-
- bind(try_revoke_bias);
- // The prototype mark in the klass doesn't have the bias bit set any
- // more, indicating that objects of this data type are not supposed
- // to be biased any more. We are going to try to reset the mark of
- // this object to the prototype value and fall through to the
- // CAS-based locking scheme. Note that if our CAS fails, it means
- // that another thread raced us for the privilege of revoking the
- // bias of this particular object, so it's okay to continue in the
- // normal locking code.
- //
- // FIXME: due to a lack of registers we currently blow away the age
- // bits in this situation. Should attempt to preserve them.
- movl(swap_reg, saved_mark_addr);
- if (need_tmp_reg) {
- push(tmp_reg);
- }
- movl(tmp_reg, klass_addr);
- movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(tmp_reg, Address(obj_reg, 0));
- if (need_tmp_reg) {
- pop(tmp_reg);
- }
- // Fall through to the normal CAS-based lock, because no matter what
- // the result of the above CAS, some thread must have succeeded in
- // removing the bias bit from the object's header.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
- }
-
- bind(cas_label);
-
- return null_check_offset;
-}
void MacroAssembler::call_VM_leaf_base(address entry_point,
int number_of_arguments) {
call(RuntimeAddress(entry_point));
@@ -726,165 +515,6 @@ Address MacroAssembler::as_Address(ArrayAddress adr) {
return array;
}
-int MacroAssembler::biased_locking_enter(Register lock_reg,
- Register obj_reg,
- Register swap_reg,
- Register tmp_reg,
- bool swap_reg_contains_mark,
- Label& done,
- Label* slow_case,
- BiasedLockingCounters* counters) {
- assert(UseBiasedLocking, "why call this otherwise?");
- assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
- assert(tmp_reg != noreg, "tmp_reg must be supplied");
- assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
- assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
- Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
- Address saved_mark_addr(lock_reg, 0);
-
- if (PrintBiasedLockingStatistics && counters == NULL)
- counters = BiasedLocking::counters();
-
- // Biased locking
- // See whether the lock is currently biased toward our thread and
- // whether the epoch is still valid
- // Note that the runtime guarantees sufficient alignment of JavaThread
- // pointers to allow age to be placed into low bits
- // First check to see whether biasing is even enabled for this object
- Label cas_label;
- int null_check_offset = -1;
- if (!swap_reg_contains_mark) {
- null_check_offset = offset();
- movq(swap_reg, mark_addr);
- }
- movq(tmp_reg, swap_reg);
- andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
- cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
- jcc(Assembler::notEqual, cas_label);
- // The bias pattern is present in the object's header. Need to check
- // whether the bias owner and the epoch are both still current.
- load_prototype_header(tmp_reg, obj_reg);
- orq(tmp_reg, r15_thread);
- xorq(tmp_reg, swap_reg);
- andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
- }
- jcc(Assembler::equal, done);
-
- Label try_revoke_bias;
- Label try_rebias;
-
- // At this point we know that the header has the bias pattern and
- // that we are not the bias owner in the current epoch. We need to
- // figure out more details about the state of the header in order to
- // know what operations can be legally performed on the object's
- // header.
-
- // If the low three bits in the xor result aren't clear, that means
- // the prototype header is no longer biased and we have to revoke
- // the bias on this object.
- testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
- jcc(Assembler::notZero, try_revoke_bias);
-
- // Biasing is still enabled for this data type. See whether the
- // epoch of the current bias is still valid, meaning that the epoch
- // bits of the mark word are equal to the epoch bits of the
- // prototype header. (Note that the prototype header's epoch bits
- // only change at a safepoint.) If not, attempt to rebias the object
- // toward the current thread. Note that we must be absolutely sure
- // that the current epoch is invalid in order to do this because
- // otherwise the manipulations it performs on the mark word are
- // illegal.
- testq(tmp_reg, markOopDesc::epoch_mask_in_place);
- jcc(Assembler::notZero, try_rebias);
-
- // The epoch of the current bias is still valid but we know nothing
- // about the owner; it might be set or it might be clear. Try to
- // acquire the bias of the object using an atomic operation. If this
- // fails we will go in to the runtime to revoke the object's bias.
- // Note that we first construct the presumed unbiased header so we
- // don't accidentally blow away another thread's valid bias.
- andq(swap_reg,
- markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
- movq(tmp_reg, swap_reg);
- orq(tmp_reg, r15_thread);
- if (os::is_MP()) {
- lock();
- }
- cmpxchgq(tmp_reg, Address(obj_reg, 0));
- // If the biasing toward our thread failed, this means that
- // another thread succeeded in biasing it toward itself and we
- // need to revoke that bias. The revocation will occur in the
- // interpreter runtime in the slow case.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
- }
- if (slow_case != NULL) {
- jcc(Assembler::notZero, *slow_case);
- }
- jmp(done);
-
- bind(try_rebias);
- // At this point we know the epoch has expired, meaning that the
- // current "bias owner", if any, is actually invalid. Under these
- // circumstances _only_, we are allowed to use the current header's
- // value as the comparison value when doing the cas to acquire the
- // bias in the current epoch. In other words, we allow transfer of
- // the bias from one thread to another directly in this situation.
- //
- // FIXME: due to a lack of registers we currently blow away the age
- // bits in this situation. Should attempt to preserve them.
- load_prototype_header(tmp_reg, obj_reg);
- orq(tmp_reg, r15_thread);
- if (os::is_MP()) {
- lock();
- }
- cmpxchgq(tmp_reg, Address(obj_reg, 0));
- // If the biasing toward our thread failed, then another thread
- // succeeded in biasing it toward itself and we need to revoke that
- // bias. The revocation will occur in the runtime in the slow case.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
- }
- if (slow_case != NULL) {
- jcc(Assembler::notZero, *slow_case);
- }
- jmp(done);
-
- bind(try_revoke_bias);
- // The prototype mark in the klass doesn't have the bias bit set any
- // more, indicating that objects of this data type are not supposed
- // to be biased any more. We are going to try to reset the mark of
- // this object to the prototype value and fall through to the
- // CAS-based locking scheme. Note that if our CAS fails, it means
- // that another thread raced us for the privilege of revoking the
- // bias of this particular object, so it's okay to continue in the
- // normal locking code.
- //
- // FIXME: due to a lack of registers we currently blow away the age
- // bits in this situation. Should attempt to preserve them.
- load_prototype_header(tmp_reg, obj_reg);
- if (os::is_MP()) {
- lock();
- }
- cmpxchgq(tmp_reg, Address(obj_reg, 0));
- // Fall through to the normal CAS-based lock, because no matter what
- // the result of the above CAS, some thread must have succeeded in
- // removing the bias bit from the object's header.
- if (counters != NULL) {
- cond_inc32(Assembler::zero,
- ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
- }
-
- bind(cas_label);
-
- return null_check_offset;
-}
-
void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
Label L, E;
@@ -1360,9 +990,16 @@ void MacroAssembler::andptr(Register dst, int32_t imm32) {
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
pushf();
- if (os::is_MP())
- lock();
- incrementl(counter_addr);
+ if (reachable(counter_addr)) {
+ if (os::is_MP())
+ lock();
+ incrementl(as_Address(counter_addr));
+ } else {
+ lea(rscratch1, counter_addr);
+ if (os::is_MP())
+ lock();
+ incrementl(Address(rscratch1, 0));
+ }
popf();
}
@@ -1393,6 +1030,234 @@ void MacroAssembler::bang_stack_size(Register size, Register tmp) {
}
}
+int MacroAssembler::biased_locking_enter(Register lock_reg,
+ Register obj_reg,
+ Register swap_reg,
+ Register tmp_reg,
+ bool swap_reg_contains_mark,
+ Label& done,
+ Label* slow_case,
+ BiasedLockingCounters* counters) {
+ assert(UseBiasedLocking, "why call this otherwise?");
+ assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
+ LP64_ONLY( assert(tmp_reg != noreg, "tmp_reg must be supplied"); )
+ bool need_tmp_reg = false;
+ if (tmp_reg == noreg) {
+ need_tmp_reg = true;
+ tmp_reg = lock_reg;
+ assert_different_registers(lock_reg, obj_reg, swap_reg);
+ } else {
+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
+ }
+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
+ Address saved_mark_addr(lock_reg, 0);
+
+ if (PrintBiasedLockingStatistics && counters == NULL) {
+ counters = BiasedLocking::counters();
+ }
+ // Biased locking
+ // See whether the lock is currently biased toward our thread and
+ // whether the epoch is still valid
+ // Note that the runtime guarantees sufficient alignment of JavaThread
+ // pointers to allow age to be placed into low bits
+ // First check to see whether biasing is even enabled for this object
+ Label cas_label;
+ int null_check_offset = -1;
+ if (!swap_reg_contains_mark) {
+ null_check_offset = offset();
+ movptr(swap_reg, mark_addr);
+ }
+ if (need_tmp_reg) {
+ push(tmp_reg);
+ }
+ movptr(tmp_reg, swap_reg);
+ andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place);
+ cmpptr(tmp_reg, markOopDesc::biased_lock_pattern);
+ if (need_tmp_reg) {
+ pop(tmp_reg);
+ }
+ jcc(Assembler::notEqual, cas_label);
+ // The bias pattern is present in the object's header. Need to check
+ // whether the bias owner and the epoch are both still current.
+#ifndef _LP64
+ // Note that because there is no current thread register on x86_32 we
+ // need to store off the mark word we read out of the object to
+ // avoid reloading it and needing to recheck invariants below. This
+ // store is unfortunate but it makes the overall code shorter and
+ // simpler.
+ movptr(saved_mark_addr, swap_reg);
+#endif
+ if (need_tmp_reg) {
+ push(tmp_reg);
+ }
+ if (swap_reg_contains_mark) {
+ null_check_offset = offset();
+ }
+ load_prototype_header(tmp_reg, obj_reg);
+#ifdef _LP64
+ orptr(tmp_reg, r15_thread);
+ xorptr(tmp_reg, swap_reg);
+ Register header_reg = tmp_reg;
+#else
+ xorptr(tmp_reg, swap_reg);
+ get_thread(swap_reg);
+ xorptr(swap_reg, tmp_reg);
+ Register header_reg = swap_reg;
+#endif
+ andptr(header_reg, ~((int) markOopDesc::age_mask_in_place));
+ if (need_tmp_reg) {
+ pop(tmp_reg);
+ }
+ if (counters != NULL) {
+ cond_inc32(Assembler::zero,
+ ExternalAddress((address) counters->biased_lock_entry_count_addr()));
+ }
+ jcc(Assembler::equal, done);
+
+ Label try_revoke_bias;
+ Label try_rebias;
+
+ // At this point we know that the header has the bias pattern and
+ // that we are not the bias owner in the current epoch. We need to
+ // figure out more details about the state of the header in order to
+ // know what operations can be legally performed on the object's
+ // header.
+
+ // If the low three bits in the xor result aren't clear, that means
+ // the prototype header is no longer biased and we have to revoke
+ // the bias on this object.
+ testptr(header_reg, markOopDesc::biased_lock_mask_in_place);
+ jccb(Assembler::notZero, try_revoke_bias);
+
+ // Biasing is still enabled for this data type. See whether the
+ // epoch of the current bias is still valid, meaning that the epoch
+ // bits of the mark word are equal to the epoch bits of the
+ // prototype header. (Note that the prototype header's epoch bits
+ // only change at a safepoint.) If not, attempt to rebias the object
+ // toward the current thread. Note that we must be absolutely sure
+ // that the current epoch is invalid in order to do this because
+ // otherwise the manipulations it performs on the mark word are
+ // illegal.
+ testptr(header_reg, markOopDesc::epoch_mask_in_place);
+ jccb(Assembler::notZero, try_rebias);
+
+ // The epoch of the current bias is still valid but we know nothing
+ // about the owner; it might be set or it might be clear. Try to
+ // acquire the bias of the object using an atomic operation. If this
+ // fails we will go in to the runtime to revoke the object's bias.
+ // Note that we first construct the presumed unbiased header so we
+ // don't accidentally blow away another thread's valid bias.
+ NOT_LP64( movptr(swap_reg, saved_mark_addr); )
+ andptr(swap_reg,
+ markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+ if (need_tmp_reg) {
+ push(tmp_reg);
+ }
+#ifdef _LP64
+ movptr(tmp_reg, swap_reg);
+ orptr(tmp_reg, r15_thread);
+#else
+ get_thread(tmp_reg);
+ orptr(tmp_reg, swap_reg);
+#endif
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+ if (need_tmp_reg) {
+ pop(tmp_reg);
+ }
+ // If the biasing toward our thread failed, this means that
+ // another thread succeeded in biasing it toward itself and we
+ // need to revoke that bias. The revocation will occur in the
+ // interpreter runtime in the slow case.
+ if (counters != NULL) {
+ cond_inc32(Assembler::zero,
+ ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
+ }
+ if (slow_case != NULL) {
+ jcc(Assembler::notZero, *slow_case);
+ }
+ jmp(done);
+
+ bind(try_rebias);
+ // At this point we know the epoch has expired, meaning that the
+ // current "bias owner", if any, is actually invalid. Under these
+ // circumstances _only_, we are allowed to use the current header's
+ // value as the comparison value when doing the cas to acquire the
+ // bias in the current epoch. In other words, we allow transfer of
+ // the bias from one thread to another directly in this situation.
+ //
+ // FIXME: due to a lack of registers we currently blow away the age
+ // bits in this situation. Should attempt to preserve them.
+ if (need_tmp_reg) {
+ push(tmp_reg);
+ }
+ load_prototype_header(tmp_reg, obj_reg);
+#ifdef _LP64
+ orptr(tmp_reg, r15_thread);
+#else
+ get_thread(swap_reg);
+ orptr(tmp_reg, swap_reg);
+ movptr(swap_reg, saved_mark_addr);
+#endif
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+ if (need_tmp_reg) {
+ pop(tmp_reg);
+ }
+ // If the biasing toward our thread failed, then another thread
+ // succeeded in biasing it toward itself and we need to revoke that
+ // bias. The revocation will occur in the runtime in the slow case.
+ if (counters != NULL) {
+ cond_inc32(Assembler::zero,
+ ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
+ }
+ if (slow_case != NULL) {
+ jcc(Assembler::notZero, *slow_case);
+ }
+ jmp(done);
+
+ bind(try_revoke_bias);
+ // The prototype mark in the klass doesn't have the bias bit set any
+ // more, indicating that objects of this data type are not supposed
+ // to be biased any more. We are going to try to reset the mark of
+ // this object to the prototype value and fall through to the
+ // CAS-based locking scheme. Note that if our CAS fails, it means
+ // that another thread raced us for the privilege of revoking the
+ // bias of this particular object, so it's okay to continue in the
+ // normal locking code.
+ //
+ // FIXME: due to a lack of registers we currently blow away the age
+ // bits in this situation. Should attempt to preserve them.
+ NOT_LP64( movptr(swap_reg, saved_mark_addr); )
+ if (need_tmp_reg) {
+ push(tmp_reg);
+ }
+ load_prototype_header(tmp_reg, obj_reg);
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
+ if (need_tmp_reg) {
+ pop(tmp_reg);
+ }
+ // Fall through to the normal CAS-based lock, because no matter what
+ // the result of the above CAS, some thread must have succeeded in
+ // removing the bias bit from the object's header.
+ if (counters != NULL) {
+ cond_inc32(Assembler::zero,
+ ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
+ }
+
+ bind(cas_label);
+
+ return null_check_offset;
+}
+
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
assert(UseBiasedLocking, "why call this otherwise?");
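Note: the merged biased_locking_enter in the hunk above folds the formerly separate 32-bit and 64-bit copies into one body. movptr/andptr/cmpxchgptr expand to the pointer-width forms, and the #ifdef _LP64 / NOT_LP64 blocks isolate the only real differences (r15_thread versus get_thread(), plus the saved-mark stack slot x86_32 needs for lack of registers). The mark-word triage it emits is unchanged: test the low three bits for the biased pattern, then XOR the mark against (prototype header | thread) and ignore the age bits; a zero result means the bias owner and the epoch are both still current. The following standalone C++ sketch restates that test; the constants mirror the 64-bit layout in markOop.hpp and is_biased_toward is an illustrative name, not HotSpot API.

#include <cstdint>
using std::uintptr_t;

// Mark word (64-bit): [ JavaThread* : 54 | epoch : 2 | age : 4 | biased : 1 | lock : 2 ]
constexpr uintptr_t biased_lock_mask_in_place = 0x7;       // low three bits
constexpr uintptr_t biased_lock_pattern       = 0x5;       // 101b: biased, "unlocked"
constexpr uintptr_t age_mask_in_place         = 0xF << 3;  // four age bits

// Same test the generated code performs before taking the fast path to 'done'.
bool is_biased_toward(uintptr_t mark, uintptr_t prototype_header, uintptr_t thread) {
  if ((mark & biased_lock_mask_in_place) != biased_lock_pattern)
    return false;                              // not biased at all: fall through to cas_label
  uintptr_t diff = mark ^ (prototype_header | thread);
  return (diff & ~age_mask_in_place) == 0;     // bias owner and epoch both current
}

In the emitted code this same comparison feeds cond_inc32 of the biased_lock_entry counter and the jcc(equal, done) fast exit; a non-zero result falls into the revoke/rebias triage that follows.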
@@ -1408,6 +1273,620 @@ void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, La
jcc(Assembler::equal, done);
}
+#ifdef COMPILER2
+// Fast_Lock and Fast_Unlock used by C2
+
+// Because the transitions from emitted code to the runtime
+// monitorenter/exit helper stubs are so slow it's critical that
+// we inline both the stack-locking fast-path and the inflated fast path.
+//
+// See also: cmpFastLock and cmpFastUnlock.
+//
+// What follows is a specialized inline transliteration of the code
+// in slow_enter() and slow_exit(). If we're concerned about I$ bloat
+// another option would be to emit TrySlowEnter and TrySlowExit methods
+// at startup-time. These methods would accept arguments as
+// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
+// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
+// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
+// In practice, however, the # of lock sites is bounded and is usually small.
+// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
+// if the processor uses simple bimodal branch predictors keyed by EIP
+// Since the helper routines would be called from multiple synchronization
+// sites.
+//
+// An even better approach would be write "MonitorEnter()" and "MonitorExit()"
+// in java - using j.u.c and unsafe - and just bind the lock and unlock sites
+// to those specialized methods. That'd give us a mostly platform-independent
+// implementation that the JITs could optimize and inline at their pleasure.
+// Done correctly, the only time we'd need to cross to native could would be
+// to park() or unpark() threads. We'd also need a few more unsafe operators
+// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
+// (b) explicit barriers or fence operations.
+//
+// TODO:
+//
+// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
+// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
+// Given TLAB allocation, Self is usually manifested in a register, so passing it into
+// the lock operators would typically be faster than reifying Self.
+//
+// * Ideally I'd define the primitives as:
+// fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
+// fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
+// Unfortunately ADLC bugs prevent us from expressing the ideal form.
+// Instead, we're stuck with a rather awkward and brittle register assignments below.
+// Furthermore the register assignments are overconstrained, possibly resulting in
+// sub-optimal code near the synchronization site.
+//
+// * Eliminate the sp-proximity tests and just use "== Self" tests instead.
+// Alternately, use a better sp-proximity test.
+//
+// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
+// Either one is sufficient to uniquely identify a thread.
+// TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
+//
+// * Intrinsify notify() and notifyAll() for the common cases where the
+// object is locked by the calling thread but the waitlist is empty.
+// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
+//
+// * use jccb and jmpb instead of jcc and jmp to improve code density.
+// But beware of excessive branch density on AMD Opterons.
+//
+// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
+// or failure of the fast-path. If the fast-path fails then we pass
+// control to the slow-path, typically in C. In Fast_Lock and
+// Fast_Unlock we often branch to DONE_LABEL, just to find that C2
+// will emit a conditional branch immediately after the node.
+// So we have branches to branches and lots of ICC.ZF games.
+// Instead, it might be better to have C2 pass a "FailureLabel"
+// into Fast_Lock and Fast_Unlock. In the case of success, control
+// will drop through the node. ICC.ZF is undefined at exit.
+// In the case of failure, the node will branch directly to the
+// FailureLabel
+
+
+// obj: object to lock
+// box: on-stack box address (displaced header location) - KILLED
+// rax,: tmp -- KILLED
+// scr: tmp -- KILLED
+void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, BiasedLockingCounters* counters) {
+ // Ensure the register assignents are disjoint
+ guarantee (objReg != boxReg, "");
+ guarantee (objReg != tmpReg, "");
+ guarantee (objReg != scrReg, "");
+ guarantee (boxReg != tmpReg, "");
+ guarantee (boxReg != scrReg, "");
+ guarantee (tmpReg == rax, "");
+
+ if (counters != NULL) {
+ atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()));
+ }
+ if (EmitSync & 1) {
+ // set box->dhw = unused_mark (3)
+ // Force all sync thru slow-path: slow_enter() and slow_exit()
+ movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+ cmpptr (rsp, (int32_t)NULL_WORD);
+ } else
+ if (EmitSync & 2) {
+ Label DONE_LABEL ;
+ if (UseBiasedLocking) {
+ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
+ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
+ }
+
+ movptr(tmpReg, Address(objReg, 0)); // fetch markword
+ orptr (tmpReg, 0x1);
+ movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
+ jccb(Assembler::equal, DONE_LABEL);
+ // Recursive locking
+ subptr(tmpReg, rsp);
+ andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+ movptr(Address(boxReg, 0), tmpReg);
+ bind(DONE_LABEL);
+ } else {
+ // Possible cases that we'll encounter in fast_lock
+ // ------------------------------------------------
+ // * Inflated
+ // -- unlocked
+ // -- Locked
+ // = by self
+ // = by other
+ // * biased
+ // -- by Self
+ // -- by other
+ // * neutral
+ // * stack-locked
+ // -- by self
+ // = sp-proximity test hits
+ // = sp-proximity test generates false-negative
+ // -- by other
+ //
+
+ Label IsInflated, DONE_LABEL;
+
+ // it's stack-locked, biased or neutral
+ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
+ // order to reduce the number of conditional branches in the most common cases.
+ // Beware -- there's a subtle invariant that fetch of the markword
+ // at [FETCH], below, will never observe a biased encoding (*101b).
+ // If this invariant is not held we risk exclusion (safety) failure.
+ if (UseBiasedLocking && !UseOptoBiasInlining) {
+ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
+ }
+
+ movptr(tmpReg, Address(objReg, 0)); // [FETCH]
+ testl (tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
+ jccb (Assembler::notZero, IsInflated);
+
+ // Attempt stack-locking ...
+ orptr (tmpReg, 0x1);
+ movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
+ if (counters != NULL) {
+ cond_inc32(Assembler::equal,
+ ExternalAddress((address)counters->fast_path_entry_count_addr()));
+ }
+ jccb(Assembler::equal, DONE_LABEL);
+
+ // Recursive locking
+ subptr(tmpReg, rsp);
+ andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+ movptr(Address(boxReg, 0), tmpReg);
+ if (counters != NULL) {
+ cond_inc32(Assembler::equal,
+ ExternalAddress((address)counters->fast_path_entry_count_addr()));
+ }
+ jmpb(DONE_LABEL);
+
+ bind(IsInflated);
+#ifndef _LP64
+ // The object is inflated.
+ //
+ // TODO-FIXME: eliminate the ugly use of manifest constants:
+ // Use markOopDesc::monitor_value instead of "2".
+ // use markOop::unused_mark() instead of "3".
+ // The tmpReg value is an objectMonitor reference ORed with
+ // markOopDesc::monitor_value (2). We can either convert tmpReg to an
+ // objectmonitor pointer by masking off the "2" bit or we can just
+ // use tmpReg as an objectmonitor pointer but bias the objectmonitor
+ // field offsets with "-2" to compensate for and annul the low-order tag bit.
+ //
+ // I use the latter as it avoids AGI stalls.
+ // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
+ // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
+ //
+ #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
+
+ // boxReg refers to the on-stack BasicLock in the current frame.
+ // We'd like to write:
+ // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices.
+ // This is convenient but results a ST-before-CAS penalty. The following CAS suffers
+ // additional latency as we have another ST in the store buffer that must drain.
+
+ if (EmitSync & 8192) {
+ movptr(Address(boxReg, 0), 3); // results in ST-before-CAS penalty
+ get_thread (scrReg);
+ movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
+ movptr(tmpReg, NULL_WORD); // consider: xor vs mov
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ } else
+ if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
+ movptr(scrReg, boxReg);
+ movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
+
+ // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
+ if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+ // prefetchw [eax + Offset(_owner)-2]
+ prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ }
+
+ if ((EmitSync & 64) == 0) {
+ // Optimistic form: consider XORL tmpReg,tmpReg
+ movptr(tmpReg, NULL_WORD);
+ } else {
+ // Can suffer RTS->RTO upgrades on shared or cold $ lines
+ // Test-And-CAS instead of CAS
+ movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); // rax, = m->_owner
+ testptr(tmpReg, tmpReg); // Locked ?
+ jccb (Assembler::notZero, DONE_LABEL);
+ }
+
+ // Appears unlocked - try to swing _owner from null to non-null.
+ // Ideally, I'd manifest "Self" with get_thread and then attempt
+ // to CAS the register containing Self into m->Owner.
+ // But we don't have enough registers, so instead we can either try to CAS
+ // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
+ // we later store "Self" into m->Owner. Transiently storing a stack address
+ // (rsp or the address of the box) into m->owner is harmless.
+ // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
+ jccb (Assembler::notZero, DONE_LABEL);
+ get_thread (scrReg); // beware: clobbers ICCs
+ movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg);
+ xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
+
+ // If the CAS fails we can either retry or pass control to the slow-path.
+ // We use the latter tactic.
+ // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+ // If the CAS was successful ...
+ // Self has acquired the lock
+ // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+ // Intentional fall-through into DONE_LABEL ...
+ } else {
+ movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())); // results in ST-before-CAS penalty
+ movptr(boxReg, tmpReg);
+
+ // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
+ if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+ // prefetchw [eax + Offset(_owner)-2]
+ prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ }
+
+ if ((EmitSync & 64) == 0) {
+ // Optimistic form
+ xorptr (tmpReg, tmpReg);
+ } else {
+ // Can suffer RTS->RTO upgrades on shared or cold $ lines
+ movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); // rax, = m->_owner
+ testptr(tmpReg, tmpReg); // Locked ?
+ jccb (Assembler::notZero, DONE_LABEL);
+ }
+
+ // Appears unlocked - try to swing _owner from null to non-null.
+ // Use either "Self" (in scr) or rsp as thread identity in _owner.
+ // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
+ get_thread (scrReg);
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+
+ // If the CAS fails we can either retry or pass control to the slow-path.
+ // We use the latter tactic.
+ // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+ // If the CAS was successful ...
+ // Self has acquired the lock
+ // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+ // Intentional fall-through into DONE_LABEL ...
+ }
+#else // _LP64
+ // It's inflated
+
+ // TODO: someday avoid the ST-before-CAS penalty by
+ // relocating (deferring) the following ST.
+ // We should also think about trying a CAS without having
+ // fetched _owner. If the CAS is successful we may
+ // avoid an RTO->RTS upgrade on the $line.
+
+ // Without cast to int32_t a movptr will destroy r10 which is typically obj
+ movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+
+ mov (boxReg, tmpReg);
+ movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ testptr(tmpReg, tmpReg);
+ jccb (Assembler::notZero, DONE_LABEL);
+
+ // It's inflated and appears unlocked
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ // Intentional fall-through into DONE_LABEL ...
+
+#endif
+
+ // DONE_LABEL is a hot target - we'd really like to place it at the
+ // start of cache line by padding with NOPs.
+ // See the AMD and Intel software optimization manuals for the
+ // most efficient "long" NOP encodings.
+ // Unfortunately none of our alignment mechanisms suffice.
+ bind(DONE_LABEL);
+
+ // At DONE_LABEL the icc ZFlag is set as follows ...
+ // Fast_Unlock uses the same protocol.
+ // ZFlag == 1 -> Success
+ // ZFlag == 0 -> Failure - force control through the slow-path
+ }
+}
+
+// obj: object to unlock
+// box: box address (displaced header location), killed. Must be EAX.
+// tmp: killed, cannot be obj nor box.
+//
+// Some commentary on balanced locking:
+//
+// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
+// Methods that don't have provably balanced locking are forced to run in the
+// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
+// The interpreter provides two properties:
+// I1: At return-time the interpreter automatically and quietly unlocks any
+// objects acquired the current activation (frame). Recall that the
+// interpreter maintains an on-stack list of locks currently held by
+// a frame.
+// I2: If a method attempts to unlock an object that is not held by the
+// the frame the interpreter throws IMSX.
+//
+// Lets say A(), which has provably balanced locking, acquires O and then calls B().
+// B() doesn't have provably balanced locking so it runs in the interpreter.
+// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
+// is still locked by A().
+//
+// The only other source of unbalanced locking would be JNI. The "Java Native Interface:
+// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
+// should not be unlocked by "normal" java-level locking and vice-versa. The specification
+// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
+
+void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
+ guarantee (objReg != boxReg, "");
+ guarantee (objReg != tmpReg, "");
+ guarantee (boxReg != tmpReg, "");
+ guarantee (boxReg == rax, "");
+
+ if (EmitSync & 4) {
+ // Disable - inhibit all inlining. Force control through the slow-path
+ cmpptr (rsp, 0);
+ } else
+ if (EmitSync & 8) {
+ Label DONE_LABEL;
+ if (UseBiasedLocking) {
+ biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+ }
+ // Classic stack-locking code ...
+ // Check whether the displaced header is 0
+ //(=> recursive unlock)
+ movptr(tmpReg, Address(boxReg, 0));
+ testptr(tmpReg, tmpReg);
+ jccb(Assembler::zero, DONE_LABEL);
+ // If not recursive lock, reset the header to displaced header
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+ bind(DONE_LABEL);
+ } else {
+ Label DONE_LABEL, Stacked, CheckSucc;
+
+ // Critically, the biased locking test must have precedence over
+ // and appear before the (box->dhw == 0) recursive stack-lock test.
+ if (UseBiasedLocking && !UseOptoBiasInlining) {
+ biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+ }
+
+ cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
+ movptr(tmpReg, Address(objReg, 0)); // Examine the object's markword
+ jccb (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
+
+ testptr(tmpReg, 0x02); // Inflated?
+ jccb (Assembler::zero, Stacked);
+
+ // It's inflated.
+ // Despite our balanced locking property we still check that m->_owner == Self
+ // as java routines or native JNI code called by this thread might
+ // have released the lock.
+ // Refer to the comments in synchronizer.cpp for how we might encode extra
+ // state in _succ so we can avoid fetching EntryList|cxq.
+ //
+ // I'd like to add more cases in fast_lock() and fast_unlock() --
+ // such as recursive enter and exit -- but we have to be wary of
+ // I$ bloat, T$ effects and BP$ effects.
+ //
+ // If there's no contention try a 1-0 exit. That is, exit without
+ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
+ // we detect and recover from the race that the 1-0 exit admits.
+ //
+ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
+ // before it STs null into _owner, releasing the lock. Updates
+ // to data protected by the critical section must be visible before
+ // we drop the lock (and thus before any other thread could acquire
+ // the lock and observe the fields protected by the lock).
+ // IA32's memory-model is SPO, so STs are ordered with respect to
+ // each other and there's no need for an explicit barrier (fence).
+ // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
+#ifndef _LP64
+ get_thread (boxReg);
+ if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+ // prefetchw [ebx + Offset(_owner)-2]
+ prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ }
+
+ // Note that we could employ various encoding schemes to reduce
+ // the number of loads below (currently 4) to just 2 or 3.
+ // Refer to the comments in synchronizer.cpp.
+ // In practice the chain of fetches doesn't seem to impact performance, however.
+ if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
+ // Attempt to reduce branch density - AMD's branch predictor.
+ xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+ orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+ orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+ jccb (Assembler::notZero, DONE_LABEL);
+ movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+ jmpb (DONE_LABEL);
+ } else {
+ xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+ jccb (Assembler::notZero, DONE_LABEL);
+ movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+ orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+ jccb (Assembler::notZero, CheckSucc);
+ movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+ jmpb (DONE_LABEL);
+ }
+
+ // The Following code fragment (EmitSync & 65536) improves the performance of
+ // contended applications and contended synchronization microbenchmarks.
+ // Unfortunately the emission of the code - even though not executed - causes regressions
+ // in scimark and jetstream, evidently because of $ effects. Replacing the code
+ // with an equal number of never-executed NOPs results in the same regression.
+ // We leave it off by default.
+
+ if ((EmitSync & 65536) != 0) {
+ Label LSuccess, LGoSlowPath ;
+
+ bind (CheckSucc);
+
+ // Optional pre-test ... it's safe to elide this
+ if ((EmitSync & 16) == 0) {
+ cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+ jccb (Assembler::zero, LGoSlowPath);
+ }
+
+ // We have a classic Dekker-style idiom:
+ // ST m->_owner = 0 ; MEMBAR; LD m->_succ
+ // There are a number of ways to implement the barrier:
+ // (1) lock:andl &m->_owner, 0
+ // is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
+ // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
+ // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
+ // (2) If supported, an explicit MFENCE is appealing.
+ // In older IA32 processors MFENCE is slower than lock:add or xchg
+ // particularly if the write-buffer is full as might be the case if
+ // if stores closely precede the fence or fence-equivalent instruction.
+ // In more modern implementations MFENCE appears faster, however.
+ // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
+ // The $lines underlying the top-of-stack should be in M-state.
+ // The locked add instruction is serializing, of course.
+ // (4) Use xchg, which is serializing
+ // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
+ // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
+ // The integer condition codes will tell us if succ was 0.
+ // Since _succ and _owner should reside in the same $line and
+ // we just stored into _owner, it's likely that the $line
+ // remains in M-state for the lock:orl.
+ //
+ // We currently use (3), although it's likely that switching to (2)
+ // is correct for the future.
+
+ movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+ if (os::is_MP()) {
+ if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
+ mfence();
+ } else {
+ lock (); addptr(Address(rsp, 0), 0);
+ }
+ }
+ // Ratify _succ remains non-null
+ cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0);
+ jccb (Assembler::notZero, LSuccess);
+
+ xorptr(boxReg, boxReg); // box is really EAX
+ if (os::is_MP()) { lock(); }
+ cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ jccb (Assembler::notEqual, LSuccess);
+ // Since we're low on registers we installed rsp as a placeholding in _owner.
+ // Now install Self over rsp. This is safe as we're transitioning from
+ // non-null to non=null
+ get_thread (boxReg);
+ movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
+ // Intentional fall-through into LGoSlowPath ...
+
+ bind (LGoSlowPath);
+ orptr(boxReg, 1); // set ICC.ZF=0 to indicate failure
+ jmpb (DONE_LABEL);
+
+ bind (LSuccess);
+ xorptr(boxReg, boxReg); // set ICC.ZF=1 to indicate success
+ jmpb (DONE_LABEL);
+ }
+
+ bind (Stacked);
+ // It's not inflated and it's not recursively stack-locked and it's not biased.
+ // It must be stack-locked.
+ // Try to reset the header to displaced header.
+ // The "box" value on the stack is stable, so we can reload
+ // and be assured we observe the same value as above.
+ movptr(tmpReg, Address(boxReg, 0));
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+ // Intention fall-thru into DONE_LABEL
+
+ // DONE_LABEL is a hot target - we'd really like to place it at the
+ // start of cache line by padding with NOPs.
+ // See the AMD and Intel software optimization manuals for the
+ // most efficient "long" NOP encodings.
+ // Unfortunately none of our alignment mechanisms suffice.
+ if ((EmitSync & 65536) == 0) {
+ bind (CheckSucc);
+ }
+#else // _LP64
+ // It's inflated
+ movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ xorptr(boxReg, r15_thread);
+ orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+ jccb (Assembler::notZero, DONE_LABEL);
+ movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+ orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+ jccb (Assembler::notZero, CheckSucc);
+ movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+ jmpb (DONE_LABEL);
+
+ if ((EmitSync & 65536) == 0) {
+ Label LSuccess, LGoSlowPath ;
+ bind (CheckSucc);
+ cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+ jccb (Assembler::zero, LGoSlowPath);
+
+ // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
+ // the explicit ST;MEMBAR combination, but masm doesn't currently support
+ // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc
+ // are all faster when the write buffer is populated.
+ movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+ if (os::is_MP()) {
+ lock (); addl (Address(rsp, 0), 0);
+ }
+ cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+ jccb (Assembler::notZero, LSuccess);
+
+ movptr (boxReg, (int32_t)NULL_WORD); // box is really EAX
+ if (os::is_MP()) { lock(); }
+ cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+ jccb (Assembler::notEqual, LSuccess);
+ // Intentional fall-through into slow-path
+
+ bind (LGoSlowPath);
+ orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
+ jmpb (DONE_LABEL);
+
+ bind (LSuccess);
+ testl (boxReg, 0); // set ICC.ZF=1 to indicate success
+ jmpb (DONE_LABEL);
+ }
+
+ bind (Stacked);
+ movptr(tmpReg, Address (boxReg, 0)); // re-fetch
+ if (os::is_MP()) { lock(); }
+ cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+
+ if (EmitSync & 65536) {
+ bind (CheckSucc);
+ }
+#endif
+ bind(DONE_LABEL);
+ // Avoid branch to branch on AMD processors
+ if (EmitSync & 32768) {
+ nop();
+ }
+ }
+}
+#endif // COMPILER2
+
void MacroAssembler::c2bool(Register x) {
// implements x == 0 ? 0 : 1
// note: must only look at least-significant byte of x
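Note: fast_lock()/fast_unlock() in the hunk above inline the stack-locking fast path the surrounding comments describe: the fetched mark is OR'ed with 0x1, stored into the on-stack BasicLock ("box") as the displaced header, and a locked cmpxchg swings the object header from that unlocked mark to the box address; when the CAS fails but the observed mark is an address close to rsp, the object is already stack-locked by the current frame's stack and the recursion is recorded by storing zero into the displaced header. A compact standalone sketch of that protocol in C++11 atomics follows; BasicLock, try_stack_lock, stack_unlock and the owned_by_current_stack predicate are illustrative stand-ins rather than HotSpot names, and the biased and inflated cases are folded into the "slow path" return.

#include <atomic>
#include <cstdint>
using std::uintptr_t;

struct BasicLock { uintptr_t displaced_header; };   // the on-stack "box"

// Returns true if the stack lock (possibly recursive) was taken; false means
// the caller must enter the slow path (inflated or contended).
bool try_stack_lock(std::atomic<uintptr_t>& mark, BasicLock* box,
                    bool (*owned_by_current_stack)(uintptr_t)) {
  uintptr_t unlocked = mark.load(std::memory_order_relaxed) | 0x1;  // presumed neutral mark
  box->displaced_header = unlocked;                 // box->dhw = displaced header
  uintptr_t observed = unlocked;
  if (mark.compare_exchange_strong(observed, reinterpret_cast<uintptr_t>(box)))
    return true;                                    // header now points at our box
  if (owned_by_current_stack(observed)) {           // stands in for the (mark - rsp) proximity test
    box->displaced_header = 0;                      // recursive stack lock: dhw == 0
    return true;
  }
  return false;
}

// Returns true on a successful fast unlock; false sends the caller to the slow path.
bool stack_unlock(std::atomic<uintptr_t>& mark, BasicLock* box) {
  if (box->displaced_header == 0) return true;      // recursive unlock: nothing to restore
  uintptr_t expected = reinterpret_cast<uintptr_t>(box);
  return mark.compare_exchange_strong(expected, box->displaced_header);
}

The real code additionally tests the monitor bit (markOopDesc::monitor_value) before attempting the CAS and branches to the inflated path instead, which the sketch does not model.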
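The inflated half of fast_unlock() implements the 1-0 exit and the "ST m->_owner = 0; MEMBAR; LD m->_succ" Dekker-style check the comments walk through; the emitted barrier is the locked add to the top of stack (option (3) in the list above, with an MFENCE variant gated on FenceInstruction on x86_32). Below is a standalone C++11-atomics sketch of the same shape; Monitor, fast_exit and the waiters field are illustrative stand-ins for ObjectMonitor, Fast_Unlock and the EntryList|cxq test, and the sketch assumes the caller owns the monitor and holds it non-recursively.

#include <atomic>

struct Monitor {
  std::atomic<void*> owner{nullptr};
  std::atomic<void*> succ{nullptr};   // heir presumptive, set by the slow path
  std::atomic<int>   waiters{0};      // stands in for EntryList|cxq being non-empty
};

// Returns true if the fast exit completed; false means "call the slow-path exit",
// the analogue of leaving ICC.ZF == 0 in the generated code.
bool fast_exit(Monitor* m, void* self) {
  if (m->waiters.load(std::memory_order_relaxed) == 0) {
    m->owner.store(nullptr, std::memory_order_release);   // uncontended 1-0 exit
    return true;
  }
  if (m->succ.load(std::memory_order_relaxed) == nullptr)
    return false;                                          // no heir: slow path must wake one
  m->owner.store(nullptr, std::memory_order_release);      // ST _owner = 0
  std::atomic_thread_fence(std::memory_order_seq_cst);     // the Dekker-style MEMBAR
  if (m->succ.load(std::memory_order_relaxed) != nullptr)
    return true;                                           // successor survived: it will reacquire
  void* expected = nullptr;                                // ratification failed: try to take the
  if (!m->owner.compare_exchange_strong(expected, self))   // lock back before waking someone
    return true;                                           // another thread already owns it
  return false;                                            // reacquired: go wake a waiter
}

Returning false corresponds to ICC.ZF = 0, so the compiled cmpFastUnlock node branches to the runtime exit path; returning true corresponds to ZF = 1 and control dropping through.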
diff --git a/src/cpu/x86/vm/macroAssembler_x86.hpp b/src/cpu/x86/vm/macroAssembler_x86.hpp
index 198fc98e8..6ac95774b 100644
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp
@@ -651,7 +651,12 @@ class MacroAssembler: public Assembler {
Label& done, Label* slow_case = NULL,
BiasedLockingCounters* counters = NULL);
void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
-
+#ifdef COMPILER2
+ // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
+ // See full desription in macroAssembler_x86.cpp.
+ void fast_lock(Register obj, Register box, Register tmp, Register scr, BiasedLockingCounters* counters);
+ void fast_unlock(Register obj, Register box, Register tmp);
+#endif
Condition negate_condition(Condition cond);
diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad
index 90c1d899f..382d09b07 100644
--- a/src/cpu/x86/vm/x86_32.ad
+++ b/src/cpu/x86/vm/x86_32.ad
@@ -2910,542 +2910,6 @@ encode %{
emit_d8 (cbuf,0 );
%}
-
- // Because the transitions from emitted code to the runtime
- // monitorenter/exit helper stubs are so slow it's critical that
- // we inline both the stack-locking fast-path and the inflated fast path.
- //
- // See also: cmpFastLock and cmpFastUnlock.
- //
- // What follows is a specialized inline transliteration of the code
- // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
- // another option would be to emit TrySlowEnter and TrySlowExit methods
- // at startup-time. These methods would accept arguments as
- // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
- // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
- // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
- // In practice, however, the # of lock sites is bounded and is usually small.
- // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
- // if the processor uses simple bimodal branch predictors keyed by EIP
- // Since the helper routines would be called from multiple synchronization
- // sites.
- //
- // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
- // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
- // to those specialized methods. That'd give us a mostly platform-independent
- // implementation that the JITs could optimize and inline at their pleasure.
- // Done correctly, the only time we'd need to cross to native could would be
- // to park() or unpark() threads. We'd also need a few more unsafe operators
- // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
- // (b) explicit barriers or fence operations.
- //
- // TODO:
- //
- // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
- // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
- // Given TLAB allocation, Self is usually manifested in a register, so passing it into
- // the lock operators would typically be faster than reifying Self.
- //
- // * Ideally I'd define the primitives as:
- // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
- // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
- // Unfortunately ADLC bugs prevent us from expressing the ideal form.
- // Instead, we're stuck with a rather awkward and brittle register assignments below.
- // Furthermore the register assignments are overconstrained, possibly resulting in
- // sub-optimal code near the synchronization site.
- //
- // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
- // Alternately, use a better sp-proximity test.
- //
- // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
- // Either one is sufficient to uniquely identify a thread.
- // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
- //
- // * Intrinsify notify() and notifyAll() for the common cases where the
- // object is locked by the calling thread but the waitlist is empty,
- // avoiding the expensive JNI calls to JVM_Notify() and JVM_NotifyAll().
- //
- // * use jccb and jmpb instead of jcc and jmp to improve code density.
- // But beware of excessive branch density on AMD Opterons.
- //
- // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
- // or failure of the fast-path. If the fast-path fails then we pass
- // control to the slow-path, typically in C. In Fast_Lock and
- // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
- // will emit a conditional branch immediately after the node.
- // So we have branches to branches and lots of ICC.ZF games.
- // Instead, it might be better to have C2 pass a "FailureLabel"
- // into Fast_Lock and Fast_Unlock. In the case of success, control
- // will drop through the node. ICC.ZF is undefined at exit.
- // In the case of failure, the node will branch directly to the
- // FailureLabel
-
-
- // obj: object to lock
- // box: on-stack box address (displaced header location) - KILLED
- // rax,: tmp -- KILLED
- // scr: tmp -- KILLED
- enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
-
- Register objReg = as_Register($obj$$reg);
- Register boxReg = as_Register($box$$reg);
- Register tmpReg = as_Register($tmp$$reg);
- Register scrReg = as_Register($scr$$reg);
-
- // Ensure the register assignments are disjoint
- guarantee (objReg != boxReg, "") ;
- guarantee (objReg != tmpReg, "") ;
- guarantee (objReg != scrReg, "") ;
- guarantee (boxReg != tmpReg, "") ;
- guarantee (boxReg != scrReg, "") ;
- guarantee (tmpReg == as_Register(EAX_enc), "") ;
-
- MacroAssembler masm(&cbuf);
-
- if (_counters != NULL) {
- masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
- }
- if (EmitSync & 1) {
- // set box->dhw = unused_mark (3)
- // Force all sync thru slow-path: slow_enter() and slow_exit()
- masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
- masm.cmpptr (rsp, (int32_t)0) ;
- } else
- if (EmitSync & 2) {
- Label DONE_LABEL ;
- if (UseBiasedLocking) {
- // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
- masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
- }
-
- masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword
- masm.orptr (tmpReg, 0x1);
- masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
- masm.jcc(Assembler::equal, DONE_LABEL);
- // Recursive locking
- masm.subptr(tmpReg, rsp);
- masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
- masm.movptr(Address(boxReg, 0), tmpReg);
- masm.bind(DONE_LABEL) ;
- } else {
- // Possible cases that we'll encounter in fast_lock
- // ------------------------------------------------
- // * Inflated
- // -- unlocked
- // -- Locked
- // = by self
- // = by other
- // * biased
- // -- by Self
- // -- by other
- // * neutral
- // * stack-locked
- // -- by self
- // = sp-proximity test hits
- // = sp-proximity test generates false-negative
- // -- by other
- //
-
- Label IsInflated, DONE_LABEL, PopDone ;
-
- // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
- // order to reduce the number of conditional branches in the most common cases.
- // Beware -- there's a subtle invariant that fetch of the markword
- // at [FETCH], below, will never observe a biased encoding (*101b).
- // If this invariant is not held we risk exclusion (safety) failure.
- if (UseBiasedLocking && !UseOptoBiasInlining) {
- masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
- }
-
- masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH]
- masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral)
- masm.jccb (Assembler::notZero, IsInflated) ;
-
- // Attempt stack-locking ...
- masm.orptr (tmpReg, 0x1);
- masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
- if (_counters != NULL) {
- masm.cond_inc32(Assembler::equal,
- ExternalAddress((address)_counters->fast_path_entry_count_addr()));
- }
- masm.jccb (Assembler::equal, DONE_LABEL);
-
- // Recursive locking
- masm.subptr(tmpReg, rsp);
- masm.andptr(tmpReg, 0xFFFFF003 );
- masm.movptr(Address(boxReg, 0), tmpReg);
- if (_counters != NULL) {
- masm.cond_inc32(Assembler::equal,
- ExternalAddress((address)_counters->fast_path_entry_count_addr()));
- }
- masm.jmp (DONE_LABEL) ;
-
- masm.bind (IsInflated) ;
-
- // The object is inflated.
- //
- // TODO-FIXME: eliminate the ugly use of manifest constants:
- // Use markOopDesc::monitor_value instead of "2".
- // use markOop::unused_mark() instead of "3".
- // The tmpReg value is an objectMonitor reference ORed with
- // markOopDesc::monitor_value (2). We can either convert tmpReg to an
- // objectmonitor pointer by masking off the "2" bit or we can just
- // use tmpReg as an objectmonitor pointer but bias the objectmonitor
- // field offsets with "-2" to compensate for and annul the low-order tag bit.
- //
- // I use the latter as it avoids AGI stalls.
- // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
- // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
- //
- #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
-
- // boxReg refers to the on-stack BasicLock in the current frame.
- // We'd like to write:
- // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices.
- // This is convenient but results in a ST-before-CAS penalty. The following CAS suffers
- // additional latency as we have another ST in the store buffer that must drain.
-
- if (EmitSync & 8192) {
- masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
- masm.get_thread (scrReg) ;
- masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
- masm.movptr(tmpReg, NULL_WORD); // consider: xor vs mov
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- } else
- if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
- masm.movptr(scrReg, boxReg) ;
- masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
-
- // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
- if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
- // prefetchw [eax + Offset(_owner)-2]
- masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
- }
-
- if ((EmitSync & 64) == 0) {
- // Optimistic form: consider XORL tmpReg,tmpReg
- masm.movptr(tmpReg, NULL_WORD) ;
- } else {
- // Can suffer RTS->RTO upgrades on shared or cold $ lines
- // Test-And-CAS instead of CAS
- masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
- masm.testptr(tmpReg, tmpReg) ; // Locked ?
- masm.jccb (Assembler::notZero, DONE_LABEL) ;
- }
-
- // Appears unlocked - try to swing _owner from null to non-null.
- // Ideally, I'd manifest "Self" with get_thread and then attempt
- // to CAS the register containing Self into m->Owner.
- // But we don't have enough registers, so instead we can either try to CAS
- // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
- // we later store "Self" into m->Owner. Transiently storing a stack address
- // (rsp or the address of the box) into m->owner is harmless.
- // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3
- masm.jccb (Assembler::notZero, DONE_LABEL) ;
- masm.get_thread (scrReg) ; // beware: clobbers ICCs
- masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
- masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success
-
- // If the CAS fails we can either retry or pass control to the slow-path.
- // We use the latter tactic.
- // Pass the CAS result in the icc.ZFlag into DONE_LABEL
- // If the CAS was successful ...
- // Self has acquired the lock
- // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
- // Intentional fall-through into DONE_LABEL ...
- } else {
- masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
- masm.movptr(boxReg, tmpReg) ;
-
- // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
- if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
- // prefetchw [eax + Offset(_owner)-2]
- masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
- }
-
- if ((EmitSync & 64) == 0) {
- // Optimistic form
- masm.xorptr (tmpReg, tmpReg) ;
- } else {
- // Can suffer RTS->RTO upgrades on shared or cold $ lines
- masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
- masm.testptr(tmpReg, tmpReg) ; // Locked ?
- masm.jccb (Assembler::notZero, DONE_LABEL) ;
- }
-
- // Appears unlocked - try to swing _owner from null to non-null.
- // Use either "Self" (in scr) or rsp as thread identity in _owner.
- // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
- masm.get_thread (scrReg) ;
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
-
- // If the CAS fails we can either retry or pass control to the slow-path.
- // We use the latter tactic.
- // Pass the CAS result in the icc.ZFlag into DONE_LABEL
- // If the CAS was successful ...
- // Self has acquired the lock
- // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
- // Intentional fall-through into DONE_LABEL ...
- }
-
- // DONE_LABEL is a hot target - we'd really like to place it at the
- // start of cache line by padding with NOPs.
- // See the AMD and Intel software optimization manuals for the
- // most efficient "long" NOP encodings.
- // Unfortunately none of our alignment mechanisms suffice.
- masm.bind(DONE_LABEL);
-
- // Avoid branch-to-branch on AMD processors
- // This appears to be superstition.
- if (EmitSync & 32) masm.nop() ;
-
-
- // At DONE_LABEL the icc ZFlag is set as follows ...
- // Fast_Unlock uses the same protocol.
- // ZFlag == 1 -> Success
- // ZFlag == 0 -> Failure - force control through the slow-path
- }
- %}
-
- // obj: object to unlock
- // box: box address (displaced header location), killed. Must be EAX.
- // rbx,: killed tmp; cannot be obj nor box.
- //
- // Some commentary on balanced locking:
- //
- // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
- // Methods that don't have provably balanced locking are forced to run in the
- // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
- // The interpreter provides two properties:
- // I1: At return-time the interpreter automatically and quietly unlocks any
- // objects acquired by the current activation (frame). Recall that the
- // interpreter maintains an on-stack list of locks currently held by
- // a frame.
- // I2: If a method attempts to unlock an object that is not held by
- // the frame, the interpreter throws IMSX.
- //
- // Let's say A(), which has provably balanced locking, acquires O and then calls B().
- // B() doesn't have provably balanced locking so it runs in the interpreter.
- // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
- // is still locked by A().
- //
- // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
- // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
- // should not be unlocked by "normal" java-level locking and vice-versa. The specification
- // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
-
- enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
-
- Register objReg = as_Register($obj$$reg);
- Register boxReg = as_Register($box$$reg);
- Register tmpReg = as_Register($tmp$$reg);
-
- guarantee (objReg != boxReg, "") ;
- guarantee (objReg != tmpReg, "") ;
- guarantee (boxReg != tmpReg, "") ;
- guarantee (boxReg == as_Register(EAX_enc), "") ;
- MacroAssembler masm(&cbuf);
-
- if (EmitSync & 4) {
- // Disable - inhibit all inlining. Force control through the slow-path
- masm.cmpptr (rsp, 0) ;
- } else
- if (EmitSync & 8) {
- Label DONE_LABEL ;
- if (UseBiasedLocking) {
- masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
- }
- // classic stack-locking code ...
- masm.movptr(tmpReg, Address(boxReg, 0)) ;
- masm.testptr(tmpReg, tmpReg) ;
- masm.jcc (Assembler::zero, DONE_LABEL) ;
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
- masm.bind(DONE_LABEL);
- } else {
- Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
-
- // Critically, the biased locking test must have precedence over
- // and appear before the (box->dhw == 0) recursive stack-lock test.
- if (UseBiasedLocking && !UseOptoBiasInlining) {
- masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
- }
-
- masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header
- masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
- masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock
-
- masm.testptr(tmpReg, 0x02) ; // Inflated?
- masm.jccb (Assembler::zero, Stacked) ;
-
- masm.bind (Inflated) ;
- // It's inflated.
- // Despite our balanced locking property we still check that m->_owner == Self
- // as java routines or native JNI code called by this thread might
- // have released the lock.
- // Refer to the comments in synchronizer.cpp for how we might encode extra
- // state in _succ so we can avoid fetching EntryList|cxq.
- //
- // I'd like to add more cases in fast_lock() and fast_unlock() --
- // such as recursive enter and exit -- but we have to be wary of
- // I$ bloat, T$ effects and BP$ effects.
- //
- // If there's no contention try a 1-0 exit. That is, exit without
- // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
- // we detect and recover from the race that the 1-0 exit admits.
- //
- // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
- // before it STs null into _owner, releasing the lock. Updates
- // to data protected by the critical section must be visible before
- // we drop the lock (and thus before any other thread could acquire
- // the lock and observe the fields protected by the lock).
- // IA32's memory-model is SPO, so STs are ordered with respect to
- // each other and there's no need for an explicit barrier (fence).
- // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
-
- masm.get_thread (boxReg) ;
- if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
- // prefetchw [ebx + Offset(_owner)-2]
- masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
- }
-
- // Note that we could employ various encoding schemes to reduce
- // the number of loads below (currently 4) to just 2 or 3.
- // Refer to the comments in synchronizer.cpp.
- // In practice the chain of fetches doesn't seem to impact performance, however.
- if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
- // Attempt to reduce branch density - AMD's branch predictor.
- masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
- masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
- masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
- masm.jccb (Assembler::notZero, DONE_LABEL) ;
- masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
- masm.jmpb (DONE_LABEL) ;
- } else {
- masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
- masm.jccb (Assembler::notZero, DONE_LABEL) ;
- masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
- masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
- masm.jccb (Assembler::notZero, CheckSucc) ;
- masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
- masm.jmpb (DONE_LABEL) ;
- }
-
- // The Following code fragment (EmitSync & 65536) improves the performance of
- // contended applications and contended synchronization microbenchmarks.
- // Unfortunately the emission of the code - even though not executed - causes regressions
- // in scimark and jetstream, evidently because of $ effects. Replacing the code
- // with an equal number of never-executed NOPs results in the same regression.
- // We leave it off by default.
-
- if ((EmitSync & 65536) != 0) {
- Label LSuccess, LGoSlowPath ;
-
- masm.bind (CheckSucc) ;
-
- // Optional pre-test ... it's safe to elide this
- if ((EmitSync & 16) == 0) {
- masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
- masm.jccb (Assembler::zero, LGoSlowPath) ;
- }
-
- // We have a classic Dekker-style idiom:
- // ST m->_owner = 0 ; MEMBAR; LD m->_succ
- // There are a number of ways to implement the barrier:
- // (1) lock:andl &m->_owner, 0
- // is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
- // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
- // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
- // (2) If supported, an explicit MFENCE is appealing.
- // In older IA32 processors MFENCE is slower than lock:add or xchg
- // particularly if the write-buffer is full as might be the case if
- // if stores closely precede the fence or fence-equivalent instruction.
- // In more modern implementations MFENCE appears faster, however.
- // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
- // The $lines underlying the top-of-stack should be in M-state.
- // The locked add instruction is serializing, of course.
- // (4) Use xchg, which is serializing
- // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
- // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
- // The integer condition codes will tell us if succ was 0.
- // Since _succ and _owner should reside in the same $line and
- // we just stored into _owner, it's likely that the $line
- // remains in M-state for the lock:orl.
- //
- // We currently use (3), although it's likely that switching to (2)
- // is correct for the future.
-
- masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
- if (os::is_MP()) {
- if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
- masm.mfence();
- } else {
- masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
- }
- }
- // Ratify _succ remains non-null
- masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
- masm.jccb (Assembler::notZero, LSuccess) ;
-
- masm.xorptr(boxReg, boxReg) ; // box is really EAX
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
- masm.jccb (Assembler::notEqual, LSuccess) ;
- // Since we're low on registers we installed rsp as a placeholder in _owner.
- // Now install Self over rsp. This is safe as we're transitioning from
- // non-null to non-null.
- masm.get_thread (boxReg) ;
- masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
- // Intentional fall-through into LGoSlowPath ...
-
- masm.bind (LGoSlowPath) ;
- masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure
- masm.jmpb (DONE_LABEL) ;
-
- masm.bind (LSuccess) ;
- masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
- masm.jmpb (DONE_LABEL) ;
- }
-
- masm.bind (Stacked) ;
- // It's not inflated and it's not recursively stack-locked and it's not biased.
- // It must be stack-locked.
- // Try to reset the header to displaced header.
- // The "box" value on the stack is stable, so we can reload
- // and be assured we observe the same value as above.
- masm.movptr(tmpReg, Address(boxReg, 0)) ;
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
- // Intentional fall-through into DONE_LABEL
-
-
- // DONE_LABEL is a hot target - we'd really like to place it at the
- // start of cache line by padding with NOPs.
- // See the AMD and Intel software optimization manuals for the
- // most efficient "long" NOP encodings.
- // Unfortunately none of our alignment mechanisms suffice.
- if ((EmitSync & 65536) == 0) {
- masm.bind (CheckSucc) ;
- }
- masm.bind(DONE_LABEL);
-
- // Avoid branch to branch on AMD processors
- if (EmitSync & 32768) { masm.nop() ; }
- }
- %}
-
-
enc_class enc_pop_rdx() %{
emit_opcode(cbuf,0x5A);
%}
@@ -13147,23 +12611,26 @@ instruct RethrowException()
// inlined locking and unlocking
-
-instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
- match( Set cr (FastLock object box) );
- effect( TEMP tmp, TEMP scr, USE_KILL box );
+instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
+ match(Set cr (FastLock object box));
+ effect(TEMP tmp, TEMP scr, USE_KILL box);
ins_cost(300);
format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
- ins_encode( Fast_Lock(object,box,tmp,scr) );
- ins_pipe( pipe_slow );
+ ins_encode %{
+ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, _counters);
+ %}
+ ins_pipe(pipe_slow);
%}
-instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
- match( Set cr (FastUnlock object box) );
- effect( TEMP tmp, USE_KILL box );
+instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
+ match(Set cr (FastUnlock object box));
+ effect(TEMP tmp, USE_KILL box);
ins_cost(300);
format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
- ins_encode( Fast_Unlock(object,box,tmp) );
- ins_pipe( pipe_slow );
+ ins_encode %{
+ __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_slow);
%}
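
For reference, the stack-locking fast path that the removed Fast_Lock encoding (and its replacement, MacroAssembler::fast_lock) implements is: publish the displaced header in the on-stack BasicLock, CAS the box address into the object's mark word, and on CAS failure treat a mark word that points into the current thread's stack as a recursive lock. The following is a simplified C++ model of that decision logic only — a sketch with a flat mark-word encoding, a stack-range test in place of the sp-proximity mask, and biased locking ignored; it is not HotSpot code.

#include <atomic>
#include <cstdint>

// Simplified mark word: low bits 01 = unlocked/neutral, 00 = stack-locked
// (value is the address of a BasicLock on some thread's stack), 10 = inflated.
struct ObjectModel { std::atomic<std::uintptr_t> mark; };
struct BasicLock   { std::uintptr_t displaced_header; };

// Returns true when the lock is acquired on the fast path (the ZF==1 analogue).
bool fast_lock_model(ObjectModel* obj, BasicLock* box,
                     std::uintptr_t stack_low, std::uintptr_t stack_high) {
  std::uintptr_t mark = obj->mark.load(std::memory_order_relaxed);
  if (mark & 0x2) return false;            // inflated: take the monitor path

  // Attempt stack-locking: store the (unlocked) header in the box, then try
  // to swing the mark word to point at the box.
  std::uintptr_t unlocked = mark | 0x1;
  box->displaced_header = unlocked;
  std::uintptr_t expected = unlocked;
  if (obj->mark.compare_exchange_strong(expected,
                                        reinterpret_cast<std::uintptr_t>(box))) {
    return true;                           // we now own the stack lock
  }

  // CAS failed: if the mark word is a BasicLock inside our own stack range,
  // this is a recursive lock; record 0 so the unlock path recognizes it.
  if (expected >= stack_low && expected < stack_high) {
    box->displaced_header = 0;
    return true;
  }
  return false;                            // contended (or biased): slow path
}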
diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad
index 9fe92953a..70b3c5a9e 100644
--- a/src/cpu/x86/vm/x86_64.ad
+++ b/src/cpu/x86/vm/x86_64.ad
@@ -2591,231 +2591,6 @@ encode %{
%}
- // obj: object to lock
- // box: box address (header location) -- killed
- // tmp: rax -- killed
- // scr: rbx -- killed
- //
- // What follows is a direct transliteration of fast_lock() and fast_unlock()
- // from i486.ad. See that file for comments.
- // TODO: where possible switch from movq (r, 0) to movl(r,0) and
- // use the shorter encoding. (Movl clears the high-order 32-bits).
-
-
- enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
- %{
- Register objReg = as_Register((int)$obj$$reg);
- Register boxReg = as_Register((int)$box$$reg);
- Register tmpReg = as_Register($tmp$$reg);
- Register scrReg = as_Register($scr$$reg);
- MacroAssembler masm(&cbuf);
-
- // Verify uniqueness of register assignments -- necessary but not sufficient
- assert (objReg != boxReg && objReg != tmpReg &&
- objReg != scrReg && tmpReg != scrReg, "invariant") ;
-
- if (_counters != NULL) {
- masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
- }
- if (EmitSync & 1) {
- // Without cast to int32_t a movptr will destroy r10 which is typically obj
- masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
- masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
- } else
- if (EmitSync & 2) {
- Label DONE_LABEL;
- if (UseBiasedLocking) {
- // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
- masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
- }
- // QQQ was movl...
- masm.movptr(tmpReg, 0x1);
- masm.orptr(tmpReg, Address(objReg, 0));
- masm.movptr(Address(boxReg, 0), tmpReg);
- if (os::is_MP()) {
- masm.lock();
- }
- masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
- masm.jcc(Assembler::equal, DONE_LABEL);
-
- // Recursive locking
- masm.subptr(tmpReg, rsp);
- masm.andptr(tmpReg, 7 - os::vm_page_size());
- masm.movptr(Address(boxReg, 0), tmpReg);
-
- masm.bind(DONE_LABEL);
- masm.nop(); // avoid branch to branch
- } else {
- Label DONE_LABEL, IsInflated, Egress;
-
- masm.movptr(tmpReg, Address(objReg, 0)) ;
- masm.testl (tmpReg, 0x02) ; // inflated vs stack-locked|neutral|biased
- masm.jcc (Assembler::notZero, IsInflated) ;
-
- // it's stack-locked, biased or neutral
- // TODO: optimize markword triage order to reduce the number of
- // conditional branches in the most common cases.
- // Beware -- there's a subtle invariant that fetch of the markword
- // at [FETCH], below, will never observe a biased encoding (*101b).
- // If this invariant is not held we'll suffer exclusion (safety) failure.
-
- if (UseBiasedLocking && !UseOptoBiasInlining) {
- masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
- masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH]
- }
-
- // was q will it destroy high?
- masm.orl (tmpReg, 1) ;
- masm.movptr(Address(boxReg, 0), tmpReg) ;
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
- if (_counters != NULL) {
- masm.cond_inc32(Assembler::equal,
- ExternalAddress((address) _counters->fast_path_entry_count_addr()));
- }
- masm.jcc (Assembler::equal, DONE_LABEL);
-
- // Recursive locking
- masm.subptr(tmpReg, rsp);
- masm.andptr(tmpReg, 7 - os::vm_page_size());
- masm.movptr(Address(boxReg, 0), tmpReg);
- if (_counters != NULL) {
- masm.cond_inc32(Assembler::equal,
- ExternalAddress((address) _counters->fast_path_entry_count_addr()));
- }
- masm.jmp (DONE_LABEL) ;
-
- masm.bind (IsInflated) ;
- // It's inflated
-
- // TODO: someday avoid the ST-before-CAS penalty by
- // relocating (deferring) the following ST.
- // We should also think about trying a CAS without having
- // fetched _owner. If the CAS is successful we may
- // avoid an RTO->RTS upgrade on the $line.
- // Without cast to int32_t a movptr will destroy r10 which is typically obj
- masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
-
- masm.mov (boxReg, tmpReg) ;
- masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- masm.testptr(tmpReg, tmpReg) ;
- masm.jcc (Assembler::notZero, DONE_LABEL) ;
-
- // It's inflated and appears unlocked
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- // Intentional fall-through into DONE_LABEL ...
-
- masm.bind (DONE_LABEL) ;
- masm.nop () ; // avoid jmp to jmp
- }
- %}
-
- // obj: object to unlock
- // box: box address (displaced header location), killed
- // RBX: killed tmp; cannot be obj nor box
- enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
- %{
-
- Register objReg = as_Register($obj$$reg);
- Register boxReg = as_Register($box$$reg);
- Register tmpReg = as_Register($tmp$$reg);
- MacroAssembler masm(&cbuf);
-
- if (EmitSync & 4) {
- masm.cmpptr(rsp, 0) ;
- } else
- if (EmitSync & 8) {
- Label DONE_LABEL;
- if (UseBiasedLocking) {
- masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
- }
-
- // Check whether the displaced header is 0
- //(=> recursive unlock)
- masm.movptr(tmpReg, Address(boxReg, 0));
- masm.testptr(tmpReg, tmpReg);
- masm.jcc(Assembler::zero, DONE_LABEL);
-
- // If not recursive lock, reset the header to displaced header
- if (os::is_MP()) {
- masm.lock();
- }
- masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
- masm.bind(DONE_LABEL);
- masm.nop(); // avoid branch to branch
- } else {
- Label DONE_LABEL, Stacked, CheckSucc ;
-
- if (UseBiasedLocking && !UseOptoBiasInlining) {
- masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
- }
-
- masm.movptr(tmpReg, Address(objReg, 0)) ;
- masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
- masm.jcc (Assembler::zero, DONE_LABEL) ;
- masm.testl (tmpReg, 0x02) ;
- masm.jcc (Assembler::zero, Stacked) ;
-
- // It's inflated
- masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- masm.xorptr(boxReg, r15_thread) ;
- masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
- masm.jcc (Assembler::notZero, DONE_LABEL) ;
- masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
- masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
- masm.jcc (Assembler::notZero, CheckSucc) ;
- masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
- masm.jmp (DONE_LABEL) ;
-
- if ((EmitSync & 65536) == 0) {
- Label LSuccess, LGoSlowPath ;
- masm.bind (CheckSucc) ;
- masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
- masm.jcc (Assembler::zero, LGoSlowPath) ;
-
- // I'd much rather use lock:andl m->_owner, 0 as it's faster than
- // the explicit ST;MEMBAR combination, but masm doesn't currently support
- // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc
- // are all faster when the write buffer is populated.
- masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
- if (os::is_MP()) {
- masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
- }
- masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
- masm.jcc (Assembler::notZero, LSuccess) ;
-
- masm.movptr (boxReg, (int32_t)NULL_WORD) ; // box is really EAX
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
- masm.jcc (Assembler::notEqual, LSuccess) ;
- // Intentional fall-through into slow-path
-
- masm.bind (LGoSlowPath) ;
- masm.orl (boxReg, 1) ; // set ICC.ZF=0 to indicate failure
- masm.jmp (DONE_LABEL) ;
-
- masm.bind (LSuccess) ;
- masm.testl (boxReg, 0) ; // set ICC.ZF=1 to indicate success
- masm.jmp (DONE_LABEL) ;
- }
-
- masm.bind (Stacked) ;
- masm.movptr(tmpReg, Address (boxReg, 0)) ; // re-fetch
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
-
- if (EmitSync & 65536) {
- masm.bind (CheckSucc) ;
- }
- masm.bind(DONE_LABEL);
- if (EmitSync & 32768) {
- masm.nop(); // avoid branch to branch
- }
- }
- %}
-
-
enc_class enc_rethrow()
%{
cbuf.set_insts_mark();
@@ -11443,27 +11218,25 @@ instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
// ============================================================================
// inlined locking and unlocking
-instruct cmpFastLock(rFlagsReg cr,
- rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
-%{
+instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
match(Set cr (FastLock object box));
effect(TEMP tmp, TEMP scr, USE_KILL box);
-
ins_cost(300);
format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
- ins_encode(Fast_Lock(object, box, tmp, scr));
+ ins_encode %{
+ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, _counters);
+ %}
ins_pipe(pipe_slow);
%}
-instruct cmpFastUnlock(rFlagsReg cr,
- rRegP object, rax_RegP box, rRegP tmp)
-%{
+instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
match(Set cr (FastUnlock object box));
effect(TEMP tmp, USE_KILL box);
-
ins_cost(300);
format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
- ins_encode(Fast_Unlock(object, box, tmp));
+ ins_encode %{
+ __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
+ %}
ins_pipe(pipe_slow);
%}
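
The long comment in the removed Fast_Unlock encodings describes the Dekker-style 1-0 exit: store NULL into _owner, execute a full barrier, then load _succ to decide whether a wakeup is required, with lock add [rsp],0 (or mfence/xchg) supplying the barrier on x86. Expressed with portable C++11 atomics, the same idiom is roughly the sketch below; the field names follow the comment, not an actual HotSpot header.

#include <atomic>

struct MonitorModel {
  std::atomic<void*> owner;   // NULL means unlocked
  std::atomic<void*> succ;    // presumed successor, set by a waking thread
};

// 1-0 exit: drop the lock with a plain store, force the store to become
// globally visible, then read _succ. Returns true when no successor is
// visible and the slow path must wake a waiter.
bool one_zero_exit_needs_wakeup(MonitorModel* m) {
  m->owner.store(nullptr, std::memory_order_release);   // ST _owner = 0
  std::atomic_thread_fence(std::memory_order_seq_cst);  // MEMBAR (lock add / mfence)
  return m->succ.load(std::memory_order_relaxed) == nullptr;  // LD _succ
}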
diff --git a/src/os/bsd/vm/os_bsd.cpp b/src/os/bsd/vm/os_bsd.cpp
index 47e5d16ef..456daba55 100644
--- a/src/os/bsd/vm/os_bsd.cpp
+++ b/src/os/bsd/vm/os_bsd.cpp
@@ -2636,9 +2636,21 @@ int os::sleep(Thread* thread, jlong millis, bool interruptible) {
}
}
-int os::naked_sleep() {
- // %% make the sleep time an integer flag. for now use 1 millisec.
- return os::sleep(Thread::current(), 1, false);
+void os::naked_short_sleep(jlong ms) {
+ struct timespec req;
+
+ assert(ms < 1000, "Un-interruptable sleep, short time use only");
+ req.tv_sec = 0;
+ if (ms > 0) {
+ req.tv_nsec = (ms % 1000) * 1000000;
+ }
+ else {
+ req.tv_nsec = 1;
+ }
+
+ nanosleep(&req, NULL);
+
+ return;
}
// Sleep forever; naked call to OS-specific sleep; use with CAUTION
diff --git a/src/os/linux/vm/os_linux.cpp b/src/os/linux/vm/os_linux.cpp
index 54149e182..d0751f005 100644
--- a/src/os/linux/vm/os_linux.cpp
+++ b/src/os/linux/vm/os_linux.cpp
@@ -3871,9 +3871,33 @@ int os::sleep(Thread* thread, jlong millis, bool interruptible) {
}
}
-int os::naked_sleep() {
- // %% make the sleep time an integer flag. for now use 1 millisec.
- return os::sleep(Thread::current(), 1, false);
+//
+// Short sleep, direct OS call.
+//
+// Note: certain versions of Linux CFS scheduler (since 2.6.23) do not guarantee
+// sched_yield(2) will actually give up the CPU:
+//
+// * If the thread is alone on this particular CPU, it keeps running.
+// * Before the introduction of "skip_buddy" with "compat_yield" disabled
+// (pre 2.6.39).
+//
+// So calling this with 0 is an alternative.
+//
+void os::naked_short_sleep(jlong ms) {
+ struct timespec req;
+
+ assert(ms < 1000, "Un-interruptable sleep, short time use only");
+ req.tv_sec = 0;
+ if (ms > 0) {
+ req.tv_nsec = (ms % 1000) * 1000000;
+ }
+ else {
+ req.tv_nsec = 1;
+ }
+
+ nanosleep(&req, NULL);
+
+ return;
}
// Sleep forever; naked call to OS-specific sleep; use with CAUTION
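
On both BSD and Linux the new primitive reduces to a single nanosleep(2) call, with a 1 ns floor so that a zero-millisecond request still enters the kernel and gives up the CPU even where sched_yield(2) may not. A standalone equivalent, assuming only POSIX (the helper name is illustrative, not a HotSpot API):

#include <assert.h>
#include <time.h>

// Uninterruptible short sleep; intended for sub-second pauses only.
static void short_sleep_ms(long ms) {
  assert(ms < 1000);
  struct timespec req;
  req.tv_sec  = 0;
  req.tv_nsec = (ms > 0) ? ms * 1000000L : 1;  // 1 ns still yields the CPU
  nanosleep(&req, NULL);                       // EINTR ignored for this use
}

// Typical use in a spin-then-sleep wait loop:
//   while (!condition_ready()) short_sleep_ms(1);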
diff --git a/src/os/solaris/vm/os_solaris.cpp b/src/os/solaris/vm/os_solaris.cpp
index a4d0178c8..650c2118a 100644
--- a/src/os/solaris/vm/os_solaris.cpp
+++ b/src/os/solaris/vm/os_solaris.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -2232,8 +2232,8 @@ static bool check_addr0(outputStream* st) {
st->cr();
status = true;
}
- ::close(fd);
}
+ ::close(fd);
}
return status;
}
@@ -2257,13 +2257,18 @@ const char *ill_names[] = { "ILL0", "ILL_ILLOPC", "ILL_ILLOPN", "ILL_ILLADR",
"ILL_ILLTRP", "ILL_PRVOPC", "ILL_PRVREG",
"ILL_COPROC", "ILL_BADSTK" };
+const size_t ill_names_length = (sizeof(ill_names)/sizeof(char *));
+
const char *fpe_names[] = { "FPE0", "FPE_INTDIV", "FPE_INTOVF", "FPE_FLTDIV",
"FPE_FLTOVF", "FPE_FLTUND", "FPE_FLTRES",
"FPE_FLTINV", "FPE_FLTSUB" };
+const size_t fpe_names_length = (sizeof(fpe_names)/sizeof(char *));
const char *segv_names[] = { "SEGV0", "SEGV_MAPERR", "SEGV_ACCERR" };
+const size_t segv_names_length = (sizeof(segv_names)/sizeof(char *));
const char *bus_names[] = { "BUS0", "BUS_ADRALN", "BUS_ADRERR", "BUS_OBJERR" };
+const size_t bus_names_length = (sizeof(bus_names)/sizeof(char *));
void os::print_siginfo(outputStream* st, void* siginfo) {
st->print("siginfo:");
@@ -2282,19 +2287,23 @@ void os::print_siginfo(outputStream* st, void* siginfo) {
assert(c > 0, "unexpected si_code");
switch (si->si_signo) {
case SIGILL:
- st->print(", si_code=%d (%s)", c, c > 8 ? "" : ill_names[c]);
+ st->print(", si_code=%d (%s)", c,
+ c >= ill_names_length ? "" : ill_names[c]);
st->print(", si_addr=" PTR_FORMAT, si->si_addr);
break;
case SIGFPE:
- st->print(", si_code=%d (%s)", c, c > 9 ? "" : fpe_names[c]);
+ st->print(", si_code=%d (%s)", c,
+ c >= fpe_names_length ? "" : fpe_names[c]);
st->print(", si_addr=" PTR_FORMAT, si->si_addr);
break;
case SIGSEGV:
- st->print(", si_code=%d (%s)", c, c > 2 ? "" : segv_names[c]);
+ st->print(", si_code=%d (%s)", c,
+ c >= segv_names_length ? "" : segv_names[c]);
st->print(", si_addr=" PTR_FORMAT, si->si_addr);
break;
case SIGBUS:
- st->print(", si_code=%d (%s)", c, c > 3 ? "" : bus_names[c]);
+ st->print(", si_code=%d (%s)", c,
+ c >= bus_names_length ? "" : bus_names[c]);
st->print(", si_addr=" PTR_FORMAT, si->si_addr);
break;
default:
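
The hunk above replaces the hard-coded limits (c > 8, c > 9, and so on) with lengths derived from the tables themselves, so the bound can no longer drift when a name is added; in the FPE case the old check even allowed an out-of-bounds read (c == 9 against a nine-entry table). The pattern in isolation, as a small sketch rather than the HotSpot code:

#include <stdio.h>

static const char* fpe_names[] = { "FPE0", "FPE_INTDIV", "FPE_INTOVF", "FPE_FLTDIV",
                                   "FPE_FLTOVF", "FPE_FLTUND", "FPE_FLTRES",
                                   "FPE_FLTINV", "FPE_FLTSUB" };
static const size_t fpe_names_length = sizeof(fpe_names) / sizeof(fpe_names[0]);

static void print_fpe_code(size_t c) {
  // Index the table only when the code is in range; print an empty name otherwise.
  printf("si_code=%zu (%s)\n", c, c >= fpe_names_length ? "" : fpe_names[c]);
}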
@@ -3011,7 +3020,7 @@ bool os::get_page_info(char *start, page_info* info) {
char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info* page_found) {
const uint_t info_types[] = { MEMINFO_VLGRP, MEMINFO_VPAGESIZE };
const size_t types = sizeof(info_types) / sizeof(info_types[0]);
- uint64_t addrs[MAX_MEMINFO_CNT], outdata[types * MAX_MEMINFO_CNT];
+ uint64_t addrs[MAX_MEMINFO_CNT], outdata[types * MAX_MEMINFO_CNT + 1];
uint_t validity[MAX_MEMINFO_CNT];
size_t page_size = MAX2((size_t)os::vm_page_size(), page_expected->size);
@@ -3050,7 +3059,7 @@ char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info
}
}
- if (i != addrs_count) {
+ if (i < addrs_count) {
if ((validity[i] & 2) != 0) {
page_found->lgrp_id = outdata[types * i];
} else {
@@ -3540,9 +3549,14 @@ int os::sleep(Thread* thread, jlong millis, bool interruptible) {
return os_sleep(millis, interruptible);
}
-int os::naked_sleep() {
- // %% make the sleep time an integer flag. for now use 1 millisec.
- return os_sleep(1, false);
+void os::naked_short_sleep(jlong ms) {
+ assert(ms < 1000, "Un-interruptable sleep, short time use only");
+
+ // usleep is deprecated and removed from POSIX, in favour of nanosleep, but
+ // Solaris requires -lrt for this.
+ usleep((ms * 1000));
+
+ return;
}
// Sleep forever; naked call to OS-specific sleep; use with CAUTION
diff --git a/src/os/solaris/vm/perfMemory_solaris.cpp b/src/os/solaris/vm/perfMemory_solaris.cpp
index e7b31ac64..1fa30012d 100644
--- a/src/os/solaris/vm/perfMemory_solaris.cpp
+++ b/src/os/solaris/vm/perfMemory_solaris.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -431,10 +431,12 @@ static char* get_user_name(int vmid, TRAPS) {
RESTARTABLE(::read(fd, addr, remaining), result);
if (result == OS_ERR) {
+ ::close(fd);
THROW_MSG_0(vmSymbols::java_io_IOException(), "Read error");
+ } else {
+ remaining-=result;
+ addr+=result;
}
- remaining-=result;
- addr+=result;
}
::close(fd);
@@ -906,8 +908,16 @@ static void mmap_attach_shared(const char* user, int vmid, PerfMemory::PerfMemor
FREE_C_HEAP_ARRAY(char, filename, mtInternal);
// open the shared memory file for the give vmid
- fd = open_sharedmem_file(rfilename, file_flags, CHECK);
- assert(fd != OS_ERR, "unexpected value");
+ fd = open_sharedmem_file(rfilename, file_flags, THREAD);
+
+ if (fd == OS_ERR) {
+ return;
+ }
+
+ if (HAS_PENDING_EXCEPTION) {
+ ::close(fd);
+ return;
+ }
if (*sizep == 0) {
size = sharedmem_filesize(fd, CHECK);
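
Both perfMemory_solaris.cpp fixes close the file descriptor on the error path: once before THROW_MSG_0, and once when open_sharedmem_file reports OS_ERR or leaves a pending exception instead of being asserted away. Outside HotSpot's TRAPS machinery the same goal is commonly reached with a small RAII guard so that every exit path releases the descriptor; a sketch assuming plain POSIX open/read/close:

#include <fcntl.h>
#include <unistd.h>

class FdGuard {
 public:
  explicit FdGuard(int fd) : _fd(fd) {}
  ~FdGuard() { if (_fd >= 0) ::close(_fd); }   // runs on every exit path
  int get() const { return _fd; }
 private:
  int _fd;
  FdGuard(const FdGuard&);              // non-copyable (pre-C++11 style)
  FdGuard& operator=(const FdGuard&);
};

static bool read_header(const char* path, char* buf, size_t len) {
  FdGuard fd(::open(path, O_RDONLY));
  if (fd.get() < 0) return false;                       // nothing to close
  // A short read is treated as failure here for brevity.
  return ::read(fd.get(), buf, len) == (ssize_t)len;    // fd closed either way
}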
diff --git a/src/os/windows/vm/os_windows.cpp b/src/os/windows/vm/os_windows.cpp
index 7daee3563..c09312143 100644
--- a/src/os/windows/vm/os_windows.cpp
+++ b/src/os/windows/vm/os_windows.cpp
@@ -3496,6 +3496,16 @@ int os::sleep(Thread* thread, jlong ms, bool interruptable) {
return result;
}
+//
+// Short sleep, direct OS call.
+//
+// ms = 0, means allow others (if any) to run.
+//
+void os::naked_short_sleep(jlong ms) {
+ assert(ms < 1000, "Un-interruptable sleep, short time use only");
+ Sleep(ms);
+}
+
// Sleep forever; naked call to OS-specific sleep; use with CAUTION
void os::infinite_sleep() {
while (true) { // sleep forever ...
@@ -3623,13 +3633,14 @@ bool os::is_interrupted(Thread* thread, bool clear_interrupted) {
"possibility of dangling Thread pointer");
OSThread* osthread = thread->osthread();
- bool interrupted = osthread->interrupted();
// There is no synchronization between the setting of the interrupt
// and it being cleared here. It is critical - see 6535709 - that
// we only clear the interrupt state, and reset the interrupt event,
// if we are going to report that we were indeed interrupted - else
// an interrupt can be "lost", leading to spurious wakeups or lost wakeups
- // depending on the timing
+ // depending on the timing. Checking the thread's interrupt event as well
+ // confirms that a real interrupt occurred and prevents spurious wakeups.
+ bool interrupted = osthread->interrupted() && (WaitForSingleObject(osthread->interrupt_event(), 0) == WAIT_OBJECT_0);
if (interrupted && clear_interrupted) {
osthread->set_interrupted(false);
ResetEvent(osthread->interrupt_event());
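
The os_windows.cpp change consults the per-thread interrupt event, not just the boolean flag, before reporting an interrupt; WaitForSingleObject with a zero timeout is the standard non-blocking way to test whether an event is signaled. A minimal sketch against the raw Win32 API (not HotSpot's OSThread wrapper):

#include <windows.h>

// True only if the event is currently signaled; never blocks.
static bool event_is_signaled(HANDLE event) {
  return WaitForSingleObject(event, 0) == WAIT_OBJECT_0;
}

// Pairing the flag with the event state, as os::is_interrupted now does,
// keeps a stale flag from being reported as a real interrupt:
//   bool interrupted = flag && event_is_signaled(interrupt_event);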
diff --git a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp
index eb8cbe819..054a8132b 100644
--- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp
+++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -475,9 +475,11 @@ JVM_handle_solaris_signal(int sig, siginfo_t* info, void* ucVoid,
// here if the underlying file has been truncated.
// Do not crash the VM in such a case.
CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
- nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL;
- if (nm != NULL && nm->has_unsafe_access()) {
- stub = StubRoutines::handler_for_unsafe_access();
+ if (cb != NULL) {
+ nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL;
+ if (nm != NULL && nm->has_unsafe_access()) {
+ stub = StubRoutines::handler_for_unsafe_access();
+ }
}
}
else
@@ -724,6 +726,7 @@ JVM_handle_solaris_signal(int sig, siginfo_t* info, void* ucVoid,
err.report_and_die();
ShouldNotReachHere();
+ return false;
}
void os::print_context(outputStream *st, void *context) {
diff --git a/src/share/vm/classfile/altHashing.cpp b/src/share/vm/classfile/altHashing.cpp
index 8dfc3153c..91eb5bec1 100644
--- a/src/share/vm/classfile/altHashing.cpp
+++ b/src/share/vm/classfile/altHashing.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -39,18 +39,18 @@ intptr_t object_hash(Klass* k) {
}
// Seed value used for each alternative hash calculated.
-jint AltHashing::compute_seed() {
+juint AltHashing::compute_seed() {
jlong nanos = os::javaTimeNanos();
jlong now = os::javaTimeMillis();
- jint SEED_MATERIAL[8] = {
- (jint) object_hash(SystemDictionary::String_klass()),
- (jint) object_hash(SystemDictionary::System_klass()),
- (jint) os::random(), // current thread isn't a java thread
- (jint) (((julong)nanos) >> 32),
- (jint) nanos,
- (jint) (((julong)now) >> 32),
- (jint) now,
- (jint) (os::javaTimeNanos() >> 2)
+ int SEED_MATERIAL[8] = {
+ (int) object_hash(SystemDictionary::String_klass()),
+ (int) object_hash(SystemDictionary::System_klass()),
+ (int) os::random(), // current thread isn't a java thread
+ (int) (((julong)nanos) >> 32),
+ (int) nanos,
+ (int) (((julong)now) >> 32),
+ (int) now,
+ (int) (os::javaTimeNanos() >> 2)
};
return murmur3_32(SEED_MATERIAL, 8);
@@ -58,14 +58,14 @@ jint AltHashing::compute_seed() {
// Murmur3 hashing for Symbol
-jint AltHashing::murmur3_32(jint seed, const jbyte* data, int len) {
- jint h1 = seed;
+juint AltHashing::murmur3_32(juint seed, const jbyte* data, int len) {
+ juint h1 = seed;
int count = len;
int offset = 0;
// body
while (count >= 4) {
- jint k1 = (data[offset] & 0x0FF)
+ juint k1 = (data[offset] & 0x0FF)
| (data[offset + 1] & 0x0FF) << 8
| (data[offset + 2] & 0x0FF) << 16
| data[offset + 3] << 24;
@@ -85,7 +85,7 @@ jint AltHashing::murmur3_32(jint seed, const jbyte* data, int len) {
// tail
if (count > 0) {
- jint k1 = 0;
+ juint k1 = 0;
switch (count) {
case 3:
@@ -109,18 +109,18 @@ jint AltHashing::murmur3_32(jint seed, const jbyte* data, int len) {
h1 ^= len;
// finalization mix force all bits of a hash block to avalanche
- h1 ^= ((unsigned int)h1) >> 16;
+ h1 ^= h1 >> 16;
h1 *= 0x85ebca6b;
- h1 ^= ((unsigned int)h1) >> 13;
+ h1 ^= h1 >> 13;
h1 *= 0xc2b2ae35;
- h1 ^= ((unsigned int)h1) >> 16;
+ h1 ^= h1 >> 16;
return h1;
}
// Murmur3 hashing for Strings
-jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) {
- jint h1 = seed;
+juint AltHashing::murmur3_32(juint seed, const jchar* data, int len) {
+ juint h1 = seed;
int off = 0;
int count = len;
@@ -129,7 +129,7 @@ jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) {
while (count >= 2) {
jchar d1 = data[off++] & 0xFFFF;
jchar d2 = data[off++];
- jint k1 = (d1 | d2 << 16);
+ juint k1 = (d1 | d2 << 16);
count -= 2;
@@ -145,7 +145,7 @@ jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) {
// tail
if (count > 0) {
- int k1 = data[off];
+ juint k1 = (juint)data[off];
k1 *= 0xcc9e2d51;
k1 = Integer_rotateLeft(k1, 15);
@@ -157,25 +157,25 @@ jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) {
h1 ^= len * 2; // (Character.SIZE / Byte.SIZE);
// finalization mix force all bits of a hash block to avalanche
- h1 ^= ((unsigned int)h1) >> 16;
+ h1 ^= h1 >> 16;
h1 *= 0x85ebca6b;
- h1 ^= ((unsigned int)h1) >> 13;
+ h1 ^= h1 >> 13;
h1 *= 0xc2b2ae35;
- h1 ^= ((unsigned int)h1) >> 16;
+ h1 ^= h1 >> 16;
return h1;
}
// Hash used for the seed.
-jint AltHashing::murmur3_32(jint seed, const int* data, int len) {
- jint h1 = seed;
+juint AltHashing::murmur3_32(juint seed, const int* data, int len) {
+ juint h1 = seed;
int off = 0;
int end = len;
// body
while (off < end) {
- jint k1 = data[off++];
+ juint k1 = (juint)data[off++];
k1 *= 0xcc9e2d51;
k1 = Integer_rotateLeft(k1, 15);
@@ -193,26 +193,26 @@ jint AltHashing::murmur3_32(jint seed, const int* data, int len) {
h1 ^= len * 4; // (Integer.SIZE / Byte.SIZE);
// finalization mix force all bits of a hash block to avalanche
- h1 ^= ((juint)h1) >> 16;
+ h1 ^= h1 >> 16;
h1 *= 0x85ebca6b;
- h1 ^= ((juint)h1) >> 13;
+ h1 ^= h1 >> 13;
h1 *= 0xc2b2ae35;
- h1 ^= ((juint)h1) >> 16;
+ h1 ^= h1 >> 16;
return h1;
}
-jint AltHashing::murmur3_32(const int* data, int len) {
+juint AltHashing::murmur3_32(const int* data, int len) {
return murmur3_32(0, data, len);
}
#ifndef PRODUCT
// Overloaded versions for internal test.
-jint AltHashing::murmur3_32(const jbyte* data, int len) {
+juint AltHashing::murmur3_32(const jbyte* data, int len) {
return murmur3_32(0, data, len);
}
-jint AltHashing::murmur3_32(const jchar* data, int len) {
+juint AltHashing::murmur3_32(const jchar* data, int len) {
return murmur3_32(0, data, len);
}
@@ -251,11 +251,11 @@ void AltHashing::testMurmur3_32_ByteArray() {
// Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255}
for (int i = 0; i < 256; i++) {
- jint hash = murmur3_32(256 - i, vector, i);
+ juint hash = murmur3_32(256 - i, vector, i);
hashes[i * 4] = (jbyte) hash;
- hashes[i * 4 + 1] = (jbyte) (((juint)hash) >> 8);
- hashes[i * 4 + 2] = (jbyte) (((juint)hash) >> 16);
- hashes[i * 4 + 3] = (jbyte) (((juint)hash) >> 24);
+ hashes[i * 4 + 1] = (jbyte)(hash >> 8);
+ hashes[i * 4 + 2] = (jbyte)(hash >> 16);
+ hashes[i * 4 + 3] = (jbyte)(hash >> 24);
}
// hash to get const result.
@@ -269,7 +269,7 @@ void AltHashing::testMurmur3_32_ByteArray() {
}
void AltHashing::testEquivalentHashes() {
- jint jbytes, jchars, ints;
+ juint jbytes, jchars, ints;
// printf("testEquivalentHashes\n");
diff --git a/src/share/vm/classfile/altHashing.hpp b/src/share/vm/classfile/altHashing.hpp
index 941b9a0dc..2e04fd33a 100644
--- a/src/share/vm/classfile/altHashing.hpp
+++ b/src/share/vm/classfile/altHashing.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -39,24 +39,24 @@
class AltHashing : AllStatic {
// utility function copied from java/lang/Integer
- static jint Integer_rotateLeft(jint i, int distance) {
- return (i << distance) | (((juint)i) >> (32-distance));
+ static juint Integer_rotateLeft(juint i, int distance) {
+ return (i << distance) | (i >> (32-distance));
}
- static jint murmur3_32(const int* data, int len);
- static jint murmur3_32(jint seed, const int* data, int len);
+ static juint murmur3_32(const int* data, int len);
+ static juint murmur3_32(juint seed, const int* data, int len);
#ifndef PRODUCT
// Hashing functions used for internal testing
- static jint murmur3_32(const jbyte* data, int len);
- static jint murmur3_32(const jchar* data, int len);
+ static juint murmur3_32(const jbyte* data, int len);
+ static juint murmur3_32(const jchar* data, int len);
static void testMurmur3_32_ByteArray();
static void testEquivalentHashes();
#endif // PRODUCT
public:
- static jint compute_seed();
- static jint murmur3_32(jint seed, const jbyte* data, int len);
- static jint murmur3_32(jint seed, const jchar* data, int len);
+ static juint compute_seed();
+ static juint murmur3_32(juint seed, const jbyte* data, int len);
+ static juint murmur3_32(juint seed, const jchar* data, int len);
NOT_PRODUCT(static void test_alt_hash();)
};
#endif // SHARE_VM_CLASSFILE_ALTHASHING_HPP
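
The jint-to-juint switch matters because Murmur3 is defined over unsigned 32-bit words: with a signed h1, the right shifts in the rotate and the finalization mix have implementation-defined behaviour whenever h1 is negative, which is why the old code cast to unsigned at every shift. The same two helpers written over uint32_t, as a sketch matching the algorithm in the patch rather than the HotSpot types:

#include <stdint.h>

// Rotate left; distance must be in (0, 32), as it is at every call site above.
static inline uint32_t rotl32(uint32_t x, int distance) {
  return (x << distance) | (x >> (32 - distance));   // logical shift on unsigned
}

// Murmur3 finalization mix: forces every input bit to affect every output bit.
static inline uint32_t fmix32(uint32_t h1) {
  h1 ^= h1 >> 16;
  h1 *= 0x85ebca6b;
  h1 ^= h1 >> 13;
  h1 *= 0xc2b2ae35;
  h1 ^= h1 >> 16;
  return h1;
}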
diff --git a/src/share/vm/oops/instanceKlass.hpp b/src/share/vm/oops/instanceKlass.hpp
index ba2bce874..db14be29a 100644
--- a/src/share/vm/oops/instanceKlass.hpp
+++ b/src/share/vm/oops/instanceKlass.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -554,6 +554,7 @@ class InstanceKlass: public Klass {
if (hk == NULL) {
return NULL;
} else {
+ assert(*hk != NULL, "host klass should always be set if the address is not null");
return *hk;
}
}
diff --git a/src/share/vm/oops/metadata.hpp b/src/share/vm/oops/metadata.hpp
index a1afb0d05..84a60893e 100644
--- a/src/share/vm/oops/metadata.hpp
+++ b/src/share/vm/oops/metadata.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,7 @@ class Metadata : public MetaspaceObj {
int identity_hash() { return (int)(uintptr_t)this; }
// Rehashing support for tables containing pointers to this
- unsigned int new_hash(jint seed) { ShouldNotReachHere(); return 0; }
+ unsigned int new_hash(juint seed) { ShouldNotReachHere(); return 0; }
virtual bool is_klass() const volatile { return false; }
virtual bool is_method() const volatile { return false; }
diff --git a/src/share/vm/oops/oop.cpp b/src/share/vm/oops/oop.cpp
index aed29da67..281188354 100644
--- a/src/share/vm/oops/oop.cpp
+++ b/src/share/vm/oops/oop.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -102,7 +102,7 @@ intptr_t oopDesc::slow_identity_hash() {
}
// When String table needs to rehash
-unsigned int oopDesc::new_hash(jint seed) {
+unsigned int oopDesc::new_hash(juint seed) {
EXCEPTION_MARK;
ResourceMark rm;
int length;
diff --git a/src/share/vm/oops/oop.hpp b/src/share/vm/oops/oop.hpp
index 350c0fc61..2013c3ea7 100644
--- a/src/share/vm/oops/oop.hpp
+++ b/src/share/vm/oops/oop.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -362,7 +362,7 @@ class oopDesc {
intptr_t slow_identity_hash();
// Alternate hashing code if string table is rehashed
- unsigned int new_hash(jint seed);
+ unsigned int new_hash(juint seed);
// marks are forwarded to stack when object is locked
bool has_displaced_mark() const;
diff --git a/src/share/vm/oops/symbol.cpp b/src/share/vm/oops/symbol.cpp
index 2a2c975b6..cab865503 100644
--- a/src/share/vm/oops/symbol.cpp
+++ b/src/share/vm/oops/symbol.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -207,7 +207,7 @@ const char* Symbol::as_klass_external_name() const {
}
// Alternate hashing for unbalanced symbol tables.
-unsigned int Symbol::new_hash(jint seed) {
+unsigned int Symbol::new_hash(juint seed) {
ResourceMark rm;
// Use alternate hashing algorithm on this symbol.
return AltHashing::murmur3_32(seed, (const jbyte*)as_C_string(), utf8_length());
diff --git a/src/share/vm/oops/symbol.hpp b/src/share/vm/oops/symbol.hpp
index e747c4646..aaa55c589 100644
--- a/src/share/vm/oops/symbol.hpp
+++ b/src/share/vm/oops/symbol.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -154,7 +154,7 @@ class Symbol : private SymbolBase {
int identity_hash() { return _identity_hash; }
// For symbol table alternate hashing
- unsigned int new_hash(jint seed);
+ unsigned int new_hash(juint seed);
// Reference counting. See comments above this class for when to use.
int refcount() const { return _refcount; }
diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp
index e7ec989c5..78f2c44e9 100644
--- a/src/share/vm/opto/graphKit.cpp
+++ b/src/share/vm/opto/graphKit.cpp
@@ -2994,22 +2994,28 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
}
Node* cast_obj = NULL;
- const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr();
- // We may not have profiling here or it may not help us. If we have
- // a speculative type use it to perform an exact cast.
- ciKlass* spec_obj_type = obj_type->speculative_type();
- if (spec_obj_type != NULL ||
- (data != NULL &&
- // Counter has never been decremented (due to cast failure).
- // ...This is a reasonable thing to expect. It is true of
- // all casts inserted by javac to implement generic types.
- data->as_CounterData()->count() >= 0)) {
- cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk->klass(), spec_obj_type, safe_for_replace);
- if (cast_obj != NULL) {
- if (failure_control != NULL) // failure is now impossible
- (*failure_control) = top();
- // adjust the type of the phi to the exact klass:
- phi->raise_bottom_type(_gvn.type(cast_obj)->meet_speculative(TypePtr::NULL_PTR));
+ if (tk->klass_is_exact()) {
+ // The following optimization tries to statically cast the speculative type of the object
+ // (for example obtained during profiling) to the type of the superklass and then do a
+ // dynamic check that the type of the object is what we expect. To work correctly
+ // for checkcast and aastore the type of superklass should be exact.
+ const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr();
+ // We may not have profiling here or it may not help us. If we have
+ // a speculative type use it to perform an exact cast.
+ ciKlass* spec_obj_type = obj_type->speculative_type();
+ if (spec_obj_type != NULL ||
+ (data != NULL &&
+ // Counter has never been decremented (due to cast failure).
+ // ...This is a reasonable thing to expect. It is true of
+ // all casts inserted by javac to implement generic types.
+ data->as_CounterData()->count() >= 0)) {
+ cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk->klass(), spec_obj_type, safe_for_replace);
+ if (cast_obj != NULL) {
+ if (failure_control != NULL) // failure is now impossible
+ (*failure_control) = top();
+ // adjust the type of the phi to the exact klass:
+ phi->raise_bottom_type(_gvn.type(cast_obj)->meet_speculative(TypePtr::NULL_PTR));
+ }
}
}
diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp
index 564aa7c1f..1630c6881 100644
--- a/src/share/vm/opto/library_call.cpp
+++ b/src/share/vm/opto/library_call.cpp
@@ -3237,7 +3237,8 @@ bool LibraryCallKit::inline_native_currentThread() {
// private native boolean java.lang.Thread.isInterrupted(boolean ClearInterrupted);
bool LibraryCallKit::inline_native_isInterrupted() {
// Add a fast path to t.isInterrupted(clear_int):
- // (t == Thread.current() && (!TLS._osthread._interrupted || !clear_int))
+ // (t == Thread.current() &&
+ // (!TLS._osthread._interrupted || WINDOWS_ONLY(false) NOT_WINDOWS(!clear_int)))
// ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int)
// So, in the common case that the interrupt bit is false,
// we avoid making a call into the VM. Even if the interrupt bit
@@ -3294,6 +3295,7 @@ bool LibraryCallKit::inline_native_isInterrupted() {
// drop through to next case
set_control( _gvn.transform(new (C) IfTrueNode(iff_bit)));
+#ifndef TARGET_OS_FAMILY_windows
// (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path.
Node* clr_arg = argument(1);
Node* cmp_arg = _gvn.transform(new (C) CmpINode(clr_arg, intcon(0)));
@@ -3307,6 +3309,10 @@ bool LibraryCallKit::inline_native_isInterrupted() {
// drop through to next case
set_control( _gvn.transform(new (C) IfTrueNode(iff_arg)));
+#else
+ // To return true on Windows you must read the _interrupted field
+ // and check the event state, i.e. take the slow path.
+#endif // TARGET_OS_FAMILY_windows
// (d) Otherwise, go to the slow path.
slow_region->add_req(control());
diff --git a/src/share/vm/prims/jni.cpp b/src/share/vm/prims/jni.cpp
index 3d8943f2d..cadbd54de 100644
--- a/src/share/vm/prims/jni.cpp
+++ b/src/share/vm/prims/jni.cpp
@@ -4450,8 +4450,23 @@ static bool initializeDirectBufferSupport(JNIEnv* env, JavaThread* thread) {
// Get needed field and method IDs
directByteBufferConstructor = env->GetMethodID(directByteBufferClass, "<init>", "(JI)V");
+ if (env->ExceptionCheck()) {
+ env->ExceptionClear();
+ directBufferSupportInitializeFailed = 1;
+ return false;
+ }
directBufferAddressField = env->GetFieldID(bufferClass, "address", "J");
+ if (env->ExceptionCheck()) {
+ env->ExceptionClear();
+ directBufferSupportInitializeFailed = 1;
+ return false;
+ }
bufferCapacityField = env->GetFieldID(bufferClass, "capacity", "I");
+ if (env->ExceptionCheck()) {
+ env->ExceptionClear();
+ directBufferSupportInitializeFailed = 1;
+ return false;
+ }
if ((directByteBufferConstructor == NULL) ||
(directBufferAddressField == NULL) ||
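
The three new ExceptionCheck()/ExceptionClear() blocks ensure that no further JNI calls are made while an exception is pending, which JNI otherwise leaves undefined. A minimal sketch of the same lookup-then-check pattern (hypothetical helper and names, not the jni.cpp code):

    #include <jni.h>

    // Returns false and clears the pending exception if any lookup fails,
    // so later JNI calls never run with an exception pending.
    static bool lookup_buffer_ids(JNIEnv* env, jclass buffer_cls,
                                  jfieldID* addr_id, jfieldID* cap_id) {
      *addr_id = env->GetFieldID(buffer_cls, "address", "J");
      if (env->ExceptionCheck()) {
        env->ExceptionClear();   // must clear before any further JNI call
        return false;
      }
      *cap_id = env->GetFieldID(buffer_cls, "capacity", "I");
      if (env->ExceptionCheck()) {
        env->ExceptionClear();
        return false;
      }
      return true;
    }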
diff --git a/src/share/vm/prims/unsafe.cpp b/src/share/vm/prims/unsafe.cpp
index 2e432264b..5a5d7c93c 100644
--- a/src/share/vm/prims/unsafe.cpp
+++ b/src/share/vm/prims/unsafe.cpp
@@ -858,6 +858,11 @@ static inline void throw_new(JNIEnv *env, const char *ename) {
strcpy(buf, "java/lang/");
strcat(buf, ename);
jclass cls = env->FindClass(buf);
+ if (env->ExceptionCheck()) {
+ env->ExceptionClear();
+ tty->print_cr("Unsafe: cannot throw %s because FindClass has failed", buf);
+ return;
+ }
char* msg = NULL;
env->ThrowNew(cls, msg);
}
diff --git a/src/share/vm/prims/whitebox.cpp b/src/share/vm/prims/whitebox.cpp
index 1cb799b3e..095ac20c3 100644
--- a/src/share/vm/prims/whitebox.cpp
+++ b/src/share/vm/prims/whitebox.cpp
@@ -316,9 +316,10 @@ WB_END
WB_ENTRY(jint, WB_DeoptimizeMethod(JNIEnv* env, jobject o, jobject method, jboolean is_osr))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+ int result = 0;
+ CHECK_JNI_EXCEPTION_(env, result);
MutexLockerEx mu(Compile_lock);
methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
- int result = 0;
nmethod* code;
if (is_osr) {
int bci = InvocationEntryBci;
@@ -344,6 +345,7 @@ WB_END
WB_ENTRY(jboolean, WB_IsMethodCompiled(JNIEnv* env, jobject o, jobject method, jboolean is_osr))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+ CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
MutexLockerEx mu(Compile_lock);
methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
nmethod* code = is_osr ? mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false) : mh->code();
@@ -355,6 +357,7 @@ WB_END
WB_ENTRY(jboolean, WB_IsMethodCompilable(JNIEnv* env, jobject o, jobject method, jint comp_level, jboolean is_osr))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+ CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
MutexLockerEx mu(Compile_lock);
methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
if (is_osr) {
@@ -366,6 +369,7 @@ WB_END
WB_ENTRY(jboolean, WB_IsMethodQueuedForCompilation(JNIEnv* env, jobject o, jobject method))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+ CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
MutexLockerEx mu(Compile_lock);
methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
return mh->queued_for_compilation();
@@ -373,6 +377,7 @@ WB_END
WB_ENTRY(jint, WB_GetMethodCompilationLevel(JNIEnv* env, jobject o, jobject method, jboolean is_osr))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+ CHECK_JNI_EXCEPTION_(env, CompLevel_none);
methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
nmethod* code = is_osr ? mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false) : mh->code();
return (code != NULL ? code->comp_level() : CompLevel_none);
@@ -380,6 +385,7 @@ WB_END
WB_ENTRY(void, WB_MakeMethodNotCompilable(JNIEnv* env, jobject o, jobject method, jint comp_level, jboolean is_osr))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+ CHECK_JNI_EXCEPTION(env);
methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
if (is_osr) {
mh->set_not_osr_compilable(comp_level, true /* report */, "WhiteBox");
@@ -390,6 +396,7 @@ WB_END
WB_ENTRY(jint, WB_GetMethodEntryBci(JNIEnv* env, jobject o, jobject method))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+ CHECK_JNI_EXCEPTION_(env, InvocationEntryBci);
methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
nmethod* code = mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false);
return (code != NULL && code->is_osr_method() ? code->osr_entry_bci() : InvocationEntryBci);
@@ -397,6 +404,7 @@ WB_END
WB_ENTRY(jboolean, WB_TestSetDontInlineMethod(JNIEnv* env, jobject o, jobject method, jboolean value))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+ CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
bool result = mh->dont_inline();
mh->set_dont_inline(value == JNI_TRUE);
@@ -414,6 +422,7 @@ WB_END
WB_ENTRY(jboolean, WB_TestSetForceInlineMethod(JNIEnv* env, jobject o, jobject method, jboolean value))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+ CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
bool result = mh->force_inline();
mh->set_force_inline(value == JNI_TRUE);
@@ -422,6 +431,7 @@ WB_END
WB_ENTRY(jboolean, WB_EnqueueMethodForCompilation(JNIEnv* env, jobject o, jobject method, jint comp_level, jint bci))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+ CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
nmethod* nm = CompileBroker::compile_method(mh, bci, comp_level, mh, mh->invocation_count(), "WhiteBox", THREAD);
MutexLockerEx mu(Compile_lock);
@@ -430,6 +440,7 @@ WB_END
WB_ENTRY(void, WB_ClearMethodState(JNIEnv* env, jobject o, jobject method))
jmethodID jmid = reflected_method_to_jmid(thread, env, method);
+ CHECK_JNI_EXCEPTION(env);
methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
MutexLockerEx mu(Compile_lock);
MethodData* mdo = mh->method_data();
@@ -616,14 +627,18 @@ JVM_ENTRY(void, JVM_RegisterWhiteBoxMethods(JNIEnv* env, jclass wbclass))
bool result = true;
// one by one registration natives for exception catching
jclass exceptionKlass = env->FindClass(vmSymbols::java_lang_NoSuchMethodError()->as_C_string());
+ CHECK_JNI_EXCEPTION(env);
for (int i = 0, n = sizeof(methods) / sizeof(methods[0]); i < n; ++i) {
if (env->RegisterNatives(wbclass, methods + i, 1) != 0) {
result = false;
- if (env->ExceptionCheck() && env->IsInstanceOf(env->ExceptionOccurred(), exceptionKlass)) {
- // j.l.NoSuchMethodError is thrown when a method can't be found or a method is not native
- // ignoring the exception
- tty->print_cr("Warning: 'NoSuchMethodError' on register of sun.hotspot.WhiteBox::%s%s", methods[i].name, methods[i].signature);
+ jthrowable throwable_obj = env->ExceptionOccurred();
+ if (throwable_obj != NULL) {
env->ExceptionClear();
+ if (env->IsInstanceOf(throwable_obj, exceptionKlass)) {
+ // j.l.NoSuchMethodError is thrown when a method can't be found or a method is not native
+ // ignoring the exception
+ tty->print_cr("Warning: 'NoSuchMethodError' on register of sun.hotspot.WhiteBox::%s%s", methods[i].name, methods[i].signature);
+ }
} else {
// register is failed w/o exception or w/ unexpected exception
tty->print_cr("Warning: unexpected error on register of sun.hotspot.WhiteBox::%s%s. All methods will be unregistered", methods[i].name, methods[i].signature);
diff --git a/src/share/vm/prims/whitebox.hpp b/src/share/vm/prims/whitebox.hpp
index f78117414..a6e27b490 100644
--- a/src/share/vm/prims/whitebox.hpp
+++ b/src/share/vm/prims/whitebox.hpp
@@ -36,6 +36,24 @@
#define WB_END JNI_END
#define WB_METHOD_DECLARE(result_type) extern "C" result_type JNICALL
+#define CHECK_JNI_EXCEPTION_(env, value) \
+ do { \
+ JavaThread* THREAD = JavaThread::thread_from_jni_environment(env); \
+ if (HAS_PENDING_EXCEPTION) { \
+ CLEAR_PENDING_EXCEPTION; \
+ return(value); \
+ } \
+ } while (0)
+
+#define CHECK_JNI_EXCEPTION(env) \
+ do { \
+ JavaThread* THREAD = JavaThread::thread_from_jni_environment(env); \
+ if (HAS_PENDING_EXCEPTION) { \
+ CLEAR_PENDING_EXCEPTION; \
+ return; \
+ } \
+ } while (0)
+
class WhiteBox : public AllStatic {
private:
static bool _used;
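
The two macros resolve the JavaThread from the JNIEnv and return early (with or without a value) if the preceding call left a pending exception. A self-contained sketch of the same early-return idiom, using stand-in types rather than the HotSpot THREAD/HAS_PENDING_EXCEPTION machinery:

    #include <cstdio>

    // Stand-in "environment" with a pending-error flag; the real macros
    // consult the JavaThread's pending exception instead.
    struct FakeEnv { bool pending; };

    #define CHECK_PENDING_(env, value) \
      do {                             \
        if ((env)->pending) {          \
          (env)->pending = false;      \
          return (value);              \
        }                              \
      } while (0)

    static int method_entry_bci(FakeEnv* env, bool bad_method) {
      // a failed jmethodID resolution would set the pending flag
      if (bad_method) env->pending = true;
      CHECK_PENDING_(env, -1);     // early return with the sentinel value
      return 42;                   // pretend bci of a successfully resolved method
    }

    int main() {
      FakeEnv env = { false };
      printf("%d %d\n", method_entry_bci(&env, false), method_entry_bci(&env, true));
      return 0;
    }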
diff --git a/src/share/vm/runtime/os.hpp b/src/share/vm/runtime/os.hpp
index 17fcd3bdd..47ebeb6bb 100644
--- a/src/share/vm/runtime/os.hpp
+++ b/src/share/vm/runtime/os.hpp
@@ -430,7 +430,10 @@ class os: AllStatic {
static intx current_thread_id();
static int current_process_id();
static int sleep(Thread* thread, jlong ms, bool interruptable);
- static int naked_sleep();
+ // Short standalone OS sleep suitable for slow path spin loop.
+ // Ignores Thread.interrupt() (so keep it short).
+ // ms = 0, will sleep for the least amount of time allowed by the OS.
+ static void naked_short_sleep(jlong ms);
static void infinite_sleep(); // never returns, use with CAUTION
static void yield(); // Yields to all threads with same priority
enum YieldResult {
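
naked_short_sleep() replaces naked_sleep(); each os_*.cpp supplies the platform implementation. As a rough POSIX-flavored sketch of the contract (not the actual platform code), something like the following would do:

    #include <time.h>

    // Short, non-interruptible sleep of the kind naked_short_sleep() describes:
    // ms == 0 degenerates to a ~1 ns request, i.e. the shortest sleep the
    // kernel will grant, and EINTR is simply ignored.
    static void naked_short_sleep_sketch(long ms) {
      struct timespec req;
      req.tv_sec  = ms / 1000;
      req.tv_nsec = (ms % 1000) * 1000000L;
      if (ms == 0) {
        req.tv_nsec = 1;           // least amount of time allowed by the OS
      }
      nanosleep(&req, NULL);       // ignore EINTR: callers keep it short anyway
    }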
diff --git a/src/share/vm/runtime/park.cpp b/src/share/vm/runtime/park.cpp
index 6380570ef..0ab5b5b67 100644
--- a/src/share/vm/runtime/park.cpp
+++ b/src/share/vm/runtime/park.cpp
@@ -59,58 +59,22 @@ ParkEvent * ParkEvent::Allocate (Thread * t) {
// Start by trying to recycle an existing but unassociated
// ParkEvent from the global free list.
- for (;;) {
- ev = FreeList ;
- if (ev == NULL) break ;
- // 1: Detach - sequester or privatize the list
- // Tantamount to ev = Swap (&FreeList, NULL)
- if (Atomic::cmpxchg_ptr (NULL, &FreeList, ev) != ev) {
- continue ;
+ // Using a spin lock since we are part of the mutex impl.
+ // 8028280: using concurrent free list without memory management can leak
+ // pretty badly it turns out.
+ Thread::SpinAcquire(&ListLock, "ParkEventFreeListAllocate");
+ {
+ ev = FreeList;
+ if (ev != NULL) {
+ FreeList = ev->FreeNext;
}
-
- // We've detached the list. The list in-hand is now
- // local to this thread. This thread can operate on the
- // list without risk of interference from other threads.
- // 2: Extract -- pop the 1st element from the list.
- ParkEvent * List = ev->FreeNext ;
- if (List == NULL) break ;
- for (;;) {
- // 3: Try to reattach the residual list
- guarantee (List != NULL, "invariant") ;
- ParkEvent * Arv = (ParkEvent *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
- if (Arv == NULL) break ;
-
- // New nodes arrived. Try to detach the recent arrivals.
- if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
- continue ;
- }
- guarantee (Arv != NULL, "invariant") ;
- // 4: Merge Arv into List
- ParkEvent * Tail = List ;
- while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
- Tail->FreeNext = Arv ;
- }
- break ;
}
+ Thread::SpinRelease(&ListLock);
if (ev != NULL) {
guarantee (ev->AssociatedWith == NULL, "invariant") ;
} else {
// Do this the hard way -- materialize a new ParkEvent.
- // In rare cases an allocating thread might detach a long list --
- // installing null into FreeList -- and then stall or be obstructed.
- // A 2nd thread calling Allocate() would see FreeList == null.
- // The list held privately by the 1st thread is unavailable to the 2nd thread.
- // In that case the 2nd thread would have to materialize a new ParkEvent,
- // even though free ParkEvents existed in the system. In this case we end up
- // with more ParkEvents in circulation than we need, but the race is
- // rare and the outcome is benign. Ideally, the # of extant ParkEvents
- // is equal to the maximum # of threads that existed at any one time.
- // Because of the race mentioned above, segments of the freelist
- // can be transiently inaccessible. At worst we may end up with the
- // # of ParkEvents in circulation slightly above the ideal.
- // Note that if we didn't have the TSM/immortal constraint, then
- // when reattaching, above, we could trim the list.
ev = new ParkEvent () ;
guarantee ((intptr_t(ev) & 0xFF) == 0, "invariant") ;
}
@@ -124,13 +88,14 @@ void ParkEvent::Release (ParkEvent * ev) {
if (ev == NULL) return ;
guarantee (ev->FreeNext == NULL , "invariant") ;
ev->AssociatedWith = NULL ;
- for (;;) {
- // Push ev onto FreeList
- // The mechanism is "half" lock-free.
- ParkEvent * List = FreeList ;
- ev->FreeNext = List ;
- if (Atomic::cmpxchg_ptr (ev, &FreeList, List) == List) break ;
+ // Note that if we didn't have the TSM/immortal constraint, then
+ // when reattaching we could trim the list.
+ Thread::SpinAcquire(&ListLock, "ParkEventFreeListRelease");
+ {
+ ev->FreeNext = FreeList;
+ FreeList = ev;
}
+ Thread::SpinRelease(&ListLock);
}
// Override operator new and delete so we can ensure that the
@@ -164,56 +129,21 @@ Parker * Parker::Allocate (JavaThread * t) {
// Start by trying to recycle an existing but unassociated
// Parker from the global free list.
- for (;;) {
- p = FreeList ;
- if (p == NULL) break ;
- // 1: Detach
- // Tantamount to p = Swap (&FreeList, NULL)
- if (Atomic::cmpxchg_ptr (NULL, &FreeList, p) != p) {
- continue ;
+ // 8028280: using concurrent free list without memory management can leak
+ // pretty badly it turns out.
+ Thread::SpinAcquire(&ListLock, "ParkerFreeListAllocate");
+ {
+ p = FreeList;
+ if (p != NULL) {
+ FreeList = p->FreeNext;
}
-
- // We've detached the list. The list in-hand is now
- // local to this thread. This thread can operate on the
- // list without risk of interference from other threads.
- // 2: Extract -- pop the 1st element from the list.
- Parker * List = p->FreeNext ;
- if (List == NULL) break ;
- for (;;) {
- // 3: Try to reattach the residual list
- guarantee (List != NULL, "invariant") ;
- Parker * Arv = (Parker *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
- if (Arv == NULL) break ;
-
- // New nodes arrived. Try to detach the recent arrivals.
- if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
- continue ;
- }
- guarantee (Arv != NULL, "invariant") ;
- // 4: Merge Arv into List
- Parker * Tail = List ;
- while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
- Tail->FreeNext = Arv ;
- }
- break ;
}
+ Thread::SpinRelease(&ListLock);
if (p != NULL) {
guarantee (p->AssociatedWith == NULL, "invariant") ;
} else {
// Do this the hard way -- materialize a new Parker..
- // In rare cases an allocating thread might detach
- // a long list -- installing null into FreeList --and
- // then stall. Another thread calling Allocate() would see
- // FreeList == null and then invoke the ctor. In this case we
- // end up with more Parkers in circulation than we need, but
- // the race is rare and the outcome is benign.
- // Ideally, the # of extant Parkers is equal to the
- // maximum # of threads that existed at any one time.
- // Because of the race mentioned above, segments of the
- // freelist can be transiently inaccessible. At worst
- // we may end up with the # of Parkers in circulation
- // slightly above the ideal.
p = new Parker() ;
}
p->AssociatedWith = t ; // Associate p with t
@@ -227,11 +157,12 @@ void Parker::Release (Parker * p) {
guarantee (p->AssociatedWith != NULL, "invariant") ;
guarantee (p->FreeNext == NULL , "invariant") ;
p->AssociatedWith = NULL ;
- for (;;) {
- // Push p onto FreeList
- Parker * List = FreeList ;
- p->FreeNext = List ;
- if (Atomic::cmpxchg_ptr (p, &FreeList, List) == List) break ;
+
+ Thread::SpinAcquire(&ListLock, "ParkerFreeListRelease");
+ {
+ p->FreeNext = FreeList;
+ FreeList = p;
}
+ Thread::SpinRelease(&ListLock);
}
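
Both Allocate() paths now pop from, and both Release() paths push onto, a plain singly linked free list under Thread::SpinAcquire/SpinRelease, replacing the cmpxchg detach/reattach scheme cited in 8028280. A standalone sketch of the resulting structure (illustrative names, std::atomic_flag standing in for the HotSpot spin lock):

    #include <atomic>
    #include <cstdio>

    struct Node { Node* free_next; };

    static std::atomic_flag list_lock = ATOMIC_FLAG_INIT;
    static Node* free_list = NULL;

    static Node* allocate_node() {
      Node* n;
      while (list_lock.test_and_set(std::memory_order_acquire)) { /* spin */ }
      n = free_list;
      if (n != NULL) free_list = n->free_next;   // pop head under the lock
      list_lock.clear(std::memory_order_release);
      if (n == NULL) n = new Node();             // the hard way: materialize one
      n->free_next = NULL;
      return n;
    }

    static void release_node(Node* n) {
      while (list_lock.test_and_set(std::memory_order_acquire)) { /* spin */ }
      n->free_next = free_list;                  // push head under the lock
      free_list = n;
      list_lock.clear(std::memory_order_release);
    }

    int main() {
      Node* a = allocate_node();
      release_node(a);
      Node* b = allocate_node();                 // recycles a
      printf("%s\n", (a == b) ? "recycled" : "fresh");
      return 0;
    }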
diff --git a/src/share/vm/runtime/thread.cpp b/src/share/vm/runtime/thread.cpp
index 9f31c25b7..af24fac2c 100644
--- a/src/share/vm/runtime/thread.cpp
+++ b/src/share/vm/runtime/thread.cpp
@@ -4446,9 +4446,7 @@ void Thread::SpinAcquire (volatile int * adr, const char * LockName) {
++ctr ;
if ((ctr & 0xFFF) == 0 || !os::is_MP()) {
if (Yields > 5) {
- // Consider using a simple NakedSleep() instead.
- // Then SpinAcquire could be called by non-JVM threads
- Thread::current()->_ParkEvent->park(1) ;
+ os::naked_short_sleep(1);
} else {
os::NakedYield() ;
++Yields ;
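
With os::naked_short_sleep() available, SpinAcquire no longer parks on the current thread's ParkEvent, so it can also be used by threads without one. A sketch of the backoff policy in portable C++ (illustrative, not the HotSpot code):

    #include <atomic>
    #include <chrono>
    #include <thread>

    // Spin, yield a few times, and once yielding stops helping fall back
    // to a short timed sleep instead of parking.
    static void spin_acquire_sketch(std::atomic_flag* lock) {
      int ctr = 0, yields = 0;
      while (lock->test_and_set(std::memory_order_acquire)) {
        ++ctr;
        if ((ctr & 0xFFF) == 0) {
          if (yields > 5) {
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
          } else {
            std::this_thread::yield();
            ++yields;
          }
        }
      }
    }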
diff --git a/src/share/vm/utilities/hashtable.cpp b/src/share/vm/utilities/hashtable.cpp
index 3e1413f61..40fb3b153 100644
--- a/src/share/vm/utilities/hashtable.cpp
+++ b/src/share/vm/utilities/hashtable.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -93,7 +93,7 @@ template <MEMFLAGS F> bool BasicHashtable<F>::check_rehash_table(int count) {
return false;
}
-template <class T, MEMFLAGS F> jint Hashtable<T, F>::_seed = 0;
+template <class T, MEMFLAGS F> juint Hashtable<T, F>::_seed = 0;
// Create a new table and using alternate hash code, populate the new table
// with the existing elements. This can be used to change the hash code
diff --git a/src/share/vm/utilities/hashtable.hpp b/src/share/vm/utilities/hashtable.hpp
index 468965dab..aa4510024 100644
--- a/src/share/vm/utilities/hashtable.hpp
+++ b/src/share/vm/utilities/hashtable.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -280,7 +280,7 @@ protected:
// Function to move these elements into the new table.
void move_to(Hashtable<T, F>* new_table);
static bool use_alternate_hashcode() { return _seed != 0; }
- static jint seed() { return _seed; }
+ static juint seed() { return _seed; }
static int literal_size(Symbol *symbol);
static int literal_size(oop oop);
@@ -296,7 +296,7 @@ public:
void dump_table(outputStream* st, const char *table_name);
private:
- static jint _seed;
+ static juint _seed;
};
diff --git a/src/share/vm/utilities/vmError.cpp b/src/share/vm/utilities/vmError.cpp
index 4b0953ae4..e950d3386 100644
--- a/src/share/vm/utilities/vmError.cpp
+++ b/src/share/vm/utilities/vmError.cpp
@@ -592,13 +592,24 @@ void VMError::report(outputStream* st) {
st->cr();
// Compiled code may use EBP register on x86 so it looks like
// non-walkable C frame. Use frame.sender() for java frames.
- if (_thread && _thread->is_Java_thread() && fr.is_java_frame()) {
- RegisterMap map((JavaThread*)_thread, false); // No update
- fr = fr.sender(&map);
- continue;
+ if (_thread && _thread->is_Java_thread()) {
+ // Catch very first native frame by using stack address.
+ // For JavaThread stack_base and stack_size should be set.
+ if (!_thread->on_local_stack((address)(fr.sender_sp() + 1))) {
+ break;
+ }
+ if (fr.is_java_frame()) {
+ RegisterMap map((JavaThread*)_thread, false); // No update
+ fr = fr.sender(&map);
+ } else {
+ fr = os::get_sender_for_C_frame(&fr);
+ }
+ } else {
+ // is_first_C_frame() only does simple checks on the frame pointer;
+ // it will pass if java compiled code has a pointer in EBP.
+ if (os::is_first_C_frame(&fr)) break;
+ fr = os::get_sender_for_C_frame(&fr);
}
- if (os::is_first_C_frame(&fr)) break;
- fr = os::get_sender_for_C_frame(&fr);
}
if (count > StackPrintLimit) {
diff --git a/test/TEST.groups b/test/TEST.groups
index ee67fe938..017876be0 100644
--- a/test/TEST.groups
+++ b/test/TEST.groups
@@ -131,7 +131,9 @@ needs_compact3 = \
gc/arguments/TestG1HeapRegionSize.java \
gc/metaspace/TestMetaspaceMemoryPool.java \
runtime/InternalApi/ThreadCpuTimesDeadlock.java \
- serviceability/threads/TestFalseDeadLock.java
+ serviceability/threads/TestFalseDeadLock.java \
+ compiler/tiered/NonTieredLevelsTest.java \
+ compiler/tiered/TieredLevelsTest.java
# Compact 2 adds full VM tests
compact2 = \
diff --git a/test/compiler/ciReplay/TestVM.sh b/test/compiler/ciReplay/TestVM.sh
index e6c3cc569..615446667 100644
--- a/test/compiler/ciReplay/TestVM.sh
+++ b/test/compiler/ciReplay/TestVM.sh
@@ -78,8 +78,8 @@ then
positive_test `expr $stop_level + 50` "TIERED LEVEL $stop_level :: REPLAY" \
"-XX:TieredStopAtLevel=$stop_level"
stop_level=`expr $stop_level + 1`
+ cleanup
done
- cleanup
fi
echo TEST PASSED
diff --git a/test/compiler/ciReplay/common.sh b/test/compiler/ciReplay/common.sh
index bcfdad823..34fd729a8 100644
--- a/test/compiler/ciReplay/common.sh
+++ b/test/compiler/ciReplay/common.sh
@@ -99,14 +99,13 @@ common_tests() {
# $2 - non-tiered comp_level
nontiered_tests() {
level=`grep "^compile " $replay_data | awk '{print $6}'`
- # is level available in non-tiere
+ # is level available in non-tiered
if [ "$level" -eq $2 ]
then
positive_test $1 "NON-TIERED :: AVAILABLE COMP_LEVEL" \
-XX:-TieredCompilation
else
negative_test `expr $1 + 1` "NON-TIERED :: UNAVAILABLE COMP_LEVEL" \
- negative_test `expr $1 + 1` "NON-TIERED :: UNAVAILABLE COMP_LEVEL" \
-XX:-TieredCompilation
fi
}
diff --git a/test/compiler/tiered/NonTieredLevelsTest.java b/test/compiler/tiered/NonTieredLevelsTest.java
index 715d32a89..13411a0dd 100644
--- a/test/compiler/tiered/NonTieredLevelsTest.java
+++ b/test/compiler/tiered/NonTieredLevelsTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -70,6 +70,9 @@ public class NonTieredLevelsTest extends CompLevelsTest {
@Override
protected void test() throws Exception {
+ if (skipXcompOSR()) {
+ return;
+ }
checkNotCompiled();
compile();
checkCompiled();
diff --git a/test/compiler/tiered/TieredLevelsTest.java b/test/compiler/tiered/TieredLevelsTest.java
index 675a39449..9fb2254d0 100644
--- a/test/compiler/tiered/TieredLevelsTest.java
+++ b/test/compiler/tiered/TieredLevelsTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,9 @@ public class TieredLevelsTest extends CompLevelsTest {
@Override
protected void test() throws Exception {
+ if (skipXcompOSR()) {
+ return;
+ }
checkNotCompiled();
compile();
checkCompiled();
diff --git a/test/compiler/whitebox/CompilerWhiteBoxTest.java b/test/compiler/whitebox/CompilerWhiteBoxTest.java
index e47231e29..450423c04 100644
--- a/test/compiler/whitebox/CompilerWhiteBoxTest.java
+++ b/test/compiler/whitebox/CompilerWhiteBoxTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -380,6 +380,20 @@ public abstract class CompilerWhiteBoxTest {
/** flag for OSR test case */
boolean isOsr();
}
+
+ /**
+ * @return {@code true} if the current test case is OSR and the mode is
+ * Xcomp, otherwise {@code false}
+ */
+ protected boolean skipXcompOSR() {
+ boolean result = testCase.isOsr()
+ && CompilerWhiteBoxTest.MODE.startsWith("compiled ");
+ if (result && IS_VERBOSE) {
+ System.err.printf("Warning: %s is not applicable in %s%n",
+ testCase.name(), CompilerWhiteBoxTest.MODE);
+ }
+ return result;
+ }
}
enum SimpleTestCase implements CompilerWhiteBoxTest.TestCase {
diff --git a/test/compiler/whitebox/DeoptimizeAllTest.java b/test/compiler/whitebox/DeoptimizeAllTest.java
index 350c99c8f..ea4e36400 100644
--- a/test/compiler/whitebox/DeoptimizeAllTest.java
+++ b/test/compiler/whitebox/DeoptimizeAllTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -51,11 +51,8 @@ public class DeoptimizeAllTest extends CompilerWhiteBoxTest {
*/
@Override
protected void test() throws Exception {
- if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
- "compiled ")) {
- System.err.printf("Warning: %s is not applicable in %s%n",
- testCase.name(), CompilerWhiteBoxTest.MODE);
- return;
+ if (skipXcompOSR()) {
+ return;
}
compile();
checkCompiled();
diff --git a/test/compiler/whitebox/DeoptimizeMethodTest.java b/test/compiler/whitebox/DeoptimizeMethodTest.java
index 565a5b91c..0b9ffd2d9 100644
--- a/test/compiler/whitebox/DeoptimizeMethodTest.java
+++ b/test/compiler/whitebox/DeoptimizeMethodTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -51,11 +51,8 @@ public class DeoptimizeMethodTest extends CompilerWhiteBoxTest {
*/
@Override
protected void test() throws Exception {
- if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
- "compiled ")) {
- System.err.printf("Warning: %s is not applicable in %s%n",
- testCase.name(), CompilerWhiteBoxTest.MODE);
- return;
+ if (skipXcompOSR()) {
+ return;
}
compile();
checkCompiled();
diff --git a/test/compiler/whitebox/IsMethodCompilableTest.java b/test/compiler/whitebox/IsMethodCompilableTest.java
index bd5916a7c..0b7dc1786 100644
--- a/test/compiler/whitebox/IsMethodCompilableTest.java
+++ b/test/compiler/whitebox/IsMethodCompilableTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -66,10 +66,7 @@ public class IsMethodCompilableTest extends CompilerWhiteBoxTest {
*/
@Override
protected void test() throws Exception {
- if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
- "compiled ")) {
- System.err.printf("Warning: %s is not applicable in %s%n",
- testCase.name(), CompilerWhiteBoxTest.MODE);
+ if (skipXcompOSR()) {
return;
}
if (!isCompilable()) {
diff --git a/test/compiler/whitebox/MakeMethodNotCompilableTest.java b/test/compiler/whitebox/MakeMethodNotCompilableTest.java
index cbd65da45..d65868797 100644
--- a/test/compiler/whitebox/MakeMethodNotCompilableTest.java
+++ b/test/compiler/whitebox/MakeMethodNotCompilableTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -53,11 +53,8 @@ public class MakeMethodNotCompilableTest extends CompilerWhiteBoxTest {
*/
@Override
protected void test() throws Exception {
- if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
- "compiled ")) {
- System.err.printf("Warning: %s is not applicable in %s%n",
- testCase.name(), CompilerWhiteBoxTest.MODE);
- return;
+ if (skipXcompOSR()) {
+ return;
}
checkNotCompiled();
if (!isCompilable()) {