From c31a129a31b13c732a47561b028540e6272d26eb Mon Sep 17 00:00:00 2001 From: amurillo Date: Fri, 28 Feb 2014 09:10:55 -0800 Subject: 8036032: new hotspot build - hs25.20-b06 Reviewed-by: jcoomes --- make/hotspot_version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make/hotspot_version b/make/hotspot_version index b6fe79757..b9a29e006 100644 --- a/make/hotspot_version +++ b/make/hotspot_version @@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2014 HS_MAJOR_VER=25 HS_MINOR_VER=20 -HS_BUILD_NUMBER=05 +HS_BUILD_NUMBER=06 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 -- cgit v1.2.3 From fbd82114d27fefd7058218004f7c61904a3cbe92 Mon Sep 17 00:00:00 2001 From: ccheung Date: Wed, 12 Feb 2014 12:01:45 -0800 Subject: 8028785: [parfait] warnings from b116 for hotspot.src.share.vm.prims: JNI exception pending Summary: added JNI exception pending check in several files under src/share/vm/prims directory Reviewed-by: coleenp, minqi --- src/share/vm/prims/jni.cpp | 15 +++++++++++++++ src/share/vm/prims/unsafe.cpp | 5 +++++ src/share/vm/prims/whitebox.cpp | 25 ++++++++++++++++++++----- src/share/vm/prims/whitebox.hpp | 16 ++++++++++++++++ 4 files changed, 56 insertions(+), 5 deletions(-) diff --git a/src/share/vm/prims/jni.cpp b/src/share/vm/prims/jni.cpp index 3d8943f2d..cadbd54de 100644 --- a/src/share/vm/prims/jni.cpp +++ b/src/share/vm/prims/jni.cpp @@ -4450,8 +4450,23 @@ static bool initializeDirectBufferSupport(JNIEnv* env, JavaThread* thread) { // Get needed field and method IDs directByteBufferConstructor = env->GetMethodID(directByteBufferClass, "<init>", "(JI)V"); + if (env->ExceptionCheck()) { + env->ExceptionClear(); + directBufferSupportInitializeFailed = 1; + return false; + } directBufferAddressField = env->GetFieldID(bufferClass, "address", "J"); + if (env->ExceptionCheck()) { + env->ExceptionClear(); + directBufferSupportInitializeFailed = 1; + return false; + } bufferCapacityField = env->GetFieldID(bufferClass, "capacity", "I"); + if (env->ExceptionCheck()) { + env->ExceptionClear(); + directBufferSupportInitializeFailed = 1; + return false; + } if ((directByteBufferConstructor == NULL) || (directBufferAddressField == NULL) || diff --git a/src/share/vm/prims/unsafe.cpp b/src/share/vm/prims/unsafe.cpp index 2e432264b..5a5d7c93c 100644 --- a/src/share/vm/prims/unsafe.cpp +++ b/src/share/vm/prims/unsafe.cpp @@ -858,6 +858,11 @@ static inline void throw_new(JNIEnv *env, const char *ename) { strcpy(buf, "java/lang/"); strcat(buf, ename); jclass cls = env->FindClass(buf); + if (env->ExceptionCheck()) { + env->ExceptionClear(); + tty->print_cr("Unsafe: cannot throw %s because FindClass has failed", buf); + return; + } char* msg = NULL; env->ThrowNew(cls, msg); } diff --git a/src/share/vm/prims/whitebox.cpp b/src/share/vm/prims/whitebox.cpp index 1cb799b3e..095ac20c3 100644 --- a/src/share/vm/prims/whitebox.cpp +++ b/src/share/vm/prims/whitebox.cpp @@ -316,9 +316,10 @@ WB_END WB_ENTRY(jint, WB_DeoptimizeMethod(JNIEnv* env, jobject o, jobject method, jboolean is_osr)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + int result = 0; + CHECK_JNI_EXCEPTION_(env, result); MutexLockerEx mu(Compile_lock); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); - int result = 0; nmethod* code; if (is_osr) { int bci = InvocationEntryBci; @@ -344,6 +345,7 @@ WB_END WB_ENTRY(jboolean, WB_IsMethodCompiled(JNIEnv* env, jobject o, jobject method, jboolean is_osr)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); MutexLockerEx
mu(Compile_lock); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); nmethod* code = is_osr ? mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false) : mh->code(); @@ -355,6 +357,7 @@ WB_END WB_ENTRY(jboolean, WB_IsMethodCompilable(JNIEnv* env, jobject o, jobject method, jint comp_level, jboolean is_osr)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); MutexLockerEx mu(Compile_lock); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); if (is_osr) { @@ -366,6 +369,7 @@ WB_END WB_ENTRY(jboolean, WB_IsMethodQueuedForCompilation(JNIEnv* env, jobject o, jobject method)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); MutexLockerEx mu(Compile_lock); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); return mh->queued_for_compilation(); @@ -373,6 +377,7 @@ WB_END WB_ENTRY(jint, WB_GetMethodCompilationLevel(JNIEnv* env, jobject o, jobject method, jboolean is_osr)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, CompLevel_none); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); nmethod* code = is_osr ? mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false) : mh->code(); return (code != NULL ? code->comp_level() : CompLevel_none); @@ -380,6 +385,7 @@ WB_END WB_ENTRY(void, WB_MakeMethodNotCompilable(JNIEnv* env, jobject o, jobject method, jint comp_level, jboolean is_osr)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION(env); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); if (is_osr) { mh->set_not_osr_compilable(comp_level, true /* report */, "WhiteBox"); @@ -390,6 +396,7 @@ WB_END WB_ENTRY(jint, WB_GetMethodEntryBci(JNIEnv* env, jobject o, jobject method)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, InvocationEntryBci); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); nmethod* code = mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false); return (code != NULL && code->is_osr_method() ? 
code->osr_entry_bci() : InvocationEntryBci); @@ -397,6 +404,7 @@ WB_END WB_ENTRY(jboolean, WB_TestSetDontInlineMethod(JNIEnv* env, jobject o, jobject method, jboolean value)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); bool result = mh->dont_inline(); mh->set_dont_inline(value == JNI_TRUE); @@ -414,6 +422,7 @@ WB_END WB_ENTRY(jboolean, WB_TestSetForceInlineMethod(JNIEnv* env, jobject o, jobject method, jboolean value)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); bool result = mh->force_inline(); mh->set_force_inline(value == JNI_TRUE); @@ -422,6 +431,7 @@ WB_END WB_ENTRY(jboolean, WB_EnqueueMethodForCompilation(JNIEnv* env, jobject o, jobject method, jint comp_level, jint bci)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); nmethod* nm = CompileBroker::compile_method(mh, bci, comp_level, mh, mh->invocation_count(), "WhiteBox", THREAD); MutexLockerEx mu(Compile_lock); @@ -430,6 +440,7 @@ WB_END WB_ENTRY(void, WB_ClearMethodState(JNIEnv* env, jobject o, jobject method)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION(env); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); MutexLockerEx mu(Compile_lock); MethodData* mdo = mh->method_data(); @@ -616,14 +627,18 @@ JVM_ENTRY(void, JVM_RegisterWhiteBoxMethods(JNIEnv* env, jclass wbclass)) bool result = true; // one by one registration natives for exception catching jclass exceptionKlass = env->FindClass(vmSymbols::java_lang_NoSuchMethodError()->as_C_string()); + CHECK_JNI_EXCEPTION(env); for (int i = 0, n = sizeof(methods) / sizeof(methods[0]); i < n; ++i) { if (env->RegisterNatives(wbclass, methods + i, 1) != 0) { result = false; - if (env->ExceptionCheck() && env->IsInstanceOf(env->ExceptionOccurred(), exceptionKlass)) { - // j.l.NoSuchMethodError is thrown when a method can't be found or a method is not native - // ignoring the exception - tty->print_cr("Warning: 'NoSuchMethodError' on register of sun.hotspot.WhiteBox::%s%s", methods[i].name, methods[i].signature); + jthrowable throwable_obj = env->ExceptionOccurred(); + if (throwable_obj != NULL) { env->ExceptionClear(); + if (env->IsInstanceOf(throwable_obj, exceptionKlass)) { + // j.l.NoSuchMethodError is thrown when a method can't be found or a method is not native + // ignoring the exception + tty->print_cr("Warning: 'NoSuchMethodError' on register of sun.hotspot.WhiteBox::%s%s", methods[i].name, methods[i].signature); + } } else { // register is failed w/o exception or w/ unexpected exception tty->print_cr("Warning: unexpected error on register of sun.hotspot.WhiteBox::%s%s. 
All methods will be unregistered", methods[i].name, methods[i].signature); diff --git a/src/share/vm/prims/whitebox.hpp b/src/share/vm/prims/whitebox.hpp index f78117414..42d00b2fc 100644 --- a/src/share/vm/prims/whitebox.hpp +++ b/src/share/vm/prims/whitebox.hpp @@ -36,6 +36,22 @@ #define WB_END JNI_END #define WB_METHOD_DECLARE(result_type) extern "C" result_type JNICALL +#define CHECK_JNI_EXCEPTION_(env, value) \ + do { \ + if (env->ExceptionCheck()) { \ + env->ExceptionClear(); \ + return(value); \ + } \ + } while (0) + +#define CHECK_JNI_EXCEPTION(env) \ + do { \ + if (env->ExceptionCheck()) { \ + env->ExceptionClear(); \ + return; \ + } \ + } while (0) + class WhiteBox : public AllStatic { private: static bool _used; -- cgit v1.2.3 From 0a64da965f5dd5713f04fb7872fc3787a781dcb0 Mon Sep 17 00:00:00 2001 From: ccheung Date: Fri, 14 Feb 2014 09:50:17 -0800 Subject: 8034860: Fatal error due to incorrect thread state during nightly testing Summary: use the HAS_PENDING_EXCEPTION and CLEAR_PENDING_EXCEPTION macros Reviewed-by: dholmes, coleenp, dcubed --- src/share/vm/prims/whitebox.hpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/share/vm/prims/whitebox.hpp b/src/share/vm/prims/whitebox.hpp index 42d00b2fc..a6e27b490 100644 --- a/src/share/vm/prims/whitebox.hpp +++ b/src/share/vm/prims/whitebox.hpp @@ -36,20 +36,22 @@ #define WB_END JNI_END #define WB_METHOD_DECLARE(result_type) extern "C" result_type JNICALL -#define CHECK_JNI_EXCEPTION_(env, value) \ - do { \ - if (env->ExceptionCheck()) { \ - env->ExceptionClear(); \ - return(value); \ - } \ +#define CHECK_JNI_EXCEPTION_(env, value) \ + do { \ + JavaThread* THREAD = JavaThread::thread_from_jni_environment(env); \ + if (HAS_PENDING_EXCEPTION) { \ + CLEAR_PENDING_EXCEPTION; \ + return(value); \ + } \ } while (0) -#define CHECK_JNI_EXCEPTION(env) \ - do { \ - if (env->ExceptionCheck()) { \ - env->ExceptionClear(); \ - return; \ - } \ +#define CHECK_JNI_EXCEPTION(env) \ + do { \ + JavaThread* THREAD = JavaThread::thread_from_jni_environment(env); \ + if (HAS_PENDING_EXCEPTION) { \ + CLEAR_PENDING_EXCEPTION; \ + return; \ + } \ } while (0) class WhiteBox : public AllStatic { -- cgit v1.2.3 From 1a71fd3089b5fe2974c694481134f22af048691a Mon Sep 17 00:00:00 2001 From: dsimms Date: Fri, 24 Jan 2014 09:28:47 +0100 Subject: 8028280: ParkEvent leak when running modified runThese which only loads classes Summary: Use spin lock to manage ParkEvent and PlatformEvent free lists. Reviewed-by: dholmes, fparain, dcubed, acorn --- src/os/bsd/vm/os_bsd.cpp | 18 +++++- src/os/linux/vm/os_linux.cpp | 30 ++++++++- src/os/solaris/vm/os_solaris.cpp | 11 +++- src/os/windows/vm/os_windows.cpp | 10 +++ src/share/vm/runtime/os.hpp | 5 +- src/share/vm/runtime/park.cpp | 129 +++++++++------------------------------ src/share/vm/runtime/thread.cpp | 4 +- 7 files changed, 95 insertions(+), 112 deletions(-) diff --git a/src/os/bsd/vm/os_bsd.cpp b/src/os/bsd/vm/os_bsd.cpp index 47e5d16ef..456daba55 100644 --- a/src/os/bsd/vm/os_bsd.cpp +++ b/src/os/bsd/vm/os_bsd.cpp @@ -2636,9 +2636,21 @@ int os::sleep(Thread* thread, jlong millis, bool interruptible) { } } -int os::naked_sleep() { - // %% make the sleep time an integer flag. for now use 1 millisec. 
- return os::sleep(Thread::current(), 1, false); +void os::naked_short_sleep(jlong ms) { + struct timespec req; + + assert(ms < 1000, "Un-interruptible sleep, short time use only"); + req.tv_sec = 0; + if (ms > 0) { + req.tv_nsec = (ms % 1000) * 1000000; + } + else { + req.tv_nsec = 1; + } + + nanosleep(&req, NULL); + + return; } // Sleep forever; naked call to OS-specific sleep; use with CAUTION diff --git a/src/os/linux/vm/os_linux.cpp b/src/os/linux/vm/os_linux.cpp index 54149e182..d0751f005 100644 --- a/src/os/linux/vm/os_linux.cpp +++ b/src/os/linux/vm/os_linux.cpp @@ -3871,9 +3871,33 @@ int os::sleep(Thread* thread, jlong millis, bool interruptible) { } } -int os::naked_sleep() { - // %% make the sleep time an integer flag. for now use 1 millisec. - return os::sleep(Thread::current(), 1, false); +// +// Short sleep, direct OS call. +// +// Note: certain versions of Linux CFS scheduler (since 2.6.23) do not guarantee +// sched_yield(2) will actually give up the CPU: +// +// * Alone on this particular CPU, keeps running. +// * Before the introduction of "skip_buddy" with "compat_yield" disabled +// (pre 2.6.39). +// +// So calling this with 0 is an alternative. +// +void os::naked_short_sleep(jlong ms) { + struct timespec req; + + assert(ms < 1000, "Un-interruptible sleep, short time use only"); + req.tv_sec = 0; + if (ms > 0) { + req.tv_nsec = (ms % 1000) * 1000000; + } + else { + req.tv_nsec = 1; + } + + nanosleep(&req, NULL); + + return; } // Sleep forever; naked call to OS-specific sleep; use with CAUTION diff --git a/src/os/solaris/vm/os_solaris.cpp b/src/os/solaris/vm/os_solaris.cpp index a4d0178c8..533bbb766 100644 --- a/src/os/solaris/vm/os_solaris.cpp +++ b/src/os/solaris/vm/os_solaris.cpp @@ -3540,9 +3540,14 @@ int os::sleep(Thread* thread, jlong millis, bool interruptible) { return os_sleep(millis, interruptible); } -int os::naked_sleep() { - // %% make the sleep time an integer flag. for now use 1 millisec. - return os_sleep(1, false); +void os::naked_short_sleep(jlong ms) { + assert(ms < 1000, "Un-interruptible sleep, short time use only"); + + // usleep is deprecated and removed from POSIX, in favour of nanosleep, but + // Solaris requires -lrt for this. + usleep((ms * 1000)); + + return; } // Sleep forever; naked call to OS-specific sleep; use with CAUTION diff --git a/src/os/windows/vm/os_windows.cpp b/src/os/windows/vm/os_windows.cpp index 7daee3563..4a540b66e 100644 --- a/src/os/windows/vm/os_windows.cpp +++ b/src/os/windows/vm/os_windows.cpp @@ -3496,6 +3496,16 @@ int os::sleep(Thread* thread, jlong ms, bool interruptable) { return result; } +// +// Short sleep, direct OS call. +// +// ms = 0, means allow others (if any) to run. +// +void os::naked_short_sleep(jlong ms) { + assert(ms < 1000, "Un-interruptible sleep, short time use only"); + Sleep(ms); +} + // Sleep forever; naked call to OS-specific sleep; use with CAUTION void os::infinite_sleep() { while (true) { // sleep forever ... diff --git a/src/share/vm/runtime/os.hpp b/src/share/vm/runtime/os.hpp index 17fcd3bdd..47ebeb6bb 100644 --- a/src/share/vm/runtime/os.hpp +++ b/src/share/vm/runtime/os.hpp @@ -430,7 +430,10 @@ class os: AllStatic { static intx current_thread_id(); static int current_process_id(); static int sleep(Thread* thread, jlong ms, bool interruptable); - static int naked_sleep(); + // Short standalone OS sleep suitable for slow path spin loop. + // Ignores Thread.interrupt() (so keep it short). + // ms = 0, will sleep for the least amount of time allowed by the OS.
+ static void naked_short_sleep(jlong ms); static void infinite_sleep(); // never returns, use with CAUTION static void yield(); // Yields to all threads with same priority enum YieldResult { diff --git a/src/share/vm/runtime/park.cpp b/src/share/vm/runtime/park.cpp index 6380570ef..0ab5b5b67 100644 --- a/src/share/vm/runtime/park.cpp +++ b/src/share/vm/runtime/park.cpp @@ -59,58 +59,22 @@ ParkEvent * ParkEvent::Allocate (Thread * t) { // Start by trying to recycle an existing but unassociated // ParkEvent from the global free list. - for (;;) { - ev = FreeList ; - if (ev == NULL) break ; - // 1: Detach - sequester or privatize the list - // Tantamount to ev = Swap (&FreeList, NULL) - if (Atomic::cmpxchg_ptr (NULL, &FreeList, ev) != ev) { - continue ; + // Using a spin lock since we are part of the mutex impl. + // 8028280: using concurrent free list without memory management can leak + // pretty badly it turns out. + Thread::SpinAcquire(&ListLock, "ParkEventFreeListAllocate"); + { + ev = FreeList; + if (ev != NULL) { + FreeList = ev->FreeNext; } - - // We've detached the list. The list in-hand is now - // local to this thread. This thread can operate on the - // list without risk of interference from other threads. - // 2: Extract -- pop the 1st element from the list. - ParkEvent * List = ev->FreeNext ; - if (List == NULL) break ; - for (;;) { - // 3: Try to reattach the residual list - guarantee (List != NULL, "invariant") ; - ParkEvent * Arv = (ParkEvent *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ; - if (Arv == NULL) break ; - - // New nodes arrived. Try to detach the recent arrivals. - if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) { - continue ; - } - guarantee (Arv != NULL, "invariant") ; - // 4: Merge Arv into List - ParkEvent * Tail = List ; - while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ; - Tail->FreeNext = Arv ; - } - break ; } + Thread::SpinRelease(&ListLock); if (ev != NULL) { guarantee (ev->AssociatedWith == NULL, "invariant") ; } else { // Do this the hard way -- materialize a new ParkEvent. - // In rare cases an allocating thread might detach a long list -- - // installing null into FreeList -- and then stall or be obstructed. - // A 2nd thread calling Allocate() would see FreeList == null. - // The list held privately by the 1st thread is unavailable to the 2nd thread. - // In that case the 2nd thread would have to materialize a new ParkEvent, - // even though free ParkEvents existed in the system. In this case we end up - // with more ParkEvents in circulation than we need, but the race is - // rare and the outcome is benign. Ideally, the # of extant ParkEvents - // is equal to the maximum # of threads that existed at any one time. - // Because of the race mentioned above, segments of the freelist - // can be transiently inaccessible. At worst we may end up with the - // # of ParkEvents in circulation slightly above the ideal. - // Note that if we didn't have the TSM/immortal constraint, then - // when reattaching, above, we could trim the list. ev = new ParkEvent () ; guarantee ((intptr_t(ev) & 0xFF) == 0, "invariant") ; } @@ -124,13 +88,14 @@ void ParkEvent::Release (ParkEvent * ev) { if (ev == NULL) return ; guarantee (ev->FreeNext == NULL , "invariant") ; ev->AssociatedWith = NULL ; - for (;;) { - // Push ev onto FreeList - // The mechanism is "half" lock-free. 
- ParkEvent * List = FreeList ; - ev->FreeNext = List ; - if (Atomic::cmpxchg_ptr (ev, &FreeList, List) == List) break ; + // Note that if we didn't have the TSM/immortal constraint, then + // when reattaching we could trim the list. + Thread::SpinAcquire(&ListLock, "ParkEventFreeListRelease"); + { + ev->FreeNext = FreeList; + FreeList = ev; } + Thread::SpinRelease(&ListLock); } // Override operator new and delete so we can ensure that the @@ -164,56 +129,21 @@ Parker * Parker::Allocate (JavaThread * t) { // Start by trying to recycle an existing but unassociated // Parker from the global free list. - for (;;) { - p = FreeList ; - if (p == NULL) break ; - // 1: Detach - // Tantamount to p = Swap (&FreeList, NULL) - if (Atomic::cmpxchg_ptr (NULL, &FreeList, p) != p) { - continue ; + // 8028280: using concurrent free list without memory management can leak + // pretty badly it turns out. + Thread::SpinAcquire(&ListLock, "ParkerFreeListAllocate"); + { + p = FreeList; + if (p != NULL) { + FreeList = p->FreeNext; } - - // We've detached the list. The list in-hand is now - // local to this thread. This thread can operate on the - // list without risk of interference from other threads. - // 2: Extract -- pop the 1st element from the list. - Parker * List = p->FreeNext ; - if (List == NULL) break ; - for (;;) { - // 3: Try to reattach the residual list - guarantee (List != NULL, "invariant") ; - Parker * Arv = (Parker *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ; - if (Arv == NULL) break ; - - // New nodes arrived. Try to detach the recent arrivals. - if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) { - continue ; - } - guarantee (Arv != NULL, "invariant") ; - // 4: Merge Arv into List - Parker * Tail = List ; - while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ; - Tail->FreeNext = Arv ; - } - break ; } + Thread::SpinRelease(&ListLock); if (p != NULL) { guarantee (p->AssociatedWith == NULL, "invariant") ; } else { // Do this the hard way -- materialize a new Parker.. - // In rare cases an allocating thread might detach - // a long list -- installing null into FreeList --and - // then stall. Another thread calling Allocate() would see - // FreeList == null and then invoke the ctor. In this case we - // end up with more Parkers in circulation than we need, but - // the race is rare and the outcome is benign. - // Ideally, the # of extant Parkers is equal to the - // maximum # of threads that existed at any one time. - // Because of the race mentioned above, segments of the - // freelist can be transiently inaccessible. At worst - // we may end up with the # of Parkers in circulation - // slightly above the ideal. 
p = new Parker() ; } p->AssociatedWith = t ; // Associate p with t @@ -227,11 +157,12 @@ void Parker::Release (Parker * p) { guarantee (p->AssociatedWith != NULL, "invariant") ; guarantee (p->FreeNext == NULL , "invariant") ; p->AssociatedWith = NULL ; - for (;;) { - // Push p onto FreeList - Parker * List = FreeList ; - p->FreeNext = List ; - if (Atomic::cmpxchg_ptr (p, &FreeList, List) == List) break ; + + Thread::SpinAcquire(&ListLock, "ParkerFreeListRelease"); + { + p->FreeNext = FreeList; + FreeList = p; } + Thread::SpinRelease(&ListLock); } diff --git a/src/share/vm/runtime/thread.cpp b/src/share/vm/runtime/thread.cpp index 9f31c25b7..af24fac2c 100644 --- a/src/share/vm/runtime/thread.cpp +++ b/src/share/vm/runtime/thread.cpp @@ -4446,9 +4446,7 @@ void Thread::SpinAcquire (volatile int * adr, const char * LockName) { ++ctr ; if ((ctr & 0xFFF) == 0 || !os::is_MP()) { if (Yields > 5) { - // Consider using a simple NakedSleep() instead. - // Then SpinAcquire could be called by non-JVM threads - Thread::current()->_ParkEvent->park(1) ; + os::naked_short_sleep(1); } else { os::NakedYield() ; ++Yields ; -- cgit v1.2.3 From 3138550fd4beb74f3e9ac617df079dd9a145c31a Mon Sep 17 00:00:00 2001 From: minqi Date: Mon, 10 Feb 2014 21:29:14 -0800 Subject: 8033792: AltHashing used jint for imprecise bit shifting Summary: AltHashing used jint where juint bit-shifting semantics were intended, which could lead to loss of precision. Fixed by defining _seed as juint. Reviewed-by: coleenp, ccheung Contributed-by: yumin.qi@oracle.com --- src/share/vm/classfile/altHashing.cpp | 78 +++++++++++++++++------------------ src/share/vm/classfile/altHashing.hpp | 20 ++++----- src/share/vm/oops/instanceKlass.hpp | 3 +- src/share/vm/oops/metadata.hpp | 4 +- src/share/vm/oops/oop.cpp | 4 +- src/share/vm/oops/oop.hpp | 4 +- src/share/vm/oops/symbol.cpp | 4 +- src/share/vm/oops/symbol.hpp | 4 +- src/share/vm/utilities/hashtable.cpp | 4 +- src/share/vm/utilities/hashtable.hpp | 6 +-- 10 files changed, 66 insertions(+), 65 deletions(-) diff --git a/src/share/vm/classfile/altHashing.cpp b/src/share/vm/classfile/altHashing.cpp index 8dfc3153c..91eb5bec1 100644 --- a/src/share/vm/classfile/altHashing.cpp +++ b/src/share/vm/classfile/altHashing.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,18 +39,18 @@ intptr_t object_hash(Klass* k) { } // Seed value used for each alternative hash calculated.
-jint AltHashing::compute_seed() { +juint AltHashing::compute_seed() { jlong nanos = os::javaTimeNanos(); jlong now = os::javaTimeMillis(); - jint SEED_MATERIAL[8] = { - (jint) object_hash(SystemDictionary::String_klass()), - (jint) object_hash(SystemDictionary::System_klass()), - (jint) os::random(), // current thread isn't a java thread - (jint) (((julong)nanos) >> 32), - (jint) nanos, - (jint) (((julong)now) >> 32), - (jint) now, - (jint) (os::javaTimeNanos() >> 2) + int SEED_MATERIAL[8] = { + (int) object_hash(SystemDictionary::String_klass()), + (int) object_hash(SystemDictionary::System_klass()), + (int) os::random(), // current thread isn't a java thread + (int) (((julong)nanos) >> 32), + (int) nanos, + (int) (((julong)now) >> 32), + (int) now, + (int) (os::javaTimeNanos() >> 2) }; return murmur3_32(SEED_MATERIAL, 8); @@ -58,14 +58,14 @@ jint AltHashing::compute_seed() { // Murmur3 hashing for Symbol -jint AltHashing::murmur3_32(jint seed, const jbyte* data, int len) { - jint h1 = seed; +juint AltHashing::murmur3_32(juint seed, const jbyte* data, int len) { + juint h1 = seed; int count = len; int offset = 0; // body while (count >= 4) { - jint k1 = (data[offset] & 0x0FF) + juint k1 = (data[offset] & 0x0FF) | (data[offset + 1] & 0x0FF) << 8 | (data[offset + 2] & 0x0FF) << 16 | data[offset + 3] << 24; @@ -85,7 +85,7 @@ jint AltHashing::murmur3_32(jint seed, const jbyte* data, int len) { // tail if (count > 0) { - jint k1 = 0; + juint k1 = 0; switch (count) { case 3: @@ -109,18 +109,18 @@ jint AltHashing::murmur3_32(jint seed, const jbyte* data, int len) { h1 ^= len; // finalization mix force all bits of a hash block to avalanche - h1 ^= ((unsigned int)h1) >> 16; + h1 ^= h1 >> 16; h1 *= 0x85ebca6b; - h1 ^= ((unsigned int)h1) >> 13; + h1 ^= h1 >> 13; h1 *= 0xc2b2ae35; - h1 ^= ((unsigned int)h1) >> 16; + h1 ^= h1 >> 16; return h1; } // Murmur3 hashing for Strings -jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) { - jint h1 = seed; +juint AltHashing::murmur3_32(juint seed, const jchar* data, int len) { + juint h1 = seed; int off = 0; int count = len; @@ -129,7 +129,7 @@ jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) { while (count >= 2) { jchar d1 = data[off++] & 0xFFFF; jchar d2 = data[off++]; - jint k1 = (d1 | d2 << 16); + juint k1 = (d1 | d2 << 16); count -= 2; @@ -145,7 +145,7 @@ jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) { // tail if (count > 0) { - int k1 = data[off]; + juint k1 = (juint)data[off]; k1 *= 0xcc9e2d51; k1 = Integer_rotateLeft(k1, 15); @@ -157,25 +157,25 @@ jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) { h1 ^= len * 2; // (Character.SIZE / Byte.SIZE); // finalization mix force all bits of a hash block to avalanche - h1 ^= ((unsigned int)h1) >> 16; + h1 ^= h1 >> 16; h1 *= 0x85ebca6b; - h1 ^= ((unsigned int)h1) >> 13; + h1 ^= h1 >> 13; h1 *= 0xc2b2ae35; - h1 ^= ((unsigned int)h1) >> 16; + h1 ^= h1 >> 16; return h1; } // Hash used for the seed. 
-jint AltHashing::murmur3_32(jint seed, const int* data, int len) { - jint h1 = seed; +juint AltHashing::murmur3_32(juint seed, const int* data, int len) { + juint h1 = seed; int off = 0; int end = len; // body while (off < end) { - jint k1 = data[off++]; + juint k1 = (juint)data[off++]; k1 *= 0xcc9e2d51; k1 = Integer_rotateLeft(k1, 15); @@ -193,26 +193,26 @@ jint AltHashing::murmur3_32(jint seed, const int* data, int len) { h1 ^= len * 4; // (Integer.SIZE / Byte.SIZE); // finalization mix force all bits of a hash block to avalanche - h1 ^= ((juint)h1) >> 16; + h1 ^= h1 >> 16; h1 *= 0x85ebca6b; - h1 ^= ((juint)h1) >> 13; + h1 ^= h1 >> 13; h1 *= 0xc2b2ae35; - h1 ^= ((juint)h1) >> 16; + h1 ^= h1 >> 16; return h1; } -jint AltHashing::murmur3_32(const int* data, int len) { +juint AltHashing::murmur3_32(const int* data, int len) { return murmur3_32(0, data, len); } #ifndef PRODUCT // Overloaded versions for internal test. -jint AltHashing::murmur3_32(const jbyte* data, int len) { +juint AltHashing::murmur3_32(const jbyte* data, int len) { return murmur3_32(0, data, len); } -jint AltHashing::murmur3_32(const jchar* data, int len) { +juint AltHashing::murmur3_32(const jchar* data, int len) { return murmur3_32(0, data, len); } @@ -251,11 +251,11 @@ void AltHashing::testMurmur3_32_ByteArray() { // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255} for (int i = 0; i < 256; i++) { - jint hash = murmur3_32(256 - i, vector, i); + juint hash = murmur3_32(256 - i, vector, i); hashes[i * 4] = (jbyte) hash; - hashes[i * 4 + 1] = (jbyte) (((juint)hash) >> 8); - hashes[i * 4 + 2] = (jbyte) (((juint)hash) >> 16); - hashes[i * 4 + 3] = (jbyte) (((juint)hash) >> 24); + hashes[i * 4 + 1] = (jbyte)(hash >> 8); + hashes[i * 4 + 2] = (jbyte)(hash >> 16); + hashes[i * 4 + 3] = (jbyte)(hash >> 24); } // hash to get const result. @@ -269,7 +269,7 @@ void AltHashing::testMurmur3_32_ByteArray() { } void AltHashing::testEquivalentHashes() { - jint jbytes, jchars, ints; + juint jbytes, jchars, ints; // printf("testEquivalentHashes\n"); diff --git a/src/share/vm/classfile/altHashing.hpp b/src/share/vm/classfile/altHashing.hpp index 941b9a0dc..2e04fd33a 100644 --- a/src/share/vm/classfile/altHashing.hpp +++ b/src/share/vm/classfile/altHashing.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -39,24 +39,24 @@ class AltHashing : AllStatic { // utility function copied from java/lang/Integer - static jint Integer_rotateLeft(jint i, int distance) { - return (i << distance) | (((juint)i) >> (32-distance)); + static juint Integer_rotateLeft(juint i, int distance) { + return (i << distance) | (i >> (32-distance)); } - static jint murmur3_32(const int* data, int len); - static jint murmur3_32(jint seed, const int* data, int len); + static juint murmur3_32(const int* data, int len); + static juint murmur3_32(juint seed, const int* data, int len); #ifndef PRODUCT // Hashing functions used for internal testing - static jint murmur3_32(const jbyte* data, int len); - static jint murmur3_32(const jchar* data, int len); + static juint murmur3_32(const jbyte* data, int len); + static juint murmur3_32(const jchar* data, int len); static void testMurmur3_32_ByteArray(); static void testEquivalentHashes(); #endif // PRODUCT public: - static jint compute_seed(); - static jint murmur3_32(jint seed, const jbyte* data, int len); - static jint murmur3_32(jint seed, const jchar* data, int len); + static juint compute_seed(); + static juint murmur3_32(juint seed, const jbyte* data, int len); + static juint murmur3_32(juint seed, const jchar* data, int len); NOT_PRODUCT(static void test_alt_hash();) }; #endif // SHARE_VM_CLASSFILE_ALTHASHING_HPP diff --git a/src/share/vm/oops/instanceKlass.hpp b/src/share/vm/oops/instanceKlass.hpp index ba2bce874..db14be29a 100644 --- a/src/share/vm/oops/instanceKlass.hpp +++ b/src/share/vm/oops/instanceKlass.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -554,6 +554,7 @@ class InstanceKlass: public Klass { if (hk == NULL) { return NULL; } else { + assert(*hk != NULL, "host klass should always be set if the address is not null"); return *hk; } } diff --git a/src/share/vm/oops/metadata.hpp b/src/share/vm/oops/metadata.hpp index a1afb0d05..84a60893e 100644 --- a/src/share/vm/oops/metadata.hpp +++ b/src/share/vm/oops/metadata.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -40,7 +40,7 @@ class Metadata : public MetaspaceObj { int identity_hash() { return (int)(uintptr_t)this; } // Rehashing support for tables containing pointers to this - unsigned int new_hash(jint seed) { ShouldNotReachHere(); return 0; } + unsigned int new_hash(juint seed) { ShouldNotReachHere(); return 0; } virtual bool is_klass() const volatile { return false; } virtual bool is_method() const volatile { return false; } diff --git a/src/share/vm/oops/oop.cpp b/src/share/vm/oops/oop.cpp index aed29da67..281188354 100644 --- a/src/share/vm/oops/oop.cpp +++ b/src/share/vm/oops/oop.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -102,7 +102,7 @@ intptr_t oopDesc::slow_identity_hash() { } // When String table needs to rehash -unsigned int oopDesc::new_hash(jint seed) { +unsigned int oopDesc::new_hash(juint seed) { EXCEPTION_MARK; ResourceMark rm; int length; diff --git a/src/share/vm/oops/oop.hpp b/src/share/vm/oops/oop.hpp index 350c0fc61..2013c3ea7 100644 --- a/src/share/vm/oops/oop.hpp +++ b/src/share/vm/oops/oop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -362,7 +362,7 @@ class oopDesc { intptr_t slow_identity_hash(); // Alternate hashing code if string table is rehashed - unsigned int new_hash(jint seed); + unsigned int new_hash(juint seed); // marks are forwarded to stack when object is locked bool has_displaced_mark() const; diff --git a/src/share/vm/oops/symbol.cpp b/src/share/vm/oops/symbol.cpp index 2a2c975b6..cab865503 100644 --- a/src/share/vm/oops/symbol.cpp +++ b/src/share/vm/oops/symbol.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -207,7 +207,7 @@ const char* Symbol::as_klass_external_name() const { } // Alternate hashing for unbalanced symbol tables. -unsigned int Symbol::new_hash(jint seed) { +unsigned int Symbol::new_hash(juint seed) { ResourceMark rm; // Use alternate hashing algorithm on this symbol. return AltHashing::murmur3_32(seed, (const jbyte*)as_C_string(), utf8_length()); diff --git a/src/share/vm/oops/symbol.hpp b/src/share/vm/oops/symbol.hpp index e747c4646..aaa55c589 100644 --- a/src/share/vm/oops/symbol.hpp +++ b/src/share/vm/oops/symbol.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -154,7 +154,7 @@ class Symbol : private SymbolBase { int identity_hash() { return _identity_hash; } // For symbol table alternate hashing - unsigned int new_hash(jint seed); + unsigned int new_hash(juint seed); // Reference counting. See comments above this class for when to use. int refcount() const { return _refcount; } diff --git a/src/share/vm/utilities/hashtable.cpp b/src/share/vm/utilities/hashtable.cpp index 3e1413f61..40fb3b153 100644 --- a/src/share/vm/utilities/hashtable.cpp +++ b/src/share/vm/utilities/hashtable.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -93,7 +93,7 @@ template <MEMFLAGS F> bool BasicHashtable<F>::check_rehash_table(int count) { return false; } -template <class T, MEMFLAGS F> jint Hashtable<T, F>::_seed = 0; +template <class T, MEMFLAGS F> juint Hashtable<T, F>::_seed = 0; // Create a new table and using alternate hash code, populate the new table // with the existing elements. This can be used to change the hash code diff --git a/src/share/vm/utilities/hashtable.hpp b/src/share/vm/utilities/hashtable.hpp index 468965dab..aa4510024 100644 --- a/src/share/vm/utilities/hashtable.hpp +++ b/src/share/vm/utilities/hashtable.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -280,7 +280,7 @@ protected: // Function to move these elements into the new table. void move_to(Hashtable<T, F>* new_table); static bool use_alternate_hashcode() { return _seed != 0; } - static jint seed() { return _seed; } + static juint seed() { return _seed; } static int literal_size(Symbol *symbol); static int literal_size(oop oop); @@ -296,7 +296,7 @@ public: void dump_table(outputStream* st, const char *table_name); private: - static jint _seed; + static juint _seed; }; -- cgit v1.2.3 From 827971ed70348690761d1c9357d152fd39e9be98 Mon Sep 17 00:00:00 2001 From: kvn Date: Mon, 24 Feb 2014 15:12:26 -0800 Subject: 8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler Summary: Consolidated C2 x86 locking code in one place in macroAssembler_x86.cpp. Reviewed-by: roland --- src/cpu/x86/vm/macroAssembler_x86.cpp | 1281 ++++++++++++++++++++++----------- src/cpu/x86/vm/macroAssembler_x86.hpp | 7 +- src/cpu/x86/vm/x86_32.ad | 561 +-------------- src/cpu/x86/vm/x86_64.ad | 243 +------ 4 files changed, 908 insertions(+), 1184 deletions(-) diff --git a/src/cpu/x86/vm/macroAssembler_x86.cpp b/src/cpu/x86/vm/macroAssembler_x86.cpp index b4797bf3a..74fa1b298 100644 --- a/src/cpu/x86/vm/macroAssembler_x86.cpp +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp @@ -98,217 +98,6 @@ Address MacroAssembler::as_Address(ArrayAddress adr) { return Address::make_array(adr); } -int MacroAssembler::biased_locking_enter(Register lock_reg, - Register obj_reg, - Register swap_reg, - Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, - Label* slow_case, - BiasedLockingCounters* counters) { - assert(UseBiasedLocking, "why call this otherwise?"); - assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); - assert_different_registers(lock_reg, obj_reg, swap_reg); - - if (PrintBiasedLockingStatistics && counters == NULL) - counters = BiasedLocking::counters(); - - bool need_tmp_reg = false; - if (tmp_reg == noreg) { - need_tmp_reg = true; - tmp_reg = lock_reg; - } else { - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); - } - assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); - Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); - Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); - Address saved_mark_addr(lock_reg, 0); - - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into
low bits - // First check to see whether biasing is even enabled for this object - Label cas_label; - int null_check_offset = -1; - if (!swap_reg_contains_mark) { - null_check_offset = offset(); - movl(swap_reg, mark_addr); - } - if (need_tmp_reg) { - push(tmp_reg); - } - movl(tmp_reg, swap_reg); - andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); - cmpl(tmp_reg, markOopDesc::biased_lock_pattern); - if (need_tmp_reg) { - pop(tmp_reg); - } - jcc(Assembler::notEqual, cas_label); - // The bias pattern is present in the object's header. Need to check - // whether the bias owner and the epoch are both still current. - // Note that because there is no current thread register on x86 we - // need to store off the mark word we read out of the object to - // avoid reloading it and needing to recheck invariants below. This - // store is unfortunate but it makes the overall code shorter and - // simpler. - movl(saved_mark_addr, swap_reg); - if (need_tmp_reg) { - push(tmp_reg); - } - get_thread(tmp_reg); - xorl(swap_reg, tmp_reg); - if (swap_reg_contains_mark) { - null_check_offset = offset(); - } - movl(tmp_reg, klass_addr); - xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); - andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); - if (need_tmp_reg) { - pop(tmp_reg); - } - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->biased_lock_entry_count_addr())); - } - jcc(Assembler::equal, done); - - Label try_revoke_bias; - Label try_rebias; - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - testl(swap_reg, markOopDesc::biased_lock_mask_in_place); - jcc(Assembler::notZero, try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - testl(swap_reg, markOopDesc::epoch_mask_in_place); - jcc(Assembler::notZero, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. 
- movl(swap_reg, saved_mark_addr); - andl(swap_reg, - markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); - if (need_tmp_reg) { - push(tmp_reg); - } - get_thread(tmp_reg); - orl(tmp_reg, swap_reg); - if (os::is_MP()) { - lock(); - } - cmpxchgptr(tmp_reg, Address(obj_reg, 0)); - if (need_tmp_reg) { - pop(tmp_reg); - } - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - if (need_tmp_reg) { - push(tmp_reg); - } - get_thread(tmp_reg); - movl(swap_reg, klass_addr); - orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); - movl(swap_reg, saved_mark_addr); - if (os::is_MP()) { - lock(); - } - cmpxchgptr(tmp_reg, Address(obj_reg, 0)); - if (need_tmp_reg) { - pop(tmp_reg); - } - // If the biasing toward our thread failed, then another thread - // succeeded in biasing it toward itself and we need to revoke that - // bias. The revocation will occur in the runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - movl(swap_reg, saved_mark_addr); - if (need_tmp_reg) { - push(tmp_reg); - } - movl(tmp_reg, klass_addr); - movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); - if (os::is_MP()) { - lock(); - } - cmpxchgptr(tmp_reg, Address(obj_reg, 0)); - if (need_tmp_reg) { - pop(tmp_reg); - } - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. 
- if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->revoked_lock_entry_count_addr())); - } - - bind(cas_label); - - return null_check_offset; -} void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { call(RuntimeAddress(entry_point)); @@ -726,201 +515,42 @@ Address MacroAssembler::as_Address(ArrayAddress adr) { return array; } -int MacroAssembler::biased_locking_enter(Register lock_reg, - Register obj_reg, - Register swap_reg, - Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, - Label* slow_case, - BiasedLockingCounters* counters) { - assert(UseBiasedLocking, "why call this otherwise?"); - assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); - assert(tmp_reg != noreg, "tmp_reg must be supplied"); - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); - assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); - Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); - Address saved_mark_addr(lock_reg, 0); +void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { + Label L, E; - if (PrintBiasedLockingStatistics && counters == NULL) - counters = BiasedLocking::counters(); +#ifdef _WIN64 + // Windows always allocates space for it's register args + assert(num_args <= 4, "only register arguments supported"); + subq(rsp, frame::arg_reg_save_area_bytes); +#endif - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - // First check to see whether biasing is even enabled for this object - Label cas_label; - int null_check_offset = -1; - if (!swap_reg_contains_mark) { - null_check_offset = offset(); - movq(swap_reg, mark_addr); + // Align stack if necessary + testl(rsp, 15); + jcc(Assembler::zero, L); + + subq(rsp, 8); + { + call(RuntimeAddress(entry_point)); } - movq(tmp_reg, swap_reg); - andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); - cmpq(tmp_reg, markOopDesc::biased_lock_pattern); - jcc(Assembler::notEqual, cas_label); - // The bias pattern is present in the object's header. Need to check - // whether the bias owner and the epoch are both still current. - load_prototype_header(tmp_reg, obj_reg); - orq(tmp_reg, r15_thread); - xorq(tmp_reg, swap_reg); - andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); + addq(rsp, 8); + jmp(E); + + bind(L); + { + call(RuntimeAddress(entry_point)); } - jcc(Assembler::equal, done); - Label try_revoke_bias; - Label try_rebias; + bind(E); - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. +#ifdef _WIN64 + // restore stack pointer + addq(rsp, frame::arg_reg_save_area_bytes); +#endif - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. 
- testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); - jcc(Assembler::notZero, try_revoke_bias); +} - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - testq(tmp_reg, markOopDesc::epoch_mask_in_place); - jcc(Assembler::notZero, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. - andq(swap_reg, - markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); - movq(tmp_reg, swap_reg); - orq(tmp_reg, r15_thread); - if (os::is_MP()) { - lock(); - } - cmpxchgq(tmp_reg, Address(obj_reg, 0)); - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - load_prototype_header(tmp_reg, obj_reg); - orq(tmp_reg, r15_thread); - if (os::is_MP()) { - lock(); - } - cmpxchgq(tmp_reg, Address(obj_reg, 0)); - // If the biasing toward our thread failed, then another thread - // succeeded in biasing it toward itself and we need to revoke that - // bias. The revocation will occur in the runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. 
- // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - load_prototype_header(tmp_reg, obj_reg); - if (os::is_MP()) { - lock(); - } - cmpxchgq(tmp_reg, Address(obj_reg, 0)); - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->revoked_lock_entry_count_addr())); - } - - bind(cas_label); - - return null_check_offset; -} - -void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { - Label L, E; - -#ifdef _WIN64 - // Windows always allocates space for it's register args - assert(num_args <= 4, "only register arguments supported"); - subq(rsp, frame::arg_reg_save_area_bytes); -#endif - - // Align stack if necessary - testl(rsp, 15); - jcc(Assembler::zero, L); - - subq(rsp, 8); - { - call(RuntimeAddress(entry_point)); - } - addq(rsp, 8); - jmp(E); - - bind(L); - { - call(RuntimeAddress(entry_point)); - } - - bind(E); - -#ifdef _WIN64 - // restore stack pointer - addq(rsp, frame::arg_reg_save_area_bytes); -#endif - -} - -void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { - assert(!src2.is_lval(), "should use cmpptr"); +void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { + assert(!src2.is_lval(), "should use cmpptr"); if (reachable(src2)) { cmpq(src1, as_Address(src2)); @@ -1360,9 +990,16 @@ void MacroAssembler::andptr(Register dst, int32_t imm32) { void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { pushf(); - if (os::is_MP()) - lock(); - incrementl(counter_addr); + if (reachable(counter_addr)) { + if (os::is_MP()) + lock(); + incrementl(as_Address(counter_addr)); + } else { + lea(rscratch1, counter_addr); + if (os::is_MP()) + lock(); + incrementl(Address(rscratch1, 0)); + } popf(); } @@ -1393,6 +1030,234 @@ void MacroAssembler::bang_stack_size(Register size, Register tmp) { } } +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); + LP64_ONLY( assert(tmp_reg != noreg, "tmp_reg must be supplied"); ) + bool need_tmp_reg = false; + if (tmp_reg == noreg) { + need_tmp_reg = true; + tmp_reg = lock_reg; + assert_different_registers(lock_reg, obj_reg, swap_reg); + } else { + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); + } + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address saved_mark_addr(lock_reg, 0); + + if (PrintBiasedLockingStatistics && counters == NULL) { + counters = BiasedLocking::counters(); + } + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + movptr(swap_reg, mark_addr); 
+ } + if (need_tmp_reg) { + push(tmp_reg); + } + movptr(tmp_reg, swap_reg); + andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place); + cmpptr(tmp_reg, markOopDesc::biased_lock_pattern); + if (need_tmp_reg) { + pop(tmp_reg); + } + jcc(Assembler::notEqual, cas_label); + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. +#ifndef _LP64 + // Note that because there is no current thread register on x86_32 we + // need to store off the mark word we read out of the object to + // avoid reloading it and needing to recheck invariants below. This + // store is unfortunate but it makes the overall code shorter and + // simpler. + movptr(saved_mark_addr, swap_reg); +#endif + if (need_tmp_reg) { + push(tmp_reg); + } + if (swap_reg_contains_mark) { + null_check_offset = offset(); + } + load_prototype_header(tmp_reg, obj_reg); +#ifdef _LP64 + orptr(tmp_reg, r15_thread); + xorptr(tmp_reg, swap_reg); + Register header_reg = tmp_reg; +#else + xorptr(tmp_reg, swap_reg); + get_thread(swap_reg); + xorptr(swap_reg, tmp_reg); + Register header_reg = swap_reg; +#endif + andptr(header_reg, ~((int) markOopDesc::age_mask_in_place)); + if (need_tmp_reg) { + pop(tmp_reg); + } + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address) counters->biased_lock_entry_count_addr())); + } + jcc(Assembler::equal, done); + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + testptr(header_reg, markOopDesc::biased_lock_mask_in_place); + jccb(Assembler::notZero, try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + testptr(header_reg, markOopDesc::epoch_mask_in_place); + jccb(Assembler::notZero, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. 
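
For readers following the bias-acquisition path, here is a minimal sketch, in plain C++ rather than emitted assembly, of the step described above: keep only the biased-lock, age and epoch bits of the fetched mark word, OR in the current thread, and CAS that value over the presumed-unbiased one. The mask constants follow the usual markOopDesc layout (lock:2, biased:1, age:4, epoch:2); `self` stands in for the JavaThread pointer, and the function name is hypothetical.

    #include <atomic>
    #include <cstdint>

    static const uintptr_t biased_lock_mask_in_place = 0x7;       // low three bits
    static const uintptr_t age_mask_in_place         = 0xF << 3;  // four age bits
    static const uintptr_t epoch_mask_in_place       = 0x3 << 7;  // two epoch bits

    // Try to swing an anonymously-biased header to "biased toward self".
    bool try_acquire_bias(std::atomic<uintptr_t>& mark_word, uintptr_t self) {
      uintptr_t mark     = mark_word.load(std::memory_order_relaxed);
      uintptr_t unbiased = mark & (biased_lock_mask_in_place |
                                   age_mask_in_place | epoch_mask_in_place);
      uintptr_t biased   = unbiased | self;   // thread pointer in the high bits
      // On failure another thread won the race; callers fall into the slow
      // path, exactly as the emitted code above jumps to *slow_case.
      return mark_word.compare_exchange_strong(unbiased, biased);
    }
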
+ NOT_LP64( movptr(swap_reg, saved_mark_addr); ) + andptr(swap_reg, + markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + if (need_tmp_reg) { + push(tmp_reg); + } +#ifdef _LP64 + movptr(tmp_reg, swap_reg); + orptr(tmp_reg, r15_thread); +#else + get_thread(tmp_reg); + orptr(tmp_reg, swap_reg); +#endif + if (os::is_MP()) { + lock(); + } + cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); + } + if (slow_case != NULL) { + jcc(Assembler::notZero, *slow_case); + } + jmp(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); +#ifdef _LP64 + orptr(tmp_reg, r15_thread); +#else + get_thread(swap_reg); + orptr(tmp_reg, swap_reg); + movptr(swap_reg, saved_mark_addr); +#endif + if (os::is_MP()) { + lock(); + } + cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); + } + if (slow_case != NULL) { + jcc(Assembler::notZero, *slow_case); + } + jmp(done); + + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + NOT_LP64( movptr(swap_reg, saved_mark_addr); ) + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + if (os::is_MP()) { + lock(); + } + cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg + if (need_tmp_reg) { + pop(tmp_reg); + } + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. 
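
The try_revoke_bias arm just shown reduces to a single CAS whose outcome is deliberately ignored; a hedged sketch (names assumed, not VM code):

    // Swap the klass's prototype (unbiased) mark over the biased mark.
    // Win or lose, some thread has removed the bias bit afterwards, so we
    // always fall through to the CAS-based locking scheme.
    void revoke_bias(std::atomic<uintptr_t>& mark_word,
                     uintptr_t biased_mark, uintptr_t prototype_mark) {
      uintptr_t expected = biased_mark;
      (void) mark_word.compare_exchange_strong(expected, prototype_mark);
    }
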
+  if (counters != NULL) {
+    cond_inc32(Assembler::zero,
+               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
+  }
+
+  bind(cas_label);
+
+  return null_check_offset;
+}
+
 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
   assert(UseBiasedLocking, "why call this otherwise?");
@@ -1408,6 +1273,620 @@ void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, La
   jcc(Assembler::equal, done);
 }
 
+#ifdef COMPILER2
+// Fast_Lock and Fast_Unlock used by C2
+
+// Because the transitions from emitted code to the runtime
+// monitorenter/exit helper stubs are so slow it's critical that
+// we inline both the stack-locking fast-path and the inflated fast path.
+//
+// See also: cmpFastLock and cmpFastUnlock.
+//
+// What follows is a specialized inline transliteration of the code
+// in slow_enter() and slow_exit(). If we're concerned about I$ bloat
+// another option would be to emit TrySlowEnter and TrySlowExit methods
+// at startup-time. These methods would accept arguments as
+// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
+// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
+// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
+// In practice, however, the # of lock sites is bounded and is usually small.
+// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
+// if the processor uses simple bimodal branch predictors keyed by EIP,
+// since the helper routines would be called from multiple synchronization
+// sites.
+//
+// An even better approach would be to write "MonitorEnter()" and "MonitorExit()"
+// in java - using j.u.c and unsafe - and just bind the lock and unlock sites
+// to those specialized methods. That'd give us a mostly platform-independent
+// implementation that the JITs could optimize and inline at their pleasure.
+// Done correctly, the only time we'd need to cross to native code would be
+// to park() or unpark() threads. We'd also need a few more unsafe operators
+// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
+// (b) explicit barriers or fence operations.
+//
+// TODO:
+//
+// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
+//   This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
+//   Given TLAB allocation, Self is usually manifested in a register, so passing it into
+//   the lock operators would typically be faster than reifying Self.
+//
+// * Ideally I'd define the primitives as:
+//   fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
+//   fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED.
+//   Unfortunately ADLC bugs prevent us from expressing the ideal form.
+//   Instead, we're stuck with the rather awkward and brittle register assignments below.
+//   Furthermore, the register assignments are overconstrained, possibly resulting in
+//   sub-optimal code near the synchronization site.
+//
+// * Eliminate the sp-proximity tests and just use "== Self" tests instead.
+//   Alternately, use a better sp-proximity test.
+//
+// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
+//   Either one is sufficient to uniquely identify a thread.
+//   TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
+//
+// * Intrinsify notify() and notifyAll() for the common cases where the
+//   object is locked by the calling thread but the waitlist is empty.
+//   This avoids the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
+//
+// * Use jccb and jmpb instead of jcc and jmp to improve code density.
+//   But beware of excessive branch density on AMD Opterons.
+//
+// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
+//   or failure of the fast-path. If the fast-path fails then we pass
+//   control to the slow-path, typically in C. In Fast_Lock and
+//   Fast_Unlock we often branch to DONE_LABEL, just to find that C2
+//   will emit a conditional branch immediately after the node.
+//   So we have branches to branches and lots of ICC.ZF games.
+//   Instead, it might be better to have C2 pass a "FailureLabel"
+//   into Fast_Lock and Fast_Unlock. In the case of success, control
+//   will drop through the node and ICC.ZF is undefined at exit.
+//   In the case of failure, the node will branch directly to the
+//   FailureLabel.
+
+
+// obj: object to lock
+// box: on-stack box address (displaced header location) - KILLED
+// rax: tmp -- KILLED
+// scr: tmp -- KILLED
+void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, BiasedLockingCounters* counters) {
+  // Ensure the register assignments are disjoint
+  guarantee (objReg != boxReg, "");
+  guarantee (objReg != tmpReg, "");
+  guarantee (objReg != scrReg, "");
+  guarantee (boxReg != tmpReg, "");
+  guarantee (boxReg != scrReg, "");
+  guarantee (tmpReg == rax, "");
+
+  if (counters != NULL) {
+    atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()));
+  }
+  if (EmitSync & 1) {
+      // set box->dhw = unused_mark (3)
+      // Force all sync thru slow-path: slow_enter() and slow_exit()
+      movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+      cmpptr (rsp, (int32_t)NULL_WORD);
+  } else
+  if (EmitSync & 2) {
+      Label DONE_LABEL ;
+      if (UseBiasedLocking) {
+         // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
+         biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
+      }
+
+      movptr(tmpReg, Address(objReg, 0));           // fetch markword
+      orptr (tmpReg, 0x1);
+      movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
+      if (os::is_MP()) {
+        lock();
+      }
+      cmpxchgptr(boxReg, Address(objReg, 0));       // Updates tmpReg
+      jccb(Assembler::equal, DONE_LABEL);
+      // Recursive locking
+      subptr(tmpReg, rsp);
+      andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+      movptr(Address(boxReg, 0), tmpReg);
+      bind(DONE_LABEL);
+  } else {
+    // Possible cases that we'll encounter in fast_lock
+    // ------------------------------------------------
+    // * Inflated
+    //    -- unlocked
+    //    -- Locked
+    //       = by self
+    //       = by other
+    // * biased
+    //    -- by Self
+    //    -- by other
+    // * neutral
+    // * stack-locked
+    //    -- by self
+    //       = sp-proximity test hits
+    //       = sp-proximity test generates false-negative
+    //    -- by other
+    //
+
+    Label IsInflated, DONE_LABEL;
+
+    // it's stack-locked, biased or neutral
+    // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
+    // order to reduce the number of conditional branches in the most common cases.
+    // Beware -- there's a subtle invariant that fetch of the markword
+    // at [FETCH], below, will never observe a biased encoding (*101b).
+    // If this invariant is not held we risk exclusion (safety) failure.
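
The stack-locking attempt emitted just below compresses to a few lines of C++. A minimal sketch, assuming a BasicLock box on the caller's stack (field and function names hypothetical):

    #include <atomic>
    #include <cstdint>

    struct BasicLock { uintptr_t displaced_header; };

    bool try_stack_lock(std::atomic<uintptr_t>& mark_word, BasicLock* box) {
      // Presume the object is unlocked: low bits *001.
      uintptr_t unlocked = mark_word.load(std::memory_order_relaxed) | 0x1;
      box->displaced_header = unlocked;          // anticipate a successful CAS
      uintptr_t expected = unlocked;
      // Install the box address as the new mark; success means we own the lock.
      return mark_word.compare_exchange_strong(
                 expected, reinterpret_cast<uintptr_t>(box));
    }

The recursive case that follows in the emitted code (the subptr/andptr against rsp) is the sp-proximity test: if the old mark is an address within the current stack, a zero displaced header marks the lock as recursive.
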
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
+    }
+
+    movptr(tmpReg, Address(objReg, 0));          // [FETCH]
+    testl (tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+    jccb  (Assembler::notZero, IsInflated);
+
+    // Attempt stack-locking ...
+    orptr (tmpReg, 0x1);
+    movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(boxReg, Address(objReg, 0));      // Updates tmpReg
+    if (counters != NULL) {
+      cond_inc32(Assembler::equal,
+                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
+    }
+    jccb(Assembler::equal, DONE_LABEL);
+
+    // Recursive locking
+    subptr(tmpReg, rsp);
+    andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+    movptr(Address(boxReg, 0), tmpReg);
+    if (counters != NULL) {
+      cond_inc32(Assembler::equal,
+                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
+    }
+    jmpb(DONE_LABEL);
+
+    bind(IsInflated);
+#ifndef _LP64
+    // The object is inflated.
+    //
+    // TODO-FIXME: eliminate the ugly use of manifest constants:
+    //   Use markOopDesc::monitor_value instead of "2".
+    //   Use markOop::unused_mark() instead of "3".
+    // The tmpReg value is an objectMonitor reference ORed with
+    // markOopDesc::monitor_value (2). We can either convert tmpReg to an
+    // objectmonitor pointer by masking off the "2" bit or we can just
+    // use tmpReg as an objectmonitor pointer but bias the objectmonitor
+    // field offsets with "-2" to compensate for and annul the low-order tag bit.
+    //
+    // I use the latter as it avoids AGI stalls.
+    // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
+    // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
+    //
+    #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
+
+    // boxReg refers to the on-stack BasicLock in the current frame.
+    // We'd like to write:
+    //   set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices.
+    // This is convenient but results in a ST-before-CAS penalty. The following CAS suffers
+    // additional latency as we have another ST in the store buffer that must drain.
+
+    if (EmitSync & 8192) {
+       movptr(Address(boxReg, 0), 3);            // results in ST-before-CAS penalty
+       get_thread (scrReg);
+       movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
+       movptr(tmpReg, NULL_WORD);                // consider: xor vs mov
+       if (os::is_MP()) {
+         lock();
+       }
+       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    } else
+    if ((EmitSync & 128) == 0) {                 // avoid ST-before-CAS
+       movptr(scrReg, boxReg);
+       movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
+
+       // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
+       if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+          // prefetchw [eax + Offset(_owner)-2]
+          prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       }
+
+       if ((EmitSync & 64) == 0) {
+         // Optimistic form: consider XORL tmpReg,tmpReg
+         movptr(tmpReg, NULL_WORD);
+       } else {
+         // Can suffer RTS->RTO upgrades on shared or cold $ lines
+         // Test-And-CAS instead of CAS
+         movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));   // rax, = m->_owner
+         testptr(tmpReg, tmpReg);                // Locked ?
+         jccb  (Assembler::notZero, DONE_LABEL);
+       }
+
+       // Appears unlocked - try to swing _owner from null to non-null.
+ // Ideally, I'd manifest "Self" with get_thread and then attempt + // to CAS the register containing Self into m->Owner. + // But we don't have enough registers, so instead we can either try to CAS + // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds + // we later store "Self" into m->Owner. Transiently storing a stack address + // (rsp or the address of the box) into m->owner is harmless. + // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. + if (os::is_MP()) { + lock(); + } + cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); + movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3 + jccb (Assembler::notZero, DONE_LABEL); + get_thread (scrReg); // beware: clobbers ICCs + movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg); + xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success + + // If the CAS fails we can either retry or pass control to the slow-path. + // We use the latter tactic. + // Pass the CAS result in the icc.ZFlag into DONE_LABEL + // If the CAS was successful ... + // Self has acquired the lock + // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. + // Intentional fall-through into DONE_LABEL ... + } else { + movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())); // results in ST-before-CAS penalty + movptr(boxReg, tmpReg); + + // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes + if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { + // prefetchw [eax + Offset(_owner)-2] + prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); + } + + if ((EmitSync & 64) == 0) { + // Optimistic form + xorptr (tmpReg, tmpReg); + } else { + // Can suffer RTS->RTO upgrades on shared or cold $ lines + movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); // rax, = m->_owner + testptr(tmpReg, tmpReg); // Locked ? + jccb (Assembler::notZero, DONE_LABEL); + } + + // Appears unlocked - try to swing _owner from null to non-null. + // Use either "Self" (in scr) or rsp as thread identity in _owner. + // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. + get_thread (scrReg); + if (os::is_MP()) { + lock(); + } + cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); + + // If the CAS fails we can either retry or pass control to the slow-path. + // We use the latter tactic. + // Pass the CAS result in the icc.ZFlag into DONE_LABEL + // If the CAS was successful ... + // Self has acquired the lock + // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. + // Intentional fall-through into DONE_LABEL ... + } +#else // _LP64 + // It's inflated + + // TODO: someday avoid the ST-before-CAS penalty by + // relocating (deferring) the following ST. + // We should also think about trying a CAS without having + // fetched _owner. If the CAS is successful we may + // avoid an RTO->RTS upgrade on the $line. 
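
Stripped of the register pressure discussed above, the inflated enter is one CAS on _owner; a sketch under the same naming assumptions (the struct is a stand-in, not the real ObjectMonitor):

    #include <atomic>

    struct MonitorSketch { std::atomic<void*> owner; };

    bool try_inflated_enter(MonitorSketch* m, void* self_or_sp) {
      void* expected = nullptr;   // tmpReg == 0 is exactly this comparand
      // rsp (or the box address) is a legal transient identity; the emitted
      // code stores the real Self pointer after the CAS succeeds.
      return m->owner.compare_exchange_strong(expected, self_or_sp);
    }
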
+
+    // Without cast to int32_t a movptr will destroy r10 which is typically obj
+    movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+
+    mov    (boxReg, tmpReg);
+    movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    testptr(tmpReg, tmpReg);
+    jccb   (Assembler::notZero, DONE_LABEL);
+
+    // It's inflated and appears unlocked
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    // Intentional fall-through into DONE_LABEL ...
+
+#endif
+
+    // DONE_LABEL is a hot target - we'd really like to place it at the
+    // start of cache line by padding with NOPs.
+    // See the AMD and Intel software optimization manuals for the
+    // most efficient "long" NOP encodings.
+    // Unfortunately none of our alignment mechanisms suffice.
+    bind(DONE_LABEL);
+
+    // At DONE_LABEL the icc ZFlag is set as follows ...
+    // Fast_Unlock uses the same protocol.
+    // ZFlag == 1 -> Success
+    // ZFlag == 0 -> Failure - force control through the slow-path
+  }
+}
+
+// obj: object to unlock
+// box: box address (displaced header location), killed. Must be EAX.
+// tmp: killed, cannot be obj nor box.
+//
+// Some commentary on balanced locking:
+//
+// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
+// Methods that don't have provably balanced locking are forced to run in the
+// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
+// The interpreter provides two properties:
+// I1: At return-time the interpreter automatically and quietly unlocks any
+//     objects acquired by the current activation (frame). Recall that the
+//     interpreter maintains an on-stack list of locks currently held by
+//     a frame.
+// I2: If a method attempts to unlock an object that is not held by the
+//     frame the interpreter throws IMSX.
+//
+// Let's say A(), which has provably balanced locking, acquires O and then calls B().
+// B() doesn't have provably balanced locking so it runs in the interpreter.
+// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
+// is still locked by A().
+//
+// The only other source of unbalanced locking would be JNI. The "Java Native Interface:
+// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
+// should not be unlocked by "normal" java-level locking and vice-versa. The specification
+// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
+
+void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
+  guarantee (objReg != boxReg, "");
+  guarantee (objReg != tmpReg, "");
+  guarantee (boxReg != tmpReg, "");
+  guarantee (boxReg == rax, "");
+
+  if (EmitSync & 4) {
+    // Disable - inhibit all inlining. Force control through the slow-path
+    cmpptr (rsp, 0);
+  } else
+  if (EmitSync & 8) {
+    Label DONE_LABEL;
+    if (UseBiasedLocking) {
+       biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+    }
+    // Classic stack-locking code ...
+    // Check whether the displaced header is 0 (=> recursive unlock)
+    movptr(tmpReg, Address(boxReg, 0));
+    testptr(tmpReg, tmpReg);
+    jccb(Assembler::zero, DONE_LABEL);
+    // If not recursive lock, reset the header to displaced header
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses RAX which is box
+    bind(DONE_LABEL);
+  } else {
+    Label DONE_LABEL, Stacked, CheckSucc;
+
+    // Critically, the biased locking test must have precedence over
+    // and appear before the (box->dhw == 0) recursive stack-lock test.
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+       biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+    }
+
+    cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
+    movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
+    jccb  (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
+
+    testptr(tmpReg, 0x02);                          // Inflated?
+    jccb  (Assembler::zero, Stacked);
+
+    // It's inflated.
+    // Despite our balanced locking property we still check that m->_owner == Self
+    // as java routines or native JNI code called by this thread might
+    // have released the lock.
+    // Refer to the comments in synchronizer.cpp for how we might encode extra
+    // state in _succ so we can avoid fetching EntryList|cxq.
+    //
+    // I'd like to add more cases in fast_lock() and fast_unlock() --
+    // such as recursive enter and exit -- but we have to be wary of
+    // I$ bloat, T$ effects and BP$ effects.
+    //
+    // If there's no contention try a 1-0 exit. That is, exit without
+    // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
+    // we detect and recover from the race that the 1-0 exit admits.
+    //
+    // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
+    // before it STs null into _owner, releasing the lock. Updates
+    // to data protected by the critical section must be visible before
+    // we drop the lock (and thus before any other thread could acquire
+    // the lock and observe the fields protected by the lock).
+    // IA32's memory model is TSO, so STs are ordered with respect to
+    // each other and there's no need for an explicit barrier (fence).
+    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
+#ifndef _LP64
+    get_thread (boxReg);
+    if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
+      // prefetchw [ebx + Offset(_owner)-2]
+      prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    }
+
+    // Note that we could employ various encoding schemes to reduce
+    // the number of loads below (currently 4) to just 2 or 3.
+    // Refer to the comments in synchronizer.cpp.
+    // In practice the chain of fetches doesn't seem to impact performance, however.
+    if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
+      // Attempt to reduce branch density - AMD's branch predictor.
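
What the xor/or chain below computes, restated in C++ (field names after ObjectMonitor, simplified and hypothetical): a single flags check decides whether the monitor is quiescent enough for the 1-0 exit.

    #include <cstdint>

    struct MonitorFields {          // illustrative subset of ObjectMonitor
      void* owner; intptr_t recursions; void* EntryList; void* cxq;
    };

    bool quiescent(const MonitorFields* m, const void* self) {
      // owner ^ self is zero only when we own the monitor; OR-ing in the
      // other fields folds all four tests into one branch.
      return (((uintptr_t)m->owner ^ (uintptr_t)self) |
              (uintptr_t)m->recursions |
              (uintptr_t)m->EntryList |
              (uintptr_t)m->cxq) == 0;
    }
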
+      xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+      orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+      orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+      orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+      jccb  (Assembler::notZero, DONE_LABEL);
+      movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+      jmpb  (DONE_LABEL);
+    } else {
+      xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+      orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+      jccb  (Assembler::notZero, DONE_LABEL);
+      movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+      orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+      jccb  (Assembler::notZero, CheckSucc);
+      movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+      jmpb  (DONE_LABEL);
+    }
+
+    // The following code fragment (EmitSync & 65536) improves the performance of
+    // contended applications and contended synchronization microbenchmarks.
+    // Unfortunately the emission of the code - even though not executed - causes regressions
+    // in scimark and jetstream, evidently because of $ effects. Replacing the code
+    // with an equal number of never-executed NOPs results in the same regression.
+    // We leave it off by default.
+
+    if ((EmitSync & 65536) != 0) {
+       Label LSuccess, LGoSlowPath ;
+
+       bind  (CheckSucc);
+
+       // Optional pre-test ... it's safe to elide this
+       if ((EmitSync & 16) == 0) {
+          cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+          jccb  (Assembler::zero, LGoSlowPath);
+       }
+
+       // We have a classic Dekker-style idiom:
+       //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
+       // There are a number of ways to implement the barrier:
+       // (1) lock:andl &m->_owner, 0
+       //     is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
+       //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
+       //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
+       // (2) If supported, an explicit MFENCE is appealing.
+       //     In older IA32 processors MFENCE is slower than lock:add or xchg
+       //     particularly if the write-buffer is full as might be the case if
+       //     stores closely precede the fence or fence-equivalent instruction.
+       //     In more modern implementations MFENCE appears faster, however.
+       // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
+       //     The $lines underlying the top-of-stack should be in M-state.
+       //     The locked add instruction is serializing, of course.
+       // (4) Use xchg, which is serializing
+       //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
+       // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
+       //     The integer condition codes will tell us if succ was 0.
+       //     Since _succ and _owner should reside in the same $line and
+       //     we just stored into _owner, it's likely that the $line
+       //     remains in M-state for the lock:orl.
+       //
+       // We currently use (3), although it's likely that switching to (2)
+       // is correct for the future.
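
Option (3) in portable terms: the 1-0 exit is a release store of _owner followed by a full fence and a re-check of _succ. A sketch with std::atomic (the emitted code gets its fence from the lock:addl to the top of stack; function name hypothetical):

    #include <atomic>

    // ST m->_owner = 0; MEMBAR; LD m->_succ  -- the Dekker duality above.
    bool one_zero_exit(std::atomic<void*>& owner, std::atomic<void*>& succ) {
      owner.store(nullptr, std::memory_order_release);        // drop the lock
      std::atomic_thread_fence(std::memory_order_seq_cst);    // the MEMBAR
      return succ.load(std::memory_order_relaxed) != nullptr; // successor exists?
    }
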
+
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       if (os::is_MP()) {
+          if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
+            mfence();
+          } else {
+            lock (); addptr(Address(rsp, 0), 0);
+          }
+       }
+       // Ratify _succ remains non-null
+       cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0);
+       jccb  (Assembler::notZero, LSuccess);
+
+       xorptr(boxReg, boxReg);                  // box is really EAX
+       if (os::is_MP()) { lock(); }
+       cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       jccb  (Assembler::notEqual, LSuccess);
+       // Since we're low on registers we installed rsp as a placeholder in _owner.
+       // Now install Self over rsp. This is safe as we're transitioning from
+       // non-null to non-null
+       get_thread (boxReg);
+       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
+       // Intentional fall-through into LGoSlowPath ...
+
+       bind  (LGoSlowPath);
+       orptr(boxReg, 1);                       // set ICC.ZF=0 to indicate failure
+       jmpb  (DONE_LABEL);
+
+       bind  (LSuccess);
+       xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
+       jmpb  (DONE_LABEL);
+    }
+
+    bind (Stacked);
+    // It's not inflated and it's not recursively stack-locked and it's not biased.
+    // It must be stack-locked.
+    // Try to reset the header to displaced header.
+    // The "box" value on the stack is stable, so we can reload
+    // and be assured we observe the same value as above.
+    movptr(tmpReg, Address(boxReg, 0));
+    if (os::is_MP()) {
+      lock();
+    }
+    cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+    // Intentional fall-through into DONE_LABEL
+
+    // DONE_LABEL is a hot target - we'd really like to place it at the
+    // start of cache line by padding with NOPs.
+    // See the AMD and Intel software optimization manuals for the
+    // most efficient "long" NOP encodings.
+    // Unfortunately none of our alignment mechanisms suffice.
+    if ((EmitSync & 65536) == 0) {
+       bind (CheckSucc);
+    }
+#else // _LP64
+    // It's inflated
+    movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    xorptr(boxReg, r15_thread);
+    orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+    jccb  (Assembler::notZero, DONE_LABEL);
+    movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+    orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+    jccb  (Assembler::notZero, CheckSucc);
+    movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+    jmpb  (DONE_LABEL);
+
+    if ((EmitSync & 65536) == 0) {
+      Label LSuccess, LGoSlowPath ;
+      bind  (CheckSucc);
+      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      jccb  (Assembler::zero, LGoSlowPath);
+
+      // I'd much rather use lock:andl m->_owner, 0 as it's faster than
+      // the explicit ST;MEMBAR combination, but masm doesn't currently support
+      // "ANDQ M,IMM". Don't use MFENCE here: lock:add to TOS, xchg, etc.
+      // are all faster when the write buffer is populated.
+      movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      if (os::is_MP()) {
+         lock (); addl (Address(rsp, 0), 0);
+      }
+      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      jccb  (Assembler::notZero, LSuccess);
+
+      movptr (boxReg, (int32_t)NULL_WORD);    // box is really EAX
+      if (os::is_MP()) { lock(); }
+      cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+      jccb  (Assembler::notEqual, LSuccess);
+      // Intentional fall-through into slow-path
+
+      bind  (LGoSlowPath);
+      orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
+      jmpb  (DONE_LABEL);
+
+      bind  (LSuccess);
+      testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
+      jmpb  (DONE_LABEL);
+    }
+
+    bind (Stacked);
+    movptr(tmpReg, Address (boxReg, 0));      // re-fetch
+    if (os::is_MP()) { lock(); }
+    cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
+
+    if (EmitSync & 65536) {
+       bind (CheckSucc);
+    }
+#endif
+    bind(DONE_LABEL);
+    // Avoid branch to branch on AMD processors
+    if (EmitSync & 32768) {
+       nop();
+    }
+  }
+}
+#endif // COMPILER2
+
 void MacroAssembler::c2bool(Register x) {
   // implements x == 0 ? 0 : 1
   // note: must only look at least-significant byte of x
diff --git a/src/cpu/x86/vm/macroAssembler_x86.hpp b/src/cpu/x86/vm/macroAssembler_x86.hpp
index 198fc98e8..6ac95774b 100644
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp
@@ -651,7 +651,12 @@ class MacroAssembler: public Assembler {
                             Label& done, Label* slow_case = NULL,
                             BiasedLockingCounters* counters = NULL);
   void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
-
+#ifdef COMPILER2
+  // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
+  // See full description in macroAssembler_x86.cpp.
+  void fast_lock(Register obj, Register box, Register tmp, Register scr, BiasedLockingCounters* counters);
+  void fast_unlock(Register obj, Register box, Register tmp);
+#endif
 
   Condition negate_condition(Condition cond);
 
diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad
index 90c1d899f..382d09b07 100644
--- a/src/cpu/x86/vm/x86_32.ad
+++ b/src/cpu/x86/vm/x86_32.ad
@@ -2910,542 +2910,6 @@ encode %{
     emit_d8    (cbuf,0                             );
   %}
-
-
-  // Because the transitions from emitted code to the runtime
-  // monitorenter/exit helper stubs are so slow it's critical that
-  // we inline both the stack-locking fast-path and the inflated fast path.
-  //
-  // See also: cmpFastLock and cmpFastUnlock.
-  //
-  // What follows is a specialized inline transliteration of the code
-  // in slow_enter() and slow_exit(). If we're concerned about I$ bloat
-  // another option would be to emit TrySlowEnter and TrySlowExit methods
-  // at startup-time. These methods would accept arguments as
-  // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
-  // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
-  // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
-  // In practice, however, the # of lock sites is bounded and is usually small.
-  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
-  // if the processor uses simple bimodal branch predictors keyed by EIP
-  // Since the helper routines would be called from multiple synchronization
-  // sites.
-  //
-  // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
-  // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
-  // to those specialized methods.
That'd give us a mostly platform-independent - // implementation that the JITs could optimize and inline at their pleasure. - // Done correctly, the only time we'd need to cross to native could would be - // to park() or unpark() threads. We'd also need a few more unsafe operators - // to (a) prevent compiler-JIT reordering of non-volatile accesses, and - // (b) explicit barriers or fence operations. - // - // TODO: - // - // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). - // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. - // Given TLAB allocation, Self is usually manifested in a register, so passing it into - // the lock operators would typically be faster than reifying Self. - // - // * Ideally I'd define the primitives as: - // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. - // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED - // Unfortunately ADLC bugs prevent us from expressing the ideal form. - // Instead, we're stuck with a rather awkward and brittle register assignments below. - // Furthermore the register assignments are overconstrained, possibly resulting in - // sub-optimal code near the synchronization site. - // - // * Eliminate the sp-proximity tests and just use "== Self" tests instead. - // Alternately, use a better sp-proximity test. - // - // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. - // Either one is sufficient to uniquely identify a thread. - // TODO: eliminate use of sp in _owner and use get_thread(tr) instead. - // - // * Intrinsify notify() and notifyAll() for the common cases where the - // object is locked by the calling thread but the waitlist is empty. - // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). - // - // * use jccb and jmpb instead of jcc and jmp to improve code density. - // But beware of excessive branch density on AMD Opterons. - // - // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success - // or failure of the fast-path. If the fast-path fails then we pass - // control to the slow-path, typically in C. In Fast_Lock and - // Fast_Unlock we often branch to DONE_LABEL, just to find that C2 - // will emit a conditional branch immediately after the node. - // So we have branches to branches and lots of ICC.ZF games. - // Instead, it might be better to have C2 pass a "FailureLabel" - // into Fast_Lock and Fast_Unlock. In the case of success, control - // will drop through the node. ICC.ZF is undefined at exit. 
- // In the case of failure, the node will branch directly to the - // FailureLabel - - - // obj: object to lock - // box: on-stack box address (displaced header location) - KILLED - // rax,: tmp -- KILLED - // scr: tmp -- KILLED - enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{ - - Register objReg = as_Register($obj$$reg); - Register boxReg = as_Register($box$$reg); - Register tmpReg = as_Register($tmp$$reg); - Register scrReg = as_Register($scr$$reg); - - // Ensure the register assignents are disjoint - guarantee (objReg != boxReg, "") ; - guarantee (objReg != tmpReg, "") ; - guarantee (objReg != scrReg, "") ; - guarantee (boxReg != tmpReg, "") ; - guarantee (boxReg != scrReg, "") ; - guarantee (tmpReg == as_Register(EAX_enc), "") ; - - MacroAssembler masm(&cbuf); - - if (_counters != NULL) { - masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr())); - } - if (EmitSync & 1) { - // set box->dhw = unused_mark (3) - // Force all sync thru slow-path: slow_enter() and slow_exit() - masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ; - masm.cmpptr (rsp, (int32_t)0) ; - } else - if (EmitSync & 2) { - Label DONE_LABEL ; - if (UseBiasedLocking) { - // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. - masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); - } - - masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword - masm.orptr (tmpReg, 0x1); - masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg - masm.jcc(Assembler::equal, DONE_LABEL); - // Recursive locking - masm.subptr(tmpReg, rsp); - masm.andptr(tmpReg, (int32_t) 0xFFFFF003 ); - masm.movptr(Address(boxReg, 0), tmpReg); - masm.bind(DONE_LABEL) ; - } else { - // Possible cases that we'll encounter in fast_lock - // ------------------------------------------------ - // * Inflated - // -- unlocked - // -- Locked - // = by self - // = by other - // * biased - // -- by Self - // -- by other - // * neutral - // * stack-locked - // -- by self - // = sp-proximity test hits - // = sp-proximity test generates false-negative - // -- by other - // - - Label IsInflated, DONE_LABEL, PopDone ; - - // TODO: optimize away redundant LDs of obj->mark and improve the markword triage - // order to reduce the number of conditional branches in the most common cases. - // Beware -- there's a subtle invariant that fetch of the markword - // at [FETCH], below, will never observe a biased encoding (*101b). - // If this invariant is not held we risk exclusion (safety) failure. - if (UseBiasedLocking && !UseOptoBiasInlining) { - masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); - } - - masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH] - masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral) - masm.jccb (Assembler::notZero, IsInflated) ; - - // Attempt stack-locking ... 
- masm.orptr (tmpReg, 0x1); - masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg - if (_counters != NULL) { - masm.cond_inc32(Assembler::equal, - ExternalAddress((address)_counters->fast_path_entry_count_addr())); - } - masm.jccb (Assembler::equal, DONE_LABEL); - - // Recursive locking - masm.subptr(tmpReg, rsp); - masm.andptr(tmpReg, 0xFFFFF003 ); - masm.movptr(Address(boxReg, 0), tmpReg); - if (_counters != NULL) { - masm.cond_inc32(Assembler::equal, - ExternalAddress((address)_counters->fast_path_entry_count_addr())); - } - masm.jmp (DONE_LABEL) ; - - masm.bind (IsInflated) ; - - // The object is inflated. - // - // TODO-FIXME: eliminate the ugly use of manifest constants: - // Use markOopDesc::monitor_value instead of "2". - // use markOop::unused_mark() instead of "3". - // The tmpReg value is an objectMonitor reference ORed with - // markOopDesc::monitor_value (2). We can either convert tmpReg to an - // objectmonitor pointer by masking off the "2" bit or we can just - // use tmpReg as an objectmonitor pointer but bias the objectmonitor - // field offsets with "-2" to compensate for and annul the low-order tag bit. - // - // I use the latter as it avoids AGI stalls. - // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]" - // instead of "mov r, [tmpReg+OFFSETOF(Owner)]". - // - #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2) - - // boxReg refers to the on-stack BasicLock in the current frame. - // We'd like to write: - // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices. - // This is convenient but results a ST-before-CAS penalty. The following CAS suffers - // additional latency as we have another ST in the store buffer that must drain. - - if (EmitSync & 8192) { - masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty - masm.get_thread (scrReg) ; - masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] - masm.movptr(tmpReg, NULL_WORD); // consider: xor vs mov - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - } else - if ((EmitSync & 128) == 0) { // avoid ST-before-CAS - masm.movptr(scrReg, boxReg) ; - masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] - - // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes - if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { - // prefetchw [eax + Offset(_owner)-2] - masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2)); - } - - if ((EmitSync & 64) == 0) { - // Optimistic form: consider XORL tmpReg,tmpReg - masm.movptr(tmpReg, NULL_WORD) ; - } else { - // Can suffer RTS->RTO upgrades on shared or cold $ lines - // Test-And-CAS instead of CAS - masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner - masm.testptr(tmpReg, tmpReg) ; // Locked ? - masm.jccb (Assembler::notZero, DONE_LABEL) ; - } - - // Appears unlocked - try to swing _owner from null to non-null. - // Ideally, I'd manifest "Self" with get_thread and then attempt - // to CAS the register containing Self into m->Owner. - // But we don't have enough registers, so instead we can either try to CAS - // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds - // we later store "Self" into m->Owner. 
Transiently storing a stack address - // (rsp or the address of the box) into m->owner is harmless. - // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3 - masm.jccb (Assembler::notZero, DONE_LABEL) ; - masm.get_thread (scrReg) ; // beware: clobbers ICCs - masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ; - masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success - - // If the CAS fails we can either retry or pass control to the slow-path. - // We use the latter tactic. - // Pass the CAS result in the icc.ZFlag into DONE_LABEL - // If the CAS was successful ... - // Self has acquired the lock - // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. - // Intentional fall-through into DONE_LABEL ... - } else { - masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty - masm.movptr(boxReg, tmpReg) ; - - // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes - if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { - // prefetchw [eax + Offset(_owner)-2] - masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2)); - } - - if ((EmitSync & 64) == 0) { - // Optimistic form - masm.xorptr (tmpReg, tmpReg) ; - } else { - // Can suffer RTS->RTO upgrades on shared or cold $ lines - masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner - masm.testptr(tmpReg, tmpReg) ; // Locked ? - masm.jccb (Assembler::notZero, DONE_LABEL) ; - } - - // Appears unlocked - try to swing _owner from null to non-null. - // Use either "Self" (in scr) or rsp as thread identity in _owner. - // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. - masm.get_thread (scrReg) ; - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - - // If the CAS fails we can either retry or pass control to the slow-path. - // We use the latter tactic. - // Pass the CAS result in the icc.ZFlag into DONE_LABEL - // If the CAS was successful ... - // Self has acquired the lock - // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. - // Intentional fall-through into DONE_LABEL ... - } - - // DONE_LABEL is a hot target - we'd really like to place it at the - // start of cache line by padding with NOPs. - // See the AMD and Intel software optimization manuals for the - // most efficient "long" NOP encodings. - // Unfortunately none of our alignment mechanisms suffice. - masm.bind(DONE_LABEL); - - // Avoid branch-to-branch on AMD processors - // This appears to be superstition. - if (EmitSync & 32) masm.nop() ; - - - // At DONE_LABEL the icc ZFlag is set as follows ... - // Fast_Unlock uses the same protocol. - // ZFlag == 1 -> Success - // ZFlag == 0 -> Failure - force control through the slow-path - } - %} - - // obj: object to unlock - // box: box address (displaced header location), killed. Must be EAX. - // rbx,: killed tmp; cannot be obj nor box. - // - // Some commentary on balanced locking: - // - // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. 
- // Methods that don't have provably balanced locking are forced to run in the - // interpreter - such methods won't be compiled to use fast_lock and fast_unlock. - // The interpreter provides two properties: - // I1: At return-time the interpreter automatically and quietly unlocks any - // objects acquired the current activation (frame). Recall that the - // interpreter maintains an on-stack list of locks currently held by - // a frame. - // I2: If a method attempts to unlock an object that is not held by the - // the frame the interpreter throws IMSX. - // - // Lets say A(), which has provably balanced locking, acquires O and then calls B(). - // B() doesn't have provably balanced locking so it runs in the interpreter. - // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O - // is still locked by A(). - // - // The only other source of unbalanced locking would be JNI. The "Java Native Interface: - // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter - // should not be unlocked by "normal" java-level locking and vice-versa. The specification - // doesn't specify what will occur if a program engages in such mixed-mode locking, however. - - enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{ - - Register objReg = as_Register($obj$$reg); - Register boxReg = as_Register($box$$reg); - Register tmpReg = as_Register($tmp$$reg); - - guarantee (objReg != boxReg, "") ; - guarantee (objReg != tmpReg, "") ; - guarantee (boxReg != tmpReg, "") ; - guarantee (boxReg == as_Register(EAX_enc), "") ; - MacroAssembler masm(&cbuf); - - if (EmitSync & 4) { - // Disable - inhibit all inlining. Force control through the slow-path - masm.cmpptr (rsp, 0) ; - } else - if (EmitSync & 8) { - Label DONE_LABEL ; - if (UseBiasedLocking) { - masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); - } - // classic stack-locking code ... - masm.movptr(tmpReg, Address(boxReg, 0)) ; - masm.testptr(tmpReg, tmpReg) ; - masm.jcc (Assembler::zero, DONE_LABEL) ; - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box - masm.bind(DONE_LABEL); - } else { - Label DONE_LABEL, Stacked, CheckSucc, Inflated ; - - // Critically, the biased locking test must have precedence over - // and appear before the (box->dhw == 0) recursive stack-lock test. - if (UseBiasedLocking && !UseOptoBiasInlining) { - masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); - } - - masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header - masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword - masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock - - masm.testptr(tmpReg, 0x02) ; // Inflated? - masm.jccb (Assembler::zero, Stacked) ; - - masm.bind (Inflated) ; - // It's inflated. - // Despite our balanced locking property we still check that m->_owner == Self - // as java routines or native JNI code called by this thread might - // have released the lock. - // Refer to the comments in synchronizer.cpp for how we might encode extra - // state in _succ so we can avoid fetching EntryList|cxq. - // - // I'd like to add more cases in fast_lock() and fast_unlock() -- - // such as recursive enter and exit -- but we have to be wary of - // I$ bloat, T$ effects and BP$ effects. - // - // If there's no contention try a 1-0 exit. That is, exit without - // a costly MEMBAR or CAS. See synchronizer.cpp for details on how - // we detect and recover from the race that the 1-0 exit admits. 
- // - // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier - // before it STs null into _owner, releasing the lock. Updates - // to data protected by the critical section must be visible before - // we drop the lock (and thus before any other thread could acquire - // the lock and observe the fields protected by the lock). - // IA32's memory-model is SPO, so STs are ordered with respect to - // each other and there's no need for an explicit barrier (fence). - // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. - - masm.get_thread (boxReg) ; - if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { - // prefetchw [ebx + Offset(_owner)-2] - masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2)); - } - - // Note that we could employ various encoding schemes to reduce - // the number of loads below (currently 4) to just 2 or 3. - // Refer to the comments in synchronizer.cpp. - // In practice the chain of fetches doesn't seem to impact performance, however. - if ((EmitSync & 65536) == 0 && (EmitSync & 256)) { - // Attempt to reduce branch density - AMD's branch predictor. - masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; - masm.jccb (Assembler::notZero, DONE_LABEL) ; - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; - masm.jmpb (DONE_LABEL) ; - } else { - masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; - masm.jccb (Assembler::notZero, DONE_LABEL) ; - masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; - masm.jccb (Assembler::notZero, CheckSucc) ; - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; - masm.jmpb (DONE_LABEL) ; - } - - // The Following code fragment (EmitSync & 65536) improves the performance of - // contended applications and contended synchronization microbenchmarks. - // Unfortunately the emission of the code - even though not executed - causes regressions - // in scimark and jetstream, evidently because of $ effects. Replacing the code - // with an equal number of never-executed NOPs results in the same regression. - // We leave it off by default. - - if ((EmitSync & 65536) != 0) { - Label LSuccess, LGoSlowPath ; - - masm.bind (CheckSucc) ; - - // Optional pre-test ... it's safe to elide this - if ((EmitSync & 16) == 0) { - masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; - masm.jccb (Assembler::zero, LGoSlowPath) ; - } - - // We have a classic Dekker-style idiom: - // ST m->_owner = 0 ; MEMBAR; LD m->_succ - // There are a number of ways to implement the barrier: - // (1) lock:andl &m->_owner, 0 - // is fast, but mask doesn't currently support the "ANDL M,IMM32" form. - // LOCK: ANDL [ebx+Offset(_Owner)-2], 0 - // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8 - // (2) If supported, an explicit MFENCE is appealing. 
- // In older IA32 processors MFENCE is slower than lock:add or xchg - // particularly if the write-buffer is full as might be the case if - // if stores closely precede the fence or fence-equivalent instruction. - // In more modern implementations MFENCE appears faster, however. - // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack - // The $lines underlying the top-of-stack should be in M-state. - // The locked add instruction is serializing, of course. - // (4) Use xchg, which is serializing - // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works - // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0. - // The integer condition codes will tell us if succ was 0. - // Since _succ and _owner should reside in the same $line and - // we just stored into _owner, it's likely that the $line - // remains in M-state for the lock:orl. - // - // We currently use (3), although it's likely that switching to (2) - // is correct for the future. - - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; - if (os::is_MP()) { - if (VM_Version::supports_sse2() && 1 == FenceInstruction) { - masm.mfence(); - } else { - masm.lock () ; masm.addptr(Address(rsp, 0), 0) ; - } - } - // Ratify _succ remains non-null - masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; - masm.jccb (Assembler::notZero, LSuccess) ; - - masm.xorptr(boxReg, boxReg) ; // box is really EAX - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); - masm.jccb (Assembler::notEqual, LSuccess) ; - // Since we're low on registers we installed rsp as a placeholding in _owner. - // Now install Self over rsp. This is safe as we're transitioning from - // non-null to non=null - masm.get_thread (boxReg) ; - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ; - // Intentional fall-through into LGoSlowPath ... - - masm.bind (LGoSlowPath) ; - masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure - masm.jmpb (DONE_LABEL) ; - - masm.bind (LSuccess) ; - masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success - masm.jmpb (DONE_LABEL) ; - } - - masm.bind (Stacked) ; - // It's not inflated and it's not recursively stack-locked and it's not biased. - // It must be stack-locked. - // Try to reset the header to displaced header. - // The "box" value on the stack is stable, so we can reload - // and be assured we observe the same value as above. - masm.movptr(tmpReg, Address(boxReg, 0)) ; - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box - // Intention fall-thru into DONE_LABEL - - - // DONE_LABEL is a hot target - we'd really like to place it at the - // start of cache line by padding with NOPs. - // See the AMD and Intel software optimization manuals for the - // most efficient "long" NOP encodings. - // Unfortunately none of our alignment mechanisms suffice. 
- if ((EmitSync & 65536) == 0) {
- masm.bind (CheckSucc) ;
- }
- masm.bind(DONE_LABEL);
-
- // Avoid branch to branch on AMD processors
- if (EmitSync & 32768) { masm.nop() ; }
- }
- %}
-
-
 enc_class enc_pop_rdx() %{ emit_opcode(cbuf,0x5A); %}
@@ -13147,23 +12611,26 @@ instruct RethrowException()
 // inlined locking and unlocking
-
-instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
- match( Set cr (FastLock object box) );
- effect( TEMP tmp, TEMP scr, USE_KILL box );
+instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
+ match(Set cr (FastLock object box));
+ effect(TEMP tmp, TEMP scr, USE_KILL box);
 ins_cost(300);
 format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
- ins_encode( Fast_Lock(object,box,tmp,scr) );
- ins_pipe( pipe_slow );
+ ins_encode %{
+ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, _counters);
+ %}
+ ins_pipe(pipe_slow);
 %}

-instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
- match( Set cr (FastUnlock object box) );
- effect( TEMP tmp, USE_KILL box );
+instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
+ match(Set cr (FastUnlock object box));
+ effect(TEMP tmp, USE_KILL box);
 ins_cost(300);
 format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
- ins_encode( Fast_Unlock(object,box,tmp) );
- ins_pipe( pipe_slow );
+ ins_encode %{
+ __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_slow);
 %}

diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad
index 9fe92953a..70b3c5a9e 100644
--- a/src/cpu/x86/vm/x86_64.ad
+++ b/src/cpu/x86/vm/x86_64.ad
@@ -2591,231 +2591,6 @@ encode %{
 %}
-
- // obj: object to lock
- // box: box address (header location) -- killed
- // tmp: rax -- killed
- // scr: rbx -- killed
- //
- // What follows is a direct transliteration of fast_lock() and fast_unlock()
- // from i486.ad. See that file for comments.
- // TODO: where possible switch from movq (r, 0) to movl(r,0) and
- // use the shorter encoding. (Movl clears the high-order 32-bits).
-
-
- enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr)
- %{
- Register objReg = as_Register((int)$obj$$reg);
- Register boxReg = as_Register((int)$box$$reg);
- Register tmpReg = as_Register($tmp$$reg);
- Register scrReg = as_Register($scr$$reg);
- MacroAssembler masm(&cbuf);
-
- // Verify uniqueness of register assignments -- necessary but not sufficient
- assert (objReg != boxReg && objReg != tmpReg &&
- objReg != scrReg && tmpReg != scrReg, "invariant") ;
-
- if (_counters != NULL) {
- masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
- }
- if (EmitSync & 1) {
- // Without cast to int32_t a movptr will destroy r10 which is typically obj
- masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
- masm.cmpptr(rsp, (int32_t)NULL_WORD) ;
- } else
- if (EmitSync & 2) {
- Label DONE_LABEL;
- if (UseBiasedLocking) {
- // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
- masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
- }
- // QQQ was movl...
- masm.movptr(tmpReg, 0x1);
- masm.orptr(tmpReg, Address(objReg, 0));
- masm.movptr(Address(boxReg, 0), tmpReg);
- if (os::is_MP()) {
- masm.lock();
- }
- masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
- masm.jcc(Assembler::equal, DONE_LABEL);
-
- // Recursive locking
- masm.subptr(tmpReg, rsp);
- masm.andptr(tmpReg, 7 - os::vm_page_size());
- masm.movptr(Address(boxReg, 0), tmpReg);
-
- masm.bind(DONE_LABEL);
- masm.nop(); // avoid branch to branch
- } else {
- Label DONE_LABEL, IsInflated, Egress;
-
- masm.movptr(tmpReg, Address(objReg, 0)) ;
- masm.testl (tmpReg, 0x02) ; // inflated vs stack-locked|neutral|biased
- masm.jcc (Assembler::notZero, IsInflated) ;
-
- // it's stack-locked, biased or neutral
- // TODO: optimize markword triage order to reduce the number of
- // conditional branches in the most common cases.
- // Beware -- there's a subtle invariant that fetch of the markword
- // at [FETCH], below, will never observe a biased encoding (*101b).
- // If this invariant is not held we'll suffer exclusion (safety) failure.
-
- if (UseBiasedLocking && !UseOptoBiasInlining) {
- masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters);
- masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH]
- }
-
- // was q will it destroy high?
- masm.orl (tmpReg, 1) ;
- masm.movptr(Address(boxReg, 0), tmpReg) ;
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
- if (_counters != NULL) {
- masm.cond_inc32(Assembler::equal,
- ExternalAddress((address) _counters->fast_path_entry_count_addr()));
- }
- masm.jcc (Assembler::equal, DONE_LABEL);
-
- // Recursive locking
- masm.subptr(tmpReg, rsp);
- masm.andptr(tmpReg, 7 - os::vm_page_size());
- masm.movptr(Address(boxReg, 0), tmpReg);
- if (_counters != NULL) {
- masm.cond_inc32(Assembler::equal,
- ExternalAddress((address) _counters->fast_path_entry_count_addr()));
- }
- masm.jmp (DONE_LABEL) ;
-
- masm.bind (IsInflated) ;
- // It's inflated
-
- // TODO: someday avoid the ST-before-CAS penalty by
- // relocating (deferring) the following ST.
- // We should also think about trying a CAS without having
- // fetched _owner. If the CAS is successful we may
- // avoid an RTO->RTS upgrade on the $line.
- // Without cast to int32_t a movptr will destroy r10 which is typically obj
- masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ;
-
- masm.mov (boxReg, tmpReg) ;
- masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- masm.testptr(tmpReg, tmpReg) ;
- masm.jcc (Assembler::notZero, DONE_LABEL) ;
-
- // It's inflated and appears unlocked
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- // Intentional fall-through into DONE_LABEL ...
-
- masm.bind (DONE_LABEL) ;
- masm.nop () ; // avoid jmp to jmp
- }
- %}
-
- // obj: object to unlock
- // box: box address (displaced header location), killed
- // RBX: killed tmp; cannot be obj nor box
- enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp)
- %{
-
- Register objReg = as_Register($obj$$reg);
- Register boxReg = as_Register($box$$reg);
- Register tmpReg = as_Register($tmp$$reg);
- MacroAssembler masm(&cbuf);
-
- if (EmitSync & 4) {
- masm.cmpptr(rsp, 0) ;
- } else
- if (EmitSync & 8) {
- Label DONE_LABEL;
- if (UseBiasedLocking) {
- masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
- }
-
- // Check whether the displaced header is 0
- // (=> recursive unlock)
- masm.movptr(tmpReg, Address(boxReg, 0));
- masm.testptr(tmpReg, tmpReg);
- masm.jcc(Assembler::zero, DONE_LABEL);
-
- // If not recursive lock, reset the header to displaced header
- if (os::is_MP()) {
- masm.lock();
- }
- masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
- masm.bind(DONE_LABEL);
- masm.nop(); // avoid branch to branch
- } else {
- Label DONE_LABEL, Stacked, CheckSucc ;
-
- if (UseBiasedLocking && !UseOptoBiasInlining) {
- masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
- }
-
- masm.movptr(tmpReg, Address(objReg, 0)) ;
- masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ;
- masm.jcc (Assembler::zero, DONE_LABEL) ;
- masm.testl (tmpReg, 0x02) ;
- masm.jcc (Assembler::zero, Stacked) ;
-
- // It's inflated
- masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
- masm.xorptr(boxReg, r15_thread) ;
- masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
- masm.jcc (Assembler::notZero, DONE_LABEL) ;
- masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
- masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
- masm.jcc (Assembler::notZero, CheckSucc) ;
- masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
- masm.jmp (DONE_LABEL) ;
-
- if ((EmitSync & 65536) == 0) {
- Label LSuccess, LGoSlowPath ;
- masm.bind (CheckSucc) ;
- masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
- masm.jcc (Assembler::zero, LGoSlowPath) ;
-
- // I'd much rather use lock:andl m->_owner, 0 as it's faster than
- // the explicit ST;MEMBAR combination, but masm doesn't currently support
- // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc
- // are all faster when the write buffer is populated.
- masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
- if (os::is_MP()) {
- masm.lock () ; masm.addl (Address(rsp, 0), 0) ;
- }
- masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ;
- masm.jcc (Assembler::notZero, LSuccess) ;
-
- masm.movptr (boxReg, (int32_t)NULL_WORD) ; // box is really EAX
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
- masm.jcc (Assembler::notEqual, LSuccess) ;
- // Intentional fall-through into slow-path
-
- masm.bind (LGoSlowPath) ;
- masm.orl (boxReg, 1) ; // set ICC.ZF=0 to indicate failure
- masm.jmp (DONE_LABEL) ;
-
- masm.bind (LSuccess) ;
- masm.testl (boxReg, 0) ; // set ICC.ZF=1 to indicate success
- masm.jmp (DONE_LABEL) ;
- }
-
- masm.bind (Stacked) ;
- masm.movptr(tmpReg, Address (boxReg, 0)) ; // re-fetch
- if (os::is_MP()) { masm.lock(); }
- masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
-
- if (EmitSync & 65536) {
- masm.bind (CheckSucc) ;
- }
- masm.bind(DONE_LABEL);
- if (EmitSync & 32768) {
- masm.nop(); // avoid branch to branch
- }
- }
- %}
-
-
 enc_class enc_rethrow()
 %{
 cbuf.set_insts_mark();
@@ -11443,27 +11218,25 @@ instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
 // ============================================================================
 // inlined locking and unlocking

-instruct cmpFastLock(rFlagsReg cr,
- rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
-%{
+instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{
 match(Set cr (FastLock object box));
 effect(TEMP tmp, TEMP scr, USE_KILL box);
- ins_cost(300);
 format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
- ins_encode(Fast_Lock(object, box, tmp, scr));
+ ins_encode %{
+ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, _counters);
+ %}
 ins_pipe(pipe_slow);
%}

-instruct cmpFastUnlock(rFlagsReg cr,
- rRegP object, rax_RegP box, rRegP tmp)
-%{
+instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
 match(Set cr (FastUnlock object box));
 effect(TEMP tmp, USE_KILL box);
- ins_cost(300);
 format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
- ins_encode(Fast_Unlock(object, box, tmp));
+ ins_encode %{
+ __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
+ %}
 ins_pipe(pipe_slow);
%}
-- 
cgit v1.2.3


From c99d52bc593f6de469b992f121203fba94c84d9b Mon Sep 17 00:00:00 2001
From: dcubed
Date: Wed, 5 Mar 2014 11:28:33 -0800
Subject: 8029775: Solaris code cleanup

Summary: cleaned up warnings in Solaris-specific OS code.
Reviewed-by: coleenp, fparain, dcubed
Contributed-by: gerald.thornbrugh@oracle.com
---
 src/os/solaris/vm/os_solaris.cpp | 25 +++++++++++++++++--------
 src/os/solaris/vm/perfMemory_solaris.cpp | 20 +++++++++++++++-----
 src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp | 11 +++++++----
 3 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/src/os/solaris/vm/os_solaris.cpp b/src/os/solaris/vm/os_solaris.cpp
index 533bbb766..650c2118a 100644
--- a/src/os/solaris/vm/os_solaris.cpp
+++ b/src/os/solaris/vm/os_solaris.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -2232,8 +2232,8 @@ static bool check_addr0(outputStream* st) {
 st->cr();
 status = true;
 }
- ::close(fd);
 }
+ ::close(fd);
 }
 return status;
}
@@ -2257,13 +2257,18 @@ const char *ill_names[] = { "ILL0", "ILL_ILLOPC", "ILL_ILLOPN", "ILL_ILLADR",
 "ILL_ILLTRP", "ILL_PRVOPC", "ILL_PRVREG",
 "ILL_COPROC", "ILL_BADSTK" };
+const size_t ill_names_length = (sizeof(ill_names)/sizeof(char *));
+
 const char *fpe_names[] = { "FPE0", "FPE_INTDIV", "FPE_INTOVF", "FPE_FLTDIV",
 "FPE_FLTOVF", "FPE_FLTUND", "FPE_FLTRES",
 "FPE_FLTINV", "FPE_FLTSUB" };
+const size_t fpe_names_length = (sizeof(fpe_names)/sizeof(char *));

 const char *segv_names[] = { "SEGV0", "SEGV_MAPERR", "SEGV_ACCERR" };
+const size_t segv_names_length = (sizeof(segv_names)/sizeof(char *));

 const char *bus_names[] = { "BUS0", "BUS_ADRALN", "BUS_ADRERR", "BUS_OBJERR" };
+const size_t bus_names_length = (sizeof(bus_names)/sizeof(char *));

 void os::print_siginfo(outputStream* st, void* siginfo) {
 st->print("siginfo:");
@@ -2282,19 +2287,23 @@ void os::print_siginfo(outputStream* st, void* siginfo) {
 assert(c > 0, "unexpected si_code");
 switch (si->si_signo) {
 case SIGILL:
- st->print(", si_code=%d (%s)", c, c > 8 ? "" : ill_names[c]);
+ st->print(", si_code=%d (%s)", c,
+ c >= ill_names_length ? "" : ill_names[c]);
 st->print(", si_addr=" PTR_FORMAT, si->si_addr);
 break;
 case SIGFPE:
- st->print(", si_code=%d (%s)", c, c > 9 ? "" : fpe_names[c]);
+ st->print(", si_code=%d (%s)", c,
+ c >= fpe_names_length ? "" : fpe_names[c]);
 st->print(", si_addr=" PTR_FORMAT, si->si_addr);
 break;
 case SIGSEGV:
- st->print(", si_code=%d (%s)", c, c > 2 ? "" : segv_names[c]);
+ st->print(", si_code=%d (%s)", c,
+ c >= segv_names_length ? "" : segv_names[c]);
 st->print(", si_addr=" PTR_FORMAT, si->si_addr);
 break;
 case SIGBUS:
- st->print(", si_code=%d (%s)", c, c > 3 ? "" : bus_names[c]);
+ st->print(", si_code=%d (%s)", c,
+ c >= bus_names_length ? "" : bus_names[c]);
 st->print(", si_addr=" PTR_FORMAT, si->si_addr);
 break;
 default:
@@ -3011,7 +3020,7 @@ bool os::get_page_info(char *start, page_info* info) {
char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info* page_found) {
 const uint_t info_types[] = { MEMINFO_VLGRP, MEMINFO_VPAGESIZE };
 const size_t types = sizeof(info_types) / sizeof(info_types[0]);
- uint64_t addrs[MAX_MEMINFO_CNT], outdata[types * MAX_MEMINFO_CNT];
+ uint64_t addrs[MAX_MEMINFO_CNT], outdata[types * MAX_MEMINFO_CNT + 1];
 uint_t validity[MAX_MEMINFO_CNT];

 size_t page_size = MAX2((size_t)os::vm_page_size(), page_expected->size);
@@ -3050,7 +3059,7 @@ char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info
 }
 }

- if (i != addrs_count) {
+ if (i < addrs_count) {
 if ((validity[i] & 2) != 0) {
 page_found->lgrp_id = outdata[types * i];
 } else {
diff --git a/src/os/solaris/vm/perfMemory_solaris.cpp b/src/os/solaris/vm/perfMemory_solaris.cpp
index e7b31ac64..1fa30012d 100644
--- a/src/os/solaris/vm/perfMemory_solaris.cpp
+++ b/src/os/solaris/vm/perfMemory_solaris.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -431,10 +431,12 @@ static char* get_user_name(int vmid, TRAPS) {
 RESTARTABLE(::read(fd, addr, remaining), result);
 if (result == OS_ERR) {
+ ::close(fd);
 THROW_MSG_0(vmSymbols::java_io_IOException(), "Read error");
+ } else {
+ remaining-=result;
+ addr+=result;
 }
- remaining-=result;
- addr+=result;
 }

 ::close(fd);
@@ -906,8 +908,16 @@ static void mmap_attach_shared(const char* user, int vmid, PerfMemory::PerfMemor
 FREE_C_HEAP_ARRAY(char, filename, mtInternal);

 // open the shared memory file for the given vmid
- fd = open_sharedmem_file(rfilename, file_flags, CHECK);
- assert(fd != OS_ERR, "unexpected value");
+ fd = open_sharedmem_file(rfilename, file_flags, THREAD);
+
+ if (fd == OS_ERR) {
+ return;
+ }
+
+ if (HAS_PENDING_EXCEPTION) {
+ ::close(fd);
+ return;
+ }

 if (*sizep == 0) {
 size = sharedmem_filesize(fd, CHECK);
diff --git a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp
index eb8cbe819..054a8132b 100644
--- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp
+++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -475,9 +475,11 @@ JVM_handle_solaris_signal(int sig, siginfo_t* info, void* ucVoid,
 // here if the underlying file has been truncated.
 // Do not crash the VM in such a case.
 CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
- nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL;
- if (nm != NULL && nm->has_unsafe_access()) {
- stub = StubRoutines::handler_for_unsafe_access();
+ if (cb != NULL) {
+ nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL;
+ if (nm != NULL && nm->has_unsafe_access()) {
+ stub = StubRoutines::handler_for_unsafe_access();
+ }
 }
 } else
@@ -724,6 +726,7 @@ JVM_handle_solaris_signal(int sig, siginfo_t* info, void* ucVoid,
 err.report_and_die();

 ShouldNotReachHere();
+ return false;
 }

 void os::print_context(outputStream *st, void *context) {
-- 
cgit v1.2.3


From bc058ff85585a63718dfacaf818195b3c14000f2 Mon Sep 17 00:00:00 2001
From: iveresov
Date: Mon, 10 Mar 2014 11:09:02 -0700
Subject: 8025644: java/util/stream/test/org/openjdk/tests/java/util/stream/ToArrayOpTest.java fails with TestData$OfRef): failure java.lang.AssertionError: expected [true] but found [false]

Summary: In GraphKit::gen_checkcast() provide only exact superklass to GraphKit::maybe_cast_profiled_receiver()
Reviewed-by: kvn, roland
---
 src/share/vm/opto/graphKit.cpp | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp
index e7ec989c5..78f2c44e9 100644
--- a/src/share/vm/opto/graphKit.cpp
+++ b/src/share/vm/opto/graphKit.cpp
@@ -2994,22 +2994,28 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
 }

 Node* cast_obj = NULL;
- const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr();
- // We may not have profiling here or it may not help us. If we have
- // a speculative type use it to perform an exact cast.
- ciKlass* spec_obj_type = obj_type->speculative_type();
- if (spec_obj_type != NULL ||
- (data != NULL &&
- // Counter has never been decremented (due to cast failure).
- // ...This is a reasonable thing to expect. It is true of
- // all casts inserted by javac to implement generic types.
- data->as_CounterData()->count() >= 0)) {
- cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk->klass(), spec_obj_type, safe_for_replace);
- if (cast_obj != NULL) {
- if (failure_control != NULL) // failure is now impossible
- (*failure_control) = top();
- // adjust the type of the phi to the exact klass:
- phi->raise_bottom_type(_gvn.type(cast_obj)->meet_speculative(TypePtr::NULL_PTR));
+ if (tk->klass_is_exact()) {
+ // The following optimization tries to statically cast the speculative type of the object
+ // (for example obtained during profiling) to the type of the superklass and then do a
+ // dynamic check that the type of the object is what we expect. To work correctly
+ // for checkcast and aastore the type of superklass should be exact.
+ const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr();
+ // We may not have profiling here or it may not help us. If we have
+ // a speculative type use it to perform an exact cast.
+ ciKlass* spec_obj_type = obj_type->speculative_type();
+ if (spec_obj_type != NULL ||
+ (data != NULL &&
+ // Counter has never been decremented (due to cast failure).
+ // ...This is a reasonable thing to expect. It is true of
+ // all casts inserted by javac to implement generic types.
+ data->as_CounterData()->count() >= 0)) {
+ cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk->klass(), spec_obj_type, safe_for_replace);
+ if (cast_obj != NULL) {
+ if (failure_control != NULL) // failure is now impossible
+ (*failure_control) = top();
+ // adjust the type of the phi to the exact klass:
+ phi->raise_bottom_type(_gvn.type(cast_obj)->meet_speculative(TypePtr::NULL_PTR));
+ }
 }
 }
-- 
cgit v1.2.3


From 3499309ff3ebac04f0a57bf8502035e241f55c09 Mon Sep 17 00:00:00 2001
From: iignatyev
Date: Thu, 6 Mar 2014 12:47:45 +0400
Subject: 8027124: [TESTBUG] NonTieredLevelsTest: java.lang.RuntimeException: private TestCase$Helper(java.lang.Object) must be osr_compiled

Reviewed-by: kvn, roland
---
 test/compiler/tiered/NonTieredLevelsTest.java | 5 ++++-
 test/compiler/tiered/TieredLevelsTest.java | 5 ++++-
 test/compiler/whitebox/CompilerWhiteBoxTest.java | 16 +++++++++++++++-
 test/compiler/whitebox/DeoptimizeAllTest.java | 9 +++------
 test/compiler/whitebox/DeoptimizeMethodTest.java | 9 +++------
 test/compiler/whitebox/IsMethodCompilableTest.java | 7 ++-----
 test/compiler/whitebox/MakeMethodNotCompilableTest.java | 9 +++------
 7 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/test/compiler/tiered/NonTieredLevelsTest.java b/test/compiler/tiered/NonTieredLevelsTest.java
index 715d32a89..13411a0dd 100644
--- a/test/compiler/tiered/NonTieredLevelsTest.java
+++ b/test/compiler/tiered/NonTieredLevelsTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -70,6 +70,9 @@ public class NonTieredLevelsTest extends CompLevelsTest {
 @Override
 protected void test() throws Exception {
+ if (skipXcompOSR()) {
+ return;
+ }
 checkNotCompiled();
 compile();
 checkCompiled();
diff --git a/test/compiler/tiered/TieredLevelsTest.java b/test/compiler/tiered/TieredLevelsTest.java
index 675a39449..9fb2254d0 100644
--- a/test/compiler/tiered/TieredLevelsTest.java
+++ b/test/compiler/tiered/TieredLevelsTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -51,6 +51,9 @@ public class TieredLevelsTest extends CompLevelsTest {
 @Override
 protected void test() throws Exception {
+ if (skipXcompOSR()) {
+ return;
+ }
 checkNotCompiled();
 compile();
 checkCompiled();
diff --git a/test/compiler/whitebox/CompilerWhiteBoxTest.java b/test/compiler/whitebox/CompilerWhiteBoxTest.java
index e47231e29..450423c04 100644
--- a/test/compiler/whitebox/CompilerWhiteBoxTest.java
+++ b/test/compiler/whitebox/CompilerWhiteBoxTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -380,6 +380,20 @@ public abstract class CompilerWhiteBoxTest {
 /** flag for OSR test case */
 boolean isOsr();
 }
+
+ /**
+ * @return {@code true} if the current test case is OSR and the mode is
+ * Xcomp, otherwise {@code false}
+ */
+ protected boolean skipXcompOSR() {
+ boolean result = testCase.isOsr()
+ && CompilerWhiteBoxTest.MODE.startsWith("compiled ");
+ if (result && IS_VERBOSE) {
+ System.err.printf("Warning: %s is not applicable in %s%n",
+ testCase.name(), CompilerWhiteBoxTest.MODE);
+ }
+ return result;
+ }
}

enum SimpleTestCase implements CompilerWhiteBoxTest.TestCase {
diff --git a/test/compiler/whitebox/DeoptimizeAllTest.java b/test/compiler/whitebox/DeoptimizeAllTest.java
index 350c99c8f..ea4e36400 100644
--- a/test/compiler/whitebox/DeoptimizeAllTest.java
+++ b/test/compiler/whitebox/DeoptimizeAllTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -51,11 +51,8 @@ public class DeoptimizeAllTest extends CompilerWhiteBoxTest {
  */
 @Override
 protected void test() throws Exception {
- if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
- "compiled ")) {
- System.err.printf("Warning: %s is not applicable in %s%n",
- testCase.name(), CompilerWhiteBoxTest.MODE);
- return;
+ if (skipXcompOSR()) {
+ return;
 }
 compile();
 checkCompiled();
diff --git a/test/compiler/whitebox/DeoptimizeMethodTest.java b/test/compiler/whitebox/DeoptimizeMethodTest.java
index 565a5b91c..0b9ffd2d9 100644
--- a/test/compiler/whitebox/DeoptimizeMethodTest.java
+++ b/test/compiler/whitebox/DeoptimizeMethodTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -51,11 +51,8 @@ public class DeoptimizeMethodTest extends CompilerWhiteBoxTest {
  */
 @Override
 protected void test() throws Exception {
- if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
- "compiled ")) {
- System.err.printf("Warning: %s is not applicable in %s%n",
- testCase.name(), CompilerWhiteBoxTest.MODE);
- return;
+ if (skipXcompOSR()) {
+ return;
 }
 compile();
 checkCompiled();
diff --git a/test/compiler/whitebox/IsMethodCompilableTest.java b/test/compiler/whitebox/IsMethodCompilableTest.java
index bd5916a7c..0b7dc1786 100644
--- a/test/compiler/whitebox/IsMethodCompilableTest.java
+++ b/test/compiler/whitebox/IsMethodCompilableTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -66,10 +66,7 @@ public class IsMethodCompilableTest extends CompilerWhiteBoxTest {
  */
 @Override
 protected void test() throws Exception {
- if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
- "compiled ")) {
- System.err.printf("Warning: %s is not applicable in %s%n",
- testCase.name(), CompilerWhiteBoxTest.MODE);
+ if (skipXcompOSR()) {
 return;
 }
 if (!isCompilable()) {
diff --git a/test/compiler/whitebox/MakeMethodNotCompilableTest.java b/test/compiler/whitebox/MakeMethodNotCompilableTest.java
index cbd65da45..d65868797 100644
--- a/test/compiler/whitebox/MakeMethodNotCompilableTest.java
+++ b/test/compiler/whitebox/MakeMethodNotCompilableTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -53,11 +53,8 @@ public class MakeMethodNotCompilableTest extends CompilerWhiteBoxTest {
  */
 @Override
 protected void test() throws Exception {
- if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith(
- "compiled ")) {
- System.err.printf("Warning: %s is not applicable in %s%n",
- testCase.name(), CompilerWhiteBoxTest.MODE);
- return;
+ if (skipXcompOSR()) {
+ return;
 }
 checkNotCompiled();
 if (!isCompilable()) {
-- 
cgit v1.2.3


From e1d1708217c11c8aa0e7ce6aa0d85afed268e5f7 Mon Sep 17 00:00:00 2001
From: iignatyev
Date: Thu, 6 Mar 2014 12:46:04 +0400
Subject: 8027257: [TESTBUG] compiler/ciReplay/TestVM.sh : Error: Could not find or load main class negative_test

Reviewed-by: roland, kvn
---
 test/compiler/ciReplay/TestVM.sh | 2 +-
 test/compiler/ciReplay/common.sh | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/test/compiler/ciReplay/TestVM.sh b/test/compiler/ciReplay/TestVM.sh
index e6c3cc569..615446667 100644
--- a/test/compiler/ciReplay/TestVM.sh
+++ b/test/compiler/ciReplay/TestVM.sh
@@ -78,8 +78,8 @@ then
 positive_test `expr $stop_level + 50` "TIERED LEVEL $stop_level :: REPLAY" \
 "-XX:TieredStopAtLevel=$stop_level"
 stop_level=`expr $stop_level + 1`
+ cleanup
 done
- cleanup
fi

echo TEST PASSED
diff --git a/test/compiler/ciReplay/common.sh b/test/compiler/ciReplay/common.sh
index bcfdad823..34fd729a8 100644
--- a/test/compiler/ciReplay/common.sh
+++ b/test/compiler/ciReplay/common.sh
@@ -99,13 +99,12 @@ common_tests() {
 # $2 - non-tiered comp_level
 nontiered_tests() {
 level=`grep "^compile " $replay_data | awk '{print $6}'`
- # is level available in non-tiere
+ # is level available in non-tiered
 if [ "$level" -eq $2 ]
 then
 positive_test $1 "NON-TIERED :: AVAILABLE COMP_LEVEL" \
 -XX:-TieredCompilation
 else
- negative_test `expr $1 + 1` "NON-TIERED :: UNAVAILABLE COMP_LEVEL" \
 negative_test `expr $1 + 1` "NON-TIERED :: UNAVAILABLE COMP_LEVEL" \
 -XX:-TieredCompilation
 fi
-- 
cgit v1.2.3


From fcaf631b6b2f15c72ebb5f0a66ca6f39594ca711 Mon Sep 17 00:00:00 2001
From: iignatyev
Date: Thu, 6 Mar 2014 12:45:59 +0400
Subject: 8028482: [TESTBUG] tests that use JMX should be in need_compact3 test group

Reviewed-by: roland, sla, dholmes
---
 test/TEST.groups | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/TEST.groups b/test/TEST.groups
index ee67fe938..017876be0 100644
--- a/test/TEST.groups
+++ b/test/TEST.groups
@@ -131,7 +131,9 @@ needs_compact3 = \
 gc/arguments/TestG1HeapRegionSize.java \
 gc/metaspace/TestMetaspaceMemoryPool.java \
 runtime/InternalApi/ThreadCpuTimesDeadlock.java \
- serviceability/threads/TestFalseDeadLock.java
+ serviceability/threads/TestFalseDeadLock.java \
+ compiler/tiered/NonTieredLevelsTest.java \
+ compiler/tiered/TieredLevelsTest.java

 # Compact 2 adds full VM tests
 compact2 = \
-- 
cgit v1.2.3


From b7e6e4bd7f62e8aa53735c48d3566a226f3fab83 Mon Sep 17 00:00:00 2001
From: minqi
Date: Wed, 26 Feb 2014 15:20:41 -0800
Subject: 6498581: ThreadInterruptTest3 produces wrong output on Windows

Summary: There is a race condition between os::interrupt and os::is_interrupted on Windows. When JVM_Sleep (Thread.sleep) checks whether the thread has been interrupted, it may see the interrupted flag set without a real interrupt having been posted, and so return early from the sleep (a spurious wakeup). Fix by also checking whether the interrupt event has really been set, which prevents the false return. For the intrinsic of _isInterrupted on Windows, take the fast path only when the bit is not set.
Reviewed-by: acorn, kvn
Contributed-by: david.holmes@oracle.com, yumin.qi@oracle.com
---
 src/os/windows/vm/os_windows.cpp | 5 +++--
 src/share/vm/opto/library_call.cpp | 8 +++++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/os/windows/vm/os_windows.cpp b/src/os/windows/vm/os_windows.cpp
index 4a540b66e..c09312143 100644
--- a/src/os/windows/vm/os_windows.cpp
+++ b/src/os/windows/vm/os_windows.cpp
@@ -3633,13 +3633,14 @@ bool os::is_interrupted(Thread* thread, bool clear_interrupted) {
 "possibility of dangling Thread pointer");

 OSThread* osthread = thread->osthread();
- bool interrupted = osthread->interrupted();
 // There is no synchronization between the setting of the interrupt
 // and it being cleared here. It is critical - see 6535709 - that
 // we only clear the interrupt state, and reset the interrupt event,
 // if we are going to report that we were indeed interrupted - else
 // an interrupt can be "lost", leading to spurious wakeups or lost wakeups
- // depending on the timing
+ // depending on the timing. By also checking the thread's interrupt event
+ // we see whether a real interrupt was posted, preventing spurious wakeups.
+ bool interrupted = osthread->interrupted() && (WaitForSingleObject(osthread->interrupt_event(), 0) == WAIT_OBJECT_0);
 if (interrupted && clear_interrupted) {
 osthread->set_interrupted(false);
 ResetEvent(osthread->interrupt_event());
diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp
index 564aa7c1f..1630c6881 100644
--- a/src/share/vm/opto/library_call.cpp
+++ b/src/share/vm/opto/library_call.cpp
@@ -3237,7 +3237,8 @@ bool LibraryCallKit::inline_native_currentThread() {
 // private native boolean java.lang.Thread.isInterrupted(boolean ClearInterrupted);
 bool LibraryCallKit::inline_native_isInterrupted() {
 // Add a fast path to t.isInterrupted(clear_int):
- // (t == Thread.current() && (!TLS._osthread._interrupted || !clear_int))
+ // (t == Thread.current() &&
+ // (!TLS._osthread._interrupted || WINDOWS_ONLY(false) NOT_WINDOWS(!clear_int)))
 // ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int)
 // So, in the common case that the interrupt bit is false,
 // we avoid making a call into the VM. Even if the interrupt bit
@@ -3294,6 +3295,7 @@ bool LibraryCallKit::inline_native_isInterrupted() {
 // drop through to next case
 set_control( _gvn.transform(new (C) IfTrueNode(iff_bit)));

+#ifndef TARGET_OS_FAMILY_windows
 // (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path.
 Node* clr_arg = argument(1);
 Node* cmp_arg = _gvn.transform(new (C) CmpINode(clr_arg, intcon(0)));
@@ -3307,6 +3309,10 @@ bool LibraryCallKit::inline_native_isInterrupted() {
 // drop through to next case
 set_control( _gvn.transform(new (C) IfTrueNode(iff_arg)));

+#else
+ // To return true on Windows you must read the _interrupted field
+ // and check the event state, i.e. take the slow path.
+#endif // TARGET_OS_FAMILY_windows

 // (d) Otherwise, go to the slow path.
 slow_region->add_req(control());
-- 
cgit v1.2.3


From 51cff22235359c9cf24e690073ff0191ba582965 Mon Sep 17 00:00:00 2001
From: kvn
Date: Wed, 5 Mar 2014 16:21:22 -0800
Subject: 8035983: Fix "Native frames:" in crash report (hs_err file)

Summary: check fr.sender_sp() for Java threads instead of os::is_first_C_frame(&fr).
Reviewed-by: twisti, coleenp
---
 src/share/vm/utilities/vmError.cpp | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/share/vm/utilities/vmError.cpp b/src/share/vm/utilities/vmError.cpp
index 4b0953ae4..e950d3386 100644
--- a/src/share/vm/utilities/vmError.cpp
+++ b/src/share/vm/utilities/vmError.cpp
@@ -592,13 +592,24 @@ void VMError::report(outputStream* st) {
 st->cr();

 // Compiled code may use EBP register on x86 so it looks like
 // non-walkable C frame. Use frame.sender() for java frames.
- if (_thread && _thread->is_Java_thread() && fr.is_java_frame()) {
- RegisterMap map((JavaThread*)_thread, false); // No update
- fr = fr.sender(&map);
- continue;
+ if (_thread && _thread->is_Java_thread()) {
+ // Catch the very first native frame by using the stack address.
+ // For a JavaThread, stack_base and stack_size should be set.
+ if (!_thread->on_local_stack((address)(fr.sender_sp() + 1))) {
+ break;
+ }
+ if (fr.is_java_frame()) {
+ RegisterMap map((JavaThread*)_thread, false); // No update
+ fr = fr.sender(&map);
+ } else {
+ fr = os::get_sender_for_C_frame(&fr);
+ }
+ } else {
+ // is_first_C_frame() only does simple checks for the frame pointer;
+ // it will pass if java compiled code has a pointer in EBP.
+ if (os::is_first_C_frame(&fr)) break;
+ fr = os::get_sender_for_C_frame(&fr);
 }
- if (os::is_first_C_frame(&fr)) break;
- fr = os::get_sender_for_C_frame(&fr);
 }

 if (count > StackPrintLimit) {
-- 
cgit v1.2.3
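
For readers outside the HotSpot tree, the shape of the guard added by 8035983 can be illustrated in isolation. The sketch below is a minimal, self-contained approximation, not the VM's actual code: `Thread`, `Frame`, and `walk_frames` are hypothetical stand-ins for HotSpot's `JavaThread`, `frame`, and the loop in `VMError::report()`, and `sender` abstracts both `frame::sender()` and `os::get_sender_for_C_frame()`. The point it demonstrates is that the walk now stops as soon as a sender SP falls outside the current thread's stack, instead of trusting `os::is_first_C_frame()` to spot the first native frame.

```cpp
#include <stdint.h>
#include <stddef.h>

// Simplified stand-ins (hypothetical) for the HotSpot types involved.
struct Thread {
  uintptr_t stack_base;   // highest address of this thread's stack
  size_t    stack_size;   // the stack grows down from stack_base

  // Mirrors the semantics of Thread::on_local_stack(): true if addr
  // lies within [stack_base - stack_size, stack_base).
  bool on_local_stack(uintptr_t addr) const {
    return addr < stack_base && addr >= stack_base - stack_size;
  }
};

struct Frame {
  uintptr_t* sp;          // this frame's stack pointer
  uintptr_t* sender_sp;   // the caller's stack pointer
};

// Walk at most 'limit' frames (cf. StackPrintLimit), stopping once the
// sender SP leaves the thread's stack: the bounds check that replaces
// the is_first_C_frame() heuristic for Java threads.
template <typename SenderFn>
int walk_frames(const Thread& t, Frame fr, SenderFn sender, int limit) {
  int count = 0;
  while (count < limit) {
    count++;
    // '+ 1' steps one word past sender_sp, as in the patch, so a sender
    // SP sitting exactly at the stack base is still rejected.
    if (!t.on_local_stack(reinterpret_cast<uintptr_t>(fr.sender_sp + 1))) {
      break;  // first native frame reached
    }
    fr = sender(fr);
  }
  return count;
}
```

Under these assumptions the loop terminates even when compiled code has left a stray value in EBP, which is exactly the failure mode the commit message describes for the old `is_first_C_frame()` check.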