diff options
Diffstat (limited to 'src/share/vm/utilities/taskqueue.hpp')
-rw-r--r-- | src/share/vm/utilities/taskqueue.hpp | 75 |
1 files changed, 65 insertions, 10 deletions
diff --git a/src/share/vm/utilities/taskqueue.hpp b/src/share/vm/utilities/taskqueue.hpp index 0dcc7c262..8b558c988 100644 --- a/src/share/vm/utilities/taskqueue.hpp +++ b/src/share/vm/utilities/taskqueue.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -135,6 +135,8 @@ void TaskQueueStats::reset() { } #endif // TASKQUEUE_STATS +// TaskQueueSuper collects functionality common to all GenericTaskQueue instances. + template <unsigned int N, MEMFLAGS F> class TaskQueueSuper: public CHeapObj<F> { protected: @@ -252,7 +254,36 @@ public: TASKQUEUE_STATS_ONLY(TaskQueueStats stats;) }; - +// +// GenericTaskQueue implements an ABP, Aurora-Blumofe-Plaxton, double- +// ended-queue (deque), intended for use in work stealing. Queue operations +// are non-blocking. +// +// A queue owner thread performs push() and pop_local() operations on one end +// of the queue, while other threads may steal work using the pop_global() +// method. +// +// The main difference to the original algorithm is that this +// implementation allows wrap-around at the end of its allocated +// storage, which is an array. +// +// The original paper is: +// +// Arora, N. S., Blumofe, R. D., and Plaxton, C. G. +// Thread scheduling for multiprogrammed multiprocessors. +// Theory of Computing Systems 34, 2 (2001), 115-144. +// +// The following paper provides an correctness proof and an +// implementation for weakly ordered memory models including (pseudo-) +// code containing memory barriers for a Chase-Lev deque. Chase-Lev is +// similar to ABP, with the main difference that it allows resizing of the +// underlying storage: +// +// Le, N. M., Pop, A., Cohen A., and Nardell, F. Z. +// Correct and efficient work-stealing for weak memory models +// Proceedings of the 18th ACM SIGPLAN symposium on Principles and +// practice of parallel programming (PPoPP 2013), 69-80 +// template <class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE> class GenericTaskQueue: public TaskQueueSuper<N, F> { @@ -343,8 +374,12 @@ bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) { if (dirty_n_elems == N - 1) { // Actually means 0, so do the push. uint localBot = _bottom; - // g++ complains if the volatile result of the assignment is unused. - const_cast<E&>(_elems[localBot] = t); + // g++ complains if the volatile result of the assignment is + // unused, so we cast the volatile away. We cannot cast directly + // to void, because gcc treats that as not using the result of the + // assignment. However, casting to E& means that we trigger an + // unused-value warning. So, we cast the E& to void. + (void)const_cast<E&>(_elems[localBot] = t); OrderAccess::release_store(&_bottom, increment_index(localBot)); TASKQUEUE_STATS_ONLY(stats.record_push()); return true; @@ -394,13 +429,24 @@ bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) { template<class E, MEMFLAGS F, unsigned int N> bool GenericTaskQueue<E, F, N>::pop_global(E& t) { Age oldAge = _age.get(); - uint localBot = _bottom; + // Architectures with weak memory model require a barrier here + // to guarantee that bottom is not older than age, + // which is crucial for the correctness of the algorithm. +#if !(defined SPARC || defined IA32 || defined AMD64) + OrderAccess::fence(); +#endif + uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom); uint n_elems = size(localBot, oldAge.top()); if (n_elems == 0) { return false; } - const_cast<E&>(t = _elems[oldAge.top()]); + // g++ complains if the volatile result of the assignment is + // unused, so we cast the volatile away. We cannot cast directly + // to void, because gcc treats that as not using the result of the + // assignment. However, casting to E& means that we trigger an + // unused-value warning. So, we cast the E& to void. + (void) const_cast<E&>(t = _elems[oldAge.top()]); Age newAge(oldAge); newAge.increment(); Age resAge = _age.cmpxchg(newAge, oldAge); @@ -638,13 +684,17 @@ public: template<class E, MEMFLAGS F, unsigned int N> inline bool GenericTaskQueue<E, F, N>::push(E t) { uint localBot = _bottom; - assert((localBot >= 0) && (localBot < N), "_bottom out of range."); + assert(localBot < N, "_bottom out of range."); idx_t top = _age.top(); uint dirty_n_elems = dirty_size(localBot, top); assert(dirty_n_elems < N, "n_elems out of range."); if (dirty_n_elems < max_elems()) { - // g++ complains if the volatile result of the assignment is unused. - const_cast<E&>(_elems[localBot] = t); + // g++ complains if the volatile result of the assignment is + // unused, so we cast the volatile away. We cannot cast directly + // to void, because gcc treats that as not using the result of the + // assignment. However, casting to E& means that we trigger an + // unused-value warning. So, we cast the E& to void. + (void) const_cast<E&>(_elems[localBot] = t); OrderAccess::release_store(&_bottom, increment_index(localBot)); TASKQUEUE_STATS_ONLY(stats.record_push()); return true; @@ -668,7 +718,12 @@ GenericTaskQueue<E, F, N>::pop_local(E& t) { // This is necessary to prevent any read below from being reordered // before the store just above. OrderAccess::fence(); - const_cast<E&>(t = _elems[localBot]); + // g++ complains if the volatile result of the assignment is + // unused, so we cast the volatile away. We cannot cast directly + // to void, because gcc treats that as not using the result of the + // assignment. However, casting to E& means that we trigger an + // unused-value warning. So, we cast the E& to void. + (void) const_cast<E&>(t = _elems[localBot]); // This is a second read of "age"; the "size()" above is the first. // If there's still at least one element in the queue, based on the // "_bottom" and "age" we've read, then there can be no interference with |