aboutsummaryrefslogtreecommitdiff
path: root/src/share/vm/utilities/taskqueue.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/share/vm/utilities/taskqueue.hpp')
-rw-r--r--src/share/vm/utilities/taskqueue.hpp75
1 files changed, 65 insertions, 10 deletions
diff --git a/src/share/vm/utilities/taskqueue.hpp b/src/share/vm/utilities/taskqueue.hpp
index 0dcc7c262..8b558c988 100644
--- a/src/share/vm/utilities/taskqueue.hpp
+++ b/src/share/vm/utilities/taskqueue.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -135,6 +135,8 @@ void TaskQueueStats::reset() {
}
#endif // TASKQUEUE_STATS
+// TaskQueueSuper collects functionality common to all GenericTaskQueue instances.
+
template <unsigned int N, MEMFLAGS F>
class TaskQueueSuper: public CHeapObj<F> {
protected:
@@ -252,7 +254,36 @@ public:
TASKQUEUE_STATS_ONLY(TaskQueueStats stats;)
};
-
+//
+// GenericTaskQueue implements an ABP, Aurora-Blumofe-Plaxton, double-
+// ended-queue (deque), intended for use in work stealing. Queue operations
+// are non-blocking.
+//
+// A queue owner thread performs push() and pop_local() operations on one end
+// of the queue, while other threads may steal work using the pop_global()
+// method.
+//
+// The main difference to the original algorithm is that this
+// implementation allows wrap-around at the end of its allocated
+// storage, which is an array.
+//
+// The original paper is:
+//
+// Arora, N. S., Blumofe, R. D., and Plaxton, C. G.
+// Thread scheduling for multiprogrammed multiprocessors.
+// Theory of Computing Systems 34, 2 (2001), 115-144.
+//
+// The following paper provides an correctness proof and an
+// implementation for weakly ordered memory models including (pseudo-)
+// code containing memory barriers for a Chase-Lev deque. Chase-Lev is
+// similar to ABP, with the main difference that it allows resizing of the
+// underlying storage:
+//
+// Le, N. M., Pop, A., Cohen A., and Nardell, F. Z.
+// Correct and efficient work-stealing for weak memory models
+// Proceedings of the 18th ACM SIGPLAN symposium on Principles and
+// practice of parallel programming (PPoPP 2013), 69-80
+//
template <class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE>
class GenericTaskQueue: public TaskQueueSuper<N, F> {
@@ -343,8 +374,12 @@ bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) {
if (dirty_n_elems == N - 1) {
// Actually means 0, so do the push.
uint localBot = _bottom;
- // g++ complains if the volatile result of the assignment is unused.
- const_cast<E&>(_elems[localBot] = t);
+ // g++ complains if the volatile result of the assignment is
+ // unused, so we cast the volatile away. We cannot cast directly
+ // to void, because gcc treats that as not using the result of the
+ // assignment. However, casting to E& means that we trigger an
+ // unused-value warning. So, we cast the E& to void.
+ (void)const_cast<E&>(_elems[localBot] = t);
OrderAccess::release_store(&_bottom, increment_index(localBot));
TASKQUEUE_STATS_ONLY(stats.record_push());
return true;
@@ -394,13 +429,24 @@ bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) {
template<class E, MEMFLAGS F, unsigned int N>
bool GenericTaskQueue<E, F, N>::pop_global(E& t) {
Age oldAge = _age.get();
- uint localBot = _bottom;
+ // Architectures with weak memory model require a barrier here
+ // to guarantee that bottom is not older than age,
+ // which is crucial for the correctness of the algorithm.
+#if !(defined SPARC || defined IA32 || defined AMD64)
+ OrderAccess::fence();
+#endif
+ uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom);
uint n_elems = size(localBot, oldAge.top());
if (n_elems == 0) {
return false;
}
- const_cast<E&>(t = _elems[oldAge.top()]);
+ // g++ complains if the volatile result of the assignment is
+ // unused, so we cast the volatile away. We cannot cast directly
+ // to void, because gcc treats that as not using the result of the
+ // assignment. However, casting to E& means that we trigger an
+ // unused-value warning. So, we cast the E& to void.
+ (void) const_cast<E&>(t = _elems[oldAge.top()]);
Age newAge(oldAge);
newAge.increment();
Age resAge = _age.cmpxchg(newAge, oldAge);
@@ -638,13 +684,17 @@ public:
template<class E, MEMFLAGS F, unsigned int N> inline bool
GenericTaskQueue<E, F, N>::push(E t) {
uint localBot = _bottom;
- assert((localBot >= 0) && (localBot < N), "_bottom out of range.");
+ assert(localBot < N, "_bottom out of range.");
idx_t top = _age.top();
uint dirty_n_elems = dirty_size(localBot, top);
assert(dirty_n_elems < N, "n_elems out of range.");
if (dirty_n_elems < max_elems()) {
- // g++ complains if the volatile result of the assignment is unused.
- const_cast<E&>(_elems[localBot] = t);
+ // g++ complains if the volatile result of the assignment is
+ // unused, so we cast the volatile away. We cannot cast directly
+ // to void, because gcc treats that as not using the result of the
+ // assignment. However, casting to E& means that we trigger an
+ // unused-value warning. So, we cast the E& to void.
+ (void) const_cast<E&>(_elems[localBot] = t);
OrderAccess::release_store(&_bottom, increment_index(localBot));
TASKQUEUE_STATS_ONLY(stats.record_push());
return true;
@@ -668,7 +718,12 @@ GenericTaskQueue<E, F, N>::pop_local(E& t) {
// This is necessary to prevent any read below from being reordered
// before the store just above.
OrderAccess::fence();
- const_cast<E&>(t = _elems[localBot]);
+ // g++ complains if the volatile result of the assignment is
+ // unused, so we cast the volatile away. We cannot cast directly
+ // to void, because gcc treats that as not using the result of the
+ // assignment. However, casting to E& means that we trigger an
+ // unused-value warning. So, we cast the E& to void.
+ (void) const_cast<E&>(t = _elems[localBot]);
// This is a second read of "age"; the "size()" above is the first.
// If there's still at least one element in the queue, based on the
// "_bottom" and "age" we've read, then there can be no interference with