aboutsummaryrefslogtreecommitdiff
path: root/libomptarget/deviceRTLs/nvptx/src/parallel.cu
diff options
context:
space:
mode:
Diffstat (limited to 'libomptarget/deviceRTLs/nvptx/src/parallel.cu')
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/parallel.cu26
1 files changed, 22 insertions, 4 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
index d8049d9..5db443c 100644
--- a/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -311,7 +311,16 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
(int)newTaskDescr->ThreadId(), (int)nThreads);
isActive = true;
- IncParallelLevel(threadsInTeam != 1);
+ // Reconverge the threads at the end of the parallel region to correctly
+ // handle parallel levels.
+ // In CUDA 9+ in non-SPMD mode we have either 1 worker thread or the whole
+ // warp. If only 1 thread is active, there is no need to reconverge the
+ // threads. If we have the whole warp, reconverge all the threads in the warp
+ // before actually trying to change the parallel level. Otherwise, the
+ // parallel level can be changed incorrectly because of thread divergence.
+ bool IsActiveParallelRegion = threadsInTeam != 1;
+ IncParallelLevel(IsActiveParallelRegion,
+ IsActiveParallelRegion ? 0xFFFFFFFF : 1u);
}
return isActive;
@@ -329,7 +338,16 @@ EXTERN void __kmpc_kernel_end_parallel() {
omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(
threadId, currTaskDescr->GetPrevTaskDescr());
- DecParallelLevel(threadsInTeam != 1);
+ // Reconverge the threads at the end of the parallel region to correctly
+ // handle parallel levels.
+ // In CUDA 9+ in non-SPMD mode we have either 1 worker thread or the whole
+ // warp. If only 1 thread is active, there is no need to reconverge the
+ // threads. If we have the whole warp, reconverge all the threads in the warp
+ // before actually trying to change the parallel level. Otherwise, the
+ // parallel level can be changed incorrectly because of thread divergence.
+ bool IsActiveParallelRegion = threadsInTeam != 1;
+ DecParallelLevel(IsActiveParallelRegion,
+ IsActiveParallelRegion ? 0xFFFFFFFF : 1u);
}
////////////////////////////////////////////////////////////////////////////////
@@ -339,7 +357,7 @@ EXTERN void __kmpc_kernel_end_parallel() {
EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_serialized_parallel\n");
- IncParallelLevel(/*ActiveParallel=*/false);
+ IncParallelLevel(/*ActiveParallel=*/false, __kmpc_impl_activemask());
if (checkRuntimeUninitialized(loc)) {
ASSERT0(LT_FUSSY, checkSPMDMode(loc),
@@ -378,7 +396,7 @@ EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,
uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_end_serialized_parallel\n");
- DecParallelLevel(/*ActiveParallel=*/false);
+ DecParallelLevel(/*ActiveParallel=*/false, __kmpc_impl_activemask());
if (checkRuntimeUninitialized(loc)) {
ASSERT0(LT_FUSSY, checkSPMDMode(loc),