aboutsummaryrefslogtreecommitdiff
path: root/libomptarget/deviceRTLs
diff options
context:
space:
mode:
author Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> 2018-09-25 13:23:32 +0000
committer Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> 2018-09-25 13:23:32 +0000
commit e1688b470b8ef3441983d333a6642e7b19d29b90 (patch)
tree e4d3b9fa9301c29a77f15eb9a22edd178746d92a /libomptarget/deviceRTLs
parent e5a629ee54bdaf95e015fb991c9e1c5111a9ddb6 (diff)
[OpenMP][libomptarget] Simplify warp master selection for data sharing
Summary: There is currently no supported situation where the warp master is not the first thread in the warp. This change also prevents device execution from hanging on Volta GPUs when ballot_sync is called by a number of threads that is less than the size of a warp. Reviewers: ABataev, caomhin, grokos Reviewed By: grokos Subscribers: guansong, openmp-commits Differential Revision: https://reviews.llvm.org/D50188 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@342972 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget/deviceRTLs')
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/data_sharing.cu 4
1 files changed, 2 insertions, 2 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index 2b3a90f..6e1548b 100644
--- a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -389,7 +389,7 @@ EXTERN void* __kmpc_data_sharing_push_stack(size_t DataSize,
void *&FrameP = DataSharingState.FramePtr[WID];
// Only warp active master threads manage the stack.
- if (IsWarpMasterActiveThread()) {
+ if (getThreadId() % WARPSIZE == 0) {
// SlotP will point to either the shared memory slot or an existing
// global memory slot.
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
@@ -468,7 +468,7 @@ EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) {
return omptarget_nvptx_SimpleThreadPrivateContext::Deallocate(FrameStart);
}
- if (IsWarpMasterActiveThread()) {
+ if (getThreadId() % WARPSIZE == 0) {
unsigned WID = getWarpId();
// Current slot