summaryrefslogtreecommitdiff
path: root/openmp/libomptarget
diff options
context:
space:
mode:
author Alexey Bataev <a.bataev@hotmail.com> 2018-12-28 17:31:06 +0000
committer Alexey Bataev <a.bataev@hotmail.com> 2018-12-28 17:31:06 +0000
commit cfbd1ff02f66b254ee6b0b79ff04ce6c91f72ff1 (patch)
tree c4eb79788348a124013a95f38b7f05192cfa74d4 /openmp/libomptarget
parent a6eb216e054b3d6a008534d263d4bf2bbad9bb52 (diff)
[OPENMP][NVPTX] Fixed initialization of the data-sharing interface.
Summary: Avoid using the atomic loop to wait for the completion of the data-sharing interface initialization; instead, use __shfl_sync for communication within the warp to signal the other threads in the warp that the initialization has completed. Reviewers: gtbercea, kkwli0, grokos Subscribers: guansong, jfb, caomhin, openmp-commits Differential Revision: https://reviews.llvm.org/D56100
Diffstat (limited to 'openmp/libomptarget')
-rw-r--r-- openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu 15
1 files changed, 9 insertions, 6 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index f69daa172fa..9bd5cab6997 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -390,8 +390,9 @@ INLINE void* data_sharing_push_stack_common(size_t PushSize) {
PushSize = (PushSize + (Alignment - 1)) / Alignment * Alignment;
// Frame pointer must be visible to all workers in the same warp.
- unsigned WID = getWarpId();
- void *volatile &FrameP = DataSharingState.FramePtr[WID];
+ const unsigned WID = getWarpId();
+ void *FrameP = 0;
+ const int32_t CurActive = getActiveThreadsMask();
if (IsWarpMaster) {
// SlotP will point to either the shared memory slot or an existing
@@ -434,17 +435,19 @@ INLINE void* data_sharing_push_stack_common(size_t PushSize) {
// The stack pointer always points to the next free stack frame.
StackP = &NewSlot->Data[0] + PushSize;
// The frame pointer always points to the beginning of the frame.
- FrameP = &NewSlot->Data[0];
+ FrameP = DataSharingState.FramePtr[WID] = &NewSlot->Data[0];
} else {
// Add the data chunk to the current slot. The frame pointer is set to
// point to the start of the new frame held in StackP.
- FrameP = StackP;
+ FrameP = DataSharingState.FramePtr[WID] = StackP;
// Reset stack pointer to the requested address.
StackP = (void *)RequestedEndAddress;
}
- } else {
- while (!FrameP);
}
+ // Get address from lane 0.
+ ((int *)&FrameP)[0] = __SHFL_SYNC(CurActive, ((int *)&FrameP)[0], 0);
+ if (sizeof(FrameP) == 8)
+ ((int *)&FrameP)[1] = __SHFL_SYNC(CurActive, ((int *)&FrameP)[1], 0);
return FrameP;
}