diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2018-12-28 17:31:06 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2018-12-28 17:31:06 +0000 |
commit | cfbd1ff02f66b254ee6b0b79ff04ce6c91f72ff1 (patch) | |
tree | c4eb79788348a124013a95f38b7f05192cfa74d4 /openmp/libomptarget | |
parent | a6eb216e054b3d6a008534d263d4bf2bbad9bb52 (diff) |
[OPENMP][NVPTX]Fixed initialization of the data-sharing interface.
Summary:
Avoid using the atomic loop to wait for the completion of the
data-sharing interface initialization; use __shfl_sync instead for
communication within the warp to signal the other threads in the warp about
the completion of the initialization.
Reviewers: gtbercea, kkwli0, grokos
Subscribers: guansong, jfb, caomhin, openmp-commits
Differential Revision: https://reviews.llvm.org/D56100
Diffstat (limited to 'openmp/libomptarget')
-rw-r--r-- | openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu | 15 |
1 files changed, 9 insertions, 6 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu index f69daa172fa..9bd5cab6997 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu @@ -390,8 +390,9 @@ INLINE void* data_sharing_push_stack_common(size_t PushSize) { PushSize = (PushSize + (Alignment - 1)) / Alignment * Alignment; // Frame pointer must be visible to all workers in the same warp. - unsigned WID = getWarpId(); - void *volatile &FrameP = DataSharingState.FramePtr[WID]; + const unsigned WID = getWarpId(); + void *FrameP = 0; + const int32_t CurActive = getActiveThreadsMask(); if (IsWarpMaster) { // SlotP will point to either the shared memory slot or an existing @@ -434,17 +435,19 @@ INLINE void* data_sharing_push_stack_common(size_t PushSize) { // The stack pointer always points to the next free stack frame. StackP = &NewSlot->Data[0] + PushSize; // The frame pointer always points to the beginning of the frame. - FrameP = &NewSlot->Data[0]; + FrameP = DataSharingState.FramePtr[WID] = &NewSlot->Data[0]; } else { // Add the data chunk to the current slot. The frame pointer is set to // point to the start of the new frame held in StackP. - FrameP = StackP; + FrameP = DataSharingState.FramePtr[WID] = StackP; // Reset stack pointer to the requested address. StackP = (void *)RequestedEndAddress; } - } else { - while (!FrameP); } + // Get address from lane 0. + ((int *)&FrameP)[0] = __SHFL_SYNC(CurActive, ((int *)&FrameP)[0], 0); + if (sizeof(FrameP) == 8) + ((int *)&FrameP)[1] = __SHFL_SYNC(CurActive, ((int *)&FrameP)[1], 0); return FrameP; } |