diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2019-01-03 17:43:46 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2019-01-03 17:43:46 +0000 |
commit | c0c737ac56d8031e83118d231bb273a54493a489 (patch) | |
tree | 04af855e820f8941f345569523861664e46624d3 /libomptarget/deviceRTLs | |
parent | 270adfa61aae21ac9c1a643f6f4a39a50601e6e2 (diff) |
[OPENMP][NVPTX]Fix incompatibility of __syncthreads with LLVM, NFC.
Summary:
One of the LLVM optimizations, split critical edges, also clones tail
instructions. This is a dangerous operation for __syncthreads()
functions and this transformation leads to undefined behavior or
incorrect results. This patch fixes the problem by replacing the
__syncthreads() function with an inline assembler instruction, whose cost
is too high and which cannot be copied.
Reviewers: grokos, gtbercea, kkwli0
Subscribers: guansong, openmp-commits, caomhin
Differential Revision: https://reviews.llvm.org/D56274
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@350333 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget/deviceRTLs')
4 files changed, 15 insertions, 6 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu index 9bd5cab..1f598ec 100644 --- a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu +++ b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu @@ -564,7 +564,8 @@ EXTERN void __kmpc_get_team_static_memory(const void *buf, size_t size, if (GetThreadIdInBlock() == 0) { *frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size); } - __syncthreads(); + // FIXME: use __syncthreads instead when the function copy is fixed in LLVM. + __SYNCTHREADS(); return; } ASSERT0(LT_FUSSY, GetThreadIdInBlock() == getMasterThreadId(), @@ -577,7 +578,8 @@ EXTERN void __kmpc_restore_team_static_memory(int16_t is_shared) { if (is_shared) return; if (isSPMDMode()) { - __syncthreads(); + // FIXME: use __syncthreads instead when the function copy is fixed in LLVM. + __SYNCTHREADS(); if (GetThreadIdInBlock() == 0) { omptarget_nvptx_simpleMemoryManager.Release(); } diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu index 8674681..dd51786 100644 --- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu +++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu @@ -105,7 +105,8 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, omptarget_nvptx_simpleThreadPrivateContext = omptarget_nvptx_device_simpleState[slot].Dequeue(); } - __syncthreads(); + // FIXME: use __syncthreads instead when the function copy is fixed in LLVM. + __SYNCTHREADS(); omptarget_nvptx_simpleThreadPrivateContext->Init(); return; } @@ -129,7 +130,8 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, // init team context currTeamDescr.InitTeamDescr(); } - __syncthreads(); + // FIXME: use __syncthreads instead when the function copy is fixed in LLVM. 
+ __SYNCTHREADS(); omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor(); omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor(); @@ -170,7 +172,8 @@ EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit() { EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) { // We're not going to pop the task descr stack of each thread since // there are no more parallel regions in SPMD mode. - __syncthreads(); + // FIXME: use __syncthreads instead when the function copy is fixed in LLVM. + __SYNCTHREADS(); int threadId = GetThreadIdInBlock(); if (!RequiresOMPRuntime) { if (threadId == 0) { diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h index b63feae..6539756 100644 --- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -63,6 +63,9 @@ #define __ACTIVEMASK() __ballot(1) #endif +#define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory"); +#define __SYNCTHREADS() __SYNCTHREADS_N(0) + // arguments needed for L0 parallelism only. class omptarget_nvptx_SharedArgs { public: diff --git a/libomptarget/deviceRTLs/nvptx/src/sync.cu b/libomptarget/deviceRTLs/nvptx/src/sync.cu index 7cdb7ff..5f6aef9 100644 --- a/libomptarget/deviceRTLs/nvptx/src/sync.cu +++ b/libomptarget/deviceRTLs/nvptx/src/sync.cu @@ -74,7 +74,8 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) { // parallel region and that all worker threads participate. EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) { PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n"); - __syncthreads(); + // FIXME: use __syncthreads instead when the function copy is fixed in LLVM. + __SYNCTHREADS(); PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n"); } |