summaryrefslogtreecommitdiff
path: root/openmp
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2019-01-03 17:43:46 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2019-01-03 17:43:46 +0000
commit5a2151d0cf22b60f07b46d1a16d6aa385de15834 (patch)
treeb97f4391181f5b55d81d16b1f8d983502e36e468 /openmp
parent481f30de556f9ffa8234ee3af543e479f94c31e4 (diff)
[OPENMP][NVPTX]Fix incompatibility of __syncthreads with LLVM, NFC.
Summary: One of the LLVM optimizations, split critical edges, also clones tail instructions. This is a dangerous operation for the __syncthreads() function, and this transformation leads to undefined behavior or incorrect results. This patch fixes the problem by replacing the __syncthreads() function with an assembler instruction, whose cost is too high and which cannot be copied. Reviewers: grokos, gtbercea, kkwli0 Subscribers: guansong, openmp-commits, caomhin Differential Revision: https://reviews.llvm.org/D56274
Diffstat (limited to 'openmp')
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu6
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu9
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h3
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu3
4 files changed, 15 insertions, 6 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index 9bd5cab6997..1f598ec9d2f 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -564,7 +564,8 @@ EXTERN void __kmpc_get_team_static_memory(const void *buf, size_t size,
if (GetThreadIdInBlock() == 0) {
*frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size);
}
- __syncthreads();
+ // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+ __SYNCTHREADS();
return;
}
ASSERT0(LT_FUSSY, GetThreadIdInBlock() == getMasterThreadId(),
@@ -577,7 +578,8 @@ EXTERN void __kmpc_restore_team_static_memory(int16_t is_shared) {
if (is_shared)
return;
if (isSPMDMode()) {
- __syncthreads();
+ // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+ __SYNCTHREADS();
if (GetThreadIdInBlock() == 0) {
omptarget_nvptx_simpleMemoryManager.Release();
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
index 86746810547..dd5178670f8 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
@@ -105,7 +105,8 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
omptarget_nvptx_simpleThreadPrivateContext =
omptarget_nvptx_device_simpleState[slot].Dequeue();
}
- __syncthreads();
+ // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+ __SYNCTHREADS();
omptarget_nvptx_simpleThreadPrivateContext->Init();
return;
}
@@ -129,7 +130,8 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
// init team context
currTeamDescr.InitTeamDescr();
}
- __syncthreads();
+ // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+ __SYNCTHREADS();
omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
@@ -170,7 +172,8 @@ EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit() {
EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
// We're not going to pop the task descr stack of each thread since
// there are no more parallel regions in SPMD mode.
- __syncthreads();
+ // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+ __SYNCTHREADS();
int threadId = GetThreadIdInBlock();
if (!RequiresOMPRuntime) {
if (threadId == 0) {
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index b63feaefbdd..6539756d9f4 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -63,6 +63,9 @@
#define __ACTIVEMASK() __ballot(1)
#endif
+#define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
+#define __SYNCTHREADS() __SYNCTHREADS_N(0)
+
// arguments needed for L0 parallelism only.
class omptarget_nvptx_SharedArgs {
public:
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
index 7cdb7ffb12a..5f6aef94d95 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -74,7 +74,8 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
// parallel region and that all worker threads participate.
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
- __syncthreads();
+ // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+ __SYNCTHREADS();
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
}