aboutsummaryrefslogtreecommitdiff
path: root/libomptarget
diff options
context:
space:
mode:
author: Jon Chesterfield <jonathanchesterfield@gmail.com> 2019-08-28 14:22:35 +0000
committer: Jon Chesterfield <jonathanchesterfield@gmail.com> 2019-08-28 14:22:35 +0000
commit: ee031606128fec949b4f90152e25c37fdab13a8c (patch)
tree: e16724f50b1edfb4e8de86c97abdad879ceba304 /libomptarget
parent: 7ed5372fe0c8ae8328b2c8274d88cf3762b3b776 (diff)
[libomptarget] Refactor syncthreads macro to inline function
Summary:
[libomptarget] Refactor syncthreads macro to inline function

See also abandoned D66846, split into this diff and others.
Rev 2 of D66855

Reviewers: jdoerfert, ABataev, grokos, ronlieb, gregrodgers
Subscribers: openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D66861

git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@370210 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget')
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/data_sharing.cu 6
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu 9
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h 8
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/sync.cu 3
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/target_impl.h 9
5 files changed, 16 insertions, 19 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index b7b8002..5463adc 100644
--- a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -553,8 +553,7 @@ EXTERN void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
if (GetThreadIdInBlock() == 0) {
*frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size);
}
- // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
- __SYNCTHREADS();
+ __kmpc_impl_syncthreads();
return;
}
ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
@@ -568,8 +567,7 @@ EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
if (is_shared)
return;
if (isSPMDExecutionMode) {
- // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
- __SYNCTHREADS();
+ __kmpc_impl_syncthreads();
if (GetThreadIdInBlock() == 0) {
omptarget_nvptx_simpleMemoryManager.Release();
}
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
index 706776a..c84c055 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "omptarget-nvptx.h"
+#include "target_impl.h"
////////////////////////////////////////////////////////////////////////////////
// global data tables
@@ -106,7 +107,7 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
}
if (!RequiresOMPRuntime) {
// Runtime is not required - exit.
- __SYNCTHREADS();
+ __kmpc_impl_syncthreads();
return;
}
@@ -125,8 +126,7 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
// init team context
currTeamDescr.InitTeamDescr();
}
- // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
- __SYNCTHREADS();
+ __kmpc_impl_syncthreads();
omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
@@ -168,8 +168,7 @@ EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
if (!RequiresOMPRuntime)
return;
- // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
- __SYNCTHREADS();
+ __kmpc_impl_syncthreads();
int threadId = GetThreadIdInBlock();
if (threadId == 0) {
// Enqueue omp state object for use by another team.
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 5519f64..f5eb00b 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -56,14 +56,6 @@
#define __ACTIVEMASK() __ballot(1)
#endif // CUDA_VERSION
-#define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
-// Use original __syncthreads if compiled by nvcc or clang >= 9.0.
-#if !defined(__clang__) || __clang_major__ >= 9
-#define __SYNCTHREADS() __syncthreads()
-#else
-#define __SYNCTHREADS() __SYNCTHREADS_N(0)
-#endif
-
// arguments needed for L0 parallelism only.
class omptarget_nvptx_SharedArgs {
public:
diff --git a/libomptarget/deviceRTLs/nvptx/src/sync.cu b/libomptarget/deviceRTLs/nvptx/src/sync.cu
index fcfe272..3ce695c 100644
--- a/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -75,8 +75,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
// parallel region and that all worker threads participate.
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
- // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
- __SYNCTHREADS();
+ __kmpc_impl_syncthreads();
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
}
diff --git a/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 91883ea..b9f930d 100644
--- a/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -63,6 +63,15 @@ INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
#endif // CUDA_VERSION
}
+INLINE void __kmpc_impl_syncthreads() {
+ // Use original __syncthreads if compiled by nvcc or clang >= 9.0.
+#if !defined(__clang__) || __clang_major__ >= 9
+ __syncthreads();
+#else
+ asm volatile("bar.sync %0;" : : "r"(0) : "memory");
+#endif // __clang__
+}
+
INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
#if CUDA_VERSION >= 9000
__syncwarp(Mask);