diff options
author | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2019-08-28 14:22:35 +0000 |
---|---|---|
committer | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2019-08-28 14:22:35 +0000 |
commit | ee031606128fec949b4f90152e25c37fdab13a8c (patch) | |
tree | e16724f50b1edfb4e8de86c97abdad879ceba304 /libomptarget | |
parent | 7ed5372fe0c8ae8328b2c8274d88cf3762b3b776 (diff) |
[libomptarget] Refactor syncthreads macro to inline function
Summary:
[libomptarget] Refactor syncthreads macro to inline function
See also abandoned D66846, split into this diff and others.
Rev 2 of D66855
Reviewers: jdoerfert, ABataev, grokos, ronlieb, gregrodgers
Subscribers: openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D66861
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@370210 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget')
5 files changed, 16 insertions, 19 deletions
```diff
diff --git a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index b7b8002..5463adc 100644
--- a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -553,8 +553,7 @@ EXTERN void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
     if (GetThreadIdInBlock() == 0) {
       *frame = omptarget_nvptx_simpleMemoryManager.Acquire(buf, size);
     }
-    // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
-    __SYNCTHREADS();
+    __kmpc_impl_syncthreads();
     return;
   }
   ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
@@ -568,8 +567,7 @@ EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
   if (is_shared)
     return;
   if (isSPMDExecutionMode) {
-    // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
-    __SYNCTHREADS();
+    __kmpc_impl_syncthreads();
     if (GetThreadIdInBlock() == 0) {
       omptarget_nvptx_simpleMemoryManager.Release();
     }
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
index 706776a..c84c055 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "omptarget-nvptx.h"
+#include "target_impl.h"
 
 ////////////////////////////////////////////////////////////////////////////////
 // global data tables
@@ -106,7 +107,7 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
   }
   if (!RequiresOMPRuntime) {
     // Runtime is not required - exit.
-    __SYNCTHREADS();
+    __kmpc_impl_syncthreads();
     return;
   }
 
@@ -125,8 +126,7 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
     // init team context
     currTeamDescr.InitTeamDescr();
   }
-  // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
-  __SYNCTHREADS();
+  __kmpc_impl_syncthreads();
 
   omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
   omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
@@ -168,8 +168,7 @@ EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
   if (!RequiresOMPRuntime)
     return;
 
-  // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
-  __SYNCTHREADS();
+  __kmpc_impl_syncthreads();
   int threadId = GetThreadIdInBlock();
   if (threadId == 0) {
     // Enqueue omp state object for use by another team.
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 5519f64..f5eb00b 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -56,14 +56,6 @@
 #define __ACTIVEMASK() __ballot(1)
 #endif // CUDA_VERSION
 
-#define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
-// Use original __syncthreads if compiled by nvcc or clang >= 9.0.
-#if !defined(__clang__) || __clang_major__ >= 9
-#define __SYNCTHREADS() __syncthreads()
-#else
-#define __SYNCTHREADS() __SYNCTHREADS_N(0)
-#endif
-
 // arguments needed for L0 parallelism only.
 class omptarget_nvptx_SharedArgs {
 public:
diff --git a/libomptarget/deviceRTLs/nvptx/src/sync.cu b/libomptarget/deviceRTLs/nvptx/src/sync.cu
index fcfe272..3ce695c 100644
--- a/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -75,8 +75,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
 // parallel region and that all worker threads participate.
 EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
   PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
-  // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
-  __SYNCTHREADS();
+  __kmpc_impl_syncthreads();
   PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
 }
 
diff --git a/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 91883ea..b9f930d 100644
--- a/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -63,6 +63,15 @@ INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
 #endif // CUDA_VERSION
 }
 
+INLINE void __kmpc_impl_syncthreads() {
+  // Use original __syncthreads if compiled by nvcc or clang >= 9.0.
+#if !defined(__clang__) || __clang_major__ >= 9
+  __syncthreads();
+#else
+  asm volatile("bar.sync %0;" : : "r"(0) : "memory");
+#endif // __clang__
+}
+
 INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
 #if CUDA_VERSION >= 9000
   __syncwarp(Mask);
```