summaryrefslogtreecommitdiff
path: root/openmp
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2018-12-10 14:29:05 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2018-12-10 14:29:05 +0000
commit2586b22bb78b3a95bb3e8cfb36b6f6f540fb3324 (patch)
tree329056f18bea28e88a51d30275d76a968216ac57 /openmp
parent1086b72b83ad7aa10cb9e8709356a9d9ac83f599 (diff)
[OPENMP][NVPTX]Enable fast shuffles on 64bit values only if CUDA >= 9.
Summary: Shuffle on 64bit data is allowed only for CUDA >= 9.0. Also, fixed the constant for the mask, need one extra L in the end. Reviewers: gtbercea, kkwli0 Subscribers: guansong, caomhin, openmp-commits Differential Revision: https://reviews.llvm.org/D55440
Diffstat (limited to 'openmp')
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu12
1 files changed, 11 insertions, 1 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index a05a6e016f9..2546302bc42 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -76,7 +76,17 @@ EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) {
}
EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) {
- return __SHFL_DOWN_SYNC(0xFFFFFFFFFFFFFFFFL, val, delta, size);
+#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
+ return __SHFL_DOWN_SYNC(0xFFFFFFFFFFFFFFFFLL, (long long)val, (unsigned)delta,
+ (int)size);
+#else
+ int lo, hi;
+ asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
+ hi = __SHFL_DOWN_SYNC(0xFFFFFFFF, hi, delta, size);
+ lo = __SHFL_DOWN_SYNC(0xFFFFFFFF, lo, delta, size);
+ asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
+ return val;
+#endif
}
static INLINE void gpu_regular_warp_reduce(void *reduce_data,