diff options
author | Jonas Hahnfeld <hahnjo@hahnjo.de> | 2018-10-01 14:14:26 +0000 |
---|---|---|
committer | Jonas Hahnfeld <hahnjo@hahnjo.de> | 2018-10-01 14:14:26 +0000 |
commit | 45b8920a309576d74523c17555b273e4e6884fc9 (patch) | |
tree | 76d71cf5da32c8e99431e8b95b262f4a8e1027ec | |
parent | 88dce083137387276b6f23ad8c9f419ddd1017b0 (diff) |
[libomptarget-nvptx] reduction: Determine if runtime uninitialized
Pass in the correct value of isRuntimeUninitialized(), which fixes
parallel reductions as reported on the mailing list.
For reference: r333285 did the same for loop scheduling.
Differential Revision: https://reviews.llvm.org/D52725
-rw-r--r-- | openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu | 18 |
1 file changed, 10 insertions, 8 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu index b813a11d20f..21a419ce14c 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu @@ -148,7 +148,7 @@ int32_t nvptx_parallel_reduce_nowait(int32_t global_tid, int32_t num_vars, kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct, bool isSPMDExecutionMode, - bool isRuntimeUninitialized = false) { + bool isRuntimeUninitialized) { uint32_t BlockThreadId = GetLogicalThreadIdInBlock(); uint32_t NumThreads = GetNumberOfOmpThreads( BlockThreadId, isSPMDExecutionMode, isRuntimeUninitialized); @@ -240,9 +240,10 @@ EXTERN int32_t __kmpc_nvptx_parallel_reduce_nowait( int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data, kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct) { - return nvptx_parallel_reduce_nowait(global_tid, num_vars, reduce_size, - reduce_data, shflFct, cpyFct, - /*isSPMDExecutionMode=*/isSPMDMode()); + return nvptx_parallel_reduce_nowait( + global_tid, num_vars, reduce_size, reduce_data, shflFct, cpyFct, + /*isSPMDExecutionMode=*/isSPMDMode(), + /*isRuntimeUninitialized=*/isRuntimeUninitialized()); } EXTERN @@ -270,7 +271,7 @@ int32_t nvptx_teams_reduce_nowait( int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data, kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct, kmp_CopyToScratchpadFctPtr scratchFct, kmp_LoadReduceFctPtr ldFct, - bool isSPMDExecutionMode, bool isRuntimeUninitialized = false) { + bool isSPMDExecutionMode, bool isRuntimeUninitialized) { uint32_t ThreadId = GetLogicalThreadIdInBlock(); // In non-generic mode all workers participate in the teams reduction. 
// In generic mode only the team master participates in the teams @@ -399,9 +400,10 @@ int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars, kmp_InterWarpCopyFctPtr cpyFct, kmp_CopyToScratchpadFctPtr scratchFct, kmp_LoadReduceFctPtr ldFct) { - return nvptx_teams_reduce_nowait(global_tid, num_vars, reduce_size, - reduce_data, shflFct, cpyFct, scratchFct, - ldFct, /*isSPMDExecutionMode=*/isSPMDMode()); + return nvptx_teams_reduce_nowait( + global_tid, num_vars, reduce_size, reduce_data, shflFct, cpyFct, + scratchFct, ldFct, /*isSPMDExecutionMode=*/isSPMDMode(), + /*isRuntimeUninitialized=*/isRuntimeUninitialized()); } EXTERN |