diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2019-05-13 14:21:46 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2019-05-13 14:21:46 +0000 |
commit | d5bb0a8d360f1c053b1397618fbf9f8877365cec (patch) | |
tree | 11691459bc764d975dbb3645eabeccca38270253 /libomptarget/deviceRTLs/nvptx/src/libcall.cu | |
parent | d140b83ae9d0526ccc8339a2b845287db728cfc7 (diff) |
[OPENMP][NVPTX]Simplify handling of thread limit, NFC.
Summary:
This patch improves the performance of the full runtime mode by moving
the thread limit counter to shared memory. It also reduces
global memory usage.
Reviewers: grokos, kkwli0, gtbercea
Subscribers: guansong, jdoerfert, openmp-commits, caomhin
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D61801
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@360584 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget/deviceRTLs/nvptx/src/libcall.cu')
-rw-r--r-- | libomptarget/deviceRTLs/nvptx/src/libcall.cu | 16 |
1 files changed, 6 insertions, 10 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/libomptarget/deviceRTLs/nvptx/src/libcall.cu index 9c6d136..9580d75 100644 --- a/libomptarget/deviceRTLs/nvptx/src/libcall.cu +++ b/libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -37,10 +37,8 @@ EXTERN void omp_set_num_threads(int num) { PRINT(LD_IO, "call omp_set_num_threads(num %d)\n", num); if (num <= 0) { WARNING0(LW_INPUT, "expected positive num; ignore\n"); - } else { - omptarget_nvptx_TaskDescr *currTaskDescr = - getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false); - currTaskDescr->NThreads() = num; + } else if (parallelLevel[GetWarpId()] == 0) { + nThreads = num; } } @@ -54,12 +52,10 @@ EXTERN int omp_get_max_threads(void) { if (parallelLevel[GetWarpId()] > 0) // We're already in parallel region. return 1; // default is 1 thread avail - omptarget_nvptx_TaskDescr *currTaskDescr = - getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false); - ASSERT0(LT_FUSSY, !currTaskDescr->InParallelRegion(), - "Should no be in the parallel region"); // Not currently in a parallel region, return what was set. - int rc = currTaskDescr->NThreads(); + int rc = 1; + if (parallelLevel[GetWarpId()] == 0) + rc = nThreads; ASSERT0(LT_FUSSY, rc >= 0, "bad number of threads"); PRINT(LD_IO, "call omp_get_max_threads() return %d\n", rc); return rc; @@ -175,7 +171,7 @@ EXTERN int omp_get_ancestor_thread_num(int level) { (int)currTaskDescr->InParallelRegion(), (int)sched, currTaskDescr->RuntimeChunkSize(), (int)currTaskDescr->ThreadId(), (int)threadsInTeam, - (int)currTaskDescr->NThreads()); + (int)nThreads); } if (currTaskDescr->IsParallelConstruct()) { |