diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2019-05-10 18:56:05 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2019-05-10 18:56:05 +0000 |
commit | d140b83ae9d0526ccc8339a2b845287db728cfc7 (patch) | |
tree | 0f72a13c61a4c154237fb61245c40be8133ce375 /libomptarget/deviceRTLs/nvptx/src/libcall.cu | |
parent | e55ab9929284e9fa908126748b79385dc56be764 (diff) |
[OPENMP][NVPTX]Improve number of threads counter, NFC.
Summary:
This patch improves the performance of the full runtime mode by moving
the number-of-threads counter to shared memory. It also reduces
global memory usage.
Reviewers: grokos, gtbercea, kkwli0
Subscribers: guansong, jfb, jdoerfert, openmp-commits, caomhin
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D61785
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@360457 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget/deviceRTLs/nvptx/src/libcall.cu')
-rw-r--r-- | libomptarget/deviceRTLs/nvptx/src/libcall.cu | 48 |
1 files changed, 16 insertions, 32 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/libomptarget/deviceRTLs/nvptx/src/libcall.cu index ae6f83f..9c6d136 100644 --- a/libomptarget/deviceRTLs/nvptx/src/libcall.cu +++ b/libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -45,9 +45,7 @@ EXTERN void omp_set_num_threads(int num) { } EXTERN int omp_get_num_threads(void) { - bool isSPMDExecutionMode = isSPMDMode(); - int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode); - int rc = GetNumberOfOmpThreads(tid, isSPMDExecutionMode); + int rc = GetNumberOfOmpThreads(isSPMDMode()); PRINT(LD_IO, "call omp_get_num_threads() return %d\n", rc); return rc; } @@ -156,10 +154,7 @@ EXTERN int omp_get_ancestor_thread_num(int level) { int rc = -1; // If level is 0 or all parallel regions are not active - return 0. unsigned parLevel = parallelLevel[GetWarpId()]; - if (level == 0 || (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL && - level <= parLevel)) { - rc = 0; - } else if (level > 0) { + if (level == 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL) { int totLevel = omp_get_level(); if (level <= totLevel) { omptarget_nvptx_TaskDescr *currTaskDescr = @@ -179,8 +174,7 @@ EXTERN int omp_get_ancestor_thread_num(int level) { (currTaskDescr->IsParallelConstruct() ? 
"par" : "task"), (int)currTaskDescr->InParallelRegion(), (int)sched, currTaskDescr->RuntimeChunkSize(), - (int)currTaskDescr->ThreadId(), - (int)currTaskDescr->ThreadsInTeam(), + (int)currTaskDescr->ThreadId(), (int)threadsInTeam, (int)currTaskDescr->NThreads()); } @@ -196,6 +190,12 @@ EXTERN int omp_get_ancestor_thread_num(int level) { } while (currTaskDescr); ASSERT0(LT_FUSSY, !steps, "expected to find all steps"); } + } else if (level == 0 || + (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL && + level <= parLevel) || + (level > 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL && + level <= (parLevel - OMP_ACTIVE_PARALLEL_LEVEL))) { + rc = 0; } PRINT(LD_IO, "call omp_get_ancestor_thread_num(level %d) returns %d\n", level, rc) @@ -208,30 +208,14 @@ EXTERN int omp_get_team_size(int level) { int rc = -1; unsigned parLevel = parallelLevel[GetWarpId()]; // If level is 0 or all parallel regions are not active - return 1. - if (level == 0 || (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL && - level <= parLevel)) { + if (level == 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL) { + rc = threadsInTeam; + } else if (level == 0 || + (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL && + level <= parLevel) || + (level > 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL && + level <= (parLevel - OMP_ACTIVE_PARALLEL_LEVEL))) { rc = 1; - } else if (level > 0) { - int totLevel = omp_get_level(); - if (level <= totLevel) { - omptarget_nvptx_TaskDescr *currTaskDescr = - getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false); - int steps = totLevel - level; - ASSERT0(LT_FUSSY, currTaskDescr, - "do not expect fct to be called in a non-active thread"); - do { - if (currTaskDescr->IsParallelConstruct()) { - if (!steps) { - // found the level - rc = currTaskDescr->ThreadsInTeam(); - break; - } - steps--; - } - currTaskDescr = currTaskDescr->GetPrevTaskDescr(); - } while (currTaskDescr); - ASSERT0(LT_FUSSY, !steps, "expected to find all steps"); - } } PRINT(LD_IO, "call omp_get_team_size(level %d) 
returns %d\n", level, rc) return rc; |