aboutsummaryrefslogtreecommitdiff
path: root/libomptarget/deviceRTLs/nvptx/src/libcall.cu
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2019-05-10 18:56:05 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2019-05-10 18:56:05 +0000
commitd140b83ae9d0526ccc8339a2b845287db728cfc7 (patch)
tree0f72a13c61a4c154237fb61245c40be8133ce375 /libomptarget/deviceRTLs/nvptx/src/libcall.cu
parente55ab9929284e9fa908126748b79385dc56be764 (diff)
[OPENMP][NVPTX]Improve number of threads counter, NFC.
Summary: This patch improves the performance of the full runtime mode by moving the number-of-threads counter to shared memory. It also saves global memory. Reviewers: grokos, gtbercea, kkwli0 Subscribers: guansong, jfb, jdoerfert, openmp-commits, caomhin Tags: #openmp Differential Revision: https://reviews.llvm.org/D61785 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@360457 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget/deviceRTLs/nvptx/src/libcall.cu')
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/libcall.cu48
1 files changed, 16 insertions, 32 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
index ae6f83f..9c6d136 100644
--- a/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -45,9 +45,7 @@ EXTERN void omp_set_num_threads(int num) {
}
EXTERN int omp_get_num_threads(void) {
- bool isSPMDExecutionMode = isSPMDMode();
- int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode);
- int rc = GetNumberOfOmpThreads(tid, isSPMDExecutionMode);
+ int rc = GetNumberOfOmpThreads(isSPMDMode());
PRINT(LD_IO, "call omp_get_num_threads() return %d\n", rc);
return rc;
}
@@ -156,10 +154,7 @@ EXTERN int omp_get_ancestor_thread_num(int level) {
int rc = -1;
// If level is 0 or all parallel regions are not active - return 0.
unsigned parLevel = parallelLevel[GetWarpId()];
- if (level == 0 || (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL &&
- level <= parLevel)) {
- rc = 0;
- } else if (level > 0) {
+ if (level == 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL) {
int totLevel = omp_get_level();
if (level <= totLevel) {
omptarget_nvptx_TaskDescr *currTaskDescr =
@@ -179,8 +174,7 @@ EXTERN int omp_get_ancestor_thread_num(int level) {
(currTaskDescr->IsParallelConstruct() ? "par" : "task"),
(int)currTaskDescr->InParallelRegion(), (int)sched,
currTaskDescr->RuntimeChunkSize(),
- (int)currTaskDescr->ThreadId(),
- (int)currTaskDescr->ThreadsInTeam(),
+ (int)currTaskDescr->ThreadId(), (int)threadsInTeam,
(int)currTaskDescr->NThreads());
}
@@ -196,6 +190,12 @@ EXTERN int omp_get_ancestor_thread_num(int level) {
} while (currTaskDescr);
ASSERT0(LT_FUSSY, !steps, "expected to find all steps");
}
+ } else if (level == 0 ||
+ (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL &&
+ level <= parLevel) ||
+ (level > 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL &&
+ level <= (parLevel - OMP_ACTIVE_PARALLEL_LEVEL))) {
+ rc = 0;
}
PRINT(LD_IO, "call omp_get_ancestor_thread_num(level %d) returns %d\n", level,
rc)
@@ -208,30 +208,14 @@ EXTERN int omp_get_team_size(int level) {
int rc = -1;
unsigned parLevel = parallelLevel[GetWarpId()];
// If level is 0 or all parallel regions are not active - return 1.
- if (level == 0 || (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL &&
- level <= parLevel)) {
+ if (level == 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL) {
+ rc = threadsInTeam;
+ } else if (level == 0 ||
+ (level > 0 && parLevel < OMP_ACTIVE_PARALLEL_LEVEL &&
+ level <= parLevel) ||
+ (level > 1 && parLevel > OMP_ACTIVE_PARALLEL_LEVEL &&
+ level <= (parLevel - OMP_ACTIVE_PARALLEL_LEVEL))) {
rc = 1;
- } else if (level > 0) {
- int totLevel = omp_get_level();
- if (level <= totLevel) {
- omptarget_nvptx_TaskDescr *currTaskDescr =
- getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false);
- int steps = totLevel - level;
- ASSERT0(LT_FUSSY, currTaskDescr,
- "do not expect fct to be called in a non-active thread");
- do {
- if (currTaskDescr->IsParallelConstruct()) {
- if (!steps) {
- // found the level
- rc = currTaskDescr->ThreadsInTeam();
- break;
- }
- steps--;
- }
- currTaskDescr = currTaskDescr->GetPrevTaskDescr();
- } while (currTaskDescr);
- ASSERT0(LT_FUSSY, !steps, "expected to find all steps");
- }
}
PRINT(LD_IO, "call omp_get_team_size(level %d) returns %d\n", level, rc)
return rc;