aboutsummaryrefslogtreecommitdiff
path: root/libomptarget/deviceRTLs/nvptx/src/libcall.cu
diff options
context:
space:
mode:
author Alexey Bataev <a.bataev@hotmail.com> 2019-05-13 14:21:46 +0000
committer Alexey Bataev <a.bataev@hotmail.com> 2019-05-13 14:21:46 +0000
commit d5bb0a8d360f1c053b1397618fbf9f8877365cec (patch)
tree 11691459bc764d975dbb3645eabeccca38270253 /libomptarget/deviceRTLs/nvptx/src/libcall.cu
parent d140b83ae9d0526ccc8339a2b845287db728cfc7 (diff)
[OPENMP][NVPTX]Simplify handling of thread limit, NFC.
Summary: This patch improves the performance of the full runtime mode by moving the thread-limit counter to shared memory. It also saves global memory. Reviewers: grokos, kkwli0, gtbercea Subscribers: guansong, jdoerfert, openmp-commits, caomhin Tags: #openmp Differential Revision: https://reviews.llvm.org/D61801 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@360584 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget/deviceRTLs/nvptx/src/libcall.cu')
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/libcall.cu 16
1 files changed, 6 insertions, 10 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
index 9c6d136..9580d75 100644
--- a/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -37,10 +37,8 @@ EXTERN void omp_set_num_threads(int num) {
PRINT(LD_IO, "call omp_set_num_threads(num %d)\n", num);
if (num <= 0) {
WARNING0(LW_INPUT, "expected positive num; ignore\n");
- } else {
- omptarget_nvptx_TaskDescr *currTaskDescr =
- getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false);
- currTaskDescr->NThreads() = num;
+ } else if (parallelLevel[GetWarpId()] == 0) {
+ nThreads = num;
}
}
@@ -54,12 +52,10 @@ EXTERN int omp_get_max_threads(void) {
if (parallelLevel[GetWarpId()] > 0)
// We're already in parallel region.
return 1; // default is 1 thread avail
- omptarget_nvptx_TaskDescr *currTaskDescr =
- getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false);
- ASSERT0(LT_FUSSY, !currTaskDescr->InParallelRegion(),
- "Should no be in the parallel region");
// Not currently in a parallel region, return what was set.
- int rc = currTaskDescr->NThreads();
+ int rc = 1;
+ if (parallelLevel[GetWarpId()] == 0)
+ rc = nThreads;
ASSERT0(LT_FUSSY, rc >= 0, "bad number of threads");
PRINT(LD_IO, "call omp_get_max_threads() return %d\n", rc);
return rc;
@@ -175,7 +171,7 @@ EXTERN int omp_get_ancestor_thread_num(int level) {
(int)currTaskDescr->InParallelRegion(), (int)sched,
currTaskDescr->RuntimeChunkSize(),
(int)currTaskDescr->ThreadId(), (int)threadsInTeam,
- (int)currTaskDescr->NThreads());
+ (int)nThreads);
}
if (currTaskDescr->IsParallelConstruct()) {