diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2019-01-04 17:09:12 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2019-01-04 17:09:12 +0000 |
commit | 900210a37567f84e17b8eae18568410fbac77877 (patch) | |
tree | df84202e87da83b267668040e767c052c5838e21 /libomptarget/deviceRTLs/nvptx/src/libcall.cu | |
parent | 58df0162f131f71d38ec0d4e13015ad7940922cb (diff) |
[OPENMP][NVPTX]Improve performance + reduce number of used registers.
Summary:
Reduced the number of used registers and improved performance by propagating
the information about the current execution/data sharing mode directly from
the compiler, where possible.
In some cases, this requires new or reworked interfaces for the runtime's
external functions. Old functions are marked as deprecated.
Reviewers: grokos, gtbercea, kkwli0
Subscribers: guansong, jfb, openmp-commits, caomhin
Differential Revision: https://reviews.llvm.org/D56278
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@350405 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget/deviceRTLs/nvptx/src/libcall.cu')
-rw-r--r-- | libomptarget/deviceRTLs/nvptx/src/libcall.cu | 43 |
1 files changed, 28 insertions, 15 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/libomptarget/deviceRTLs/nvptx/src/libcall.cu index 91b270c..9abe599 100644 --- a/libomptarget/deviceRTLs/nvptx/src/libcall.cu +++ b/libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -39,14 +39,17 @@ EXTERN void omp_set_num_threads(int num) { if (num <= 0) { WARNING0(LW_INPUT, "expected positive num; ignore\n"); } else { - omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(); + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false); currTaskDescr->NThreads() = num; } } EXTERN int omp_get_num_threads(void) { - int tid = GetLogicalThreadIdInBlock(); - int rc = GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()); + bool isSPMDExecutionMode = isSPMDMode(); + int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode); + int rc = + GetNumberOfOmpThreads(tid, isSPMDExecutionMode, isRuntimeUninitialized()); PRINT(LD_IO, "call omp_get_num_threads() return %d\n", rc); return rc; } @@ -58,7 +61,8 @@ EXTERN int omp_get_max_threads(void) { // We're already in parallel region. return 1; // default is 1 thread avail } - omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(); + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); int rc = 1; // default is 1 thread avail if (!currTaskDescr->InParallelRegion()) { // Not currently in a parallel region, return what was set. @@ -76,21 +80,23 @@ EXTERN int omp_get_thread_limit(void) { return 0; // default is 0 } // per contention group.. 
meaning threads in current team - omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(); + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); int rc = currTaskDescr->ThreadLimit(); PRINT(LD_IO, "call omp_get_thread_limit() return %d\n", rc); return rc; } EXTERN int omp_get_thread_num() { - int tid = GetLogicalThreadIdInBlock(); - int rc = GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()); + bool isSPMDExecutionMode = isSPMDMode(); + int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode); + int rc = GetOmpThreadId(tid, isSPMDExecutionMode, isRuntimeUninitialized()); PRINT(LD_IO, "call omp_get_thread_num() returns %d\n", rc); return rc; } EXTERN int omp_get_num_procs(void) { - int rc = GetNumberOfProcsInDevice(); + int rc = GetNumberOfProcsInDevice(isSPMDMode()); PRINT(LD_IO, "call omp_get_num_procs() returns %d\n", rc); return rc; } @@ -102,7 +108,8 @@ EXTERN int omp_in_parallel(void) { "Expected SPMD mode only with uninitialized runtime."); rc = 1; // SPMD mode is always in parallel. 
} else { - omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(); + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); if (currTaskDescr->InParallelRegion()) { rc = 1; } @@ -161,7 +168,8 @@ EXTERN int omp_get_level(void) { return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel(); } int level = 0; - omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(); + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); ASSERT0(LT_FUSSY, currTaskDescr, "do not expect fct to be called in a non-active thread"); do { @@ -181,7 +189,8 @@ EXTERN int omp_get_active_level(void) { return 1; } int level = 0; // no active level parallelism - omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(); + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); ASSERT0(LT_FUSSY, currTaskDescr, "do not expect fct to be called in a non-active thread"); do { @@ -208,7 +217,8 @@ EXTERN int omp_get_ancestor_thread_num(int level) { } else if (level > 0) { int totLevel = omp_get_level(); if (level <= totLevel) { - omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(); + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); int steps = totLevel - level; PRINT(LD_IO, "backtrack %d steps\n", steps); ASSERT0(LT_FUSSY, currTaskDescr, @@ -259,7 +269,8 @@ EXTERN int omp_get_team_size(int level) { } else if (level > 0) { int totLevel = omp_get_level(); if (level <= totLevel) { - omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(); + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); int steps = totLevel - level; ASSERT0(LT_FUSSY, currTaskDescr, "do not expect fct to be called in a non-active thread"); @@ -288,7 +299,8 @@ EXTERN void omp_get_schedule(omp_sched_t *kind, int *modifier) { *kind = omp_sched_static; *modifier = 1; } else { - omptarget_nvptx_TaskDescr *currTaskDescr = 
getMyTopTaskDescriptor(); + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); *kind = currTaskDescr->GetRuntimeSched(); *modifier = currTaskDescr->RuntimeChunkSize(); } @@ -305,7 +317,8 @@ EXTERN void omp_set_schedule(omp_sched_t kind, int modifier) { return; } if (kind >= omp_sched_static && kind < omp_sched_auto) { - omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(); + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); currTaskDescr->SetRuntimeSched(kind); currTaskDescr->RuntimeChunkSize() = modifier; PRINT(LD_IOD, "omp_set_schedule did set sched %d & modif %" PRIu64 "\n", |