aboutsummaryrefslogtreecommitdiff
path: root/libomptarget/deviceRTLs/nvptx/src/libcall.cu
diff options
context:
space:
mode:
author Alexey Bataev <a.bataev@hotmail.com> 2019-01-04 17:09:12 +0000
committer Alexey Bataev <a.bataev@hotmail.com> 2019-01-04 17:09:12 +0000
commit 900210a37567f84e17b8eae18568410fbac77877 (patch)
tree df84202e87da83b267668040e767c052c5838e21 /libomptarget/deviceRTLs/nvptx/src/libcall.cu
parent 58df0162f131f71d38ec0d4e13015ad7940922cb (diff)
[OPENMP][NVPTX]Improve performance + reduce number of used registers.
Summary: Reduced the number of registers used and improved performance by propagating information about the current execution/data-sharing mode directly from the compiler, where possible. In some cases, this requires new or reworked interfaces for the runtime's external functions. The old functions are marked as deprecated. Reviewers: grokos, gtbercea, kkwli0 Subscribers: guansong, jfb, openmp-commits, caomhin Differential Revision: https://reviews.llvm.org/D56278 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@350405 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget/deviceRTLs/nvptx/src/libcall.cu')
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/libcall.cu 43
1 files changed, 28 insertions, 15 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
index 91b270c..9abe599 100644
--- a/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -39,14 +39,17 @@ EXTERN void omp_set_num_threads(int num) {
if (num <= 0) {
WARNING0(LW_INPUT, "expected positive num; ignore\n");
} else {
- omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false);
currTaskDescr->NThreads() = num;
}
}
EXTERN int omp_get_num_threads(void) {
- int tid = GetLogicalThreadIdInBlock();
- int rc = GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized());
+ bool isSPMDExecutionMode = isSPMDMode();
+ int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode);
+ int rc =
+ GetNumberOfOmpThreads(tid, isSPMDExecutionMode, isRuntimeUninitialized());
PRINT(LD_IO, "call omp_get_num_threads() return %d\n", rc);
return rc;
}
@@ -58,7 +61,8 @@ EXTERN int omp_get_max_threads(void) {
// We're already in parallel region.
return 1; // default is 1 thread avail
}
- omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ getMyTopTaskDescriptor(isSPMDMode());
int rc = 1; // default is 1 thread avail
if (!currTaskDescr->InParallelRegion()) {
// Not currently in a parallel region, return what was set.
@@ -76,21 +80,23 @@ EXTERN int omp_get_thread_limit(void) {
return 0; // default is 0
}
// per contention group.. meaning threads in current team
- omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ getMyTopTaskDescriptor(isSPMDMode());
int rc = currTaskDescr->ThreadLimit();
PRINT(LD_IO, "call omp_get_thread_limit() return %d\n", rc);
return rc;
}
EXTERN int omp_get_thread_num() {
- int tid = GetLogicalThreadIdInBlock();
- int rc = GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized());
+ bool isSPMDExecutionMode = isSPMDMode();
+ int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode);
+ int rc = GetOmpThreadId(tid, isSPMDExecutionMode, isRuntimeUninitialized());
PRINT(LD_IO, "call omp_get_thread_num() returns %d\n", rc);
return rc;
}
EXTERN int omp_get_num_procs(void) {
- int rc = GetNumberOfProcsInDevice();
+ int rc = GetNumberOfProcsInDevice(isSPMDMode());
PRINT(LD_IO, "call omp_get_num_procs() returns %d\n", rc);
return rc;
}
@@ -102,7 +108,8 @@ EXTERN int omp_in_parallel(void) {
"Expected SPMD mode only with uninitialized runtime.");
rc = 1; // SPMD mode is always in parallel.
} else {
- omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ getMyTopTaskDescriptor(isSPMDMode());
if (currTaskDescr->InParallelRegion()) {
rc = 1;
}
@@ -161,7 +168,8 @@ EXTERN int omp_get_level(void) {
return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel();
}
int level = 0;
- omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ getMyTopTaskDescriptor(isSPMDMode());
ASSERT0(LT_FUSSY, currTaskDescr,
"do not expect fct to be called in a non-active thread");
do {
@@ -181,7 +189,8 @@ EXTERN int omp_get_active_level(void) {
return 1;
}
int level = 0; // no active level parallelism
- omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ getMyTopTaskDescriptor(isSPMDMode());
ASSERT0(LT_FUSSY, currTaskDescr,
"do not expect fct to be called in a non-active thread");
do {
@@ -208,7 +217,8 @@ EXTERN int omp_get_ancestor_thread_num(int level) {
} else if (level > 0) {
int totLevel = omp_get_level();
if (level <= totLevel) {
- omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ getMyTopTaskDescriptor(isSPMDMode());
int steps = totLevel - level;
PRINT(LD_IO, "backtrack %d steps\n", steps);
ASSERT0(LT_FUSSY, currTaskDescr,
@@ -259,7 +269,8 @@ EXTERN int omp_get_team_size(int level) {
} else if (level > 0) {
int totLevel = omp_get_level();
if (level <= totLevel) {
- omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ getMyTopTaskDescriptor(isSPMDMode());
int steps = totLevel - level;
ASSERT0(LT_FUSSY, currTaskDescr,
"do not expect fct to be called in a non-active thread");
@@ -288,7 +299,8 @@ EXTERN void omp_get_schedule(omp_sched_t *kind, int *modifier) {
*kind = omp_sched_static;
*modifier = 1;
} else {
- omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ getMyTopTaskDescriptor(isSPMDMode());
*kind = currTaskDescr->GetRuntimeSched();
*modifier = currTaskDescr->RuntimeChunkSize();
}
@@ -305,7 +317,8 @@ EXTERN void omp_set_schedule(omp_sched_t kind, int modifier) {
return;
}
if (kind >= omp_sched_static && kind < omp_sched_auto) {
- omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor();
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ getMyTopTaskDescriptor(isSPMDMode());
currTaskDescr->SetRuntimeSched(kind);
currTaskDescr->RuntimeChunkSize() = modifier;
PRINT(LD_IOD, "omp_set_schedule did set sched %d & modif %" PRIu64 "\n",