summaryrefslogtreecommitdiff
path: root/openmp/libomptarget
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2018-12-06 19:45:48 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2018-12-06 19:45:48 +0000
commit94a61ea16fb9d2a9bbc0b261ae1cf1af700f4c7a (patch)
treef56f4b57c2af40d54b7fb4511f2becf75cc0de57 /openmp/libomptarget
parentd6e65ac8632d378852f324acec399164bcad313e (diff)
[OPENMP][NVPTX]Correct type casting for printf args + simplified shfl64 function.
Summary: Explicitly casted printf's args to the required types + simplified shfl64 function. Reviewers: gtbercea, kkwli0 Subscribers: guansong, jfb, caomhin, openmp-commits Differential Revision: https://reviews.llvm.org/D55379
Diffstat (limited to 'openmp/libomptarget')
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/cancel.cu4
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu69
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/debug.h30
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu8
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu86
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu2
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu14
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu7
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h7
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu4
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/task.cu17
11 files changed, 140 insertions, 108 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/cancel.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/cancel.cu
index ccfb7ad86a4..9f92e2d119a 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/cancel.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/cancel.cu
@@ -15,14 +15,14 @@
EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid,
int32_t cancelVal) {
- PRINT(LD_IO, "call kmpc_cancellationpoint(cancel val %d)\n", cancelVal);
+ PRINT(LD_IO, "call kmpc_cancellationpoint(cancel val %d)\n", (int)cancelVal);
// disabled
return FALSE;
}
EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
int32_t cancelVal) {
- PRINT(LD_IO, "call kmpc_cancel(cancel val %d)\n", cancelVal);
+ PRINT(LD_IO, "call kmpc_cancel(cancel val %d)\n", (int)cancelVal);
// disabled
return FALSE;
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index bfb8208ff6a..f69daa172fa 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -84,7 +84,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
"Entering __kmpc_initialize_data_sharing_environment\n");
unsigned WID = getWarpId();
- DSPRINT(DSFLAG_INIT, "Warp ID: %d\n", WID);
+ DSPRINT(DSFLAG_INIT, "Warp ID: %u\n", WID);
omptarget_nvptx_TeamDescr *teamDescr =
&omptarget_nvptx_threadPrivateContext->TeamContext();
@@ -95,15 +95,16 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
// We don't need to initialize the frame and active threads.
- DSPRINT(DSFLAG_INIT, "Initial data size: %08x \n", InitialDataSize);
- DSPRINT(DSFLAG_INIT, "Root slot at: %016llx \n", (long long)RootS);
+ DSPRINT(DSFLAG_INIT, "Initial data size: %08x \n", (unsigned)InitialDataSize);
+ DSPRINT(DSFLAG_INIT, "Root slot at: %016llx \n", (unsigned long long)RootS);
DSPRINT(DSFLAG_INIT, "Root slot data-end at: %016llx \n",
- (long long)RootS->DataEnd);
- DSPRINT(DSFLAG_INIT, "Root slot next at: %016llx \n", (long long)RootS->Next);
+ (unsigned long long)RootS->DataEnd);
+ DSPRINT(DSFLAG_INIT, "Root slot next at: %016llx \n",
+ (unsigned long long)RootS->Next);
DSPRINT(DSFLAG_INIT, "Shared slot ptr at: %016llx \n",
- (long long)DataSharingState.SlotPtr[WID]);
+ (unsigned long long)DataSharingState.SlotPtr[WID]);
DSPRINT(DSFLAG_INIT, "Shared stack ptr at: %016llx \n",
- (long long)DataSharingState.StackPtr[WID]);
+ (unsigned long long)DataSharingState.StackPtr[WID]);
DSPRINT0(DSFLAG_INIT, "Exiting __kmpc_initialize_data_sharing_environment\n");
}
@@ -121,8 +122,9 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
if (!IsOMPRuntimeInitialized)
return (void *)&DataSharingState;
- DSPRINT(DSFLAG, "Data Size %016llx\n", SharingDataSize);
- DSPRINT(DSFLAG, "Default Data Size %016llx\n", SharingDefaultDataSize);
+ DSPRINT(DSFLAG, "Data Size %016llx\n", (unsigned long long)SharingDataSize);
+ DSPRINT(DSFLAG, "Default Data Size %016llx\n",
+ (unsigned long long)SharingDefaultDataSize);
unsigned WID = getWarpId();
unsigned CurActiveThreads = getActiveThreadsMask();
@@ -139,11 +141,11 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
*SavedSharedFrame = FrameP;
*SavedActiveThreads = ActiveT;
- DSPRINT(DSFLAG, "Warp ID: %d\n", WID);
- DSPRINT(DSFLAG, "Saved slot ptr at: %016llx \n", (long long)SlotP);
- DSPRINT(DSFLAG, "Saved stack ptr at: %016llx \n", (long long)StackP);
+ DSPRINT(DSFLAG, "Warp ID: %u\n", WID);
+ DSPRINT(DSFLAG, "Saved slot ptr at: %016llx \n", (unsigned long long)SlotP);
+ DSPRINT(DSFLAG, "Saved stack ptr at: %016llx \n", (unsigned long long)StackP);
DSPRINT(DSFLAG, "Saved frame ptr at: %016llx \n", (long long)FrameP);
- DSPRINT(DSFLAG, "Active threads: %08x \n", ActiveT);
+ DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT);
// Only the warp active master needs to grow the stack.
if (IsWarpMasterActiveThread()) {
@@ -161,12 +163,16 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
const uintptr_t RequiredEndAddress =
CurrentStartAddress + (uintptr_t)SharingDataSize;
- DSPRINT(DSFLAG, "Data Size %016llx\n", SharingDataSize);
- DSPRINT(DSFLAG, "Default Data Size %016llx\n", SharingDefaultDataSize);
- DSPRINT(DSFLAG, "Current Start Address %016llx\n", CurrentStartAddress);
- DSPRINT(DSFLAG, "Current End Address %016llx\n", CurrentEndAddress);
- DSPRINT(DSFLAG, "Required End Address %016llx\n", RequiredEndAddress);
- DSPRINT(DSFLAG, "Active Threads %08x\n", ActiveT);
+ DSPRINT(DSFLAG, "Data Size %016llx\n", (unsigned long long)SharingDataSize);
+ DSPRINT(DSFLAG, "Default Data Size %016llx\n",
+ (unsigned long long)SharingDefaultDataSize);
+ DSPRINT(DSFLAG, "Current Start Address %016llx\n",
+ (unsigned long long)CurrentStartAddress);
+ DSPRINT(DSFLAG, "Current End Address %016llx\n",
+ (unsigned long long)CurrentEndAddress);
+ DSPRINT(DSFLAG, "Required End Address %016llx\n",
+ (unsigned long long)RequiredEndAddress);
+ DSPRINT(DSFLAG, "Active Threads %08x\n", (unsigned)ActiveT);
// If we require a new slot, allocate it and initialize it (or attempt to
// reuse one). Also, set the shared stack and slot pointers to the new
@@ -184,11 +190,11 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
(uintptr_t)(&ExistingSlot->Data[0]);
if (ExistingSlotSize >= NewSize) {
DSPRINT(DSFLAG, "Reusing stack slot %016llx\n",
- (long long)ExistingSlot);
+ (unsigned long long)ExistingSlot);
NewSlot = ExistingSlot;
} else {
DSPRINT(DSFLAG, "Cleaning up -failed reuse - %016llx\n",
- (long long)SlotP->Next);
+ (unsigned long long)SlotP->Next);
free(ExistingSlot);
}
}
@@ -197,7 +203,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
NewSlot = (__kmpc_data_sharing_slot *)malloc(
sizeof(__kmpc_data_sharing_slot) + NewSize);
DSPRINT(DSFLAG, "New slot allocated %016llx (data size=%016llx)\n",
- (long long)NewSlot, NewSize);
+ (unsigned long long)NewSlot, NewSize);
}
NewSlot->Next = 0;
@@ -213,7 +219,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
// not eliminate them because that may be used to return data.
if (SlotP->Next) {
DSPRINT(DSFLAG, "Cleaning up - old not required - %016llx\n",
- (long long)SlotP->Next);
+ (unsigned long long)SlotP->Next);
free(SlotP->Next);
SlotP->Next = 0;
}
@@ -275,8 +281,8 @@ EXTERN void __kmpc_data_sharing_environment_end(
// have other threads that will return after the current ones.
ActiveT &= ~CurActive;
- DSPRINT(DSFLAG, "Active threads: %08x; New mask: %08x\n", CurActive,
- ActiveT);
+ DSPRINT(DSFLAG, "Active threads: %08x; New mask: %08x\n",
+ (unsigned)CurActive, (unsigned)ActiveT);
if (!ActiveT) {
// No other active threads? Great, lets restore the stack.
@@ -290,10 +296,13 @@ EXTERN void __kmpc_data_sharing_environment_end(
FrameP = *SavedSharedFrame;
ActiveT = *SavedActiveThreads;
- DSPRINT(DSFLAG, "Restored slot ptr at: %016llx \n", (long long)SlotP);
- DSPRINT(DSFLAG, "Restored stack ptr at: %016llx \n", (long long)StackP);
- DSPRINT(DSFLAG, "Restored frame ptr at: %016llx \n", (long long)FrameP);
- DSPRINT(DSFLAG, "Active threads: %08x \n", ActiveT);
+ DSPRINT(DSFLAG, "Restored slot ptr at: %016llx \n",
+ (unsigned long long)SlotP);
+ DSPRINT(DSFLAG, "Restored stack ptr at: %016llx \n",
+ (unsigned long long)StackP);
+ DSPRINT(DSFLAG, "Restored frame ptr at: %016llx \n",
+ (unsigned long long)FrameP);
+ DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT);
}
}
@@ -319,7 +328,7 @@ __kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID,
unsigned SourceWID = SourceThreadID / WARPSIZE;
- DSPRINT(DSFLAG, "Source warp: %d\n", SourceWID);
+ DSPRINT(DSFLAG, "Source warp: %u\n", SourceWID);
void * volatile P = DataSharingState.FramePtr[SourceWID];
DSPRINT0(DSFLAG, "Exiting __kmpc_get_data_sharing_environment_frame\n");
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
index 9f59d6612b8..b556670f182 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
@@ -164,16 +164,18 @@
#define PRINT0(_flag, _str) \
{ \
if (omptarget_device_environment.debug_level && DON(_flag)) { \
- printf("<b %2d, t %4d, w %2d, l %2d>: " _str, blockIdx.x, threadIdx.x, \
- threadIdx.x / WARPSIZE, threadIdx.x & 0x1F); \
+ printf("<b %2d, t %4d, w %2d, l %2d>: " _str, (int)blockIdx.x, \
+ (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE), \
+ (int)(threadIdx.x & 0x1F)); \
} \
}
#define PRINT(_flag, _str, _args...) \
{ \
if (omptarget_device_environment.debug_level && DON(_flag)) { \
- printf("<b %2d, t %4d, w %2d, l %2d>: " _str, blockIdx.x, threadIdx.x, \
- threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args); \
+ printf("<b %2d, t %4d, w %2d, l %2d>: " _str, (int)blockIdx.x, \
+ (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE), \
+ (int)(threadIdx.x & 0x1F), _args); \
} \
}
#else
@@ -217,16 +219,18 @@
#define ASSERT0(_flag, _cond, _str) \
{ \
if (TON(_flag) && !(_cond)) { \
- printf("<b %3d, t %4d, w %2d, l %2d> ASSERT: " _str "\n", blockIdx.x, \
- threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F); \
+ printf("<b %3d, t %4d, w %2d, l %2d> ASSERT: " _str "\n", \
+ (int)blockIdx.x, (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE), \
+ (int)(threadIdx.x & 0x1F)); \
assert(_cond); \
} \
}
#define ASSERT(_flag, _cond, _str, _args...) \
{ \
if (TON(_flag) && !(_cond)) { \
- printf("<b %3d, t %4d, w %2d, l %d2> ASSERT: " _str "\n", blockIdx.x, \
- threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args); \
+ printf("<b %3d, t %4d, w %2d, l %d2> ASSERT: " _str "\n", \
+ (int)blockIdx.x, (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE), \
+ (int)(threadIdx.x & 0x1F), _args); \
assert(_cond); \
} \
}
@@ -253,15 +257,17 @@
#define WARNING0(_flag, _str) \
{ \
if (WON(_flag)) { \
- printf("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, blockIdx.x, \
- threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F); \
+ printf("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, (int)blockIdx.x, \
+ (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE), \
+ (int)(threadIdx.x & 0x1F)); \
} \
}
#define WARNING(_flag, _str, _args...) \
{ \
if (WON(_flag)) { \
- printf("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, blockIdx.x, \
- threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args); \
+ printf("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, (int)blockIdx.x, \
+ (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE), \
+ (int)(threadIdx.x & 0x1F), _args); \
} \
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
index 3e8f248f11e..91b270cbf44 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -222,9 +222,11 @@ EXTERN int omp_get_ancestor_thread_num(int level) {
" chunk %" PRIu64 "; tid %d, tnum %d, nthreads %d\n",
"ancestor", steps,
(currTaskDescr->IsParallelConstruct() ? "par" : "task"),
- currTaskDescr->InParallelRegion(), sched,
- currTaskDescr->RuntimeChunkSize(), currTaskDescr->ThreadId(),
- currTaskDescr->ThreadsInTeam(), currTaskDescr->NThreads());
+ (int)currTaskDescr->InParallelRegion(), (int)sched,
+ currTaskDescr->RuntimeChunkSize(),
+ (int)currTaskDescr->ThreadId(),
+ (int)currTaskDescr->ThreadsInTeam(),
+ (int)currTaskDescr->NThreads());
}
if (currTaskDescr->IsParallelConstruct()) {
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
index dfb9c8bd70a..c100be53c34 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
@@ -113,7 +113,8 @@ public:
PRINT(LD_LOOP,
"OMP Thread %d: schedule type %d, chunk size = %lld, mytid "
"%d, num tids %d\n",
- gtid, schedtype, P64(chunk), gtid, numberOfActiveOMPThreads);
+ (int)gtid, (int)schedtype, (long long)chunk, (int)gtid,
+ (int)numberOfActiveOMPThreads);
ASSERT0(LT_FUSSY, gtid < numberOfActiveOMPThreads,
"current thread is not needed here; error");
@@ -173,9 +174,9 @@ public:
break;
}
default: {
- ASSERT(LT_FUSSY, FALSE, "unknown schedtype %d", schedtype);
+ ASSERT(LT_FUSSY, FALSE, "unknown schedtype %d", (int)schedtype);
PRINT(LD_LOOP, "unknown schedtype %d, revert back to static chunk\n",
- schedtype);
+ (int)schedtype);
ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
numberOfActiveOMPThreads);
break;
@@ -189,8 +190,9 @@ public:
PRINT(LD_LOOP,
"Got sched: Active %d, total %d: lb %lld, ub %lld, stride %lld, last "
"%d\n",
- numberOfActiveOMPThreads, GetNumberOfWorkersInTeam(), P64(*plower),
- P64(*pupper), P64(*pstride), lastiter);
+ (int)numberOfActiveOMPThreads, (int)GetNumberOfWorkersInTeam(),
+ (long long)(*plower), (long long)(*pupper), (long long)(*pstride),
+ (int)lastiter);
}
////////////////////////////////////////////////////////////////////////////////
@@ -229,7 +231,7 @@ public:
__kmpc_barrier(loc, threadId);
PRINT(LD_LOOP,
"go sequential as tnum=%ld, trip count %lld, ordered sched=%d\n",
- (long)tnum, P64(tripCount), schedule);
+ (long)tnum, (long long)tripCount, (int)schedule);
schedule = kmp_sched_static_chunk;
chunk = tripCount; // one thread gets the whole loop
} else if (schedule == kmp_sched_runtime) {
@@ -255,18 +257,20 @@ public:
break;
}
}
- PRINT(LD_LOOP, "Runtime sched is %d with chunk %lld\n", schedule,
- P64(chunk));
+ PRINT(LD_LOOP, "Runtime sched is %d with chunk %lld\n", (int)schedule,
+ (long long)chunk);
} else if (schedule == kmp_sched_auto) {
schedule = kmp_sched_static_chunk;
chunk = 1;
- PRINT(LD_LOOP, "Auto sched is %d with chunk %lld\n", schedule,
- P64(chunk));
+ PRINT(LD_LOOP, "Auto sched is %d with chunk %lld\n", (int)schedule,
+ (long long)chunk);
} else {
- PRINT(LD_LOOP, "Dyn sched is %d with chunk %lld\n", schedule, P64(chunk));
+ PRINT(LD_LOOP, "Dyn sched is %d with chunk %lld\n", (int)schedule,
+ (long long)chunk);
ASSERT(LT_FUSSY,
schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
- "unknown schedule %d & chunk %lld\n", schedule, P64(chunk));
+ "unknown schedule %d & chunk %lld\n", (int)schedule,
+ (long long)chunk);
}
// init schedules
@@ -287,9 +291,12 @@ public:
PRINT(LD_LOOP,
"dispatch init (static chunk) : num threads = %d, ub = %" PRId64
", next lower bound = %llu, stride = %llu\n",
- tnum, omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
- omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
- omptarget_nvptx_threadPrivateContext->Stride(tid));
+ (int)tnum,
+ omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
+ (unsigned long long)
+ omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
+ (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride(
+ tid));
} else if (schedule == kmp_sched_static_balanced_chunk) {
ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
// save sched state
@@ -316,9 +323,12 @@ public:
PRINT(LD_LOOP,
"dispatch init (static chunk) : num threads = %d, ub = %" PRId64
", next lower bound = %llu, stride = %llu\n",
- tnum, omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
- omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
- omptarget_nvptx_threadPrivateContext->Stride(tid));
+ (int)tnum,
+ omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
+ (unsigned long long)
+ omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
+ (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride(
+ tid));
} else if (schedule == kmp_sched_static_nochunk) {
ASSERT0(LT_FUSSY, chunk == 0, "bad chunk value");
// save sched state
@@ -336,9 +346,12 @@ public:
PRINT(LD_LOOP,
"dispatch init (static nochunk) : num threads = %d, ub = %" PRId64
", next lower bound = %llu, stride = %llu\n",
- tnum, omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
- omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
- omptarget_nvptx_threadPrivateContext->Stride(tid));
+ (int)tnum,
+ omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
+ (unsigned long long)
+ omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
+ (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride(
+ tid));
} else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) {
__kmpc_barrier(loc, threadId);
@@ -356,7 +369,9 @@ public:
PRINT(LD_LOOP,
"dispatch init (dyn) : num threads = %d, lb = %llu, ub = %" PRId64
", chunk %" PRIu64 "\n",
- tnum, omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId),
+ (int)tnum,
+ (unsigned long long)
+ omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId),
omptarget_nvptx_threadPrivateContext->LoopUpperBound(teamId),
omptarget_nvptx_threadPrivateContext->Chunk(teamId));
}
@@ -380,22 +395,22 @@ public:
// c. lb and ub >= loopUpperBound: empty chunk --> FINISHED
// a.
if (lb <= loopUpperBound && ub < loopUpperBound) {
- PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; not finished\n", P64(lb),
- P64(ub), P64(loopUpperBound));
+ PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; not finished\n",
+ (long long)lb, (long long)ub, (long long)loopUpperBound);
return NOT_FINISHED;
}
// b.
if (lb <= loopUpperBound) {
PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; clip to loop ub\n",
- P64(lb), P64(ub), P64(loopUpperBound));
+ (long long)lb, (long long)ub, (long long)loopUpperBound);
ub = loopUpperBound;
return LAST_CHUNK;
}
// c. if we are here, we are in case 'c'
lb = loopUpperBound + 2;
ub = loopUpperBound + 1;
- PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; finished\n", P64(lb),
- P64(ub), P64(loopUpperBound));
+ PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; finished\n", (long long)lb,
+ (long long)ub, (long long)loopUpperBound);
return FINISHED;
}
@@ -426,7 +441,7 @@ public:
// finished?
if (myLb > ub) {
PRINT(LD_LOOP, "static loop finished with myLb %lld, ub %lld\n",
- P64(myLb), P64(ub));
+ (long long)myLb, (long long)ub);
return DISPATCH_FINISHED;
}
// not finished, save current bounds
@@ -442,7 +457,7 @@ public:
ST stride = omptarget_nvptx_threadPrivateContext->Stride(tid);
omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = myLb + stride;
PRINT(LD_LOOP, "static loop continues with myLb %lld, myUb %lld\n",
- P64(*plower), P64(*pupper));
+ (long long)*plower, (long long)*pupper);
return DISPATCH_NOTFINISHED;
}
ASSERT0(LT_FUSSY,
@@ -464,12 +479,13 @@ public:
*pupper = myUb;
*pstride = 1;
- PRINT(LD_LOOP,
- "Got sched: active %d, total %d: lb %lld, ub %lld, stride = %lld, "
- "last %d\n",
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
- GetNumberOfWorkersInTeam(), P64(*plower), P64(*pupper), P64(*pstride),
- *plast);
+ PRINT(
+ LD_LOOP,
+ "Got sched: active %d, total %d: lb %lld, ub %lld, stride = %lld, "
+ "last %d\n",
+ (int)GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+ (int)GetNumberOfWorkersInTeam(), (long long)*plower, (long long)*pupper,
+ (long long)*pstride, (int)*plast);
return DISPATCH_NOTFINISHED;
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
index b0aef62ee9b..b0b12908112 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
@@ -150,7 +150,7 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
PRINT(LD_PAR,
"thread will execute parallel region with id %d in a team of "
"%d threads\n",
- newTaskDescr->ThreadId(), newTaskDescr->ThreadsInTeam());
+ (int)newTaskDescr->ThreadId(), (int)newTaskDescr->ThreadsInTeam());
if (RequiresDataSharing && threadId % WARPSIZE == 0) {
// Warp master innitializes data sharing environment.
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
index 82f513950b5..fbcbeab8eac 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -76,7 +76,7 @@ EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask,
else
*NumLanes = ConvergentSize;
ASSERT(LT_FUSSY, *NumLanes > 0, "bad thread request of %d threads",
- *NumLanes);
+ (int)*NumLanes);
// Set to true for lanes participating in the simd region.
bool isActive = false;
@@ -152,7 +152,7 @@ EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
else
NumThreads = ConvergentSize;
ASSERT(LT_FUSSY, NumThreads > 0, "bad thread request of %d threads",
- NumThreads);
+ (int)NumThreads);
// Set to true for workers participating in the parallel region.
bool isActive = false;
@@ -260,7 +260,7 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
}
ASSERT(LT_FUSSY, NumThreads > 0, "bad thread request of %d threads",
- NumThreads);
+ (int)NumThreads);
ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
"only team master can create parallel");
@@ -307,7 +307,7 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
PRINT(LD_PAR,
"thread will execute parallel region with id %d in a team of "
"%d threads\n",
- newTaskDescr->ThreadId(), newTaskDescr->NThreads());
+ (int)newTaskDescr->ThreadId(), (int)newTaskDescr->NThreads());
isActive = true;
}
@@ -438,7 +438,7 @@ EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t tid,
EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t tid,
int32_t simd_limit) {
- PRINT(LD_IO, "call kmpc_push_simd_limit %d\n", simd_limit);
+ PRINT(LD_IO, "call kmpc_push_simd_limit %d\n", (int)simd_limit);
ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized.");
tid = GetLogicalThreadIdInBlock();
omptarget_nvptx_threadPrivateContext->SimdLimitForNextSimd(tid) = simd_limit;
@@ -449,12 +449,12 @@ EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t tid,
EXTERN void __kmpc_push_num_teams(kmp_Ident *loc, int32_t tid,
int32_t num_teams, int32_t thread_limit) {
- PRINT(LD_IO, "call kmpc_push_num_teams %d\n", num_teams);
+ PRINT(LD_IO, "call kmpc_push_num_teams %d\n", (int)num_teams);
ASSERT0(LT_FUSSY, FALSE,
"should never have anything with new teams on device");
}
EXTERN void __kmpc_push_proc_bind(kmp_Ident *loc, uint32_t tid,
int proc_bind) {
- PRINT(LD_IO, "call kmpc_push_proc_bind %d\n", proc_bind);
+ PRINT(LD_IO, "call kmpc_push_proc_bind %d\n", (int)proc_bind);
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index a9e35a1b9fc..a05a6e016f9 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -76,12 +76,7 @@ EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) {
}
EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) {
- int lo, hi;
- asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
- hi = __SHFL_DOWN_SYNC(0xFFFFFFFF, hi, delta, size);
- lo = __SHFL_DOWN_SYNC(0xFFFFFFFF, lo, delta, size);
- asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
- return val;
+ return __SHFL_DOWN_SYNC(0xFFFFFFFFFFFFFFFFL, val, delta, size);
}
static INLINE void gpu_regular_warp_reduce(void *reduce_data,
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
index c5d2e91abc7..e2ea2d15faf 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
@@ -231,19 +231,20 @@ INLINE unsigned long PadBytes(unsigned long size,
{
// compute the necessary padding to satisfy alignment constraint
ASSERT(LT_FUSSY, (alignment & (alignment - 1)) == 0,
- "alignment %ld is not a power of 2\n", alignment);
+ "alignment %lu is not a power of 2\n", alignment);
return (~(unsigned long)size + 1) & (alignment - 1);
}
INLINE void *SafeMalloc(size_t size, const char *msg) // check if success
{
void *ptr = malloc(size);
- PRINT(LD_MEM, "malloc data of size %zu for %s: 0x%llx\n", size, msg, P64(ptr));
+ PRINT(LD_MEM, "malloc data of size %zu for %s: 0x%llx\n", size, msg,
+ (unsigned long long)ptr);
return ptr;
}
INLINE void *SafeFree(void *ptr, const char *msg) {
- PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", P64(ptr), msg);
+ PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg);
free(ptr);
return NULL;
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
index d4f2ada9604..7cdb7ffb12a 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -61,7 +61,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
PRINT(LD_SYNC,
"call kmpc_barrier with %d omp threads, sync parameter %d\n",
- numberOfActiveOMPThreads, threads);
+ (int)numberOfActiveOMPThreads, (int)threads);
// Barrier #1 is for synchronization among active threads.
named_sync(L1_BARRIER, threads);
}
@@ -89,7 +89,7 @@ EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) {
PRINT(LD_SYNC,
"call kmpc_barrier_simple_generic with %d omp threads, sync parameter "
"%d\n",
- numberOfActiveOMPThreads, threads);
+ (int)numberOfActiveOMPThreads, (int)threads);
// Barrier #1 is for synchronization among active threads.
named_sync(L1_BARRIER, threads);
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_generic\n");
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/task.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/task.cu
index c5006903b1a..2f47d4b2703 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/task.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/task.cu
@@ -39,14 +39,15 @@ EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(
PRINT(LD_IO,
"call __kmpc_omp_task_alloc(size priv&struct %lld, shared %lld, "
"fct 0x%llx)\n",
- P64(sizeOfTaskInclPrivate), P64(sizeOfSharedTable), P64(taskSub));
+ (long long)sizeOfTaskInclPrivate, (long long)sizeOfSharedTable,
+ (unsigned long long)taskSub);
// want task+priv to be a multiple of 8 bytes
size_t padForTaskInclPriv = PadBytes(sizeOfTaskInclPrivate, sizeof(void *));
sizeOfTaskInclPrivate += padForTaskInclPriv;
size_t kmpSize = sizeOfTaskInclPrivate + sizeOfSharedTable;
ASSERT(LT_FUSSY, sizeof(omptarget_nvptx_TaskDescr) % sizeof(void *) == 0,
"need task descr of size %d to be a multiple of %d\n",
- sizeof(omptarget_nvptx_TaskDescr), sizeof(void *));
+ (int)sizeof(omptarget_nvptx_TaskDescr), (int)sizeof(void *));
size_t totSize = sizeof(omptarget_nvptx_TaskDescr) + kmpSize;
omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
(omptarget_nvptx_ExplicitTaskDescr *)SafeMalloc(
@@ -63,7 +64,8 @@ EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(
newKmpTaskDescr->sub = taskSub;
newKmpTaskDescr->destructors = NULL;
PRINT(LD_TASK, "return with task descr kmp: 0x%llx, omptarget-nvptx 0x%llx\n",
- P64(newKmpTaskDescr), P64(newExplicitTaskDescr));
+ (unsigned long long)newKmpTaskDescr,
+ (unsigned long long)newExplicitTaskDescr);
return newKmpTaskDescr;
}
@@ -102,10 +104,11 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
// 3. call sub
PRINT(LD_TASK, "call task sub 0x%llx(task descr 0x%llx)\n",
- P64(newKmpTaskDescr->sub), P64(newKmpTaskDescr));
+ (unsigned long long)newKmpTaskDescr->sub,
+ (unsigned long long)newKmpTaskDescr);
newKmpTaskDescr->sub(0, newKmpTaskDescr);
PRINT(LD_TASK, "return from call task sub 0x%llx()\n",
- P64(newKmpTaskDescr->sub));
+ (unsigned long long)newKmpTaskDescr->sub);
// 4. pop context
omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid,
@@ -118,7 +121,7 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr) {
PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n",
- P64(newKmpTaskDescr));
+ (unsigned long long)newKmpTaskDescr);
ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
"Runtime must be initialized.");
// 1. get explict task descr from kmp task descr
@@ -144,7 +147,7 @@ EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr) {
PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n",
- P64(newKmpTaskDescr));
+ (unsigned long long)newKmpTaskDescr);
ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
"Runtime must be initialized.");
// 1. get explict task descr from kmp task descr