summaryrefslogtreecommitdiff
path: root/openmp/libomptarget
diff options
context:
space:
mode:
authorJonas Hahnfeld <hahnjo@hahnjo.de>2018-09-04 15:13:17 +0000
committerJonas Hahnfeld <hahnjo@hahnjo.de>2018-09-04 15:13:17 +0000
commit38a5131b330a63050bbf8f42097e7c63a8608c1c (patch)
tree56dc4bb5814d9e5d5d1ccfd083f28138e430dc0d /openmp/libomptarget
parent43b1ded799646e82bec840a3fa917fbd95801ab7 (diff)
[libomptarget][NVPTX] Drop dead code and data structures, NFCI.
* cg and HasCancel in WorkDescr were never read and can be removed. * This eliminates the last use of priv in ThreadPrivateContext. * CounterGroup is unused afterwards. * Remove duplicate external declares in omptarget-nvptx.cu that are already in the header omptarget-nvptx.h. Differential Revision: https://reviews.llvm.org/D51622
Diffstat (limited to 'openmp/libomptarget')
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/counter_group.h51
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/counter_groupi.h82
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/interface.h2
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu19
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h12
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h14
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/option.h7
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu4
8 files changed, 2 insertions, 189 deletions
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/counter_group.h b/openmp/libomptarget/deviceRTLs/nvptx/src/counter_group.h
deleted file mode 100644
index b183871e73a..00000000000
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/counter_group.h
+++ /dev/null
@@ -1,51 +0,0 @@
-//===------ counter_group.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Interface to implement OpenMP loop scheduling
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _OMPTARGET_NVPTX_COUNTER_GROUP_H_
-#define _OMPTARGET_NVPTX_COUNTER_GROUP_H_
-
-#include "option.h"
-
-// counter group type for synchronizations
-class omptarget_nvptx_CounterGroup {
-public:
- // getters and setters
- INLINE Counter &Event() { return v_event; }
- INLINE volatile Counter &Start() { return v_start; }
- INLINE Counter &Init() { return v_init; }
-
- // Synchronization Interface
-
- INLINE void Clear(); // first time start=event
- INLINE void Reset(); // init = first
- INLINE void Init(Counter &priv); // priv = init
- INLINE Counter Next(); // just counts number of events
-
- // set priv to n, to be used in later waitOrRelease
- INLINE void Complete(Counter &priv, Counter n);
-
- // check priv and decide if we have to wait or can free the other warps
- INLINE void Release(Counter priv, Counter current_event_value);
- INLINE void WaitOrRelease(Counter priv, Counter current_event_value);
-
-private:
- Counter v_event; // counter of events (atomic)
-
- // volatile is needed to force loads to read from global
- // memory or L2 cache and see the write by the last master
- volatile Counter v_start; // signal when events registered are finished
-
- Counter v_init; // used to initialize local thread variables
-};
-
-#endif /* SRC_COUNTER_GROUP_H_ */
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/counter_groupi.h b/openmp/libomptarget/deviceRTLs/nvptx/src/counter_groupi.h
deleted file mode 100644
index f34de3e46b8..00000000000
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/counter_groupi.h
+++ /dev/null
@@ -1,82 +0,0 @@
-//===----- counter_groupi.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Interface implementation for OpenMP loop scheduling
-//
-//===----------------------------------------------------------------------===//
-
-#include "option.h"
-
-INLINE void omptarget_nvptx_CounterGroup::Clear() {
- PRINT0(LD_SYNCD, "clear counters\n")
- v_event = 0;
- v_start = 0;
- // v_init does not need to be reset (its value is dead)
-}
-
-INLINE void omptarget_nvptx_CounterGroup::Reset() {
- // done by master before entering parallel
- ASSERT(LT_FUSSY, v_event == v_start,
- "error, entry %lld !=start %lld at reset\n", P64(v_event),
- P64(v_start));
- v_init = v_start;
-}
-
-INLINE void omptarget_nvptx_CounterGroup::Init(Counter &priv) {
- PRINT(LD_SYNCD, "init priv counter 0x%llx with val %lld\n", P64(&priv),
- P64(v_start));
- priv = v_start;
-}
-
-// just counts number of events
-INLINE Counter omptarget_nvptx_CounterGroup::Next() {
- Counter oldVal = atomicAdd(&v_event, (Counter)1);
- PRINT(LD_SYNCD, "next event counter 0x%llx with val %lld->%lld\n",
- P64(&v_event), P64(oldVal), P64(oldVal + 1));
-
- return oldVal;
-}
-
-// set priv to n, to be used in later waitOrRelease
-INLINE void omptarget_nvptx_CounterGroup::Complete(Counter &priv, Counter n) {
- PRINT(LD_SYNCD, "complete priv counter 0x%llx with val %llu->%llu (+%llu)\n",
- P64(&priv), P64(priv), P64(priv + n), n);
- priv += n;
-}
-
-INLINE void omptarget_nvptx_CounterGroup::Release(Counter priv,
- Counter current_event_value) {
- if (priv - 1 == current_event_value) {
- PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n",
- P64(&v_start), P64(v_start), P64(priv));
- v_start = priv;
- }
-}
-
-// check priv and decide if we have to wait or can free the other warps
-INLINE void
-omptarget_nvptx_CounterGroup::WaitOrRelease(Counter priv,
- Counter current_event_value) {
- if (priv - 1 == current_event_value) {
- PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n",
- P64(&v_start), P64(v_start), P64(priv));
- v_start = priv;
- } else {
- PRINT(LD_SYNCD,
- "Start waiting while start counter 0x%llx with val %lld < %lld\n",
- P64(&v_start), P64(v_start), P64(priv));
- while (priv > v_start) {
- // IDLE LOOP
- // start is volatile: it will be re-loaded at each while loop
- }
- PRINT(LD_SYNCD,
- "Done waiting as start counter 0x%llx with val %lld >= %lld\n",
- P64(&v_start), P64(v_start), P64(priv));
- }
-}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h
index d958a074ab0..c3f9f702295 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/interface.h
@@ -20,6 +20,8 @@
#ifndef _INTERFACES_H_
#define _INTERFACES_H_
+#include "option.h"
+
////////////////////////////////////////////////////////////////////////////////
// OpenMP interface
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
index 0f7deae51e3..fb28c4c589f 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
@@ -21,25 +21,10 @@ extern __device__
omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
omptarget_nvptx_device_State[MAX_SM];
-extern __device__ __shared__
- omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
-
extern __device__ omptarget_nvptx_Queue<
omptarget_nvptx_SimpleThreadPrivateContext, OMP_STATE_COUNT>
omptarget_nvptx_device_simpleState[MAX_SM];
-extern __device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext
- *omptarget_nvptx_simpleThreadPrivateContext;
-
-//
-// The team master sets the outlined function and its arguments in these
-// variables to communicate with the workers. Since they are in shared memory,
-// there is one copy of these variables for each kernel, instance, and team.
-//
-extern volatile __device__ __shared__ omptarget_nvptx_WorkFn
- omptarget_nvptx_workFn;
-extern __device__ __shared__ uint32_t execution_param;
-
////////////////////////////////////////////////////////////////////////////////
// init entry points
////////////////////////////////////////////////////////////////////////////////
@@ -146,8 +131,6 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
// init team context
currTeamDescr.InitTeamDescr();
- // init counters (copy start to init)
- workDescr.CounterGroup().Reset();
}
__syncthreads();
@@ -168,8 +151,6 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
newTaskDescr);
// init thread private from init value
- workDescr.CounterGroup().Init(
- omptarget_nvptx_threadPrivateContext->Priv(threadId));
PRINT(LD_PAR,
"thread will execute parallel region with id %d in a team of "
"%d threads\n",
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 7058d6050f4..f6e35a4b120 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -26,7 +26,6 @@
#include <math.h>
// local includes
-#include "counter_group.h"
#include "debug.h" // debug
#include "interface.h" // interfaces with omp, compiler, and user
#include "option.h" // choices we have
@@ -242,15 +241,10 @@ class omptarget_nvptx_WorkDescr {
public:
// access to data
- INLINE omptarget_nvptx_CounterGroup &CounterGroup() { return cg; }
INLINE omptarget_nvptx_TaskDescr *WorkTaskDescr() { return &masterTaskICV; }
- // init
- INLINE void InitWorkDescr();
private:
- omptarget_nvptx_CounterGroup cg; // for barrier (no other needed)
omptarget_nvptx_TaskDescr masterTaskICV;
- bool hasCancel;
};
////////////////////////////////////////////////////////////////////////////////
@@ -347,9 +341,6 @@ public:
INLINE uint16_t &SimdLimitForNextSimd(int tid) {
return nextRegion.slim[tid];
}
- // sync
- INLINE Counter &Priv(int tid) { return priv[tid]; }
- INLINE void IncrementPriv(int tid, Counter val) { priv[tid] += val; }
// schedule (for dispatch)
INLINE kmp_sched_t &ScheduleType(int tid) { return schedule[tid]; }
INLINE int64_t &Chunk(int tid) { return chunk[tid]; }
@@ -377,8 +368,6 @@ private:
// simd limit
uint16_t slim[MAX_THREADS_PER_TEAM];
} nextRegion;
- // sync
- Counter priv[MAX_THREADS_PER_TEAM];
// schedule (for dispatch)
kmp_sched_t schedule[MAX_THREADS_PER_TEAM]; // remember schedule type for #for
int64_t chunk[MAX_THREADS_PER_TEAM];
@@ -469,7 +458,6 @@ INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int globalThreadId);
// inlined implementation
////////////////////////////////////////////////////////////////////////////////
-#include "counter_groupi.h"
#include "omptarget-nvptxi.h"
#include "supporti.h"
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h
index 086f4c5d2c9..1cca8201b35 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h
@@ -168,31 +168,17 @@ omptarget_nvptx_ThreadPrivateContext::InitThreadPrivateContext(int tid) {
topTaskDescr[tid] = NULL;
// no num threads value has been pushed
nextRegion.tnum[tid] = 0;
- // priv counter init to zero
- priv[tid] = 0;
// the following don't need to be init here; they are init when using dyn
// sched
// current_Event, events_Number, chunk, num_Iterations, schedule
}
////////////////////////////////////////////////////////////////////////////////
-// Work Descriptor
-////////////////////////////////////////////////////////////////////////////////
-
-INLINE void omptarget_nvptx_WorkDescr::InitWorkDescr() {
- cg.Clear(); // start and stop to zero too
- // threadsInParallelTeam does not need to be init (done in start parallel)
- hasCancel = FALSE;
-}
-
-////////////////////////////////////////////////////////////////////////////////
// Team Descriptor
////////////////////////////////////////////////////////////////////////////////
INLINE void omptarget_nvptx_TeamDescr::InitTeamDescr() {
levelZeroTaskDescr.InitLevelZeroTaskDescr();
- workDescrForActiveParallel.InitWorkDescr();
- // omp_init_lock(criticalLock);
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/option.h b/openmp/libomptarget/deviceRTLs/nvptx/src/option.h
index 43172ad45d0..791d6f3917f 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/option.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/option.h
@@ -47,13 +47,6 @@
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
-// data options
-////////////////////////////////////////////////////////////////////////////////
-
-// decide if counters are 32 or 64 bit
-#define Counter unsigned long long
-
-////////////////////////////////////////////////////////////////////////////////
// misc options (by def everythig here is device)
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
index 6157aa20794..f0ba41bd18e 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -306,8 +306,6 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
workDescr.WorkTaskDescr()->CopyToWorkDescr(currTaskDescr,
CudaThreadsForParallel / NumLanes);
- // init counters (copy start to init)
- workDescr.CounterGroup().Reset();
}
// All workers call this function. Deactivate those not needed.
@@ -345,8 +343,6 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
newTaskDescr);
// init private from int value
- workDescr.CounterGroup().Init(
- omptarget_nvptx_threadPrivateContext->Priv(threadId));
PRINT(LD_PAR,
"thread will execute parallel region with id %d in a team of "
"%d threads\n",