diff options
| field | value | date |
|---|---|---|
| author | Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> | 2019-06-04 15:05:53 +0000 |
| committer | Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> | 2019-06-04 15:05:53 +0000 |
| commit | dba741d61d3b460f090810bf14ec2f6cd5ecc32d (patch) | |
| tree | e56155421cec60ea8dcc12a8d4ea426b07d37ad2 /libomptarget | |
| parent | 6973da5c32719dead93113061a40fccbc7e8f662 (diff) | |
[OpenMP][libomptarget] Enable usage of unified memory for declare target link variables
Summary: This patch enables the usage of a host variable on the device for declare target link variables when unified memory is available.
Reviewers: ABataev, caomhin, grokos
Reviewed By: grokos
Subscribers: Hahnfeld, guansong, jdoerfert, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D60884
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@362505 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | libomptarget/include/omptargetplugin.h | 3 |
| -rw-r--r-- | libomptarget/plugins/cuda/src/rtl.cpp | 23 |
| -rw-r--r-- | libomptarget/plugins/exports | 1 |
| -rw-r--r-- | libomptarget/src/device.cpp | 3 |
| -rw-r--r-- | libomptarget/src/rtl.cpp | 4 |
| -rw-r--r-- | libomptarget/src/rtl.h | 7 |
6 files changed, 39 insertions, 2 deletions
diff --git a/libomptarget/include/omptargetplugin.h b/libomptarget/include/omptargetplugin.h index 2876bfb..e03416c 100644 --- a/libomptarget/include/omptargetplugin.h +++ b/libomptarget/include/omptargetplugin.h @@ -31,6 +31,9 @@ int32_t __tgt_rtl_number_of_devices(void); // having to load the library, which can be expensive. int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image); +// Initialize the requires flags for the device. +int64_t __tgt_rtl_init_requires(int64_t RequiresFlags); + // Initialize the specified device. In case of success return 0; otherwise // return an error code. int32_t __tgt_rtl_init_device(int32_t ID); diff --git a/libomptarget/plugins/cuda/src/rtl.cpp b/libomptarget/plugins/cuda/src/rtl.cpp index fc0c1ec..844afa1 100644 --- a/libomptarget/plugins/cuda/src/rtl.cpp +++ b/libomptarget/plugins/cuda/src/rtl.cpp @@ -111,6 +111,9 @@ public: int EnvNumTeams; int EnvTeamLimit; + // OpenMP Requires Flags + int64_t RequiresFlags; + //static int EnvNumThreads; static const int HardTeamLimit = 1<<16; // 64k static const int HardThreadLimit = 1024; @@ -227,6 +230,9 @@ public: } else { EnvNumTeams = -1; } + + // Default state. 
+ RequiresFlags = OMP_REQ_UNDEFINED; } ~RTLDeviceInfoTy() { @@ -264,6 +270,12 @@ int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) { int32_t __tgt_rtl_number_of_devices() { return DeviceInfo.NumberOfDevices; } +int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) { + DP("Init requires flags to %ld\n", RequiresFlags); + DeviceInfo.RequiresFlags = RequiresFlags; + return RequiresFlags; +} + int32_t __tgt_rtl_init_device(int32_t device_id) { CUdevice cuDevice; @@ -436,6 +448,17 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, DPxPTR(e - HostBegin), e->name, DPxPTR(cuptr)); entry.addr = (void *)cuptr; + if (DeviceInfo.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + e->flags & OMP_DECLARE_TARGET_LINK) { + // If unified memory is present any target link variables + // can access host addresses directly. There is no longer a + // need for device copies. + cuMemcpyHtoD(cuptr, e->addr, sizeof(void *)); + DP("Copy linked variable host address (" DPxMOD ")" + "to device address (" DPxMOD ")\n", + DPxPTR(*((void**)e->addr)), DPxPTR(cuptr)); + } + DeviceInfo.addOffloadEntry(device_id, entry); continue; diff --git a/libomptarget/plugins/exports b/libomptarget/plugins/exports index 3f9f7d4..a14bedf 100644 --- a/libomptarget/plugins/exports +++ b/libomptarget/plugins/exports @@ -2,6 +2,7 @@ VERS1.0 { global: __tgt_rtl_is_valid_binary; __tgt_rtl_number_of_devices; + __tgt_rtl_init_requires; __tgt_rtl_init_device; __tgt_rtl_load_binary; __tgt_rtl_data_alloc; diff --git a/libomptarget/src/device.cpp b/libomptarget/src/device.cpp index a946b92..5ecba57 100644 --- a/libomptarget/src/device.cpp +++ b/libomptarget/src/device.cpp @@ -275,6 +275,9 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) { /// Init device, should not be called directly. void DeviceTy::init() { + // Make call to init_requires if it exists for this plugin. 
+ if (RTL->init_requires) + RTL->init_requires(RTLRequiresFlags); int32_t rc = RTL->init_device(RTLDeviceID); if (rc == OFFLOAD_SUCCESS) { IsInit = true; diff --git a/libomptarget/src/rtl.cpp b/libomptarget/src/rtl.cpp index 770ae36..4eb7ab7 100644 --- a/libomptarget/src/rtl.cpp +++ b/libomptarget/src/rtl.cpp @@ -107,6 +107,10 @@ void RTLsTy::LoadRTLs() { dynlib_handle, "__tgt_rtl_run_target_team_region"))) continue; + // Optional functions + *((void**) &R.init_requires) = dlsym( + dynlib_handle, "__tgt_rtl_init_requires"); + // No devices are supported by this RTL? if (!(R.NumberOfDevices = R.number_of_devices())) { DP("No devices supported in this RTL\n"); diff --git a/libomptarget/src/rtl.h b/libomptarget/src/rtl.h index 381f23e..8148e81 100644 --- a/libomptarget/src/rtl.h +++ b/libomptarget/src/rtl.h @@ -36,6 +36,7 @@ struct RTLInfoTy { int32_t); typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *, int32_t, int32_t, int32_t, uint64_t); + typedef int64_t(init_requires_ty)(int64_t); int32_t Idx; // RTL index, index is the number of devices // of other RTLs that were registered before, @@ -60,6 +61,7 @@ struct RTLInfoTy { data_delete_ty *data_delete; run_region_ty *run_region; run_team_region_ty *run_team_region; + init_requires_ty *init_requires; // Are there images associated with this RTL. bool isUsed; @@ -78,8 +80,8 @@ struct RTLInfoTy { #endif is_valid_binary(0), number_of_devices(0), init_device(0), load_binary(0), data_alloc(0), data_submit(0), data_retrieve(0), - data_delete(0), run_region(0), run_team_region(0), isUsed(false), - Mtx() {} + data_delete(0), run_region(0), run_team_region(0), + init_requires(0), isUsed(false), Mtx() {} RTLInfoTy(const RTLInfoTy &r) : Mtx() { Idx = r.Idx; @@ -98,6 +100,7 @@ struct RTLInfoTy { data_delete = r.data_delete; run_region = r.run_region; run_team_region = r.run_team_region; + init_requires = r.init_requires; isUsed = r.isUsed; } }; |