aboutsummaryrefslogtreecommitdiff
path: root/libgomp/plugin
diff options
context:
space:
mode:
authorTobias Burnus <tburnus@baylibre.com>2024-05-29 15:14:38 +0200
committerTobias Burnus <tburnus@baylibre.com>2024-05-29 15:14:38 +0200
commit4ccb3366ade6ec9493f8ca20ab73b0da4b9816db (patch)
treea4f4a29116032e83903cb2352021873e7f5bfb2a /libgomp/plugin
parent19c491d1848a8410559247183597096778967edf (diff)
libgomp: Enable USM for some nvptx devices
A few high-end nvptx devices support the attribute CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS; for those, unified shared memory is supported in hardware. This patch enables support for those - if all installed nvptx devices have this feature (as the capabilities are per device type). This exposes a bug in gomp_copy_back_icvs, which previously used omp_get_mapped_ptr to find mapped variables; however, that function returns the unchanged pointer in case of shared memory. But in this case, we have a few actually mapped pointers - like the ICV variables. Additionally, there was a mismatch with regard to '-1' for the device number, as gomp_copy_back_icvs and omp_get_mapped_ptr count differently. Hence, do the lookup manually. include/ChangeLog: * cuda/cuda.h (CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS): Add. libgomp/ChangeLog: * libgomp.texi (nvptx): Update USM description. * plugin/plugin-nvptx.c (GOMP_OFFLOAD_get_num_devices): Claim support when requesting USM and all devices support CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS. * target.c (gomp_copy_back_icvs): Fix device ptr lookup. (gomp_target_init): Set GOMP_OFFLOAD_CAP_SHARED_MEM if the device supports USM.
Diffstat (limited to 'libgomp/plugin')
-rw-r--r--libgomp/plugin/plugin-nvptx.c15
1 files changed, 15 insertions, 0 deletions
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 5aad3448a8d..4cedc5390a3 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1201,8 +1201,23 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
if (num_devices > 0
&& ((omp_requires_mask
& ~(GOMP_REQUIRES_UNIFIED_ADDRESS
+ | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
| GOMP_REQUIRES_REVERSE_OFFLOAD)) != 0))
return -1;
+ /* Check whether host page access (direct or via migration) is supported;
+ if so, enable USM. Currently, capabilities are per device type; hence,
+ check all devices. */
+ if (num_devices > 0
+ && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY))
+ for (int dev = 0; dev < num_devices; dev++)
+ {
+ int pi;
+ CUresult r;
+ r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi,
+ CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS, dev);
+ if (r != CUDA_SUCCESS || pi == 0)
+ return -1;
+ }
return num_devices;
}