aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
authorChung-Lin Tang <cltang@codesourcery.com>2021-08-05 23:29:03 +0800
committerChung-Lin Tang <cltang@codesourcery.com>2021-08-05 23:29:03 +0800
commit0bac793ed6bad2c0c13cd1e93a1aa5808467afc8 (patch)
tree4f956146f4b129e344b00fa47e0326b831ef9928 /libgomp
parent8dec72aeb54e98643c0fb3d53768cdb96cf1342a (diff)
openmp: Implement omp_get_device_num routine
This patch implements the omp_get_device_num library routine, specified in OpenMP 5.0. GOMP_DEVICE_NUM_VAR is a macro symbol which defines name of a "device number" variable, is defined on the device-side libgomp, has it's address returned to host-side libgomp during device initialization, and the host libgomp then sets its value to the designated device number. libgomp/ChangeLog: * icv-device.c (omp_get_device_num): New API function, host side. * fortran.c (omp_get_device_num_): New interface function. * libgomp-plugin.h (GOMP_DEVICE_NUM_VAR): Define macro symbol. * libgomp.map (OMP_5.0.2): New version space with omp_get_device_num, omp_get_device_num_. * libgomp.texi (omp_get_device_num): Add documentation for new API function. * omp.h.in (omp_get_device_num): Add declaration. * omp_lib.f90.in (omp_get_device_num): Likewise. * omp_lib.h.in (omp_get_device_num): Likewise. * target.c (gomp_load_image_to_device): If additional entry for device number exists at end of returned entries from 'load_image_func' hook, copy the assigned device number over to the device variable. * config/gcn/icv-device.c (GOMP_DEVICE_NUM_VAR): Define static global. (omp_get_device_num): New API function, device side. * plugin/plugin-gcn.c ("symcat.h"): Add include. (GOMP_OFFLOAD_load_image): Add addresses of device GOMP_DEVICE_NUM_VAR at end of returned 'target_table' entries. * config/nvptx/icv-device.c (GOMP_DEVICE_NUM_VAR): Define static global. (omp_get_device_num): New API function, device side. * plugin/plugin-nvptx.c ("symcat.h"): Add include. (GOMP_OFFLOAD_load_image): Add addresses of device GOMP_DEVICE_NUM_VAR at end of returned 'target_table' entries. * testsuite/lib/libgomp.exp (check_effective_target_offload_target_intelmic): New function for testing for intelmic offloading. * testsuite/libgomp.c-c++-common/target-45.c: New test. * testsuite/libgomp.fortran/target10.f90: New test.
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/config/gcn/icv-device.c11
-rw-r--r--libgomp/config/nvptx/icv-device.c11
-rw-r--r--libgomp/fortran.c7
-rw-r--r--libgomp/icv-device.c9
-rw-r--r--libgomp/libgomp-plugin.h6
-rw-r--r--libgomp/libgomp.map8
-rw-r--r--libgomp/libgomp.texi29
-rw-r--r--libgomp/omp.h.in1
-rw-r--r--libgomp/omp_lib.f90.in6
-rw-r--r--libgomp/omp_lib.h.in3
-rw-r--r--libgomp/plugin/plugin-gcn.c38
-rw-r--r--libgomp/plugin/plugin-nvptx.c25
-rw-r--r--libgomp/target.c36
-rw-r--r--libgomp/testsuite/lib/libgomp.exp5
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/target-45.c30
-rw-r--r--libgomp/testsuite/libgomp.fortran/target10.f9020
16 files changed, 238 insertions, 7 deletions
diff --git a/libgomp/config/gcn/icv-device.c b/libgomp/config/gcn/icv-device.c
index 72d4f7cff74..34e0f8346f2 100644
--- a/libgomp/config/gcn/icv-device.c
+++ b/libgomp/config/gcn/icv-device.c
@@ -70,6 +70,16 @@ omp_is_initial_device (void)
return 0;
}
+/* This is set to the device number of current GPU during device initialization,
+ when the offload image containing this libgomp portion is loaded. */
+static volatile int GOMP_DEVICE_NUM_VAR;
+
+int
+omp_get_device_num (void)
+{
+ return GOMP_DEVICE_NUM_VAR;
+}
+
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_initial_device)
@@ -77,3 +87,4 @@ ialias (omp_get_num_devices)
ialias (omp_get_num_teams)
ialias (omp_get_team_num)
ialias (omp_is_initial_device)
+ialias (omp_get_device_num)
diff --git a/libgomp/config/nvptx/icv-device.c b/libgomp/config/nvptx/icv-device.c
index 3b96890f338..b63149d0c34 100644
--- a/libgomp/config/nvptx/icv-device.c
+++ b/libgomp/config/nvptx/icv-device.c
@@ -58,8 +58,19 @@ omp_is_initial_device (void)
return 0;
}
+/* This is set to the device number of current GPU during device initialization,
+ when the offload image containing this libgomp portion is loaded. */
+static volatile int GOMP_DEVICE_NUM_VAR;
+
+int
+omp_get_device_num (void)
+{
+ return GOMP_DEVICE_NUM_VAR;
+}
+
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_initial_device)
ialias (omp_get_num_devices)
ialias (omp_is_initial_device)
+ialias (omp_get_device_num)
diff --git a/libgomp/fortran.c b/libgomp/fortran.c
index e042702ac91..07f97656e51 100644
--- a/libgomp/fortran.c
+++ b/libgomp/fortran.c
@@ -83,6 +83,7 @@ ialias_redirect (omp_get_partition_place_nums)
ialias_redirect (omp_set_default_device)
ialias_redirect (omp_get_default_device)
ialias_redirect (omp_get_num_devices)
+ialias_redirect (omp_get_device_num)
ialias_redirect (omp_get_num_teams)
ialias_redirect (omp_get_team_num)
ialias_redirect (omp_is_initial_device)
@@ -600,6 +601,12 @@ omp_get_initial_device_ (void)
}
int32_t
+omp_get_device_num_ (void)
+{
+ return omp_get_device_num ();
+}
+
+int32_t
omp_get_max_task_priority_ (void)
{
return omp_get_max_task_priority ();
diff --git a/libgomp/icv-device.c b/libgomp/icv-device.c
index c1bedf46647..f11bdfa85c4 100644
--- a/libgomp/icv-device.c
+++ b/libgomp/icv-device.c
@@ -61,8 +61,17 @@ omp_is_initial_device (void)
return 1;
}
+int
+omp_get_device_num (void)
+{
+ /* By specification, this is equivalent to omp_get_initial_device
+ on the host. */
+ return omp_get_initial_device ();
+}
+
ialias (omp_set_default_device)
ialias (omp_get_default_device)
ialias (omp_get_initial_device)
ialias (omp_get_num_devices)
ialias (omp_is_initial_device)
+ialias (omp_get_device_num)
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 62645ce9954..cf24a2bee41 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -102,6 +102,12 @@ struct addr_pair
uintptr_t end;
};
+/* This symbol is to name a target side variable that holds the designated
+ 'device number' of the target device. The symbol needs to be available to
+ libgomp code and the offload plugin (which in the latter case must be
+ stringified). */
+#define GOMP_DEVICE_NUM_VAR __gomp_device_num
+
/* Miscellaneous functions. */
extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc));
extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc));
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 69aa69562b8..cc44885cba9 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -199,12 +199,18 @@ OMP_5.0.1 {
omp_fulfill_event_;
} OMP_5.0;
+OMP_5.0.2 {
+ global:
+ omp_get_device_num;
+ omp_get_device_num_;
+} OMP_5.0.1;
+
OMP_5.1 {
global:
omp_display_env;
omp_display_env_;
omp_display_env_8_;
-} OMP_5.0.1;
+} OMP_5.0.2;
GOMP_1.0 {
global:
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 2c1f1b5968b..fc9e708a8d2 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -165,6 +165,7 @@ linkage, and do not throw exceptions.
* omp_get_ancestor_thread_num:: Ancestor thread ID
* omp_get_cancellation:: Whether cancellation support is enabled
* omp_get_default_device:: Get the default device for target regions
+* omp_get_device_num:: Get device that current thread is running on
* omp_get_dynamic:: Dynamic teams setting
* omp_get_initial_device:: Device number of host device
* omp_get_level:: Number of parallel regions
@@ -385,6 +386,34 @@ For OpenMP 5.1, this must be equal to the value returned by the
+@node omp_get_device_num
+@section @code{omp_get_device_num} -- Return device number of current device
+@table @asis
+@item @emph{Description}:
+This function returns a device number that represents the device that the
+current thread is executing on. For OpenMP 5.0, this must be equal to the
+value returned by the @code{omp_get_initial_device} function when called
+from the host.
+
+@item @emph{C/C++}
+@multitable @columnfractions .20 .80
+@item @emph{Prototype}: @tab @code{int omp_get_device_num(void);}
+@end multitable
+
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{integer function omp_get_device_num()}
+@end multitable
+
+@item @emph{See also}:
+@ref{omp_get_initial_device}
+
+@item @emph{Reference}:
+@uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.2.37.
+@end table
+
+
+
@node omp_get_level
@section @code{omp_get_level} -- Obtain the current nesting level
@table @asis
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index c93db968d2e..da34a9d98a6 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -243,6 +243,7 @@ extern void omp_get_partition_place_nums (int *) __GOMP_NOTHROW;
extern void omp_set_default_device (int) __GOMP_NOTHROW;
extern int omp_get_default_device (void) __GOMP_NOTHROW;
extern int omp_get_num_devices (void) __GOMP_NOTHROW;
+extern int omp_get_device_num (void) __GOMP_NOTHROW;
extern int omp_get_num_teams (void) __GOMP_NOTHROW;
extern int omp_get_team_num (void) __GOMP_NOTHROW;
diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index 5fc6587e49e..d7e804f4fd5 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -551,6 +551,12 @@
end interface
interface
+ function omp_get_device_num ()
+ integer (4) :: omp_get_device_num
+ end function omp_get_device_num
+ end interface
+
+ interface
function omp_get_max_task_priority ()
integer (4) :: omp_get_max_task_priority
end function omp_get_max_task_priority
diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in
index 9873cea9ac1..20c32645e3c 100644
--- a/libgomp/omp_lib.h.in
+++ b/libgomp/omp_lib.h.in
@@ -244,6 +244,9 @@
external omp_get_initial_device
integer(4) omp_get_initial_device
+ external omp_get_device_num
+ integer(4) omp_get_device_num
+
external omp_get_max_task_priority
integer(4) omp_get_max_task_priority
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 2548614a2e5..f26d7361106 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -29,6 +29,7 @@
/* {{{ Includes and defines */
#include "config.h"
+#include "symcat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -3305,6 +3306,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
struct kernel_info *kernel;
int kernel_count = image_desc->kernel_count;
unsigned var_count = image_desc->global_variable_count;
+ int other_count = 1;
agent = get_agent_info (ord);
if (!agent)
@@ -3321,7 +3323,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
GCN_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
GCN_DEBUG ("Encountered %u global variables in an image\n", var_count);
- pair = GOMP_PLUGIN_malloc ((kernel_count + var_count - 2)
+ GCN_DEBUG ("Expect %d other variables in an image\n", other_count);
+ pair = GOMP_PLUGIN_malloc ((kernel_count + var_count + other_count - 2)
* sizeof (struct addr_pair));
*target_table = pair;
module = (struct module_info *)
@@ -3396,6 +3399,37 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
pair++;
}
+ GCN_DEBUG ("Looking for variable %s\n", STRINGX (GOMP_DEVICE_NUM_VAR));
+
+ hsa_status_t status;
+ hsa_executable_symbol_t var_symbol;
+ status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+ STRINGX (GOMP_DEVICE_NUM_VAR),
+ agent->id, 0, &var_symbol);
+ if (status == HSA_STATUS_SUCCESS)
+ {
+ uint64_t device_num_varptr;
+ uint32_t device_num_varsize;
+
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
+ &device_num_varptr);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable from its symbol", status);
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
+ &device_num_varsize);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable size from its symbol", status);
+
+ pair->start = device_num_varptr;
+ pair->end = device_num_varptr + device_num_varsize;
+ }
+ else
+ /* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */
+ pair->start = pair->end = 0;
+ pair++;
+
/* Ensure that constructors are run first. */
struct GOMP_kernel_launch_attributes kla =
{ 3,
@@ -3418,7 +3452,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
if (module->fini_array_func)
kernel_count--;
- return kernel_count + var_count;
+ return kernel_count + var_count + other_count;
}
/* Unload GCN object-code module described by struct gcn_image_desc in
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 1215212d501..0f16e1cf00d 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -34,6 +34,7 @@
#define _GNU_SOURCE
#include "openacc.h"
#include "config.h"
+#include "symcat.h"
#include "libgomp-plugin.h"
#include "oacc-plugin.h"
#include "gomp-constants.h"
@@ -1265,7 +1266,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
CUmodule module;
const char *const *var_names;
const struct targ_fn_launch *fn_descs;
- unsigned int fn_entries, var_entries, i, j;
+ unsigned int fn_entries, var_entries, other_entries, i, j;
struct targ_fn_descriptor *targ_fns;
struct addr_pair *targ_tbl;
const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data;
@@ -1295,8 +1296,11 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
fn_entries = img_header->fn_num;
fn_descs = img_header->fn_descs;
+ /* Currently, the only other entry kind is 'device number'. */
+ other_entries = 1;
+
targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
- * (fn_entries + var_entries));
+ * (fn_entries + var_entries + other_entries));
targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor)
* fn_entries);
@@ -1345,9 +1349,24 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
targ_tbl->end = targ_tbl->start + bytes;
}
+ CUdeviceptr device_num_varptr;
+ size_t device_num_varsize;
+ CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &device_num_varptr,
+ &device_num_varsize, module,
+ STRINGX (GOMP_DEVICE_NUM_VAR));
+ if (r == CUDA_SUCCESS)
+ {
+ targ_tbl->start = (uintptr_t) device_num_varptr;
+ targ_tbl->end = (uintptr_t) (device_num_varptr + device_num_varsize);
+ }
+ else
+ /* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */
+ targ_tbl->start = targ_tbl->end = 0;
+ targ_tbl++;
+
nvptx_set_clocktick (module, dev);
- return fn_entries + var_entries;
+ return fn_entries + var_entries + other_entries;
}
/* Unload the program described by TARGET_DATA. DEV_DATA is the
diff --git a/libgomp/target.c b/libgomp/target.c
index 453b3210e40..67fcf41cc2e 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1974,6 +1974,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
int num_funcs = host_funcs_end - host_func_table;
int num_vars = (host_vars_end - host_var_table) / 2;
+ /* Others currently is only 'device_num' */
+ int num_others = 1;
+
/* Load image to device and get target addresses for the image. */
struct addr_pair *target_table = NULL;
int i, num_target_entries;
@@ -1982,7 +1985,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
= devicep->load_image_func (devicep->target_id, version,
target_data, &target_table);
- if (num_target_entries != num_funcs + num_vars)
+ if (num_target_entries != num_funcs + num_vars
+ /* Others (device_num) are included as trailing entries in pair list. */
+ && num_target_entries != num_funcs + num_vars + num_others)
{
gomp_mutex_unlock (&devicep->lock);
if (is_register_lock)
@@ -2054,6 +2059,35 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
array++;
}
+ /* Last entry is for the on-device 'device_num' variable. Tolerate case
+ where plugin does not return this entry. */
+ if (num_funcs + num_vars < num_target_entries)
+ {
+ struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
+ /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
+ was found in this image. */
+ if (device_num_var->start != 0)
+ {
+ /* The index of the devicep within devices[] is regarded as its
+ 'device number', which is different from the per-device type
+ devicep->target_id. */
+ int device_num_val = (int) (devicep - &devices[0]);
+ if (device_num_var->end - device_num_var->start != sizeof (int))
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ if (is_register_lock)
+ gomp_mutex_unlock (&register_lock);
+ gomp_fatal ("offload plugin managed 'device_num' not of expected "
+ "format");
+ }
+
+ /* Copy device_num value to place on device memory, hereby actually
+ designating its device number into effect. */
+ gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
+ &device_num_val, sizeof (int), false, NULL);
+ }
+ }
+
free (target_table);
}
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index a2050151e84..ba8a73275c5 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -374,6 +374,11 @@ proc check_effective_target_offload_target_amdgcn { } {
return [libgomp_check_effective_target_offload_target "amdgcn"]
}
+# Return 1 if compiling for offload target intelmic
+proc check_effective_target_offload_target_intelmic { } {
+ return [libgomp_check_effective_target_offload_target "*-intelmic"]
+}
+
# Return 1 if offload device is available.
proc check_effective_target_offload_device { } {
return [check_runtime_nocache offload_device_available_ {
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-45.c b/libgomp/testsuite/libgomp.c-c++-common/target-45.c
new file mode 100644
index 00000000000..ec0d202e51c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-45.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target { ! offload_target_intelmic } } } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+int main (void)
+{
+
+ int host_device_num = omp_get_device_num ();
+
+ if (host_device_num != omp_get_initial_device ())
+ abort ();
+
+ int device_num;
+ int initial_device;
+
+ #pragma omp target map(from: device_num, initial_device)
+ {
+ initial_device = omp_is_initial_device ();
+ device_num = omp_get_device_num ();
+ }
+
+ if (initial_device && host_device_num != device_num)
+ abort ();
+
+ if (!initial_device && host_device_num == device_num)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/target10.f90 b/libgomp/testsuite/libgomp.fortran/target10.f90
new file mode 100644
index 00000000000..0b939ad7a0d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/target10.f90
@@ -0,0 +1,20 @@
+! { dg-do run { target { ! offload_target_intelmic } } }
+
+program main
+ use omp_lib
+ implicit none
+ integer :: device_num, host_device_num
+ logical :: initial_device
+
+ host_device_num = omp_get_device_num ()
+ if (host_device_num .ne. omp_get_initial_device ()) stop 1
+
+ !$omp target map(from: device_num, initial_device)
+ initial_device = omp_is_initial_device ()
+ device_num = omp_get_device_num ()
+ !$omp end target
+
+ if (initial_device .and. (host_device_num .ne. device_num)) stop 2
+ if ((.not. initial_device) .and. (host_device_num .eq. device_num)) stop 3
+
+end program main