aboutsummaryrefslogtreecommitdiff
path: root/libgomp/plugin
diff options
context:
space:
mode:
authorAndrew Stubbs <ams@codesourcery.com>2023-01-30 14:43:00 +0000
committerAndrew Stubbs <ams@codesourcery.com>2023-12-06 16:48:57 +0000
commite7d6c277fa28c0b9b621d23c471e0388d2912644 (patch)
tree3ef9390ef49f8deefa281fd7ad2a145ad85254a6 /libgomp/plugin
parente9a19ead498fcc89186b724c6e76854f7751a89b (diff)
amdgcn, libgomp: low-latency allocator
This implements the OpenMP low-latency memory allocator for AMD GCN using the small per-team LDS memory (Local Data Store). Since addresses can now refer to LDS space, the "Global" address space is no longer compatible. This patch therefore switches the backend to use entirely "Flat" addressing (which supports both memories). A future patch will re-enable "global" instructions for cases where it is known to be safe to do so. gcc/ChangeLog: * config/gcn/gcn-builtins.def (DISPATCH_PTR): New built-in. * config/gcn/gcn.cc (gcn_init_machine_status): Disable global addressing. (gcn_expand_builtin_1): Implement GCN_BUILTIN_DISPATCH_PTR. libgomp/ChangeLog: * config/gcn/libgomp-gcn.h (TEAM_ARENA_START): Move to here. (TEAM_ARENA_FREE): Likewise. (TEAM_ARENA_END): Likewise. (GCN_LOWLAT_HEAP): New. * config/gcn/team.c (LITTLEENDIAN_CPU): New, and import hsa.h. (__gcn_lowlat_init): New prototype. (gomp_gcn_enter_kernel): Initialize the low-latency heap. * libgomp.h (TEAM_ARENA_START): Move to libgomp.h. (TEAM_ARENA_FREE): Likewise. (TEAM_ARENA_END): Likewise. * plugin/plugin-gcn.c (lowlat_size): New variable. (print_kernel_dispatch): Label the group_segment_size purpose. (init_environment_variables): Read GOMP_GCN_LOWLAT_POOL. (create_kernel_dispatch): Pass low-latency heap allocation to kernel. (run_kernel): Use shadow; don't assume values. * testsuite/libgomp.c/omp_alloc-traits.c: Enable for amdgcn. * config/gcn/allocator.c: New file. * libgomp.texi: Document low-latency implementation details.
Diffstat (limited to 'libgomp/plugin')
-rw-r--r--libgomp/plugin/plugin-gcn.c35
1 files changed, 29 insertions, 6 deletions
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 8aabbd99881..7f8178c78b7 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -550,6 +550,7 @@ static size_t gcn_kernel_heap_size = DEFAULT_GCN_HEAP_SIZE;
static int team_arena_size = DEFAULT_TEAM_ARENA_SIZE;
static int stack_size = DEFAULT_GCN_STACK_SIZE;
+static int lowlat_size = -1;
/* Flag to decide whether print to stderr information about what is going on.
Set in init_debug depending on environment variables. */
@@ -1016,8 +1017,8 @@ print_kernel_dispatch (struct kernel_dispatch *dispatch, unsigned indent)
fprintf (stderr, "%*sobject: %lu\n", indent, "", dispatch->object);
fprintf (stderr, "%*sprivate_segment_size: %u\n", indent, "",
dispatch->private_segment_size);
- fprintf (stderr, "%*sgroup_segment_size: %u\n", indent, "",
- dispatch->group_segment_size);
+ fprintf (stderr, "%*sgroup_segment_size: %u (low-latency pool)\n", indent,
+ "", dispatch->group_segment_size);
fprintf (stderr, "\n");
}
@@ -1088,6 +1089,10 @@ init_environment_variables (void)
if (tmp)
stack_size = tmp;;
}
+
+ const char *lowlat = secure_getenv ("GOMP_GCN_LOWLAT_POOL");
+ if (lowlat)
+ lowlat_size = atoi (lowlat);
}
/* Return malloc'd string with name of SYMBOL. */
@@ -1930,7 +1935,25 @@ create_kernel_dispatch (struct kernel_info *kernel, int num_teams,
shadow->signal = sync_signal.handle;
shadow->private_segment_size = kernel->private_segment_size;
- shadow->group_segment_size = kernel->group_segment_size;
+
+ if (lowlat_size < 0)
+ {
+ /* Divide the LDS between the number of running teams.
+ Allocate not less than is defined in the kernel metadata. */
+ int teams_per_cu = num_teams / get_cu_count (agent);
+ int LDS_per_team = (teams_per_cu ? 65536 / teams_per_cu : 65536);
+ shadow->group_segment_size
+ = (kernel->group_segment_size > LDS_per_team
+ ? kernel->group_segment_size
+ : LDS_per_team);;
+ }
+ else if (lowlat_size < GCN_LOWLAT_HEAP+8)
+ /* Ensure that there's space for the OpenMP libgomp data. */
+ shadow->group_segment_size = GCN_LOWLAT_HEAP+8;
+ else
+ shadow->group_segment_size = (lowlat_size > 65536
+ ? 65536
+ : lowlat_size);
/* We expect kernels to request a single pointer, explicitly, and the
rest of struct kernargs, implicitly. If they request anything else
@@ -2290,9 +2313,9 @@ run_kernel (struct kernel_info *kernel, void *vars,
print_kernel_dispatch (shadow, 2);
}
- packet->private_segment_size = kernel->private_segment_size;
- packet->group_segment_size = kernel->group_segment_size;
- packet->kernel_object = kernel->object;
+ packet->private_segment_size = shadow->private_segment_size;
+ packet->group_segment_size = shadow->group_segment_size;
+ packet->kernel_object = shadow->object;
packet->kernarg_address = shadow->kernarg_address;
hsa_signal_t s;
s.handle = shadow->signal;