diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2023-01-30 14:43:00 +0000 |
---|---|---|
committer | Andrew Stubbs <ams@codesourcery.com> | 2023-12-06 16:48:57 +0000 |
commit | e7d6c277fa28c0b9b621d23c471e0388d2912644 (patch) | |
tree | 3ef9390ef49f8deefa281fd7ad2a145ad85254a6 /libgomp/plugin | |
parent | e9a19ead498fcc89186b724c6e76854f7751a89b (diff) |
amdgcn, libgomp: low-latency allocator
This implements the OpenMP low-latency memory allocator for AMD GCN using the
small per-team LDS memory (Local Data Store).
Since addresses can now refer to LDS space, the "Global" address space is
no longer compatible. This patch therefore switches the backend to use
entirely "Flat" addressing (which supports both memories). A future patch
will re-enable "global" instructions for cases where it is known to be safe
to do so.
gcc/ChangeLog:
* config/gcn/gcn-builtins.def (DISPATCH_PTR): New built-in.
* config/gcn/gcn.cc (gcn_init_machine_status): Disable global
addressing.
(gcn_expand_builtin_1): Implement GCN_BUILTIN_DISPATCH_PTR.
libgomp/ChangeLog:
* config/gcn/libgomp-gcn.h (TEAM_ARENA_START): Move to here.
(TEAM_ARENA_FREE): Likewise.
(TEAM_ARENA_END): Likewise.
(GCN_LOWLAT_HEAP): New.
* config/gcn/team.c (LITTLEENDIAN_CPU): New, and import hsa.h.
(__gcn_lowlat_init): New prototype.
(gomp_gcn_enter_kernel): Initialize the low-latency heap.
* libgomp.h (TEAM_ARENA_START): Move to libgomp.h.
(TEAM_ARENA_FREE): Likewise.
(TEAM_ARENA_END): Likewise.
* plugin/plugin-gcn.c (lowlat_size): New variable.
(print_kernel_dispatch): Label the group_segment_size purpose.
(init_environment_variables): Read GOMP_GCN_LOWLAT_POOL.
(create_kernel_dispatch): Pass low-latency heap allocation to kernel.
(run_kernel): Use shadow; don't assume values.
* testsuite/libgomp.c/omp_alloc-traits.c: Enable for amdgcn.
* config/gcn/allocator.c: New file.
* libgomp.texi: Document low-latency implementation details.
Diffstat (limited to 'libgomp/plugin')
-rw-r--r-- | libgomp/plugin/plugin-gcn.c | 35 |
1 files changed, 29 insertions, 6 deletions
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 8aabbd99881..7f8178c78b7 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -550,6 +550,7 @@ static size_t gcn_kernel_heap_size = DEFAULT_GCN_HEAP_SIZE; static int team_arena_size = DEFAULT_TEAM_ARENA_SIZE; static int stack_size = DEFAULT_GCN_STACK_SIZE; +static int lowlat_size = -1; /* Flag to decide whether print to stderr information about what is going on. Set in init_debug depending on environment variables. */ @@ -1016,8 +1017,8 @@ print_kernel_dispatch (struct kernel_dispatch *dispatch, unsigned indent) fprintf (stderr, "%*sobject: %lu\n", indent, "", dispatch->object); fprintf (stderr, "%*sprivate_segment_size: %u\n", indent, "", dispatch->private_segment_size); - fprintf (stderr, "%*sgroup_segment_size: %u\n", indent, "", - dispatch->group_segment_size); + fprintf (stderr, "%*sgroup_segment_size: %u (low-latency pool)\n", indent, + "", dispatch->group_segment_size); fprintf (stderr, "\n"); } @@ -1088,6 +1089,10 @@ init_environment_variables (void) if (tmp) stack_size = tmp;; } + + const char *lowlat = secure_getenv ("GOMP_GCN_LOWLAT_POOL"); + if (lowlat) + lowlat_size = atoi (lowlat); } /* Return malloc'd string with name of SYMBOL. */ @@ -1930,7 +1935,25 @@ create_kernel_dispatch (struct kernel_info *kernel, int num_teams, shadow->signal = sync_signal.handle; shadow->private_segment_size = kernel->private_segment_size; - shadow->group_segment_size = kernel->group_segment_size; + + if (lowlat_size < 0) + { + /* Divide the LDS between the number of running teams. + Allocate not less than is defined in the kernel metadata. */ + int teams_per_cu = num_teams / get_cu_count (agent); + int LDS_per_team = (teams_per_cu ? 65536 / teams_per_cu : 65536); + shadow->group_segment_size + = (kernel->group_segment_size > LDS_per_team + ? 
kernel->group_segment_size + : LDS_per_team);; + } + else if (lowlat_size < GCN_LOWLAT_HEAP+8) + /* Ensure that there's space for the OpenMP libgomp data. */ + shadow->group_segment_size = GCN_LOWLAT_HEAP+8; + else + shadow->group_segment_size = (lowlat_size > 65536 + ? 65536 + : lowlat_size); /* We expect kernels to request a single pointer, explicitly, and the rest of struct kernargs, implicitly. If they request anything else @@ -2290,9 +2313,9 @@ run_kernel (struct kernel_info *kernel, void *vars, print_kernel_dispatch (shadow, 2); } - packet->private_segment_size = kernel->private_segment_size; - packet->group_segment_size = kernel->group_segment_size; - packet->kernel_object = kernel->object; + packet->private_segment_size = shadow->private_segment_size; + packet->group_segment_size = shadow->group_segment_size; + packet->kernel_object = shadow->object; packet->kernarg_address = shadow->kernarg_address; hsa_signal_t s; s.handle = shadow->signal; |