aboutsummaryrefslogtreecommitdiff
path: root/libgomp/plugin
diff options
context:
space:
mode:
author Andrew Stubbs <ams@codesourcery.com> 2023-10-03 14:03:49 +0100
committer Andrew Stubbs <ams@codesourcery.com> 2023-11-15 14:02:00 +0000
commit ae0d2c240213c5a7f6959c032bfc9f0703cab787 (patch)
tree 0654f98e3ccb9d06b22dd0c8bd781c4e0fde694b /libgomp/plugin
parent a0e6306b7ee16ce4ef067c00609d1303fed71c74 (diff)
amdgcn: Add Accelerator VGPR registers
Add the new CDNA register file. We don't support any of the specialized
instructions that use these registers, but they're useful to relieve
register pressure without spilling to stack.

Co-authored-by: Andrew Jenner <andrew@codesourcery.com>

gcc/ChangeLog:

* config/gcn/constraints.md: Add "a" AVGPR constraint.
* config/gcn/gcn-valu.md (*mov<mode>): Add AVGPR alternatives.
(*mov<mode>_4reg): Likewise.
(@mov<mode>_sgprbase): Likewise.
(gather<mode>_insn_1offset<exec>): Likewise.
(gather<mode>_insn_1offset_ds<exec>): Likewise.
(gather<mode>_insn_2offsets<exec>): Likewise.
(scatter<mode>_expr<exec_scatter>): Likewise.
(scatter<mode>_insn_1offset_ds<exec_scatter>): Likewise.
(scatter<mode>_insn_2offsets<exec_scatter>): Likewise.
* config/gcn/gcn.cc (MAX_NORMAL_AVGPR_COUNT): Define.
(gcn_class_max_nregs): Handle AVGPR_REGS and ALL_VGPR_REGS.
(gcn_hard_regno_mode_ok): Likewise.
(gcn_regno_reg_class): Likewise.
(gcn_spill_class): Allow spilling to AVGPRs on TARGET_CDNA1_PLUS.
(gcn_sgpr_move_p): Handle AVGPRs.
(gcn_secondary_reload): Reload AVGPRs via VGPRs.
(gcn_conditional_register_usage): Handle AVGPRs.
(gcn_vgpr_equivalent_register_operand): New function.
(gcn_valid_move_p): Check for validity of AVGPR moves.
(gcn_compute_frame_offsets): Handle AVGPRs.
(gcn_memory_move_cost): Likewise.
(gcn_register_move_cost): Likewise.
(gcn_vmem_insn_p): Handle TYPE_VOP3P_MAI.
(gcn_md_reorg): Handle AVGPRs.
(gcn_hsa_declare_function_name): Likewise.
(print_reg): Likewise.
(gcn_dwarf_register_number): Likewise.
* config/gcn/gcn.h (FIRST_AVGPR_REG): Define.
(AVGPR_REGNO): Define.
(LAST_AVGPR_REG): Define.
(SOFT_ARG_REG): Update.
(FRAME_POINTER_REGNUM): Update.
(DWARF_LINK_REGISTER): Update.
(FIRST_PSEUDO_REGISTER): Update.
(AVGPR_REGNO_P): Define.
(enum reg_class): Add AVGPR_REGS and ALL_VGPR_REGS.
(REG_CLASS_CONTENTS): Add new register classes and add entries for
AVGPRs to all classes.
(REGISTER_NAMES): Add AVGPRs.
* config/gcn/gcn.md (FIRST_AVGPR_REG, LAST_AVGPR_REG): Define.
(AP_REGNUM, FP_REGNUM): Update.
(define_attr "type"): Add vop3p_mai.
(define_attr "unit"): Handle vop3p_mai.
(define_attr "gcn_version"): Add "cdna2".
(define_attr "enabled"): Handle cdna2.
(*mov<mode>_insn): Add AVGPR alternatives.
(*movti_insn): Likewise.
* config/gcn/mkoffload.cc (isa_has_combined_avgprs): New.
(process_asm): Process avgpr_count.
* config/gcn/predicates.md (gcn_avgpr_register_operand): New.
(gcn_avgpr_hard_register_operand): New.
* doc/md.texi: Document the "a" constraint.

gcc/testsuite/ChangeLog:

* gcc.target/gcn/avgpr-mem-double.c: New test.
* gcc.target/gcn/avgpr-mem-int.c: New test.
* gcc.target/gcn/avgpr-mem-long.c: New test.
* gcc.target/gcn/avgpr-mem-short.c: New test.
* gcc.target/gcn/avgpr-spill-double.c: New test.
* gcc.target/gcn/avgpr-spill-int.c: New test.
* gcc.target/gcn/avgpr-spill-long.c: New test.
* gcc.target/gcn/avgpr-spill-short.c: New test.

libgomp/ChangeLog:

* plugin/plugin-gcn.c (max_isa_vgprs): New.
(run_kernel): CDNA2 devices have more VGPRs.
Diffstat (limited to 'libgomp/plugin')
-rw-r--r-- libgomp/plugin/plugin-gcn.c 24
1 files changed, 22 insertions, 2 deletions
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 7e7e2d6edfe..8aabbd99881 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -1702,6 +1702,25 @@ isa_code(const char *isa) {
return -1;
}
+/* Return the VGPR limit for the ELF machine code ISA: 256 for GFX803, GFX900, GFX906, GFX908 and GFX1030; 512 for GFX90a. CDNA2 devices have twice as many VGPRs compared to older devices. */
+
+static int
+max_isa_vgprs (int isa)
+{
+ switch (isa)
+ {
+ case EF_AMDGPU_MACH_AMDGCN_GFX803:
+ case EF_AMDGPU_MACH_AMDGCN_GFX900:
+ case EF_AMDGPU_MACH_AMDGCN_GFX906:
+ case EF_AMDGPU_MACH_AMDGCN_GFX908:
+ case EF_AMDGPU_MACH_AMDGCN_GFX1030:
+ return 256;
+ case EF_AMDGPU_MACH_AMDGCN_GFX90a:
+ return 512; /* The doubled count described in the header comment. */
+ }
+ GOMP_PLUGIN_fatal ("unhandled ISA in max_isa_vgprs"); /* Reached only when ISA matched no case above; no return follows, so this call is presumably noreturn (TODO confirm). */
+}
+
/* }}} */
/* {{{ Run */
@@ -2143,6 +2162,7 @@ run_kernel (struct kernel_info *kernel, void *vars,
struct GOMP_kernel_launch_attributes *kla,
struct goacc_asyncqueue *aq, bool module_locked)
{
+ struct agent_info *agent = kernel->agent;
GCN_DEBUG ("SGPRs: %d, VGPRs: %d\n", kernel->description->sgpr_count,
kernel->description->vpgr_count);
@@ -2150,8 +2170,9 @@ run_kernel (struct kernel_info *kernel, void *vars,
VGPRs available to run the kernels together. */
if (kla->ndim == 3 && kernel->description->vpgr_count > 0)
{
+ int max_vgprs = max_isa_vgprs (agent->device_isa);
int granulated_vgprs = (kernel->description->vpgr_count + 3) & ~3;
- int max_threads = (256 / granulated_vgprs) * 4;
+ int max_threads = (max_vgprs / granulated_vgprs) * 4;
if (kla->gdims[2] > max_threads)
{
GCN_WARNING ("Too many VGPRs required to support %d threads/workers"
@@ -2188,7 +2209,6 @@ run_kernel (struct kernel_info *kernel, void *vars,
DEBUG_PRINT ("]\n");
DEBUG_FLUSH ();
- struct agent_info *agent = kernel->agent;
if (!module_locked && pthread_rwlock_rdlock (&agent->module_rwlock))
GOMP_PLUGIN_fatal ("Unable to read-lock a GCN agent rwlock");