aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorYvan Roux <yvan.roux@linaro.org>2016-09-14 11:45:43 +0200
committerYvan Roux <yvan.roux@linaro.org>2016-09-15 11:42:00 +0000
commit0717538f25c9c7a9858cb5fba9f2011ffe1b8816 (patch)
tree6e287ff992464b0b707621fb1ddce1d1c91839c4 /gcc
parentced81b2bb45783b724fbff285716deb61f836570 (diff)
gcc/
Backport from trunk r240102.
2016-09-12 Andrew Pinski <apinski@cavium.com>

* config/aarch64/aarch64-tuning-flags.def (SLOW_UNALIGNED_LDPW): New tuning option.
* config/aarch64/aarch64.c (thunderx_tunings): Enable AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW.
(aarch64_operands_ok_for_ldpstp): Return false if AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW and the mode was SImode and the alignment is less than 8 byte.
(aarch64_operands_adjust_ok_for_ldpstp): Likewise.

gcc/testsuite/
Backport from trunk r240102.
2016-09-12 Andrew Pinski <apinski@cavium.com>

* gcc.target/aarch64/thunderxloadpair.c: New testcase.
* gcc.target/aarch64/thunderxnoloadpair.c: New testcase.

Change-Id: I8cc39d2082b5eb901979ca677965c18b4356383b
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/aarch64/aarch64-tuning-flags.def 5
-rw-r--r-- gcc/config/aarch64/aarch64.c 20
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/thunderxloadpair.c 20
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/thunderxnoloadpair.c 17
4 files changed, 61 insertions, 1 deletions
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index 048c2a3e3f7..68b5ba0ad0e 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -29,3 +29,8 @@
AARCH64_TUNE_ to give an enum name. */
AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
+
+/* Don't create load/store pairs that are not 8-byte aligned. That is, if
+the two loads/stores are not at least 8-byte aligned, don't combine them
+into a load/store pair. */
+AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 17e36f239ae..a4a34b57f0e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -687,7 +687,7 @@ static const struct tune_params thunderx_tunings =
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */
};
static const struct tune_params xgene1_tunings =
@@ -13571,6 +13571,15 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
return false;
+ /* If we have SImode and slow unaligned ldp,
+ check that the alignment is at least 8 bytes. */
+ if (mode == SImode
+ && (aarch64_tune_params.extra_tuning_flags
+ & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
+ && !optimize_size
+ && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
+ return false;
+
/* Check if the addresses are in the form of [base+offset]. */
extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
@@ -13730,6 +13739,15 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
return false;
}
+ /* If we have SImode and slow unaligned ldp,
+ check that the alignment is at least 8 bytes. */
+ if (mode == SImode
+ && (aarch64_tune_params.extra_tuning_flags
+ & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
+ && !optimize_size
+ && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
+ return false;
+
if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
rclass_1 = FP_REGS;
else
diff --git a/gcc/testsuite/gcc.target/aarch64/thunderxloadpair.c b/gcc/testsuite/gcc.target/aarch64/thunderxloadpair.c
new file mode 100644
index 00000000000..14b1f736093
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/thunderxloadpair.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=thunderx" } */
+
+struct ldp
+{
+ long long c;
+ int a, b;
+};
+
+
+int f(struct ldp *a)
+{
+ return a->a + a->b;
+}
+
+
+/* We know a->a to be 8-byte aligned, so it is profitable
+ to do ldp. */
+/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/thunderxnoloadpair.c b/gcc/testsuite/gcc.target/aarch64/thunderxnoloadpair.c
new file mode 100644
index 00000000000..3093ad0e1f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/thunderxnoloadpair.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=thunderx" } */
+
+struct noldp
+{
+ int a, b;
+};
+
+
+int f(struct noldp *a)
+{
+ return a->a + a->b;
+}
+
+/* We know a->a to be only 4-byte aligned, so it is not profitable
+ to do ldp. */
+/* { dg-final { scan-assembler-not "ldp\tw\[0-9\]+, w\[0-9\]" } } */