about summary refs log tree commit diff
diff options
context:
space:
mode:
authorChristophe Lyon <christophe.lyon@linaro.org>2016-02-17 14:16:23 +0100
committerLinaro Code Review <review@review.linaro.org>2016-03-14 13:40:02 +0000
commit1aca6bdb61c162234a2e4a1f14adebb7382521ad (patch)
tree5ae29c15db6ce8b5631114a448859789f684bdb2
parent6a47c18c96894e0debc245abe4f8561841dc78ec (diff)
gcc/
Backport from trunk r233268.

2016-02-10  Wilco Dijkstra  <wdijkstr@arm.com>

	* config/aarch64/aarch64.c (cortexa53_tunings): Enable AES fusion.
	(cortexa57_tunings): Likewise.
	(cortexa72_tunings): Likewise.
	(arch_macro_fusion_pair_p): Add support for AES fusion.
	* config/aarch64/aarch64-fusion-pairs.def: Add AES_AESMC entry.
	* config/arm/aarch-common.c (aarch_crypto_can_dual_issue):
	Allow virtual registers before reload so early scheduling works.
	* config/arm/cortex-a57.md (cortex_a57_crypto_simple): Use
	correct latency and pipeline.
	(cortex_a57_crypto_complex): Likewise.
	(cortex_a57_crypto_xor): Likewise.
	(define_bypass): Add AES bypass.

Change-Id: Idc69fb303db87c5ea7e5e359fd1eef0447f3124d
-rw-r--r--gcc/config/aarch64/aarch64-fusion-pairs.def1
-rw-r--r--gcc/config/aarch64/aarch64.c10
-rw-r--r--gcc/config/arm/aarch-common.c7
-rw-r--r--gcc/config/arm/cortex-a57.md17
4 files changed, 24 insertions, 11 deletions
diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def
index 53bbef46eb2..fea79fc6dd5 100644
--- a/gcc/config/aarch64/aarch64-fusion-pairs.def
+++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
@@ -33,4 +33,5 @@ AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD)
AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK)
AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
+AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 7ade6235dc2..eab75c34dc6 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -409,7 +409,7 @@ static const struct tune_params cortexa53_tunings =
&generic_branch_cost,
4, /* memmov_cost */
2, /* issue_rate */
- (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
8, /* function_align. */
8, /* jump_align. */
@@ -431,7 +431,7 @@ static const struct tune_params cortexa57_tunings =
&cortexa57_branch_cost,
4, /* memmov_cost */
3, /* issue_rate */
- (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
16, /* function_align. */
8, /* jump_align. */
@@ -453,7 +453,7 @@ static const struct tune_params cortexa72_tunings =
&generic_branch_cost,
4, /* memmov_cost */
3, /* issue_rate */
- (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
16, /* function_align. */
8, /* jump_align. */
@@ -12892,6 +12892,10 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
}
}
+ if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_AES_AESMC)
+ && aarch_crypto_can_dual_issue (prev, curr))
+ return true;
+
if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_CMP_BRANCH)
&& any_condjump_p (curr))
{
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index 4d2a69d5bcf..af67eea4c93 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -71,8 +71,11 @@ aarch_crypto_can_dual_issue (rtx_insn *producer_insn, rtx_insn *consumer_insn)
{
unsigned int regno = REGNO (SET_DEST (producer_set));
- return REGNO (SET_DEST (consumer_set)) == regno
- && REGNO (XVECEXP (consumer_src, 0, 0)) == regno;
+ /* Before reload the registers are virtual, so the destination of
+ consumer_set doesn't need to match. */
+
+ return (REGNO (SET_DEST (consumer_set)) == regno || !reload_completed)
+ && REGNO (XVECEXP (consumer_src, 0, 0)) == regno;
}
return 0;
diff --git a/gcc/config/arm/cortex-a57.md b/gcc/config/arm/cortex-a57.md
index c751dd43b0e..bd23e8dbfba 100644
--- a/gcc/config/arm/cortex-a57.md
+++ b/gcc/config/arm/cortex-a57.md
@@ -747,20 +747,20 @@
neon_fp_sqrt_s_q, neon_fp_sqrt_d_q"))
"ca57_cx2_block*3")
-(define_insn_reservation "cortex_a57_crypto_simple" 4
+(define_insn_reservation "cortex_a57_crypto_simple" 3
(and (eq_attr "tune" "cortexa57")
(eq_attr "type" "crypto_aese,crypto_aesmc,crypto_sha1_fast,crypto_sha256_fast"))
- "ca57_cx2")
+ "ca57_cx1")
-(define_insn_reservation "cortex_a57_crypto_complex" 7
+(define_insn_reservation "cortex_a57_crypto_complex" 6
(and (eq_attr "tune" "cortexa57")
(eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
- "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")
+ "ca57_cx1*2")
-(define_insn_reservation "cortex_a57_crypto_xor" 7
+(define_insn_reservation "cortex_a57_crypto_xor" 6
(and (eq_attr "tune" "cortexa57")
(eq_attr "type" "crypto_sha1_xor"))
- "(ca57_cx1+ca57_cx2)")
+ "(ca57_cx1*2)|(ca57_cx2*2)")
;; We lie with calls. They take up all issue slots, but are otherwise
;; not harmful.
@@ -797,3 +797,8 @@
(define_bypass 1 "cortex_a57_*"
"cortex_a57_call,cortex_a57_branch")
+;; AESE+AESMC and AESD+AESIMC pairs forward with zero latency
+(define_bypass 0 "cortex_a57_crypto_simple"
+ "cortex_a57_crypto_simple"
+ "aarch_crypto_can_dual_issue")
+