commit 1aca6bdb61c162234a2e4a1f14adebb7382521ad (patch)
author:    Christophe Lyon <christophe.lyon@linaro.org>  2016-02-17 14:16:23 +0100
committer: Linaro Code Review <review@review.linaro.org>  2016-03-14 13:40:02 +0000
tree:      5ae29c15db6ce8b5631114a448859789f684bdb2
parent:    6a47c18c96894e0debc245abe4f8561841dc78ec (diff)
gcc/
Backport from trunk r233268.
2016-02-10 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.c (cortexa53_tunings): Enable AES fusion.
(cortexa57_tunings): Likewise.
(cortexa72_tunings): Likewise.
(arch_macro_fusion_pair_p): Add support for AES fusion.
* config/aarch64/aarch64-fusion-pairs.def: Add AES_AESMC entry.
* config/arm/aarch-common.c (aarch_crypto_can_dual_issue):
Allow virtual registers before reload so early scheduling works.
* config/arm/cortex-a57.md (cortex_a57_crypto_simple): Use
correct latency and pipeline.
(cortex_a57_crypto_complex): Likewise.
(cortex_a57_crypto_xor): Likewise.
(define_bypass): Add AES bypass.
Change-Id: Idc69fb303db87c5ea7e5e359fd1eef0447f3124d
 gcc/config/aarch64/aarch64-fusion-pairs.def |  1 +
 gcc/config/aarch64/aarch64.c                | 10 ++++++----
 gcc/config/arm/aarch-common.c               |  7 +++++--
 gcc/config/arm/cortex-a57.md                | 17 +++++++++++------
 4 files changed, 24 insertions(+), 11 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def index 53bbef46eb2..fea79fc6dd5 100644 --- a/gcc/config/aarch64/aarch64-fusion-pairs.def +++ b/gcc/config/aarch64/aarch64-fusion-pairs.def @@ -33,4 +33,5 @@ AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD) AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK) AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR) AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH) +AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 7ade6235dc2..eab75c34dc6 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -409,7 +409,7 @@ static const struct tune_params cortexa53_tunings = &generic_branch_cost, 4, /* memmov_cost */ 2, /* issue_rate */ - (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */ 8, /* function_align. */ 8, /* jump_align. */ @@ -431,7 +431,7 @@ static const struct tune_params cortexa57_tunings = &cortexa57_branch_cost, 4, /* memmov_cost */ 3, /* issue_rate */ - (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ 16, /* function_align. */ 8, /* jump_align. */ @@ -453,7 +453,7 @@ static const struct tune_params cortexa72_tunings = &generic_branch_cost, 4, /* memmov_cost */ 3, /* issue_rate */ - (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ 16, /* function_align. */ 8, /* jump_align. 
*/ @@ -12892,6 +12892,10 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) } } + if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_AES_AESMC) + && aarch_crypto_can_dual_issue (prev, curr)) + return true; + if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_CMP_BRANCH) && any_condjump_p (curr)) { diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c index 4d2a69d5bcf..af67eea4c93 100644 --- a/gcc/config/arm/aarch-common.c +++ b/gcc/config/arm/aarch-common.c @@ -71,8 +71,11 @@ aarch_crypto_can_dual_issue (rtx_insn *producer_insn, rtx_insn *consumer_insn) { unsigned int regno = REGNO (SET_DEST (producer_set)); - return REGNO (SET_DEST (consumer_set)) == regno - && REGNO (XVECEXP (consumer_src, 0, 0)) == regno; + /* Before reload the registers are virtual, so the destination of + consumer_set doesn't need to match. */ + + return (REGNO (SET_DEST (consumer_set)) == regno || !reload_completed) + && REGNO (XVECEXP (consumer_src, 0, 0)) == regno; } return 0; diff --git a/gcc/config/arm/cortex-a57.md b/gcc/config/arm/cortex-a57.md index c751dd43b0e..bd23e8dbfba 100644 --- a/gcc/config/arm/cortex-a57.md +++ b/gcc/config/arm/cortex-a57.md @@ -747,20 +747,20 @@ neon_fp_sqrt_s_q, neon_fp_sqrt_d_q")) "ca57_cx2_block*3") -(define_insn_reservation "cortex_a57_crypto_simple" 4 +(define_insn_reservation "cortex_a57_crypto_simple" 3 (and (eq_attr "tune" "cortexa57") (eq_attr "type" "crypto_aese,crypto_aesmc,crypto_sha1_fast,crypto_sha256_fast")) - "ca57_cx2") + "ca57_cx1") -(define_insn_reservation "cortex_a57_crypto_complex" 7 +(define_insn_reservation "cortex_a57_crypto_complex" 6 (and (eq_attr "tune" "cortexa57") (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow")) - "ca57_cx2+(ca57_cx2_issue,ca57_cx2)") + "ca57_cx1*2") -(define_insn_reservation "cortex_a57_crypto_xor" 7 +(define_insn_reservation "cortex_a57_crypto_xor" 6 (and (eq_attr "tune" "cortexa57") (eq_attr "type" "crypto_sha1_xor")) - "(ca57_cx1+ca57_cx2)") + 
"(ca57_cx1*2)|(ca57_cx2*2)") ;; We lie with calls. They take up all issue slots, but are otherwise ;; not harmful. @@ -797,3 +797,8 @@ (define_bypass 1 "cortex_a57_*" "cortex_a57_call,cortex_a57_branch") +;; AESE+AESMC and AESD+AESIMC pairs forward with zero latency +(define_bypass 0 "cortex_a57_crypto_simple" + "cortex_a57_crypto_simple" + "aarch_crypto_can_dual_issue") + |