aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2024-05-12 16:27:22 +0100
committerRoger Sayle <roger@nextmovesoftware.com>2024-05-12 16:27:22 +0100
commit46077992180d6d86c86544df5e8cb943492d3b01 (patch)
tree9be50bca918299fcbb82247ee627554a4e13c894
parent83fb5e6f382ea99ca0e2a0afeb25a9f78909f25f (diff)
arm: Use uxtb rN, rM, ror #8 to implement zero_extract on armv6.
Examining the code generated for the following C snippet on a raspberry pi:

int popcount_lut8(unsigned *buf, int n)
{
  int cnt=0;
  unsigned int i;
  do {
    i = *buf;
    cnt += lut[i&255];
    cnt += lut[i>>8&255];
    cnt += lut[i>>16&255];
    cnt += lut[i>>24];
    buf++;
  } while(--n);
  return cnt;
}

I was surprised to see the following instruction sequence generated by the compiler:

  mov r5, r2, lsr #8
  uxtb r5, r5

This sequence can be performed by a single ARM instruction:

  uxtb r5, r2, ror #8

The attached patch allows GCC's combine pass to take advantage of ARM's uxtb with rotate functionality to implement the above zero_extract, and likewise to use the sxtb with rotate to implement sign_extract. ARM's uxtb and sxtb can only be used with rotates of 0, 8, 16 and 24, and of these only the 8 and 16 are useful [ror #0 is a nop, and extends with ror #24 can be implemented using regular shifts], so the approach here is to add the six missing but useful instructions as 6 different define_insn in arm.md, rather than try to be clever with new predicates.

Later ARM hardware has advanced bit field instructions, and earlier ARM cores didn't support extend-with-rotate, so this appears to only benefit armv6 era CPUs (e.g. the raspberry pi).

Patch posted: https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01339.html
Approved by Kyrill Tkachov: https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01881.html

2024-05-12 Roger Sayle <roger@nextmovesoftware.com>
           Kyrill Tkachov <kyrylo.tkachov@foss.arm.com>

* config/arm/arm.md (*arm_zeroextractsi2_8_8, *arm_signextractsi2_8_8, *arm_zeroextractsi2_8_16, *arm_signextractsi2_8_16, *arm_zeroextractsi2_16_8, *arm_signextractsi2_16_8): New.

2024-05-12 Roger Sayle <roger@nextmovesoftware.com>
           Kyrill Tkachov <kyrylo.tkachov@foss.arm.com>

* gcc.target/arm/extend-ror.c: New test.
-rw-r--r--gcc/config/arm/arm.md66
-rw-r--r--gcc/testsuite/gcc.target/arm/extend-ror.c38
2 files changed, 104 insertions, 0 deletions
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 1fd00146ca9..f47e036a803 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -12647,6 +12647,72 @@
""
)
+;; Implement zero_extract using uxtb/uxth instruction with
+;; the ror #N qualifier when applicable.
+
+(define_insn "*arm_zeroextractsi2_8_8" ; zero_extract of an 8-bit field at bit position 8, done in one uxtb
+ [(set (match_operand:SI 0 "s_register_operand" "=r") ; operand 0: SImode destination register (write-only)
+ (zero_extract:SI (match_operand:SI 1 "s_register_operand" "r") ; operand 1: SImode source register
+ (const_int 8) (const_int 8)))] ; field width 8, field position 8
+ "TARGET_ARM && arm_arch6" ; insn condition: both TARGET_ARM and arm_arch6 must hold
+ "uxtb%?\\t%0, %1, ror #8" ; rotate operand 1 right by 8, then zero-extend its low byte into operand 0
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "extend")] ; classified with the "extend" insn type
+)
+
+(define_insn "*arm_zeroextractsi2_8_16" ; zero_extract of an 8-bit field at bit position 16, done in one uxtb
+ [(set (match_operand:SI 0 "s_register_operand" "=r") ; operand 0: SImode destination register (write-only)
+ (zero_extract:SI (match_operand:SI 1 "s_register_operand" "r") ; operand 1: SImode source register
+ (const_int 8) (const_int 16)))] ; field width 8, field position 16
+ "TARGET_ARM && arm_arch6" ; insn condition: both TARGET_ARM and arm_arch6 must hold
+ "uxtb%?\\t%0, %1, ror #16" ; rotate operand 1 right by 16, then zero-extend its low byte into operand 0
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "extend")] ; classified with the "extend" insn type
+)
+
+(define_insn "*arm_zeroextractsi2_16_8" ; zero_extract of a 16-bit field at bit position 8, done in one uxth
+ [(set (match_operand:SI 0 "s_register_operand" "=r") ; operand 0: SImode destination register (write-only)
+ (zero_extract:SI (match_operand:SI 1 "s_register_operand" "r") ; operand 1: SImode source register
+ (const_int 16) (const_int 8)))] ; field width 16, field position 8
+ "TARGET_ARM && arm_arch6" ; insn condition: both TARGET_ARM and arm_arch6 must hold
+ "uxth%?\\t%0, %1, ror #8" ; rotate operand 1 right by 8, then zero-extend its low halfword into operand 0
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "extend")] ; classified with the "extend" insn type
+)
+
+;; Implement sign_extract using sxtb/sxth instruction with
+;; the ror #N qualifier when applicable.
+
+(define_insn "*arm_signextractsi2_8_8" ; sign_extract of an 8-bit field at bit position 8, done in one sxtb
+ [(set (match_operand:SI 0 "s_register_operand" "=r") ; operand 0: SImode destination register (write-only)
+ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") ; operand 1: SImode source register
+ (const_int 8) (const_int 8)))] ; field width 8, field position 8
+ "TARGET_ARM && arm_arch6" ; insn condition: both TARGET_ARM and arm_arch6 must hold
+ "sxtb%?\\t%0, %1, ror #8" ; rotate operand 1 right by 8, then sign-extend its low byte into operand 0
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "extend")] ; classified with the "extend" insn type
+)
+
+(define_insn "*arm_signextractsi2_8_16" ; sign_extract of an 8-bit field at bit position 16, done in one sxtb
+ [(set (match_operand:SI 0 "s_register_operand" "=r") ; operand 0: SImode destination register (write-only)
+ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") ; operand 1: SImode source register
+ (const_int 8) (const_int 16)))] ; field width 8, field position 16
+ "TARGET_ARM && arm_arch6" ; insn condition: both TARGET_ARM and arm_arch6 must hold
+ "sxtb%?\\t%0, %1, ror #16" ; rotate operand 1 right by 16, then sign-extend its low byte into operand 0
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "extend")] ; classified with the "extend" insn type
+)
+
+(define_insn "*arm_signextractsi2_16_8" ; sign_extract of a 16-bit field at bit position 8, done in one sxth
+ [(set (match_operand:SI 0 "s_register_operand" "=r") ; operand 0: SImode destination register (write-only)
+ (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") ; operand 1: SImode source register
+ (const_int 16) (const_int 8)))] ; field width 16, field position 8
+ "TARGET_ARM && arm_arch6" ; insn condition: both TARGET_ARM and arm_arch6 must hold
+ "sxth%?\\t%0, %1, ror #8" ; rotate operand 1 right by 8, then sign-extend its low halfword into operand 0
+ [(set_attr "predicable" "yes")
+ (set_attr "type" "extend")] ; classified with the "extend" insn type
+)
+
;; Patterns for LDRD/STRD in Thumb2 mode
(define_insn "*thumb2_ldrd"
diff --git a/gcc/testsuite/gcc.target/arm/extend-ror.c b/gcc/testsuite/gcc.target/arm/extend-ror.c
new file mode 100644
index 00000000000..8b52a93e253
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/extend-ror.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-march=*" } { "-march=armv6" } } */
+/* { dg-require-effective-target arm_arm_ok } */
+/* { dg-add-options arm_arch_v6 } */
+/* { dg-additional-options "-O -marm" } */
+
+unsigned int zeroextractsi2_8_8(unsigned int x) /* Codegen probe for the zero-extract pattern of the same name. */
+{
+ return (unsigned char)(x>>8); /* keep bits 15:8 of x, zero-extended; should add one ", ror #8" to the scan count */
+}
+
+unsigned int zeroextractsi2_8_16(unsigned int x) /* Codegen probe for the zero-extract pattern of the same name. */
+{
+ return (unsigned char)(x>>16); /* keep bits 23:16 of x, zero-extended; should add one ", ror #16" to the scan count */
+}
+
+unsigned int signextractsi2_8_8(unsigned int x) /* Codegen probe for the sign-extract pattern of the same name. */
+{
+ return (int)(signed char)(x>>8); /* keep bits 15:8 of x, sign-extended to int; should add one ", ror #8" to the scan count */
+}
+
+unsigned int signextractsi2_8_16(unsigned int x) /* Codegen probe for the sign-extract pattern of the same name. */
+{
+ return (int)(signed char)(x>>16); /* keep bits 23:16 of x, sign-extended to int; should add one ", ror #16" to the scan count */
+}
+
+unsigned int zeroextractsi2_16_8(unsigned int x) /* Codegen probe for the zero-extract pattern of the same name. */
+{
+ return (unsigned short)(x>>8); /* keep bits 23:8 of x, zero-extended; should add one ", ror #8" to the scan count */
+}
+
+unsigned int signextractsi2_16_8(unsigned int x) /* Codegen probe for the sign-extract pattern of the same name. */
+{
+ return (int)(short)(x>>8); /* keep bits 23:8 of x, sign-extended to int; should add one ", ror #8" to the scan count */
+}
+
+/* { dg-final { scan-assembler-times ", ror #8" 4 } } */
+/* { dg-final { scan-assembler-times ", ror #16" 2 } } */