aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2023-10-24 16:42:10 +0100
committerRoger Sayle <roger@nextmovesoftware.com>2023-10-24 16:43:21 +0100
commit35f4e95265b9e89c923b349988654f1da6348a44 (patch)
tree605ad4b414461f047e0415888bfc878c67f2bc23
parent326a8c047ecb0598b8403a4fe016bd67c8991d90 (diff)
ARC: Improved SImode shifts and rotates on !TARGET_BARREL_SHIFTER.
This patch completes the ARC back-end's transition to using pre-reload splitters for SImode shifts and rotates on targets without a barrel shifter. The core part is that the shift_si3 define_insn is no longer needed, as shifts and rotates that don't require a loop are split before reload, and then because shift_si3_loop is the only caller of output_shift, both can be significantly cleaned up and simplified. The output_shift function (Claudiu's "the elephant in the room") is renamed output_shift_loop, which handles just the four instruction zero-overhead loop implementations. Aside from the clean-ups, the user visible changes are much improved implementations of SImode shifts and rotates on affected targets. For the function: unsigned int rotr_1 (unsigned int x) { return (x >> 1) | (x << 31); } GCC with -O2 -mcpu=em would previously generate: rotr_1: lsr_s r2,r0 bmsk_s r0,r0,0 ror r0,r0 j_s.d [blink] or_s r0,r0,r2 with this patch, we now generate: j_s.d [blink] ror r0,r0 For the function: unsigned int rotr_31 (unsigned int x) { return (x >> 31) | (x << 1); } GCC with -O2 -mcpu=em would previously generate: rotr_31: mov_s r2,r0 ;4 asl_s r0,r0 add.f 0,r2,r2 rlc r2,0 j_s.d [blink] or_s r0,r0,r2 with this patch we now generate an add.f followed by an adc: rotr_31: add.f r0,r0,r0 j_s.d [blink] add.cs r0,r0,1 Shifts by constants requiring a loop have been improved for even counts by performing two operations in each iteration: int shl10(int x) { return x >> 10; } Previously looked like: shl10: mov.f lp_count, 10 lpnz 2f asr r0,r0 nop 2: # end single insn loop j_s [blink] And now becomes: shl10: mov lp_count,5 lp 2f asr r0,r0 asr r0,r0 2: # end single insn loop j_s [blink] So emulating ARC's SWAP on architectures that don't have it: unsigned int rotr_16 (unsigned int x) { return (x >> 16) | (x << 16); } previously required 10 instructions and ~70 cycles: rotr_16: mov_s r2,r0 ;4 mov.f lp_count, 16 lpnz 2f add r0,r0,r0 nop 2: # end single insn loop mov.f lp_count, 16 lpnz 2f 
lsr r2,r2 nop 2: # end single insn loop j_s.d [blink] or_s r0,r0,r2 now becomes just 4 instructions and ~18 cycles: rotr_16: mov lp_count,8 lp 2f ror r0,r0 ror r0,r0 2: # end single insn loop j_s [blink] 2023-10-24 Roger Sayle <roger@nextmovesoftware.com> Claudiu Zissulescu <claziss@gmail.com> gcc/ChangeLog * config/arc/arc-protos.h (output_shift): Rename to... (output_shift_loop): Tweak API to take an explicit rtx_code. (arc_split_ashl): Prototype new function here. (arc_split_ashr): Likewise. (arc_split_lshr): Likewise. (arc_split_rotl): Likewise. (arc_split_rotr): Likewise. * config/arc/arc.cc (output_shift): Delete local prototype. Rename. (output_shift_loop): New function replacing output_shift to output a zero overhead loop for SImode shifts and rotates on ARC targets without barrel shifter (i.e. no hardware support for these insns). (arc_split_ashl): New helper function to split *ashlsi3_nobs. (arc_split_ashr): New helper function to split *ashrsi3_nobs. (arc_split_lshr): New helper function to split *lshrsi3_nobs. (arc_split_rotl): New helper function to split *rotlsi3_nobs. (arc_split_rotr): New helper function to split *rotrsi3_nobs. (arc_print_operand): Correct whitespace. (arc_rtx_costs): Likewise. (hwloop_optimize): Likewise. * config/arc/arc.md (ANY_SHIFT_ROTATE): New define_code_iterator. (define_code_attr insn): New code attribute to map to pattern name. (<ANY_SHIFT_ROTATE>si3): New expander unifying previous ashlsi3, ashrsi3 and lshrsi3 define_expands. Adds rotlsi3 and rotrsi3. (*<ANY_SHIFT_ROTATE>si3_nobs): New define_insn_and_split that unifies the previous *ashlsi3_nobs, *ashrsi3_nobs and *lshrsi3_nobs. We now call arc_split_<insn> in arc.cc to implement each split. (shift_si3): Delete define_insn, all shifts/rotates are now split. (shift_si3_loop): Rename to... (<insn>si3_loop): define_insn to handle loop implementations of SImode shifts and rotates, calling output_shift_loop for template. (rotrsi3): Rename to...
(*rotrsi3_insn): define_insn for TARGET_BARREL_SHIFTER's ror. (*rotlsi3): New define_insn_and_split to transform left rotates into right rotates before reload. (rotlsi3_cnt1): New define_insn_and_split to implement a left rotate by one bit using an add.f followed by an adc. * config/arc/predicates.md (shiftr4_operator): Delete.
-rw-r--r--gcc/config/arc/arc-protos.h7
-rw-r--r--gcc/config/arc/arc.cc371
-rw-r--r--gcc/config/arc/arc.md230
-rw-r--r--gcc/config/arc/predicates.md9
4 files changed, 324 insertions, 293 deletions
diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 026ea99c9c6..a48d850bc64 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -25,7 +25,12 @@ extern machine_mode arc_select_cc_mode (enum rtx_code, rtx, rtx);
extern struct rtx_def *gen_compare_reg (rtx, machine_mode);
/* Declarations for various fns used in the .md file. */
-extern const char *output_shift (rtx *);
+extern const char *output_shift_loop (enum rtx_code, rtx *);
+extern void arc_split_ashl (rtx *);
+extern void arc_split_ashr (rtx *);
+extern void arc_split_lshr (rtx *);
+extern void arc_split_rotl (rtx *);
+extern void arc_split_rotr (rtx *);
extern bool compact_sda_memory_operand (rtx, machine_mode, bool);
extern bool arc_double_limm_p (rtx);
extern void arc_print_operand (FILE *, rtx, int);
diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc
index 00427d859cc..353ac69de34 100644
--- a/gcc/config/arc/arc.cc
+++ b/gcc/config/arc/arc.cc
@@ -241,7 +241,6 @@ static int branch_dest (rtx);
static void arc_output_pic_addr_const (FILE *, rtx, int);
static bool arc_function_ok_for_sibcall (tree, tree);
static rtx arc_function_value (const_tree, const_tree, bool);
-const char * output_shift (rtx *);
static void arc_reorg (void);
static bool arc_in_small_data_p (const_tree);
@@ -4151,143 +4150,287 @@ arc_pre_reload_split (void)
&& !(cfun->curr_properties & PROP_rtl_split_insns));
}
-/* Output the assembler code for doing a shift.
- We go to a bit of trouble to generate efficient code as the ARC601 only has
- single bit shifts. This is taken from the h8300 port. We only have one
- mode of shifting and can't access individual bytes like the h8300 can, so
- this is greatly simplified (at the expense of not generating hyper-
- efficient code).
-
- This function is not used if the variable shift insns are present. */
-
-/* FIXME: This probably can be done using a define_split in arc.md.
- Alternately, generate rtx rather than output instructions. */
+/* Output the assembler code for a zero-overhead loop doing a shift
+ or rotate. We know OPERANDS[0] == OPERANDS[1], and the bit count
+ is OPERANDS[2]. */
const char *
-output_shift (rtx *operands)
+output_shift_loop (enum rtx_code code, rtx *operands)
{
- /* static int loopend_lab;*/
- rtx shift = operands[3];
- machine_mode mode = GET_MODE (shift);
- enum rtx_code code = GET_CODE (shift);
- const char *shift_one;
-
- gcc_assert (mode == SImode);
-
- switch (code)
- {
- case ASHIFT: shift_one = "add %0,%1,%1"; break;
- case ASHIFTRT: shift_one = "asr %0,%1"; break;
- case LSHIFTRT: shift_one = "lsr %0,%1"; break;
- default: gcc_unreachable ();
- }
+ bool twice_p = false;
+ gcc_assert (GET_MODE (operands[0]) == SImode);
if (GET_CODE (operands[2]) != CONST_INT)
{
- output_asm_insn ("and.f lp_count,%2, 0x1f", operands);
- goto shiftloop;
+ output_asm_insn ("and.f\tlp_count,%2,0x1f", operands);
+ output_asm_insn ("lpnz\t2f", operands);
}
else
{
- int n;
+ int n = INTVAL (operands[2]) & 31;
+ if (!n)
+ {
+ output_asm_insn ("mov\t%0,%1",operands);
+ return "";
+ }
- n = INTVAL (operands[2]);
+ if ((n & 1) == 0 && code != ROTATE)
+ {
+ twice_p = true;
+ n >>= 1;
+ }
+ operands[2] = GEN_INT (n);
+ output_asm_insn ("mov\tlp_count,%2", operands);
+ output_asm_insn ("lp\t2f", operands);
+ }
- /* Only consider the lower 5 bits of the shift count. */
- n = n & 0x1f;
+ switch (code)
+ {
+ case ASHIFT:
+ output_asm_insn ("add\t%0,%1,%1", operands);
+ if (twice_p)
+ output_asm_insn ("add\t%0,%1,%1", operands);
+ break;
+ case ASHIFTRT:
+ output_asm_insn ("asr\t%0,%1", operands);
+ if (twice_p)
+ output_asm_insn ("asr\t%0,%1", operands);
+ break;
+ case LSHIFTRT:
+ output_asm_insn ("lsr\t%0,%1", operands);
+ if (twice_p)
+ output_asm_insn ("lsr\t%0,%1", operands);
+ break;
+ case ROTATERT:
+ output_asm_insn ("ror\t%0,%1", operands);
+ if (twice_p)
+ output_asm_insn ("ror\t%0,%1", operands);
+ break;
+ case ROTATE:
+ output_asm_insn ("add.f\t%0,%1,%1", operands);
+ output_asm_insn ("adc\t%0,%0,0", operands);
+ twice_p = true;
+ break;
+ default:
+ gcc_unreachable ();
+ }
- /* First see if we can do them inline. */
- /* ??? We could get better scheduling & shorter code (using short insns)
- by using splitters. Alas, that'd be even more verbose. */
- if (code == ASHIFT && n <= 9 && n > 2
- && dest_reg_operand (operands[4], SImode))
+ if (!twice_p)
+ output_asm_insn ("nop", operands);
+ fprintf (asm_out_file, "2:\t%s end single insn loop\n", ASM_COMMENT_START);
+ return "";
+}
+
+
+/* Split SImode left shift instruction. */
+void
+arc_split_ashl (rtx *operands)
+{
+ if (CONST_INT_P (operands[2]))
+ {
+ int n = INTVAL (operands[2]) & 0x1f;
+ if (n <= 9)
{
- output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands);
- for (n -=3 ; n >= 3; n -= 3)
- output_asm_insn ("add3 %0,%4,%0", operands);
- if (n == 2)
- output_asm_insn ("add2 %0,%4,%0", operands);
- else if (n)
- output_asm_insn ("add %0,%0,%0", operands);
+ if (n == 0)
+ emit_move_insn (operands[0], operands[1]);
+ else if (n <= 2)
+ {
+ emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[1]));
+ if (n == 2)
+ emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[0]));
+ }
+ else
+ {
+ rtx zero = gen_reg_rtx (SImode);
+ emit_move_insn (zero, const0_rtx);
+ emit_insn (gen_add_shift (operands[0], operands[1],
+ GEN_INT (3), zero));
+ for (n -= 3; n >= 3; n -= 3)
+ emit_insn (gen_add_shift (operands[0], operands[0],
+ GEN_INT (3), zero));
+ if (n == 2)
+ emit_insn (gen_add_shift (operands[0], operands[0],
+ const2_rtx, zero));
+ else if (n)
+ emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[0]));
+ }
+ return;
}
- else if (n <= 4)
+ else if (n >= 29)
{
- while (--n >= 0)
+ if (n < 31)
{
- output_asm_insn (shift_one, operands);
- operands[1] = operands[0];
+ if (n == 29)
+ {
+ emit_insn (gen_andsi3_i (operands[0], operands[1],
+ GEN_INT (7)));
+ emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
+ }
+ else
+ emit_insn (gen_andsi3_i (operands[0], operands[1],
+ GEN_INT (3)));
+ emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
}
+ else
+ emit_insn (gen_andsi3_i (operands[0], operands[1], const1_rtx));
+ emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
+ return;
}
- /* See if we can use a rotate/and. */
- else if (n == BITS_PER_WORD - 1)
+ }
+
+ emit_insn (gen_ashlsi3_loop (operands[0], operands[1], operands[2]));
+}
+
+/* Split SImode arithmetic right shift instruction. */
+void
+arc_split_ashr (rtx *operands)
+{
+ if (CONST_INT_P (operands[2]))
+ {
+ int n = INTVAL (operands[2]) & 0x1f;
+ if (n <= 4)
{
- switch (code)
+ if (n != 0)
{
- case ASHIFT :
- output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands);
- break;
- case ASHIFTRT :
- /* The ARC doesn't have a rol insn. Use something else. */
- output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands);
- break;
- case LSHIFTRT :
- /* The ARC doesn't have a rol insn. Use something else. */
- output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands);
- break;
- default:
- break;
+ emit_insn (gen_ashrsi3_cnt1 (operands[0], operands[1]));
+ while (--n > 0)
+ emit_insn (gen_ashrsi3_cnt1 (operands[0], operands[0]));
+ }
+ else
+ emit_move_insn (operands[0], operands[1]);
+ return;
+ }
+ else if (n == 30)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_add_f (tmp, operands[1], operands[1]));
+ emit_insn (gen_sbc (operands[0], operands[0], operands[0]));
+ emit_insn (gen_addsi_compare_2 (tmp, tmp));
+ emit_insn (gen_adc (operands[0], operands[0], operands[0]));
+ return;
+ }
+ else if (n == 31)
+ {
+ emit_insn (gen_addsi_compare_2 (operands[1], operands[1]));
+ emit_insn (gen_sbc (operands[0], operands[0], operands[0]));
+ return;
+ }
+ }
+
+ emit_insn (gen_ashrsi3_loop (operands[0], operands[1], operands[2]));
+}
+
+/* Split SImode logical right shift instruction. */
+void
+arc_split_lshr (rtx *operands)
+{
+ if (CONST_INT_P (operands[2]))
+ {
+ int n = INTVAL (operands[2]) & 0x1f;
+ if (n <= 4)
+ {
+ if (n != 0)
+ {
+ emit_insn (gen_lshrsi3_cnt1 (operands[0], operands[1]));
+ while (--n > 0)
+ emit_insn (gen_lshrsi3_cnt1 (operands[0], operands[0]));
}
+ else
+ emit_move_insn (operands[0], operands[1]);
+ return;
}
- else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode))
+ else if (n == 30)
{
- switch (code)
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_add_f (tmp, operands[1], operands[1]));
+ emit_insn (gen_scc_ltu_cc_c (operands[0]));
+ emit_insn (gen_addsi_compare_2 (tmp, tmp));
+ emit_insn (gen_adc (operands[0], operands[0], operands[0]));
+ return;
+ }
+ else if (n == 31)
+ {
+ emit_insn (gen_addsi_compare_2 (operands[1], operands[1]));
+ emit_insn (gen_scc_ltu_cc_c (operands[0]));
+ return;
+ }
+ }
+
+ emit_insn (gen_lshrsi3_loop (operands[0], operands[1], operands[2]));
+}
+
+/* Split SImode rotate left instruction. */
+void
+arc_split_rotl (rtx *operands)
+{
+ if (CONST_INT_P (operands[2]))
+ {
+ int n = INTVAL (operands[2]) & 0x1f;
+ if (n <= 2)
+ {
+ if (n != 0)
{
- case ASHIFT :
- output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands);
- break;
- case ASHIFTRT :
-#if 1 /* Need some scheduling comparisons. */
- output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t"
- "add.f 0,%4,%4\n\trlc %0,%0", operands);
-#else
- output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t"
- "sbc.f %0,%0,%4\n\trlc %0,%0", operands);
-#endif
- break;
- case LSHIFTRT :
-#if 1
- output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t"
- "add.f 0,%4,%4\n\trlc %0,%0", operands);
-#else
- output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t"
- "and %0,%0,1\n\trlc %0,%0", operands);
-#endif
- break;
- default:
- break;
+ emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[1]));
+ if (n == 2)
+ emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[0]));
}
+ else
+ emit_move_insn (operands[0], operands[1]);
+ return;
}
- else if (n == BITS_PER_WORD - 3 && code == ASHIFT)
- output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0",
- operands);
- /* Must loop. */
- else
+ else if (n >= 28)
+ {
+ emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[1]));
+ while (++n < 32)
+ emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
+ return;
+ }
+ else if (n >= 16 || n == 12 || n == 14)
{
- operands[2] = GEN_INT (n);
- output_asm_insn ("mov.f lp_count, %2", operands);
+ emit_insn (gen_rotrsi3_loop (operands[0], operands[1],
+ GEN_INT (32 - n)));
+ return;
+ }
+ }
- shiftloop:
+ emit_insn (gen_rotlsi3_loop (operands[0], operands[1], operands[2]));
+}
+
+/* Split SImode rotate right instruction.
+ OPERANDS[0] is the destination, OPERANDS[1] the source and OPERANDS[2]
+ the rotate count. A constant count is reduced modulo 32 and expanded
+ inline when only a few single-bit rotates are needed; every other case
+ is emitted as a loop pattern. */
+void
+arc_split_rotr (rtx *operands)
+{
+ if (CONST_INT_P (operands[2]))
+ {
+ int n = INTVAL (operands[2]) & 0x1f;
+ if (n <= 4)
+ {
+ if (n != 0)
{
- output_asm_insn ("lpnz\t2f", operands);
- output_asm_insn (shift_one, operands);
- output_asm_insn ("nop", operands);
- fprintf (asm_out_file, "2:\t%s end single insn loop\n",
- ASM_COMMENT_START);
+ emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[1]));
+ while (--n > 0)
+ emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
}
+ else
+ emit_move_insn (operands[0], operands[1]);
+ return;
+ }
+ else if (n >= 30)
+ {
+ /* A right rotate by N equals a left rotate by 32 - N, so a count
+ of 31 needs one single-bit left rotate and a count of 30 needs
+ two. The second rotate continues from the destination and must
+ not overwrite the source register. */
+ emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[1]));
+ if (n == 30)
+ emit_insn (gen_rotlsi3_cnt1 (operands[0], operands[0]));
+ return;
+ }
+ else if (n >= 21 || n == 17 || n == 19)
+ {
+ /* The count is replaced by its complement 32 - N, so the loop has
+ to rotate left, not right, to produce the same result. */
+ emit_insn (gen_rotlsi3_loop (operands[0], operands[1],
+ GEN_INT (32 - n)));
+ return;
}
}
- return "";
+ emit_insn (gen_rotrsi3_loop (operands[0], operands[1], operands[2]));
}
/* Nested function support. */
@@ -4459,9 +4602,9 @@ arc_print_operand (FILE *file, rtx x, int code)
case 'c':
if (GET_CODE (x) == CONST_INT)
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) );
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) );
else
- output_operand_lossage ("invalid operands to %%c code");
+ output_operand_lossage ("invalid operands to %%c code");
return;
@@ -5433,8 +5576,8 @@ arc_rtx_costs (rtx x, machine_mode mode, int outer_code,
if ((GET_CODE (XEXP (x, 0)) == ASHIFT
&& _1_2_3_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
- || (GET_CODE (XEXP (x, 0)) == MULT
- && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode)))
+ || (GET_CODE (XEXP (x, 0)) == MULT
+ && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode)))
{
if (CONSTANT_P (XEXP (x, 1)) && !speed)
*total += COSTS_N_INSNS (4);
@@ -5445,8 +5588,8 @@ arc_rtx_costs (rtx x, machine_mode mode, int outer_code,
case MINUS:
if ((GET_CODE (XEXP (x, 1)) == ASHIFT
&& _1_2_3_operand (XEXP (XEXP (x, 1), 1), VOIDmode))
- || (GET_CODE (XEXP (x, 1)) == MULT
- && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode)))
+ || (GET_CODE (XEXP (x, 1)) == MULT
+ && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode)))
{
if (CONSTANT_P (XEXP (x, 0)) && !speed)
*total += COSTS_N_INSNS (4);
@@ -7546,9 +7689,9 @@ hwloop_optimize (hwloop_info loop)
if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg)) != LP_COUNT)
{
if (dump_file)
- fprintf (dump_file, ";; loop %d doesn't use lp_count as loop"
+ fprintf (dump_file, ";; loop %d doesn't use lp_count as loop"
" iterator\n",
- loop->loop_no);
+ loop->loop_no);
/* This loop doesn't use the lp_count, check though if we can
fix it. */
if (TEST_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT)
@@ -7721,7 +7864,7 @@ hwloop_optimize (hwloop_info loop)
/* Make sure we don't split a call and its corresponding
CALL_ARG_LOCATION note. */
&& NOTE_KIND (entry_after) != NOTE_INSN_CALL_ARG_LOCATION))
- entry_after = NEXT_INSN (entry_after);
+ entry_after = NEXT_INSN (entry_after);
#endif
entry_after = next_nonnote_insn_bb (entry_after);
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 325e4f56b9b..ee438872dd2 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -3353,22 +3353,16 @@ archs4x, archs4xd"
;; Shift instructions.
-(define_expand "ashlsi3"
- [(set (match_operand:SI 0 "dest_reg_operand" "")
- (ashift:SI (match_operand:SI 1 "register_operand" "")
- (match_operand:SI 2 "nonmemory_operand" "")))]
- "")
+(define_code_iterator ANY_SHIFT_ROTATE [ashift ashiftrt lshiftrt
+ rotate rotatert])
-(define_expand "ashrsi3"
- [(set (match_operand:SI 0 "dest_reg_operand" "")
- (ashiftrt:SI (match_operand:SI 1 "register_operand" "")
- (match_operand:SI 2 "nonmemory_operand" "")))]
- "")
+(define_code_attr insn [(ashift "ashl") (ashiftrt "ashr") (lshiftrt "lshr")
+ (rotate "rotl") (rotatert "rotr")])
-(define_expand "lshrsi3"
+(define_expand "<insn>si3"
[(set (match_operand:SI 0 "dest_reg_operand" "")
- (lshiftrt:SI (match_operand:SI 1 "register_operand" "")
- (match_operand:SI 2 "nonmemory_operand" "")))]
+ (ANY_SHIFT_ROTATE:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
"")
; asl, asr, lsr patterns:
@@ -3437,117 +3431,10 @@ archs4x, archs4xd"
[(set_attr "type" "shift")
(set_attr "length" "8")])
-(define_insn_and_split "*ashlsi3_nobs"
- [(set (match_operand:SI 0 "dest_reg_operand")
- (ashift:SI (match_operand:SI 1 "register_operand")
- (match_operand:SI 2 "nonmemory_operand")))]
- "!TARGET_BARREL_SHIFTER
- && operands[2] != const1_rtx
- && arc_pre_reload_split ()"
- "#"
- "&& 1"
- [(const_int 0)]
-{
- if (CONST_INT_P (operands[2]))
- {
- int n = INTVAL (operands[2]) & 0x1f;
- if (n <= 9)
- {
- if (n == 0)
- emit_move_insn (operands[0], operands[1]);
- else if (n <= 2)
- {
- emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[1]));
- if (n == 2)
- emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[0]));
- }
- else
- {
- rtx zero = gen_reg_rtx (SImode);
- emit_move_insn (zero, const0_rtx);
- emit_insn (gen_add_shift (operands[0], operands[1],
- GEN_INT (3), zero));
- for (n -= 3; n >= 3; n -= 3)
- emit_insn (gen_add_shift (operands[0], operands[0],
- GEN_INT (3), zero));
- if (n == 2)
- emit_insn (gen_add_shift (operands[0], operands[0],
- const2_rtx, zero));
- else if (n)
- emit_insn (gen_ashlsi3_cnt1 (operands[0], operands[0]));
- }
- DONE;
- }
- else if (n >= 29)
- {
- if (n < 31)
- {
- if (n == 29)
- {
- emit_insn (gen_andsi3_i (operands[0], operands[1],
- GEN_INT (7)));
- emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
- }
- else
- emit_insn (gen_andsi3_i (operands[0], operands[1],
- GEN_INT (3)));
- emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
- }
- else
- emit_insn (gen_andsi3_i (operands[0], operands[1], const1_rtx));
- emit_insn (gen_rotrsi3_cnt1 (operands[0], operands[0]));
- DONE;
- }
- }
-
- rtx shift = gen_rtx_fmt_ee (ASHIFT, SImode, operands[1], operands[2]);
- emit_insn (gen_shift_si3_loop (operands[0], operands[1],
- operands[2], shift));
- DONE;
-})
-
-(define_insn_and_split "*ashlri3_nobs"
- [(set (match_operand:SI 0 "dest_reg_operand")
- (ashiftrt:SI (match_operand:SI 1 "register_operand")
- (match_operand:SI 2 "nonmemory_operand")))]
- "!TARGET_BARREL_SHIFTER
- && operands[2] != const1_rtx
- && arc_pre_reload_split ()"
- "#"
- "&& 1"
- [(const_int 0)]
-{
- if (CONST_INT_P (operands[2]))
- {
- int n = INTVAL (operands[2]) & 0x1f;
- if (n <= 4)
- {
- if (n != 0)
- {
- emit_insn (gen_ashrsi3_cnt1 (operands[0], operands[1]));
- while (--n > 0)
- emit_insn (gen_ashrsi3_cnt1 (operands[0], operands[0]));
- }
- else
- emit_move_insn (operands[0], operands[1]);
- DONE;
- }
- }
-
- rtx pat;
- rtx shift = gen_rtx_fmt_ee (ASHIFTRT, SImode, operands[1], operands[2]);
- if (shiftr4_operator (shift, SImode))
- pat = gen_shift_si3 (operands[0], operands[1], operands[2], shift);
- else
- pat = gen_shift_si3_loop (operands[0], operands[1], operands[2], shift);
- emit_insn (pat);
- DONE;
-})
-
-(define_insn_and_split "*lshrsi3_nobs"
+(define_insn_and_split "*<insn>si3_nobs"
[(set (match_operand:SI 0 "dest_reg_operand")
- (lshiftrt:SI (match_operand:SI 1 "register_operand")
- (match_operand:SI 2 "nonmemory_operand")))]
+ (ANY_SHIFT_ROTATE:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")))]
"!TARGET_BARREL_SHIFTER
&& operands[2] != const1_rtx
&& arc_pre_reload_split ()"
@@ -3555,66 +3442,28 @@ archs4x, archs4xd"
"&& 1"
[(const_int 0)]
{
- if (CONST_INT_P (operands[2]))
- {
- int n = INTVAL (operands[2]) & 0x1f;
- if (n <= 4)
- {
- if (n != 0)
- {
- emit_insn (gen_lshrsi3_cnt1 (operands[0], operands[1]));
- while (--n > 0)
- emit_insn (gen_lshrsi3_cnt1 (operands[0], operands[0]));
- }
- else
- emit_move_insn (operands[0], operands[1]);
- DONE;
- }
- }
-
- rtx pat;
- rtx shift = gen_rtx_fmt_ee (LSHIFTRT, SImode, operands[1], operands[2]);
- if (shiftr4_operator (shift, SImode))
- pat = gen_shift_si3 (operands[0], operands[1], operands[2], shift);
- else
- pat = gen_shift_si3_loop (operands[0], operands[1], operands[2], shift);
- emit_insn (pat);
+ arc_split_<insn> (operands);
DONE;
})
-;; shift_si3 appears after {ashr,lshr}si3_nobs
-(define_insn "shift_si3"
- [(set (match_operand:SI 0 "dest_reg_operand" "=r")
- (match_operator:SI 3 "shiftr4_operator"
- [(match_operand:SI 1 "register_operand" "0")
- (match_operand:SI 2 "const_int_operand" "n")]))
- (clobber (match_scratch:SI 4 "=&r"))
- (clobber (reg:CC CC_REG))
- ]
- "!TARGET_BARREL_SHIFTER
- && operands[2] != const1_rtx"
- "* return output_shift (operands);"
- [(set_attr "type" "shift")
- (set_attr "length" "16")])
-
-;; shift_si3_loop appears after {ashl,ashr,lshr}si3_nobs
-(define_insn "shift_si3_loop"
+;; <ANY_SHIFT_ROTATE>si3_loop appears after <ANY_SHIFT_ROTATE>si3_nobs
+(define_insn "<insn>si3_loop"
[(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
- (match_operator:SI 3 "shift_operator"
- [(match_operand:SI 1 "register_operand" "0,0")
- (match_operand:SI 2 "nonmemory_operand" "rn,Cal")]))
+ (ANY_SHIFT_ROTATE:SI
+ (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "rn,Cal")))
(clobber (reg:SI LP_COUNT))
(clobber (reg:CC CC_REG))
]
"!TARGET_BARREL_SHIFTER
&& operands[2] != const1_rtx"
- "* return output_shift (operands);"
+ "* return output_shift_loop (<CODE>, operands);"
[(set_attr "type" "shift")
(set_attr "length" "16,20")])
;; Rotate instructions.
-(define_insn "rotrsi3"
+(define_insn "rotrsi3_insn"
[(set (match_operand:SI 0 "dest_reg_operand" "=r, r, r")
(rotatert:SI (match_operand:SI 1 "arc_nonmemory_operand" " 0,rL,rCsz")
(match_operand:SI 2 "nonmemory_operand" "rL,rL,rCal")))]
@@ -3624,6 +3473,35 @@ archs4x, archs4xd"
(set_attr "predicable" "yes,no,no")
(set_attr "length" "4,4,8")])
+(define_insn_and_split "*rotlsi3"
+ [(set (match_operand:SI 0 "dest_reg_operand")
+ (rotate:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")))]
+ "TARGET_BARREL_SHIFTER
+ && arc_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (rotatert:SI (match_dup 1) (match_dup 3)))]
+{
+ if (CONST_INT_P (operands[2]))
+ {
+ int n = INTVAL (operands[2]) & 31;
+ if (n == 0)
+ {
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
+ else operands[3] = GEN_INT (32 - n);
+ }
+ else
+ {
+ if (!register_operand (operands[2], SImode))
+ operands[2] = force_reg (SImode, operands[2]);
+ operands[3] = gen_reg_rtx (SImode);
+ emit_insn (gen_negsi2 (operands[3], operands[2]));
+ }
+})
+
;; Compare / branch instructions.
(define_expand "cbranchsi4"
@@ -5995,6 +5873,20 @@ archs4x, archs4xd"
(zero_extract:SI (match_dup 1) (match_dup 5) (match_dup 7)))])
(match_dup 1)])
+(define_insn_and_split "rotlsi3_cnt1"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r")
+ (rotate:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 1)))]
+ "!TARGET_BARREL_SHIFTER"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ emit_insn (gen_add_f (operands[0], operands[1], operands[1]));
+ emit_insn (gen_adc (operands[0], operands[0], const0_rtx));
+ DONE;
+})
+
(define_insn "rotrsi3_cnt1"
[(set (match_operand:SI 0 "dest_reg_operand" "=r")
(rotatert:SI (match_operand:SI 1 "nonmemory_operand" "rL")
diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md
index e0aef86fd24..607075038ec 100644
--- a/gcc/config/arc/predicates.md
+++ b/gcc/config/arc/predicates.md
@@ -542,15 +542,6 @@
(match_code "ashiftrt, lshiftrt, ashift")
)
-;; Return true if OP is a right shift operator that can be implemented in
-;; four insn words or less without a barrel shifter or multiplier.
-(define_predicate "shiftr4_operator"
- (and (match_code "ashiftrt, lshiftrt")
- (match_test "const_int_operand (XEXP (op, 1), VOIDmode) ")
- (match_test "UINTVAL (XEXP (op, 1)) <= 4U
- || INTVAL (XEXP (op, 1)) == 30
- || INTVAL (XEXP (op, 1)) == 31")))
-
(define_predicate "mult_operator"
(and (match_code "mult") (match_test "TARGET_MPY"))
)