author     Yvan Roux <yvan.roux@linaro.org>              2015-10-30 13:05:33 +0100
committer  Linaro Code Review <review@review.linaro.org> 2015-11-01 19:31:59 +0000
commit     ab69286eaa809310478c2a8caddf2a843e36b78b (patch)
tree       9bc92ac9ee80cb2dae354ea34670544c9959c3ad
parent     617453bf44bd1cd92d23c0f643f6bef2b7fa2d0e (diff)
gcc/
    Backport from trunk r227946.
    2015-09-20  Wilco Dijkstra  <wdijkstr@arm.com>

    * config/aarch64/aarch64.c (aarch64_bitmask_imm): Reimplement using
    faster algorithm.

gcc/
    Backport from trunk r227947.
    2015-09-20  Wilco Dijkstra  <wdijkstr@arm.com>

    * config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
    Replace slow immediate matching loops with a faster algorithm.

gcc/
    Backport from trunk r227948.
    2015-09-20  Wilco Dijkstra  <wdijkstr@arm.com>

    * config/aarch64/aarch64.c (aarch64_bitmasks): Remove.
    (AARCH64_NUM_BITMASKS): Remove.
    (aarch64_bitmasks_cmp): Remove.
    (aarch64_build_bitmask_table): Remove.

gcc/
    Backport from trunk r227949.
    2015-09-20  Wilco Dijkstra  <wdijkstr@arm.com>

    * config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
    Remove redundant immediate generation code.

gcc/
    Backport from trunk r227950.
    2015-09-20  Wilco Dijkstra  <wdijkstr@arm.com>

    * config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
    Cleanup immediate generation code.

Change-Id: I238e54cd695c057205238b397a8189c89cf5115e
-rw-r--r--  gcc/config/aarch64/aarch64.c | 416
1 file changed, 109 insertions, 307 deletions
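
Background for the patches below (editorial note, not part of the commit): an
AArch64 "bitmask immediate" is a run of s contiguous ones inside an element of
e bits (e a power of two between 2 and 64), rotated right by r bits and then
replicated to fill the full 64-bit register. A minimal sketch in plain C,
assuming 1 <= s < e, that constructs a value from its (e, s, r) fields the
same way the table builder removed by this patch did:

#include <stdint.h>
#include <stdio.h>

/* Build the 64-bit bitmask immediate encoded by element size E,
   run length S, and rotation R (illustration only).  */
static uint64_t
bitmask_imm_from_fields (unsigned e, unsigned s, unsigned r)
{
  uint64_t emask = (e == 64) ? ~0ull : (1ull << e) - 1;
  uint64_t elem = (1ull << s) - 1;                     /* s contiguous ones */
  if (r != 0)
    elem = ((elem >> r) | (elem << (e - r))) & emask;  /* rotate right by r */
  for (unsigned w = e; w < 64; w *= 2)                 /* replicate */
    elem |= elem << w;
  return elem;
}

int main (void)
{
  /* Eight ones replicated at element size 16 -> 0x00ff00ff00ff00ff.  */
  printf ("0x%016llx\n",
          (unsigned long long) bitmask_imm_from_fields (16, 8, 0));
  return 0;
}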
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 25e99dd6da3..2491b768f40 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -583,12 +583,6 @@ static const struct aarch64_option_extension all_extensions[] =
increment address. */
static machine_mode aarch64_memory_reference_mode;
-/* A table of valid AArch64 "bitmask immediate" values for
- logical instructions. */
-
-#define AARCH64_NUM_BITMASKS 5334
-static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
-
typedef enum aarch64_cond_code
{
AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
@@ -1263,268 +1257,95 @@ static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
machine_mode mode)
{
- unsigned HOST_WIDE_INT mask;
int i;
- bool first;
- unsigned HOST_WIDE_INT val;
- bool subtargets;
- rtx subtarget;
- int one_match, zero_match, first_not_ffff_match;
- int num_insns = 0;
+ unsigned HOST_WIDE_INT val, val2, mask;
+ int one_match, zero_match;
+ int num_insns;
- if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
+ val = INTVAL (imm);
+
+ if (aarch64_move_imm (val, mode))
{
if (generate)
emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
- num_insns++;
- return num_insns;
+ return 1;
}
- if (mode == SImode)
+ if ((val >> 32) == 0 || mode == SImode)
{
- /* We know we can't do this in 1 insn, and we must be able to do it
- in two; so don't mess around looking for sequences that don't buy
- us anything. */
if (generate)
{
- emit_insn (gen_rtx_SET (VOIDmode, dest,
- GEN_INT (INTVAL (imm) & 0xffff)));
- emit_insn (gen_insv_immsi (dest, GEN_INT (16),
- GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
+ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val & 0xffff)));
+ if (mode == SImode)
+ emit_insn (gen_insv_immsi (dest, GEN_INT (16),
+ GEN_INT ((val >> 16) & 0xffff)));
+ else
+ emit_insn (gen_insv_immdi (dest, GEN_INT (16),
+ GEN_INT ((val >> 16) & 0xffff)));
}
- num_insns += 2;
- return num_insns;
+ return 2;
}
/* Remaining cases are all for DImode. */
- val = INTVAL (imm);
- subtargets = optimize && can_create_pseudo_p ();
-
- one_match = 0;
- zero_match = 0;
mask = 0xffff;
- first_not_ffff_match = -1;
+ zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
+ ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
+ one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
+ ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
- for (i = 0; i < 64; i += 16, mask <<= 16)
+ if (zero_match != 2 && one_match != 2)
{
- if ((val & mask) == mask)
- one_match++;
- else
- {
- if (first_not_ffff_match < 0)
- first_not_ffff_match = i;
- if ((val & mask) == 0)
- zero_match++;
- }
- }
+ /* Try emitting a bitmask immediate with a movk replacing 16 bits.
+ For a 64-bit bitmask try whether changing 16 bits to all ones or
+ zeroes creates a valid bitmask. To check any repeated bitmask,
+ try using 16 bits from the other 32-bit half of val. */
- if (one_match == 2)
- {
- /* Set one of the quarters and then insert back into result. */
- mask = 0xffffll << first_not_ffff_match;
- if (generate)
- {
- emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
- emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
- GEN_INT ((val >> first_not_ffff_match)
- & 0xffff)));
- }
- num_insns += 2;
- return num_insns;
- }
-
- if (zero_match == 2)
- goto simple_sequence;
-
- mask = 0x0ffff0000UL;
- for (i = 16; i < 64; i += 16, mask <<= 16)
- {
- HOST_WIDE_INT comp = mask & ~(mask - 1);
-
- if (aarch64_uimm12_shift (val - (val & mask)))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
- GEN_INT (val & mask)));
- emit_insn (gen_adddi3 (dest, subtarget,
- GEN_INT (val - (val & mask))));
- }
- num_insns += 2;
- return num_insns;
- }
- else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
- GEN_INT ((val + comp) & mask)));
- emit_insn (gen_adddi3 (dest, subtarget,
- GEN_INT (val - ((val + comp) & mask))));
- }
- num_insns += 2;
- return num_insns;
- }
- else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
- GEN_INT ((val - comp) | ~mask)));
- emit_insn (gen_adddi3 (dest, subtarget,
- GEN_INT (val - ((val - comp) | ~mask))));
- }
- num_insns += 2;
- return num_insns;
- }
- else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
+ for (i = 0; i < 64; i += 16, mask <<= 16)
{
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
- GEN_INT (val | ~mask)));
- emit_insn (gen_adddi3 (dest, subtarget,
- GEN_INT (val - (val | ~mask))));
- }
- num_insns += 2;
- return num_insns;
+ val2 = val & ~mask;
+ if (val2 != val && aarch64_bitmask_imm (val2, mode))
+ break;
+ val2 = val | mask;
+ if (val2 != val && aarch64_bitmask_imm (val2, mode))
+ break;
+ val2 = val2 & ~mask;
+ val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
+ if (val2 != val && aarch64_bitmask_imm (val2, mode))
+ break;
}
- }
-
- /* See if we can do it by arithmetically combining two
- immediates. */
- for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
- {
- int j;
- mask = 0xffff;
-
- if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
- || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
+ if (i != 64)
{
if (generate)
{
- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
- GEN_INT (aarch64_bitmasks[i])));
- emit_insn (gen_adddi3 (dest, subtarget,
- GEN_INT (val - aarch64_bitmasks[i])));
- }
- num_insns += 2;
- return num_insns;
- }
-
- for (j = 0; j < 64; j += 16, mask <<= 16)
- {
- if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
- {
- if (generate)
- {
- emit_insn (gen_rtx_SET (VOIDmode, dest,
- GEN_INT (aarch64_bitmasks[i])));
- emit_insn (gen_insv_immdi (dest, GEN_INT (j),
- GEN_INT ((val >> j) & 0xffff)));
- }
- num_insns += 2;
- return num_insns;
+ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val2)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
}
}
}
- /* See if we can do it by logically combining two immediates. */
- for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
- {
- if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
- {
- int j;
-
- for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
- if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (mode) : dest;
- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
- GEN_INT (aarch64_bitmasks[i])));
- emit_insn (gen_iordi3 (dest, subtarget,
- GEN_INT (aarch64_bitmasks[j])));
- }
- num_insns += 2;
- return num_insns;
- }
- }
- else if ((val & aarch64_bitmasks[i]) == val)
- {
- int j;
+ /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
+ are emitted by the initial mov. If one_match > zero_match, skip set bits,
+ otherwise skip zero bits. */
- for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
- if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
- {
- if (generate)
- {
- subtarget = subtargets ? gen_reg_rtx (mode) : dest;
- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
- GEN_INT (aarch64_bitmasks[j])));
- emit_insn (gen_anddi3 (dest, subtarget,
- GEN_INT (aarch64_bitmasks[i])));
- }
- num_insns += 2;
- return num_insns;
- }
- }
- }
+ num_insns = 1;
+ mask = 0xffff;
+ val2 = one_match > zero_match ? ~val : val;
+ i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
- if (one_match > zero_match)
+ if (generate)
+ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (one_match > zero_match
+ ? (val | ~(mask << i))
+ : (val & (mask << i)))));
+ for (i += 16; i < 64; i += 16)
{
- /* Set either first three quarters or all but the third. */
- mask = 0xffffll << (16 - first_not_ffff_match);
+ if ((val2 & (mask << i)) == 0)
+ continue;
if (generate)
- emit_insn (gen_rtx_SET (VOIDmode, dest,
- GEN_INT (val | mask | 0xffffffff00000000ull)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
num_insns ++;
-
- /* Now insert other two quarters. */
- for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
- i < 64; i += 16, mask <<= 16)
- {
- if ((val & mask) != mask)
- {
- if (generate)
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
- num_insns ++;
- }
- }
- return num_insns;
- }
-
- simple_sequence:
- first = true;
- mask = 0xffff;
- for (i = 0; i < 64; i += 16, mask <<= 16)
- {
- if ((val & mask) != 0)
- {
- if (first)
- {
- if (generate)
- emit_insn (gen_rtx_SET (VOIDmode, dest,
- GEN_INT (val & mask)));
- num_insns ++;
- first = false;
- }
- else
- {
- if (generate)
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
- num_insns ++;
- }
- }
}
return num_insns;
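
A standalone model (editorial sketch, not part of the patch) of the counting
logic in the rewritten function above. It assumes a 64-bit constant and omits
the single-instruction fast path and the bitmask-plus-MOVK case, so it only
shows how zero_match and one_match pick between a MOVZ-based and a MOVN-based
sequence; for constants that miss those special cases it returns the same
counts as the code above:

#include <stdint.h>
#include <stdio.h>

static int
count_mov_insns (uint64_t val)
{
  uint64_t mask = 0xffff;
  int zero_match = 0, one_match = 0, num_insns = 1;

  /* Count 16-bit quarters that are all zeroes or all ones.  */
  for (int i = 0; i < 64; i += 16)
    {
      zero_match += (val & (mask << i)) == 0;
      one_match += (~val & (mask << i)) == 0;
    }

  /* If more quarters are all ones than all zeroes, start from MOVN and
     patch the remaining quarters with MOVK; otherwise start from MOVZ.  */
  uint64_t val2 = one_match > zero_match ? ~val : val;
  int i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;

  for (i += 16; i < 64; i += 16)
    if ((val2 & (mask << i)) != 0)
      num_insns++;
  return num_insns;
}

int main (void)
{
  printf ("%d\n", count_mov_insns (0x0000123400005678ull)); /* 2: MOVZ+MOVK */
  printf ("%d\n", count_mov_insns (0x1234ffff5678ffffull)); /* 2: MOVN+MOVK */
  printf ("%d\n", count_mov_insns (0x1234567812345678ull)); /* 4 */
  return 0;
}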
@@ -3120,67 +2941,6 @@ aarch64_tls_referenced_p (rtx x)
}
-static int
-aarch64_bitmasks_cmp (const void *i1, const void *i2)
-{
- const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
- const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
-
- if (*imm1 < *imm2)
- return -1;
- if (*imm1 > *imm2)
- return +1;
- return 0;
-}
-
-
-static void
-aarch64_build_bitmask_table (void)
-{
- unsigned HOST_WIDE_INT mask, imm;
- unsigned int log_e, e, s, r;
- unsigned int nimms = 0;
-
- for (log_e = 1; log_e <= 6; log_e++)
- {
- e = 1 << log_e;
- if (e == 64)
- mask = ~(HOST_WIDE_INT) 0;
- else
- mask = ((HOST_WIDE_INT) 1 << e) - 1;
- for (s = 1; s < e; s++)
- {
- for (r = 0; r < e; r++)
- {
- /* set s consecutive bits to 1 (s < 64) */
- imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
- /* rotate right by r */
- if (r != 0)
- imm = ((imm >> r) | (imm << (e - r))) & mask;
- /* replicate the constant depending on SIMD size */
- switch (log_e) {
- case 1: imm |= (imm << 2);
- case 2: imm |= (imm << 4);
- case 3: imm |= (imm << 8);
- case 4: imm |= (imm << 16);
- case 5: imm |= (imm << 32);
- case 6:
- break;
- default:
- gcc_unreachable ();
- }
- gcc_assert (nimms < AARCH64_NUM_BITMASKS);
- aarch64_bitmasks[nimms++] = imm;
- }
- }
- }
-
- gcc_assert (nimms == AARCH64_NUM_BITMASKS);
- qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
- aarch64_bitmasks_cmp);
-}
-
-
/* Return true if val can be encoded as a 12-bit unsigned immediate with
a left shift of 0 or 12 bits. */
bool
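
The removed aarch64_build_bitmask_table populated a 5334-entry table that
aarch64_bitmask_imm then probed with bsearch on every query. A standalone
sketch (illustration only) of that enumeration, which reproduces the old
AARCH64_NUM_BITMASKS count:

#include <stdint.h>
#include <stdio.h>

int main (void)
{
  unsigned count = 0;
  for (unsigned log_e = 1; log_e <= 6; log_e++)
    {
      unsigned e = 1u << log_e;
      uint64_t emask = (e == 64) ? ~0ull : (1ull << e) - 1;
      for (unsigned s = 1; s < e; s++)
        for (unsigned r = 0; r < e; r++)
          {
            uint64_t imm = (1ull << s) - 1;           /* s contiguous ones */
            if (r != 0)
              imm = ((imm >> r) | (imm << (e - r))) & emask;
            for (unsigned w = e; w < 64; w *= 2)      /* replicate */
              imm |= imm << w;
            (void) imm;  /* the old code stored imm into the table here */
            count++;
          }
    }
  /* Sum over e of e*(e-1): 2 + 12 + 56 + 240 + 992 + 4032 = 5334.  */
  printf ("%u\n", count);
  return 0;
}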
@@ -3212,19 +2972,63 @@ aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
|| (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
+/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
+
+static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
+ {
+ 0x0000000100000001ull,
+ 0x0001000100010001ull,
+ 0x0101010101010101ull,
+ 0x1111111111111111ull,
+ 0x5555555555555555ull,
+ };
+
/* Return true if val is a valid bitmask immediate. */
+
bool
-aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
+aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
{
- if (GET_MODE_SIZE (mode) < 8)
- {
- /* Replicate bit pattern. */
- val &= (HOST_WIDE_INT) 0xffffffff;
- val |= val << 32;
- }
- return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
- sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
+ unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
+ int bits;
+
+ /* Check for a single sequence of one bits and return quickly if so.
+ The special cases of all ones and all zeroes return false. */
+ val = (unsigned HOST_WIDE_INT) val_in;
+ tmp = val + (val & -val);
+
+ if (tmp == (tmp & -tmp))
+ return (val + 1) > 1;
+
+ /* Replicate 32-bit immediates so we can treat them as 64-bit. */
+ if (mode == SImode)
+ val = (val << 32) | (val & 0xffffffff);
+
+ /* Invert if the immediate doesn't start with a zero bit - this means we
+ only need to search for sequences of one bits. */
+ if (val & 1)
+ val = ~val;
+
+ /* Find the first set bit and set tmp to val with the first sequence of one
+ bits removed. Return success if there is a single sequence of ones. */
+ first_one = val & -val;
+ tmp = val & (val + first_one);
+
+ if (tmp == 0)
+ return true;
+
+ /* Find the next set bit and compute the difference in bit position. */
+ next_one = tmp & -tmp;
+ bits = clz_hwi (first_one) - clz_hwi (next_one);
+ mask = val ^ tmp;
+
+ /* Check the bit position difference is a power of 2, and that the first
+ sequence of one bits fits within 'bits' bits. */
+ if ((mask >> bits) != 0 || bits != (bits & -bits))
+ return false;
+
+ /* Check the sequence of one bits is repeated 64/bits times. */
+ return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
}
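
A standalone version (editorial sketch) of the new check above, assuming
64-bit values only and substituting __builtin_clzll for GCC's clz_hwi, which
behaves the same for nonzero 64-bit operands:

#include <stdint.h>
#include <stdio.h>

/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2.  */
static const uint64_t bitmask_imm_mul[] =
{
  0x0000000100000001ull,
  0x0001000100010001ull,
  0x0101010101010101ull,
  0x1111111111111111ull,
  0x5555555555555555ull,
};

static int
bitmask_imm_p (uint64_t val)
{
  /* A single run of ones plus its lowest set bit leaves at most one bit
     set; all zeroes and all ones are rejected by (val + 1) > 1.  */
  uint64_t tmp = val + (val & -val);
  if (tmp == (tmp & -tmp))
    return (val + 1) > 1;

  /* Force the value to start with a zero bit so only runs of ones need
     to be searched.  */
  if (val & 1)
    val = ~val;

  /* Strip the first run of ones; nothing left over means a single run.  */
  uint64_t first_one = val & -val;
  tmp = val & (val + first_one);
  if (tmp == 0)
    return 1;

  /* The distance between the first two runs must be a power of two, and
     the first run (mask) must fit within that distance...  */
  uint64_t next_one = tmp & -tmp;
  int bits = __builtin_clzll (first_one) - __builtin_clzll (next_one);
  uint64_t mask = val ^ tmp;
  if ((mask >> bits) != 0 || bits != (bits & -bits))
    return 0;

  /* ...and the run must repeat exactly 64/bits times.  The index math
     assumes a 32-bit int, as the original code does.  */
  return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
}

int main (void)
{
  printf ("%d\n", bitmask_imm_p (0x00ff00ff00ff00ffull)); /* 1 */
  printf ("%d\n", bitmask_imm_p (0x5555555555555555ull)); /* 1 */
  printf ("%d\n", bitmask_imm_p (0x0000000012345678ull)); /* 0 */
  printf ("%d\n", bitmask_imm_p (0xffffffffffffffffull)); /* 0 */
  return 0;
}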
@@ -7732,8 +7536,6 @@ aarch64_override_options (void)
|| (aarch64_arch_string && valid_arch))
gcc_assert (explicit_arch != aarch64_no_arch);
- aarch64_build_bitmask_table ();
-
aarch64_override_options_internal (&global_options);
/* Save these options as the default ones in case we push and pop them later