diff options
author | Christophe Lyon <christophe.lyon@linaro.org> | 2016-02-17 14:18:45 +0100 |
---|---|---|
committer | Linaro Code Review <review@review.linaro.org> | 2016-03-14 13:43:11 +0000 |
commit | 34d9bcc7181a3076d3fb5105f15456824b92bd51 (patch) | |
tree | b60e78ef33a340dd6ea580120c4b9f6b3ecbb908 | |
parent | 4d05e02e2e1eeaa238c06639949f588a9a2b4417 (diff) |
gcc/
Backport from trunk r233461.
2016-02-16 James Greenhalgh <james.greenhalgh@arm.com>
Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* config/aarch64/aarch64.c (aarch64_expand_vector_init): Refactor,
always use lane loads to construct non-constant vectors.
gcc/testsuite/
Backport from trunk r233461.
2016-02-16 James Greenhalgh <james.greenhalgh@arm.com>
Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* gcc.target/aarch64/vector_initialization_nostack.c: New.
Change-Id: I7de97dc15b350ea0e46d23adc7eafbbd65648bb8
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 56 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c | 53 |
2 files changed, 81 insertions, 28 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index eab75c34dc6..79712b3b2a8 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -10595,28 +10595,37 @@ aarch64_simd_make_constant (rtx vals) return NULL_RTX; } +/* Expand a vector initialisation sequence, such that TARGET is + initialised to contain VALS. */ + void aarch64_expand_vector_init (rtx target, rtx vals) { machine_mode mode = GET_MODE (target); machine_mode inner_mode = GET_MODE_INNER (mode); + /* The number of vector elements. */ int n_elts = GET_MODE_NUNITS (mode); + /* The number of vector elements which are not constant. */ int n_var = 0; rtx any_const = NULL_RTX; + /* The first element of vals. */ + rtx v0 = XVECEXP (vals, 0, 0); bool all_same = true; + /* Count the number of variable elements to initialise. */ for (int i = 0; i < n_elts; ++i) { rtx x = XVECEXP (vals, 0, i); - if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) + if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x))) ++n_var; else any_const = x; - if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) - all_same = false; + all_same &= rtx_equal_p (x, v0); } + /* No variable elements, hand off to aarch64_simd_make_constant which knows + how best to handle this. */ if (n_var == 0) { rtx constant = aarch64_simd_make_constant (vals); @@ -10630,14 +10639,15 @@ aarch64_expand_vector_init (rtx target, rtx vals) /* Splat a single non-constant element if we can. */ if (all_same) { - rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + rtx x = copy_to_mode_reg (inner_mode, v0); aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x)); return; } - /* Half the fields (or less) are non-constant. Load constant then overwrite - varying fields. Hope that this is more efficient than using the stack. */ - if (n_var <= n_elts/2) + /* Initialise a vector which is part-variable. We want to first try + to build those lanes which are constant in the most efficient way we + can. 
*/ + if (n_var != n_elts) { rtx copy = copy_rtx (vals); @@ -10664,31 +10674,21 @@ aarch64_expand_vector_init (rtx target, rtx vals) XVECEXP (copy, 0, i) = subst; } aarch64_expand_vector_init (target, copy); + } - /* Insert variables. */ - enum insn_code icode = optab_handler (vec_set_optab, mode); - gcc_assert (icode != CODE_FOR_nothing); + /* Insert the variable lanes directly. */ - for (int i = 0; i < n_elts; i++) - { - rtx x = XVECEXP (vals, 0, i); - if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) - continue; - x = copy_to_mode_reg (inner_mode, x); - emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i))); - } - return; - } + enum insn_code icode = optab_handler (vec_set_optab, mode); + gcc_assert (icode != CODE_FOR_nothing); - /* Construct the vector in memory one field at a time - and load the whole vector. */ - rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); for (int i = 0; i < n_elts; i++) - emit_move_insn (adjust_address_nv (mem, inner_mode, - i * GET_MODE_SIZE (inner_mode)), - XVECEXP (vals, 0, i)); - emit_move_insn (target, mem); - + { + rtx x = XVECEXP (vals, 0, i); + if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) + continue; + x = copy_to_mode_reg (inner_mode, x); + emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i))); + } } static unsigned HOST_WIDE_INT diff --git a/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c b/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c new file mode 100644 index 00000000000..bbad04d0026 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -ftree-vectorize -fno-vect-cost-model" } */ +float arr_f[100][100]; +float +f9 (void) +{ + + int i; + float sum = 0; + for (i = 0; i < 100; i++) + sum += arr_f[i][0] * arr_f[0][i]; + return sum; + +} + + +int arr[100][100]; +int +f10 (void) +{ + + int i; + int sum = 0; + for (i = 0; i < 100; i++) + sum += arr[i][0] * arr[0][i]; + return sum; + +} + +double 
arr_d[100][100]; +double +f11 (void) +{ + int i; + double sum = 0; + for (i = 0; i < 100; i++) + sum += arr_d[i][0] * arr_d[0][i]; + return sum; +} + +char arr_c[100][100]; +char +f12 (void) +{ + int i; + char sum = 0; + for (i = 0; i < 100; i++) + sum += arr_c[i][0] * arr_c[0][i]; + return sum; +} + + +/* { dg-final { scan-assembler-not "sp" } } */ |