diff options
-rw-r--r-- | gcc/config/s390/s390.cc | 17 |
-rw-r--r-- | gcc/config/s390/vx-builtins.md | 2 |
-rw-r--r-- | gcc/testsuite/g++.dg/torture/vshuf-mem.C | 27 |
3 files changed, 43 insertions, 3 deletions
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index fa517bd3e77..ec836ec3cd4 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -17940,7 +17940,8 @@ expand_perm_as_replicate (const struct expand_vec_perm_d &d) unsigned char i; unsigned char elem; rtx base = d.op0; - rtx insn; + rtx insn = NULL_RTX; + /* Needed to silence maybe-uninitialized warning. */ gcc_assert (d.nelt > 0); elem = d.perm[0]; @@ -17954,7 +17955,19 @@ expand_perm_as_replicate (const struct expand_vec_perm_d &d) base = d.op1; elem -= d.nelt; } - insn = maybe_gen_vec_splat (d.vmode, d.target, base, GEN_INT (elem)); + if (memory_operand (base, d.vmode)) + { + /* Try to use vector load and replicate. */ + rtx new_base = adjust_address (base, GET_MODE_INNER (d.vmode), + elem * GET_MODE_UNIT_SIZE (d.vmode)); + insn = maybe_gen_vec_splats (d.vmode, d.target, new_base); + } + if (insn == NULL_RTX) + { + base = force_reg (d.vmode, base); + insn = maybe_gen_vec_splat (d.vmode, d.target, base, GEN_INT (elem)); + } + if (insn == NULL_RTX) return false; emit_insn (insn); diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md index 93c0d408a43..bb271c09a7d 100644 --- a/gcc/config/s390/vx-builtins.md +++ b/gcc/config/s390/vx-builtins.md @@ -145,7 +145,7 @@ DONE; }) -(define_expand "vec_splats<mode>" +(define_expand "@vec_splats<mode>" [(set (match_operand:VEC_HW 0 "register_operand" "") (vec_duplicate:VEC_HW (match_operand:<non_vec> 1 "general_operand" "")))] "TARGET_VX") diff --git a/gcc/testsuite/g++.dg/torture/vshuf-mem.C b/gcc/testsuite/g++.dg/torture/vshuf-mem.C new file mode 100644 index 00000000000..5f1ebf65665 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/vshuf-mem.C @@ -0,0 +1,27 @@ +// { dg-options "-std=c++11" } +// { dg-do run } +// { dg-additional-options "-march=z14" { target s390*-*-* } } + +/* This used to trigger (2024-05-28) the vectorize_vec_perm_const + backend hook to be invoked with a MEM source operand. 
Extracted + from onnxruntime's mlas library. */ + +typedef float V4SF __attribute__((vector_size (16))); +typedef int V4SI __attribute__((vector_size (16))); + +template < unsigned I0, unsigned I1, unsigned I2, unsigned I3 > V4SF +MlasShuffleFloat32x4 (V4SF Vector) +{ + return __builtin_shuffle (Vector, Vector, V4SI{I0, I1, I2, I3}); +} + +int +main () +{ + V4SF f = { 1.0f, 2.0f, 3.0f, 4.0f }; + if (MlasShuffleFloat32x4 < 1, 1, 1, 1 > (f)[3] != 2.0f) + __builtin_abort (); + if (MlasShuffleFloat32x4 < 3, 3, 3, 3 > (f)[1] != 4.0f) + __builtin_abort (); + return 0; +} |