aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/config/s390/s390.cc17
-rw-r--r--gcc/config/s390/vx-builtins.md2
-rw-r--r--gcc/testsuite/g++.dg/torture/vshuf-mem.C27
3 files changed, 43 insertions, 3 deletions
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index fa517bd3e77..ec836ec3cd4 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17940,7 +17940,8 @@ expand_perm_as_replicate (const struct expand_vec_perm_d &d)
unsigned char i;
unsigned char elem;
rtx base = d.op0;
- rtx insn;
+ rtx insn = NULL_RTX;
+
/* Needed to silence maybe-uninitialized warning. */
gcc_assert (d.nelt > 0);
elem = d.perm[0];
@@ -17954,7 +17955,19 @@ expand_perm_as_replicate (const struct expand_vec_perm_d &d)
base = d.op1;
elem -= d.nelt;
}
- insn = maybe_gen_vec_splat (d.vmode, d.target, base, GEN_INT (elem));
+ if (memory_operand (base, d.vmode))
+ {
+ /* Try to use vector load and replicate. */
+ rtx new_base = adjust_address (base, GET_MODE_INNER (d.vmode),
+ elem * GET_MODE_UNIT_SIZE (d.vmode));
+ insn = maybe_gen_vec_splats (d.vmode, d.target, new_base);
+ }
+ if (insn == NULL_RTX)
+ {
+ base = force_reg (d.vmode, base);
+ insn = maybe_gen_vec_splat (d.vmode, d.target, base, GEN_INT (elem));
+ }
+
if (insn == NULL_RTX)
return false;
emit_insn (insn);
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 93c0d408a43..bb271c09a7d 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -145,7 +145,7 @@
DONE;
})
-(define_expand "vec_splats<mode>"
+(define_expand "@vec_splats<mode>"
[(set (match_operand:VEC_HW 0 "register_operand" "")
(vec_duplicate:VEC_HW (match_operand:<non_vec> 1 "general_operand" "")))]
"TARGET_VX")
diff --git a/gcc/testsuite/g++.dg/torture/vshuf-mem.C b/gcc/testsuite/g++.dg/torture/vshuf-mem.C
new file mode 100644
index 00000000000..5f1ebf65665
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/vshuf-mem.C
@@ -0,0 +1,27 @@
+// { dg-options "-std=c++11" }
+// { dg-do run }
+// { dg-additional-options "-march=z14" { target s390*-*-* } }
+
+/* This used to cause (as of 2024-05-28) the vectorize_vec_perm_const
+ backend hook to be invoked with a MEM source operand. Extracted
+ from onnxruntime's mlas library. */
+
+typedef float V4SF __attribute__((vector_size (16)));
+typedef int V4SI __attribute__((vector_size (16)));
+
+template < unsigned I0, unsigned I1, unsigned I2, unsigned I3 > V4SF
+MlasShuffleFloat32x4 (V4SF Vector)
+{
+ return __builtin_shuffle (Vector, Vector, V4SI{I0, I1, I2, I3});
+}
+
+int
+main ()
+{
+ V4SF f = { 1.0f, 2.0f, 3.0f, 4.0f };
+ if (MlasShuffleFloat32x4 < 1, 1, 1, 1 > (f)[3] != 2.0f)
+ __builtin_abort ();
+ if (MlasShuffleFloat32x4 < 3, 3, 3, 3 > (f)[1] != 4.0f)
+ __builtin_abort ();
+ return 0;
+}