aboutsummaryrefslogtreecommitdiff
path: root/SingleSource/UnitTests/Vector/AVX512BWVL/shuffles.c
diff options
context:
space:
mode:
Diffstat (limited to 'SingleSource/UnitTests/Vector/AVX512BWVL/shuffles.c')
-rw-r--r--SingleSource/UnitTests/Vector/AVX512BWVL/shuffles.c294
1 files changed, 294 insertions, 0 deletions
diff --git a/SingleSource/UnitTests/Vector/AVX512BWVL/shuffles.c b/SingleSource/UnitTests/Vector/AVX512BWVL/shuffles.c
new file mode 100644
index 00000000..2d62f9e9
--- /dev/null
+++ b/SingleSource/UnitTests/Vector/AVX512BWVL/shuffles.c
@@ -0,0 +1,294 @@
+/*
+ * Test shifts and rotates.
+ *
+ * This test was created to check the correctness
+ * of the following intrinsics support:
+ * _mm_shuffle_epi32()
+ * _mm_shufflehi_epi16()
+ * _mm_shufflelo_epi16()
+ * _mm256_shuffle_epi32()
+ * _mm256_shufflehi_epi16()
+ * _mm256_shufflelo_epi16()
+ * _mm512_shuffle_epi32()
+ * _mm512_shufflehi_epi16()
+ * _mm512_shufflelo_epi16()
+ */
+
+#include "m512_test_util.h"
+#include <stdio.h>
+#include <string.h>
+
+V512 counts16, counts32, counts64, src, passthru, zeros;
+__mmask8 k8 = 0xf9;
+__mmask16 k16 = 0x9ffe;
+
+volatile int vol0;
+
+/*
+ * Use this between tests to make compiler think src was updated.
+ * Prevents PRE'ing of a load of src.
+ */
+#define soft_src_update() src.xmmi[vol0] = src.xmmi[vol0]
+#define soft_counts16_update() counts16.xmmi[vol0] = counts16.xmmi[vol0]
+#define soft_counts32_update() counts32.xmmi[vol0] = counts32.xmmi[vol0]
+#define soft_counts64_update() counts64.xmmi[vol0] = counts64.xmmi[vol0]
+
+void NOINLINE init() {
+ volatile int i;
+
+ for (i = 0; i < 16; i++) {
+ counts32.s32[i] = 3;
+ zeros.u32[i] = 0;
+ src.s32[i] = -27 * i * i + 300 * i - 82;
+ if (i & 0x1) {
+ src.s32[i] *= -1;
+ }
+ passthru.s32[i] = 48 * i * i + 100 * i - 9;
+ }
+
+ for (i = 0; i < 8; i++) {
+ counts64.s64[i] = 9;
+ }
+
+ for (i = 0; i < 32; i++) {
+ counts16.s16[i] = 4;
+ }
+}
+
+
+void NOINLINE emulate_shuffle(void *presult, const void *p,
+const void *mask_src, int size, int control, int mask) {
+ int i;
+ V512 *result = (V512 *)presult;
+ V512 *v = (V512 *)p;
+ V512 *src = (V512 *)mask_src;
+ for (i = 0; i < size; i++) {
+ if (((1 << i) & mask) == 0)
+ result->u32[i] = src->u32[i];
+ else
+ result->u32[i] = v->u32[4 * (i / 4) + ((control >> (2 * (i % 4))) & 3)];
+ }
+}
+
+
+void NOINLINE emulate_shuffle16(void *presult, const void *p,
+const void *mask_src, int size, int control, int mask, int order) {
+ int i;
+ V512 *result = (V512 *)presult;
+ V512 *v = (V512 *)p;
+ V512 *src = (V512 *)mask_src;
+ for (i = 0; i < size; i++) {
+ if (((1 << i) & mask) == 0) {
+ result->u16[i] = src->u16[i];
+ } else {
+ if ((i / 4) % 2 == order) {
+ result->u16[i] = v->u16[i];
+ } else {
+ result->u16[i] = v->u16[4 * (i / 4) + ((control >> (2 * (i % 4))) & 3)];
+ }
+ }
+ }
+}
+
+
+void NOINLINE do_shuffle_epi32() {
+ volatile V512 res;
+ V512 expected;
+
+ // checking mm512 shuffle
+ soft_counts32_update();
+ res.zmmi = _mm512_shuffle_epi32(src.zmmi, 3);
+ emulate_shuffle(&expected, &src, &zeros, 16, 3, 0xffff);
+ check_equal_nd(&res, &expected, 16, "_mm512_shuffle_epi32", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.zmmi = _mm512_mask_shuffle_epi32(passthru.zmmi, k16, src.zmmi, 3);
+ emulate_shuffle(&expected, &src, &passthru, 16, 3, k16);
+ check_equal_nd(&res, &expected, 16, "_mm512_mask_shuffle_epi32", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.zmmi = _mm512_maskz_shuffle_epi32(k16, src.zmmi, 3);
+ emulate_shuffle(&expected, &src, &zeros, 16, 3, k16);
+ check_equal_nd(&res, &expected, 16, "_mm512_maskz_shuffle_epi32", __LINE__);
+ soft_counts32_update();
+
+ // checking mm256 shuffle
+ soft_counts32_update();
+ res.ymmi[0] = _mm256_shuffle_epi32(src.ymmi[0], 3);
+ emulate_shuffle(&expected, &src, &zeros, 8, 3, 0xff);
+ check_equal_nd(&res, &expected, 8, "_mm256_shuffle_epi32", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.ymmi[0] = _mm256_mask_shuffle_epi32(passthru.ymmi[0], k8, src.ymmi[0], 3);
+ emulate_shuffle(&expected, &src, &passthru, 8, 3, k8);
+ check_equal_nd(&res, &expected, 8, "_mm256_mask_shuffle_epi32", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.ymmi[0] = _mm256_maskz_shuffle_epi32(k8, src.ymmi[0], 3);
+ emulate_shuffle(&expected, &src, &zeros, 8, 3, k8);
+ check_equal_nd(&res, &expected, 8, "_mm256_maskz_shuffle_epi32", __LINE__);
+ soft_counts32_update();
+
+ // checking mm shuffle
+ soft_counts32_update();
+ res.xmmi[0] = _mm_shuffle_epi32(src.xmmi[0], 3);
+ emulate_shuffle(&expected, &src, &zeros, 4, 3, 0xf);
+ check_equal_nd(&res, &expected, 4, "_mm_shuffle_epi32", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.xmmi[0] = _mm_mask_shuffle_epi32(passthru.xmmi[0], k8, src.xmmi[0], 3);
+ emulate_shuffle(&expected, &src, &passthru, 4, 3, k8);
+ check_equal_nd(&res, &expected, 4, "_mm_mask_shuffle_epi32", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.xmmi[0] = _mm_maskz_shuffle_epi32(k8, src.xmmi[0], 3);
+ emulate_shuffle(&expected, &src, &zeros, 4, 3, k8);
+ check_equal_nd(&res, &expected, 4, "_mm_maskz_shuffle_epi32", __LINE__);
+ soft_counts32_update();
+}
+
+void NOINLINE do_shufflehi_epi16() {
+ volatile V512 res;
+ V512 expected;
+
+ // checking mm512 shufflehi
+ soft_counts32_update();
+ res.zmmi = _mm512_shufflehi_epi16(src.zmmi, 3);
+ emulate_shuffle16(&expected, &src, &src, 32, 3, 0xffffffff, 0);
+ check_equal_nd(&res, &expected, 16, "_mm512_shufflehi_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.zmmi = _mm512_mask_shufflehi_epi16(passthru.zmmi, k16, src.zmmi, 3);
+ emulate_shuffle16(&expected, &src, &passthru, 32, 3, k16, 0);
+ check_equal_nd(&res, &expected, 16, "_mm512_mask_shufflehi_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.zmmi = _mm512_maskz_shufflehi_epi16(k16, src.zmmi, 3);
+ emulate_shuffle16(&expected, &src, &zeros, 32, 3, k16, 0);
+ check_equal_nd(&res, &expected, 16, "_mm512_maskz_shufflehi_epi16", __LINE__);
+ soft_counts32_update();
+
+ // checking mm256 shufflehi
+ soft_counts32_update();
+ res.ymmi[0] = _mm256_shufflehi_epi16(src.ymmi[0], 3);
+ emulate_shuffle16(&expected, &src, &zeros, 16, 3, 0xffff, 0);
+ check_equal_nd(&res, &expected, 8, "_mm256_shufflehi_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.ymmi[0] = _mm256_mask_shufflehi_epi16(passthru.ymmi[0], k16, src.ymmi[0], 3);
+ emulate_shuffle16(&expected, &src, &passthru, 16, 3, k16, 0);
+ check_equal_nd(&res, &expected, 8, "_mm256_mask_shufflehi_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.ymmi[0] = _mm256_maskz_shufflehi_epi16(k16, src.ymmi[0], 3);
+ emulate_shuffle16(&expected, &src, &zeros, 16, 3, k16, 0);
+ check_equal_nd(&res, &expected, 8, "_mm256_maskz_shufflehi_epi16", __LINE__);
+ soft_counts32_update();
+
+ // checking mm shufflehi
+ soft_counts32_update();
+ res.xmmi[0] = _mm_shufflehi_epi16(src.xmmi[0], 3);
+ emulate_shuffle16(&expected, &src, &zeros, 8, 3, 0xff, 0);
+ check_equal_nd(&res, &expected, 4, "_mm_shufflehi_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.xmmi[0] = _mm_mask_shufflehi_epi16(passthru.xmmi[0], k8, src.xmmi[0], 3);
+ emulate_shuffle16(&expected, &src, &passthru, 8, 3, k8, 0);
+ check_equal_nd(&res, &expected, 4, "_mm_mask_shufflehi_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.xmmi[0] = _mm_maskz_shufflehi_epi16(k8, src.xmmi[0], 3);
+ emulate_shuffle16(&expected, &src, &zeros, 8, 3, k8, 0);
+ check_equal_nd(&res, &expected, 4, "_mm_maskz_shufflehi_epi16", __LINE__);
+ soft_counts32_update();
+}
+
+void NOINLINE do_shufflelo_epi16() {
+ volatile V512 res;
+ V512 expected;
+
+ // checking mm512 shufflelo
+ soft_counts32_update();
+ res.zmmi = _mm512_shufflelo_epi16(src.zmmi, 3);
+ emulate_shuffle16(&expected, &src, &src, 32, 3, 0xffffffff, 1);
+ check_equal_nd(&res, &expected, 16, "_mm512_shufflelo_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.zmmi = _mm512_mask_shufflelo_epi16(passthru.zmmi, k16, src.zmmi, 3);
+ emulate_shuffle16(&expected, &src, &passthru, 32, 3, k16, 1);
+ check_equal_nd(&res, &expected, 16, "_mm512_mask_shufflelo_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.zmmi = _mm512_maskz_shufflelo_epi16(k16, src.zmmi, 3);
+ emulate_shuffle16(&expected, &src, &zeros, 32, 3, k16, 1);
+ check_equal_nd(&res, &expected, 16, "_mm512_maskz_shufflelo_epi16", __LINE__);
+ soft_counts32_update();
+
+ // checking mm256 shufflelo
+ soft_counts32_update();
+ res.ymmi[0] = _mm256_shufflelo_epi16(src.ymmi[0], 3);
+ emulate_shuffle16(&expected, &src, &zeros, 16, 3, 0xffff, 1);
+ check_equal_nd(&res, &expected, 8, "_mm256_shufflelo_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.ymmi[0] = _mm256_mask_shufflelo_epi16(passthru.ymmi[0], k16, src.ymmi[0], 3);
+ emulate_shuffle16(&expected, &src, &passthru, 16, 3, k16, 1);
+ check_equal_nd(&res, &expected, 8, "_mm256_mask_shufflelo_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.ymmi[0] = _mm256_maskz_shufflelo_epi16(k16, src.ymmi[0], 3);
+ emulate_shuffle16(&expected, &src, &zeros, 16, 3, k16, 1);
+ check_equal_nd(&res, &expected, 8, "_mm256_maskz_shufflelo_epi16", __LINE__);
+ soft_counts32_update();
+
+ // checking mm shufflelo
+ soft_counts32_update();
+ res.xmmi[0] = _mm_shufflelo_epi16(src.xmmi[0], 3);
+ emulate_shuffle16(&expected, &src, &zeros, 8, 3, 0xff, 1);
+ check_equal_nd(&res, &expected, 4, "_mm_shufflelo_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.xmmi[0] = _mm_mask_shufflelo_epi16(passthru.xmmi[0], k8, src.xmmi[0], 3);
+ emulate_shuffle16(&expected, &src, &passthru, 8, 3, k8, 1);
+ check_equal_nd(&res, &expected, 4, "_mm_mask_shufflelo_epi16", __LINE__);
+ soft_counts32_update();
+
+ soft_counts32_update();
+ res.xmmi[0] = _mm_maskz_shufflelo_epi16(k8, src.xmmi[0], 3);
+ emulate_shuffle16(&expected, &src, &zeros, 8, 3, k8, 1);
+ check_equal_nd(&res, &expected, 4, "_mm_maskz_shufflelo_epi16", __LINE__);
+ soft_counts32_update();
+}
+
+int main(int argc, char *argv[]) {
+ init();
+
+ do_shuffle_epi32();
+ do_shufflelo_epi16();
+ do_shufflehi_epi16();
+
+ if (n_errs != 0) {
+ printf("FAILED\n");
+ return 1;
+ }
+
+ printf("PASSED\n");
+ return 0;
+}