diff options
Diffstat (limited to 'SingleSource/UnitTests/Vector/AVX512BWVL/shuffles.c')
-rw-r--r-- | SingleSource/UnitTests/Vector/AVX512BWVL/shuffles.c | 294 |
1 files changed, 294 insertions, 0 deletions
diff --git a/SingleSource/UnitTests/Vector/AVX512BWVL/shuffles.c b/SingleSource/UnitTests/Vector/AVX512BWVL/shuffles.c new file mode 100644 index 00000000..2d62f9e9 --- /dev/null +++ b/SingleSource/UnitTests/Vector/AVX512BWVL/shuffles.c @@ -0,0 +1,294 @@ +/* + * Test shifts and rotates. + * + * This test was created to check the correctness + * of the following intrinsics support: + * _mm_shuffle_epi32() + * _mm_shufflehi_epi16() + * _mm_shufflelo_epi16() + * _mm256_shuffle_epi32() + * _mm256_shufflehi_epi16() + * _mm256_shufflelo_epi16() + * _mm512_shuffle_epi32() + * _mm512_shufflehi_epi16() + * _mm512_shufflelo_epi16() + */ + +#include "m512_test_util.h" +#include <stdio.h> +#include <string.h> + +V512 counts16, counts32, counts64, src, passthru, zeros; +__mmask8 k8 = 0xf9; +__mmask16 k16 = 0x9ffe; + +volatile int vol0; + +/* + * Use this between tests to make compiler think src was updated. + * Prevents PRE'ing of a load of src. + */ +#define soft_src_update() src.xmmi[vol0] = src.xmmi[vol0] +#define soft_counts16_update() counts16.xmmi[vol0] = counts16.xmmi[vol0] +#define soft_counts32_update() counts32.xmmi[vol0] = counts32.xmmi[vol0] +#define soft_counts64_update() counts64.xmmi[vol0] = counts64.xmmi[vol0] + +void NOINLINE init() { + volatile int i; + + for (i = 0; i < 16; i++) { + counts32.s32[i] = 3; + zeros.u32[i] = 0; + src.s32[i] = -27 * i * i + 300 * i - 82; + if (i & 0x1) { + src.s32[i] *= -1; + } + passthru.s32[i] = 48 * i * i + 100 * i - 9; + } + + for (i = 0; i < 8; i++) { + counts64.s64[i] = 9; + } + + for (i = 0; i < 32; i++) { + counts16.s16[i] = 4; + } +} + + +void NOINLINE emulate_shuffle(void *presult, const void *p, +const void *mask_src, int size, int control, int mask) { + int i; + V512 *result = (V512 *)presult; + V512 *v = (V512 *)p; + V512 *src = (V512 *)mask_src; + for (i = 0; i < size; i++) { + if (((1 << i) & mask) == 0) + result->u32[i] = src->u32[i]; + else + result->u32[i] = v->u32[4 * (i / 4) + ((control >> (2 * (i % 4))) & 3)]; + } +} + + +void NOINLINE emulate_shuffle16(void *presult, const void *p, +const void *mask_src, int size, int control, int mask, int order) { + int i; + V512 *result = (V512 *)presult; + V512 *v = (V512 *)p; + V512 *src = (V512 *)mask_src; + for (i = 0; i < size; i++) { + if (((1 << i) & mask) == 0) { + result->u16[i] = src->u16[i]; + } else { + if ((i / 4) % 2 == order) { + result->u16[i] = v->u16[i]; + } else { + result->u16[i] = v->u16[4 * (i / 4) + ((control >> (2 * (i % 4))) & 3)]; + } + } + } +} + + +void NOINLINE do_shuffle_epi32() { + volatile V512 res; + V512 expected; + + // checking mm512 shuffle + soft_counts32_update(); + res.zmmi = _mm512_shuffle_epi32(src.zmmi, 3); + emulate_shuffle(&expected, &src, &zeros, 16, 3, 0xffff); + check_equal_nd(&res, &expected, 16, "_mm512_shuffle_epi32", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.zmmi = _mm512_mask_shuffle_epi32(passthru.zmmi, k16, src.zmmi, 3); + emulate_shuffle(&expected, &src, &passthru, 16, 3, k16); + check_equal_nd(&res, &expected, 16, "_mm512_mask_shuffle_epi32", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.zmmi = _mm512_maskz_shuffle_epi32(k16, src.zmmi, 3); + emulate_shuffle(&expected, &src, &zeros, 16, 3, k16); + check_equal_nd(&res, &expected, 16, "_mm512_maskz_shuffle_epi32", __LINE__); + soft_counts32_update(); + + // checking mm256 shuffle + soft_counts32_update(); + res.ymmi[0] = _mm256_shuffle_epi32(src.ymmi[0], 3); + emulate_shuffle(&expected, &src, &zeros, 8, 3, 0xff); + check_equal_nd(&res, &expected, 8, "_mm256_shuffle_epi32", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.ymmi[0] = _mm256_mask_shuffle_epi32(passthru.ymmi[0], k8, src.ymmi[0], 3); + emulate_shuffle(&expected, &src, &passthru, 8, 3, k8); + check_equal_nd(&res, &expected, 8, "_mm256_mask_shuffle_epi32", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.ymmi[0] = _mm256_maskz_shuffle_epi32(k8, src.ymmi[0], 3); + emulate_shuffle(&expected, &src, &zeros, 8, 3, k8); + check_equal_nd(&res, &expected, 8, "_mm256_maskz_shuffle_epi32", __LINE__); + soft_counts32_update(); + + // checking mm shuffle + soft_counts32_update(); + res.xmmi[0] = _mm_shuffle_epi32(src.xmmi[0], 3); + emulate_shuffle(&expected, &src, &zeros, 4, 3, 0xf); + check_equal_nd(&res, &expected, 4, "_mm_shuffle_epi32", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.xmmi[0] = _mm_mask_shuffle_epi32(passthru.xmmi[0], k8, src.xmmi[0], 3); + emulate_shuffle(&expected, &src, &passthru, 4, 3, k8); + check_equal_nd(&res, &expected, 4, "_mm_mask_shuffle_epi32", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.xmmi[0] = _mm_maskz_shuffle_epi32(k8, src.xmmi[0], 3); + emulate_shuffle(&expected, &src, &zeros, 4, 3, k8); + check_equal_nd(&res, &expected, 4, "_mm_maskz_shuffle_epi32", __LINE__); + soft_counts32_update(); +} + +void NOINLINE do_shufflehi_epi16() { + volatile V512 res; + V512 expected; + + // checking mm512 shufflehi + soft_counts32_update(); + res.zmmi = _mm512_shufflehi_epi16(src.zmmi, 3); + emulate_shuffle16(&expected, &src, &src, 32, 3, 0xffffffff, 0); + check_equal_nd(&res, &expected, 16, "_mm512_shufflehi_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.zmmi = _mm512_mask_shufflehi_epi16(passthru.zmmi, k16, src.zmmi, 3); + emulate_shuffle16(&expected, &src, &passthru, 32, 3, k16, 0); + check_equal_nd(&res, &expected, 16, "_mm512_mask_shufflehi_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.zmmi = _mm512_maskz_shufflehi_epi16(k16, src.zmmi, 3); + emulate_shuffle16(&expected, &src, &zeros, 32, 3, k16, 0); + check_equal_nd(&res, &expected, 16, "_mm512_maskz_shufflehi_epi16", __LINE__); + soft_counts32_update(); + + // checking mm256 shufflehi + soft_counts32_update(); + res.ymmi[0] = _mm256_shufflehi_epi16(src.ymmi[0], 3); + emulate_shuffle16(&expected, &src, &zeros, 16, 3, 0xffff, 0); + check_equal_nd(&res, &expected, 8, "_mm256_shufflehi_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.ymmi[0] = _mm256_mask_shufflehi_epi16(passthru.ymmi[0], k16, src.ymmi[0], 3); + emulate_shuffle16(&expected, &src, &passthru, 16, 3, k16, 0); + check_equal_nd(&res, &expected, 8, "_mm256_mask_shufflehi_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.ymmi[0] = _mm256_maskz_shufflehi_epi16(k16, src.ymmi[0], 3); + emulate_shuffle16(&expected, &src, &zeros, 16, 3, k16, 0); + check_equal_nd(&res, &expected, 8, "_mm256_maskz_shufflehi_epi16", __LINE__); + soft_counts32_update(); + + // checking mm shufflehi + soft_counts32_update(); + res.xmmi[0] = _mm_shufflehi_epi16(src.xmmi[0], 3); + emulate_shuffle16(&expected, &src, &zeros, 8, 3, 0xff, 0); + check_equal_nd(&res, &expected, 4, "_mm_shufflehi_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.xmmi[0] = _mm_mask_shufflehi_epi16(passthru.xmmi[0], k8, src.xmmi[0], 3); + emulate_shuffle16(&expected, &src, &passthru, 8, 3, k8, 0); + check_equal_nd(&res, &expected, 4, "_mm_mask_shufflehi_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.xmmi[0] = _mm_maskz_shufflehi_epi16(k8, src.xmmi[0], 3); + emulate_shuffle16(&expected, &src, &zeros, 8, 3, k8, 0); + check_equal_nd(&res, &expected, 4, "_mm_maskz_shufflehi_epi16", __LINE__); + soft_counts32_update(); +} + +void NOINLINE do_shufflelo_epi16() { + volatile V512 res; + V512 expected; + + // checking mm512 shufflelo + soft_counts32_update(); + res.zmmi = _mm512_shufflelo_epi16(src.zmmi, 3); + emulate_shuffle16(&expected, &src, &src, 32, 3, 0xffffffff, 1); + check_equal_nd(&res, &expected, 16, "_mm512_shufflelo_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.zmmi = _mm512_mask_shufflelo_epi16(passthru.zmmi, k16, src.zmmi, 3); + emulate_shuffle16(&expected, &src, &passthru, 32, 3, k16, 1); + check_equal_nd(&res, &expected, 16, "_mm512_mask_shufflelo_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.zmmi = _mm512_maskz_shufflelo_epi16(k16, src.zmmi, 3); + emulate_shuffle16(&expected, &src, &zeros, 32, 3, k16, 1); + check_equal_nd(&res, &expected, 16, "_mm512_maskz_shufflelo_epi16", __LINE__); + soft_counts32_update(); + + // checking mm256 shufflelo + soft_counts32_update(); + res.ymmi[0] = _mm256_shufflelo_epi16(src.ymmi[0], 3); + emulate_shuffle16(&expected, &src, &zeros, 16, 3, 0xffff, 1); + check_equal_nd(&res, &expected, 8, "_mm256_shufflelo_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.ymmi[0] = _mm256_mask_shufflelo_epi16(passthru.ymmi[0], k16, src.ymmi[0], 3); + emulate_shuffle16(&expected, &src, &passthru, 16, 3, k16, 1); + check_equal_nd(&res, &expected, 8, "_mm256_mask_shufflelo_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.ymmi[0] = _mm256_maskz_shufflelo_epi16(k16, src.ymmi[0], 3); + emulate_shuffle16(&expected, &src, &zeros, 16, 3, k16, 1); + check_equal_nd(&res, &expected, 8, "_mm256_maskz_shufflelo_epi16", __LINE__); + soft_counts32_update(); + + // checking mm shufflelo + soft_counts32_update(); + res.xmmi[0] = _mm_shufflelo_epi16(src.xmmi[0], 3); + emulate_shuffle16(&expected, &src, &zeros, 8, 3, 0xff, 1); + check_equal_nd(&res, &expected, 4, "_mm_shufflelo_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.xmmi[0] = _mm_mask_shufflelo_epi16(passthru.xmmi[0], k8, src.xmmi[0], 3); + emulate_shuffle16(&expected, &src, &passthru, 8, 3, k8, 1); + check_equal_nd(&res, &expected, 4, "_mm_mask_shufflelo_epi16", __LINE__); + soft_counts32_update(); + + soft_counts32_update(); + res.xmmi[0] = _mm_maskz_shufflelo_epi16(k8, src.xmmi[0], 3); + emulate_shuffle16(&expected, &src, &zeros, 8, 3, k8, 1); + check_equal_nd(&res, &expected, 4, "_mm_maskz_shufflelo_epi16", __LINE__); + soft_counts32_update(); +} + +int main(int argc, char *argv[]) { + init(); + + do_shuffle_epi32(); + do_shufflelo_epi16(); + do_shufflehi_epi16(); + + if (n_errs != 0) { + printf("FAILED\n"); + return 1; + } + + printf("PASSED\n"); + return 0; +} |