diff options
Diffstat (limited to 'SingleSource/UnitTests/Vector/AVX512F/expand_compress.c')
-rw-r--r-- | SingleSource/UnitTests/Vector/AVX512F/expand_compress.c | 515 |
1 file changed, 515 insertions, 0 deletions
/*
 * Tests for expand intrinsics family.
 * This test was created to check the correctness
 * of the following intrinsics support:
 * _mm512_mask_compress*()
 * _mm512_mask_compressstoreu*()
 * _mm512_mask_expand*()
 * _mm512_mask_expandloadu*()
 * _mm512_maskz_compress*()
 * _mm512_maskz_expand*()
 * _mm512_maskz_expandloadu*()
 *
 * Strategy: each intrinsic result is compared element-wise against a
 * scalar "model" implementation of the same operation.
 */

#include "m512_test_util.h"
#include <stdio.h>
#include <string.h>

/* Volatile zero index; used below to defeat load PRE (see soft_*_update). */
volatile __int64 vol0;

/* Integer, float and double source vectors, initialized in init(). */
V512 isrc1;
V512 isrc2;

V512 fsrc1;
V512 fsrc2;

V512 dsrc1;
V512 dsrc2;

/* res holds the intrinsic result, mres the scalar-model result. */
V512 res;
V512 mres;

/* Write masks used by the 8-element (64-bit) and 16-element (32-bit) cases. */
__mmask8 k8;
__mmask16 k16;

/*
 * Fill the source vectors with distinct, easily recognized patterns and
 * set the test masks.  The volatile index keeps the compiler from
 * pre-computing the loops.
 */
void NOINLINE init() {
  volatile int i;

  for (i = 0; i < 16; i++) {
    isrc1.s32[i] = i;
    isrc2.s32[i] = i + 1;

    fsrc1.f32[i] = i * 1.0f;
    fsrc2.f32[i] = i * 2.0f;
  }

  for (i = 0; i < 8; i++) {
    dsrc1.f64[i] = i * 4.0;
    dsrc2.f64[i] = i * 5.0;
  }

  k8 = 0x5a;
  k16 = 0x25d6;
}

/*
 * Use this between tests to make compiler think src was updated.
 * Prevents PRE'ing of a load of src.
 */
#define soft_isrc1_update() isrc1.xmmi[vol0] = isrc1.xmmi[vol0]
#define soft_fsrc1_update() fsrc1.xmmi[vol0] = fsrc1.xmmi[vol0]
#define soft_dsrc1_update() dsrc1.xmmi[vol0] = dsrc1.xmmi[vol0]

/*
 * Model expand intrinsic behavior.
 * Expand reads input2 contiguously and scatters its elements into the
 * mask-selected lanes of the output; unselected lanes are merged from
 * input1 (mask variants) or zeroed (maskz variants).
 */

/* 32-bit merge expand: masked lanes take successive input2 values,
 * unmasked lanes copy the corresponding input1 element. */
void NOINLINE model_mask_expand_i32(void *input1, __int64 mask, void *input2,
                                    void *output, int n_elems) {
  int i, j = 0;
  int *v1i = (int *)input1;
  int *v2i = (int *)input2;
  int *v3o = (int *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[i] = v2i[j];
      j++;
    } else {
      v3o[i] = v1i[i];
    }
  }
}

/* 32-bit zeroing expand: unmasked lanes become 0. */
void NOINLINE model_maskz_expand_i32(__int64 mask, void *input2, void *output,
                                     int n_elems) {
  int i, j = 0;
  int *v2i = (int *)input2;
  int *v3o = (int *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[i] = v2i[j];
      j++;
    } else {
      v3o[i] = 0;
    }
  }
}

/* 64-bit merge expand. */
void NOINLINE model_mask_expand_i64(void *input1, __int64 mask, void *input2,
                                    void *output, int n_elems) {
  int i, j = 0;
  __int64 *v1i = (__int64 *)input1;
  __int64 *v2i = (__int64 *)input2;
  __int64 *v3o = (__int64 *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[i] = v2i[j];
      j++;
    } else {
      v3o[i] = v1i[i];
    }
  }
}

/* 64-bit zeroing expand. */
void NOINLINE model_maskz_expand_i64(__int64 mask, void *input2, void *output,
                                     int n_elems) {
  int i, j = 0;
  __int64 *v2i = (__int64 *)input2;
  __int64 *v3o = (__int64 *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[i] = v2i[j];
      j++;
    } else {
      v3o[i] = 0;
    }
  }
}

/* Single-precision merge expand. */
void NOINLINE model_mask_expand_f32(void *input1, __int64 mask, void *input2,
                                    void *output, int n_elems) {
  int i, j = 0;
  float *v1i = (float *)input1;
  float *v2i = (float *)input2;
  float *v3o = (float *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[i] = v2i[j];
      j++;
    } else {
      v3o[i] = v1i[i];
    }
  }
}

/* Single-precision zeroing expand. */
void NOINLINE model_maskz_expand_f32(__int64 mask, void *input2, void *output,
                                     int n_elems) {
  int i, j = 0;
  float *v2i = (float *)input2;
  float *v3o = (float *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[i] = v2i[j];
      j++;
    } else {
      v3o[i] = 0.f;
    }
  }
}

/* Double-precision merge expand. */
void NOINLINE model_mask_expand_f64(void *input1, __int64 mask, void *input2,
                                    void *output, int n_elems) {
  int i, j = 0;
  double *v1i = (double *)input1;
  double *v2i = (double *)input2;
  double *v3o = (double *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[i] = v2i[j];
      j++;
    } else {
      v3o[i] = v1i[i];
    }
  }
}

/* Double-precision zeroing expand. */
void NOINLINE model_maskz_expand_f64(__int64 mask, void *input2, void *output,
                                     int n_elems) {
  int i, j = 0;
  double *v2i = (double *)input2;
  double *v3o = (double *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[i] = v2i[j];
      j++;
    } else {
      v3o[i] = 0.;
    }
  }
}

/*
 * Each GEN_* macro runs one intrinsic into res, runs the matching scalar
 * model into mres with the same operands, then compares the two with
 * `checker` (which reports the intrinsic name and line on mismatch).
 * `prefix` (i/f/d) selects the source globals, `suffix` the V512 member.
 */

/* Register-to-register masked op: intrin(old, mask, src). */
#define GEN_MASK_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, modeller,  \
                            checker)                                         \
  res.suffix = intrin(prefix##src2.suffix, mask, prefix##src1.suffix);       \
  modeller((void *)&prefix##src2.suffix, mask, (void *)&prefix##src1.suffix, \
           (void *)&mres.suffix, n_elem);                                    \
  checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)

/* Masked load op: intrin(old, mask, mem). */
#define GEN_MASK_LOAD_CHECK_CASE(intrin, prefix, suffix, mask, n_elem,       \
                                 modeller, checker)                          \
  res.suffix = intrin(prefix##src2.suffix, mask, &prefix##src1.suffix);      \
  modeller((void *)&prefix##src2.suffix, mask, (void *)&prefix##src1.suffix, \
           (void *)&mres.suffix, n_elem);                                    \
  checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)

/* Zero-masked register op: intrin(mask, src). */
#define GEN_MASKZ_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, modeller,  \
                             checker)                                         \
  res.suffix = intrin(mask, prefix##src1.suffix);                             \
  modeller(mask, (void *)&prefix##src1.suffix, (void *)&mres.suffix, n_elem); \
  checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)

/* Zero-masked load op: intrin(mask, mem). */
#define GEN_MASKZ_LOAD_CHECK_CASE(intrin, prefix, suffix, mask, n_elem,       \
                                  modeller, checker)                          \
  res.suffix = intrin(mask, &prefix##src1.suffix);                            \
  modeller(mask, (void *)&prefix##src1.suffix, (void *)&mres.suffix, n_elem); \
  checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)

/* Exercise all 512-bit expand variants (epi32/epi64/ps/pd x
 * mask/maskz x register/load). */
void NOINLINE do_m512_expand() {
  /* Shadows the file-scope res; volatile discourages the compiler from
   * caching or eliding the per-case result stores. */
  volatile V512 res;

  soft_isrc1_update();
  GEN_MASK_CHECK_CASE(_mm512_mask_expand_epi32, i, zmmi, k16, 16,
                      model_mask_expand_i32, check_equal_nd);

  soft_isrc1_update();
  GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_epi32, i, zmmi, k16, 16,
                           model_mask_expand_i32, check_equal_nd);

  soft_isrc1_update();
  GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_epi32, i, zmmi, k16, 16,
                       model_maskz_expand_i32, check_equal_nd);

  soft_isrc1_update();
  GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_epi32, i, zmmi, k16, 16,
                            model_maskz_expand_i32, check_equal_nd);

  soft_isrc1_update();
  GEN_MASK_CHECK_CASE(_mm512_mask_expand_epi64, i, zmmi, k8, 8,
                      model_mask_expand_i64, check_equal_nq);

  soft_isrc1_update();
  GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_epi64, i, zmmi, k8, 8,
                           model_mask_expand_i64, check_equal_nq);

  soft_isrc1_update();
  GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_epi64, i, zmmi, k8, 8,
                       model_maskz_expand_i64, check_equal_nq);

  soft_isrc1_update();
  GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_epi64, i, zmmi, k8, 8,
                            model_maskz_expand_i64, check_equal_nq);

  soft_fsrc1_update();
  GEN_MASK_CHECK_CASE(_mm512_mask_expand_ps, f, zmm, k16, 16,
                      model_mask_expand_f32, check_equal_nsf);

  soft_fsrc1_update();
  GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_ps, f, zmm, k16, 16,
                           model_mask_expand_f32, check_equal_nsf);

  soft_fsrc1_update();
  GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_ps, f, zmm, k16, 16,
                       model_maskz_expand_f32, check_equal_nsf);

  soft_fsrc1_update();
  GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_ps, f, zmm, k16, 16,
                            model_maskz_expand_f32, check_equal_nsf);

  soft_dsrc1_update();
  GEN_MASK_CHECK_CASE(_mm512_mask_expand_pd, d, zmmd, k8, 8,
                      model_mask_expand_f64, check_equal_ndf);

  soft_dsrc1_update();
  GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_pd, d, zmmd, k8, 8,
                           model_mask_expand_f64, check_equal_ndf);

  soft_dsrc1_update();
  GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_pd, d, zmmd, k8, 8,
                       model_maskz_expand_f64, check_equal_ndf);

  soft_dsrc1_update();
  GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_pd, d, zmmd, k8, 8,
                            model_maskz_expand_f64, check_equal_ndf);
}

/*
 * Model compress intrinsic behavior.
 * Compress packs the mask-selected lanes of input2 to the front of the
 * output; the remaining tail is merged from input1 (mask variants) or
 * zeroed (maskz variants).
 */

/* 32-bit merge compress. */
void NOINLINE model_mask_compress_i32(void *input1, __int64 mask, void *input2,
                                      void *output, int n_elems) {
  int i, j = 0;
  int *v1i = (int *)input1;
  int *v2i = (int *)input2;
  int *v3o = (int *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[j] = v2i[i];
      j++;
    }
  }

  for (i = j; i < n_elems; i++) {
    v3o[i] = v1i[i];
  }
}

/* 32-bit zeroing compress. */
void NOINLINE model_maskz_compress_i32(__int64 mask, void *input2, void *output,
                                       int n_elems) {
  int i, j = 0;
  int *v2i = (int *)input2;
  int *v3o = (int *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[j] = v2i[i];
      j++;
    }
  }

  for (i = j; i < n_elems; i++) {
    v3o[i] = 0;
  }
}

/* 64-bit merge compress. */
void NOINLINE model_mask_compress_i64(void *input1, __int64 mask, void *input2,
                                      void *output, int n_elems) {
  int i, j = 0;
  __int64 *v1i = (__int64 *)input1;
  __int64 *v2i = (__int64 *)input2;
  __int64 *v3o = (__int64 *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[j] = v2i[i];
      j++;
    }
  }

  for (i = j; i < n_elems; i++) {
    v3o[i] = v1i[i];
  }
}

/* 64-bit zeroing compress. */
void NOINLINE model_maskz_compress_i64(__int64 mask, void *input2, void *output,
                                       int n_elems) {
  int i, j = 0;
  __int64 *v2i = (__int64 *)input2;
  __int64 *v3o = (__int64 *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[j] = v2i[i];
      j++;
    }
  }

  for (i = j; i < n_elems; i++) {
    v3o[i] = 0;
  }
}

/* Single-precision merge compress. */
void NOINLINE model_mask_compress_f32(void *input1, __int64 mask, void *input2,
                                      void *output, int n_elems) {
  int i, j = 0;
  float *v1i = (float *)input1;
  float *v2i = (float *)input2;
  float *v3o = (float *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[j] = v2i[i];
      j++;
    }
  }

  for (i = j; i < n_elems; i++) {
    v3o[i] = v1i[i];
  }
}

/* Single-precision zeroing compress. */
void NOINLINE model_maskz_compress_f32(__int64 mask, void *input2, void *output,
                                       int n_elems) {
  int i, j = 0;
  float *v2i = (float *)input2;
  float *v3o = (float *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[j] = v2i[i];
      j++;
    }
  }

  for (i = j; i < n_elems; i++) {
    v3o[i] = 0;
  }
}

/* Double-precision merge compress. */
void NOINLINE model_mask_compress_f64(void *input1, __int64 mask, void *input2,
                                      void *output, int n_elems) {
  int i, j = 0;
  double *v1i = (double *)input1;
  double *v2i = (double *)input2;
  double *v3o = (double *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[j] = v2i[i];
      j++;
    }
  }

  for (i = j; i < n_elems; i++) {
    v3o[i] = v1i[i];
  }
}

/* Double-precision zeroing compress. */
void NOINLINE model_maskz_compress_f64(__int64 mask, void *input2, void *output,
                                       int n_elems) {
  int i, j = 0;
  double *v2i = (double *)input2;
  double *v3o = (double *)output;

  for (i = 0; i < n_elems; i++) {
    if ((mask & (1LL << i)) != 0) {
      v3o[j] = v2i[i];
      j++;
    }
  }

  for (i = j; i < n_elems; i++) {
    v3o[i] = 0;
  }
}

/*
 * Masked compress-store op: intrin(mem, mask, src).
 * NOTE(review): compressstoreu writes only the compressed prefix of res,
 * while the model (and checker) cover all n_elem elements.  The tail
 * comparison works because each store case runs right after the
 * GEN_MASK_CHECK_CASE for the same element type, so res already holds
 * the matching mask_compress result — preserve that ordering.
 */
#define GEN_MASK_STORE_CHECK_CASE(intrin, prefix, suffix, mask, n_elem,      \
                                  modeller, checker)                         \
  intrin((void *)&res.suffix, mask, prefix##src1.suffix);                    \
  modeller((void *)&prefix##src2.suffix, mask, (void *)&prefix##src1.suffix, \
           (void *)&mres.suffix, n_elem);                                    \
  checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)

/* Exercise all 512-bit compress variants (epi32/epi64/ps/pd x
 * mask/store/maskz). */
void NOINLINE do_m512_compress() {
  /* Shadows the file-scope res; volatile discourages the compiler from
   * caching or eliding the per-case result stores. */
  volatile V512 res;

  soft_isrc1_update();
  GEN_MASK_CHECK_CASE(_mm512_mask_compress_epi32, i, zmmi, k16, 16,
                      model_mask_compress_i32, check_equal_nd);

  soft_isrc1_update();
  GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_epi32, i, zmmi, k16, 16,
                            model_mask_compress_i32, check_equal_nd);

  soft_isrc1_update();
  GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_epi32, i, zmmi, k16, 16,
                       model_maskz_compress_i32, check_equal_nd);

  soft_isrc1_update();
  GEN_MASK_CHECK_CASE(_mm512_mask_compress_epi64, i, zmmi, k8, 8,
                      model_mask_compress_i64, check_equal_nq);

  soft_isrc1_update();
  GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_epi64, i, zmmi, k8, 8,
                            model_mask_compress_i64, check_equal_nq);

  soft_isrc1_update();
  GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_epi64, i, zmmi, k8, 8,
                       model_maskz_compress_i64, check_equal_nq);

  soft_fsrc1_update();
  GEN_MASK_CHECK_CASE(_mm512_mask_compress_ps, f, zmm, k16, 16,
                      model_mask_compress_f32, check_equal_nsf);

  soft_fsrc1_update();
  GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_ps, f, zmm, k16, 16,
                            model_mask_compress_f32, check_equal_nsf);

  soft_fsrc1_update();
  GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_ps, f, zmm, k16, 16,
                       model_maskz_compress_f32, check_equal_nsf);

  soft_dsrc1_update();
  GEN_MASK_CHECK_CASE(_mm512_mask_compress_pd, d, zmmd, k8, 8,
                      model_mask_compress_f64, check_equal_ndf);

  soft_dsrc1_update();
  GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_pd, d, zmmd, k8, 8,
                            model_mask_compress_f64, check_equal_ndf);

  soft_dsrc1_update();
  GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_pd, d, zmmd, k8, 8,
                       model_maskz_compress_f64, check_equal_ndf);
}

/* n_errs is accumulated by the check_equal_* helpers (m512_test_util.h). */
int main() {
  init();

  do_m512_expand();
  do_m512_compress();

  if (n_errs != 0) {
    printf("FAILED\n");
    return 1;
  }

  printf("PASSED\n");
  return 0;
}