Diffstat (limited to 'SingleSource/UnitTests/Vector/AVX512F/expand_compress.c')
-rw-r--r--  SingleSource/UnitTests/Vector/AVX512F/expand_compress.c  515
1 file changed, 515 insertions(+), 0 deletions(-)
diff --git a/SingleSource/UnitTests/Vector/AVX512F/expand_compress.c b/SingleSource/UnitTests/Vector/AVX512F/expand_compress.c
new file mode 100644
index 00000000..27f6a128
--- /dev/null
+++ b/SingleSource/UnitTests/Vector/AVX512F/expand_compress.c
@@ -0,0 +1,515 @@
+/*
+ * Tests for the expand and compress intrinsic families.
+ * This test checks the correctness of the following
+ * intrinsics:
+ * _mm512_mask_compress*()
+ * _mm512_mask_compressstoreu*()
+ * _mm512_mask_expand*()
+ * _mm512_mask_expandloadu*()
+ * _mm512_maskz_compress*()
+ * _mm512_maskz_expand*()
+ * _mm512_maskz_expandloadu*()
+ */
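+
+/*
+ * Each intrinsic result is checked against a portable scalar model of the
+ * operation.  V512, NOINLINE, the check_equal_* helpers and the n_errs
+ * counter are provided by m512_test_util.h.
+ */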
+
+#include "m512_test_util.h"
+#include <stdio.h>
+#include <string.h>
+
+volatile __int64 vol0;
+
+V512 isrc1;
+V512 isrc2;
+
+V512 fsrc1;
+V512 fsrc2;
+
+V512 dsrc1;
+V512 dsrc2;
+
+V512 res;
+V512 mres;
+
+__mmask8 k8;
+__mmask16 k16;
+
+void NOINLINE init() {
+ volatile int i;
+
+ for (i = 0; i < 16; i++) {
+ isrc1.s32[i] = i;
+ isrc2.s32[i] = i + 1;
+
+ fsrc1.f32[i] = i * 1.0f;
+ fsrc2.f32[i] = i * 2.0f;
+ }
+
+ for (i = 0; i < 8; i++) {
+ dsrc1.f64[i] = i * 4.0;
+ dsrc2.f64[i] = i * 5.0;
+ }
+
+ k8 = 0x5a;
+ k16 = 0x25d6;
+}
+
+/*
+ * Use these between tests to make the compiler think src was updated.
+ * This prevents PRE'ing of a load of src.
+ */
+#define soft_isrc1_update() isrc1.xmmi[vol0] = isrc1.xmmi[vol0]
+#define soft_fsrc1_update() fsrc1.xmmi[vol0] = fsrc1.xmmi[vol0]
+#define soft_dsrc1_update() dsrc1.xmmi[vol0] = dsrc1.xmmi[vol0]
+
+/*
+ * Model expand intrinsic behavior.
+ */
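+
+/*
+ * Illustrative example (values not used by the test): with an element mask
+ * of 0b0110 and n_elems = 4, the mask form produces
+ *   output[0] = input1[0]   (bit 0 clear: keep the passthrough element)
+ *   output[1] = input2[0]   (bit 1 set: next sequential source element)
+ *   output[2] = input2[1]
+ *   output[3] = input1[3]
+ * while the maskz form writes 0 to the masked-off lanes instead.
+ */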
+void NOINLINE model_mask_expand_i32(void *input1, __int64 mask, void *input2,
+ void *output, int n_elems) {
+ int i, j = 0;
+ int *v1i = (int *)input1;
+ int *v2i = (int *)input2;
+ int *v3o = (int *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[i] = v2i[j];
+ j++;
+ } else {
+ v3o[i] = v1i[i];
+ }
+ }
+}
+
+void NOINLINE model_maskz_expand_i32(__int64 mask, void *input2, void *output,
+ int n_elems) {
+ int i, j = 0;
+ int *v2i = (int *)input2;
+ int *v3o = (int *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[i] = v2i[j];
+ j++;
+ } else {
+ v3o[i] = 0;
+ }
+ }
+}
+
+void NOINLINE model_mask_expand_i64(void *input1, __int64 mask, void *input2,
+ void *output, int n_elems) {
+ int i, j = 0;
+ __int64 *v1i = (__int64 *)input1;
+ __int64 *v2i = (__int64 *)input2;
+ __int64 *v3o = (__int64 *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[i] = v2i[j];
+ j++;
+ } else {
+ v3o[i] = v1i[i];
+ }
+ }
+}
+
+void NOINLINE model_maskz_expand_i64(__int64 mask, void *input2, void *output,
+ int n_elems) {
+ int i, j = 0;
+ __int64 *v2i = (__int64 *)input2;
+ __int64 *v3o = (__int64 *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[i] = v2i[j];
+ j++;
+ } else {
+ v3o[i] = 0;
+ }
+ }
+}
+
+void NOINLINE model_mask_expand_f32(void *input1, __int64 mask, void *input2,
+ void *output, int n_elems) {
+ int i, j = 0;
+ float *v1i = (float *)input1;
+ float *v2i = (float *)input2;
+ float *v3o = (float *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[i] = v2i[j];
+ j++;
+ } else {
+ v3o[i] = v1i[i];
+ }
+ }
+}
+
+void NOINLINE model_maskz_expand_f32(__int64 mask, void *input2, void *output,
+ int n_elems) {
+ int i, j = 0;
+ float *v2i = (float *)input2;
+ float *v3o = (float *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[i] = v2i[j];
+ j++;
+ } else {
+ v3o[i] = 0.f;
+ }
+ }
+}
+
+void NOINLINE model_mask_expand_f64(void *input1, __int64 mask, void *input2,
+ void *output, int n_elems) {
+ int i, j = 0;
+ double *v1i = (double *)input1;
+ double *v2i = (double *)input2;
+ double *v3o = (double *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[i] = v2i[j];
+ j++;
+ } else {
+ v3o[i] = v1i[i];
+ }
+ }
+}
+
+void NOINLINE model_maskz_expand_f64(__int64 mask, void *input2, void *output,
+ int n_elems) {
+ int i, j = 0;
+ double *v2i = (double *)input2;
+ double *v3o = (double *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[i] = v2i[j];
+ j++;
+ } else {
+ v3o[i] = 0.;
+ }
+ }
+}
+
+#define GEN_MASK_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, modeller, \
+ checker) \
+ res.suffix = intrin(prefix##src2.suffix, mask, prefix##src1.suffix); \
+ modeller((void *)&prefix##src2.suffix, mask, (void *)&prefix##src1.suffix, \
+ (void *)&mres.suffix, n_elem); \
+ checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)
+
+#define GEN_MASK_LOAD_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, \
+ modeller, checker) \
+ res.suffix = intrin(prefix##src2.suffix, mask, &prefix##src1.suffix); \
+ modeller((void *)&prefix##src2.suffix, mask, (void *)&prefix##src1.suffix, \
+ (void *)&mres.suffix, n_elem); \
+ checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)
+
+#define GEN_MASKZ_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, modeller, \
+ checker) \
+ res.suffix = intrin(mask, prefix##src1.suffix); \
+ modeller(mask, (void *)&prefix##src1.suffix, (void *)&mres.suffix, n_elem); \
+ checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)
+
+#define GEN_MASKZ_LOAD_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, \
+ modeller, checker) \
+ res.suffix = intrin(mask, &prefix##src1.suffix); \
+ modeller(mask, (void *)&prefix##src1.suffix, (void *)&mres.suffix, n_elem); \
+ checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)
+
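+/*
+ * For illustration, the first use below,
+ *   GEN_MASK_CHECK_CASE(_mm512_mask_expand_epi32, i, zmmi, k16, 16,
+ *                       model_mask_expand_i32, check_equal_nd);
+ * expands to (casts omitted):
+ *   res.zmmi = _mm512_mask_expand_epi32(isrc2.zmmi, k16, isrc1.zmmi);
+ *   model_mask_expand_i32(&isrc2.zmmi, k16, &isrc1.zmmi, &mres.zmmi, 16);
+ *   check_equal_nd(&res.zmmi, &mres.zmmi, 16, "_mm512_mask_expand_epi32",
+ *                  __LINE__);
+ */
+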
+void NOINLINE do_m512_expand() {
+ volatile V512 res;
+
+ soft_isrc1_update();
+ GEN_MASK_CHECK_CASE(_mm512_mask_expand_epi32, i, zmmi, k16, 16,
+ model_mask_expand_i32, check_equal_nd);
+
+ soft_isrc1_update();
+ GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_epi32, i, zmmi, k16, 16,
+ model_mask_expand_i32, check_equal_nd);
+
+ soft_isrc1_update();
+ GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_epi32, i, zmmi, k16, 16,
+ model_maskz_expand_i32, check_equal_nd);
+
+ soft_isrc1_update();
+ GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_epi32, i, zmmi, k16, 16,
+ model_maskz_expand_i32, check_equal_nd);
+
+ soft_isrc1_update();
+ GEN_MASK_CHECK_CASE(_mm512_mask_expand_epi64, i, zmmi, k8, 8,
+ model_mask_expand_i64, check_equal_nq);
+
+ soft_isrc1_update();
+ GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_epi64, i, zmmi, k8, 8,
+ model_mask_expand_i64, check_equal_nq);
+
+ soft_isrc1_update();
+ GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_epi64, i, zmmi, k8, 8,
+ model_maskz_expand_i64, check_equal_nq);
+
+ soft_isrc1_update();
+ GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_epi64, i, zmmi, k8, 8,
+ model_maskz_expand_i64, check_equal_nq);
+
+ soft_fsrc1_update();
+ GEN_MASK_CHECK_CASE(_mm512_mask_expand_ps, f, zmm, k16, 16,
+ model_mask_expand_f32, check_equal_nsf);
+
+ soft_fsrc1_update();
+ GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_ps, f, zmm, k16, 16,
+ model_mask_expand_f32, check_equal_nsf);
+
+ soft_fsrc1_update();
+ GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_ps, f, zmm, k16, 16,
+ model_maskz_expand_f32, check_equal_nsf);
+
+ soft_fsrc1_update();
+ GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_ps, f, zmm, k16, 16,
+ model_maskz_expand_f32, check_equal_nsf);
+
+ soft_dsrc1_update();
+ GEN_MASK_CHECK_CASE(_mm512_mask_expand_pd, d, zmmd, k8, 8,
+ model_mask_expand_f64, check_equal_ndf);
+
+ soft_dsrc1_update();
+ GEN_MASK_LOAD_CHECK_CASE(_mm512_mask_expandloadu_pd, d, zmmd, k8, 8,
+ model_mask_expand_f64, check_equal_ndf);
+
+ soft_dsrc1_update();
+ GEN_MASKZ_CHECK_CASE(_mm512_maskz_expand_pd, d, zmmd, k8, 8,
+ model_maskz_expand_f64, check_equal_ndf);
+
+ soft_dsrc1_update();
+ GEN_MASKZ_LOAD_CHECK_CASE(_mm512_maskz_expandloadu_pd, d, zmmd, k8, 8,
+ model_maskz_expand_f64, check_equal_ndf);
+}
+
+/*
+ * Model compress intrinsic behavior.
+ */
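+
+/*
+ * Illustrative example (values not used by the test): with an element mask
+ * of 0b0110 and n_elems = 4, the mask form produces
+ *   output[0] = input2[1]   (selected elements packed to the front)
+ *   output[1] = input2[2]
+ *   output[2] = input1[2]   (tail filled from input1 at the same positions)
+ *   output[3] = input1[3]
+ * while the maskz form zeroes the tail instead.
+ */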
+void NOINLINE model_mask_compress_i32(void *input1, __int64 mask, void *input2,
+ void *output, int n_elems) {
+ int i, j = 0;
+ int *v1i = (int *)input1;
+ int *v2i = (int *)input2;
+ int *v3o = (int *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[j] = v2i[i];
+ j++;
+ }
+ }
+
+ for (i = j; i < n_elems; i++) {
+ v3o[i] = v1i[i];
+ }
+}
+
+void NOINLINE model_maskz_compress_i32(__int64 mask, void *input2, void *output,
+ int n_elems) {
+ int i, j = 0;
+ int *v2i = (int *)input2;
+ int *v3o = (int *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[j] = v2i[i];
+ j++;
+ }
+ }
+
+ for (i = j; i < n_elems; i++) {
+ v3o[i] = 0;
+ }
+}
+
+void NOINLINE model_mask_compress_i64(void *input1, __int64 mask, void *input2,
+ void *output, int n_elems) {
+ int i, j = 0;
+ __int64 *v1i = (__int64 *)input1;
+ __int64 *v2i = (__int64 *)input2;
+ __int64 *v3o = (__int64 *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[j] = v2i[i];
+ j++;
+ }
+ }
+
+ for (i = j; i < n_elems; i++) {
+ v3o[i] = v1i[i];
+ }
+}
+
+void NOINLINE model_maskz_compress_i64(__int64 mask, void *input2, void *output,
+ int n_elems) {
+ int i, j = 0;
+ __int64 *v2i = (__int64 *)input2;
+ __int64 *v3o = (__int64 *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[j] = v2i[i];
+ j++;
+ }
+ }
+
+ for (i = j; i < n_elems; i++) {
+ v3o[i] = 0;
+ }
+}
+
+void NOINLINE model_mask_compress_f32(void *input1, __int64 mask, void *input2,
+ void *output, int n_elems) {
+ int i, j = 0;
+ float *v1i = (float *)input1;
+ float *v2i = (float *)input2;
+ float *v3o = (float *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[j] = v2i[i];
+ j++;
+ }
+ }
+
+ for (i = j; i < n_elems; i++) {
+ v3o[i] = v1i[i];
+ }
+}
+
+void NOINLINE model_maskz_compress_f32(__int64 mask, void *input2, void *output,
+ int n_elems) {
+ int i, j = 0;
+ float *v2i = (float *)input2;
+ float *v3o = (float *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[j] = v2i[i];
+ j++;
+ }
+ }
+
+ for (i = j; i < n_elems; i++) {
+ v3o[i] = 0;
+ }
+}
+
+void NOINLINE model_mask_compress_f64(void *input1, __int64 mask, void *input2,
+ void *output, int n_elems) {
+ int i, j = 0;
+ double *v1i = (double *)input1;
+ double *v2i = (double *)input2;
+ double *v3o = (double *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[j] = v2i[i];
+ j++;
+ }
+ }
+
+ for (i = j; i < n_elems; i++) {
+ v3o[i] = v1i[i];
+ }
+}
+
+void NOINLINE model_maskz_compress_f64(__int64 mask, void *input2, void *output,
+ int n_elems) {
+ int i, j = 0;
+ double *v2i = (double *)input2;
+ double *v3o = (double *)output;
+
+ for (i = 0; i < n_elems; i++) {
+ if ((mask & (1LL << i)) != 0) {
+ v3o[j] = v2i[i];
+ j++;
+ }
+ }
+
+ for (i = j; i < n_elems; i++) {
+ v3o[i] = 0;
+ }
+}
+
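+/*
+ * Note: the compressstoreu intrinsics write only the popcount(mask) selected
+ * elements to memory.  Each store-form case below runs right after the
+ * corresponding register-form compress case, so the tail of res still holds
+ * the passthrough (src2) values, which is also what the model produces for
+ * those positions.
+ */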
+#define GEN_MASK_STORE_CHECK_CASE(intrin, prefix, suffix, mask, n_elem, \
+ modeller, checker) \
+ intrin((void *)&res.suffix, mask, prefix##src1.suffix); \
+ modeller((void *)&prefix##src2.suffix, mask, (void *)&prefix##src1.suffix, \
+ (void *)&mres.suffix, n_elem); \
+ checker((void *)&res.suffix, (void *)&mres.suffix, n_elem, #intrin, __LINE__)
+
+void NOINLINE do_m512_compress() {
+ volatile V512 res;
+
+ soft_isrc1_update();
+ GEN_MASK_CHECK_CASE(_mm512_mask_compress_epi32, i, zmmi, k16, 16,
+ model_mask_compress_i32, check_equal_nd);
+
+ soft_isrc1_update();
+ GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_epi32, i, zmmi, k16, 16,
+ model_mask_compress_i32, check_equal_nd);
+
+ soft_isrc1_update();
+ GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_epi32, i, zmmi, k16, 16,
+ model_maskz_compress_i32, check_equal_nd);
+
+ soft_isrc1_update();
+ GEN_MASK_CHECK_CASE(_mm512_mask_compress_epi64, i, zmmi, k8, 8,
+ model_mask_compress_i64, check_equal_nq);
+
+ soft_isrc1_update();
+ GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_epi64, i, zmmi, k8, 8,
+ model_mask_compress_i64, check_equal_nq);
+
+ soft_isrc1_update();
+ GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_epi64, i, zmmi, k8, 8,
+ model_maskz_compress_i64, check_equal_nq);
+
+ soft_fsrc1_update();
+ GEN_MASK_CHECK_CASE(_mm512_mask_compress_ps, f, zmm, k16, 16,
+ model_mask_compress_f32, check_equal_nsf);
+
+ soft_fsrc1_update();
+ GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_ps, f, zmm, k16, 16,
+ model_mask_compress_f32, check_equal_nsf);
+
+ soft_fsrc1_update();
+ GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_ps, f, zmm, k16, 16,
+ model_maskz_compress_f32, check_equal_nsf);
+
+ soft_dsrc1_update();
+ GEN_MASK_CHECK_CASE(_mm512_mask_compress_pd, d, zmmd, k8, 8,
+ model_mask_compress_f64, check_equal_ndf);
+
+ soft_dsrc1_update();
+ GEN_MASK_STORE_CHECK_CASE(_mm512_mask_compressstoreu_pd, d, zmmd, k8, 8,
+ model_mask_compress_f64, check_equal_ndf);
+
+ soft_dsrc1_update();
+ GEN_MASKZ_CHECK_CASE(_mm512_maskz_compress_pd, d, zmmd, k8, 8,
+ model_maskz_compress_f64, check_equal_ndf);
+}
+
+int main() {
+ init();
+
+ do_m512_expand();
+ do_m512_compress();
+
+ if (n_errs != 0) {
+ printf("FAILED\n");
+ return 1;
+ }
+
+ printf("PASSED\n");
+ return 0;
+}