diff options
Diffstat (limited to 'SingleSource/UnitTests/Vector/AVX512DQ')
4 files changed, 233 insertions, 0 deletions
diff --git a/SingleSource/UnitTests/Vector/AVX512DQ/CMakeLists.txt b/SingleSource/UnitTests/Vector/AVX512DQ/CMakeLists.txt new file mode 100644 index 00000000..2c7a5183 --- /dev/null +++ b/SingleSource/UnitTests/Vector/AVX512DQ/CMakeLists.txt @@ -0,0 +1,5 @@ +list(APPEND CPPFLAGS -I ${CMAKE_SOURCE_DIR}/${VECTOR_MAIN_DIR}) +list(APPEND LDFLAGS -lm) +list(APPEND CFLAGS "-march=${X86CPU_ARCH}") +list(APPEND CFLAGS -fms-extensions) +llvm_singlesource(PREFIX "Vector-AVX512DQ-") diff --git a/SingleSource/UnitTests/Vector/AVX512DQ/Makefile b/SingleSource/UnitTests/Vector/AVX512DQ/Makefile new file mode 100644 index 00000000..d4269fdf --- /dev/null +++ b/SingleSource/UnitTests/Vector/AVX512DQ/Makefile @@ -0,0 +1,11 @@ +# SingleSource/UnitTests/Vector/AVX512DQ/Makefile + +DIRS = +LEVEL = ../../../.. +CFLAGS += -fms-extensions -march=native -mavx512dq -I${SourceDir}/.. +LDFLAGS += -lm + +include $(LEVEL)/SingleSource/Makefile.singlesrc + +TARGET_FLAGS += -march=native -mavx512dq +LCCFLAGS += -march=native -mavx512dq diff --git a/SingleSource/UnitTests/Vector/AVX512DQ/casts.c b/SingleSource/UnitTests/Vector/AVX512DQ/casts.c new file mode 100644 index 00000000..07b9317e --- /dev/null +++ b/SingleSource/UnitTests/Vector/AVX512DQ/casts.c @@ -0,0 +1,215 @@ +/* + * Here we check for _mm512_cast* and _mm512_xor_* intrinsics. + */ +#include "m512_test_util.h" + +volatile int vol = 0; /* Inhibit optimization */ + +__m512 f1, f2, f3, f3_orig; +__m512d d1, d2, d3, d3_orig; +__m512i i1, i2, i3, i3_orig; + +void NOINLINE set_nonzero(void *vp, int c) { + int i; + V512 *v = (V512 *)vp; + + for (i = 0; i < 16; i++) { + v->u32[i] = 10 * i * i - 3 * i + c; + if (v->u32[i] == 0) { + v->u32[i] = 1234; + } + } +} + +void NOINLINE check_zero(void *vp, char *banner) { + int i; + V512 *v = (V512 *)vp; + + for (i = 0; i < 16; i++) { + if (v->u32[i] + vol != 0) { + printf("ERROR: %s failed\n", banner ? banner : ""); + n_errs++; + break; + } + } +} + +void NOINLINE do_setzero() { + set_nonzero(&f1, 18); + f1 = _mm512_setzero_ps(); + check_zero(&f1, "_mm512_setzero_ps"); + + set_nonzero(&f2, 19); + f2 = _mm512_setzero(); + check_zero(&f2, "_mm512_setzero"); + + set_nonzero(&d1, 20); + d1 = _mm512_setzero_pd(); + check_zero(&d1, "_mm512_setzero_pd"); + + set_nonzero(&i1, 21); + i1 = _mm512_setzero_epi32(); + check_zero(&i1, "_mm512_setzero_epi32"); +} + +void NOINLINE do_cast() { + set_nonzero(&f1, 1); + d1 = _mm512_setzero_pd(); + f1 = _mm512_castpd_ps(d1); + check_zero(&f1, "_mm512_castpd_ps"); + + set_nonzero(&i1, 1); + d1 = _mm512_setzero_pd(); + i1 = _mm512_castpd_si512(d1); + check_zero(&i1, "_mm512_castpd_si512"); + + set_nonzero(&d2, 1); + f2 = _mm512_setzero_ps(); + d2 = _mm512_castps_pd(f2); + check_zero(&d2, "_mm512_castps_pd"); + + set_nonzero(&i2, 1); + f2 = _mm512_setzero_ps(); + i2 = _mm512_castps_si512(f2); + check_zero(&i2, "_mm512_castps_si512"); + + set_nonzero(&f3, 1); + i3 = _mm512_setzero_epi32(); + f3 = _mm512_castsi512_ps(i3); + check_zero(&f3, "_mm512_castsi512_ps"); + + set_nonzero(&d3, 1); + i3 = _mm512_setzero_epi32(); + d3 = _mm512_castsi512_pd(i3); + check_zero(&d3, "_mm512_castsi512_pd"); +} + +void NOINLINE do_size_casts() { + __m128d xd; + __m128 xs; + __m128i xi; + __m256d yd; + __m256 ys; + __m256i yi; + __m512d zd; + __m512 zs; + __m512i zi; + + set_nonzero(&f1, 1); + set_nonzero(&i1, 1); + set_nonzero(&d1, 1); + + xd = _mm512_castpd512_pd128(d1); + check_equal_nd(&xd, &d1, 4, "_mm512_castpd512_pd128", __LINE__); + xs = _mm512_castps512_ps128(f1); + check_equal_nd(&xs, &f1, 4, "_mm512_castps512_ps128", __LINE__); + xi = _mm512_castsi512_si128(i1); + check_equal_nd(&xi, &i1, 4, "_mm512_castsi512_si128", __LINE__); + + yd = _mm512_castpd512_pd256(d1); + check_equal_nd(&yd, &d1, 8, "_mm512_castpd512_pd256", __LINE__); + ys = _mm512_castps512_ps256(f1); + check_equal_nd(&ys, &f1, 8, "_mm512_castps512_ps256", __LINE__); + yi = _mm512_castsi512_si256(i1); + check_equal_nd(&yi, &i1, 8, "_mm512_castsi512_si256", __LINE__); + + zd = _mm512_castpd128_pd512(xd); + check_equal_nd(&zd, &d1, 4, "_mm512_castpd128_pd512", __LINE__); + zs = _mm512_castps128_ps512(xs); + check_equal_nd(&zs, &f1, 4, "_mm512_castps128_ps512", __LINE__); + zi = _mm512_castsi128_si512(xi); + check_equal_nd(&zi, &i1, 4, "_mm512_castsi128_si512", __LINE__); + + zd = _mm512_castpd256_pd512(yd); + check_equal_nd(&zd, &d1, 8, "_mm512_castpd256_pd512", __LINE__); + zs = _mm512_castps256_ps512(ys); + check_equal_nd(&zs, &f1, 8, "_mm512_castps256_ps512", __LINE__); + zi = _mm512_castsi256_si512(yi); + check_equal_nd(&zi, &i1, 8, "_mm512_castsi256_si512", __LINE__); +} + +void NOINLINE check_xor(void *vp1, void *vp2, void *vp3, void *vp_orig, + int mask, char *banner) { + int i; + V512 *v1 = (V512 *)vp1; + V512 *v2 = (V512 *)vp2; + V512 *v3 = (V512 *)vp3; + V512 *v_orig = (V512 *)vp_orig; + + for (i = 0; i < 16; i++) { + int actual = v3->u32[i]; + int expected = v_orig->u32[i]; + if (mask & (1 << i)) { + expected = v1->u32[i] ^ v2->u32[i]; + } + if (actual + vol != expected - vol) { + printf("ERROR: %s failed\n", banner ? banner : ""); + n_errs++; + break; + } + } +} + +void NOINLINE do_xor() { + set_nonzero(&i1, 99); + set_nonzero(&i2, 100); + set_nonzero(&f1, 33); + set_nonzero(&f2, -35); + set_nonzero(&d1, -11); + set_nonzero(&d2, 14); + + set_nonzero(&i3, 1000); + i3_orig = i3; + i3 = _mm512_xor_epi32(i1, i2); + check_xor(&i1, &i2, &i3, &i3_orig, 0xffff, "_mm512_xor_epi32"); + + set_nonzero(&i3, 1500); + i3_orig = i3; + i3 = _mm512_mask_xor_epi32(i3_orig, 0x5555, i1, i2); + check_xor(&i1, &i2, &i3, &i3_orig, 0x5555, "_mm512_mask_xor_epi32"); + + set_nonzero(&f3, 1000); + f3_orig = f3; + f3 = _mm512_xor_ps(f1, f2); + check_xor(&f1, &f2, &f3, &f3_orig, 0xffff, "_mm512_xor_ps"); + + set_nonzero(&f3, 1500); + f3_orig = f3; + f3 = _mm512_mask_xor_ps(f3_orig, 0x5795, f1, f2); + check_xor(&f1, &f2, &f3, &f3_orig, 0x5795, "_mm512_mask_xor_ps"); + + set_nonzero(&i3, 2000); + i3_orig = i3; + i3 = _mm512_xor_epi64(i1, i2); + check_xor(&i1, &i2, &i3, &i3_orig, 0xffff, "_mm512_xor_epi64"); + + set_nonzero(&i3, 2500); + i3_orig = i3; + i3 = _mm512_mask_xor_epi64(i3_orig, 0x55, i1, i2); + check_xor(&i1, &i2, &i3, &i3_orig, 0x3333, "_mm512_mask_xor_epi64"); + + set_nonzero(&d3, 2000); + d3_orig = d3; + d3 = _mm512_xor_pd(d1, d2); + check_xor(&d1, &d2, &d3, &d3_orig, 0xffff, "_mm512_xor_pd"); + + set_nonzero(&d3, 2500); + d3_orig = d3; + d3 = _mm512_mask_xor_pd(d3_orig, 0x55, d1, d2); + check_xor(&d1, &d2, &d3, &d3_orig, 0x3333, "_mm512_mask_xor_pd"); +} + +int main() { + do_setzero(); + do_cast(); + do_size_casts(); + do_xor(); + + if (n_errs != 0) { + printf("FAILED\n"); + return 1; + } + + printf("PASSED\n"); + return 0; +} diff --git a/SingleSource/UnitTests/Vector/AVX512DQ/casts.reference_output b/SingleSource/UnitTests/Vector/AVX512DQ/casts.reference_output new file mode 100644 index 00000000..bfae62d0 --- /dev/null +++ b/SingleSource/UnitTests/Vector/AVX512DQ/casts.reference_output @@ -0,0 +1,2 @@ +PASSED +exit 0 |