aboutsummaryrefslogtreecommitdiff
path: root/libstdc++-v3
diff options
context:
space:
mode:
author Matthias Kretz <m.kretz@gsi.de> 2023-02-23 14:55:08 +0100
committer Matthias Kretz <m.kretz@gsi.de> 2023-05-25 09:04:03 +0200
commit 9add8ab5ecf5172d794a869e5a20e5494c97a775 (patch)
tree 7314d11ed72241ff050d34c30fd8ba594111a6e2 /libstdc++-v3
parent 0cd11b5fd551cf9b96424dea60664646b64b1aa6 (diff)
libstdc++: Fix simd compilation with Clang
Clang fails to compile some constant expressions involving simd.
Therefore, just disable this non-conforming extension for clang.

Fix AVX512 blend implementation for Clang. It was converting the bitmask
to bool before, which is obviously wrong. Instead use a Clang builtin to
convert the bitmask to vector-mask before using a vector blend ?:. A
similar change is required for the masked unary implementation, because
the GCC builtins do not exist on Clang.

Signed-off-by: Matthias Kretz <m.kretz@gsi.de>

libstdc++-v3/ChangeLog:

	* include/experimental/bits/simd_detail.h: Don't declare the
	simd API as constexpr with Clang.
	* include/experimental/bits/simd_x86.h (__movm): New.
	(_S_blend_avx512): Resolve FIXME. Implement blend using __movm
	and ?:.
	(_SimdImplX86::_S_masked_unary): Clang does not implement the
	same builtins. Implement the function using __movm, ?:, and -
	operators on vector_size types instead.

(cherry picked from commit 8ff3ca2d94721fab78f167d435d4ea4fa4fdca6a)
Diffstat (limited to 'libstdc++-v3')
-rw-r--r-- libstdc++-v3/include/experimental/bits/simd_detail.h 2
-rw-r--r-- libstdc++-v3/include/experimental/bits/simd_x86.h 58
2 files changed, 55 insertions, 5 deletions
diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h
index 2e73daac9be..03d219844cd 100644
--- a/libstdc++-v3/include/experimental/bits/simd_detail.h
+++ b/libstdc++-v3/include/experimental/bits/simd_detail.h
@@ -227,7 +227,7 @@
#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
-#if defined __STRICT_ANSI__ && __STRICT_ANSI__
+#if __STRICT_ANSI__ || defined __clang__
#define _GLIBCXX_SIMD_CONSTEXPR
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
#else
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index f76f838d036..7703bbfddcf 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -363,6 +363,53 @@ template <typename _Tp>
// }}}
+#ifdef __clang__
+template <size_t _Np, typename _Tp, typename _Kp> // _Np selects the vector width; only sizeof(_Tp) is used; _Kp is the bitmask type
+ _GLIBCXX_SIMD_INTRINSIC constexpr auto
+ __movm(_Kp __k) noexcept // Clang-only: turns bitmask __k into a vector mask via the matching cvtmask2* builtin
+ {
+ static_assert(is_unsigned_v<_Kp>); // the bitmask must be an unsigned integer type
+ if constexpr (sizeof(_Tp) == 1 && __have_avx512bw) // 1-byte elements: taken only when __have_avx512bw is set
+ {
+ if constexpr (_Np <= 16 && __have_avx512vl) // 128- and 256-bit forms are additionally gated on __have_avx512vl
+ return __builtin_ia32_cvtmask2b128(__k);
+ else if constexpr (_Np <= 32 && __have_avx512vl)
+ return __builtin_ia32_cvtmask2b256(__k);
+ else // larger _Np, or no AVX512VL: use the 512-bit builtin
+ return __builtin_ia32_cvtmask2b512(__k);
+ }
+ else if constexpr (sizeof(_Tp) == 2 && __have_avx512bw) // 2-byte elements: taken only when __have_avx512bw is set
+ {
+ if constexpr (_Np <= 8 && __have_avx512vl)
+ return __builtin_ia32_cvtmask2w128(__k);
+ else if constexpr (_Np <= 16 && __have_avx512vl)
+ return __builtin_ia32_cvtmask2w256(__k);
+ else
+ return __builtin_ia32_cvtmask2w512(__k);
+ }
+ else if constexpr (sizeof(_Tp) == 4 && __have_avx512dq) // 4-byte elements: taken only when __have_avx512dq is set
+ {
+ if constexpr (_Np <= 4 && __have_avx512vl)
+ return __builtin_ia32_cvtmask2d128(__k);
+ else if constexpr (_Np <= 8 && __have_avx512vl)
+ return __builtin_ia32_cvtmask2d256(__k);
+ else
+ return __builtin_ia32_cvtmask2d512(__k);
+ }
+ else if constexpr (sizeof(_Tp) == 8 && __have_avx512dq) // 8-byte elements: taken only when __have_avx512dq is set
+ {
+ if constexpr (_Np <= 2 && __have_avx512vl)
+ return __builtin_ia32_cvtmask2q128(__k);
+ else if constexpr (_Np <= 4 && __have_avx512vl)
+ return __builtin_ia32_cvtmask2q256(__k);
+ else
+ return __builtin_ia32_cvtmask2q512(__k);
+ }
+ else // no size/ISA combination matched
+ __assert_unreachable<_Tp>(); // NOTE(review): presumably a compile-time failure for unsupported instantiations — confirm against its definition
+ }
+#endif // __clang__
+
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
#include "simd_x86_conversions.h"
#endif
@@ -619,14 +666,13 @@ struct _CommonImplX86 : _CommonImplBuiltin
_GLIBCXX_SIMD_INTRINSIC static _TV
_S_blend_avx512(const _Kp __k, const _TV __a, const _TV __b) noexcept
{
-#ifdef __clang__
- // FIXME: this does a boolean choice, not a blend
- return __k ? __a : __b;
-#else
static_assert(__is_vector_type_v<_TV>);
using _Tp = typename _VectorTraits<_TV>::value_type;
static_assert(sizeof(_TV) >= 16);
static_assert(sizeof(_Tp) <= 8);
+#ifdef __clang__
+ return __movm<_VectorTraits<_TV>::_S_full_size, _Tp>(__k) ? __b : __a;
+#else
using _IntT
= conditional_t<(sizeof(_Tp) > 2),
conditional_t<sizeof(_Tp) == 4, int, long long>,
@@ -3482,6 +3528,9 @@ template <typename _Abi>
// optimize masked unary increment and decrement as masked sub +/-1
constexpr int __pm_one
= is_same_v<_Op<void>, __increment<void>> ? -1 : 1;
+#ifdef __clang__
+ return __movm<_Np, _Tp>(__k._M_data) ? __v._M_data - __pm_one : __v._M_data;
+#else // __clang__
if constexpr (is_integral_v<_Tp>)
{
constexpr bool __lp64 = sizeof(long) == sizeof(long long);
@@ -3525,6 +3574,7 @@ template <typename _Abi>
_GLIBCXX_SIMD_MASK_SUB(8, 16, subpd128);
#undef _GLIBCXX_SIMD_MASK_SUB
}
+#endif // __clang__
}
else
return _Base::template _S_masked_unary<_Op>(__k, __v);