diff options
| author | Joseph Huber <huberjn@outlook.com> | 2025-09-02 06:11:06 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-02 06:11:06 -0500 |
| commit | eb7b162ea06091a1bc8b976ae43ee62783dc9fef (patch) | |
| tree | bc5e1f14121e67439e0172c0b75e679761cb2f4a /libc/src | |
| parent | abda8bed95dc3e8d3928288ac9b1e669b406cfe5 (diff) | |
[libc] Implement generic SIMD helper 'simd.h' and implement strlen (#152605)
Summary:
This PR introduces a new 'simd.h' header that implements an interface
similar to the proposed `stdx::simd` in C++. However, we instead wrap
around the LLVM internal type. This makes heavy use of the clang vector
extensions and boolean vectors, using primitive vector types rather than
a class (many benefits to this).
I use this interface to implement a generic strlen implementation, but
propose we use this for math. Right now this requires a feature only
introduced in clang-22.
Diffstat (limited to 'libc/src')
| -rw-r--r-- | libc/src/__support/CPP/CMakeLists.txt | 6 | ||||
| -rw-r--r-- | libc/src/__support/CPP/algorithm.h | 6 | ||||
| -rw-r--r-- | libc/src/__support/CPP/simd.h | 227 | ||||
| -rw-r--r-- | libc/src/__support/macros/attributes.h | 6 | ||||
| -rw-r--r-- | libc/src/__support/macros/properties/cpu_features.h | 4 | ||||
| -rw-r--r-- | libc/src/string/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | libc/src/string/memory_utils/generic/inline_strlen.h | 53 | ||||
| -rw-r--r-- | libc/src/string/string_utils.h | 6 |
8 files changed, 307 insertions, 2 deletions
diff --git a/libc/src/__support/CPP/CMakeLists.txt b/libc/src/__support/CPP/CMakeLists.txt index 8b65a8839ab2..a389a6d1702f 100644 --- a/libc/src/__support/CPP/CMakeLists.txt +++ b/libc/src/__support/CPP/CMakeLists.txt @@ -210,3 +210,9 @@ add_object_library( libc.src.__support.common libc.src.__support.macros.properties.os ) + +add_header_library( + simd + HDRS + simd.h +) diff --git a/libc/src/__support/CPP/algorithm.h b/libc/src/__support/CPP/algorithm.h index 7704b3fa81f0..de0c47369d94 100644 --- a/libc/src/__support/CPP/algorithm.h +++ b/libc/src/__support/CPP/algorithm.h @@ -18,6 +18,12 @@ namespace LIBC_NAMESPACE_DECL { namespace cpp { +template <class T = void> struct plus {}; +template <class T = void> struct multiplies {}; +template <class T = void> struct bit_and {}; +template <class T = void> struct bit_or {}; +template <class T = void> struct bit_xor {}; + template <class T> LIBC_INLINE constexpr const T &max(const T &a, const T &b) { return (a < b) ? b : a; } diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h new file mode 100644 index 000000000000..972f156419d4 --- /dev/null +++ b/libc/src/__support/CPP/simd.h @@ -0,0 +1,227 @@ +//===-- Portable SIMD library similar to stdx::simd -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a generic interface into fixed-size SIMD instructions +// using the clang vector type. The API shares some similarities with the +// stdx::simd proposal, but instead chooses to use vectors as primitive types +// with several extra helper functions. 
+// +//===----------------------------------------------------------------------===// + +#include "hdr/stdint_proxy.h" +#include "src/__support/CPP/algorithm.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/CPP/type_traits/integral_constant.h" +#include "src/__support/macros/attributes.h" +#include "src/__support/macros/config.h" + +#include <stddef.h> + +#ifndef LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H +#define LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H + +#if LIBC_HAS_VECTOR_TYPE + +namespace LIBC_NAMESPACE_DECL { +namespace cpp { + +namespace internal { + +template <typename T> +using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT); + +#if defined(LIBC_TARGET_CPU_HAS_AVX512F) +template <typename T> +inline constexpr size_t native_vector_size = 64 / sizeof(T); +#elif defined(LIBC_TARGET_CPU_HAS_AVX2) +template <typename T> +inline constexpr size_t native_vector_size = 32 / sizeof(T); +#elif defined(LIBC_TARGET_CPU_HAS_SSE2) || defined(LIBC_TARGET_CPU_HAS_ARM_NEON) +template <typename T> +inline constexpr size_t native_vector_size = 16 / sizeof(T); +#else +template <typename T> inline constexpr size_t native_vector_size = 1; +#endif + +template <typename T> LIBC_INLINE constexpr T poison() { + return __builtin_nondeterministic_value(T()); +} +} // namespace internal + +// Type aliases. +template <typename T, size_t N> +using fixed_size_simd = T [[clang::ext_vector_type(N)]]; +template <typename T, size_t N = internal::native_vector_size<T>> +using simd = T [[clang::ext_vector_type(N)]]; +template <typename T> +using simd_mask = simd<bool, internal::native_vector_size<T>>; + +// Type trait helpers. 
+template <typename T> +struct simd_size : cpp::integral_constant<size_t, __builtin_vectorelements(T)> { +}; +template <class T> constexpr size_t simd_size_v = simd_size<T>::value; + +template <typename T> struct is_simd : cpp::integral_constant<bool, false> {}; +template <typename T, unsigned N> +struct is_simd<simd<T, N>> : cpp::integral_constant<bool, true> {}; +template <class T> constexpr bool is_simd_v = is_simd<T>::value; + +template <typename T> +struct is_simd_mask : cpp::integral_constant<bool, false> {}; +template <unsigned N> +struct is_simd_mask<simd<bool, N>> : cpp::integral_constant<bool, true> {}; +template <class T> constexpr bool is_simd_mask_v = is_simd_mask<T>::value; + +template <typename T> struct simd_element_type; +template <typename T, size_t N> struct simd_element_type<simd<T, N>> { + using type = T; +}; +template <typename T> +using simd_element_type_t = typename simd_element_type<T>::type; + +template <typename T> +using enable_if_simd_t = cpp::enable_if_t<is_simd_v<T>, T>; + +// Casting. +template <typename To, typename From, size_t N> +LIBC_INLINE constexpr simd<To, N> simd_cast(simd<From, N> v) { + return __builtin_convertvector(v, simd<To, N>); +} + +// SIMD mask operations. 
+template <size_t N> LIBC_INLINE constexpr bool all_of(simd<bool, N> m) { + return __builtin_reduce_and(m); +} +template <size_t N> LIBC_INLINE constexpr bool any_of(simd<bool, N> m) { + return __builtin_reduce_or(m); +} +template <size_t N> LIBC_INLINE constexpr bool none_of(simd<bool, N> m) { + return !any_of(m); +} +template <size_t N> LIBC_INLINE constexpr bool some_of(simd<bool, N> m) { + return any_of(m) && !all_of(m); +} +template <size_t N> LIBC_INLINE constexpr int popcount(simd<bool, N> m) { + return __builtin_popcountg(m); +} +template <size_t N> LIBC_INLINE constexpr int find_first_set(simd<bool, N> m) { + return __builtin_ctzg(m); +} +template <size_t N> LIBC_INLINE constexpr int find_last_set(simd<bool, N> m) { + constexpr size_t size = simd_size_v<simd<bool, N>>; + return size - __builtin_clzg(m); +} + +// Elementwise operations. +template <typename T, size_t N> +LIBC_INLINE constexpr simd<T, N> min(simd<T, N> x, simd<T, N> y) { + return __builtin_elementwise_min(x, y); +} +template <typename T, size_t N> +LIBC_INLINE constexpr simd<T, N> max(simd<T, N> x, simd<T, N> y) { + return __builtin_elementwise_max(x, y); +} + +// Reduction operations. 
+template <typename T, size_t N, typename Op = cpp::plus<>> +LIBC_INLINE constexpr T reduce(simd<T, N> v, Op op = {}) { + return reduce(v, op); +} +template <typename T, size_t N> +LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::plus<>) { + return __builtin_reduce_add(v); +} +template <typename T, size_t N> +LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::multiplies<>) { + return __builtin_reduce_mul(v); +} +template <typename T, size_t N> +LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_and<>) { + return __builtin_reduce_and(v); +} +template <typename T, size_t N> +LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_or<>) { + return __builtin_reduce_or(v); +} +template <typename T, size_t N> +LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_xor<>) { + return __builtin_reduce_xor(v); +} +template <typename T, size_t N> LIBC_INLINE constexpr T hmin(simd<T, N> v) { + return __builtin_reduce_min(v); +} +template <typename T, size_t N> LIBC_INLINE constexpr T hmax(simd<T, N> v) { + return __builtin_reduce_max(v); +} + +// Accessor helpers. 
+template <typename T> +LIBC_INLINE enable_if_simd_t<T> load_unaligned(const void *ptr) { + T tmp; + __builtin_memcpy(&tmp, ptr, sizeof(T)); + return tmp; +} +template <typename T> +LIBC_INLINE enable_if_simd_t<T> load_aligned(const void *ptr) { + return load_unaligned<T>(__builtin_assume_aligned(ptr, alignof(T))); +} +template <typename T> +LIBC_INLINE enable_if_simd_t<T> store_unaligned(T v, void *ptr) { + __builtin_memcpy(ptr, &v, sizeof(T)); +} +template <typename T> +LIBC_INLINE enable_if_simd_t<T> store_aligned(T v, void *ptr) { + store_unaligned<T>(v, __builtin_assume_aligned(ptr, alignof(T))); +} +template <typename T> +LIBC_INLINE enable_if_simd_t<T> +masked_load(simd<bool, simd_size_v<T>> m, void *ptr, + T passthru = internal::poison<simd_element_type<T>>()) { + return __builtin_masked_load(m, ptr, passthru); +} +template <typename T> +LIBC_INLINE enable_if_simd_t<T> masked_store(simd<bool, simd_size_v<T>> m, T v, + void *ptr) { + __builtin_masked_store( + m, v, static_cast<T *>(__builtin_assume_aligned(ptr, alignof(T)))); +} + +// Construction helpers. +template <typename T, size_t N> LIBC_INLINE constexpr simd<T, N> splat(T v) { + return simd<T, N>(v); +} +template <typename T> LIBC_INLINE constexpr simd<T> splat(T v) { + return splat<T, simd_size_v<simd<T>>>(v); +} +template <typename T, unsigned N> +LIBC_INLINE constexpr simd<T, N> iota(T base = T(0), T step = T(1)) { + simd<T, N> v{}; + for (unsigned i = 0; i < N; ++i) + v[i] = base + T(i) * step; + return v; +} +template <typename T> +LIBC_INLINE constexpr simd<T> iota(T base = T(0), T step = T(1)) { + return iota<T, simd_size_v<simd<T>>>(base, step); +} + +// Conditional helpers. +template <typename T, size_t N> +LIBC_INLINE constexpr simd<T, N> select(simd<bool, N> m, simd<T, N> x, + simd<T, N> y) { + return m ? x : y; +} + +// TODO: where expressions, scalar overloads, ABI types. 
+ +} // namespace cpp +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_HAS_VECTOR_TYPE +#endif diff --git a/libc/src/__support/macros/attributes.h b/libc/src/__support/macros/attributes.h index 4ff374b0e4fb..d350a06125f0 100644 --- a/libc/src/__support/macros/attributes.h +++ b/libc/src/__support/macros/attributes.h @@ -73,4 +73,10 @@ LIBC_THREAD_MODE_EXTERNAL. #define LIBC_PREFERED_TYPE(TYPE) #endif +#if __has_attribute(ext_vector_type) && __has_feature(ext_vector_type_boolean) +#define LIBC_HAS_VECTOR_TYPE 1 +#else +#define LIBC_HAS_VECTOR_TYPE 0 +#endif + #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_ATTRIBUTES_H diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h index fde30eadfd83..fc6099ca6ccc 100644 --- a/libc/src/__support/macros/properties/cpu_features.h +++ b/libc/src/__support/macros/properties/cpu_features.h @@ -59,6 +59,10 @@ #endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE #endif // __ARM_FP +#if defined(__ARM_NEON) +#define LIBC_TARGET_CPU_HAS_ARM_NEON +#endif + #if defined(__riscv_flen) // https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc #if defined(__riscv_zfhmin) diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 809decfbe5f0..5c9f622d4439 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -20,6 +20,7 @@ add_header_library( libc.hdr.stdint_proxy libc.src.__support.CPP.bitset libc.src.__support.CPP.type_traits + libc.src.__support.CPP.simd libc.src.__support.common ${string_config_options} ) diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h new file mode 100644 index 000000000000..111da35b85ee --- /dev/null +++ b/libc/src/string/memory_utils/generic/inline_strlen.h @@ -0,0 +1,53 @@ +//===-- Strlen for generic SIMD types -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 
with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H +#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H + +#include "src/__support/CPP/simd.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +// Exploit the underlying integer representation to do a variable shift. +LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m, + size_t shift) { + using bitmask_ty = cpp::internal::get_as_integer_type_t<cpp::simd_mask<char>>; + bitmask_ty r = cpp::bit_cast<bitmask_ty>(m) >> shift; + return cpp::bit_cast<cpp::simd_mask<char>>(r); +} + +[[clang::no_sanitize("address")]] LIBC_INLINE size_t +string_length(const char *src) { + constexpr cpp::simd<char> null_byte = cpp::splat('\0'); + + size_t alignment = alignof(cpp::simd<char>); + const cpp::simd<char> *aligned = reinterpret_cast<const cpp::simd<char> *>( + __builtin_align_down(src, alignment)); + + cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(aligned); + cpp::simd_mask<char> mask = cpp::simd_cast<bool>(chars == null_byte); + size_t offset = src - reinterpret_cast<const char *>(aligned); + if (cpp::any_of(shift_mask(mask, offset))) + return cpp::find_first_set(shift_mask(mask, offset)); + + for (;;) { + cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(++aligned); + cpp::simd_mask<char> mask = cpp::simd_cast<bool>(chars == null_byte); + if (cpp::any_of(mask)) + return (reinterpret_cast<const char *>(aligned) - src) + + cpp::find_first_set(mask); + } +} +} // namespace internal + +namespace string_length_impl = internal; +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H diff --git a/libc/src/string/string_utils.h 
b/libc/src/string/string_utils.h index ce461581b9d9..26e9adde0d66 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -23,14 +23,16 @@ #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) -#if defined(LIBC_TARGET_ARCH_IS_X86) +#if LIBC_HAS_VECTOR_TYPE +#include "src/string/memory_utils/generic/inline_strlen.h" +#elif defined(LIBC_TARGET_ARCH_IS_X86) #include "src/string/memory_utils/x86_64/inline_strlen.h" #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON) #include "src/string/memory_utils/aarch64/inline_strlen.h" #else namespace string_length_impl = LIBC_NAMESPACE::wide_read; #endif -#endif +#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) namespace LIBC_NAMESPACE_DECL { namespace internal { |
