diff options
| author | Joseph Huber <huberjn@outlook.com> | 2025-09-02 06:11:06 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-02 06:11:06 -0500 |
| commit | eb7b162ea06091a1bc8b976ae43ee62783dc9fef (patch) | |
| tree | bc5e1f14121e67439e0172c0b75e679761cb2f4a /libc/src/string | |
| parent | abda8bed95dc3e8d3928288ac9b1e669b406cfe5 (diff) | |
[libc] Implement generic SIMD helper 'simd.h' and implement strlen (#152605)
Summary:
This PR introduces a new 'simd.h' header that implements an interface
similar to the proposed `stdx::simd` in C++. However, we instead wrap
around the LLVM internal type. This makes heavy use of the clang vector
extensions and boolean vectors, using primitive vector types
instead of a class (many benefits to this).
I use this interface to implement a generic strlen implementation, but
propose we use this for math. Right now this requires a feature only
introduced in clang-22.
Diffstat (limited to 'libc/src/string')
| -rw-r--r-- | libc/src/string/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | libc/src/string/memory_utils/generic/inline_strlen.h | 53 | ||||
| -rw-r--r-- | libc/src/string/string_utils.h | 6 |
3 files changed, 58 insertions, 2 deletions
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 809decfbe5f0..5c9f622d4439 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -20,6 +20,7 @@ add_header_library( libc.hdr.stdint_proxy libc.src.__support.CPP.bitset libc.src.__support.CPP.type_traits + libc.src.__support.CPP.simd libc.src.__support.common ${string_config_options} ) diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h new file mode 100644 index 000000000000..111da35b85ee --- /dev/null +++ b/libc/src/string/memory_utils/generic/inline_strlen.h @@ -0,0 +1,53 @@ +//===-- Strlen for generic SIMD types -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H +#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H + +#include "src/__support/CPP/simd.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +// Exploit the underlying integer representation to do a variable shift. 
+LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m, + size_t shift) { + using bitmask_ty = cpp::internal::get_as_integer_type_t<cpp::simd_mask<char>>; + bitmask_ty r = cpp::bit_cast<bitmask_ty>(m) >> shift; + return cpp::bit_cast<cpp::simd_mask<char>>(r); +} + +[[clang::no_sanitize("address")]] LIBC_INLINE size_t +string_length(const char *src) { + constexpr cpp::simd<char> null_byte = cpp::splat('\0'); + + size_t alignment = alignof(cpp::simd<char>); + const cpp::simd<char> *aligned = reinterpret_cast<const cpp::simd<char> *>( + __builtin_align_down(src, alignment)); + + cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(aligned); + cpp::simd_mask<char> mask = cpp::simd_cast<bool>(chars == null_byte); + size_t offset = src - reinterpret_cast<const char *>(aligned); + if (cpp::any_of(shift_mask(mask, offset))) + return cpp::find_first_set(shift_mask(mask, offset)); + + for (;;) { + cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(++aligned); + cpp::simd_mask<char> mask = cpp::simd_cast<bool>(chars == null_byte); + if (cpp::any_of(mask)) + return (reinterpret_cast<const char *>(aligned) - src) + + cpp::find_first_set(mask); + } +} +} // namespace internal + +namespace string_length_impl = internal; +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index ce461581b9d9..26e9adde0d66 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -23,14 +23,16 @@ #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) -#if defined(LIBC_TARGET_ARCH_IS_X86) +#if LIBC_HAS_VECTOR_TYPE +#include "src/string/memory_utils/generic/inline_strlen.h" +#elif defined(LIBC_TARGET_ARCH_IS_X86) #include "src/string/memory_utils/x86_64/inline_strlen.h" #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON) #include 
"src/string/memory_utils/aarch64/inline_strlen.h" #else namespace string_length_impl = LIBC_NAMESPACE::wide_read; #endif -#endif +#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) namespace LIBC_NAMESPACE_DECL { namespace internal { |
