From eb7b162ea06091a1bc8b976ae43ee62783dc9fef Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 2 Sep 2025 06:11:06 -0500 Subject: [libc] Implement generic SIMD helper 'simd.h' and implement strlen (#152605) Summary: This PR introduces a new 'simd.h' header that implements an interface similar to the proposed `stdx::simd` in C++. However, we instead wrap around the LLVM internal type. This makes heavy use of the clang vector extensions and boolean vectors, instead using primitive vector types instead of a class (many benefits to this). I use this interface to implement a generic strlen implementation, but propse we use this for math. Right now this requires a feature only introduced in clang-22. --- libc/src/string/CMakeLists.txt | 1 + .../string/memory_utils/generic/inline_strlen.h | 53 ++++++++++++++++++++++ libc/src/string/string_utils.h | 6 ++- 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 libc/src/string/memory_utils/generic/inline_strlen.h (limited to 'libc/src/string') diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 809decfbe5f0..5c9f622d4439 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -20,6 +20,7 @@ add_header_library( libc.hdr.stdint_proxy libc.src.__support.CPP.bitset libc.src.__support.CPP.type_traits + libc.src.__support.CPP.simd libc.src.__support.common ${string_config_options} ) diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h new file mode 100644 index 000000000000..111da35b85ee --- /dev/null +++ b/libc/src/string/memory_utils/generic/inline_strlen.h @@ -0,0 +1,53 @@ +//===-- Strlen for generic SIMD types -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H +#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H + +#include "src/__support/CPP/simd.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +// Exploit the underlying integer representation to do a variable shift. +LIBC_INLINE constexpr cpp::simd_mask shift_mask(cpp::simd_mask m, + size_t shift) { + using bitmask_ty = cpp::internal::get_as_integer_type_t>; + bitmask_ty r = cpp::bit_cast(m) >> shift; + return cpp::bit_cast>(r); +} + +[[clang::no_sanitize("address")]] LIBC_INLINE size_t +string_length(const char *src) { + constexpr cpp::simd null_byte = cpp::splat('\0'); + + size_t alignment = alignof(cpp::simd); + const cpp::simd *aligned = reinterpret_cast *>( + __builtin_align_down(src, alignment)); + + cpp::simd chars = cpp::load_aligned>(aligned); + cpp::simd_mask mask = cpp::simd_cast(chars == null_byte); + size_t offset = src - reinterpret_cast(aligned); + if (cpp::any_of(shift_mask(mask, offset))) + return cpp::find_first_set(shift_mask(mask, offset)); + + for (;;) { + cpp::simd chars = cpp::load_aligned>(++aligned); + cpp::simd_mask mask = cpp::simd_cast(chars == null_byte); + if (cpp::any_of(mask)) + return (reinterpret_cast(aligned) - src) + + cpp::find_first_set(mask); + } +} +} // namespace internal + +namespace string_length_impl = internal; +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index ce461581b9d9..26e9adde0d66 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -23,14 +23,16 @@ #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) -#if defined(LIBC_TARGET_ARCH_IS_X86) +#if LIBC_HAS_VECTOR_TYPE +#include "src/string/memory_utils/generic/inline_strlen.h" +#elif defined(LIBC_TARGET_ARCH_IS_X86) #include "src/string/memory_utils/x86_64/inline_strlen.h" #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON) #include "src/string/memory_utils/aarch64/inline_strlen.h" #else namespace string_length_impl = LIBC_NAMESPACE::wide_read; #endif -#endif +#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) namespace LIBC_NAMESPACE_DECL { namespace internal { -- cgit v1.2.3