summaryrefslogtreecommitdiff
path: root/libc/src/string
diff options
context:
space:
mode:
authorJoseph Huber <huberjn@outlook.com>2025-09-02 06:11:06 -0500
committerGitHub <noreply@github.com>2025-09-02 06:11:06 -0500
commiteb7b162ea06091a1bc8b976ae43ee62783dc9fef (patch)
treebc5e1f14121e67439e0172c0b75e679761cb2f4a /libc/src/string
parentabda8bed95dc3e8d3928288ac9b1e669b406cfe5 (diff)
[libc] Implement generic SIMD helper 'simd.h' and implement strlen (#152605)
Summary: This PR introduces a new 'simd.h' header that implements an interface similar to the proposed `stdx::simd` in C++. However, we instead wrap around the LLVM internal type. This makes heavy use of the clang vector extensions and boolean vectors, using primitive vector types instead of a class (many benefits to this). I use this interface to implement a generic strlen implementation, but propose we use this for math. Right now this requires a feature only introduced in clang-22.
Diffstat (limited to 'libc/src/string')
-rw-r--r--libc/src/string/CMakeLists.txt1
-rw-r--r--libc/src/string/memory_utils/generic/inline_strlen.h53
-rw-r--r--libc/src/string/string_utils.h6
3 files changed, 58 insertions, 2 deletions
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index 809decfbe5f0..5c9f622d4439 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -20,6 +20,7 @@ add_header_library(
libc.hdr.stdint_proxy
libc.src.__support.CPP.bitset
libc.src.__support.CPP.type_traits
+ libc.src.__support.CPP.simd
libc.src.__support.common
${string_config_options}
)
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
new file mode 100644
index 000000000000..111da35b85ee
--- /dev/null
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -0,0 +1,53 @@
+//===-- Strlen for generic SIMD types -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
+
+#include "src/__support/CPP/simd.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Shifts the integer representation of the mask `m` right by `shift` bit
+// positions and returns the result as a mask again. The mask is reinterpreted
+// as a plain integer with bit_cast because the shift amount is only known at
+// run time.
+// NOTE(review): callers rely on each lane occupying one bit, with lane 0 in
+// the least significant position -- confirm against simd.h.
+LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m,
+                                                      size_t shift) {
+  using mask_bits_t =
+      cpp::internal::get_as_integer_type_t<cpp::simd_mask<char>>;
+  const mask_bits_t bits = cpp::bit_cast<mask_bits_t>(m);
+  const mask_bits_t shifted = bits >> shift;
+  return cpp::bit_cast<cpp::simd_mask<char>>(shifted);
+}
+
+// Returns the number of characters before the first null byte of the string
+// at `src`, scanning one aligned SIMD vector at a time.
+//
+// The first load is taken from `src` rounded down to the vector alignment, so
+// it may cover bytes located before `src`. The comparison mask of that load is
+// shifted by the distance between the aligned address and `src`, so matches in
+// those leading bytes are discarded and the remaining bit positions are
+// relative to `src`. Every later load reads a whole aligned vector, which can
+// extend past the terminator; AddressSanitizer instrumentation is disabled on
+// this function, presumably for that reason.
+[[clang::no_sanitize("address")]] LIBC_INLINE size_t
+string_length(const char *src) {
+  using vector_t = cpp::simd<char>;
+  using mask_t = cpp::simd_mask<char>;
+
+  constexpr vector_t zeros = cpp::splat('\0');
+
+  // Start at the aligned vector that contains the first byte of the string.
+  const vector_t *block = reinterpret_cast<const vector_t *>(
+      __builtin_align_down(src, alignof(vector_t)));
+
+  // Head: compare the first vector and ignore everything located before `src`.
+  vector_t head = cpp::load_aligned<vector_t>(block);
+  mask_t head_hits = cpp::simd_cast<bool>(head == zeros);
+  size_t lead = src - reinterpret_cast<const char *>(block);
+  mask_t visible_hits = shift_mask(head_hits, lead);
+  if (cpp::any_of(visible_hits))
+    return cpp::find_first_set(visible_hits);
+
+  // Body: step forward one full vector per iteration until a null byte is
+  // found; its index within the vector is added to the vector's distance from
+  // `src`.
+  for (++block;; ++block) {
+    vector_t data = cpp::load_aligned<vector_t>(block);
+    mask_t hits = cpp::simd_cast<bool>(data == zeros);
+    if (!cpp::any_of(hits))
+      continue;
+    return (reinterpret_cast<const char *>(block) - src) +
+           cpp::find_first_set(hits);
+  }
+}
+} // namespace internal
+
+namespace string_length_impl = internal;
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index ce461581b9d9..26e9adde0d66 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -23,14 +23,16 @@
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
-#if defined(LIBC_TARGET_ARCH_IS_X86)
+#if LIBC_HAS_VECTOR_TYPE
+#include "src/string/memory_utils/generic/inline_strlen.h"
+#elif defined(LIBC_TARGET_ARCH_IS_X86)
#include "src/string/memory_utils/x86_64/inline_strlen.h"
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON)
#include "src/string/memory_utils/aarch64/inline_strlen.h"
#else
namespace string_length_impl = LIBC_NAMESPACE::wide_read;
#endif
-#endif
+#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
namespace LIBC_NAMESPACE_DECL {
namespace internal {