diff options
Diffstat (limited to 'libc/src/string')
| -rw-r--r-- | libc/src/string/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | libc/src/string/memory_utils/generic/inline_strlen.h | 54 | ||||
| -rw-r--r-- | libc/src/string/string_utils.h | 20 |
3 files changed, 66 insertions, 9 deletions
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 809decfbe5f0..5c9f622d4439 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -20,6 +20,7 @@ add_header_library( libc.hdr.stdint_proxy libc.src.__support.CPP.bitset libc.src.__support.CPP.type_traits + libc.src.__support.CPP.simd libc.src.__support.common ${string_config_options} ) diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h new file mode 100644 index 000000000000..68fba2afb3a5 --- /dev/null +++ b/libc/src/string/memory_utils/generic/inline_strlen.h @@ -0,0 +1,54 @@ +//===-- Strlen for generic SIMD types -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H +#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H + +#include "src/__support/CPP/bit.h" +#include "src/__support/CPP/simd.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +// Exploit the underlying integer representation to do a variable shift. +LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m, + size_t shift) { + using bitmask_ty = cpp::internal::get_as_integer_type_t<cpp::simd_mask<char>>; + bitmask_ty r = cpp::bit_cast<bitmask_ty>(m) >> shift; + return cpp::bit_cast<cpp::simd_mask<char>>(r); +} + +[[clang::no_sanitize("address")]] LIBC_INLINE size_t +string_length(const char *src) { + constexpr cpp::simd<char> null_byte = cpp::splat('\0'); + + size_t alignment = alignof(cpp::simd<char>); + const cpp::simd<char> *aligned = reinterpret_cast<const cpp::simd<char> *>( + __builtin_align_down(src, alignment)); + + cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(aligned); + cpp::simd_mask<char> mask = cpp::simd_cast<bool>(chars == null_byte); + size_t offset = src - reinterpret_cast<const char *>(aligned); + if (cpp::any_of(shift_mask(mask, offset))) + return cpp::find_first_set(shift_mask(mask, offset)); + + for (;;) { + cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(++aligned); + cpp::simd_mask<char> mask = cpp::simd_cast<bool>(chars == null_byte); + if (cpp::any_of(mask)) + return (reinterpret_cast<const char *>(aligned) - src) + + cpp::find_first_set(mask); + } +} +} // namespace internal + +namespace string_length_impl = internal; +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index ce461581b9d9..10803488b6cf 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -23,14 +23,16 @@ #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) -#if defined(LIBC_TARGET_ARCH_IS_X86) +#if LIBC_HAS_VECTOR_TYPE +#include "src/string/memory_utils/generic/inline_strlen.h" +#elif defined(LIBC_TARGET_ARCH_IS_X86) #include "src/string/memory_utils/x86_64/inline_strlen.h" #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON) #include "src/string/memory_utils/aarch64/inline_strlen.h" #else namespace string_length_impl = LIBC_NAMESPACE::wide_read; #endif -#endif +#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ) namespace LIBC_NAMESPACE_DECL { namespace internal { @@ -210,28 +212,28 @@ LIBC_INLINE char *string_token(char *__restrict src, static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits"); cpp::bitset<256> delims; for (; *delimiter_string != '\0'; ++delimiter_string) - delims.set(static_cast<size_t>(*delimiter_string)); + delims.set(*reinterpret_cast<const unsigned char *>(delimiter_string)); - char *tok_start = src; + unsigned char *tok_start = reinterpret_cast<unsigned char *>(src); if constexpr (SkipDelim) - while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start))) + while (*tok_start != '\0' && delims.test(*tok_start)) ++tok_start; if (*tok_start == '\0' && SkipDelim) { *context = nullptr; return nullptr; } - char *tok_end = tok_start; - while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end))) + unsigned char *tok_end = tok_start; + while (*tok_end != '\0' && !delims.test(*tok_end)) ++tok_end; if (*tok_end == '\0') { *context = nullptr; } else { *tok_end = '\0'; - *context = tok_end + 1; + *context = reinterpret_cast<char *>(tok_end + 1); } - return tok_start; + return reinterpret_cast<char *>(tok_start); } LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src, |
