diff options
| author | Joseph Huber <huberjn@outlook.com> | 2025-08-20 17:17:12 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-20 17:17:12 -0500 |
| commit | c80d1483c6d787edf62ff9e86b1e97af5eb5abf9 (patch) | |
| tree | fdcf3d2d5aa2ae688d47442bf8cc4e6dd5dd1800 /libc/src/string | |
| parent | ac8f0bb070c9071742b6f6ce03bebc9d87217830 (diff) | |
[libc] Enable wide-read memory operations by default on Linux (#154602)
Summary:
This patch changes the linux build to use the wide reads on the memory
operations by default. These memory functions will now potentially read
outside of the bounds explicitly allowed by the current function. While
technically undefined behavior in the standard, plenty of C library
implementations do this. it will not cause a segmentation fault on linux
as long as you do not cross a page boundary, and because we are only
*reading* memory it should not have atomic effects.
Diffstat (limited to 'libc/src/string')
| -rw-r--r-- | libc/src/string/memory_utils/aarch64/inline_strlen.h | 9 | ||||
| -rw-r--r-- | libc/src/string/memory_utils/x86_64/inline_strlen.h | 23 |
2 files changed, 18 insertions, 14 deletions
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h index 79487f4752b8..ba28b1894e67 100644 --- a/libc/src/string/memory_utils/aarch64/inline_strlen.h +++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h @@ -17,14 +17,15 @@ namespace LIBC_NAMESPACE_DECL { namespace neon { -[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { +[[maybe_unused]] LIBC_INLINE static size_t string_length(const char *src) { using Vector __attribute__((may_alias)) = uint8x8_t; uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector); - Vector *block_ptr = reinterpret_cast<Vector *>(src - misalign_bytes); + const Vector *block_ptr = + reinterpret_cast<const Vector *>(src - misalign_bytes); Vector v = *block_ptr; Vector vcmp = vceqz_u8(v); - uint64x1_t cmp_mask = vreinterpret_u64_s8(vcmp); + uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp); uint64_t cmp = vget_lane_u64(cmp_mask, 0); cmp = cmp >> (misalign_bytes << 3); if (cmp) @@ -34,7 +35,7 @@ namespace neon { ++block_ptr; v = *block_ptr; vcmp = vceqz_u8(v); - cmp_mask = vreinterpret_u64_s8(vcmp); + cmp_mask = vreinterpret_u64_u8(vcmp); cmp = vget_lane_u64(cmp_mask, 0); if (cmp) return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) - diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h index 5eb184cbf810..379fbc11af8c 100644 --- a/libc/src/string/memory_utils/x86_64/inline_strlen.h +++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h @@ -18,22 +18,22 @@ namespace LIBC_NAMESPACE_DECL { namespace string_length_internal { // Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero. template <typename Vector, typename Mask> -Mask CompareAndMask(const Vector *block_ptr); +LIBC_INLINE static Mask compare_and_mask(const Vector *block_ptr); template <typename Vector, typename Mask, - decltype(CompareAndMask<Vector, Mask>)> + decltype(compare_and_mask<Vector, Mask>)> size_t string_length_vector(const char *src) { uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector); const Vector *block_ptr = reinterpret_cast<const Vector *>(src - misalign_bytes); - auto cmp = CompareAndMask<Vector, Mask>(block_ptr) >> misalign_bytes; + auto cmp = compare_and_mask<Vector, Mask>(block_ptr) >> misalign_bytes; if (cmp) return cpp::countr_zero(cmp); while (true) { block_ptr++; - cmp = CompareAndMask<Vector, Mask>(block_ptr); + cmp = compare_and_mask<Vector, Mask>(block_ptr); if (cmp) return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) - reinterpret_cast<uintptr_t>(src) + @@ -42,7 +42,8 @@ size_t string_length_vector(const char *src) { } template <> -uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) { +LIBC_INLINE uint32_t +compare_and_mask<__m128i, uint32_t>(const __m128i *block_ptr) { __m128i v = _mm_load_si128(block_ptr); __m128i z = _mm_setzero_si128(); __m128i c = _mm_cmpeq_epi8(z, v); @@ -52,13 +53,14 @@ uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) { namespace sse2 { [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { return string_length_vector<__m128i, uint32_t, - CompareAndMask<__m128i, uint32_t>>(src); + compare_and_mask<__m128i, uint32_t>>(src); } } // namespace sse2 #if defined(__AVX2__) template <> -uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) { +LIBC_INLINE uint32_t +compare_and_mask<__m256i, uint32_t>(const __m256i *block_ptr) { __m256i v = _mm256_load_si256(block_ptr); __m256i z = _mm256_setzero_si256(); __m256i c = _mm256_cmpeq_epi8(z, v); @@ -68,14 +70,15 @@ uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) { namespace avx2 { [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { return string_length_vector<__m256i, uint32_t, - CompareAndMask<__m256i, uint32_t>>(src); + compare_and_mask<__m256i, uint32_t>>(src); } } // namespace avx2 #endif #if defined(__AVX512F__) template <> -__mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) { +LIBC_INLINE __mmask64 +compare_and_mask<__m512i, __mmask64>(const __m512i *block_ptr) { __m512i v = _mm512_load_si512(block_ptr); __m512i z = _mm512_setzero_si512(); return _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ); @@ -83,7 +86,7 @@ __mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) { namespace avx512 { [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) { return string_length_vector<__m512i, __mmask64, - CompareAndMask<__m512i, __mmask64>>(src); + compare_and_mask<__m512i, __mmask64>>(src); } } // namespace avx512 #endif |
