diff options
| author | William <113542065+saturn691@users.noreply.github.com> | 2025-05-02 12:36:00 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-02 13:36:00 +0200 |
| commit | da3022577e1f277999922acaef9be169c20dfd48 (patch) | |
| tree | 697cbdf0dc754a821c5e6fbd44f1060b0a077cd7 /libc/src/string | |
| parent | 72f5ac442da0e4af3a856d11f2c6dfd1c790998d (diff) | |
[libc] Add support for string/memory_utils functions for AArch64 without HW FP/SIMD (#137592)
Add conditional compilation to add support for AArch64 without vector
registers and/or hardware FPUs by using the generic implementation.
**Context:**
A few functions were hard-coded to use vector registers/hardware FPUs.
This meant that libc would not compile on architectures that did not
support these features. This fix falls back on the generic
implementation if a feature is not supported.
Diffstat (limited to 'libc/src/string')
| -rw-r--r-- | libc/src/string/memory_utils/aarch64/inline_bcmp.h | 50 | ||||
| -rw-r--r-- | libc/src/string/memory_utils/aarch64/inline_memcmp.h | 65 | ||||
| -rw-r--r-- | libc/src/string/memory_utils/aarch64/inline_memmove.h | 4 | ||||
| -rw-r--r-- | libc/src/string/memory_utils/aarch64/inline_memset.h | 60 | ||||
| -rw-r--r-- | libc/src/string/memory_utils/inline_bcmp.h | 2 | ||||
| -rw-r--r-- | libc/src/string/memory_utils/inline_memcmp.h | 2 | ||||
| -rw-r--r-- | libc/src/string/memory_utils/inline_memset.h | 2 | ||||
| -rw-r--r-- | libc/src/string/memory_utils/op_aarch64.h | 8 |
8 files changed, 148 insertions, 45 deletions
diff --git a/libc/src/string/memory_utils/aarch64/inline_bcmp.h b/libc/src/string/memory_utils/aarch64/inline_bcmp.h index e41ac202dbaa..66d24378095b 100644 --- a/libc/src/string/memory_utils/aarch64/inline_bcmp.h +++ b/libc/src/string/memory_utils/aarch64/inline_bcmp.h @@ -19,9 +19,43 @@ namespace LIBC_NAMESPACE_DECL { -[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1, - CPtr p2, - size_t count) { +[[maybe_unused]] LIBC_INLINE BcmpReturnType +inline_bcmp_aarch64_no_fp(CPtr p1, CPtr p2, size_t count) { + if (LIBC_LIKELY(count < 16)) { + switch (count) { + case 0: + return BcmpReturnType::zero(); + case 1: + return generic::Bcmp<uint8_t>::block(p1, p2); + case 2: + return generic::Bcmp<uint16_t>::block(p1, p2); + case 3: + return generic::Bcmp<uint16_t>::head_tail(p1, p2, count); + case 4: + return generic::Bcmp<uint32_t>::block(p1, p2); + case 5: + case 6: + case 7: + return generic::Bcmp<uint32_t>::head_tail(p1, p2, count); + case 8: + return generic::Bcmp<uint64_t>::block(p1, p2); + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + return generic::Bcmp<uint64_t>::head_tail(p1, p2, count); + } + } + + return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count); +} + +#ifdef __ARM_NEON +[[maybe_unused]] LIBC_INLINE BcmpReturnType +inline_bcmp_aarch64_with_fp(CPtr p1, CPtr p2, size_t count) { if (LIBC_LIKELY(count <= 32)) { if (LIBC_UNLIKELY(count >= 16)) { return aarch64::Bcmp<16>::head_tail(p1, p2, count); @@ -65,6 +99,16 @@ namespace LIBC_NAMESPACE_DECL { } return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count); } +#endif + +[[gnu::flatten]] LIBC_INLINE BcmpReturnType +inline_bcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) { +#if defined(__ARM_NEON) + return inline_bcmp_aarch64_with_fp(p1, p2, count); +#else + return inline_bcmp_aarch64_no_fp(p1, p2, count); +#endif +} } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/string/memory_utils/aarch64/inline_memcmp.h b/libc/src/string/memory_utils/aarch64/inline_memcmp.h index 35ca077dab52..380ebb410efb 100644 --- a/libc/src/string/memory_utils/aarch64/inline_memcmp.h +++ b/libc/src/string/memory_utils/aarch64/inline_memcmp.h @@ -17,17 +17,40 @@ namespace LIBC_NAMESPACE_DECL { [[maybe_unused]] LIBC_INLINE MemcmpReturnType -inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) { - if (LIBC_UNLIKELY(count >= 384)) { - if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2)) - return value; - align_to_next_boundary<16, Arg::P1>(p1, p2, count); - } - return generic::Memcmp<uint8x16_t>::loop_and_tail(p1, p2, count); +inline_memcmp_aarch64_no_fp(CPtr p1, CPtr p2, size_t count) { + if (count == 0) + return MemcmpReturnType::zero(); + if (count == 1) + return generic::Memcmp<uint8_t>::block(p1, p2); + if (count == 2) + return generic::Memcmp<uint16_t>::block(p1, p2); + if (count == 3) + return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2); + if (count <= 8) + return generic::Memcmp<uint32_t>::head_tail(p1, p2, count); + if (count <= 16) + return generic::Memcmp<uint64_t>::head_tail(p1, p2, count); + + return generic::Memcmp<uint64_t>::loop_and_tail_align_above(384, p1, p2, + count); } +#if defined(__ARM_NEON) [[maybe_unused]] LIBC_INLINE MemcmpReturnType -inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) { +inline_memcmp_aarch64_with_fp(CPtr p1, CPtr p2, size_t count) { + if (count == 0) + return MemcmpReturnType::zero(); + if (count == 1) + return generic::Memcmp<uint8_t>::block(p1, p2); + if (count == 2) + return generic::Memcmp<uint16_t>::block(p1, p2); + if (count == 3) + return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2); + if (count <= 8) + return generic::Memcmp<uint32_t>::head_tail(p1, p2, count); + if (count <= 16) + return generic::Memcmp<uint64_t>::head_tail(p1, p2, count); + if (LIBC_UNLIKELY(count >= 128)) { // [128, ∞] if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2)) return value; @@ -46,25 +69,15 @@ inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) { return generic::Memcmp<uint8x16_t>::loop_and_tail(p1 + 32, p2 + 32, count - 32); } +#endif -LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2, - size_t count) { - if (count == 0) - return MemcmpReturnType::zero(); - if (count == 1) - return generic::Memcmp<uint8_t>::block(p1, p2); - if (count == 2) - return generic::Memcmp<uint16_t>::block(p1, p2); - if (count == 3) - return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2); - if (count <= 8) - return generic::Memcmp<uint32_t>::head_tail(p1, p2, count); - if (count <= 16) - return generic::Memcmp<uint64_t>::head_tail(p1, p2, count); - if constexpr (aarch64::kNeon) - return inline_memcmp_aarch64_neon_gt16(p1, p2, count); - else - return inline_memcmp_generic_gt16(p1, p2, count); +[[gnu::flatten]] LIBC_INLINE MemcmpReturnType +inline_memcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) { +#if defined(__ARM_NEON) + return inline_memcmp_aarch64_with_fp(p1, p2, count); +#else + return inline_memcmp_aarch64_no_fp(p1, p2, count); +#endif } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/string/memory_utils/aarch64/inline_memmove.h b/libc/src/string/memory_utils/aarch64/inline_memmove.h index 2b238031af49..d8d276966fd2 100644 --- a/libc/src/string/memory_utils/aarch64/inline_memmove.h +++ b/libc/src/string/memory_utils/aarch64/inline_memmove.h @@ -8,8 +8,7 @@ #ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H #define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H -#include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/string/memory_utils/op_aarch64.h" // aarch64::kNeon +#include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/string/memory_utils/op_builtin.h" #include "src/string/memory_utils/op_generic.h" #include "src/string/memory_utils/utils.h" @@ -19,7 +18,6 @@ namespace LIBC_NAMESPACE_DECL { LIBC_INLINE void inline_memmove_aarch64(Ptr dst, CPtr src, size_t count) { - static_assert(aarch64::kNeon, "aarch64 supports vector types"); using uint128_t = generic_v128; using uint256_t = generic_v256; using uint512_t = generic_v512; diff --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h index efcbfd070598..1b4b871792c6 100644 --- a/libc/src/string/memory_utils/aarch64/inline_memset.h +++ b/libc/src/string/memory_utils/aarch64/inline_memset.h @@ -18,12 +18,12 @@ namespace LIBC_NAMESPACE_DECL { +using uint128_t = generic_v128; +using uint256_t = generic_v256; +using uint512_t = generic_v512; + [[maybe_unused]] LIBC_INLINE static void -inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) { - static_assert(aarch64::kNeon, "aarch64 supports vector types"); - using uint128_t = generic_v128; - using uint256_t = generic_v256; - using uint512_t = generic_v512; +inline_memset_aarch64_no_fp(Ptr dst, uint8_t value, size_t count) { if (count == 0) return; if (count <= 3) { @@ -46,15 +46,57 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) { generic::Memset<uint256_t>::tail(dst, value, count); return; } + + generic::Memset<uint128_t>::block(dst, value); + align_to_next_boundary<16>(dst, count); + return generic::Memset<uint512_t>::loop_and_tail(dst, value, count); +} + +#if defined(__ARM_NEON) +[[maybe_unused]] LIBC_INLINE static void +inline_memset_aarch64_with_fp(Ptr dst, uint8_t value, size_t count) { + if (count == 0) + return; + if (count <= 3) { + generic::Memset<uint8_t>::block(dst, value); + if (count > 1) + generic::Memset<uint16_t>::tail(dst, value, count); + return; + } + if (count <= 8) + return generic::Memset<uint32_t>::head_tail(dst, value, count); + if (count <= 16) + return generic::Memset<uint64_t>::head_tail(dst, value, count); + if (count <= 32) + return generic::Memset<uint128_t>::head_tail(dst, value, count); + if (count <= (32 + 64)) { + generic::Memset<uint256_t>::block(dst, value); + if (count <= 64) + return generic::Memset<uint256_t>::tail(dst, value, count); + generic::Memset<uint256_t>::block(dst + 32, value); + generic::Memset<uint256_t>::tail(dst, value, count); + return; + } + if (count >= 448 && value == 0 && aarch64::neon::hasZva()) { generic::Memset<uint512_t>::block(dst, 0); align_to_next_boundary<64>(dst, count); return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count); - } else { - generic::Memset<uint128_t>::block(dst, value); - align_to_next_boundary<16>(dst, count); - return generic::Memset<uint512_t>::loop_and_tail(dst, value, count); } + + generic::Memset<uint128_t>::block(dst, value); + align_to_next_boundary<16>(dst, count); + return generic::Memset<uint512_t>::loop_and_tail(dst, value, count); +} +#endif + +[[gnu::flatten]] [[maybe_unused]] LIBC_INLINE static void +inline_memset_aarch64_dispatch(Ptr dst, uint8_t value, size_t count) { +#if defined(__ARM_NEON) + return inline_memset_aarch64_with_fp(dst, value, count); +#else + return inline_memset_aarch64_no_fp(dst, value, count); +#endif } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/string/memory_utils/inline_bcmp.h b/libc/src/string/memory_utils/inline_bcmp.h index 3c1dc808cc5c..955d764aade2 100644 --- a/libc/src/string/memory_utils/inline_bcmp.h +++ b/libc/src/string/memory_utils/inline_bcmp.h @@ -21,7 +21,7 @@ #define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_x86 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) #include "src/string/memory_utils/aarch64/inline_bcmp.h" -#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64 +#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64_dispatch #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) #include "src/string/memory_utils/riscv/inline_bcmp.h" #define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_riscv diff --git a/libc/src/string/memory_utils/inline_memcmp.h b/libc/src/string/memory_utils/inline_memcmp.h index a2ca9afd7f79..85a614b2fb95 100644 --- a/libc/src/string/memory_utils/inline_memcmp.h +++ b/libc/src/string/memory_utils/inline_memcmp.h @@ -20,7 +20,7 @@ #define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_x86 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) #include "src/string/memory_utils/aarch64/inline_memcmp.h" -#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64 +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64_dispatch #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) #include "src/string/memory_utils/riscv/inline_memcmp.h" #define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_riscv diff --git a/libc/src/string/memory_utils/inline_memset.h b/libc/src/string/memory_utils/inline_memset.h index aed37071265a..fd9c29ea4410 100644 --- a/libc/src/string/memory_utils/inline_memset.h +++ b/libc/src/string/memory_utils/inline_memset.h @@ -20,7 +20,7 @@ #define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_x86 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) #include "src/string/memory_utils/aarch64/inline_memset.h" -#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64 +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64_dispatch #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) #include "src/string/memory_utils/riscv/inline_memset.h" #define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_riscv diff --git a/libc/src/string/memory_utils/op_aarch64.h b/libc/src/string/memory_utils/op_aarch64.h index 868c64474c0b..e552601fbb70 100644 --- a/libc/src/string/memory_utils/op_aarch64.h +++ b/libc/src/string/memory_utils/op_aarch64.h @@ -25,7 +25,6 @@ #ifdef __ARM_NEON #include <arm_neon.h> -#endif //__ARM_NEON namespace LIBC_NAMESPACE_DECL { namespace aarch64 { @@ -176,6 +175,8 @@ template <size_t Size> struct Bcmp { } // namespace aarch64 } // namespace LIBC_NAMESPACE_DECL +#endif //__ARM_NEON + namespace LIBC_NAMESPACE_DECL { namespace generic { @@ -225,6 +226,8 @@ LIBC_INLINE MemcmpReturnType cmp<uint64_t>(CPtr p1, CPtr p2, size_t offset) { return MemcmpReturnType::zero(); } +#if defined(__ARM_NEON) + /////////////////////////////////////////////////////////////////////////////// // Specializations for uint8x16_t template <> struct is_vector<uint8x16_t> : cpp::true_type {}; @@ -269,6 +272,9 @@ LIBC_INLINE MemcmpReturnType cmp<uint8x16x2_t>(CPtr p1, CPtr p2, } return MemcmpReturnType::zero(); } + +#endif // __ARM_NEON + } // namespace generic } // namespace LIBC_NAMESPACE_DECL |
