diff options
| author | NAKAMURA Takumi <geek4civic@gmail.com> | 2025-01-09 18:49:54 +0900 |
|---|---|---|
| committer | NAKAMURA Takumi <geek4civic@gmail.com> | 2025-01-09 18:49:54 +0900 |
| commit | e2810c9a248f4c7fbfae84bb32b6f7e01027458b (patch) | |
| tree | ae0b02a8491b969a1cee94ea16ffe42c559143c5 /libc/src/stdlib | |
| parent | fa04eb4af95c1ca7377279728cb004bcd2324d01 (diff) | |
| parent | bdcf47e4bcb92889665825654bb80a8bbe30379e (diff) | |
Merge branch 'users/chapuni/cov/single/base' into users/chapuni/cov/single/switchusers/chapuni/cov/single/switch
Diffstat (limited to 'libc/src/stdlib')
| -rw-r--r-- | libc/src/stdlib/exit_handler.h | 2 | ||||
| -rw-r--r-- | libc/src/stdlib/heap_sort.h | 12 | ||||
| -rw-r--r-- | libc/src/stdlib/qsort.cpp | 10 | ||||
| -rw-r--r-- | libc/src/stdlib/qsort_data.h | 171 | ||||
| -rw-r--r-- | libc/src/stdlib/qsort_pivot.h | 85 | ||||
| -rw-r--r-- | libc/src/stdlib/qsort_r.cpp | 11 | ||||
| -rw-r--r-- | libc/src/stdlib/qsort_util.h | 47 | ||||
| -rw-r--r-- | libc/src/stdlib/quick_sort.h | 203 |
8 files changed, 391 insertions, 150 deletions
diff --git a/libc/src/stdlib/exit_handler.h b/libc/src/stdlib/exit_handler.h index 9720c5473940..e9d163dfe902 100644 --- a/libc/src/stdlib/exit_handler.h +++ b/libc/src/stdlib/exit_handler.h @@ -48,7 +48,7 @@ LIBC_INLINE void stdc_at_exit_func(void *payload) { LIBC_INLINE void call_exit_callbacks(ExitCallbackList &callbacks) { handler_list_mtx.lock(); while (!callbacks.empty()) { - AtExitUnit &unit = callbacks.back(); + AtExitUnit unit = callbacks.back(); callbacks.pop_back(); handler_list_mtx.unlock(); unit.callback(unit.payload); diff --git a/libc/src/stdlib/heap_sort.h b/libc/src/stdlib/heap_sort.h index ccb9ec5f8214..b9699776df89 100644 --- a/libc/src/stdlib/heap_sort.h +++ b/libc/src/stdlib/heap_sort.h @@ -18,11 +18,12 @@ namespace internal { // A simple in-place heapsort implementation. // Follow the implementation in https://en.wikipedia.org/wiki/Heapsort. -LIBC_INLINE void heap_sort(const Array &array) { - size_t end = array.size(); +template <typename A, typename F> +LIBC_INLINE void heap_sort(const A &array, const F &is_less) { + size_t end = array.len(); size_t start = end / 2; - auto left_child = [](size_t i) -> size_t { return 2 * i + 1; }; + const auto left_child = [](size_t i) -> size_t { return 2 * i + 1; }; while (end > 1) { if (start > 0) { @@ -40,12 +41,11 @@ LIBC_INLINE void heap_sort(const Array &array) { while (left_child(root) < end) { size_t child = left_child(root); // If there are two children, set child to the greater. - if (child + 1 < end && - array.elem_compare(child, array.get(child + 1)) < 0) + if ((child + 1 < end) && is_less(array.get(child), array.get(child + 1))) ++child; // If the root is less than the greater child - if (array.elem_compare(root, array.get(child)) >= 0) + if (!is_less(array.get(root), array.get(child))) break; // Swap the root with the greater child and continue sifting down. diff --git a/libc/src/stdlib/qsort.cpp b/libc/src/stdlib/qsort.cpp index 65a63c239f5c..0bf5fc798052 100644 --- a/libc/src/stdlib/qsort.cpp +++ b/libc/src/stdlib/qsort.cpp @@ -18,14 +18,12 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(void, qsort, (void *array, size_t array_size, size_t elem_size, int (*compare)(const void *, const void *))) { - if (array == nullptr || array_size == 0 || elem_size == 0) - return; - internal::Comparator c(compare); - auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size, - elem_size, c); + const auto is_less = [compare](const void *a, const void *b) -> bool { + return compare(a, b) < 0; + }; - internal::sort(arr); + internal::unstable_sort(array, array_size, elem_size, is_less); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/qsort_data.h b/libc/src/stdlib/qsort_data.h index c529d55ca46f..aa6d9bbc123d 100644 --- a/libc/src/stdlib/qsort_data.h +++ b/libc/src/stdlib/qsort_data.h @@ -17,91 +17,122 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -using Compare = int(const void *, const void *); -using CompareWithState = int(const void *, const void *, void *); - -enum class CompType { COMPARE, COMPARE_WITH_STATE }; - -struct Comparator { - union { - Compare *comp_func; - CompareWithState *comp_func_r; - }; - const CompType comp_type; - - void *arg; - - Comparator(Compare *func) - : comp_func(func), comp_type(CompType::COMPARE), arg(nullptr) {} - - Comparator(CompareWithState *func, void *arg_val) - : comp_func_r(func), comp_type(CompType::COMPARE_WITH_STATE), - arg(arg_val) {} - -#if defined(__clang__) - // Recent upstream changes to -fsanitize=function find more instances of - // function type mismatches. One case is with the comparator passed to this - // class. Libraries will tend to pass comparators that take pointers to - // varying types while this comparator expects to accept const void pointers. - // Ideally those tools would pass a function that strictly accepts const - // void*s to avoid UB, or would use qsort_r to pass their own comparator. - [[clang::no_sanitize("function")]] -#endif - int comp_vals(const void *a, const void *b) const { - if (comp_type == CompType::COMPARE) { - return comp_func(a, b); - } else { - return comp_func_r(a, b, arg); +class ArrayGenericSize { + cpp::byte *array_base; + size_t array_len; + size_t elem_size; + + LIBC_INLINE cpp::byte *get_internal(size_t i) const { + return array_base + (i * elem_size); + } + +public: + LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e) + : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s), + elem_size(e) {} + + static constexpr bool has_fixed_size() { return false; } + + LIBC_INLINE void *get(size_t i) const { return get_internal(i); } + + LIBC_INLINE void swap(size_t i, size_t j) const { + // It's possible to use 8 byte blocks with `uint64_t`, but that + // generates more machine code as the remainder loop gets + // unrolled, plus 4 byte operations are more likely to be + // efficient on a wider variety of hardware. On x86 LLVM tends + // to unroll the block loop again into 2 16 byte swaps per + // iteration which is another reason that 4 byte blocks yields + // good performance even for big types. + using block_t = uint32_t; + constexpr size_t BLOCK_SIZE = sizeof(block_t); + + alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE]; + + cpp::byte *elem_i = get_internal(i); + cpp::byte *elem_j = get_internal(j); + + const size_t elem_size_rem = elem_size % BLOCK_SIZE; + const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem); + + while (elem_i != elem_i_block_end) { + __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE); + __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE); + __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE); + + elem_i += BLOCK_SIZE; + elem_j += BLOCK_SIZE; + } + + for (size_t n = 0; n < elem_size_rem; ++n) { + cpp::byte tmp = elem_i[n]; + elem_i[n] = elem_j[n]; + elem_j[n] = tmp; } } + + LIBC_INLINE size_t len() const { return array_len; } + + // Make an Array starting at index |i| and length |s|. + LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const { + return ArrayGenericSize(get_internal(i), s, elem_size); + } + + // Reset this Array to point at a different interval of the same + // items starting at index |i|. + LIBC_INLINE void reset_bounds(size_t i, size_t s) { + array_base = get_internal(i); + array_len = s; + } }; -class Array { - uint8_t *array; - size_t array_size; - size_t elem_size; - Comparator compare; +// Having a specialized Array type for sorting that knows at +// compile-time what the size of the element is, allows for much more +// efficient swapping and for cheaper offset calculations. +template <size_t ELEM_SIZE> class ArrayFixedSize { + cpp::byte *array_base; + size_t array_len; -public: - Array(uint8_t *a, size_t s, size_t e, Comparator c) - : array(a), array_size(s), elem_size(e), compare(c) {} - - uint8_t *get(size_t i) const { return array + i * elem_size; } - - void swap(size_t i, size_t j) const { - uint8_t *elem_i = get(i); - uint8_t *elem_j = get(j); - for (size_t b = 0; b < elem_size; ++b) { - uint8_t temp = elem_i[b]; - elem_i[b] = elem_j[b]; - elem_j[b] = temp; - } + LIBC_INLINE cpp::byte *get_internal(size_t i) const { + return array_base + (i * ELEM_SIZE); } - int elem_compare(size_t i, const uint8_t *other) const { - // An element must compare equal to itself so we don't need to consult the - // user provided comparator. - if (get(i) == other) - return 0; - return compare.comp_vals(get(i), other); +public: + LIBC_INLINE ArrayFixedSize(void *a, size_t s) + : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {} + + // Beware this function is used a heuristic for cheap to swap types, so + // instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad + // idea perf wise. + static constexpr bool has_fixed_size() { return true; } + + LIBC_INLINE void *get(size_t i) const { return get_internal(i); } + + LIBC_INLINE void swap(size_t i, size_t j) const { + alignas(32) cpp::byte tmp[ELEM_SIZE]; + + cpp::byte *elem_i = get_internal(i); + cpp::byte *elem_j = get_internal(j); + + __builtin_memcpy(tmp, elem_i, ELEM_SIZE); + __builtin_memmove(elem_i, elem_j, ELEM_SIZE); + __builtin_memcpy(elem_j, tmp, ELEM_SIZE); } - size_t size() const { return array_size; } + LIBC_INLINE size_t len() const { return array_len; } - // Make an Array starting at index |i| and size |s|. - LIBC_INLINE Array make_array(size_t i, size_t s) const { - return Array(get(i), s, elem_size, compare); + // Make an Array starting at index |i| and length |s|. + LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const { + return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s); } - // Reset this Array to point at a different interval of the same items. - LIBC_INLINE void reset_bounds(uint8_t *a, size_t s) { - array = a; - array_size = s; + // Reset this Array to point at a different interval of the same + // items starting at index |i|. + LIBC_INLINE void reset_bounds(size_t i, size_t s) { + array_base = get_internal(i); + array_len = s; } }; -using SortingRoutine = void(const Array &); - } // namespace internal } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/qsort_pivot.h b/libc/src/stdlib/qsort_pivot.h new file mode 100644 index 000000000000..b27e74663d90 --- /dev/null +++ b/libc/src/stdlib/qsort_pivot.h @@ -0,0 +1,85 @@ +//===-- Implementation header for qsort utilities ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H +#define LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H + +#include <stddef.h> // For size_t + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +// Recursively select a pseudomedian if above this threshold. +constexpr size_t PSEUDO_MEDIAN_REC_THRESHOLD = 64; + +// Selects a pivot from `array`. Algorithm taken from glidesort by Orson Peters. +// +// This chooses a pivot by sampling an adaptive amount of points, approximating +// the quality of a median of sqrt(n) elements. +template <typename A, typename F> +size_t choose_pivot(const A &array, const F &is_less) { + const size_t len = array.len(); + + if (len < 8) { + return 0; + } + + const size_t len_div_8 = len / 8; + + const size_t a = 0; // [0, floor(n/8)) + const size_t b = len_div_8 * 4; // [4*floor(n/8), 5*floor(n/8)) + const size_t c = len_div_8 * 7; // [7*floor(n/8), 8*floor(n/8)) + + if (len < PSEUDO_MEDIAN_REC_THRESHOLD) + return median3(array, a, b, c, is_less); + else + return median3_rec(array, a, b, c, len_div_8, is_less); +} + +// Calculates an approximate median of 3 elements from sections a, b, c, or +// recursively from an approximation of each, if they're large enough. By +// dividing the size of each section by 8 when recursing we have logarithmic +// recursion depth and overall sample from f(n) = 3*f(n/8) -> f(n) = +// O(n^(log(3)/log(8))) ~= O(n^0.528) elements. +template <typename A, typename F> +size_t median3_rec(const A &array, size_t a, size_t b, size_t c, size_t n, + const F &is_less) { + if (n * 8 >= PSEUDO_MEDIAN_REC_THRESHOLD) { + const size_t n8 = n / 8; + a = median3_rec(array, a, a + (n8 * 4), a + (n8 * 7), n8, is_less); + b = median3_rec(array, b, b + (n8 * 4), b + (n8 * 7), n8, is_less); + c = median3_rec(array, c, c + (n8 * 4), c + (n8 * 7), n8, is_less); + } + return median3(array, a, b, c, is_less); +} + +/// Calculates the median of 3 elements. +template <typename A, typename F> +size_t median3(const A &array, size_t a, size_t b, size_t c, const F &is_less) { + const void *a_ptr = array.get(a); + const void *b_ptr = array.get(b); + const void *c_ptr = array.get(c); + + const bool x = is_less(a_ptr, b_ptr); + const bool y = is_less(a_ptr, c_ptr); + if (x == y) { + // If x=y=0 then b, c <= a. In this case we want to return max(b, c). + // If x=y=1 then a < b, c. In this case we want to return min(b, c). + // By toggling the outcome of b < c using XOR x we get this behavior. + const bool z = is_less(b_ptr, c_ptr); + return z ^ x ? c : b; + } else { + // Either c <= a < b or b <= a < c, thus a is our median. + return a; + } +} + +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H diff --git a/libc/src/stdlib/qsort_r.cpp b/libc/src/stdlib/qsort_r.cpp index bf61a40e8473..4e60998b6a6d 100644 --- a/libc/src/stdlib/qsort_r.cpp +++ b/libc/src/stdlib/qsort_r.cpp @@ -19,13 +19,12 @@ LLVM_LIBC_FUNCTION(void, qsort_r, (void *array, size_t array_size, size_t elem_size, int (*compare)(const void *, const void *, void *), void *arg)) { - if (array == nullptr || array_size == 0 || elem_size == 0) - return; - internal::Comparator c(compare, arg); - auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size, - elem_size, c); - internal::sort(arr); + const auto is_less = [compare, arg](const void *a, const void *b) -> bool { + return compare(a, b, arg) < 0; + }; + + internal::unstable_sort(array, array_size, elem_size, is_less); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/qsort_util.h b/libc/src/stdlib/qsort_util.h index d42adde06d97..7882b829d327 100644 --- a/libc/src/stdlib/qsort_util.h +++ b/libc/src/stdlib/qsort_util.h @@ -27,11 +27,48 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -#if LIBC_QSORT_IMPL == LIBC_QSORT_QUICK_SORT -constexpr auto sort = quick_sort; -#elif LIBC_QSORT_IMPL == LIBC_QSORT_HEAP_SORT -constexpr auto sort = heap_sort; -#endif +template <bool USE_QUICKSORT, typename F> +LIBC_INLINE void unstable_sort_impl(void *array, size_t array_len, + size_t elem_size, const F &is_less) { + if (array == nullptr || array_len == 0 || elem_size == 0) + return; + + if constexpr (USE_QUICKSORT) { + switch (elem_size) { + case 4: { + auto arr_fixed_size = internal::ArrayFixedSize<4>(array, array_len); + quick_sort(arr_fixed_size, is_less); + return; + } + case 8: { + auto arr_fixed_size = internal::ArrayFixedSize<8>(array, array_len); + quick_sort(arr_fixed_size, is_less); + return; + } + case 16: { + auto arr_fixed_size = internal::ArrayFixedSize<16>(array, array_len); + quick_sort(arr_fixed_size, is_less); + return; + } + default: + auto arr_generic_size = + internal::ArrayGenericSize(array, array_len, elem_size); + quick_sort(arr_generic_size, is_less); + return; + } + } else { + auto arr_generic_size = + internal::ArrayGenericSize(array, array_len, elem_size); + heap_sort(arr_generic_size, is_less); + } +} + +template <typename F> +LIBC_INLINE void unstable_sort(void *array, size_t array_len, size_t elem_size, + const F &is_less) { +#define USE_QUICK_SORT ((LIBC_QSORT_IMPL) == (LIBC_QSORT_QUICK_SORT)) + unstable_sort_impl<USE_QUICK_SORT, F>(array, array_len, elem_size, is_less); +} } // namespace internal } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/quick_sort.h b/libc/src/stdlib/quick_sort.h index 82b90a7d511d..9ab283025001 100644 --- a/libc/src/stdlib/quick_sort.h +++ b/libc/src/stdlib/quick_sort.h @@ -9,84 +9,175 @@ #ifndef LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H #define LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H -#include "src/__support/macros/attributes.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/CPP/cstddef.h" #include "src/__support/macros/config.h" -#include "src/stdlib/qsort_data.h" +#include "src/stdlib/qsort_pivot.h" #include <stdint.h> namespace LIBC_NAMESPACE_DECL { namespace internal { -// A simple quicksort implementation using the Hoare partition scheme. -LIBC_INLINE size_t partition(const Array &array) { - const size_t array_size = array.size(); - size_t pivot_index = array_size / 2; - uint8_t *pivot = array.get(pivot_index); - size_t i = 0; - size_t j = array_size - 1; +// Branchless Lomuto partition based on the implementation by Lukas +// Bergdoll and Orson Peters +// https://github.com/Voultapher/sort-research-rs/blob/main/writeup/lomcyc_partition/text.md. +// Simplified to avoid having to stack allocate. +template <typename A, typename F> +LIBC_INLINE size_t partition_lomuto_branchless(const A &array, + const void *pivot, + const F &is_less) { + const size_t array_len = array.len(); + + size_t left = 0; + size_t right = 0; + + while (right < array_len) { + const bool right_is_lt = is_less(array.get(right), pivot); + array.swap(left, right); + left += static_cast<size_t>(right_is_lt); + right += 1; + } + + return left; +} + +// Optimized for large types that are expensive to move. Not optimized +// for integers. It's possible to use a cyclic permutation here for +// large types as done in ipnsort but the advantages of this are limited +// as `is_less` is a small wrapper around a call to a function pointer +// and won't incur much binary-size overhead. The other reason to use +// cyclic permutation is to have more efficient swapping, but we don't +// know the element size so this isn't applicable here either. +template <typename A, typename F> +LIBC_INLINE size_t partition_hoare_branchy(const A &array, const void *pivot, + const F &is_less) { + const size_t array_len = array.len(); + + size_t left = 0; + size_t right = array_len; while (true) { - int compare_i, compare_j; - - while ((compare_i = array.elem_compare(i, pivot)) < 0) - ++i; - while ((compare_j = array.elem_compare(j, pivot)) > 0) - --j; - - // At some point i will crossover j so we will definitely break out of - // this while loop. - if (i >= j) - return j + 1; - - array.swap(i, j); - - // The pivot itself might have got swapped so we will update the pivot. - if (i == pivot_index) { - pivot = array.get(j); - pivot_index = j; - } else if (j == pivot_index) { - pivot = array.get(i); - pivot_index = i; + while (left < right && is_less(array.get(left), pivot)) + ++left; + + while (true) { + --right; + if (left >= right || is_less(array.get(right), pivot)) { + break; + } } - if (compare_i == 0 && compare_j == 0) { - // If we do not move the pointers, we will end up with an - // infinite loop as i and j will be stuck without advancing. - ++i; - --j; - } + if (left >= right) + break; + + array.swap(left, right); + ++left; + } + + return left; +} + +template <typename A, typename F> +LIBC_INLINE size_t partition(const A &array, size_t pivot_index, + const F &is_less) { + // Place the pivot at the beginning of the array. + if (pivot_index != 0) { + array.swap(0, pivot_index); } + + const A array_without_pivot = array.make_array(1, array.len() - 1); + const void *pivot = array.get(0); + + size_t num_lt; + if constexpr (A::has_fixed_size()) { + // Branchless Lomuto avoid branch misprediction penalties, but + // it also swaps more often which is only faster if the swap is a fast + // constant operation. + num_lt = partition_lomuto_branchless(array_without_pivot, pivot, is_less); + } else { + num_lt = partition_hoare_branchy(array_without_pivot, pivot, is_less); + } + + // Place the pivot between the two partitions. + array.swap(0, num_lt); + + return num_lt; } -LIBC_INLINE void quick_sort(Array array) { +template <typename A, typename F> +LIBC_INLINE void quick_sort_impl(A &array, const void *ancestor_pivot, + size_t limit, const F &is_less) { while (true) { - const size_t array_size = array.size(); - if (array_size <= 1) + const size_t array_len = array.len(); + if (array_len <= 1) return; - size_t split_index = partition(array); - if (array_size == 2) - // The partition operation sorts the two element array. + + // If too many bad pivot choices were made, simply fall back to + // heapsort in order to guarantee `O(N x log(N))` worst-case. + if (limit == 0) { + heap_sort(array, is_less); return; + } - // Make Arrays describing the two sublists that still need sorting. - Array left = array.make_array(0, split_index); - Array right = array.make_array(split_index, array.size() - split_index); - - // Recurse to sort the smaller of the two, and then loop round within this - // function to sort the larger. This way, recursive call depth is bounded - // by log2 of the total array size, because every recursive call is sorting - // a list at most half the length of the one in its caller. - if (left.size() < right.size()) { - quick_sort(left); - array.reset_bounds(right.get(0), right.size()); - } else { - quick_sort(right); - array.reset_bounds(left.get(0), left.size()); + limit -= 1; + + const size_t pivot_index = choose_pivot(array, is_less); + + // If the chosen pivot is equal to the predecessor, then it's the smallest + // element in the slice. Partition the slice into elements equal to and + // elements greater than the pivot. This case is usually hit when the slice + // contains many duplicate elements. + if (ancestor_pivot) { + if (!is_less(ancestor_pivot, array.get(pivot_index))) { + const size_t num_lt = + partition(array, pivot_index, + [is_less](const void *a, const void *b) -> bool { + return !is_less(b, a); + }); + + // Continue sorting elements greater than the pivot. We know that + // `num_lt` cont + array.reset_bounds(num_lt + 1, array.len() - (num_lt + 1)); + ancestor_pivot = nullptr; + continue; + } } + + size_t split_index = partition(array, pivot_index, is_less); + + if (array_len == 2) + // The partition operation sorts the two element array. + return; + + // Split the array into `left`, `pivot`, and `right`. + A left = array.make_array(0, split_index); + const void *pivot = array.get(split_index); + const size_t right_start = split_index + 1; + A right = array.make_array(right_start, array.len() - right_start); + + // Recurse into the left side. We have a fixed recursion limit, + // testing shows no real benefit for recursing into the shorter + // side. + quick_sort_impl(left, ancestor_pivot, limit, is_less); + + // Continue with the right side. + array = right; + ancestor_pivot = pivot; } } +constexpr size_t ilog2(size_t n) { return cpp::bit_width(n) - 1; } + +template <typename A, typename F> +LIBC_INLINE void quick_sort(A &array, const F &is_less) { + const void *ancestor_pivot = nullptr; + // Limit the number of imbalanced partitions to `2 * floor(log2(len))`. + // The binary OR by one is used to eliminate the zero-check in the logarithm. + const size_t limit = 2 * ilog2((array.len() | 1)); + quick_sort_impl(array, ancestor_pivot, limit, is_less); +} + } // namespace internal } // namespace LIBC_NAMESPACE_DECL |
