summaryrefslogtreecommitdiff
path: root/libc/src/stdlib
diff options
context:
space:
mode:
Diffstat (limited to 'libc/src/stdlib')
-rw-r--r--libc/src/stdlib/exit_handler.h2
-rw-r--r--libc/src/stdlib/heap_sort.h12
-rw-r--r--libc/src/stdlib/qsort.cpp10
-rw-r--r--libc/src/stdlib/qsort_data.h171
-rw-r--r--libc/src/stdlib/qsort_pivot.h85
-rw-r--r--libc/src/stdlib/qsort_r.cpp11
-rw-r--r--libc/src/stdlib/qsort_util.h47
-rw-r--r--libc/src/stdlib/quick_sort.h203
8 files changed, 391 insertions, 150 deletions
diff --git a/libc/src/stdlib/exit_handler.h b/libc/src/stdlib/exit_handler.h
index 9720c5473940..e9d163dfe902 100644
--- a/libc/src/stdlib/exit_handler.h
+++ b/libc/src/stdlib/exit_handler.h
@@ -48,7 +48,7 @@ LIBC_INLINE void stdc_at_exit_func(void *payload) {
LIBC_INLINE void call_exit_callbacks(ExitCallbackList &callbacks) {
handler_list_mtx.lock();
while (!callbacks.empty()) {
- AtExitUnit &unit = callbacks.back();
+ AtExitUnit unit = callbacks.back();
callbacks.pop_back();
handler_list_mtx.unlock();
unit.callback(unit.payload);
diff --git a/libc/src/stdlib/heap_sort.h b/libc/src/stdlib/heap_sort.h
index ccb9ec5f8214..b9699776df89 100644
--- a/libc/src/stdlib/heap_sort.h
+++ b/libc/src/stdlib/heap_sort.h
@@ -18,11 +18,12 @@ namespace internal {
// A simple in-place heapsort implementation.
// Follow the implementation in https://en.wikipedia.org/wiki/Heapsort.
-LIBC_INLINE void heap_sort(const Array &array) {
- size_t end = array.size();
+template <typename A, typename F>
+LIBC_INLINE void heap_sort(const A &array, const F &is_less) {
+ size_t end = array.len();
size_t start = end / 2;
- auto left_child = [](size_t i) -> size_t { return 2 * i + 1; };
+ const auto left_child = [](size_t i) -> size_t { return 2 * i + 1; };
while (end > 1) {
if (start > 0) {
@@ -40,12 +41,11 @@ LIBC_INLINE void heap_sort(const Array &array) {
while (left_child(root) < end) {
size_t child = left_child(root);
// If there are two children, set child to the greater.
- if (child + 1 < end &&
- array.elem_compare(child, array.get(child + 1)) < 0)
+ if ((child + 1 < end) && is_less(array.get(child), array.get(child + 1)))
++child;
// If the root is less than the greater child
- if (array.elem_compare(root, array.get(child)) >= 0)
+ if (!is_less(array.get(root), array.get(child)))
break;
// Swap the root with the greater child and continue sifting down.
diff --git a/libc/src/stdlib/qsort.cpp b/libc/src/stdlib/qsort.cpp
index 65a63c239f5c..0bf5fc798052 100644
--- a/libc/src/stdlib/qsort.cpp
+++ b/libc/src/stdlib/qsort.cpp
@@ -18,14 +18,12 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(void, qsort,
(void *array, size_t array_size, size_t elem_size,
int (*compare)(const void *, const void *))) {
- if (array == nullptr || array_size == 0 || elem_size == 0)
- return;
- internal::Comparator c(compare);
- auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size,
- elem_size, c);
+ const auto is_less = [compare](const void *a, const void *b) -> bool {
+ return compare(a, b) < 0;
+ };
- internal::sort(arr);
+ internal::unstable_sort(array, array_size, elem_size, is_less);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/qsort_data.h b/libc/src/stdlib/qsort_data.h
index c529d55ca46f..aa6d9bbc123d 100644
--- a/libc/src/stdlib/qsort_data.h
+++ b/libc/src/stdlib/qsort_data.h
@@ -17,91 +17,122 @@
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-using Compare = int(const void *, const void *);
-using CompareWithState = int(const void *, const void *, void *);
-
-enum class CompType { COMPARE, COMPARE_WITH_STATE };
-
-struct Comparator {
- union {
- Compare *comp_func;
- CompareWithState *comp_func_r;
- };
- const CompType comp_type;
-
- void *arg;
-
- Comparator(Compare *func)
- : comp_func(func), comp_type(CompType::COMPARE), arg(nullptr) {}
-
- Comparator(CompareWithState *func, void *arg_val)
- : comp_func_r(func), comp_type(CompType::COMPARE_WITH_STATE),
- arg(arg_val) {}
-
-#if defined(__clang__)
- // Recent upstream changes to -fsanitize=function find more instances of
- // function type mismatches. One case is with the comparator passed to this
- // class. Libraries will tend to pass comparators that take pointers to
- // varying types while this comparator expects to accept const void pointers.
- // Ideally those tools would pass a function that strictly accepts const
- // void*s to avoid UB, or would use qsort_r to pass their own comparator.
- [[clang::no_sanitize("function")]]
-#endif
- int comp_vals(const void *a, const void *b) const {
- if (comp_type == CompType::COMPARE) {
- return comp_func(a, b);
- } else {
- return comp_func_r(a, b, arg);
+class ArrayGenericSize {
+ cpp::byte *array_base;
+ size_t array_len;
+ size_t elem_size;
+
+ LIBC_INLINE cpp::byte *get_internal(size_t i) const {
+ return array_base + (i * elem_size);
+ }
+
+public:
+ LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e)
+ : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s),
+ elem_size(e) {}
+
+ static constexpr bool has_fixed_size() { return false; }
+
+ LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
+
+ LIBC_INLINE void swap(size_t i, size_t j) const {
+ // It's possible to use 8 byte blocks with `uint64_t`, but that
+ // generates more machine code as the remainder loop gets
+ // unrolled, plus 4 byte operations are more likely to be
+ // efficient on a wider variety of hardware. On x86 LLVM tends
+ // to unroll the block loop again into 2 16 byte swaps per
+ // iteration which is another reason that 4 byte blocks yields
+ // good performance even for big types.
+ using block_t = uint32_t;
+ constexpr size_t BLOCK_SIZE = sizeof(block_t);
+
+ alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE];
+
+ cpp::byte *elem_i = get_internal(i);
+ cpp::byte *elem_j = get_internal(j);
+
+ const size_t elem_size_rem = elem_size % BLOCK_SIZE;
+ const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem);
+
+ while (elem_i != elem_i_block_end) {
+ __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE);
+ __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE);
+ __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE);
+
+ elem_i += BLOCK_SIZE;
+ elem_j += BLOCK_SIZE;
+ }
+
+ for (size_t n = 0; n < elem_size_rem; ++n) {
+ cpp::byte tmp = elem_i[n];
+ elem_i[n] = elem_j[n];
+ elem_j[n] = tmp;
}
}
+
+ LIBC_INLINE size_t len() const { return array_len; }
+
+ // Make an Array starting at index |i| and length |s|.
+ LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const {
+ return ArrayGenericSize(get_internal(i), s, elem_size);
+ }
+
+ // Reset this Array to point at a different interval of the same
+ // items starting at index |i|.
+ LIBC_INLINE void reset_bounds(size_t i, size_t s) {
+ array_base = get_internal(i);
+ array_len = s;
+ }
};
-class Array {
- uint8_t *array;
- size_t array_size;
- size_t elem_size;
- Comparator compare;
+// Having a specialized Array type for sorting that knows at
+// compile-time what the size of the element is, allows for much more
+// efficient swapping and for cheaper offset calculations.
+template <size_t ELEM_SIZE> class ArrayFixedSize {
+ cpp::byte *array_base;
+ size_t array_len;
-public:
- Array(uint8_t *a, size_t s, size_t e, Comparator c)
- : array(a), array_size(s), elem_size(e), compare(c) {}
-
- uint8_t *get(size_t i) const { return array + i * elem_size; }
-
- void swap(size_t i, size_t j) const {
- uint8_t *elem_i = get(i);
- uint8_t *elem_j = get(j);
- for (size_t b = 0; b < elem_size; ++b) {
- uint8_t temp = elem_i[b];
- elem_i[b] = elem_j[b];
- elem_j[b] = temp;
- }
+ LIBC_INLINE cpp::byte *get_internal(size_t i) const {
+ return array_base + (i * ELEM_SIZE);
}
- int elem_compare(size_t i, const uint8_t *other) const {
- // An element must compare equal to itself so we don't need to consult the
- // user provided comparator.
- if (get(i) == other)
- return 0;
- return compare.comp_vals(get(i), other);
+public:
+ LIBC_INLINE ArrayFixedSize(void *a, size_t s)
+ : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {}
+
+ // Beware this function is used a heuristic for cheap to swap types, so
+ // instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad
+ // idea perf wise.
+ static constexpr bool has_fixed_size() { return true; }
+
+ LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
+
+ LIBC_INLINE void swap(size_t i, size_t j) const {
+ alignas(32) cpp::byte tmp[ELEM_SIZE];
+
+ cpp::byte *elem_i = get_internal(i);
+ cpp::byte *elem_j = get_internal(j);
+
+ __builtin_memcpy(tmp, elem_i, ELEM_SIZE);
+ __builtin_memmove(elem_i, elem_j, ELEM_SIZE);
+ __builtin_memcpy(elem_j, tmp, ELEM_SIZE);
}
- size_t size() const { return array_size; }
+ LIBC_INLINE size_t len() const { return array_len; }
- // Make an Array starting at index |i| and size |s|.
- LIBC_INLINE Array make_array(size_t i, size_t s) const {
- return Array(get(i), s, elem_size, compare);
+ // Make an Array starting at index |i| and length |s|.
+ LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const {
+ return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s);
}
- // Reset this Array to point at a different interval of the same items.
- LIBC_INLINE void reset_bounds(uint8_t *a, size_t s) {
- array = a;
- array_size = s;
+ // Reset this Array to point at a different interval of the same
+ // items starting at index |i|.
+ LIBC_INLINE void reset_bounds(size_t i, size_t s) {
+ array_base = get_internal(i);
+ array_len = s;
}
};
-using SortingRoutine = void(const Array &);
-
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/qsort_pivot.h b/libc/src/stdlib/qsort_pivot.h
new file mode 100644
index 000000000000..b27e74663d90
--- /dev/null
+++ b/libc/src/stdlib/qsort_pivot.h
@@ -0,0 +1,85 @@
+//===-- Implementation header for qsort utilities ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
+#define LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
+
+#include <stddef.h> // For size_t
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Recursively select a pseudomedian if above this threshold.
+constexpr size_t PSEUDO_MEDIAN_REC_THRESHOLD = 64;
+
+// Selects a pivot from `array`. Algorithm taken from glidesort by Orson Peters.
+//
+// This chooses a pivot by sampling an adaptive amount of points, approximating
+// the quality of a median of sqrt(n) elements.
+template <typename A, typename F>
+size_t choose_pivot(const A &array, const F &is_less) {
+ const size_t len = array.len();
+
+ if (len < 8) {
+ return 0;
+ }
+
+ const size_t len_div_8 = len / 8;
+
+ const size_t a = 0; // [0, floor(n/8))
+ const size_t b = len_div_8 * 4; // [4*floor(n/8), 5*floor(n/8))
+ const size_t c = len_div_8 * 7; // [7*floor(n/8), 8*floor(n/8))
+
+ if (len < PSEUDO_MEDIAN_REC_THRESHOLD)
+ return median3(array, a, b, c, is_less);
+ else
+ return median3_rec(array, a, b, c, len_div_8, is_less);
+}
+
+// Calculates an approximate median of 3 elements from sections a, b, c, or
+// recursively from an approximation of each, if they're large enough. By
+// dividing the size of each section by 8 when recursing we have logarithmic
+// recursion depth and overall sample from f(n) = 3*f(n/8) -> f(n) =
+// O(n^(log(3)/log(8))) ~= O(n^0.528) elements.
+template <typename A, typename F>
+size_t median3_rec(const A &array, size_t a, size_t b, size_t c, size_t n,
+ const F &is_less) {
+ if (n * 8 >= PSEUDO_MEDIAN_REC_THRESHOLD) {
+ const size_t n8 = n / 8;
+ a = median3_rec(array, a, a + (n8 * 4), a + (n8 * 7), n8, is_less);
+ b = median3_rec(array, b, b + (n8 * 4), b + (n8 * 7), n8, is_less);
+ c = median3_rec(array, c, c + (n8 * 4), c + (n8 * 7), n8, is_less);
+ }
+ return median3(array, a, b, c, is_less);
+}
+
+/// Calculates the median of 3 elements.
+template <typename A, typename F>
+size_t median3(const A &array, size_t a, size_t b, size_t c, const F &is_less) {
+ const void *a_ptr = array.get(a);
+ const void *b_ptr = array.get(b);
+ const void *c_ptr = array.get(c);
+
+ const bool x = is_less(a_ptr, b_ptr);
+ const bool y = is_less(a_ptr, c_ptr);
+ if (x == y) {
+ // If x=y=0 then b, c <= a. In this case we want to return max(b, c).
+ // If x=y=1 then a < b, c. In this case we want to return min(b, c).
+ // By toggling the outcome of b < c using XOR x we get this behavior.
+ const bool z = is_less(b_ptr, c_ptr);
+ return z ^ x ? c : b;
+ } else {
+ // Either c <= a < b or b <= a < c, thus a is our median.
+ return a;
+ }
+}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H
diff --git a/libc/src/stdlib/qsort_r.cpp b/libc/src/stdlib/qsort_r.cpp
index bf61a40e8473..4e60998b6a6d 100644
--- a/libc/src/stdlib/qsort_r.cpp
+++ b/libc/src/stdlib/qsort_r.cpp
@@ -19,13 +19,12 @@ LLVM_LIBC_FUNCTION(void, qsort_r,
(void *array, size_t array_size, size_t elem_size,
int (*compare)(const void *, const void *, void *),
void *arg)) {
- if (array == nullptr || array_size == 0 || elem_size == 0)
- return;
- internal::Comparator c(compare, arg);
- auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size,
- elem_size, c);
- internal::sort(arr);
+ const auto is_less = [compare, arg](const void *a, const void *b) -> bool {
+ return compare(a, b, arg) < 0;
+ };
+
+ internal::unstable_sort(array, array_size, elem_size, is_less);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/qsort_util.h b/libc/src/stdlib/qsort_util.h
index d42adde06d97..7882b829d327 100644
--- a/libc/src/stdlib/qsort_util.h
+++ b/libc/src/stdlib/qsort_util.h
@@ -27,11 +27,48 @@
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-#if LIBC_QSORT_IMPL == LIBC_QSORT_QUICK_SORT
-constexpr auto sort = quick_sort;
-#elif LIBC_QSORT_IMPL == LIBC_QSORT_HEAP_SORT
-constexpr auto sort = heap_sort;
-#endif
+template <bool USE_QUICKSORT, typename F>
+LIBC_INLINE void unstable_sort_impl(void *array, size_t array_len,
+ size_t elem_size, const F &is_less) {
+ if (array == nullptr || array_len == 0 || elem_size == 0)
+ return;
+
+ if constexpr (USE_QUICKSORT) {
+ switch (elem_size) {
+ case 4: {
+ auto arr_fixed_size = internal::ArrayFixedSize<4>(array, array_len);
+ quick_sort(arr_fixed_size, is_less);
+ return;
+ }
+ case 8: {
+ auto arr_fixed_size = internal::ArrayFixedSize<8>(array, array_len);
+ quick_sort(arr_fixed_size, is_less);
+ return;
+ }
+ case 16: {
+ auto arr_fixed_size = internal::ArrayFixedSize<16>(array, array_len);
+ quick_sort(arr_fixed_size, is_less);
+ return;
+ }
+ default:
+ auto arr_generic_size =
+ internal::ArrayGenericSize(array, array_len, elem_size);
+ quick_sort(arr_generic_size, is_less);
+ return;
+ }
+ } else {
+ auto arr_generic_size =
+ internal::ArrayGenericSize(array, array_len, elem_size);
+ heap_sort(arr_generic_size, is_less);
+ }
+}
+
+template <typename F>
+LIBC_INLINE void unstable_sort(void *array, size_t array_len, size_t elem_size,
+ const F &is_less) {
+#define USE_QUICK_SORT ((LIBC_QSORT_IMPL) == (LIBC_QSORT_QUICK_SORT))
+ unstable_sort_impl<USE_QUICK_SORT, F>(array, array_len, elem_size, is_less);
+}
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/quick_sort.h b/libc/src/stdlib/quick_sort.h
index 82b90a7d511d..9ab283025001 100644
--- a/libc/src/stdlib/quick_sort.h
+++ b/libc/src/stdlib/quick_sort.h
@@ -9,84 +9,175 @@
#ifndef LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
#define LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H
-#include "src/__support/macros/attributes.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/cstddef.h"
#include "src/__support/macros/config.h"
-#include "src/stdlib/qsort_data.h"
+#include "src/stdlib/qsort_pivot.h"
#include <stdint.h>
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-// A simple quicksort implementation using the Hoare partition scheme.
-LIBC_INLINE size_t partition(const Array &array) {
- const size_t array_size = array.size();
- size_t pivot_index = array_size / 2;
- uint8_t *pivot = array.get(pivot_index);
- size_t i = 0;
- size_t j = array_size - 1;
+// Branchless Lomuto partition based on the implementation by Lukas
+// Bergdoll and Orson Peters
+// https://github.com/Voultapher/sort-research-rs/blob/main/writeup/lomcyc_partition/text.md.
+// Simplified to avoid having to stack allocate.
+template <typename A, typename F>
+LIBC_INLINE size_t partition_lomuto_branchless(const A &array,
+ const void *pivot,
+ const F &is_less) {
+ const size_t array_len = array.len();
+
+ size_t left = 0;
+ size_t right = 0;
+
+ while (right < array_len) {
+ const bool right_is_lt = is_less(array.get(right), pivot);
+ array.swap(left, right);
+ left += static_cast<size_t>(right_is_lt);
+ right += 1;
+ }
+
+ return left;
+}
+
+// Optimized for large types that are expensive to move. Not optimized
+// for integers. It's possible to use a cyclic permutation here for
+// large types as done in ipnsort but the advantages of this are limited
+// as `is_less` is a small wrapper around a call to a function pointer
+// and won't incur much binary-size overhead. The other reason to use
+// cyclic permutation is to have more efficient swapping, but we don't
+// know the element size so this isn't applicable here either.
+template <typename A, typename F>
+LIBC_INLINE size_t partition_hoare_branchy(const A &array, const void *pivot,
+ const F &is_less) {
+ const size_t array_len = array.len();
+
+ size_t left = 0;
+ size_t right = array_len;
while (true) {
- int compare_i, compare_j;
-
- while ((compare_i = array.elem_compare(i, pivot)) < 0)
- ++i;
- while ((compare_j = array.elem_compare(j, pivot)) > 0)
- --j;
-
- // At some point i will crossover j so we will definitely break out of
- // this while loop.
- if (i >= j)
- return j + 1;
-
- array.swap(i, j);
-
- // The pivot itself might have got swapped so we will update the pivot.
- if (i == pivot_index) {
- pivot = array.get(j);
- pivot_index = j;
- } else if (j == pivot_index) {
- pivot = array.get(i);
- pivot_index = i;
+ while (left < right && is_less(array.get(left), pivot))
+ ++left;
+
+ while (true) {
+ --right;
+ if (left >= right || is_less(array.get(right), pivot)) {
+ break;
+ }
}
- if (compare_i == 0 && compare_j == 0) {
- // If we do not move the pointers, we will end up with an
- // infinite loop as i and j will be stuck without advancing.
- ++i;
- --j;
- }
+ if (left >= right)
+ break;
+
+ array.swap(left, right);
+ ++left;
+ }
+
+ return left;
+}
+
+template <typename A, typename F>
+LIBC_INLINE size_t partition(const A &array, size_t pivot_index,
+ const F &is_less) {
+ // Place the pivot at the beginning of the array.
+ if (pivot_index != 0) {
+ array.swap(0, pivot_index);
}
+
+ const A array_without_pivot = array.make_array(1, array.len() - 1);
+ const void *pivot = array.get(0);
+
+ size_t num_lt;
+ if constexpr (A::has_fixed_size()) {
+ // Branchless Lomuto avoid branch misprediction penalties, but
+ // it also swaps more often which is only faster if the swap is a fast
+ // constant operation.
+ num_lt = partition_lomuto_branchless(array_without_pivot, pivot, is_less);
+ } else {
+ num_lt = partition_hoare_branchy(array_without_pivot, pivot, is_less);
+ }
+
+ // Place the pivot between the two partitions.
+ array.swap(0, num_lt);
+
+ return num_lt;
}
-LIBC_INLINE void quick_sort(Array array) {
+template <typename A, typename F>
+LIBC_INLINE void quick_sort_impl(A &array, const void *ancestor_pivot,
+ size_t limit, const F &is_less) {
while (true) {
- const size_t array_size = array.size();
- if (array_size <= 1)
+ const size_t array_len = array.len();
+ if (array_len <= 1)
return;
- size_t split_index = partition(array);
- if (array_size == 2)
- // The partition operation sorts the two element array.
+
+ // If too many bad pivot choices were made, simply fall back to
+ // heapsort in order to guarantee `O(N x log(N))` worst-case.
+ if (limit == 0) {
+ heap_sort(array, is_less);
return;
+ }
- // Make Arrays describing the two sublists that still need sorting.
- Array left = array.make_array(0, split_index);
- Array right = array.make_array(split_index, array.size() - split_index);
-
- // Recurse to sort the smaller of the two, and then loop round within this
- // function to sort the larger. This way, recursive call depth is bounded
- // by log2 of the total array size, because every recursive call is sorting
- // a list at most half the length of the one in its caller.
- if (left.size() < right.size()) {
- quick_sort(left);
- array.reset_bounds(right.get(0), right.size());
- } else {
- quick_sort(right);
- array.reset_bounds(left.get(0), left.size());
+ limit -= 1;
+
+ const size_t pivot_index = choose_pivot(array, is_less);
+
+ // If the chosen pivot is equal to the predecessor, then it's the smallest
+ // element in the slice. Partition the slice into elements equal to and
+ // elements greater than the pivot. This case is usually hit when the slice
+ // contains many duplicate elements.
+ if (ancestor_pivot) {
+ if (!is_less(ancestor_pivot, array.get(pivot_index))) {
+ const size_t num_lt =
+ partition(array, pivot_index,
+ [is_less](const void *a, const void *b) -> bool {
+ return !is_less(b, a);
+ });
+
+ // Continue sorting elements greater than the pivot. We know that
+ // `num_lt` cont
+ array.reset_bounds(num_lt + 1, array.len() - (num_lt + 1));
+ ancestor_pivot = nullptr;
+ continue;
+ }
}
+
+ size_t split_index = partition(array, pivot_index, is_less);
+
+ if (array_len == 2)
+ // The partition operation sorts the two element array.
+ return;
+
+ // Split the array into `left`, `pivot`, and `right`.
+ A left = array.make_array(0, split_index);
+ const void *pivot = array.get(split_index);
+ const size_t right_start = split_index + 1;
+ A right = array.make_array(right_start, array.len() - right_start);
+
+ // Recurse into the left side. We have a fixed recursion limit,
+ // testing shows no real benefit for recursing into the shorter
+ // side.
+ quick_sort_impl(left, ancestor_pivot, limit, is_less);
+
+ // Continue with the right side.
+ array = right;
+ ancestor_pivot = pivot;
}
}
+constexpr size_t ilog2(size_t n) { return cpp::bit_width(n) - 1; }
+
+template <typename A, typename F>
+LIBC_INLINE void quick_sort(A &array, const F &is_less) {
+ const void *ancestor_pivot = nullptr;
+ // Limit the number of imbalanced partitions to `2 * floor(log2(len))`.
+ // The binary OR by one is used to eliminate the zero-check in the logarithm.
+ const size_t limit = 2 * ilog2((array.len() | 1));
+ quick_sort_impl(array, ancestor_pivot, limit, is_less);
+}
+
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL