diff options
| author | Nikolas Klauser <nikolasklauser@berlin.de> | 2024-08-27 16:54:05 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-27 16:54:05 +0200 |
| commit | d4ffccfce103b01401b8a9222e373f2d404f8439 (patch) | |
| tree | fb79ead2c382c470936e889cc15847a803d84049 /libcxx/include/__algorithm | |
| parent | e8863748ba76462cdbfdcbd7bd99cadf392c01f4 (diff) | |
[libc++] Simplify the implementation of std::sort a bit (#104902)
This does a few things to canonicalize the library a bit. Specifically:
- use `__desugars_to_v` instead of the custom `__is_simple_comparator`
- make `__use_branchless_sort` an inline variable template (`inline const bool`) instead of an `integral_constant` alias
- remove the `_maybe_branchless` versions of the `__sortN` functions and instead
  overload the `__sortN` functions themselves based on whether we can do branchless sorting.
Diffstat (limited to 'libcxx/include/__algorithm')
| -rw-r--r-- | libcxx/include/__algorithm/comp.h | 3 | ||||
| -rw-r--r-- | libcxx/include/__algorithm/ranges_minmax.h | 2 | ||||
| -rw-r--r-- | libcxx/include/__algorithm/sort.h | 285 |
3 files changed, 124 insertions, 166 deletions
diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h index 1f38f5d2d99b..ab3c59841882 100644 --- a/libcxx/include/__algorithm/comp.h +++ b/libcxx/include/__algorithm/comp.h @@ -43,6 +43,9 @@ struct __less<void, void> { }; template <class _Tp> +inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true; + +template <class _Tp> inline const bool __desugars_to_v<__totally_ordered_less_tag, __less<>, _Tp, _Tp> = is_integral<_Tp>::value; _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__algorithm/ranges_minmax.h b/libcxx/include/__algorithm/ranges_minmax.h index 9b8551d22134..1b43b1e19cde 100644 --- a/libcxx/include/__algorithm/ranges_minmax.h +++ b/libcxx/include/__algorithm/ranges_minmax.h @@ -88,7 +88,7 @@ struct __minmax { // vectorize the code. if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> && __is_cheap_to_copy<_ValueT> & __is_identity<_Proj>::value && - __desugars_to_v<__totally_ordered_less_tag, _Comp, _ValueT, _ValueT>) { + __desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) { minmax_result<_ValueT> __result = {__r[0], __r[0]}; for (auto __e : __r) { if (__e < __result.min) diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h index 07b5814639e9..77ff1533c794 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -27,9 +27,11 @@ #include <__functional/ranges_operations.h> #include <__iterator/iterator_traits.h> #include <__type_traits/conditional.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/disjunction.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_constant_evaluated.h> +#include <__type_traits/is_trivially_copyable.h> #include <__utility/move.h> #include <__utility/pair.h> #include <climits> @@ -44,110 +46,11 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -// stable, 2-3 compares, 0-2 swaps - -template <class _AlgPolicy, class _Compare, class _ForwardIterator> 
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned -__sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) { - using _Ops = _IterOps<_AlgPolicy>; - - unsigned __r = 0; - if (!__c(*__y, *__x)) // if x <= y - { - if (!__c(*__z, *__y)) // if y <= z - return __r; // x <= y && y <= z - // x <= y && y > z - _Ops::iter_swap(__y, __z); // x <= z && y < z - __r = 1; - if (__c(*__y, *__x)) // if x > y - { - _Ops::iter_swap(__x, __y); // x < y && y <= z - __r = 2; - } - return __r; // x <= y && y < z - } - if (__c(*__z, *__y)) // x > y, if y > z - { - _Ops::iter_swap(__x, __z); // x < y && y < z - __r = 1; - return __r; - } - _Ops::iter_swap(__x, __y); // x > y && y <= z - __r = 1; // x < y && x <= z - if (__c(*__z, *__y)) // if y > z - { - _Ops::iter_swap(__y, __z); // x <= y && y < z - __r = 2; - } - return __r; -} // x <= y && y <= z - -// stable, 3-6 compares, 0-5 swaps - -template <class _AlgPolicy, class _Compare, class _ForwardIterator> -_LIBCPP_HIDE_FROM_ABI void -__sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _Compare __c) { - using _Ops = _IterOps<_AlgPolicy>; - std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); - if (__c(*__x4, *__x3)) { - _Ops::iter_swap(__x3, __x4); - if (__c(*__x3, *__x2)) { - _Ops::iter_swap(__x2, __x3); - if (__c(*__x2, *__x1)) { - _Ops::iter_swap(__x1, __x2); - } - } - } -} - -// stable, 4-10 compares, 0-9 swaps - -template <class _AlgPolicy, class _Comp, class _ForwardIterator> -_LIBCPP_HIDE_FROM_ABI void -__sort5(_ForwardIterator __x1, - _ForwardIterator __x2, - _ForwardIterator __x3, - _ForwardIterator __x4, - _ForwardIterator __x5, - _Comp __comp) { - using _Ops = _IterOps<_AlgPolicy>; - - std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp); - if (__comp(*__x5, *__x4)) { - _Ops::iter_swap(__x4, __x5); - if (__comp(*__x4, *__x3)) { - _Ops::iter_swap(__x3, __x4); - if (__comp(*__x3, *__x2)) { - _Ops::iter_swap(__x2, __x3); - 
if (__comp(*__x2, *__x1)) { - _Ops::iter_swap(__x1, __x2); - } - } - } - } -} - -// The comparator being simple is a prerequisite for using the branchless optimization. -template <class _Tp> -struct __is_simple_comparator : false_type {}; -template <> -struct __is_simple_comparator<__less<>&> : true_type {}; -template <class _Tp> -struct __is_simple_comparator<less<_Tp>&> : true_type {}; -template <class _Tp> -struct __is_simple_comparator<greater<_Tp>&> : true_type {}; -#if _LIBCPP_STD_VER >= 20 -template <> -struct __is_simple_comparator<ranges::less&> : true_type {}; -template <> -struct __is_simple_comparator<ranges::greater&> : true_type {}; -#endif - template <class _Compare, class _Iter, class _Tp = typename iterator_traits<_Iter>::value_type> -using __use_branchless_sort = - integral_constant<bool, - __libcpp_is_contiguous_iterator<_Iter>::value && sizeof(_Tp) <= sizeof(void*) && - is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>; +inline const bool __use_branchless_sort = + __libcpp_is_contiguous_iterator<_Iter>::value && __is_cheap_to_copy<_Tp> && is_arithmetic<_Tp>::value && + (__desugars_to_v<__less_tag, __remove_cvref_t<_Compare>, _Tp, _Tp> || + __desugars_to_v<__greater_tag, __remove_cvref_t<_Compare>, _Tp, _Tp>); namespace __detail { @@ -158,59 +61,88 @@ enum { __block_size = sizeof(uint64_t) * 8 }; // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary. template <class _Compare, class _RandomAccessIterator> -inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; bool __r = __c(*__x, *__y); value_type __tmp = __r ? 
*__x : *__y; *__y = __r ? *__y : *__x; *__x = __tmp; + return !__r; } // Ensures that *__x, *__y and *__z are ordered according to the comparator __c, // under the assumption that *__y and *__z are already ordered. template <class _Compare, class _RandomAccessIterator> -inline _LIBCPP_HIDE_FROM_ABI void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; - bool __r = __c(*__z, *__x); - value_type __tmp = __r ? *__z : *__x; - *__z = __r ? *__x : *__z; - __r = __c(__tmp, *__y); - *__x = __r ? *__x : *__y; - *__y = __r ? *__y : __tmp; + bool __r1 = __c(*__z, *__x); + value_type __tmp = __r1 ? *__z : *__x; + *__z = __r1 ? *__x : *__z; + bool __r2 = __c(__tmp, *__y); + *__x = __r2 ? *__x : *__y; + *__y = __r2 ? 
*__y : __tmp; + return !__r1 || !__r2; } +// stable, 2-3 compares, 0-2 swaps + template <class, class _Compare, class _RandomAccessIterator, - __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless( - _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { - std::__cond_swap<_Compare>(__x2, __x3, __c); - std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c); + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__sort3(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { + bool __swapped1 = std::__cond_swap<_Compare>(__x2, __x3, __c); + bool __swapped2 = std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c); + return __swapped1 || __swapped2; } template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, - __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless( - _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { - std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); -} + __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__sort3(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { + using _Ops = _IterOps<_AlgPolicy>; + + if (!__c(*__y, *__x)) // if x <= y + { + if (!__c(*__z, *__y)) // if y <= z + return false; // x <= y && y <= z + // x <= y && y > z + _Ops::iter_swap(__y, __z); // x <= z && y < z + if (__c(*__y, *__x)) // if x > y + _Ops::iter_swap(__x, __y); // x < y && y <= z + return true; // x <= y && y < z + } + if (__c(*__z, *__y)) // x > y, if y > z + { + 
_Ops::iter_swap(__x, __z); // x < y && y < z + return true; + } + _Ops::iter_swap(__x, __y); // x > y && y <= z + // x < y && x <= z + if (__c(*__z, *__y)) // if y > z + _Ops::iter_swap(__y, __z); // x <= y && y < z + return true; +} // x <= y && y <= z + +// stable, 3-6 compares, 0-5 swaps template <class, class _Compare, class _RandomAccessIterator, - __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _Compare __c) { + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort4(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _Compare __c) { std::__cond_swap<_Compare>(__x1, __x3, __c); std::__cond_swap<_Compare>(__x2, __x4, __c); std::__cond_swap<_Compare>(__x1, __x2, __c); @@ -221,27 +153,39 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, - __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _Compare __c) { - std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c); + __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort4(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _Compare __c) { + using _Ops = _IterOps<_AlgPolicy>; + std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); + if (__c(*__x4, *__x3)) { + _Ops::iter_swap(__x3, __x4); + if (__c(*__x3, *__x2)) { + 
_Ops::iter_swap(__x2, __x3); + if (__c(*__x2, *__x1)) { + _Ops::iter_swap(__x1, __x2); + } + } + } } +// stable, 4-10 compares, 0-9 swaps + template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, - __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _RandomAccessIterator __x5, - _Compare __c) { + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort5(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _RandomAccessIterator __x5, + _Compare __c) { std::__cond_swap<_Compare>(__x1, __x2, __c); std::__cond_swap<_Compare>(__x4, __x5, __c); std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c); @@ -253,16 +197,29 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, - __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _RandomAccessIterator __x5, - _Compare __c) { - std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>( - std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c); + __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort5(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _RandomAccessIterator __x5, + _Compare __comp) { + using _Ops = _IterOps<_AlgPolicy>; + + std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, 
__comp); + if (__comp(*__x5, *__x4)) { + _Ops::iter_swap(__x4, __x5); + if (__comp(*__x4, *__x3)) { + _Ops::iter_swap(__x3, __x4); + if (__comp(*__x3, *__x2)) { + _Ops::iter_swap(__x2, __x3); + if (__comp(*__x2, *__x1)) { + _Ops::iter_swap(__x1, __x2); + } + } + } + } } // Assumes size > 0 @@ -352,14 +309,14 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator _Ops::iter_swap(__first, __last); return true; case 3: - std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp); return true; case 4: - std::__sort4_maybe_branchless<_AlgPolicy, _Comp>( + std::__sort4<_AlgPolicy, _Comp>( __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return true; case 5: - std::__sort5_maybe_branchless<_AlgPolicy, _Comp>( + std::__sort5<_AlgPolicy, _Comp>( __first, __first + difference_type(1), __first + difference_type(2), @@ -370,7 +327,7 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator } typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; _RandomAccessIterator __j = __first + difference_type(2); - std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp); + std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp); const unsigned __limit = 8; unsigned __count = 0; for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) { @@ -777,14 +734,14 @@ void __introsort(_RandomAccessIterator __first, _Ops::iter_swap(__first, __last); return; case 3: - std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); return; case 4: - std::__sort4_maybe_branchless<_AlgPolicy, _Compare>( + 
std::__sort4<_AlgPolicy, _Compare>( __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return; case 5: - std::__sort5_maybe_branchless<_AlgPolicy, _Compare>( + std::__sort5<_AlgPolicy, _Compare>( __first, __first + difference_type(1), __first + difference_type(2), @@ -925,10 +882,8 @@ __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co // Only use bitset partitioning for arithmetic types. We should also check // that the default comparator is in use so that we are sure that there are no // branches in the comparator. - std::__introsort<_AlgPolicy, - _Comp&, - _RandomAccessIterator, - __use_branchless_sort<_Comp, _RandomAccessIterator>::value>(__first, __last, __comp, __depth_limit); + std::__introsort<_AlgPolicy, _Comp&, _RandomAccessIterator, __use_branchless_sort<_Comp, _RandomAccessIterator> >( + __first, __last, __comp, __depth_limit); } template <class _Type, class... _Options> |
