summaryrefslogtreecommitdiff
path: root/libcxx/include/__bit_reference
diff options
context:
space:
mode:
authorPeng Liu <winner245@hotmail.com>2025-03-04 17:15:36 -0500
committerGitHub <noreply@github.com>2025-03-04 17:15:36 -0500
commita12744ff05bbc2d0de711afb8b3a1c7a03a33914 (patch)
treef962ce00e4d60322b88e114d474b4f29cc5ca2ac /libcxx/include/__bit_reference
parentb08769c3ec7ef486fd9b8a315856af53c5a58957 (diff)
[libc++] Optimize ranges::swap_ranges for vector<bool>::iterator (#121150)
This PR optimizes the performance of `std::ranges::swap_ranges` for `vector<bool>::iterator`, addressing a subtask outlined in issue #64038. The optimizations yield performance improvements of up to **611x** for aligned range swap and **78x** for unaligned range swap comparison. Additionally, comprehensive tests covering up to 4 storage words (256 bytes) with odd and even bit sizes are provided, which validate the proposed optimizations in this patch.
Diffstat (limited to 'libcxx/include/__bit_reference')
-rw-r--r--libcxx/include/__bit_reference157
1 files changed, 6 insertions, 151 deletions
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 079d1de9dd52..f91250c4a440 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -16,6 +16,7 @@
#include <__algorithm/copy_n.h>
#include <__algorithm/equal.h>
#include <__algorithm/min.h>
+#include <__algorithm/swap_ranges.h>
#include <__assert>
#include <__bit/countr.h>
#include <__compare/ordering.h>
@@ -215,152 +216,6 @@ private:
__mask_(__m) {}
};
-// swap_ranges
-
-template <class _Cl, class _Cr>
-_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_aligned(
- __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
- using _I1 = __bit_iterator<_Cl, false>;
- using difference_type = typename _I1::difference_type;
- using __storage_type = typename _I1::__storage_type;
-
- const int __bits_per_word = _I1::__bits_per_word;
- difference_type __n = __last - __first;
- if (__n > 0) {
- // do first word
- if (__first.__ctz_ != 0) {
- unsigned __clz = __bits_per_word - __first.__ctz_;
- difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
- __n -= __dn;
- __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1;
- *__first.__seg_ |= __b2;
- __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
- ++__first.__seg_;
- // __first.__ctz_ = 0;
- }
- // __first.__ctz_ == 0;
- // do middle words
- for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_)
- swap(*__first.__seg_, *__result.__seg_);
- // do last word
- if (__n > 0) {
- __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1;
- *__first.__seg_ |= __b2;
- __result.__ctz_ = static_cast<unsigned>(__n);
- }
- }
- return __result;
-}
-
-template <class _Cl, class _Cr>
-_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_unaligned(
- __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) {
- using _I1 = __bit_iterator<_Cl, false>;
- using difference_type = typename _I1::difference_type;
- using __storage_type = typename _I1::__storage_type;
-
- const int __bits_per_word = _I1::__bits_per_word;
- difference_type __n = __last - __first;
- if (__n > 0) {
- // do first word
- if (__first.__ctz_ != 0) {
- unsigned __clz_f = __bits_per_word - __first.__ctz_;
- difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
- __n -= __dn;
- __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- unsigned __clz_r = __bits_per_word - __result.__ctz_;
- __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
- __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- if (__result.__ctz_ > __first.__ctz_) {
- unsigned __s = __result.__ctz_ - __first.__ctz_;
- *__result.__seg_ |= __b1 << __s;
- *__first.__seg_ |= __b2 >> __s;
- } else {
- unsigned __s = __first.__ctz_ - __result.__ctz_;
- *__result.__seg_ |= __b1 >> __s;
- *__first.__seg_ |= __b2 << __s;
- }
- __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
- __dn -= __ddn;
- if (__dn > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __dn);
- __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- unsigned __s = __first.__ctz_ + __ddn;
- *__result.__seg_ |= __b1 >> __s;
- *__first.__seg_ |= __b2 << __s;
- __result.__ctz_ = static_cast<unsigned>(__dn);
- }
- ++__first.__seg_;
- // __first.__ctz_ = 0;
- }
- // __first.__ctz_ == 0;
- // do middle words
- __storage_type __m = ~__storage_type(0) << __result.__ctz_;
- unsigned __clz_r = __bits_per_word - __result.__ctz_;
- for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
- __storage_type __b1 = *__first.__seg_;
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 << __result.__ctz_;
- *__first.__seg_ = __b2 >> __result.__ctz_;
- ++__result.__seg_;
- __b2 = *__result.__seg_ & ~__m;
- *__result.__seg_ &= __m;
- *__result.__seg_ |= __b1 >> __clz_r;
- *__first.__seg_ |= __b2 << __clz_r;
- }
- // do last word
- if (__n > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __storage_type __b1 = *__first.__seg_ & __m;
- *__first.__seg_ &= ~__m;
- __storage_type __dn = std::min<__storage_type>(__n, __clz_r);
- __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
- __storage_type __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 << __result.__ctz_;
- *__first.__seg_ |= __b2 >> __result.__ctz_;
- __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
- __result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
- __n -= __dn;
- if (__n > 0) {
- __m = ~__storage_type(0) >> (__bits_per_word - __n);
- __b2 = *__result.__seg_ & __m;
- *__result.__seg_ &= ~__m;
- *__result.__seg_ |= __b1 >> __dn;
- *__first.__seg_ |= __b2 << __dn;
- __result.__ctz_ = static_cast<unsigned>(__n);
- }
- }
- }
- return __result;
-}
-
-template <class _Cl, class _Cr>
-inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> swap_ranges(
- __bit_iterator<_Cl, false> __first1, __bit_iterator<_Cl, false> __last1, __bit_iterator<_Cr, false> __first2) {
- if (__first1.__ctz_ == __first2.__ctz_)
- return std::__swap_ranges_aligned(__first1, __last1, __first2);
- return std::__swap_ranges_unaligned(__first1, __last1, __first2);
-}
-
// rotate
template <class _Cp>
@@ -644,14 +499,14 @@ private:
template <class _AlgPolicy>
friend struct __copy_backward_impl;
template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
__swap_ranges_aligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false>
__swap_ranges_unaligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
- template <class _Cl, class _Cr>
- friend __bit_iterator<_Cr, false>
- swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
+ template <class, class _Cl, class _Cr>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
+ __swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Dp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);