diff options
Diffstat (limited to 'libc/src/math/generic/explogxf.h')
| -rw-r--r-- | libc/src/math/generic/explogxf.h | 235 |
1 files changed, 3 insertions, 232 deletions
diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h index 212ede475854..be4328a4f48b 100644 --- a/libc/src/math/generic/explogxf.h +++ b/libc/src/math/generic/explogxf.h @@ -10,167 +10,17 @@ #define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H #include "common_constants.h" -#include "src/__support/CPP/bit.h" -#include "src/__support/CPP/optional.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/nearest_integer.h" + #include "src/__support/common.h" -#include "src/__support/macros/config.h" #include "src/__support/macros/properties/cpu_features.h" +#include "src/__support/math/exp10f_utils.h" +#include "src/__support/math/exp_utils.h" namespace LIBC_NAMESPACE_DECL { -struct ExpBase { - // Base = e - static constexpr int MID_BITS = 5; - static constexpr int MID_MASK = (1 << MID_BITS) - 1; - // log2(e) * 2^5 - static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS); - // High and low parts of -log(2) * 2^(-5) - static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS); - static constexpr double M_LOGB_2_LO = - -0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS); - // Look up table for bit fields of 2^(i/32) for i = 0..31, generated by Sollya - // with: - // > for i from 0 to 31 do printdouble(round(2^(i/32), D, RN)); - static constexpr int64_t EXP_2_MID[1 << MID_BITS] = { - 0x3ff0000000000000, 0x3ff059b0d3158574, 0x3ff0b5586cf9890f, - 0x3ff11301d0125b51, 0x3ff172b83c7d517b, 0x3ff1d4873168b9aa, - 0x3ff2387a6e756238, 0x3ff29e9df51fdee1, 0x3ff306fe0a31b715, - 0x3ff371a7373aa9cb, 0x3ff3dea64c123422, 0x3ff44e086061892d, - 0x3ff4bfdad5362a27, 0x3ff5342b569d4f82, 0x3ff5ab07dd485429, - 0x3ff6247eb03a5585, 0x3ff6a09e667f3bcd, 0x3ff71f75e8ec5f74, - 0x3ff7a11473eb0187, 0x3ff82589994cce13, 0x3ff8ace5422aa0db, - 0x3ff93737b0cdc5e5, 0x3ff9c49182a3f090, 0x3ffa5503b23e255d, - 0x3ffae89f995ad3ad, 0x3ffb7f76f2fb5e47, 0x3ffc199bdd85529c, - 0x3ffcb720dcef9069, 0x3ffd5818dcfba487, 0x3ffdfc97337b9b5f, - 0x3ffea4afa2a490da, 0x3fff50765b6e4540, - }; - - // Approximating e^dx with degree-5 minimax polynomial generated by Sollya: - // > Q = fpminimax(expm1(x)/x, 4, [|1, D...|], [-log(2)/64, log(2)/64]); - // Then: - // e^dx ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5. - static constexpr double COEFFS[4] = { - 0x1.ffffffffe5bc8p-2, 0x1.555555555cd67p-3, 0x1.5555c2a9b48b4p-5, - 0x1.11112a0e34bdbp-7}; - - LIBC_INLINE static double powb_lo(double dx) { - using fputil::multiply_add; - double dx2 = dx * dx; - double c0 = 1.0 + dx; - // c1 = COEFFS[0] + COEFFS[1] * dx - double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]); - // c2 = COEFFS[2] + COEFFS[3] * dx - double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]); - // r = c4 + c5 * dx^4 - // = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7 - return fputil::polyeval(dx2, c0, c1, c2); - } -}; - -struct Exp10Base : public ExpBase { - // log2(10) * 2^5 - static constexpr double LOG2_B = 0x1.a934f0979a371p1 * (1 << MID_BITS); - // High and low parts of -log10(2) * 2^(-5). - // Notice that since |x * log2(10)| < 150: - // |k| = |round(x * log2(10) * 2^5)| < 2^8 * 2^5 = 2^13 - // So when the FMA instructions are not available, in order for the product - // k * M_LOGB_2_HI - // to be exact, we only store the high part of log10(2) up to 38 bits - // (= 53 - 15) of precision. - // It is generated by Sollya with: - // > round(log10(2), 44, RN); - static constexpr double M_LOGB_2_HI = -0x1.34413509f8p-2 / (1 << MID_BITS); - // > round(log10(2) - 0x1.34413509f8p-2, D, RN); - static constexpr double M_LOGB_2_LO = 0x1.80433b83b532ap-44 / (1 << MID_BITS); - - // Approximating 10^dx with degree-5 minimax polynomial generated by Sollya: - // > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]); - // Then: - // 10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5. - static constexpr double COEFFS[5] = {0x1.26bb1bbb55515p1, 0x1.53524c73bd3eap1, - 0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0, - 0x1.1429e74a98f43p-1}; - - static double powb_lo(double dx) { - using fputil::multiply_add; - double dx2 = dx * dx; - // c0 = 1 + COEFFS[0] * dx - double c0 = multiply_add(dx, Exp10Base::COEFFS[0], 1.0); - // c1 = COEFFS[1] + COEFFS[2] * dx - double c1 = multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]); - // c2 = COEFFS[3] + COEFFS[4] * dx - double c2 = multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]); - // r = c0 + dx^2 * (c1 + c2 * dx^2) - // = c0 + c1 * dx^2 + c2 * dx^4 - // = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5. - return fputil::polyeval(dx2, c0, c1, c2); - } -}; - constexpr int LOG_P1_BITS = 6; constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS; -// N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40] -extern const double LOG_P1_LOG2[LOG_P1_SIZE]; - -// N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40] -extern const double LOG_P1_1_OVER[LOG_P1_SIZE]; - -// Taylor series expansion for Log[2, 1 + x] splitted to EVEN AND ODD numbers -// K_LOG2_ODD starts from x^3 -extern const double K_LOG2_ODD[4]; -extern const double K_LOG2_EVEN[4]; - -// Output of range reduction for exp_b: (2^(mid + hi), lo) -// where: -// b^x = 2^(mid + hi) * b^lo -struct exp_b_reduc_t { - double mh; // 2^(mid + hi) - double lo; -}; - -// The function correctly calculates b^x value with at least float precision -// in a limited range. -// Range reduction: -// b^x = 2^(hi + mid) * b^lo -// where: -// x = (hi + mid) * log_b(2) + lo -// hi is an integer, -// 0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer -// -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1) -// Base class needs to provide the following constants: -// - MID_BITS : number of bits after decimal points used for mid -// - MID_MASK : 2^MID_BITS - 1, mask to extract mid bits -// - LOG2_B : log2(b) * 2^MID_BITS for scaling -// - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS) -// - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS) -// - EXP_2_MID : look up table for bit fields of 2^mid -// Return: -// { 2^(hi + mid), lo } -template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) { - double xd = static_cast<double>(x); - // kd = round((hi + mid) * log2(b) * 2^MID_BITS) - double kd = fputil::nearest_integer(Base::LOG2_B * xd); - // k = round((hi + mid) * log2(b) * 2^MID_BITS) - int k = static_cast<int>(kd); - // hi = floor(kd * 2^(-MID_BITS)) - // exp_hi = shift hi to the exponent field of double precision. - uint64_t exp_hi = static_cast<uint64_t>(k >> Base::MID_BITS) - << fputil::FPBits<double>::FRACTION_LEN; - // mh = 2^hi * 2^mid - // mh_bits = bit field of mh - uint64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi; - double mh = fputil::FPBits<double>(mh_bits).get_val(); - // dx = lo = x - (hi + mid) * log(2) - double dx = fputil::multiply_add( - kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd)); - return {mh, dx}; -} - // The function correctly calculates sinh(x) and cosh(x) by calculating exp(x) // and exp(-x) simultaneously. // To compute e^x, we perform the following range @@ -271,33 +121,6 @@ template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) { } // x should be positive, normal finite value -LIBC_INLINE static double log2_eval(double x) { - using FPB = fputil::FPBits<double>; - FPB bs(x); - - double result = 0; - result += bs.get_exponent(); - - int p1 = (bs.get_mantissa() >> (FPB::FRACTION_LEN - LOG_P1_BITS)) & - (LOG_P1_SIZE - 1); - - bs.set_uintval(bs.uintval() & (FPB::FRACTION_MASK >> LOG_P1_BITS)); - bs.set_biased_exponent(FPB::EXP_BIAS); - double dx = (bs.get_val() - 1.0) * LOG_P1_1_OVER[p1]; - - // Taylor series for log(2,1+x) - double c1 = fputil::multiply_add(dx, K_LOG2_ODD[0], K_LOG2_EVEN[0]); - double c2 = fputil::multiply_add(dx, K_LOG2_ODD[1], K_LOG2_EVEN[1]); - double c3 = fputil::multiply_add(dx, K_LOG2_ODD[2], K_LOG2_EVEN[2]); - double c4 = fputil::multiply_add(dx, K_LOG2_ODD[3], K_LOG2_EVEN[3]); - - // c0 = dx * (1.0 / ln(2)) + LOG_P1_LOG2[p1] - double c0 = fputil::multiply_add(dx, 0x1.71547652b82fep+0, LOG_P1_LOG2[p1]); - result += LIBC_NAMESPACE::fputil::polyeval(dx * dx, c0, c1, c2, c3, c4); - return result; -} - -// x should be positive, normal finite value // TODO: Simplify range reduction and polynomial degree for float16. // See issue #137190. LIBC_INLINE static float log_eval_f(float x) { @@ -375,58 +198,6 @@ LIBC_INLINE static double log_eval(double x) { return result; } -// Rounding tests for 2^hi * (mid + lo) when the output might be denormal. We -// assume further that 1 <= mid < 2, mid + lo < 2, and |lo| << mid. -// Notice that, if 0 < x < 2^-1022, -// double(2^-1022 + x) - 2^-1022 = double(x). -// So if we scale x up by 2^1022, we can use -// double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range. -template <bool SKIP_ZIV_TEST = false> -LIBC_INLINE static cpp::optional<double> -ziv_test_denorm(int hi, double mid, double lo, double err) { - using FPBits = typename fputil::FPBits<double>; - - // Scaling factor = 1/(min normal number) = 2^1022 - int64_t exp_hi = static_cast<int64_t>(hi + 1022) << FPBits::FRACTION_LEN; - double mid_hi = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(mid)); - double lo_scaled = - (lo != 0.0) ? cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(lo)) - : 0.0; - - double extra_factor = 0.0; - uint64_t scale_down = 0x3FE0'0000'0000'0000; // 1022 in the exponent field. - - // Result is denormal if (mid_hi + lo_scale < 1.0). - if ((1.0 - mid_hi) > lo_scaled) { - // Extra rounding step is needed, which adds more rounding errors. - err += 0x1.0p-52; - extra_factor = 1.0; - scale_down = 0x3FF0'0000'0000'0000; // 1023 in the exponent field. - } - - // By adding 1.0, the results will have similar rounding points as denormal - // outputs. - if constexpr (SKIP_ZIV_TEST) { - double r = extra_factor + (mid_hi + lo_scaled); - return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(r) - scale_down); - } else { - double err_scaled = - cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(err)); - - double lo_u = lo_scaled + err_scaled; - double lo_l = lo_scaled - err_scaled; - - double upper = extra_factor + (mid_hi + lo_u); - double lower = extra_factor + (mid_hi + lo_l); - - if (LIBC_LIKELY(upper == lower)) { - return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(upper) - scale_down); - } - - return cpp::nullopt; - } -} - } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H |
