summaryrefslogtreecommitdiff
path: root/math
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2025-10-10 15:15:30 -0300
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2025-10-27 09:34:04 -0300
commitacaad9ab06d406503bfd0f4a29cf81cd56994ec4 (patch)
treeb6565810ef38454b96cbf07566b76ec9e4c5192c /math
parent72a48e45bdcc68decb3d7cd281f1262e0af817ff (diff)
math: Use erfc from CORE-MATH
The current implementation shows the following accuracy on three ranges ([-DBL_MAX,-5], [-5,5], [5,DBL_MAX]) with 10e9 uniformly distributed random numbers for each range (first column is the error in ULP, with '0' being correctly rounded; second is the number of samples with the corresponding error): * Range [-DBL_MAX, -5] * FE_TONEAREST 0: 10000000000 100.00% * FE_UPWARD 0: 10000000000 100.00% * FE_DOWNWARD 0: 10000000000 100.00% * FE_TOWARDZERO 0: 10000000000 100.00% * Range [-5, 5] * FE_TONEAREST 0: 8069309665 80.69% 1: 1882910247 18.83% 2: 47485296 0.47% 3: 293749 0.00% 4: 1043 0.00% * FE_UPWARD 0: 5540301026 55.40% 1: 2026739127 20.27% 2: 1774882486 17.75% 3: 567324466 5.67% 4: 86913847 0.87% 5: 3820789 0.04% 6: 18259 0.00% * FE_DOWNWARD 0: 5520969586 55.21% 1: 2057293099 20.57% 2: 1778334818 17.78% 3: 557521494 5.58% 4: 82473927 0.82% 5: 3393276 0.03% 6: 13800 0.00% * FE_TOWARDZERO 0: 6220287175 62.20% 1: 2323846149 23.24% 2: 1251999920 12.52% 3: 190748245 1.91% 4: 12996232 0.13% 5: 122279 0.00% * Range [5, DBL_MAX] * FE_TONEAREST 0: 10000000000 100.00% * FE_UPWARD 0: 10000000000 100.00% * FE_DOWNWARD 0: 10000000000 100.00% * FE_TOWARDZERO 0: 10000000000 100.00% The CORE-MATH implementation is correctly rounded for any rounding mode. The code was adapted to glibc style and to use the definitions of math_config.h (to handle errno, overflow, and underflow). 
Benchtest on x86_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (Neoverse-N1, gcc 13.3.1), and powerpc (POWER10, gcc 13.2.1) shows: reciprocal-throughput master patched improvement x86_64 49.0980 267.0660 -443.94% x86_64v2 49.3220 257.6310 -422.34% x86_64v3 42.9539 84.9571 -97.79% aarch64 28.7266 52.9096 -84.18% power10 14.1673 25.1273 -77.36% Latency master patched improvement x86_64 95.6640 269.7060 -181.93% x86_64v2 95.8296 260.4860 -171.82% x86_64v3 91.1658 112.7150 -23.64% aarch64 37.0745 58.6791 -58.27% power10 23.3197 31.5737 -35.39% Checked on x86_64-linux-gnu, aarch64-linux-gnu, and powerpc64le-linux-gnu. Reviewed-by: DJ Delorie <dj@redhat.com>
Diffstat (limited to 'math')
-rw-r--r--math/auto-libm-test-in3
-rw-r--r--math/auto-libm-test-out-erfc69
2 files changed, 72 insertions, 0 deletions
diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in
index 3921666dbd..fcf19cc1e6 100644
--- a/math/auto-libm-test-in
+++ b/math/auto-libm-test-in
@@ -5729,6 +5729,9 @@ erfc 0x2.56af04p+0
erfc 0x2.b7f8cb76737d2af98dead7c4c5eep+0
erfc 0x2.dfb9b4p+0
erfc 0x1.e33c9ep+0
+
+erfc 0x1.a8f7bfbd15495p+4
+
# the next value generates larger error bounds on x86_64 (binary64)
erfc 0x1.3ffcbf39febb4p+0
diff --git a/math/auto-libm-test-out-erfc b/math/auto-libm-test-out-erfc
index 76f922ba03..c9572194fd 100644
--- a/math/auto-libm-test-out-erfc
+++ b/math/auto-libm-test-out-erfc
@@ -4444,6 +4444,75 @@ erfc 0x1.e33c9ep+0
= erfc tonearest ibm128 0x1.e33c9ep+0 : 0x1.f1cb04b622e6f4d5035449633b8p-8 : inexact-ok
= erfc towardzero ibm128 0x1.e33c9ep+0 : 0x1.f1cb04b622e6f4d5035449633bp-8 : inexact-ok
= erfc upward ibm128 0x1.e33c9ep+0 : 0x1.f1cb04b622e6f4d5035449633b8p-8 : inexact-ok
+erfc 0x1.a8f7bfbd15495p+4
+= erfc downward binary32 0x1.a8f7cp+4 : 0x0p+0 : inexact-ok underflow errno-erange
+= erfc tonearest binary32 0x1.a8f7cp+4 : 0x0p+0 : inexact-ok underflow errno-erange
+= erfc towardzero binary32 0x1.a8f7cp+4 : 0x0p+0 : inexact-ok underflow errno-erange
+= erfc upward binary32 0x1.a8f7cp+4 : 0x8p-152 : inexact-ok underflow errno-erange-ok
+= erfc downward binary64 0x1.a8f7cp+4 : 0x1.99edf481adc28p-1024 : inexact-ok underflow errno-erange-ok
+= erfc tonearest binary64 0x1.a8f7cp+4 : 0x1.99edf481adc28p-1024 : inexact-ok underflow errno-erange-ok
+= erfc towardzero binary64 0x1.a8f7cp+4 : 0x1.99edf481adc28p-1024 : inexact-ok underflow errno-erange-ok
+= erfc upward binary64 0x1.a8f7cp+4 : 0x1.99edf481adc2cp-1024 : inexact-ok underflow errno-erange-ok
+= erfc downward intel96 0x1.a8f7cp+4 : 0x1.99edf481adc29f24p-1024 : inexact-ok
+= erfc tonearest intel96 0x1.a8f7cp+4 : 0x1.99edf481adc29f26p-1024 : inexact-ok
+= erfc towardzero intel96 0x1.a8f7cp+4 : 0x1.99edf481adc29f24p-1024 : inexact-ok
+= erfc upward intel96 0x1.a8f7cp+4 : 0x1.99edf481adc29f26p-1024 : inexact-ok
+= erfc downward m68k96 0x1.a8f7cp+4 : 0x1.99edf481adc29f24p-1024 : inexact-ok
+= erfc tonearest m68k96 0x1.a8f7cp+4 : 0x1.99edf481adc29f26p-1024 : inexact-ok
+= erfc towardzero m68k96 0x1.a8f7cp+4 : 0x1.99edf481adc29f24p-1024 : inexact-ok
+= erfc upward m68k96 0x1.a8f7cp+4 : 0x1.99edf481adc29f26p-1024 : inexact-ok
+= erfc downward binary128 0x1.a8f7cp+4 : 0x1.99edf481adc29f25df2cafcf9bbfp-1024 : inexact-ok
+= erfc tonearest binary128 0x1.a8f7cp+4 : 0x1.99edf481adc29f25df2cafcf9bcp-1024 : inexact-ok
+= erfc towardzero binary128 0x1.a8f7cp+4 : 0x1.99edf481adc29f25df2cafcf9bbfp-1024 : inexact-ok
+= erfc upward binary128 0x1.a8f7cp+4 : 0x1.99edf481adc29f25df2cafcf9bcp-1024 : inexact-ok
+= erfc downward ibm128 0x1.a8f7cp+4 : 0x1.99edf481adc28p-1024 : inexact-ok underflow errno-erange-ok
+= erfc tonearest ibm128 0x1.a8f7cp+4 : 0x1.99edf481adc28p-1024 : inexact-ok underflow errno-erange-ok
+= erfc towardzero ibm128 0x1.a8f7cp+4 : 0x1.99edf481adc28p-1024 : inexact-ok underflow errno-erange-ok
+= erfc upward ibm128 0x1.a8f7cp+4 : 0x1.99edf481adc2cp-1024 : inexact-ok underflow errno-erange-ok
+= erfc downward binary32 0x1.a8f7bep+4 : 0x0p+0 : inexact-ok underflow errno-erange
+= erfc tonearest binary32 0x1.a8f7bep+4 : 0x0p+0 : inexact-ok underflow errno-erange
+= erfc towardzero binary32 0x1.a8f7bep+4 : 0x0p+0 : inexact-ok underflow errno-erange
+= erfc upward binary32 0x1.a8f7bep+4 : 0x8p-152 : inexact-ok underflow errno-erange-ok
+= erfc downward binary64 0x1.a8f7bep+4 : 0x1.99f8988d84cbcp-1024 : inexact-ok underflow errno-erange-ok
+= erfc tonearest binary64 0x1.a8f7bep+4 : 0x1.99f8988d84ccp-1024 : inexact-ok underflow errno-erange-ok
+= erfc towardzero binary64 0x1.a8f7bep+4 : 0x1.99f8988d84cbcp-1024 : inexact-ok underflow errno-erange-ok
+= erfc upward binary64 0x1.a8f7bep+4 : 0x1.99f8988d84ccp-1024 : inexact-ok underflow errno-erange-ok
+= erfc downward intel96 0x1.a8f7bep+4 : 0x1.99f8988d84cbe06ep-1024 : inexact-ok
+= erfc tonearest intel96 0x1.a8f7bep+4 : 0x1.99f8988d84cbe06ep-1024 : inexact-ok
+= erfc towardzero intel96 0x1.a8f7bep+4 : 0x1.99f8988d84cbe06ep-1024 : inexact-ok
+= erfc upward intel96 0x1.a8f7bep+4 : 0x1.99f8988d84cbe07p-1024 : inexact-ok
+= erfc downward m68k96 0x1.a8f7bep+4 : 0x1.99f8988d84cbe06ep-1024 : inexact-ok
+= erfc tonearest m68k96 0x1.a8f7bep+4 : 0x1.99f8988d84cbe06ep-1024 : inexact-ok
+= erfc towardzero m68k96 0x1.a8f7bep+4 : 0x1.99f8988d84cbe06ep-1024 : inexact-ok
+= erfc upward m68k96 0x1.a8f7bep+4 : 0x1.99f8988d84cbe07p-1024 : inexact-ok
+= erfc downward binary128 0x1.a8f7bep+4 : 0x1.99f8988d84cbe06e999bcdeda04dp-1024 : inexact-ok
+= erfc tonearest binary128 0x1.a8f7bep+4 : 0x1.99f8988d84cbe06e999bcdeda04ep-1024 : inexact-ok
+= erfc towardzero binary128 0x1.a8f7bep+4 : 0x1.99f8988d84cbe06e999bcdeda04dp-1024 : inexact-ok
+= erfc upward binary128 0x1.a8f7bep+4 : 0x1.99f8988d84cbe06e999bcdeda04ep-1024 : inexact-ok
+= erfc downward ibm128 0x1.a8f7bep+4 : 0x1.99f8988d84cbcp-1024 : inexact-ok underflow errno-erange-ok
+= erfc tonearest ibm128 0x1.a8f7bep+4 : 0x1.99f8988d84ccp-1024 : inexact-ok underflow errno-erange-ok
+= erfc towardzero ibm128 0x1.a8f7bep+4 : 0x1.99f8988d84cbcp-1024 : inexact-ok underflow errno-erange-ok
+= erfc upward ibm128 0x1.a8f7bep+4 : 0x1.99f8988d84ccp-1024 : inexact-ok underflow errno-erange-ok
+= erfc downward binary64 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f6568p-1024 : inexact-ok underflow errno-erange-ok
+= erfc tonearest binary64 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656cp-1024 : inexact-ok underflow errno-erange-ok
+= erfc towardzero binary64 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f6568p-1024 : inexact-ok underflow errno-erange-ok
+= erfc upward binary64 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656cp-1024 : inexact-ok underflow errno-erange-ok
+= erfc downward intel96 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a7fep-1024 : inexact-ok
+= erfc tonearest intel96 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a8p-1024 : inexact-ok
+= erfc towardzero intel96 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a7fep-1024 : inexact-ok
+= erfc upward intel96 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a8p-1024 : inexact-ok
+= erfc downward m68k96 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a7fep-1024 : inexact-ok
+= erfc tonearest m68k96 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a8p-1024 : inexact-ok
+= erfc towardzero m68k96 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a7fep-1024 : inexact-ok
+= erfc upward m68k96 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a8p-1024 : inexact-ok
+= erfc downward binary128 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a7ffffffffff2272p-1024 : inexact-ok
+= erfc tonearest binary128 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a7ffffffffff2273p-1024 : inexact-ok
+= erfc towardzero binary128 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a7ffffffffff2272p-1024 : inexact-ok
+= erfc upward binary128 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656a7ffffffffff2273p-1024 : inexact-ok
+= erfc downward ibm128 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f6568p-1024 : inexact-ok underflow errno-erange-ok
+= erfc tonearest ibm128 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656cp-1024 : inexact-ok underflow errno-erange-ok
+= erfc towardzero ibm128 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f6568p-1024 : inexact-ok underflow errno-erange-ok
+= erfc upward ibm128 0x1.a8f7bfbd15495p+4 : 0x1.99ef5883f656cp-1024 : inexact-ok underflow errno-erange-ok
erfc 0x1.3ffcbf39febb4p+0
= erfc downward binary32 0x1.3ffccp+0 : 0x1.3bd95ep-4 : inexact-ok
= erfc tonearest binary32 0x1.3ffccp+0 : 0x1.3bd96p-4 : inexact-ok