summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Adhemerval Zanella <adhemerval.zanella@linaro.org> 2025-11-13 09:58:19 -0300
committer Adhemerval Zanella <adhemerval.zanella@linaro.org> 2025-11-21 13:13:02 -0300
commit 42f07a44ef88e0c9ff1bd5343786013272467414 (patch)
tree 88092c5856d171875734a7afc3c50b675942a654
parent 8ce2b5dd272342e2affe7f2245abb43c1398ebae (diff)
math: Remove ldbl-96 fma implementation
It is worse than the dbl-64 version on recent x86 hardware.  With Zen3 and gcc-15:

ldbl-96 removal

reciprocal-throughput      master     patched  improvement
x86_64                  1176.2200    289.4640        4.06x
i686                    1476.0600    636.8660        2.32x

latency                    master     patched  improvement
x86_64                  1176.2200    293.7360        4.00x
i686                    1480.0700    658.4160        2.25x

Checked on x86_64-linux-gnu and i686-linux-gnu.

Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
-rw-r--r-- sysdeps/i386/i686/multiarch/s_fma.c | 2
-rw-r--r-- sysdeps/ieee754/ldbl-96/s_fma.c | 106
2 files changed, 1 insertion, 107 deletions
diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c
index 70d0afe529..6dadb9439f 100644
--- a/sysdeps/i386/i686/multiarch/s_fma.c
+++ b/sysdeps/i386/i686/multiarch/s_fma.c
@@ -38,4 +38,4 @@ libm_alias_double_narrow (__fma, fma)
#define __fma __fma_ia32
-#include <sysdeps/ieee754/ldbl-96/s_fma.c>
+#include <sysdeps/ieee754/dbl-64/s_fma.c>
diff --git a/sysdeps/ieee754/ldbl-96/s_fma.c b/sysdeps/ieee754/ldbl-96/s_fma.c
deleted file mode 100644
index a9abf8f1de..0000000000
--- a/sysdeps/ieee754/ldbl-96/s_fma.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/* Compute x * y + z as ternary operation.
- Copyright (C) 2010-2025 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#define NO_MATH_REDIRECT
-#include <float.h>
-#define dfmal __hide_dfmal
-#define f32xfmaf64 __hide_f32xfmaf64
-#include <math.h>
-#undef dfmal
-#undef f32xfmaf64
-#include <fenv.h>
-#include <ieee754.h>
-#include <math-barriers.h>
-#include <libm-alias-double.h>
-#include <math-narrow-alias.h>
-
-/* This implementation uses rounding to odd to avoid problems with
- double rounding. See a paper by Boldo and Melquiond:
- http://www.lri.fr/~melquion/doc/08-tc.pdf */
-
-double
-__fma (double x, double y, double z)
-{
- if (__glibc_unlikely (!isfinite (x) || !isfinite (y)))
- return x * y + z;
- else if (__glibc_unlikely (!isfinite (z)))
- /* If z is Inf, but x and y are finite, the result should be z
- rather than NaN. */
- return (z + x) + y;
-
- /* Ensure correct sign of exact 0 + 0. */
- if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
- {
- x = math_opt_barrier (x);
- return x * y + z;
- }
-
- fenv_t env;
- __feholdexcept (&env);
- __fesetround (FE_TONEAREST);
-
- /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
-#define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1)
- long double x1 = (long double) x * C;
- long double y1 = (long double) y * C;
- long double m1 = (long double) x * y;
- x1 = (x - x1) + x1;
- y1 = (y - y1) + y1;
- long double x2 = x - x1;
- long double y2 = y - y1;
- long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
-
- /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
- long double a1 = z + m1;
- long double t1 = a1 - z;
- long double t2 = a1 - t1;
- t1 = m1 - t1;
- t2 = z - t2;
- long double a2 = t1 + t2;
- /* Ensure the arithmetic is not scheduled after feclearexcept call. */
- math_force_eval (m2);
- math_force_eval (a2);
- __feclearexcept (FE_INEXACT);
-
- /* If the result is an exact zero, ensure it has the correct sign. */
- if (a1 == 0 && m2 == 0)
- {
- __feupdateenv (&env);
- /* Ensure that round-to-nearest value of z + m1 is not reused. */
- z = math_opt_barrier (z);
- return z + m1;
- }
-
- __fesetround (FE_TOWARDZERO);
- /* Perform m2 + a2 addition with round to odd. */
- a2 = a2 + m2;
-
- /* Add that to a1 again using rounding to odd. */
- union ieee854_long_double u;
- u.d = a1 + a2;
- if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
- u.ieee.mantissa1 |= __fetestexcept (FE_INEXACT) != 0;
- __feupdateenv (&env);
-
- /* Add finally round to double precision. */
- return u.d;
-}
-#ifndef __fma
-libm_alias_double (__fma, fma)
-libm_alias_double_narrow (__fma, fma)
-#endif