math: Remove ldbl-96 fma implementation

It is worse than the ldbl-64 version on recent x86 hardware. With Zen3 and gcc-15: ldbl-96 removal reciprocal-throughput master patched improvement x86_64 1176.2200 289.4640 4.06x i686 1476.0600 636.8660 2.32x latency master patched improvement x86_64 1176.2200 293.7360 4.00x i686 1480.0700 658.4160 2.25x Checked on x86_64-linux-gnu and i686-linux-gnu. Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
author: Adhemerval Zanella <adhemerval.zanella@linaro.org> 2025-11-13 09:58:19 -0300
committer: Adhemerval Zanella <adhemerval.zanella@linaro.org> 2025-11-21 13:13:02 -0300
commit: 42f07a44ef88e0c9ff1bd5343786013272467414 (patch)
tree: 88092c5856d171875734a7afc3c50b675942a654 /sysdeps
parent: 8ce2b5dd272342e2affe7f2245abb43c1398ebae (diff)
2 files changed, 1 insertions, 107 deletions
diff --git a/sysdeps/i386/i686/multiarch/s_fma.c b/sysdeps/i386/i686/multiarch/s_fma.c
index 70d0afe529..6dadb9439f 100644
--- a/sysdeps/i386/i686/multiarch/s_fma.c
+++ b/sysdeps/i386/i686/multiarch/s_fma.c
@@ -38,4 +38,4 @@ libm_alias_double_narrow (__fma, fma)
 
 #define __fma __fma_ia32
 
-#include <sysdeps/ieee754/ldbl-96/s_fma.c>
+#include <sysdeps/ieee754/dbl-64/s_fma.c>
diff --git a/sysdeps/ieee754/ldbl-96/s_fma.c b/sysdeps/ieee754/ldbl-96/s_fma.c
deleted file mode 100644
index a9abf8f1de..0000000000
--- a/sysdeps/ieee754/ldbl-96/s_fma.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/* Compute x * y + z as ternary operation.
-   Copyright (C) 2010-2025 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#define NO_MATH_REDIRECT
-#include <float.h>
-#define dfmal __hide_dfmal
-#define f32xfmaf64 __hide_f32xfmaf64
-#include <math.h>
-#undef dfmal
-#undef f32xfmaf64
-#include <fenv.h>
-#include <ieee754.h>
-#include <math-barriers.h>
-#include <libm-alias-double.h>
-#include <math-narrow-alias.h>
-
-/* This implementation uses rounding to odd to avoid problems with
-   double rounding.  See a paper by Boldo and Melquiond:
-   http://www.lri.fr/~melquion/doc/08-tc.pdf  */
-
-double
-__fma (double x, double y, double z)
-{
-  if (__glibc_unlikely (!isfinite (x) || !isfinite (y)))
-    return x * y + z;
-  else if (__glibc_unlikely (!isfinite (z)))
-    /* If z is Inf, but x and y are finite, the result should be z
-       rather than NaN.  */
-    return (z + x) + y;
-
-  /* Ensure correct sign of exact 0 + 0.  */
-  if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
-    {
-      x = math_opt_barrier (x);
-      return x * y + z;
-    }
-
-  fenv_t env;
-  __feholdexcept (&env);
-  __fesetround (FE_TONEAREST);
-
-  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
-#define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1)
-  long double x1 = (long double) x * C;
-  long double y1 = (long double) y * C;
-  long double m1 = (long double) x * y;
-  x1 = (x - x1) + x1;
-  y1 = (y - y1) + y1;
-  long double x2 = x - x1;
-  long double y2 = y - y1;
-  long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
-
-  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
-  long double a1 = z + m1;
-  long double t1 = a1 - z;
-  long double t2 = a1 - t1;
-  t1 = m1 - t1;
-  t2 = z - t2;
-  long double a2 = t1 + t2;
-  /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
-  math_force_eval (m2);
-  math_force_eval (a2);
-  __feclearexcept (FE_INEXACT);
-
-  /* If the result is an exact zero, ensure it has the correct sign.  */
-  if (a1 == 0 && m2 == 0)
-    {
-      __feupdateenv (&env);
-      /* Ensure that round-to-nearest value of z + m1 is not reused.  */
-      z = math_opt_barrier (z);
-      return z + m1;
-    }
-
-  __fesetround (FE_TOWARDZERO);
-  /* Perform m2 + a2 addition with round to odd.  */
-  a2 = a2 + m2;
-
-  /* Add that to a1 again using rounding to odd.  */
-  union ieee854_long_double u;
-  u.d = a1 + a2;
-  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
-    u.ieee.mantissa1 |= __fetestexcept (FE_INEXACT) != 0;
-  __feupdateenv (&env);
-
-  /* Add finally round to double precision.  */
-  return u.d;
-}
-#ifndef __fma
-libm_alias_double (__fma, fma)
-libm_alias_double_narrow (__fma, fma)
-#endif
author	Adhemerval Zanella <adhemerval.zanella@linaro.org>	2025-11-13 09:58:19 -0300
committer	Adhemerval Zanella <adhemerval.zanella@linaro.org>	2025-11-21 13:13:02 -0300
commit	42f07a44ef88e0c9ff1bd5343786013272467414 (patch)
tree	88092c5856d171875734a7afc3c50b675942a654 /sysdeps
parent	8ce2b5dd272342e2affe7f2245abb43c1398ebae (diff)