git: 6d04e1422e70 - main - cosl(): fix polynomial approximation coefficients for ld128 version

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Tue, 02 Nov 2021 09:07:02 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=6d04e1422e70ca0a77552782c01c291f90716773

commit 6d04e1422e70ca0a77552782c01c291f90716773
Author:     Steve Kargl <kargl@FreeBSD.org>
AuthorDate: 2021-11-02 08:54:10 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2021-11-02 08:54:10 +0000

    cosl(): fix polynomial approximation coefficients for ld128 version
    
    As mentioned previously, the minimax polynomial approximation
    in the kernel for cosl() seems to have a bad set of coefficients.
    
    In testing, cosl() was evaluated in the interval [0.78, pi/4] for
    1 million values, with pi/4 written to 37 decimal digits.  The old
    version on an aarch64 system gave
    
    % tlibm/tlibm_lmath -l -x 0.78 -X
    7.85398163397448309615660845819875721e-1L cos
    Interval tested for cosl: [0.78,0.785398]
    count: 1000000
      xm =  7.80213913234863919029058821396125599e-01L
      libm =  7.10763080972549562455058499280609083e-01L
      mpfr =  7.10763080972549562455058499280608983e-01L
      ULP = 1.04431
    
    The max ULP exceeds 1, which is not good.  So, I dusted off some
    10-year-old code and recomputed the coefficients.  The new minimax
    polynomial now yields
    
    % tlibm/tlibm_lmath -l -x 0.78 -X
    7.85398163397448309615660845819875721e-1L cos
    Interval tested for cosl: [0.78,0.785398]
    count: 1000000
      xm =  7.82916198746768272588844890973704219e-01L
      libm =  7.08859615479571058183956453286628396e-01L
      mpfr =  7.08859615479571058183956453286628469e-01L
      ULP = 0.75407
    
    which is very good.
    
    PR:     218514
    MFC after:      1 week
---
 lib/msun/ld128/k_cosl.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/lib/msun/ld128/k_cosl.c b/lib/msun/ld128/k_cosl.c
index fe57773a1348..422357bf8a6c 100644
--- a/lib/msun/ld128/k_cosl.c
+++ b/lib/msun/ld128/k_cosl.c
@@ -21,8 +21,8 @@ __FBSDID("$FreeBSD$");
 #include "math_private.h"
 
 /*
- * Domain [-0.7854, 0.7854], range ~[-1.80e-37, 1.79e-37]:
- * |cos(x) - c(x))| < 2**-122.0
+ * Domain [-0.7854, 0.7854], range ~[-1.17e-39, 1.19e-39]:
+ * |cos(x) - c(x))| < 2**-129.3
  *
  * 113-bit precision requires more care than 64-bit precision, since
  * simple methods give a minimax polynomial with coefficient for x^2
@@ -31,21 +31,19 @@ __FBSDID("$FreeBSD$");
  */
 static const double
 one = 1.0;
-
 static const long double
-C1 =  0.04166666666666666666666666666666658424671L,
-C2 = -0.001388888888888888888888888888863490893732L,
-C3 =  0.00002480158730158730158730158600795304914210L,
-C4 = -0.2755731922398589065255474947078934284324e-6L,
-C5 =  0.2087675698786809897659225313136400793948e-8L,
-C6 = -0.1147074559772972315817149986812031204775e-10L,
-C7 =  0.4779477332386808976875457937252120293400e-13L;
-
-static const double
-C8 = -0.1561920696721507929516718307820958119868e-15,
-C9 =  0.4110317413744594971475941557607804508039e-18,
-C10 = -0.8896592467191938803288521958313920156409e-21,
-C11 =  0.1601061435794535138244346256065192782581e-23;
+C1 =  4.16666666666666666666666666666666667e-02L,
+C2 = -1.38888888888888888888888888888888834e-03L,
+C3 =  2.48015873015873015873015873015446795e-05L,
+C4 = -2.75573192239858906525573190949988493e-07L,
+C5 =  2.08767569878680989792098886701451072e-09L,
+C6 = -1.14707455977297247136657111139971865e-11L,
+C7 =  4.77947733238738518870113294139830239e-14L,
+C8 = -1.56192069685858079920640872925306403e-16L,
+C9 =  4.11031762320473354032038893429515732e-19L,
+C10= -8.89679121027589608738005163931958096e-22L,
+C11=  1.61171797801314301767074036661901531e-24L,
+C12= -2.46748624357670948912574279501044295e-27L;
 
 long double
 __kernel_cosl(long double x, long double y)
@@ -54,7 +52,7 @@ __kernel_cosl(long double x, long double y)
 
 	z  = x*x;
 	r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*(C6+z*(C7+
-	    z*(C8+z*(C9+z*(C10+z*C11))))))))));
+	    z*(C8+z*(C9+z*(C10+z*(C11+z*C12)))))))))));
 	hz = 0.5*z;
 	w  = one-hz;
 	return w + (((one-w)-hz) + (z*r-x*y));