git: 33c82f11c267 - stable/14 - Improve accuracy of asinf(3) and acosf(3)

From: Dimitry Andric <dim_at_FreeBSD.org>
Date: Sun, 08 Sep 2024 07:54:19 UTC
The branch stable/14 has been updated by dim:

URL: https://cgit.FreeBSD.org/src/commit/?id=33c82f11c2674beb50304b14248ff46372def520

commit 33c82f11c2674beb50304b14248ff46372def520
Author:     Steve Kargl <kargl@FreeBSD.org>
AuthorDate: 2024-08-29 19:44:48 +0000
Commit:     Dimitry Andric <dim@FreeBSD.org>
CommitDate: 2024-09-08 07:37:52 +0000

    Improve accuracy of asinf(3) and acosf(3)
    
    This uses a better rational approximation to improve the accuracy of
    both functions. For exhaustive testing of asinf(3) in the interval, the
    current libm gives:
    
        % ./tlibm asin -fPED -x 0x1p-12f -X 1
        Interval tested for asinf: [0.000244141,1]
               ulp <= 0.5:  97.916% 98564994 |  97.916% 98564994
        0.5 <  ulp <  0.6:  2.038% 2051023 |  99.953% 100616017
        0.6 <  ulp <  0.7:  0.047%   47254 | 100.000% 100663271
        0.7 <  ulp <  0.8:  0.000%      25 | 100.000% 100663296
        Max ulp: 0.729891 at 5.00732839e-01
    
    which isn't too bad given that much of the computation is actually done
    in double floating point.
    
    With the new rational approximation, exhaustive testing yields:
    
        % ./tlibm asin -fPED -x 0x1p-12f -X 1
        Interval tested for asinf: [0.000244141,1]
               ulp <= 0.5:  99.711% 100372643 |  99.711% 100372643
        0.5 <  ulp <  0.6:  0.288%  290357 | 100.000% 100663000
        0.6 <  ulp <  0.7:  0.000%     296 | 100.000% 100663296
        Max ulp: 0.636344 at 5.09706438e-01
    
    Similarly, for exhaustive testing of asinf(3) in the interval, the
    current libm gives:
    
        % ./tlibm acos -fPED -x -1 -X -0x1p-12f
        Interval tested for acosf: [-1,-0.000244141]
               ulp <= 0.5:  97.008% 97651921 |  97.008% 97651921
        0.5 <  ulp <  0.6:   2.441%  2457242 |  99.450% 100109163
        0.6 <  ulp <  0.7:   0.472%   475503 |  99.922% 100584666
        0.7 <  ulp <  0.8:   0.071%    71309 |  99.993% 100655975
        0.8 <  ulp <  0.9:   0.007%     7319 | 100.000% 100663294
        0.9 <  ulp <  1.0:   0.000%        2 | 100.000% 100663296
        Max ulp: 0.914007 at -5.01484931e-01
    
        % ./tlibm acos -fPED -x 0x1p-12f -X 1
        Interval tested for acosf: [0.000244141,1]
               ulp <= 0.5:  97.317% 97962530 |  97.317% 97962530
        0.5 <  ulp <  0.6:   2.340%  2355182 |  99.657% 100317712
        0.6 <  ulp <  0.7:   0.314%   316134 |  99.971% 100633846
        0.7 <  ulp <  0.8:   0.029%    29450 | 100.000% 100663296
        Max ulp: 0.796035 at 4.99814630e-01
    
    With the new rational approximation, exhaustive testing yields:
    
        % ./tlibm acos -fPED -x -1 -X -0x1p-12f
        Interval tested for acosf: [-1,-0.000244141]
               ulp <= 0.5:  97.010% 97653245 |  97.010% 97653245
        0.5 <  ulp <  0.6:   2.442%  2458373 |  99.452% 100111618
        0.6 <  ulp <  0.7:   0.473%   476012 |  99.925% 100587630
        0.7 <  ulp <  0.8:   0.068%    68603 |  99.993% 100656233
        0.8 <  ulp <  0.9:   0.007%     7063 | 100.000% 100663296
        Max ulp: 0.896189 at -5.04511118e-01
    
        % ./tlibm acos -fPED -x 0x1p-12f -X 1
        Interval tested for acosf: [0.000244141,1]
               ulp <= 0.5:  97.650% 98298175 |  97.650% 98298175
        0.5 <  ulp <  0.6:   2.028%  2041709 |  99.679% 100339884
        0.6 <  ulp <  0.7:   0.292%   293555 |  99.970% 100633439
        0.7 <  ulp <  0.8:   0.030%    29857 | 100.000% 100663296
        Max ulp: 0.775875 at 4.91849005e-01
    
    PR:             281001
    MFC after:      1 week
    
    (cherry picked from commit 41e016289f77deb88b0ef1ec3f7b2ab3515ac7c8)
---
 lib/msun/src/e_acosf.c | 20 +++++++++++++-------
 lib/msun/src/e_asinf.c | 22 ++++++++++++++--------
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/lib/msun/src/e_acosf.c b/lib/msun/src/e_acosf.c
index 42ba126d1baa..ede552e7055a 100644
--- a/lib/msun/src/e_acosf.c
+++ b/lib/msun/src/e_acosf.c
@@ -22,11 +22,17 @@ pi =  3.1415925026e+00, /* 0x40490fda */
 pio2_hi =  1.5707962513e+00; /* 0x3fc90fda */
 static volatile float
 pio2_lo =  7.5497894159e-08; /* 0x33a22168 */
+
+/*
+ * The coefficients for the rational approximation were generated over
+ *  0x1p-12f <= x <= 0.5f.  The maximum error satisfies log2(e) < -30.084.
+ */
 static const float
-pS0 =  1.6666586697e-01,
-pS1 = -4.2743422091e-02,
-pS2 = -8.6563630030e-03,
-qS1 = -7.0662963390e-01;
+pS0 =  1.66666672e-01f, /* 0x3e2aaaab */
+pS1 = -1.19510300e-01f, /* 0xbdf4c1d1 */
+pS2 =  5.47002675e-03f, /* 0x3bb33de9 */
+qS1 = -1.16706085e+00f, /* 0xbf956240 */
+qS2 =  2.90115148e-01f; /* 0x3e9489f9 */
 
 float
 acosf(float x)
@@ -46,13 +52,13 @@ acosf(float x)
 	    if(ix<=0x32800000) return pio2_hi+pio2_lo;/*if|x|<2**-26*/
 	    z = x*x;
 	    p = z*(pS0+z*(pS1+z*pS2));
-	    q = one+z*qS1;
+	    q = one+z*(qS1+z*qS2);
 	    r = p/q;
 	    return pio2_hi - (x - (pio2_lo-x*r));
 	} else  if (hx<0) {		/* x < -0.5 */
 	    z = (one+x)*(float)0.5;
 	    p = z*(pS0+z*(pS1+z*pS2));
-	    q = one+z*qS1;
+	    q = one+z*(qS1+z*qS2);
 	    s = sqrtf(z);
 	    r = p/q;
 	    w = r*s-pio2_lo;
@@ -66,7 +72,7 @@ acosf(float x)
 	    SET_FLOAT_WORD(df,idf&0xfffff000);
 	    c  = (z-df*df)/(s+df);
 	    p = z*(pS0+z*(pS1+z*pS2));
-	    q = one+z*qS1;
+	    q = one+z*(qS1+z*qS2);
 	    r = p/q;
 	    w = r*s+c;
 	    return (float)2.0*(df+w);
diff --git a/lib/msun/src/e_asinf.c b/lib/msun/src/e_asinf.c
index a2ee1a166f03..8d1aca27df83 100644
--- a/lib/msun/src/e_asinf.c
+++ b/lib/msun/src/e_asinf.c
@@ -18,12 +18,18 @@
 
 static const float
 one =  1.0000000000e+00, /* 0x3F800000 */
-huge =  1.000e+30,
-	/* coefficient for R(x^2) */
-pS0 =  1.6666586697e-01,
-pS1 = -4.2743422091e-02,
-pS2 = -8.6563630030e-03,
-qS1 = -7.0662963390e-01;
+huge =  1.000e+30;
+
+/*
+ * The coefficients for the rational approximation were generated over
+ *  0x1p-12f <= x <= 0.5f.  The maximum error satisfies log2(e) < -30.084.
+ */
+static const float
+pS0 =  1.66666672e-01f, /* 0x3e2aaaab */
+pS1 = -1.19510300e-01f, /* 0xbdf4c1d1 */
+pS2 =  5.47002675e-03f, /* 0x3bb33de9 */
+qS1 = -1.16706085e+00f, /* 0xbf956240 */
+qS2 =  2.90115148e-01f; /* 0x3e9489f9 */
 
 static const double
 pio2 =  1.570796326794896558e+00;
@@ -46,7 +52,7 @@ asinf(float x)
 	    }
 	    t = x*x;
 	    p = t*(pS0+t*(pS1+t*pS2));
-	    q = one+t*qS1;
+	    q = one+t*(qS1+t*qS2);
 	    w = p/q;
 	    return x+x*w;
 	}
@@ -54,7 +60,7 @@ asinf(float x)
 	w = one-fabsf(x);
 	t = w*(float)0.5;
 	p = t*(pS0+t*(pS1+t*pS2));
-	q = one+t*qS1;
+	q = one+t*(qS1+t*qS2);
 	s = sqrt(t);
 	w = p/q;
 	t = pio2-2.0*(s+s*w);