Bit twiddling question
Steve Kargl
sgk at troutmask.apl.washington.edu
Thu Mar 23 20:54:57 UTC 2017
On Fri, Mar 10, 2017 at 05:46:34PM +1100, Bruce Evans wrote:
(A large chuck removed)
>
> C11 doesn't have sincos*() or sind*() to make us more incomplete.
>
I've had a sincos*() implementation for a few years now.
You've seen it, and were dissatisfied with it because I
combined sin[fl] and cos[fl] kernels into sincos[kl] kernels.
What follows are my current files for sinpif and cospif.
Exhaustive testing in the relevant interval of [0x1p-24,0.25]
of the kernels on my x86_64 system gave max ulp of 0.801 for
__kernel_sinpif() and 0.882 for __kernel_cospif(). Note,
everything is done with float types. If I accumulate the
Horner expression as a double, I get 0.51 or so. A shar file follows.
# This is a shell archive. Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file". Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
# src/k_cospif.c
# src/k_sinpif.c
# src/s_cospif.c
# src/s_sinpif.c
#
echo x - src/k_cospif.c
sed 's/^X//' >src/k_cospif.c << 'a0a85cc5a2741f3d5b51db36b4fa447e'
X/*
X * Compute cospi(x) = cos(pi*x) on the interval [0,1/4]. The
X * tested interval is [0x1p-24,0.25].
X */
X
X#define WANT_FMA 0
X
Xstatic inline float
X__kernel_cospif(float x)
X{
X static const float
X c0 = 1.00000000e+00f, /* 0x3fc00000 */
X c1hi = -4.93480206e+00f, /* 0xc09de9e6 */
X c1lo = -1.42206758e-07f, /* 0xb418b17f */
X c2 = 4.05871058e+00f, /* 0x4081e0f5 */
X c3 = -1.33513880e+00f, /* 0xbfaae5d4 */
X c4 = 2.32135549e-01f; /* 0x3e6db4f1 */
X float c, chi, clo, x2, x2hi, x2lo;
X uint32_t ix;
X
X#if WANT_FMA
X /**
X * MAX ULP: 0.89000183
X * Total tested: 184549377
X * 0.8 < ULP <= 0.9: 1601
X * 0.7 < ULP <= 0.8: 33958
X * 0.6 < ULP <= 0.7: 164301
X */
X x2 = x * x;
X c = c1lo + (c2 + (c3 + c4 * x2) * x2) * x2 + c1hi;
X c = fmaf(c, x2, c0);
X#else
X /**
X * MAX ULP: 0.88242111
X * Total tested: 184549377
X * 0.8 < ULP <= 0.9: 1625
X * 0.7 < ULP <= 0.8: 33900
X * 0.6 < ULP <= 0.7: 164698
X */
X x2 = x * x;
X c = (c2 + (c3 + c4 * x2) * x2) * x2 + c1hi;
X
X GET_FLOAT_WORD(ix, x2);
X if (ix < 0x3b800000)
X return (c0 + c * x2);
X
X SET_FLOAT_WORD(x2hi, (ix >> 14) << 14);
X x2lo = x2 - x2hi;
X GET_FLOAT_WORD(ix, c);
X SET_FLOAT_WORD(chi, (ix >> 14) << 14);
X clo = c - chi + c1lo;
X c = clo * x2lo + chi * x2lo + clo * x2hi + (chi * x2hi + c0);
X#endif
X return (c);
X}
a0a85cc5a2741f3d5b51db36b4fa447e
echo x - src/k_sinpif.c
sed 's/^X//' >src/k_sinpif.c << '6cba433b675b3984a1bef6bd3c304696'
X/**
X * Compute sinpi(x) = sin(pi*x) in the interval [0x1p-24,0.25].
X * The polynomial approximation in a Horner form is
X *
X * sinpi(x) = x * (x2 * (x2 * (x2 * (x2 * s4 + s3) + s2) + s1) + s0).
X *
X * with x2 = x * x. To avoid rounding problems, the multiplication
X * by x, it is split into low and high parts. Low an high parts are
X * accumulated separately, and the addition of s0 requires special
X * handling.
X *
X * Exhaustive testing in the interval gave
X * MAX ULP: 0.80103670
X * Total tested: 184549377
X * 0.8 < ULP <= 0.9: 1
X * 0.7 < ULP <= 0.8: 727
X * 0.6 < ULP <= 0.7: 19184
X * where the test function was sin(M_PI*(double)x).
X */
X
Xstatic inline float
X__kernel_sinpif(float x)
X{
X static const float
X s0hi = 3.14062500e+00f, /* 0x40490000 */
X s0md = 9.67653585e-04f, /* 0x3a7daa22 */
X s0lo = -8.19819820e-12f, /* 0xad103967 */
X s1 = -5.16771269e+00f, /* 0xc0a55de7 */
X s2 = 2.55016255e+00f, /* 0x402335dd */
X s3 = -5.99202454e-01f, /* 0xbf196555 */
X s4 = 8.10045525e-02f; /* 0x3da5e5b7 */
X
X float hi, lo, sm, x2, xhi, xlo;
X uint32_t ix;
X
X GET_FLOAT_WORD(ix, x);
X SET_FLOAT_WORD(xhi, (ix >> 14) << 14);
X xlo = x - xhi;
X
X x2 = x * x;
X sm = x2 * (x2 * (x2 * s4 + s3) + s2) + s1;
X lo = xlo * (x + xhi) * sm;
X hi = xhi * xhi * sm;
X lo = x * (lo + s0lo) + xlo * (s0hi + s0md) + x * hi;
X
X return (lo + xhi * s0md + xhi * s0hi);
X}
6cba433b675b3984a1bef6bd3c304696
echo x - src/s_cospif.c
sed 's/^X//' >src/s_cospif.c << 'f181e790e58fd6e440c5d458a5ecb2a7'
X/*
X * cospif(+-0) = 1.
X * cospi(n + 1/2) = +0, for integers n.
X * cospif(+-inf) = nan. Raises the "invalid" floating-point exception.
X * cospif(nan) = nan. Raises the "invalid" floating-point exception.
X */
X
X#define EBUG 1
X
X#include "math.h"
X#include "math_private.h"
X#include "k_cospif.c"
X#include "k_sinpif.c"
X
Xstatic inline float
X__compute_cospif(float x)
X{
X
X if (x <= 0.5f) {
X if (x <= 0.25f)
X return (__kernel_cospif(x));
X return (__kernel_sinpif(0.5f - x));
X } else {
X if (x <= 0.75f)
X return (-__kernel_sinpif(x - 0.5f));
X return (-__kernel_cospif(1 - x));
X }
X}
X
Xfloat
Xcospif(float x)
X{
X static const float huge = 1e30;
X float ax;
X uint32_t ix, j0;
X
X GET_FLOAT_WORD(ix, x);
X ix = ix & 0x7fffffff;
X SET_FLOAT_WORD(ax, ix);
X
X if (ix < 0x3f800000) { /* |x| < 1 */
X if (ix < 0x33800000) { /* |x| < 0x1p-24 */
X if (huge * ax == 0) /* Raise inexact iff != 0. */
X return (1);
X }
X return (__compute_cospif(ax));
X }
X
X if (ix < 0x4b000000) { /* 1 <= |x| < 0x1p23 */
X /* Determine integer part of ax. */
X j0 = ((ix >> 23) & 0xff) - 0x7f;
X ix &= ~(0x007fffff >> j0);
X SET_FLOAT_WORD(x, ix);
X
X ax -= x;
X if (ax == 0.5f)
X return (0);
X
X ix = (uint32_t)x;
X ax = ax == 0.f ? 1 : __compute_cospif(ax);
X return (ix & 1 ? -ax : ax);
X }
X
X /* x = +-inf or nan. */
X if (ix >= 0x7f800000)
X return (x - x);
X
X /*
X * |x| >= 0x1p23 is always an even integer, so return 1.
X * FIXME: should this raise FE_INEXACT or FE_INVALID.
X */
X return (1);
X}
f181e790e58fd6e440c5d458a5ecb2a7
echo x - src/s_sinpif.c
sed 's/^X//' >src/s_sinpif.c << '9ab66a9a28a64f54241e346b6de77999'
X/*
X * sinpif(+-0) = +-0
X * sinpi(+-n) = +-0, for positive integers n.
X * sinpif(+-inf) = nan. Raises the "invalid" floating-point exception.
X * sinpif(nan) = nan. Raises the "invalid" floating-point exception.
X */
X
X#define EBUG 1
X
X#include "math.h"
X#include "math_private.h"
X#include "k_cospif.c"
X#include "k_sinpif.c"
X
Xstatic inline float
X__compute_sinpif(float x)
X{
X
X if (x <= 0.5f) {
X if (x <= 0.25f)
X return (__kernel_sinpif(x));
X return (__kernel_cospif(0.5f - x));
X } else {
X if (x <= 0.75f)
X return (__kernel_cospif(x - 0.5f));
X return (__kernel_sinpif(1 - x));
X }
X}
X
Xfloat
Xsinpif(float x)
X{
X float ax;
X uint32_t hx, ix, j0;
X
X GET_FLOAT_WORD(hx, x);
X ix = hx & 0x7fffffff;
X SET_FLOAT_WORD(ax, ix);
X
X if (ix < 0x3f800000) { /* |x| < 1 */
X if (ix < 0x33800000) { /* |x| < 0x1p-24 */
X if (x == 0)
X return (x);
X return (M_PI * x);
X }
X ax = __compute_sinpif(ax);
X return ((hx & 0x80000000) ? -ax : ax);
X }
X
X if (ix < 0x4b000000) { /* 1 <= |x| < 0x1p23 */
X /* Determine integer part of ax. */
X j0 = ((ix >> 23) & 0xff) - 0x7f;
X ix &= ~(0x007fffff >> j0);
X SET_FLOAT_WORD(x, ix);
X
X ax -= x;
X if (ax == 0) {
X ax = 0;
X } else {
X ix = (uint32_t)x;
X ax = __compute_sinpif(ax);
X if (ix & 1) ax = -ax;
X }
X return ((hx & 0x80000000) ? -ax : ax);
X }
X
X /* x = +-inf or nan. */
X if (ix >= 0x7f800000)
X return (x - x);
X
X /*
X * |x| >= 0x1p23 is always an integer, so return +-0.
X * FIXME: should this raise FE_INEXACT or FE_INVALID.
X */
X return (copysignf(0, x));
X}
9ab66a9a28a64f54241e346b6de77999
exit
--
Steve
More information about the freebsd-numerics
mailing list