Bit twiddling question

Thu Mar 23 20:54:57 UTC 2017

On Fri, Mar 10, 2017 at 05:46:34PM +1100, Bruce Evans wrote:

(A large chuck removed)

> 
> C11 doesn't have sincos*() or sind*() to make us more incomplete.
> 

I've had a sincos*() implementation for a few years now.  
You've seen it, and were dissatisfied with it because I
combined sin[fl] and cos[fl] kernels into sincos[kl] kernels. 

What follows are my current files for sinpif and cospif.
Exhaustive testing in the relevant interval of [0x1p-24,0.25]
of the kernels on my x86_64 system gave max ulp of 0.801 for
__kernel_sinpif() and 0.882 for __kernel_cospif().  Note,
everything is done with float types.  If I accumulate the
Horner expression as a double, I get 0.51 or so.  A shar file follows.

# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#	src/k_cospif.c
#	src/k_sinpif.c
#	src/s_cospif.c
#	src/s_sinpif.c
#
echo x - src/k_cospif.c
sed 's/^X//' >src/k_cospif.c << 'a0a85cc5a2741f3d5b51db36b4fa447e'
X/*
X * Compute cospi(x) = cos(pi*x) on the  interval [0,1/4].  The
X * tested interval is [0x1p-24,0.25].
X */
X
X#define WANT_FMA 0
X
Xstatic inline float
X__kernel_cospif(float x)
X{
X	static const float
X	    c0   =  1.00000000e+00f,	/* 0x3fc00000 */
X	    c1hi = -4.93480206e+00f,	/* 0xc09de9e6 */
X	    c1lo = -1.42206758e-07f,	/* 0xb418b17f */
X	    c2   =  4.05871058e+00f,	/* 0x4081e0f5 */
X	    c3   = -1.33513880e+00f,	/* 0xbfaae5d4 */
X	    c4   =  2.32135549e-01f;	/* 0x3e6db4f1 */
X  	float c, chi, clo, x2, x2hi, x2lo;
X	uint32_t ix;
X
X#if WANT_FMA
X	/**
X	 *          MAX ULP: 0.89000183
X	 *     Total tested: 184549377
X	 * 0.8 < ULP <= 0.9: 1601
X	 * 0.7 < ULP <= 0.8: 33958
X	 * 0.6 < ULP <= 0.7: 164301
X	 */
X	x2 = x * x;
X	c = c1lo + (c2 + (c3 + c4 * x2) * x2) * x2 + c1hi;
X	c = fmaf(c, x2, c0);
X#else
X	/**
X	 * MAX ULP: 0.88242111
X	 *     Total tested: 184549377
X	 * 0.8 < ULP <= 0.9: 1625
X	 * 0.7 < ULP <= 0.8: 33900
X	 * 0.6 < ULP <= 0.7: 164698
X	 */
X	x2 = x * x;
X	c = (c2 + (c3 + c4 * x2) * x2) * x2 + c1hi;
X
X	GET_FLOAT_WORD(ix, x2);
X	if (ix < 0x3b800000)
X		 return (c0 + c * x2);
X	
X	SET_FLOAT_WORD(x2hi, (ix >> 14) << 14);
X	x2lo = x2 - x2hi;
X	GET_FLOAT_WORD(ix, c);
X	SET_FLOAT_WORD(chi, (ix >> 14) << 14);
X	clo = c - chi + c1lo;
X	c = clo * x2lo + chi * x2lo + clo * x2hi + (chi * x2hi + c0);
X#endif
X	return (c);
X}
a0a85cc5a2741f3d5b51db36b4fa447e
echo x - src/k_sinpif.c
sed 's/^X//' >src/k_sinpif.c << '6cba433b675b3984a1bef6bd3c304696'
X/**
X * Compute sinpi(x) = sin(pi*x) in the interval [0x1p-24,0.25].
X * The polynomial approximation in a Horner form is
X *
X * sinpi(x) = x * (x2 * (x2 * (x2 * (x2 * s4 + s3) + s2) + s1) + s0).
X *
X * with x2 = x * x.  To avoid rounding problems, the multiplication
X * by x, it is split into low and high parts.  Low an high parts are
X * accumulated separately, and the addition of s0 requires special
X * handling.
X *
X * Exhaustive testing in the interval gave
X *          MAX ULP: 0.80103670
X *     Total tested: 184549377
X * 0.8 < ULP <= 0.9: 1
X * 0.7 < ULP <= 0.8: 727
X * 0.6 < ULP <= 0.7: 19184
X * where the test function was sin(M_PI*(double)x).
X */
X
Xstatic inline float
X__kernel_sinpif(float x)
X{
X	static const float
X	    s0hi =  3.14062500e+00f,	/* 0x40490000 */
X	    s0md =  9.67653585e-04f,	/* 0x3a7daa22 */
X	    s0lo = -8.19819820e-12f,	/* 0xad103967 */
X	    s1   = -5.16771269e+00f,	/* 0xc0a55de7 */
X	    s2   =  2.55016255e+00f,	/* 0x402335dd */
X	    s3   = -5.99202454e-01f,	/* 0xbf196555 */
X	    s4   =  8.10045525e-02f;	/* 0x3da5e5b7 */
X
X	float hi, lo, sm, x2, xhi, xlo;
X	uint32_t ix;
X
X	GET_FLOAT_WORD(ix, x);
X	SET_FLOAT_WORD(xhi, (ix >> 14) << 14);
X	xlo = x - xhi;
X
X	x2 = x * x;
X	sm = x2 * (x2 * (x2 * s4 + s3) + s2) + s1;
X	lo = xlo * (x + xhi) * sm;
X	hi = xhi * xhi * sm;
X	lo = x * (lo + s0lo) + xlo * (s0hi + s0md) + x * hi;
X
X	return (lo + xhi * s0md + xhi * s0hi);
X}
6cba433b675b3984a1bef6bd3c304696
echo x - src/s_cospif.c
sed 's/^X//' >src/s_cospif.c << 'f181e790e58fd6e440c5d458a5ecb2a7'
X/*
X * cospif(+-0) = 1.
X * cospi(n + 1/2) = +0, for integers n.
X * cospif(+-inf) = nan.  Raises the "invalid" floating-point exception.
X * cospif(nan) = nan.    Raises the "invalid" floating-point exception.
X */
X
X#define EBUG 1
X
X#include "math.h"
X#include "math_private.h"
X#include "k_cospif.c"
X#include "k_sinpif.c"
X
Xstatic inline float
X__compute_cospif(float x)
X{
X
X	if (x <= 0.5f) {
X		if (x <= 0.25f)
X			return (__kernel_cospif(x));
X		return (__kernel_sinpif(0.5f - x));
X	} else {
X		if (x <= 0.75f)
X			return (-__kernel_sinpif(x - 0.5f));
X		return (-__kernel_cospif(1 - x));
X	}
X}
X
Xfloat
Xcospif(float x)
X{
X	static const float huge = 1e30; 
X	float ax;
X	uint32_t ix, j0;
X
X	GET_FLOAT_WORD(ix, x);
X	ix = ix & 0x7fffffff;
X	SET_FLOAT_WORD(ax, ix);
X
X	if (ix < 0x3f800000) {			/* |x| < 1 */
X		if (ix < 0x33800000) {		/* |x| < 0x1p-24 */
X			if (huge * ax == 0)	/* Raise inexact iff != 0. */
X				return (1);
X		}
X		return (__compute_cospif(ax));
X	}
X
X	if (ix < 0x4b000000) {			/* 1 <= |x| < 0x1p23 */
X		/* Determine integer part of ax. */
X		j0 = ((ix >> 23) & 0xff) - 0x7f;
X		ix &= ~(0x007fffff >> j0);
X		SET_FLOAT_WORD(x, ix);
X
X		ax -= x;
X		if (ax == 0.5f)
X			return (0);
X
X		ix = (uint32_t)x;
X		ax = ax == 0.f ? 1 : __compute_cospif(ax);
X		return (ix & 1 ? -ax : ax);
X	}
X
X	/* x = +-inf or nan. */
X	if (ix >= 0x7f800000)
X		return (x - x);
X
X	/*
X	 * |x| >= 0x1p23 is always an even integer, so return 1.
X	 * FIXME: should this raise FE_INEXACT or FE_INVALID.
X	 */
X	return (1);
X}
f181e790e58fd6e440c5d458a5ecb2a7
echo x - src/s_sinpif.c
sed 's/^X//' >src/s_sinpif.c << '9ab66a9a28a64f54241e346b6de77999'
X/*
X * sinpif(+-0) = +-0
X * sinpi(+-n) = +-0, for positive integers n.
X * sinpif(+-inf) = nan.  Raises the "invalid" floating-point exception.
X * sinpif(nan) = nan.    Raises the "invalid" floating-point exception.
X */
X
X#define EBUG 1
X
X#include "math.h"
X#include "math_private.h"
X#include "k_cospif.c"
X#include "k_sinpif.c"
X
Xstatic inline float
X__compute_sinpif(float x)
X{
X
X	if (x <= 0.5f) {
X		if (x <= 0.25f)
X			return (__kernel_sinpif(x));
X		return (__kernel_cospif(0.5f - x));
X	} else {
X		if (x <= 0.75f)
X			return (__kernel_cospif(x - 0.5f));
X		return (__kernel_sinpif(1 - x));
X	}
X}
X
Xfloat
Xsinpif(float x)
X{
X	float ax;
X	uint32_t hx, ix, j0;
X
X	GET_FLOAT_WORD(hx, x);
X	ix = hx & 0x7fffffff;
X	SET_FLOAT_WORD(ax, ix);
X
X	if (ix < 0x3f800000) {		/* |x| < 1 */
X		if (ix < 0x33800000) {	/* |x| < 0x1p-24 */
X			if (x == 0)
X				return (x);
X			return (M_PI * x);
X		}
X		ax = __compute_sinpif(ax);
X		return ((hx & 0x80000000) ? -ax : ax);
X	}
X
X	if (ix < 0x4b000000) {		/* 1 <= |x| < 0x1p23 */
X		/* Determine integer part of ax. */
X		j0 = ((ix >> 23) & 0xff) - 0x7f;
X		ix &= ~(0x007fffff >> j0);
X		SET_FLOAT_WORD(x, ix);
X
X		ax -= x;
X		if (ax == 0) {
X			ax = 0;
X		} else {
X			ix = (uint32_t)x;
X			ax =  __compute_sinpif(ax);
X			if (ix & 1) ax = -ax;
X		}
X		return ((hx & 0x80000000) ? -ax : ax);
X	}
X
X	/* x = +-inf or nan. */
X	if (ix >= 0x7f800000)
X		return (x - x);
X
X	/*
X	 * |x| >= 0x1p23 is always an integer, so return +-0.
X	 * FIXME: should this raise FE_INEXACT or FE_INVALID.
X	 */
X	return (copysignf(0, x));
X}
9ab66a9a28a64f54241e346b6de77999
exit

-- 
Steve