[deliverable/linux.git] / arch / x86 / math-emu / poly_sin.c

/*---------------------------------------------------------------------------+
 |  poly_sin.c                                                               |
 |                                                                           |
 |  Computation of an approximation of the sin function and the cosine       |
 |  function by a polynomial.                                                |
 |                                                                           |
 | Copyright (C) 1992,1993,1994,1997,1999                                    |
 |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
 |                  E-mail   billm@melbpc.org.au                             |
 |                                                                           |
 |                                                                           |
 +---------------------------------------------------------------------------*/

#include "exception.h"
#include "reg_constant.h"
#include "fpu_emu.h"
#include "fpu_system.h"
#include "control_w.h"
#include "poly.h"

#define	N_COEFF_P	4
#define	N_COEFF_N	4

static const unsigned long long pos_terms_l[N_COEFF_P] = {
	0xaaaaaaaaaaaaaaabLL,
	0x00d00d00d00cf906LL,
	0x000006b99159a8bbLL,
	0x000000000d7392e6LL
};

static const unsigned long long neg_terms_l[N_COEFF_N] = {
	0x2222222222222167LL,
	0x0002e3bc74aab624LL,
	0x0000000b09229062LL,
	0x00000000000c7973LL
};

#define	N_COEFF_PH	4
#define	N_COEFF_NH	4
static const unsigned long long pos_terms_h[N_COEFF_PH] = {
	0x0000000000000000LL,
	0x05b05b05b05b0406LL,
	0x000049f93edd91a9LL,
	0x00000000c9c9ed62LL
};

static const unsigned long long neg_terms_h[N_COEFF_NH] = {
	0xaaaaaaaaaaaaaa98LL,
	0x001a01a01a019064LL,
	0x0000008f76c68a77LL,
	0x0000000000d58f5eLL
};

/*--- poly_sine() -----------------------------------------------------------+
 |                                                                           |
 +---------------------------------------------------------------------------*/
void poly_sine(FPU_REG *st0_ptr)
{
	int exponent, echange;
	Xsig accumulator, argSqrd, argTo4;
	unsigned long fix_up, adj;
	unsigned long long fixed_arg;
	FPU_REG result;

	exponent = exponent(st0_ptr);

	accumulator.lsw = accumulator.midw = accumulator.msw = 0;

	/* Split into two ranges, for arguments below and above 1.0 */
	/* The boundary between upper and lower is approx 0.88309101259 */
	if ((exponent < -1)
	    || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
		/* The argument is <= 0.88309101259 */

		argSqrd.msw = st0_ptr->sigh;
		argSqrd.midw = st0_ptr->sigl;
		argSqrd.lsw = 0;
		mul64_Xsig(&argSqrd, &significand(st0_ptr));
		shr_Xsig(&argSqrd, 2 * (-1 - exponent));
		argTo4.msw = argSqrd.msw;
		argTo4.midw = argSqrd.midw;
		argTo4.lsw = argSqrd.lsw;
		mul_Xsig_Xsig(&argTo4, &argTo4);

		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
				N_COEFF_N - 1);
		mul_Xsig_Xsig(&accumulator, &argSqrd);
		negate_Xsig(&accumulator);

		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
				N_COEFF_P - 1);

		shr_Xsig(&accumulator, 2);	/* Divide by four */
		accumulator.msw |= 0x80000000;	/* Add 1.0 */

		mul64_Xsig(&accumulator, &significand(st0_ptr));
		mul64_Xsig(&accumulator, &significand(st0_ptr));
		mul64_Xsig(&accumulator, &significand(st0_ptr));

		/* Divide by four, FPU_REG compatible, etc */
		exponent = 3 * exponent;

		/* The minimum exponent difference is 3 */
		shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);

		negate_Xsig(&accumulator);
		XSIG_LL(accumulator) += significand(st0_ptr);

		echange = round_Xsig(&accumulator);

		setexponentpos(&result, exponent(st0_ptr) + echange);
	} else {
		/* The argument is > 0.88309101259 */
		/* We use sin(st(0)) = cos(pi/2-st(0)) */

		fixed_arg = significand(st0_ptr);

		if (exponent == 0) {
			/* The argument is >= 1.0 */

			/* Put the binary point at the left. */
			fixed_arg <<= 1;
		}
		/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
		fixed_arg = 0x921fb54442d18469LL - fixed_arg;
		/* There is a special case which arises due to rounding, to fix here. */
		if (fixed_arg == 0xffffffffffffffffLL)
			fixed_arg = 0;

		XSIG_LL(argSqrd) = fixed_arg;
		argSqrd.lsw = 0;
		mul64_Xsig(&argSqrd, &fixed_arg);

		XSIG_LL(argTo4) = XSIG_LL(argSqrd);
		argTo4.lsw = argSqrd.lsw;
		mul_Xsig_Xsig(&argTo4, &argTo4);

		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
				N_COEFF_NH - 1);
		mul_Xsig_Xsig(&accumulator, &argSqrd);
		negate_Xsig(&accumulator);

		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
				N_COEFF_PH - 1);
		negate_Xsig(&accumulator);

		mul64_Xsig(&accumulator, &fixed_arg);
		mul64_Xsig(&accumulator, &fixed_arg);

		shr_Xsig(&accumulator, 3);
		negate_Xsig(&accumulator);

		add_Xsig_Xsig(&accumulator, &argSqrd);

		shr_Xsig(&accumulator, 1);

		accumulator.lsw |= 1;	/* A zero accumulator here would cause problems */
		negate_Xsig(&accumulator);

		/* The basic computation is complete. Now fix the answer to
		   compensate for the error due to the approximation used for
		   pi/2
		 */

		/* This has an exponent of -65 */
		fix_up = 0x898cc517;
		/* The fix-up needs to be improved for larger args */
		if (argSqrd.msw & 0xffc00000) {
			/* Get about 32 bit precision in these: */
			fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
		}
		fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));

		adj = accumulator.lsw;	/* temp save */
		accumulator.lsw -= fix_up;
		if (accumulator.lsw > adj)
			XSIG_LL(accumulator)--;

		echange = round_Xsig(&accumulator);

		setexponentpos(&result, echange - 1);
	}

	significand(&result) = XSIG_LL(accumulator);
	setsign(&result, getsign(st0_ptr));
	FPU_copy_to_reg0(&result, TAG_Valid);

#ifdef PARANOID
	if ((exponent(&result) >= 0)
	    && (significand(&result) > 0x8000000000000000LL)) {
		EXCEPTION(EX_INTERNAL | 0x150);
	}
#endif /* PARANOID */

}

/*--- poly_cos() ------------------------------------------------------------+
 |                                                                           |
 +---------------------------------------------------------------------------*/
void poly_cos(FPU_REG *st0_ptr)
{
	FPU_REG result;
	long int exponent, exp2, echange;
	Xsig accumulator, argSqrd, fix_up, argTo4;
	unsigned long long fixed_arg;

#ifdef PARANOID
	if ((exponent(st0_ptr) > 0)
	    || ((exponent(st0_ptr) == 0)
		&& (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
		EXCEPTION(EX_Invalid);
		FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
		return;
	}
#endif /* PARANOID */

	exponent = exponent(st0_ptr);

	accumulator.lsw = accumulator.midw = accumulator.msw = 0;

	if ((exponent < -1)
	    || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
		/* arg is < 0.687705 */

		argSqrd.msw = st0_ptr->sigh;
		argSqrd.midw = st0_ptr->sigl;
		argSqrd.lsw = 0;
		mul64_Xsig(&argSqrd, &significand(st0_ptr));

		if (exponent < -1) {
			/* shift the argument right by the required places */
			shr_Xsig(&argSqrd, 2 * (-1 - exponent));
		}

		argTo4.msw = argSqrd.msw;
		argTo4.midw = argSqrd.midw;
		argTo4.lsw = argSqrd.lsw;
		mul_Xsig_Xsig(&argTo4, &argTo4);

		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
				N_COEFF_NH - 1);
		mul_Xsig_Xsig(&accumulator, &argSqrd);
		negate_Xsig(&accumulator);

		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
				N_COEFF_PH - 1);
		negate_Xsig(&accumulator);

		mul64_Xsig(&accumulator, &significand(st0_ptr));
		mul64_Xsig(&accumulator, &significand(st0_ptr));
		shr_Xsig(&accumulator, -2 * (1 + exponent));

		shr_Xsig(&accumulator, 3);
		negate_Xsig(&accumulator);

		add_Xsig_Xsig(&accumulator, &argSqrd);

		shr_Xsig(&accumulator, 1);

		/* It doesn't matter if accumulator is all zero here, the
		   following code will work ok */
		negate_Xsig(&accumulator);

		if (accumulator.lsw & 0x80000000)
			XSIG_LL(accumulator)++;
		if (accumulator.msw == 0) {
			/* The result is 1.0 */
			FPU_copy_to_reg0(&CONST_1, TAG_Valid);
			return;
		} else {
			significand(&result) = XSIG_LL(accumulator);

			/* will be a valid positive nr with expon = -1 */
			setexponentpos(&result, -1);
		}
	} else {
		fixed_arg = significand(st0_ptr);

		if (exponent == 0) {
			/* The argument is >= 1.0 */

			/* Put the binary point at the left. */
			fixed_arg <<= 1;
		}
		/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
		fixed_arg = 0x921fb54442d18469LL - fixed_arg;
		/* There is a special case which arises due to rounding, to fix here. */
		if (fixed_arg == 0xffffffffffffffffLL)
			fixed_arg = 0;

		exponent = -1;
		exp2 = -1;

		/* A shift is needed here only for a narrow range of arguments,
		   i.e. for fixed_arg approx 2^-32, but we pick up more... */
		if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
			fixed_arg <<= 16;
			exponent -= 16;
			exp2 -= 16;
		}

		XSIG_LL(argSqrd) = fixed_arg;
		argSqrd.lsw = 0;
		mul64_Xsig(&argSqrd, &fixed_arg);

		if (exponent < -1) {
			/* shift the argument right by the required places */
			shr_Xsig(&argSqrd, 2 * (-1 - exponent));
		}

		argTo4.msw = argSqrd.msw;
		argTo4.midw = argSqrd.midw;
		argTo4.lsw = argSqrd.lsw;
		mul_Xsig_Xsig(&argTo4, &argTo4);

		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
				N_COEFF_N - 1);
		mul_Xsig_Xsig(&accumulator, &argSqrd);
		negate_Xsig(&accumulator);

		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
				N_COEFF_P - 1);

		shr_Xsig(&accumulator, 2);	/* Divide by four */
		accumulator.msw |= 0x80000000;	/* Add 1.0 */

		mul64_Xsig(&accumulator, &fixed_arg);
		mul64_Xsig(&accumulator, &fixed_arg);
		mul64_Xsig(&accumulator, &fixed_arg);

		/* Divide by four, FPU_REG compatible, etc */
		exponent = 3 * exponent;

		/* The minimum exponent difference is 3 */
		shr_Xsig(&accumulator, exp2 - exponent);

		negate_Xsig(&accumulator);
		XSIG_LL(accumulator) += fixed_arg;

		/* The basic computation is complete. Now fix the answer to
		   compensate for the error due to the approximation used for
		   pi/2
		 */

		/* This has an exponent of -65 */
		XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
		fix_up.lsw = 0;

		/* The fix-up needs to be improved for larger args */
		if (argSqrd.msw & 0xffc00000) {
			/* Get about 32 bit precision in these: */
			fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
			fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
		}

		exp2 += norm_Xsig(&accumulator);
		shr_Xsig(&accumulator, 1);	/* Prevent overflow */
		exp2++;
		shr_Xsig(&fix_up, 65 + exp2);

		add_Xsig_Xsig(&accumulator, &fix_up);

		echange = round_Xsig(&accumulator);

		setexponentpos(&result, exp2 + echange);
		significand(&result) = XSIG_LL(accumulator);
	}

	FPU_copy_to_reg0(&result, TAG_Valid);

#ifdef PARANOID
	if ((exponent(&result) >= 0)
	    && (significand(&result) > 0x8000000000000000LL)) {
		EXCEPTION(EX_INTERNAL | 0x151);
	}
#endif /* PARANOID */

}
Commit	Line	Data
1da177e4 LT	1	/*---------------------------------------------------------------------------+
	2	\| poly_sin.c \|
	3	\| \|
	4	\| Computation of an approximation of the sin function and the cosine \|
	5	\| function by a polynomial. \|
	6	\| \|
	7	\| Copyright (C) 1992,1993,1994,1997,1999 \|
	8	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia \|
	9	\| E-mail billm@melbpc.org.au \|
	10	\| \|
	11	\| \|
	12	+---------------------------------------------------------------------------*/
	13
1da177e4 LT	14	#include "exception.h"
	15	#include "reg_constant.h"
	16	#include "fpu_emu.h"
	17	#include "fpu_system.h"
	18	#include "control_w.h"
	19	#include "poly.h"
	20
1da177e4 LT	21	#define N_COEFF_P 4
	22	#define N_COEFF_N 4
	23
3d0d14f9 IM	24	static const unsigned long long pos_terms_l[N_COEFF_P] = {
	25	0xaaaaaaaaaaaaaaabLL,
	26	0x00d00d00d00cf906LL,
	27	0x000006b99159a8bbLL,
	28	0x000000000d7392e6LL
1da177e4 LT	29	};
1da177e4 LT	30
3d0d14f9 IM	31	static const unsigned long long neg_terms_l[N_COEFF_N] = {
	32	0x2222222222222167LL,
	33	0x0002e3bc74aab624LL,
	34	0x0000000b09229062LL,
	35	0x00000000000c7973LL
1da177e4 LT	36	};
1da177e4 LT	37
1da177e4 LT	38	#define N_COEFF_PH 4
1da177e4 LT	39	#define N_COEFF_NH 4
3d0d14f9 IM	40	static const unsigned long long pos_terms_h[N_COEFF_PH] = {
	41	0x0000000000000000LL,
	42	0x05b05b05b05b0406LL,
	43	0x000049f93edd91a9LL,
	44	0x00000000c9c9ed62LL
1da177e4 LT	45	};
1da177e4 LT	46
3d0d14f9 IM	47	static const unsigned long long neg_terms_h[N_COEFF_NH] = {
	48	0xaaaaaaaaaaaaaa98LL,
	49	0x001a01a01a019064LL,
	50	0x0000008f76c68a77LL,
	51	0x0000000000d58f5eLL
1da177e4 LT	52	};
1da177e4 LT	53
1da177e4 LT	54	/*--- poly_sine() -----------------------------------------------------------+
	55	\| \|
	56	+---------------------------------------------------------------------------*/
e8d591dc	57	void poly_sine(FPU_REG *st0_ptr)
1da177e4	58	{
3d0d14f9 IM	59	int exponent, echange;
	60	Xsig accumulator, argSqrd, argTo4;
	61	unsigned long fix_up, adj;
	62	unsigned long long fixed_arg;
	63	FPU_REG result;
1da177e4	64
3d0d14f9	65	exponent = exponent(st0_ptr);
1da177e4	66
3d0d14f9	67	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
1da177e4	68
3d0d14f9 IM	69	/* Split into two ranges, for arguments below and above 1.0 */
	70	/* The boundary between upper and lower is approx 0.88309101259 */
	71	if ((exponent < -1)
	72	\|\| ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
	73	/* The argument is <= 0.88309101259 */
	74
	75	argSqrd.msw = st0_ptr->sigh;
	76	argSqrd.midw = st0_ptr->sigl;
	77	argSqrd.lsw = 0;
	78	mul64_Xsig(&argSqrd, &significand(st0_ptr));
	79	shr_Xsig(&argSqrd, 2 * (-1 - exponent));
	80	argTo4.msw = argSqrd.msw;
	81	argTo4.midw = argSqrd.midw;
	82	argTo4.lsw = argSqrd.lsw;
	83	mul_Xsig_Xsig(&argTo4, &argTo4);
1da177e4	84
3d0d14f9 IM	85	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
	86	N_COEFF_N - 1);
	87	mul_Xsig_Xsig(&accumulator, &argSqrd);
	88	negate_Xsig(&accumulator);
1da177e4	89
3d0d14f9 IM	90	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
3d0d14f9 IM	91	N_COEFF_P - 1);
1da177e4	92
3d0d14f9 IM	93	shr_Xsig(&accumulator, 2); /* Divide by four */
3d0d14f9 IM	94	accumulator.msw \|= 0x80000000; /* Add 1.0 */
1da177e4	95
3d0d14f9 IM	96	mul64_Xsig(&accumulator, &significand(st0_ptr));
	97	mul64_Xsig(&accumulator, &significand(st0_ptr));
	98	mul64_Xsig(&accumulator, &significand(st0_ptr));
1da177e4	99
3d0d14f9 IM	100	/* Divide by four, FPU_REG compatible, etc */
3d0d14f9 IM	101	exponent = 3 * exponent;
1da177e4	102
3d0d14f9 IM	103	/* The minimum exponent difference is 3 */
3d0d14f9 IM	104	shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
1da177e4	105
3d0d14f9 IM	106	negate_Xsig(&accumulator);
3d0d14f9 IM	107	XSIG_LL(accumulator) += significand(st0_ptr);
1da177e4	108
3d0d14f9	109	echange = round_Xsig(&accumulator);
1da177e4	110
3d0d14f9 IM	111	setexponentpos(&result, exponent(st0_ptr) + echange);
	112	} else {
	113	/* The argument is > 0.88309101259 */
	114	/* We use sin(st(0)) = cos(pi/2-st(0)) */
1da177e4	115
3d0d14f9	116	fixed_arg = significand(st0_ptr);
1da177e4	117
3d0d14f9 IM	118	if (exponent == 0) {
3d0d14f9 IM	119	/* The argument is >= 1.0 */
1da177e4	120
3d0d14f9 IM	121	/* Put the binary point at the left. */
	122	fixed_arg <<= 1;
	123	}
	124	/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
	125	fixed_arg = 0x921fb54442d18469LL - fixed_arg;
	126	/* There is a special case which arises due to rounding, to fix here. */
	127	if (fixed_arg == 0xffffffffffffffffLL)
	128	fixed_arg = 0;
1da177e4	129
3d0d14f9 IM	130	XSIG_LL(argSqrd) = fixed_arg;
	131	argSqrd.lsw = 0;
	132	mul64_Xsig(&argSqrd, &fixed_arg);
1da177e4	133
3d0d14f9 IM	134	XSIG_LL(argTo4) = XSIG_LL(argSqrd);
	135	argTo4.lsw = argSqrd.lsw;
	136	mul_Xsig_Xsig(&argTo4, &argTo4);
1da177e4	137
3d0d14f9 IM	138	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
	139	N_COEFF_NH - 1);
	140	mul_Xsig_Xsig(&accumulator, &argSqrd);
	141	negate_Xsig(&accumulator);
1da177e4	142
3d0d14f9 IM	143	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
	144	N_COEFF_PH - 1);
	145	negate_Xsig(&accumulator);
1da177e4	146
3d0d14f9 IM	147	mul64_Xsig(&accumulator, &fixed_arg);
3d0d14f9 IM	148	mul64_Xsig(&accumulator, &fixed_arg);
1da177e4	149
3d0d14f9 IM	150	shr_Xsig(&accumulator, 3);
3d0d14f9 IM	151	negate_Xsig(&accumulator);
1da177e4	152
3d0d14f9	153	add_Xsig_Xsig(&accumulator, &argSqrd);
1da177e4	154
3d0d14f9	155	shr_Xsig(&accumulator, 1);
1da177e4	156
3d0d14f9 IM	157	accumulator.lsw \|= 1; /* A zero accumulator here would cause problems */
3d0d14f9 IM	158	negate_Xsig(&accumulator);
1da177e4	159
3d0d14f9 IM	160	/* The basic computation is complete. Now fix the answer to
	161	compensate for the error due to the approximation used for
	162	pi/2
	163	*/
1da177e4	164
3d0d14f9 IM	165	/* This has an exponent of -65 */
	166	fix_up = 0x898cc517;
	167	/* The fix-up needs to be improved for larger args */
	168	if (argSqrd.msw & 0xffc00000) {
	169	/* Get about 32 bit precision in these: */
	170	fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
	171	}
	172	fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
1da177e4	173
3d0d14f9 IM	174	adj = accumulator.lsw; /* temp save */
	175	accumulator.lsw -= fix_up;
	176	if (accumulator.lsw > adj)
	177	XSIG_LL(accumulator)--;
1da177e4	178
3d0d14f9	179	echange = round_Xsig(&accumulator);
1da177e4	180
3d0d14f9 IM	181	setexponentpos(&result, echange - 1);
3d0d14f9 IM	182	}
1da177e4	183
3d0d14f9 IM	184	significand(&result) = XSIG_LL(accumulator);
	185	setsign(&result, getsign(st0_ptr));
	186	FPU_copy_to_reg0(&result, TAG_Valid);
1da177e4 LT	187
1da177e4 LT	188	#ifdef PARANOID
3d0d14f9 IM	189	if ((exponent(&result) >= 0)
	190	&& (significand(&result) > 0x8000000000000000LL)) {
	191	EXCEPTION(EX_INTERNAL \| 0x150);
	192	}
1da177e4 LT	193	#endif /* PARANOID */
	194
	195	}
	196
1da177e4 LT	197	/*--- poly_cos() ------------------------------------------------------------+
	198	\| \|
	199	+---------------------------------------------------------------------------*/
e8d591dc	200	void poly_cos(FPU_REG *st0_ptr)
1da177e4	201	{
3d0d14f9 IM	202	FPU_REG result;
	203	long int exponent, exp2, echange;
	204	Xsig accumulator, argSqrd, fix_up, argTo4;
	205	unsigned long long fixed_arg;
1da177e4 LT	206
1da177e4 LT	207	#ifdef PARANOID
3d0d14f9 IM	208	if ((exponent(st0_ptr) > 0)
	209	\|\| ((exponent(st0_ptr) == 0)
	210	&& (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
	211	EXCEPTION(EX_Invalid);
	212	FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
	213	return;
1da177e4	214	}
3d0d14f9	215	#endif /* PARANOID */
1da177e4	216
3d0d14f9 IM	217	exponent = exponent(st0_ptr);
	218
	219	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
	220
	221	if ((exponent < -1)
	222	\|\| ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
	223	/* arg is < 0.687705 */
	224
	225	argSqrd.msw = st0_ptr->sigh;
	226	argSqrd.midw = st0_ptr->sigl;
	227	argSqrd.lsw = 0;
	228	mul64_Xsig(&argSqrd, &significand(st0_ptr));
	229
	230	if (exponent < -1) {
	231	/* shift the argument right by the required places */
	232	shr_Xsig(&argSqrd, 2 * (-1 - exponent));
	233	}
	234
	235	argTo4.msw = argSqrd.msw;
	236	argTo4.midw = argSqrd.midw;
	237	argTo4.lsw = argSqrd.lsw;
	238	mul_Xsig_Xsig(&argTo4, &argTo4);
	239
	240	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
	241	N_COEFF_NH - 1);
	242	mul_Xsig_Xsig(&accumulator, &argSqrd);
	243	negate_Xsig(&accumulator);
	244
	245	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
	246	N_COEFF_PH - 1);
	247	negate_Xsig(&accumulator);
	248
	249	mul64_Xsig(&accumulator, &significand(st0_ptr));
	250	mul64_Xsig(&accumulator, &significand(st0_ptr));
	251	shr_Xsig(&accumulator, -2 * (1 + exponent));
	252
	253	shr_Xsig(&accumulator, 3);
	254	negate_Xsig(&accumulator);
	255
	256	add_Xsig_Xsig(&accumulator, &argSqrd);
	257
	258	shr_Xsig(&accumulator, 1);
	259
	260	/* It doesn't matter if accumulator is all zero here, the
	261	following code will work ok */
	262	negate_Xsig(&accumulator);
	263
	264	if (accumulator.lsw & 0x80000000)
	265	XSIG_LL(accumulator)++;
	266	if (accumulator.msw == 0) {
	267	/* The result is 1.0 */
	268	FPU_copy_to_reg0(&CONST_1, TAG_Valid);
	269	return;
	270	} else {
	271	significand(&result) = XSIG_LL(accumulator);
	272
	273	/* will be a valid positive nr with expon = -1 */
	274	setexponentpos(&result, -1);
	275	}
	276	} else {
	277	fixed_arg = significand(st0_ptr);
	278
	279	if (exponent == 0) {
	280	/* The argument is >= 1.0 */
281
282	/* Put the binary point at the left. */
283	fixed_arg <<= 1;
284	}
285	/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
286	fixed_arg = 0x921fb54442d18469LL - fixed_arg;
287	/* There is a special case which arises due to rounding, to fix here. */
288	if (fixed_arg == 0xffffffffffffffffLL)
289	fixed_arg = 0;
290
291	exponent = -1;
292	exp2 = -1;
293
294	/* A shift is needed here only for a narrow range of arguments,
295	i.e. for fixed_arg approx 2^-32, but we pick up more... */
296	if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
297	fixed_arg <<= 16;
298	exponent -= 16;
299	exp2 -= 16;
300	}
301
302	XSIG_LL(argSqrd) = fixed_arg;
303	argSqrd.lsw = 0;
304	mul64_Xsig(&argSqrd, &fixed_arg);
305
306	if (exponent < -1) {
307	/* shift the argument right by the required places */
308	shr_Xsig(&argSqrd, 2 * (-1 - exponent));
309	}
310
311	argTo4.msw = argSqrd.msw;
312	argTo4.midw = argSqrd.midw;
313	argTo4.lsw = argSqrd.lsw;
314	mul_Xsig_Xsig(&argTo4, &argTo4);
315
316	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
317	N_COEFF_N - 1);
318	mul_Xsig_Xsig(&accumulator, &argSqrd);
319	negate_Xsig(&accumulator);
320
321	polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
322	N_COEFF_P - 1);
323
324	shr_Xsig(&accumulator, 2); /* Divide by four */
325	accumulator.msw \|= 0x80000000; /* Add 1.0 */
326
327	mul64_Xsig(&accumulator, &fixed_arg);
328	mul64_Xsig(&accumulator, &fixed_arg);
329	mul64_Xsig(&accumulator, &fixed_arg);
330
331	/* Divide by four, FPU_REG compatible, etc */
332	exponent = 3 * exponent;
333
334	/* The minimum exponent difference is 3 */
335	shr_Xsig(&accumulator, exp2 - exponent);
336
337	negate_Xsig(&accumulator);
338	XSIG_LL(accumulator) += fixed_arg;
339
340	/* The basic computation is complete. Now fix the answer to
341	compensate for the error due to the approximation used for
342	pi/2
343	*/
344
345	/* This has an exponent of -65 */
346	XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
347	fix_up.lsw = 0;
348
349	/* The fix-up needs to be improved for larger args */
350	if (argSqrd.msw & 0xffc00000) {
351	/* Get about 32 bit precision in these: */
352	fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
353	fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
354	}
355
356	exp2 += norm_Xsig(&accumulator);
357	shr_Xsig(&accumulator, 1); /* Prevent overflow */
358	exp2++;
359	shr_Xsig(&fix_up, 65 + exp2);
360
361	add_Xsig_Xsig(&accumulator, &fix_up);
362
363	echange = round_Xsig(&accumulator);
364
365	setexponentpos(&result, exp2 + echange);
366	significand(&result) = XSIG_LL(accumulator);
1da177e4 LT	367	}
1da177e4 LT	368
3d0d14f9	369	FPU_copy_to_reg0(&result, TAG_Valid);
1da177e4 LT	370
1da177e4 LT	371	#ifdef PARANOID
3d0d14f9 IM	372	if ((exponent(&result) >= 0)
	373	&& (significand(&result) > 0x8000000000000000LL)) {
	374	EXCEPTION(EX_INTERNAL \| 0x151);
	375	}
1da177e4 LT	376	#endif /* PARANOID */
	377
	378	}