Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /*---------------------------------------------------------------------------+ |
2 | | poly_sin.c | | |
3 | | | | |
4 | | Computation of an approximation of the sin function and the cosine | | |
5 | | function by a polynomial. | | |
6 | | | | |
7 | | Copyright (C) 1992,1993,1994,1997,1999 | | |
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | |
9 | | E-mail billm@melbpc.org.au | | |
10 | | | | |
11 | | | | |
12 | +---------------------------------------------------------------------------*/ | |
13 | ||
1da177e4 LT |
14 | #include "exception.h" |
15 | #include "reg_constant.h" | |
16 | #include "fpu_emu.h" | |
17 | #include "fpu_system.h" | |
18 | #include "control_w.h" | |
19 | #include "poly.h" | |
20 | ||
1da177e4 LT |
21 | #define N_COEFF_P 4 |
22 | #define N_COEFF_N 4 | |
23 | ||
3d0d14f9 IM |
24 | static const unsigned long long pos_terms_l[N_COEFF_P] = { |
25 | 0xaaaaaaaaaaaaaaabLL, | |
26 | 0x00d00d00d00cf906LL, | |
27 | 0x000006b99159a8bbLL, | |
28 | 0x000000000d7392e6LL | |
1da177e4 LT |
29 | }; |
30 | ||
3d0d14f9 IM |
31 | static const unsigned long long neg_terms_l[N_COEFF_N] = { |
32 | 0x2222222222222167LL, | |
33 | 0x0002e3bc74aab624LL, | |
34 | 0x0000000b09229062LL, | |
35 | 0x00000000000c7973LL | |
1da177e4 LT |
36 | }; |
37 | ||
1da177e4 LT |
38 | #define N_COEFF_PH 4 |
39 | #define N_COEFF_NH 4 | |
3d0d14f9 IM |
40 | static const unsigned long long pos_terms_h[N_COEFF_PH] = { |
41 | 0x0000000000000000LL, | |
42 | 0x05b05b05b05b0406LL, | |
43 | 0x000049f93edd91a9LL, | |
44 | 0x00000000c9c9ed62LL | |
1da177e4 LT |
45 | }; |
46 | ||
3d0d14f9 IM |
47 | static const unsigned long long neg_terms_h[N_COEFF_NH] = { |
48 | 0xaaaaaaaaaaaaaa98LL, | |
49 | 0x001a01a01a019064LL, | |
50 | 0x0000008f76c68a77LL, | |
51 | 0x0000000000d58f5eLL | |
1da177e4 LT |
52 | }; |
53 | ||
1da177e4 LT |
54 | /*--- poly_sine() -----------------------------------------------------------+ |
55 | | | | |
56 | +---------------------------------------------------------------------------*/ | |
e8d591dc | 57 | void poly_sine(FPU_REG *st0_ptr) |
1da177e4 | 58 | { |
3d0d14f9 IM |
59 | int exponent, echange; |
60 | Xsig accumulator, argSqrd, argTo4; | |
61 | unsigned long fix_up, adj; | |
62 | unsigned long long fixed_arg; | |
63 | FPU_REG result; | |
1da177e4 | 64 | |
3d0d14f9 | 65 | exponent = exponent(st0_ptr); |
1da177e4 | 66 | |
3d0d14f9 | 67 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; |
1da177e4 | 68 | |
3d0d14f9 IM |
69 | /* Split into two ranges, for arguments below and above 1.0 */ |
70 | /* The boundary between upper and lower is approx 0.88309101259 */ | |
71 | if ((exponent < -1) | |
72 | || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) { | |
73 | /* The argument is <= 0.88309101259 */ | |
74 | ||
75 | argSqrd.msw = st0_ptr->sigh; | |
76 | argSqrd.midw = st0_ptr->sigl; | |
77 | argSqrd.lsw = 0; | |
78 | mul64_Xsig(&argSqrd, &significand(st0_ptr)); | |
79 | shr_Xsig(&argSqrd, 2 * (-1 - exponent)); | |
80 | argTo4.msw = argSqrd.msw; | |
81 | argTo4.midw = argSqrd.midw; | |
82 | argTo4.lsw = argSqrd.lsw; | |
83 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
1da177e4 | 84 | |
3d0d14f9 IM |
85 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, |
86 | N_COEFF_N - 1); | |
87 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
88 | negate_Xsig(&accumulator); | |
1da177e4 | 89 | |
3d0d14f9 IM |
90 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, |
91 | N_COEFF_P - 1); | |
1da177e4 | 92 | |
3d0d14f9 IM |
93 | shr_Xsig(&accumulator, 2); /* Divide by four */ |
94 | accumulator.msw |= 0x80000000; /* Add 1.0 */ | |
1da177e4 | 95 | |
3d0d14f9 IM |
96 | mul64_Xsig(&accumulator, &significand(st0_ptr)); |
97 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
98 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
1da177e4 | 99 | |
3d0d14f9 IM |
100 | /* Divide by four, FPU_REG compatible, etc */ |
101 | exponent = 3 * exponent; | |
1da177e4 | 102 | |
3d0d14f9 IM |
103 | /* The minimum exponent difference is 3 */ |
104 | shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); | |
1da177e4 | 105 | |
3d0d14f9 IM |
106 | negate_Xsig(&accumulator); |
107 | XSIG_LL(accumulator) += significand(st0_ptr); | |
1da177e4 | 108 | |
3d0d14f9 | 109 | echange = round_Xsig(&accumulator); |
1da177e4 | 110 | |
3d0d14f9 IM |
111 | setexponentpos(&result, exponent(st0_ptr) + echange); |
112 | } else { | |
113 | /* The argument is > 0.88309101259 */ | |
114 | /* We use sin(st(0)) = cos(pi/2-st(0)) */ | |
1da177e4 | 115 | |
3d0d14f9 | 116 | fixed_arg = significand(st0_ptr); |
1da177e4 | 117 | |
3d0d14f9 IM |
118 | if (exponent == 0) { |
119 | /* The argument is >= 1.0 */ | |
1da177e4 | 120 | |
3d0d14f9 IM |
121 | /* Put the binary point at the left. */ |
122 | fixed_arg <<= 1; | |
123 | } | |
124 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ | |
125 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; | |
126 | /* There is a special case which arises due to rounding, to fix here. */ | |
127 | if (fixed_arg == 0xffffffffffffffffLL) | |
128 | fixed_arg = 0; | |
1da177e4 | 129 | |
3d0d14f9 IM |
130 | XSIG_LL(argSqrd) = fixed_arg; |
131 | argSqrd.lsw = 0; | |
132 | mul64_Xsig(&argSqrd, &fixed_arg); | |
1da177e4 | 133 | |
3d0d14f9 IM |
134 | XSIG_LL(argTo4) = XSIG_LL(argSqrd); |
135 | argTo4.lsw = argSqrd.lsw; | |
136 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
1da177e4 | 137 | |
3d0d14f9 IM |
138 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, |
139 | N_COEFF_NH - 1); | |
140 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
141 | negate_Xsig(&accumulator); | |
1da177e4 | 142 | |
3d0d14f9 IM |
143 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, |
144 | N_COEFF_PH - 1); | |
145 | negate_Xsig(&accumulator); | |
1da177e4 | 146 | |
3d0d14f9 IM |
147 | mul64_Xsig(&accumulator, &fixed_arg); |
148 | mul64_Xsig(&accumulator, &fixed_arg); | |
1da177e4 | 149 | |
3d0d14f9 IM |
150 | shr_Xsig(&accumulator, 3); |
151 | negate_Xsig(&accumulator); | |
1da177e4 | 152 | |
3d0d14f9 | 153 | add_Xsig_Xsig(&accumulator, &argSqrd); |
1da177e4 | 154 | |
3d0d14f9 | 155 | shr_Xsig(&accumulator, 1); |
1da177e4 | 156 | |
3d0d14f9 IM |
157 | accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ |
158 | negate_Xsig(&accumulator); | |
1da177e4 | 159 | |
3d0d14f9 IM |
160 | /* The basic computation is complete. Now fix the answer to |
161 | compensate for the error due to the approximation used for | |
162 | pi/2 | |
163 | */ | |
1da177e4 | 164 | |
3d0d14f9 IM |
165 | /* This has an exponent of -65 */ |
166 | fix_up = 0x898cc517; | |
167 | /* The fix-up needs to be improved for larger args */ | |
168 | if (argSqrd.msw & 0xffc00000) { | |
169 | /* Get about 32 bit precision in these: */ | |
170 | fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6; | |
171 | } | |
172 | fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg)); | |
1da177e4 | 173 | |
3d0d14f9 IM |
174 | adj = accumulator.lsw; /* temp save */ |
175 | accumulator.lsw -= fix_up; | |
176 | if (accumulator.lsw > adj) | |
177 | XSIG_LL(accumulator)--; | |
1da177e4 | 178 | |
3d0d14f9 | 179 | echange = round_Xsig(&accumulator); |
1da177e4 | 180 | |
3d0d14f9 IM |
181 | setexponentpos(&result, echange - 1); |
182 | } | |
1da177e4 | 183 | |
3d0d14f9 IM |
184 | significand(&result) = XSIG_LL(accumulator); |
185 | setsign(&result, getsign(st0_ptr)); | |
186 | FPU_copy_to_reg0(&result, TAG_Valid); | |
1da177e4 LT |
187 | |
188 | #ifdef PARANOID | |
3d0d14f9 IM |
189 | if ((exponent(&result) >= 0) |
190 | && (significand(&result) > 0x8000000000000000LL)) { | |
191 | EXCEPTION(EX_INTERNAL | 0x150); | |
192 | } | |
1da177e4 LT |
193 | #endif /* PARANOID */ |
194 | ||
195 | } | |
196 | ||
1da177e4 LT |
197 | /*--- poly_cos() ------------------------------------------------------------+ |
198 | | | | |
199 | +---------------------------------------------------------------------------*/ | |
e8d591dc | 200 | void poly_cos(FPU_REG *st0_ptr) |
1da177e4 | 201 | { |
3d0d14f9 IM |
202 | FPU_REG result; |
203 | long int exponent, exp2, echange; | |
204 | Xsig accumulator, argSqrd, fix_up, argTo4; | |
205 | unsigned long long fixed_arg; | |
1da177e4 LT |
206 | |
207 | #ifdef PARANOID | |
3d0d14f9 IM |
208 | if ((exponent(st0_ptr) > 0) |
209 | || ((exponent(st0_ptr) == 0) | |
210 | && (significand(st0_ptr) > 0xc90fdaa22168c234LL))) { | |
211 | EXCEPTION(EX_Invalid); | |
212 | FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); | |
213 | return; | |
1da177e4 | 214 | } |
3d0d14f9 | 215 | #endif /* PARANOID */ |
1da177e4 | 216 | |
3d0d14f9 IM |
217 | exponent = exponent(st0_ptr); |
218 | ||
219 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; | |
220 | ||
221 | if ((exponent < -1) | |
222 | || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) { | |
223 | /* arg is < 0.687705 */ | |
224 | ||
225 | argSqrd.msw = st0_ptr->sigh; | |
226 | argSqrd.midw = st0_ptr->sigl; | |
227 | argSqrd.lsw = 0; | |
228 | mul64_Xsig(&argSqrd, &significand(st0_ptr)); | |
229 | ||
230 | if (exponent < -1) { | |
231 | /* shift the argument right by the required places */ | |
232 | shr_Xsig(&argSqrd, 2 * (-1 - exponent)); | |
233 | } | |
234 | ||
235 | argTo4.msw = argSqrd.msw; | |
236 | argTo4.midw = argSqrd.midw; | |
237 | argTo4.lsw = argSqrd.lsw; | |
238 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
239 | ||
240 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, | |
241 | N_COEFF_NH - 1); | |
242 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
243 | negate_Xsig(&accumulator); | |
244 | ||
245 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, | |
246 | N_COEFF_PH - 1); | |
247 | negate_Xsig(&accumulator); | |
248 | ||
249 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
250 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
251 | shr_Xsig(&accumulator, -2 * (1 + exponent)); | |
252 | ||
253 | shr_Xsig(&accumulator, 3); | |
254 | negate_Xsig(&accumulator); | |
255 | ||
256 | add_Xsig_Xsig(&accumulator, &argSqrd); | |
257 | ||
258 | shr_Xsig(&accumulator, 1); | |
259 | ||
260 | /* It doesn't matter if accumulator is all zero here, the | |
261 | following code will work ok */ | |
262 | negate_Xsig(&accumulator); | |
263 | ||
264 | if (accumulator.lsw & 0x80000000) | |
265 | XSIG_LL(accumulator)++; | |
266 | if (accumulator.msw == 0) { | |
267 | /* The result is 1.0 */ | |
268 | FPU_copy_to_reg0(&CONST_1, TAG_Valid); | |
269 | return; | |
270 | } else { | |
271 | significand(&result) = XSIG_LL(accumulator); | |
272 | ||
273 | /* will be a valid positive nr with expon = -1 */ | |
274 | setexponentpos(&result, -1); | |
275 | } | |
276 | } else { | |
277 | fixed_arg = significand(st0_ptr); | |
278 | ||
279 | if (exponent == 0) { | |
280 | /* The argument is >= 1.0 */ | |
281 | ||
282 | /* Put the binary point at the left. */ | |
283 | fixed_arg <<= 1; | |
284 | } | |
285 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ | |
286 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; | |
287 | /* There is a special case which arises due to rounding, to fix here. */ | |
288 | if (fixed_arg == 0xffffffffffffffffLL) | |
289 | fixed_arg = 0; | |
290 | ||
291 | exponent = -1; | |
292 | exp2 = -1; | |
293 | ||
294 | /* A shift is needed here only for a narrow range of arguments, | |
295 | i.e. for fixed_arg approx 2^-32, but we pick up more... */ | |
296 | if (!(LL_MSW(fixed_arg) & 0xffff0000)) { | |
297 | fixed_arg <<= 16; | |
298 | exponent -= 16; | |
299 | exp2 -= 16; | |
300 | } | |
301 | ||
302 | XSIG_LL(argSqrd) = fixed_arg; | |
303 | argSqrd.lsw = 0; | |
304 | mul64_Xsig(&argSqrd, &fixed_arg); | |
305 | ||
306 | if (exponent < -1) { | |
307 | /* shift the argument right by the required places */ | |
308 | shr_Xsig(&argSqrd, 2 * (-1 - exponent)); | |
309 | } | |
310 | ||
311 | argTo4.msw = argSqrd.msw; | |
312 | argTo4.midw = argSqrd.midw; | |
313 | argTo4.lsw = argSqrd.lsw; | |
314 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
315 | ||
316 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, | |
317 | N_COEFF_N - 1); | |
318 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
319 | negate_Xsig(&accumulator); | |
320 | ||
321 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, | |
322 | N_COEFF_P - 1); | |
323 | ||
324 | shr_Xsig(&accumulator, 2); /* Divide by four */ | |
325 | accumulator.msw |= 0x80000000; /* Add 1.0 */ | |
326 | ||
327 | mul64_Xsig(&accumulator, &fixed_arg); | |
328 | mul64_Xsig(&accumulator, &fixed_arg); | |
329 | mul64_Xsig(&accumulator, &fixed_arg); | |
330 | ||
331 | /* Divide by four, FPU_REG compatible, etc */ | |
332 | exponent = 3 * exponent; | |
333 | ||
334 | /* The minimum exponent difference is 3 */ | |
335 | shr_Xsig(&accumulator, exp2 - exponent); | |
336 | ||
337 | negate_Xsig(&accumulator); | |
338 | XSIG_LL(accumulator) += fixed_arg; | |
339 | ||
340 | /* The basic computation is complete. Now fix the answer to | |
341 | compensate for the error due to the approximation used for | |
342 | pi/2 | |
343 | */ | |
344 | ||
345 | /* This has an exponent of -65 */ | |
346 | XSIG_LL(fix_up) = 0x898cc51701b839a2ll; | |
347 | fix_up.lsw = 0; | |
348 | ||
349 | /* The fix-up needs to be improved for larger args */ | |
350 | if (argSqrd.msw & 0xffc00000) { | |
351 | /* Get about 32 bit precision in these: */ | |
352 | fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; | |
353 | fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; | |
354 | } | |
355 | ||
356 | exp2 += norm_Xsig(&accumulator); | |
357 | shr_Xsig(&accumulator, 1); /* Prevent overflow */ | |
358 | exp2++; | |
359 | shr_Xsig(&fix_up, 65 + exp2); | |
360 | ||
361 | add_Xsig_Xsig(&accumulator, &fix_up); | |
362 | ||
363 | echange = round_Xsig(&accumulator); | |
364 | ||
365 | setexponentpos(&result, exp2 + echange); | |
366 | significand(&result) = XSIG_LL(accumulator); | |
1da177e4 LT |
367 | } |
368 | ||
3d0d14f9 | 369 | FPU_copy_to_reg0(&result, TAG_Valid); |
1da177e4 LT |
370 | |
371 | #ifdef PARANOID | |
3d0d14f9 IM |
372 | if ((exponent(&result) >= 0) |
373 | && (significand(&result) > 0x8000000000000000LL)) { | |
374 | EXCEPTION(EX_INTERNAL | 0x151); | |
375 | } | |
1da177e4 LT |
376 | #endif /* PARANOID */ |
377 | ||
378 | } |