Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | .file "div_Xsig.S" |
2 | /*---------------------------------------------------------------------------+ | |
3 | | div_Xsig.S | | |
4 | | | | |
5 | | Division subroutine for 96 bit quantities | | |
6 | | | | |
7 | | Copyright (C) 1994,1995 | | |
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | |
9 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | | |
10 | | | | |
11 | | | | |
12 | +---------------------------------------------------------------------------*/ | |
13 | ||
14 | /*---------------------------------------------------------------------------+ | |
15 | | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and | | |
16 | | put the 96 bit result at the location d. | | |
17 | | | | |
18 | | The result may not be accurate to 96 bits. It is intended for use where | | |
19 | | a result better than 64 bits is required. The result should usually be | | |
20 | | good to at least 94 bits. | | |
21 | | The returned result is actually divided by one half. This is done to | | |
22 | | prevent overflow. | | |
23 | | | | |
24 | | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd | | |
25 | | | | |
26 | | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) | | |
27 | | | | |
28 | +---------------------------------------------------------------------------*/ | |
29 | ||
30 | #include "exception.h" | |
31 | #include "fpu_emu.h" | |
32 | ||
33 | ||
34 | #define XsigLL(x) (x) | |
35 | #define XsigL(x) 4(x) | |
36 | #define XsigH(x) 8(x) | |
37 | ||
38 | ||
39 | #ifndef NON_REENTRANT_FPU | |
40 | /* | |
41 | Local storage on the stack: | |
42 | Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 | |
43 | */ | |
44 | #define FPU_accum_3 -4(%ebp) | |
45 | #define FPU_accum_2 -8(%ebp) | |
46 | #define FPU_accum_1 -12(%ebp) | |
47 | #define FPU_accum_0 -16(%ebp) | |
48 | #define FPU_result_3 -20(%ebp) | |
49 | #define FPU_result_2 -24(%ebp) | |
50 | #define FPU_result_1 -28(%ebp) | |
51 | ||
52 | #else | |
53 | .data | |
54 | /* | |
55 | Local storage in a static area: | |
56 | Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 | |
57 | */ | |
58 | .align 4,0 | |
59 | FPU_accum_3: | |
60 | .long 0 | |
61 | FPU_accum_2: | |
62 | .long 0 | |
63 | FPU_accum_1: | |
64 | .long 0 | |
65 | FPU_accum_0: | |
66 | .long 0 | |
67 | FPU_result_3: | |
68 | .long 0 | |
69 | FPU_result_2: | |
70 | .long 0 | |
71 | FPU_result_1: | |
72 | .long 0 | |
73 | #endif /* NON_REENTRANT_FPU */ | |
74 | ||
75 | ||
76 | .text | |
77 | ENTRY(div_Xsig) | |
78 | pushl %ebp | |
79 | movl %esp,%ebp | |
80 | #ifndef NON_REENTRANT_FPU | |
81 | subl $28,%esp | |
82 | #endif /* NON_REENTRANT_FPU */ | |
83 | ||
84 | pushl %esi | |
85 | pushl %edi | |
86 | pushl %ebx | |
87 | ||
88 | movl PARAM1,%esi /* pointer to num */ | |
89 | movl PARAM2,%ebx /* pointer to denom */ | |
90 | ||
91 | #ifdef PARANOID | |
92 | testl $0x80000000, XsigH(%ebx) /* Divisor */ | |
93 | je L_bugged | |
94 | #endif /* PARANOID */ | |
95 | ||
96 | ||
97 | /*---------------------------------------------------------------------------+ | |
98 | | Divide: Return arg1/arg2 to arg3. | | |
99 | | | | |
100 | | The maximum returned value is (ignoring exponents) | | |
101 | | .ffffffff ffffffff | | |
102 | | ------------------ = 1.ffffffff fffffffe | | |
103 | | .80000000 00000000 | | |
104 | | and the minimum is | | |
105 | | .80000000 00000000 | | |
106 | | ------------------ = .80000000 00000001 (rounded) | | |
107 | | .ffffffff ffffffff | | |
108 | | | | |
109 | +---------------------------------------------------------------------------*/ | |
110 | ||
111 | /* Save extended dividend in local register */ | |
112 | ||
113 | /* Divide by 2 to prevent overflow */ | |
114 | clc | |
115 | movl XsigH(%esi),%eax | |
116 | rcrl %eax | |
117 | movl %eax,FPU_accum_3 | |
118 | movl XsigL(%esi),%eax | |
119 | rcrl %eax | |
120 | movl %eax,FPU_accum_2 | |
121 | movl XsigLL(%esi),%eax | |
122 | rcrl %eax | |
123 | movl %eax,FPU_accum_1 | |
124 | movl $0,%eax | |
125 | rcrl %eax | |
126 | movl %eax,FPU_accum_0 | |
127 | ||
128 | movl FPU_accum_2,%eax /* Get the current num */ | |
129 | movl FPU_accum_3,%edx | |
130 | ||
131 | /*----------------------------------------------------------------------*/ | |
132 | /* Initialization done. | |
133 | Do the first 32 bits. */ | |
134 | ||
135 | /* We will divide by a number which is too large */ | |
136 | movl XsigH(%ebx),%ecx | |
137 | addl $1,%ecx | |
138 | jnc LFirst_div_not_1 | |
139 | ||
140 | /* here we need to divide by 100000000h, | |
141 | i.e., no division at all.. */ | |
142 | mov %edx,%eax | |
143 | jmp LFirst_div_done | |
144 | ||
145 | LFirst_div_not_1: | |
146 | divl %ecx /* Divide the numerator by the augmented | |
147 | denom ms dw */ | |
148 | ||
149 | LFirst_div_done: | |
150 | movl %eax,FPU_result_3 /* Put the result in the answer */ | |
151 | ||
152 | mull XsigH(%ebx) /* mul by the ms dw of the denom */ | |
153 | ||
154 | subl %eax,FPU_accum_2 /* Subtract from the num local reg */ | |
155 | sbbl %edx,FPU_accum_3 | |
156 | ||
157 | movl FPU_result_3,%eax /* Get the result back */ | |
158 | mull XsigL(%ebx) /* now mul the ls dw of the denom */ | |
159 | ||
160 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ | |
161 | sbbl %edx,FPU_accum_2 | |
162 | sbbl $0,FPU_accum_3 | |
163 | je LDo_2nd_32_bits /* Must check for non-zero result here */ | |
164 | ||
165 | #ifdef PARANOID | |
166 | jb L_bugged_1 | |
167 | #endif /* PARANOID */ | |
168 | ||
169 | /* need to subtract another once of the denom */ | |
170 | incl FPU_result_3 /* Correct the answer */ | |
171 | ||
172 | movl XsigL(%ebx),%eax | |
173 | movl XsigH(%ebx),%edx | |
174 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ | |
175 | sbbl %edx,FPU_accum_2 | |
176 | ||
177 | #ifdef PARANOID | |
178 | sbbl $0,FPU_accum_3 | |
179 | jne L_bugged_1 /* Must check for non-zero result here */ | |
180 | #endif /* PARANOID */ | |
181 | ||
182 | /*----------------------------------------------------------------------*/ | |
183 | /* Half of the main problem is done, there is just a reduced numerator | |
184 | to handle now. | |
185 | Work with the second 32 bits, FPU_accum_0 not used from now on */ | |
186 | LDo_2nd_32_bits: | |
187 | movl FPU_accum_2,%edx /* get the reduced num */ | |
188 | movl FPU_accum_1,%eax | |
189 | ||
190 | /* need to check for possible subsequent overflow */ | |
191 | cmpl XsigH(%ebx),%edx | |
192 | jb LDo_2nd_div | |
193 | ja LPrevent_2nd_overflow | |
194 | ||
195 | cmpl XsigL(%ebx),%eax | |
196 | jb LDo_2nd_div | |
197 | ||
198 | LPrevent_2nd_overflow: | |
199 | /* The numerator is greater or equal, would cause overflow */ | |
200 | /* prevent overflow */ | |
201 | subl XsigL(%ebx),%eax | |
202 | sbbl XsigH(%ebx),%edx | |
203 | movl %edx,FPU_accum_2 | |
204 | movl %eax,FPU_accum_1 | |
205 | ||
206 | incl FPU_result_3 /* Reflect the subtraction in the answer */ | |
207 | ||
208 | #ifdef PARANOID | |
209 | je L_bugged_2 /* Can't bump the result to 1.0 */ | |
210 | #endif /* PARANOID */ | |
211 | ||
212 | LDo_2nd_div: | |
213 | cmpl $0,%ecx /* augmented denom msw */ | |
214 | jnz LSecond_div_not_1 | |
215 | ||
216 | /* %ecx == 0, we are dividing by 1.0 */ | |
217 | mov %edx,%eax | |
218 | jmp LSecond_div_done | |
219 | ||
220 | LSecond_div_not_1: | |
221 | divl %ecx /* Divide the numerator by the denom ms dw */ | |
222 | ||
223 | LSecond_div_done: | |
224 | movl %eax,FPU_result_2 /* Put the result in the answer */ | |
225 | ||
226 | mull XsigH(%ebx) /* mul by the ms dw of the denom */ | |
227 | ||
228 | subl %eax,FPU_accum_1 /* Subtract from the num local reg */ | |
229 | sbbl %edx,FPU_accum_2 | |
230 | ||
231 | #ifdef PARANOID | |
232 | jc L_bugged_2 | |
233 | #endif /* PARANOID */ | |
234 | ||
235 | movl FPU_result_2,%eax /* Get the result back */ | |
236 | mull XsigL(%ebx) /* now mul the ls dw of the denom */ | |
237 | ||
238 | subl %eax,FPU_accum_0 /* Subtract from the num local reg */ | |
239 | sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ | |
240 | sbbl $0,FPU_accum_2 | |
241 | ||
242 | #ifdef PARANOID | |
243 | jc L_bugged_2 | |
244 | #endif /* PARANOID */ | |
245 | ||
246 | jz LDo_3rd_32_bits | |
247 | ||
248 | #ifdef PARANOID | |
249 | cmpl $1,FPU_accum_2 | |
250 | jne L_bugged_2 | |
251 | #endif /* PARANOID */ | |
252 | ||
253 | /* need to subtract another once of the denom */ | |
254 | movl XsigL(%ebx),%eax | |
255 | movl XsigH(%ebx),%edx | |
256 | subl %eax,FPU_accum_0 /* Subtract from the num local reg */ | |
257 | sbbl %edx,FPU_accum_1 | |
258 | sbbl $0,FPU_accum_2 | |
259 | ||
260 | #ifdef PARANOID | |
261 | jc L_bugged_2 | |
262 | jne L_bugged_2 | |
263 | #endif /* PARANOID */ | |
264 | ||
265 | addl $1,FPU_result_2 /* Correct the answer */ | |
266 | adcl $0,FPU_result_3 | |
267 | ||
268 | #ifdef PARANOID | |
269 | jc L_bugged_2 /* Must check for non-zero result here */ | |
270 | #endif /* PARANOID */ | |
271 | ||
272 | /*----------------------------------------------------------------------*/ | |
273 | /* The division is essentially finished here, we just need to perform | |
274 | tidying operations. | |
275 | Deal with the 3rd 32 bits */ | |
276 | LDo_3rd_32_bits: | |
277 | /* We use an approximation for the third 32 bits. | |
278 | To take account of the 3rd 32 bits of the divisor | |
279 | (call them del), we subtract del * (a/b) */ | |
280 | ||
281 | movl FPU_result_3,%eax /* a/b */ | |
282 | mull XsigLL(%ebx) /* del */ | |
283 | ||
284 | subl %edx,FPU_accum_1 | |
285 | ||
286 | /* A borrow indicates that the result is negative */ | |
287 | jnb LTest_over | |
288 | ||
289 | movl XsigH(%ebx),%edx | |
290 | addl %edx,FPU_accum_1 | |
291 | ||
292 | subl $1,FPU_result_2 /* Adjust the answer */ | |
293 | sbbl $0,FPU_result_3 | |
294 | ||
295 | /* The above addition might not have been enough, check again. */ | |
296 | movl FPU_accum_1,%edx /* get the reduced num */ | |
297 | cmpl XsigH(%ebx),%edx /* denom */ | |
298 | jb LDo_3rd_div | |
299 | ||
300 | movl XsigH(%ebx),%edx | |
301 | addl %edx,FPU_accum_1 | |
302 | ||
303 | subl $1,FPU_result_2 /* Adjust the answer */ | |
304 | sbbl $0,FPU_result_3 | |
305 | jmp LDo_3rd_div | |
306 | ||
307 | LTest_over: | |
308 | movl FPU_accum_1,%edx /* get the reduced num */ | |
309 | ||
310 | /* need to check for possible subsequent overflow */ | |
311 | cmpl XsigH(%ebx),%edx /* denom */ | |
312 | jb LDo_3rd_div | |
313 | ||
314 | /* prevent overflow */ | |
315 | subl XsigH(%ebx),%edx | |
316 | movl %edx,FPU_accum_1 | |
317 | ||
318 | addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ | |
319 | adcl $0,FPU_result_3 | |
320 | ||
321 | LDo_3rd_div: | |
322 | movl FPU_accum_0,%eax | |
323 | movl FPU_accum_1,%edx | |
324 | divl XsigH(%ebx) | |
325 | ||
326 | movl %eax,FPU_result_1 /* Rough estimate of third word */ | |
327 | ||
328 | movl PARAM3,%esi /* pointer to answer */ | |
329 | ||
330 | movl FPU_result_1,%eax | |
331 | movl %eax,XsigLL(%esi) | |
332 | movl FPU_result_2,%eax | |
333 | movl %eax,XsigL(%esi) | |
334 | movl FPU_result_3,%eax | |
335 | movl %eax,XsigH(%esi) | |
336 | ||
337 | L_exit: | |
338 | popl %ebx | |
339 | popl %edi | |
340 | popl %esi | |
341 | ||
342 | leave | |
343 | ret | |
344 | ||
345 | ||
346 | #ifdef PARANOID | |
347 | /* The logic is wrong if we got here */ | |
348 | L_bugged: | |
349 | pushl EX_INTERNAL|0x240 | |
350 | call EXCEPTION | |
351 | pop %ebx | |
352 | jmp L_exit | |
353 | ||
354 | L_bugged_1: | |
355 | pushl EX_INTERNAL|0x241 | |
356 | call EXCEPTION | |
357 | pop %ebx | |
358 | jmp L_exit | |
359 | ||
360 | L_bugged_2: | |
361 | pushl EX_INTERNAL|0x242 | |
362 | call EXCEPTION | |
363 | pop %ebx | |
364 | jmp L_exit | |
365 | #endif /* PARANOID */ |