Commit | Line | Data |
---|---|---|
1394f032 BW |
1 | /* |
2 | * File: arch/blackfin/lib/udivsi3.S | |
3 | * Based on: | |
4 | * Author: | |
5 | * | |
6 | * Created: | |
7 | * Description: | |
8 | * | |
9 | * Modified: | |
10 | * Copyright 2004-2006 Analog Devices Inc. | |
11 | * | |
12 | * Bugs: Enter bugs at http://blackfin.uclinux.org/ | |
13 | * | |
14 | * This program is free software; you can redistribute it and/or modify | |
15 | * it under the terms of the GNU General Public License as published by | |
16 | * the Free Software Foundation; either version 2 of the License, or | |
17 | * (at your option) any later version. | |
18 | * | |
19 | * This program is distributed in the hope that it will be useful, | |
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | * GNU General Public License for more details. | |
23 | * | |
24 | * You should have received a copy of the GNU General Public License | |
25 | * along with this program; if not, see the file COPYING, or write | |
26 | * to the Free Software Foundation, Inc., | |
27 | * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
28 | */ | |
29 | ||
30 | #include <linux/linkage.h> | |
31 | ||
/* CARRY is the name this file uses for the AC0 status flag. */
#define CARRY AC0

/* Emit the routine into .l1.text when CONFIG_ARITHMETIC_OPS_L1 is defined,
 * otherwise into the ordinary .text section.
 */
#ifdef CONFIG_ARITHMETIC_OPS_L1
.section .l1.text
#else
.text
#endif

/*
 * ___udivsi3 - unsigned 32-bit integer division.
 *
 * In:      R0 = dividend (X)
 *          R1 = divisor  (Y)
 * Out:     R0 = X / Y
 *
 * Scratch: R1-R3, P0-P2, CC, AQ and hardware loop 0 (LC0 via LSETUP).
 *          R5-R7 are used by the generic path only and are pushed/popped
 *          around it.
 * Stack:   the generic path temporarily uses four words
 *          (R5-R7 plus one word holding the negated divisor).
 *
 * Dispatch, in the order the tests are made:
 *   X < Y                          -> 0                   (.Lreturn_ident)
 *   (Y << 16) <= X                 -> METHOD 2            (.Lidents)
 *   X < 2^31 and Y < 2^15          -> METHOD 1, 16 x DIVQ (fall through)
 *   X < 2^31, Y in 0x8000..0xFFFF
 *     and the CARRY range check    -> METHOD 3            (.Lshift_and_correct)
 *   anything else                  -> METHOD 2            (.Lidents)
 */
ENTRY(___udivsi3)

	CC = R0 < R1 (IU);		/* If X < Y, always return 0 */
	IF CC JUMP .Lreturn_ident;

	R2 = R1 << 16;
	CC = R2 <= R0 (IU);		/* (Y << 16) <= X: no DIVQ method is attempted */
	IF CC JUMP .Lidents;

	R2 = R0 >> 31;			/* non-zero if X has bit 31 set */
	R3 = R1 >> 15;			/* non-zero if Y needs more than 15 bits */
	R2 = R2 | R3;			/* zero => 31-bit X and 15-bit Y: fast path */
	CC = R2;
	IF CC JUMP .Ly_16bit;

	/* METHOD 1: FAST DIVQ
	 * We know we have a 31-bit dividend and a 15-bit divisor, so we can use
	 * the simple DIVQ approach: clear AQ (implying unsigned division), then
	 * issue 16 DIVQs.
	 */

	AQ = CC;			/* CC is 0 here (branch above not taken) => AQ = 0 */

	/* ISR states: when dividing two integers (32.0/16.0) using the divide
	 * primitives, the dividend must first be shifted one bit to the left.
	 * X was checked to be a 31-bit number, so no bit is lost.
	 */
	R0 <<= 1;
	DIVQ(R0, R1);			/*  1 */
	DIVQ(R0, R1);			/*  2 */
	DIVQ(R0, R1);			/*  3 */
	DIVQ(R0, R1);			/*  4 */
	DIVQ(R0, R1);			/*  5 */
	DIVQ(R0, R1);			/*  6 */
	DIVQ(R0, R1);			/*  7 */
	DIVQ(R0, R1);			/*  8 */
	DIVQ(R0, R1);			/*  9 */
	DIVQ(R0, R1);			/* 10 */
	DIVQ(R0, R1);			/* 11 */
	DIVQ(R0, R1);			/* 12 */
	DIVQ(R0, R1);			/* 13 */
	DIVQ(R0, R1);			/* 14 */
	DIVQ(R0, R1);			/* 15 */
	DIVQ(R0, R1);			/* 16 */
	R0 = R0.L (Z);			/* quotient is the zero-extended low half */
	RTS;

.Ly_16bit:
	/* Either X has bit 31 set or Y has a bit set at position 15 or above.
	 * If X is still a 31-bit number and Y is a 16-bit number (but not
	 * bigger), the divide primitives can be used with pre-scaling and a
	 * post-divide correction.  R3 currently holds Y >> 15.
	 */

	/* According to the ISR, the usable range of the divide primitives for
	 * unsigned integer division is 31 bits.
	 */
	CC = ! BITTST(R0, 31);		/* CC = 1 if X fits in 31 bits */

	R3 += -1;			/* zero (AZ set) only if Y >> 15 == 1, */
	CC &= AZ;			/* i.e. Y is 0x00008000..0x0000FFFF */

	R3 = R1 >> 1;			/* Pre-scaled divisor for primitive case */
	R2 = R0 >> 16;			/* upper 16 bits of the dividend */

	/* The primitive path is taken only if this subtraction leaves CARRY
	 * set.  NOTE(review): presumably CARRY set means "no borrow", i.e. the
	 * upper half of X does not exceed Y >> 1 - confirm against the ISR.
	 */
	R2 = R3 - R2;
	CC &= CARRY;
	IF CC JUMP .Lshift_and_correct;

	/* Fall through to the identities */

	/* METHOD 2: identities and manual calculation
	 * We are not able to use the divide primitives, but may still catch
	 * some special cases.
	 */
.Lidents:
	/* Test for common identities. */
	CC = R0 == 0;			/* 0/Y => 0 (R0 already holds the result) */
	IF CC JUMP .Lreturn_r0;
	CC = R0 == R1;			/* X==Y => 1 */
	IF CC JUMP .Lreturn_ident;
	CC = R1 == 1;			/* X/1 => X */
	IF CC JUMP .Lreturn_ident;

	R2.L = ONES R1;			/* population count of Y */
	R2 = R2.L (Z);
	CC = R2 == 1;			/* exactly one bit set => power of two */
	IF CC JUMP .Lpower_of_two;

	[--SP] = (R7:5);		/* Push registers R5-R7 */

	/* Idents don't match. Go for the full operation. */

	R6 = 2;				/* assume we'll shift two */
	R3 = 1;

	P2 = R1;			/* keep the unscaled divisor for the fix-up */
	/* If either R0 or R1 has its sign bit set, divide it by two and note
	 * that it has been done.
	 */
	CC = R1 < 0;
	R2 = R1 >> 1;
	IF CC R1 = R2;			/* Possibly-shifted R1 */
	IF !CC R6 = R3;			/* R1 doesn't, so at most 1 shifted */

	P0 = 0;
	R3 = -R1;
	[--SP] = R3;			/* negated divisor, reloaded on every iteration */
	R2 = R0 >> 1;
	CC = R0 < 0;
	IF CC P0 = R6;			/* Number of values divided */
	IF !CC R2 = R0;			/* Possibly-shifted R0 */

	/* P0 is 0, 1 (NR/=2) or 2 (NR/=2, DR/=2) */

	/* R2 holds the working copy of the dividend and collects the quotient. */
	R3 = 0;				/* Clear partial remainder */
	R7 = 0;				/* Initialise quotient bit */

	P1 = 32;			/* One iteration per quotient bit */
	LSETUP(.Lulst, .Lulend) LC0 = P1;
.Lulst:
	R6 = R2 >> 31;			/* R6 = top bit of R2, for carry */
	R2 = R2 << 1;			/* Shift 64-bit R3:R2 up by 1 bit */
	R3 = R3 << 1 || R5 = [SP];	/* R5 = -divisor (subtract case) */
	R3 = R3 | R6;			/* Include any carry */
	CC = R7 < 0;			/* Check quotient (AQ) */
					/* If AQ==0, we'll sub divisor */
	IF CC R5 = R1;			/* and if AQ==1, we'll add it. */
	R3 = R3 + R5;			/* Add/sub divisor to partial remainder */
	R7 = R3 ^ R1;			/* Generate next quotient bit */

	R5 = R7 >> 31;			/* Get AQ */
	BITTGL(R5, 0);			/* Invert it, to get what we'll shift */
.Lulend:
	R2 = R2 + R5;			/* and "shift" it in. */

	CC = P0 == 0;			/* Check how many inputs we shifted */
	IF CC JUMP .Lno_mult;		/* if none, R2 is already the quotient */
	R6 = R2 << 1;
	CC = P0 == 1;
	IF CC R2 = R6;			/* if 1, Q = Q*2 */
	IF !CC R1 = P2;			/* if 2, restore stored divisor */

	R3 = R2;			/* Copy of R2 */
	R3 *= R1;			/* Q * divisor */
	R5 = R0 - R3;			/* Z = (dividend - Q * divisor) */
	CC = R1 <= R5 (IU);		/* Check if divisor <= Z? */
	R6 = CC;			/* if yes, R6 = 1 */
	R2 = R2 + R6;			/* if yes, add one to quotient (Q) */
.Lno_mult:
	SP += 4;			/* drop the saved negated divisor */
	(R7:5) = [SP++];		/* Pop registers R5-R7 */
	R0 = R2;			/* Store quotient */
	RTS;

	/* Shared exit for the simple cases; the result is built in R2.
	 * NOTE(review): every visible jump to this label has X < Y, or X == Y
	 * with X != 0, or Y == 1, so the R1 == 0 test below looks unreachable.
	 */
.Lreturn_ident:
	CC = R0 < R1 (IU);		/* If X < Y, always return 0 */
	R2 = 0;
	IF CC JUMP .Ltrue_return_ident;
	R2 = -1 (X);			/* X/0 => 0xFFFFFFFF */
	CC = R1 == 0;
	IF CC JUMP .Ltrue_return_ident;
	R2 = -R2;			/* R2 now 1 */
	CC = R0 == R1;			/* X==Y => 1 */
	IF CC JUMP .Ltrue_return_ident;
	R2 = R0;			/* X/1 => X */
	/*FALLTHRU*/

.Ltrue_return_ident:
	R0 = R2;
.Lreturn_r0:
	RTS;

.Lpower_of_two:
	/* Y has a single bit set, which means it's a power of two.
	 * That means we can perform the division just by shifting
	 * X to the right the appropriate number of bits.
	 */

	/* SIGNBITS returns the number of sign bits, minus one:
	 * 1=>30, 2=>29, ..., 0x40000000=>0.  So the shift count is
	 * signbits - 30 (zero or negative, i.e. a right shift).
	 * 0x80000000 is a special case, because that *also* gives a
	 * signbits of 0; it is handled first, where the quotient is
	 * simply the top bit of X.
	 */

	R2 = R0 >> 31;			/* quotient if Y == 0x80000000 */
	CC = R1 < 0;
	IF CC JUMP .Ltrue_return_ident;

	R1.l = SIGNBITS R1;
	R1 = R1.L (Z);
	R1 += -30;
	R0 = LSHIFT R0 by R1.L;
	RTS;

	/* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME
	 * POST-CORRECTION
	 * Two scaling operations are required to use the divide primitives with
	 * a divisor > 0x7FFF.
	 * Firstly (as in method 1) we need to shift the dividend 1 to the left
	 * for integer division.
	 * Secondly we need to shift both the divisor and dividend 1 to the right
	 * so both are in range for the primitives.
	 * The left/right shift of the dividend cancels out, so it is skipped.
	 */
.Lshift_and_correct:
	R2 = R0;
	/* R3 is already R1 >> 1 */
	CC=!CC;				/* CC was 1 on arrival (branch taken on CC) */
	AQ = CC;			/* so this clears AQ */
	DIVQ(R2, R3);			/*  1 */
	DIVQ(R2, R3);			/*  2 */
	DIVQ(R2, R3);			/*  3 */
	DIVQ(R2, R3);			/*  4 */
	DIVQ(R2, R3);			/*  5 */
	DIVQ(R2, R3);			/*  6 */
	DIVQ(R2, R3);			/*  7 */
	DIVQ(R2, R3);			/*  8 */
	DIVQ(R2, R3);			/*  9 */
	DIVQ(R2, R3);			/* 10 */
	DIVQ(R2, R3);			/* 11 */
	DIVQ(R2, R3);			/* 12 */
	DIVQ(R2, R3);			/* 13 */
	DIVQ(R2, R3);			/* 14 */
	DIVQ(R2, R3);			/* 15 */
	DIVQ(R2, R3);			/* 16 */

	/* According to the Instruction Set Reference, to divide by a
	 * divisor > 0x7FFF:
	 *  1. prescale and perform the divide to obtain quotient (Q) (done above),
	 *  2. multiply the quotient by the unscaled divisor (result M),
	 *  3. subtract the product from the dividend to get an error (E = X - M),
	 *  4. adjust Q according to E.
	 * Only the "Q is one too large" adjustment is implemented below.
	 */
	R3 = R2.L (Z);			/* Q = X' / Y' */
	R2 = R3;			/* Preserve Q */
	R2 *= R1;			/* M = Q * Y */
	R2 = R0 - R2;			/* E = X - M */
	R0 = R3;			/* Copy Q into result reg */

	/* Correction: if the upper 16 bits of E are all ones, M exceeded X
	 * (E went negative), so Q is one too large: subtract 1 from it.
	 */
	R3 = 0xFFFF (Z);
	R2 = R2 >> 16;			/* E >> 16 */
	CC = R2 == R3;
	R3 = 1 ;
	R1 = R0 - R3;			/* Q - 1 */
	IF CC R0 = R1;
	RTS;