Commit | Line | Data |
---|---|---|
83d43305 MC |
1 | /* |
2 | * IEEE754 floating point arithmetic | |
3 | * double precision: MSUB.f (Fused Multiply Subtract) | |
4 | * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft]) | |
5 | * | |
6 | * MIPS floating point support | |
7 | * Copyright (C) 2015 Imagination Technologies, Ltd. | |
8 | * Author: Markos Chandras <markos.chandras@imgtec.com> | |
9 | * | |
10 | * This program is free software; you can distribute it and/or modify it | |
11 | * under the terms of the GNU General Public License as published by the | |
12 | * Free Software Foundation; version 2 of the License. | |
13 | */ | |
14 | ||
15 | #include "ieee754dp.h" | |
16 | ||
17 | union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, | |
18 | union ieee754dp y) | |
19 | { | |
20 | int re; | |
21 | int rs; | |
22 | u64 rm; | |
23 | unsigned lxm; | |
24 | unsigned hxm; | |
25 | unsigned lym; | |
26 | unsigned hym; | |
27 | u64 lrm; | |
28 | u64 hrm; | |
29 | u64 t; | |
30 | u64 at; | |
31 | int s; | |
32 | ||
33 | COMPXDP; | |
34 | COMPYDP; | |
35 | ||
36 | u64 zm; int ze; int zs __maybe_unused; int zc; | |
37 | ||
38 | EXPLODEXDP; | |
39 | EXPLODEYDP; | |
40 | EXPLODEDP(z, zc, zs, ze, zm) | |
41 | ||
42 | FLUSHXDP; | |
43 | FLUSHYDP; | |
44 | FLUSHDP(z, zc, zs, ze, zm); | |
45 | ||
46 | ieee754_clearcx(); | |
47 | ||
48 | switch (zc) { | |
49 | case IEEE754_CLASS_SNAN: | |
50 | ieee754_setcx(IEEE754_INVALID_OPERATION); | |
51 | return ieee754dp_nanxcpt(z); | |
52 | case IEEE754_CLASS_DNORM: | |
53 | DPDNORMx(zm, ze); | |
54 | /* QNAN is handled separately below */ | |
55 | } | |
56 | ||
57 | switch (CLPAIR(xc, yc)) { | |
58 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): | |
59 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): | |
60 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): | |
61 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): | |
62 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): | |
63 | return ieee754dp_nanxcpt(y); | |
64 | ||
65 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): | |
66 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): | |
67 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): | |
68 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): | |
69 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): | |
70 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): | |
71 | return ieee754dp_nanxcpt(x); | |
72 | ||
73 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): | |
74 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): | |
75 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): | |
76 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): | |
77 | return y; | |
78 | ||
79 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): | |
80 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): | |
81 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): | |
82 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): | |
83 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): | |
84 | return x; | |
85 | ||
86 | ||
87 | /* | |
88 | * Infinity handling | |
89 | */ | |
90 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): | |
91 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): | |
92 | if (zc == IEEE754_CLASS_QNAN) | |
93 | return z; | |
94 | ieee754_setcx(IEEE754_INVALID_OPERATION); | |
95 | return ieee754dp_indef(); | |
96 | ||
97 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): | |
98 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): | |
99 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): | |
100 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): | |
101 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): | |
102 | if (zc == IEEE754_CLASS_QNAN) | |
103 | return z; | |
104 | return ieee754dp_inf(xs ^ ys); | |
105 | ||
106 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): | |
107 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): | |
108 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): | |
109 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): | |
110 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): | |
111 | if (zc == IEEE754_CLASS_INF) | |
112 | return ieee754dp_inf(zs); | |
113 | /* Multiplication is 0 so just return z */ | |
114 | return z; | |
115 | ||
116 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): | |
117 | DPDNORMX; | |
118 | ||
119 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): | |
120 | if (zc == IEEE754_CLASS_QNAN) | |
121 | return z; | |
122 | else if (zc == IEEE754_CLASS_INF) | |
123 | return ieee754dp_inf(zs); | |
124 | DPDNORMY; | |
125 | break; | |
126 | ||
127 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): | |
128 | if (zc == IEEE754_CLASS_QNAN) | |
129 | return z; | |
130 | else if (zc == IEEE754_CLASS_INF) | |
131 | return ieee754dp_inf(zs); | |
132 | DPDNORMX; | |
133 | break; | |
134 | ||
135 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): | |
136 | if (zc == IEEE754_CLASS_QNAN) | |
137 | return z; | |
138 | else if (zc == IEEE754_CLASS_INF) | |
139 | return ieee754dp_inf(zs); | |
140 | /* fall through to real computations */ | |
141 | } | |
142 | ||
143 | /* Finally get to do some computation */ | |
144 | ||
145 | /* | |
146 | * Do the multiplication bit first | |
147 | * | |
148 | * rm = xm * ym, re = xe + ye basically | |
149 | * | |
150 | * At this point xm and ym should have been normalized. | |
151 | */ | |
152 | assert(xm & DP_HIDDEN_BIT); | |
153 | assert(ym & DP_HIDDEN_BIT); | |
154 | ||
155 | re = xe + ye; | |
156 | rs = xs ^ ys; | |
157 | ||
158 | /* shunt to top of word */ | |
159 | xm <<= 64 - (DP_FBITS + 1); | |
160 | ym <<= 64 - (DP_FBITS + 1); | |
161 | ||
162 | /* | |
163 | * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. | |
164 | */ | |
165 | ||
166 | /* 32 * 32 => 64 */ | |
167 | #define DPXMULT(x, y) ((u64)(x) * (u64)y) | |
168 | ||
169 | lxm = xm; | |
170 | hxm = xm >> 32; | |
171 | lym = ym; | |
172 | hym = ym >> 32; | |
173 | ||
174 | lrm = DPXMULT(lxm, lym); | |
175 | hrm = DPXMULT(hxm, hym); | |
176 | ||
177 | t = DPXMULT(lxm, hym); | |
178 | ||
179 | at = lrm + (t << 32); | |
180 | hrm += at < lrm; | |
181 | lrm = at; | |
182 | ||
183 | hrm = hrm + (t >> 32); | |
184 | ||
185 | t = DPXMULT(hxm, lym); | |
186 | ||
187 | at = lrm + (t << 32); | |
188 | hrm += at < lrm; | |
189 | lrm = at; | |
190 | ||
191 | hrm = hrm + (t >> 32); | |
192 | ||
193 | rm = hrm | (lrm != 0); | |
194 | ||
195 | /* | |
196 | * Sticky shift down to normal rounding precision. | |
197 | */ | |
198 | if ((s64) rm < 0) { | |
199 | rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | | |
200 | ((rm << (DP_FBITS + 1 + 3)) != 0); | |
201 | re++; | |
202 | } else { | |
203 | rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | | |
204 | ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); | |
205 | } | |
206 | assert(rm & (DP_HIDDEN_BIT << 3)); | |
207 | ||
208 | /* And now the subtraction */ | |
209 | ||
210 | /* flip sign of r and handle as add */ | |
211 | rs ^= 1; | |
212 | ||
213 | assert(zm & DP_HIDDEN_BIT); | |
214 | ||
215 | /* | |
216 | * Provide guard,round and stick bit space. | |
217 | */ | |
218 | zm <<= 3; | |
219 | ||
220 | if (ze > re) { | |
221 | /* | |
222 | * Have to shift y fraction right to align. | |
223 | */ | |
224 | s = ze - re; | |
225 | rm = XDPSRS(rm, s); | |
226 | re += s; | |
227 | } else if (re > ze) { | |
228 | /* | |
229 | * Have to shift x fraction right to align. | |
230 | */ | |
231 | s = re - ze; | |
232 | zm = XDPSRS(zm, s); | |
233 | ze += s; | |
234 | } | |
235 | assert(ze == re); | |
236 | assert(ze <= DP_EMAX); | |
237 | ||
238 | if (zs == rs) { | |
239 | /* | |
240 | * Generate 28 bit result of adding two 27 bit numbers | |
241 | * leaving result in xm, xs and xe. | |
242 | */ | |
243 | zm = zm + rm; | |
244 | ||
245 | if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */ | |
246 | zm = XDPSRS1(zm); | |
247 | ze++; | |
248 | } | |
249 | } else { | |
250 | if (zm >= rm) { | |
251 | zm = zm - rm; | |
252 | } else { | |
253 | zm = rm - zm; | |
254 | zs = rs; | |
255 | } | |
256 | if (zm == 0) | |
257 | return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); | |
258 | ||
259 | /* | |
260 | * Normalize to rounding precision. | |
261 | */ | |
262 | while ((zm >> (DP_FBITS + 3)) == 0) { | |
263 | zm <<= 1; | |
264 | ze--; | |
265 | } | |
266 | } | |
267 | ||
268 | return ieee754dp_format(zs, ze, zm); | |
269 | } |