Commit | Line | Data |
---|---|---|
83d43305 MC |
1 | /* |
2 | * IEEE754 floating point arithmetic | |
3 | * single precision: MSUB.f (Fused Multiply Subtract) | |
4 | * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft]) | |
5 | * | |
6 | * MIPS floating point support | |
7 | * Copyright (C) 2015 Imagination Technologies, Ltd. | |
8 | * Author: Markos Chandras <markos.chandras@imgtec.com> | |
9 | * | |
10 | * This program is free software; you can distribute it and/or modify it | |
11 | * under the terms of the GNU General Public License as published by the | |
12 | * Free Software Foundation; version 2 of the License. | |
13 | */ | |
14 | ||
15 | #include "ieee754sp.h" | |
16 | ||
17 | union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, | |
18 | union ieee754sp y) | |
19 | { | |
20 | int re; | |
21 | int rs; | |
22 | unsigned rm; | |
23 | unsigned short lxm; | |
24 | unsigned short hxm; | |
25 | unsigned short lym; | |
26 | unsigned short hym; | |
27 | unsigned lrm; | |
28 | unsigned hrm; | |
29 | unsigned t; | |
30 | unsigned at; | |
31 | int s; | |
32 | ||
33 | COMPXSP; | |
34 | COMPYSP; | |
35 | u32 zm; int ze; int zs __maybe_unused; int zc; | |
36 | ||
37 | EXPLODEXSP; | |
38 | EXPLODEYSP; | |
39 | EXPLODESP(z, zc, zs, ze, zm) | |
40 | ||
41 | FLUSHXSP; | |
42 | FLUSHYSP; | |
43 | FLUSHSP(z, zc, zs, ze, zm); | |
44 | ||
45 | ieee754_clearcx(); | |
46 | ||
47 | switch (zc) { | |
48 | case IEEE754_CLASS_SNAN: | |
49 | ieee754_setcx(IEEE754_INVALID_OPERATION); | |
50 | return ieee754sp_nanxcpt(z); | |
51 | case IEEE754_CLASS_DNORM: | |
52 | SPDNORMx(zm, ze); | |
53 | /* QNAN is handled separately below */ | |
54 | } | |
55 | ||
56 | switch (CLPAIR(xc, yc)) { | |
57 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): | |
58 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): | |
59 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): | |
60 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): | |
61 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): | |
62 | return ieee754sp_nanxcpt(y); | |
63 | ||
64 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): | |
65 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): | |
66 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): | |
67 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): | |
68 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): | |
69 | case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): | |
70 | return ieee754sp_nanxcpt(x); | |
71 | ||
72 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): | |
73 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): | |
74 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): | |
75 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): | |
76 | return y; | |
77 | ||
78 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): | |
79 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): | |
80 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): | |
81 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): | |
82 | case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): | |
83 | return x; | |
84 | ||
85 | /* | |
86 | * Infinity handling | |
87 | */ | |
88 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): | |
89 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): | |
90 | if (zc == IEEE754_CLASS_QNAN) | |
91 | return z; | |
92 | ieee754_setcx(IEEE754_INVALID_OPERATION); | |
93 | return ieee754sp_indef(); | |
94 | ||
95 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): | |
96 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): | |
97 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): | |
98 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): | |
99 | case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): | |
100 | if (zc == IEEE754_CLASS_QNAN) | |
101 | return z; | |
102 | return ieee754sp_inf(xs ^ ys); | |
103 | ||
104 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): | |
105 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): | |
106 | case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): | |
107 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): | |
108 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): | |
109 | if (zc == IEEE754_CLASS_INF) | |
110 | return ieee754sp_inf(zs); | |
111 | /* Multiplication is 0 so just return z */ | |
112 | return z; | |
113 | ||
114 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): | |
115 | SPDNORMX; | |
116 | ||
117 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): | |
118 | if (zc == IEEE754_CLASS_QNAN) | |
119 | return z; | |
120 | else if (zc == IEEE754_CLASS_INF) | |
121 | return ieee754sp_inf(zs); | |
122 | SPDNORMY; | |
123 | break; | |
124 | ||
125 | case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): | |
126 | if (zc == IEEE754_CLASS_QNAN) | |
127 | return z; | |
128 | else if (zc == IEEE754_CLASS_INF) | |
129 | return ieee754sp_inf(zs); | |
130 | SPDNORMX; | |
131 | break; | |
132 | ||
133 | case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): | |
134 | if (zc == IEEE754_CLASS_QNAN) | |
135 | return z; | |
136 | else if (zc == IEEE754_CLASS_INF) | |
137 | return ieee754sp_inf(zs); | |
138 | /* fall through to real compuation */ | |
139 | } | |
140 | ||
141 | /* Finally get to do some computation */ | |
142 | ||
143 | /* | |
144 | * Do the multiplication bit first | |
145 | * | |
146 | * rm = xm * ym, re = xe + ye basically | |
147 | * | |
148 | * At this point xm and ym should have been normalized. | |
149 | */ | |
150 | ||
151 | /* rm = xm * ym, re = xe+ye basically */ | |
152 | assert(xm & SP_HIDDEN_BIT); | |
153 | assert(ym & SP_HIDDEN_BIT); | |
154 | ||
155 | re = xe + ye; | |
156 | rs = xs ^ ys; | |
157 | ||
158 | /* shunt to top of word */ | |
159 | xm <<= 32 - (SP_FBITS + 1); | |
160 | ym <<= 32 - (SP_FBITS + 1); | |
161 | ||
162 | /* | |
163 | * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. | |
164 | */ | |
165 | lxm = xm & 0xffff; | |
166 | hxm = xm >> 16; | |
167 | lym = ym & 0xffff; | |
168 | hym = ym >> 16; | |
169 | ||
170 | lrm = lxm * lym; /* 16 * 16 => 32 */ | |
171 | hrm = hxm * hym; /* 16 * 16 => 32 */ | |
172 | ||
173 | t = lxm * hym; /* 16 * 16 => 32 */ | |
174 | at = lrm + (t << 16); | |
175 | hrm += at < lrm; | |
176 | lrm = at; | |
177 | hrm = hrm + (t >> 16); | |
178 | ||
179 | t = hxm * lym; /* 16 * 16 => 32 */ | |
180 | at = lrm + (t << 16); | |
181 | hrm += at < lrm; | |
182 | lrm = at; | |
183 | hrm = hrm + (t >> 16); | |
184 | ||
185 | rm = hrm | (lrm != 0); | |
186 | ||
187 | /* | |
188 | * Sticky shift down to normal rounding precision. | |
189 | */ | |
190 | if ((int) rm < 0) { | |
191 | rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | | |
192 | ((rm << (SP_FBITS + 1 + 3)) != 0); | |
193 | re++; | |
194 | } else { | |
195 | rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | | |
196 | ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); | |
197 | } | |
198 | assert(rm & (SP_HIDDEN_BIT << 3)); | |
199 | ||
200 | /* And now the subtraction */ | |
201 | ||
202 | /* Flip sign of r and handle as add */ | |
203 | rs ^= 1; | |
204 | ||
205 | assert(zm & SP_HIDDEN_BIT); | |
206 | ||
207 | /* | |
208 | * Provide guard,round and stick bit space. | |
209 | */ | |
210 | zm <<= 3; | |
211 | ||
212 | if (ze > re) { | |
213 | /* | |
214 | * Have to shift y fraction right to align. | |
215 | */ | |
216 | s = ze - re; | |
217 | SPXSRSYn(s); | |
218 | } else if (re > ze) { | |
219 | /* | |
220 | * Have to shift x fraction right to align. | |
221 | */ | |
222 | s = re - ze; | |
223 | SPXSRSYn(s); | |
224 | } | |
225 | assert(ze == re); | |
226 | assert(ze <= SP_EMAX); | |
227 | ||
228 | if (zs == rs) { | |
229 | /* | |
230 | * Generate 28 bit result of adding two 27 bit numbers | |
231 | * leaving result in zm, zs and ze. | |
232 | */ | |
233 | zm = zm + rm; | |
234 | ||
235 | if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ | |
236 | SPXSRSX1(); /* shift preserving sticky */ | |
237 | } | |
238 | } else { | |
239 | if (zm >= rm) { | |
240 | zm = zm - rm; | |
241 | } else { | |
242 | zm = rm - zm; | |
243 | zs = rs; | |
244 | } | |
245 | if (zm == 0) | |
246 | return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); | |
247 | ||
248 | /* | |
249 | * Normalize in extended single precision | |
250 | */ | |
251 | while ((zm >> (SP_MBITS + 3)) == 0) { | |
252 | zm <<= 1; | |
253 | ze--; | |
254 | } | |
255 | ||
256 | } | |
257 | return ieee754sp_format(zs, ze, zm); | |
258 | } |