sh: implement DMA_SLAVE capability in SH dmaengine driver
[deliverable/linux.git] / arch / sh / kernel / cpu / sh2a / fpu.c
1 /*
2 * Save/restore floating point context for signal handlers.
3 *
4 * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka
5 *
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details.
9 *
10 * FIXME! These routines can be optimized in big endian case.
11 */
12 #include <linux/sched.h>
13 #include <linux/signal.h>
14 #include <asm/processor.h>
15 #include <asm/io.h>
16 #include <asm/fpu.h>
17
18 /* The PR (precision) bit in the FP Status Register must be clear when
19 * an frchg instruction is executed, otherwise the instruction is undefined.
20 * Executing frchg with PR set causes a trap on some SH4 implementations.
21 */
22
23 #define FPSCR_RCHG 0x00000000
24
25
26 /*
27 * Save FPU registers onto task structure.
28 */
29 void
30 save_fpu(struct task_struct *tsk)
31 {
32 unsigned long dummy;
33
34 enable_fpu();
35 asm volatile("sts.l fpul, @-%0\n\t"
36 "sts.l fpscr, @-%0\n\t"
37 "fmov.s fr15, @-%0\n\t"
38 "fmov.s fr14, @-%0\n\t"
39 "fmov.s fr13, @-%0\n\t"
40 "fmov.s fr12, @-%0\n\t"
41 "fmov.s fr11, @-%0\n\t"
42 "fmov.s fr10, @-%0\n\t"
43 "fmov.s fr9, @-%0\n\t"
44 "fmov.s fr8, @-%0\n\t"
45 "fmov.s fr7, @-%0\n\t"
46 "fmov.s fr6, @-%0\n\t"
47 "fmov.s fr5, @-%0\n\t"
48 "fmov.s fr4, @-%0\n\t"
49 "fmov.s fr3, @-%0\n\t"
50 "fmov.s fr2, @-%0\n\t"
51 "fmov.s fr1, @-%0\n\t"
52 "fmov.s fr0, @-%0\n\t"
53 "lds %3, fpscr\n\t"
54 : "=r" (dummy)
55 : "0" ((char *)(&tsk->thread.fpu.hard.status)),
56 "r" (FPSCR_RCHG),
57 "r" (FPSCR_INIT)
58 : "memory");
59
60 disable_fpu();
61 }
62
63 static void
64 restore_fpu(struct task_struct *tsk)
65 {
66 unsigned long dummy;
67
68 enable_fpu();
69 asm volatile("fmov.s @%0+, fr0\n\t"
70 "fmov.s @%0+, fr1\n\t"
71 "fmov.s @%0+, fr2\n\t"
72 "fmov.s @%0+, fr3\n\t"
73 "fmov.s @%0+, fr4\n\t"
74 "fmov.s @%0+, fr5\n\t"
75 "fmov.s @%0+, fr6\n\t"
76 "fmov.s @%0+, fr7\n\t"
77 "fmov.s @%0+, fr8\n\t"
78 "fmov.s @%0+, fr9\n\t"
79 "fmov.s @%0+, fr10\n\t"
80 "fmov.s @%0+, fr11\n\t"
81 "fmov.s @%0+, fr12\n\t"
82 "fmov.s @%0+, fr13\n\t"
83 "fmov.s @%0+, fr14\n\t"
84 "fmov.s @%0+, fr15\n\t"
85 "lds.l @%0+, fpscr\n\t"
86 "lds.l @%0+, fpul\n\t"
87 : "=r" (dummy)
88 : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
89 : "memory");
90 disable_fpu();
91 }
92
93 /*
94 * Load the FPU with signalling NANS. This bit pattern we're using
95 * has the property that no matter wether considered as single or as
96 * double precission represents signaling NANS.
97 */
98
99 static void
100 fpu_init(void)
101 {
102 enable_fpu();
103 asm volatile("lds %0, fpul\n\t"
104 "fsts fpul, fr0\n\t"
105 "fsts fpul, fr1\n\t"
106 "fsts fpul, fr2\n\t"
107 "fsts fpul, fr3\n\t"
108 "fsts fpul, fr4\n\t"
109 "fsts fpul, fr5\n\t"
110 "fsts fpul, fr6\n\t"
111 "fsts fpul, fr7\n\t"
112 "fsts fpul, fr8\n\t"
113 "fsts fpul, fr9\n\t"
114 "fsts fpul, fr10\n\t"
115 "fsts fpul, fr11\n\t"
116 "fsts fpul, fr12\n\t"
117 "fsts fpul, fr13\n\t"
118 "fsts fpul, fr14\n\t"
119 "fsts fpul, fr15\n\t"
120 "lds %2, fpscr\n\t"
121 : /* no output */
122 : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
123 disable_fpu();
124 }
125
126 /*
127 * Emulate arithmetic ops on denormalized number for some FPU insns.
128 */
129
/*
 * Single-precision multiply where hx is a denormalized number.
 *
 * hx and hy are raw IEEE-754 single bit patterns.  If hy is itself
 * denormal/zero, or hx is zero, the result is a correctly signed zero.
 * Otherwise the denormal fraction of hx is multiplied by the full
 * significand of hy and the product is re-packed, truncating the low
 * bits (no rounding).  Returns the raw bit pattern of the product.
 */
static int denormal_mulf(int hx, int hy)
{
	const unsigned int sign = (hx ^ hy) & 0x80000000;
	unsigned int mag_x = hx & 0x7fffffff;
	unsigned int mag_y = hy & 0x7fffffff;
	unsigned long long product, scan;
	int e, top;

	if (mag_y < 0x00800000 || mag_x == 0)
		return sign;

	e = (mag_y & 0x7f800000) >> 23;
	mag_x &= 0x007fffff;
	/* Restore the implicit leading one of the normalized operand. */
	mag_y = (mag_y & 0x007fffff) | 0x00800000;
	product = (unsigned long long)mag_x * mag_y;

	/* top = index of the most significant set bit of the product. */
	top = -1;
	for (scan = product; scan; scan >>= 1)
		top++;

	/* FIXME: use guard bits */
	e += top - 126 - 46;
	if (e > 0)
		/* Normalized result: drop the leading one, keep 23 bits. */
		mag_x = ((int) (product >> (top - 23)) & 0x007fffff) | (e << 23);
	else if (e + 22 >= 0)
		/* Denormalized result: shift further right, exponent 0. */
		mag_x = (int) (product >> (top - 22 - e)) & 0x007fffff;
	else
		/* Too small to represent at all. */
		mag_x = 0;

	mag_x |= sign;
	return mag_x;
}
162
163 /* denormalized double * double */
164 static void mult64(unsigned long long x, unsigned long long y,
165 unsigned long long *highp, unsigned long long *lowp)
166 {
167 unsigned long long sub0, sub1, sub2, sub3;
168 unsigned long long high, low;
169
170 sub0 = (x >> 32) * (unsigned long) (y >> 32);
171 sub1 = (x & 0xffffffffLL) * (unsigned long) (y >> 32);
172 sub2 = (x >> 32) * (unsigned long) (y & 0xffffffffLL);
173 sub3 = (x & 0xffffffffLL) * (unsigned long) (y & 0xffffffffLL);
174 low = sub3;
175 high = 0LL;
176 sub3 += (sub1 << 32);
177 if (low > sub3)
178 high++;
179 low = sub3;
180 sub3 += (sub2 << 32);
181 if (low > sub3)
182 high++;
183 low = sub3;
184 high += (sub1 >> 32) + (sub2 >> 32);
185 high += sub0;
186 *lowp = low;
187 *highp = high;
188 }
189
/*
 * Shift the 128-bit value (mh:ml) right by n bits and return the low
 * 64 bits of the result.
 *
 * Shifting a 64-bit object by 64 or more bits is undefined in C, so the
 * boundary cases are handled explicitly:
 *   n <= 0   -> no shift, the low word is returned unchanged
 *              (n == 0 would otherwise evaluate mh << 64);
 *   n >= 128 -> everything is shifted out, result is 0.
 */
static inline long long rshift64(unsigned long long mh,
				 unsigned long long ml, int n)
{
	if (n <= 0)
		return ml;
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	return (mh << (64 - n)) | (ml >> n);
}
197
/*
 * Double-precision multiply where hx is a denormalized number.
 *
 * hx and hy are raw IEEE-754 double bit patterns.  If hy is itself
 * denormal/zero, or hx is zero, a correctly signed zero is returned.
 * Otherwise the denormal fraction of hx is multiplied by the full
 * significand of hy (128-bit product via mult64) and the product is
 * re-packed with truncation.  Returns the raw bit pattern of the product.
 */
static long long denormal_muld(long long hx, long long hy)
{
	const unsigned long long sign = (hx ^ hy) & 0x8000000000000000LL;
	unsigned long long mag_x = hx & 0x7fffffffffffffffLL;
	unsigned long long mag_y = hy & 0x7fffffffffffffffLL;
	unsigned long long prod_hi, prod_lo, scan;
	int e, top;

	if (mag_y < 0x0010000000000000LL || mag_x == 0)
		return sign;

	e = (mag_y & 0x7ff0000000000000LL) >> 52;
	mag_x &= 0x000fffffffffffffLL;
	/* Restore the implicit leading one of the normalized operand. */
	mag_y = (mag_y & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mult64(mag_x, mag_y, &prod_hi, &prod_lo);

	/* top = index of the most significant set bit of the product. */
	top = -1;
	if (prod_hi) {
		for (scan = prod_hi; scan; scan >>= 1)
			top++;
		top += 64;
	} else {
		for (scan = prod_lo; scan; scan >>= 1)
			top++;
	}

	/* FIXME: use guard bits */
	e += top - 1022 - 52 * 2;
	if (e > 0)
		/* Normalized result: drop the leading one, keep 52 bits. */
		mag_x = (rshift64(prod_hi, prod_lo, top - 52)
			 & 0x000fffffffffffffLL)
			| ((long long)e << 52);
	else if (e + 51 >= 0)
		/* Denormalized result: shift further right, exponent 0. */
		mag_x = rshift64(prod_hi, prod_lo, top - 51 - e)
			& 0x000fffffffffffffLL;
	else
		/* Too small to represent at all. */
		mag_x = 0;

	mag_x |= sign;
	return mag_x;
}
235
/*
 * Single-precision magnitude subtraction: ix - iy, where iy is a
 * denormal and both operands are non-negative raw bit patterns.
 *
 * If ix is denormal too, the bit patterns subtract directly (the result
 * may be negative; the caller deals with that).  Otherwise iy is aligned
 * to ix's exponent, subtracted from ix's significand and the difference
 * is renormalized.  When iy is too small to affect ix, ix is returned.
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	int mant;
	int e;

	if (ix < 0x00800000)
		return ix - iy;

	e = (ix & 0x7f800000) >> 23;
	/* iy would be shifted out completely: nothing to subtract. */
	if (e - 1 > 31)
		return ix;
	iy >>= e - 1;
	if (iy == 0)
		return ix;

	mant = (ix & 0x007fffff) | 0x00800000;
	mant -= iy;

	/* Renormalize; falling to exponent 0 yields a denormal result. */
	for (; mant < 0x00800000; mant <<= 1)
		if (--e == 0)
			return mant;

	return (e << 23) | (mant & 0x007fffff);
}
262
/*
 * Single-precision magnitude addition: ix + iy, where iy is a denormal
 * and both operands are non-negative raw bit patterns.
 *
 * If ix is denormal too, the bit patterns add directly.  Otherwise iy is
 * aligned to ix's exponent and added to ix's significand; a carry out of
 * the significand bumps the exponent.  When iy is too small to affect
 * ix, ix is returned unchanged.
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
	int mant;
	int e;

	if (ix < 0x00800000)
		return ix + iy;

	e = (ix & 0x7f800000) >> 23;
	/* iy would be shifted out completely: nothing to add. */
	if (e - 1 > 31)
		return ix;
	iy >>= e - 1;
	if (iy == 0)
		return ix;

	mant = (ix & 0x007fffff) | 0x00800000;
	mant += iy;
	if (mant >= 0x01000000) {
		/* Carry into bit 24: halve the significand, bump exponent. */
		mant >>= 1;
		++e;
	}

	return (e << 23) | (mant & 0x007fffff);
}
288
/*
 * Single-precision addition hx + hy where at least one operand is a
 * denormal.  hx and hy are raw IEEE-754 single bit patterns; the raw bit
 * pattern of the sum is returned.
 *
 * The helpers expect the denormal as their second argument, so the
 * operands are swapped when hy is the normalized one.  For operands of
 * opposite sign the magnitudes are subtracted and the sign is adjusted
 * according to which magnitude was larger.
 */
static int denormal_addf(int hx, int hy)
{
	unsigned int mag_x = hx & 0x7fffffff;
	unsigned int mag_y = hy & 0x7fffffff;
	int sign = hx & 0x80000000;
	unsigned int res;

	if (!((hx ^ hy) & 0x80000000)) {
		/* Same sign: add magnitudes, keep the common sign. */
		if (mag_y < 0x00800000)
			res = denormal_addf1(mag_x, mag_y);
		else
			res = denormal_addf1(mag_y, mag_x);
		return sign | res;
	}

	/* Opposite signs: subtract magnitudes. */
	if (mag_y < 0x00800000) {
		res = denormal_subf1(mag_x, mag_y);
		if ((int) res < 0) {
			/* |hy| was larger: result takes hy's sign. */
			res = -res;
			sign ^= 0x80000000;
		}
	} else {
		/* hy is normalized, hence the larger one: |hy| - |hx|. */
		res = denormal_subf1(mag_y, mag_x);
		sign ^= 0x80000000;
	}

	return sign | res;
}
320
/*
 * Double-precision magnitude subtraction: ix - iy, where iy is a
 * denormal and both operands are non-negative raw bit patterns.
 *
 * If ix is denormal too, the bit patterns subtract directly (the result
 * may be negative; the caller deals with that).  Otherwise iy is aligned
 * to ix's exponent, subtracted from ix's significand and the difference
 * is renormalized.  When iy is too small to affect ix, ix is returned.
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	int e;

	if (ix < 0x0010000000000000LL)
		return ix - iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	/* iy would be shifted out completely: nothing to subtract. */
	if (e - 1 > 63)
		return ix;
	iy >>= e - 1;
	if (iy == 0)
		return ix;

	mant = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mant -= iy;

	/* Renormalize; falling to exponent 0 yields a denormal result. */
	for (; mant < 0x0010000000000000LL; mant <<= 1)
		if (--e == 0)
			return mant;

	return ((long long)e << 52) | (mant & 0x000fffffffffffffLL);
}
347
/*
 * Double-precision magnitude addition: ix + iy, where iy is a denormal
 * and both operands are non-negative raw bit patterns.
 *
 * If ix is denormal too, the bit patterns add directly.  Otherwise iy is
 * aligned to ix's exponent and added to ix's significand; a carry out of
 * the significand bumps the exponent.  When iy is too small to affect
 * ix, ix is returned unchanged.
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	long long e;

	if (ix < 0x0010000000000000LL)
		return ix + iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	/* iy would be shifted out completely: nothing to add. */
	if (e - 1 > 63)
		return ix;
	iy >>= e - 1;
	if (iy == 0)
		return ix;

	mant = (ix & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mant += iy;
	if (mant >= 0x0020000000000000LL) {
		/* Carry into bit 53: halve the significand, bump exponent. */
		mant >>= 1;
		++e;
	}

	return (e << 52) | (mant & 0x000fffffffffffffLL);
}
373
/*
 * Double-precision addition hx + hy where at least one operand is a
 * denormal.  hx and hy are raw IEEE-754 double bit patterns; the raw bit
 * pattern of the sum is returned.
 *
 * The helpers expect the denormal as their second argument, so the
 * operands are swapped when hy is the normalized one.  For operands of
 * opposite sign the magnitudes are subtracted and the sign is adjusted
 * according to which magnitude was larger.
 */
static long long denormal_addd(long long hx, long long hy)
{
	unsigned long long ix, iy;
	long long sign;

	sign = hx & 0x8000000000000000LL;
	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;

	if ((hx ^ hy) & 0x8000000000000000LL) {
		/* Opposite signs: subtract magnitudes. */
		if (iy < 0x0010000000000000LL) {
			ix = denormal_subd1(ix, iy);
			/*
			 * The sign test must look at all 64 bits.  Testing
			 * only the low 32 bits (a cast to int) misreads any
			 * difference whose bit 31 is set as negative and can
			 * miss genuinely negative differences.
			 */
			if ((long long) ix < 0) {
				/* |hy| was larger: result takes hy's sign. */
				ix = -ix;
				sign ^= 0x8000000000000000LL;
			}
		} else {
			/* hy is normalized, hence the larger: |hy| - |hx|. */
			ix = denormal_subd1(iy, ix);
			sign ^= 0x8000000000000000LL;
		}
	} else {
		/* Same sign: add magnitudes, keep the common sign. */
		if (iy < 0x0010000000000000LL)
			ix = denormal_addd1(ix, iy);
		else
			ix = denormal_addd1(iy, ix);
	}

	return sign | ix;
}
405
406 /**
407 * denormal_to_double - Given denormalized float number,
408 * store double float
409 *
410 * @fpu: Pointer to sh_fpu_hard structure
411 * @n: Index to FP register
412 */
413 static void
414 denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
415 {
416 unsigned long du, dl;
417 unsigned long x = fpu->fpul;
418 int exp = 1023 - 126;
419
420 if (x != 0 && (x & 0x7f800000) == 0) {
421 du = (x & 0x80000000);
422 while ((x & 0x00800000) == 0) {
423 x <<= 1;
424 exp--;
425 }
426 x &= 0x007fffff;
427 du |= (exp << 20) | (x >> 3);
428 dl = x << 29;
429
430 fpu->fp_regs[n] = du;
431 fpu->fp_regs[n+1] = dl;
432 }
433 }
434
435 /**
436 * ieee_fpe_handler - Handle denormalized number exception
437 *
438 * @regs: Pointer to register structure
439 *
440 * Returns 1 when it's handled (should not cause exception).
441 */
442 static int
443 ieee_fpe_handler (struct pt_regs *regs)
444 {
445 unsigned short insn = *(unsigned short *) regs->pc;
446 unsigned short finsn;
447 unsigned long nextpc;
448 int nib[4] = {
449 (insn >> 12) & 0xf,
450 (insn >> 8) & 0xf,
451 (insn >> 4) & 0xf,
452 insn & 0xf};
453
454 if (nib[0] == 0xb ||
455 (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
456 regs->pr = regs->pc + 4;
457 if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
458 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
459 finsn = *(unsigned short *) (regs->pc + 2);
460 } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
461 if (regs->sr & 1)
462 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
463 else
464 nextpc = regs->pc + 4;
465 finsn = *(unsigned short *) (regs->pc + 2);
466 } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
467 if (regs->sr & 1)
468 nextpc = regs->pc + 4;
469 else
470 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
471 finsn = *(unsigned short *) (regs->pc + 2);
472 } else if (nib[0] == 0x4 && nib[3] == 0xb &&
473 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
474 nextpc = regs->regs[nib[1]];
475 finsn = *(unsigned short *) (regs->pc + 2);
476 } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
477 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
478 nextpc = regs->pc + 4 + regs->regs[nib[1]];
479 finsn = *(unsigned short *) (regs->pc + 2);
480 } else if (insn == 0x000b) { /* rts */
481 nextpc = regs->pr;
482 finsn = *(unsigned short *) (regs->pc + 2);
483 } else {
484 nextpc = regs->pc + 2;
485 finsn = insn;
486 }
487
488 #define FPSCR_FPU_ERROR (1 << 17)
489
490 if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
491 struct task_struct *tsk = current;
492
493 if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
494 /* FPU error */
495 denormal_to_double (&tsk->thread.fpu.hard,
496 (finsn >> 8) & 0xf);
497 } else
498 return 0;
499
500 regs->pc = nextpc;
501 return 1;
502 } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
503 struct task_struct *tsk = current;
504 int fpscr;
505 int n, m, prec;
506 unsigned int hx, hy;
507
508 n = (finsn >> 8) & 0xf;
509 m = (finsn >> 4) & 0xf;
510 hx = tsk->thread.fpu.hard.fp_regs[n];
511 hy = tsk->thread.fpu.hard.fp_regs[m];
512 fpscr = tsk->thread.fpu.hard.fpscr;
513 prec = fpscr & (1 << 19);
514
515 if ((fpscr & FPSCR_FPU_ERROR)
516 && (prec && ((hx & 0x7fffffff) < 0x00100000
517 || (hy & 0x7fffffff) < 0x00100000))) {
518 long long llx, lly;
519
520 /* FPU error because of denormal */
521 llx = ((long long) hx << 32)
522 | tsk->thread.fpu.hard.fp_regs[n+1];
523 lly = ((long long) hy << 32)
524 | tsk->thread.fpu.hard.fp_regs[m+1];
525 if ((hx & 0x7fffffff) >= 0x00100000)
526 llx = denormal_muld(lly, llx);
527 else
528 llx = denormal_muld(llx, lly);
529 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
530 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
531 } else if ((fpscr & FPSCR_FPU_ERROR)
532 && (!prec && ((hx & 0x7fffffff) < 0x00800000
533 || (hy & 0x7fffffff) < 0x00800000))) {
534 /* FPU error because of denormal */
535 if ((hx & 0x7fffffff) >= 0x00800000)
536 hx = denormal_mulf(hy, hx);
537 else
538 hx = denormal_mulf(hx, hy);
539 tsk->thread.fpu.hard.fp_regs[n] = hx;
540 } else
541 return 0;
542
543 regs->pc = nextpc;
544 return 1;
545 } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
546 struct task_struct *tsk = current;
547 int fpscr;
548 int n, m, prec;
549 unsigned int hx, hy;
550
551 n = (finsn >> 8) & 0xf;
552 m = (finsn >> 4) & 0xf;
553 hx = tsk->thread.fpu.hard.fp_regs[n];
554 hy = tsk->thread.fpu.hard.fp_regs[m];
555 fpscr = tsk->thread.fpu.hard.fpscr;
556 prec = fpscr & (1 << 19);
557
558 if ((fpscr & FPSCR_FPU_ERROR)
559 && (prec && ((hx & 0x7fffffff) < 0x00100000
560 || (hy & 0x7fffffff) < 0x00100000))) {
561 long long llx, lly;
562
563 /* FPU error because of denormal */
564 llx = ((long long) hx << 32)
565 | tsk->thread.fpu.hard.fp_regs[n+1];
566 lly = ((long long) hy << 32)
567 | tsk->thread.fpu.hard.fp_regs[m+1];
568 if ((finsn & 0xf00f) == 0xf000)
569 llx = denormal_addd(llx, lly);
570 else
571 llx = denormal_addd(llx, lly ^ (1LL << 63));
572 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
573 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
574 } else if ((fpscr & FPSCR_FPU_ERROR)
575 && (!prec && ((hx & 0x7fffffff) < 0x00800000
576 || (hy & 0x7fffffff) < 0x00800000))) {
577 /* FPU error because of denormal */
578 if ((finsn & 0xf00f) == 0xf000)
579 hx = denormal_addf(hx, hy);
580 else
581 hx = denormal_addf(hx, hy ^ 0x80000000);
582 tsk->thread.fpu.hard.fp_regs[n] = hx;
583 } else
584 return 0;
585
586 regs->pc = nextpc;
587 return 1;
588 }
589
590 return 0;
591 }
592
593 BUILD_TRAP_HANDLER(fpu_error)
594 {
595 struct task_struct *tsk = current;
596 TRAP_HANDLER_DECL;
597
598 __unlazy_fpu(tsk, regs);
599 if (ieee_fpe_handler(regs)) {
600 tsk->thread.fpu.hard.fpscr &=
601 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
602 grab_fpu(regs);
603 restore_fpu(tsk);
604 task_thread_info(tsk)->status |= TS_USEDFPU;
605 return;
606 }
607
608 force_sig(SIGFPE, tsk);
609 }
610
611 void fpu_state_restore(struct pt_regs *regs)
612 {
613 struct task_struct *tsk = current;
614
615 grab_fpu(regs);
616 if (unlikely(!user_mode(regs))) {
617 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
618 BUG();
619 return;
620 }
621
622 if (likely(used_math())) {
623 /* Using the FPU again. */
624 restore_fpu(tsk);
625 } else {
626 /* First time FPU user. */
627 fpu_init();
628 set_used_math();
629 }
630 task_thread_info(tsk)->status |= TS_USEDFPU;
631 tsk->fpu_counter++;
632 }
633
634 BUILD_TRAP_HANDLER(fpu_state_restore)
635 {
636 TRAP_HANDLER_DECL;
637
638 fpu_state_restore(regs);
639 }
This page took 0.044883 seconds and 5 git commands to generate.