/* arch/powerpc/kernel/vector.S */

#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>

/*
 * load_up_altivec(unused, unused, tsk)
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code.  Note that we could rely on vrsave value to eventually
 * avoid saving all of the VREGs here...
 */
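/*
 * Roughly, the !CONFIG_SMP path below is equivalent to this C-like
 * sketch (the helper name is illustrative only, not a real kernel
 * function):
 *
 *	if (last_task_used_altivec) {
 *		struct thread_struct *t = &last_task_used_altivec->thread;
 *		save_32vrs_and_vscr(t);		// SAVE_32VRS + mfvscr/stvx
 *		t->regs->msr &= ~MSR_VEC;	// VMX now off for that task
 *	}
 */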
#ifndef CONFIG_SMP
	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
	toreal(r3)
	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
	PPC_LCMPI	0,r4,0
	beq	1f

	/* Save VMX state to last_task_used_altivec's THREAD struct */
	toreal(r4)
	addi	r4,r4,THREAD
	SAVE_32VRS(0,r5,r4)
	mfvscr	vr0
	li	r10,THREAD_VSCR
	stvx	vr0,r10,r4
	/* Disable VMX for last_task_used_altivec */
	PPC_LL	r5,PT_REGS(r4)
	toreal(r5)
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r10,MSR_VEC@h
	andc	r4,r4,r10
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
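	/* (By ABI convention VRSAVE is a bitmask of the vector registers
	 * the task considers live; code that honours it may skip
	 * registers whose bit is clear, so an application that leaves it
	 * at zero risks having live VRs silently discarded.  Forcing it
	 * to all 1's is the conservative fallback.)
	 */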
	mfspr	r4,SPRN_VRSAVE
	cmpdi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	li	r4,1
	li	r10,THREAD_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	vr0,r10,r5
	mtvscr	vr0
	REST_32VRS(0,r4,r5)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	subi	r4,r5,THREAD		/* Back to 'current' */
	fromreal(r4)
	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	SYNC
	MTMSRD(r5)			/* enable use of VMX now */
	isync
	PPC_LCMPI	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r5,0
	SAVE_32VRS(0,r4,r3)
	mfvscr	vr0
	li	r4,THREAD_VSCR
	stvx	vr0,r4,r3
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
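/*
 * (Background: the 64 VSX registers overlay the existing register
 * files -- VSR0-31 share storage with FPR0-31 and VSR32-63 with
 * VR0-31 -- so restoring the FP and VMX state via load_up_fpu and
 * load_up_altivec brings in the whole VSX register set.)
 */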
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifndef CONFIG_SMP
	ld	r3,last_task_used_vsx@got(r2)
	ld	r4,0(r3)
	cmpdi	0,r4,0
	beq	1f
	/* Disable VSX for last_task_used_vsx */
	addi	r4,r4,THREAD
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VSX@h
	andc	r6,r4,r6
	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
#ifndef CONFIG_SMP
	/* Update last_task_used_vsx to 'current' */
	ld	r4,PACACURRENT(r13)
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
_GLOBAL(__giveup_vsx)
	mfmsr	r5
	oris	r5,r5,MSR_VSX@h
	mtmsrd	r5			/* enable use of VSX now */
	isync

	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	ld	r4,last_task_used_vsx@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_VSX */

/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
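/*
 * (Each routine below operates on 128-bit vectors held in memory,
 * four 32-bit single-precision elements at a time, using scalar FP
 * instructions: r3 points at the destination vector and r4-r6 at the
 * source vectors.  They serve as software fallbacks for the
 * corresponding AltiVec floating-point instructions, e.g. from the
 * vector emulation/assist code.)
 */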
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
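/*
 * (Convention used by fpenable/fpdisable and their callers, as can be
 * read off the code below: r12 holds the caller's saved LR, r10 the
 * original MSR, and a 64-byte stack frame is carved out with fr31 at
 * 8(r1), fr1 at 16(r1), fr0 at 24(r1) and, where a routine needs
 * them, fr2-fr5 at 32(r1)..56(r1).  fpdisable undoes all of this and
 * returns to the original caller.)
 */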
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
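/*
 * (Derivation of the update used below: to refine r ~= 1/sqrt(s),
 * apply Newton-Raphson to f(r) = 1/r^2 - s, which gives
 *	r' = r - f(r)/f'(r) = r + 0.5*r*(1 - s*r*r),
 * exactly the fmuls/fnmsubs/fmadds sequence of each iteration below.
 * Each iteration roughly squares the relative error of the estimate,
 * so two of them sharpen the coarse frsqrte result considerably.)
 */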
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable