powerpc/kexec: Fix race in kexec shutdown
arch/powerpc/kernel/misc_64.S
/*
 * This file contains miscellaneous low-level functions.
 *	Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
 * and Paul Mackerras.
 * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
 * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <linux/sys.h>
#include <asm/unistd.h>
#include <asm/errno.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/cache.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/kexec.h>

	.text

#ifdef CONFIG_IRQSTACKS
_GLOBAL(call_do_softirq)
	mflr	r0
	std	r0,16(r1)
	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
	mr	r1,r3
	bl	.__do_softirq
	ld	r1,0(r1)
	ld	r0,16(r1)
	mtlr	r0
	blr

_GLOBAL(call_handle_irq)
	ld	r8,0(r6)
	mflr	r0
	std	r0,16(r1)
	mtctr	r8
	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
	mr	r1,r5
	bctrl
	ld	r1,0(r1)
	ld	r0,16(r1)
	mtlr	r0
	blr
#endif /* CONFIG_IRQSTACKS */
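
/*
 * Both helpers above use the same pattern: stdu plants the old r1 at
 * the top of the new stack as a back-chain word while switching to it,
 * and "ld r1,0(r1)" later follows that back-chain to get back to the
 * original stack.  Hedged pseudo-C of the shape (names here are
 * illustrative, not kernel API):
 *
 *	void call_on_stack(void *new_stack_base, void (*fn)(void))
 *	{
 *		void **top = new_stack_base + THREAD_SIZE
 *				- STACK_FRAME_OVERHEAD;
 *		*top = current_sp();	// stdu: save back-chain word
 *		set_sp(top);		// mr r1,...
 *		fn();			// bl / bctrl
 *		set_sp(*top);		// ld r1,0(r1): unwind via back-chain
 *	}
 */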

	.section	".toc","aw"
PPC64_CACHES:
	.tc		ppc64_caches[TC],ppc64_caches
	.section	".text"

/*
 * Write any modified data cache blocks out to memory
 * and invalidate the corresponding instruction cache blocks.
 *
 * flush_icache_range(unsigned long start, unsigned long stop)
 *
 *	flush all bytes from start through stop-1 inclusive
 */

_KPROBE(__flush_icache_range)

/*
 * Flush the data cache to memory
 *
 * Different systems have different cache line sizes
 * and in some cases i-cache and d-cache line sizes differ from
 * each other.
 */
	ld	r10,PPC64_CACHES@toc(r2)
	lwz	r7,DCACHEL1LINESIZE(r10)	/* Get cache line size */
	addi	r5,r7,-1
	andc	r6,r3,r5		/* round low to line bdy */
	subf	r8,r6,r4		/* compute length */
	add	r8,r8,r5		/* ensure we get enough */
	lwz	r9,DCACHEL1LOGLINESIZE(r10)	/* Get log-2 of cache line size */
	srw.	r8,r8,r9		/* compute line count */
	beqlr				/* nothing to do? */
	mtctr	r8
1:	dcbst	0,r6
	add	r6,r6,r7
	bdnz	1b
	sync

/* Now invalidate the instruction cache */

	lwz	r7,ICACHEL1LINESIZE(r10)	/* Get Icache line size */
	addi	r5,r7,-1
	andc	r6,r3,r5		/* round low to line bdy */
	subf	r8,r6,r4		/* compute length */
	add	r8,r8,r5
	lwz	r9,ICACHEL1LOGLINESIZE(r10)	/* Get log-2 of Icache line size */
	srw.	r8,r8,r9		/* compute line count */
	beqlr				/* nothing to do? */
	mtctr	r8
2:	icbi	0,r6
	add	r6,r6,r7
	bdnz	2b
	isync
	blr
	.previous .text
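
/*
 * The six instructions from andc to srw. compute how many cache lines
 * cover [start, stop): round start down to a line boundary, pad the
 * length up, and shift by log2(line size).  The same arithmetic as
 * plain C (a sketch; lsize must be a power of two):
 *
 *	unsigned long cache_line_count(unsigned long start, unsigned long stop,
 *				       unsigned long lsize, unsigned int log_lsize)
 *	{
 *		unsigned long first = start & ~(lsize - 1);	    // andc
 *		unsigned long bytes = (stop - first) + (lsize - 1); // subf, add
 *		return bytes >> log_lsize;			    // srw.
 *	}
 */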
/*
 * Like above, but only do the D-cache.
 *
 * flush_dcache_range(unsigned long start, unsigned long stop)
 *
 *	flush all bytes from start to stop-1 inclusive
 */
_GLOBAL(flush_dcache_range)

/*
 * Flush the data cache to memory
 *
 * Different systems have different cache line sizes
 */
	ld	r10,PPC64_CACHES@toc(r2)
	lwz	r7,DCACHEL1LINESIZE(r10)	/* Get dcache line size */
	addi	r5,r7,-1
	andc	r6,r3,r5		/* round low to line bdy */
	subf	r8,r6,r4		/* compute length */
	add	r8,r8,r5		/* ensure we get enough */
	lwz	r9,DCACHEL1LOGLINESIZE(r10)	/* Get log-2 of dcache line size */
	srw.	r8,r8,r9		/* compute line count */
	beqlr				/* nothing to do? */
	mtctr	r8
0:	dcbst	0,r6
	add	r6,r6,r7
	bdnz	0b
	sync
	blr

/*
 * Like above, but works on non-mapped physical addresses.
 * Use only for non-LPAR setups! It also assumes real mode
 * is cacheable. Used for flushing out the DART before using
 * it as uncacheable memory.
 *
 * flush_dcache_phys_range(unsigned long start, unsigned long stop)
 *
 *	flush all bytes from start to stop-1 inclusive
 */
_GLOBAL(flush_dcache_phys_range)
	ld	r10,PPC64_CACHES@toc(r2)
	lwz	r7,DCACHEL1LINESIZE(r10)	/* Get dcache line size */
	addi	r5,r7,-1
	andc	r6,r3,r5		/* round low to line bdy */
	subf	r8,r6,r4		/* compute length */
	add	r8,r8,r5		/* ensure we get enough */
	lwz	r9,DCACHEL1LOGLINESIZE(r10)	/* Get log-2 of dcache line size */
	srw.	r8,r8,r9		/* compute line count */
	beqlr				/* nothing to do? */
	mfmsr	r5			/* Disable MMU Data Relocation */
	ori	r0,r5,MSR_DR
	xori	r0,r0,MSR_DR
	sync
	mtmsr	r0
	sync
	isync
	mtctr	r8
0:	dcbst	0,r6
	add	r6,r6,r7
	bdnz	0b
	sync
	isync
	mtmsr	r5			/* Re-enable MMU Data Relocation */
	sync
	isync
	blr
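
/*
 * A note on the ori/xori pair above: it clears MSR_DR without needing
 * a separate mask register, since OR forces the bit set and XOR then
 * flips it off, leaving all other bits alone.  A minimal C check of
 * the identity (illustrative helper, not kernel API):
 *
 *	unsigned long clear_bits(unsigned long msr, unsigned long mask)
 *	{
 *		return (msr | mask) ^ mask;	// == msr & ~mask
 *	}
 */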

_GLOBAL(flush_inval_dcache_range)
	ld	r10,PPC64_CACHES@toc(r2)
	lwz	r7,DCACHEL1LINESIZE(r10)	/* Get dcache line size */
	addi	r5,r7,-1
	andc	r6,r3,r5		/* round low to line bdy */
	subf	r8,r6,r4		/* compute length */
	add	r8,r8,r5		/* ensure we get enough */
	lwz	r9,DCACHEL1LOGLINESIZE(r10)	/* Get log-2 of dcache line size */
	srw.	r8,r8,r9		/* compute line count */
	beqlr				/* nothing to do? */
	sync
	isync
	mtctr	r8
0:	dcbf	0,r6
	add	r6,r6,r7
	bdnz	0b
	sync
	isync
	blr


/*
 * Flush a particular page from the data cache to RAM.
 * Note: this is necessary because the instruction cache does *not*
 * snoop from the data cache.
 *
 *	void __flush_dcache_icache(void *page)
 */
_GLOBAL(__flush_dcache_icache)
/*
 * Flush the data cache to memory
 *
 * Different systems have different cache line sizes
 */

/* Flush the dcache */
	ld	r7,PPC64_CACHES@toc(r2)
	clrrdi	r3,r3,PAGE_SHIFT		/* Page align */
	lwz	r4,DCACHEL1LINESPERPAGE(r7)	/* Get # dcache lines per page */
	lwz	r5,DCACHEL1LINESIZE(r7)		/* Get dcache line size */
	mr	r6,r3
	mtctr	r4
0:	dcbst	0,r6
	add	r6,r6,r5
	bdnz	0b
	sync

/* Now invalidate the icache */

	lwz	r4,ICACHEL1LINESPERPAGE(r7)	/* Get # icache lines per page */
	lwz	r5,ICACHEL1LINESIZE(r7)		/* Get icache line size */
	mtctr	r4
1:	icbi	0,r3
	add	r3,r3,r5
	bdnz	1b
	isync
	blr
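
/*
 * A typical caller, as a hedged sketch (the real wrapper lives in the
 * powerpc mm code and handles more cases; names may differ):
 *
 *	void flush_dcache_icache_page(struct page *page)
 *	{
 *		__flush_dcache_icache(page_address(page)); // assumes a mapped page
 *	}
 */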


#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
/*
 * Do an IO access in real mode
 */
_GLOBAL(real_readb)
	mfmsr	r7
	ori	r0,r7,MSR_DR
	xori	r0,r0,MSR_DR
	sync
	mtmsrd	r0
	sync
	isync
	mfspr	r6,SPRN_HID4
	rldicl	r5,r6,32,0
	ori	r5,r5,0x100
	rldicl	r5,r5,32,0
	sync
	mtspr	SPRN_HID4,r5
	isync
	slbia
	isync
	lbz	r3,0(r3)
	sync
	mtspr	SPRN_HID4,r6
	isync
	slbia
	isync
	mtmsrd	r7
	sync
	isync
	blr

/*
 * Do an IO access in real mode
 */
_GLOBAL(real_writeb)
	mfmsr	r7
	ori	r0,r7,MSR_DR
	xori	r0,r0,MSR_DR
	sync
	mtmsrd	r0
	sync
	isync
	mfspr	r6,SPRN_HID4
	rldicl	r5,r6,32,0
	ori	r5,r5,0x100
	rldicl	r5,r5,32,0
	sync
	mtspr	SPRN_HID4,r5
	isync
	slbia
	isync
	stb	r3,0(r4)
	sync
	mtspr	SPRN_HID4,r6
	isync
	slbia
	isync
	mtmsrd	r7
	sync
	isync
	blr
#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */
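
/*
 * The rldicl/ori/rldicl triple above sets a bit in the upper 32 bits
 * of HID4 even though ori can only reach the low 16 bits: swap the
 * 32-bit halves, set the bit, swap back.  A C model of the trick
 * (illustrative helper, not kernel API; what the HID4 bit does is
 * CPU-specific):
 *
 *	unsigned long hid4_set_high_bit(unsigned long hid4)
 *	{
 *		unsigned long v = (hid4 << 32) | (hid4 >> 32); // rldicl x,y,32,0
 *		v |= 0x100;                                    // ori
 *		return (v << 32) | (v >> 32);                  // == hid4 | (0x100UL << 32)
 *	}
 */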

#ifdef CONFIG_PPC_PASEMI

/* Not all binutils versions support these yet, so use defines */
#define LBZCIX(RT,RA,RB)  .long (0x7c0006aa|(RT<<21)|(RA<<16)|(RB << 11))
#define STBCIX(RS,RA,RB)  .long (0x7c0007aa|(RS<<21)|(RA<<16)|(RB << 11))
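
/*
 * These defines hand-assemble the cache-inhibited load/store opcodes:
 * the constant carries the primary/extended opcode, and RT/RS, RA and
 * RB drop into the usual instruction fields at bits 21, 16 and 11.
 * A C model of the encoding (illustrative only; the r0..r31 macros in
 * ppc_asm.h expand to bare numbers here, which is why LBZCIX(r3,0,r3)
 * below assembles):
 *
 *	unsigned int encode_lbzcix(unsigned int rt, unsigned int ra,
 *				   unsigned int rb)
 *	{
 *		return 0x7c0006aau | (rt << 21) | (ra << 16) | (rb << 11);
 *	}
 */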


_GLOBAL(real_205_readb)
	mfmsr	r7
	ori	r0,r7,MSR_DR
	xori	r0,r0,MSR_DR
	sync
	mtmsrd	r0
	sync
	isync
	LBZCIX(r3,0,r3)
	isync
	mtmsrd	r7
	sync
	isync
	blr

_GLOBAL(real_205_writeb)
	mfmsr	r7
	ori	r0,r7,MSR_DR
	xori	r0,r0,MSR_DR
	sync
	mtmsrd	r0
	sync
	isync
	STBCIX(r3,0,r4)
	isync
	mtmsrd	r7
	sync
	isync
	blr

#endif /* CONFIG_PPC_PASEMI */


#ifdef CONFIG_CPU_FREQ_PMAC64
/*
 * SCOM access functions for 970 (FX only for now)
 *
 * unsigned long scom970_read(unsigned int address);
 * void scom970_write(unsigned int address, unsigned long value);
 *
 * The address passed in is the 24-bit register address. This code
 * is 970 specific and will not check the status bits, so you should
 * know what you are doing.
 */
_GLOBAL(scom970_read)
	/* interrupts off */
	mfmsr	r4
	ori	r0,r4,MSR_EE
	xori	r0,r0,MSR_EE
	mtmsrd	r0,1

	/* rotate the 24-bit SCOM address 8 bits left and mask out its
	 * low 8 bits (including parity). On current CPUs they must be
	 * 0'd, and finally OR in the RW bit
	 */
	rlwinm	r3,r3,8,0,15
	ori	r3,r3,0x8000

	/* do the actual scom read */
	sync
	mtspr	SPRN_SCOMC,r3
	isync
	mfspr	r3,SPRN_SCOMD
	isync
	mfspr	r0,SPRN_SCOMC
	isync

	/* XXX: fixup result on some buggy 970's (ouch! we lost a bit, bah
	 * that's the best we can do). Not implemented yet as we don't use
	 * the scom on any of the bogus CPUs yet, but may have to be done
	 * ultimately
	 */

	/* restore interrupts */
	mtmsrd	r4,1
	blr


_GLOBAL(scom970_write)
	/* interrupts off */
	mfmsr	r5
	ori	r0,r5,MSR_EE
	xori	r0,r0,MSR_EE
	mtmsrd	r0,1

	/* rotate the 24-bit SCOM address 8 bits left and mask out its
	 * low 8 bits (including parity). On current CPUs they must be 0'd.
	 */

	rlwinm	r3,r3,8,0,15

	sync
	mtspr	SPRN_SCOMD,r4      /* write data */
	isync
	mtspr	SPRN_SCOMC,r3      /* write command */
	isync
	mfspr	r3,SPRN_SCOMC
	isync

	/* restore interrupts */
	mtmsrd	r5,1
	blr
#endif /* CONFIG_CPU_FREQ_PMAC64 */
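
/*
 * The rlwinm above builds the SCOMC command word: shift the 24-bit
 * address up 8 bits and keep only the upper 16 bits of the 32-bit
 * result (clearing the parity bits); reads additionally OR in 0x8000
 * as the RW bit.  In C (illustrative helper, not kernel API):
 *
 *	unsigned int scomc_read_cmd(unsigned int addr)
 *	{
 *		unsigned int cmd = (addr << 8) & 0xffff0000; // rlwinm r3,r3,8,0,15
 *		return cmd | 0x8000;                         // ori: the RW (read) bit
 *	}
 */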


/*
 * Create a kernel thread
 *   kernel_thread(fn, arg, flags)
 */
_GLOBAL(kernel_thread)
	std	r29,-24(r1)
	std	r30,-16(r1)
	stdu	r1,-STACK_FRAME_OVERHEAD(r1)
	mr	r29,r3
	mr	r30,r4
	ori	r3,r5,CLONE_VM	/* flags */
	oris	r3,r3,(CLONE_UNTRACED>>16)
	li	r4,0		/* new sp (unused) */
	li	r0,__NR_clone
	sc
	bns+	1f		/* did system call indicate error? */
	neg	r3,r3		/* if so, make return code negative */
1:	cmpdi	0,r3,0		/* parent or child? */
	bne	2f		/* return if parent */
	li	r0,0
	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
	ld	r2,8(r29)
	ld	r29,0(r29)
	mtlr	r29		/* fn addr in lr */
	mr	r3,r30		/* load arg and call fn */
	blrl
	li	r0,__NR_exit	/* exit after child exits */
	li	r3,0
	sc
2:	addi	r1,r1,STACK_FRAME_OVERHEAD
	ld	r29,-24(r1)
	ld	r30,-16(r1)
	blr
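
/*
 * The above in hedged pseudo-C (fn is an ELF function descriptor, so
 * the child loads both the entry point and the TOC pointer r2 from
 * it; names here are illustrative):
 *
 *	long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *	{
 *		long ret = sys_clone(flags | CLONE_VM | CLONE_UNTRACED, 0);
 *		if (ret != 0)
 *			return ret;	// parent: child pid, or -errno
 *		sys_exit(fn(arg));	// child: run fn, then exit
 *	}
 */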

/*
 * disable_kernel_fp()
 * Disable the FPU.
 */
_GLOBAL(disable_kernel_fp)
	mfmsr	r3
	rldicl	r0,r3,(63-MSR_FP_LG),1
	rldicl	r3,r0,(MSR_FP_LG+1),0
	mtmsrd	r3			/* disable use of fpu now */
	isync
	blr
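
/*
 * The rldicl pair clears MSR_FP without a scratch mask: rotate the MSR
 * so the FP bit lands in the top position, clear it via the mask, then
 * rotate back.  A C model (illustrative; rotl64 is a local helper,
 * not kernel API):
 *
 *	static unsigned long rotl64(unsigned long x, unsigned int n)
 *	{
 *		return (x << n) | (x >> (64 - n));	// n in 1..63 here
 *	}
 *
 *	unsigned long msr_clear_fp(unsigned long msr)
 *	{
 *		unsigned long t = rotl64(msr, 63 - MSR_FP_LG) & ~(1UL << 63);
 *		return rotl64(t, MSR_FP_LG + 1);	// net: msr & ~MSR_FP
 *	}
 */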

/* kexec_wait(phys_cpu)
 *
 * wait for the flag to change, indicating this kernel is going away but
 * the slave code for the next one is at addresses 0 to 0x100.
 *
 * This is used by all slaves.
 *
 * Physical (hardware) cpu id should be in r3.
 */
_GLOBAL(kexec_wait)
	bl	1f
1:	mflr	r5
	addi	r5,r5,kexec_flag-1b

	li	r4,KEXEC_STATE_REAL_MODE
	stb	r4,PACAKEXECSTATE(r13)
	SYNC

99:	HMT_LOW
#ifdef CONFIG_KEXEC		/* use no memory without kexec */
	lwz	r4,0(r5)
	cmpwi	0,r4,0
	bnea	0x60
#endif
	b	99b

/* this can be in text because we won't change it until we are
 * running in real mode anyway
 */
kexec_flag:
	.long	0

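/*
 * What a slave does in kexec_wait, as hedged pseudo-C (0x60 is the
 * fixed slave entry point of the image copied to address 0; phys_cpu
 * stays live in r3 across the absolute branch):
 *
 *	for (;;) {
 *		HMT_low();			// give up thread priority
 *		if (kexec_flag != 0)
 *			((void (*)(long))0x60)(phys_cpu);  // bnea 0x60
 *	}
 */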

#ifdef CONFIG_KEXEC

/* kexec_smp_wait(void)
 *
 * call with interrupts off
 * note: this is a terminal routine, it does not save lr
 *
 * get phys id from paca
 * switch to real mode
 * join other cpus in kexec_wait(phys_id)
 */
_GLOBAL(kexec_smp_wait)
	lhz	r3,PACAHWCPUID(r13)
	bl	real_mode
	b	.kexec_wait

/*
 * switch to real mode (turn mmu off)
 * we use the early kernel trick that the hardware ignores bits
 * 0 and 1 (big endian) of the effective address in real mode
 *
 * don't overwrite r3 here, it is live for kexec_wait above.
 */
real_mode:	/* assume normal blr return */
1:	li	r9,MSR_RI
	li	r10,MSR_DR|MSR_IR
	mflr	r11		/* return address to SRR0 */
	mfmsr	r12
	andc	r9,r12,r9
	andc	r10,r12,r10

	mtmsrd	r9,1
	mtspr	SPRN_SRR1,r10
	mtspr	SPRN_SRR0,r11
	rfid
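
/*
 * The MSR juggling above, as hedged pseudo-C: clear RI first so a
 * stray interrupt is flagged unrecoverable, then let rfid switch to
 * the caller's address with translation off in one atomic step:
 *
 *	mtmsrd(msr & ~MSR_RI);			// andc + mtmsrd r9,1
 *	SRR1 = msr & ~(MSR_DR | MSR_IR);	// target MSR: MMU off
 *	SRR0 = lr;				// target address
 *	rfid();					// jump with the new MSR
 */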


/*
 * kexec_sequence(newstack, start, image, control, clear_all())
 *
 * does the grungy work with stack switching and real mode switches
 * also does simple calls to other code
 */

_GLOBAL(kexec_sequence)
	mflr	r0
	std	r0,16(r1)

	/* switch stacks to newstack -- &kexec_stack.stack */
	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
	mr	r1,r3

	li	r0,0
	std	r0,16(r1)

	/* save regs for local vars on new stack.
	 * yes, we won't go back, but ...
	 */
	std	r31,-8(r1)
	std	r30,-16(r1)
	std	r29,-24(r1)
	std	r28,-32(r1)
	std	r27,-40(r1)
	std	r26,-48(r1)
	std	r25,-56(r1)

	stdu	r1,-STACK_FRAME_OVERHEAD-64(r1)

	/* save args into preserved regs */
	mr	r31,r3			/* newstack (both) */
	mr	r30,r4			/* start (real) */
	mr	r29,r5			/* image (virt) */
	mr	r28,r6			/* control, unused */
	mr	r27,r7			/* clear_all() fn desc */
	mr	r26,r8			/* spare */
	lhz	r25,PACAHWCPUID(r13)	/* get our phys cpu from paca */

	/* disable interrupts, we are overwriting kernel data next */
	mfmsr	r3
	rlwinm	r3,r3,0,17,15
	mtmsrd	r3,1

	/* copy dest pages, flush whole dest image */
	mr	r3,r29
	bl	.kexec_copy_flush	/* (image) */

	/* turn off mmu */
	bl	real_mode

	/* copy 0x100 bytes starting at start to 0 */
	li	r3,0
	mr	r4,r30		/* start, aka phys mem offset */
	li	r5,0x100
	li	r6,0
	bl	.copy_and_flush	/* (dest, src, copy limit, start offset) */
1:	/* assume normal blr return */

	/* release other cpus to the new kernel secondary start at 0x60 */
	mflr	r5
	li	r6,1
	stw	r6,kexec_flag-1b(r5)

	/* clear out hardware hash page table and tlb */
	ld	r5,0(r27)		/* deref function descriptor */
	mtctr	r5
	bctrl				/* ppc_md.hpte_clear_all(void); */

	/*
	 * kexec image calling is:
	 *    the first 0x100 bytes of the entry point are copied to 0
	 *
	 *    all slaves branch to slave = 0x60 (absolute)
	 *		slave(phys_cpu_id);
	 *
	 *    master goes to start = entry point
	 *		start(phys_cpu_id, start, 0);
	 *
	 *
	 * a wrapper is needed to call existing kernels, here is an approximate
	 * description of one method:
	 *
	 * v2: (2.6.10)
	 *   start will be near the boot_block (maybe 0x100 bytes before it?)
	 *   it will have a 0x60, which will b to boot_block, where it will wait
	 *   and 0 will store phys into struct boot-block and load r3 from there,
	 *   copy kernel 0-0x100 and tell slaves to back down to 0x60 again
	 *
	 * v1: (2.6.9)
	 *    boot block will have all cpus scanning device tree to see if they
	 *    are the boot cpu ?????
	 *    other device tree differences (prop sizes, va vs pa, etc)...
	 */
	mr	r3,r25	# my phys cpu
	mr	r4,r30	# start, aka phys mem offset
	mtlr	r4
	li	r5,0
	blr	/* image->start(physid, image->start, 0); */
#endif /* CONFIG_KEXEC */
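
/*
 * End to end, kexec_sequence is roughly this hedged pseudo-C (names
 * match the asm comments above; the casts and helpers are
 * illustrative, not kernel API):
 *
 *	void kexec_sequence(void *newstack, unsigned long start,
 *			    void *image, void *control,
 *			    void (*clear_all)(void))
 *	{
 *		switch_stack(newstack);
 *		local_irq_disable();
 *		kexec_copy_flush(image);	// lay down the new image
 *		real_mode();			// MMU off from here on
 *		copy_and_flush(0, start, 0x100, 0); // plant low handlers at 0
 *		kexec_flag = 1;			// release the spinning slaves
 *		clear_all();			// ppc_md.hpte_clear_all()
 *		((void (*)(long, unsigned long, long))start)
 *			(phys_cpu, start, 0);	// jump into the new kernel
 *	}
 */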