Commit | Line | Data |
---|---|---|
5033cba0 | 1 | /* |
835c34a1 | 2 | * handle transition of Linux booting another kernel |
5033cba0 EB |
3 | * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> |
4 | * | |
5 | * This source code is licensed under the GNU General Public License, | |
6 | * Version 2. See the file COPYING for more details. | |
7 | */ | |
8 | ||
9 | #include <linux/mm.h> | |
10 | #include <linux/kexec.h> | |
11 | #include <linux/delay.h> | |
1a3f239d | 12 | #include <linux/init.h> |
fd59d231 | 13 | #include <linux/numa.h> |
f43fdad8 | 14 | #include <linux/ftrace.h> |
3122c331 | 15 | #include <linux/suspend.h> |
f43fdad8 | 16 | |
5033cba0 EB |
17 | #include <asm/pgtable.h> |
18 | #include <asm/pgalloc.h> | |
19 | #include <asm/tlbflush.h> | |
20 | #include <asm/mmu_context.h> | |
21 | #include <asm/io.h> | |
22 | #include <asm/apic.h> | |
23 | #include <asm/cpufeature.h> | |
e7b47cca | 24 | #include <asm/desc.h> |
4bb0d3ec | 25 | #include <asm/system.h> |
3ab83521 | 26 | #include <asm/cacheflush.h> |
5033cba0 EB |
27 | |
28 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | |
3566561b MD |
29 | static u32 kexec_pgd[1024] PAGE_ALIGNED; |
30 | #ifdef CONFIG_X86_PAE | |
31 | static u32 kexec_pmd0[1024] PAGE_ALIGNED; | |
32 | static u32 kexec_pmd1[1024] PAGE_ALIGNED; | |
5033cba0 | 33 | #endif |
3566561b MD |
34 | static u32 kexec_pte0[1024] PAGE_ALIGNED; |
35 | static u32 kexec_pte1[1024] PAGE_ALIGNED; | |
5033cba0 | 36 | |
5033cba0 EB |
37 | static void set_idt(void *newidt, __u16 limit) |
38 | { | |
6b68f01b | 39 | struct desc_ptr curidt; |
5033cba0 EB |
40 | |
41 | /* ia32 supports unaliged loads & stores */ | |
e7b47cca EB |
42 | curidt.size = limit; |
43 | curidt.address = (unsigned long)newidt; | |
5033cba0 | 44 | |
f2ab4461 | 45 | load_idt(&curidt); |
378fc6ee | 46 | } |
5033cba0 EB |
47 | |
48 | ||
49 | static void set_gdt(void *newgdt, __u16 limit) | |
50 | { | |
6b68f01b | 51 | struct desc_ptr curgdt; |
5033cba0 EB |
52 | |
53 | /* ia32 supports unaligned loads & stores */ | |
e7b47cca EB |
54 | curgdt.size = limit; |
55 | curgdt.address = (unsigned long)newgdt; | |
5033cba0 | 56 | |
f2ab4461 | 57 | load_gdt(&curgdt); |
378fc6ee | 58 | } |
5033cba0 EB |
59 | |
/*
 * Reload every segment register: a far jump sets %cs to __KERNEL_CS,
 * then %ds, %es, %fs, %gs and %ss are all set to __KERNEL_DS.
 * machine_kexec() calls this right before it replaces the GDT and IDT.
 */
static void load_segments(void)
{
/* Two-level stringification so the selector macros expand first. */
#define KEXEC_STR_1(X) #X
#define KEXEC_STR(X) KEXEC_STR_1(X)

	__asm__ __volatile__ (
		"\tljmp $"KEXEC_STR(__KERNEL_CS)",$1f\n"
		"\t1:\n"
		"\tmovl $"KEXEC_STR(__KERNEL_DS)",%%eax\n"
		"\tmovl %%eax,%%ds\n"
		"\tmovl %%eax,%%es\n"
		"\tmovl %%eax,%%fs\n"
		"\tmovl %%eax,%%gs\n"
		"\tmovl %%eax,%%ss\n"
		: /* no outputs */
		: /* no inputs */
		: "eax", "memory");

#undef KEXEC_STR
#undef KEXEC_STR_1
}
78 | ||
5033cba0 EB |
79 | /* |
80 | * A architecture hook called to validate the | |
81 | * proposed image and prepare the control pages | |
163f6876 | 82 | * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE |
5033cba0 EB |
83 | * have been allocated, but the segments have yet |
84 | * been copied into the kernel. | |
85 | * | |
86 | * Do what every setup is needed on image and the | |
87 | * reboot code buffer to allow us to avoid allocations | |
88 | * later. | |
89 | * | |
3ab83521 | 90 | * Make control page executable. |
5033cba0 EB |
91 | */ |
92 | int machine_kexec_prepare(struct kimage *image) | |
93 | { | |
3ab83521 HY |
94 | if (nx_enabled) |
95 | set_pages_x(image->control_code_page, 1); | |
5033cba0 EB |
96 | return 0; |
97 | } | |
98 | ||
99 | /* | |
100 | * Undo anything leftover by machine_kexec_prepare | |
101 | * when an image is freed. | |
102 | */ | |
103 | void machine_kexec_cleanup(struct kimage *image) | |
104 | { | |
3ab83521 HY |
105 | if (nx_enabled) |
106 | set_pages_nx(image->control_code_page, 1); | |
5033cba0 EB |
107 | } |
108 | ||
109 | /* | |
110 | * Do not allocate memory (or fail in any way) in machine_kexec(). | |
111 | * We are past the point of no return, committed to rebooting now. | |
112 | */ | |
3ab83521 | 113 | void machine_kexec(struct kimage *image) |
5033cba0 | 114 | { |
3566561b MD |
115 | unsigned long page_list[PAGES_NR]; |
116 | void *control_page; | |
3122c331 | 117 | int save_ftrace_enabled; |
3ab83521 HY |
118 | asmlinkage unsigned long |
119 | (*relocate_kernel_ptr)(unsigned long indirection_page, | |
120 | unsigned long control_page, | |
121 | unsigned long start_address, | |
122 | unsigned int has_pae, | |
123 | unsigned int preserve_context); | |
5033cba0 | 124 | |
3122c331 HY |
125 | #ifdef CONFIG_KEXEC_JUMP |
126 | if (kexec_image->preserve_context) | |
127 | save_processor_state(); | |
128 | #endif | |
129 | ||
130 | save_ftrace_enabled = __ftrace_enabled_save(); | |
f43fdad8 | 131 | |
5033cba0 EB |
132 | /* Interrupts aren't acceptable while we reboot */ |
133 | local_irq_disable(); | |
134 | ||
89081d17 HY |
135 | if (image->preserve_context) { |
136 | #ifdef CONFIG_X86_IO_APIC | |
137 | /* We need to put APICs in legacy mode so that we can | |
138 | * get timer interrupts in second kernel. kexec/kdump | |
139 | * paths already have calls to disable_IO_APIC() in | |
140 | * one form or other. kexec jump path also need | |
141 | * one. | |
142 | */ | |
143 | disable_IO_APIC(); | |
144 | #endif | |
145 | } | |
146 | ||
3566561b | 147 | control_page = page_address(image->control_code_page); |
fb45daa6 | 148 | memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); |
3566561b | 149 | |
3ab83521 | 150 | relocate_kernel_ptr = control_page; |
3566561b | 151 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
3ab83521 | 152 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
3566561b MD |
153 | page_list[PA_PGD] = __pa(kexec_pgd); |
154 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | |
155 | #ifdef CONFIG_X86_PAE | |
156 | page_list[PA_PMD_0] = __pa(kexec_pmd0); | |
157 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; | |
158 | page_list[PA_PMD_1] = __pa(kexec_pmd1); | |
159 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | |
160 | #endif | |
161 | page_list[PA_PTE_0] = __pa(kexec_pte0); | |
162 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | |
163 | page_list[PA_PTE_1] = __pa(kexec_pte1); | |
164 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | |
3ab83521 | 165 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); |
5033cba0 | 166 | |
2a8a3d5b EB |
167 | /* The segment registers are funny things, they have both a |
168 | * visible and an invisible part. Whenever the visible part is | |
169 | * set to a specific selector, the invisible part is loaded | |
170 | * with from a table in memory. At no other time is the | |
171 | * descriptor table in memory accessed. | |
5033cba0 EB |
172 | * |
173 | * I take advantage of this here by force loading the | |
174 | * segments, before I zap the gdt with an invalid value. | |
175 | */ | |
176 | load_segments(); | |
177 | /* The gdt & idt are now invalid. | |
178 | * If you want to load them you must set up your own idt & gdt. | |
179 | */ | |
180 | set_gdt(phys_to_virt(0),0); | |
181 | set_idt(phys_to_virt(0),0); | |
182 | ||
183 | /* now call it */ | |
3ab83521 HY |
184 | image->start = relocate_kernel_ptr((unsigned long)image->head, |
185 | (unsigned long)page_list, | |
186 | image->start, cpu_has_pae, | |
187 | image->preserve_context); | |
3122c331 HY |
188 | |
189 | #ifdef CONFIG_KEXEC_JUMP | |
190 | if (kexec_image->preserve_context) | |
191 | restore_processor_state(); | |
192 | #endif | |
193 | ||
194 | __ftrace_enabled_restore(save_ftrace_enabled); | |
5033cba0 | 195 | } |
1a3f239d | 196 | |
fd59d231 KO |
/*
 * Add the architecture-specific vmcoreinfo entries: the node_data
 * symbol and its length (MAX_NUMNODES) on NUMA kernels, and the
 * X86_PAE config marker on PAE kernels.  With neither option set,
 * nothing is recorded.
 */
void arch_crash_save_vmcoreinfo(void)
{
#ifdef CONFIG_NUMA
	VMCOREINFO_SYMBOL(node_data);
	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
#endif /* CONFIG_NUMA */

#ifdef CONFIG_X86_PAE
	VMCOREINFO_CONFIG(X86_PAE);
#endif /* CONFIG_X86_PAE */
}
207 |