Commit | Line | Data |
---|---|---|
5234f5eb | 1 | /* |
835c34a1 | 2 | * handle transition of Linux booting another kernel |
5234f5eb EB |
3 | * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> |
4 | * | |
5 | * This source code is licensed under the GNU General Public License, | |
6 | * Version 2. See the file COPYING for more details. | |
7 | */ | |
8 | ||
9 | #include <linux/mm.h> | |
10 | #include <linux/kexec.h> | |
5234f5eb EB |
11 | #include <linux/string.h> |
12 | #include <linux/reboot.h> | |
fd59d231 | 13 | #include <linux/numa.h> |
f43fdad8 IM |
14 | #include <linux/ftrace.h> |
15 | ||
5234f5eb | 16 | #include <asm/pgtable.h> |
5234f5eb EB |
17 | #include <asm/tlbflush.h> |
18 | #include <asm/mmu_context.h> | |
19 | #include <asm/io.h> | |
8bf27556 EB |
20 | |
21 | static void init_level2_page(pmd_t *level2p, unsigned long addr) | |
5234f5eb EB |
22 | { |
23 | unsigned long end_addr; | |
72414d3f | 24 | |
5234f5eb | 25 | addr &= PAGE_MASK; |
8bf27556 | 26 | end_addr = addr + PUD_SIZE; |
72414d3f | 27 | while (addr < end_addr) { |
8bf27556 EB |
28 | set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); |
29 | addr += PMD_SIZE; | |
5234f5eb EB |
30 | } |
31 | } | |
32 | ||
8bf27556 | 33 | static int init_level3_page(struct kimage *image, pud_t *level3p, |
72414d3f | 34 | unsigned long addr, unsigned long last_addr) |
5234f5eb EB |
35 | { |
36 | unsigned long end_addr; | |
37 | int result; | |
72414d3f | 38 | |
5234f5eb EB |
39 | result = 0; |
40 | addr &= PAGE_MASK; | |
8bf27556 | 41 | end_addr = addr + PGDIR_SIZE; |
72414d3f | 42 | while ((addr < last_addr) && (addr < end_addr)) { |
5234f5eb | 43 | struct page *page; |
8bf27556 | 44 | pmd_t *level2p; |
72414d3f | 45 | |
5234f5eb EB |
46 | page = kimage_alloc_control_pages(image, 0); |
47 | if (!page) { | |
48 | result = -ENOMEM; | |
49 | goto out; | |
50 | } | |
8bf27556 | 51 | level2p = (pmd_t *)page_address(page); |
5234f5eb | 52 | init_level2_page(level2p, addr); |
8bf27556 EB |
53 | set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE)); |
54 | addr += PUD_SIZE; | |
5234f5eb EB |
55 | } |
56 | /* clear the unused entries */ | |
72414d3f | 57 | while (addr < end_addr) { |
8bf27556 EB |
58 | pud_clear(level3p++); |
59 | addr += PUD_SIZE; | |
5234f5eb EB |
60 | } |
61 | out: | |
62 | return result; | |
63 | } | |
64 | ||
65 | ||
8bf27556 | 66 | static int init_level4_page(struct kimage *image, pgd_t *level4p, |
72414d3f | 67 | unsigned long addr, unsigned long last_addr) |
5234f5eb EB |
68 | { |
69 | unsigned long end_addr; | |
70 | int result; | |
72414d3f | 71 | |
5234f5eb EB |
72 | result = 0; |
73 | addr &= PAGE_MASK; | |
8bf27556 | 74 | end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE); |
72414d3f | 75 | while ((addr < last_addr) && (addr < end_addr)) { |
5234f5eb | 76 | struct page *page; |
8bf27556 | 77 | pud_t *level3p; |
72414d3f | 78 | |
5234f5eb EB |
79 | page = kimage_alloc_control_pages(image, 0); |
80 | if (!page) { | |
81 | result = -ENOMEM; | |
82 | goto out; | |
83 | } | |
8bf27556 | 84 | level3p = (pud_t *)page_address(page); |
5234f5eb EB |
85 | result = init_level3_page(image, level3p, addr, last_addr); |
86 | if (result) { | |
87 | goto out; | |
88 | } | |
8bf27556 EB |
89 | set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); |
90 | addr += PGDIR_SIZE; | |
5234f5eb EB |
91 | } |
92 | /* clear the unused entries */ | |
72414d3f | 93 | while (addr < end_addr) { |
8bf27556 EB |
94 | pgd_clear(level4p++); |
95 | addr += PGDIR_SIZE; | |
5234f5eb | 96 | } |
72414d3f | 97 | out: |
5234f5eb EB |
98 | return result; |
99 | } | |
100 | ||
f5deb796 HY |
101 | static void free_transition_pgtable(struct kimage *image) |
102 | { | |
103 | free_page((unsigned long)image->arch.pud); | |
104 | free_page((unsigned long)image->arch.pmd); | |
105 | free_page((unsigned long)image->arch.pte); | |
106 | } | |
107 | ||
108 | static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) | |
109 | { | |
110 | pud_t *pud; | |
111 | pmd_t *pmd; | |
112 | pte_t *pte; | |
113 | unsigned long vaddr, paddr; | |
114 | int result = -ENOMEM; | |
115 | ||
116 | vaddr = (unsigned long)relocate_kernel; | |
117 | paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); | |
118 | pgd += pgd_index(vaddr); | |
119 | if (!pgd_present(*pgd)) { | |
120 | pud = (pud_t *)get_zeroed_page(GFP_KERNEL); | |
121 | if (!pud) | |
122 | goto err; | |
123 | image->arch.pud = pud; | |
124 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | |
125 | } | |
126 | pud = pud_offset(pgd, vaddr); | |
127 | if (!pud_present(*pud)) { | |
128 | pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); | |
129 | if (!pmd) | |
130 | goto err; | |
131 | image->arch.pmd = pmd; | |
132 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | |
133 | } | |
134 | pmd = pmd_offset(pud, vaddr); | |
135 | if (!pmd_present(*pmd)) { | |
136 | pte = (pte_t *)get_zeroed_page(GFP_KERNEL); | |
137 | if (!pte) | |
138 | goto err; | |
139 | image->arch.pte = pte; | |
140 | set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); | |
141 | } | |
142 | pte = pte_offset_kernel(pmd, vaddr); | |
143 | set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); | |
144 | return 0; | |
145 | err: | |
146 | free_transition_pgtable(image); | |
147 | return result; | |
148 | } | |
149 | ||
5234f5eb EB |
150 | |
151 | static int init_pgtable(struct kimage *image, unsigned long start_pgtable) | |
152 | { | |
8bf27556 | 153 | pgd_t *level4p; |
f5deb796 | 154 | int result; |
8bf27556 | 155 | level4p = (pgd_t *)__va(start_pgtable); |
f5deb796 HY |
156 | result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); |
157 | if (result) | |
158 | return result; | |
159 | return init_transition_pgtable(image, level4p); | |
5234f5eb EB |
160 | } |
161 | ||
162 | static void set_idt(void *newidt, u16 limit) | |
163 | { | |
36c4fd23 | 164 | struct desc_ptr curidt; |
5234f5eb EB |
165 | |
166 | /* x86-64 supports unaliged loads & stores */ | |
36c4fd23 EB |
167 | curidt.size = limit; |
168 | curidt.address = (unsigned long)newidt; | |
5234f5eb EB |
169 | |
170 | __asm__ __volatile__ ( | |
36c4fd23 EB |
171 | "lidtq %0\n" |
172 | : : "m" (curidt) | |
5234f5eb EB |
173 | ); |
174 | }; | |
175 | ||
176 | ||
177 | static void set_gdt(void *newgdt, u16 limit) | |
178 | { | |
36c4fd23 | 179 | struct desc_ptr curgdt; |
5234f5eb EB |
180 | |
181 | /* x86-64 supports unaligned loads & stores */ | |
36c4fd23 EB |
182 | curgdt.size = limit; |
183 | curgdt.address = (unsigned long)newgdt; | |
5234f5eb EB |
184 | |
185 | __asm__ __volatile__ ( | |
36c4fd23 EB |
186 | "lgdtq %0\n" |
187 | : : "m" (curgdt) | |
5234f5eb EB |
188 | ); |
189 | }; | |
190 | ||
191 | static void load_segments(void) | |
192 | { | |
193 | __asm__ __volatile__ ( | |
36c4fd23 EB |
194 | "\tmovl %0,%%ds\n" |
195 | "\tmovl %0,%%es\n" | |
196 | "\tmovl %0,%%ss\n" | |
197 | "\tmovl %0,%%fs\n" | |
198 | "\tmovl %0,%%gs\n" | |
2ec5e3a8 | 199 | : : "a" (__KERNEL_DS) : "memory" |
5234f5eb | 200 | ); |
5234f5eb EB |
201 | } |
202 | ||
5234f5eb EB |
203 | int machine_kexec_prepare(struct kimage *image) |
204 | { | |
4bfaaef0 | 205 | unsigned long start_pgtable; |
5234f5eb EB |
206 | int result; |
207 | ||
208 | /* Calculate the offsets */ | |
72414d3f | 209 | start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; |
5234f5eb EB |
210 | |
211 | /* Setup the identity mapped 64bit page table */ | |
212 | result = init_pgtable(image, start_pgtable); | |
72414d3f | 213 | if (result) |
5234f5eb | 214 | return result; |
5234f5eb | 215 | |
5234f5eb EB |
216 | return 0; |
217 | } | |
218 | ||
/*
 * Architecture hook that undoes machine_kexec_prepare(): releases the
 * transition page-table pages recorded in image->arch.
 */
void machine_kexec_cleanup(struct kimage *image)
{
	free_transition_pgtable(image);
}
223 | ||
224 | /* | |
225 | * Do not allocate memory (or fail in any way) in machine_kexec(). | |
226 | * We are past the point of no return, committed to rebooting now. | |
227 | */ | |
3ab83521 | 228 | void machine_kexec(struct kimage *image) |
5234f5eb | 229 | { |
4bfaaef0 MD |
230 | unsigned long page_list[PAGES_NR]; |
231 | void *control_page; | |
5234f5eb | 232 | |
f43fdad8 IM |
233 | tracer_disable(); |
234 | ||
5234f5eb EB |
235 | /* Interrupts aren't acceptable while we reboot */ |
236 | local_irq_disable(); | |
237 | ||
4bfaaef0 MD |
238 | control_page = page_address(image->control_code_page) + PAGE_SIZE; |
239 | memcpy(control_page, relocate_kernel, PAGE_SIZE); | |
240 | ||
e3ebadd9 | 241 | page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); |
4bfaaef0 MD |
242 | page_list[PA_TABLE_PAGE] = |
243 | (unsigned long)__pa(page_address(image->control_code_page)); | |
5234f5eb | 244 | |
2a8a3d5b EB |
245 | /* The segment registers are funny things, they have both a |
246 | * visible and an invisible part. Whenever the visible part is | |
247 | * set to a specific selector, the invisible part is loaded | |
248 | * with from a table in memory. At no other time is the | |
249 | * descriptor table in memory accessed. | |
5234f5eb EB |
250 | * |
251 | * I take advantage of this here by force loading the | |
252 | * segments, before I zap the gdt with an invalid value. | |
253 | */ | |
254 | load_segments(); | |
255 | /* The gdt & idt are now invalid. | |
256 | * If you want to load them you must set up your own idt & gdt. | |
257 | */ | |
258 | set_gdt(phys_to_virt(0),0); | |
259 | set_idt(phys_to_virt(0),0); | |
4bfaaef0 | 260 | |
5234f5eb | 261 | /* now call it */ |
4bfaaef0 MD |
262 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, |
263 | image->start); | |
5234f5eb | 264 | } |
2c8c0e6b | 265 | |
fd59d231 KO |
266 | void arch_crash_save_vmcoreinfo(void) |
267 | { | |
629c8b4c | 268 | VMCOREINFO_SYMBOL(phys_base); |
69243f91 | 269 | VMCOREINFO_SYMBOL(init_level4_pgt); |
92df5c3e KO |
270 | |
271 | #ifdef CONFIG_NUMA | |
272 | VMCOREINFO_SYMBOL(node_data); | |
273 | VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); | |
274 | #endif | |
fd59d231 KO |
275 | } |
276 |