Commit | Line | Data |
---|---|---|
d3561b7f RR |
1 | /* Paravirtualization interfaces |
2 | Copyright (C) 2006 Rusty Russell IBM Corporation | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
17 | */ | |
18 | #include <linux/errno.h> | |
19 | #include <linux/module.h> | |
20 | #include <linux/efi.h> | |
21 | #include <linux/bcd.h> | |
ce6234b5 | 22 | #include <linux/highmem.h> |
d3561b7f RR |
23 | |
24 | #include <asm/bug.h> | |
25 | #include <asm/paravirt.h> | |
26 | #include <asm/desc.h> | |
27 | #include <asm/setup.h> | |
28 | #include <asm/arch_hooks.h> | |
29 | #include <asm/time.h> | |
30 | #include <asm/irq.h> | |
31 | #include <asm/delay.h> | |
13623d79 RR |
32 | #include <asm/fixmap.h> |
33 | #include <asm/apic.h> | |
da181a8b | 34 | #include <asm/tlbflush.h> |
6cb9a835 | 35 | #include <asm/timer.h> |
d3561b7f RR |
36 | |
/*
 * nop stub: takes no arguments, does nothing and returns nothing.
 * NOTE(review): the ops tables in this file reference "paravirt_nop";
 * presumably a header maps that name onto this function - confirm.
 */
void _paravirt_nop(void)
{
}
41 | ||
42 | static void __init default_banner(void) | |
43 | { | |
44 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | |
93b1eab3 | 45 | pv_info.name); |
d3561b7f RR |
46 | } |
47 | ||
48 | char *memory_setup(void) | |
49 | { | |
93b1eab3 | 50 | return pv_init_ops.memory_setup(); |
d3561b7f RR |
51 | } |
52 | ||
/*
 * Simple instruction patching code.
 *
 * DEF_NATIVE(ops, name, code) emits the assembly text "code" between two
 * labels, start_<ops>_<name> and end_<ops>_<name>, and declares both labels
 * as extern const char arrays so that C code can take their addresses.
 * native_patch() hands such a label pair to paravirt_patch_insns(), which
 * copies the bytes between them into the patch buffer.
 */
#define DEF_NATIVE(ops, name, code) 					\
	extern const char start_##ops##_##name[], end_##ops##_##name[];	\
	asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")

/* One inline template per operation that native_patch() knows about. */
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
69 | ||
/*
 * Undefined instruction for dealing with missing ops pointers:
 * paravirt_patch_default() patches these two bytes into a call site
 * whose ops pointer is NULL.
 */
static const unsigned char ud2a[] = { 0x0f, 0x0b };
139ec7c4 | 72 | |
ab144f5e AK |
73 | static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, |
74 | unsigned long addr, unsigned len) | |
139ec7c4 | 75 | { |
63f70270 JF |
76 | const unsigned char *start, *end; |
77 | unsigned ret; | |
78 | ||
79 | switch(type) { | |
93b1eab3 JF |
80 | #define SITE(ops, x) \ |
81 | case PARAVIRT_PATCH(ops.x): \ | |
82 | start = start_##ops##_##x; \ | |
83 | end = end_##ops##_##x; \ | |
84 | goto patch_site | |
85 | ||
86 | SITE(pv_irq_ops, irq_disable); | |
87 | SITE(pv_irq_ops, irq_enable); | |
88 | SITE(pv_irq_ops, restore_fl); | |
89 | SITE(pv_irq_ops, save_fl); | |
90 | SITE(pv_cpu_ops, iret); | |
6abcd98f | 91 | SITE(pv_cpu_ops, irq_enable_syscall_ret); |
93b1eab3 JF |
92 | SITE(pv_mmu_ops, read_cr2); |
93 | SITE(pv_mmu_ops, read_cr3); | |
94 | SITE(pv_mmu_ops, write_cr3); | |
95 | SITE(pv_cpu_ops, clts); | |
96 | SITE(pv_cpu_ops, read_tsc); | |
63f70270 JF |
97 | #undef SITE |
98 | ||
99 | patch_site: | |
ab144f5e | 100 | ret = paravirt_patch_insns(ibuf, len, start, end); |
63f70270 JF |
101 | break; |
102 | ||
63f70270 | 103 | default: |
ab144f5e | 104 | ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); |
63f70270 JF |
105 | break; |
106 | } | |
107 | ||
108 | return ret; | |
109 | } | |
110 | ||
/*
 * Patch helper for operations that need no code at all.
 * Always reports a patched length of zero bytes.
 */
unsigned paravirt_patch_nop(void)
{
	const unsigned patched_len = 0;

	return patched_len;
}
115 | ||
/*
 * Patch helper that changes nothing: it simply reports the length it
 * was given.
 *
 * len: length of the patch site in bytes
 *
 * Returns len unchanged.
 */
unsigned paravirt_patch_ignore(unsigned len)
{
	unsigned untouched = len;

	return untouched;
}
120 | ||
19d36ccd AK |
/*
 * Byte layout of the 5-byte branch instruction written by
 * paravirt_patch_call() and paravirt_patch_jmp(): one opcode byte
 * followed by a 32-bit displacement.  Packed so that no padding is
 * inserted between the two fields.
 */
struct branch {
	unsigned char opcode;	/* 0xe8 (call) or 0xe9 (jmp) */
	u32 delta;		/* target - (addr + 5), as computed by the patch helpers */
} __attribute__((packed));
125 | ||
ab144f5e AK |
126 | unsigned paravirt_patch_call(void *insnbuf, |
127 | const void *target, u16 tgt_clobbers, | |
128 | unsigned long addr, u16 site_clobbers, | |
63f70270 JF |
129 | unsigned len) |
130 | { | |
ab144f5e AK |
131 | struct branch *b = insnbuf; |
132 | unsigned long delta = (unsigned long)target - (addr+5); | |
63f70270 JF |
133 | |
134 | if (tgt_clobbers & ~site_clobbers) | |
135 | return len; /* target would clobber too much for this site */ | |
136 | if (len < 5) | |
137 | return len; /* call too long for patch site */ | |
139ec7c4 | 138 | |
ab144f5e AK |
139 | b->opcode = 0xe8; /* call */ |
140 | b->delta = delta; | |
141 | BUILD_BUG_ON(sizeof(*b) != 5); | |
139ec7c4 | 142 | |
63f70270 JF |
143 | return 5; |
144 | } | |
145 | ||
93b1eab3 | 146 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, |
ab144f5e | 147 | unsigned long addr, unsigned len) |
63f70270 | 148 | { |
ab144f5e AK |
149 | struct branch *b = insnbuf; |
150 | unsigned long delta = (unsigned long)target - (addr+5); | |
63f70270 JF |
151 | |
152 | if (len < 5) | |
153 | return len; /* call too long for patch site */ | |
154 | ||
ab144f5e AK |
155 | b->opcode = 0xe9; /* jmp */ |
156 | b->delta = delta; | |
63f70270 JF |
157 | |
158 | return 5; | |
159 | } | |
160 | ||
93b1eab3 JF |
161 | /* Neat trick to map patch type back to the call within the |
162 | * corresponding structure. */ | |
163 | static void *get_call_destination(u8 type) | |
164 | { | |
165 | struct paravirt_patch_template tmpl = { | |
166 | .pv_init_ops = pv_init_ops, | |
93b1eab3 JF |
167 | .pv_time_ops = pv_time_ops, |
168 | .pv_cpu_ops = pv_cpu_ops, | |
169 | .pv_irq_ops = pv_irq_ops, | |
170 | .pv_apic_ops = pv_apic_ops, | |
171 | .pv_mmu_ops = pv_mmu_ops, | |
172 | }; | |
173 | return *((void **)&tmpl + type); | |
174 | } | |
175 | ||
ab144f5e AK |
176 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, |
177 | unsigned long addr, unsigned len) | |
63f70270 | 178 | { |
93b1eab3 | 179 | void *opfunc = get_call_destination(type); |
63f70270 JF |
180 | unsigned ret; |
181 | ||
182 | if (opfunc == NULL) | |
183 | /* If there's no function, patch it with a ud2a (BUG) */ | |
93b1eab3 | 184 | ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); |
63f70270 JF |
185 | else if (opfunc == paravirt_nop) |
186 | /* If the operation is a nop, then nop the callsite */ | |
187 | ret = paravirt_patch_nop(); | |
93b1eab3 | 188 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || |
6abcd98f | 189 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret)) |
63f70270 | 190 | /* If operation requires a jmp, then jmp */ |
93b1eab3 | 191 | ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); |
63f70270 JF |
192 | else |
193 | /* Otherwise call the function; assume target could | |
194 | clobber any caller-save reg */ | |
ab144f5e AK |
195 | ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, |
196 | addr, clobbers, len); | |
63f70270 JF |
197 | |
198 | return ret; | |
199 | } | |
200 | ||
/*
 * Copy the instruction template [start, end) into insnbuf if it fits.
 *
 * insnbuf: buffer that receives the template bytes
 * len:     number of bytes available at the patch site
 * start:   first byte of the template, or NULL for "no template"
 * end:     one past the last byte of the template
 *
 * Returns the number of template bytes copied.  When start is NULL or
 * the template is longer than len, nothing is copied and len is returned.
 */
unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
			      const char *start, const char *end)
{
	unsigned insn_len;

	/*
	 * Test for a missing template first, so that the pointer
	 * subtraction below never involves a null pointer.
	 */
	if (start == NULL)
		return len;

	insn_len = end - start;
	if (insn_len > len)
		return len;

	memcpy(insnbuf, start, insn_len);

	return insn_len;
}
213 | ||
d3561b7f RR |
/* Dispatch IRQ initialisation to the hook installed in pv_irq_ops.init_IRQ. */
void init_IRQ(void)
{
	pv_irq_ops.init_IRQ();
}
218 | ||
/*
 * Function wrapper around __native_flush_tlb(); installed below as
 * pv_mmu_ops.flush_tlb_user.
 */
static void native_flush_tlb(void)
{
	__native_flush_tlb();
}
223 | ||
/*
 * Global pages have to be flushed a bit differently. Not a real
 * performance problem because this does not happen often.
 *
 * Function wrapper around __native_flush_tlb_global(); installed below
 * as pv_mmu_ops.flush_tlb_kernel.
 */
static void native_flush_tlb_global(void)
{
	__native_flush_tlb_global();
}
232 | ||
/*
 * Function wrapper around __native_flush_tlb_single(addr); installed
 * below as pv_mmu_ops.flush_tlb_single.
 */
static void native_flush_tlb_single(unsigned long addr)
{
	__native_flush_tlb_single(addr);
}
237 | ||
/*
 * These are in entry.S.  They are installed below as the iret and
 * irq_enable_syscall_ret members of pv_cpu_ops.
 */
extern void native_iret(void);
extern void native_irq_enable_syscall_ret(void);
d3561b7f RR |
241 | |
/*
 * Initcall that invokes the banner hook from pv_init_ops.
 * Always returns 0.
 */
static int __init print_banner(void)
{
	pv_init_ops.banner();
	return 0;
}
core_initcall(print_banner);
248 | ||
d572929c JF |
/*
 * Busy I/O-port resource spanning 0..IO_SPACE_LIMIT; claimed by
 * paravirt_disable_iospace().
 */
static struct resource reserve_ioports = {
	.start = 0,
	.end = IO_SPACE_LIMIT,
	.name = "paravirt-ioport",
	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
};
255 | ||
/*
 * Busy I/O-memory resource starting at 0; claimed by
 * paravirt_disable_iospace().
 */
static struct resource reserve_iomem = {
	.start = 0,
	/* NOTE(review): presumably -1 becomes the maximum value of the
	   (unsigned) end field, i.e. "everything" - confirm field type */
	.end = -1,
	.name = "paravirt-iomem",
	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};
262 | ||
263 | /* | |
264 | * Reserve the whole legacy IO space to prevent any legacy drivers | |
265 | * from wasting time probing for their hardware. This is a fairly | |
266 | * brute-force approach to disabling all non-virtual drivers. | |
267 | * | |
268 | * Note that this must be called very early to have any effect. | |
269 | */ | |
270 | int paravirt_disable_iospace(void) | |
271 | { | |
272 | int ret; | |
273 | ||
274 | ret = request_resource(&ioport_resource, &reserve_ioports); | |
275 | if (ret == 0) { | |
276 | ret = request_resource(&iomem_resource, &reserve_iomem); | |
277 | if (ret) | |
278 | release_resource(&reserve_ioports); | |
279 | } | |
280 | ||
281 | return ret; | |
282 | } | |
283 | ||
8965c1c0 JF |
/* Per-cpu record of the lazy mode currently in effect; starts as PARAVIRT_LAZY_NONE. */
static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
285 | ||
286 | static inline void enter_lazy(enum paravirt_lazy_mode mode) | |
287 | { | |
288 | BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); | |
289 | BUG_ON(preemptible()); | |
290 | ||
291 | x86_write_percpu(paravirt_lazy_mode, mode); | |
292 | } | |
293 | ||
294 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode) | |
295 | { | |
296 | BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode); | |
297 | BUG_ON(preemptible()); | |
298 | ||
299 | x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); | |
300 | } | |
301 | ||
/* Mark this cpu as being in PARAVIRT_LAZY_MMU mode (see enter_lazy()). */
void paravirt_enter_lazy_mmu(void)
{
	enter_lazy(PARAVIRT_LAZY_MMU);
}
306 | ||
/* Leave PARAVIRT_LAZY_MMU mode on this cpu (see paravirt_leave_lazy()). */
void paravirt_leave_lazy_mmu(void)
{
	paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
}
311 | ||
/* Mark this cpu as being in PARAVIRT_LAZY_CPU mode (see enter_lazy()). */
void paravirt_enter_lazy_cpu(void)
{
	enter_lazy(PARAVIRT_LAZY_CPU);
}
316 | ||
/* Leave PARAVIRT_LAZY_CPU mode on this cpu (see paravirt_leave_lazy()). */
void paravirt_leave_lazy_cpu(void)
{
	paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
}
321 | ||
/* Return the lazy mode currently recorded for this cpu. */
enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
{
	return x86_read_percpu(paravirt_lazy_mode);
}
326 | ||
/*
 * Default platform description.  The name is what default_banner()
 * prints.
 */
struct pv_info pv_info = {
	.name = "bare hardware",
	.paravirt_enabled = 0,
	.kernel_rpl = 0,
	.shared_kernel_pmd = 1,	/* Only used when CONFIG_X86_PAE is set */
};
d3561b7f | 333 | |
93b1eab3 JF |
/* Default boot-time hooks: patching, banner, arch setup and memory setup. */
struct pv_init_ops pv_init_ops = {
	.patch = native_patch,
	.banner = default_banner,
	.arch_setup = paravirt_nop,	/* no arch setup hook by default */
	.memory_setup = machine_specific_memory_setup,
};
340 | ||
/* Default time-related hooks. */
struct pv_time_ops pv_time_ops = {
	.time_init = hpet_time_init,
	.get_wallclock = native_get_wallclock,
	.set_wallclock = native_set_wallclock,
	.sched_clock = native_sched_clock,
	.get_cpu_khz = native_calculate_cpu_khz,
};
348 | ||
/*
 * Default interrupt-related hooks.  save_fl, restore_fl, irq_disable and
 * irq_enable also have inline DEF_NATIVE() templates that native_patch()
 * can substitute at call sites.
 */
struct pv_irq_ops pv_irq_ops = {
	.init_IRQ = native_init_IRQ,
	.save_fl = native_save_fl,
	.restore_fl = native_restore_fl,
	.irq_disable = native_irq_disable,
	.irq_enable = native_irq_enable,
	.safe_halt = native_safe_halt,
	.halt = native_halt,
};
d3561b7f | 358 | |
/* Default CPU-related hooks. */
struct pv_cpu_ops pv_cpu_ops = {
	.cpuid = native_cpuid,
	.get_debugreg = native_get_debugreg,
	.set_debugreg = native_set_debugreg,
	.clts = native_clts,
	.read_cr0 = native_read_cr0,
	.write_cr0 = native_write_cr0,
	.read_cr4 = native_read_cr4,
	.read_cr4_safe = native_read_cr4_safe,
	.write_cr4 = native_write_cr4,
	.wbinvd = native_wbinvd,
	/* the MSR hooks use the *_safe variants */
	.read_msr = native_read_msr_safe,
	.write_msr = native_write_msr_safe,
	.read_tsc = native_read_tsc,
	.read_pmc = native_read_pmc,
	.load_tr_desc = native_load_tr_desc,
	.set_ldt = native_set_ldt,
	.load_gdt = native_load_gdt,
	.load_idt = native_load_idt,
	.store_gdt = native_store_gdt,
	.store_idt = native_store_idt,
	.store_tr = native_store_tr,
	.load_tls = native_load_tls,
	/* LDT and GDT entries share one writer */
	.write_ldt_entry = write_dt_entry,
	.write_gdt_entry = write_dt_entry,
	.write_idt_entry = native_write_idt_entry,
	.load_sp0 = native_load_sp0,

	/* these two are patched as jumps by paravirt_patch_default() */
	.irq_enable_syscall_ret = native_irq_enable_syscall_ret,
	.iret = native_iret,

	.set_iopl_mask = native_set_iopl_mask,
	.io_delay = native_io_delay,

	/* no lazy-mode hooks by default */
	.lazy_mode = {
		.enter = paravirt_nop,
		.leave = paravirt_nop,
	},
};
d3561b7f | 398 | |
/*
 * Default local-APIC hooks.  The table is only populated when
 * CONFIG_X86_LOCAL_APIC is set; otherwise the initializer is empty.
 */
struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
	.apic_write = native_apic_write,
	.apic_write_atomic = native_apic_write_atomic,
	.apic_read = native_apic_read,
	.setup_boot_clock = setup_boot_APIC_clock,
	.setup_secondary_clock = setup_secondary_APIC_clock,
	.startup_ipi_hook = paravirt_nop,
#endif
};
409 | ||
/* Default MMU-related hooks. */
struct pv_mmu_ops pv_mmu_ops = {
	.pagetable_setup_start = native_pagetable_setup_start,
	.pagetable_setup_done = native_pagetable_setup_done,

	.read_cr2 = native_read_cr2,
	.write_cr2 = native_write_cr2,
	.read_cr3 = native_read_cr3,
	.write_cr3 = native_write_cr3,

	/* the first three are the static wrappers defined in this file */
	.flush_tlb_user = native_flush_tlb,
	.flush_tlb_kernel = native_flush_tlb_global,
	.flush_tlb_single = native_flush_tlb_single,
	.flush_tlb_others = native_flush_tlb_others,

	/* page-table alloc/release notifications: nothing to do by default */
	.alloc_pt = paravirt_nop,
	.alloc_pd = paravirt_nop,
	.alloc_pd_clone = paravirt_nop,
	.release_pt = paravirt_nop,
	.release_pd = paravirt_nop,

	.set_pte = native_set_pte,
	.set_pte_at = native_set_pte_at,
	.set_pmd = native_set_pmd,
	.pte_update = paravirt_nop,
	.pte_update_defer = paravirt_nop,

#ifdef CONFIG_HIGHPTE
	.kmap_atomic_pte = kmap_atomic,
#endif

	/* extra hooks that are only set for PAE builds */
#ifdef CONFIG_X86_PAE
	.set_pte_atomic = native_set_pte_atomic,
	.set_pte_present = native_set_pte_present,
	.set_pud = native_set_pud,
	.pte_clear = native_pte_clear,
	.pmd_clear = native_pmd_clear,

	.pmd_val = native_pmd_val,
	.make_pmd = native_make_pmd,
#endif

	.pte_val = native_pte_val,
	.pgd_val = native_pgd_val,

	.make_pte = native_make_pte,
	.make_pgd = native_make_pgd,

	.dup_mmap = paravirt_nop,
	.exit_mmap = paravirt_nop,
	.activate_mm = paravirt_nop,

	/* no lazy-mode hooks by default */
	.lazy_mode = {
		.enter = paravirt_nop,
		.leave = paravirt_nop,
	},
};
0dbe5a11 | 466 | |
/*
 * Symbol exports.  pv_cpu_ops, pv_mmu_ops and pv_irq_ops use plain
 * EXPORT_SYMBOL; pv_time_ops, pv_apic_ops and pv_info are exported
 * GPL-only.  pv_init_ops is not exported.
 */
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL    (pv_cpu_ops);
EXPORT_SYMBOL    (pv_mmu_ops);
EXPORT_SYMBOL_GPL(pv_apic_ops);
EXPORT_SYMBOL_GPL(pv_info);
EXPORT_SYMBOL    (pv_irq_ops);