Commit | Line | Data |
---|---|---|
d3561b7f RR |
1 | /* Paravirtualization interfaces |
2 | Copyright (C) 2006 Rusty Russell IBM Corporation | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
b1df07bd GOC |
17 | |
18 | 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc | |
d3561b7f | 19 | */ |
b1df07bd | 20 | |
d3561b7f RR |
21 | #include <linux/errno.h> |
22 | #include <linux/module.h> | |
23 | #include <linux/efi.h> | |
24 | #include <linux/bcd.h> | |
ce6234b5 | 25 | #include <linux/highmem.h> |
d3561b7f RR |
26 | |
27 | #include <asm/bug.h> | |
28 | #include <asm/paravirt.h> | |
29 | #include <asm/desc.h> | |
30 | #include <asm/setup.h> | |
31 | #include <asm/arch_hooks.h> | |
a312b37b | 32 | #include <asm/pgtable.h> |
d3561b7f | 33 | #include <asm/time.h> |
eba0045f | 34 | #include <asm/pgalloc.h> |
d3561b7f RR |
35 | #include <asm/irq.h> |
36 | #include <asm/delay.h> | |
13623d79 RR |
37 | #include <asm/fixmap.h> |
38 | #include <asm/apic.h> | |
da181a8b | 39 | #include <asm/tlbflush.h> |
6cb9a835 | 40 | #include <asm/timer.h> |
d3561b7f RR |
41 | |
/*
 * Do-nothing stub.  Ops tables in this file point optional hooks at it
 * (through the paravirt_nop name).
 */
void _paravirt_nop(void)
{
	/* intentionally empty */
}
46 | ||
47 | static void __init default_banner(void) | |
48 | { | |
49 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | |
93b1eab3 | 50 | pv_info.name); |
d3561b7f RR |
51 | } |
52 | ||
53 | char *memory_setup(void) | |
54 | { | |
93b1eab3 | 55 | return pv_init_ops.memory_setup(); |
d3561b7f RR |
56 | } |
57 | ||
/*
 * Simple instruction patching code.
 *
 * DEF_NATIVE(ops, name, code) emits the assembly string `code` bracketed
 * by the labels start_<ops>_<name> and end_<ops>_<name>, and declares both
 * labels as extern const char arrays so C code can refer to the bytes
 * between them.
 */
#define DEF_NATIVE(ops_struct, op, insns)					\
	extern const char start_##ops_struct##_##op[], end_##ops_struct##_##op[];	\
	asm("start_" #ops_struct "_" #op ": " insns "; end_" #ops_struct "_" #op ":")
62 | ||
93b1eab3 JF |
/*
 * Two-byte undefined-instruction encoding, patched into call sites whose
 * ops pointer is missing.
 */
static const unsigned char ud2a[2] = { 0x0f, 0x0b };
139ec7c4 | 65 | |
63f70270 JF |
/* Patch helper that emits no instruction bytes; always returns 0. */
unsigned paravirt_patch_nop(void)
{
	const unsigned emitted = 0;

	return emitted;
}
70 | ||
/* Patch helper that writes nothing and returns the site length unchanged. */
unsigned paravirt_patch_ignore(unsigned len)
{
	unsigned untouched = len;

	return untouched;
}
75 | ||
19d36ccd AK |
76 | struct branch { |
77 | unsigned char opcode; | |
78 | u32 delta; | |
79 | } __attribute__((packed)); | |
80 | ||
ab144f5e AK |
81 | unsigned paravirt_patch_call(void *insnbuf, |
82 | const void *target, u16 tgt_clobbers, | |
83 | unsigned long addr, u16 site_clobbers, | |
63f70270 JF |
84 | unsigned len) |
85 | { | |
ab144f5e AK |
86 | struct branch *b = insnbuf; |
87 | unsigned long delta = (unsigned long)target - (addr+5); | |
63f70270 JF |
88 | |
89 | if (tgt_clobbers & ~site_clobbers) | |
90 | return len; /* target would clobber too much for this site */ | |
91 | if (len < 5) | |
92 | return len; /* call too long for patch site */ | |
139ec7c4 | 93 | |
ab144f5e AK |
94 | b->opcode = 0xe8; /* call */ |
95 | b->delta = delta; | |
96 | BUILD_BUG_ON(sizeof(*b) != 5); | |
139ec7c4 | 97 | |
63f70270 JF |
98 | return 5; |
99 | } | |
100 | ||
93b1eab3 | 101 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, |
ab144f5e | 102 | unsigned long addr, unsigned len) |
63f70270 | 103 | { |
ab144f5e AK |
104 | struct branch *b = insnbuf; |
105 | unsigned long delta = (unsigned long)target - (addr+5); | |
63f70270 JF |
106 | |
107 | if (len < 5) | |
108 | return len; /* call too long for patch site */ | |
109 | ||
ab144f5e AK |
110 | b->opcode = 0xe9; /* jmp */ |
111 | b->delta = delta; | |
63f70270 JF |
112 | |
113 | return 5; | |
114 | } | |
115 | ||
93b1eab3 JF |
116 | /* Neat trick to map patch type back to the call within the |
117 | * corresponding structure. */ | |
118 | static void *get_call_destination(u8 type) | |
119 | { | |
120 | struct paravirt_patch_template tmpl = { | |
121 | .pv_init_ops = pv_init_ops, | |
93b1eab3 JF |
122 | .pv_time_ops = pv_time_ops, |
123 | .pv_cpu_ops = pv_cpu_ops, | |
124 | .pv_irq_ops = pv_irq_ops, | |
125 | .pv_apic_ops = pv_apic_ops, | |
126 | .pv_mmu_ops = pv_mmu_ops, | |
74d4affd | 127 | .pv_lock_ops = pv_lock_ops, |
93b1eab3 JF |
128 | }; |
129 | return *((void **)&tmpl + type); | |
130 | } | |
131 | ||
ab144f5e AK |
132 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, |
133 | unsigned long addr, unsigned len) | |
63f70270 | 134 | { |
93b1eab3 | 135 | void *opfunc = get_call_destination(type); |
63f70270 JF |
136 | unsigned ret; |
137 | ||
138 | if (opfunc == NULL) | |
139 | /* If there's no function, patch it with a ud2a (BUG) */ | |
93b1eab3 | 140 | ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); |
63f70270 JF |
141 | else if (opfunc == paravirt_nop) |
142 | /* If the operation is a nop, then nop the callsite */ | |
143 | ret = paravirt_patch_nop(); | |
93b1eab3 | 144 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || |
d75cd22f | 145 | type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || |
2be29982 JF |
146 | type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || |
147 | type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) | |
63f70270 | 148 | /* If operation requires a jmp, then jmp */ |
93b1eab3 | 149 | ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); |
63f70270 JF |
150 | else |
151 | /* Otherwise call the function; assume target could | |
152 | clobber any caller-save reg */ | |
ab144f5e AK |
153 | ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, |
154 | addr, clobbers, len); | |
63f70270 JF |
155 | |
156 | return ret; | |
157 | } | |
158 | ||
/*
 * Copy a pre-assembled instruction sequence into @insnbuf.
 *
 * @insnbuf: buffer receiving the instruction bytes
 * @len:     bytes available at the patch site
 * @start:   first byte of the sequence (may be NULL)
 * @end:     one past the last byte of the sequence
 *
 * Returns the number of bytes copied when the sequence fits.  Returns
 * @len, writing nothing, when @start is NULL, the range is reversed, or
 * the sequence is longer than @len.
 */
unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
			      const char *start, const char *end)
{
	unsigned long insn_len;

	/*
	 * Validate the range before doing pointer arithmetic on it, rather
	 * than relying on unsigned wrap-around to reject bad input.
	 */
	if (start == NULL || end < start)
		return len;

	insn_len = end - start;
	if (insn_len > len)
		return len;	/* sequence does not fit in the patch site */

	memcpy(insnbuf, start, insn_len);

	return insn_len;
}
171 | ||
d3561b7f RR |
172 | void init_IRQ(void) |
173 | { | |
93b1eab3 | 174 | pv_irq_ops.init_IRQ(); |
d3561b7f RR |
175 | } |
176 | ||
/*
 * Out-of-line wrapper around __native_flush_tlb(); installed below as
 * pv_mmu_ops.flush_tlb_user.
 */
static void native_flush_tlb(void)
{
	__native_flush_tlb();
}
181 | ||
/*
 * Global pages have to be flushed a bit differently.  Not a real
 * performance problem because this does not happen often.
 *
 * Out-of-line wrapper around __native_flush_tlb_global(); installed
 * below as pv_mmu_ops.flush_tlb_kernel.
 */
static void native_flush_tlb_global(void)
{
	__native_flush_tlb_global();
}
190 | ||
/*
 * Out-of-line wrapper around __native_flush_tlb_single() for @addr;
 * installed below as pv_mmu_ops.flush_tlb_single.
 */
static void native_flush_tlb_single(unsigned long addr)
{
	__native_flush_tlb_single(addr);
}
195 | ||
/*
 * Native return-path entry points.  These are in entry.S; they are only
 * declared here so the pv_cpu_ops table below can take their addresses.
 */
extern void native_iret(void);
extern void native_irq_enable_sysexit(void);
extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
d3561b7f RR |
201 | |
202 | static int __init print_banner(void) | |
203 | { | |
93b1eab3 | 204 | pv_init_ops.banner(); |
d3561b7f RR |
205 | return 0; |
206 | } | |
207 | core_initcall(print_banner); | |
208 | ||
d572929c JF |
209 | static struct resource reserve_ioports = { |
210 | .start = 0, | |
211 | .end = IO_SPACE_LIMIT, | |
212 | .name = "paravirt-ioport", | |
213 | .flags = IORESOURCE_IO | IORESOURCE_BUSY, | |
214 | }; | |
215 | ||
d572929c JF |
216 | /* |
217 | * Reserve the whole legacy IO space to prevent any legacy drivers | |
218 | * from wasting time probing for their hardware. This is a fairly | |
219 | * brute-force approach to disabling all non-virtual drivers. | |
220 | * | |
221 | * Note that this must be called very early to have any effect. | |
222 | */ | |
223 | int paravirt_disable_iospace(void) | |
224 | { | |
f7743fe6 | 225 | return request_resource(&ioport_resource, &reserve_ioports); |
d572929c JF |
226 | } |
227 | ||
8965c1c0 JF |
228 | static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; |
229 | ||
230 | static inline void enter_lazy(enum paravirt_lazy_mode mode) | |
231 | { | |
bfd074e0 | 232 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); |
8965c1c0 JF |
233 | BUG_ON(preemptible()); |
234 | ||
bfd074e0 | 235 | __get_cpu_var(paravirt_lazy_mode) = mode; |
8965c1c0 JF |
236 | } |
237 | ||
238 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode) | |
239 | { | |
bfd074e0 | 240 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); |
8965c1c0 JF |
241 | BUG_ON(preemptible()); |
242 | ||
bfd074e0 | 243 | __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; |
8965c1c0 JF |
244 | } |
245 | ||
246 | void paravirt_enter_lazy_mmu(void) | |
247 | { | |
248 | enter_lazy(PARAVIRT_LAZY_MMU); | |
249 | } | |
250 | ||
251 | void paravirt_leave_lazy_mmu(void) | |
252 | { | |
253 | paravirt_leave_lazy(PARAVIRT_LAZY_MMU); | |
254 | } | |
255 | ||
256 | void paravirt_enter_lazy_cpu(void) | |
257 | { | |
258 | enter_lazy(PARAVIRT_LAZY_CPU); | |
259 | } | |
260 | ||
261 | void paravirt_leave_lazy_cpu(void) | |
262 | { | |
263 | paravirt_leave_lazy(PARAVIRT_LAZY_CPU); | |
264 | } | |
265 | ||
266 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | |
267 | { | |
bfd074e0 | 268 | return __get_cpu_var(paravirt_lazy_mode); |
8965c1c0 JF |
269 | } |
270 | ||
93b1eab3 | 271 | struct pv_info pv_info = { |
d3561b7f RR |
272 | .name = "bare hardware", |
273 | .paravirt_enabled = 0, | |
274 | .kernel_rpl = 0, | |
5311ab62 | 275 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ |
93b1eab3 | 276 | }; |
d3561b7f | 277 | |
93b1eab3 JF |
278 | struct pv_init_ops pv_init_ops = { |
279 | .patch = native_patch, | |
d3561b7f | 280 | .banner = default_banner, |
45876233 | 281 | .arch_setup = paravirt_nop, |
d3561b7f | 282 | .memory_setup = machine_specific_memory_setup, |
93b1eab3 JF |
283 | }; |
284 | ||
285 | struct pv_time_ops pv_time_ops = { | |
286 | .time_init = hpet_time_init, | |
d3561b7f RR |
287 | .get_wallclock = native_get_wallclock, |
288 | .set_wallclock = native_set_wallclock, | |
93b1eab3 | 289 | .sched_clock = native_sched_clock, |
e93ef949 | 290 | .get_tsc_khz = native_calibrate_tsc, |
93b1eab3 JF |
291 | }; |
292 | ||
293 | struct pv_irq_ops pv_irq_ops = { | |
d3561b7f | 294 | .init_IRQ = native_init_IRQ, |
93b1eab3 JF |
295 | .save_fl = native_save_fl, |
296 | .restore_fl = native_restore_fl, | |
297 | .irq_disable = native_irq_disable, | |
298 | .irq_enable = native_irq_enable, | |
299 | .safe_halt = native_safe_halt, | |
300 | .halt = native_halt, | |
fab58420 JF |
301 | #ifdef CONFIG_X86_64 |
302 | .adjust_exception_frame = paravirt_nop, | |
303 | #endif | |
93b1eab3 | 304 | }; |
d3561b7f | 305 | |
93b1eab3 | 306 | struct pv_cpu_ops pv_cpu_ops = { |
d3561b7f RR |
307 | .cpuid = native_cpuid, |
308 | .get_debugreg = native_get_debugreg, | |
309 | .set_debugreg = native_set_debugreg, | |
310 | .clts = native_clts, | |
311 | .read_cr0 = native_read_cr0, | |
312 | .write_cr0 = native_write_cr0, | |
d3561b7f RR |
313 | .read_cr4 = native_read_cr4, |
314 | .read_cr4_safe = native_read_cr4_safe, | |
315 | .write_cr4 = native_write_cr4, | |
88b4755f GOC |
316 | #ifdef CONFIG_X86_64 |
317 | .read_cr8 = native_read_cr8, | |
318 | .write_cr8 = native_write_cr8, | |
319 | #endif | |
d3561b7f | 320 | .wbinvd = native_wbinvd, |
90a0a06a | 321 | .read_msr = native_read_msr_safe, |
b05f78f5 | 322 | .read_msr_amd = native_read_msr_amd_safe, |
90a0a06a | 323 | .write_msr = native_write_msr_safe, |
d3561b7f RR |
324 | .read_tsc = native_read_tsc, |
325 | .read_pmc = native_read_pmc, | |
e5aaac44 | 326 | .read_tscp = native_read_tscp, |
d3561b7f RR |
327 | .load_tr_desc = native_load_tr_desc, |
328 | .set_ldt = native_set_ldt, | |
329 | .load_gdt = native_load_gdt, | |
330 | .load_idt = native_load_idt, | |
331 | .store_gdt = native_store_gdt, | |
332 | .store_idt = native_store_idt, | |
333 | .store_tr = native_store_tr, | |
334 | .load_tls = native_load_tls, | |
9f9d489a JF |
335 | #ifdef CONFIG_X86_64 |
336 | .load_gs_index = native_load_gs_index, | |
337 | #endif | |
75b8bb3e | 338 | .write_ldt_entry = native_write_ldt_entry, |
014b15be | 339 | .write_gdt_entry = native_write_gdt_entry, |
8d947344 | 340 | .write_idt_entry = native_write_idt_entry, |
38ffbe66 JF |
341 | |
342 | .alloc_ldt = paravirt_nop, | |
343 | .free_ldt = paravirt_nop, | |
344 | ||
faca6227 | 345 | .load_sp0 = native_load_sp0, |
d3561b7f | 346 | |
102d0a4b | 347 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) |
d75cd22f | 348 | .irq_enable_sysexit = native_irq_enable_sysexit, |
102d0a4b | 349 | #endif |
2be29982 | 350 | #ifdef CONFIG_X86_64 |
102d0a4b | 351 | #ifdef CONFIG_IA32_EMULATION |
2be29982 | 352 | .usergs_sysret32 = native_usergs_sysret32, |
102d0a4b | 353 | #endif |
2be29982 | 354 | .usergs_sysret64 = native_usergs_sysret64, |
d75cd22f | 355 | #endif |
93b1eab3 | 356 | .iret = native_iret, |
e801f864 | 357 | .swapgs = native_swapgs, |
93b1eab3 | 358 | |
d3561b7f RR |
359 | .set_iopl_mask = native_set_iopl_mask, |
360 | .io_delay = native_io_delay, | |
8965c1c0 JF |
361 | |
362 | .lazy_mode = { | |
363 | .enter = paravirt_nop, | |
364 | .leave = paravirt_nop, | |
365 | }, | |
93b1eab3 | 366 | }; |
d3561b7f | 367 | |
93b1eab3 | 368 | struct pv_apic_ops pv_apic_ops = { |
13623d79 | 369 | #ifdef CONFIG_X86_LOCAL_APIC |
bbab4f3b ZA |
370 | .setup_boot_clock = setup_boot_APIC_clock, |
371 | .setup_secondary_clock = setup_secondary_APIC_clock, | |
0260c196 | 372 | .startup_ipi_hook = paravirt_nop, |
13623d79 | 373 | #endif |
93b1eab3 JF |
374 | }; |
375 | ||
93b1eab3 | 376 | struct pv_mmu_ops pv_mmu_ops = { |
d8dd8eec | 377 | #ifndef CONFIG_X86_64 |
b239fb25 JF |
378 | .pagetable_setup_start = native_pagetable_setup_start, |
379 | .pagetable_setup_done = native_pagetable_setup_done, | |
a312b37b EH |
380 | #else |
381 | .pagetable_setup_start = paravirt_nop, | |
382 | .pagetable_setup_done = paravirt_nop, | |
d8dd8eec | 383 | #endif |
b239fb25 | 384 | |
93b1eab3 JF |
385 | .read_cr2 = native_read_cr2, |
386 | .write_cr2 = native_write_cr2, | |
387 | .read_cr3 = native_read_cr3, | |
388 | .write_cr3 = native_write_cr3, | |
389 | ||
da181a8b RR |
390 | .flush_tlb_user = native_flush_tlb, |
391 | .flush_tlb_kernel = native_flush_tlb_global, | |
392 | .flush_tlb_single = native_flush_tlb_single, | |
d4c10477 | 393 | .flush_tlb_others = native_flush_tlb_others, |
da181a8b | 394 | |
eba0045f JF |
395 | .pgd_alloc = __paravirt_pgd_alloc, |
396 | .pgd_free = paravirt_nop, | |
397 | ||
6944a9c8 JF |
398 | .alloc_pte = paravirt_nop, |
399 | .alloc_pmd = paravirt_nop, | |
400 | .alloc_pmd_clone = paravirt_nop, | |
2761fa09 | 401 | .alloc_pud = paravirt_nop, |
6944a9c8 JF |
402 | .release_pte = paravirt_nop, |
403 | .release_pmd = paravirt_nop, | |
2761fa09 | 404 | .release_pud = paravirt_nop, |
c119ecce | 405 | |
da181a8b RR |
406 | .set_pte = native_set_pte, |
407 | .set_pte_at = native_set_pte_at, | |
408 | .set_pmd = native_set_pmd, | |
45876233 JF |
409 | .pte_update = paravirt_nop, |
410 | .pte_update_defer = paravirt_nop, | |
3dc494e8 | 411 | |
08b882c6 JF |
412 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
413 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | |
414 | ||
ce6234b5 JF |
415 | #ifdef CONFIG_HIGHPTE |
416 | .kmap_atomic_pte = kmap_atomic, | |
417 | #endif | |
418 | ||
f95f2f7b | 419 | #if PAGETABLE_LEVELS >= 3 |
da181a8b RR |
420 | #ifdef CONFIG_X86_PAE |
421 | .set_pte_atomic = native_set_pte_atomic, | |
422 | .set_pte_present = native_set_pte_present, | |
da181a8b RR |
423 | .pte_clear = native_pte_clear, |
424 | .pmd_clear = native_pmd_clear, | |
f95f2f7b EH |
425 | #endif |
426 | .set_pud = native_set_pud, | |
3dc494e8 JF |
427 | .pmd_val = native_pmd_val, |
428 | .make_pmd = native_make_pmd, | |
f95f2f7b EH |
429 | |
430 | #if PAGETABLE_LEVELS == 4 | |
431 | .pud_val = native_pud_val, | |
432 | .make_pud = native_make_pud, | |
433 | .set_pgd = native_set_pgd, | |
da181a8b | 434 | #endif |
f95f2f7b | 435 | #endif /* PAGETABLE_LEVELS >= 3 */ |
da181a8b | 436 | |
3dc494e8 | 437 | .pte_val = native_pte_val, |
c2e3277f | 438 | .pte_flags = native_pte_flags, |
3dc494e8 JF |
439 | .pgd_val = native_pgd_val, |
440 | ||
441 | .make_pte = native_make_pte, | |
442 | .make_pgd = native_make_pgd, | |
443 | ||
d6dd61c8 JF |
444 | .dup_mmap = paravirt_nop, |
445 | .exit_mmap = paravirt_nop, | |
446 | .activate_mm = paravirt_nop, | |
8965c1c0 JF |
447 | |
448 | .lazy_mode = { | |
449 | .enter = paravirt_nop, | |
450 | .leave = paravirt_nop, | |
451 | }, | |
aeaaa59c JF |
452 | |
453 | .set_fixmap = native_set_fixmap, | |
d3561b7f | 454 | }; |
0dbe5a11 | 455 | |
/*
 * Ops tables exported to modules.  pv_cpu_ops, pv_mmu_ops and pv_irq_ops
 * use plain EXPORT_SYMBOL; the rest are exported GPL-only.
 */
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL(pv_cpu_ops);
EXPORT_SYMBOL(pv_mmu_ops);
EXPORT_SYMBOL_GPL(pv_apic_ops);
EXPORT_SYMBOL_GPL(pv_info);
EXPORT_SYMBOL(pv_irq_ops);