Commit | Line | Data |
---|---|---|
d3561b7f RR |
1 | /* Paravirtualization interfaces |
2 | Copyright (C) 2006 Rusty Russell IBM Corporation | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
b1df07bd GOC |
17 | |
18 | 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc | |
d3561b7f | 19 | */ |
b1df07bd | 20 | |
d3561b7f RR |
21 | #include <linux/errno.h> |
22 | #include <linux/module.h> | |
23 | #include <linux/efi.h> | |
24 | #include <linux/bcd.h> | |
ce6234b5 | 25 | #include <linux/highmem.h> |
376e2424 | 26 | #include <linux/kprobes.h> |
d3561b7f RR |
27 | |
28 | #include <asm/bug.h> | |
29 | #include <asm/paravirt.h> | |
50af5ead | 30 | #include <asm/debugreg.h> |
d3561b7f RR |
31 | #include <asm/desc.h> |
32 | #include <asm/setup.h> | |
a312b37b | 33 | #include <asm/pgtable.h> |
d3561b7f | 34 | #include <asm/time.h> |
eba0045f | 35 | #include <asm/pgalloc.h> |
d3561b7f RR |
36 | #include <asm/irq.h> |
37 | #include <asm/delay.h> | |
13623d79 RR |
38 | #include <asm/fixmap.h> |
39 | #include <asm/apic.h> | |
da181a8b | 40 | #include <asm/tlbflush.h> |
6cb9a835 | 41 | #include <asm/timer.h> |
f05e798a | 42 | #include <asm/special_insns.h> |
d3561b7f | 43 | |
fc57a7c6 AL |
44 | /* |
45 | * nop stub, which must not clobber anything *including the stack* to | |
46 | * avoid confusing the entry prologues. | |
47 | */ | |
48 | extern void _paravirt_nop(void); | |
49 | asm (".pushsection .entry.text, \"ax\"\n" | |
50 | ".global _paravirt_nop\n" | |
51 | "_paravirt_nop:\n\t" | |
52 | "ret\n\t" | |
53 | ".size _paravirt_nop, . - _paravirt_nop\n\t" | |
54 | ".type _paravirt_nop, @function\n\t" | |
55 | ".popsection"); | |
d3561b7f | 56 | |
41edafdb JF |
57 | /* identity function, which can be inlined */ |
58 | u32 _paravirt_ident_32(u32 x) | |
59 | { | |
60 | return x; | |
61 | } | |
62 | ||
63 | u64 _paravirt_ident_64(u64 x) | |
64 | { | |
65 | return x; | |
66 | } | |
67 | ||
6f30c1ac | 68 | void __init default_banner(void) |
d3561b7f RR |
69 | { |
70 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | |
93b1eab3 | 71 | pv_info.name); |
d3561b7f RR |
72 | } |
73 | ||
93b1eab3 JF |
/*
 * Two-byte undefined-instruction sequence, written into patch sites whose
 * ops pointer is missing.
 */
static const unsigned char ud2a[] = { 0x0f, 0x0b };
139ec7c4 | 76 | |
19d36ccd AK |
77 | struct branch { |
78 | unsigned char opcode; | |
79 | u32 delta; | |
80 | } __attribute__((packed)); | |
81 | ||
ab144f5e AK |
82 | unsigned paravirt_patch_call(void *insnbuf, |
83 | const void *target, u16 tgt_clobbers, | |
84 | unsigned long addr, u16 site_clobbers, | |
63f70270 JF |
85 | unsigned len) |
86 | { | |
ab144f5e AK |
87 | struct branch *b = insnbuf; |
88 | unsigned long delta = (unsigned long)target - (addr+5); | |
63f70270 JF |
89 | |
90 | if (tgt_clobbers & ~site_clobbers) | |
91 | return len; /* target would clobber too much for this site */ | |
92 | if (len < 5) | |
93 | return len; /* call too long for patch site */ | |
139ec7c4 | 94 | |
ab144f5e AK |
95 | b->opcode = 0xe8; /* call */ |
96 | b->delta = delta; | |
97 | BUILD_BUG_ON(sizeof(*b) != 5); | |
139ec7c4 | 98 | |
63f70270 JF |
99 | return 5; |
100 | } | |
101 | ||
93b1eab3 | 102 | unsigned paravirt_patch_jmp(void *insnbuf, const void *target, |
ab144f5e | 103 | unsigned long addr, unsigned len) |
63f70270 | 104 | { |
ab144f5e AK |
105 | struct branch *b = insnbuf; |
106 | unsigned long delta = (unsigned long)target - (addr+5); | |
63f70270 JF |
107 | |
108 | if (len < 5) | |
109 | return len; /* call too long for patch site */ | |
110 | ||
ab144f5e AK |
111 | b->opcode = 0xe9; /* jmp */ |
112 | b->delta = delta; | |
63f70270 JF |
113 | |
114 | return 5; | |
115 | } | |
116 | ||
93b1eab3 JF |
117 | /* Neat trick to map patch type back to the call within the |
118 | * corresponding structure. */ | |
119 | static void *get_call_destination(u8 type) | |
120 | { | |
121 | struct paravirt_patch_template tmpl = { | |
122 | .pv_init_ops = pv_init_ops, | |
93b1eab3 JF |
123 | .pv_time_ops = pv_time_ops, |
124 | .pv_cpu_ops = pv_cpu_ops, | |
125 | .pv_irq_ops = pv_irq_ops, | |
93b1eab3 | 126 | .pv_mmu_ops = pv_mmu_ops, |
b4ecc126 | 127 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
74d4affd | 128 | .pv_lock_ops = pv_lock_ops, |
b4ecc126 | 129 | #endif |
93b1eab3 JF |
130 | }; |
131 | return *((void **)&tmpl + type); | |
132 | } | |
133 | ||
ab144f5e AK |
134 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, |
135 | unsigned long addr, unsigned len) | |
63f70270 | 136 | { |
93b1eab3 | 137 | void *opfunc = get_call_destination(type); |
63f70270 JF |
138 | unsigned ret; |
139 | ||
140 | if (opfunc == NULL) | |
141 | /* If there's no function, patch it with a ud2a (BUG) */ | |
93b1eab3 | 142 | ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); |
41edafdb | 143 | else if (opfunc == _paravirt_nop) |
79f1d836 | 144 | ret = 0; |
41edafdb JF |
145 | |
146 | /* identity functions just return their single argument */ | |
147 | else if (opfunc == _paravirt_ident_32) | |
148 | ret = paravirt_patch_ident_32(insnbuf, len); | |
149 | else if (opfunc == _paravirt_ident_64) | |
150 | ret = paravirt_patch_ident_64(insnbuf, len); | |
151 | ||
93b1eab3 | 152 | else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || |
2be29982 | 153 | type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64)) |
63f70270 | 154 | /* If operation requires a jmp, then jmp */ |
93b1eab3 | 155 | ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); |
63f70270 JF |
156 | else |
157 | /* Otherwise call the function; assume target could | |
158 | clobber any caller-save reg */ | |
ab144f5e AK |
159 | ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, |
160 | addr, clobbers, len); | |
63f70270 JF |
161 | |
162 | return ret; | |
163 | } | |
164 | ||
/*
 * Copy the instruction bytes in [@start, @end) into @insnbuf.
 *
 * @insnbuf: destination buffer
 * @len:     bytes available at the patch site
 * @start:   first byte of the replacement sequence (may be NULL)
 * @end:     one past the last byte of the replacement sequence
 *
 * Returns the number of bytes copied.  When @start is NULL or the sequence
 * is longer than @len, nothing is copied and @len is returned instead.
 */
unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
			      const char *start, const char *end)
{
	unsigned insn_len = end - start;

	if (start == NULL || insn_len > len)
		return len;

	memcpy(insnbuf, start, insn_len);
	return insn_len;
}
177 | ||
/* Out-of-line wrapper around __native_flush_tlb(). */
static void native_flush_tlb(void)
{
	__native_flush_tlb();
}
182 | ||
/*
 * Out-of-line wrapper around __native_flush_tlb_global().
 *
 * Global pages need a somewhat different flush.  That is not a real
 * performance concern, since it does not happen often.
 */
static void native_flush_tlb_global(void)
{
	__native_flush_tlb_global();
}
191 | ||
/* Out-of-line wrapper around __native_flush_tlb_single() for @addr. */
static void native_flush_tlb_single(unsigned long addr)
{
	__native_flush_tlb_single(addr);
}
196 | ||
c5905afb IM |
197 | struct static_key paravirt_steal_enabled; |
198 | struct static_key paravirt_steal_rq_enabled; | |
3c404b57 GC |
199 | |
200 | static u64 native_steal_clock(int cpu) | |
201 | { | |
202 | return 0; | |
203 | } | |
204 | ||
/* Implemented in entry.S; installed below as the native iret/sysret ops. */
extern void native_iret(void);
extern void native_usergs_sysret64(void);
d3561b7f | 208 | |
d572929c JF |
209 | static struct resource reserve_ioports = { |
210 | .start = 0, | |
211 | .end = IO_SPACE_LIMIT, | |
212 | .name = "paravirt-ioport", | |
213 | .flags = IORESOURCE_IO | IORESOURCE_BUSY, | |
214 | }; | |
215 | ||
d572929c JF |
216 | /* |
217 | * Reserve the whole legacy IO space to prevent any legacy drivers | |
218 | * from wasting time probing for their hardware. This is a fairly | |
219 | * brute-force approach to disabling all non-virtual drivers. | |
220 | * | |
221 | * Note that this must be called very early to have any effect. | |
222 | */ | |
223 | int paravirt_disable_iospace(void) | |
224 | { | |
f7743fe6 | 225 | return request_resource(&ioport_resource, &reserve_ioports); |
d572929c JF |
226 | } |
227 | ||
8965c1c0 JF |
228 | static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; |
229 | ||
230 | static inline void enter_lazy(enum paravirt_lazy_mode mode) | |
231 | { | |
c6ae41e7 | 232 | BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); |
8965c1c0 | 233 | |
c6ae41e7 | 234 | this_cpu_write(paravirt_lazy_mode, mode); |
8965c1c0 JF |
235 | } |
236 | ||
b407fc57 | 237 | static void leave_lazy(enum paravirt_lazy_mode mode) |
8965c1c0 | 238 | { |
c6ae41e7 | 239 | BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode); |
8965c1c0 | 240 | |
c6ae41e7 | 241 | this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); |
8965c1c0 JF |
242 | } |
243 | ||
244 | void paravirt_enter_lazy_mmu(void) | |
245 | { | |
246 | enter_lazy(PARAVIRT_LAZY_MMU); | |
247 | } | |
248 | ||
249 | void paravirt_leave_lazy_mmu(void) | |
250 | { | |
b407fc57 | 251 | leave_lazy(PARAVIRT_LAZY_MMU); |
8965c1c0 JF |
252 | } |
253 | ||
511ba86e BO |
254 | void paravirt_flush_lazy_mmu(void) |
255 | { | |
256 | preempt_disable(); | |
257 | ||
258 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { | |
259 | arch_leave_lazy_mmu_mode(); | |
260 | arch_enter_lazy_mmu_mode(); | |
261 | } | |
262 | ||
263 | preempt_enable(); | |
264 | } | |
265 | ||
224101ed | 266 | void paravirt_start_context_switch(struct task_struct *prev) |
8965c1c0 | 267 | { |
2829b449 JF |
268 | BUG_ON(preemptible()); |
269 | ||
c6ae41e7 | 270 | if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { |
b407fc57 | 271 | arch_leave_lazy_mmu_mode(); |
224101ed | 272 | set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); |
b407fc57 | 273 | } |
8965c1c0 JF |
274 | enter_lazy(PARAVIRT_LAZY_CPU); |
275 | } | |
276 | ||
224101ed | 277 | void paravirt_end_context_switch(struct task_struct *next) |
8965c1c0 | 278 | { |
2829b449 JF |
279 | BUG_ON(preemptible()); |
280 | ||
b407fc57 JF |
281 | leave_lazy(PARAVIRT_LAZY_CPU); |
282 | ||
224101ed | 283 | if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) |
b407fc57 | 284 | arch_enter_lazy_mmu_mode(); |
8965c1c0 JF |
285 | } |
286 | ||
287 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | |
288 | { | |
b8bcfe99 JF |
289 | if (in_interrupt()) |
290 | return PARAVIRT_LAZY_NONE; | |
291 | ||
c6ae41e7 | 292 | return this_cpu_read(paravirt_lazy_mode); |
8965c1c0 JF |
293 | } |
294 | ||
93b1eab3 | 295 | struct pv_info pv_info = { |
d3561b7f | 296 | .name = "bare hardware", |
d3561b7f | 297 | .kernel_rpl = 0, |
5311ab62 | 298 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ |
318f5a2a AL |
299 | |
300 | #ifdef CONFIG_X86_64 | |
301 | .extra_user_64bit_cs = __USER_CS, | |
302 | #endif | |
93b1eab3 | 303 | }; |
d3561b7f | 304 | |
93b1eab3 JF |
305 | struct pv_init_ops pv_init_ops = { |
306 | .patch = native_patch, | |
93b1eab3 JF |
307 | }; |
308 | ||
309 | struct pv_time_ops pv_time_ops = { | |
93b1eab3 | 310 | .sched_clock = native_sched_clock, |
3c404b57 | 311 | .steal_clock = native_steal_clock, |
93b1eab3 JF |
312 | }; |
313 | ||
9a55fdbe | 314 | __visible struct pv_irq_ops pv_irq_ops = { |
ecb93d1c JF |
315 | .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), |
316 | .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), | |
317 | .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), | |
318 | .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), | |
93b1eab3 JF |
319 | .safe_halt = native_safe_halt, |
320 | .halt = native_halt, | |
fab58420 JF |
321 | #ifdef CONFIG_X86_64 |
322 | .adjust_exception_frame = paravirt_nop, | |
323 | #endif | |
93b1eab3 | 324 | }; |
d3561b7f | 325 | |
9a55fdbe | 326 | __visible struct pv_cpu_ops pv_cpu_ops = { |
d3561b7f RR |
327 | .cpuid = native_cpuid, |
328 | .get_debugreg = native_get_debugreg, | |
329 | .set_debugreg = native_set_debugreg, | |
330 | .clts = native_clts, | |
331 | .read_cr0 = native_read_cr0, | |
332 | .write_cr0 = native_write_cr0, | |
d3561b7f RR |
333 | .read_cr4 = native_read_cr4, |
334 | .read_cr4_safe = native_read_cr4_safe, | |
335 | .write_cr4 = native_write_cr4, | |
88b4755f GOC |
336 | #ifdef CONFIG_X86_64 |
337 | .read_cr8 = native_read_cr8, | |
338 | .write_cr8 = native_write_cr8, | |
339 | #endif | |
d3561b7f | 340 | .wbinvd = native_wbinvd, |
dd2f4a00 AL |
341 | .read_msr = native_read_msr, |
342 | .write_msr = native_write_msr, | |
c2ee03b2 AL |
343 | .read_msr_safe = native_read_msr_safe, |
344 | .write_msr_safe = native_write_msr_safe, | |
d3561b7f RR |
345 | .read_pmc = native_read_pmc, |
346 | .load_tr_desc = native_load_tr_desc, | |
347 | .set_ldt = native_set_ldt, | |
348 | .load_gdt = native_load_gdt, | |
349 | .load_idt = native_load_idt, | |
d3561b7f RR |
350 | .store_idt = native_store_idt, |
351 | .store_tr = native_store_tr, | |
352 | .load_tls = native_load_tls, | |
9f9d489a JF |
353 | #ifdef CONFIG_X86_64 |
354 | .load_gs_index = native_load_gs_index, | |
355 | #endif | |
75b8bb3e | 356 | .write_ldt_entry = native_write_ldt_entry, |
014b15be | 357 | .write_gdt_entry = native_write_gdt_entry, |
8d947344 | 358 | .write_idt_entry = native_write_idt_entry, |
38ffbe66 JF |
359 | |
360 | .alloc_ldt = paravirt_nop, | |
361 | .free_ldt = paravirt_nop, | |
362 | ||
faca6227 | 363 | .load_sp0 = native_load_sp0, |
d3561b7f | 364 | |
2be29982 | 365 | #ifdef CONFIG_X86_64 |
2be29982 | 366 | .usergs_sysret64 = native_usergs_sysret64, |
d75cd22f | 367 | #endif |
93b1eab3 | 368 | .iret = native_iret, |
e801f864 | 369 | .swapgs = native_swapgs, |
93b1eab3 | 370 | |
d3561b7f RR |
371 | .set_iopl_mask = native_set_iopl_mask, |
372 | .io_delay = native_io_delay, | |
8965c1c0 | 373 | |
224101ed JF |
374 | .start_context_switch = paravirt_nop, |
375 | .end_context_switch = paravirt_nop, | |
93b1eab3 | 376 | }; |
d3561b7f | 377 | |
/*
 * By this point native_get/set_debugreg have real function entries, so
 * they can be passed to NOKPROBE_SYMBOL() here, together with
 * native_load_idt.
 */
NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
376e2424 | 382 | |
41edafdb JF |
/*
 * PTE_IDENT: the identity op matching the pagetable entry width, wrapped
 * with __PV_IS_CALLEE_SAVE().
 */
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
/* 32-bit pagetable entries */
#define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_32)
#else
/* 64-bit pagetable entries */
#define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_64)
#endif
390 | ||
93b1eab3 | 391 | struct pv_mmu_ops pv_mmu_ops = { |
b239fb25 | 392 | |
93b1eab3 JF |
393 | .read_cr2 = native_read_cr2, |
394 | .write_cr2 = native_write_cr2, | |
395 | .read_cr3 = native_read_cr3, | |
396 | .write_cr3 = native_write_cr3, | |
397 | ||
da181a8b RR |
398 | .flush_tlb_user = native_flush_tlb, |
399 | .flush_tlb_kernel = native_flush_tlb_global, | |
400 | .flush_tlb_single = native_flush_tlb_single, | |
d4c10477 | 401 | .flush_tlb_others = native_flush_tlb_others, |
da181a8b | 402 | |
eba0045f JF |
403 | .pgd_alloc = __paravirt_pgd_alloc, |
404 | .pgd_free = paravirt_nop, | |
405 | ||
6944a9c8 JF |
406 | .alloc_pte = paravirt_nop, |
407 | .alloc_pmd = paravirt_nop, | |
2761fa09 | 408 | .alloc_pud = paravirt_nop, |
6944a9c8 JF |
409 | .release_pte = paravirt_nop, |
410 | .release_pmd = paravirt_nop, | |
2761fa09 | 411 | .release_pud = paravirt_nop, |
c119ecce | 412 | |
da181a8b RR |
413 | .set_pte = native_set_pte, |
414 | .set_pte_at = native_set_pte_at, | |
415 | .set_pmd = native_set_pmd, | |
331127f7 | 416 | .set_pmd_at = native_set_pmd_at, |
45876233 | 417 | .pte_update = paravirt_nop, |
3dc494e8 | 418 | |
08b882c6 JF |
419 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
420 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | |
421 | ||
98233368 | 422 | #if CONFIG_PGTABLE_LEVELS >= 3 |
da181a8b RR |
423 | #ifdef CONFIG_X86_PAE |
424 | .set_pte_atomic = native_set_pte_atomic, | |
da181a8b RR |
425 | .pte_clear = native_pte_clear, |
426 | .pmd_clear = native_pmd_clear, | |
f95f2f7b EH |
427 | #endif |
428 | .set_pud = native_set_pud, | |
da5de7c2 JF |
429 | |
430 | .pmd_val = PTE_IDENT, | |
431 | .make_pmd = PTE_IDENT, | |
f95f2f7b | 432 | |
98233368 | 433 | #if CONFIG_PGTABLE_LEVELS == 4 |
da5de7c2 JF |
434 | .pud_val = PTE_IDENT, |
435 | .make_pud = PTE_IDENT, | |
436 | ||
f95f2f7b | 437 | .set_pgd = native_set_pgd, |
da181a8b | 438 | #endif |
98233368 | 439 | #endif /* CONFIG_PGTABLE_LEVELS >= 3 */ |
da181a8b | 440 | |
da5de7c2 JF |
441 | .pte_val = PTE_IDENT, |
442 | .pgd_val = PTE_IDENT, | |
3dc494e8 | 443 | |
da5de7c2 JF |
444 | .make_pte = PTE_IDENT, |
445 | .make_pgd = PTE_IDENT, | |
3dc494e8 | 446 | |
d6dd61c8 JF |
447 | .dup_mmap = paravirt_nop, |
448 | .exit_mmap = paravirt_nop, | |
449 | .activate_mm = paravirt_nop, | |
8965c1c0 JF |
450 | |
451 | .lazy_mode = { | |
452 | .enter = paravirt_nop, | |
453 | .leave = paravirt_nop, | |
511ba86e | 454 | .flush = paravirt_nop, |
8965c1c0 | 455 | }, |
aeaaa59c JF |
456 | |
457 | .set_fixmap = native_set_fixmap, | |
d3561b7f | 458 | }; |
0dbe5a11 | 459 | |
/* Symbol exports; pv_time_ops and pv_info are exported GPL-only. */
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL(pv_cpu_ops);
EXPORT_SYMBOL(pv_mmu_ops);
EXPORT_SYMBOL_GPL(pv_info);
EXPORT_SYMBOL(pv_irq_ops);