Commit | Line | Data |
---|---|---|
f87e4cac JF |
1 | /* |
2 | * Xen SMP support | |
3 | * | |
4 | * This file implements the Xen versions of smp_ops. SMP under Xen is | |
5 | * very straightforward. Bringing a CPU up is simply a matter of | |
6 | * loading its initial context and setting it running. | |
7 | * | |
8 | * IPIs are handled through the Xen event mechanism. | |
9 | * | |
10 | * Because virtual CPUs can be scheduled onto any real CPU, there's no | |
11 | * useful topology information for the kernel to make use of. As a | |
12 | * result, all CPUs are treated as if they're single-core and | |
13 | * single-threaded. | |
f87e4cac JF |
14 | */ |
15 | #include <linux/sched.h> | |
16 | #include <linux/err.h> | |
5a0e3ad6 | 17 | #include <linux/slab.h> |
f87e4cac | 18 | #include <linux/smp.h> |
1ff2b0c3 | 19 | #include <linux/irq_work.h> |
466318a8 | 20 | #include <linux/tick.h> |
f87e4cac JF |
21 | |
22 | #include <asm/paravirt.h> | |
23 | #include <asm/desc.h> | |
24 | #include <asm/pgtable.h> | |
25 | #include <asm/cpu.h> | |
26 | ||
27 | #include <xen/interface/xen.h> | |
28 | #include <xen/interface/vcpu.h> | |
65d0cf0b | 29 | #include <xen/interface/xenpmu.h> |
f87e4cac JF |
30 | |
31 | #include <asm/xen/interface.h> | |
32 | #include <asm/xen/hypercall.h> | |
33 | ||
ea5b8f73 | 34 | #include <xen/xen.h> |
f87e4cac JF |
35 | #include <xen/page.h> |
36 | #include <xen/events.h> | |
37 | ||
ed467e69 | 38 | #include <xen/hvc-console.h> |
f87e4cac JF |
39 | #include "xen-ops.h" |
40 | #include "mmu.h" | |
a2ef5dc2 | 41 | #include "smp.h" |
65d0cf0b | 42 | #include "pmu.h" |
f87e4cac | 43 | |
b78936e1 | 44 | cpumask_var_t xen_cpu_initialized_map; |
f87e4cac | 45 | |
9547689f KRW |
46 | struct xen_common_irq { |
47 | int irq; | |
48 | char *name; | |
49 | }; | |
ee336e10 KRW |
50 | static DEFINE_PER_CPU(struct xen_common_irq, xen_resched_irq) = { .irq = -1 }; |
51 | static DEFINE_PER_CPU(struct xen_common_irq, xen_callfunc_irq) = { .irq = -1 }; | |
52 | static DEFINE_PER_CPU(struct xen_common_irq, xen_callfuncsingle_irq) = { .irq = -1 }; | |
53 | static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 }; | |
9547689f | 54 | static DEFINE_PER_CPU(struct xen_common_irq, xen_debug_irq) = { .irq = -1 }; |
65d0cf0b | 55 | static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 }; |
f87e4cac JF |
56 | |
57 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); | |
3b16cf87 | 58 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); |
1ff2b0c3 | 59 | static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id); |
f87e4cac JF |
60 | |
61 | /* | |
184748cc | 62 | * Reschedule call back. |
f87e4cac JF |
63 | */ |
64 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | |
65 | { | |
1b437c8c | 66 | inc_irq_stat(irq_resched_count); |
184748cc | 67 | scheduler_ipi(); |
38bb5ab4 | 68 | |
f87e4cac JF |
69 | return IRQ_HANDLED; |
70 | } | |
71 | ||
148f9bb8 | 72 | static void cpu_bringup(void) |
f87e4cac | 73 | { |
e8c9e788 | 74 | int cpu; |
f87e4cac JF |
75 | |
76 | cpu_init(); | |
d68d82af | 77 | touch_softlockup_watchdog(); |
c7b75947 JF |
78 | preempt_disable(); |
79 | ||
5840c84b MR |
80 | /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */ |
81 | if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { | |
82 | xen_enable_sysenter(); | |
83 | xen_enable_syscall(); | |
84 | } | |
c7b75947 JF |
85 | cpu = smp_processor_id(); |
86 | smp_store_cpu_info(cpu); | |
87 | cpu_data(cpu).x86_max_cores = 1; | |
88 | set_cpu_sibling_map(cpu); | |
f87e4cac | 89 | |
88540ad0 BO |
90 | /* |
91 | * identify_cpu() may have set logical_pkg_id to -1 due | |
92 | * to incorrect phys_proc_id. Let's re-comupte it. | |
93 | */ | |
94 | topology_update_package_map(apic->cpu_present_to_apicid(cpu), cpu); | |
95 | ||
f87e4cac JF |
96 | xen_setup_cpu_clockevents(); |
97 | ||
106b4438 KRW |
98 | notify_cpu_starting(cpu); |
99 | ||
d7d3756c | 100 | set_cpu_online(cpu, true); |
106b4438 | 101 | |
2a442c9c | 102 | cpu_set_state_online(cpu); /* Implies full memory barrier. */ |
c7b75947 | 103 | |
f87e4cac JF |
104 | /* We can take interrupts now: we're officially "up". */ |
105 | local_irq_enable(); | |
d68d82af AN |
106 | } |
107 | ||
a2ef5dc2 MR |
108 | /* |
109 | * Note: cpu parameter is only relevant for PVH. The reason for passing it | |
110 | * is we can't do smp_processor_id until the percpu segments are loaded, for | |
111 | * which we need the cpu number! So we pass it in rdi as first parameter. | |
112 | */ | |
113 | asmlinkage __visible void cpu_bringup_and_idle(int cpu) | |
d68d82af | 114 | { |
a2ef5dc2 | 115 | #ifdef CONFIG_XEN_PVH |
5840c84b MR |
116 | if (xen_feature(XENFEAT_auto_translated_physmap) && |
117 | xen_feature(XENFEAT_supervisor_mode_kernel)) | |
c9f6e997 | 118 | xen_pvh_secondary_vcpu_init(cpu); |
5840c84b | 119 | #endif |
d68d82af | 120 | cpu_bringup(); |
fc6d73d6 | 121 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); |
f87e4cac JF |
122 | } |
123 | ||
53b94fdc KRW |
124 | static void xen_smp_intr_free(unsigned int cpu) |
125 | { | |
ee336e10 | 126 | if (per_cpu(xen_resched_irq, cpu).irq >= 0) { |
9547689f | 127 | unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu).irq, NULL); |
ee336e10 | 128 | per_cpu(xen_resched_irq, cpu).irq = -1; |
b85fffec KRW |
129 | kfree(per_cpu(xen_resched_irq, cpu).name); |
130 | per_cpu(xen_resched_irq, cpu).name = NULL; | |
ee336e10 KRW |
131 | } |
132 | if (per_cpu(xen_callfunc_irq, cpu).irq >= 0) { | |
9547689f | 133 | unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu).irq, NULL); |
ee336e10 | 134 | per_cpu(xen_callfunc_irq, cpu).irq = -1; |
b85fffec KRW |
135 | kfree(per_cpu(xen_callfunc_irq, cpu).name); |
136 | per_cpu(xen_callfunc_irq, cpu).name = NULL; | |
ee336e10 KRW |
137 | } |
138 | if (per_cpu(xen_debug_irq, cpu).irq >= 0) { | |
9547689f | 139 | unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu).irq, NULL); |
ee336e10 | 140 | per_cpu(xen_debug_irq, cpu).irq = -1; |
b85fffec KRW |
141 | kfree(per_cpu(xen_debug_irq, cpu).name); |
142 | per_cpu(xen_debug_irq, cpu).name = NULL; | |
ee336e10 KRW |
143 | } |
144 | if (per_cpu(xen_callfuncsingle_irq, cpu).irq >= 0) { | |
9547689f | 145 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu).irq, |
53b94fdc | 146 | NULL); |
ee336e10 | 147 | per_cpu(xen_callfuncsingle_irq, cpu).irq = -1; |
b85fffec KRW |
148 | kfree(per_cpu(xen_callfuncsingle_irq, cpu).name); |
149 | per_cpu(xen_callfuncsingle_irq, cpu).name = NULL; | |
ee336e10 | 150 | } |
53b94fdc KRW |
151 | if (xen_hvm_domain()) |
152 | return; | |
153 | ||
ee336e10 | 154 | if (per_cpu(xen_irq_work, cpu).irq >= 0) { |
9547689f | 155 | unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL); |
ee336e10 | 156 | per_cpu(xen_irq_work, cpu).irq = -1; |
b85fffec KRW |
157 | kfree(per_cpu(xen_irq_work, cpu).name); |
158 | per_cpu(xen_irq_work, cpu).name = NULL; | |
ee336e10 | 159 | } |
65d0cf0b BO |
160 | |
161 | if (per_cpu(xen_pmu_irq, cpu).irq >= 0) { | |
162 | unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL); | |
163 | per_cpu(xen_pmu_irq, cpu).irq = -1; | |
164 | kfree(per_cpu(xen_pmu_irq, cpu).name); | |
165 | per_cpu(xen_pmu_irq, cpu).name = NULL; | |
166 | } | |
53b94fdc | 167 | }; |
f87e4cac JF |
168 | static int xen_smp_intr_init(unsigned int cpu) |
169 | { | |
170 | int rc; | |
65d0cf0b | 171 | char *resched_name, *callfunc_name, *debug_name, *pmu_name; |
f87e4cac JF |
172 | |
173 | resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); | |
174 | rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, | |
175 | cpu, | |
176 | xen_reschedule_interrupt, | |
9d71cee6 | 177 | IRQF_PERCPU|IRQF_NOBALANCING, |
f87e4cac JF |
178 | resched_name, |
179 | NULL); | |
180 | if (rc < 0) | |
181 | goto fail; | |
9547689f | 182 | per_cpu(xen_resched_irq, cpu).irq = rc; |
b85fffec | 183 | per_cpu(xen_resched_irq, cpu).name = resched_name; |
f87e4cac JF |
184 | |
185 | callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); | |
186 | rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, | |
187 | cpu, | |
188 | xen_call_function_interrupt, | |
9d71cee6 | 189 | IRQF_PERCPU|IRQF_NOBALANCING, |
f87e4cac JF |
190 | callfunc_name, |
191 | NULL); | |
192 | if (rc < 0) | |
193 | goto fail; | |
9547689f | 194 | per_cpu(xen_callfunc_irq, cpu).irq = rc; |
b85fffec | 195 | per_cpu(xen_callfunc_irq, cpu).name = callfunc_name; |
f87e4cac | 196 | |
ee523ca1 JF |
197 | debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); |
198 | rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, | |
9d71cee6 | 199 | IRQF_PERCPU | IRQF_NOBALANCING, |
ee523ca1 JF |
200 | debug_name, NULL); |
201 | if (rc < 0) | |
202 | goto fail; | |
9547689f | 203 | per_cpu(xen_debug_irq, cpu).irq = rc; |
b85fffec | 204 | per_cpu(xen_debug_irq, cpu).name = debug_name; |
ee523ca1 | 205 | |
3b16cf87 JA |
206 | callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); |
207 | rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, | |
208 | cpu, | |
209 | xen_call_function_single_interrupt, | |
9d71cee6 | 210 | IRQF_PERCPU|IRQF_NOBALANCING, |
3b16cf87 JA |
211 | callfunc_name, |
212 | NULL); | |
213 | if (rc < 0) | |
214 | goto fail; | |
9547689f | 215 | per_cpu(xen_callfuncsingle_irq, cpu).irq = rc; |
b85fffec | 216 | per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name; |
3b16cf87 | 217 | |
27d8b207 KRW |
218 | /* |
219 | * The IRQ worker on PVHVM goes through the native path and uses the | |
220 | * IPI mechanism. | |
221 | */ | |
222 | if (xen_hvm_domain()) | |
223 | return 0; | |
224 | ||
1ff2b0c3 LM |
225 | callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu); |
226 | rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR, | |
227 | cpu, | |
228 | xen_irq_work_interrupt, | |
9d71cee6 | 229 | IRQF_PERCPU|IRQF_NOBALANCING, |
1ff2b0c3 LM |
230 | callfunc_name, |
231 | NULL); | |
232 | if (rc < 0) | |
233 | goto fail; | |
9547689f | 234 | per_cpu(xen_irq_work, cpu).irq = rc; |
b85fffec | 235 | per_cpu(xen_irq_work, cpu).name = callfunc_name; |
1ff2b0c3 | 236 | |
65d0cf0b BO |
237 | if (is_xen_pmu(cpu)) { |
238 | pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu); | |
239 | rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu, | |
240 | xen_pmu_irq_handler, | |
241 | IRQF_PERCPU|IRQF_NOBALANCING, | |
242 | pmu_name, NULL); | |
243 | if (rc < 0) | |
244 | goto fail; | |
245 | per_cpu(xen_pmu_irq, cpu).irq = rc; | |
246 | per_cpu(xen_pmu_irq, cpu).name = pmu_name; | |
247 | } | |
248 | ||
f87e4cac JF |
249 | return 0; |
250 | ||
251 | fail: | |
53b94fdc | 252 | xen_smp_intr_free(cpu); |
f87e4cac JF |
253 | return rc; |
254 | } | |
255 | ||
c7b75947 | 256 | static void __init xen_fill_possible_map(void) |
f87e4cac JF |
257 | { |
258 | int i, rc; | |
259 | ||
ea5b8f73 SS |
260 | if (xen_initial_domain()) |
261 | return; | |
262 | ||
263 | for (i = 0; i < nr_cpu_ids; i++) { | |
264 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); | |
265 | if (rc >= 0) { | |
266 | num_processors++; | |
267 | set_cpu_possible(i, true); | |
268 | } | |
269 | } | |
270 | } | |
271 | ||
272 | static void __init xen_filter_cpu_maps(void) | |
273 | { | |
274 | int i, rc; | |
cf405ae6 | 275 | unsigned int subtract = 0; |
ea5b8f73 SS |
276 | |
277 | if (!xen_initial_domain()) | |
278 | return; | |
279 | ||
801fd14a SS |
280 | num_processors = 0; |
281 | disabled_cpus = 0; | |
e7986739 | 282 | for (i = 0; i < nr_cpu_ids; i++) { |
f87e4cac | 283 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); |
4560a294 JF |
284 | if (rc >= 0) { |
285 | num_processors++; | |
4f062896 | 286 | set_cpu_possible(i, true); |
801fd14a SS |
287 | } else { |
288 | set_cpu_possible(i, false); | |
289 | set_cpu_present(i, false); | |
cf405ae6 | 290 | subtract++; |
4560a294 | 291 | } |
f87e4cac | 292 | } |
cf405ae6 KRW |
293 | #ifdef CONFIG_HOTPLUG_CPU |
294 | /* This is akin to using 'nr_cpus' on the Linux command line. | |
295 | * Which is OK as when we use 'dom0_max_vcpus=X' we can only | |
296 | * have up to X, while nr_cpu_ids is greater than X. This | |
297 | * normally is not a problem, except when CPU hotplugging | |
298 | * is involved and then there might be more than X CPUs | |
299 | * in the guest - which will not work as there is no | |
300 | * hypercall to expand the max number of VCPUs an already | |
301 | * running guest has. So cap it up to X. */ | |
302 | if (subtract) | |
303 | nr_cpu_ids = nr_cpu_ids - subtract; | |
304 | #endif | |
305 | ||
f87e4cac JF |
306 | } |
307 | ||
a9e7062d | 308 | static void __init xen_smp_prepare_boot_cpu(void) |
f87e4cac | 309 | { |
f87e4cac JF |
310 | BUG_ON(smp_processor_id() != 0); |
311 | native_smp_prepare_boot_cpu(); | |
312 | ||
26a79995 | 313 | if (xen_pv_domain()) { |
5840c84b MR |
314 | if (!xen_feature(XENFEAT_writable_page_tables)) |
315 | /* We've switched to the "real" per-cpu gdt, so make | |
316 | * sure the old memory can be recycled. */ | |
317 | make_lowmem_page_readwrite(xen_initial_gdt); | |
60223a32 | 318 | |
7cde9b27 FZ |
319 | #ifdef CONFIG_X86_32 |
320 | /* | |
321 | * Xen starts us with XEN_FLAT_RING1_DS, but linux code | |
322 | * expects __USER_DS | |
323 | */ | |
324 | loadsegment(ds, __USER_DS); | |
325 | loadsegment(es, __USER_DS); | |
326 | #endif | |
327 | ||
26a79995 KRW |
328 | xen_filter_cpu_maps(); |
329 | xen_setup_vcpu_info_placement(); | |
330 | } | |
ee42d665 VK |
331 | |
332 | /* | |
333 | * Setup vcpu_info for boot CPU. | |
334 | */ | |
335 | if (xen_hvm_domain()) | |
336 | xen_vcpu_setup(0); | |
337 | ||
26a79995 KRW |
338 | /* |
339 | * The alternative logic (which patches the unlock/lock) runs before | |
340 | * the smp bootup up code is activated. Hence we need to set this up | |
341 | * the core kernel is being patched. Otherwise we will have only | |
342 | * modules patched but not core code. | |
343 | */ | |
bf7aab3a | 344 | xen_init_spinlocks(); |
f87e4cac JF |
345 | } |
346 | ||
a9e7062d | 347 | static void __init xen_smp_prepare_cpus(unsigned int max_cpus) |
f87e4cac JF |
348 | { |
349 | unsigned cpu; | |
900cba88 | 350 | unsigned int i; |
f87e4cac | 351 | |
ed467e69 KRW |
352 | if (skip_ioapic_setup) { |
353 | char *m = (max_cpus == 0) ? | |
354 | "The nosmp parameter is incompatible with Xen; " \ | |
355 | "use Xen dom0_max_vcpus=1 parameter" : | |
356 | "The noapic parameter is incompatible with Xen"; | |
357 | ||
358 | xen_raw_printk(m); | |
359 | panic(m); | |
360 | } | |
2d9e1e2f JF |
361 | xen_init_lock_cpu(0); |
362 | ||
06d0b5d9 | 363 | smp_store_boot_cpu_info(); |
c7b75947 | 364 | cpu_data(0).x86_max_cores = 1; |
900cba88 AJ |
365 | |
366 | for_each_possible_cpu(i) { | |
367 | zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); | |
368 | zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); | |
369 | zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); | |
370 | } | |
f87e4cac JF |
371 | set_cpu_sibling_map(0); |
372 | ||
65d0cf0b BO |
373 | xen_pmu_init(0); |
374 | ||
f87e4cac JF |
375 | if (xen_smp_intr_init(0)) |
376 | BUG(); | |
377 | ||
b78936e1 MT |
378 | if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) |
379 | panic("could not allocate xen_cpu_initialized_map\n"); | |
380 | ||
381 | cpumask_copy(xen_cpu_initialized_map, cpumask_of(0)); | |
f87e4cac JF |
382 | |
383 | /* Restrict the possible_map according to max_cpus. */ | |
384 | while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { | |
e7986739 | 385 | for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) |
f87e4cac | 386 | continue; |
4f062896 | 387 | set_cpu_possible(cpu, false); |
f87e4cac JF |
388 | } |
389 | ||
7eb43a6d | 390 | for_each_possible_cpu(cpu) |
4f062896 | 391 | set_cpu_present(cpu, true); |
f87e4cac JF |
392 | } |
393 | ||
148f9bb8 | 394 | static int |
f87e4cac JF |
395 | cpu_initialize_context(unsigned int cpu, struct task_struct *idle) |
396 | { | |
397 | struct vcpu_guest_context *ctxt; | |
c7b75947 | 398 | struct desc_struct *gdt; |
9976b39b | 399 | unsigned long gdt_mfn; |
f87e4cac | 400 | |
ce4b1b16 IM |
401 | /* used to tell cpu_init() that it can proceed with initialization */ |
402 | cpumask_set_cpu(cpu, cpu_callout_mask); | |
b78936e1 | 403 | if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) |
f87e4cac JF |
404 | return 0; |
405 | ||
406 | ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); | |
407 | if (ctxt == NULL) | |
408 | return -ENOMEM; | |
409 | ||
c7b75947 JF |
410 | gdt = get_cpu_gdt_table(cpu); |
411 | ||
c7b75947 | 412 | #ifdef CONFIG_X86_32 |
5840c84b | 413 | /* Note: PVH is not yet supported on x86_32. */ |
c7b75947 | 414 | ctxt->user_regs.fs = __KERNEL_PERCPU; |
577eebea | 415 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; |
c7b75947 | 416 | #endif |
f87e4cac JF |
417 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); |
418 | ||
5840c84b | 419 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
a2ef5dc2 | 420 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; |
5840c84b | 421 | ctxt->flags = VGCF_IN_KERNEL; |
dacd45f4 KRW |
422 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ |
423 | ctxt->user_regs.ds = __USER_DS; | |
424 | ctxt->user_regs.es = __USER_DS; | |
5840c84b | 425 | ctxt->user_regs.ss = __KERNEL_DS; |
f87e4cac | 426 | |
dacd45f4 | 427 | xen_copy_trap_info(ctxt->trap_ctxt); |
f87e4cac | 428 | |
dacd45f4 | 429 | ctxt->ldt_ents = 0; |
9976b39b | 430 | |
dacd45f4 | 431 | BUG_ON((unsigned long)gdt & ~PAGE_MASK); |
f87e4cac | 432 | |
dacd45f4 KRW |
433 | gdt_mfn = arbitrary_virt_to_mfn(gdt); |
434 | make_lowmem_page_readonly(gdt); | |
435 | make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); | |
f87e4cac | 436 | |
dacd45f4 KRW |
437 | ctxt->gdt_frames[0] = gdt_mfn; |
438 | ctxt->gdt_ents = GDT_ENTRIES; | |
f87e4cac | 439 | |
dacd45f4 KRW |
440 | ctxt->kernel_ss = __KERNEL_DS; |
441 | ctxt->kernel_sp = idle->thread.sp0; | |
f87e4cac | 442 | |
c7b75947 | 443 | #ifdef CONFIG_X86_32 |
dacd45f4 KRW |
444 | ctxt->event_callback_cs = __KERNEL_CS; |
445 | ctxt->failsafe_callback_cs = __KERNEL_CS; | |
5840c84b MR |
446 | #else |
447 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | |
c7b75947 | 448 | #endif |
dacd45f4 KRW |
449 | ctxt->event_callback_eip = |
450 | (unsigned long)xen_hypervisor_callback; | |
451 | ctxt->failsafe_callback_eip = | |
452 | (unsigned long)xen_failsafe_callback; | |
5840c84b MR |
453 | ctxt->user_regs.cs = __KERNEL_CS; |
454 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | |
dacd45f4 | 455 | } |
a2ef5dc2 MR |
456 | #ifdef CONFIG_XEN_PVH |
457 | else { | |
458 | /* | |
459 | * The vcpu comes on kernel page tables which have the NX pte | |
460 | * bit set. This means before DS/SS is touched, NX in | |
461 | * EFER must be set. Hence the following assembly glue code. | |
5840c84b | 462 | */ |
a2ef5dc2 | 463 | ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init; |
5840c84b | 464 | ctxt->user_regs.rdi = cpu; |
a2ef5dc2 MR |
465 | ctxt->user_regs.rsi = true; /* entry == true */ |
466 | } | |
5840c84b | 467 | #endif |
dacd45f4 | 468 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); |
0df4f266 | 469 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); |
ad5475f9 | 470 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) |
f87e4cac JF |
471 | BUG(); |
472 | ||
473 | kfree(ctxt); | |
474 | return 0; | |
475 | } | |
476 | ||
148f9bb8 | 477 | static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) |
f87e4cac | 478 | { |
f87e4cac JF |
479 | int rc; |
480 | ||
3f85483b | 481 | common_cpu_up(cpu, idle); |
4461bbc0 | 482 | |
02889672 | 483 | xen_setup_runstate_info(cpu); |
f87e4cac | 484 | xen_setup_timer(cpu); |
2d9e1e2f | 485 | xen_init_lock_cpu(cpu); |
f87e4cac | 486 | |
2a442c9c PM |
487 | /* |
488 | * PV VCPUs are always successfully taken down (see 'while' loop | |
489 | * in xen_cpu_die()), so -EBUSY is an error. | |
490 | */ | |
491 | rc = cpu_check_up_prepare(cpu); | |
492 | if (rc) | |
493 | return rc; | |
c7b75947 | 494 | |
f87e4cac JF |
495 | /* make sure interrupts start blocked */ |
496 | per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; | |
497 | ||
498 | rc = cpu_initialize_context(cpu, idle); | |
499 | if (rc) | |
500 | return rc; | |
501 | ||
65d0cf0b BO |
502 | xen_pmu_init(cpu); |
503 | ||
f87e4cac JF |
504 | rc = xen_smp_intr_init(cpu); |
505 | if (rc) | |
506 | return rc; | |
507 | ||
ad5475f9 | 508 | rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL); |
f87e4cac JF |
509 | BUG_ON(rc); |
510 | ||
2a442c9c | 511 | while (cpu_report_state(cpu) != CPU_ONLINE) |
1207cf8e | 512 | HYPERVISOR_sched_op(SCHEDOP_yield, NULL); |
c7b75947 | 513 | |
f87e4cac JF |
514 | return 0; |
515 | } | |
516 | ||
/* smp_cpus_done hook: intentionally empty, nothing to do here. */
static void xen_smp_cpus_done(unsigned int max_cpus)
{
}
520 | ||
2737146b | 521 | #ifdef CONFIG_HOTPLUG_CPU |
26fd1051 | 522 | static int xen_cpu_disable(void) |
d68d82af AN |
523 | { |
524 | unsigned int cpu = smp_processor_id(); | |
525 | if (cpu == 0) | |
526 | return -EBUSY; | |
527 | ||
528 | cpu_disable_common(); | |
529 | ||
530 | load_cr3(swapper_pg_dir); | |
531 | return 0; | |
532 | } | |
533 | ||
26fd1051 | 534 | static void xen_cpu_die(unsigned int cpu) |
d68d82af | 535 | { |
ad5475f9 VK |
536 | while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, |
537 | xen_vcpu_nr(cpu), NULL)) { | |
57b6b99b | 538 | __set_current_state(TASK_UNINTERRUPTIBLE); |
d68d82af AN |
539 | schedule_timeout(HZ/10); |
540 | } | |
54279552 | 541 | |
2a442c9c PM |
542 | if (common_cpu_die(cpu) == 0) { |
543 | xen_smp_intr_free(cpu); | |
544 | xen_uninit_lock_cpu(cpu); | |
545 | xen_teardown_timer(cpu); | |
65d0cf0b | 546 | xen_pmu_finish(cpu); |
2a442c9c | 547 | } |
d68d82af AN |
548 | } |
549 | ||
148f9bb8 | 550 | static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ |
d68d82af AN |
551 | { |
552 | play_dead_common(); | |
ad5475f9 | 553 | HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL); |
d68d82af | 554 | cpu_bringup(); |
466318a8 KRW |
555 | /* |
556 | * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down) | |
557 | * clears certain data that the cpu_idle loop (which called us | |
558 | * and that we return from) expects. The only way to get that | |
559 | * data back is to call: | |
560 | */ | |
561 | tick_nohz_idle_enter(); | |
dc6416f1 BO |
562 | |
563 | cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); | |
d68d82af AN |
564 | } |
565 | ||
2737146b | 566 | #else /* !CONFIG_HOTPLUG_CPU */ |
26fd1051 | 567 | static int xen_cpu_disable(void) |
2737146b AN |
568 | { |
569 | return -ENOSYS; | |
570 | } | |
571 | ||
/* Without CONFIG_HOTPLUG_CPU this must never be reached. */
static void xen_cpu_die(unsigned int cpu)
{
	BUG();
}
576 | ||
/* Without CONFIG_HOTPLUG_CPU this must never be reached. */
static void xen_play_dead(void)
{
	BUG();
}
581 | ||
582 | #endif | |
f87e4cac JF |
583 | static void stop_self(void *v) |
584 | { | |
585 | int cpu = smp_processor_id(); | |
586 | ||
587 | /* make sure we're not pinning something down */ | |
588 | load_cr3(swapper_pg_dir); | |
589 | /* should set up a minimal gdt */ | |
590 | ||
086748e5 IC |
591 | set_cpu_online(cpu, false); |
592 | ||
ad5475f9 | 593 | HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL); |
f87e4cac JF |
594 | BUG(); |
595 | } | |
596 | ||
76fac077 | 597 | static void xen_stop_other_cpus(int wait) |
f87e4cac | 598 | { |
76fac077 | 599 | smp_call_function(stop_self, NULL, wait); |
f87e4cac JF |
600 | } |
601 | ||
a9e7062d | 602 | static void xen_smp_send_reschedule(int cpu) |
f87e4cac JF |
603 | { |
604 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); | |
605 | } | |
606 | ||
f447d56d BG |
607 | static void __xen_send_IPI_mask(const struct cpumask *mask, |
608 | int vector) | |
f87e4cac JF |
609 | { |
610 | unsigned cpu; | |
611 | ||
bcda016e | 612 | for_each_cpu_and(cpu, mask, cpu_online_mask) |
f87e4cac JF |
613 | xen_send_IPI_one(cpu, vector); |
614 | } | |
615 | ||
bcda016e | 616 | static void xen_smp_send_call_function_ipi(const struct cpumask *mask) |
3b16cf87 JA |
617 | { |
618 | int cpu; | |
619 | ||
f447d56d | 620 | __xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); |
3b16cf87 JA |
621 | |
622 | /* Make sure other vcpus get a chance to run if they need to. */ | |
bcda016e | 623 | for_each_cpu(cpu, mask) { |
3b16cf87 | 624 | if (xen_vcpu_stolen(cpu)) { |
1207cf8e | 625 | HYPERVISOR_sched_op(SCHEDOP_yield, NULL); |
3b16cf87 JA |
626 | break; |
627 | } | |
628 | } | |
629 | } | |
630 | ||
a9e7062d | 631 | static void xen_smp_send_call_function_single_ipi(int cpu) |
3b16cf87 | 632 | { |
f447d56d | 633 | __xen_send_IPI_mask(cpumask_of(cpu), |
e7986739 | 634 | XEN_CALL_FUNCTION_SINGLE_VECTOR); |
3b16cf87 JA |
635 | } |
636 | ||
f447d56d BG |
637 | static inline int xen_map_vector(int vector) |
638 | { | |
639 | int xen_vector; | |
640 | ||
641 | switch (vector) { | |
642 | case RESCHEDULE_VECTOR: | |
643 | xen_vector = XEN_RESCHEDULE_VECTOR; | |
644 | break; | |
645 | case CALL_FUNCTION_VECTOR: | |
646 | xen_vector = XEN_CALL_FUNCTION_VECTOR; | |
647 | break; | |
648 | case CALL_FUNCTION_SINGLE_VECTOR: | |
649 | xen_vector = XEN_CALL_FUNCTION_SINGLE_VECTOR; | |
650 | break; | |
1ff2b0c3 LM |
651 | case IRQ_WORK_VECTOR: |
652 | xen_vector = XEN_IRQ_WORK_VECTOR; | |
653 | break; | |
6efa20e4 KRW |
654 | #ifdef CONFIG_X86_64 |
655 | case NMI_VECTOR: | |
656 | case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */ | |
657 | xen_vector = XEN_NMI_VECTOR; | |
658 | break; | |
659 | #endif | |
f447d56d BG |
660 | default: |
661 | xen_vector = -1; | |
662 | printk(KERN_ERR "xen: vector 0x%x is not implemented\n", | |
663 | vector); | |
664 | } | |
665 | ||
666 | return xen_vector; | |
667 | } | |
668 | ||
/*
 * Send the native IPI @vector to the online CPUs in @mask.  Vectors
 * that xen_map_vector() cannot translate are silently dropped here (the
 * mapper has already logged them).
 */
void xen_send_IPI_mask(const struct cpumask *mask,
		       int vector)
{
	int mapped = xen_map_vector(vector);

	if (mapped < 0)
		return;

	__xen_send_IPI_mask(mask, mapped);
}
677 | ||
678 | void xen_send_IPI_all(int vector) | |
679 | { | |
680 | int xen_vector = xen_map_vector(vector); | |
681 | ||
682 | if (xen_vector >= 0) | |
683 | __xen_send_IPI_mask(cpu_online_mask, xen_vector); | |
684 | } | |
685 | ||
/* Send the native IPI @vector to the calling CPU; unmappable vectors are ignored. */
void xen_send_IPI_self(int vector)
{
	int mapped = xen_map_vector(vector);

	if (mapped < 0)
		return;

	xen_send_IPI_one(smp_processor_id(), mapped);
}
693 | ||
694 | void xen_send_IPI_mask_allbutself(const struct cpumask *mask, | |
695 | int vector) | |
696 | { | |
697 | unsigned cpu; | |
698 | unsigned int this_cpu = smp_processor_id(); | |
1db01b49 | 699 | int xen_vector = xen_map_vector(vector); |
f447d56d | 700 | |
1db01b49 | 701 | if (!(num_online_cpus() > 1) || (xen_vector < 0)) |
f447d56d BG |
702 | return; |
703 | ||
704 | for_each_cpu_and(cpu, mask, cpu_online_mask) { | |
705 | if (this_cpu == cpu) | |
706 | continue; | |
707 | ||
1db01b49 | 708 | xen_send_IPI_one(cpu, xen_vector); |
f447d56d BG |
709 | } |
710 | } | |
711 | ||
712 | void xen_send_IPI_allbutself(int vector) | |
713 | { | |
1db01b49 | 714 | xen_send_IPI_mask_allbutself(cpu_online_mask, vector); |
f447d56d BG |
715 | } |
716 | ||
f87e4cac JF |
717 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) |
718 | { | |
f87e4cac | 719 | irq_enter(); |
3b16cf87 | 720 | generic_smp_call_function_interrupt(); |
1b437c8c | 721 | inc_irq_stat(irq_call_count); |
f87e4cac JF |
722 | irq_exit(); |
723 | ||
f87e4cac JF |
724 | return IRQ_HANDLED; |
725 | } | |
726 | ||
3b16cf87 | 727 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) |
f87e4cac | 728 | { |
3b16cf87 JA |
729 | irq_enter(); |
730 | generic_smp_call_function_single_interrupt(); | |
1b437c8c | 731 | inc_irq_stat(irq_call_count); |
3b16cf87 | 732 | irq_exit(); |
f87e4cac | 733 | |
3b16cf87 | 734 | return IRQ_HANDLED; |
f87e4cac | 735 | } |
a9e7062d | 736 | |
1ff2b0c3 LM |
737 | static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) |
738 | { | |
739 | irq_enter(); | |
740 | irq_work_run(); | |
741 | inc_irq_stat(apic_irq_work_irqs); | |
742 | irq_exit(); | |
743 | ||
744 | return IRQ_HANDLED; | |
745 | } | |
746 | ||
b53cedeb | 747 | static const struct smp_ops xen_smp_ops __initconst = { |
a9e7062d JF |
748 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, |
749 | .smp_prepare_cpus = xen_smp_prepare_cpus, | |
a9e7062d JF |
750 | .smp_cpus_done = xen_smp_cpus_done, |
751 | ||
d68d82af AN |
752 | .cpu_up = xen_cpu_up, |
753 | .cpu_die = xen_cpu_die, | |
754 | .cpu_disable = xen_cpu_disable, | |
755 | .play_dead = xen_play_dead, | |
756 | ||
76fac077 | 757 | .stop_other_cpus = xen_stop_other_cpus, |
a9e7062d JF |
758 | .smp_send_reschedule = xen_smp_send_reschedule, |
759 | ||
760 | .send_call_func_ipi = xen_smp_send_call_function_ipi, | |
761 | .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi, | |
762 | }; | |
763 | ||
764 | void __init xen_smp_init(void) | |
765 | { | |
766 | smp_ops = xen_smp_ops; | |
c7b75947 | 767 | xen_fill_possible_map(); |
a9e7062d | 768 | } |
99bbb3a8 SS |
769 | |
770 | static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | |
771 | { | |
772 | native_smp_prepare_cpus(max_cpus); | |
773 | WARN_ON(xen_smp_intr_init(0)); | |
774 | ||
99bbb3a8 | 775 | xen_init_lock_cpu(0); |
99bbb3a8 SS |
776 | } |
777 | ||
148f9bb8 | 778 | static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) |
99bbb3a8 SS |
779 | { |
780 | int rc; | |
2a442c9c PM |
781 | |
782 | /* | |
783 | * This can happen if CPU was offlined earlier and | |
784 | * offlining timed out in common_cpu_die(). | |
785 | */ | |
786 | if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) { | |
787 | xen_smp_intr_free(cpu); | |
788 | xen_uninit_lock_cpu(cpu); | |
789 | } | |
790 | ||
fc78d343 CA |
791 | /* |
792 | * xen_smp_intr_init() needs to run before native_cpu_up() | |
793 | * so that IPI vectors are set up on the booting CPU before | |
794 | * it is marked online in native_cpu_up(). | |
795 | */ | |
796 | rc = xen_smp_intr_init(cpu); | |
797 | WARN_ON(rc); | |
798 | if (!rc) | |
799 | rc = native_cpu_up(cpu, tidle); | |
1fb3a8b2 KRW |
800 | |
801 | /* | |
802 | * We must initialize the slowpath CPU kicker _after_ the native | |
803 | * path has executed. If we initialized it before none of the | |
804 | * unlocker IPI kicks would reach the booting CPU as the booting | |
805 | * CPU had not set itself 'online' in cpu_online_mask. That mask | |
806 | * is checked when IPIs are sent (on HVM at least). | |
807 | */ | |
808 | xen_init_lock_cpu(cpu); | |
99bbb3a8 SS |
809 | return rc; |
810 | } | |
811 | ||
99bbb3a8 SS |
812 | void __init xen_hvm_smp_init(void) |
813 | { | |
3c05c4be SS |
814 | if (!xen_have_vector_callback) |
815 | return; | |
99bbb3a8 SS |
816 | smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; |
817 | smp_ops.smp_send_reschedule = xen_smp_send_reschedule; | |
818 | smp_ops.cpu_up = xen_hvm_cpu_up; | |
2a442c9c | 819 | smp_ops.cpu_die = xen_cpu_die; |
99bbb3a8 SS |
820 | smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; |
821 | smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; | |
26a79995 | 822 | smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu; |
99bbb3a8 | 823 | } |