/*
 *	x86 SMP booting functions
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *	Copyright 2001 Andi Kleen, SuSE Labs.
 *
 *	Much of the core SMP work is based on previous work by Thomas Radke, to
 *	whom a great many thanks are extended.
 *
 *	Thanks to Intel for making available several different Pentium,
 *	Pentium Pro and Pentium-II/Xeon MP machines.
 *	Original development of Linux SMP code supported by Caldera.
 *
 *	This code is released under the GNU General Public License version 2
 *
 *	Fixes
 *	Felix Koop	:	NR_CPUS used properly
 *	Jose Renau	:	Handle single CPU case.
 *	Alan Cox	:	By repeated request 8) - Total BogoMIP report.
 *	Greg Wright	:	Fix for kernel stacks panic.
 *	Erich Boleyn	:	MP v1.4 and additional changes.
 *	Matthias Sattler:	Changes for 2.1 kernel map.
 *	Michel Lespinasse:	Changes for 2.1 kernel map.
 *	Michael Chastain:	Change trampoline.S to gnu as.
 *	Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *	Ingo Molnar	:	Added APIC timers, based on code
 *				from Jose Renau
 *	Ingo Molnar	:	various cleanups and rewrites
 *	Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki:	Bits for genuine 82489DX APICs
 *	Andi Kleen	:	Changed for SMP boot into long mode.
 *	Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
 *	Andi Kleen	:	Converted to new state machine.
 *				Various cleanups.
 *				Probably mostly hotplug CPU ready now.
 */

#include <linux/config.h>
#include <linux/init.h>

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/thread_info.h>
#include <linux/module.h>

#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/desc.h>
#include <asm/kdebug.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>

/* Change for real CPU hotplug. Note other files need to be fixed
   first too. */
#define __cpuinit __init
#define __cpuinitdata __initdata

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
/* Package ID of each logical CPU */
u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
EXPORT_SYMBOL(phys_proc_id);
EXPORT_SYMBOL(cpu_core_id);

/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map;

EXPORT_SYMBOL(cpu_online_map);

/*
 * Private maps to synchronize booting between AP and BP.
 * Probably not needed anymore, but it makes for easier debugging. -AK
 */
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;

cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;

/* Set when the idlers are all forked */
int smp_threads_ready;

cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;

/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data[];
extern unsigned char trampoline_end[];

/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

static unsigned long __cpuinit setup_trampoline(void)
{
	void *tramp = __va(SMP_TRAMPOLINE_BASE);
	memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(tramp);
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

static void __cpuinit smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = cpu_data + id;

	*c = boot_cpu_data;
	identify_cpu(c);
	print_cpu_info(c);
}

/*
 * New Funky TSC sync algorithm borrowed from IA64.
 * Main advantage is that it doesn't reset the TSCs fully and
 * in general looks more robust and it works better than my earlier
 * attempts. I believe it was written by David Mosberger. Some minor
 * adjustments for x86-64 by me -AK
 *
 * Original comment reproduced below.
 *
 * Synchronize TSC of the current (slave) CPU with the TSC of the
 * MASTER CPU (normally the time-keeper CPU). We use a closed loop to
 * eliminate the possibility of unaccounted-for errors (such as
 * getting a machine check in the middle of a calibration step). The
 * basic idea is for the slave to ask the master what itc value it has
 * and to read its own itc before and after the master responds. Each
 * iteration gives us three timestamps:
 *
 *	slave		master
 *
 *	t0 ---\
 *	        ---\
 *	            --->
 *	                  tm
 *	            /---
 *	        /---
 *	t1 <---
 *
 *
 * The goal is to adjust the slave's TSC such that tm falls exactly
 * half-way between t0 and t1. If we achieve this, the clocks are
 * synchronized provided the interconnect between the slave and the
 * master is symmetric. Even if the interconnect were asymmetric, we
 * would still know that the synchronization error is smaller than the
 * roundtrip latency (t1 - t0).
 *
 * When the interconnect is quiet and symmetric, this lets us
 * synchronize the TSC to within one or two cycles. However, we can
 * only *guarantee* that the synchronization is accurate to within a
 * round-trip time, which is typically in the range of several hundred
 * cycles (e.g., ~500 cycles). In practice, this means that the TSCs
 * are usually almost perfectly synchronized, but we shouldn't assume
 * that the accuracy is much better than half a microsecond or so.
 *
 * [there are other errors like the latency of RDTSC and of the
 * WRMSR. These can also amount to hundreds of cycles. So it's
 * probably worse. It claims 153 cycles error on a dual Opteron,
 * but I suspect the numbers are actually somewhat worse -AK]
 */

#define MASTER	0
#define SLAVE	(SMP_CACHE_BYTES/8)
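/* MASTER and SLAVE index the go[] array below; SLAVE sits a full cache
   line past MASTER so the two CPUs spinning on these flags do not
   false-share a cache line. */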

/* Intentionally don't use cpu_relax() during TSC synchronization
   because we don't want to go into funky power save modes or cause
   hypervisors to schedule us away. Going to sleep would likely affect
   latency and low latency is the primary objective here. -AK */
#define no_cpu_relax() barrier()

static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
static volatile __cpuinitdata unsigned long go[SLAVE + 1];
static int notscsync __cpuinitdata;

#undef DEBUG_TSC_SYNC

#define NUM_ROUNDS	64	/* magic value */
#define NUM_ITERS	5	/* likewise */

/* Callback on boot CPU */
static __cpuinit void sync_master(void *arg)
{
	unsigned long flags, i;

	if (smp_processor_id() != boot_cpu_id)
		return;

	go[MASTER] = 0;

	local_irq_save(flags);
	{
		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
			while (!go[MASTER])
				no_cpu_relax();
			go[MASTER] = 0;
			rdtscll(go[SLAVE]);
		}
	}
	local_irq_restore(flags);
}

/*
 * Return the number of cycles by which our tsc differs from the tsc
 * on the master (time-keeper) CPU. A positive number indicates our
 * tsc is ahead of the master, negative that it is behind.
 */
static inline long
get_delta(long *rt, long *master)
{
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;
	int i;

	for (i = 0; i < NUM_ITERS; ++i) {
		rdtscll(t0);
		go[MASTER] = 1;
		while (!(tm = go[SLAVE]))
			no_cpu_relax();
		go[SLAVE] = 0;
		rdtscll(t1);

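		/* Keep the round with the smallest round-trip time; it
		   bounds the measurement error most tightly. */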
		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;
	}

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

	/* average best_t0 and best_t1 without overflow: */
	tcenter = (best_t0/2 + best_t1/2);
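	/* if both values were odd, the halvings dropped 0.5 twice: add back 1 */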
	if (best_t0 % 2 + best_t1 % 2 == 2)
		++tcenter;
	return tcenter - best_tm;
}

static __cpuinit void sync_tsc(void)
{
	int i, done = 0;
	long delta, adj, adjust_latency = 0;
	unsigned long flags, rt, master_time_stamp, bound;
#if DEBUG_TSC_SYNC
	static struct syncdebug {
		long rt;	/* roundtrip time */
		long master;	/* master's timestamp */
		long diff;	/* difference between midpoint and master's timestamp */
		long lat;	/* estimate of tsc adjustment latency */
	} t[NUM_ROUNDS] __cpuinitdata;
#endif

	go[MASTER] = 1;

	smp_call_function(sync_master, NULL, 1, 0);

	while (go[MASTER])	/* wait for master to be ready */
		no_cpu_relax();

	spin_lock_irqsave(&tsc_sync_lock, flags);
	{
		for (i = 0; i < NUM_ROUNDS; ++i) {
			delta = get_delta(&rt, &master_time_stamp);
			if (delta == 0) {
				done = 1;	/* let's lock on to this... */
				bound = rt;
			}

			if (!done) {
				unsigned long t;
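				/* Beyond the first round, also feed back a
				   quarter of the accumulated offsets as a
				   damping term, to absorb the latency of
				   applying the adjustment itself. */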
				if (i > 0) {
					adjust_latency += -delta;
					adj = -delta + adjust_latency/4;
				} else
					adj = -delta;

				rdtscll(t);
				wrmsrl(MSR_IA32_TSC, t + adj);
			}
#if DEBUG_TSC_SYNC
			t[i].rt = rt;
			t[i].master = master_time_stamp;
			t[i].diff = delta;
			t[i].lat = adjust_latency/4;
#endif
		}
	}
	spin_unlock_irqrestore(&tsc_sync_lock, flags);

#if DEBUG_TSC_SYNC
	for (i = 0; i < NUM_ROUNDS; ++i)
		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

	printk(KERN_INFO
	       "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
	       "maxerr %lu cycles)\n",
	       smp_processor_id(), boot_cpu_id, delta, rt);
}

static void __cpuinit tsc_sync_wait(void)
{
	if (notscsync || !cpu_has_tsc)
		return;
	printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
			boot_cpu_id);
	sync_tsc();
}

static __init int notscsync_setup(char *s)
{
	notscsync = 1;
	return 0;
}
__setup("notscsync", notscsync_setup);
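/* Booting with "notscsync" on the kernel command line skips the TSC
   synchronization above, e.g. when the platform is already known to
   keep the TSCs in sync. */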

static atomic_t init_deasserted __cpuinitdata;

/*
 * Report back to the Boot Processor.
 * Running on AP.
 */
void __cpuinit smp_callin(void)
{
	int cpuid, phys_id;
	unsigned long timeout;

	/*
	 * If woken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
	 * our local APIC. We have to wait for the IPI or we'll
	 * lock up on an APIC access.
	 */
	while (!atomic_read(&init_deasserted))
		cpu_relax();

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	phys_id = GET_APIC_ID(apic_read(APIC_ID));
	cpuid = smp_processor_id();
	if (cpu_isset(cpuid, cpu_callin_map)) {
		panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
					phys_id, cpuid);
	}
	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic. Complete APIC bus
	 * silence for 1 second overestimates, by a factor of two, the
	 * time the boot CPU spends sending the up to 2 STARTUP IPIs.
	 * This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (udelay is not yet working)
	 */
	timeout = jiffies + 2*HZ;
	while (time_before(jiffies, timeout)) {
		/*
		 * Has the boot CPU finished its STARTUP sequence?
		 */
		if (cpu_isset(cpuid, cpu_callout_map))
			break;
		cpu_relax();
	}

	if (!time_before(jiffies, timeout)) {
		panic("smp_callin: CPU%d started up but did not get a callout!\n",
			cpuid);
	}

	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */

	Dprintk("CALLIN, before setup_local_APIC().\n");
	setup_local_APIC();

	/*
	 * Get our bogomips.
	 */
	calibrate_delay();
	Dprintk("Stack at about %p\n", &cpuid);

	disable_APIC_timer();

	/*
	 * Save our processor parameters
	 */
	smp_store_cpu_info(cpuid);

	/*
	 * Allow the master to continue.
	 */
	cpu_set(cpuid, cpu_callin_map);
}

/*
 * Setup code on secondary processor (after coming out of the trampoline)
 */
void __cpuinit start_secondary(void)
{
	/*
	 * Don't put anything before smp_callin(), SMP
	 * booting is so fragile that we want to limit the
	 * things done here to the most necessary things.
	 */
	cpu_init();
	smp_callin();

	/* otherwise gcc will move up smp_processor_id before the cpu_init */
	barrier();

	Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
	setup_secondary_APIC_clock();

	Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());

	if (nmi_watchdog == NMI_IO_APIC) {
		disable_8259A_irq(0);
		enable_NMI_through_LVT0(NULL);
		enable_8259A_irq(0);
	}

	enable_APIC_timer();

	/*
	 * Allow the master to continue.
	 */
	cpu_set(smp_processor_id(), cpu_online_map);
	mb();

	/* Wait for TSC sync here so that nothing gets scheduled before the
	   TSCs are consistent. We still process interrupts, which could
	   unfortunately observe an inconsistent time in that window. */
	tsc_sync_wait();

	cpu_idle();
}

extern volatile unsigned long init_rsp;
extern void (*initial_code)(void);

#if APIC_DEBUG
static void inquire_remote_apic(int apicid)
{
	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
	int timeout, status;

	printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);

	for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
		printk("... APIC #%d %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		apic_wait_icr_idle();

		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);

		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			printk("%08x\n", status);
			break;
		default:
			printk("failed\n");
		}
	}
}
#endif

/*
 * Kick the secondary to wake up.
 */
static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, timeout, num_starts, j;

	Dprintk("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
				| APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

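	/* The MP spec calls for a 10ms delay between asserting and
	   deasserting INIT. */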
	mdelay(10);

	Dprintk("Deasserting INIT.\n");

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	atomic_set(&init_deasserted, 1);

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid]))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Run STARTUP IPI loop.
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

	maxlvt = get_maxlvt();

	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n", j);
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second */
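		/* The STARTUP vector field is the 4K page number of the
		   entry point, so start_rip must be page aligned and
		   below 1MB. */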
		apic_write_around(APIC_ICR, APIC_DM_STARTUP
					| (start_rip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		Dprintk("Startup point 1.\n");

		Dprintk("Waiting for send to finish...\n");
		timeout = 0;
		do {
			Dprintk("+");
			udelay(100);
			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		} while (send_status && (timeout++ < 1000));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		/*
		 * Due to the Pentium erratum 3AP.
		 */
		if (maxlvt > 3) {
			apic_read_around(APIC_SPIV);
			apic_write(APIC_ESR, 0);
		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	Dprintk("After Startup.\n");

	if (send_status)
		printk(KERN_ERR "APIC never delivered???\n");
	if (accept_status)
		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}

/*
 * Boot one CPU.
 */
static int __cpuinit do_boot_cpu(int cpu, int apicid)
{
	struct task_struct *idle;
	unsigned long boot_error;
	int timeout;
	unsigned long start_rip;
	/*
	 * We can't use kernel_thread since we must avoid
	 * rescheduling the child.
	 */
	idle = fork_idle(cpu);
	if (IS_ERR(idle)) {
		printk("failed fork for CPU %d\n", cpu);
		return PTR_ERR(idle);
	}
	x86_cpu_to_apicid[cpu] = apicid;

	cpu_pda[cpu].pcurrent = idle;

	start_rip = setup_trampoline();

	init_rsp = idle->thread.rsp;
	per_cpu(init_tss, cpu).rsp0 = init_rsp;
	initial_code = start_secondary;
	clear_ti_thread_flag(idle->thread_info, TIF_FORK);

	printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
	       start_rip, init_rsp);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	atomic_set(&init_deasserted, 0);

	Dprintk("Setting warm reset code and vector.\n");

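	/* CMOS register 0xF is the shutdown status byte; 0xA requests a
	   warm reset that jumps through the 40:67 vector, whose segment
	   (0x469) and offset (0x467) words we point at the trampoline. */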
	CMOS_WRITE(0xa, 0xf);
	local_flush_tlb();
	Dprintk("1.\n");
	*((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
	Dprintk("2.\n");
	*((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
	Dprintk("3.\n");

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[apicid])) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	/*
	 * Status is now clean
	 */
	boot_error = 0;

	/*
	 * Starting actual IPI sequence...
	 */
	boot_error = wakeup_secondary_via_INIT(apicid, start_rip);

	if (!boot_error) {
		/*
		 * allow APs to start initializing.
		 */
		Dprintk("Before Callout %d.\n", cpu);
		cpu_set(cpu, cpu_callout_map);
		Dprintk("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response
		 */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_isset(cpu, cpu_callin_map))
				break;	/* It has booted */
			udelay(100);
		}

		if (cpu_isset(cpu, cpu_callin_map)) {
			/* number CPUs logically, starting from 1 (BSP is 0) */
			Dprintk("CPU has booted.\n");
		} else {
			boot_error = 1;
			if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
					== 0xA5)
				/* trampoline started but...? */
				printk("Stuck ??\n");
			else
				/* trampoline code not run */
				printk("Not responding.\n");
#if APIC_DEBUG
			inquire_remote_apic(apicid);
#endif
		}
	}
	if (boot_error) {
		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
		clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
		cpu_clear(cpu, cpu_present_map);
		cpu_clear(cpu, cpu_possible_map);
		x86_cpu_to_apicid[cpu] = BAD_APICID;
		x86_cpu_to_log_apicid[cpu] = BAD_APICID;
		return -EIO;
	}

	return 0;
}

cycles_t cacheflush_time;
unsigned long cache_decay_ticks;

/*
 * Construct cpu_sibling_map[], so that we can tell the sibling CPU
 * on SMT systems efficiently.
 */
static __cpuinit void detect_siblings(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(cpu_sibling_map[cpu]);
		cpus_clear(cpu_core_map[cpu]);
	}

	for_each_online_cpu (cpu) {
		struct cpuinfo_x86 *c = cpu_data + cpu;
		int siblings = 0;
		int i;
		if (smp_num_siblings > 1) {
			for_each_online_cpu (i) {
				if (cpu_core_id[cpu] == cpu_core_id[i]) {
					siblings++;
					cpu_set(i, cpu_sibling_map[cpu]);
				}
			}
		} else {
			siblings++;
			cpu_set(cpu, cpu_sibling_map[cpu]);
		}

		if (siblings != smp_num_siblings) {
			printk(KERN_WARNING
			       "WARNING: %d siblings found for CPU%d, should be %d\n",
			       siblings, cpu, smp_num_siblings);
			smp_num_siblings = siblings;
		}
		if (c->x86_num_cores > 1) {
			for_each_online_cpu(i) {
				if (phys_proc_id[cpu] == phys_proc_id[i])
					cpu_set(i, cpu_core_map[cpu]);
			}
		} else
			cpu_core_map[cpu] = cpu_sibling_map[cpu];
	}
}

/*
 * Cleanup possible dangling ends...
 */
static __cpuinit void smp_cleanup_boot(void)
{
	/*
	 * Paranoid: Set warm reset code and vector here back
	 * to default values.
	 */
	CMOS_WRITE(0, 0xf);

	/*
	 * Reset trampoline flag
	 */
	*((volatile int *) phys_to_virt(0x467)) = 0;

#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Free pages reserved for SMP bootup.
	 * When you add hotplug CPU support later, remove this.
	 * Note there is more work to be done for later CPU bootup.
	 */

	free_page((unsigned long) __va(PAGE_SIZE));
	free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
#endif
}

/*
 * Fall back to non-SMP mode after errors.
 *
 * RED-PEN audit/test this more. I bet there is more state messed up here.
 */
static __cpuinit void disable_smp(void)
{
	cpu_present_map = cpumask_of_cpu(0);
	cpu_possible_map = cpumask_of_cpu(0);
	if (smp_found_config)
		phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
	else
		phys_cpu_present_map = physid_mask_of_physid(0);
	cpu_set(0, cpu_sibling_map[0]);
	cpu_set(0, cpu_core_map[0]);
}

/*
 * Handle user cpus=... parameter.
 */
static __cpuinit void enforce_max_cpus(unsigned max_cpus)
{
	int i, k;
	k = 0;
	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_possible(i))
			continue;
		if (++k > max_cpus) {
			cpu_clear(i, cpu_possible_map);
			cpu_clear(i, cpu_present_map);
		}
	}
}

/*
 * Various sanity checks.
 */
static int __cpuinit smp_sanity_check(unsigned max_cpus)
{
	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
		       hard_smp_processor_id());
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
		disable_smp();
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
		return -1;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 */
	if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
		printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
		       boot_cpu_id);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find a local APIC, then get out of here now!
	 */
	if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) {
		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
			boot_cpu_id);
		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
		nr_ioapics = 0;
		return -1;
	}

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus) {
		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
		nr_ioapics = 0;
		return -1;
	}

	return 0;
}

/*
 * Prepare for SMP bootup. The MP table or ACPI has been read
 * earlier. Just do some sanity checking here and enable APIC mode.
 */
void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
{
	int i;

	nmi_watchdog_default();
	current_cpu_data = boot_cpu_data;
	current_thread_info()->cpu = 0;	/* needed? */

	enforce_max_cpus(max_cpus);

	/*
	 * Fill in cpu_present_map
	 */
	for (i = 0; i < NR_CPUS; i++) {
		int apicid = cpu_present_to_apicid(i);
		if (physid_isset(apicid, phys_cpu_present_map)) {
			cpu_set(i, cpu_present_map);
			/* possible map would be different if we supported real
			   CPU hotplug. */
			cpu_set(i, cpu_possible_map);
		}
	}

	if (smp_sanity_check(max_cpus) < 0) {
		printk(KERN_INFO "SMP disabled\n");
		disable_smp();
		return;
	}

	/*
	 * Switch from PIC to APIC mode.
	 */
	connect_bsp_APIC();
	setup_local_APIC();

	if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
		      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
		/* Or can we switch back to PIC here? */
	}
	x86_cpu_to_apicid[0] = boot_cpu_id;

	/*
	 * Now start the IO-APICs
	 */
	if (!skip_ioapic_setup && nr_ioapics)
		setup_IO_APIC();
	else
		nr_ioapics = 0;

	/*
	 * Set up local APIC timer on boot CPU.
	 */
	setup_boot_APIC_clock();
}

/*
 * Early setup to make printk work.
 */
void __init smp_prepare_boot_cpu(void)
{
	int me = smp_processor_id();
	cpu_set(me, cpu_online_map);
	cpu_set(me, cpu_callout_map);
}

/*
 * Entry point to boot a CPU.
 *
 * This is all __cpuinit, not __devinit for now because we don't support
 * CPU hotplug (yet).
 */
int __cpuinit __cpu_up(unsigned int cpu)
{
	int err;
	int apicid = cpu_present_to_apicid(cpu);

	WARN_ON(irqs_disabled());

	Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);

	if (apicid == BAD_APICID || apicid == boot_cpu_id ||
	    !physid_isset(apicid, phys_cpu_present_map)) {
		printk("__cpu_up: bad cpu %d\n", cpu);
		return -EINVAL;
	}

	/* Boot it! */
	err = do_boot_cpu(cpu, apicid);
	if (err < 0) {
		Dprintk("do_boot_cpu failed %d\n", err);
		return err;
	}

	/* Unleash the CPU! */
	Dprintk("waiting for cpu %d\n", cpu);

	while (!cpu_isset(cpu, cpu_online_map))
		cpu_relax();
	return 0;
}

/*
 * Finish the SMP boot.
 */
void __cpuinit smp_cpus_done(unsigned int max_cpus)
{
	zap_low_mappings();
	smp_cleanup_boot();

#ifdef CONFIG_X86_IO_APIC
	setup_ioapic_dest();
#endif

	detect_siblings();
	time_init_gtod();
}