Commit | Line | Data |
---|---|---|
0332c2d4 ME |
1 | /* |
2 | * pseries CPU Hotplug infrastructure. | |
3 | * | |
413f7c40 ME |
4 | * Split out from arch/powerpc/platforms/pseries/setup.c |
5 | * arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c | |
0332c2d4 ME |
6 | * |
7 | * Peter Bergner, IBM March 2001. | |
8 | * Copyright (C) 2001 IBM. | |
413f7c40 ME |
9 | * Dave Engebretsen, Peter Bergner, and |
10 | * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com | |
11 | * Plus various changes from other IBM teams... | |
0332c2d4 ME |
12 | * |
13 | * Copyright (C) 2006 Michael Ellerman, IBM Corporation | |
14 | * | |
15 | * This program is free software; you can redistribute it and/or | |
16 | * modify it under the terms of the GNU General Public License | |
17 | * as published by the Free Software Foundation; either version | |
18 | * 2 of the License, or (at your option) any later version. | |
19 | */ | |
20 | ||
21 | #include <linux/kernel.h> | |
22 | #include <linux/delay.h> | |
23 | #include <linux/cpu.h> | |
24 | #include <asm/system.h> | |
25 | #include <asm/prom.h> | |
26 | #include <asm/rtas.h> | |
27 | #include <asm/firmware.h> | |
28 | #include <asm/machdep.h> | |
29 | #include <asm/vdso_datapage.h> | |
30 | #include <asm/pSeries_reconfig.h> | |
31 | #include "xics.h" | |
473980a9 | 32 | #include "plpar_wrappers.h" |
3aa565f5 | 33 | #include "offline_states.h" |
0332c2d4 ME |
34 | |
35 | /* This version can't take the spinlock, because it never returns */ | |
36 | static struct rtas_args rtas_stop_self_args = { | |
37 | .token = RTAS_UNKNOWN_SERVICE, | |
38 | .nargs = 0, | |
39 | .nret = 1, | |
40 | .rets = &rtas_stop_self_args.args[0], | |
41 | }; | |
42 | ||
3aa565f5 GS |
43 | static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) = |
44 | CPU_STATE_OFFLINE; | |
45 | static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; | |
46 | ||
47 | static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; | |
48 | ||
49 | static int cede_offline_enabled __read_mostly = 1; | |
50 | ||
51 | /* | |
52 | * Enable/disable cede_offline when available. | |
53 | */ | |
54 | static int __init setup_cede_offline(char *str) | |
55 | { | |
56 | if (!strcmp(str, "off")) | |
57 | cede_offline_enabled = 0; | |
58 | else if (!strcmp(str, "on")) | |
59 | cede_offline_enabled = 1; | |
60 | else | |
61 | return 0; | |
62 | return 1; | |
63 | } | |
64 | ||
65 | __setup("cede_offline=", setup_cede_offline); | |
66 | ||
67 | enum cpu_state_vals get_cpu_current_state(int cpu) | |
68 | { | |
69 | return per_cpu(current_state, cpu); | |
70 | } | |
71 | ||
72 | void set_cpu_current_state(int cpu, enum cpu_state_vals state) | |
73 | { | |
74 | per_cpu(current_state, cpu) = state; | |
75 | } | |
76 | ||
77 | enum cpu_state_vals get_preferred_offline_state(int cpu) | |
78 | { | |
79 | return per_cpu(preferred_offline_state, cpu); | |
80 | } | |
81 | ||
82 | void set_preferred_offline_state(int cpu, enum cpu_state_vals state) | |
83 | { | |
84 | per_cpu(preferred_offline_state, cpu) = state; | |
85 | } | |
86 | ||
87 | void set_default_offline_state(int cpu) | |
88 | { | |
89 | per_cpu(preferred_offline_state, cpu) = default_offline_state; | |
90 | } | |
91 | ||
04da6af9 | 92 | static void rtas_stop_self(void) |
0332c2d4 ME |
93 | { |
94 | struct rtas_args *args = &rtas_stop_self_args; | |
95 | ||
96 | local_irq_disable(); | |
97 | ||
98 | BUG_ON(args->token == RTAS_UNKNOWN_SERVICE); | |
99 | ||
100 | printk("cpu %u (hwid %u) Ready to die...\n", | |
101 | smp_processor_id(), hard_smp_processor_id()); | |
102 | enter_rtas(__pa(args)); | |
103 | ||
104 | panic("Alas, I survived.\n"); | |
105 | } | |
106 | ||
06ba30b6 | 107 | static void pseries_mach_cpu_die(void) |
04da6af9 | 108 | { |
3aa565f5 GS |
109 | unsigned int cpu = smp_processor_id(); |
110 | unsigned int hwcpu = hard_smp_processor_id(); | |
111 | u8 cede_latency_hint = 0; | |
112 | ||
04da6af9 ME |
113 | local_irq_disable(); |
114 | idle_task_exit(); | |
c3e8506c | 115 | xics_teardown_cpu(); |
3aa565f5 GS |
116 | |
117 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { | |
118 | set_cpu_current_state(cpu, CPU_STATE_INACTIVE); | |
119 | cede_latency_hint = 2; | |
120 | ||
121 | get_lppaca()->idle = 1; | |
122 | if (!get_lppaca()->shared_proc) | |
123 | get_lppaca()->donate_dedicated_cpu = 1; | |
124 | ||
125 | printk(KERN_INFO | |
126 | "cpu %u (hwid %u) ceding for offline with hint %d\n", | |
127 | cpu, hwcpu, cede_latency_hint); | |
128 | while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { | |
129 | extended_cede_processor(cede_latency_hint); | |
130 | printk(KERN_INFO "cpu %u (hwid %u) returned from cede.\n", | |
131 | cpu, hwcpu); | |
132 | printk(KERN_INFO | |
133 | "Decrementer value = %x Timebase value = %llx\n", | |
134 | get_dec(), get_tb()); | |
135 | } | |
136 | ||
137 | printk(KERN_INFO "cpu %u (hwid %u) got prodded to go online\n", | |
138 | cpu, hwcpu); | |
139 | ||
140 | if (!get_lppaca()->shared_proc) | |
141 | get_lppaca()->donate_dedicated_cpu = 0; | |
142 | get_lppaca()->idle = 0; | |
143 | } | |
144 | ||
145 | if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { | |
146 | unregister_slb_shadow(hwcpu, __pa(get_slb_shadow())); | |
147 | ||
148 | /* | |
149 | * NOTE: Calling start_secondary() here for now to | |
150 | * start new context. | |
151 | * However, need to do it cleanly by resetting the | |
152 | * stack pointer. | |
153 | */ | |
154 | start_secondary(); | |
155 | ||
156 | } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { | |
157 | ||
158 | set_cpu_current_state(cpu, CPU_STATE_OFFLINE); | |
159 | unregister_slb_shadow(hard_smp_processor_id(), | |
160 | __pa(get_slb_shadow())); | |
161 | rtas_stop_self(); | |
162 | } | |
163 | ||
04da6af9 ME |
164 | /* Should never get here... */ |
165 | BUG(); | |
166 | for(;;); | |
167 | } | |
168 | ||
674fa677 ME |
169 | static int qcss_tok; /* query-cpu-stopped-state token */ |
170 | ||
413f7c40 ME |
171 | /* Get state of physical CPU. |
172 | * Return codes: | |
173 | * 0 - The processor is in the RTAS stopped state | |
174 | * 1 - stop-self is in progress | |
175 | * 2 - The processor is not in the RTAS stopped state | |
176 | * -1 - Hardware Error | |
177 | * -2 - Hardware Busy, Try again later. | |
178 | */ | |
179 | static int query_cpu_stopped(unsigned int pcpu) | |
180 | { | |
674fa677 | 181 | int cpu_status, status; |
413f7c40 | 182 | |
413f7c40 ME |
183 | status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu); |
184 | if (status != 0) { | |
185 | printk(KERN_ERR | |
186 | "RTAS query-cpu-stopped-state failed: %i\n", status); | |
187 | return status; | |
188 | } | |
189 | ||
190 | return cpu_status; | |
191 | } | |
192 | ||
06ba30b6 | 193 | static int pseries_cpu_disable(void) |
413f7c40 ME |
194 | { |
195 | int cpu = smp_processor_id(); | |
196 | ||
ea0f1cab | 197 | set_cpu_online(cpu, false); |
413f7c40 ME |
198 | vdso_data->processorCount--; |
199 | ||
200 | /*fix boot_cpuid here*/ | |
201 | if (cpu == boot_cpuid) | |
202 | boot_cpuid = any_online_cpu(cpu_online_map); | |
203 | ||
204 | /* FIXME: abstract this to not be platform specific later on */ | |
205 | xics_migrate_irqs_away(); | |
206 | return 0; | |
207 | } | |
208 | ||
3aa565f5 GS |
209 | /* |
210 | * pseries_cpu_die: Wait for the cpu to die. | |
211 | * @cpu: logical processor id of the CPU whose death we're awaiting. | |
212 | * | |
213 | * This function is called from the context of the thread which is performing | |
214 | * the cpu-offline. Here we wait for long enough to allow the cpu in question | |
215 | * to self-destroy so that the cpu-offline thread can send the CPU_DEAD | |
216 | * notifications. | |
217 | * | |
218 | * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to | |
219 | * self-destruct. | |
220 | */ | |
06ba30b6 | 221 | static void pseries_cpu_die(unsigned int cpu) |
413f7c40 ME |
222 | { |
223 | int tries; | |
3aa565f5 | 224 | int cpu_status = 1; |
413f7c40 ME |
225 | unsigned int pcpu = get_hard_smp_processor_id(cpu); |
226 | ||
3aa565f5 GS |
227 | if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { |
228 | cpu_status = 1; | |
229 | for (tries = 0; tries < 1000; tries++) { | |
230 | if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) { | |
231 | cpu_status = 0; | |
232 | break; | |
233 | } | |
234 | cpu_relax(); | |
235 | } | |
236 | } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { | |
237 | ||
238 | for (tries = 0; tries < 25; tries++) { | |
239 | cpu_status = query_cpu_stopped(pcpu); | |
240 | if (cpu_status == 0 || cpu_status == -1) | |
241 | break; | |
242 | cpu_relax(); | |
243 | } | |
413f7c40 | 244 | } |
3aa565f5 | 245 | |
413f7c40 ME |
246 | if (cpu_status != 0) { |
247 | printk("Querying DEAD? cpu %i (%i) shows %i\n", | |
248 | cpu, pcpu, cpu_status); | |
249 | } | |
250 | ||
251 | /* Isolation and deallocation are definatly done by | |
252 | * drslot_chrp_cpu. If they were not they would be | |
253 | * done here. Change isolate state to Isolate and | |
254 | * change allocation-state to Unusable. | |
255 | */ | |
256 | paca[cpu].cpu_start = 0; | |
257 | } | |
258 | ||
259 | /* | |
260 | * Update cpu_present_map and paca(s) for a new cpu node. The wrinkle | |
261 | * here is that a cpu device node may represent up to two logical cpus | |
262 | * in the SMT case. We must honor the assumption in other code that | |
263 | * the logical ids for sibling SMT threads x and y are adjacent, such | |
264 | * that x^1 == y and y^1 == x. | |
265 | */ | |
06ba30b6 | 266 | static int pseries_add_processor(struct device_node *np) |
413f7c40 ME |
267 | { |
268 | unsigned int cpu; | |
269 | cpumask_t candidate_map, tmp = CPU_MASK_NONE; | |
270 | int err = -ENOSPC, len, nthreads, i; | |
271 | const u32 *intserv; | |
272 | ||
e2eb6392 | 273 | intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); |
413f7c40 ME |
274 | if (!intserv) |
275 | return 0; | |
276 | ||
277 | nthreads = len / sizeof(u32); | |
278 | for (i = 0; i < nthreads; i++) | |
279 | cpu_set(i, tmp); | |
280 | ||
86ef5c9a | 281 | cpu_maps_update_begin(); |
413f7c40 ME |
282 | |
283 | BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map)); | |
284 | ||
285 | /* Get a bitmap of unoccupied slots. */ | |
286 | cpus_xor(candidate_map, cpu_possible_map, cpu_present_map); | |
287 | if (cpus_empty(candidate_map)) { | |
288 | /* If we get here, it most likely means that NR_CPUS is | |
289 | * less than the partition's max processors setting. | |
290 | */ | |
291 | printk(KERN_ERR "Cannot add cpu %s; this system configuration" | |
292 | " supports %d logical cpus.\n", np->full_name, | |
293 | cpus_weight(cpu_possible_map)); | |
294 | goto out_unlock; | |
295 | } | |
296 | ||
297 | while (!cpus_empty(tmp)) | |
298 | if (cpus_subset(tmp, candidate_map)) | |
299 | /* Found a range where we can insert the new cpu(s) */ | |
300 | break; | |
301 | else | |
302 | cpus_shift_left(tmp, tmp, nthreads); | |
303 | ||
304 | if (cpus_empty(tmp)) { | |
305 | printk(KERN_ERR "Unable to find space in cpu_present_map for" | |
306 | " processor %s with %d thread(s)\n", np->name, | |
307 | nthreads); | |
308 | goto out_unlock; | |
309 | } | |
310 | ||
311 | for_each_cpu_mask(cpu, tmp) { | |
312 | BUG_ON(cpu_isset(cpu, cpu_present_map)); | |
ea0f1cab | 313 | set_cpu_present(cpu, true); |
413f7c40 ME |
314 | set_hard_smp_processor_id(cpu, *intserv++); |
315 | } | |
316 | err = 0; | |
317 | out_unlock: | |
86ef5c9a | 318 | cpu_maps_update_done(); |
413f7c40 ME |
319 | return err; |
320 | } | |
321 | ||
322 | /* | |
323 | * Update the present map for a cpu node which is going away, and set | |
324 | * the hard id in the paca(s) to -1 to be consistent with boot time | |
325 | * convention for non-present cpus. | |
326 | */ | |
06ba30b6 | 327 | static void pseries_remove_processor(struct device_node *np) |
413f7c40 ME |
328 | { |
329 | unsigned int cpu; | |
330 | int len, nthreads, i; | |
331 | const u32 *intserv; | |
332 | ||
e2eb6392 | 333 | intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); |
413f7c40 ME |
334 | if (!intserv) |
335 | return; | |
336 | ||
337 | nthreads = len / sizeof(u32); | |
338 | ||
86ef5c9a | 339 | cpu_maps_update_begin(); |
413f7c40 ME |
340 | for (i = 0; i < nthreads; i++) { |
341 | for_each_present_cpu(cpu) { | |
342 | if (get_hard_smp_processor_id(cpu) != intserv[i]) | |
343 | continue; | |
344 | BUG_ON(cpu_online(cpu)); | |
ea0f1cab | 345 | set_cpu_present(cpu, false); |
413f7c40 ME |
346 | set_hard_smp_processor_id(cpu, -1); |
347 | break; | |
348 | } | |
349 | if (cpu == NR_CPUS) | |
350 | printk(KERN_WARNING "Could not find cpu to remove " | |
351 | "with physical id 0x%x\n", intserv[i]); | |
352 | } | |
86ef5c9a | 353 | cpu_maps_update_done(); |
413f7c40 ME |
354 | } |
355 | ||
06ba30b6 ME |
356 | static int pseries_smp_notifier(struct notifier_block *nb, |
357 | unsigned long action, void *node) | |
413f7c40 ME |
358 | { |
359 | int err = NOTIFY_OK; | |
360 | ||
361 | switch (action) { | |
362 | case PSERIES_RECONFIG_ADD: | |
06ba30b6 | 363 | if (pseries_add_processor(node)) |
413f7c40 ME |
364 | err = NOTIFY_BAD; |
365 | break; | |
366 | case PSERIES_RECONFIG_REMOVE: | |
06ba30b6 | 367 | pseries_remove_processor(node); |
413f7c40 ME |
368 | break; |
369 | default: | |
370 | err = NOTIFY_DONE; | |
371 | break; | |
372 | } | |
373 | return err; | |
374 | } | |
375 | ||
06ba30b6 ME |
376 | static struct notifier_block pseries_smp_nb = { |
377 | .notifier_call = pseries_smp_notifier, | |
413f7c40 ME |
378 | }; |
379 | ||
3aa565f5 GS |
380 | #define MAX_CEDE_LATENCY_LEVELS 4 |
381 | #define CEDE_LATENCY_PARAM_LENGTH 10 | |
382 | #define CEDE_LATENCY_PARAM_MAX_LENGTH \ | |
383 | (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char)) | |
384 | #define CEDE_LATENCY_TOKEN 45 | |
385 | ||
386 | static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH]; | |
387 | ||
388 | static int parse_cede_parameters(void) | |
389 | { | |
3aa565f5 | 390 | memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH); |
20a8ab97 AB |
391 | return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, |
392 | NULL, | |
393 | CEDE_LATENCY_TOKEN, | |
394 | __pa(cede_parameters), | |
395 | CEDE_LATENCY_PARAM_MAX_LENGTH); | |
3aa565f5 GS |
396 | } |
397 | ||
0332c2d4 ME |
398 | static int __init pseries_cpu_hotplug_init(void) |
399 | { | |
64f27585 OJ |
400 | struct device_node *np; |
401 | const char *typep; | |
3aa565f5 | 402 | int cpu; |
64f27585 OJ |
403 | |
404 | for_each_node_by_name(np, "interrupt-controller") { | |
405 | typep = of_get_property(np, "compatible", NULL); | |
406 | if (strstr(typep, "open-pic")) { | |
407 | of_node_put(np); | |
408 | ||
409 | printk(KERN_INFO "CPU Hotplug not supported on " | |
410 | "systems using MPIC\n"); | |
411 | return 0; | |
412 | } | |
413 | } | |
414 | ||
0332c2d4 | 415 | rtas_stop_self_args.token = rtas_token("stop-self"); |
674fa677 | 416 | qcss_tok = rtas_token("query-cpu-stopped-state"); |
0332c2d4 | 417 | |
674fa677 ME |
418 | if (rtas_stop_self_args.token == RTAS_UNKNOWN_SERVICE || |
419 | qcss_tok == RTAS_UNKNOWN_SERVICE) { | |
420 | printk(KERN_INFO "CPU Hotplug not supported by firmware " | |
421 | "- disabling.\n"); | |
422 | return 0; | |
423 | } | |
04da6af9 | 424 | |
06ba30b6 ME |
425 | ppc_md.cpu_die = pseries_mach_cpu_die; |
426 | smp_ops->cpu_disable = pseries_cpu_disable; | |
427 | smp_ops->cpu_die = pseries_cpu_die; | |
413f7c40 ME |
428 | |
429 | /* Processors can be added/removed only on LPAR */ | |
3aa565f5 | 430 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
06ba30b6 | 431 | pSeries_reconfig_notifier_register(&pseries_smp_nb); |
3aa565f5 GS |
432 | cpu_maps_update_begin(); |
433 | if (cede_offline_enabled && parse_cede_parameters() == 0) { | |
434 | default_offline_state = CPU_STATE_INACTIVE; | |
435 | for_each_online_cpu(cpu) | |
436 | set_default_offline_state(cpu); | |
437 | } | |
438 | cpu_maps_update_done(); | |
439 | } | |
413f7c40 | 440 | |
0332c2d4 ME |
441 | return 0; |
442 | } | |
443 | arch_initcall(pseries_cpu_hotplug_init); |