drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Copyright (c) 2012, 2015 Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Author: liang@whamcloud.com
 */

#define DEBUG_SUBSYSTEM S_LNET

#include <linux/cpu.h>
#include <linux/sched.h>
#include "../../../include/linux/libcfs/libcfs.h"

#ifdef CONFIG_SMP

/**
 * modparam for setting number of partitions
 *
 *  0 : estimate best value based on cores or NUMA nodes
 *  1 : disable multiple partitions
 * >1 : specify number of partitions
 */
static int cpu_npartitions;
module_param(cpu_npartitions, int, 0444);
MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
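
/*
 * Example (illustrative, not from the original source): loading the
 * module with something like "modprobe libcfs cpu_npartitions=4" on a
 * 16-core node would split the cores into four partitions of four CPUs
 * each; with the default of 0, cfs_cpt_num_estimate() below picks a
 * value automatically.
 */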

/**
 * modparam for setting CPU partitions patterns:
 *
 * e.g. "0[0,1,2,3] 1[4,5,6,7]": the number before each bracket is a CPU
 * partition ID, and the numbers inside the brackets are processor IDs
 * (cores or HTs)
 *
 * e.g. "N 0[0,1] 1[2,3]": the leading character 'N' means the numbers
 * inside the brackets are NUMA node IDs; the number before each bracket
 * is still a CPU partition ID.
 *
 * NB: if the user specifies cpu_pattern, cpu_npartitions is ignored
 */
static char *cpu_pattern = "";
module_param(cpu_pattern, charp, 0444);
MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
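
/*
 * Example (illustrative): on a hypothetical two-node machine,
 * cpu_pattern="N 0[0] 1[1]" would build two partitions, one per NUMA
 * node: every CPU of node 0 lands in partition 0 and every CPU of
 * node 1 in partition 1.
 */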

struct cfs_cpt_data {
	/* serialize hotplug etc */
	spinlock_t	cpt_lock;
	/* reserved for hotplug */
	unsigned long	cpt_version;
	/* mutex to protect cpt_cpumask */
	struct mutex	cpt_mutex;
	/* scratch buffer for set/unset_node */
	cpumask_t	*cpt_cpumask;
};

static struct cfs_cpt_data	cpt_data;

void
cfs_cpt_table_free(struct cfs_cpt_table *cptab)
{
	int i;

	if (cptab->ctb_cpu2cpt != NULL) {
		LIBCFS_FREE(cptab->ctb_cpu2cpt,
			    num_possible_cpus() *
			    sizeof(cptab->ctb_cpu2cpt[0]));
	}

	for (i = 0; cptab->ctb_parts != NULL && i < cptab->ctb_nparts; i++) {
		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];

		if (part->cpt_nodemask != NULL) {
			LIBCFS_FREE(part->cpt_nodemask,
				    sizeof(*part->cpt_nodemask));
		}

		if (part->cpt_cpumask != NULL)
			LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
	}

	if (cptab->ctb_parts != NULL) {
		LIBCFS_FREE(cptab->ctb_parts,
			    cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
	}

	if (cptab->ctb_nodemask != NULL)
		LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
	if (cptab->ctb_cpumask != NULL)
		LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());

	LIBCFS_FREE(cptab, sizeof(*cptab));
}
EXPORT_SYMBOL(cfs_cpt_table_free);

struct cfs_cpt_table *
cfs_cpt_table_alloc(unsigned int ncpt)
{
	struct cfs_cpt_table *cptab;
	int i;

	LIBCFS_ALLOC(cptab, sizeof(*cptab));
	if (cptab == NULL)
		return NULL;

	cptab->ctb_nparts = ncpt;

	LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size());
	LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));

	if (cptab->ctb_cpumask == NULL || cptab->ctb_nodemask == NULL)
		goto failed;

	LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
		     num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
	if (cptab->ctb_cpu2cpt == NULL)
		goto failed;

	memset(cptab->ctb_cpu2cpt, -1,
	       num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));

	LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
	if (cptab->ctb_parts == NULL)
		goto failed;

	for (i = 0; i < ncpt; i++) {
		struct cfs_cpu_partition *part = &cptab->ctb_parts[i];

		LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
		LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
		if (part->cpt_cpumask == NULL || part->cpt_nodemask == NULL)
			goto failed;
	}

	spin_lock(&cpt_data.cpt_lock);
	/* Reserved for hotplug */
	cptab->ctb_version = cpt_data.cpt_version;
	spin_unlock(&cpt_data.cpt_lock);

	return cptab;

 failed:
	cfs_cpt_table_free(cptab);
	return NULL;
}
EXPORT_SYMBOL(cfs_cpt_table_alloc);

int
cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
{
	char *tmp = buf;
	int rc = 0;
	int i;
	int j;

	for (i = 0; i < cptab->ctb_nparts; i++) {
		if (len > 0) {
			rc = snprintf(tmp, len, "%d\t: ", i);
			len -= rc;
		}

		if (len <= 0) {
			rc = -EFBIG;
			goto out;
		}

		tmp += rc;
		for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
			rc = snprintf(tmp, len, "%d ", j);
			len -= rc;
			if (len <= 0) {
				rc = -EFBIG;
				goto out;
			}
			tmp += rc;
		}

		*tmp = '\n';
		tmp++;
		len--;
	}

 out:
	if (rc < 0)
		return rc;

	return tmp - buf;
}
EXPORT_SYMBOL(cfs_cpt_table_print);
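
/*
 * Sample output (illustrative, assuming two partitions of two CPUs
 * each): the buffer filled by cfs_cpt_table_print() would read:
 *
 *	0	: 0 1
 *	1	: 2 3
 */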

int
cfs_cpt_number(struct cfs_cpt_table *cptab)
{
	return cptab->ctb_nparts;
}
EXPORT_SYMBOL(cfs_cpt_number);

int
cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
{
	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));

	return cpt == CFS_CPT_ANY ?
	       cpumask_weight(cptab->ctb_cpumask) :
	       cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
}
EXPORT_SYMBOL(cfs_cpt_weight);

int
cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
{
	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));

	return cpt == CFS_CPT_ANY ?
	       cpumask_any_and(cptab->ctb_cpumask,
			       cpu_online_mask) < nr_cpu_ids :
	       cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
			       cpu_online_mask) < nr_cpu_ids;
}
EXPORT_SYMBOL(cfs_cpt_online);

cpumask_t *
cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
{
	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));

	return cpt == CFS_CPT_ANY ?
	       cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask;
}
EXPORT_SYMBOL(cfs_cpt_cpumask);

nodemask_t *
cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
{
	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));

	return cpt == CFS_CPT_ANY ?
	       cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
}
EXPORT_SYMBOL(cfs_cpt_nodemask);

int
cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
{
	int node;

	LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);

	if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
		CDEBUG(D_INFO, "CPU %d is invalid or offline\n", cpu);
		return 0;
	}

	if (cptab->ctb_cpu2cpt[cpu] != -1) {
		CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
		       cpu, cptab->ctb_cpu2cpt[cpu]);
		return 0;
	}

	cptab->ctb_cpu2cpt[cpu] = cpt;

	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
	LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));

	cpumask_set_cpu(cpu, cptab->ctb_cpumask);
	cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);

	node = cpu_to_node(cpu);

	/* first CPU of @node in this CPT table */
	if (!node_isset(node, *cptab->ctb_nodemask))
		node_set(node, *cptab->ctb_nodemask);

	/* first CPU of @node in this partition */
	if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
		node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);

	return 1;
}
EXPORT_SYMBOL(cfs_cpt_set_cpu);

void
cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
{
	int node;
	int i;

	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));

	if (cpu < 0 || cpu >= nr_cpu_ids) {
		CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
		return;
	}

	if (cpt == CFS_CPT_ANY) {
		/* caller doesn't know the partition ID */
		cpt = cptab->ctb_cpu2cpt[cpu];
		if (cpt < 0) { /* not set in this CPT-table */
			CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
			       cpu, cptab);
			return;
		}

	} else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
		CDEBUG(D_INFO,
		       "CPU %d is not in cpu-partition %d\n", cpu, cpt);
		return;
	}

	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
	LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));

	cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
	cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
	cptab->ctb_cpu2cpt[cpu] = -1;

	node = cpu_to_node(cpu);

	LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
	LASSERT(node_isset(node, *cptab->ctb_nodemask));

	for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
		/* does this CPT have another CPU belonging to this node? */
		if (cpu_to_node(i) == node)
			break;
	}

	if (i >= nr_cpu_ids)
		node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);

	for_each_cpu(i, cptab->ctb_cpumask) {
		/* does this CPT-table have another CPU on this node? */
		if (cpu_to_node(i) == node)
			break;
	}

	if (i >= nr_cpu_ids)
		node_clear(node, *cptab->ctb_nodemask);
}
EXPORT_SYMBOL(cfs_cpt_unset_cpu);

int
cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
{
	int i;

	if (cpumask_weight(mask) == 0 ||
	    cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
		CDEBUG(D_INFO, "No online CPU found in the CPU mask for CPU partition %d\n",
		       cpt);
		return 0;
	}

	for_each_cpu(i, mask) {
		if (!cfs_cpt_set_cpu(cptab, cpt, i))
			return 0;
	}

	return 1;
}
EXPORT_SYMBOL(cfs_cpt_set_cpumask);

void
cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
{
	int i;

	for_each_cpu(i, mask)
		cfs_cpt_unset_cpu(cptab, cpt, i);
}
EXPORT_SYMBOL(cfs_cpt_unset_cpumask);

int
cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
{
	cpumask_t *mask;
	int rc;

	if (node < 0 || node >= MAX_NUMNODES) {
		CDEBUG(D_INFO,
		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
		return 0;
	}

	mutex_lock(&cpt_data.cpt_mutex);

	mask = cpt_data.cpt_cpumask;
	cpumask_copy(mask, cpumask_of_node(node));

	rc = cfs_cpt_set_cpumask(cptab, cpt, mask);

	mutex_unlock(&cpt_data.cpt_mutex);

	return rc;
}
EXPORT_SYMBOL(cfs_cpt_set_node);

void
cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
{
	cpumask_t *mask;

	if (node < 0 || node >= MAX_NUMNODES) {
		CDEBUG(D_INFO,
		       "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
		return;
	}

	mutex_lock(&cpt_data.cpt_mutex);

	mask = cpt_data.cpt_cpumask;
	cpumask_copy(mask, cpumask_of_node(node));

	cfs_cpt_unset_cpumask(cptab, cpt, mask);

	mutex_unlock(&cpt_data.cpt_mutex);
}
EXPORT_SYMBOL(cfs_cpt_unset_node);

int
cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
{
	int i;

	for_each_node_mask(i, *mask) {
		if (!cfs_cpt_set_node(cptab, cpt, i))
			return 0;
	}

	return 1;
}
EXPORT_SYMBOL(cfs_cpt_set_nodemask);

void
cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
{
	int i;

	for_each_node_mask(i, *mask)
		cfs_cpt_unset_node(cptab, cpt, i);
}
EXPORT_SYMBOL(cfs_cpt_unset_nodemask);

void
cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
{
	int last;
	int i;

	if (cpt == CFS_CPT_ANY) {
		last = cptab->ctb_nparts - 1;
		cpt = 0;
	} else {
		last = cpt;
	}

	for (; cpt <= last; cpt++) {
		for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask)
			cfs_cpt_unset_cpu(cptab, cpt, i);
	}
}
EXPORT_SYMBOL(cfs_cpt_clear);

int
cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
{
	nodemask_t *mask;
	int weight;
	int rotor;
	int node;

	/* convert CPU partition ID to HW node id */

	if (cpt < 0 || cpt >= cptab->ctb_nparts) {
		mask = cptab->ctb_nodemask;
		rotor = cptab->ctb_spread_rotor++;
	} else {
		mask = cptab->ctb_parts[cpt].cpt_nodemask;
		rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
	}

	weight = nodes_weight(*mask);
	LASSERT(weight > 0);

	rotor %= weight;

	for_each_node_mask(node, *mask) {
		if (rotor-- == 0)
			return node;
	}

	LBUG();
	return 0;
}
EXPORT_SYMBOL(cfs_cpt_spread_node);
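
/*
 * Illustrative behaviour: for a partition whose nodemask covers NUMA
 * nodes {0, 2}, successive cfs_cpt_spread_node() calls advance the
 * rotor and return 0, 2, 0, 2, ..., spreading per-node allocations
 * round-robin across the partition's nodes.
 */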

int
cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
{
	int cpu = smp_processor_id();
	int cpt = cptab->ctb_cpu2cpt[cpu];

	if (cpt < 0) {
		if (!remap)
			return cpt;

		/* don't return a negative value, for the safety of upper
		 * layers; instead map the unknown CPU to a valid
		 * partition ID */
		cpt = cpu % cptab->ctb_nparts;
	}

	return cpt;
}
EXPORT_SYMBOL(cfs_cpt_current);

int
cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
{
	LASSERT(cpu >= 0 && cpu < nr_cpu_ids);

	return cptab->ctb_cpu2cpt[cpu];
}
EXPORT_SYMBOL(cfs_cpt_of_cpu);

int
cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
{
	cpumask_t *cpumask;
	nodemask_t *nodemask;
	int rc;
	int i;

	LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));

	if (cpt == CFS_CPT_ANY) {
		cpumask = cptab->ctb_cpumask;
		nodemask = cptab->ctb_nodemask;
	} else {
		cpumask = cptab->ctb_parts[cpt].cpt_cpumask;
		nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
	}

	if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) {
		CERROR("No online CPU found in CPU partition %d, did someone hotplug CPUs on this system? You may need to reload the Lustre modules to keep the system working.\n",
		       cpt);
		return -EINVAL;
	}

	for_each_online_cpu(i) {
		if (cpumask_test_cpu(i, cpumask))
			continue;

		rc = set_cpus_allowed_ptr(current, cpumask);
		set_mems_allowed(*nodemask);
		if (rc == 0)
			schedule();	/* switch to allowed CPU */

		return rc;
	}

	/* don't need to set affinity because all online CPUs are covered */
	return 0;
}
EXPORT_SYMBOL(cfs_cpt_bind);
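
/*
 * Usage sketch (hypothetical caller, for illustration only): a service
 * thread wanting CPU and memory affinity for its partition might do:
 *
 *	int cpt = cfs_cpt_current(cfs_cpt_table, 1);
 *	int rc = cfs_cpt_bind(cfs_cpt_table, cpt);
 *
 *	if (rc != 0)
 *		CWARN("cannot bind to CPT %d: rc = %d\n", cpt, rc);
 */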

/**
 * Choose at most \a number CPUs from \a node and set them in \a cpt.
 * We always prefer CPUs in the same core/socket.
 */
static int
cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
		     cpumask_t *node, int number)
{
	cpumask_t *socket = NULL;
	cpumask_t *core = NULL;
	int rc = 0;
	int cpu;

	LASSERT(number > 0);

	if (number >= cpumask_weight(node)) {
		while (!cpumask_empty(node)) {
			cpu = cpumask_first(node);

			rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
			if (!rc)
				return -EINVAL;
			cpumask_clear_cpu(cpu, node);
		}
		return 0;
	}

	/* allocate scratch buffers */
	LIBCFS_ALLOC(socket, cpumask_size());
	LIBCFS_ALLOC(core, cpumask_size());
	if (socket == NULL || core == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	while (!cpumask_empty(node)) {
		cpu = cpumask_first(node);

		/* get cpumask for cores in the same socket */
		cpumask_copy(socket, topology_core_cpumask(cpu));
		cpumask_and(socket, socket, node);

		LASSERT(!cpumask_empty(socket));

		while (!cpumask_empty(socket)) {
			int i;

			/* get cpumask for HTs in the same core */
			cpumask_copy(core, topology_sibling_cpumask(cpu));
			cpumask_and(core, core, node);

			LASSERT(!cpumask_empty(core));

			for_each_cpu(i, core) {
				cpumask_clear_cpu(i, socket);
				cpumask_clear_cpu(i, node);

				rc = cfs_cpt_set_cpu(cptab, cpt, i);
				if (!rc) {
					rc = -EINVAL;
					goto out;
				}

				if (--number == 0)
					goto out;
			}
			cpu = cpumask_first(socket);
		}
	}

 out:
	if (socket != NULL)
		LIBCFS_FREE(socket, cpumask_size());
	if (core != NULL)
		LIBCFS_FREE(core, cpumask_size());
	return rc;
}

#define CPT_WEIGHT_MIN	4u

static unsigned int
cfs_cpt_num_estimate(void)
{
	unsigned nnode = num_online_nodes();
	unsigned ncpu = num_online_cpus();
	unsigned ncpt;

	if (ncpu <= CPT_WEIGHT_MIN) {
		ncpt = 1;
		goto out;
	}

	/* generate a reasonable number of CPU partitions based on the total
	 * number of CPUs; the preferred N should be a power of 2 and satisfy:
	 * 2 * (N - 1)^2 < NCPUS <= 2 * N^2 */
	for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1)
		;

	if (ncpt <= nnode) { /* fat NUMA system */
		while (nnode > ncpt)
			nnode >>= 1;

	} else { /* ncpt > nnode */
		while ((nnode << 1) <= ncpt)
			nnode <<= 1;
	}

	ncpt = nnode;

 out:
#if (BITS_PER_LONG == 32)
	/* configuring many CPU partitions on a 32-bit system could consume
	 * too much memory */
	ncpt = min(2U, ncpt);
#endif
	while (ncpu % ncpt != 0)
		ncpt--; /* worst case is 1 */

	return ncpt;
}
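
/*
 * Worked example (illustrative): with 16 online CPUs on one NUMA node,
 * the loop above stops at ncpt = 4 (16 <= 2 * 4^2 while 16 > 2 * 2^2),
 * the node-rounding step keeps ncpt = 4, and 16 % 4 == 0, so four
 * partitions of four CPUs each are suggested.
 */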

static struct cfs_cpt_table *
cfs_cpt_table_create(int ncpt)
{
	struct cfs_cpt_table *cptab = NULL;
	cpumask_t *mask = NULL;
	int cpt = 0;
	int num;
	int rc;
	int i;

	rc = cfs_cpt_num_estimate();
	if (ncpt <= 0)
		ncpt = rc;

	if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
		CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issues or run out of memory while under pressure\n",
		      ncpt, rc);
	}

	if (num_online_cpus() % ncpt != 0) {
		CERROR("CPU number %d is not a multiple of cpu_npartitions %d, please try a different cpu_npartitions value or set a pattern string with cpu_pattern=STRING\n",
		       (int)num_online_cpus(), ncpt);
		goto failed;
	}

	cptab = cfs_cpt_table_alloc(ncpt);
	if (cptab == NULL) {
		CERROR("Failed to allocate CPU map(%d)\n", ncpt);
		goto failed;
	}

	num = num_online_cpus() / ncpt;
	if (num == 0) {
		CERROR("CPU changed while setting CPU partition\n");
		goto failed;
	}

	LIBCFS_ALLOC(mask, cpumask_size());
	if (mask == NULL) {
		CERROR("Failed to allocate scratch cpumask\n");
		goto failed;
	}

	for_each_online_node(i) {
		cpumask_copy(mask, cpumask_of_node(i));

		while (!cpumask_empty(mask)) {
			struct cfs_cpu_partition *part;
			int n;

			if (cpt >= ncpt)
				goto failed;

			part = &cptab->ctb_parts[cpt];

			n = num - cpumask_weight(part->cpt_cpumask);
			LASSERT(n > 0);

			rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
			if (rc < 0)
				goto failed;

			LASSERT(num >= cpumask_weight(part->cpt_cpumask));
			if (num == cpumask_weight(part->cpt_cpumask))
				cpt++;
		}
	}

	if (cpt != ncpt ||
	    num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
		CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
		       cptab->ctb_nparts, num, cpt,
		       cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
		goto failed;
	}

	LIBCFS_FREE(mask, cpumask_size());

	return cptab;

 failed:
	CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
	       ncpt, num_online_nodes(), num_online_cpus());

	if (mask != NULL)
		LIBCFS_FREE(mask, cpumask_size());

	if (cptab != NULL)
		cfs_cpt_table_free(cptab);

	return NULL;
}

static struct cfs_cpt_table *
cfs_cpt_table_create_pattern(char *pattern)
{
	struct cfs_cpt_table *cptab;
	char *str = pattern;
	int node = 0;
	int high;
	int ncpt;
	int c;

	for (ncpt = 0;; ncpt++) { /* quick scan of brackets */
		str = strchr(str, '[');
		if (str == NULL)
			break;
		str++;
	}

	str = cfs_trimwhite(pattern);
	if (*str == 'n' || *str == 'N') {
		pattern = str + 1;
		node = 1;
	}

	if (ncpt == 0 ||
	    (node && ncpt > num_online_nodes()) ||
	    (!node && ncpt > num_online_cpus())) {
		CERROR("Invalid pattern %s, or too many partitions %d\n",
		       pattern, ncpt);
		return NULL;
	}

	high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;

	cptab = cfs_cpt_table_alloc(ncpt);
	if (cptab == NULL) {
		CERROR("Failed to allocate cpu partition table\n");
		return NULL;
	}

	for (str = cfs_trimwhite(pattern), c = 0;; c++) {
		struct cfs_range_expr *range;
		struct cfs_expr_list *el;
		char *bracket = strchr(str, '[');
		int cpt;
		int rc;
		int i;
		int n;

		if (bracket == NULL) {
			if (*str != 0) {
				CERROR("Invalid pattern %s\n", str);
				goto failed;
			} else if (c != ncpt) {
				CERROR("Expect %d partitions but found %d\n",
				       ncpt, c);
				goto failed;
			}
			break;
		}

		if (sscanf(str, "%d%n", &cpt, &n) < 1) {
			CERROR("Invalid cpu pattern %s\n", str);
			goto failed;
		}

		if (cpt < 0 || cpt >= ncpt) {
			CERROR("Invalid partition id %d, total partitions %d\n",
			       cpt, ncpt);
			goto failed;
		}

		if (cfs_cpt_weight(cptab, cpt) != 0) {
			CERROR("Partition %d has already been set.\n", cpt);
			goto failed;
		}

		str = cfs_trimwhite(str + n);
		if (str != bracket) {
			CERROR("Invalid pattern %s\n", str);
			goto failed;
		}

		bracket = strchr(str, ']');
		if (bracket == NULL) {
			CERROR("Missing right bracket for cpt %d, %s\n",
			       cpt, str);
			goto failed;
		}

		if (cfs_expr_list_parse(str, (bracket - str) + 1,
					0, high, &el) != 0) {
			CERROR("Can't parse number range: %s\n", str);
			goto failed;
		}

		list_for_each_entry(range, &el->el_exprs, re_link) {
			for (i = range->re_lo; i <= range->re_hi; i++) {
				if ((i - range->re_lo) % range->re_stride != 0)
					continue;

				rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
					    cfs_cpt_set_cpu(cptab, cpt, i);
				if (!rc) {
					cfs_expr_list_free(el);
					goto failed;
				}
			}
		}

		cfs_expr_list_free(el);

		if (!cfs_cpt_online(cptab, cpt)) {
			CERROR("No online CPU found on partition %d\n", cpt);
			goto failed;
		}

		str = cfs_trimwhite(bracket + 1);
	}

	return cptab;

 failed:
	cfs_cpt_table_free(cptab);
	return NULL;
}
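
/*
 * Walk-through (illustrative): for cpu_pattern="0[0-3] 1[4-7]", the
 * bracket scan above finds ncpt = 2, each "<cpt>[<range>]" clause is
 * parsed with cfs_expr_list_parse(), and cfs_cpt_set_cpu() assigns
 * CPUs 0-3 to partition 0 and CPUs 4-7 to partition 1. Range
 * expressions may also carry a stride, e.g. "0[0-7/2]" would select
 * CPUs 0, 2, 4 and 6 (assuming the expression parser accepts stride
 * syntax, as the re_stride handling above suggests).
 */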

#ifdef CONFIG_HOTPLUG_CPU
static int
cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	bool warn;

	switch (action) {
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		spin_lock(&cpt_data.cpt_lock);
		cpt_data.cpt_version++;
		spin_unlock(&cpt_data.cpt_lock);
		/* fall through */
	default:
		if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) {
			CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n",
			       cpu, action);
			break;
		}

		mutex_lock(&cpt_data.cpt_mutex);
		/* if all HTs in a core are offline, it may break affinity */
		cpumask_copy(cpt_data.cpt_cpumask,
			     topology_sibling_cpumask(cpu));
		warn = cpumask_any_and(cpt_data.cpt_cpumask,
				       cpu_online_mask) >= nr_cpu_ids;
		mutex_unlock(&cpt_data.cpt_mutex);
		CDEBUG(warn ? D_WARNING : D_INFO,
		       "Lustre cannot handle CPU plug-out well yet; performance and stability could be impacted [CPU %u action: %lx]\n",
		       cpu, action);
	}

	return NOTIFY_OK;
}

static struct notifier_block cfs_cpu_notifier = {
	.notifier_call	= cfs_cpu_notify,
	.priority	= 0
};

#endif

void
cfs_cpu_fini(void)
{
	if (cfs_cpt_table != NULL)
		cfs_cpt_table_free(cfs_cpt_table);

#ifdef CONFIG_HOTPLUG_CPU
	unregister_hotcpu_notifier(&cfs_cpu_notifier);
#endif
	if (cpt_data.cpt_cpumask != NULL)
		LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size());
}

int
cfs_cpu_init(void)
{
	LASSERT(cfs_cpt_table == NULL);

	memset(&cpt_data, 0, sizeof(cpt_data));

	LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size());
	if (cpt_data.cpt_cpumask == NULL) {
		CERROR("Failed to allocate scratch buffer\n");
		return -1;
	}

	spin_lock_init(&cpt_data.cpt_lock);
	mutex_init(&cpt_data.cpt_mutex);

#ifdef CONFIG_HOTPLUG_CPU
	register_hotcpu_notifier(&cfs_cpu_notifier);
#endif

	if (*cpu_pattern != 0) {
		cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
		if (cfs_cpt_table == NULL) {
			CERROR("Failed to create cptab from pattern %s\n",
			       cpu_pattern);
			goto failed;
		}

	} else {
		cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
		if (cfs_cpt_table == NULL) {
			CERROR("Failed to create cptab with npartitions %d\n",
			       cpu_npartitions);
			goto failed;
		}
	}

	spin_lock(&cpt_data.cpt_lock);
	if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
		spin_unlock(&cpt_data.cpt_lock);
		CERROR("CPU hotplug/unplug happened during setup\n");
		goto failed;
	}
	spin_unlock(&cpt_data.cpt_lock);

	LCONSOLE(0, "HW CPU cores: %d, npartitions: %d\n",
		 num_online_cpus(), cfs_cpt_number(cfs_cpt_table));
	return 0;

 failed:
	cfs_cpu_fini();
	return -1;
}

#endif