1 /******************************************************************************
2 * cpupool.c
3 *
4 * Generic cpupool-handling functions.
5 *
6 * Cpupools are a feature for partitioning the physical cpus into
7 * configurable scheduling domains. Each cpupool runs its own scheduler on a
8 * dedicated set of physical cpus. A domain is bound to exactly one cpupool
9 * at any time, but it can be moved to another cpupool.
10 *
11 * (C) 2009, Juergen Gross, Fujitsu Technology Solutions
12 */
13
14 #include <xen/lib.h>
15 #include <xen/init.h>
16 #include <xen/cpumask.h>
17 #include <xen/param.h>
18 #include <xen/percpu.h>
19 #include <xen/sched.h>
20 #include <xen/warning.h>
21 #include <xen/keyhandler.h>
22 #include <xen/cpu.h>
23
24 #include "private.h"
25
26 #define for_each_cpupool(ptr) \
27 for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next))
28
29 struct cpupool *cpupool0; /* Initial cpupool with Dom0 */
30 cpumask_t cpupool_free_cpus; /* cpus not in any cpupool */
31
32 static struct cpupool *cpupool_list; /* linked list, sorted by poolid */
33
34 static int cpupool_moving_cpu = -1;
35 static struct cpupool *cpupool_cpu_moving = NULL;
36 static cpumask_t cpupool_locked_cpus;
37
38 static DEFINE_SPINLOCK(cpupool_lock);
39
40 static enum sched_gran __read_mostly opt_sched_granularity = SCHED_GRAN_cpu;
41 static unsigned int __read_mostly sched_granularity = 1;
42
43 struct sched_gran_name {
44 enum sched_gran mode;
45 char name[8];
46 };
47
48 static const struct sched_gran_name sg_name[] = {
49 {SCHED_GRAN_cpu, "cpu"},
50 {SCHED_GRAN_core, "core"},
51 {SCHED_GRAN_socket, "socket"},
52 };
53
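/* Print the scheduling granularity and cpus per sched-resource. */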
54 static void sched_gran_print(enum sched_gran mode, unsigned int gran)
55 {
56 const char *name = "";
57 unsigned int i;
58
59 for ( i = 0; i < ARRAY_SIZE(sg_name); i++ )
60 {
61 if ( mode == sg_name[i].mode )
62 {
63 name = sg_name[i].name;
64 break;
65 }
66 }
67
68 printk("Scheduling granularity: %s, %u CPU%s per sched-resource\n",
69 name, gran, gran == 1 ? "" : "s");
70 }
71
72 #ifdef CONFIG_HAS_SCHED_GRANULARITY
73 static int __init sched_select_granularity(const char *str)
74 {
75 unsigned int i;
76
77 for ( i = 0; i < ARRAY_SIZE(sg_name); i++ )
78 {
79 if ( strcmp(sg_name[i].name, str) == 0 )
80 {
81 opt_sched_granularity = sg_name[i].mode;
82 return 0;
83 }
84 }
85
86 return -EINVAL;
87 }
88 custom_param("sched-gran", sched_select_granularity);
89 #endif
90
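/*
 * Check that all online cpus have the same number of siblings for the
 * selected granularity. Returns that common count, or 0 if the cpu
 * configuration is asymmetric (the caller then falls back to a finer
 * granularity).
 */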
91 static unsigned int __init cpupool_check_granularity(void)
92 {
93 unsigned int cpu;
94 unsigned int siblings, gran = 0;
95
96 if ( opt_sched_granularity == SCHED_GRAN_cpu )
97 return 1;
98
99 for_each_online_cpu ( cpu )
100 {
101 siblings = cpumask_weight(sched_get_opt_cpumask(opt_sched_granularity,
102 cpu));
103 if ( gran == 0 )
104 gran = siblings;
105 else if ( gran != siblings )
106 return 0;
107 }
108
109 sched_disable_smt_switching = true;
110
111 return gran;
112 }
113
114 /* Setup data for selected scheduler granularity. */
115 static void __init cpupool_gran_init(void)
116 {
117 unsigned int gran = 0;
118 const char *fallback = NULL;
119
120 while ( gran == 0 )
121 {
122 gran = cpupool_check_granularity();
123
124 if ( gran == 0 )
125 {
126 switch ( opt_sched_granularity )
127 {
128 case SCHED_GRAN_core:
129 opt_sched_granularity = SCHED_GRAN_cpu;
130 fallback = "Asymmetric cpu configuration.\n"
131 "Falling back to sched-gran=cpu.\n";
132 break;
133 case SCHED_GRAN_socket:
134 opt_sched_granularity = SCHED_GRAN_core;
135 fallback = "Asymmetric cpu configuration.\n"
136 "Falling back to sched-gran=core.\n";
137 break;
138 default:
139 ASSERT_UNREACHABLE();
140 break;
141 }
142 }
143 }
144
145 if ( fallback )
146 warning_add(fallback);
147
148 sched_granularity = gran;
149 sched_gran_print(opt_sched_granularity, sched_granularity);
150 }
151
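/* Number of cpus per scheduling resource of a cpupool (1 if c is NULL). */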
152 unsigned int cpupool_get_granularity(const struct cpupool *c)
153 {
154 return c ? sched_granularity : 1;
155 }
156
157 static void free_cpupool_struct(struct cpupool *c)
158 {
159 if ( c )
160 {
161 free_cpumask_var(c->res_valid);
162 free_cpumask_var(c->cpu_valid);
163 }
164 xfree(c);
165 }
166
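/* Allocate a zeroed cpupool structure including its cpumasks. */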
167 static struct cpupool *alloc_cpupool_struct(void)
168 {
169 struct cpupool *c = xzalloc(struct cpupool);
170
171 if ( !c )
172 return NULL;
173
174 if ( !zalloc_cpumask_var(&c->cpu_valid) ||
175 !zalloc_cpumask_var(&c->res_valid) )
176 {
177 free_cpupool_struct(c);
178 c = NULL;
179 }
180
181 return c;
182 }
183
184 /*
185 * find a cpupool by its id. to be called with the cpupool lock held.
186 * if exact is not specified, the first cpupool with an id larger than or
187 * equal to the searched id is returned
188 * returns NULL if not found.
189 */
190 static struct cpupool *__cpupool_find_by_id(int id, bool exact)
191 {
192 struct cpupool **q;
193
194 ASSERT(spin_is_locked(&cpupool_lock));
195
196 for_each_cpupool(q)
197 if ( (*q)->cpupool_id >= id )
198 break;
199
200 return (!exact || (*q == NULL) || ((*q)->cpupool_id == id)) ? *q : NULL;
201 }
202
203 static struct cpupool *cpupool_find_by_id(int poolid)
204 {
205 return __cpupool_find_by_id(poolid, true);
206 }
207
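/*
 * Look up a cpupool by its id (or, if exact is false, the first cpupool with
 * an id not lower than the given one) and take a reference on it.
 * Returns NULL if no matching cpupool is found.
 */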
208 static struct cpupool *__cpupool_get_by_id(int poolid, bool exact)
209 {
210 struct cpupool *c;
211 spin_lock(&cpupool_lock);
212 c = __cpupool_find_by_id(poolid, exact);
213 if ( c != NULL )
214 atomic_inc(&c->refcnt);
215 spin_unlock(&cpupool_lock);
216 return c;
217 }
218
219 struct cpupool *cpupool_get_by_id(int poolid)
220 {
221 return __cpupool_get_by_id(poolid, true);
222 }
223
224 static struct cpupool *cpupool_get_next_by_id(int poolid)
225 {
226 return __cpupool_get_by_id(poolid, false);
227 }
228
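/* Drop a reference; scheduler and pool are freed with the last reference. */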
229 void cpupool_put(struct cpupool *pool)
230 {
231 if ( !atomic_dec_and_test(&pool->refcnt) )
232 return;
233 scheduler_free(pool->sched);
234 free_cpupool_struct(pool);
235 }
236
237 /*
238 * create a new cpupool with the specified poolid and scheduler
239 * returns a pointer to the new cpupool structure on success, NULL otherwise
240 * possible failures:
241 * - no memory
242 * - poolid already used
243 * - unknown scheduler
244 */
245 static struct cpupool *cpupool_create(
246 int poolid, unsigned int sched_id, int *perr)
247 {
248 struct cpupool *c;
249 struct cpupool **q;
250 int last = 0;
251
252 *perr = -ENOMEM;
253 if ( (c = alloc_cpupool_struct()) == NULL )
254 return NULL;
255
256 /* One reference for caller, one reference for cpupool_destroy(). */
257 atomic_set(&c->refcnt, 2);
258
259 debugtrace_printk("cpupool_create(pool=%d,sched=%u)\n", poolid, sched_id);
260
261 spin_lock(&cpupool_lock);
262
263 for_each_cpupool(q)
264 {
265 last = (*q)->cpupool_id;
266 if ( (poolid != CPUPOOLID_NONE) && (last >= poolid) )
267 break;
268 }
269 if ( *q != NULL )
270 {
271 if ( (*q)->cpupool_id == poolid )
272 {
273 *perr = -EEXIST;
274 goto err;
275 }
276 c->next = *q;
277 }
278
279 c->cpupool_id = (poolid == CPUPOOLID_NONE) ? (last + 1) : poolid;
280 if ( poolid == 0 )
281 {
282 c->sched = scheduler_get_default();
283 }
284 else
285 {
286 c->sched = scheduler_alloc(sched_id, perr);
287 if ( c->sched == NULL )
288 goto err;
289 }
290 c->sched->cpupool = c;
291 c->gran = opt_sched_granularity;
292
293 *q = c;
294
295 spin_unlock(&cpupool_lock);
296
297 debugtrace_printk("Created cpupool %d with scheduler %s (%s)\n",
298 c->cpupool_id, c->sched->name, c->sched->opt_name);
299
300 *perr = 0;
301 return c;
302
303 err:
304 spin_unlock(&cpupool_lock);
305 free_cpupool_struct(c);
306 return NULL;
307 }
308 /*
309 * destroys the given cpupool
310 * returns 0 on success, a negative errno value otherwise
311 * possible failures:
312 * - pool still in use
313 * - cpus still assigned to pool
314 * - pool not in list
315 */
316 static int cpupool_destroy(struct cpupool *c)
317 {
318 struct cpupool **q;
319
320 spin_lock(&cpupool_lock);
321 for_each_cpupool(q)
322 if ( *q == c )
323 break;
324 if ( *q != c )
325 {
326 spin_unlock(&cpupool_lock);
327 return -ENOENT;
328 }
329 if ( (c->n_dom != 0) || cpumask_weight(c->cpu_valid) )
330 {
331 spin_unlock(&cpupool_lock);
332 return -EBUSY;
333 }
334 *q = c->next;
335 spin_unlock(&cpupool_lock);
336
337 cpupool_put(c);
338
339 debugtrace_printk("cpupool_destroy(pool=%d)\n", c->cpupool_id);
340 return 0;
341 }
342
343 /*
344 * Move domain to another cpupool
345 */
346 static int cpupool_move_domain_locked(struct domain *d, struct cpupool *c)
347 {
348 int ret;
349
350 if ( unlikely(d->cpupool == c) )
351 return 0;
352
353 d->cpupool->n_dom--;
354 ret = sched_move_domain(d, c);
355 if ( ret )
356 d->cpupool->n_dom++;
357 else
358 c->n_dom++;
359
360 return ret;
361 }
362 int cpupool_move_domain(struct domain *d, struct cpupool *c)
363 {
364 int ret;
365
366 spin_lock(&cpupool_lock);
367
368 ret = cpupool_move_domain_locked(d, c);
369
370 spin_unlock(&cpupool_lock);
371
372 return ret;
373 }
374
375 /*
376 * assign a specific cpu to a cpupool
377 * cpupool_lock must be held
378 */
379 static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
380 {
381 int ret;
382 struct domain *d;
383 const cpumask_t *cpus;
384
385 cpus = sched_get_opt_cpumask(c->gran, cpu);
386
387 if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) )
388 return -EADDRNOTAVAIL;
389 ret = schedule_cpu_add(cpumask_first(cpus), c);
390 if ( ret )
391 return ret;
392
393 rcu_read_lock(&sched_res_rculock);
394
395 cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
396 if ( cpupool_moving_cpu == cpu )
397 {
398 cpupool_moving_cpu = -1;
399 cpupool_put(cpupool_cpu_moving);
400 cpupool_cpu_moving = NULL;
401 }
402 cpumask_or(c->cpu_valid, c->cpu_valid, cpus);
403 cpumask_and(c->res_valid, c->cpu_valid, &sched_res_mask);
404
405 rcu_read_unlock(&sched_res_rculock);
406
407 rcu_read_lock(&domlist_read_lock);
408 for_each_domain_in_cpupool(d, c)
409 {
410 domain_update_node_affinity(d);
411 }
412 rcu_read_unlock(&domlist_read_lock);
413
414 return 0;
415 }
416
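/*
 * Second step of removing a cpu from its cpupool: take the cpu away from the
 * scheduler and hand it back to the set of free cpus. The cpu must have been
 * marked as moving via cpupool_unassign_cpu_start() before.
 */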
417 static int cpupool_unassign_cpu_finish(struct cpupool *c)
418 {
419 int cpu = cpupool_moving_cpu;
420 const cpumask_t *cpus;
421 struct domain *d;
422 int ret;
423
424 if ( c != cpupool_cpu_moving )
425 return -EADDRNOTAVAIL;
426
427 /*
428 * We need this for scanning the domain list, both in
429 * cpu_disable_scheduler(), and at the bottom of this function.
430 */
431 rcu_read_lock(&domlist_read_lock);
432 ret = cpu_disable_scheduler(cpu);
433
434 rcu_read_lock(&sched_res_rculock);
435 cpus = get_sched_res(cpu)->cpus;
436 cpumask_or(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
437
438 /*
439 * cpu_disable_scheduler() returning an error doesn't require resetting
440 * cpupool_free_cpus' cpu bit. All error cases should be of a temporary
441 * nature and the tools will retry the operation. Even if the number of
442 * retries is limited, the in-between state can easily be repaired
443 * by adding the cpu to the cpupool again.
444 */
445 if ( !ret )
446 {
447 ret = schedule_cpu_rm(cpu);
448 if ( ret )
449 cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
450 else
451 {
452 cpupool_moving_cpu = -1;
453 cpupool_put(cpupool_cpu_moving);
454 cpupool_cpu_moving = NULL;
455 }
456 }
457 rcu_read_unlock(&sched_res_rculock);
458
459 for_each_domain_in_cpupool(d, c)
460 {
461 domain_update_node_affinity(d);
462 }
463 rcu_read_unlock(&domlist_read_lock);
464
465 return ret;
466 }
467
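/*
 * First step of removing a cpu from a cpupool: mark the cpu as moving and
 * take it out of the pool's cpu masks. If the pool's last cpu is being
 * removed, remaining (dying) domains are moved to cpupool0, while active
 * domains cause the operation to fail with -EBUSY.
 */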
468 static int cpupool_unassign_cpu_start(struct cpupool *c, unsigned int cpu)
469 {
470 int ret;
471 struct domain *d;
472 const cpumask_t *cpus;
473
474 spin_lock(&cpupool_lock);
475 ret = -EADDRNOTAVAIL;
476 if ( ((cpupool_moving_cpu != -1) || !cpumask_test_cpu(cpu, c->cpu_valid))
477 && (cpu != cpupool_moving_cpu) )
478 goto out;
479
480 ret = 0;
481 rcu_read_lock(&sched_res_rculock);
482 cpus = get_sched_res(cpu)->cpus;
483
484 if ( (c->n_dom > 0) &&
485 (cpumask_weight(c->cpu_valid) == cpumask_weight(cpus)) &&
486 (cpu != cpupool_moving_cpu) )
487 {
488 rcu_read_lock(&domlist_read_lock);
489 for_each_domain_in_cpupool(d, c)
490 {
491 if ( !d->is_dying && system_state == SYS_STATE_active )
492 {
493 ret = -EBUSY;
494 break;
495 }
496 ret = cpupool_move_domain_locked(d, cpupool0);
497 if ( ret )
498 break;
499 }
500 rcu_read_unlock(&domlist_read_lock);
501 if ( ret )
502 goto out_rcu;
503 }
504 cpupool_moving_cpu = cpu;
505 atomic_inc(&c->refcnt);
506 cpupool_cpu_moving = c;
507 cpumask_andnot(c->cpu_valid, c->cpu_valid, cpus);
508 cpumask_and(c->res_valid, c->cpu_valid, &sched_res_mask);
509
510 out_rcu:
511 rcu_read_unlock(&sched_res_rculock);
512 out:
513 spin_unlock(&cpupool_lock);
514
515 return ret;
516 }
517
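/*
 * Helper run via continue_hypercall_on_cpu() to finish removing a cpu from a
 * cpupool while executing on a different cpu.
 */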
518 static long cpupool_unassign_cpu_helper(void *info)
519 {
520 struct cpupool *c = info;
521 long ret;
522
523 debugtrace_printk("cpupool_unassign_cpu(pool=%d,cpu=%d)\n",
524 cpupool_cpu_moving->cpupool_id, cpupool_moving_cpu);
525 spin_lock(&cpupool_lock);
526
527 ret = cpupool_unassign_cpu_finish(c);
528
529 spin_unlock(&cpupool_lock);
530 debugtrace_printk("cpupool_unassign_cpu ret=%ld\n", ret);
531
532 return ret;
533 }
534
535 /*
536 * unassign a specific cpu from a cpupool
537 * we must be sure not to run on the cpu to be unassigned! to achieve this
538 * the main functionality is performed via continue_hypercall_on_cpu on a
539 * specific cpu.
540 * if the cpu to be removed is the last one of the cpupool, no active domain
541 * may remain bound to the cpupool. dying domains are moved to cpupool0 as they
542 * might be zombies.
543 * possible failures:
544 * - last cpu and still active domains in cpupool
545 * - cpu just being unplugged
546 */
547 static int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
548 {
549 int work_cpu;
550 int ret;
551 unsigned int master_cpu;
552
553 debugtrace_printk("cpupool_unassign_cpu(pool=%d,cpu=%d)\n",
554 c->cpupool_id, cpu);
555
556 if ( !cpu_online(cpu) )
557 return -EINVAL;
558
559 master_cpu = sched_get_resource_cpu(cpu);
560 ret = cpupool_unassign_cpu_start(c, master_cpu);
561 if ( ret )
562 {
563 debugtrace_printk("cpupool_unassign_cpu(pool=%d,cpu=%d) ret %d\n",
564 c->cpupool_id, cpu, ret);
565 return ret;
566 }
567
568 work_cpu = sched_get_resource_cpu(smp_processor_id());
569 if ( work_cpu == master_cpu )
570 {
571 work_cpu = cpumask_first(cpupool0->cpu_valid);
572 if ( work_cpu == master_cpu )
573 work_cpu = cpumask_last(cpupool0->cpu_valid);
574 }
575 return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c);
576 }
577
578 /*
579 * add a new domain to a cpupool
580 * possible failures:
581 * - pool does not exist
582 * - no cpu assigned to pool
583 */
584 int cpupool_add_domain(struct domain *d, int poolid)
585 {
586 struct cpupool *c;
587 int rc;
588 int n_dom = 0;
589
590 if ( poolid == CPUPOOLID_NONE )
591 return 0;
592 spin_lock(&cpupool_lock);
593 c = cpupool_find_by_id(poolid);
594 if ( c == NULL )
595 rc = -ESRCH;
596 else if ( !cpumask_weight(c->cpu_valid) )
597 rc = -ENODEV;
598 else
599 {
600 c->n_dom++;
601 n_dom = c->n_dom;
602 d->cpupool = c;
603 rc = 0;
604 }
605 spin_unlock(&cpupool_lock);
606 debugtrace_printk("cpupool_add_domain(dom=%d,pool=%d) n_dom %d rc %d\n",
607 d->domain_id, poolid, n_dom, rc);
608 return rc;
609 }
610
611 /*
612 * remove a domain from a cpupool
613 */
614 void cpupool_rm_domain(struct domain *d)
615 {
616 int cpupool_id;
617 int n_dom;
618
619 if ( d->cpupool == NULL )
620 return;
621 spin_lock(&cpupool_lock);
622 cpupool_id = d->cpupool->cpupool_id;
623 d->cpupool->n_dom--;
624 n_dom = d->cpupool->n_dom;
625 d->cpupool = NULL;
626 spin_unlock(&cpupool_lock);
627 debugtrace_printk("cpupool_rm_domain(dom=%d,pool=%d) n_dom %d\n",
628 d->domain_id, cpupool_id, n_dom);
629 return;
630 }
631
632 /*
633 * Called to add a cpu to a pool. CPUs being hot-plugged are added to pool0,
634 * as they must have been in there when unplugged.
635 */
636 static int cpupool_cpu_add(unsigned int cpu)
637 {
638 int ret = 0;
639 const cpumask_t *cpus;
640
641 spin_lock(&cpupool_lock);
642 cpumask_clear_cpu(cpu, &cpupool_locked_cpus);
643 cpumask_set_cpu(cpu, &cpupool_free_cpus);
644
645 /*
646 * If we are not resuming, we are hot-plugging the cpu, in which case
647 * we add it to pool0, as it certainly was there when hot-unplugged
648 * (or unplugging would have failed) and that is the default behavior
649 * anyway.
650 */
651 rcu_read_lock(&sched_res_rculock);
652 get_sched_res(cpu)->cpupool = NULL;
653
654 cpus = sched_get_opt_cpumask(cpupool0->gran, cpu);
655 if ( cpumask_subset(cpus, &cpupool_free_cpus) &&
656 cpumask_weight(cpus) == cpupool_get_granularity(cpupool0) )
657 ret = cpupool_assign_cpu_locked(cpupool0, cpu);
658
659 rcu_read_unlock(&sched_res_rculock);
660
661 spin_unlock(&cpupool_lock);
662
663 return ret;
664 }
665
666 /*
667 * This function is called in stop_machine context, so we can be sure no
668 * non-idle vcpu is active on the system.
669 */
670 static void cpupool_cpu_remove(unsigned int cpu)
671 {
672 int ret;
673
674 ASSERT(is_idle_vcpu(current));
675
676 if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) )
677 {
678 ret = cpupool_unassign_cpu_finish(cpupool0);
679 BUG_ON(ret);
680 }
681 cpumask_clear_cpu(cpu, &cpupool_free_cpus);
682 }
683
684 /*
685 * Called before a CPU is being removed from the system.
686 * Removing a CPU is allowed for free CPUs or CPUs in Pool-0 (the latter are
687 * moved to the free cpus before actually being removed).
688 * The CPU is locked, to forbid adding it again to another cpupool.
689 */
690 static int cpupool_cpu_remove_prologue(unsigned int cpu)
691 {
692 int ret = 0;
693 cpumask_t *cpus;
694 unsigned int master_cpu;
695
696 spin_lock(&cpupool_lock);
697
698 rcu_read_lock(&sched_res_rculock);
699 cpus = get_sched_res(cpu)->cpus;
700 master_cpu = sched_get_resource_cpu(cpu);
701 if ( cpumask_intersects(cpus, &cpupool_locked_cpus) )
702 ret = -EBUSY;
703 else
704 cpumask_set_cpu(cpu, &cpupool_locked_cpus);
705 rcu_read_unlock(&sched_res_rculock);
706
707 spin_unlock(&cpupool_lock);
708
709 if ( ret )
710 return ret;
711
712 if ( cpumask_test_cpu(master_cpu, cpupool0->cpu_valid) )
713 {
714 /* Cpupool0 is populated only after all cpus are up. */
715 ASSERT(system_state == SYS_STATE_active);
716
717 ret = cpupool_unassign_cpu_start(cpupool0, master_cpu);
718 }
719 else if ( !cpumask_test_cpu(master_cpu, &cpupool_free_cpus) )
720 ret = -ENODEV;
721
722 return ret;
723 }
724
725 /*
726 * Called during resume for all cpus which didn't come up again. The cpu must
727 * be removed from the cpupool it is assigned to. In case a cpupool would be
728 * left without any cpu, we move all domains of that cpupool to cpupool0.
729 * As we are called with all domains still frozen, there is no need to take the
730 * cpupool lock here.
731 */
732 static void cpupool_cpu_remove_forced(unsigned int cpu)
733 {
734 struct cpupool **c;
735 int ret;
736 unsigned int master_cpu = sched_get_resource_cpu(cpu);
737
738 for_each_cpupool ( c )
739 {
740 if ( cpumask_test_cpu(master_cpu, (*c)->cpu_valid) )
741 {
742 ret = cpupool_unassign_cpu_start(*c, master_cpu);
743 BUG_ON(ret);
744 ret = cpupool_unassign_cpu_finish(*c);
745 BUG_ON(ret);
746 }
747 }
748
749 cpumask_clear_cpu(cpu, &cpupool_free_cpus);
750
751 rcu_read_lock(&sched_res_rculock);
752 sched_rm_cpu(cpu);
753 rcu_read_unlock(&sched_res_rculock);
754 }
755
756 /*
757 * do cpupool related sysctl operations
758 */
759 int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op)
760 {
761 int ret;
762 struct cpupool *c;
763
764 switch ( op->op )
765 {
766
767 case XEN_SYSCTL_CPUPOOL_OP_CREATE:
768 {
769 int poolid;
770
771 poolid = (op->cpupool_id == XEN_SYSCTL_CPUPOOL_PAR_ANY) ?
772 CPUPOOLID_NONE: op->cpupool_id;
773 c = cpupool_create(poolid, op->sched_id, &ret);
774 if ( c != NULL )
775 {
776 op->cpupool_id = c->cpupool_id;
777 cpupool_put(c);
778 }
779 }
780 break;
781
782 case XEN_SYSCTL_CPUPOOL_OP_DESTROY:
783 {
784 c = cpupool_get_by_id(op->cpupool_id);
785 ret = -ENOENT;
786 if ( c == NULL )
787 break;
788 ret = cpupool_destroy(c);
789 cpupool_put(c);
790 }
791 break;
792
793 case XEN_SYSCTL_CPUPOOL_OP_INFO:
794 {
795 c = cpupool_get_next_by_id(op->cpupool_id);
796 ret = -ENOENT;
797 if ( c == NULL )
798 break;
799 op->cpupool_id = c->cpupool_id;
800 op->sched_id = c->sched->sched_id;
801 op->n_dom = c->n_dom;
802 ret = cpumask_to_xenctl_bitmap(&op->cpumap, c->cpu_valid);
803 cpupool_put(c);
804 }
805 break;
806
807 case XEN_SYSCTL_CPUPOOL_OP_ADDCPU:
808 {
809 unsigned cpu;
810 const cpumask_t *cpus;
811
812 cpu = op->cpu;
813 debugtrace_printk("cpupool_assign_cpu(pool=%d,cpu=%d)\n",
814 op->cpupool_id, cpu);
815
816 spin_lock(&cpupool_lock);
817
818 c = cpupool_find_by_id(op->cpupool_id);
819 ret = -ENOENT;
820 if ( c == NULL )
821 goto addcpu_out;
822 if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY )
823 {
824 for_each_cpu ( cpu, &cpupool_free_cpus )
825 {
826 cpus = sched_get_opt_cpumask(c->gran, cpu);
827 if ( cpumask_subset(cpus, &cpupool_free_cpus) )
828 break;
829 }
830 ret = -ENODEV;
831 if ( cpu >= nr_cpu_ids )
832 goto addcpu_out;
833 }
834 ret = -EINVAL;
835 if ( cpu >= nr_cpu_ids )
836 goto addcpu_out;
837 ret = -ENODEV;
838 cpus = sched_get_opt_cpumask(c->gran, cpu);
839 if ( !cpumask_subset(cpus, &cpupool_free_cpus) ||
840 cpumask_intersects(cpus, &cpupool_locked_cpus) )
841 goto addcpu_out;
842 ret = cpupool_assign_cpu_locked(c, cpu);
843
844 addcpu_out:
845 spin_unlock(&cpupool_lock);
846 debugtrace_printk("cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n",
847 op->cpupool_id, cpu, ret);
848
849 }
850 break;
851
852 case XEN_SYSCTL_CPUPOOL_OP_RMCPU:
853 {
854 unsigned cpu;
855
856 c = cpupool_get_by_id(op->cpupool_id);
857 ret = -ENOENT;
858 if ( c == NULL )
859 break;
860 cpu = op->cpu;
861 if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY )
862 cpu = cpumask_last(c->cpu_valid);
863 ret = (cpu < nr_cpu_ids) ? cpupool_unassign_cpu(c, cpu) : -EINVAL;
864 cpupool_put(c);
865 }
866 break;
867
868 case XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN:
869 {
870 struct domain *d;
871
872 ret = rcu_lock_remote_domain_by_id(op->domid, &d);
873 if ( ret )
874 break;
875 if ( d->cpupool == NULL )
876 {
877 ret = -EINVAL;
878 rcu_unlock_domain(d);
879 break;
880 }
881 if ( op->cpupool_id == d->cpupool->cpupool_id )
882 {
883 ret = 0;
884 rcu_unlock_domain(d);
885 break;
886 }
887 debugtrace_printk("cpupool move_domain(dom=%d)->pool=%d\n",
888 d->domain_id, op->cpupool_id);
889 ret = -ENOENT;
890 spin_lock(&cpupool_lock);
891
892 c = cpupool_find_by_id(op->cpupool_id);
893 if ( (c != NULL) && cpumask_weight(c->cpu_valid) )
894 ret = cpupool_move_domain_locked(d, c);
895
896 spin_unlock(&cpupool_lock);
897 debugtrace_printk("cpupool move_domain(dom=%d)->pool=%d ret %d\n",
898 d->domain_id, op->cpupool_id, ret);
899 rcu_unlock_domain(d);
900 }
901 break;
902
903 case XEN_SYSCTL_CPUPOOL_OP_FREEINFO:
904 {
905 ret = cpumask_to_xenctl_bitmap(
906 &op->cpumap, &cpupool_free_cpus);
907 }
908 break;
909
910 default:
911 ret = -ENOSYS;
912 break;
913 }
914
915 return ret;
916 }
917
918 int cpupool_get_id(const struct domain *d)
919 {
920 return d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
921 }
922
923 const cpumask_t *cpupool_valid_cpus(const struct cpupool *pool)
924 {
925 return pool->cpu_valid;
926 }
927
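/* Dump the scheduling state of the free cpus and of every cpupool. */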
928 void dump_runq(unsigned char key)
929 {
930 s_time_t now = NOW();
931 struct cpupool **c;
932
933 spin_lock(&cpupool_lock);
934
935 printk("sched_smt_power_savings: %s\n",
936 sched_smt_power_savings? "enabled":"disabled");
937 printk("NOW=%"PRI_stime"\n", now);
938
939 printk("Online Cpus: %*pbl\n", CPUMASK_PR(&cpu_online_map));
940 if ( !cpumask_empty(&cpupool_free_cpus) )
941 {
942 printk("Free Cpus: %*pbl\n", CPUMASK_PR(&cpupool_free_cpus));
943 schedule_dump(NULL);
944 }
945
946 for_each_cpupool(c)
947 {
948 printk("Cpupool %d:\n", (*c)->cpupool_id);
949 printk("Cpus: %*pbl\n", CPUMASK_PR((*c)->cpu_valid));
950 sched_gran_print((*c)->gran, cpupool_get_granularity(*c));
951 schedule_dump(*c);
952 }
953
954 spin_unlock(&cpupool_lock);
955 }
956
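/*
 * CPU hotplug/resume notifier keeping cpupool membership in sync with the
 * set of online cpus.
 */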
957 static int cpu_callback(
958 struct notifier_block *nfb, unsigned long action, void *hcpu)
959 {
960 unsigned int cpu = (unsigned long)hcpu;
961 int rc = 0;
962
963 switch ( action )
964 {
965 case CPU_DOWN_FAILED:
966 case CPU_ONLINE:
967 if ( system_state <= SYS_STATE_active )
968 rc = cpupool_cpu_add(cpu);
969 break;
970 case CPU_DOWN_PREPARE:
971 /* Suspend/Resume don't change assignments of cpus to cpupools. */
972 if ( system_state <= SYS_STATE_active )
973 rc = cpupool_cpu_remove_prologue(cpu);
974 break;
975 case CPU_DYING:
976 /* Suspend/Resume don't change assignments of cpus to cpupools. */
977 if ( system_state <= SYS_STATE_active )
978 cpupool_cpu_remove(cpu);
979 break;
980 case CPU_RESUME_FAILED:
981 cpupool_cpu_remove_forced(cpu);
982 break;
983 default:
984 break;
985 }
986
987 return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
988 }
989
990 static struct notifier_block cpu_nfb = {
991 .notifier_call = cpu_callback
992 };
993
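/*
 * Boot time setup: determine the scheduling granularity, create cpupool0 and
 * assign all online cpus to it.
 */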
994 static int __init cpupool_init(void)
995 {
996 unsigned int cpu;
997 int err;
998
999 cpupool_gran_init();
1000
1001 cpupool0 = cpupool_create(0, 0, &err);
1002 BUG_ON(cpupool0 == NULL);
1003 cpupool_put(cpupool0);
1004 register_cpu_notifier(&cpu_nfb);
1005
1006 spin_lock(&cpupool_lock);
1007
1008 cpumask_copy(&cpupool_free_cpus, &cpu_online_map);
1009
1010 for_each_cpu ( cpu, &cpupool_free_cpus )
1011 cpupool_assign_cpu_locked(cpupool0, cpu);
1012
1013 spin_unlock(&cpupool_lock);
1014
1015 return 0;
1016 }
1017 __initcall(cpupool_init);
1018
1019 /*
1020 * Local variables:
1021 * mode: C
1022 * c-file-style: "BSD"
1023 * c-basic-offset: 4
1024 * tab-width: 4
1025 * indent-tabs-mode: nil
1026 * End:
1027 */
1028