/******************************************************************************
 * cpupool.c
 *
 * Generic cpupool-handling functions.
 *
 * Cpupools are a feature providing configurable scheduling domains. Each
 * cpupool runs its own scheduler on a dedicated set of physical cpus.
 * A domain is bound to one cpupool at any time, but it can be moved to
 * another cpupool.
 *
 * (C) 2009, Juergen Gross, Fujitsu Technology Solutions
 */

#include <xen/lib.h>
#include <xen/init.h>
#include <xen/cpumask.h>
#include <xen/param.h>
#include <xen/percpu.h>
#include <xen/sched.h>
#include <xen/warning.h>
#include <xen/keyhandler.h>
#include <xen/cpu.h>

#include "private.h"

#define for_each_cpupool(ptr)    \
    for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next))

struct cpupool *cpupool0;                /* Initial cpupool with Dom0 */
cpumask_t cpupool_free_cpus;             /* cpus not in any cpupool */

static struct cpupool *cpupool_list;     /* linked list, sorted by poolid */

static int cpupool_moving_cpu = -1;
static struct cpupool *cpupool_cpu_moving = NULL;
static cpumask_t cpupool_locked_cpus;

static DEFINE_SPINLOCK(cpupool_lock);

static enum sched_gran __read_mostly opt_sched_granularity = SCHED_GRAN_cpu;
static unsigned int __read_mostly sched_granularity = 1;

struct sched_gran_name {
    enum sched_gran mode;
    char name[8];
};

static const struct sched_gran_name sg_name[] = {
    {SCHED_GRAN_cpu, "cpu"},
    {SCHED_GRAN_core, "core"},
    {SCHED_GRAN_socket, "socket"},
};

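/* Print the scheduling granularity mode and the number of cpus per sched-resource. */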
static void sched_gran_print(enum sched_gran mode, unsigned int gran)
{
    const char *name = "";
    unsigned int i;

    for ( i = 0; i < ARRAY_SIZE(sg_name); i++ )
    {
        if ( mode == sg_name[i].mode )
        {
            name = sg_name[i].name;
            break;
        }
    }

    printk("Scheduling granularity: %s, %u CPU%s per sched-resource\n",
           name, gran, gran == 1 ? "" : "s");
}

#ifdef CONFIG_HAS_SCHED_GRANULARITY
static int __init sched_select_granularity(const char *str)
{
    unsigned int i;

    for ( i = 0; i < ARRAY_SIZE(sg_name); i++ )
    {
        if ( strcmp(sg_name[i].name, str) == 0 )
        {
            opt_sched_granularity = sg_name[i].mode;
            return 0;
        }
    }

    return -EINVAL;
}
custom_param("sched-gran", sched_select_granularity);
#endif

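/*
 * Return the uniform number of cpus per scheduling resource implied by the
 * selected granularity, or 0 if the cpu topology is asymmetric.
 */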
static unsigned int __init cpupool_check_granularity(void)
{
    unsigned int cpu;
    unsigned int siblings, gran = 0;

    if ( opt_sched_granularity == SCHED_GRAN_cpu )
        return 1;

    for_each_online_cpu ( cpu )
    {
        siblings = cpumask_weight(sched_get_opt_cpumask(opt_sched_granularity,
                                                        cpu));
        if ( gran == 0 )
            gran = siblings;
        else if ( gran != siblings )
            return 0;
    }

    sched_disable_smt_switching = true;

    return gran;
}

/* Setup data for selected scheduler granularity. */
static void __init cpupool_gran_init(void)
{
    unsigned int gran = 0;
    const char *fallback = NULL;

    while ( gran == 0 )
    {
        gran = cpupool_check_granularity();

        if ( gran == 0 )
        {
            switch ( opt_sched_granularity )
            {
            case SCHED_GRAN_core:
                opt_sched_granularity = SCHED_GRAN_cpu;
                fallback = "Asymmetric cpu configuration.\n"
                           "Falling back to sched-gran=cpu.\n";
                break;
            case SCHED_GRAN_socket:
                opt_sched_granularity = SCHED_GRAN_core;
                fallback = "Asymmetric cpu configuration.\n"
                           "Falling back to sched-gran=core.\n";
                break;
            default:
                ASSERT_UNREACHABLE();
                break;
            }
        }
    }

    if ( fallback )
        warning_add(fallback);

    sched_granularity = gran;
    sched_gran_print(opt_sched_granularity, sched_granularity);
}

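/* Return the scheduling granularity of a pool (1 if c is NULL, i.e. for free cpus). */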
unsigned int cpupool_get_granularity(const struct cpupool *c)
{
    return c ? sched_granularity : 1;
}

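/* Free a cpupool structure including its cpumasks. */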
static void free_cpupool_struct(struct cpupool *c)
{
    if ( c )
    {
        free_cpumask_var(c->res_valid);
        free_cpumask_var(c->cpu_valid);
    }
    xfree(c);
}

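/* Allocate a zeroed cpupool structure and its cpumasks. */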
static struct cpupool *alloc_cpupool_struct(void)
{
    struct cpupool *c = xzalloc(struct cpupool);

    if ( !c )
        return NULL;

    if ( !zalloc_cpumask_var(&c->cpu_valid) ||
         !zalloc_cpumask_var(&c->res_valid) )
    {
        free_cpupool_struct(c);
        c = NULL;
    }

    return c;
}

/*
 * Find a cpupool by its id. To be called with the cpupool lock held.
 * If exact is not specified, the first cpupool with an id larger than or
 * equal to the searched id is returned.
 * Returns NULL if not found.
 */
static struct cpupool *__cpupool_find_by_id(int id, bool exact)
{
    struct cpupool **q;

    ASSERT(spin_is_locked(&cpupool_lock));

    for_each_cpupool(q)
        if ( (*q)->cpupool_id >= id )
            break;

    return (!exact || (*q == NULL) || ((*q)->cpupool_id == id)) ? *q : NULL;
}

static struct cpupool *cpupool_find_by_id(int poolid)
{
    return __cpupool_find_by_id(poolid, true);
}

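/* Look up a cpupool by id and take a reference on it (drop with cpupool_put()). */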
static struct cpupool *__cpupool_get_by_id(int poolid, bool exact)
{
    struct cpupool *c;
    spin_lock(&cpupool_lock);
    c = __cpupool_find_by_id(poolid, exact);
    if ( c != NULL )
        atomic_inc(&c->refcnt);
    spin_unlock(&cpupool_lock);
    return c;
}

struct cpupool *cpupool_get_by_id(int poolid)
{
    return __cpupool_get_by_id(poolid, true);
}

static struct cpupool *cpupool_get_next_by_id(int poolid)
{
    return __cpupool_get_by_id(poolid, false);
}

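/* Drop a reference; the pool and its scheduler are freed with the last reference. */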
void cpupool_put(struct cpupool *pool)
{
    if ( !atomic_dec_and_test(&pool->refcnt) )
        return;
    scheduler_free(pool->sched);
    free_cpupool_struct(pool);
}

/*
 * Create a new cpupool with the specified poolid and scheduler.
 * Returns a pointer to the new cpupool structure on success, NULL otherwise.
 * Possible failures:
 * - no memory
 * - poolid already used
 * - unknown scheduler
 */
static struct cpupool *cpupool_create(
    int poolid, unsigned int sched_id, int *perr)
{
    struct cpupool *c;
    struct cpupool **q;
    int last = 0;

    *perr = -ENOMEM;
    if ( (c = alloc_cpupool_struct()) == NULL )
        return NULL;

    /* One reference for caller, one reference for cpupool_destroy(). */
    atomic_set(&c->refcnt, 2);

    debugtrace_printk("cpupool_create(pool=%d,sched=%u)\n", poolid, sched_id);

    spin_lock(&cpupool_lock);

    for_each_cpupool(q)
    {
        last = (*q)->cpupool_id;
        if ( (poolid != CPUPOOLID_NONE) && (last >= poolid) )
            break;
    }
    if ( *q != NULL )
    {
        if ( (*q)->cpupool_id == poolid )
        {
            *perr = -EEXIST;
            goto err;
        }
        c->next = *q;
    }

    c->cpupool_id = (poolid == CPUPOOLID_NONE) ? (last + 1) : poolid;
    if ( poolid == 0 )
    {
        c->sched = scheduler_get_default();
    }
    else
    {
        c->sched = scheduler_alloc(sched_id, perr);
        if ( c->sched == NULL )
            goto err;
    }
    c->sched->cpupool = c;
    c->gran = opt_sched_granularity;

    *q = c;

    spin_unlock(&cpupool_lock);

    debugtrace_printk("Created cpupool %d with scheduler %s (%s)\n",
                      c->cpupool_id, c->sched->name, c->sched->opt_name);

    *perr = 0;
    return c;

 err:
    spin_unlock(&cpupool_lock);
    free_cpupool_struct(c);
    return NULL;
}

/*
 * Destroy the given cpupool.
 * Returns 0 on success, a negative error code otherwise.
 * Possible failures:
 * - pool still in use
 * - cpus still assigned to pool
 * - pool not in list
 */
static int cpupool_destroy(struct cpupool *c)
{
    struct cpupool **q;

    spin_lock(&cpupool_lock);
    for_each_cpupool(q)
        if ( *q == c )
            break;
    if ( *q != c )
    {
        spin_unlock(&cpupool_lock);
        return -ENOENT;
    }
    if ( (c->n_dom != 0) || cpumask_weight(c->cpu_valid) )
    {
        spin_unlock(&cpupool_lock);
        return -EBUSY;
    }
    *q = c->next;
    spin_unlock(&cpupool_lock);

    cpupool_put(c);

    debugtrace_printk("cpupool_destroy(pool=%d)\n", c->cpupool_id);
    return 0;
}

/*
 * Move domain to another cpupool
 */
static int cpupool_move_domain_locked(struct domain *d, struct cpupool *c)
{
    int ret;

    if ( unlikely(d->cpupool == c) )
        return 0;

    d->cpupool->n_dom--;
    ret = sched_move_domain(d, c);
    if ( ret )
        d->cpupool->n_dom++;
    else
        c->n_dom++;

    return ret;
}

int cpupool_move_domain(struct domain *d, struct cpupool *c)
{
    int ret;

    spin_lock(&cpupool_lock);

    ret = cpupool_move_domain_locked(d, c);

    spin_unlock(&cpupool_lock);

    return ret;
}

/*
 * Assign a specific cpu to a cpupool.
 * The cpupool_lock must be held.
 */
static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
{
    int ret;
    struct domain *d;
    const cpumask_t *cpus;

    cpus = sched_get_opt_cpumask(c->gran, cpu);

    if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) )
        return -EADDRNOTAVAIL;
    ret = schedule_cpu_add(cpumask_first(cpus), c);
    if ( ret )
        return ret;

    rcu_read_lock(&sched_res_rculock);

    cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
    if ( cpupool_moving_cpu == cpu )
    {
        cpupool_moving_cpu = -1;
        cpupool_put(cpupool_cpu_moving);
        cpupool_cpu_moving = NULL;
    }
    cpumask_or(c->cpu_valid, c->cpu_valid, cpus);
    cpumask_and(c->res_valid, c->cpu_valid, &sched_res_mask);

    rcu_read_unlock(&sched_res_rculock);

    rcu_read_lock(&domlist_read_lock);
    for_each_domain_in_cpupool(d, c)
    {
        domain_update_node_affinity(d);
    }
    rcu_read_unlock(&domlist_read_lock);

    return 0;
}

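/*
 * Second half of removing a cpu from its cpupool: disable scheduling on the
 * moving cpu, remove it from the scheduler and return it to the free cpus.
 */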
static int cpupool_unassign_cpu_finish(struct cpupool *c)
{
    int cpu = cpupool_moving_cpu;
    const cpumask_t *cpus;
    struct domain *d;
    int ret;

    if ( c != cpupool_cpu_moving )
        return -EADDRNOTAVAIL;

    /*
     * We need this for scanning the domain list, both in
     * cpu_disable_scheduler(), and at the bottom of this function.
     */
    rcu_read_lock(&domlist_read_lock);
    ret = cpu_disable_scheduler(cpu);

    rcu_read_lock(&sched_res_rculock);
    cpus = get_sched_res(cpu)->cpus;
    cpumask_or(&cpupool_free_cpus, &cpupool_free_cpus, cpus);

    /*
     * cpu_disable_scheduler() returning an error doesn't require resetting
     * cpupool_free_cpus' cpu bit. All error cases should be of temporary
     * nature and tools will retry the operation. Even if the number of
     * retries may be limited, the in-between state can easily be repaired
     * by adding the cpu to the cpupool again.
     */
    if ( !ret )
    {
        ret = schedule_cpu_rm(cpu);
        if ( ret )
            cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
        else
        {
            cpupool_moving_cpu = -1;
            cpupool_put(cpupool_cpu_moving);
            cpupool_cpu_moving = NULL;
        }
    }
    rcu_read_unlock(&sched_res_rculock);

    for_each_domain_in_cpupool(d, c)
    {
        domain_update_node_affinity(d);
    }
    rcu_read_unlock(&domlist_read_lock);

    return ret;
}

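/*
 * First half of removing a cpu from a cpupool: mark the cpu as moving and
 * take it out of the pool's cpu_valid mask. If it is the last cpu of the
 * pool, remaining (non-active) domains are moved to cpupool0 first.
 */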
static int cpupool_unassign_cpu_start(struct cpupool *c, unsigned int cpu)
{
    int ret;
    struct domain *d;
    const cpumask_t *cpus;

    spin_lock(&cpupool_lock);
    ret = -EADDRNOTAVAIL;
    if ( ((cpupool_moving_cpu != -1) || !cpumask_test_cpu(cpu, c->cpu_valid))
         && (cpu != cpupool_moving_cpu) )
        goto out;

    ret = 0;
    rcu_read_lock(&sched_res_rculock);
    cpus = get_sched_res(cpu)->cpus;

    if ( (c->n_dom > 0) &&
         (cpumask_weight(c->cpu_valid) == cpumask_weight(cpus)) &&
         (cpu != cpupool_moving_cpu) )
    {
        rcu_read_lock(&domlist_read_lock);
        for_each_domain_in_cpupool(d, c)
        {
            if ( !d->is_dying && system_state == SYS_STATE_active )
            {
                ret = -EBUSY;
                break;
            }
            ret = cpupool_move_domain_locked(d, cpupool0);
            if ( ret )
                break;
        }
        rcu_read_unlock(&domlist_read_lock);
        if ( ret )
            goto out_rcu;
    }
    cpupool_moving_cpu = cpu;
    atomic_inc(&c->refcnt);
    cpupool_cpu_moving = c;
    cpumask_andnot(c->cpu_valid, c->cpu_valid, cpus);
    cpumask_and(c->res_valid, c->cpu_valid, &sched_res_mask);

 out_rcu:
    rcu_read_unlock(&sched_res_rculock);
 out:
    spin_unlock(&cpupool_lock);

    return ret;
}

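/*
 * Continuation run via continue_hypercall_on_cpu(): complete the cpu removal
 * while not running on the cpu being unassigned.
 */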
static long cpupool_unassign_cpu_helper(void *info)
{
    struct cpupool *c = info;
    long ret;

    debugtrace_printk("cpupool_unassign_cpu(pool=%d,cpu=%d)\n",
                      cpupool_cpu_moving->cpupool_id, cpupool_moving_cpu);
    spin_lock(&cpupool_lock);

    ret = cpupool_unassign_cpu_finish(c);

    spin_unlock(&cpupool_lock);
    debugtrace_printk("cpupool_unassign_cpu ret=%ld\n", ret);

    return ret;
}

/*
 * Unassign a specific cpu from a cpupool.
 * We must be sure not to run on the cpu to be unassigned! To achieve this
 * the main functionality is performed via continue_hypercall_on_cpu() on a
 * specific cpu.
 * If the cpu to be removed is the last one of the cpupool, no active domain
 * may be bound to the cpupool. Dying domains are moved to cpupool0 as they
 * might be zombies.
 * Possible failures:
 * - last cpu and still active domains in cpupool
 * - cpu just being unplugged
 */
static int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
{
    int work_cpu;
    int ret;
    unsigned int master_cpu;

    debugtrace_printk("cpupool_unassign_cpu(pool=%d,cpu=%d)\n",
                      c->cpupool_id, cpu);

    if ( !cpu_online(cpu) )
        return -EINVAL;

    master_cpu = sched_get_resource_cpu(cpu);
    ret = cpupool_unassign_cpu_start(c, master_cpu);
    if ( ret )
    {
        debugtrace_printk("cpupool_unassign_cpu(pool=%d,cpu=%d) ret %d\n",
                          c->cpupool_id, cpu, ret);
        return ret;
    }

    work_cpu = sched_get_resource_cpu(smp_processor_id());
    if ( work_cpu == master_cpu )
    {
        work_cpu = cpumask_first(cpupool0->cpu_valid);
        if ( work_cpu == master_cpu )
            work_cpu = cpumask_last(cpupool0->cpu_valid);
    }
    return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c);
}

/*
 * Add a new domain to a cpupool.
 * Possible failures:
 * - pool does not exist
 * - no cpu assigned to pool
 */
int cpupool_add_domain(struct domain *d, int poolid)
{
    struct cpupool *c;
    int rc;
    int n_dom = 0;

    if ( poolid == CPUPOOLID_NONE )
        return 0;
    spin_lock(&cpupool_lock);
    c = cpupool_find_by_id(poolid);
    if ( c == NULL )
        rc = -ESRCH;
    else if ( !cpumask_weight(c->cpu_valid) )
        rc = -ENODEV;
    else
    {
        c->n_dom++;
        n_dom = c->n_dom;
        d->cpupool = c;
        rc = 0;
    }
    spin_unlock(&cpupool_lock);
    debugtrace_printk("cpupool_add_domain(dom=%d,pool=%d) n_dom %d rc %d\n",
                      d->domain_id, poolid, n_dom, rc);
    return rc;
}

/*
 * Remove a domain from a cpupool.
 */
void cpupool_rm_domain(struct domain *d)
{
    int cpupool_id;
    int n_dom;

    if ( d->cpupool == NULL )
        return;
    spin_lock(&cpupool_lock);
    cpupool_id = d->cpupool->cpupool_id;
    d->cpupool->n_dom--;
    n_dom = d->cpupool->n_dom;
    d->cpupool = NULL;
    spin_unlock(&cpupool_lock);
    debugtrace_printk("cpupool_rm_domain(dom=%d,pool=%d) n_dom %d\n",
                      d->domain_id, cpupool_id, n_dom);
    return;
}

/*
 * Called to add a cpu to a pool. CPUs being hot-plugged are added to pool0,
 * as they must have been in there when unplugged.
 */
static int cpupool_cpu_add(unsigned int cpu)
{
    int ret = 0;
    const cpumask_t *cpus;

    spin_lock(&cpupool_lock);
    cpumask_clear_cpu(cpu, &cpupool_locked_cpus);
    cpumask_set_cpu(cpu, &cpupool_free_cpus);

    /*
     * If we are not resuming, we are hot-plugging the cpu, in which case
     * we add it to pool0, as it certainly was there when hot-unplugged
     * (or unplugging would have failed) and that is the default behavior
     * anyway.
     */
    rcu_read_lock(&sched_res_rculock);
    get_sched_res(cpu)->cpupool = NULL;

    cpus = sched_get_opt_cpumask(cpupool0->gran, cpu);
    if ( cpumask_subset(cpus, &cpupool_free_cpus) &&
         cpumask_weight(cpus) == cpupool_get_granularity(cpupool0) )
        ret = cpupool_assign_cpu_locked(cpupool0, cpu);

    rcu_read_unlock(&sched_res_rculock);

    spin_unlock(&cpupool_lock);

    return ret;
}

/*
 * This function is called in stop_machine context, so we can be sure no
 * non-idle vcpu is active on the system.
 */
static void cpupool_cpu_remove(unsigned int cpu)
{
    int ret;

    ASSERT(is_idle_vcpu(current));

    if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) )
    {
        ret = cpupool_unassign_cpu_finish(cpupool0);
        BUG_ON(ret);
    }
    cpumask_clear_cpu(cpu, &cpupool_free_cpus);
}

/*
 * Called before a CPU is being removed from the system.
 * Removing a CPU is allowed for free CPUs or CPUs in Pool-0 (those are moved
 * to free cpus actually before removing them).
 * The CPU is locked, to forbid adding it again to another cpupool.
 */
static int cpupool_cpu_remove_prologue(unsigned int cpu)
{
    int ret = 0;
    cpumask_t *cpus;
    unsigned int master_cpu;

    spin_lock(&cpupool_lock);

    rcu_read_lock(&sched_res_rculock);
    cpus = get_sched_res(cpu)->cpus;
    master_cpu = sched_get_resource_cpu(cpu);
    if ( cpumask_intersects(cpus, &cpupool_locked_cpus) )
        ret = -EBUSY;
    else
        cpumask_set_cpu(cpu, &cpupool_locked_cpus);
    rcu_read_unlock(&sched_res_rculock);

    spin_unlock(&cpupool_lock);

    if ( ret )
        return ret;

    if ( cpumask_test_cpu(master_cpu, cpupool0->cpu_valid) )
    {
        /* Cpupool0 is populated only after all cpus are up. */
        ASSERT(system_state == SYS_STATE_active);

        ret = cpupool_unassign_cpu_start(cpupool0, master_cpu);
    }
    else if ( !cpumask_test_cpu(master_cpu, &cpupool_free_cpus) )
        ret = -ENODEV;

    return ret;
}

/*
 * Called during resume for all cpus which didn't come up again. The cpu must
 * be removed from the cpupool it is assigned to. In case a cpupool would be
 * left without any cpu we move all domains of that cpupool to cpupool0.
 * As we are called with all domains still frozen there is no need to take the
 * cpupool lock here.
 */
static void cpupool_cpu_remove_forced(unsigned int cpu)
{
    struct cpupool **c;
    int ret;
    unsigned int master_cpu = sched_get_resource_cpu(cpu);

    for_each_cpupool ( c )
    {
        if ( cpumask_test_cpu(master_cpu, (*c)->cpu_valid) )
        {
            ret = cpupool_unassign_cpu_start(*c, master_cpu);
            BUG_ON(ret);
            ret = cpupool_unassign_cpu_finish(*c);
            BUG_ON(ret);
        }
    }

    cpumask_clear_cpu(cpu, &cpupool_free_cpus);

    rcu_read_lock(&sched_res_rculock);
    sched_rm_cpu(cpu);
    rcu_read_unlock(&sched_res_rculock);
}

/*
 * Do cpupool related sysctl operations.
 */
int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op)
{
    int ret;
    struct cpupool *c;

    switch ( op->op )
    {

    case XEN_SYSCTL_CPUPOOL_OP_CREATE:
    {
        int poolid;

        poolid = (op->cpupool_id == XEN_SYSCTL_CPUPOOL_PAR_ANY) ?
            CPUPOOLID_NONE: op->cpupool_id;
        c = cpupool_create(poolid, op->sched_id, &ret);
        if ( c != NULL )
        {
            op->cpupool_id = c->cpupool_id;
            cpupool_put(c);
        }
    }
    break;

    case XEN_SYSCTL_CPUPOOL_OP_DESTROY:
    {
        c = cpupool_get_by_id(op->cpupool_id);
        ret = -ENOENT;
        if ( c == NULL )
            break;
        ret = cpupool_destroy(c);
        cpupool_put(c);
    }
    break;

    case XEN_SYSCTL_CPUPOOL_OP_INFO:
    {
        c = cpupool_get_next_by_id(op->cpupool_id);
        ret = -ENOENT;
        if ( c == NULL )
            break;
        op->cpupool_id = c->cpupool_id;
        op->sched_id = c->sched->sched_id;
        op->n_dom = c->n_dom;
        ret = cpumask_to_xenctl_bitmap(&op->cpumap, c->cpu_valid);
        cpupool_put(c);
    }
    break;

    case XEN_SYSCTL_CPUPOOL_OP_ADDCPU:
    {
        unsigned cpu;
        const cpumask_t *cpus;

        cpu = op->cpu;
        debugtrace_printk("cpupool_assign_cpu(pool=%d,cpu=%d)\n",
                          op->cpupool_id, cpu);

        spin_lock(&cpupool_lock);

        c = cpupool_find_by_id(op->cpupool_id);
        ret = -ENOENT;
        if ( c == NULL )
            goto addcpu_out;
        if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY )
        {
            for_each_cpu ( cpu, &cpupool_free_cpus )
            {
                cpus = sched_get_opt_cpumask(c->gran, cpu);
                if ( cpumask_subset(cpus, &cpupool_free_cpus) )
                    break;
            }
            ret = -ENODEV;
            if ( cpu >= nr_cpu_ids )
                goto addcpu_out;
        }
        ret = -EINVAL;
        if ( cpu >= nr_cpu_ids )
            goto addcpu_out;
        ret = -ENODEV;
        cpus = sched_get_opt_cpumask(c->gran, cpu);
        if ( !cpumask_subset(cpus, &cpupool_free_cpus) ||
             cpumask_intersects(cpus, &cpupool_locked_cpus) )
            goto addcpu_out;
        ret = cpupool_assign_cpu_locked(c, cpu);

    addcpu_out:
        spin_unlock(&cpupool_lock);
        debugtrace_printk("cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n",
                          op->cpupool_id, cpu, ret);
    }
    break;

    case XEN_SYSCTL_CPUPOOL_OP_RMCPU:
    {
        unsigned cpu;

        c = cpupool_get_by_id(op->cpupool_id);
        ret = -ENOENT;
        if ( c == NULL )
            break;
        cpu = op->cpu;
        if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY )
            cpu = cpumask_last(c->cpu_valid);
        ret = (cpu < nr_cpu_ids) ? cpupool_unassign_cpu(c, cpu) : -EINVAL;
        cpupool_put(c);
    }
    break;

    case XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN:
    {
        struct domain *d;

        ret = rcu_lock_remote_domain_by_id(op->domid, &d);
        if ( ret )
            break;
        if ( d->cpupool == NULL )
        {
            ret = -EINVAL;
            rcu_unlock_domain(d);
            break;
        }
        if ( op->cpupool_id == d->cpupool->cpupool_id )
        {
            ret = 0;
            rcu_unlock_domain(d);
            break;
        }
        debugtrace_printk("cpupool move_domain(dom=%d)->pool=%d\n",
                          d->domain_id, op->cpupool_id);
        ret = -ENOENT;
        spin_lock(&cpupool_lock);

        c = cpupool_find_by_id(op->cpupool_id);
        if ( (c != NULL) && cpumask_weight(c->cpu_valid) )
            ret = cpupool_move_domain_locked(d, c);

        spin_unlock(&cpupool_lock);
        debugtrace_printk("cpupool move_domain(dom=%d)->pool=%d ret %d\n",
                          d->domain_id, op->cpupool_id, ret);
        rcu_unlock_domain(d);
    }
    break;

    case XEN_SYSCTL_CPUPOOL_OP_FREEINFO:
    {
        ret = cpumask_to_xenctl_bitmap(
            &op->cpumap, &cpupool_free_cpus);
    }
    break;

    default:
        ret = -ENOSYS;
        break;
    }

    return ret;
}

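/* Return the id of the cpupool the domain is assigned to (CPUPOOLID_NONE if none). */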
int cpupool_get_id(const struct domain *d)
{
    return d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
}

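/* Return the mask of cpus currently assigned to the cpupool. */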
const cpumask_t *cpupool_valid_cpus(const struct cpupool *pool)
{
    return pool->cpu_valid;
}

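/* Dump cpupool information and per-scheduler run queue state (debug key handler). */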
void dump_runq(unsigned char key)
{
    s_time_t         now = NOW();
    struct cpupool **c;

    spin_lock(&cpupool_lock);

    printk("sched_smt_power_savings: %s\n",
           sched_smt_power_savings ? "enabled" : "disabled");
    printk("NOW=%"PRI_stime"\n", now);

    printk("Online Cpus: %*pbl\n", CPUMASK_PR(&cpu_online_map));
    if ( !cpumask_empty(&cpupool_free_cpus) )
    {
        printk("Free Cpus: %*pbl\n", CPUMASK_PR(&cpupool_free_cpus));
        schedule_dump(NULL);
    }

    for_each_cpupool(c)
    {
        printk("Cpupool %d:\n", (*c)->cpupool_id);
        printk("Cpus: %*pbl\n", CPUMASK_PR((*c)->cpu_valid));
        sched_gran_print((*c)->gran, cpupool_get_granularity(*c));
        schedule_dump(*c);
    }

    spin_unlock(&cpupool_lock);
}

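/* CPU notifier: keep cpupool membership in sync with cpu hotplug and resume. */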
static int cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    int rc = 0;

    switch ( action )
    {
    case CPU_DOWN_FAILED:
    case CPU_ONLINE:
        if ( system_state <= SYS_STATE_active )
            rc = cpupool_cpu_add(cpu);
        break;
    case CPU_DOWN_PREPARE:
        /* Suspend/Resume don't change assignments of cpus to cpupools. */
        if ( system_state <= SYS_STATE_active )
            rc = cpupool_cpu_remove_prologue(cpu);
        break;
    case CPU_DYING:
        /* Suspend/Resume don't change assignments of cpus to cpupools. */
        if ( system_state <= SYS_STATE_active )
            cpupool_cpu_remove(cpu);
        break;
    case CPU_RESUME_FAILED:
        cpupool_cpu_remove_forced(cpu);
        break;
    default:
        break;
    }

    return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};

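/* Boot time setup: create cpupool0 and assign all online cpus to it. */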
static int __init cpupool_init(void)
{
    unsigned int cpu;
    int err;

    cpupool_gran_init();

    cpupool0 = cpupool_create(0, 0, &err);
    BUG_ON(cpupool0 == NULL);
    cpupool_put(cpupool0);
    register_cpu_notifier(&cpu_nfb);

    spin_lock(&cpupool_lock);

    cpumask_copy(&cpupool_free_cpus, &cpu_online_map);

    for_each_cpu ( cpu, &cpupool_free_cpus )
        cpupool_assign_cpu_locked(cpupool0, cpu);

    spin_unlock(&cpupool_lock);

    return 0;
}
__initcall(cpupool_init);

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */