/******************************************************************************
 * Additional declarations for the generic scheduler interface.  This should
 * only be included by files that implement conforming schedulers.
 *
 * Portions by Mark Williamson are (C) 2004 Intel Research Cambridge
 */

#ifndef __XEN_SCHED_IF_H__
#define __XEN_SCHED_IF_H__

#include <xen/percpu.h>
#include <xen/err.h>
#include <xen/rcupdate.h>

/* cpus currently in no cpupool */
extern cpumask_t cpupool_free_cpus;

/* Scheduler generic parameters. */
#define SCHED_DEFAULT_RATELIMIT_US 1000
extern int sched_ratelimit_us;

/* Scheduling resource mask. */
extern cpumask_t sched_res_mask;

/* Number of vcpus per struct sched_unit. */
enum sched_gran {
    SCHED_GRAN_cpu,
    SCHED_GRAN_core,
    SCHED_GRAN_socket
};

/*
 * In order to allow a scheduler to remap the lock->cpu mapping,
 * we have a per-cpu pointer, along with a pre-allocated set of
 * locks.  The generic scheduler init code points each schedule lock
 * pointer at the default schedule lock; if a scheduler wants to remap
 * them, it can simply modify the schedule lock pointers.
 *
 * For better cache behaviour, keep the actual lock in the same cache
 * area as the rest of the struct.  Just have the scheduler point to the
 * one it wants (this may well be the one right in front of it).
 */
struct sched_resource {
    struct scheduler   *scheduler;
    struct cpupool     *cpupool;
    spinlock_t         *schedule_lock,
                       _lock;
    struct sched_unit  *curr;
    struct sched_unit  *sched_unit_idle;
    struct sched_unit  *prev;
    void               *sched_priv;
    struct timer        s_timer;        /* scheduling timer */

    /* Cpu with lowest id in scheduling resource. */
    unsigned int        master_cpu;
    unsigned int        granularity;
    cpumask_var_t       cpus;           /* cpus covered by this struct */
    struct rcu_head     rcu;
};
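
/*
 * Illustrative sketch (not part of this interface): a scheduler that keeps
 * one runqueue lock per group of cpus can have its switch_sched hook return
 * a pointer to that lock, which the common code then installs as the
 * resource's schedule_lock.  All names prefixed "ex_" are hypothetical and
 * only show the remapping pattern described in the comment above.
 */
#if 0   /* example only */
struct ex_group {
    spinlock_t lock;        /* shared runqueue lock for the group */
    /* ... per-group scheduler state ... */
};

static spinlock_t *ex_switch_sched(struct scheduler *new_ops, unsigned int cpu,
                                   void *pdata, void *vdata)
{
    struct ex_group *grp = pdata;   /* per-cpu data from alloc_pdata() */

    /* ... install vdata as the idle unit's private data, etc. ... */

    /*
     * Returning &grp->lock makes every cpu of the group serialise on the
     * same schedule lock instead of the per-resource "_lock" default.
     */
    return &grp->lock;
}
#endif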

DECLARE_PER_CPU(struct sched_resource *, sched_res);
extern rcu_read_lock_t sched_res_rculock;

static inline struct sched_resource *get_sched_res(unsigned int cpu)
{
    return rcu_dereference(per_cpu(sched_res, cpu));
}

static inline void set_sched_res(unsigned int cpu, struct sched_resource *res)
{
    rcu_assign_pointer(per_cpu(sched_res, cpu), res);
}
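
/*
 * The per-cpu sched_res pointers are RCU protected, so callers of
 * get_sched_res() are meant to be inside a read side critical section on
 * sched_res_rculock.  A minimal usage sketch (illustration only; ex_ names
 * are hypothetical):
 */
#if 0   /* example only */
static unsigned int ex_master_of(unsigned int cpu)
{
    unsigned int master;

    rcu_read_lock(&sched_res_rculock);
    master = get_sched_res(cpu)->master_cpu;
    rcu_read_unlock(&sched_res_rculock);

    return master;
}
#endif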

static inline struct sched_unit *curr_on_cpu(unsigned int cpu)
{
    return get_sched_res(cpu)->curr;
}

static inline bool is_idle_unit(const struct sched_unit *unit)
{
    return is_idle_vcpu(unit->vcpu_list);
}

/* Returns true if at least one vcpu of the unit is online. */
static inline bool is_unit_online(const struct sched_unit *unit)
{
    const struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        if ( is_vcpu_online(v) )
            return true;

    return false;
}

static inline unsigned int unit_running(const struct sched_unit *unit)
{
    return unit->runstate_cnt[RUNSTATE_running];
}

/* Returns true if at least one vcpu of the unit is runnable. */
static inline bool unit_runnable(const struct sched_unit *unit)
{
    const struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        if ( vcpu_runnable(v) )
            return true;

    return false;
}

static inline int vcpu_runstate_blocked(const struct vcpu *v)
{
    return (v->pause_flags & VPF_blocked) ? RUNSTATE_blocked : RUNSTATE_offline;
}

/*
 * Returns whether a sched_unit is runnable and sets new_state for each of its
 * vcpus.  It is mandatory to determine the new runstate for all vcpus of a
 * unit without dropping the schedule lock (which happens when synchronizing
 * the context switch of the vcpus of a unit) in order to avoid races with
 * e.g. vcpu_sleep().
 */
static inline bool unit_runnable_state(const struct sched_unit *unit)
{
    struct vcpu *v;
    bool runnable, ret = false;

    if ( is_idle_unit(unit) )
        return true;

    for_each_sched_unit_vcpu ( unit, v )
    {
        runnable = vcpu_runnable(v);

        v->new_state = runnable ? RUNSTATE_running : vcpu_runstate_blocked(v);

        if ( runnable )
            ret = true;
    }

    return ret;
}

static inline void sched_set_res(struct sched_unit *unit,
                                 struct sched_resource *res)
{
    unsigned int cpu = cpumask_first(res->cpus);
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
    {
        ASSERT(cpu < nr_cpu_ids);
        v->processor = cpu;
        cpu = cpumask_next(cpu, res->cpus);
    }

    unit->res = res;
}

/* Return master cpu of the scheduling resource the unit is assigned to. */
static inline unsigned int sched_unit_master(const struct sched_unit *unit)
{
    return unit->res->master_cpu;
}

/* Set a bit in pause_flags of all vcpus of a unit. */
static inline void sched_set_pause_flags(struct sched_unit *unit,
                                         unsigned int bit)
{
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        set_bit(bit, &v->pause_flags);
}

/* Clear a bit in pause_flags of all vcpus of a unit. */
static inline void sched_clear_pause_flags(struct sched_unit *unit,
                                           unsigned int bit)
{
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        clear_bit(bit, &v->pause_flags);
}

static inline struct sched_unit *sched_idle_unit(unsigned int cpu)
{
    return get_sched_res(cpu)->sched_unit_idle;
}

static inline unsigned int sched_get_resource_cpu(unsigned int cpu)
{
    return get_sched_res(cpu)->master_cpu;
}

/*
 * Scratch space, for avoiding having too many cpumask_t on the stack.
 * Within each scheduler, when using the scratch mask of one pCPU:
 * - the pCPU must belong to the scheduler, and
 * - the caller must own the per-pCPU scheduler lock (a.k.a. runqueue lock).
 */
DECLARE_PER_CPU(cpumask_t, cpumask_scratch);
#define cpumask_scratch        (&this_cpu(cpumask_scratch))
#define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c))
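
/*
 * Minimal usage sketch (illustration only): inside a scheduler callback that
 * already holds the runqueue lock of "cpu", the scratch mask of that cpu can
 * be used instead of a cpumask_t on the stack.  The ex_ name is hypothetical;
 * cpupool_domain_master_cpumask() is declared further down in this header.
 */
#if 0   /* example only */
static unsigned int ex_pick_cpu_locked(const struct sched_unit *unit,
                                       unsigned int cpu)
{
    cpumask_t *mask = cpumask_scratch_cpu(cpu);

    /* Candidate cpus: hard affinity restricted to the unit's cpupool. */
    cpumask_and(mask, unit->cpu_hard_affinity,
                cpupool_domain_master_cpumask(unit->domain));

    return cpumask_any(mask);
}
#endif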

#define sched_lock(kind, param, cpu, irq, arg...) \
static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
{ \
    for ( ; ; ) \
    { \
        spinlock_t *lock = get_sched_res(cpu)->schedule_lock; \
        /* \
         * v->processor may change when grabbing the lock; but \
         * per_cpu(v->processor) may also change, if changing cpu pool \
         * also changes the scheduler lock.  Retry until they match. \
         * \
         * It may also be the case that v->processor may change but the \
         * lock may be the same; this will succeed in that case. \
         */ \
        spin_lock##irq(lock, ## arg); \
        if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \
            return lock; \
        spin_unlock##irq(lock, ## arg); \
    } \
}

#define sched_unlock(kind, param, cpu, irq, arg...) \
static inline void kind##_schedule_unlock##irq(spinlock_t *lock \
                                               EXTRA_TYPE(arg), param) \
{ \
    ASSERT(lock == get_sched_res(cpu)->schedule_lock); \
    spin_unlock##irq(lock, ## arg); \
}

#define EXTRA_TYPE(arg)
sched_lock(pcpu, unsigned int cpu, cpu, )
sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, )
sched_lock(pcpu, unsigned int cpu, cpu, _irq)
sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, _irq)
sched_unlock(pcpu, unsigned int cpu, cpu, )
sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, )
sched_unlock(pcpu, unsigned int cpu, cpu, _irq)
sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, _irq)
#undef EXTRA_TYPE

#define EXTRA_TYPE(arg) , unsigned long arg
#define spin_unlock_irqsave spin_unlock_irqrestore
sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags)
sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, _irqsave, *flags)
#undef spin_unlock_irqsave
sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags)
sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, _irqrestore, flags)
#undef EXTRA_TYPE

#undef sched_unlock
#undef sched_lock
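
/*
 * The expansions above generate helpers such as pcpu_schedule_lock(),
 * unit_schedule_lock_irq() and unit_schedule_lock_irqsave().  A typical
 * caller pattern (illustration only; the ex_ name is hypothetical):
 */
#if 0   /* example only */
static void ex_touch_unit(struct sched_unit *unit)
{
    unsigned long flags;
    spinlock_t *lock = unit_schedule_lock_irqsave(unit, &flags);

    /* The unit's scheduler state may be modified here... */

    unit_schedule_unlock_irqrestore(lock, flags, unit);
}
#endif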

static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu)
{
    spinlock_t *lock = get_sched_res(cpu)->schedule_lock;

    if ( !spin_trylock(lock) )
        return NULL;
    if ( lock == get_sched_res(cpu)->schedule_lock )
        return lock;
    spin_unlock(lock);
    return NULL;
}

struct scheduler {
    char *name;             /* full name for this scheduler */
    char *opt_name;         /* option name for this scheduler */
    unsigned int sched_id;  /* ID for this scheduler */
    void *sched_data;       /* global data pointer */
    struct cpupool *cpupool;/* points to this scheduler's pool */

    int          (*global_init)    (void);

    int          (*init)           (struct scheduler *);
    void         (*deinit)         (struct scheduler *);

    void         (*free_udata)     (const struct scheduler *, void *);
    void *       (*alloc_udata)    (const struct scheduler *,
                                    struct sched_unit *, void *);
    void         (*free_pdata)     (const struct scheduler *, void *, int);
    void *       (*alloc_pdata)    (const struct scheduler *, int);
    void         (*deinit_pdata)   (const struct scheduler *, void *, int);

    /* Returns ERR_PTR(-err) for error, NULL for 'nothing needed'. */
    void *       (*alloc_domdata)  (const struct scheduler *, struct domain *);
    /* Idempotent. */
    void         (*free_domdata)   (const struct scheduler *, void *);

    spinlock_t * (*switch_sched)   (struct scheduler *, unsigned int,
                                    void *, void *);

    /* Activate / deactivate units in a cpu pool */
    void         (*insert_unit)    (const struct scheduler *,
                                    struct sched_unit *);
    void         (*remove_unit)    (const struct scheduler *,
                                    struct sched_unit *);

    void         (*sleep)          (const struct scheduler *,
                                    struct sched_unit *);
    void         (*wake)           (const struct scheduler *,
                                    struct sched_unit *);
    void         (*yield)          (const struct scheduler *,
                                    struct sched_unit *);
    void         (*context_saved)  (const struct scheduler *,
                                    struct sched_unit *);

    void         (*do_schedule)    (const struct scheduler *,
                                    struct sched_unit *, s_time_t,
                                    bool tasklet_work_scheduled);

    struct sched_resource *(*pick_resource)(const struct scheduler *,
                                            const struct sched_unit *);
    void         (*migrate)        (const struct scheduler *,
                                    struct sched_unit *, unsigned int);
    int          (*adjust)         (const struct scheduler *, struct domain *,
                                    struct xen_domctl_scheduler_op *);
    void         (*adjust_affinity)(const struct scheduler *,
                                    struct sched_unit *,
                                    const struct cpumask *,
                                    const struct cpumask *);
    int          (*adjust_global)  (const struct scheduler *,
                                    struct xen_sysctl_scheduler_op *);
    void         (*dump_settings)  (const struct scheduler *);
    void         (*dump_cpu_state) (const struct scheduler *, int);
};

static inline int sched_init(struct scheduler *s)
{
    return s->init(s);
}

static inline void sched_deinit(struct scheduler *s)
{
    s->deinit(s);
}

static inline spinlock_t *sched_switch_sched(struct scheduler *s,
                                             unsigned int cpu,
                                             void *pdata, void *vdata)
{
    return s->switch_sched(s, cpu, pdata, vdata);
}

static inline void sched_dump_settings(const struct scheduler *s)
{
    if ( s->dump_settings )
        s->dump_settings(s);
}

static inline void sched_dump_cpu_state(const struct scheduler *s, int cpu)
{
    if ( s->dump_cpu_state )
        s->dump_cpu_state(s, cpu);
}

static inline void *sched_alloc_domdata(const struct scheduler *s,
                                        struct domain *d)
{
    return s->alloc_domdata ? s->alloc_domdata(s, d) : NULL;
}

static inline void sched_free_domdata(const struct scheduler *s,
                                      void *data)
{
    ASSERT(s->free_domdata || !data);
    if ( s->free_domdata )
        s->free_domdata(s, data);
}

static inline void *sched_alloc_pdata(const struct scheduler *s, int cpu)
{
    return s->alloc_pdata ? s->alloc_pdata(s, cpu) : NULL;
}

static inline void sched_free_pdata(const struct scheduler *s, void *data,
                                    int cpu)
{
    ASSERT(s->free_pdata || !data);
    if ( s->free_pdata )
        s->free_pdata(s, data, cpu);
}

static inline void sched_deinit_pdata(const struct scheduler *s, void *data,
                                      int cpu)
{
    if ( s->deinit_pdata )
        s->deinit_pdata(s, data, cpu);
}

static inline void *sched_alloc_udata(const struct scheduler *s,
                                      struct sched_unit *unit, void *dom_data)
{
    return s->alloc_udata(s, unit, dom_data);
}

static inline void sched_free_udata(const struct scheduler *s, void *data)
{
    s->free_udata(s, data);
}

static inline void sched_insert_unit(const struct scheduler *s,
                                     struct sched_unit *unit)
{
    if ( s->insert_unit )
        s->insert_unit(s, unit);
}

static inline void sched_remove_unit(const struct scheduler *s,
                                     struct sched_unit *unit)
{
    if ( s->remove_unit )
        s->remove_unit(s, unit);
}

static inline void sched_sleep(const struct scheduler *s,
                               struct sched_unit *unit)
{
    if ( s->sleep )
        s->sleep(s, unit);
}

static inline void sched_wake(const struct scheduler *s,
                              struct sched_unit *unit)
{
    if ( s->wake )
        s->wake(s, unit);
}

static inline void sched_yield(const struct scheduler *s,
                               struct sched_unit *unit)
{
    if ( s->yield )
        s->yield(s, unit);
}

static inline void sched_context_saved(const struct scheduler *s,
                                       struct sched_unit *unit)
{
    if ( s->context_saved )
        s->context_saved(s, unit);
}

static inline void sched_migrate(const struct scheduler *s,
                                 struct sched_unit *unit, unsigned int cpu)
{
    if ( s->migrate )
        s->migrate(s, unit, cpu);
    else
        sched_set_res(unit, get_sched_res(cpu));
}

static inline struct sched_resource *sched_pick_resource(
    const struct scheduler *s, const struct sched_unit *unit)
{
    return s->pick_resource(s, unit);
}

static inline void sched_adjust_affinity(const struct scheduler *s,
                                         struct sched_unit *unit,
                                         const cpumask_t *hard,
                                         const cpumask_t *soft)
{
    if ( s->adjust_affinity )
        s->adjust_affinity(s, unit, hard, soft);
}

static inline int sched_adjust_dom(const struct scheduler *s, struct domain *d,
                                   struct xen_domctl_scheduler_op *op)
{
    return s->adjust ? s->adjust(s, d, op) : 0;
}

static inline int sched_adjust_cpupool(const struct scheduler *s,
                                       struct xen_sysctl_scheduler_op *op)
{
    return s->adjust_global ? s->adjust_global(s, op) : 0;
}

static inline void sched_unit_pause_nosync(const struct sched_unit *unit)
{
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        vcpu_pause_nosync(v);
}

static inline void sched_unit_unpause(const struct sched_unit *unit)
{
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        vcpu_unpause(v);
}

#define REGISTER_SCHEDULER(x) static const struct scheduler *x##_entry \
  __used_section(".data.schedulers") = &x;
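
/*
 * Sketch of a conforming scheduler registration (illustration only; all
 * "ex_" names are hypothetical).  Hooks which the inline wrappers above
 * invoke without a NULL check (init, deinit, switch_sched, alloc_udata,
 * free_udata, pick_resource) must be supplied; the remaining hooks are
 * optional.
 */
#if 0   /* example only */
static const struct scheduler sched_example_def = {
    .name           = "Example Scheduler",
    .opt_name       = "example",
    .sched_id       = XEN_SCHEDULER_CREDIT,  /* a real scheduler uses its own ID */
    .init           = ex_init,
    .deinit         = ex_deinit,
    .alloc_udata    = ex_alloc_udata,
    .free_udata     = ex_free_udata,
    .switch_sched   = ex_switch_sched,
    .pick_resource  = ex_pick_resource,
    .do_schedule    = ex_schedule,
    .sleep          = ex_unit_sleep,
    .wake           = ex_unit_wake,
};
REGISTER_SCHEDULER(sched_example_def);
#endif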

struct cpupool
{
    int              cpupool_id;
#define CPUPOOLID_NONE   (-1)
    unsigned int     n_dom;
    cpumask_var_t    cpu_valid;      /* all cpus assigned to pool */
    cpumask_var_t    res_valid;      /* all scheduling resources of pool */
    struct cpupool   *next;
    struct scheduler *sched;
    atomic_t         refcnt;
    enum sched_gran  gran;
};

static inline cpumask_t *cpupool_domain_master_cpumask(const struct domain *d)
{
    /*
     * d->cpupool is NULL only for the idle domain, and no one should
     * be interested in calling this for the idle domain.
     */
    ASSERT(d->cpupool != NULL);
    return d->cpupool->res_valid;
}

unsigned int cpupool_get_granularity(const struct cpupool *c);

/*
 * Hard and soft affinity load balancing.
 *
 * The idea is that each vcpu has some pcpus that it prefers, some that it
 * does not prefer but is OK with, and some that it cannot run on at all.
 * The first set of pcpus are the ones that are both in the soft affinity
 * *and* in the hard affinity; the second set of pcpus are the ones that are
 * in the hard affinity but *not* in the soft affinity; the third set of
 * pcpus are the ones that are not in the hard affinity.
 *
 * We implement a two-step balancing logic.  Basically, every time there is
 * the need to decide where to run a vcpu, we first check the soft affinity
 * (well, actually, the && between soft and hard affinity), to see if we can
 * send it where it prefers to (and can) run.  However, if the first step
 * does not find any suitable and free pcpu, we fall back to checking the
 * hard affinity.
 */
#define BALANCE_SOFT_AFFINITY    0
#define BALANCE_HARD_AFFINITY    1

#define for_each_affinity_balance_step(step) \
    for ( (step) = 0; (step) <= BALANCE_HARD_AFFINITY; (step)++ )

/*
 * Hard affinity balancing is always necessary and must never be skipped.
 * But soft affinity need only be considered when it has a functionally
 * different effect than other constraints (such as hard affinity, cpus
 * online, or cpupools).
 *
 * Soft affinity only needs to be considered if:
 * * The cpus in the cpupool are not a subset of soft affinity
 * * The hard affinity is not a subset of soft affinity
 * * There is an overlap between the soft and hard affinity masks
 */
static inline bool has_soft_affinity(const struct sched_unit *unit)
{
    return unit->soft_aff_effective &&
           !cpumask_subset(cpupool_domain_master_cpumask(unit->domain),
                           unit->cpu_soft_affinity);
}

/*
 * This function copies into mask the cpumask that should be used for a
 * particular affinity balancing step.  For the soft affinity step, the pcpus
 * that are not part of the unit's hard affinity are filtered out of the
 * result, to avoid running a vcpu where it would like to run, but is not
 * allowed to!
 */
static inline void
affinity_balance_cpumask(const struct sched_unit *unit, int step,
                         cpumask_t *mask)
{
    if ( step == BALANCE_SOFT_AFFINITY )
    {
        cpumask_and(mask, unit->cpu_soft_affinity, unit->cpu_hard_affinity);

        if ( unlikely(cpumask_empty(mask)) )
            cpumask_copy(mask, unit->cpu_hard_affinity);
    }
    else /* step == BALANCE_HARD_AFFINITY */
        cpumask_copy(mask, unit->cpu_hard_affinity);
}
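
/*
 * Typical use of the two balancing steps (illustration only; the ex_ name is
 * hypothetical): iterate from the soft affinity step to the hard affinity
 * one, skip the former when it cannot make a difference, and stop as soon as
 * a suitable cpu is found.
 */
#if 0   /* example only */
static unsigned int ex_pick_cpu(const struct sched_unit *unit,
                                const cpumask_t *online, cpumask_t *scratch)
{
    unsigned int step, cpu = nr_cpu_ids;

    for_each_affinity_balance_step ( step )
    {
        if ( step == BALANCE_SOFT_AFFINITY && !has_soft_affinity(unit) )
            continue;

        affinity_balance_cpumask(unit, step, scratch);
        cpumask_and(scratch, scratch, online);

        cpu = cpumask_first(scratch);
        if ( cpu < nr_cpu_ids )
            break;
    }

    return cpu;
}
#endif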

void sched_rm_cpu(unsigned int cpu);
const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu);
void schedule_dump(struct cpupool *c);
struct scheduler *scheduler_get_default(void);
struct scheduler *scheduler_alloc(unsigned int sched_id, int *perr);
void scheduler_free(struct scheduler *sched);
int cpu_disable_scheduler(unsigned int cpu);
int schedule_cpu_add(unsigned int cpu, struct cpupool *c);
int schedule_cpu_rm(unsigned int cpu);
int sched_move_domain(struct domain *d, struct cpupool *c);
struct cpupool *cpupool_get_by_id(int poolid);
void cpupool_put(struct cpupool *pool);
int cpupool_add_domain(struct domain *d, int poolid);
void cpupool_rm_domain(struct domain *d);

#endif /* __XEN_SCHED_IF_H__ */