1 /******************************************************************************
2 * domain.c
3 *
4 * Generic domain-handling functions.
5 */
6
7 #include <xen/compat.h>
8 #include <xen/init.h>
9 #include <xen/lib.h>
10 #include <xen/ctype.h>
11 #include <xen/err.h>
12 #include <xen/param.h>
13 #include <xen/sched.h>
14 #include <xen/domain.h>
15 #include <xen/mm.h>
16 #include <xen/event.h>
17 #include <xen/vm_event.h>
18 #include <xen/time.h>
19 #include <xen/console.h>
20 #include <xen/softirq.h>
21 #include <xen/tasklet.h>
22 #include <xen/domain_page.h>
23 #include <xen/rangeset.h>
24 #include <xen/guest_access.h>
25 #include <xen/hypercall.h>
26 #include <xen/delay.h>
27 #include <xen/shutdown.h>
28 #include <xen/percpu.h>
29 #include <xen/multicall.h>
30 #include <xen/rcupdate.h>
31 #include <xen/wait.h>
32 #include <xen/grant_table.h>
33 #include <xen/xenoprof.h>
34 #include <xen/irq.h>
35 #include <xen/argo.h>
36 #include <asm/debugger.h>
37 #include <asm/p2m.h>
38 #include <asm/processor.h>
39 #include <public/sched.h>
40 #include <public/sysctl.h>
41 #include <public/vcpu.h>
42 #include <xsm/xsm.h>
43 #include <xen/trace.h>
44 #include <asm/setup.h>
45
46 #ifdef CONFIG_X86
47 #include <asm/guest.h>
48 #endif
49
50 /* Linux config option: propagated to domain0 */
51 /* xen_processor_pmbits: xen control Cx, Px, ... */
52 unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
53
54 /* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
55 bool_t opt_dom0_vcpus_pin;
56 boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);
57
58 /* Protect updates/reads (resp.) of domain_list and domain_hash. */
59 DEFINE_SPINLOCK(domlist_update_lock);
60 DEFINE_RCU_READ_LOCK(domlist_read_lock);
61
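/*
 * Domains live both on the domid-ordered domain_list and in a small hash
 * table keyed by domid; DOMAIN_HASH_SIZE must stay a power of two for the
 * mask in DOMAIN_HASH() to index it correctly.
 */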
62 #define DOMAIN_HASH_SIZE 256
63 #define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
64 static struct domain *domain_hash[DOMAIN_HASH_SIZE];
65 struct domain *domain_list;
66
67 struct domain *hardware_domain __read_mostly;
68
69 #ifdef CONFIG_LATE_HWDOM
70 domid_t hardware_domid __read_mostly;
71 integer_param("hardware_dom", hardware_domid);
72 #endif
73
74 /* Private domain structs for DOMID_XEN, DOMID_IO, etc. */
75 struct domain *__read_mostly dom_xen;
76 struct domain *__read_mostly dom_io;
77 #ifdef CONFIG_MEM_SHARING
78 struct domain *__read_mostly dom_cow;
79 #endif
80
81 struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
82
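/*
 * Fallback vcpu_info: vCPUs whose ID is at or above XEN_LEGACY_MAX_VCPUS
 * start out pointing here (see vcpu_info_reset()) until the guest registers
 * a real area via VCPUOP_register_vcpu_info.
 */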
83 vcpu_info_t dummy_vcpu_info;
84
85 static void __domain_finalise_shutdown(struct domain *d)
86 {
87 struct vcpu *v;
88
89 BUG_ON(!spin_is_locked(&d->shutdown_lock));
90
91 if ( d->is_shut_down )
92 return;
93
94 for_each_vcpu ( d, v )
95 if ( !v->paused_for_shutdown )
96 return;
97
98 d->is_shut_down = 1;
99 if ( (d->shutdown_code == SHUTDOWN_suspend) && d->suspend_evtchn )
100 evtchn_send(d, d->suspend_evtchn);
101 else
102 send_global_virq(VIRQ_DOM_EXC);
103 }
104
105 static void vcpu_check_shutdown(struct vcpu *v)
106 {
107 struct domain *d = v->domain;
108
109 spin_lock(&d->shutdown_lock);
110
111 if ( d->is_shutting_down )
112 {
113 if ( !v->paused_for_shutdown )
114 vcpu_pause_nosync(v);
115 v->paused_for_shutdown = 1;
116 v->defer_shutdown = 0;
117 __domain_finalise_shutdown(d);
118 }
119
120 spin_unlock(&d->shutdown_lock);
121 }
122
123 static void vcpu_info_reset(struct vcpu *v)
124 {
125 struct domain *d = v->domain;
126
127 v->vcpu_info = ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
128 ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
129 : &dummy_vcpu_info);
130 v->vcpu_info_mfn = INVALID_MFN;
131 }
132
133 static void vcpu_destroy(struct vcpu *v)
134 {
135 free_vcpu_struct(v);
136 }
137
138 struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
139 {
140 struct vcpu *v;
141
142 /*
143 * Sanity check some input expectations:
144 * - vcpu_id should be bounded by d->max_vcpus, and not previously
145 * allocated.
146 * - VCPUs should be tightly packed and allocated in ascending order,
147 * except for the idle domain which may vary based on PCPU numbering.
148 */
149 if ( vcpu_id >= d->max_vcpus || d->vcpu[vcpu_id] ||
150 (!is_idle_domain(d) && vcpu_id && !d->vcpu[vcpu_id - 1]) )
151 {
152 ASSERT_UNREACHABLE();
153 return NULL;
154 }
155
156 if ( (v = alloc_vcpu_struct(d)) == NULL )
157 return NULL;
158
159 v->domain = d;
160 v->vcpu_id = vcpu_id;
161 v->dirty_cpu = VCPU_CPU_CLEAN;
162
163 spin_lock_init(&v->virq_lock);
164
165 tasklet_init(&v->continue_hypercall_tasklet, NULL, NULL);
166
167 grant_table_init_vcpu(v);
168
169 if ( is_idle_domain(d) )
170 {
171 v->runstate.state = RUNSTATE_running;
172 v->new_state = RUNSTATE_running;
173 }
174 else
175 {
176 v->runstate.state = RUNSTATE_offline;
177 v->runstate.state_entry_time = NOW();
178 set_bit(_VPF_down, &v->pause_flags);
179 vcpu_info_reset(v);
180 init_waitqueue_vcpu(v);
181 }
182
183 if ( sched_init_vcpu(v) != 0 )
184 goto fail_wq;
185
186 if ( arch_vcpu_create(v) != 0 )
187 goto fail_sched;
188
189 d->vcpu[vcpu_id] = v;
190 if ( vcpu_id != 0 )
191 {
192 int prev_id = v->vcpu_id - 1;
193 while ( (prev_id >= 0) && (d->vcpu[prev_id] == NULL) )
194 prev_id--;
195 BUG_ON(prev_id < 0);
196 v->next_in_list = d->vcpu[prev_id]->next_in_list;
197 d->vcpu[prev_id]->next_in_list = v;
198 }
199
200 /* Must be called after making new vcpu visible to for_each_vcpu(). */
201 vcpu_check_shutdown(v);
202
203 return v;
204
205 fail_sched:
206 sched_destroy_vcpu(v);
207 fail_wq:
208 destroy_waitqueue_vcpu(v);
209 vcpu_destroy(v);
210
211 return NULL;
212 }
213
214 static int late_hwdom_init(struct domain *d)
215 {
216 #ifdef CONFIG_LATE_HWDOM
217 struct domain *dom0;
218 int rv;
219
220 if ( d != hardware_domain || d->domain_id == 0 )
221 return 0;
222
223 rv = xsm_init_hardware_domain(XSM_HOOK, d);
224 if ( rv )
225 return rv;
226
227 printk("Initialising hardware domain %d\n", hardware_domid);
228
229 dom0 = rcu_lock_domain_by_id(0);
230 ASSERT(dom0 != NULL);
231 /*
232 * Hardware resource ranges for domain 0 have been set up from
233 * various sources intended to restrict the hardware domain's
234 * access. Apply these ranges to the actual hardware domain.
235 *
236 * Because the lists are being swapped, a side effect of this
237 * operation is that Domain 0's rangesets are cleared. Since
238 * domain 0 should not be accessing the hardware when it constructs
239 * a hardware domain, this should not be a problem. Both lists
240 * may be modified after this hypercall returns if a more complex
241 * device model is desired.
242 */
243 rangeset_swap(d->irq_caps, dom0->irq_caps);
244 rangeset_swap(d->iomem_caps, dom0->iomem_caps);
245 #ifdef CONFIG_X86
246 rangeset_swap(d->arch.ioport_caps, dom0->arch.ioport_caps);
247 setup_io_bitmap(d);
248 setup_io_bitmap(dom0);
249 #endif
250
251 rcu_unlock_domain(dom0);
252
253 iommu_hwdom_init(d);
254
255 return rv;
256 #else
257 return 0;
258 #endif
259 }
260
261 static unsigned int __read_mostly extra_hwdom_irqs;
262 static unsigned int __read_mostly extra_domU_irqs = 32;
263
264 static int __init parse_extra_guest_irqs(const char *s)
265 {
266 if ( isdigit(*s) )
267 extra_domU_irqs = simple_strtoul(s, &s, 0);
268 if ( *s == ',' && isdigit(*++s) )
269 extra_hwdom_irqs = simple_strtoul(s, &s, 0);
270
271 return *s ? -EINVAL : 0;
272 }
273 custom_param("extra_guest_irqs", parse_extra_guest_irqs);
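/*
 * Command line syntax, as parsed above: "extra_guest_irqs=[<domU>][,<hwdom>]",
 * e.g. "extra_guest_irqs=64,1024" grants ordinary domains 64 and the hardware
 * domain 1024 extra PIRQs on top of nr_static_irqs (illustrative values).
 */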
274
275 /*
276 * Destroy a domain once all references to it have been dropped. Used either
277 * from the RCU path, or from the domain_create() error path before the domain
278 * is inserted into the domlist.
279 */
280 static void _domain_destroy(struct domain *d)
281 {
282 BUG_ON(!d->is_dying);
283 BUG_ON(atomic_read(&d->refcnt) != DOMAIN_DESTROYED);
284
285 xfree(d->pbuf);
286
287 argo_destroy(d);
288
289 rangeset_domain_destroy(d);
290
291 free_cpumask_var(d->dirty_cpumask);
292
293 xsm_free_security_domain(d);
294
295 lock_profile_deregister_struct(LOCKPROF_TYPE_PERDOM, d);
296
297 free_domain_struct(d);
298 }
299
300 static int sanitise_domain_config(struct xen_domctl_createdomain *config)
301 {
302 if ( config->flags & ~(XEN_DOMCTL_CDF_hvm |
303 XEN_DOMCTL_CDF_hap |
304 XEN_DOMCTL_CDF_s3_integrity |
305 XEN_DOMCTL_CDF_oos_off |
306 XEN_DOMCTL_CDF_xs_domain |
307 XEN_DOMCTL_CDF_iommu) )
308 {
309 dprintk(XENLOG_INFO, "Unknown CDF flags %#x\n", config->flags);
310 return -EINVAL;
311 }
312
313 if ( config->iommu_opts & ~XEN_DOMCTL_IOMMU_no_sharept )
314 {
315 dprintk(XENLOG_INFO, "Unknown IOMMU options %#x\n", config->iommu_opts);
316 return -EINVAL;
317 }
318
319 if ( !(config->flags & XEN_DOMCTL_CDF_iommu) && config->iommu_opts )
320 {
321 dprintk(XENLOG_INFO,
322 "IOMMU options specified but IOMMU not enabled\n");
323 return -EINVAL;
324 }
325
326 if ( config->max_vcpus < 1 )
327 {
328 dprintk(XENLOG_INFO, "No vCPUS\n");
329 return -EINVAL;
330 }
331
332 if ( !(config->flags & XEN_DOMCTL_CDF_hvm) &&
333 (config->flags & XEN_DOMCTL_CDF_hap) )
334 {
335 dprintk(XENLOG_INFO, "HAP requested for non-HVM guest\n");
336 return -EINVAL;
337 }
338
339 if ( (config->flags & XEN_DOMCTL_CDF_iommu) && !iommu_enabled )
340 {
341 dprintk(XENLOG_INFO, "IOMMU is not enabled\n");
342 return -EINVAL;
343 }
344
345 return arch_sanitise_domain_config(config);
346 }
347
348 struct domain *domain_create(domid_t domid,
349 struct xen_domctl_createdomain *config,
350 bool is_priv)
351 {
352 struct domain *d, **pd, *old_hwdom = NULL;
353 enum { INIT_watchdog = 1u<<1,
354 INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 };
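/*
 * init_status records which of the facilities below were brought up, so the
 * 'fail' path at the bottom only tears down what was actually initialised.
 */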
355 int err, init_status = 0;
356
357 if ( config && (err = sanitise_domain_config(config)) )
358 return ERR_PTR(err);
359
360 if ( (d = alloc_domain_struct()) == NULL )
361 return ERR_PTR(-ENOMEM);
362
363 d->options = config ? config->flags : 0;
364
365 /* Sort out our idea of is_system_domain(). */
366 d->domain_id = domid;
367
368 /* Debug sanity. */
369 ASSERT(is_system_domain(d) ? config == NULL : config != NULL);
370
371 /* Sort out our idea of is_control_domain(). */
372 d->is_privileged = is_priv;
373
374 /* Sort out our idea of is_hardware_domain(). */
375 if ( domid == 0 || domid == hardware_domid )
376 {
377 if ( hardware_domid < 0 || hardware_domid >= DOMID_FIRST_RESERVED )
378 panic("The value of hardware_dom must be a valid domain ID\n");
379
380 d->disable_migrate = true;
381 old_hwdom = hardware_domain;
382 hardware_domain = d;
383 }
384
385 TRACE_1D(TRC_DOM0_DOM_ADD, d->domain_id);
386
387 /*
388 * Allocate d->vcpu[] and set ->max_vcpus up early. Various per-domain
389 * resources want to be sized based on max_vcpus.
390 */
391 if ( !is_system_domain(d) )
392 {
393 err = -ENOMEM;
394 d->vcpu = xzalloc_array(struct vcpu *, config->max_vcpus);
395 if ( !d->vcpu )
396 goto fail;
397
398 d->max_vcpus = config->max_vcpus;
399 }
400
401 lock_profile_register_struct(LOCKPROF_TYPE_PERDOM, d, domid, "Domain");
402
403 if ( (err = xsm_alloc_security_domain(d)) != 0 )
404 goto fail;
405
406 atomic_set(&d->refcnt, 1);
407 RCU_READ_LOCK_INIT(&d->rcu_lock);
408 spin_lock_init_prof(d, domain_lock);
409 spin_lock_init_prof(d, page_alloc_lock);
410 spin_lock_init(&d->hypercall_deadlock_mutex);
411 INIT_PAGE_LIST_HEAD(&d->page_list);
412 INIT_PAGE_LIST_HEAD(&d->extra_page_list);
413 INIT_PAGE_LIST_HEAD(&d->xenpage_list);
414
415 spin_lock_init(&d->node_affinity_lock);
416 d->node_affinity = NODE_MASK_ALL;
417 d->auto_node_affinity = 1;
418
419 spin_lock_init(&d->shutdown_lock);
420 d->shutdown_code = SHUTDOWN_CODE_INVALID;
421
422 spin_lock_init(&d->pbuf_lock);
423
424 rwlock_init(&d->vnuma_rwlock);
425
426 #ifdef CONFIG_HAS_PCI
427 INIT_LIST_HEAD(&d->pdev_list);
428 #endif
429
430 err = -ENOMEM;
431 if ( !zalloc_cpumask_var(&d->dirty_cpumask) )
432 goto fail;
433
434 rangeset_domain_initialise(d);
435
436 /* DOMID_{XEN,IO,etc} (other than IDLE) are sufficiently constructed. */
437 if ( is_system_domain(d) && !is_idle_domain(d) )
438 return d;
439
440 if ( !is_idle_domain(d) )
441 {
442 if ( !is_hardware_domain(d) )
443 d->nr_pirqs = nr_static_irqs + extra_domU_irqs;
444 else
445 d->nr_pirqs = extra_hwdom_irqs ? nr_static_irqs + extra_hwdom_irqs
446 : arch_hwdom_irqs(domid);
447 d->nr_pirqs = min(d->nr_pirqs, nr_irqs);
448
449 radix_tree_init(&d->pirq_tree);
450 }
451
452 if ( (err = arch_domain_create(d, config)) != 0 )
453 goto fail;
454 init_status |= INIT_arch;
455
456 if ( !is_idle_domain(d) )
457 {
458 watchdog_domain_init(d);
459 init_status |= INIT_watchdog;
460
461 if ( is_xenstore_domain(d) )
462 d->disable_migrate = true;
463
464 d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
465 d->irq_caps = rangeset_new(d, "Interrupts", 0);
466 if ( !d->iomem_caps || !d->irq_caps )
467 goto fail;
468
469 if ( (err = xsm_domain_create(XSM_HOOK, d, config->ssidref)) != 0 )
470 goto fail;
471
472 d->controller_pause_count = 1;
473 atomic_inc(&d->pause_count);
474
475 if ( (err = evtchn_init(d, config->max_evtchn_port)) != 0 )
476 goto fail;
477 init_status |= INIT_evtchn;
478
479 if ( (err = grant_table_init(d, config->max_grant_frames,
480 config->max_maptrack_frames)) != 0 )
481 goto fail;
482 init_status |= INIT_gnttab;
483
484 if ( (err = argo_init(d)) != 0 )
485 goto fail;
486
487 err = -ENOMEM;
488
489 d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE);
490 if ( !d->pbuf )
491 goto fail;
492
493 if ( (err = sched_init_domain(d, 0)) != 0 )
494 goto fail;
495
496 if ( (err = late_hwdom_init(d)) != 0 )
497 goto fail;
498
499 /*
500 * Must not fail beyond this point, as our caller doesn't know whether
501 * the domain has been entered into domain_list or not.
502 */
503
504 spin_lock(&domlist_update_lock);
505 pd = &domain_list; /* NB. domain_list maintained in order of domid. */
506 for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
507 if ( (*pd)->domain_id > d->domain_id )
508 break;
509 d->next_in_list = *pd;
510 d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
511 rcu_assign_pointer(*pd, d);
512 rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
513 spin_unlock(&domlist_update_lock);
514
515 memcpy(d->handle, config->handle, sizeof(d->handle));
516 }
517
518 return d;
519
520 fail:
521 ASSERT(err < 0); /* Sanity check paths leading here. */
522 err = err ?: -EILSEQ; /* Release build safety. */
523
524 d->is_dying = DOMDYING_dead;
525 if ( hardware_domain == d )
526 hardware_domain = old_hwdom;
527 atomic_set(&d->refcnt, DOMAIN_DESTROYED);
528
529 sched_destroy_domain(d);
530
531 if ( d->max_vcpus )
532 {
533 d->max_vcpus = 0;
534 XFREE(d->vcpu);
535 }
536 if ( init_status & INIT_arch )
537 arch_domain_destroy(d);
538 if ( init_status & INIT_gnttab )
539 grant_table_destroy(d);
540 if ( init_status & INIT_evtchn )
541 {
542 evtchn_destroy(d);
543 evtchn_destroy_final(d);
544 radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
545 }
546 if ( init_status & INIT_watchdog )
547 watchdog_domain_destroy(d);
548
549 _domain_destroy(d);
550
551 return ERR_PTR(err);
552 }
553
554 void __init setup_system_domains(void)
555 {
556 /*
557 * Initialise our DOMID_XEN domain.
558 * Any Xen-heap pages that we will allow to be mapped will have
559 * their domain field set to dom_xen.
560 * Hidden PCI devices will also be associated with this domain
561 * (but be [partly] controlled by Dom0 nevertheless).
562 */
563 dom_xen = domain_create(DOMID_XEN, NULL, false);
564 if ( IS_ERR(dom_xen) )
565 panic("Failed to create d[XEN]: %ld\n", PTR_ERR(dom_xen));
566
567 /*
568 * Initialise our DOMID_IO domain.
569 * This domain owns I/O pages that are within the range of the page_info
570 * array. Mappings occur at the privilege level of the caller.
571 * Quarantined PCI devices will be associated with this domain.
572 */
573 dom_io = domain_create(DOMID_IO, NULL, false);
574 if ( IS_ERR(dom_io) )
575 panic("Failed to create d[IO]: %ld\n", PTR_ERR(dom_io));
576
577 #ifdef CONFIG_MEM_SHARING
578 /*
579 * Initialise our COW domain.
580 * This domain owns sharable pages.
581 */
582 dom_cow = domain_create(DOMID_COW, NULL, false);
583 if ( IS_ERR(dom_cow) )
584 panic("Failed to create d[COW]: %ld\n", PTR_ERR(dom_cow));
585 #endif
586 }
587
588 int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
589 {
590 /* Being disjoint with the system is just wrong. */
591 if ( !nodes_intersects(*affinity, node_online_map) )
592 return -EINVAL;
593
594 spin_lock(&d->node_affinity_lock);
595
596 /*
597 * Being/becoming explicitly affine to all nodes is not particularly
598 * useful. Let's take it as the `reset node affinity` command.
599 */
600 if ( nodes_full(*affinity) )
601 {
602 d->auto_node_affinity = 1;
603 goto out;
604 }
605
606 d->auto_node_affinity = 0;
607 d->node_affinity = *affinity;
608
609 out:
610 spin_unlock(&d->node_affinity_lock);
611
612 domain_update_node_affinity(d);
613
614 return 0;
615 }
616
617
618 struct domain *get_domain_by_id(domid_t dom)
619 {
620 struct domain *d;
621
622 rcu_read_lock(&domlist_read_lock);
623
624 for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
625 d != NULL;
626 d = rcu_dereference(d->next_in_hashbucket) )
627 {
628 if ( d->domain_id == dom )
629 {
630 if ( unlikely(!get_domain(d)) )
631 d = NULL;
632 break;
633 }
634 }
635
636 rcu_read_unlock(&domlist_read_lock);
637
638 return d;
639 }
640
641
642 struct domain *rcu_lock_domain_by_id(domid_t dom)
643 {
644 struct domain *d = NULL;
645
646 rcu_read_lock(&domlist_read_lock);
647
648 for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
649 d != NULL;
650 d = rcu_dereference(d->next_in_hashbucket) )
651 {
652 if ( d->domain_id == dom )
653 {
654 rcu_lock_domain(d);
655 break;
656 }
657 }
658
659 rcu_read_unlock(&domlist_read_lock);
660
661 return d;
662 }
663
664 struct domain *rcu_lock_domain_by_any_id(domid_t dom)
665 {
666 if ( dom == DOMID_SELF )
667 return rcu_lock_current_domain();
668 return rcu_lock_domain_by_id(dom);
669 }
670
671 int rcu_lock_remote_domain_by_id(domid_t dom, struct domain **d)
672 {
673 if ( (*d = rcu_lock_domain_by_id(dom)) == NULL )
674 return -ESRCH;
675
676 if ( *d == current->domain )
677 {
678 rcu_unlock_domain(*d);
679 return -EPERM;
680 }
681
682 return 0;
683 }
684
685 int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
686 {
687 int rv;
688 rv = rcu_lock_remote_domain_by_id(dom, d);
689 if ( rv )
690 return rv;
691 if ( (*d)->is_dying )
692 {
693 rcu_unlock_domain(*d);
694 return -EINVAL;
695 }
696
697 return 0;
698 }
699
700 int domain_kill(struct domain *d)
701 {
702 int rc = 0;
703 struct vcpu *v;
704
705 if ( d == current->domain )
706 return -EINVAL;
707
708 /* Protected by d->domain_lock. */
709 switch ( d->is_dying )
710 {
711 case DOMDYING_alive:
712 domain_unlock(d);
713 domain_pause(d);
714 domain_lock(d);
715 /*
716 * With the domain lock dropped, d->is_dying may have changed. Call
717 * ourselves recursively if so, which is safe as then we won't come
718 * back here.
719 */
720 if ( d->is_dying != DOMDYING_alive )
721 return domain_kill(d);
722 d->is_dying = DOMDYING_dying;
723 argo_destroy(d);
724 gnttab_release_mappings(d);
725 vnuma_destroy(d->vnuma);
726 domain_set_outstanding_pages(d, 0);
727 /* fallthrough */
728 case DOMDYING_dying:
729 rc = evtchn_destroy(d);
730 if ( rc )
731 break;
732 rc = domain_relinquish_resources(d);
733 if ( rc != 0 )
734 break;
735 if ( cpupool_move_domain(d, cpupool0) )
736 return -ERESTART;
737 for_each_vcpu ( d, v )
738 unmap_vcpu_info(v);
739 d->is_dying = DOMDYING_dead;
740 /* Mem event cleanup has to go here because the rings
741 * have to be put before we call put_domain. */
742 vm_event_cleanup(d);
743 put_domain(d);
744 send_global_virq(VIRQ_DOM_EXC);
745 /* fallthrough */
746 case DOMDYING_dead:
747 break;
748 }
749
750 return rc;
751 }
752
753
754 void __domain_crash(struct domain *d)
755 {
756 if ( d->is_shutting_down )
757 {
758 /* Print nothing: the domain is already shutting down. */
759 }
760 else if ( d == current->domain )
761 {
762 printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
763 d->domain_id, current->vcpu_id, smp_processor_id());
764 show_execution_state(guest_cpu_user_regs());
765 }
766 else
767 {
768 printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
769 d->domain_id, current->domain->domain_id, smp_processor_id());
770 }
771
772 domain_shutdown(d, SHUTDOWN_crash);
773 }
774
775
776 int domain_shutdown(struct domain *d, u8 reason)
777 {
778 struct vcpu *v;
779
780 #ifdef CONFIG_X86
781 if ( pv_shim )
782 return pv_shim_shutdown(reason);
783 #endif
784
785 spin_lock(&d->shutdown_lock);
786
787 if ( d->shutdown_code == SHUTDOWN_CODE_INVALID )
788 d->shutdown_code = reason;
789 reason = d->shutdown_code;
790
791 if ( is_hardware_domain(d) )
792 hwdom_shutdown(reason);
793
794 if ( d->is_shutting_down )
795 {
796 spin_unlock(&d->shutdown_lock);
797 return 0;
798 }
799
800 d->is_shutting_down = 1;
801
802 smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */
803
804 for_each_vcpu ( d, v )
805 {
806 if ( reason == SHUTDOWN_crash )
807 v->defer_shutdown = 0;
808 else if ( v->defer_shutdown )
809 continue;
810 vcpu_pause_nosync(v);
811 v->paused_for_shutdown = 1;
812 }
813
814 arch_domain_shutdown(d);
815
816 __domain_finalise_shutdown(d);
817
818 spin_unlock(&d->shutdown_lock);
819
820 return 0;
821 }
822
823 void domain_resume(struct domain *d)
824 {
825 struct vcpu *v;
826
827 /*
828 * Some code paths assume that shutdown status does not get reset under
829 * their feet (e.g., some assertions make this assumption).
830 */
831 domain_pause(d);
832
833 spin_lock(&d->shutdown_lock);
834
835 d->is_shutting_down = d->is_shut_down = 0;
836 d->shutdown_code = SHUTDOWN_CODE_INVALID;
837
838 for_each_vcpu ( d, v )
839 {
840 if ( v->paused_for_shutdown )
841 vcpu_unpause(v);
842 v->paused_for_shutdown = 0;
843 }
844
845 spin_unlock(&d->shutdown_lock);
846
847 domain_unpause(d);
848 }
849
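/*
 * Shutdown deferral: a vCPU with defer_shutdown set is skipped by
 * domain_shutdown() unless the reason is SHUTDOWN_crash. The smp_mb() here
 * and the one in domain_shutdown() pair up so that at least one side always
 * observes the other's update.
 */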
850 int vcpu_start_shutdown_deferral(struct vcpu *v)
851 {
852 if ( v->defer_shutdown )
853 return 1;
854
855 v->defer_shutdown = 1;
856 smp_mb(); /* set deferral status /then/ check for shutdown */
857 if ( unlikely(v->domain->is_shutting_down) )
858 vcpu_check_shutdown(v);
859
860 return v->defer_shutdown;
861 }
862
863 void vcpu_end_shutdown_deferral(struct vcpu *v)
864 {
865 v->defer_shutdown = 0;
866 smp_mb(); /* clear deferral status /then/ check for shutdown */
867 if ( unlikely(v->domain->is_shutting_down) )
868 vcpu_check_shutdown(v);
869 }
870
871 /* Complete domain destroy after RCU readers are not holding old references. */
872 static void complete_domain_destroy(struct rcu_head *head)
873 {
874 struct domain *d = container_of(head, struct domain, rcu);
875 struct vcpu *v;
876 int i;
877
878 /*
879 * Flush all state for the vCPU previously having run on the current CPU.
880 * This is in particular relevant for x86 HVM ones on VMX, so that this
881 * flushing of state won't happen from the TLB flush IPI handler behind
882 * the back of a vmx_vmcs_enter() / vmx_vmcs_exit() section.
883 */
884 sync_local_execstate();
885
886 for ( i = d->max_vcpus - 1; i >= 0; i-- )
887 {
888 if ( (v = d->vcpu[i]) == NULL )
889 continue;
890 tasklet_kill(&v->continue_hypercall_tasklet);
891 arch_vcpu_destroy(v);
892 sched_destroy_vcpu(v);
893 destroy_waitqueue_vcpu(v);
894 }
895
896 grant_table_destroy(d);
897
898 arch_domain_destroy(d);
899
900 watchdog_domain_destroy(d);
901
902 sched_destroy_domain(d);
903
904 /* Free page used by xen oprofile buffer. */
905 #ifdef CONFIG_XENOPROF
906 free_xenoprof_pages(d);
907 #endif
908
909 #ifdef CONFIG_HAS_MEM_PAGING
910 xfree(d->vm_event_paging);
911 #endif
912 xfree(d->vm_event_monitor);
913 #ifdef CONFIG_MEM_SHARING
914 xfree(d->vm_event_share);
915 #endif
916
917 for ( i = d->max_vcpus - 1; i >= 0; i-- )
918 if ( (v = d->vcpu[i]) != NULL )
919 vcpu_destroy(v);
920
921 if ( d->target != NULL )
922 put_domain(d->target);
923
924 evtchn_destroy_final(d);
925
926 radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
927
928 xfree(d->vcpu);
929
930 _domain_destroy(d);
931
932 send_global_virq(VIRQ_DOM_EXC);
933 }
934
935 /* Release resources belonging to task @p. */
936 void domain_destroy(struct domain *d)
937 {
938 struct domain **pd;
939
940 BUG_ON(!d->is_dying);
941
942 /* May be already destroyed, or get_domain() can race us. */
943 if ( atomic_cmpxchg(&d->refcnt, 0, DOMAIN_DESTROYED) != 0 )
944 return;
945
946 TRACE_1D(TRC_DOM0_DOM_REM, d->domain_id);
947
948 /* Delete from task list and task hashtable. */
949 spin_lock(&domlist_update_lock);
950 pd = &domain_list;
951 while ( *pd != d )
952 pd = &(*pd)->next_in_list;
953 rcu_assign_pointer(*pd, d->next_in_list);
954 pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
955 while ( *pd != d )
956 pd = &(*pd)->next_in_hashbucket;
957 rcu_assign_pointer(*pd, d->next_in_hashbucket);
958 spin_unlock(&domlist_update_lock);
959
960 /* Schedule RCU asynchronous completion of domain destroy. */
961 call_rcu(&d->rcu, complete_domain_destroy);
962 }
963
964 void vcpu_pause(struct vcpu *v)
965 {
966 ASSERT(v != current);
967 atomic_inc(&v->pause_count);
968 vcpu_sleep_sync(v);
969 }
970
971 void vcpu_pause_nosync(struct vcpu *v)
972 {
973 atomic_inc(&v->pause_count);
974 vcpu_sleep_nosync(v);
975 }
976
977 void vcpu_unpause(struct vcpu *v)
978 {
979 if ( atomic_dec_and_test(&v->pause_count) )
980 vcpu_wake(v);
981 }
982
983 int vcpu_pause_by_systemcontroller(struct vcpu *v)
984 {
985 int old, new, prev = v->controller_pause_count;
986
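/*
 * Lock-free increment mirroring __domain_pause_by_systemcontroller(): the
 * arbitrary cap of 255 stops a misbehaving toolstack from overflowing
 * v->pause_count with repeated pause hypercalls.
 */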
987 do
988 {
989 old = prev;
990 new = old + 1;
991
992 if ( new > 255 )
993 return -EOVERFLOW;
994
995 prev = cmpxchg(&v->controller_pause_count, old, new);
996 } while ( prev != old );
997
998 vcpu_pause(v);
999
1000 return 0;
1001 }
1002
1003 int vcpu_unpause_by_systemcontroller(struct vcpu *v)
1004 {
1005 int old, new, prev = v->controller_pause_count;
1006
1007 do
1008 {
1009 old = prev;
1010 new = old - 1;
1011
1012 if ( new < 0 )
1013 return -EINVAL;
1014
1015 prev = cmpxchg(&v->controller_pause_count, old, new);
1016 } while ( prev != old );
1017
1018 vcpu_unpause(v);
1019
1020 return 0;
1021 }
1022
1023 static void do_domain_pause(struct domain *d,
1024 void (*sleep_fn)(struct vcpu *v))
1025 {
1026 struct vcpu *v;
1027
1028 atomic_inc(&d->pause_count);
1029
1030 for_each_vcpu( d, v )
1031 sleep_fn(v);
1032
1033 arch_domain_pause(d);
1034 }
1035
1036 void domain_pause(struct domain *d)
1037 {
1038 ASSERT(d != current->domain);
1039 do_domain_pause(d, vcpu_sleep_sync);
1040 }
1041
1042 void domain_pause_nosync(struct domain *d)
1043 {
1044 do_domain_pause(d, vcpu_sleep_nosync);
1045 }
1046
1047 void domain_unpause(struct domain *d)
1048 {
1049 struct vcpu *v;
1050
1051 arch_domain_unpause(d);
1052
1053 if ( atomic_dec_and_test(&d->pause_count) )
1054 for_each_vcpu( d, v )
1055 vcpu_wake(v);
1056 }
1057
1058 int __domain_pause_by_systemcontroller(struct domain *d,
1059 void (*pause_fn)(struct domain *d))
1060 {
1061 int old, new, prev = d->controller_pause_count;
1062
1063 do
1064 {
1065 old = prev;
1066 new = old + 1;
1067
1068 /*
1069 * Limit the toolstack pause count to an arbitrary 255 to prevent the
1070 * toolstack overflowing d->pause_count with many repeated hypercalls.
1071 */
1072 if ( new > 255 )
1073 return -EOVERFLOW;
1074
1075 prev = cmpxchg(&d->controller_pause_count, old, new);
1076 } while ( prev != old );
1077
1078 pause_fn(d);
1079
1080 return 0;
1081 }
1082
1083 int domain_unpause_by_systemcontroller(struct domain *d)
1084 {
1085 int old, new, prev = d->controller_pause_count;
1086
1087 do
1088 {
1089 old = prev;
1090 new = old - 1;
1091
1092 if ( new < 0 )
1093 return -EINVAL;
1094
1095 prev = cmpxchg(&d->controller_pause_count, old, new);
1096 } while ( prev != old );
1097
1098 /*
1099 * d->controller_pause_count is initialised to 1, and the toolstack is
1100 * responsible for making one unpause hypercall when it wishes the guest
1101 * to start running.
1102 *
1103 * All other toolstack operations should make a pair of pause/unpause
1104 * calls and rely on the reference counting here.
1105 *
1106 * Creation is considered finished when the controller reference count
1107 * first drops to 0.
1108 */
1109 if ( new == 0 && !d->creation_finished )
1110 {
1111 d->creation_finished = true;
1112 arch_domain_creation_finished(d);
1113 }
1114
1115 domain_unpause(d);
1116
1117 return 0;
1118 }
1119
1120 int domain_pause_except_self(struct domain *d)
1121 {
1122 struct vcpu *v, *curr = current;
1123
1124 if ( curr->domain == d )
1125 {
1126 /* Avoid racing with other vcpus which may want to be pausing us */
1127 if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
1128 return -ERESTART;
1129 for_each_vcpu( d, v )
1130 if ( likely(v != curr) )
1131 vcpu_pause(v);
1132 spin_unlock(&d->hypercall_deadlock_mutex);
1133 }
1134 else
1135 domain_pause(d);
1136
1137 return 0;
1138 }
1139
1140 void domain_unpause_except_self(struct domain *d)
1141 {
1142 struct vcpu *v, *curr = current;
1143
1144 if ( curr->domain == d )
1145 {
1146 for_each_vcpu( d, v )
1147 if ( likely(v != curr) )
1148 vcpu_unpause(v);
1149 }
1150 else
1151 domain_unpause(d);
1152 }
1153
1154 int domain_soft_reset(struct domain *d, bool resuming)
1155 {
1156 struct vcpu *v;
1157 int rc;
1158
1159 spin_lock(&d->shutdown_lock);
1160 for_each_vcpu ( d, v )
1161 if ( !v->paused_for_shutdown )
1162 {
1163 spin_unlock(&d->shutdown_lock);
1164 return -EINVAL;
1165 }
1166 spin_unlock(&d->shutdown_lock);
1167
1168 rc = evtchn_reset(d, resuming);
1169 if ( rc )
1170 return rc;
1171
1172 grant_table_warn_active_grants(d);
1173
1174 argo_soft_reset(d);
1175
1176 for_each_vcpu ( d, v )
1177 {
1178 set_xen_guest_handle(runstate_guest(v), NULL);
1179 unmap_vcpu_info(v);
1180 }
1181
1182 rc = arch_domain_soft_reset(d);
1183 if ( !rc )
1184 domain_resume(d);
1185 else
1186 domain_crash(d);
1187
1188 return rc;
1189 }
1190
1191 int vcpu_reset(struct vcpu *v)
1192 {
1193 struct domain *d = v->domain;
1194 int rc;
1195
1196 vcpu_pause(v);
1197 domain_lock(d);
1198
1199 set_bit(_VPF_in_reset, &v->pause_flags);
1200 rc = arch_vcpu_reset(v);
1201 if ( rc )
1202 goto out_unlock;
1203
1204 set_bit(_VPF_down, &v->pause_flags);
1205
1206 clear_bit(v->vcpu_id, d->poll_mask);
1207 v->poll_evtchn = 0;
1208
1209 v->fpu_initialised = 0;
1210 v->fpu_dirtied = 0;
1211 v->is_initialised = 0;
1212 if ( v->affinity_broken & VCPU_AFFINITY_OVERRIDE )
1213 vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
1214 if ( v->affinity_broken & VCPU_AFFINITY_WAIT )
1215 vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_WAIT);
1216 clear_bit(_VPF_blocked, &v->pause_flags);
1217 clear_bit(_VPF_in_reset, &v->pause_flags);
1218
1219 out_unlock:
1220 domain_unlock(v->domain);
1221 vcpu_unpause(v);
1222
1223 return rc;
1224 }
1225
1226 /*
1227 * Map a guest page in and point the vcpu_info pointer at it. This
1228 * makes sure that the vcpu_info is always pointing at a valid piece
1229 * of memory, and it sets a pending event to make sure that a pending
1230 * event doesn't get missed.
1231 */
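/*
 * Reached e.g. via VCPUOP_register_vcpu_info in do_vcpu_op() below, with the
 * guest supplying the frame and the byte offset of its vcpu_info within it.
 */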
1232 int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset)
1233 {
1234 struct domain *d = v->domain;
1235 void *mapping;
1236 vcpu_info_t *new_info;
1237 struct page_info *page;
1238 unsigned int align;
1239
1240 if ( offset > (PAGE_SIZE - sizeof(vcpu_info_t)) )
1241 return -EINVAL;
1242
1243 #ifdef CONFIG_COMPAT
1244 if ( has_32bit_shinfo(d) )
1245 align = alignof(new_info->compat);
1246 else
1247 #endif
1248 align = alignof(*new_info);
1249 if ( offset & (align - 1) )
1250 return -EINVAL;
1251
1252 if ( !mfn_eq(v->vcpu_info_mfn, INVALID_MFN) )
1253 return -EINVAL;
1254
1255 /* Run this command on yourself or on other offline VCPUS. */
1256 if ( (v != current) && !(v->pause_flags & VPF_down) )
1257 return -EINVAL;
1258
1259 page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
1260 if ( !page )
1261 return -EINVAL;
1262
1263 if ( !get_page_type(page, PGT_writable_page) )
1264 {
1265 put_page(page);
1266 return -EINVAL;
1267 }
1268
1269 mapping = __map_domain_page_global(page);
1270 if ( mapping == NULL )
1271 {
1272 put_page_and_type(page);
1273 return -ENOMEM;
1274 }
1275
1276 new_info = (vcpu_info_t *)(mapping + offset);
1277
1278 if ( v->vcpu_info == &dummy_vcpu_info )
1279 {
1280 memset(new_info, 0, sizeof(*new_info));
1281 #ifdef XEN_HAVE_PV_UPCALL_MASK
1282 __vcpu_info(v, new_info, evtchn_upcall_mask) = 1;
1283 #endif
1284 }
1285 else
1286 {
1287 memcpy(new_info, v->vcpu_info, sizeof(*new_info));
1288 }
1289
1290 v->vcpu_info = new_info;
1291 v->vcpu_info_mfn = page_to_mfn(page);
1292
1293 /* Set new vcpu_info pointer /before/ setting pending flags. */
1294 smp_wmb();
1295
1296 /*
1297 * Mark everything as being pending just to make sure nothing gets
1298 * lost. The domain will get a spurious event, but it can cope.
1299 */
1300 #ifdef CONFIG_COMPAT
1301 if ( !has_32bit_shinfo(d) )
1302 write_atomic(&new_info->native.evtchn_pending_sel, ~0);
1303 else
1304 #endif
1305 write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);
1306 vcpu_mark_events_pending(v);
1307
1308 return 0;
1309 }
1310
1311 /*
1312 * Unmap the vcpu info page if the guest decided to place it somewhere
1313 * else. This is used from domain_kill() and domain_soft_reset().
1314 */
1315 void unmap_vcpu_info(struct vcpu *v)
1316 {
1317 mfn_t mfn = v->vcpu_info_mfn;
1318
1319 if ( mfn_eq(mfn, INVALID_MFN) )
1320 return;
1321
1322 unmap_domain_page_global((void *)
1323 ((unsigned long)v->vcpu_info & PAGE_MASK));
1324
1325 vcpu_info_reset(v); /* NB: Clobbers v->vcpu_info_mfn */
1326
1327 put_page_and_type(mfn_to_page(mfn));
1328 }
1329
1330 int default_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
1331 {
1332 struct vcpu_guest_context *ctxt;
1333 struct domain *d = v->domain;
1334 int rc;
1335
1336 if ( (ctxt = alloc_vcpu_guest_context()) == NULL )
1337 return -ENOMEM;
1338
1339 if ( copy_from_guest(ctxt, arg, 1) )
1340 {
1341 free_vcpu_guest_context(ctxt);
1342 return -EFAULT;
1343 }
1344
1345 domain_lock(d);
1346 rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt);
1347 domain_unlock(d);
1348
1349 free_vcpu_guest_context(ctxt);
1350
1351 return rc;
1352 }
1353
1354 long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
1355 {
1356 struct domain *d = current->domain;
1357 struct vcpu *v;
1358 long rc = 0;
1359
1360 if ( (v = domain_vcpu(d, vcpuid)) == NULL )
1361 return -ENOENT;
1362
1363 switch ( cmd )
1364 {
1365 case VCPUOP_initialise:
1366 if ( v->vcpu_info == &dummy_vcpu_info )
1367 return -EINVAL;
1368
1369 rc = arch_initialise_vcpu(v, arg);
1370 if ( rc == -ERESTART )
1371 rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
1372 cmd, vcpuid, arg);
1373
1374 break;
1375
1376 case VCPUOP_up:
1377 #ifdef CONFIG_X86
1378 if ( pv_shim )
1379 rc = continue_hypercall_on_cpu(0, pv_shim_cpu_up, v);
1380 else
1381 #endif
1382 {
1383 bool wake = false;
1384
1385 domain_lock(d);
1386 if ( !v->is_initialised )
1387 rc = -EINVAL;
1388 else
1389 wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
1390 domain_unlock(d);
1391 if ( wake )
1392 vcpu_wake(v);
1393 }
1394
1395 break;
1396
1397 case VCPUOP_down:
1398 for_each_vcpu ( d, v )
1399 if ( v->vcpu_id != vcpuid && !test_bit(_VPF_down, &v->pause_flags) )
1400 {
1401 rc = 1;
1402 break;
1403 }
1404
1405 if ( !rc ) /* Last vcpu going down? */
1406 {
1407 domain_shutdown(d, SHUTDOWN_poweroff);
1408 break;
1409 }
1410
1411 rc = 0;
1412 v = d->vcpu[vcpuid];
1413
1414 #ifdef CONFIG_X86
1415 if ( pv_shim )
1416 rc = continue_hypercall_on_cpu(0, pv_shim_cpu_down, v);
1417 else
1418 #endif
1419 if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
1420 vcpu_sleep_nosync(v);
1421
1422 break;
1423
1424 case VCPUOP_is_up:
1425 rc = !(v->pause_flags & VPF_down);
1426 break;
1427
1428 case VCPUOP_get_runstate_info:
1429 {
1430 struct vcpu_runstate_info runstate;
1431 vcpu_runstate_get(v, &runstate);
1432 if ( copy_to_guest(arg, &runstate, 1) )
1433 rc = -EFAULT;
1434 break;
1435 }
1436
1437 case VCPUOP_set_periodic_timer:
1438 {
1439 struct vcpu_set_periodic_timer set;
1440
1441 if ( copy_from_guest(&set, arg, 1) )
1442 return -EFAULT;
1443
1444 if ( set.period_ns < MILLISECS(1) )
1445 return -EINVAL;
1446
1447 if ( set.period_ns > STIME_DELTA_MAX )
1448 return -EINVAL;
1449
1450 vcpu_set_periodic_timer(v, set.period_ns);
1451
1452 break;
1453 }
1454
1455 case VCPUOP_stop_periodic_timer:
1456 vcpu_set_periodic_timer(v, 0);
1457 break;
1458
1459 case VCPUOP_set_singleshot_timer:
1460 {
1461 struct vcpu_set_singleshot_timer set;
1462
1463 if ( v != current )
1464 return -EINVAL;
1465
1466 if ( copy_from_guest(&set, arg, 1) )
1467 return -EFAULT;
1468
1469 if ( (set.flags & VCPU_SSHOTTMR_future) &&
1470 (set.timeout_abs_ns < NOW()) )
1471 return -ETIME;
1472
1473 migrate_timer(&v->singleshot_timer, smp_processor_id());
1474 set_timer(&v->singleshot_timer, set.timeout_abs_ns);
1475
1476 break;
1477 }
1478
1479 case VCPUOP_stop_singleshot_timer:
1480 if ( v != current )
1481 return -EINVAL;
1482
1483 stop_timer(&v->singleshot_timer);
1484
1485 break;
1486
1487 case VCPUOP_register_vcpu_info:
1488 {
1489 struct vcpu_register_vcpu_info info;
1490
1491 rc = -EFAULT;
1492 if ( copy_from_guest(&info, arg, 1) )
1493 break;
1494
1495 domain_lock(d);
1496 rc = map_vcpu_info(v, info.mfn, info.offset);
1497 domain_unlock(d);
1498
1499 break;
1500 }
1501
1502 case VCPUOP_register_runstate_memory_area:
1503 {
1504 struct vcpu_register_runstate_memory_area area;
1505 struct vcpu_runstate_info runstate;
1506
1507 rc = -EFAULT;
1508 if ( copy_from_guest(&area, arg, 1) )
1509 break;
1510
1511 if ( !guest_handle_okay(area.addr.h, 1) )
1512 break;
1513
1514 rc = 0;
1515 runstate_guest(v) = area.addr.h;
1516
1517 if ( v == current )
1518 {
1519 __copy_to_guest(runstate_guest(v), &v->runstate, 1);
1520 }
1521 else
1522 {
1523 vcpu_runstate_get(v, &runstate);
1524 __copy_to_guest(runstate_guest(v), &runstate, 1);
1525 }
1526
1527 break;
1528 }
1529
1530 default:
1531 rc = arch_do_vcpu_op(cmd, v, arg);
1532 break;
1533 }
1534
1535 return rc;
1536 }
1537
1538 #ifdef arch_vm_assist_valid_mask
1539 long do_vm_assist(unsigned int cmd, unsigned int type)
1540 {
1541 struct domain *currd = current->domain;
1542 const unsigned long valid = arch_vm_assist_valid_mask(currd);
1543
1544 if ( type >= BITS_PER_LONG || !test_bit(type, &valid) )
1545 return -EINVAL;
1546
1547 switch ( cmd )
1548 {
1549 case VMASST_CMD_enable:
1550 set_bit(type, &currd->vm_assist);
1551 return 0;
1552
1553 case VMASST_CMD_disable:
1554 clear_bit(type, &currd->vm_assist);
1555 return 0;
1556 }
1557
1558 return -ENOSYS;
1559 }
1560 #endif
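/*
 * Guest-side sketch (wrapper name per the Linux PV interface; the assist
 * types actually accepted depend on arch_vm_assist_valid_mask()):
 *
 *     HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
 */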
1561
1562 struct pirq *pirq_get_info(struct domain *d, int pirq)
1563 {
1564 struct pirq *info = pirq_info(d, pirq);
1565
1566 if ( !info && (info = alloc_pirq_struct(d)) != NULL )
1567 {
1568 info->pirq = pirq;
1569 if ( radix_tree_insert(&d->pirq_tree, pirq, info) )
1570 {
1571 free_pirq_struct(info);
1572 info = NULL;
1573 }
1574 }
1575
1576 return info;
1577 }
1578
1579 static void _free_pirq_struct(struct rcu_head *head)
1580 {
1581 xfree(container_of(head, struct pirq, rcu_head));
1582 }
1583
1584 void free_pirq_struct(void *ptr)
1585 {
1586 struct pirq *pirq = ptr;
1587
1588 call_rcu(&pirq->rcu_head, _free_pirq_struct);
1589 }
1590
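/*
 * State for continue_hypercall_on_cpu(): the vCPU whose hypercall is being
 * continued, the continuation function and its argument, the target CPU,
 * and a nesting count for continuations scheduled from within one.
 */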
1591 struct migrate_info {
1592 long (*func)(void *data);
1593 void *data;
1594 struct vcpu *vcpu;
1595 unsigned int cpu;
1596 unsigned int nest;
1597 };
1598
1599 static DEFINE_PER_CPU(struct migrate_info *, continue_info);
1600
1601 static void continue_hypercall_tasklet_handler(void *data)
1602 {
1603 struct migrate_info *info = data;
1604 struct vcpu *v = info->vcpu;
1605 long res = -EINVAL;
1606
1607 /* Wait for vcpu to sleep so that we can access its register state. */
1608 vcpu_sleep_sync(v);
1609
1610 this_cpu(continue_info) = info;
1611
1612 if ( likely(info->cpu == smp_processor_id()) )
1613 res = info->func(info->data);
1614
1615 arch_hypercall_tasklet_result(v, res);
1616
1617 this_cpu(continue_info) = NULL;
1618
1619 if ( info->nest-- == 0 )
1620 {
1621 xfree(info);
1622 vcpu_unpause(v);
1623 put_domain(v->domain);
1624 }
1625 }
1626
1627 int continue_hypercall_on_cpu(
1628 unsigned int cpu, long (*func)(void *data), void *data)
1629 {
1630 struct migrate_info *info;
1631
1632 if ( (cpu >= nr_cpu_ids) || !cpu_online(cpu) )
1633 return -EINVAL;
1634
1635 info = this_cpu(continue_info);
1636 if ( info == NULL )
1637 {
1638 struct vcpu *curr = current;
1639
1640 info = xmalloc(struct migrate_info);
1641 if ( info == NULL )
1642 return -ENOMEM;
1643
1644 info->vcpu = curr;
1645 info->nest = 0;
1646
1647 tasklet_kill(&curr->continue_hypercall_tasklet);
1648 tasklet_init(&curr->continue_hypercall_tasklet,
1649 continue_hypercall_tasklet_handler, info);
1650
1651 get_knownalive_domain(curr->domain);
1652 vcpu_pause_nosync(curr);
1653 }
1654 else
1655 {
1656 BUG_ON(info->nest != 0);
1657 info->nest++;
1658 }
1659
1660 info->func = func;
1661 info->data = data;
1662 info->cpu = cpu;
1663
1664 tasklet_schedule_on_cpu(&info->vcpu->continue_hypercall_tasklet, cpu);
1665
1666 /* Dummy return value will be overwritten by tasklet. */
1667 return 0;
1668 }
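/*
 * Typical usage, as in the pv_shim handling of VCPUOP_up/VCPUOP_down above:
 *
 *     rc = continue_hypercall_on_cpu(0, pv_shim_cpu_up, v);
 *
 * The calling vCPU is paused and func(data) later runs from
 * continue_hypercall_tasklet_handler() on the chosen CPU; its return value
 * is delivered as the hypercall result via arch_hypercall_tasklet_result().
 */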
1669
1670 /*
1671 * Local variables:
1672 * mode: C
1673 * c-file-style: "BSD"
1674 * c-basic-offset: 4
1675 * tab-width: 4
1676 * indent-tabs-mode: nil
1677 * End:
1678 */
1679