/******************************************************************************
 * domain.c
 *
 * Generic domain-handling functions.
 */

#include <xen/compat.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/ctype.h>
#include <xen/err.h>
#include <xen/param.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/mm.h>
#include <xen/event.h>
#include <xen/vm_event.h>
#include <xen/time.h>
#include <xen/console.h>
#include <xen/softirq.h>
#include <xen/tasklet.h>
#include <xen/domain_page.h>
#include <xen/rangeset.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/delay.h>
#include <xen/shutdown.h>
#include <xen/percpu.h>
#include <xen/multicall.h>
#include <xen/rcupdate.h>
#include <xen/wait.h>
#include <xen/grant_table.h>
#include <xen/xenoprof.h>
#include <xen/irq.h>
#include <xen/argo.h>
#include <asm/debugger.h>
#include <asm/p2m.h>
#include <asm/processor.h>
#include <public/sched.h>
#include <public/sysctl.h>
#include <public/vcpu.h>
#include <xsm/xsm.h>
#include <xen/trace.h>
#include <asm/setup.h>

#ifdef CONFIG_X86
#include <asm/guest.h>
#endif

/* Linux config option: propagated to domain0 */
/* xen_processor_pmbits: xen control Cx, Px, ... */
unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;

/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
bool_t opt_dom0_vcpus_pin;
boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
DEFINE_SPINLOCK(domlist_update_lock);
DEFINE_RCU_READ_LOCK(domlist_read_lock);

#define DOMAIN_HASH_SIZE 256
#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
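/*
 * Domains are chained into the per-bucket lists below via
 * ->next_in_hashbucket; the bucket is selected by the low bits of the
 * domid, e.g. DOMAIN_HASH(0x104) == 0x04 with the 256-bucket table.
 */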
static struct domain *domain_hash[DOMAIN_HASH_SIZE];
struct domain *domain_list;

struct domain *hardware_domain __read_mostly;

#ifdef CONFIG_LATE_HWDOM
domid_t hardware_domid __read_mostly;
integer_param("hardware_dom", hardware_domid);
#endif

/* Private domain structs for DOMID_XEN, DOMID_IO, etc. */
struct domain *__read_mostly dom_xen;
struct domain *__read_mostly dom_io;
#ifdef CONFIG_MEM_SHARING
struct domain *__read_mostly dom_cow;
#endif

struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;

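/*
 * Placeholder vcpu_info for vCPUs whose id lies beyond the legacy
 * shared_info array (see vcpu_info_reset()), so that v->vcpu_info is
 * always safe to dereference even before the guest registers a real
 * per-vCPU info page via VCPUOP_register_vcpu_info.
 */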
vcpu_info_t dummy_vcpu_info;

static void __domain_finalise_shutdown(struct domain *d)
{
    struct vcpu *v;

    BUG_ON(!spin_is_locked(&d->shutdown_lock));

    if ( d->is_shut_down )
        return;

    for_each_vcpu ( d, v )
        if ( !v->paused_for_shutdown )
            return;

    d->is_shut_down = 1;
    if ( (d->shutdown_code == SHUTDOWN_suspend) && d->suspend_evtchn )
        evtchn_send(d, d->suspend_evtchn);
    else
        send_global_virq(VIRQ_DOM_EXC);
}

static void vcpu_check_shutdown(struct vcpu *v)
{
    struct domain *d = v->domain;

    spin_lock(&d->shutdown_lock);

    if ( d->is_shutting_down )
    {
        if ( !v->paused_for_shutdown )
            vcpu_pause_nosync(v);
        v->paused_for_shutdown = 1;
        v->defer_shutdown = 0;
        __domain_finalise_shutdown(d);
    }

    spin_unlock(&d->shutdown_lock);
}

static void vcpu_info_reset(struct vcpu *v)
{
    struct domain *d = v->domain;

    v->vcpu_info = ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
                    ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
                    : &dummy_vcpu_info);
    v->vcpu_info_mfn = INVALID_MFN;
}

static void vcpu_destroy(struct vcpu *v)
{
    free_vcpu_struct(v);
}

struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
{
    struct vcpu *v;

    /*
     * Sanity check some input expectations:
     * - vcpu_id should be bounded by d->max_vcpus, and not previously
     *   allocated.
     * - VCPUs should be tightly packed and allocated in ascending order,
     *   except for the idle domain which may vary based on PCPU numbering.
     */
    if ( vcpu_id >= d->max_vcpus || d->vcpu[vcpu_id] ||
         (!is_idle_domain(d) && vcpu_id && !d->vcpu[vcpu_id - 1]) )
    {
        ASSERT_UNREACHABLE();
        return NULL;
    }

    if ( (v = alloc_vcpu_struct(d)) == NULL )
        return NULL;

    v->domain = d;
    v->vcpu_id = vcpu_id;
    v->dirty_cpu = VCPU_CPU_CLEAN;

    spin_lock_init(&v->virq_lock);

    tasklet_init(&v->continue_hypercall_tasklet, NULL, NULL);

    grant_table_init_vcpu(v);

    if ( is_idle_domain(d) )
    {
        v->runstate.state = RUNSTATE_running;
        v->new_state = RUNSTATE_running;
    }
    else
    {
        v->runstate.state = RUNSTATE_offline;
        v->runstate.state_entry_time = NOW();
        set_bit(_VPF_down, &v->pause_flags);
        vcpu_info_reset(v);
        init_waitqueue_vcpu(v);
    }

    if ( sched_init_vcpu(v) != 0 )
        goto fail_wq;

    if ( arch_vcpu_create(v) != 0 )
        goto fail_sched;

    d->vcpu[vcpu_id] = v;
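    /*
     * Link v into the domain's next_in_list chain, which is kept sorted
     * by vcpu_id.  For the idle domain d->vcpu[] may have gaps (see the
     * sanity-check comment above), hence the backwards scan for the
     * closest populated predecessor.
     */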
    if ( vcpu_id != 0 )
    {
        int prev_id = v->vcpu_id - 1;
        while ( (prev_id >= 0) && (d->vcpu[prev_id] == NULL) )
            prev_id--;
        BUG_ON(prev_id < 0);
        v->next_in_list = d->vcpu[prev_id]->next_in_list;
        d->vcpu[prev_id]->next_in_list = v;
    }

    /* Must be called after making new vcpu visible to for_each_vcpu(). */
    vcpu_check_shutdown(v);

    return v;

 fail_sched:
    sched_destroy_vcpu(v);
 fail_wq:
    destroy_waitqueue_vcpu(v);
    vcpu_destroy(v);

    return NULL;
}

static int late_hwdom_init(struct domain *d)
{
#ifdef CONFIG_LATE_HWDOM
    struct domain *dom0;
    int rv;

    if ( d != hardware_domain || d->domain_id == 0 )
        return 0;

    rv = xsm_init_hardware_domain(XSM_HOOK, d);
    if ( rv )
        return rv;

    printk("Initialising hardware domain %d\n", hardware_domid);

    dom0 = rcu_lock_domain_by_id(0);
    ASSERT(dom0 != NULL);
    /*
     * Hardware resource ranges for domain 0 have been set up from
     * various sources intended to restrict the hardware domain's
     * access.  Apply these ranges to the actual hardware domain.
     *
     * Because the lists are being swapped, a side effect of this
     * operation is that Domain 0's rangesets are cleared.  Since
     * domain 0 should not be accessing the hardware when it constructs
     * a hardware domain, this should not be a problem.  Both lists
     * may be modified after this hypercall returns if a more complex
     * device model is desired.
     */
    rangeset_swap(d->irq_caps, dom0->irq_caps);
    rangeset_swap(d->iomem_caps, dom0->iomem_caps);
#ifdef CONFIG_X86
    rangeset_swap(d->arch.ioport_caps, dom0->arch.ioport_caps);
    setup_io_bitmap(d);
    setup_io_bitmap(dom0);
#endif

    rcu_unlock_domain(dom0);

    iommu_hwdom_init(d);

    return rv;
#else
    return 0;
#endif
}

static unsigned int __read_mostly extra_hwdom_irqs;
static unsigned int __read_mostly extra_domU_irqs = 32;

static int __init parse_extra_guest_irqs(const char *s)
{
    if ( isdigit(*s) )
        extra_domU_irqs = simple_strtoul(s, &s, 0);
    if ( *s == ',' && isdigit(*++s) )
        extra_hwdom_irqs = simple_strtoul(s, &s, 0);

    return *s ? -EINVAL : 0;
}
custom_param("extra_guest_irqs", parse_extra_guest_irqs);
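/*
 * Per the parser above, the accepted format is "[<domU>][,<hwdom>]", with
 * both fields optional; e.g. "extra_guest_irqs=64,1024" would give
 * ordinary guests 64 extra IRQs and the hardware domain 1024 (the values
 * here are purely illustrative).
 */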

/*
 * Destroy a domain once all references to it have been dropped.  Used either
 * from the RCU path, or from the domain_create() error path before the domain
 * is inserted into the domlist.
 */
static void _domain_destroy(struct domain *d)
{
    BUG_ON(!d->is_dying);
    BUG_ON(atomic_read(&d->refcnt) != DOMAIN_DESTROYED);

    xfree(d->pbuf);

    argo_destroy(d);

    rangeset_domain_destroy(d);

    free_cpumask_var(d->dirty_cpumask);

    xsm_free_security_domain(d);

    lock_profile_deregister_struct(LOCKPROF_TYPE_PERDOM, d);

    free_domain_struct(d);
}

static int sanitise_domain_config(struct xen_domctl_createdomain *config)
{
    if ( config->flags & ~(XEN_DOMCTL_CDF_hvm |
                           XEN_DOMCTL_CDF_hap |
                           XEN_DOMCTL_CDF_s3_integrity |
                           XEN_DOMCTL_CDF_oos_off |
                           XEN_DOMCTL_CDF_xs_domain |
                           XEN_DOMCTL_CDF_iommu) )
    {
        dprintk(XENLOG_INFO, "Unknown CDF flags %#x\n", config->flags);
        return -EINVAL;
    }

    if ( config->iommu_opts & ~XEN_DOMCTL_IOMMU_no_sharept )
    {
        dprintk(XENLOG_INFO, "Unknown IOMMU options %#x\n", config->iommu_opts);
        return -EINVAL;
    }

    if ( !(config->flags & XEN_DOMCTL_CDF_iommu) && config->iommu_opts )
    {
        dprintk(XENLOG_INFO,
                "IOMMU options specified but IOMMU not enabled\n");
        return -EINVAL;
    }

    if ( config->max_vcpus < 1 )
    {
        dprintk(XENLOG_INFO, "No vCPUs\n");
        return -EINVAL;
    }

    if ( !(config->flags & XEN_DOMCTL_CDF_hvm) &&
         (config->flags & XEN_DOMCTL_CDF_hap) )
    {
        dprintk(XENLOG_INFO, "HAP requested for non-HVM guest\n");
        return -EINVAL;
    }

    if ( (config->flags & XEN_DOMCTL_CDF_iommu) && !iommu_enabled )
    {
        dprintk(XENLOG_INFO, "IOMMU is not enabled\n");
        return -EINVAL;
    }

    return arch_sanitise_domain_config(config);
}

struct domain *domain_create(domid_t domid,
                             struct xen_domctl_createdomain *config,
                             bool is_priv)
{
    struct domain *d, **pd, *old_hwdom = NULL;
    enum { INIT_watchdog = 1u<<1,
           INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 };
    int err, init_status = 0;

    if ( config && (err = sanitise_domain_config(config)) )
        return ERR_PTR(err);

    if ( (d = alloc_domain_struct()) == NULL )
        return ERR_PTR(-ENOMEM);

    d->options = config ? config->flags : 0;

    /* Sort out our idea of is_system_domain(). */
    d->domain_id = domid;

    /* Debug sanity. */
    ASSERT(is_system_domain(d) ? config == NULL : config != NULL);

    /* Sort out our idea of is_control_domain(). */
    d->is_privileged = is_priv;

    /* Sort out our idea of is_hardware_domain(). */
    if ( domid == 0 || domid == hardware_domid )
    {
        if ( hardware_domid < 0 || hardware_domid >= DOMID_FIRST_RESERVED )
            panic("The value of hardware_dom must be a valid domain ID\n");

        d->disable_migrate = true;
        old_hwdom = hardware_domain;
        hardware_domain = d;
    }

    TRACE_1D(TRC_DOM0_DOM_ADD, d->domain_id);

    /*
     * Allocate d->vcpu[] and set ->max_vcpus up early.  Various per-domain
     * resources want to be sized based on max_vcpus.
     */
    if ( !is_system_domain(d) )
    {
        err = -ENOMEM;
        d->vcpu = xzalloc_array(struct vcpu *, config->max_vcpus);
        if ( !d->vcpu )
            goto fail;

        d->max_vcpus = config->max_vcpus;
    }

    lock_profile_register_struct(LOCKPROF_TYPE_PERDOM, d, domid, "Domain");

    if ( (err = xsm_alloc_security_domain(d)) != 0 )
        goto fail;

    atomic_set(&d->refcnt, 1);
    RCU_READ_LOCK_INIT(&d->rcu_lock);
    spin_lock_init_prof(d, domain_lock);
    spin_lock_init_prof(d, page_alloc_lock);
    spin_lock_init(&d->hypercall_deadlock_mutex);
    INIT_PAGE_LIST_HEAD(&d->page_list);
    INIT_PAGE_LIST_HEAD(&d->extra_page_list);
    INIT_PAGE_LIST_HEAD(&d->xenpage_list);

    spin_lock_init(&d->node_affinity_lock);
    d->node_affinity = NODE_MASK_ALL;
    d->auto_node_affinity = 1;

    spin_lock_init(&d->shutdown_lock);
    d->shutdown_code = SHUTDOWN_CODE_INVALID;

    spin_lock_init(&d->pbuf_lock);

    rwlock_init(&d->vnuma_rwlock);

#ifdef CONFIG_HAS_PCI
    INIT_LIST_HEAD(&d->pdev_list);
#endif

    err = -ENOMEM;
    if ( !zalloc_cpumask_var(&d->dirty_cpumask) )
        goto fail;

    rangeset_domain_initialise(d);

    /* DOMID_{XEN,IO,etc} (other than IDLE) are sufficiently constructed. */
    if ( is_system_domain(d) && !is_idle_domain(d) )
        return d;

    if ( !is_idle_domain(d) )
    {
        if ( !is_hardware_domain(d) )
            d->nr_pirqs = nr_static_irqs + extra_domU_irqs;
        else
            d->nr_pirqs = extra_hwdom_irqs ? nr_static_irqs + extra_hwdom_irqs
                                           : arch_hwdom_irqs(domid);
        d->nr_pirqs = min(d->nr_pirqs, nr_irqs);

        radix_tree_init(&d->pirq_tree);
    }

    if ( (err = arch_domain_create(d, config)) != 0 )
        goto fail;
    init_status |= INIT_arch;

    if ( !is_idle_domain(d) )
    {
        watchdog_domain_init(d);
        init_status |= INIT_watchdog;

        if ( is_xenstore_domain(d) )
            d->disable_migrate = true;

        d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
        d->irq_caps   = rangeset_new(d, "Interrupts", 0);
        if ( !d->iomem_caps || !d->irq_caps )
            goto fail;

        if ( (err = xsm_domain_create(XSM_HOOK, d, config->ssidref)) != 0 )
            goto fail;

        d->controller_pause_count = 1;
        atomic_inc(&d->pause_count);

        if ( (err = evtchn_init(d, config->max_evtchn_port)) != 0 )
            goto fail;
        init_status |= INIT_evtchn;

        if ( (err = grant_table_init(d, config->max_grant_frames,
                                     config->max_maptrack_frames)) != 0 )
            goto fail;
        init_status |= INIT_gnttab;

        if ( (err = argo_init(d)) != 0 )
            goto fail;

        err = -ENOMEM;

        d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE);
        if ( !d->pbuf )
            goto fail;

        if ( (err = sched_init_domain(d, 0)) != 0 )
            goto fail;

        if ( (err = late_hwdom_init(d)) != 0 )
            goto fail;

        /*
         * Must not fail beyond this point, as our caller doesn't know whether
         * the domain has been entered into domain_list or not.
         */

        spin_lock(&domlist_update_lock);
        pd = &domain_list; /* NB. domain_list maintained in order of domid. */
        for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
            if ( (*pd)->domain_id > d->domain_id )
                break;
        d->next_in_list = *pd;
        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
        rcu_assign_pointer(*pd, d);
        rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
        spin_unlock(&domlist_update_lock);

        memcpy(d->handle, config->handle, sizeof(d->handle));
    }

    return d;

 fail:
    ASSERT(err < 0);      /* Sanity check paths leading here. */
    err = err ?: -EILSEQ; /* Release build safety. */

    d->is_dying = DOMDYING_dead;
    if ( hardware_domain == d )
        hardware_domain = old_hwdom;
    atomic_set(&d->refcnt, DOMAIN_DESTROYED);

    sched_destroy_domain(d);

    if ( d->max_vcpus )
    {
        d->max_vcpus = 0;
        XFREE(d->vcpu);
    }
    if ( init_status & INIT_arch )
        arch_domain_destroy(d);
    if ( init_status & INIT_gnttab )
        grant_table_destroy(d);
    if ( init_status & INIT_evtchn )
    {
        evtchn_destroy(d);
        evtchn_destroy_final(d);
        radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
    }
    if ( init_status & INIT_watchdog )
        watchdog_domain_destroy(d);

    _domain_destroy(d);

    return ERR_PTR(err);
}

void __init setup_system_domains(void)
{
    /*
     * Initialise our DOMID_XEN domain.
     * Any Xen-heap pages that we will allow to be mapped will have
     * their domain field set to dom_xen.
     * Hidden PCI devices will also be associated with this domain
     * (but be [partly] controlled by Dom0 nevertheless).
     */
    dom_xen = domain_create(DOMID_XEN, NULL, false);
    if ( IS_ERR(dom_xen) )
        panic("Failed to create d[XEN]: %ld\n", PTR_ERR(dom_xen));

    /*
     * Initialise our DOMID_IO domain.
     * This domain owns I/O pages that are within the range of the page_info
     * array. Mappings occur at the privilege level of the caller.
     * Quarantined PCI devices will be associated with this domain.
     */
    dom_io = domain_create(DOMID_IO, NULL, false);
    if ( IS_ERR(dom_io) )
        panic("Failed to create d[IO]: %ld\n", PTR_ERR(dom_io));

#ifdef CONFIG_MEM_SHARING
    /*
     * Initialise our COW domain.
     * This domain owns sharable pages.
     */
    dom_cow = domain_create(DOMID_COW, NULL, false);
    if ( IS_ERR(dom_cow) )
        panic("Failed to create d[COW]: %ld\n", PTR_ERR(dom_cow));
#endif
}

int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
{
    /* Being disjoint with the system is just wrong. */
    if ( !nodes_intersects(*affinity, node_online_map) )
        return -EINVAL;

    spin_lock(&d->node_affinity_lock);

    /*
     * Being/becoming explicitly affine to all nodes is not particularly
     * useful. Let's take it as the `reset node affinity` command.
     */
    if ( nodes_full(*affinity) )
    {
        d->auto_node_affinity = 1;
        goto out;
    }

    d->auto_node_affinity = 0;
    d->node_affinity = *affinity;

out:
    spin_unlock(&d->node_affinity_lock);

    domain_update_node_affinity(d);

    return 0;
}

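/*
 * Look up a domain and take a general reference to it; the caller must
 * drop that reference with put_domain().  Contrast
 * rcu_lock_domain_by_id() below, which merely holds the domain under the
 * RCU read lock and is paired with rcu_unlock_domain().
 */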
struct domain *get_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
        {
            if ( unlikely(!get_domain(d)) )
                d = NULL;
            break;
        }
    }

    rcu_read_unlock(&domlist_read_lock);

    return d;
}


struct domain *rcu_lock_domain_by_id(domid_t dom)
{
    struct domain *d = NULL;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
        {
            rcu_lock_domain(d);
            break;
        }
    }

    rcu_read_unlock(&domlist_read_lock);

    return d;
}

struct domain *rcu_lock_domain_by_any_id(domid_t dom)
{
    if ( dom == DOMID_SELF )
        return rcu_lock_current_domain();
    return rcu_lock_domain_by_id(dom);
}

int rcu_lock_remote_domain_by_id(domid_t dom, struct domain **d)
{
    if ( (*d = rcu_lock_domain_by_id(dom)) == NULL )
        return -ESRCH;

    if ( *d == current->domain )
    {
        rcu_unlock_domain(*d);
        return -EPERM;
    }

    return 0;
}

int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
{
    int rv;
    rv = rcu_lock_remote_domain_by_id(dom, d);
    if ( rv )
        return rv;
    if ( (*d)->is_dying )
    {
        rcu_unlock_domain(*d);
        return -EINVAL;
    }

    return 0;
}

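/*
 * Tear a domain down in stages, tracked by d->is_dying.  The dying and
 * relinquish steps are preemptible: evtchn_destroy(),
 * domain_relinquish_resources() and cpupool_move_domain() may yield
 * -ERESTART, in which case the caller is expected to retry the hypercall
 * until we reach DOMDYING_dead.
 */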
int domain_kill(struct domain *d)
{
    int rc = 0;
    struct vcpu *v;

    if ( d == current->domain )
        return -EINVAL;

    /* Protected by d->domain_lock. */
    switch ( d->is_dying )
    {
    case DOMDYING_alive:
        domain_unlock(d);
        domain_pause(d);
        domain_lock(d);
        /*
         * With the domain lock dropped, d->is_dying may have changed. Call
         * ourselves recursively if so, which is safe as then we won't come
         * back here.
         */
        if ( d->is_dying != DOMDYING_alive )
            return domain_kill(d);
        d->is_dying = DOMDYING_dying;
        argo_destroy(d);
        gnttab_release_mappings(d);
        vnuma_destroy(d->vnuma);
        domain_set_outstanding_pages(d, 0);
        /* fallthrough */
    case DOMDYING_dying:
        rc = evtchn_destroy(d);
        if ( rc )
            break;
        rc = domain_relinquish_resources(d);
        if ( rc != 0 )
            break;
        if ( cpupool_move_domain(d, cpupool0) )
            return -ERESTART;
        for_each_vcpu ( d, v )
            unmap_vcpu_info(v);
        d->is_dying = DOMDYING_dead;
        /* Mem event cleanup has to go here because the rings
         * have to be put before we call put_domain. */
        vm_event_cleanup(d);
        put_domain(d);
        send_global_virq(VIRQ_DOM_EXC);
        /* fallthrough */
    case DOMDYING_dead:
        break;
    }

    return rc;
}


void __domain_crash(struct domain *d)
{
    if ( d->is_shutting_down )
    {
        /* Print nothing: the domain is already shutting down. */
    }
    else if ( d == current->domain )
    {
        printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
               d->domain_id, current->vcpu_id, smp_processor_id());
        show_execution_state(guest_cpu_user_regs());
    }
    else
    {
        printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
               d->domain_id, current->domain->domain_id, smp_processor_id());
    }

    domain_shutdown(d, SHUTDOWN_crash);
}


int domain_shutdown(struct domain *d, u8 reason)
{
    struct vcpu *v;

#ifdef CONFIG_X86
    if ( pv_shim )
        return pv_shim_shutdown(reason);
#endif

    spin_lock(&d->shutdown_lock);

    if ( d->shutdown_code == SHUTDOWN_CODE_INVALID )
        d->shutdown_code = reason;
    reason = d->shutdown_code;

    if ( is_hardware_domain(d) )
        hwdom_shutdown(reason);

    if ( d->is_shutting_down )
    {
        spin_unlock(&d->shutdown_lock);
        return 0;
    }

    d->is_shutting_down = 1;

    smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */

    for_each_vcpu ( d, v )
    {
        if ( reason == SHUTDOWN_crash )
            v->defer_shutdown = 0;
        else if ( v->defer_shutdown )
            continue;
        vcpu_pause_nosync(v);
        v->paused_for_shutdown = 1;
    }

    arch_domain_shutdown(d);

    __domain_finalise_shutdown(d);

    spin_unlock(&d->shutdown_lock);

    return 0;
}

void domain_resume(struct domain *d)
{
    struct vcpu *v;

    /*
     * Some code paths assume that shutdown status does not get reset under
     * their feet (e.g., some assertions make this assumption).
     */
    domain_pause(d);

    spin_lock(&d->shutdown_lock);

    d->is_shutting_down = d->is_shut_down = 0;
    d->shutdown_code = SHUTDOWN_CODE_INVALID;

    for_each_vcpu ( d, v )
    {
        if ( v->paused_for_shutdown )
            vcpu_unpause(v);
        v->paused_for_shutdown = 0;
    }

    spin_unlock(&d->shutdown_lock);

    domain_unpause(d);
}

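/*
 * A vCPU in the middle of a critical operation may ask to defer a domain
 * shutdown.  Returns non-zero if deferral was granted; a zero return
 * means the shutdown was already being finalised and the vCPU has been
 * paused via vcpu_check_shutdown().
 */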
int vcpu_start_shutdown_deferral(struct vcpu *v)
{
    if ( v->defer_shutdown )
        return 1;

    v->defer_shutdown = 1;
    smp_mb(); /* set deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);

    return v->defer_shutdown;
}

void vcpu_end_shutdown_deferral(struct vcpu *v)
{
    v->defer_shutdown = 0;
    smp_mb(); /* clear deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);
}

/* Complete domain destroy after RCU readers are not holding old references. */
static void complete_domain_destroy(struct rcu_head *head)
{
    struct domain *d = container_of(head, struct domain, rcu);
    struct vcpu *v;
    int i;

    /*
     * Flush all state for the vCPU previously having run on the current CPU.
     * This is in particular relevant for x86 HVM ones on VMX, so that this
     * flushing of state won't happen from the TLB flush IPI handler behind
     * the back of a vmx_vmcs_enter() / vmx_vmcs_exit() section.
     */
    sync_local_execstate();

    for ( i = d->max_vcpus - 1; i >= 0; i-- )
    {
        if ( (v = d->vcpu[i]) == NULL )
            continue;
        tasklet_kill(&v->continue_hypercall_tasklet);
        arch_vcpu_destroy(v);
        sched_destroy_vcpu(v);
        destroy_waitqueue_vcpu(v);
    }

    grant_table_destroy(d);

    arch_domain_destroy(d);

    watchdog_domain_destroy(d);

    sched_destroy_domain(d);

    /* Free page used by xen oprofile buffer. */
#ifdef CONFIG_XENOPROF
    free_xenoprof_pages(d);
#endif

#ifdef CONFIG_HAS_MEM_PAGING
    xfree(d->vm_event_paging);
#endif
    xfree(d->vm_event_monitor);
#ifdef CONFIG_MEM_SHARING
    xfree(d->vm_event_share);
#endif

    for ( i = d->max_vcpus - 1; i >= 0; i-- )
        if ( (v = d->vcpu[i]) != NULL )
            vcpu_destroy(v);

    if ( d->target != NULL )
        put_domain(d->target);

    evtchn_destroy_final(d);

    radix_tree_destroy(&d->pirq_tree, free_pirq_struct);

    xfree(d->vcpu);

    _domain_destroy(d);

    send_global_virq(VIRQ_DOM_EXC);
}

/* Release resources belonging to domain @d. */
void domain_destroy(struct domain *d)
{
    struct domain **pd;

    BUG_ON(!d->is_dying);

    /* May be already destroyed, or get_domain() can race us. */
    if ( atomic_cmpxchg(&d->refcnt, 0, DOMAIN_DESTROYED) != 0 )
        return;

    TRACE_1D(TRC_DOM0_DOM_REM, d->domain_id);

    /* Delete from task list and task hashtable. */
    spin_lock(&domlist_update_lock);
    pd = &domain_list;
    while ( *pd != d )
        pd = &(*pd)->next_in_list;
    rcu_assign_pointer(*pd, d->next_in_list);
    pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
    while ( *pd != d )
        pd = &(*pd)->next_in_hashbucket;
    rcu_assign_pointer(*pd, d->next_in_hashbucket);
    spin_unlock(&domlist_update_lock);

    /* Schedule RCU asynchronous completion of domain destroy. */
    call_rcu(&d->rcu, complete_domain_destroy);
}

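/*
 * vCPU pausing is reference counted: each vcpu_pause*() call must be
 * matched by a vcpu_unpause().  The synchronous variant does not return
 * until the vCPU has been descheduled, so a vCPU may not pause itself;
 * vcpu_pause_nosync() merely requests the deschedule.
 */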
void vcpu_pause(struct vcpu *v)
{
    ASSERT(v != current);
    atomic_inc(&v->pause_count);
    vcpu_sleep_sync(v);
}

void vcpu_pause_nosync(struct vcpu *v)
{
    atomic_inc(&v->pause_count);
    vcpu_sleep_nosync(v);
}

void vcpu_unpause(struct vcpu *v)
{
    if ( atomic_dec_and_test(&v->pause_count) )
        vcpu_wake(v);
}

int vcpu_pause_by_systemcontroller(struct vcpu *v)
{
    int old, new, prev = v->controller_pause_count;

    do
    {
        old = prev;
        new = old + 1;

        if ( new > 255 )
            return -EOVERFLOW;

        prev = cmpxchg(&v->controller_pause_count, old, new);
    } while ( prev != old );

    vcpu_pause(v);

    return 0;
}

int vcpu_unpause_by_systemcontroller(struct vcpu *v)
{
    int old, new, prev = v->controller_pause_count;

    do
    {
        old = prev;
        new = old - 1;

        if ( new < 0 )
            return -EINVAL;

        prev = cmpxchg(&v->controller_pause_count, old, new);
    } while ( prev != old );

    vcpu_unpause(v);

    return 0;
}

static void do_domain_pause(struct domain *d,
                            void (*sleep_fn)(struct vcpu *v))
{
    struct vcpu *v;

    atomic_inc(&d->pause_count);

    for_each_vcpu( d, v )
        sleep_fn(v);

    arch_domain_pause(d);
}

void domain_pause(struct domain *d)
{
    ASSERT(d != current->domain);
    do_domain_pause(d, vcpu_sleep_sync);
}

void domain_pause_nosync(struct domain *d)
{
    do_domain_pause(d, vcpu_sleep_nosync);
}

void domain_unpause(struct domain *d)
{
    struct vcpu *v;

    arch_domain_unpause(d);

    if ( atomic_dec_and_test(&d->pause_count) )
        for_each_vcpu( d, v )
            vcpu_wake(v);
}

int __domain_pause_by_systemcontroller(struct domain *d,
                                       void (*pause_fn)(struct domain *d))
{
    int old, new, prev = d->controller_pause_count;

    do
    {
        old = prev;
        new = old + 1;

        /*
         * Limit the toolstack pause count to an arbitrary 255 to prevent the
         * toolstack overflowing d->pause_count with many repeated hypercalls.
         */
        if ( new > 255 )
            return -EOVERFLOW;

        prev = cmpxchg(&d->controller_pause_count, old, new);
    } while ( prev != old );

    pause_fn(d);

    return 0;
}

int domain_unpause_by_systemcontroller(struct domain *d)
{
    int old, new, prev = d->controller_pause_count;

    do
    {
        old = prev;
        new = old - 1;

        if ( new < 0 )
            return -EINVAL;

        prev = cmpxchg(&d->controller_pause_count, old, new);
    } while ( prev != old );

    /*
     * d->controller_pause_count is initialised to 1, and the toolstack is
     * responsible for making one unpause hypercall when it wishes the guest
     * to start running.
     *
     * All other toolstack operations should make a pair of pause/unpause
     * calls and rely on the reference counting here.
     *
     * Creation is considered finished when the controller reference count
     * first drops to 0.
     */
    if ( new == 0 && !d->creation_finished )
    {
        d->creation_finished = true;
        arch_domain_creation_finished(d);
    }

    domain_unpause(d);

    return 0;
}

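/*
 * When a vCPU pauses all of its own domain's other vCPUs, two vCPUs doing
 * so concurrently would deadlock waiting to pause each other.  The
 * hypercall_deadlock_mutex trylock below breaks the tie: the loser backs
 * off with -ERESTART so its hypercall can be retried.
 */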
int domain_pause_except_self(struct domain *d)
{
    struct vcpu *v, *curr = current;

    if ( curr->domain == d )
    {
        /* Avoid racing with other vcpus which may want to be pausing us */
        if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
            return -ERESTART;
        for_each_vcpu( d, v )
            if ( likely(v != curr) )
                vcpu_pause(v);
        spin_unlock(&d->hypercall_deadlock_mutex);
    }
    else
        domain_pause(d);

    return 0;
}

void domain_unpause_except_self(struct domain *d)
{
    struct vcpu *v, *curr = current;

    if ( curr->domain == d )
    {
        for_each_vcpu( d, v )
            if ( likely(v != curr) )
                vcpu_unpause(v);
    }
    else
        domain_unpause(d);
}

int domain_soft_reset(struct domain *d, bool resuming)
{
    struct vcpu *v;
    int rc;

    spin_lock(&d->shutdown_lock);
    for_each_vcpu ( d, v )
        if ( !v->paused_for_shutdown )
        {
            spin_unlock(&d->shutdown_lock);
            return -EINVAL;
        }
    spin_unlock(&d->shutdown_lock);

    rc = evtchn_reset(d, resuming);
    if ( rc )
        return rc;

    grant_table_warn_active_grants(d);

    argo_soft_reset(d);

    for_each_vcpu ( d, v )
    {
        set_xen_guest_handle(runstate_guest(v), NULL);
        unmap_vcpu_info(v);
    }

    rc = arch_domain_soft_reset(d);
    if ( !rc )
        domain_resume(d);
    else
        domain_crash(d);

    return rc;
}

int vcpu_reset(struct vcpu *v)
{
    struct domain *d = v->domain;
    int rc;

    vcpu_pause(v);
    domain_lock(d);

    set_bit(_VPF_in_reset, &v->pause_flags);
    rc = arch_vcpu_reset(v);
    if ( rc )
        goto out_unlock;

    set_bit(_VPF_down, &v->pause_flags);

    clear_bit(v->vcpu_id, d->poll_mask);
    v->poll_evtchn = 0;

    v->fpu_initialised = 0;
    v->fpu_dirtied     = 0;
    v->is_initialised  = 0;
    if ( v->affinity_broken & VCPU_AFFINITY_OVERRIDE )
        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
    if ( v->affinity_broken & VCPU_AFFINITY_WAIT )
        vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_WAIT);
    clear_bit(_VPF_blocked, &v->pause_flags);
    clear_bit(_VPF_in_reset, &v->pause_flags);

 out_unlock:
    domain_unlock(v->domain);
    vcpu_unpause(v);

    return rc;
}

/*
 * Map a guest page in and point the vcpu_info pointer at it.  This
 * makes sure that the vcpu_info is always pointing at a valid piece
 * of memory, and it sets a pending event to make sure that a pending
 * event doesn't get missed.
 */
int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset)
{
    struct domain *d = v->domain;
    void *mapping;
    vcpu_info_t *new_info;
    struct page_info *page;
    unsigned int align;

    if ( offset > (PAGE_SIZE - sizeof(vcpu_info_t)) )
        return -EINVAL;

#ifdef CONFIG_COMPAT
    if ( has_32bit_shinfo(d) )
        align = alignof(new_info->compat);
    else
#endif
        align = alignof(*new_info);
    if ( offset & (align - 1) )
        return -EINVAL;

    if ( !mfn_eq(v->vcpu_info_mfn, INVALID_MFN) )
        return -EINVAL;

    /* Run this command on yourself or on other offline VCPUS. */
    if ( (v != current) && !(v->pause_flags & VPF_down) )
        return -EINVAL;

    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
    if ( !page )
        return -EINVAL;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        return -EINVAL;
    }

    mapping = __map_domain_page_global(page);
    if ( mapping == NULL )
    {
        put_page_and_type(page);
        return -ENOMEM;
    }

    new_info = (vcpu_info_t *)(mapping + offset);

    if ( v->vcpu_info == &dummy_vcpu_info )
    {
        memset(new_info, 0, sizeof(*new_info));
#ifdef XEN_HAVE_PV_UPCALL_MASK
        __vcpu_info(v, new_info, evtchn_upcall_mask) = 1;
#endif
    }
    else
    {
        memcpy(new_info, v->vcpu_info, sizeof(*new_info));
    }

    v->vcpu_info = new_info;
    v->vcpu_info_mfn = page_to_mfn(page);

    /* Set new vcpu_info pointer /before/ setting pending flags. */
    smp_wmb();

    /*
     * Mark everything as being pending just to make sure nothing gets
     * lost.  The domain will get a spurious event, but it can cope.
     */
#ifdef CONFIG_COMPAT
    if ( !has_32bit_shinfo(d) )
        write_atomic(&new_info->native.evtchn_pending_sel, ~0);
    else
#endif
        write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);
    vcpu_mark_events_pending(v);

    return 0;
}

/*
 * Unmap the vcpu info page if the guest decided to place it somewhere
 * else. This is used from domain_kill() and domain_soft_reset().
 */
void unmap_vcpu_info(struct vcpu *v)
{
    mfn_t mfn = v->vcpu_info_mfn;

    if ( mfn_eq(mfn, INVALID_MFN) )
        return;

    unmap_domain_page_global((void *)
                             ((unsigned long)v->vcpu_info & PAGE_MASK));

    vcpu_info_reset(v); /* NB: Clobbers v->vcpu_info_mfn */

    put_page_and_type(mfn_to_page(mfn));
}

int default_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct vcpu_guest_context *ctxt;
    struct domain *d = v->domain;
    int rc;

    if ( (ctxt = alloc_vcpu_guest_context()) == NULL )
        return -ENOMEM;

    if ( copy_from_guest(ctxt, arg, 1) )
    {
        free_vcpu_guest_context(ctxt);
        return -EFAULT;
    }

    domain_lock(d);
    rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt);
    domain_unlock(d);

    free_vcpu_guest_context(ctxt);

    return rc;
}

long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct domain *d = current->domain;
    struct vcpu *v;
    long rc = 0;

    if ( (v = domain_vcpu(d, vcpuid)) == NULL )
        return -ENOENT;

    switch ( cmd )
    {
    case VCPUOP_initialise:
        if ( v->vcpu_info == &dummy_vcpu_info )
            return -EINVAL;

        rc = arch_initialise_vcpu(v, arg);
        if ( rc == -ERESTART )
            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
                                               cmd, vcpuid, arg);

        break;

    case VCPUOP_up:
#ifdef CONFIG_X86
        if ( pv_shim )
            rc = continue_hypercall_on_cpu(0, pv_shim_cpu_up, v);
        else
#endif
        {
            bool wake = false;

            domain_lock(d);
            if ( !v->is_initialised )
                rc = -EINVAL;
            else
                wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
            domain_unlock(d);
            if ( wake )
                vcpu_wake(v);
        }

        break;

    case VCPUOP_down:
        for_each_vcpu ( d, v )
            if ( v->vcpu_id != vcpuid && !test_bit(_VPF_down, &v->pause_flags) )
            {
                rc = 1;
                break;
            }

        if ( !rc ) /* Last vcpu going down? */
        {
            domain_shutdown(d, SHUTDOWN_poweroff);
            break;
        }

        rc = 0;
        v = d->vcpu[vcpuid];

#ifdef CONFIG_X86
        if ( pv_shim )
            rc = continue_hypercall_on_cpu(0, pv_shim_cpu_down, v);
        else
#endif
            if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
                vcpu_sleep_nosync(v);

        break;

    case VCPUOP_is_up:
        rc = !(v->pause_flags & VPF_down);
        break;

    case VCPUOP_get_runstate_info:
    {
        struct vcpu_runstate_info runstate;
        vcpu_runstate_get(v, &runstate);
        if ( copy_to_guest(arg, &runstate, 1) )
            rc = -EFAULT;
        break;
    }

    case VCPUOP_set_periodic_timer:
    {
        struct vcpu_set_periodic_timer set;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( set.period_ns < MILLISECS(1) )
            return -EINVAL;

        if ( set.period_ns > STIME_DELTA_MAX )
            return -EINVAL;

        vcpu_set_periodic_timer(v, set.period_ns);

        break;
    }

    case VCPUOP_stop_periodic_timer:
        vcpu_set_periodic_timer(v, 0);
        break;

    case VCPUOP_set_singleshot_timer:
    {
        struct vcpu_set_singleshot_timer set;

        if ( v != current )
            return -EINVAL;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( (set.flags & VCPU_SSHOTTMR_future) &&
             (set.timeout_abs_ns < NOW()) )
            return -ETIME;

        migrate_timer(&v->singleshot_timer, smp_processor_id());
        set_timer(&v->singleshot_timer, set.timeout_abs_ns);

        break;
    }

    case VCPUOP_stop_singleshot_timer:
        if ( v != current )
            return -EINVAL;

        stop_timer(&v->singleshot_timer);

        break;

    case VCPUOP_register_vcpu_info:
    {
        struct vcpu_register_vcpu_info info;

        rc = -EFAULT;
        if ( copy_from_guest(&info, arg, 1) )
            break;

        domain_lock(d);
        rc = map_vcpu_info(v, info.mfn, info.offset);
        domain_unlock(d);

        break;
    }

    case VCPUOP_register_runstate_memory_area:
    {
        struct vcpu_register_runstate_memory_area area;
        struct vcpu_runstate_info runstate;

        rc = -EFAULT;
        if ( copy_from_guest(&area, arg, 1) )
            break;

        if ( !guest_handle_okay(area.addr.h, 1) )
            break;

        rc = 0;
        runstate_guest(v) = area.addr.h;

        if ( v == current )
        {
            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
        }
        else
        {
            vcpu_runstate_get(v, &runstate);
            __copy_to_guest(runstate_guest(v), &runstate, 1);
        }

        break;
    }

    default:
        rc = arch_do_vcpu_op(cmd, v, arg);
        break;
    }

    return rc;
}

#ifdef arch_vm_assist_valid_mask
long do_vm_assist(unsigned int cmd, unsigned int type)
{
    struct domain *currd = current->domain;
    const unsigned long valid = arch_vm_assist_valid_mask(currd);

    if ( type >= BITS_PER_LONG || !test_bit(type, &valid) )
        return -EINVAL;

    switch ( cmd )
    {
    case VMASST_CMD_enable:
        set_bit(type, &currd->vm_assist);
        return 0;

    case VMASST_CMD_disable:
        clear_bit(type, &currd->vm_assist);
        return 0;
    }

    return -ENOSYS;
}
#endif

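/*
 * Look up the pirq struct for @pirq, allocating one and inserting it into
 * d->pirq_tree on first use.  Note that freeing is RCU-deferred via
 * free_pirq_struct() below, which is presumably what allows tree lookups
 * to run without holding the update-side lock.
 */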
struct pirq *pirq_get_info(struct domain *d, int pirq)
{
    struct pirq *info = pirq_info(d, pirq);

    if ( !info && (info = alloc_pirq_struct(d)) != NULL )
    {
        info->pirq = pirq;
        if ( radix_tree_insert(&d->pirq_tree, pirq, info) )
        {
            free_pirq_struct(info);
            info = NULL;
        }
    }

    return info;
}

static void _free_pirq_struct(struct rcu_head *head)
{
    xfree(container_of(head, struct pirq, rcu_head));
}

void free_pirq_struct(void *ptr)
{
    struct pirq *pirq = ptr;

    call_rcu(&pirq->rcu_head, _free_pirq_struct);
}

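/*
 * continue_hypercall_on_cpu() machinery: the current vCPU is paused and
 * its continue_hypercall_tasklet re-runs @func on the target CPU, with
 * the result patched into the guest's register state by
 * arch_hypercall_tasklet_result().  A single level of nesting (func
 * itself calling continue_hypercall_on_cpu()) is supported via ->nest.
 */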
struct migrate_info {
    long (*func)(void *data);
    void *data;
    struct vcpu *vcpu;
    unsigned int cpu;
    unsigned int nest;
};

static DEFINE_PER_CPU(struct migrate_info *, continue_info);

static void continue_hypercall_tasklet_handler(void *data)
{
    struct migrate_info *info = data;
    struct vcpu *v = info->vcpu;
    long res = -EINVAL;

    /* Wait for vcpu to sleep so that we can access its register state. */
    vcpu_sleep_sync(v);

    this_cpu(continue_info) = info;

    if ( likely(info->cpu == smp_processor_id()) )
        res = info->func(info->data);

    arch_hypercall_tasklet_result(v, res);

    this_cpu(continue_info) = NULL;

    if ( info->nest-- == 0 )
    {
        xfree(info);
        vcpu_unpause(v);
        put_domain(v->domain);
    }
}

int continue_hypercall_on_cpu(
    unsigned int cpu, long (*func)(void *data), void *data)
{
    struct migrate_info *info;

    if ( (cpu >= nr_cpu_ids) || !cpu_online(cpu) )
        return -EINVAL;

    info = this_cpu(continue_info);
    if ( info == NULL )
    {
        struct vcpu *curr = current;

        info = xmalloc(struct migrate_info);
        if ( info == NULL )
            return -ENOMEM;

        info->vcpu = curr;
        info->nest = 0;

        tasklet_kill(&curr->continue_hypercall_tasklet);
        tasklet_init(&curr->continue_hypercall_tasklet,
                     continue_hypercall_tasklet_handler, info);

        get_knownalive_domain(curr->domain);
        vcpu_pause_nosync(curr);
    }
    else
    {
        BUG_ON(info->nest != 0);
        info->nest++;
    }

    info->func = func;
    info->data = data;
    info->cpu  = cpu;

    tasklet_schedule_on_cpu(&info->vcpu->continue_hypercall_tasklet, cpu);

    /* Dummy return value will be overwritten by tasklet. */
    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */