/******************************************************************************
 * domctl.c
 *
 * Domain management operations. For use by node control stack.
 *
 * Copyright (c) 2002-2006, K A Fraser
 */

#include <xen/types.h>
#include <xen/lib.h>
#include <xen/err.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/grant_table.h>
#include <xen/domain_page.h>
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
#include <xen/rcupdate.h>
#include <xen/guest_access.h>
#include <xen/bitmap.h>
#include <xen/paging.h>
#include <xen/hypercall.h>
#include <xen/vm_event.h>
#include <xen/monitor.h>
#include <asm/current.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/p2m.h>
#include <public/domctl.h>
#include <xsm/xsm.h>

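/*
 * Serialises all domctl operations system-wide; taken (together with the
 * caller's hypercall_deadlock_mutex) via domctl_lock_acquire() below.
 */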
static DEFINE_SPINLOCK(domctl_lock);

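/*
 * Copy a hypervisor-side bitmap of @nbits bits into a guest-supplied
 * xenctl_bitmap.  Any guest bytes beyond the hypervisor bitmap's width
 * are zero-filled, so the guest never sees stale data for bits the
 * hypervisor does not track.
 */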
static int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
                                   const unsigned long *bitmap,
                                   unsigned int nbits)
{
    unsigned int guest_bytes, copy_bytes, i;
    uint8_t zero = 0;
    int err = 0;
    uint8_t *bytemap = xmalloc_array(uint8_t, (nbits + 7) / 8);

    if ( !bytemap )
        return -ENOMEM;

    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);

    bitmap_long_to_byte(bytemap, bitmap, nbits);

    if ( copy_bytes != 0 )
        if ( copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
            err = -EFAULT;

    for ( i = copy_bytes; !err && i < guest_bytes; i++ )
        if ( copy_to_guest_offset(xenctl_bitmap->bitmap, i, &zero, 1) )
            err = -EFAULT;

    xfree(bytemap);

    return err;
}

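/*
 * Inverse of bitmap_to_xenctl_bitmap(): copy a guest-supplied
 * xenctl_bitmap into a hypervisor-side bitmap of @nbits bits.  Trailing
 * bits in the final guest byte beyond nr_bits are masked off, so the
 * guest cannot smuggle in bits past its declared width.
 */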
int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
                            const struct xenctl_bitmap *xenctl_bitmap,
                            unsigned int nbits)
{
    unsigned int guest_bytes, copy_bytes;
    int err = 0;
    uint8_t *bytemap = xzalloc_array(uint8_t, (nbits + 7) / 8);

    if ( !bytemap )
        return -ENOMEM;

    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);

    if ( copy_bytes != 0 )
    {
        if ( copy_from_guest(bytemap, xenctl_bitmap->bitmap, copy_bytes) )
            err = -EFAULT;
        if ( (xenctl_bitmap->nr_bits & 7) && (guest_bytes == copy_bytes) )
            bytemap[guest_bytes-1] &= ~(0xff << (xenctl_bitmap->nr_bits & 7));
    }

    if ( !err )
        bitmap_byte_to_long(bitmap, bytemap, nbits);

    xfree(bytemap);

    return err;
}

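/*
 * Wrappers converting between the scheduler's cpumask_t and the
 * toolstack-visible xenctl_bitmap representation.  A hypothetical caller
 * in a domctl handler might look like:
 *
 *     ret = cpumask_to_xenctl_bitmap(&op->u.vcpuaffinity.cpumap,
 *                                    some_vcpu_affinity_mask);
 *
 * where the field and mask names above are illustrative only.
 */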
int cpumask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_cpumap,
                             const cpumask_t *cpumask)
{
    return bitmap_to_xenctl_bitmap(xenctl_cpumap, cpumask_bits(cpumask),
                                   nr_cpu_ids);
}

int xenctl_bitmap_to_cpumask(cpumask_var_t *cpumask,
                             const struct xenctl_bitmap *xenctl_cpumap)
{
    int err = 0;

    if ( alloc_cpumask_var(cpumask) ) {
        err = xenctl_bitmap_to_bitmap(cpumask_bits(*cpumask), xenctl_cpumap,
                                      nr_cpu_ids);
        /* In case of error, cleanup is up to us, as the caller won't care! */
        if ( err )
            free_cpumask_var(*cpumask);
    }
    else
        err = -ENOMEM;

    return err;
}

static int nodemask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_nodemap,
                                     const nodemask_t *nodemask)
{
    return bitmap_to_xenctl_bitmap(xenctl_nodemap, nodemask_bits(nodemask),
                                   MAX_NUMNODES);
}

static int xenctl_bitmap_to_nodemask(nodemask_t *nodemask,
                                     const struct xenctl_bitmap *xenctl_nodemap)
{
    return xenctl_bitmap_to_bitmap(nodemask_bits(nodemask), xenctl_nodemap,
                                   MAX_NUMNODES);
}

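/*
 * A domid is free if it is below the reserved range and no current domain
 * is using it.  This check is inherently racy in isolation; do_domctl()
 * invokes it with domctl_lock held, serialising domain creations.
 */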
static inline int is_free_domid(domid_t dom)
{
    struct domain *d;

    if ( dom >= DOMID_FIRST_RESERVED )
        return 0;

    if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
        return 1;

    rcu_unlock_domain(d);
    return 0;
}

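/*
 * Fill in a xen_domctl_getdomaininfo from live domain state; the
 * aggregate blocked/running flags are derived from the per-vcpu scan
 * below.
 */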
void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
{
    struct vcpu *v;
    u64 cpu_time = 0;
    int flags = XEN_DOMINF_blocked;
    struct vcpu_runstate_info runstate;

    info->domain = d->domain_id;
    info->max_vcpu_id = XEN_INVALID_MAX_VCPU_ID;
    info->nr_online_vcpus = 0;
    info->ssidref = 0;

    /*
     * - domain is marked as blocked only if all its vcpus are blocked
     * - domain is marked as running if any of its vcpus is running
     */
    for_each_vcpu ( d, v )
    {
        vcpu_runstate_get(v, &runstate);
        cpu_time += runstate.time[RUNSTATE_running];
        info->max_vcpu_id = v->vcpu_id;
        if ( !(v->pause_flags & VPF_down) )
        {
            if ( !(v->pause_flags & VPF_blocked) )
                flags &= ~XEN_DOMINF_blocked;
            if ( v->is_running )
                flags |= XEN_DOMINF_running;
            info->nr_online_vcpus++;
        }
    }

    info->cpu_time = cpu_time;

    info->flags = (info->nr_online_vcpus ? flags : 0) |
        ((d->is_dying == DOMDYING_dead) ? XEN_DOMINF_dying     : 0) |
        (d->is_shut_down                ? XEN_DOMINF_shutdown  : 0) |
        (d->controller_pause_count > 0  ? XEN_DOMINF_paused    : 0) |
        (d->debugger_attached           ? XEN_DOMINF_debugged  : 0) |
        (is_xenstore_domain(d)          ? XEN_DOMINF_xs_domain : 0) |
        (is_hvm_domain(d)               ? XEN_DOMINF_hvm_guest : 0) |
        d->shutdown_code << XEN_DOMINF_shutdownshift;

    xsm_security_domaininfo(d, info);

    info->tot_pages         = domain_tot_pages(d);
    info->max_pages         = d->max_pages;
    info->outstanding_pages = d->outstanding_pages;
    info->shr_pages         = atomic_read(&d->shr_pages);
    info->paged_pages       = atomic_read(&d->paged_pages);
    info->shared_info_frame = mfn_to_gmfn(d, virt_to_mfn(d->shared_info));
    BUG_ON(SHARED_M2P(info->shared_info_frame));

    info->cpupool = cpupool_get_id(d);

    memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));

    arch_get_domain_info(d, info);
}

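/*
 * Acquire both the calling domain's hypercall_deadlock_mutex and the
 * global domctl_lock, backing off (so the hypercall can be retried via a
 * continuation) rather than spinning on either.
 */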
bool_t domctl_lock_acquire(void)
{
    /*
     * Caller may try to pause its own VCPUs. We must prevent deadlock
     * against other non-domctl routines which try to do the same.
     */
    if ( !spin_trylock(&current->domain->hypercall_deadlock_mutex) )
        return 0;

    /*
     * Trylock here guards against deadlock when there are multiple
     * privileged domains: one domain could try to pause another which is
     * itself spinning on domctl_lock.
     */
    if ( spin_trylock(&domctl_lock) )
        return 1;

    spin_unlock(&current->domain->hypercall_deadlock_mutex);
    return 0;
}

void domctl_lock_release(void)
{
    spin_unlock(&domctl_lock);
    spin_unlock(&current->domain->hypercall_deadlock_mutex);
}

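/* Free a vnuma_info structure and all of its per-node / per-vcpu arrays. */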
void vnuma_destroy(struct vnuma_info *vnuma)
{
    if ( vnuma )
    {
        xfree(vnuma->vmemrange);
        xfree(vnuma->vcpu_to_vnode);
        xfree(vnuma->vdistance);
        xfree(vnuma->vnode_to_pnode);
        xfree(vnuma);
    }
}

/*
 * Allocate memory for a vNUMA description.  The caller must already have
 * set the domain's max_pages and number of vcpus, and is responsible for
 * freeing the result with vnuma_destroy().  Verifies that no single
 * allocation exceeds PAGE_SIZE.
 */
static struct vnuma_info *vnuma_alloc(unsigned int nr_vnodes,
                                      unsigned int nr_ranges,
                                      unsigned int nr_vcpus)
{
    struct vnuma_info *vnuma;

    /*
     * Check if any of the allocations are bigger than PAGE_SIZE.
     * See XSA-77.
     */
    if ( nr_vnodes == 0 ||
         nr_vnodes > (PAGE_SIZE / sizeof(*vnuma->vdistance) / nr_vnodes) ||
         nr_ranges > (PAGE_SIZE / sizeof(*vnuma->vmemrange)) )
        return ERR_PTR(-EINVAL);

    /*
     * If allocations become larger than PAGE_SIZE, they should be split
     * into PAGE_SIZE allocations due to XSA-77.
     */
    vnuma = xmalloc(struct vnuma_info);
    if ( !vnuma )
        return ERR_PTR(-ENOMEM);

    vnuma->vdistance = xmalloc_array(unsigned int, nr_vnodes * nr_vnodes);
    vnuma->vcpu_to_vnode = xmalloc_array(unsigned int, nr_vcpus);
    vnuma->vnode_to_pnode = xmalloc_array(nodeid_t, nr_vnodes);
    vnuma->vmemrange = xmalloc_array(xen_vmemrange_t, nr_ranges);

    if ( vnuma->vdistance == NULL || vnuma->vmemrange == NULL ||
         vnuma->vcpu_to_vnode == NULL || vnuma->vnode_to_pnode == NULL )
    {
        vnuma_destroy(vnuma);
        return ERR_PTR(-ENOMEM);
    }

    return vnuma;
}

/*
 * Construct the vNUMA topology from uinfo, validating the vcpu-to-vnode
 * and vnode-to-pnode mappings and the vmemrange flags.
 */
static struct vnuma_info *vnuma_init(const struct xen_domctl_vnuma *uinfo,
                                     const struct domain *d)
{
    unsigned int i, nr_vnodes;
    int ret = -EINVAL;
    struct vnuma_info *info;

    nr_vnodes = uinfo->nr_vnodes;

    if ( uinfo->nr_vcpus != d->max_vcpus || uinfo->pad != 0 )
        return ERR_PTR(ret);

    info = vnuma_alloc(nr_vnodes, uinfo->nr_vmemranges, d->max_vcpus);
    if ( IS_ERR(info) )
        return info;

    ret = -EFAULT;

    if ( copy_from_guest(info->vdistance, uinfo->vdistance,
                         nr_vnodes * nr_vnodes) )
        goto vnuma_fail;

    if ( copy_from_guest(info->vmemrange, uinfo->vmemrange,
                         uinfo->nr_vmemranges) )
        goto vnuma_fail;

    if ( copy_from_guest(info->vcpu_to_vnode, uinfo->vcpu_to_vnode,
                         d->max_vcpus) )
        goto vnuma_fail;

    ret = -E2BIG;
    for ( i = 0; i < d->max_vcpus; ++i )
        if ( info->vcpu_to_vnode[i] >= nr_vnodes )
            goto vnuma_fail;

    for ( i = 0; i < nr_vnodes; ++i )
    {
        unsigned int pnode;

        ret = -EFAULT;
        if ( copy_from_guest_offset(&pnode, uinfo->vnode_to_pnode, i, 1) )
            goto vnuma_fail;
        ret = -E2BIG;
        if ( pnode >= MAX_NUMNODES )
            goto vnuma_fail;
        info->vnode_to_pnode[i] = pnode;
    }

    info->nr_vnodes = nr_vnodes;
    info->nr_vmemranges = uinfo->nr_vmemranges;

    /* Check that vmemranges flags are zero. */
    ret = -EINVAL;
    for ( i = 0; i < info->nr_vmemranges; i++ )
        if ( info->vmemrange[i].flags != 0 )
            goto vnuma_fail;

    return info;

 vnuma_fail:
    vnuma_destroy(info);
    return ERR_PTR(ret);
}

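/*
 * Top-level XEN_DOMCTL hypercall handler: copy the operation in, look up
 * the target domain, run XSM and locking checks, then dispatch on
 * op->cmd.  Commands not handled here fall through to arch_do_domctl().
 */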
long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    long ret = 0;
    bool_t copyback = 0;
    struct xen_domctl curop, *op = &curop;
    struct domain *d;

    if ( copy_from_guest(op, u_domctl, 1) )
        return -EFAULT;

    if ( op->interface_version != XEN_DOMCTL_INTERFACE_VERSION )
        return -EACCES;

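    /*
     * Look up the target domain.  Note the unusual switch structure:
     * some commands run with no target (d = NULL), device (de)assignment
     * may target dom_io, and everything else requires an RCU reference
     * on an existing domain.
     */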
    switch ( op->cmd )
    {
    case XEN_DOMCTL_assign_device:
    case XEN_DOMCTL_deassign_device:
        if ( op->domain == DOMID_IO )
        {
            d = dom_io;
            break;
        }
        else if ( op->domain == DOMID_INVALID )
            return -ESRCH;
        /* fall through */
    case XEN_DOMCTL_test_assign_device:
    case XEN_DOMCTL_vm_event_op:
        if ( op->domain == DOMID_INVALID )
        {
    case XEN_DOMCTL_createdomain:
    case XEN_DOMCTL_gdbsx_guestmemio:
            d = NULL;
            break;
        }
        /* fall through */
    default:
        d = rcu_lock_domain_by_id(op->domain);
        if ( !d && op->cmd != XEN_DOMCTL_getdomaininfo )
            return -ESRCH;
    }

    ret = xsm_domctl(XSM_OTHER, d, op->cmd);
    if ( ret )
        goto domctl_out_unlock_domonly;

    if ( !domctl_lock_acquire() )
    {
        if ( d && d != dom_io )
            rcu_unlock_domain(d);
        return hypercall_create_continuation(
            __HYPERVISOR_domctl, "h", u_domctl);
    }

    switch ( op->cmd )
    {

    case XEN_DOMCTL_setvcpucontext:
    {
        vcpu_guest_context_u c = { .nat = NULL };
        unsigned int vcpu = op->u.vcpucontext.vcpu;
        struct vcpu *v;

        ret = -EINVAL;
        if ( (d == current->domain) || /* no domain_pause() */
             (vcpu >= d->max_vcpus) || ((v = d->vcpu[vcpu]) == NULL) )
            break;

        if ( guest_handle_is_null(op->u.vcpucontext.ctxt) )
        {
            ret = vcpu_reset(v);
            if ( ret == -ERESTART )
                ret = hypercall_create_continuation(
                          __HYPERVISOR_domctl, "h", u_domctl);
            break;
        }

#ifdef CONFIG_COMPAT
        BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
                     < sizeof(struct compat_vcpu_guest_context));
#endif
        ret = -ENOMEM;
        if ( (c.nat = alloc_vcpu_guest_context()) == NULL )
            break;

#ifdef CONFIG_COMPAT
        if ( !is_pv_32bit_domain(d) )
            ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
        else
            ret = copy_from_guest(c.cmp,
                                  guest_handle_cast(op->u.vcpucontext.ctxt,
                                                    void), 1);
#else
        ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
#endif
        ret = ret ? -EFAULT : 0;

        if ( ret == 0 )
        {
            domain_pause(d);
            ret = arch_set_info_guest(v, c);
            domain_unpause(d);

            if ( ret == -ERESTART )
                ret = hypercall_create_continuation(
                          __HYPERVISOR_domctl, "h", u_domctl);
        }

        free_vcpu_guest_context(c.nat);
        break;
    }

    case XEN_DOMCTL_pausedomain:
        ret = -EINVAL;
        if ( d != current->domain )
            ret = domain_pause_by_systemcontroller(d);
        break;

    case XEN_DOMCTL_unpausedomain:
        ret = domain_unpause_by_systemcontroller(d);
        break;

    case XEN_DOMCTL_resumedomain:
        if ( d == current->domain ) /* no domain_pause() */
            ret = -EINVAL;
        else
            domain_resume(d);
        break;

    case XEN_DOMCTL_createdomain:
    {
        domid_t        dom;
        static domid_t rover = 0;

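        /*
         * An explicitly requested domid in (0, DOMID_FIRST_RESERVED) is
         * honoured if free; otherwise scan for the next free domid,
         * resuming just past the last one handed out (rover) and
         * wrapping back to 1 before the reserved range.
         */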
        dom = op->domain;
        if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
        {
            ret = -EEXIST;
            if ( !is_free_domid(dom) )
                break;
        }
        else
        {
            for ( dom = rover + 1; dom != rover; dom++ )
            {
                if ( dom == DOMID_FIRST_RESERVED )
                    dom = 1;
                if ( is_free_domid(dom) )
                    break;
            }

            ret = -ENOMEM;
            if ( dom == rover )
                break;

            rover = dom;
        }

        d = domain_create(dom, &op->u.createdomain, false);
        if ( IS_ERR(d) )
        {
            ret = PTR_ERR(d);
            d = NULL;
            break;
        }

        ret = 0;
        op->domain = d->domain_id;
        copyback = 1;
        d = NULL;
        break;
    }

    case XEN_DOMCTL_max_vcpus:
    {
        unsigned int i, max = op->u.max_vcpus.max;

        ret = -EINVAL;
        if ( (d == current->domain) || /* no domain_pause() */
             (max != d->max_vcpus) )   /* max_vcpus set up in createdomain */
            break;

        /* Needed, for example, to ensure writable p.t. state is synced. */
        domain_pause(d);

        ret = -ENOMEM;

        for ( i = 0; i < max; i++ )
        {
            if ( d->vcpu[i] != NULL )
                continue;

            if ( vcpu_create(d, i) == NULL )
                goto maxvcpu_out;
        }

        domain_update_node_affinity(d);
        ret = 0;

    maxvcpu_out:
        domain_unpause(d);
        break;
    }

    case XEN_DOMCTL_soft_reset:
    case XEN_DOMCTL_soft_reset_cont:
        if ( d == current->domain ) /* no domain_pause() */
        {
            ret = -EINVAL;
            break;
        }
        ret = domain_soft_reset(d, op->cmd == XEN_DOMCTL_soft_reset_cont);
        if ( ret == -ERESTART )
        {
            op->cmd = XEN_DOMCTL_soft_reset_cont;
            if ( !__copy_field_to_guest(u_domctl, op, cmd) )
                ret = hypercall_create_continuation(__HYPERVISOR_domctl,
                                                    "h", u_domctl);
            else
                ret = -EFAULT;
        }
        break;

    case XEN_DOMCTL_destroydomain:
        domctl_lock_release();
        domain_lock(d);
        ret = domain_kill(d);
        domain_unlock(d);
        if ( ret == -ERESTART )
            ret = hypercall_create_continuation(
                __HYPERVISOR_domctl, "h", u_domctl);
        goto domctl_out_unlock_domonly;

    case XEN_DOMCTL_setnodeaffinity:
    {
        nodemask_t new_affinity;

        ret = xenctl_bitmap_to_nodemask(&new_affinity,
                                        &op->u.nodeaffinity.nodemap);
        if ( !ret )
            ret = domain_set_node_affinity(d, &new_affinity);
        break;
    }

    case XEN_DOMCTL_getnodeaffinity:
        ret = nodemask_to_xenctl_bitmap(&op->u.nodeaffinity.nodemap,
                                        &d->node_affinity);
        break;

    case XEN_DOMCTL_setvcpuaffinity:
    case XEN_DOMCTL_getvcpuaffinity:
        ret = vcpu_affinity_domctl(d, op->cmd, &op->u.vcpuaffinity);
        break;

    case XEN_DOMCTL_scheduler_op:
        ret = sched_adjust(d, &op->u.scheduler_op);
        copyback = 1;
        break;

    case XEN_DOMCTL_getdomaininfo:
    {
        domid_t dom = DOMID_INVALID;

        if ( !d )
        {
            ret = -EINVAL;
            if ( op->domain >= DOMID_FIRST_RESERVED )
                break;

            rcu_read_lock(&domlist_read_lock);

            dom = op->domain;
            for_each_domain ( d )
                if ( d->domain_id >= dom )
                    break;
        }

        ret = -ESRCH;
        if ( d == NULL )
            goto getdomaininfo_out;

        ret = xsm_getdomaininfo(XSM_HOOK, d);
        if ( ret )
            goto getdomaininfo_out;

        getdomaininfo(d, &op->u.getdomaininfo);

        op->domain = op->u.getdomaininfo.domain;
        copyback = 1;

    getdomaininfo_out:
        /* When d was non-NULL upon entry, no cleanup is needed. */
        if ( dom == DOMID_INVALID )
            break;

        rcu_read_unlock(&domlist_read_lock);
        d = NULL;
        break;
    }

    case XEN_DOMCTL_getvcpucontext:
    {
        vcpu_guest_context_u c = { .nat = NULL };
        struct vcpu         *v;

        ret = -EINVAL;
        if ( op->u.vcpucontext.vcpu >= d->max_vcpus ||
             (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL ||
             v == current ) /* no vcpu_pause() */
            goto getvcpucontext_out;

        ret = -ENODATA;
        if ( !v->is_initialised )
            goto getvcpucontext_out;

#ifdef CONFIG_COMPAT
        BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
                     < sizeof(struct compat_vcpu_guest_context));
#endif
        ret = -ENOMEM;
        if ( (c.nat = xzalloc(struct vcpu_guest_context)) == NULL )
            goto getvcpucontext_out;

        vcpu_pause(v);

        arch_get_info_guest(v, c);
        ret = 0;

        vcpu_unpause(v);

#ifdef CONFIG_COMPAT
        if ( !is_pv_32bit_domain(d) )
            ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
        else
            ret = copy_to_guest(guest_handle_cast(op->u.vcpucontext.ctxt,
                                                  void), c.cmp, 1);
#else
        ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
#endif

        if ( ret )
            ret = -EFAULT;
        copyback = 1;

    getvcpucontext_out:
        xfree(c.nat);
        break;
    }

    case XEN_DOMCTL_getvcpuinfo:
    {
        struct vcpu   *v;
        struct vcpu_runstate_info runstate;

        ret = -EINVAL;
        if ( op->u.getvcpuinfo.vcpu >= d->max_vcpus )
            break;

        ret = -ESRCH;
        if ( (v = d->vcpu[op->u.getvcpuinfo.vcpu]) == NULL )
            break;

        vcpu_runstate_get(v, &runstate);

        op->u.getvcpuinfo.online   = !(v->pause_flags & VPF_down);
        op->u.getvcpuinfo.blocked  = !!(v->pause_flags & VPF_blocked);
        op->u.getvcpuinfo.running  = v->is_running;
        op->u.getvcpuinfo.cpu_time = runstate.time[RUNSTATE_running];
        op->u.getvcpuinfo.cpu      = v->processor;
        ret = 0;
        copyback = 1;
        break;
    }

    case XEN_DOMCTL_max_mem:
    {
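        /*
         * max_memkb is in KiB; converting to pages is a right shift by
         * log2(PAGE_SIZE / 1024) == PAGE_SHIFT - 10.
         */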
        uint64_t new_max = op->u.max_mem.max_memkb >> (PAGE_SHIFT - 10);

        spin_lock(&d->page_alloc_lock);
        /*
         * NB. We removed a check that new_max >= current tot_pages; this means
         * that the domain will now be allowed to "ratchet" down to new_max. In
         * the meantime, while tot > max, all new allocations are disallowed.
         */
        d->max_pages = min(new_max, (uint64_t)(typeof(d->max_pages))-1);
        spin_unlock(&d->page_alloc_lock);
        break;
    }

    case XEN_DOMCTL_setdomainhandle:
        memcpy(d->handle, op->u.setdomainhandle.handle,
               sizeof(xen_domain_handle_t));
        break;

    case XEN_DOMCTL_setdebugging:
        if ( unlikely(d == current->domain) ) /* no domain_pause() */
            ret = -EINVAL;
        else
        {
            domain_pause(d);
            d->debugger_attached = !!op->u.setdebugging.enable;
            domain_unpause(d); /* causes guest to latch new status */
        }
        break;

    case XEN_DOMCTL_irq_permission:
    {
        unsigned int pirq = op->u.irq_permission.pirq, irq;
        int allow = op->u.irq_permission.allow_access;

        if ( pirq >= current->domain->nr_pirqs )
        {
            ret = -EINVAL;
            break;
        }
        irq = pirq_access_permitted(current->domain, pirq);
        if ( !irq || xsm_irq_permission(XSM_HOOK, d, irq, allow) )
            ret = -EPERM;
        else if ( allow )
            ret = irq_permit_access(d, irq);
        else
            ret = irq_deny_access(d, irq);
        break;
    }

    case XEN_DOMCTL_iomem_permission:
    {
        unsigned long mfn = op->u.iomem_permission.first_mfn;
        unsigned long nr_mfns = op->u.iomem_permission.nr_mfns;
        int allow = op->u.iomem_permission.allow_access;

        ret = -EINVAL;
        if ( (mfn + nr_mfns - 1) < mfn ) /* wrap? */
            break;

        if ( !iomem_access_permitted(current->domain,
                                     mfn, mfn + nr_mfns - 1) ||
             xsm_iomem_permission(XSM_HOOK, d, mfn, mfn + nr_mfns - 1, allow) )
            ret = -EPERM;
        else if ( allow )
            ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
        else
            ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
        if ( !ret )
            memory_type_changed(d);
        break;
    }

    case XEN_DOMCTL_memory_mapping:
    {
        unsigned long gfn = op->u.memory_mapping.first_gfn;
        unsigned long mfn = op->u.memory_mapping.first_mfn;
        unsigned long nr_mfns = op->u.memory_mapping.nr_mfns;
        unsigned long mfn_end = mfn + nr_mfns - 1;
        int add = op->u.memory_mapping.add_mapping;

        ret = -EINVAL;
        if ( mfn_end < mfn || /* wrap? */
             ((mfn | mfn_end) >> (paddr_bits - PAGE_SHIFT)) ||
             (gfn + nr_mfns - 1) < gfn ) /* wrap? */
            break;

#ifndef CONFIG_X86 /* XXX ARM!? */
        ret = -E2BIG;
        /* Must break hypercall up as this could take a while. */
        if ( nr_mfns > 64 )
            break;
#endif

        ret = -EPERM;
        if ( !iomem_access_permitted(current->domain, mfn, mfn_end) ||
             !iomem_access_permitted(d, mfn, mfn_end) )
            break;

        ret = xsm_iomem_mapping(XSM_HOOK, d, mfn, mfn_end, add);
        if ( ret )
            break;

        if ( add )
        {
            printk(XENLOG_G_DEBUG
                   "memory_map:add: dom%d gfn=%lx mfn=%lx nr=%lx\n",
                   d->domain_id, gfn, mfn, nr_mfns);

            ret = map_mmio_regions(d, _gfn(gfn), nr_mfns, _mfn(mfn));
            if ( ret < 0 )
                printk(XENLOG_G_WARNING
                       "memory_map:fail: dom%d gfn=%lx mfn=%lx nr=%lx ret:%ld\n",
                       d->domain_id, gfn, mfn, nr_mfns, ret);
        }
        else
        {
            printk(XENLOG_G_DEBUG
                   "memory_map:remove: dom%d gfn=%lx mfn=%lx nr=%lx\n",
                   d->domain_id, gfn, mfn, nr_mfns);

            ret = unmap_mmio_regions(d, _gfn(gfn), nr_mfns, _mfn(mfn));
            if ( ret < 0 && is_hardware_domain(current->domain) )
                printk(XENLOG_ERR
                       "memory_map: error %ld removing dom%d access to [%lx,%lx]\n",
                       ret, d->domain_id, mfn, mfn_end);
        }
        /* Do this unconditionally to cover errors on above failure paths. */
        memory_type_changed(d);
        break;
    }

    case XEN_DOMCTL_settimeoffset:
        domain_set_time_offset(d, op->u.settimeoffset.time_offset_seconds);
        break;

    case XEN_DOMCTL_set_target:
    {
        struct domain *e;

        ret = -ESRCH;
        e = get_domain_by_id(op->u.set_target.target);
        if ( e == NULL )
            break;

        ret = -EINVAL;
        if ( (d == e) || (d->target != NULL) )
        {
            put_domain(e);
            break;
        }

        ret = -EOPNOTSUPP;
        if ( is_hvm_domain(e) )
            ret = xsm_set_target(XSM_HOOK, d, e);
        if ( ret )
        {
            put_domain(e);
            break;
        }

        /* Hold reference on @e until we destroy @d. */
        d->target = e;
        break;
    }

    case XEN_DOMCTL_subscribe:
        d->suspend_evtchn = op->u.subscribe.port;
        break;

    case XEN_DOMCTL_vm_event_op:
        ret = vm_event_domctl(d, &op->u.vm_event_op);
        if ( ret == 0 )
            copyback = true;
        break;

#ifdef CONFIG_MEM_ACCESS
    case XEN_DOMCTL_set_access_required:
        if ( unlikely(current->domain == d) ) /* no domain_pause() */
            ret = -EPERM;
        else
        {
            domain_pause(d);
            arch_p2m_set_access_required(d,
                op->u.access_required.access_required);
            domain_unpause(d);
        }
        break;
#endif

    case XEN_DOMCTL_set_virq_handler:
        ret = set_global_virq_handler(d, op->u.set_virq_handler.virq);
        break;

    case XEN_DOMCTL_setvnumainfo:
    {
        struct vnuma_info *vnuma;

        vnuma = vnuma_init(&op->u.vnuma, d);
        if ( IS_ERR(vnuma) )
        {
            ret = PTR_ERR(vnuma);
            break;
        }

        /* Overwrite the domain's vNUMA topology. */
        write_lock(&d->vnuma_rwlock);
        vnuma_destroy(d->vnuma);
        d->vnuma = vnuma;
        write_unlock(&d->vnuma_rwlock);

        break;
    }

    case XEN_DOMCTL_monitor_op:
        ret = monitor_domctl(d, &op->u.monitor_op);
        if ( !ret )
            copyback = 1;
        break;

    default:
        ret = arch_do_domctl(op, d, u_domctl);
        break;
    }

    domctl_lock_release();

 domctl_out_unlock_domonly:
    if ( d && d != dom_io )
        rcu_unlock_domain(d);

    if ( copyback && __copy_to_guest(u_domctl, op, 1) )
        ret = -EFAULT;

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */