1 /*
2  * Copyright (C) 2009      Citrix Ltd.
3  * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published
7  * by the Free Software Foundation; version 2.1 only. with the special
8  * exception on linking described in file LICENSE.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  */
15 
16 #include "libxl_osdeps.h" /* must come before any other headers */
17 
18 #include <glob.h>
19 
20 #include "libxl_internal.h"
21 #include "libxl_arch.h"
22 
23 #include <xc_dom.h>
24 #include <xen/hvm/hvm_info_table.h>
25 #include <xen/hvm/hvm_xs_strings.h>
26 #include <xen/hvm/e820.h>
27 
28 #include "_paths.h"
29 
30 //#define DEBUG 1
31 
32 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
33 {
34     libxl_ctx *ctx = libxl__gc_owner(gc);
35     xc_domaininfo_t info;
36     int ret;
37 
38     ret = xc_domain_getinfolist(ctx->xch, domid, 1, &info);
39     if (ret != 1 || info.domain != domid) {
40         LOG(ERROR, "unable to get domain type for domid=%"PRIu32, domid);
41         return LIBXL_DOMAIN_TYPE_INVALID;
42     }
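    /*
     * Both HVM and PVH guests have XEN_DOMINF_hvm_guest set in the info
     * returned by the hypervisor, so consult the "type" node libxl recorded
     * under the domain's xenstore libxl path to tell them apart; if it is
     * missing or unparseable, fall back to assuming plain HVM.
     */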
43     if (info.flags & XEN_DOMINF_hvm_guest) {
44         const char *type_path = GCSPRINTF("%s/type",
45                                           libxl__xs_libxl_path(gc, domid));
46         const char *type;
47         libxl_domain_type t;
48         int rc;
49 
50         rc = libxl__xs_read_mandatory(gc, XBT_NULL, type_path, &type);
51         if (rc) {
52             LOG(WARN,
53             "unable to get domain type for domid=%"PRIu32", assuming HVM",
54                 domid);
55             return LIBXL_DOMAIN_TYPE_HVM;
56         }
57 
58         rc = libxl_domain_type_from_string(type, &t);
59         if (rc) {
60             LOG(WARN,
61             "unable to parse domain type for domid=%"PRIu32", assuming HVM",
62                 domid);
63             return LIBXL_DOMAIN_TYPE_HVM;
64         }
65 
66         return t;
67     } else
68         return LIBXL_DOMAIN_TYPE_PV;
69 }
70 
71 int libxl__domain_cpupool(libxl__gc *gc, uint32_t domid)
72 {
73     xc_domaininfo_t info;
74     int ret;
75 
76     ret = xc_domain_getinfolist(CTX->xch, domid, 1, &info);
77     if (ret != 1)
78     {
79         LOGE(ERROR, "getinfolist failed %d", ret);
80         return ERROR_FAIL;
81     }
82     if (info.domain != domid)
83     {
84         LOGE(ERROR, "got info for dom%d, wanted dom%d", info.domain, domid);
85         return ERROR_FAIL;
86     }
87     return info.cpupool;
88 }
89 
90 libxl_scheduler libxl__domain_scheduler(libxl__gc *gc, uint32_t domid)
91 {
92     int cpupool = libxl__domain_cpupool(gc, domid);
93     libxl_cpupoolinfo poolinfo;
94     libxl_scheduler sched = LIBXL_SCHEDULER_UNKNOWN;
95     int rc;
96 
97     if (cpupool < 0)
98         return sched;
99 
100     libxl_cpupoolinfo_init(&poolinfo);
101     rc = libxl_cpupool_info(CTX, &poolinfo, cpupool);
102     if (rc < 0)
103         goto out;
104 
105     sched = poolinfo.sched;
106 
107 out:
108     libxl_cpupoolinfo_dispose(&poolinfo);
109     return sched;
110 }
111 
112 /*
113  * Two NUMA placement candidates are compared by means of the following
114  * heuristics:
115  *
116  *  - the number of vcpus runnable on the candidates is considered, and
117  *    candidates with fewer of them are preferred. If two candidates have
118  *    the same number of runnable vcpus,
119  *  - the amount of free memory in the candidates is considered, and the
120  *    candidate with the greater amount of it is preferred.
121  *
122  * In fact, leaving larger memory holes maximizes the probability of being
123  * able to put other domains on the node. That hopefully means many domains
124  * will benefit from local memory accesses, but it also introduces the risk
125  * of overloading large (from a memory POV) nodes. That is exactly the
126  * effect that counting the vcpus able to run on the nodes tries to prevent.
127  *
128  * Note that this completely ignores the number of nodes each candidate
129  * spans, as the fact that fewer nodes is better is already accounted for
130  * in the algorithm.
131  */
132 static int numa_cmpf(const libxl__numa_candidate *c1,
133                      const libxl__numa_candidate *c2)
134 {
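    /* Return <0 if c1 is the better candidate, >0 if c2 is: fewer runnable
     * vcpus win, and ties are broken by preferring more free memory. */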
135     if (c1->nr_vcpus != c2->nr_vcpus)
136         return c1->nr_vcpus - c2->nr_vcpus;
137 
138     return c2->free_memkb - c1->free_memkb;
139 }
140 
141 /* The actual automatic NUMA placement routine */
142 static int numa_place_domain(libxl__gc *gc, uint32_t domid,
143                              libxl_domain_config *d_config)
144 {
145     libxl_domain_build_info *info = &d_config->b_info;
146     int found;
147     libxl__numa_candidate candidate;
148     libxl_bitmap cpumap, cpupool_nodemap, *map;
149     libxl_cpupoolinfo cpupool_info;
150     int i, cpupool, rc = 0;
151     uint64_t memkb;
152 
153     libxl__numa_candidate_init(&candidate);
154     libxl_bitmap_init(&cpumap);
155     libxl_bitmap_init(&cpupool_nodemap);
156     libxl_cpupoolinfo_init(&cpupool_info);
157 
158     /*
159      * Extract the cpumap from the cpupool the domain belongs to. In fact,
160      * it only makes sense to consider the cpus/nodes that are in there
161      * for placement.
162      */
163     rc = cpupool = libxl__domain_cpupool(gc, domid);
164     if (rc < 0)
165         goto out;
166     rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
167     if (rc)
168         goto out;
169     map = &cpupool_info.cpumap;
170 
171     /*
172      * If there's a well-defined hard affinity mask (i.e., the same one for all
173      * the vcpus), we can try to run the placement considering only the pcpus
174      * within such mask.
175      */
176     if (info->num_vcpu_hard_affinity)
177     {
178 #ifdef DEBUG
179         int j;
180 
181         for (j = 0; j < info->num_vcpu_hard_affinity; j++)
182             assert(libxl_bitmap_equal(&info->vcpu_hard_affinity[0],
183                                       &info->vcpu_hard_affinity[j], 0));
184 #endif /* DEBUG */
185 
186         rc = libxl_bitmap_and(CTX, &cpumap, &info->vcpu_hard_affinity[0],
187                               &cpupool_info.cpumap);
188         if (rc)
189             goto out;
190 
191         /* Hard affinity must contain at least one cpu of our cpupool */
192         if (libxl_bitmap_is_empty(&cpumap)) {
193             LOG(ERROR, "Hard affinity completely outside of domain's cpupool!");
194             rc = ERROR_INVAL;
195             goto out;
196         }
197     }
198 
199     rc = libxl__domain_need_memory_calculate(gc, info, &memkb);
200     if (rc)
201         goto out;
202     if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
203         rc = ERROR_FAIL;
204         goto out;
205     }
206 
207     /* Find the best candidate with enough free memory and at least
208      * as many pcpus as the domain has vcpus.  */
209     rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
210                                    0, 0, map, numa_cmpf, &candidate, &found);
211     if (rc)
212         goto out;
213 
214     /* Not even a suitable placement candidate was found! Just leave the
215      * domain's info->cpumap alone; it will have affinity with all nodes/cpus. */
216     if (found == 0)
217         goto out;
218 
219     /* Map the candidate's node map to the domain's info->nodemap */
220     libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
221 
222     /* Avoid trying to set the affinity to nodes that might be in the
223      * candidate's nodemap but out of our cpupool. */
224     rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
225                                  &cpupool_nodemap);
226     if (rc)
227         goto out;
228 
229     libxl_for_each_set_bit(i, info->nodemap) {
230         if (!libxl_bitmap_test(&cpupool_nodemap, i))
231             libxl_bitmap_reset(&info->nodemap, i);
232     }
233 
234     LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
235                 "%"PRIu64" KB free selected", candidate.nr_nodes,
236                 candidate.nr_cpus, candidate.free_memkb / 1024);
237 
238  out:
239     libxl__numa_candidate_dispose(&candidate);
240     libxl_bitmap_dispose(&cpupool_nodemap);
241     libxl_bitmap_dispose(&cpumap);
242     libxl_cpupoolinfo_dispose(&cpupool_info);
243     return rc;
244 }
245 
246 int libxl__build_pre(libxl__gc *gc, uint32_t domid,
247               libxl_domain_config *d_config, libxl__domain_build_state *state)
248 {
249     libxl_domain_build_info *const info = &d_config->b_info;
250     libxl_ctx *ctx = libxl__gc_owner(gc);
251     char *xs_domid, *con_domid;
252     int rc;
253     uint64_t size;
254 
255     if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
256         LOG(ERROR, "Couldn't set max vcpu count");
257         return ERROR_FAIL;
258     }
259 
260     if (libxl_defbool_val(d_config->b_info.disable_migrate) &&
261         xc_domain_disable_migrate(ctx->xch, domid) != 0) {
262         LOG(ERROR, "Couldn't set nomigrate");
263         return ERROR_FAIL;
264     }
265 
266     /*
267      * Check if the domain has any CPU or node affinity already. If not, try
268      * to build up the latter via automatic NUMA placement. In fact, in case
269      * numa_place_domain() manages to find a placement, info->nodemap is
270      * updated accordingly; if it does not manage, info->nodemap is just left
271      * alone. It is then the subsequent call to
272      * libxl_domain_set_nodeaffinity() that enacts the actual placement.
273      *
274      * As far as scheduling is concerned, we achieve NUMA-aware scheduling
275      * by having the results of placement affect the soft affinity of all
276      * the vcpus of the domain. Of course, we want that iff placement is
277      * enabled and actually happens, so we only change info->cpumap_soft to
278      * reflect the placement result if that is the case.
279      */
280     if (libxl_defbool_val(info->numa_placement)) {
281         if (info->cpumap.size || info->num_vcpu_soft_affinity)
282             LOG(WARN, "Can't run NUMA placement, as a soft "
283                       "affinity has been specified explicitly");
284         else if (info->nodemap.size)
285             LOG(WARN, "Can't run NUMA placement, as the domain has "
286                       "NUMA node affinity set already");
287         else {
288             libxl_bitmap cpumap_soft;
289 
290             rc = libxl_node_bitmap_alloc(ctx, &info->nodemap, 0);
291             if (rc)
292                 return rc;
293             libxl_bitmap_set_any(&info->nodemap);
294 
295             rc = libxl_cpu_bitmap_alloc(ctx, &cpumap_soft, 0);
296             if (rc)
297                 return rc;
298 
299             rc = numa_place_domain(gc, domid, d_config);
300             if (rc) {
301                 libxl_bitmap_dispose(&cpumap_soft);
302                 return rc;
303             }
304 
305             /*
306              * All we need to do now is convert the result of automatic
307              * placement from a nodemap to a cpumap, and then use that cpumap
308              * as the soft affinity for all the vcpus of the domain.
309              *
310              * When calling libxl_set_vcpuaffinity_all(), it is ok to use
311              * NULL as hard affinity, as we know we don't have one, or we
312              * won't be here.
313              */
314             libxl_nodemap_to_cpumap(ctx, &info->nodemap, &cpumap_soft);
315             libxl_set_vcpuaffinity_all(ctx, domid, info->max_vcpus,
316                                        NULL, &cpumap_soft);
317 
318             libxl_bitmap_dispose(&cpumap_soft);
319 
320             /*
321              * Placement has run, so prevent it from being re-run if this
322              * same config we are using and building here is ever re-used.
323              * This means that people re-using configs will get the same
324              * results, consistently, across every re-use, which is what
325              * we expect most people to want.
326              */
327             libxl_defbool_set(&info->numa_placement, false);
328         }
329     }
330 
331     if (info->nodemap.size)
332         libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);
333 
334     if (info->num_vcpu_hard_affinity || info->num_vcpu_soft_affinity) {
335         libxl_bitmap *hard_affinity, *soft_affinity;
336         int i, n_vcpus;
337 
338         n_vcpus = info->num_vcpu_hard_affinity > info->num_vcpu_soft_affinity ?
339             info->num_vcpu_hard_affinity : info->num_vcpu_soft_affinity;
340 
341         for (i = 0; i < n_vcpus; i++) {
342             /*
343              * Prepare hard and soft affinity pointers in a way that allows
344              * us to issue only one call to libxl_set_vcpuaffinity(), setting,
345              * for each vcpu, both hard and soft affinity "atomically".
346              */
347             hard_affinity = NULL;
348             if (info->num_vcpu_hard_affinity &&
349                 i < info->num_vcpu_hard_affinity)
350                 hard_affinity = &info->vcpu_hard_affinity[i];
351 
352             soft_affinity = NULL;
353             if (info->num_vcpu_soft_affinity &&
354                 i < info->num_vcpu_soft_affinity)
355                 soft_affinity = &info->vcpu_soft_affinity[i];
356 
357             if (libxl_set_vcpuaffinity(ctx, domid, i,
358                                        hard_affinity, soft_affinity)) {
359                 LOG(ERROR, "setting affinity failed on vcpu `%d'", i);
360                 return ERROR_FAIL;
361             }
362         }
363     }
364 
365 
366     rc = libxl__arch_extra_memory(gc, info, &size);
367     if (rc < 0) {
368         LOGE(ERROR, "Couldn't get arch extra constant memory size");
369         return ERROR_FAIL;
370     }
371 
372     if (xc_domain_setmaxmem(ctx->xch, domid, info->target_memkb + size) < 0) {
373         LOGE(ERROR, "Couldn't set max memory");
374         return ERROR_FAIL;
375     }
376 
377     xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
378     state->store_domid = xs_domid ? atoi(xs_domid) : 0;
379     free(xs_domid);
380 
381     con_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenconsoled/domid", NULL);
382     state->console_domid = con_domid ? atoi(con_domid) : 0;
383     free(con_domid);
384 
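    /* Allocate unbound event channels for the xenstore and console rings;
     * the remote ends are bound later by xenstored and xenconsoled. */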
385     state->store_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->store_domid);
386     state->console_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->console_domid);
387 
388     rc = libxl__arch_domain_create(gc, d_config, domid);
389 
390     /* Construct a CPUID policy, but only for brand new domains.  Domains
391      * being migrated-in/restored have CPUID handled during the
392      * static_data_done() callback. */
393     if (!state->restore)
394         libxl__cpuid_legacy(ctx, domid, false, info);
395 
396     return rc;
397 }
398 
399 static int set_vnuma_affinity(libxl__gc *gc, uint32_t domid,
400                               libxl_domain_build_info *info)
401 {
402     libxl_bitmap cpumap;
403     libxl_vnode_info *v;
404     unsigned int i, j;
405     int rc = 0;
406 
407     libxl_bitmap_init(&cpumap);
408 
409     rc = libxl_cpu_bitmap_alloc(CTX, &cpumap, 0);
410     if (rc) {
411         LOG(ERROR, "Can't allocate cpumap");
412         goto out;
413     }
414 
415     /*
416      * For each vcpu in each vnode, set its soft affinity to
417      * the pcpus belonging to the pnode the vnode is on
418      */
419     for (i = 0; i < info->num_vnuma_nodes; i++) {
420         v = &info->vnuma_nodes[i];
421 
422         rc = libxl_node_to_cpumap(CTX, v->pnode, &cpumap);
423         if (rc) {
424             LOG(ERROR, "Can't get cpumap for vnode %d", i);
425             goto out;
426         }
427 
428         libxl_for_each_set_bit(j, v->vcpus) {
429             rc = libxl_set_vcpuaffinity(CTX, domid, j, NULL, &cpumap);
430             if (rc) {
431                 LOG(ERROR, "Can't set cpu affinity for %d", j);
432                 goto out;
433             }
434         }
435     }
436 
437 out:
438     libxl_bitmap_dispose(&cpumap);
439     return rc;
440 }
441 
442 int libxl__build_post(libxl__gc *gc, uint32_t domid,
443                       libxl_domain_build_info *info,
444                       libxl__domain_build_state *state,
445                       char **vms_ents, char **local_ents)
446 {
447     libxl_ctx *ctx = libxl__gc_owner(gc);
448     char *dom_path, *vm_path;
449     xs_transaction_t t;
450     char **ents;
451     int i, rc;
452 
453     if (info->num_vnuma_nodes && !info->num_vcpu_soft_affinity) {
454         rc = set_vnuma_affinity(gc, domid, info);
455         if (rc)
456             return rc;
457     }
458 
459     rc = libxl_domain_sched_params_set(CTX, domid, &info->sched_params);
460     if (rc)
461         return rc;
462 
463     if (info->type == LIBXL_DOMAIN_TYPE_HVM
464         && !libxl_ms_vm_genid_is_zero(&info->u.hvm.ms_vm_genid)) {
465         rc = libxl__ms_vm_genid_set(gc, domid,
466                                     &info->u.hvm.ms_vm_genid);
467         if (rc) {
468             LOG(ERROR, "Failed to set VM Generation ID");
469             return rc;
470         }
471     }
472 
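    /* 12 fixed key/value entries plus 2 per vcpu; the two extra slots are
     * left zeroed by calloc and terminate the list for libxl__xs_writev(). */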
473     ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
474     ents[0] = "memory/static-max";
475     ents[1] = GCSPRINTF("%"PRId64, info->max_memkb);
476     ents[2] = "memory/target";
477     ents[3] = GCSPRINTF("%"PRId64, info->target_memkb -
478                         libxl__get_targetmem_fudge(gc, info));
479     ents[4] = "memory/videoram";
480     ents[5] = GCSPRINTF("%"PRId64, info->video_memkb);
481     ents[6] = "domid";
482     ents[7] = GCSPRINTF("%d", domid);
483     ents[8] = "store/port";
484     ents[9] = GCSPRINTF("%"PRIu32, state->store_port);
485     ents[10] = "store/ring-ref";
486     ents[11] = GCSPRINTF("%lu", state->store_mfn);
487     for (i = 0; i < info->max_vcpus; i++) {
488         ents[12+(i*2)]   = GCSPRINTF("cpu/%d/availability", i);
489         ents[12+(i*2)+1] = libxl_bitmap_test(&info->avail_vcpus, i)
490                             ? "online" : "offline";
491     }
492 
493     dom_path = libxl__xs_get_dompath(gc, domid);
494     if (!dom_path) {
495         return ERROR_FAIL;
496     }
497 
498     vm_path = xs_read(ctx->xsh, XBT_NULL, GCSPRINTF("%s/vm", dom_path), NULL);
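    /* Write all the entries in a single xenstore transaction and retry on
     * EAGAIN, i.e. if the transaction raced with another xenstore writer. */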
499 retry_transaction:
500     t = xs_transaction_start(ctx->xsh);
501 
502     libxl__xs_writev(gc, t, dom_path, ents);
503     libxl__xs_writev(gc, t, dom_path, local_ents);
504     libxl__xs_writev(gc, t, vm_path, vms_ents);
505 
506     if (!xs_transaction_end(ctx->xsh, t, 0))
507         if (errno == EAGAIN)
508             goto retry_transaction;
509     xs_introduce_domain(ctx->xsh, domid, state->store_mfn, state->store_port);
510     free(vm_path);
511     return 0;
512 }
513 
514 static int set_vnuma_info(libxl__gc *gc, uint32_t domid,
515                           const libxl_domain_build_info *info,
516                           const libxl__domain_build_state *state)
517 {
518     int rc = 0;
519     unsigned int i, nr_vdistance;
520     unsigned int *vcpu_to_vnode, *vnode_to_pnode, *vdistance = NULL;
521 
522     vcpu_to_vnode = libxl__calloc(gc, info->max_vcpus,
523                                   sizeof(unsigned int));
524     vnode_to_pnode = libxl__calloc(gc, info->num_vnuma_nodes,
525                                    sizeof(unsigned int));
526 
527     nr_vdistance = info->num_vnuma_nodes * info->num_vnuma_nodes;
528     vdistance = libxl__calloc(gc, nr_vdistance, sizeof(unsigned int));
529 
530     for (i = 0; i < info->num_vnuma_nodes; i++) {
531         libxl_vnode_info *v = &info->vnuma_nodes[i];
532         int j;
533 
534         /* vnode to pnode mapping */
535         vnode_to_pnode[i] = v->pnode;
536 
537         /* vcpu to vnode mapping */
538         libxl_for_each_set_bit(j, v->vcpus)
539             vcpu_to_vnode[j] = i;
540 
541         /* node distances */
542         assert(info->num_vnuma_nodes == v->num_distances);
543         memcpy(vdistance + (i * info->num_vnuma_nodes),
544                v->distances,
545                v->num_distances * sizeof(unsigned int));
546     }
547 
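    /* Hand the assembled vNUMA topology (memory ranges, distance table and
     * the vcpu->vnode / vnode->pnode mappings) to the hypervisor. */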
548     if (xc_domain_setvnuma(CTX->xch, domid, info->num_vnuma_nodes,
549                            state->num_vmemranges, info->max_vcpus,
550                            state->vmemranges, vdistance,
551                            vcpu_to_vnode, vnode_to_pnode) < 0) {
552         LOGE(ERROR, "xc_domain_setvnuma failed");
553         rc = ERROR_FAIL;
554     }
555 
556     return rc;
557 }
558 
559 static int libxl__build_dom(libxl__gc *gc, uint32_t domid,
560              libxl_domain_config *d_config, libxl__domain_build_state *state,
561              struct xc_dom_image *dom)
562 {
563     libxl_domain_build_info *const info = &d_config->b_info;
564     uint64_t mem_kb;
565     int ret;
566 
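    /* Standard libxc image-building sequence: initialise the builder, parse
     * the image, set up guest memory, build and boot the image, then set up
     * the grant tables and run the arch-specific finishing hook. */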
567     if ( (ret = xc_dom_boot_xen_init(dom, CTX->xch, domid)) != 0 ) {
568         LOGE(ERROR, "xc_dom_boot_xen_init failed");
569         goto out;
570     }
571 #ifdef GUEST_RAM_BASE
572     if ( (ret = xc_dom_rambase_init(dom, GUEST_RAM_BASE)) != 0 ) {
573         LOGE(ERROR, "xc_dom_rambase_init failed");
574         goto out;
575     }
576 #endif
577     if ( (ret = xc_dom_parse_image(dom)) != 0 ) {
578         LOG(ERROR, "xc_dom_parse_image failed");
579         goto out;
580     }
581     if ( (ret = libxl__arch_domain_init_hw_description(gc, info, state, dom)) != 0 ) {
582         LOGE(ERROR, "libxl__arch_domain_init_hw_description failed");
583         goto out;
584     }
585 
586     mem_kb = dom->container_type == XC_DOM_HVM_CONTAINER ?
587              (info->max_memkb - info->video_memkb) : info->target_memkb;
588     if ( (ret = xc_dom_mem_init(dom, mem_kb / 1024)) != 0 ) {
589         LOGE(ERROR, "xc_dom_mem_init failed");
590         goto out;
591     }
592     if ( (ret = xc_dom_boot_mem_init(dom)) != 0 ) {
593         LOGE(ERROR, "xc_dom_boot_mem_init failed");
594         goto out;
595     }
596     if ( (ret = libxl__arch_domain_finalise_hw_description(gc, domid, d_config, dom)) != 0 ) {
597         LOGE(ERROR, "libxl__arch_domain_finalise_hw_description failed");
598         goto out;
599     }
600     if ( (ret = xc_dom_build_image(dom)) != 0 ) {
601         LOGE(ERROR, "xc_dom_build_image failed");
602         goto out;
603     }
604     if ( (ret = xc_dom_boot_image(dom)) != 0 ) {
605         LOGE(ERROR, "xc_dom_boot_image failed");
606         goto out;
607     }
608     if ( (ret = xc_dom_gnttab_init(dom)) != 0 ) {
609         LOGE(ERROR, "xc_dom_gnttab_init failed");
610         goto out;
611     }
612     if ((ret = libxl__arch_build_dom_finish(gc, info, dom, state)) != 0) {
613         LOGE(ERROR, "libxl__arch_build_dom_finish failed");
614         goto out;
615     }
616 
617 out:
618     return ret != 0 ? ERROR_FAIL : 0;
619 }
620 
621 int libxl__build_pv(libxl__gc *gc, uint32_t domid,
622              libxl_domain_config *d_config, libxl__domain_build_state *state)
623 {
624     libxl_ctx *ctx = libxl__gc_owner(gc);
625     libxl_domain_build_info *const info = &d_config->b_info;
626     struct xc_dom_image *dom;
627     int ret;
628     int flags = 0;
629 
630     xc_dom_loginit(ctx->xch);
631 
632     dom = xc_dom_allocate(ctx->xch, state->pv_cmdline, info->u.pv.features);
633     if (!dom) {
634         LOGE(ERROR, "xc_dom_allocate failed");
635         return ERROR_FAIL;
636     }
637 
638     dom->container_type = XC_DOM_PV_CONTAINER;
639 
640     LOG(DEBUG, "pv kernel mapped %d path %s", state->pv_kernel.mapped, state->pv_kernel.path);
641 
642     if (state->pv_kernel.mapped) {
643         ret = xc_dom_kernel_mem(dom,
644                                 state->pv_kernel.data,
645                                 state->pv_kernel.size);
646         if ( ret != 0) {
647             LOGE(ERROR, "xc_dom_kernel_mem failed");
648             goto out;
649         }
650     } else {
651         ret = xc_dom_kernel_file(dom, state->pv_kernel.path);
652         if ( ret != 0) {
653             LOGE(ERROR, "xc_dom_kernel_file failed");
654             goto out;
655         }
656     }
657 
658     if ( state->pv_ramdisk.path && strlen(state->pv_ramdisk.path) ) {
659         if (state->pv_ramdisk.mapped) {
660             if ( (ret = xc_dom_module_mem(dom, state->pv_ramdisk.data, state->pv_ramdisk.size, NULL)) != 0 ) {
661                 LOGE(ERROR, "xc_dom_module_mem failed");
662                 goto out;
663             }
664         } else {
665             if ( (ret = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL)) != 0 ) {
666                 LOGE(ERROR, "xc_dom_module_file failed");
667                 goto out;
668             }
669         }
670     }
671 
672     dom->flags = flags;
673     dom->console_evtchn = state->console_port;
674     dom->console_domid = state->console_domid;
675     dom->xenstore_evtchn = state->store_port;
676     dom->xenstore_domid = state->store_domid;
677     dom->claim_enabled = libxl_defbool_val(info->claim_mode);
678     dom->max_vcpus = info->max_vcpus;
679 
680     if (info->num_vnuma_nodes != 0) {
681         unsigned int i;
682 
683         ret = libxl__vnuma_build_vmemrange_pv(gc, domid, info, state);
684         if (ret) {
685             LOGE(ERROR, "cannot build vmemranges");
686             goto out;
687         }
688         ret = libxl__vnuma_config_check(gc, info, state);
689         if (ret) goto out;
690 
691         ret = set_vnuma_info(gc, domid, info, state);
692         if (ret) goto out;
693 
694         dom->nr_vmemranges = state->num_vmemranges;
695         dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges) *
696                                         dom->nr_vmemranges);
697 
698         for (i = 0; i < dom->nr_vmemranges; i++) {
699             dom->vmemranges[i].start = state->vmemranges[i].start;
700             dom->vmemranges[i].end   = state->vmemranges[i].end;
701             dom->vmemranges[i].flags = state->vmemranges[i].flags;
702             dom->vmemranges[i].nid   = state->vmemranges[i].nid;
703         }
704 
705         dom->nr_vnodes = info->num_vnuma_nodes;
706         dom->vnode_to_pnode = xc_dom_malloc(dom, sizeof(*dom->vnode_to_pnode) *
707                                             dom->nr_vnodes);
708         for (i = 0; i < info->num_vnuma_nodes; i++)
709             dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
710     }
711 
712     ret = libxl__build_dom(gc, domid, d_config, state, dom);
713     if (ret != 0)
714         goto out;
715 
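    /* For auto-translated guests the console/xenstore ring addresses are
     * already guest frame numbers; for classic PV guests translate the PFNs
     * into MFNs via the p2m. */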
716     if (xc_dom_translated(dom)) {
717         state->console_mfn = dom->console_pfn;
718         state->store_mfn = dom->xenstore_pfn;
719         state->vuart_gfn = dom->vuart_gfn;
720     } else {
721         state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
722         state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
723     }
724 
725     ret = 0;
726 out:
727     xc_dom_release(dom);
728     return ret == 0 ? 0 : ERROR_FAIL;
729 }
730 
731 static int hvm_build_set_params(xc_interface *handle, uint32_t domid,
732                                 libxl_domain_build_info *info,
733                                 int store_evtchn, unsigned long *store_mfn,
734                                 int console_evtchn, unsigned long *console_mfn,
735                                 domid_t store_domid, domid_t console_domid)
736 {
737     struct hvm_info_table *va_hvm;
738     uint8_t *va_map, sum;
739     uint64_t str_mfn, cons_mfn;
740     int i;
741 
742     if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
743         va_map = xc_map_foreign_range(handle, domid,
744                                       XC_PAGE_SIZE, PROT_READ | PROT_WRITE,
745                                       HVM_INFO_PFN);
746         if (va_map == NULL)
747             return ERROR_FAIL;
748 
749         va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
750         va_hvm->apic_mode = libxl_defbool_val(info->apic);
751         va_hvm->nr_vcpus = info->max_vcpus;
752         memset(va_hvm->vcpu_online, 0, sizeof(va_hvm->vcpu_online));
753         memcpy(va_hvm->vcpu_online, info->avail_vcpus.map, info->avail_vcpus.size);
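        /* Re-checksum the table: adjust the checksum field so that the whole
         * hvm_info_table sums to zero again after the edits above. */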
754         for (i = 0, sum = 0; i < va_hvm->length; i++)
755             sum += ((uint8_t *) va_hvm)[i];
756         va_hvm->checksum -= sum;
757         munmap(va_map, XC_PAGE_SIZE);
758     }
759 
760     xc_hvm_param_get(handle, domid, HVM_PARAM_STORE_PFN, &str_mfn);
761     xc_hvm_param_get(handle, domid, HVM_PARAM_CONSOLE_PFN, &cons_mfn);
762     xc_hvm_param_set(handle, domid, HVM_PARAM_STORE_EVTCHN, store_evtchn);
763     xc_hvm_param_set(handle, domid, HVM_PARAM_CONSOLE_EVTCHN, console_evtchn);
764 
765     *store_mfn = str_mfn;
766     *console_mfn = cons_mfn;
767 
768     return 0;
769 }
770 
771 static int hvm_build_set_xs_values(libxl__gc *gc,
772                                    uint32_t domid,
773                                    struct xc_dom_image *dom,
774                                    const libxl_domain_build_info *info)
775 {
776     char *path = NULL;
777     int ret = 0;
778 
779     if (dom->smbios_module.guest_addr_out) {
780         path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_ADDRESS, domid);
781 
782         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
783                                dom->smbios_module.guest_addr_out);
784         if (ret)
785             goto err;
786 
787         path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_LENGTH, domid);
788 
789         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
790                                dom->smbios_module.length);
791         if (ret)
792             goto err;
793     }
794 
795     /* Only one module can be passed. PVHv2 guests do not support this. */
796     if (dom->acpi_modules[0].guest_addr_out &&
797         info->type == LIBXL_DOMAIN_TYPE_HVM) {
798         path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_ADDRESS, domid);
799 
800         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
801                                dom->acpi_modules[0].guest_addr_out);
802         if (ret)
803             goto err;
804 
805         path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_LENGTH, domid);
806 
807         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
808                                dom->acpi_modules[0].length);
809         if (ret)
810             goto err;
811     }
812 
813     return 0;
814 
815 err:
816     LOG(ERROR, "failed to write firmware xenstore value, err: %d", ret);
817     return ret;
818 }
819 
820 static int libxl__load_hvm_firmware_module(libxl__gc *gc,
821                                            const char *filename,
822                                            const char *what,
823                                            struct xc_hvm_firmware_module *m)
824 {
825     int datalen = 0;
826     void *data = NULL;
827     int r, rc;
828 
829     LOG(DEBUG, "Loading %s: %s", what, filename);
830     r = libxl_read_file_contents(CTX, filename, &data, &datalen);
831     if (r) {
832         /*
833          * Print a message only on ENOENT, other errors are logged by the
834          * function libxl_read_file_contents().
835          */
836         if (r == ENOENT)
837             LOGEV(ERROR, r, "failed to read %s file", what);
838         rc =  ERROR_FAIL;
839         goto out;
840     }
841     libxl__ptr_add(gc, data);
842     if (datalen) {
843         /* Only accept non-empty files */
844         m->data = data;
845         m->length = datalen;
846     } else {
847         LOG(ERROR, "file %s for %s is empty", filename, what);
848         rc = ERROR_INVAL;
849         goto out;
850     }
851     rc = 0;
852 out:
853     return rc;
854 }
855 
856 static int libxl__domain_firmware(libxl__gc *gc,
857                                   libxl_domain_build_info *info,
858                                   libxl__domain_build_state *state,
859                                   struct xc_dom_image *dom)
860 {
861     libxl_ctx *ctx = libxl__gc_owner(gc);
862     const char *firmware = NULL;
863     int e, rc;
864     int datalen = 0;
865     void *data;
866     const char *bios_filename = NULL;
867 
868     if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
869         if (info->u.hvm.firmware) {
870             firmware = info->u.hvm.firmware;
871         } else {
872             switch (info->device_model_version)
873             {
874             case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
875             case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
876                 firmware = "hvmloader";
877                 break;
878             default:
879                 LOG(ERROR, "invalid device model version %d",
880                     info->device_model_version);
881                 rc = ERROR_FAIL;
882                 goto out;
883             }
884         }
885     }
886 
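    /* For PVH guests the "kernel" loaded below is either the PV shim (with
     * the real kernel attached as a module) or the kernel itself; for HVM
     * guests it is the firmware selected above. */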
887     if (state->pv_kernel.path != NULL &&
888         info->type == LIBXL_DOMAIN_TYPE_PVH) {
889 
890         if (state->shim_path) {
891             rc = xc_dom_kernel_file(dom, state->shim_path);
892             if (rc) {
893                 LOGE(ERROR, "xc_dom_kernel_file failed");
894                 goto out;
895             }
896 
897             /* We've loaded the shim, so load the kernel as a secondary module */
898             if (state->pv_kernel.mapped) {
899                 LOG(DEBUG, "xc_dom_module_mem, cmdline %s",
900                     state->pv_cmdline);
901                 rc = xc_dom_module_mem(dom, state->pv_kernel.data,
902                                        state->pv_kernel.size, state->pv_cmdline);
903                 if (rc) {
904                     LOGE(ERROR, "xc_dom_module_mem failed");
905                     goto out;
906                 }
907             } else {
908                 LOG(DEBUG, "xc_dom_module_file, path %s cmdline %s",
909                     state->pv_kernel.path, state->pv_cmdline);
910                 rc = xc_dom_module_file(dom, state->pv_kernel.path, state->pv_cmdline);
911                 if (rc) {
912                     LOGE(ERROR, "xc_dom_module_file failed");
913                     goto out;
914                 }
915             }
916         } else {
917             /* No shim, so load the kernel directly */
918             if (state->pv_kernel.mapped) {
919                 rc = xc_dom_kernel_mem(dom, state->pv_kernel.data,
920                                        state->pv_kernel.size);
921                 if (rc) {
922                     LOGE(ERROR, "xc_dom_kernel_mem failed");
923                     goto out;
924                 }
925             } else {
926                 rc = xc_dom_kernel_file(dom, state->pv_kernel.path);
927                 if (rc) {
928                     LOGE(ERROR, "xc_dom_kernel_file failed");
929                     goto out;
930                 }
931             }
932         }
933 
934         if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) {
935             if (state->pv_ramdisk.mapped) {
936                 rc = xc_dom_module_mem(dom, state->pv_ramdisk.data,
937                                        state->pv_ramdisk.size, NULL);
938                 if (rc) {
939                     LOGE(ERROR, "xc_dom_module_mem failed");
940                     goto out;
941                 }
942             } else {
943                 rc = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL);
944                 if (rc) {
945                     LOGE(ERROR, "xc_dom_module_file failed");
946                     goto out;
947                 }
948             }
949         }
950     } else {
951         /*
952          * Only HVM guests should get here; PVH guests should always have a
953          * kernel set at this point.
954          */
955         assert(info->type == LIBXL_DOMAIN_TYPE_HVM);
956         rc = xc_dom_kernel_file(dom, libxl__abs_path(gc, firmware,
957                                                  libxl__xenfirmwaredir_path()));
958     }
959 
960     if (rc != 0) {
961         LOGE(ERROR, "xc_dom_{kernel_file/ramdisk_file} failed");
962         goto out;
963     }
964 
965     if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
966         info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
967         if (info->u.hvm.system_firmware) {
968             bios_filename = info->u.hvm.system_firmware;
969         } else {
970             switch (info->u.hvm.bios) {
971             case LIBXL_BIOS_TYPE_SEABIOS:
972                 bios_filename = libxl__seabios_path();
973                 break;
974             case LIBXL_BIOS_TYPE_OVMF:
975                 bios_filename = libxl__ovmf_path();
976                 break;
977             case LIBXL_BIOS_TYPE_ROMBIOS:
978             default:
979                 abort();
980             }
981         }
982     }
983 
984     if (bios_filename) {
985         rc = libxl__load_hvm_firmware_module(gc, bios_filename, "BIOS",
986                                              &dom->system_firmware_module);
987         if (rc) goto out;
988     }
989 
990     if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
991         info->u.hvm.bios == LIBXL_BIOS_TYPE_ROMBIOS &&
992         libxl__ipxe_path()) {
993         const char *fp = libxl__ipxe_path();
994         rc = xc_dom_module_file(dom, fp, "ipxe");
995 
996         if (rc) {
997             LOGE(ERROR, "failed to load IPXE %s (%d)", fp, rc);
998             rc = ERROR_FAIL;
999             goto out;
1000         }
1001     }
1002 
1003     if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1004         info->u.hvm.smbios_firmware) {
1005         data = NULL;
1006         e = libxl_read_file_contents(ctx, info->u.hvm.smbios_firmware,
1007                                      &data, &datalen);
1008         if (e) {
1009             LOGEV(ERROR, e, "failed to read SMBIOS firmware file %s",
1010                 info->u.hvm.smbios_firmware);
1011             rc = ERROR_FAIL;
1012             goto out;
1013         }
1014         libxl__ptr_add(gc, data);
1015         if (datalen) {
1016             /* Only accept non-empty files */
1017             dom->smbios_module.data = data;
1018             dom->smbios_module.length = (uint32_t)datalen;
1019         }
1020     }
1021 
1022     if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1023         info->u.hvm.acpi_firmware) {
1024         data = NULL;
1025         e = libxl_read_file_contents(ctx, info->u.hvm.acpi_firmware,
1026                                      &data, &datalen);
1027         if (e) {
1028             LOGEV(ERROR, e, "failed to read ACPI firmware file %s",
1029                 info->u.hvm.acpi_firmware);
1030             rc = ERROR_FAIL;
1031             goto out;
1032         }
1033         libxl__ptr_add(gc, data);
1034         if (datalen) {
1035             /* Only accept a non-empty file */
1036             dom->acpi_modules[0].data = data;
1037             dom->acpi_modules[0].length = (uint32_t)datalen;
1038         }
1039     }
1040 
1041     return 0;
1042 out:
1043     assert(rc != 0);
1044     return rc;
1045 }
1046 
1047 int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
1048               libxl_domain_config *d_config,
1049               libxl__domain_build_state *state)
1050 {
1051     libxl_ctx *ctx = libxl__gc_owner(gc);
1052     int rc;
1053     uint64_t mmio_start, lowmem_end, highmem_end, mem_size;
1054     libxl_domain_build_info *const info = &d_config->b_info;
1055     struct xc_dom_image *dom = NULL;
1056     bool device_model = info->type == LIBXL_DOMAIN_TYPE_HVM;
1057 
1058     xc_dom_loginit(ctx->xch);
1059 
1060     /*
1061      * If PVH and we have a shim override, use the shim cmdline.
1062      * If PVH and no shim override, use the pv cmdline.
1063      * If not PVH, use info->cmdline.
1064      */
1065     dom = xc_dom_allocate(ctx->xch, info->type == LIBXL_DOMAIN_TYPE_PVH ?
1066                           (state->shim_path ? state->shim_cmdline : state->pv_cmdline) :
1067                           info->cmdline, NULL);
1068     if (!dom) {
1069         LOGE(ERROR, "xc_dom_allocate failed");
1070         rc = ERROR_NOMEM;
1071         goto out;
1072     }
1073 
1074     dom->container_type = XC_DOM_HVM_CONTAINER;
1075 
1076     /* The memory parameters from the config file are in MiB; the parser
1077      * has already multiplied them by 1024, so they arrive here in KiB.
1078      * The old xc_hvm_build_target_mem() used to turn them into bytes; do
1079      * it all in one step here: mem_size is in bytes (KiB << 10) and
1080      * target_pages is in 4 KiB pages (KiB >> 2). */
1081     mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
1082     dom->target_pages = (uint64_t)(info->target_memkb - info->video_memkb) >> 2;
1083     dom->claim_enabled = libxl_defbool_val(info->claim_mode);
1084     if (info->u.hvm.mmio_hole_memkb) {
1085         uint64_t max_ram_below_4g = (1ULL << 32) -
1086             (info->u.hvm.mmio_hole_memkb << 10);
1087 
1088         if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START)
1089             dom->mmio_size = info->u.hvm.mmio_hole_memkb << 10;
1090     }
1091 
1092     rc = libxl__domain_firmware(gc, info, state, dom);
1093     if (rc != 0) {
1094         LOG(ERROR, "initializing domain firmware failed");
1095         goto out;
1096     }
1097 
1098     if (dom->target_pages == 0)
1099         dom->target_pages = mem_size >> XC_PAGE_SHIFT;
1100     if (dom->mmio_size == 0 && device_model)
1101         dom->mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
1102     else if (dom->mmio_size == 0 && !device_model) {
1103 #if defined(__i386__) || defined(__x86_64__)
1104         /*
1105          * Make sure the local APIC page, the ACPI tables and the special pages
1106          * are inside the MMIO hole.
1107          */
1108         xen_paddr_t start =
1109             (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES) <<
1110             XC_PAGE_SHIFT;
1111 
1112         start = min_t(xen_paddr_t, start, LAPIC_BASE_ADDRESS);
1113         start = min_t(xen_paddr_t, start, ACPI_INFO_PHYSICAL_ADDRESS);
1114         dom->mmio_size = GB(4) - start;
1115 #else
1116         assert(1);
1117 #endif
1118     }
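    /*
     * Split the guest's RAM around the MMIO hole: whatever would overlap
     * the hole below 4 GiB is relocated to start at the 4 GiB boundary.
     */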
1119     lowmem_end = mem_size;
1120     highmem_end = 0;
1121     mmio_start = (1ull << 32) - dom->mmio_size;
1122     if (lowmem_end > mmio_start)
1123     {
1124         highmem_end = (1ull << 32) + (lowmem_end - mmio_start);
1125         lowmem_end = mmio_start;
1126     }
1127     dom->lowmem_end = lowmem_end;
1128     dom->highmem_end = highmem_end;
1129     dom->mmio_start = mmio_start;
1130     dom->vga_hole_size = device_model ? LIBXL_VGA_HOLE_SIZE : 0;
1131     dom->device_model = device_model;
1132     dom->max_vcpus = info->max_vcpus;
1133     dom->console_domid = state->console_domid;
1134     dom->xenstore_domid = state->store_domid;
1135 
1136     rc = libxl__domain_device_construct_rdm(gc, d_config,
1137                                             info->u.hvm.rdm_mem_boundary_memkb*1024,
1138                                             dom);
1139     if (rc) {
1140         LOG(ERROR, "checking reserved device memory failed");
1141         goto out;
1142     }
1143 
1144     if (info->num_vnuma_nodes != 0) {
1145         int i;
1146 
1147         rc = libxl__vnuma_build_vmemrange_hvm(gc, domid, info, state, dom);
1148         if (rc != 0) {
1149             LOG(ERROR, "hvm build vmemranges failed");
1150             goto out;
1151         }
1152         rc = libxl__vnuma_config_check(gc, info, state);
1153         if (rc != 0) goto out;
1154         rc = set_vnuma_info(gc, domid, info, state);
1155         if (rc != 0) goto out;
1156 
1157         dom->nr_vmemranges = state->num_vmemranges;
1158         dom->vmemranges = libxl__malloc(gc, sizeof(*dom->vmemranges) *
1159                                         dom->nr_vmemranges);
1160 
1161         for (i = 0; i < dom->nr_vmemranges; i++) {
1162             dom->vmemranges[i].start = state->vmemranges[i].start;
1163             dom->vmemranges[i].end   = state->vmemranges[i].end;
1164             dom->vmemranges[i].flags = state->vmemranges[i].flags;
1165             dom->vmemranges[i].nid   = state->vmemranges[i].nid;
1166         }
1167 
1168         dom->nr_vnodes = info->num_vnuma_nodes;
1169         dom->vnode_to_pnode = libxl__malloc(gc, sizeof(*dom->vnode_to_pnode) *
1170                                             dom->nr_vnodes);
1171         for (i = 0; i < dom->nr_vnodes; i++)
1172             dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
1173     }
1174 
1175     rc = libxl__build_dom(gc, domid, d_config, state, dom);
1176     if (rc != 0)
1177         goto out;
1178 
1179     rc = hvm_build_set_params(ctx->xch, domid, info, state->store_port,
1180                                &state->store_mfn, state->console_port,
1181                                &state->console_mfn, state->store_domid,
1182                                state->console_domid);
1183     if (rc != 0) {
1184         LOG(ERROR, "hvm build set params failed");
1185         goto out;
1186     }
1187 
1188     rc = hvm_build_set_xs_values(gc, domid, dom, info);
1189     if (rc != 0) {
1190         LOG(ERROR, "hvm build set xenstore values failed");
1191         goto out;
1192     }
1193 
1194     xc_dom_release(dom);
1195     return 0;
1196 
1197 out:
1198     assert(rc != 0);
1199     if (dom != NULL) xc_dom_release(dom);
1200     return rc;
1201 }
1202 
1203 int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
1204                                 const char *cmd)
1205 {
1206     char *path = NULL;
1207     uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
1208     path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/command");
1209     return libxl__xs_printf(gc, XBT_NULL, path, "%s", cmd);
1210 }
1211 
1212 /*==================== Miscellaneous ====================*/
1213 
1214 char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
1215 {
1216     return GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(uuid));
1217 }
1218 
1219 const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
1220                                  const char *userdata_userid,
1221                                  const char *wh)
1222 {
1223     libxl_ctx *ctx = libxl__gc_owner(gc);
1224     char *uuid_string, *path;
1225     libxl_dominfo info;
1226     int rc;
1227 
1228     libxl_dominfo_init(&info);
1229 
1230     rc = libxl_domain_info(ctx, &info, domid);
1231     if (rc) {
1232         LOGE(ERROR, "unable to find domain info for domain %"PRIu32, domid);
1233         path = NULL;
1234         goto out;
1235     }
1236     uuid_string = GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(info.uuid));
1237     path = GCSPRINTF(XEN_LIB_DIR "/userdata-%s.%u.%s.%s",
1238                      wh, domid, uuid_string, userdata_userid);
1239 
1240  out:
1241     libxl_dominfo_dispose(&info);
1242     return path;
1243 }
1244 
1245 static int userdata_delete(libxl__gc *gc, const char *path)
1246 {
1247     int r;
1248     r = unlink(path);
1249     if (r) {
1250         LOGE(ERROR, "remove failed for %s", path);
1251         return errno;
1252     }
1253     return 0;
1254 }
1255 
1256 void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid)
1257 {
1258     const char *pattern;
1259     glob_t gl;
1260     int r, i;
1261 
1262     pattern = libxl__userdata_path(gc, domid, "*", "?");
1263     if (!pattern)
1264         goto out;
1265 
1266     gl.gl_pathc = 0;
1267     gl.gl_pathv = 0;
1268     gl.gl_offs = 0;
1269     r = glob(pattern, GLOB_ERR|GLOB_NOSORT|GLOB_MARK, 0, &gl);
1270     if (r == GLOB_NOMATCH)
1271         goto out;
1272     if (r)
1273         LOGE(ERROR, "glob failed for %s", pattern);
1274 
1275     /* Note: don't delete domain-userdata-lock; it will be handled by the
1276      * unlock function.
1277      */
1278     for (i=0; i<gl.gl_pathc; i++) {
1279         if (!strstr(gl.gl_pathv[i], "domain-userdata-lock"))
1280             userdata_delete(gc, gl.gl_pathv[i]);
1281     }
1282     globfree(&gl);
1283 out:
1284     return;
1285 }
1286 
1287 int libxl__userdata_store(libxl__gc *gc, uint32_t domid,
1288                           const char *userdata_userid,
1289                           const uint8_t *data, int datalen)
1290 {
1291     const char *filename;
1292     const char *newfilename;
1293     int e, rc;
1294     int fd = -1;
1295 
1296     filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1297     if (!filename) {
1298         rc = ERROR_NOMEM;
1299         goto out;
1300     }
1301 
1302     if (!datalen) {
1303         rc = userdata_delete(gc, filename);
1304         goto out;
1305     }
1306 
1307     newfilename = libxl__userdata_path(gc, domid, userdata_userid, "n");
1308     if (!newfilename) {
1309         rc = ERROR_NOMEM;
1310         goto out;
1311     }
1312 
1313     rc = ERROR_FAIL;
1314 
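    /* Write the data to a temporary "n" (new) file first, then rename it
     * over the "d" (data) file, so readers never see a partial write. */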
1315     fd = open(newfilename, O_RDWR | O_CREAT | O_TRUNC, 0600);
1316     if (fd < 0)
1317         goto err;
1318 
1319     if (libxl_write_exactly(CTX, fd, data, datalen, "userdata", newfilename))
1320         goto err;
1321 
1322     if (close(fd) < 0) {
1323         fd = -1;
1324         goto err;
1325     }
1326     fd = -1;
1327 
1328     if (rename(newfilename, filename))
1329         goto err;
1330 
1331     rc = 0;
1332 
1333 err:
1334     if (fd >= 0) {
1335         e = errno;
1336         close(fd);
1337         errno = e;
1338     }
1339 
1340     if (rc)
1341         LOGE(ERROR, "cannot write/rename %s for %s", newfilename, filename);
1342 out:
1343     return rc;
1344 }
1345 
1346 int libxl_userdata_store(libxl_ctx *ctx, uint32_t domid,
1347                               const char *userdata_userid,
1348                               const uint8_t *data, int datalen)
1349 {
1350     GC_INIT(ctx);
1351     int rc;
1352     libxl__flock *lock;
1353 
1354     CTX_LOCK;
1355     lock = libxl__lock_domain_userdata(gc, domid);
1356     if (!lock) {
1357         rc = ERROR_LOCK_FAIL;
1358         goto out;
1359     }
1360 
1361     rc = libxl__userdata_store(gc, domid, userdata_userid,
1362                                data, datalen);
1363 
1364     libxl__unlock_file(lock);
1365 
1366 out:
1367     CTX_UNLOCK;
1368     GC_FREE;
1369     return rc;
1370 }
1371 
1372 int libxl__userdata_retrieve(libxl__gc *gc, uint32_t domid,
1373                              const char *userdata_userid,
1374                              uint8_t **data_r, int *datalen_r)
1375 {
1376     const char *filename;
1377     int e, rc;
1378     int datalen = 0;
1379     void *data = 0;
1380 
1381     filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1382     if (!filename) {
1383         rc = ERROR_NOMEM;
1384         goto out;
1385     }
1386 
1387     e = libxl_read_file_contents(CTX, filename, data_r ? &data : 0, &datalen);
1388     if (e && errno != ENOENT) {
1389         rc = ERROR_FAIL;
1390         goto out;
1391     }
1392     if (!e && !datalen) {
1393         LOG(ERROR, "userdata file %s is empty", filename);
1394         if (data_r) assert(!*data_r);
1395         rc = ERROR_FAIL;
1396         goto out;
1397     }
1398 
1399     if (data_r) *data_r = data;
1400     if (datalen_r) *datalen_r = datalen;
1401     rc = 0;
1402 
1403 out:
1404     return rc;
1405 }
1406 
1407 int libxl_userdata_retrieve(libxl_ctx *ctx, uint32_t domid,
1408                                  const char *userdata_userid,
1409                                  uint8_t **data_r, int *datalen_r)
1410 {
1411     GC_INIT(ctx);
1412     int rc;
1413     libxl__flock *lock;
1414 
1415     CTX_LOCK;
1416     lock = libxl__lock_domain_userdata(gc, domid);
1417     if (!lock) {
1418         rc = ERROR_LOCK_FAIL;
1419         goto out;
1420     }
1421 
1422     rc = libxl__userdata_retrieve(gc, domid, userdata_userid,
1423                                   data_r, datalen_r);
1424 
1425 
1426     libxl__unlock_file(lock);
1427 out:
1428     CTX_UNLOCK;
1429     GC_FREE;
1430     return rc;
1431 }
1432 
1433 int libxl_userdata_unlink(libxl_ctx *ctx, uint32_t domid,
1434                           const char *userdata_userid)
1435 {
1436     GC_INIT(ctx);
1437     CTX_LOCK;
1438 
1439     int rc;
1440     libxl__flock *lock = NULL;
1441     const char *filename;
1442 
1443     lock = libxl__lock_domain_userdata(gc, domid);
1444     if (!lock) {
1445         rc = ERROR_LOCK_FAIL;
1446         goto out;
1447     }
1448 
1449     filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1450     if (!filename) {
1451         rc = ERROR_FAIL;
1452         goto out;
1453     }
1454     if (unlink(filename)) {
1455         LOGE(ERROR, "error deleting userdata file: %s", filename);
1456         rc = ERROR_FAIL;
1457         goto out;
1458     }
1459 
1460     rc = 0;
1461 out:
1462     if (lock)
1463         libxl__unlock_file(lock);
1464     CTX_UNLOCK;
1465     GC_FREE;
1466     return rc;
1467 }
1468 
1469 /*
1470  * Local variables:
1471  * mode: C
1472  * c-basic-offset: 4
1473  * indent-tabs-mode: nil
1474  * End:
1475  */
1476