1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published
7 * by the Free Software Foundation; version 2.1 only. with the special
8 * exception on linking described in file LICENSE.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 */
15
16 #include "libxl_osdeps.h" /* must come before any other headers */
17
18 #include <glob.h>
19
20 #include "libxl_internal.h"
21 #include "libxl_arch.h"
22
23 #include <xc_dom.h>
24 #include <xen/hvm/hvm_info_table.h>
25 #include <xen/hvm/hvm_xs_strings.h>
26 #include <xen/hvm/e820.h>
27
28 #include "_paths.h"
29
30 //#define DEBUG 1
31
libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
33 {
34 libxl_ctx *ctx = libxl__gc_owner(gc);
35 xc_domaininfo_t info;
36 int ret;
37
38 ret = xc_domain_getinfolist(ctx->xch, domid, 1, &info);
39 if (ret != 1 || info.domain != domid) {
40 LOG(ERROR, "unable to get domain type for domid=%"PRIu32, domid);
41 return LIBXL_DOMAIN_TYPE_INVALID;
42 }
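    /*
     * The hypervisor only distinguishes PV from HVM-style guests. For the
     * latter, the "type" node under the domain's libxl xenstore path tells
     * full HVM apart from PVH.
     */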
43 if (info.flags & XEN_DOMINF_hvm_guest) {
44 const char *type_path = GCSPRINTF("%s/type",
45 libxl__xs_libxl_path(gc, domid));
46 const char *type;
47 libxl_domain_type t;
48 int rc;
49
50 rc = libxl__xs_read_mandatory(gc, XBT_NULL, type_path, &type);
51 if (rc) {
52 LOG(WARN,
53 "unable to get domain type for domid=%"PRIu32", assuming HVM",
54 domid);
55 return LIBXL_DOMAIN_TYPE_HVM;
56 }
57
58 rc = libxl_domain_type_from_string(type, &t);
59 if (rc) {
60 LOG(WARN,
61 "unable to get domain type for domid=%"PRIu32", assuming HVM",
62 domid);
63 return LIBXL_DOMAIN_TYPE_HVM;
64 }
65
66 return t;
67 } else
68 return LIBXL_DOMAIN_TYPE_PV;
69 }
70
int libxl__domain_cpupool(libxl__gc *gc, uint32_t domid)
72 {
73 xc_domaininfo_t info;
74 int ret;
75
76 ret = xc_domain_getinfolist(CTX->xch, domid, 1, &info);
77 if (ret != 1)
78 {
79 LOGE(ERROR, "getinfolist failed %d", ret);
80 return ERROR_FAIL;
81 }
82 if (info.domain != domid)
83 {
84 LOGE(ERROR, "got info for dom%d, wanted dom%d\n", info.domain, domid);
85 return ERROR_FAIL;
86 }
87 return info.cpupool;
88 }
89
libxl_scheduler libxl__domain_scheduler(libxl__gc *gc, uint32_t domid)
91 {
92 int cpupool = libxl__domain_cpupool(gc, domid);
93 libxl_cpupoolinfo poolinfo;
94 libxl_scheduler sched = LIBXL_SCHEDULER_UNKNOWN;
95 int rc;
96
97 if (cpupool < 0)
98 return sched;
99
100 libxl_cpupoolinfo_init(&poolinfo);
101 rc = libxl_cpupool_info(CTX, &poolinfo, cpupool);
102 if (rc < 0)
103 goto out;
104
105 sched = poolinfo.sched;
106
107 out:
108 libxl_cpupoolinfo_dispose(&poolinfo);
109 return sched;
110 }
111
/*
 * Two NUMA placement candidates are compared by means of the following
 * heuristics:
 *
 *  - the number of vcpus runnable on the candidates is considered, and
 *    candidates with fewer of them are preferred. If two candidates have
 *    the same number of runnable vcpus,
 *  - the amount of free memory in the candidates is considered, and the
 *    candidate with the greater amount of it is preferred.
 *
 * In fact, leaving larger memory holes maximizes the probability of being
 * able to put other domains on the node. That hopefully means many domains
 * will benefit from local memory accesses, but it also introduces the risk
 * of overloading large (from a memory POV) nodes. That is exactly the
 * effect that counting the vcpus able to run on the nodes tries to prevent.
 *
 * Note that this completely ignores the number of nodes each candidate
 * spans, as the fact that fewer nodes is better is already accounted for
 * in the placement algorithm.
 *
 * A negative return value means the first candidate is preferred.
 */
static int numa_cmpf(const libxl__numa_candidate *c1,
                     const libxl__numa_candidate *c2)
134 {
135 if (c1->nr_vcpus != c2->nr_vcpus)
136 return c1->nr_vcpus - c2->nr_vcpus;
137
138 return c2->free_memkb - c1->free_memkb;
139 }
140
141 /* The actual automatic NUMA placement routine */
static int numa_place_domain(libxl__gc *gc, uint32_t domid,
                             libxl_domain_config *d_config)
144 {
145 libxl_domain_build_info *info = &d_config->b_info;
146 int found;
147 libxl__numa_candidate candidate;
148 libxl_bitmap cpumap, cpupool_nodemap, *map;
149 libxl_cpupoolinfo cpupool_info;
150 int i, cpupool, rc = 0;
151 uint64_t memkb;
152
153 libxl__numa_candidate_init(&candidate);
154 libxl_bitmap_init(&cpumap);
155 libxl_bitmap_init(&cpupool_nodemap);
156 libxl_cpupoolinfo_init(&cpupool_info);
157
    /*
     * Extract the cpumap from the cpupool the domain belongs to. In fact,
     * it only makes sense to consider the cpus/nodes that are in there
     * for placement.
     */
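    /*
     * The chained assignment stores the cpupool id and, if it is negative
     * (a libxl error code), propagates it to rc for the error path below.
     */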
163 rc = cpupool = libxl__domain_cpupool(gc, domid);
164 if (rc < 0)
165 goto out;
166 rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
167 if (rc)
168 goto out;
169 map = &cpupool_info.cpumap;
170
    /*
     * If there's a well-defined hard affinity mask (i.e., the same one for
     * all the vcpus), we can try to run the placement considering only the
     * pcpus within that mask.
     */
176 if (info->num_vcpu_hard_affinity)
177 {
178 #ifdef DEBUG
179 int j;
180
181 for (j = 0; j < info->num_vcpu_hard_affinity; j++)
182 assert(libxl_bitmap_equal(&info->vcpu_hard_affinity[0],
183 &info->vcpu_hard_affinity[j], 0));
184 #endif /* DEBUG */
185
186 rc = libxl_bitmap_and(CTX, &cpumap, &info->vcpu_hard_affinity[0],
187 &cpupool_info.cpumap);
188 if (rc)
189 goto out;
190
191 /* Hard affinity must contain at least one cpu of our cpupool */
192 if (libxl_bitmap_is_empty(&cpumap)) {
193 LOG(ERROR, "Hard affinity completely outside of domain's cpupool!");
194 rc = ERROR_INVAL;
195 goto out;
196 }
197 }
198
199 rc = libxl__domain_need_memory_calculate(gc, info, &memkb);
200 if (rc)
201 goto out;
202 if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
203 rc = ERROR_FAIL;
204 goto out;
205 }
206
    /* Find the best candidate with enough free memory and at least
     * as many pcpus as the domain has vcpus. */
209 rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
210 0, 0, map, numa_cmpf, &candidate, &found);
211 if (rc)
212 goto out;
213
    /* No suitable placement candidate was found. Just leave the domain's
     * info->cpumap alone: it will have affinity with all nodes/cpus. */
216 if (found == 0)
217 goto out;
218
219 /* Map the candidate's node map to the domain's info->nodemap */
220 libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
221
222 /* Avoid trying to set the affinity to nodes that might be in the
223 * candidate's nodemap but out of our cpupool. */
224 rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
225 &cpupool_nodemap);
226 if (rc)
227 goto out;
228
229 libxl_for_each_set_bit(i, info->nodemap) {
230 if (!libxl_bitmap_test(&cpupool_nodemap, i))
231 libxl_bitmap_reset(&info->nodemap, i);
232 }
233
234 LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
235 "%"PRIu64" KB free selected", candidate.nr_nodes,
236 candidate.nr_cpus, candidate.free_memkb / 1024);
237
238 out:
239 libxl__numa_candidate_dispose(&candidate);
240 libxl_bitmap_dispose(&cpupool_nodemap);
241 libxl_bitmap_dispose(&cpumap);
242 libxl_cpupoolinfo_dispose(&cpupool_info);
243 return rc;
244 }
245
int libxl__build_pre(libxl__gc *gc, uint32_t domid,
              libxl_domain_config *d_config, libxl__domain_build_state *state)
248 {
249 libxl_domain_build_info *const info = &d_config->b_info;
250 libxl_ctx *ctx = libxl__gc_owner(gc);
251 char *xs_domid, *con_domid;
252 int rc;
253 uint64_t size;
254
255 if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
256 LOG(ERROR, "Couldn't set max vcpu count");
257 return ERROR_FAIL;
258 }
259
260 if (libxl_defbool_val(d_config->b_info.disable_migrate) &&
261 xc_domain_disable_migrate(ctx->xch, domid) != 0) {
262 LOG(ERROR, "Couldn't set nomigrate");
263 return ERROR_FAIL;
264 }
265
    /*
     * Check if the domain has any CPU or node affinity already. If not, try
     * to build up the latter via automatic NUMA placement. In fact, in case
     * numa_place_domain() manages to find a placement, info->nodemap is
     * updated accordingly; if it does not, info->nodemap is just left
     * alone. It is then the subsequent call to
     * libxl_domain_set_nodeaffinity() that enacts the actual placement.
     *
     * As far as scheduling is concerned, we achieve NUMA-aware scheduling
     * by having the results of placement affect the soft affinity of all
     * the vcpus of the domain. Of course, we want that iff placement is
     * enabled and actually happens, so we only change the vcpus' soft
     * affinity to reflect the placement result if that is the case.
     */
280 if (libxl_defbool_val(info->numa_placement)) {
281 if (info->cpumap.size || info->num_vcpu_soft_affinity)
282 LOG(WARN, "Can't run NUMA placement, as a soft "
283 "affinity has been specified explicitly");
284 else if (info->nodemap.size)
285 LOG(WARN, "Can't run NUMA placement, as the domain has "
286 "NUMA node affinity set already");
287 else {
288 libxl_bitmap cpumap_soft;
289
290 rc = libxl_node_bitmap_alloc(ctx, &info->nodemap, 0);
291 if (rc)
292 return rc;
293 libxl_bitmap_set_any(&info->nodemap);
294
295 rc = libxl_cpu_bitmap_alloc(ctx, &cpumap_soft, 0);
296 if (rc)
297 return rc;
298
299 rc = numa_place_domain(gc, domid, d_config);
300 if (rc) {
301 libxl_bitmap_dispose(&cpumap_soft);
302 return rc;
303 }
304
            /*
             * All we need to do now is convert the result of automatic
             * placement from a nodemap to a cpumap, and then use that
             * cpumap as the soft affinity for all the vcpus of the domain.
             *
             * When calling libxl_set_vcpuaffinity_all(), it is ok to use
             * NULL as the hard affinity, as we know we don't have one, or
             * we wouldn't be here.
             */
314 libxl_nodemap_to_cpumap(ctx, &info->nodemap, &cpumap_soft);
315 libxl_set_vcpuaffinity_all(ctx, domid, info->max_vcpus,
316 NULL, &cpumap_soft);
317
318 libxl_bitmap_dispose(&cpumap_soft);
319
            /*
             * Placement has run, so prevent it from being re-run if this
             * same config we are using and building here is ever re-used.
             * This means that people re-using configs will get the same
             * results, consistently, across every re-use, which is what
             * we expect most people to want.
             */
327 libxl_defbool_set(&info->numa_placement, false);
328 }
329 }
330
331 if (info->nodemap.size)
332 libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);
333
334 if (info->num_vcpu_hard_affinity || info->num_vcpu_soft_affinity) {
335 libxl_bitmap *hard_affinity, *soft_affinity;
336 int i, n_vcpus;
337
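        /*
         * Set affinity for as many vcpus as have either a hard or a soft
         * affinity specified in the configuration.
         */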
338 n_vcpus = info->num_vcpu_hard_affinity > info->num_vcpu_soft_affinity ?
339 info->num_vcpu_hard_affinity : info->num_vcpu_soft_affinity;
340
341 for (i = 0; i < n_vcpus; i++) {
342 /*
343 * Prepare hard and soft affinity pointers in a way that allows
344 * us to issue only one call to libxl_set_vcpuaffinity(), setting,
345 * for each vcpu, both hard and soft affinity "atomically".
346 */
347 hard_affinity = NULL;
348 if (info->num_vcpu_hard_affinity &&
349 i < info->num_vcpu_hard_affinity)
350 hard_affinity = &info->vcpu_hard_affinity[i];
351
352 soft_affinity = NULL;
353 if (info->num_vcpu_soft_affinity &&
354 i < info->num_vcpu_soft_affinity)
355 soft_affinity = &info->vcpu_soft_affinity[i];
356
357 if (libxl_set_vcpuaffinity(ctx, domid, i,
358 hard_affinity, soft_affinity)) {
359 LOG(ERROR, "setting affinity failed on vcpu `%d'", i);
360 return ERROR_FAIL;
361 }
362 }
363 }
364
365
366 rc = libxl__arch_extra_memory(gc, info, &size);
367 if (rc < 0) {
368 LOGE(ERROR, "Couldn't get arch extra constant memory size");
369 return ERROR_FAIL;
370 }
371
372 if (xc_domain_setmaxmem(ctx->xch, domid, info->target_memkb + size) < 0) {
373 LOGE(ERROR, "Couldn't set max memory");
374 return ERROR_FAIL;
375 }
376
377 xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
378 state->store_domid = xs_domid ? atoi(xs_domid) : 0;
379 free(xs_domid);
380
381 con_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenconsoled/domid", NULL);
382 state->console_domid = con_domid ? atoi(con_domid) : 0;
383 free(con_domid);
384
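    /*
     * Allocate unbound event channels in the new domain for the xenstore
     * and console rings, to be bound by the respective backend domains.
     */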
385 state->store_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->store_domid);
386 state->console_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->console_domid);
387
388 rc = libxl__arch_domain_create(gc, d_config, domid);
389
390 /* Construct a CPUID policy, but only for brand new domains. Domains
391 * being migrated-in/restored have CPUID handled during the
392 * static_data_done() callback. */
393 if (!state->restore)
394 libxl__cpuid_legacy(ctx, domid, false, info);
395
396 return rc;
397 }
398
static int set_vnuma_affinity(libxl__gc *gc, uint32_t domid,
                              libxl_domain_build_info *info)
401 {
402 libxl_bitmap cpumap;
403 libxl_vnode_info *v;
404 unsigned int i, j;
405 int rc = 0;
406
407 libxl_bitmap_init(&cpumap);
408
409 rc = libxl_cpu_bitmap_alloc(CTX, &cpumap, 0);
410 if (rc) {
411 LOG(ERROR, "Can't allocate nodemap");
412 goto out;
413 }
414
415 /*
416 * For each vcpu in each vnode, set its soft affinity to
417 * the pcpus belonging to the pnode the vnode is on
418 */
419 for (i = 0; i < info->num_vnuma_nodes; i++) {
420 v = &info->vnuma_nodes[i];
421
422 rc = libxl_node_to_cpumap(CTX, v->pnode, &cpumap);
423 if (rc) {
424 LOG(ERROR, "Can't get cpumap for vnode %d", i);
425 goto out;
426 }
427
428 libxl_for_each_set_bit(j, v->vcpus) {
429 rc = libxl_set_vcpuaffinity(CTX, domid, j, NULL, &cpumap);
430 if (rc) {
431 LOG(ERROR, "Can't set cpu affinity for %d", j);
432 goto out;
433 }
434 }
435 }
436
437 out:
438 libxl_bitmap_dispose(&cpumap);
439 return rc;
440 }
441
int libxl__build_post(libxl__gc *gc, uint32_t domid,
                      libxl_domain_build_info *info,
                      libxl__domain_build_state *state,
                      char **vms_ents, char **local_ents)
446 {
447 libxl_ctx *ctx = libxl__gc_owner(gc);
448 char *dom_path, *vm_path;
449 xs_transaction_t t;
450 char **ents;
451 int i, rc;
452
453 if (info->num_vnuma_nodes && !info->num_vcpu_soft_affinity) {
454 rc = set_vnuma_affinity(gc, domid, info);
455 if (rc)
456 return rc;
457 }
458
459 rc = libxl_domain_sched_params_set(CTX, domid, &info->sched_params);
460 if (rc)
461 return rc;
462
463 if (info->type == LIBXL_DOMAIN_TYPE_HVM
464 && !libxl_ms_vm_genid_is_zero(&info->u.hvm.ms_vm_genid)) {
465 rc = libxl__ms_vm_genid_set(gc, domid,
466 &info->u.hvm.ms_vm_genid);
467 if (rc) {
468 LOG(ERROR, "Failed to set VM Generation ID");
469 return rc;
470 }
471 }
472
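    /*
     * Key/value pairs for the domain's xenstore directory: 12 fixed
     * entries, two per vcpu for its availability, and two extra slots
     * that calloc() leaves NULL to terminate the list.
     */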
473 ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
474 ents[0] = "memory/static-max";
475 ents[1] = GCSPRINTF("%"PRId64, info->max_memkb);
476 ents[2] = "memory/target";
477 ents[3] = GCSPRINTF("%"PRId64, info->target_memkb -
478 libxl__get_targetmem_fudge(gc, info));
479 ents[4] = "memory/videoram";
480 ents[5] = GCSPRINTF("%"PRId64, info->video_memkb);
481 ents[6] = "domid";
482 ents[7] = GCSPRINTF("%d", domid);
483 ents[8] = "store/port";
484 ents[9] = GCSPRINTF("%"PRIu32, state->store_port);
485 ents[10] = "store/ring-ref";
486 ents[11] = GCSPRINTF("%lu", state->store_mfn);
487 for (i = 0; i < info->max_vcpus; i++) {
488 ents[12+(i*2)] = GCSPRINTF("cpu/%d/availability", i);
489 ents[12+(i*2)+1] = libxl_bitmap_test(&info->avail_vcpus, i)
490 ? "online" : "offline";
491 }
492
493 dom_path = libxl__xs_get_dompath(gc, domid);
494 if (!dom_path) {
495 return ERROR_FAIL;
496 }
497
498 vm_path = xs_read(ctx->xsh, XBT_NULL, GCSPRINTF("%s/vm", dom_path), NULL);
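    /*
     * Write the entries in a xenstore transaction; if it fails with EAGAIN
     * (i.e. it raced with another update), start over with a new one.
     */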
499 retry_transaction:
500 t = xs_transaction_start(ctx->xsh);
501
502 libxl__xs_writev(gc, t, dom_path, ents);
503 libxl__xs_writev(gc, t, dom_path, local_ents);
504 libxl__xs_writev(gc, t, vm_path, vms_ents);
505
506 if (!xs_transaction_end(ctx->xsh, t, 0))
507 if (errno == EAGAIN)
508 goto retry_transaction;
509 xs_introduce_domain(ctx->xsh, domid, state->store_mfn, state->store_port);
510 free(vm_path);
511 return 0;
512 }
513
static int set_vnuma_info(libxl__gc *gc, uint32_t domid,
                          const libxl_domain_build_info *info,
                          const libxl__domain_build_state *state)
517 {
518 int rc = 0;
519 unsigned int i, nr_vdistance;
520 unsigned int *vcpu_to_vnode, *vnode_to_pnode, *vdistance = NULL;
521
522 vcpu_to_vnode = libxl__calloc(gc, info->max_vcpus,
523 sizeof(unsigned int));
524 vnode_to_pnode = libxl__calloc(gc, info->num_vnuma_nodes,
525 sizeof(unsigned int));
526
527 nr_vdistance = info->num_vnuma_nodes * info->num_vnuma_nodes;
528 vdistance = libxl__calloc(gc, nr_vdistance, sizeof(unsigned int));
529
530 for (i = 0; i < info->num_vnuma_nodes; i++) {
531 libxl_vnode_info *v = &info->vnuma_nodes[i];
532 int j;
533
534 /* vnode to pnode mapping */
535 vnode_to_pnode[i] = v->pnode;
536
537 /* vcpu to vnode mapping */
538 libxl_for_each_set_bit(j, v->vcpus)
539 vcpu_to_vnode[j] = i;
540
541 /* node distances */
542 assert(info->num_vnuma_nodes == v->num_distances);
543 memcpy(vdistance + (i * info->num_vnuma_nodes),
544 v->distances,
545 v->num_distances * sizeof(unsigned int));
546 }
547
548 if (xc_domain_setvnuma(CTX->xch, domid, info->num_vnuma_nodes,
549 state->num_vmemranges, info->max_vcpus,
550 state->vmemranges, vdistance,
551 vcpu_to_vnode, vnode_to_pnode) < 0) {
552 LOGE(ERROR, "xc_domain_setvnuma failed");
553 rc = ERROR_FAIL;
554 }
555
556 return rc;
557 }
558
static int libxl__build_dom(libxl__gc *gc, uint32_t domid,
            libxl_domain_config *d_config, libxl__domain_build_state *state,
            struct xc_dom_image *dom)
562 {
563 libxl_domain_build_info *const info = &d_config->b_info;
564 uint64_t mem_kb;
565 int ret;
566
567 if ( (ret = xc_dom_boot_xen_init(dom, CTX->xch, domid)) != 0 ) {
568 LOGE(ERROR, "xc_dom_boot_xen_init failed");
569 goto out;
570 }
571 #ifdef GUEST_RAM_BASE
572 if ( (ret = xc_dom_rambase_init(dom, GUEST_RAM_BASE)) != 0 ) {
573 LOGE(ERROR, "xc_dom_rambase failed");
574 goto out;
575 }
576 #endif
577 if ( (ret = xc_dom_parse_image(dom)) != 0 ) {
578 LOG(ERROR, "xc_dom_parse_image failed");
579 goto out;
580 }
581 if ( (ret = libxl__arch_domain_init_hw_description(gc, info, state, dom)) != 0 ) {
582 LOGE(ERROR, "libxl__arch_domain_init_hw_description failed");
583 goto out;
584 }
585
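    /*
     * HVM-style containers are sized from the maximum memory (minus video
     * RAM); PV containers are sized from the current target.
     */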
586 mem_kb = dom->container_type == XC_DOM_HVM_CONTAINER ?
587 (info->max_memkb - info->video_memkb) : info->target_memkb;
588 if ( (ret = xc_dom_mem_init(dom, mem_kb / 1024)) != 0 ) {
589 LOGE(ERROR, "xc_dom_mem_init failed");
590 goto out;
591 }
592 if ( (ret = xc_dom_boot_mem_init(dom)) != 0 ) {
593 LOGE(ERROR, "xc_dom_boot_mem_init failed");
594 goto out;
595 }
596 if ( (ret = libxl__arch_domain_finalise_hw_description(gc, domid, d_config, dom)) != 0 ) {
597 LOGE(ERROR, "libxl__arch_domain_finalise_hw_description failed");
598 goto out;
599 }
600 if ( (ret = xc_dom_build_image(dom)) != 0 ) {
601 LOGE(ERROR, "xc_dom_build_image failed");
602 goto out;
603 }
604 if ( (ret = xc_dom_boot_image(dom)) != 0 ) {
605 LOGE(ERROR, "xc_dom_boot_image failed");
606 goto out;
607 }
608 if ( (ret = xc_dom_gnttab_init(dom)) != 0 ) {
609 LOGE(ERROR, "xc_dom_gnttab_init failed");
610 goto out;
611 }
612 if ((ret = libxl__arch_build_dom_finish(gc, info, dom, state)) != 0) {
613 LOGE(ERROR, "libxl__arch_build_dom_finish failed");
614 goto out;
615 }
616
617 out:
618 return ret != 0 ? ERROR_FAIL : 0;
619 }
620
int libxl__build_pv(libxl__gc *gc, uint32_t domid,
             libxl_domain_config *d_config, libxl__domain_build_state *state)
623 {
624 libxl_ctx *ctx = libxl__gc_owner(gc);
625 libxl_domain_build_info *const info = &d_config->b_info;
626 struct xc_dom_image *dom;
627 int ret;
628 int flags = 0;
629
630 xc_dom_loginit(ctx->xch);
631
632 dom = xc_dom_allocate(ctx->xch, state->pv_cmdline, info->u.pv.features);
633 if (!dom) {
634 LOGE(ERROR, "xc_dom_allocate failed");
635 return ERROR_FAIL;
636 }
637
638 dom->container_type = XC_DOM_PV_CONTAINER;
639
640 LOG(DEBUG, "pv kernel mapped %d path %s", state->pv_kernel.mapped, state->pv_kernel.path);
641
642 if (state->pv_kernel.mapped) {
643 ret = xc_dom_kernel_mem(dom,
644 state->pv_kernel.data,
645 state->pv_kernel.size);
646 if ( ret != 0) {
647 LOGE(ERROR, "xc_dom_kernel_mem failed");
648 goto out;
649 }
650 } else {
651 ret = xc_dom_kernel_file(dom, state->pv_kernel.path);
652 if ( ret != 0) {
653 LOGE(ERROR, "xc_dom_kernel_file failed");
654 goto out;
655 }
656 }
657
658 if ( state->pv_ramdisk.path && strlen(state->pv_ramdisk.path) ) {
659 if (state->pv_ramdisk.mapped) {
660 if ( (ret = xc_dom_module_mem(dom, state->pv_ramdisk.data, state->pv_ramdisk.size, NULL)) != 0 ) {
661 LOGE(ERROR, "xc_dom_ramdisk_mem failed");
662 goto out;
663 }
664 } else {
665 if ( (ret = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL)) != 0 ) {
666 LOGE(ERROR, "xc_dom_ramdisk_file failed");
667 goto out;
668 }
669 }
670 }
671
672 dom->flags = flags;
673 dom->console_evtchn = state->console_port;
674 dom->console_domid = state->console_domid;
675 dom->xenstore_evtchn = state->store_port;
676 dom->xenstore_domid = state->store_domid;
677 dom->claim_enabled = libxl_defbool_val(info->claim_mode);
678 dom->max_vcpus = info->max_vcpus;
679
680 if (info->num_vnuma_nodes != 0) {
681 unsigned int i;
682
683 ret = libxl__vnuma_build_vmemrange_pv(gc, domid, info, state);
684 if (ret) {
685 LOGE(ERROR, "cannot build vmemranges");
686 goto out;
687 }
688 ret = libxl__vnuma_config_check(gc, info, state);
689 if (ret) goto out;
690
691 ret = set_vnuma_info(gc, domid, info, state);
692 if (ret) goto out;
693
694 dom->nr_vmemranges = state->num_vmemranges;
695 dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges) *
696 dom->nr_vmemranges);
697
698 for (i = 0; i < dom->nr_vmemranges; i++) {
699 dom->vmemranges[i].start = state->vmemranges[i].start;
700 dom->vmemranges[i].end = state->vmemranges[i].end;
701 dom->vmemranges[i].flags = state->vmemranges[i].flags;
702 dom->vmemranges[i].nid = state->vmemranges[i].nid;
703 }
704
705 dom->nr_vnodes = info->num_vnuma_nodes;
706 dom->vnode_to_pnode = xc_dom_malloc(dom, sizeof(*dom->vnode_to_pnode) *
707 dom->nr_vnodes);
708 for (i = 0; i < info->num_vnuma_nodes; i++)
709 dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
710 }
711
712 ret = libxl__build_dom(gc, domid, d_config, state, dom);
713 if (ret != 0)
714 goto out;
715
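    /*
     * Translated guests already deal in guest frame numbers; for PV guests
     * the console/xenstore ring pfns must be translated through the p2m to
     * obtain machine frames.
     */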
716 if (xc_dom_translated(dom)) {
717 state->console_mfn = dom->console_pfn;
718 state->store_mfn = dom->xenstore_pfn;
719 state->vuart_gfn = dom->vuart_gfn;
720 } else {
721 state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
722 state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
723 }
724
725 ret = 0;
726 out:
727 xc_dom_release(dom);
728 return ret == 0 ? 0 : ERROR_FAIL;
729 }
730
static int hvm_build_set_params(xc_interface *handle, uint32_t domid,
                                libxl_domain_build_info *info,
                                int store_evtchn, unsigned long *store_mfn,
                                int console_evtchn, unsigned long *console_mfn,
                                domid_t store_domid, domid_t console_domid)
736 {
737 struct hvm_info_table *va_hvm;
738 uint8_t *va_map, sum;
739 uint64_t str_mfn, cons_mfn;
740 int i;
741
742 if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
743 va_map = xc_map_foreign_range(handle, domid,
744 XC_PAGE_SIZE, PROT_READ | PROT_WRITE,
745 HVM_INFO_PFN);
746 if (va_map == NULL)
747 return ERROR_FAIL;
748
749 va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
750 va_hvm->apic_mode = libxl_defbool_val(info->apic);
751 va_hvm->nr_vcpus = info->max_vcpus;
752 memset(va_hvm->vcpu_online, 0, sizeof(va_hvm->vcpu_online));
753 memcpy(va_hvm->vcpu_online, info->avail_vcpus.map, info->avail_vcpus.size);
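        /*
         * Recompute the checksum: after updating the fields above, adjust
         * it so that the bytes of the table once again sum to zero
         * (mod 256).
         */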
754 for (i = 0, sum = 0; i < va_hvm->length; i++)
755 sum += ((uint8_t *) va_hvm)[i];
756 va_hvm->checksum -= sum;
757 munmap(va_map, XC_PAGE_SIZE);
758 }
759
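    /*
     * Read back the xenstore and console ring pfns chosen by the domain
     * builder, and tell the guest which event channels to use for them.
     */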
760 xc_hvm_param_get(handle, domid, HVM_PARAM_STORE_PFN, &str_mfn);
761 xc_hvm_param_get(handle, domid, HVM_PARAM_CONSOLE_PFN, &cons_mfn);
762 xc_hvm_param_set(handle, domid, HVM_PARAM_STORE_EVTCHN, store_evtchn);
763 xc_hvm_param_set(handle, domid, HVM_PARAM_CONSOLE_EVTCHN, console_evtchn);
764
765 *store_mfn = str_mfn;
766 *console_mfn = cons_mfn;
767
768 return 0;
769 }
770
static int hvm_build_set_xs_values(libxl__gc *gc,
                                   uint32_t domid,
                                   struct xc_dom_image *dom,
                                   const libxl_domain_build_info *info)
775 {
776 char *path = NULL;
777 int ret = 0;
778
779 if (dom->smbios_module.guest_addr_out) {
780 path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_ADDRESS, domid);
781
782 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
783 dom->smbios_module.guest_addr_out);
784 if (ret)
785 goto err;
786
787 path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_LENGTH, domid);
788
789 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
790 dom->smbios_module.length);
791 if (ret)
792 goto err;
793 }
794
795 /* Only one module can be passed. PVHv2 guests do not support this. */
796 if (dom->acpi_modules[0].guest_addr_out &&
797 info->type == LIBXL_DOMAIN_TYPE_HVM) {
798 path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_ADDRESS, domid);
799
800 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
801 dom->acpi_modules[0].guest_addr_out);
802 if (ret)
803 goto err;
804
805 path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_LENGTH, domid);
806
807 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
808 dom->acpi_modules[0].length);
809 if (ret)
810 goto err;
811 }
812
813 return 0;
814
815 err:
816 LOG(ERROR, "failed to write firmware xenstore value, err: %d", ret);
817 return ret;
818 }
819
static int libxl__load_hvm_firmware_module(libxl__gc *gc,
                                           const char *filename,
                                           const char *what,
                                           struct xc_hvm_firmware_module *m)
824 {
825 int datalen = 0;
826 void *data = NULL;
827 int r, rc;
828
829 LOG(DEBUG, "Loading %s: %s", what, filename);
830 r = libxl_read_file_contents(CTX, filename, &data, &datalen);
831 if (r) {
832 /*
833 * Print a message only on ENOENT, other errors are logged by the
834 * function libxl_read_file_contents().
835 */
836 if (r == ENOENT)
837 LOGEV(ERROR, r, "failed to read %s file", what);
838 rc = ERROR_FAIL;
839 goto out;
840 }
841 libxl__ptr_add(gc, data);
842 if (datalen) {
843 /* Only accept non-empty files */
844 m->data = data;
845 m->length = datalen;
846 } else {
847 LOG(ERROR, "file %s for %s is empty", filename, what);
848 rc = ERROR_INVAL;
849 goto out;
850 }
851 rc = 0;
852 out:
853 return rc;
854 }
855
static int libxl__domain_firmware(libxl__gc *gc,
                                  libxl_domain_build_info *info,
                                  libxl__domain_build_state *state,
                                  struct xc_dom_image *dom)
860 {
861 libxl_ctx *ctx = libxl__gc_owner(gc);
862 const char *firmware = NULL;
863 int e, rc;
864 int datalen = 0;
865 void *data;
866 const char *bios_filename = NULL;
867
868 if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
869 if (info->u.hvm.firmware) {
870 firmware = info->u.hvm.firmware;
871 } else {
872 switch (info->device_model_version)
873 {
874 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
875 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
876 firmware = "hvmloader";
877 break;
878 default:
879 LOG(ERROR, "invalid device model version %d",
880 info->device_model_version);
881 rc = ERROR_FAIL;
882 goto out;
883 }
884 }
885 }
886
887 if (state->pv_kernel.path != NULL &&
888 info->type == LIBXL_DOMAIN_TYPE_PVH) {
889
890 if (state->shim_path) {
891 rc = xc_dom_kernel_file(dom, state->shim_path);
892 if (rc) {
893 LOGE(ERROR, "xc_dom_kernel_file failed");
894 goto out;
895 }
896
897 /* We've loaded the shim, so load the kernel as a secondary module */
898 if (state->pv_kernel.mapped) {
899 LOG(DEBUG, "xc_dom_module_mem, cmdline %s",
900 state->pv_cmdline);
901 rc = xc_dom_module_mem(dom, state->pv_kernel.data,
902 state->pv_kernel.size, state->pv_cmdline);
903 if (rc) {
904 LOGE(ERROR, "xc_dom_kernel_mem failed");
905 goto out;
906 }
907 } else {
908 LOG(DEBUG, "xc_dom_module_file, path %s cmdline %s",
909 state->pv_kernel.path, state->pv_cmdline);
910 rc = xc_dom_module_file(dom, state->pv_kernel.path, state->pv_cmdline);
911 if (rc) {
912 LOGE(ERROR, "xc_dom_kernel_file failed");
913 goto out;
914 }
915 }
916 } else {
917 /* No shim, so load the kernel directly */
918 if (state->pv_kernel.mapped) {
919 rc = xc_dom_kernel_mem(dom, state->pv_kernel.data,
920 state->pv_kernel.size);
921 if (rc) {
922 LOGE(ERROR, "xc_dom_kernel_mem failed");
923 goto out;
924 }
925 } else {
926 rc = xc_dom_kernel_file(dom, state->pv_kernel.path);
927 if (rc) {
928 LOGE(ERROR, "xc_dom_kernel_file failed");
929 goto out;
930 }
931 }
932 }
933
934 if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) {
935 if (state->pv_ramdisk.mapped) {
936 rc = xc_dom_module_mem(dom, state->pv_ramdisk.data,
937 state->pv_ramdisk.size, NULL);
938 if (rc) {
939 LOGE(ERROR, "xc_dom_ramdisk_mem failed");
940 goto out;
941 }
942 } else {
943 rc = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL);
944 if (rc) {
945 LOGE(ERROR, "xc_dom_ramdisk_file failed");
946 goto out;
947 }
948 }
949 }
950 } else {
        /*
         * Only HVM guests should get here; PVH guests should always have a
         * kernel set at this point.
         */
955 assert(info->type == LIBXL_DOMAIN_TYPE_HVM);
956 rc = xc_dom_kernel_file(dom, libxl__abs_path(gc, firmware,
957 libxl__xenfirmwaredir_path()));
958 }
959
960 if (rc != 0) {
961 LOGE(ERROR, "xc_dom_{kernel_file/ramdisk_file} failed");
962 goto out;
963 }
964
965 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
966 info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
967 if (info->u.hvm.system_firmware) {
968 bios_filename = info->u.hvm.system_firmware;
969 } else {
970 switch (info->u.hvm.bios) {
971 case LIBXL_BIOS_TYPE_SEABIOS:
972 bios_filename = libxl__seabios_path();
973 break;
974 case LIBXL_BIOS_TYPE_OVMF:
975 bios_filename = libxl__ovmf_path();
976 break;
977 case LIBXL_BIOS_TYPE_ROMBIOS:
978 default:
979 abort();
980 }
981 }
982 }
983
984 if (bios_filename) {
985 rc = libxl__load_hvm_firmware_module(gc, bios_filename, "BIOS",
986 &dom->system_firmware_module);
987 if (rc) goto out;
988 }
989
990 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
991 info->u.hvm.bios == LIBXL_BIOS_TYPE_ROMBIOS &&
992 libxl__ipxe_path()) {
993 const char *fp = libxl__ipxe_path();
994 rc = xc_dom_module_file(dom, fp, "ipxe");
995
996 if (rc) {
997 LOGE(ERROR, "failed to load IPXE %s (%d)", fp, rc);
998 rc = ERROR_FAIL;
999 goto out;
1000 }
1001 }
1002
1003 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1004 info->u.hvm.smbios_firmware) {
1005 data = NULL;
1006 e = libxl_read_file_contents(ctx, info->u.hvm.smbios_firmware,
1007 &data, &datalen);
1008 if (e) {
1009 LOGEV(ERROR, e, "failed to read SMBIOS firmware file %s",
1010 info->u.hvm.smbios_firmware);
1011 rc = ERROR_FAIL;
1012 goto out;
1013 }
1014 libxl__ptr_add(gc, data);
1015 if (datalen) {
1016 /* Only accept non-empty files */
1017 dom->smbios_module.data = data;
1018 dom->smbios_module.length = (uint32_t)datalen;
1019 }
1020 }
1021
1022 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1023 info->u.hvm.acpi_firmware) {
1024 data = NULL;
1025 e = libxl_read_file_contents(ctx, info->u.hvm.acpi_firmware,
1026 &data, &datalen);
1027 if (e) {
1028 LOGEV(ERROR, e, "failed to read ACPI firmware file %s",
1029 info->u.hvm.acpi_firmware);
1030 rc = ERROR_FAIL;
1031 goto out;
1032 }
1033 libxl__ptr_add(gc, data);
1034 if (datalen) {
1035 /* Only accept a non-empty file */
1036 dom->acpi_modules[0].data = data;
1037 dom->acpi_modules[0].length = (uint32_t)datalen;
1038 }
1039 }
1040
1041 return 0;
1042 out:
1043 assert(rc != 0);
1044 return rc;
1045 }
1046
int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
                     libxl_domain_config *d_config,
                     libxl__domain_build_state *state)
1050 {
1051 libxl_ctx *ctx = libxl__gc_owner(gc);
1052 int rc;
1053 uint64_t mmio_start, lowmem_end, highmem_end, mem_size;
1054 libxl_domain_build_info *const info = &d_config->b_info;
1055 struct xc_dom_image *dom = NULL;
1056 bool device_model = info->type == LIBXL_DOMAIN_TYPE_HVM ? true : false;
1057
1058 xc_dom_loginit(ctx->xch);
1059
1060 /*
1061 * If PVH and we have a shim override, use the shim cmdline.
1062 * If PVH and no shim override, use the pv cmdline.
1063 * If not PVH, use info->cmdline.
1064 */
1065 dom = xc_dom_allocate(ctx->xch, info->type == LIBXL_DOMAIN_TYPE_PVH ?
1066 (state->shim_path ? state->shim_cmdline : state->pv_cmdline) :
1067 info->cmdline, NULL);
1068 if (!dom) {
1069 LOGE(ERROR, "xc_dom_allocate failed");
1070 rc = ERROR_NOMEM;
1071 goto out;
1072 }
1073
1074 dom->container_type = XC_DOM_HVM_CONTAINER;
1075
    /* The params from the configuration file are in Mb, which are then
     * multiplied by 1 Kb (i.e. stored in kB). They used to be divided back
     * down when calling the old xc_hvm_build_target_mem(), which turned
     * them into bytes. Do all of this in one step here: mem_size ends up
     * in bytes and target_pages in 4 kB pages.
     */
1081 mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
1082 dom->target_pages = (uint64_t)(info->target_memkb - info->video_memkb) >> 2;
1083 dom->claim_enabled = libxl_defbool_val(info->claim_mode);
1084 if (info->u.hvm.mmio_hole_memkb) {
1085 uint64_t max_ram_below_4g = (1ULL << 32) -
1086 (info->u.hvm.mmio_hole_memkb << 10);
1087
1088 if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START)
1089 dom->mmio_size = info->u.hvm.mmio_hole_memkb << 10;
1090 }
1091
1092 rc = libxl__domain_firmware(gc, info, state, dom);
1093 if (rc != 0) {
1094 LOG(ERROR, "initializing domain firmware failed");
1095 goto out;
1096 }
1097
1098 if (dom->target_pages == 0)
1099 dom->target_pages = mem_size >> XC_PAGE_SHIFT;
1100 if (dom->mmio_size == 0 && device_model)
1101 dom->mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
1102 else if (dom->mmio_size == 0 && !device_model) {
1103 #if defined(__i386__) || defined(__x86_64__)
1104 /*
1105 * Make sure the local APIC page, the ACPI tables and the special pages
1106 * are inside the MMIO hole.
1107 */
1108 xen_paddr_t start =
1109 (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES) <<
1110 XC_PAGE_SHIFT;
1111
1112 start = min_t(xen_paddr_t, start, LAPIC_BASE_ADDRESS);
1113 start = min_t(xen_paddr_t, start, ACPI_INFO_PHYSICAL_ADDRESS);
1114 dom->mmio_size = GB(4) - start;
1115 #else
1116 assert(1);
1117 #endif
1118 }
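    /*
     * If the guest's RAM would overlap the MMIO hole below 4 GiB, move the
     * overlapping part above the 4 GiB boundary.
     */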
1119 lowmem_end = mem_size;
1120 highmem_end = 0;
1121 mmio_start = (1ull << 32) - dom->mmio_size;
1122 if (lowmem_end > mmio_start)
1123 {
1124 highmem_end = (1ull << 32) + (lowmem_end - mmio_start);
1125 lowmem_end = mmio_start;
1126 }
1127 dom->lowmem_end = lowmem_end;
1128 dom->highmem_end = highmem_end;
1129 dom->mmio_start = mmio_start;
1130 dom->vga_hole_size = device_model ? LIBXL_VGA_HOLE_SIZE : 0;
1131 dom->device_model = device_model;
1132 dom->max_vcpus = info->max_vcpus;
1133 dom->console_domid = state->console_domid;
1134 dom->xenstore_domid = state->store_domid;
1135
1136 rc = libxl__domain_device_construct_rdm(gc, d_config,
1137 info->u.hvm.rdm_mem_boundary_memkb*1024,
1138 dom);
1139 if (rc) {
1140 LOG(ERROR, "checking reserved device memory failed");
1141 goto out;
1142 }
1143
1144 if (info->num_vnuma_nodes != 0) {
1145 int i;
1146
1147 rc = libxl__vnuma_build_vmemrange_hvm(gc, domid, info, state, dom);
1148 if (rc != 0) {
1149 LOG(ERROR, "hvm build vmemranges failed");
1150 goto out;
1151 }
1152 rc = libxl__vnuma_config_check(gc, info, state);
1153 if (rc != 0) goto out;
1154 rc = set_vnuma_info(gc, domid, info, state);
1155 if (rc != 0) goto out;
1156
1157 dom->nr_vmemranges = state->num_vmemranges;
1158 dom->vmemranges = libxl__malloc(gc, sizeof(*dom->vmemranges) *
1159 dom->nr_vmemranges);
1160
1161 for (i = 0; i < dom->nr_vmemranges; i++) {
1162 dom->vmemranges[i].start = state->vmemranges[i].start;
1163 dom->vmemranges[i].end = state->vmemranges[i].end;
1164 dom->vmemranges[i].flags = state->vmemranges[i].flags;
1165 dom->vmemranges[i].nid = state->vmemranges[i].nid;
1166 }
1167
1168 dom->nr_vnodes = info->num_vnuma_nodes;
1169 dom->vnode_to_pnode = libxl__malloc(gc, sizeof(*dom->vnode_to_pnode) *
1170 dom->nr_vnodes);
1171 for (i = 0; i < dom->nr_vnodes; i++)
1172 dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
1173 }
1174
1175 rc = libxl__build_dom(gc, domid, d_config, state, dom);
1176 if (rc != 0)
1177 goto out;
1178
1179 rc = hvm_build_set_params(ctx->xch, domid, info, state->store_port,
1180 &state->store_mfn, state->console_port,
1181 &state->console_mfn, state->store_domid,
1182 state->console_domid);
1183 if (rc != 0) {
1184 LOG(ERROR, "hvm build set params failed");
1185 goto out;
1186 }
1187
1188 rc = hvm_build_set_xs_values(gc, domid, dom, info);
1189 if (rc != 0) {
1190 LOG(ERROR, "hvm build set xenstore values failed");
1191 goto out;
1192 }
1193
1194 xc_dom_release(dom);
1195 return 0;
1196
1197 out:
1198 assert(rc != 0);
1199 if (dom != NULL) xc_dom_release(dom);
1200 return rc;
1201 }
1202
int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
                                const char *cmd)
1205 {
1206 char *path = NULL;
1207 uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
1208 path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/command");
1209 return libxl__xs_printf(gc, XBT_NULL, path, "%s", cmd);
1210 }
1211
1212 /*==================== Miscellaneous ====================*/
1213
char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
1215 {
1216 return GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(uuid));
1217 }
1218
const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
                                 const char *userdata_userid,
                                 const char *wh)
1222 {
1223 libxl_ctx *ctx = libxl__gc_owner(gc);
1224 char *uuid_string, *path;
1225 libxl_dominfo info;
1226 int rc;
1227
1228 libxl_dominfo_init(&info);
1229
1230 rc = libxl_domain_info(ctx, &info, domid);
1231 if (rc) {
1232 LOGE(ERROR, "unable to find domain info for domain %"PRIu32, domid);
1233 path = NULL;
1234 goto out;
1235 }
1236 uuid_string = GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(info.uuid));
1237 path = GCSPRINTF(XEN_LIB_DIR "/userdata-%s.%u.%s.%s",
1238 wh, domid, uuid_string, userdata_userid);
1239
1240 out:
1241 libxl_dominfo_dispose(&info);
1242 return path;
1243 }
1244
static int userdata_delete(libxl__gc *gc, const char *path)
1246 {
1247 int r;
1248 r = unlink(path);
1249 if (r) {
1250 LOGE(ERROR, "remove failed for %s", path);
1251 return errno;
1252 }
1253 return 0;
1254 }
1255
void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid)
1257 {
1258 const char *pattern;
1259 glob_t gl;
1260 int r, i;
1261
1262 pattern = libxl__userdata_path(gc, domid, "*", "?");
1263 if (!pattern)
1264 goto out;
1265
1266 gl.gl_pathc = 0;
1267 gl.gl_pathv = 0;
1268 gl.gl_offs = 0;
1269 r = glob(pattern, GLOB_ERR|GLOB_NOSORT|GLOB_MARK, 0, &gl);
1270 if (r == GLOB_NOMATCH)
1271 goto out;
1272 if (r)
1273 LOGE(ERROR, "glob failed for %s", pattern);
1274
    /* Note: don't delete domain-userdata-lock; it will be handled by the
     * unlock function.
     */
1278 for (i=0; i<gl.gl_pathc; i++) {
1279 if (!strstr(gl.gl_pathv[i], "domain-userdata-lock"))
1280 userdata_delete(gc, gl.gl_pathv[i]);
1281 }
1282 globfree(&gl);
1283 out:
1284 return;
1285 }
1286
int libxl__userdata_store(libxl__gc *gc, uint32_t domid,
                          const char *userdata_userid,
                          const uint8_t *data, int datalen)
1290 {
1291 const char *filename;
1292 const char *newfilename;
1293 int e, rc;
1294 int fd = -1;
1295
1296 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1297 if (!filename) {
1298 rc = ERROR_NOMEM;
1299 goto out;
1300 }
1301
1302 if (!datalen) {
1303 rc = userdata_delete(gc, filename);
1304 goto out;
1305 }
1306
1307 newfilename = libxl__userdata_path(gc, domid, userdata_userid, "n");
1308 if (!newfilename) {
1309 rc = ERROR_NOMEM;
1310 goto out;
1311 }
1312
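    /*
     * Write the data to the "n" (new) file first, then atomically rename()
     * it over the "d" (data) file, so readers never see a partial write.
     */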
1313 rc = ERROR_FAIL;
1314
1315 fd = open(newfilename, O_RDWR | O_CREAT | O_TRUNC, 0600);
1316 if (fd < 0)
1317 goto err;
1318
1319 if (libxl_write_exactly(CTX, fd, data, datalen, "userdata", newfilename))
1320 goto err;
1321
1322 if (close(fd) < 0) {
1323 fd = -1;
1324 goto err;
1325 }
1326 fd = -1;
1327
1328 if (rename(newfilename, filename))
1329 goto err;
1330
1331 rc = 0;
1332
1333 err:
1334 if (fd >= 0) {
1335 e = errno;
1336 close(fd);
1337 errno = e;
1338 }
1339
1340 if (rc)
1341 LOGE(ERROR, "cannot write/rename %s for %s", newfilename, filename);
1342 out:
1343 return rc;
1344 }
1345
int libxl_userdata_store(libxl_ctx *ctx, uint32_t domid,
                         const char *userdata_userid,
                         const uint8_t *data, int datalen)
1349 {
1350 GC_INIT(ctx);
1351 int rc;
1352 libxl__flock *lock;
1353
1354 CTX_LOCK;
1355 lock = libxl__lock_domain_userdata(gc, domid);
1356 if (!lock) {
1357 rc = ERROR_LOCK_FAIL;
1358 goto out;
1359 }
1360
1361 rc = libxl__userdata_store(gc, domid, userdata_userid,
1362 data, datalen);
1363
1364 libxl__unlock_file(lock);
1365
1366 out:
1367 CTX_UNLOCK;
1368 GC_FREE;
1369 return rc;
1370 }
1371
int libxl__userdata_retrieve(libxl__gc *gc, uint32_t domid,
                             const char *userdata_userid,
                             uint8_t **data_r, int *datalen_r)
1375 {
1376 const char *filename;
1377 int e, rc;
1378 int datalen = 0;
1379 void *data = 0;
1380
1381 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1382 if (!filename) {
1383 rc = ERROR_NOMEM;
1384 goto out;
1385 }
1386
1387 e = libxl_read_file_contents(CTX, filename, data_r ? &data : 0, &datalen);
1388 if (e && errno != ENOENT) {
1389 rc = ERROR_FAIL;
1390 goto out;
1391 }
1392 if (!e && !datalen) {
1393 LOG(ERROR, "userdata file %s is empty", filename);
1394 if (data_r) assert(!*data_r);
1395 rc = ERROR_FAIL;
1396 goto out;
1397 }
1398
1399 if (data_r) *data_r = data;
1400 if (datalen_r) *datalen_r = datalen;
1401 rc = 0;
1402
1403 out:
1404 return rc;
1405 }
1406
int libxl_userdata_retrieve(libxl_ctx *ctx, uint32_t domid,
                            const char *userdata_userid,
                            uint8_t **data_r, int *datalen_r)
1410 {
1411 GC_INIT(ctx);
1412 int rc;
1413 libxl__flock *lock;
1414
1415 CTX_LOCK;
1416 lock = libxl__lock_domain_userdata(gc, domid);
1417 if (!lock) {
1418 rc = ERROR_LOCK_FAIL;
1419 goto out;
1420 }
1421
1422 rc = libxl__userdata_retrieve(gc, domid, userdata_userid,
1423 data_r, datalen_r);
1424
1425
1426 libxl__unlock_file(lock);
1427 out:
1428 CTX_UNLOCK;
1429 GC_FREE;
1430 return rc;
1431 }
1432
int libxl_userdata_unlink(libxl_ctx *ctx, uint32_t domid,
                          const char *userdata_userid)
1435 {
1436 GC_INIT(ctx);
1437 CTX_LOCK;
1438
1439 int rc;
1440 libxl__flock *lock = NULL;
1441 const char *filename;
1442
1443 lock = libxl__lock_domain_userdata(gc, domid);
1444 if (!lock) {
1445 rc = ERROR_LOCK_FAIL;
1446 goto out;
1447 }
1448
1449 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1450 if (!filename) {
1451 rc = ERROR_FAIL;
1452 goto out;
1453 }
1454 if (unlink(filename)) {
1455 LOGE(ERROR, "error deleting userdata file: %s", filename);
1456 rc = ERROR_FAIL;
1457 goto out;
1458 }
1459
1460 rc = 0;
1461 out:
1462 if (lock)
1463 libxl__unlock_file(lock);
1464 CTX_UNLOCK;
1465 GC_FREE;
1466 return rc;
1467 }
1468
1469 /*
1470 * Local variables:
1471 * mode: C
1472 * c-basic-offset: 4
1473 * indent-tabs-mode: nil
1474 * End:
1475 */
1476