1 #include "libxl_internal.h"
2 #include "libxl_arch.h"
3
4 #include <xc_dom.h>
5
/*
 * Select the set of devices Xen must emulate for this guest type and
 * record it in the domain-creation hypercall arguments.
 *
 * Returns 0; aborts on an invalid domain type.
 */
int libxl__arch_domain_prepare_config(libxl__gc *gc,
                                      libxl_domain_config *d_config,
                                      struct xen_domctl_createdomain *config)
{
    uint32_t emu_flags;

    if (d_config->c_info.type == LIBXL_DOMAIN_TYPE_HVM)
        /* Full emulation, except vPCI which libxl does not use for HVM. */
        emu_flags = XEN_X86_EMU_ALL & ~XEN_X86_EMU_VPCI;
    else if (d_config->c_info.type == LIBXL_DOMAIN_TYPE_PVH)
        /* PVH guests get an emulated local APIC only. */
        emu_flags = XEN_X86_EMU_LAPIC;
    else if (d_config->c_info.type == LIBXL_DOMAIN_TYPE_PV)
        /* PV guests have no emulated devices at all. */
        emu_flags = 0;
    else
        abort();

    config->arch.emulation_flags = emu_flags;

    return 0;
}
26
/*
 * Hook to copy arch-specific creation parameters back into d_config so
 * they survive save/restore. Nothing to record on x86.
 */
int libxl__arch_domain_save_config(libxl__gc *gc,
                                   libxl_domain_config *d_config,
                                   libxl__domain_build_state *state,
                                   const struct xen_domctl_createdomain *config)
{
    /* No x86-specific state to preserve. */
    return 0;
}
34
e820_names(int type)35 static const char *e820_names(int type)
36 {
37 switch (type) {
38 case E820_RAM: return "RAM";
39 case E820_RESERVED: return "Reserved";
40 case E820_ACPI: return "ACPI";
41 case E820_NVS: return "ACPI NVS";
42 case E820_UNUSABLE: return "Unusable";
43 default: break;
44 }
45 return "Unknown";
46 }
47
/*
 * Sanitize a host-provided E820 map for use by a PV guest.
 *
 * Builds, in place in @src (at most E820MAX entries), a map consisting of:
 *  - one RAM region of @map_limitkb starting at 0, trimmed so it does not
 *    run into the first reserved host region;
 *  - the host's non-RAM regions (Reserved, ACPI, NVS, ...), with host RAM
 *    regions below 4GB converted to E820_UNUSABLE so a Linux guest does
 *    not treat the gaps as PCI I/O space (needed for Intel IGD
 *    passthrough, see the long comment below);
 *  - a trailing RAM region holding any memory trimmed off the low region
 *    plus @balloon_kb of balloonable memory.
 *
 * @nr_entries is in/out: number of valid entries in @src on entry, number
 * of entries in the sanitized map on successful return.
 *
 * Returns 0 on success, ERROR_INVAL on bad arguments, ERROR_NOMEM if the
 * result would not fit in E820MAX entries.
 */
static int e820_sanitize(libxl__gc *gc, struct e820entry src[],
                         uint32_t *nr_entries,
                         unsigned long map_limitkb,
                         unsigned long balloon_kb)
{
    uint64_t delta_kb = 0, start = 0, start_kb = 0, last = 0, ram_end;
    uint32_t i, idx = 0, nr;
    struct e820entry e820[E820MAX];

    if (!src || !map_limitkb || !nr_entries)
        return ERROR_INVAL;

    nr = *nr_entries;
    if (!nr)
        return ERROR_INVAL;

    if (nr > E820MAX)
        return ERROR_NOMEM;

    /* Weed out anything under 1MB */
    for (i = 0; i < nr; i++) {
        if (src[i].addr > 0x100000)
            continue;

        src[i].type = 0;
        src[i].size = 0;
        src[i].addr = -1ULL;
    }

    /* Find the lowest and highest entry in E820, skipping over
     * undesired entries. */
    start = -1ULL;
    last = 0;
    for (i = 0; i < nr; i++) {
        if ((src[i].type == E820_RAM) ||
            (src[i].type == E820_UNUSABLE) ||
            (src[i].type == 0))
            continue;

        start = src[i].addr < start ? src[i].addr : start;
        /* BUGFIX: the original assigned the result of the comparison
         * (0 or 1) to 'last' instead of the region's end address. */
        last = src[i].addr + src[i].size > last ?
               src[i].addr + src[i].size : last;
    }
    if (start > 1024)
        start_kb = start >> 10;

    /* Add the memory RAM region for the guest */
    e820[idx].addr = 0;
    e820[idx].size = (uint64_t)map_limitkb << 10;
    e820[idx].type = E820_RAM;

    /* .. and trim if necessary so it stops before the first reserved
     * host region. */
    if (start_kb && map_limitkb > start_kb) {
        delta_kb = map_limitkb - start_kb;
        if (delta_kb)
            e820[idx].size -= delta_kb << 10;
    }
    /* Note: We don't touch balloon_kb here. Will add it at the end. */
    ram_end = e820[idx].addr + e820[idx].size;
    idx++;

    LOG(DEBUG, "Memory: %"PRIu64"kB End of RAM: " \
        "0x%"PRIx64" (PFN) Delta: %"PRIu64"kB, PCI start: %"PRIu64"kB " \
        "(0x%"PRIx64" PFN), Balloon %"PRIu64"kB\n", (uint64_t)map_limitkb,
        ram_end >> 12, delta_kb, start_kb ,start >> 12,
        (uint64_t)balloon_kb);


    /* This whole code below is to guard against if the Intel IGD is passed into
     * the guest. If we don't pass in IGD, this whole code can be ignored.
     *
     * The reason for this code is that Intel boxes fill their E820 with
     * E820_RAM amongst E820_RESERVED and we can't just ditch those E820_RAM.
     * That is b/c any "gaps" in the E820 is considered PCI I/O space by
     * Linux and it would be utilized by the Intel IGD as I/O space while
     * in reality it was an RAM region.
     *
     * What this means is that we have to walk the E820 and for any region
     * that is RAM and below 4GB and above ram_end, needs to change its type
     * to E820_UNUSED. We also need to move some of the E820_RAM regions if
     * the overlap with ram_end. */
    for (i = 0; i < nr; i++) {
        uint64_t end = src[i].addr + src[i].size;

        /* We don't care about E820_UNUSABLE, but we need to
         * change the type to zero b/c the loop after this
         * sticks E820_UNUSABLE on the guest's E820 but ignores
         * the ones with type zero. */
        if ((src[i].type == E820_UNUSABLE) ||
            /* Any region that is within the "RAM region" can
             * be safely ditched. */
            (end < ram_end)) {
            src[i].type = 0;
            continue;
        }

        /* Look only at RAM regions. */
        if (src[i].type != E820_RAM)
            continue;

        /* We only care about RAM regions below 4GB. */
        if (src[i].addr >= (1ULL<<32))
            continue;

        /* E820_RAM overlaps with our RAM region. Move it */
        if (src[i].addr < ram_end) {
            uint64_t delta;

            src[i].type = E820_UNUSABLE;
            delta = ram_end - src[i].addr;
            /* The end < ram_end should weed this out */
            if (src[i].size < delta)
                src[i].type = 0;
            else {
                src[i].size -= delta;
                src[i].addr = ram_end;
            }
            if (src[i].addr + src[i].size != end) {
                /* We messed up somewhere */
                src[i].type = 0;
                LOGE(ERROR, "Computed E820 wrongly. Continuing on.");
            }
        }
        /* Lastly, convert the RAM to UNUSABLE. Look in the Linux kernel
           at git commit 2f14ddc3a7146ea4cd5a3d1ecd993f85f2e4f948
           "xen/setup: Inhibit resource API from using System RAM E820
           gaps as PCI mem gaps" for full explanation. */
        if (end > ram_end)
            src[i].type = E820_UNUSABLE;
    }

    /* Check if there is a region between ram_end and start. */
    if (start > ram_end) {
        int add_unusable = 1;
        for (i = 0; i < nr && add_unusable; i++) {
            if (src[i].type != E820_UNUSABLE)
                continue;
            if (ram_end != src[i].addr)
                continue;
            if (start != src[i].addr + src[i].size) {
                /* there is one, adjust it */
                src[i].size = start - src[i].addr;
            }
            add_unusable = 0;
        }
        /* .. and if not present, add it in. This is to guard against
           the Linux guest assuming that the gap between the end of
           RAM region and the start of the E820_[ACPI,NVS,RESERVED]
           is PCI I/O space. Which it certainly is _not_. */
        if (add_unusable) {
            e820[idx].type = E820_UNUSABLE;
            e820[idx].addr = ram_end;
            e820[idx].size = start - ram_end;
            idx++;
        }
    }
    /* Almost done: copy them over, ignoring the undesirable ones */
    for (i = 0; i < nr; i++) {
        if ((src[i].type == E820_RAM) ||
            (src[i].type == 0))
            continue;

        /* Guard the fixed-size output array (the original could
         * overflow it when nr was close to E820MAX). */
        if (idx == E820MAX)
            return ERROR_NOMEM;

        e820[idx].type = src[i].type;
        e820[idx].addr = src[i].addr;
        e820[idx].size = src[i].size;
        idx++;
    }
    /* At this point we have the mapped RAM + E820 entries from src. */
    if (balloon_kb || delta_kb) {
        if (idx == E820MAX)
            return ERROR_NOMEM;
        /* and if we truncated the RAM region, then add it to the end. */
        e820[idx].type = E820_RAM;
        e820[idx].addr = (uint64_t)(1ULL << 32) > last ?
                         (uint64_t)(1ULL << 32) : last;
        /* also add the balloon memory to the end; widen balloon_kb
         * before shifting so a 32-bit unsigned long cannot overflow. */
        e820[idx].size = (delta_kb << 10) + ((uint64_t)balloon_kb << 10);
        idx++;

    }
    nr = idx;

    for (i = 0; i < nr; i++) {
        LOG(DEBUG, ":\t[%"PRIx64" -> %"PRIx64"] %s", e820[i].addr >> 12,
            (e820[i].addr + e820[i].size) >> 12, e820_names(e820[i].type));
    }

    /* Done: copy the sanitized version. */
    *nr_entries = nr;
    memcpy(src, e820, nr * sizeof(struct e820entry));
    return 0;
}
239
/*
 * Fetch the host's E820 map and massage it to fit the guest's memory
 * configuration. On entry *nr is the capacity of @map; on successful
 * return it holds the number of sanitized entries.
 */
static int e820_host_sanitize(libxl__gc *gc,
                              libxl_domain_build_info *b_info,
                              struct e820entry map[],
                              uint32_t *nr)
{
    int ret = xc_get_machine_memory_map(CTX->xch, map, *nr);

    if (ret < 0)
        return ERROR_FAIL;

    *nr = ret;

    /* Balloonable slack = (max - target) plus any explicit PV slack. */
    return e820_sanitize(gc, map, nr, b_info->target_memkb,
                         (b_info->max_memkb - b_info->target_memkb) +
                         b_info->u.pv.slack_memkb);
}
258
/*
 * Install a host-derived E820 map into a PV guest. Only valid for PV
 * domains that have e820_host enabled.
 *
 * Returns 0 on success, ERROR_INVAL/ERROR_FAIL otherwise.
 */
static int libxl__e820_alloc(libxl__gc *gc, uint32_t domid,
                             libxl_domain_config *d_config)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    struct e820entry map[E820MAX];
    libxl_domain_build_info *b_info;
    uint32_t nr = E820MAX;

    if (d_config == NULL || d_config->c_info.type != LIBXL_DOMAIN_TYPE_PV)
        return ERROR_INVAL;

    b_info = &d_config->b_info;
    if (!libxl_defbool_val(b_info->u.pv.e820_host))
        return ERROR_INVAL;

    if (e820_host_sanitize(gc, b_info, map, &nr))
        return ERROR_FAIL;

    if (xc_domain_set_memory_map(ctx->xch, domid, map, nr) < 0)
        return ERROR_FAIL;

    return 0;
}
287
timer_mode(const libxl_domain_build_info * info)288 static unsigned long timer_mode(const libxl_domain_build_info *info)
289 {
290 const libxl_timer_mode mode = info->timer_mode;
291 assert(mode >= LIBXL_TIMER_MODE_DELAY_FOR_MISSED_TICKS &&
292 mode <= LIBXL_TIMER_MODE_ONE_MISSED_TICK_PENDING);
293 return ((unsigned long)mode);
294 }
295
/*
 * Translate the libxl viridian (Hyper-V enlightenment) configuration
 * into the HVM_PARAM_VIRIDIAN feature mask and set it on the domain.
 *
 * The enabled set starts from defaults (if u.hvm.viridian is true), is
 * adjusted by the viridian_enable/viridian_disable bitmaps, validated,
 * and then mapped bit-by-bit onto the HVMPV_* mask.
 *
 * Returns 0 on success, ERROR_FAIL on inconsistent configuration or if
 * setting the HVM param fails.
 */
static int hvm_set_viridian_features(libxl__gc *gc, uint32_t domid,
                                     const libxl_domain_build_info *info)
{
    libxl_bitmap enlightenments;
    libxl_viridian_enlightenment v;
    uint64_t mask = 0;

    libxl_bitmap_init(&enlightenments);
    libxl_bitmap_alloc(CTX, &enlightenments,
                       LIBXL_BUILDINFO_HVM_VIRIDIAN_ENABLE_DISABLE_WIDTH);

    if (libxl_defbool_val(info->u.hvm.viridian)) {
        /* Enable defaults */
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL);
    }

    /* Apply explicit enables; enabling and disabling the same group is
     * a configuration error. */
    libxl_for_each_set_bit(v, info->u.hvm.viridian_enable) {
        if (libxl_bitmap_test(&info->u.hvm.viridian_disable, v)) {
            LOG(ERROR, "%s group both enabled and disabled",
                libxl_viridian_enlightenment_to_string(v));
            goto err;
        }
        if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
            libxl_bitmap_set(&enlightenments, v);
    }

    /* Explicit disables override the defaults set above. */
    libxl_for_each_set_bit(v, info->u.hvm.viridian_disable)
        if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
            libxl_bitmap_reset(&enlightenments, v);

    /* The base set is a pre-requisite for all others */
    if (!libxl_bitmap_is_empty(&enlightenments) &&
        !libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
        LOG(ERROR, "base group not enabled");
        goto err;
    }

    libxl_for_each_set_bit(v, enlightenments)
        LOG(DETAIL, "%s group enabled", libxl_viridian_enlightenment_to_string(v));

    /* Map the validated enlightenment set onto the HVMPV_* mask. */
    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
        mask |= HVMPV_base_freq;

        /* FREQ is expressed as the absence of HVMPV_no_freq. */
        if (!libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ))
            mask |= HVMPV_no_freq;
    }

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT))
        mask |= HVMPV_time_ref_count;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_REFERENCE_TSC))
        mask |= HVMPV_reference_tsc;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_HCALL_REMOTE_TLB_FLUSH))
        mask |= HVMPV_hcall_remote_tlb_flush;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST))
        mask |= HVMPV_apic_assist;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL))
        mask |= HVMPV_crash_ctl;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_SYNIC))
        mask |= HVMPV_synic;

    /* STIMER pulls in its prerequisites: time_ref_count and synic. */
    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_STIMER))
        mask |= HVMPV_time_ref_count | HVMPV_synic | HVMPV_stimer;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_HCALL_IPI))
        mask |= HVMPV_hcall_ipi;

    if (mask != 0 &&
        xc_hvm_param_set(CTX->xch,
                         domid,
                         HVM_PARAM_VIRIDIAN,
                         mask) != 0) {
        LOGE(ERROR, "Couldn't set viridian feature mask (0x%"PRIx64")", mask);
        goto err;
    }

    libxl_bitmap_dispose(&enlightenments);
    return 0;

err:
    libxl_bitmap_dispose(&enlightenments);
    return ERROR_FAIL;
}
387
/*
 * Set the HVM params derived from the domain configuration.
 *
 * HVM-only params (HPET, VPT alignment, MCA capabilities) are set for
 * HVM guests; the switch then falls through so the params shared with
 * PVH (timer mode, nested HVM, altp2m) are set for both types.
 *
 * Returns 0 on success, ERROR_FAIL if any param could not be set.
 */
static int hvm_set_conf_params(libxl__gc *gc, uint32_t domid,
                               const libxl_domain_build_info *info)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    xc_interface *xch = ctx->xch;
    int ret = ERROR_FAIL;
    unsigned int altp2m = info->altp2m;

    switch(info->type) {
    case LIBXL_DOMAIN_TYPE_HVM:
        /* The config parameter "altp2m" replaces the parameter "altp2mhvm". For
         * legacy reasons, both parameters are accepted on x86 HVM guests.
         *
         * If the legacy field info->u.hvm.altp2m is set, activate altp2m.
         * Otherwise set altp2m based on the field info->altp2m. */
        if (info->altp2m == LIBXL_ALTP2M_MODE_DISABLED &&
            libxl_defbool_val(info->u.hvm.altp2m))
            altp2m = libxl_defbool_val(info->u.hvm.altp2m);

        if (xc_hvm_param_set(xch, domid, HVM_PARAM_HPET_ENABLED,
                             libxl_defbool_val(info->u.hvm.hpet))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_HPET_ENABLED");
            goto out;
        }
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_VPT_ALIGN,
                             libxl_defbool_val(info->u.hvm.vpt_align))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_VPT_ALIGN");
            goto out;
        }
        /* Use the local xch handle for consistency with the other calls
         * (this call previously went through CTX->xch). */
        if (info->u.hvm.mca_caps &&
            xc_hvm_param_set(xch, domid, HVM_PARAM_MCA_CAP,
                             info->u.hvm.mca_caps)) {
            LOG(ERROR, "Couldn't set HVM_PARAM_MCA_CAP");
            goto out;
        }

        /* Fallthrough */
    case LIBXL_DOMAIN_TYPE_PVH:
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_TIMER_MODE,
                             timer_mode(info))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_TIMER_MODE");
            goto out;
        }
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_NESTEDHVM,
                             libxl_defbool_val(info->nested_hvm))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_NESTEDHVM");
            goto out;
        }
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_ALTP2M, altp2m)) {
            LOG(ERROR, "Couldn't set HVM_PARAM_ALTP2M");
            goto out;
        }
        break;

    default:
        abort();
    }

    ret = 0;

 out:
    return ret;
}
451
/*
 * x86-specific post-creation domain setup: HVM/PVH params, viridian
 * features, memmap limit, TSC mode, RTC offset, shadow allocation and
 * (for PV guests with e820_host) a host-derived E820 map.
 *
 * Returns 0 on success, ERROR_* on failure.
 */
int libxl__arch_domain_create(libxl__gc *gc, libxl_domain_config *d_config,
                              uint32_t domid)
{
    const libxl_domain_build_info *info = &d_config->b_info;
    int ret = 0;
    int tsc_mode;
    uint32_t rtc_timeoffset;
    libxl_ctx *ctx = libxl__gc_owner(gc);

    /* HVM and PVH share most of the HVM param setup. */
    if (info->type != LIBXL_DOMAIN_TYPE_PV &&
        (ret = hvm_set_conf_params(gc, domid, info)) != 0)
        goto out;

    /* Viridian (Hyper-V) enlightenments apply to HVM guests only. */
    if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
        (ret = hvm_set_viridian_features(gc, domid, info)) != 0)
        goto out;

    /* NOTE(review): the return value is ignored here in the original
     * code — a failed memmap-limit set is apparently not fatal. */
    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PV)
        xc_domain_set_memmap_limit(ctx->xch, domid,
                                   (d_config->b_info.max_memkb +
                                    d_config->b_info.u.pv.slack_memkb));

    /* Map the libxl TSC mode onto Xen's numeric TSC mode values. */
    switch (d_config->b_info.tsc_mode) {
    case LIBXL_TSC_MODE_DEFAULT:
        tsc_mode = 0;
        break;
    case LIBXL_TSC_MODE_ALWAYS_EMULATE:
        tsc_mode = 1;
        break;
    case LIBXL_TSC_MODE_NATIVE:
        tsc_mode = 2;
        break;
    case LIBXL_TSC_MODE_NATIVE_PARAVIRT:
        LOGD(ERROR, domid, "TSC Mode native_paravirt (a.k.a PVRDTSCP) has been removed");
        ret = ERROR_FEATURE_REMOVED;
        goto out;
    default:
        abort();
    }

    if (xc_domain_set_tsc_info(ctx->xch, domid, tsc_mode, 0, 0, 0)) {
        LOGE(ERROR, "xc_domain_set_tsc_info() failed");
        ret = ERROR_FAIL;
        goto out;
    }

    /* If localtime is requested, bias the RTC offset by the host's
     * current UTC offset. */
    rtc_timeoffset = d_config->b_info.rtc_timeoffset;
    if (libxl_defbool_val(d_config->b_info.localtime)) {
        time_t t;
        struct tm *tm, result;

        t = time(NULL);
        tm = localtime_r(&t, &result);

        if (!tm) {
            LOGED(ERROR, domid, "Failed to call localtime_r");
            ret = ERROR_FAIL;
            goto out;
        }

        rtc_timeoffset += tm->tm_gmtoff;
    }

    if (rtc_timeoffset)
        xc_domain_set_time_offset(ctx->xch, domid, rtc_timeoffset);

    /* Non-PV guests need a shadow/HAP memory pool, sized in MB. */
    if (d_config->b_info.type != LIBXL_DOMAIN_TYPE_PV) {
        unsigned long shadow = DIV_ROUNDUP(d_config->b_info.shadow_memkb,
                                           1024);
        xc_shadow_control(ctx->xch, domid, XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION,
                          NULL, 0, &shadow, 0, NULL);
    }

    /* PV guests configured with e820_host get the sanitized host map. */
    if (d_config->c_info.type == LIBXL_DOMAIN_TYPE_PV &&
        libxl_defbool_val(d_config->b_info.u.pv.e820_host)) {
        ret = libxl__e820_alloc(gc, domid, d_config);
        if (ret) {
            LOGED(ERROR, domid, "Failed while collecting E820 with: %d (errno:%d)\n",
                  ret, errno);
        }
    }

 out:
    return ret;
}
537
/*
 * Report the arch-specific extra memory overhead for a domain.
 * On x86 this is a fixed constant, independent of the build info.
 */
int libxl__arch_extra_memory(libxl__gc *gc,
                             const libxl_domain_build_info *info,
                             uint64_t *out)
{
    *out = LIBXL_MAXMEM_CONSTANT;
    return 0;
}
546
/*
 * Hook for initialising an arch hardware description (device tree on
 * Arm). Nothing to do on x86.
 */
int libxl__arch_domain_init_hw_description(libxl__gc *gc,
                                           libxl_domain_build_info *info,
                                           libxl__domain_build_state *state,
                                           struct xc_dom_image *dom)
{
    /* No-op on x86. */
    return 0;
}
554
/*
 * Hook for arch-specific work after the domain image is built.
 * Nothing to do on x86.
 */
int libxl__arch_build_dom_finish(libxl__gc *gc,
                                 libxl_domain_build_info *info,
                                 struct xc_dom_image *dom,
                                 libxl__domain_build_state *state)
{
    /* No-op on x86. */
    return 0;
}
562
/*
 * Build the vmemrange array for a vNUMA guest. For PV guests using the
 * host E820 (e820_host=1), each vnode's memory is carved out of the
 * sanitized host RAM regions in E820 order, splitting regions across
 * vnodes where necessary; otherwise the generic PV builder is used.
 *
 * Return 0 on success, ERROR_* on failure.
 */
int libxl__arch_vnuma_build_vmemrange(libxl__gc *gc,
                                      uint32_t domid,
                                      libxl_domain_build_info *b_info,
                                      libxl__domain_build_state *state)
{
    int nid, nr_vmemrange, rc;
    uint32_t nr_e820, e820_count;
    struct e820entry map[E820MAX];
    xen_vmemrange_t *vmemranges;
    unsigned int array_size;

    /* If e820_host is not set, call the generic function */
    if (!(b_info->type == LIBXL_DOMAIN_TYPE_PV &&
          libxl_defbool_val(b_info->u.pv.e820_host)))
        return libxl__vnuma_build_vmemrange_pv_generic(gc, domid, b_info,
                                                       state);

    assert(state->vmemranges == NULL);

    nr_e820 = E820MAX;
    rc = e820_host_sanitize(gc, b_info, map, &nr_e820);
    if (rc) goto out;

    e820_count = 0;
    nr_vmemrange = 0;
    vmemranges = NULL;
    array_size = 0;
    for (nid = 0; nid < b_info->num_vnuma_nodes; nid++) {
        libxl_vnode_info *p = &b_info->vnuma_nodes[nid];
        uint64_t remaining_bytes = (p->memkb << 10), bytes;

        /* Satisfy this vnode from successive E820 RAM regions; the map
         * is consumed in place so the next vnode picks up where this
         * one stopped. */
        while (remaining_bytes > 0) {
            /* Ran out of host RAM regions before filling the vnode. */
            if (e820_count >= nr_e820) {
                rc = ERROR_NOMEM;
                goto out;
            }

            /* Skip non RAM region */
            if (map[e820_count].type != E820_RAM) {
                e820_count++;
                continue;
            }

            /* Grow the output array in chunks of 32 entries. */
            if (nr_vmemrange >= array_size) {
                array_size += 32;
                GCREALLOC_ARRAY(vmemranges, array_size);
            }

            bytes = map[e820_count].size >= remaining_bytes ?
                remaining_bytes : map[e820_count].size;

            vmemranges[nr_vmemrange].start = map[e820_count].addr;
            vmemranges[nr_vmemrange].end = map[e820_count].addr + bytes;

            if (map[e820_count].size >= remaining_bytes) {
                /* Region only partially consumed: shrink it in place. */
                map[e820_count].addr += bytes;
                map[e820_count].size -= bytes;
            } else {
                /* Region fully consumed: advance to the next one. */
                e820_count++;
            }

            remaining_bytes -= bytes;

            vmemranges[nr_vmemrange].flags = 0;
            vmemranges[nr_vmemrange].nid = nid;
            nr_vmemrange++;
        }
    }

    state->vmemranges = vmemranges;
    state->num_vmemranges = nr_vmemrange;

    rc = 0;
out:
    return rc;
}
640
/*
 * Map a physical IRQ into the domain as a pirq and grant the domain
 * permission to use it. Returns 0 on success, a libxc error otherwise.
 */
int libxl__arch_domain_map_irq(libxl__gc *gc, uint32_t domid, int irq)
{
    int r = xc_physdev_map_pirq(CTX->xch, domid, irq, &irq);

    if (r)
        return r;

    return xc_domain_irq_permission(CTX->xch, domid, irq, 1);
}
653
654 /*
655 * Here we're just trying to set these kinds of e820 mappings:
656 *
657 * #1. Low memory region
658 *
659 * Low RAM starts at least from 1M to make sure all standard regions
660 * of the PC memory map, like BIOS, VGA memory-mapped I/O and vgabios,
661 * have enough space.
 * Note: The regions below 1M are still constructed with multiple
 * e820 entries by hvmloader. At this point we don't change anything.
664 *
665 * #2. RDM region if it exists
666 *
667 * #3. High memory region if it exists
668 *
669 * Note: these regions are not overlapping since we already check
670 * to adjust them. Please refer to libxl__domain_device_construct_rdm().
671 */
672 #define GUEST_LOW_MEM_START_DEFAULT 0x100000
/*
 * Build the guest's e820 memory map from the domain configuration and
 * the populated xc_dom_image, then hand it to Xen via
 * xc_domain_set_memory_map(). The entry count is computed first and
 * must stay in lockstep with the fill code below. See the layout
 * description in the comment above.
 *
 * Returns 0 on success, ERROR_INVAL/ERROR_FAIL on failure.
 */
static int domain_construct_memmap(libxl__gc *gc,
                                   libxl_domain_config *d_config,
                                   uint32_t domid,
                                   struct xc_dom_image *dom)
{
    int rc = 0;
    unsigned int nr = 0, i;
    /* We always own at least one lowmem entry. */
    unsigned int e820_entries = 1;
    struct e820entry *e820 = NULL;
    uint64_t highmem_size =
        dom->highmem_end ? dom->highmem_end - (1ull << 32) : 0;
    /* With a device model the low RAM starts at 1MB to leave room for
     * the standard PC regions (BIOS, VGA, ...); otherwise at 0. */
    uint32_t lowmem_start = dom->device_model ? GUEST_LOW_MEM_START_DEFAULT : 0;
    unsigned page_size = XC_DOM_PAGE_SIZE(dom);

    /* Add all rdm entries. */
    for (i = 0; i < d_config->num_rdms; i++)
        if (d_config->rdms[i].policy != LIBXL_RDM_RESERVE_POLICY_INVALID)
            e820_entries++;

    /* Add the HVM special pages to PVH memmap as RESERVED. */
    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PVH)
        e820_entries++;

    /* If we should have a highmem range. */
    if (highmem_size)
        e820_entries++;

    /* One entry per loaded ACPI module blob. */
    for (i = 0; i < MAX_ACPI_MODULES; i++)
        if (dom->acpi_modules[i].length)
            e820_entries++;

    if (e820_entries >= E820MAX) {
        LOGD(ERROR, domid, "Ooops! Too many entries in the memory map!");
        rc = ERROR_INVAL;
        goto out;
    }

    e820 = libxl__malloc(gc, sizeof(struct e820entry) * e820_entries);

    /* Low memory */
    e820[nr].addr = lowmem_start;
    e820[nr].size = dom->lowmem_end - lowmem_start;
    e820[nr].type = E820_RAM;
    nr++;

    /* RDM mapping */
    for (i = 0; i < d_config->num_rdms; i++) {
        if (d_config->rdms[i].policy == LIBXL_RDM_RESERVE_POLICY_INVALID)
            continue;

        e820[nr].addr = d_config->rdms[i].start;
        e820[nr].size = d_config->rdms[i].size;
        e820[nr].type = E820_RESERVED;
        nr++;
    }

    /* HVM special pages */
    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PVH) {
        e820[nr].addr = (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES)
                        << XC_PAGE_SHIFT;
        e820[nr].size = X86_HVM_NR_SPECIAL_PAGES << XC_PAGE_SHIFT;
        e820[nr].type = E820_RESERVED;
        nr++;
    }

    /* ACPI module blobs: round the start down to a page boundary and
     * grow the size by the in-page offset so the module is covered. */
    for (i = 0; i < MAX_ACPI_MODULES; i++) {
        if (dom->acpi_modules[i].length) {
            e820[nr].addr = dom->acpi_modules[i].guest_addr_out & ~(page_size - 1);
            e820[nr].size = dom->acpi_modules[i].length +
                (dom->acpi_modules[i].guest_addr_out & (page_size - 1));
            e820[nr].type = E820_ACPI;
            nr++;
        }
    }

    /* High memory */
    if (highmem_size) {
        e820[nr].addr = ((uint64_t)1 << 32);
        e820[nr].size = highmem_size;
        e820[nr].type = E820_RAM;
    }

    if (xc_domain_set_memory_map(CTX->xch, domid, e820, e820_entries) != 0) {
        rc = ERROR_FAIL;
        goto out;
    }

    /* Keep the map on the image so later build steps can consult it. */
    dom->e820 = e820;
    dom->e820_entries = e820_entries;

out:
    return rc;
}
767
/*
 * Finalise the hardware description for a guest: load the ACPI tables
 * for PVH guests and install the e820 memory map for all non-PV types.
 *
 * Returns 0 on success, an ERROR_* code otherwise.
 */
int libxl__arch_domain_finalise_hw_description(libxl__gc *gc,
                                               uint32_t domid,
                                               libxl_domain_config *d_config,
                                               struct xc_dom_image *dom)
{
    libxl_domain_build_info *const info = &d_config->b_info;
    int rc;

    /* PV guests have no firmware description to finalise. */
    if (info->type == LIBXL_DOMAIN_TYPE_PV)
        return 0;

    /* PVH guests need their ACPI tables loaded before the memmap. */
    if (info->type == LIBXL_DOMAIN_TYPE_PVH) {
        rc = libxl__dom_load_acpi(gc, info, dom);
        if (rc != 0) {
            LOGE(ERROR, "libxl_dom_load_acpi failed");
            return rc;
        }
    }

    rc = domain_construct_memmap(gc, d_config, domid, dom);
    if (rc != 0)
        LOGE(ERROR, "setting domain memory map failed");

    return rc;
}
793
/* Apply x86-specific create_info defaults. None currently exist. */
void libxl__arch_domain_create_info_setdefault(libxl__gc *gc,
                                               libxl_domain_create_info *c_info)
{
    /* Nothing to default on x86. */
}
798
/* Apply x86-specific build_info defaults. */
void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
                                              libxl_domain_build_info *b_info)
{
    /* x86 guests get ACPI by default. */
    libxl_defbool_setdefault(&b_info->acpi, true);
}
804
/*
 * Resolve the domain's passthrough mode on x86: reject passthrough for
 * PVH guests, turn the generic "enabled" into the best concrete mode
 * (share_pt when the IOMMU can share the HAP page tables, sync_pt
 * otherwise), and reject an explicit share_pt request that cannot be
 * satisfied.
 *
 * Returns 0 on success, ERROR_INVAL on an unsupported configuration.
 */
int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
                                            uint32_t domid,
                                            libxl_domain_config *d_config,
                                            const libxl_physinfo *physinfo)
{
    libxl_domain_create_info *const c_info = &d_config->c_info;
    int rc = 0;

    /* x86 PVH guests cannot do device passthrough at all yet. */
    if (c_info->passthrough != LIBXL_PASSTHROUGH_DISABLED &&
        c_info->type == LIBXL_DOMAIN_TYPE_PVH) {
        LOGD(ERROR, domid,
             "passthrough not yet supported for x86 PVH guests\n");
        rc = ERROR_INVAL;
        goto out;
    }

    /* Reason, if any, why sharing the HAP page tables with the IOMMU
     * is impossible for this guest/host combination. */
    const char *whynot_pt_share =
        c_info->type == LIBXL_DOMAIN_TYPE_PV ? "not valid for PV domain" :
        !physinfo->cap_iommu_hap_pt_share ? "not supported on this platform" :
        !libxl_defbool_val(d_config->c_info.hap) ? "only valid for HAP guests" :
        NULL;

    /* Generic "enabled" means: pick the best mode available. */
    if (c_info->passthrough == LIBXL_PASSTHROUGH_ENABLED)
        c_info->passthrough = whynot_pt_share
            ? LIBXL_PASSTHROUGH_SYNC_PT : LIBXL_PASSTHROUGH_SHARE_PT;

    /* An explicit share_pt request must actually be satisfiable. */
    if (c_info->passthrough == LIBXL_PASSTHROUGH_SHARE_PT && whynot_pt_share) {
        LOGD(ERROR, domid,
             "passthrough=\"share_pt\" %s\n",
             whynot_pt_share);
        rc = ERROR_INVAL;
        goto out;
    }

out:
    return rc;
}
844
845
846 /*
847 * Local variables:
848 * mode: C
849 * c-basic-offset: 4
850 * indent-tabs-mode: nil
851 * End:
852 */
853