#include "libxl_internal.h"
#include "libxl_arch.h"

#include <xc_dom.h>

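/*
 * Select the x86 emulation flags Xen should provide for a new domain,
 * based on the guest type: full device emulation (minus vPCI) for HVM,
 * a local APIC only for PVH, and none for PV.
 */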
int libxl__arch_domain_prepare_config(libxl__gc *gc,
                                      libxl_domain_config *d_config,
                                      struct xen_domctl_createdomain *config)
{
    switch(d_config->c_info.type) {
    case LIBXL_DOMAIN_TYPE_HVM:
        config->arch.emulation_flags = (XEN_X86_EMU_ALL & ~XEN_X86_EMU_VPCI);
        break;
    case LIBXL_DOMAIN_TYPE_PVH:
        config->arch.emulation_flags = XEN_X86_EMU_LAPIC;
        break;
    case LIBXL_DOMAIN_TYPE_PV:
        config->arch.emulation_flags = 0;
        break;
    default:
        abort();
    }

    return 0;
}

int libxl__arch_domain_save_config(libxl__gc *gc,
                                   libxl_domain_config *d_config,
                                   libxl__domain_build_state *state,
                                   const struct xen_domctl_createdomain *config)
{
    return 0;
}

static const char *e820_names(int type)
{
    switch (type) {
        case E820_RAM: return "RAM";
        case E820_RESERVED: return "Reserved";
        case E820_ACPI: return "ACPI";
        case E820_NVS: return "ACPI NVS";
        case E820_UNUSABLE: return "Unusable";
        default: break;
    }
    return "Unknown";
}

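/*
 * Sanitize the host E820 map for use by a PV guest: drop entries below
 * 1MB, clamp the RAM region to map_limitkb, convert host RAM below 4GB
 * and above the guest's RAM end to E820_UNUSABLE so the guest does not
 * treat those ranges as PCI I/O space, and append any truncated or
 * ballooned RAM at the end. The sanitized map is written back into
 * src[] and *nr_entries is updated.
 */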
static int e820_sanitize(libxl__gc *gc, struct e820entry src[],
                         uint32_t *nr_entries,
                         unsigned long map_limitkb,
                         unsigned long balloon_kb)
{
    uint64_t delta_kb = 0, start = 0, start_kb = 0, last = 0, ram_end;
    uint32_t i, idx = 0, nr;
    struct e820entry e820[E820MAX];

    if (!src || !map_limitkb || !nr_entries)
        return ERROR_INVAL;

    nr = *nr_entries;
    if (!nr)
        return ERROR_INVAL;

    if (nr > E820MAX)
        return ERROR_NOMEM;

    /* Weed out anything under 1MB */
    for (i = 0; i < nr; i++) {
        if (src[i].addr > 0x100000)
            continue;

        src[i].type = 0;
        src[i].size = 0;
        src[i].addr = -1ULL;
    }

    /* Find the lowest and highest entry in E820, skipping over
     * undesired entries. */
    start = -1ULL;
    last = 0;
    for (i = 0; i < nr; i++) {
        if ((src[i].type == E820_RAM) ||
            (src[i].type == E820_UNUSABLE) ||
            (src[i].type == 0))
            continue;

        start = src[i].addr < start ? src[i].addr : start;
        last = src[i].addr + src[i].size > last ?
               src[i].addr + src[i].size : last;
    }
    if (start > 1024)
        start_kb = start >> 10;

    /* Add the memory RAM region for the guest */
    e820[idx].addr = 0;
    e820[idx].size = (uint64_t)map_limitkb << 10;
    e820[idx].type = E820_RAM;

    /* .. and trim if necessary */
    if (start_kb && map_limitkb > start_kb) {
        delta_kb = map_limitkb - start_kb;
        if (delta_kb)
            e820[idx].size -= (uint64_t)(delta_kb << 10);
    }
    /* Note: We don't touch balloon_kb here. Will add it at the end. */
    ram_end = e820[idx].addr + e820[idx].size;
    idx++;

    LOG(DEBUG, "Memory: %"PRIu64"kB End of RAM: " \
        "0x%"PRIx64" (PFN) Delta: %"PRIu64"kB, PCI start: %"PRIu64"kB " \
        "(0x%"PRIx64" PFN), Balloon %"PRIu64"kB\n", (uint64_t)map_limitkb,
        ram_end >> 12, delta_kb, start_kb, start >> 12,
        (uint64_t)balloon_kb);

    /* The code below guards against the case where the Intel IGD is passed
     * into the guest. If we don't pass in the IGD, this whole code can be
     * ignored.
     *
     * The reason for this code is that Intel boxes fill their E820 with
     * E820_RAM amongst E820_RESERVED and we can't just ditch those E820_RAM.
     * That is b/c any "gaps" in the E820 are considered PCI I/O space by
     * Linux and would be utilized by the Intel IGD as I/O space while
     * in reality they are RAM regions.
     *
     * What this means is that we have to walk the E820 and, for any region
     * that is RAM, below 4GB and above ram_end, change its type to
     * E820_UNUSABLE. We also need to move some of the E820_RAM regions if
     * they overlap with ram_end. */
    for (i = 0; i < nr; i++) {
        uint64_t end = src[i].addr + src[i].size;

        /* We don't care about E820_UNUSABLE, but we need to
         * change the type to zero b/c the loop after this
         * sticks E820_UNUSABLE on the guest's E820 but ignores
         * the ones with type zero. */
        if ((src[i].type == E820_UNUSABLE) ||
            /* Any region that is within the "RAM region" can
             * be safely ditched. */
            (end < ram_end)) {
                src[i].type = 0;
                continue;
        }

        /* Look only at RAM regions. */
        if (src[i].type != E820_RAM)
            continue;

        /* We only care about RAM regions below 4GB. */
        if (src[i].addr >= (1ULL<<32))
            continue;

        /* E820_RAM overlaps with our RAM region. Move it. */
        if (src[i].addr < ram_end) {
            uint64_t delta;

            src[i].type = E820_UNUSABLE;
            delta = ram_end - src[i].addr;
            /* The end < ram_end check should weed this out */
            if (src[i].size < delta)
                src[i].type = 0;
            else {
                src[i].size -= delta;
                src[i].addr = ram_end;
            }
            if (src[i].addr + src[i].size != end) {
                /* We messed up somewhere */
                src[i].type = 0;
                LOGE(ERROR, "Computed E820 wrongly. Continuing on.");
            }
        }
        /* Lastly, convert the RAM to UNUSABLE. Look in the Linux kernel
           at git commit 2f14ddc3a7146ea4cd5a3d1ecd993f85f2e4f948
           "xen/setup: Inhibit resource API from using System RAM E820
           gaps as PCI mem gaps" for the full explanation. */
        if (end > ram_end)
            src[i].type = E820_UNUSABLE;
    }

    /* Check if there is a region between ram_end and start. */
    if (start > ram_end) {
        int add_unusable = 1;
        for (i = 0; i < nr && add_unusable; i++) {
            if (src[i].type != E820_UNUSABLE)
                continue;
            if (ram_end != src[i].addr)
                continue;
            if (start != src[i].addr + src[i].size) {
                /* there is one, adjust it */
                src[i].size = start - src[i].addr;
            }
            add_unusable = 0;
        }
        /* .. and if not present, add it in. This is to guard against
           the Linux guest assuming that the gap between the end of
           the RAM region and the start of the E820_[ACPI,NVS,RESERVED]
           is PCI I/O space. Which it certainly is _not_. */
        if (add_unusable) {
            e820[idx].type = E820_UNUSABLE;
            e820[idx].addr = ram_end;
            e820[idx].size = start - ram_end;
            idx++;
        }
    }
    /* Almost done: copy them over, ignoring the undesirable ones */
    for (i = 0; i < nr; i++) {
        if ((src[i].type == E820_RAM) ||
            (src[i].type == 0))
            continue;

        e820[idx].type = src[i].type;
        e820[idx].addr = src[i].addr;
        e820[idx].size = src[i].size;
        idx++;
    }
    /* At this point we have the mapped RAM + E820 entries from src. */
    if (balloon_kb || delta_kb) {
        /* and if we truncated the RAM region, then add it to the end. */
        e820[idx].type = E820_RAM;
        e820[idx].addr = (uint64_t)(1ULL << 32) > last ?
                         (uint64_t)(1ULL << 32) : last;
        /* also add the balloon memory to the end. */
        e820[idx].size = ((uint64_t)delta_kb << 10) +
                         ((uint64_t)balloon_kb << 10);
        idx++;
    }
    nr = idx;

    for (i = 0; i < nr; i++) {
        LOG(DEBUG, ":\t[%"PRIx64" -> %"PRIx64"] %s", e820[i].addr >> 12,
            (e820[i].addr + e820[i].size) >> 12, e820_names(e820[i].type));
    }

    /* Done: copy the sanitized version. */
    *nr_entries = nr;
    memcpy(src, e820, nr * sizeof(struct e820entry));
    return 0;
}

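/*
 * Fetch the host E820 map from Xen and run it through e820_sanitize(),
 * clamping RAM to the guest's target memory and accounting for
 * ballooned and slack memory. On success *nr holds the number of
 * entries written to map[].
 */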
static int e820_host_sanitize(libxl__gc *gc,
                              libxl_domain_build_info *b_info,
                              struct e820entry map[],
                              uint32_t *nr)
{
    int rc;

    rc = xc_get_machine_memory_map(CTX->xch, map, *nr);
    if (rc < 0)
        return ERROR_FAIL;

    *nr = rc;

    rc = e820_sanitize(gc, map, nr, b_info->target_memkb,
                       (b_info->max_memkb - b_info->target_memkb) +
                       b_info->u.pv.slack_memkb);
    return rc;
}

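/*
 * Build a sanitized copy of the host E820 map and install it as the
 * guest's memory map. Only meaningful for PV guests that requested
 * e820_host; anything else is rejected with ERROR_INVAL.
 */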
static int libxl__e820_alloc(libxl__gc *gc, uint32_t domid,
                             libxl_domain_config *d_config)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    int rc;
    uint32_t nr;
    struct e820entry map[E820MAX];
    libxl_domain_build_info *b_info;

    if (d_config == NULL || d_config->c_info.type != LIBXL_DOMAIN_TYPE_PV)
        return ERROR_INVAL;

    b_info = &d_config->b_info;
    if (!libxl_defbool_val(b_info->u.pv.e820_host))
        return ERROR_INVAL;

    nr = E820MAX;
    rc = e820_host_sanitize(gc, b_info, map, &nr);
    if (rc)
        return ERROR_FAIL;

    rc = xc_domain_set_memory_map(ctx->xch, domid, map, nr);

    if (rc < 0)
        return ERROR_FAIL;

    return 0;
}

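/*
 * Return the guest's timer mode as the raw value expected by
 * HVM_PARAM_TIMER_MODE, asserting that it lies within the valid range.
 */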
static unsigned long timer_mode(const libxl_domain_build_info *info)
{
    const libxl_timer_mode mode = info->timer_mode;
    assert(mode >= LIBXL_TIMER_MODE_DELAY_FOR_MISSED_TICKS &&
           mode <= LIBXL_TIMER_MODE_ONE_MISSED_TICK_PENDING);
    return ((unsigned long)mode);
}

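/*
 * Translate the libxl viridian (Hyper-V) enlightenment configuration
 * into an HVMPV_* feature mask and program it via HVM_PARAM_VIRIDIAN.
 * The explicit enable/disable bitmaps override the defaults implied by
 * the boolean "viridian" setting, and the base group is required
 * whenever any other group is enabled.
 */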
static int hvm_set_viridian_features(libxl__gc *gc, uint32_t domid,
                                     const libxl_domain_build_info *info)
{
    libxl_bitmap enlightenments;
    libxl_viridian_enlightenment v;
    uint64_t mask = 0;

    libxl_bitmap_init(&enlightenments);
    libxl_bitmap_alloc(CTX, &enlightenments,
                       LIBXL_BUILDINFO_HVM_VIRIDIAN_ENABLE_DISABLE_WIDTH);

    if (libxl_defbool_val(info->u.hvm.viridian)) {
        /* Enable defaults */
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL);
    }

    libxl_for_each_set_bit(v, info->u.hvm.viridian_enable) {
        if (libxl_bitmap_test(&info->u.hvm.viridian_disable, v)) {
            LOG(ERROR, "%s group both enabled and disabled",
                libxl_viridian_enlightenment_to_string(v));
            goto err;
        }
        if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
            libxl_bitmap_set(&enlightenments, v);
    }

    libxl_for_each_set_bit(v, info->u.hvm.viridian_disable)
        if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
            libxl_bitmap_reset(&enlightenments, v);

    /* The base set is a pre-requisite for all others */
    if (!libxl_bitmap_is_empty(&enlightenments) &&
        !libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
        LOG(ERROR, "base group not enabled");
        goto err;
    }

    libxl_for_each_set_bit(v, enlightenments)
        LOG(DETAIL, "%s group enabled", libxl_viridian_enlightenment_to_string(v));

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
        mask |= HVMPV_base_freq;

        if (!libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ))
            mask |= HVMPV_no_freq;
    }

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT))
        mask |= HVMPV_time_ref_count;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_REFERENCE_TSC))
        mask |= HVMPV_reference_tsc;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_HCALL_REMOTE_TLB_FLUSH))
        mask |= HVMPV_hcall_remote_tlb_flush;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST))
        mask |= HVMPV_apic_assist;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL))
        mask |= HVMPV_crash_ctl;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_SYNIC))
        mask |= HVMPV_synic;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_STIMER))
        mask |= HVMPV_time_ref_count | HVMPV_synic | HVMPV_stimer;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_HCALL_IPI))
        mask |= HVMPV_hcall_ipi;

    if (mask != 0 &&
        xc_hvm_param_set(CTX->xch,
                         domid,
                         HVM_PARAM_VIRIDIAN,
                         mask) != 0) {
        LOGE(ERROR, "Couldn't set viridian feature mask (0x%"PRIx64")", mask);
        goto err;
    }

    libxl_bitmap_dispose(&enlightenments);
    return 0;

err:
    libxl_bitmap_dispose(&enlightenments);
    return ERROR_FAIL;
}

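/*
 * Program the per-domain HVM parameters (HPET, VPT alignment, MCA
 * capabilities, timer mode, nested HVM and altp2m) that apply to HVM
 * and PVH guests.
 */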
static int hvm_set_conf_params(libxl__gc *gc, uint32_t domid,
                               const libxl_domain_build_info *info)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    xc_interface *xch = ctx->xch;
    int ret = ERROR_FAIL;
    unsigned int altp2m = info->altp2m;

    switch(info->type) {
    case LIBXL_DOMAIN_TYPE_HVM:
        /* The config parameter "altp2m" replaces the parameter "altp2mhvm". For
         * legacy reasons, both parameters are accepted on x86 HVM guests.
         *
         * If the legacy field info->u.hvm.altp2m is set, activate altp2m.
         * Otherwise set altp2m based on the field info->altp2m. */
        if (info->altp2m == LIBXL_ALTP2M_MODE_DISABLED &&
            libxl_defbool_val(info->u.hvm.altp2m))
            altp2m = libxl_defbool_val(info->u.hvm.altp2m);

        if (xc_hvm_param_set(xch, domid, HVM_PARAM_HPET_ENABLED,
                             libxl_defbool_val(info->u.hvm.hpet))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_HPET_ENABLED");
            goto out;
        }
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_VPT_ALIGN,
                             libxl_defbool_val(info->u.hvm.vpt_align))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_VPT_ALIGN");
            goto out;
        }
        if (info->u.hvm.mca_caps &&
            xc_hvm_param_set(CTX->xch, domid, HVM_PARAM_MCA_CAP,
                             info->u.hvm.mca_caps)) {
            LOG(ERROR, "Couldn't set HVM_PARAM_MCA_CAP");
            goto out;
        }

        /* Fallthrough */
    case LIBXL_DOMAIN_TYPE_PVH:
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_TIMER_MODE,
                             timer_mode(info))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_TIMER_MODE");
            goto out;
        }
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_NESTEDHVM,
                             libxl_defbool_val(info->nested_hvm))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_NESTEDHVM");
            goto out;
        }
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_ALTP2M, altp2m)) {
            LOG(ERROR, "Couldn't set HVM_PARAM_ALTP2M");
            goto out;
        }
        break;

    default:
        abort();
    }

    ret = 0;

 out:
    return ret;
}

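/*
 * Arch-specific domain creation hook for x86: set the HVM/PVH
 * parameters and viridian features, the PV memory map limit, the TSC
 * mode, the RTC offset (adjusted for localtime if requested), the
 * shadow memory allocation and, for PV guests with e820_host, the
 * host-derived E820 map.
 */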
int libxl__arch_domain_create(libxl__gc *gc, libxl_domain_config *d_config,
                              uint32_t domid)
{
    const libxl_domain_build_info *info = &d_config->b_info;
    int ret = 0;
    int tsc_mode;
    uint32_t rtc_timeoffset;
    libxl_ctx *ctx = libxl__gc_owner(gc);

    if (info->type != LIBXL_DOMAIN_TYPE_PV &&
        (ret = hvm_set_conf_params(gc, domid, info)) != 0)
        goto out;

    if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
        (ret = hvm_set_viridian_features(gc, domid, info)) != 0)
        goto out;

    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PV)
        xc_domain_set_memmap_limit(ctx->xch, domid,
                                   (d_config->b_info.max_memkb +
                                    d_config->b_info.u.pv.slack_memkb));

    switch (d_config->b_info.tsc_mode) {
    case LIBXL_TSC_MODE_DEFAULT:
        tsc_mode = 0;
        break;
    case LIBXL_TSC_MODE_ALWAYS_EMULATE:
        tsc_mode = 1;
        break;
    case LIBXL_TSC_MODE_NATIVE:
        tsc_mode = 2;
        break;
    case LIBXL_TSC_MODE_NATIVE_PARAVIRT:
        LOGD(ERROR, domid, "TSC Mode native_paravirt (a.k.a PVRDTSCP) has been removed");
        ret = ERROR_FEATURE_REMOVED;
        goto out;
    default:
        abort();
    }

    if (xc_domain_set_tsc_info(ctx->xch, domid, tsc_mode, 0, 0, 0)) {
        LOGE(ERROR, "xc_domain_set_tsc_info() failed");
        ret = ERROR_FAIL;
        goto out;
    }

    rtc_timeoffset = d_config->b_info.rtc_timeoffset;
    if (libxl_defbool_val(d_config->b_info.localtime)) {
        time_t t;
        struct tm *tm, result;

        t = time(NULL);
        tm = localtime_r(&t, &result);

        if (!tm) {
            LOGED(ERROR, domid, "Failed to call localtime_r");
            ret = ERROR_FAIL;
            goto out;
        }

        rtc_timeoffset += tm->tm_gmtoff;
    }

    if (rtc_timeoffset)
        xc_domain_set_time_offset(ctx->xch, domid, rtc_timeoffset);

    if (d_config->b_info.type != LIBXL_DOMAIN_TYPE_PV) {
        unsigned long shadow = DIV_ROUNDUP(d_config->b_info.shadow_memkb,
                                           1024);
        xc_shadow_control(ctx->xch, domid, XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION,
                          NULL, 0, &shadow, 0, NULL);
    }

    if (d_config->c_info.type == LIBXL_DOMAIN_TYPE_PV &&
            libxl_defbool_val(d_config->b_info.u.pv.e820_host)) {
        ret = libxl__e820_alloc(gc, domid, d_config);
        if (ret) {
            LOGED(ERROR, domid, "Failed while collecting E820 with: %d (errno:%d)",
                  ret, errno);
        }
    }

out:
    return ret;
}

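/*
 * Report the arch-specific extra memory overhead for a domain; on x86
 * this is simply LIBXL_MAXMEM_CONSTANT.
 */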
int libxl__arch_extra_memory(libxl__gc *gc,
                             const libxl_domain_build_info *info,
                             uint64_t *out)
{
    *out = LIBXL_MAXMEM_CONSTANT;

    return 0;
}

int libxl__arch_domain_init_hw_description(libxl__gc *gc,
                                           libxl_domain_build_info *info,
                                           libxl__domain_build_state *state,
                                           struct xc_dom_image *dom)
{
    return 0;
}

int libxl__arch_build_dom_finish(libxl__gc *gc,
                                 libxl_domain_build_info *info,
                                 struct xc_dom_image *dom,
                                 libxl__domain_build_state *state)
{
    return 0;
}

/* Return 0 on success, ERROR_* on failure. */
int libxl__arch_vnuma_build_vmemrange(libxl__gc *gc,
                                      uint32_t domid,
                                      libxl_domain_build_info *b_info,
                                      libxl__domain_build_state *state)
{
    int nid, nr_vmemrange, rc;
    uint32_t nr_e820, e820_count;
    struct e820entry map[E820MAX];
    xen_vmemrange_t *vmemranges;
    unsigned int array_size;

    /* If e820_host is not set, call the generic function */
    if (!(b_info->type == LIBXL_DOMAIN_TYPE_PV &&
          libxl_defbool_val(b_info->u.pv.e820_host)))
        return libxl__vnuma_build_vmemrange_pv_generic(gc, domid, b_info,
                                                       state);

    assert(state->vmemranges == NULL);

    nr_e820 = E820MAX;
    rc = e820_host_sanitize(gc, b_info, map, &nr_e820);
    if (rc) goto out;

    e820_count = 0;
    nr_vmemrange = 0;
    vmemranges = NULL;
    array_size = 0;
    for (nid = 0; nid < b_info->num_vnuma_nodes; nid++) {
        libxl_vnode_info *p = &b_info->vnuma_nodes[nid];
        uint64_t remaining_bytes = (p->memkb << 10), bytes;

        while (remaining_bytes > 0) {
            if (e820_count >= nr_e820) {
                rc = ERROR_NOMEM;
                goto out;
            }

            /* Skip non-RAM regions */
            if (map[e820_count].type != E820_RAM) {
                e820_count++;
                continue;
            }

            if (nr_vmemrange >= array_size) {
                array_size += 32;
                GCREALLOC_ARRAY(vmemranges, array_size);
            }

            bytes = map[e820_count].size >= remaining_bytes ?
                remaining_bytes : map[e820_count].size;

            vmemranges[nr_vmemrange].start = map[e820_count].addr;
            vmemranges[nr_vmemrange].end = map[e820_count].addr + bytes;

            if (map[e820_count].size >= remaining_bytes) {
                map[e820_count].addr += bytes;
                map[e820_count].size -= bytes;
            } else {
                e820_count++;
            }

            remaining_bytes -= bytes;

            vmemranges[nr_vmemrange].flags = 0;
            vmemranges[nr_vmemrange].nid = nid;
            nr_vmemrange++;
        }
    }

    state->vmemranges = vmemranges;
    state->num_vmemranges = nr_vmemrange;

    rc = 0;
out:
    return rc;
}

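/*
 * Map a physical IRQ into the domain as a pirq and grant the domain
 * permission to access it.
 */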
int libxl__arch_domain_map_irq(libxl__gc *gc, uint32_t domid, int irq)
{
    int ret;

    ret = xc_physdev_map_pirq(CTX->xch, domid, irq, &irq);
    if (ret)
        return ret;

    ret = xc_domain_irq_permission(CTX->xch, domid, irq, 1);

    return ret;
}

/*
 * Here we're just trying to set these kinds of e820 mappings:
 *
 * #1. Low memory region
 *
 * Low RAM starts at least from 1M to make sure all standard regions
 * of the PC memory map, like BIOS, VGA memory-mapped I/O and vgabios,
 * have enough space.
 * Note: The regions below 1M are still constructed with multiple
 * e820 entries by hvmloader. At this point we don't change anything.
 *
 * #2. RDM region if it exists
 *
 * #3. High memory region if it exists
 *
 * Note: these regions are not overlapping since we already check
 * and adjust them. Please refer to libxl__domain_device_construct_rdm().
 */
#define GUEST_LOW_MEM_START_DEFAULT 0x100000
static int domain_construct_memmap(libxl__gc *gc,
                                   libxl_domain_config *d_config,
                                   uint32_t domid,
                                   struct xc_dom_image *dom)
{
    int rc = 0;
    unsigned int nr = 0, i;
    /* We always own at least one lowmem entry. */
    unsigned int e820_entries = 1;
    struct e820entry *e820 = NULL;
    uint64_t highmem_size =
                    dom->highmem_end ? dom->highmem_end - (1ull << 32) : 0;
    uint32_t lowmem_start = dom->device_model ? GUEST_LOW_MEM_START_DEFAULT : 0;
    unsigned page_size = XC_DOM_PAGE_SIZE(dom);

    /* Add all rdm entries. */
    for (i = 0; i < d_config->num_rdms; i++)
        if (d_config->rdms[i].policy != LIBXL_RDM_RESERVE_POLICY_INVALID)
            e820_entries++;

    /* Add the HVM special pages to the PVH memmap as RESERVED. */
    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PVH)
        e820_entries++;

    /* If we should have a highmem range. */
    if (highmem_size)
        e820_entries++;

    for (i = 0; i < MAX_ACPI_MODULES; i++)
        if (dom->acpi_modules[i].length)
            e820_entries++;

    if (e820_entries >= E820MAX) {
        LOGD(ERROR, domid, "Too many entries in the memory map!");
        rc = ERROR_INVAL;
        goto out;
    }

    e820 = libxl__malloc(gc, sizeof(struct e820entry) * e820_entries);

    /* Low memory */
    e820[nr].addr = lowmem_start;
    e820[nr].size = dom->lowmem_end - lowmem_start;
    e820[nr].type = E820_RAM;
    nr++;

    /* RDM mapping */
    for (i = 0; i < d_config->num_rdms; i++) {
        if (d_config->rdms[i].policy == LIBXL_RDM_RESERVE_POLICY_INVALID)
            continue;

        e820[nr].addr = d_config->rdms[i].start;
        e820[nr].size = d_config->rdms[i].size;
        e820[nr].type = E820_RESERVED;
        nr++;
    }

    /* HVM special pages */
    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PVH) {
        e820[nr].addr = (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES)
                        << XC_PAGE_SHIFT;
        e820[nr].size = X86_HVM_NR_SPECIAL_PAGES << XC_PAGE_SHIFT;
        e820[nr].type = E820_RESERVED;
        nr++;
    }

    for (i = 0; i < MAX_ACPI_MODULES; i++) {
        if (dom->acpi_modules[i].length) {
            e820[nr].addr = dom->acpi_modules[i].guest_addr_out & ~(page_size - 1);
            e820[nr].size = dom->acpi_modules[i].length +
                (dom->acpi_modules[i].guest_addr_out & (page_size - 1));
            e820[nr].type = E820_ACPI;
            nr++;
        }
    }

    /* High memory */
    if (highmem_size) {
        e820[nr].addr = ((uint64_t)1 << 32);
        e820[nr].size = highmem_size;
        e820[nr].type = E820_RAM;
    }

    if (xc_domain_set_memory_map(CTX->xch, domid, e820, e820_entries) != 0) {
        rc = ERROR_FAIL;
        goto out;
    }

    dom->e820 = e820;
    dom->e820_entries = e820_entries;

out:
    return rc;
}

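/*
 * Finalise the guest's hardware description: load the ACPI tables for
 * PVH guests and install the constructed E820 memory map for all
 * non-PV guests.
 */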
int libxl__arch_domain_finalise_hw_description(libxl__gc *gc,
                                               uint32_t domid,
                                               libxl_domain_config *d_config,
                                               struct xc_dom_image *dom)
{
    libxl_domain_build_info *const info = &d_config->b_info;
    int rc;

    if (info->type == LIBXL_DOMAIN_TYPE_PV)
        return 0;

    if (info->type == LIBXL_DOMAIN_TYPE_PVH) {
        rc = libxl__dom_load_acpi(gc, info, dom);
        if (rc != 0) {
            LOGE(ERROR, "libxl__dom_load_acpi failed");
            return rc;
        }
    }

    rc = domain_construct_memmap(gc, d_config, domid, dom);
    if (rc != 0)
        LOGE(ERROR, "setting domain memory map failed");

    return rc;
}

void libxl__arch_domain_create_info_setdefault(libxl__gc *gc,
                                               libxl_domain_create_info *c_info)
{
}

void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
                                              libxl_domain_build_info *b_info)
{
    libxl_defbool_setdefault(&b_info->acpi, true);
}

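/*
 * Choose the default passthrough mode for x86: reject passthrough for
 * PVH guests, and resolve "enabled" to either shared or synchronised
 * page tables depending on domain type, IOMMU capabilities and HAP.
 */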
int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
                                            uint32_t domid,
                                            libxl_domain_config *d_config,
                                            const libxl_physinfo *physinfo)
{
    int rc;
    libxl_domain_create_info *const c_info = &d_config->c_info;

    if (c_info->passthrough != LIBXL_PASSTHROUGH_DISABLED &&
        c_info->type == LIBXL_DOMAIN_TYPE_PVH) {
        LOGD(ERROR, domid,
             "passthrough not yet supported for x86 PVH guests\n");
        rc = ERROR_INVAL;
        goto out;
    }

    const char *whynot_pt_share =
        c_info->type == LIBXL_DOMAIN_TYPE_PV ? "not valid for PV domain" :
        !physinfo->cap_iommu_hap_pt_share ? "not supported on this platform" :
        !libxl_defbool_val(d_config->c_info.hap) ? "only valid for HAP guests" :
        NULL;

    if (c_info->passthrough == LIBXL_PASSTHROUGH_ENABLED) {
        c_info->passthrough = whynot_pt_share
            ? LIBXL_PASSTHROUGH_SYNC_PT : LIBXL_PASSTHROUGH_SHARE_PT;
    }

    if (c_info->passthrough == LIBXL_PASSTHROUGH_SHARE_PT && whynot_pt_share) {
        LOGD(ERROR, domid,
             "passthrough=\"share_pt\" %s\n",
             whynot_pt_share);
        rc = ERROR_INVAL;
        goto out;
    }

    rc = 0;
 out:
    return rc;
}


/*
 * Local variables:
 * mode: C
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */