#include <assert.h>

#include "xc_sr_common_x86_pv.h"

static xen_pfn_t pfn_to_mfn(const struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    assert(pfn <= ctx->x86.pv.max_pfn);

    return xc_pfn_to_mfn(pfn, ctx->x86.pv.p2m, ctx->x86.pv.width);
}

/*
 * Expand our local tracking information for the p2m table and the domain's
 * maximum size.  Normally this will be called once to expand from 0 to
 * max_pfn, but it is liable to be called multiple times if the domain grows
 * on the sending side after migration has started.
 */
static int expand_p2m(struct xc_sr_context *ctx, unsigned long max_pfn)
{
    xc_interface *xch = ctx->xch;
    unsigned long old_max = ctx->x86.pv.max_pfn, i;
    unsigned int fpp = PAGE_SIZE / ctx->x86.pv.width;
    unsigned long end_frame = (max_pfn / fpp) + 1;
    unsigned long old_end_frame = (old_max / fpp) + 1;
    xen_pfn_t *p2m = NULL, *p2m_pfns = NULL;
    uint32_t *pfn_types = NULL;
    size_t p2msz, p2m_pfnsz, pfn_typesz;

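    /*
     * Three parallel structures grow together: the p2m itself, the per-pfn
     * type array, and the list of frames backing the guest's p2m ("fpp" p2m
     * entries fit in each frame).
     */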
    assert(max_pfn > old_max);

    p2msz = (max_pfn + 1) * ctx->x86.pv.width;
    p2m = realloc(ctx->x86.pv.p2m, p2msz);
    if ( !p2m )
    {
        ERROR("Failed to (re)alloc %zu bytes for p2m", p2msz);
        return -1;
    }
    ctx->x86.pv.p2m = p2m;

    pfn_typesz = (max_pfn + 1) * sizeof(*pfn_types);
    pfn_types = realloc(ctx->x86.pv.restore.pfn_types, pfn_typesz);
    if ( !pfn_types )
    {
        ERROR("Failed to (re)alloc %zu bytes for pfn_types", pfn_typesz);
        return -1;
    }
    ctx->x86.pv.restore.pfn_types = pfn_types;

    p2m_pfnsz = (end_frame + 1) * sizeof(*p2m_pfns);
    p2m_pfns = realloc(ctx->x86.pv.p2m_pfns, p2m_pfnsz);
    if ( !p2m_pfns )
    {
        ERROR("Failed to (re)alloc %zu bytes for p2m frame list", p2m_pfnsz);
        return -1;
    }
    ctx->x86.pv.p2m_frames = end_frame;
    ctx->x86.pv.p2m_pfns = p2m_pfns;

    ctx->x86.pv.max_pfn = max_pfn;
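    /* Initialise the newly-tracked range as invalid until stream data arrives. */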
    for ( i = (old_max ? old_max + 1 : 0); i <= max_pfn; ++i )
    {
        ctx->restore.ops.set_gfn(ctx, i, INVALID_MFN);
        ctx->restore.ops.set_page_type(ctx, i, 0);
    }

    for ( i = (old_end_frame ? old_end_frame + 1 : 0); i <= end_frame; ++i )
        ctx->x86.pv.p2m_pfns[i] = INVALID_MFN;

    DPRINTF("Changed max_pfn from %#lx to %#lx", old_max, max_pfn);
    return 0;
}

/*
 * Pin all of the pagetables.
 */
static int pin_pagetables(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned long i, nr_pins;
    struct mmuext_op pin[MAX_PIN_BATCH];

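    /* Pin operations are batched; flush to Xen whenever MAX_PIN_BATCH accumulate. */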
    for ( i = nr_pins = 0; i <= ctx->x86.pv.max_pfn; ++i )
    {
        if ( (ctx->x86.pv.restore.pfn_types[i] &
              XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
            continue;

        switch ( (ctx->x86.pv.restore.pfn_types[i] &
                  XEN_DOMCTL_PFINFO_LTABTYPE_MASK) )
        {
        case XEN_DOMCTL_PFINFO_L1TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
            break;
        case XEN_DOMCTL_PFINFO_L2TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
            break;
        case XEN_DOMCTL_PFINFO_L3TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
            break;
        case XEN_DOMCTL_PFINFO_L4TAB:
            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
            break;
        default:
            continue;
        }

        pin[nr_pins].arg1.mfn = pfn_to_mfn(ctx, i);
        nr_pins++;

        if ( nr_pins == MAX_PIN_BATCH )
        {
            if ( xc_mmuext_op(xch, pin, nr_pins, ctx->domid) != 0 )
            {
                PERROR("Failed to pin batch of pagetables");
                return -1;
            }
            nr_pins = 0;
        }
    }

    if ( (nr_pins > 0) && (xc_mmuext_op(xch, pin, nr_pins, ctx->domid) < 0) )
    {
        PERROR("Failed to pin batch of pagetables");
        return -1;
    }

    return 0;
}

/*
 * Update details in a guest's start_info structure.
 */
static int process_start_info(struct xc_sr_context *ctx,
                              vcpu_guest_context_any_t *vcpu)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t pfn, mfn;
    start_info_any_t *guest_start_info = NULL;
    int rc = -1;

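    /* The save side stores the start_info frame in vcpu0's edx; convert it
     * back from pfn to mfn for this host. */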
    pfn = GET_FIELD(vcpu, user_regs.edx, ctx->x86.pv.width);

    if ( pfn > ctx->x86.pv.max_pfn )
    {
        ERROR("Start Info pfn %#lx out of range", pfn);
        goto err;
    }

    if ( ctx->x86.pv.restore.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB )
    {
        ERROR("Start Info pfn %#lx has bad type %u", pfn,
              (ctx->x86.pv.restore.pfn_types[pfn] >>
               XEN_DOMCTL_PFINFO_LTAB_SHIFT));
        goto err;
    }

    mfn = pfn_to_mfn(ctx, pfn);
    if ( !mfn_in_pseudophysmap(ctx, mfn) )
    {
        ERROR("Start Info has bad mfn");
        dump_bad_pseudophysmap_entry(ctx, mfn);
        goto err;
    }

    SET_FIELD(vcpu, user_regs.edx, mfn, ctx->x86.pv.width);
    guest_start_info = xc_map_foreign_range(
        xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
    if ( !guest_start_info )
    {
        PERROR("Failed to map Start Info at mfn %#lx", mfn);
        goto err;
    }

    /* Deal with xenstore stuff */
    pfn = GET_FIELD(guest_start_info, store_mfn, ctx->x86.pv.width);
    if ( pfn > ctx->x86.pv.max_pfn )
    {
        ERROR("XenStore pfn %#lx out of range", pfn);
        goto err;
    }

    mfn = pfn_to_mfn(ctx, pfn);
    if ( !mfn_in_pseudophysmap(ctx, mfn) )
    {
        ERROR("XenStore pfn has bad mfn");
        dump_bad_pseudophysmap_entry(ctx, mfn);
        goto err;
    }

    ctx->restore.xenstore_gfn = mfn;
    SET_FIELD(guest_start_info, store_mfn, mfn, ctx->x86.pv.width);
    SET_FIELD(guest_start_info, store_evtchn,
              ctx->restore.xenstore_evtchn, ctx->x86.pv.width);

    /* Deal with console stuff */
    pfn = GET_FIELD(guest_start_info, console.domU.mfn, ctx->x86.pv.width);
    if ( pfn > ctx->x86.pv.max_pfn )
    {
        ERROR("Console pfn %#lx out of range", pfn);
        goto err;
    }

    mfn = pfn_to_mfn(ctx, pfn);
    if ( !mfn_in_pseudophysmap(ctx, mfn) )
    {
        ERROR("Console pfn has bad mfn");
        dump_bad_pseudophysmap_entry(ctx, mfn);
        goto err;
    }

    ctx->restore.console_gfn = mfn;
    SET_FIELD(guest_start_info, console.domU.mfn, mfn, ctx->x86.pv.width);
    SET_FIELD(guest_start_info, console.domU.evtchn,
              ctx->restore.console_evtchn, ctx->x86.pv.width);

    /* Set other information */
    SET_FIELD(guest_start_info, nr_pages,
              ctx->x86.pv.max_pfn + 1, ctx->x86.pv.width);
    SET_FIELD(guest_start_info, shared_info,
              ctx->dominfo.shared_info_frame << PAGE_SHIFT, ctx->x86.pv.width);
    SET_FIELD(guest_start_info, flags, 0, ctx->x86.pv.width);

    rc = 0;

 err:
    if ( guest_start_info )
        munmap(guest_start_info, PAGE_SIZE);

    return rc;
}

/*
 * Process one stashed vcpu's worth of basic state and send it to Xen.
 */
static int process_vcpu_basic(struct xc_sr_context *ctx,
                              unsigned int vcpuid)
{
    xc_interface *xch = ctx->xch;
    vcpu_guest_context_any_t *vcpu = ctx->x86.pv.restore.vcpus[vcpuid].basic.ptr;
    xen_pfn_t pfn, mfn;
    unsigned int i, gdt_count;
    int rc = -1;

    /* Vcpu 0 is special: Convert the suspend record to an mfn. */
    if ( vcpuid == 0 )
    {
        rc = process_start_info(ctx, vcpu);
        if ( rc )
            return rc;
        rc = -1;
    }

    SET_FIELD(vcpu, flags,
              GET_FIELD(vcpu, flags, ctx->x86.pv.width) | VGCF_online,
              ctx->x86.pv.width);

    gdt_count = GET_FIELD(vcpu, gdt_ents, ctx->x86.pv.width);
    if ( gdt_count > FIRST_RESERVED_GDT_ENTRY )
    {
        ERROR("GDT entry count (%u) out of range (max %u)",
              gdt_count, FIRST_RESERVED_GDT_ENTRY);
        errno = ERANGE;
        goto err;
    }
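    /* A 4k frame holds 512 8-byte GDT entries. */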
    gdt_count = (gdt_count + 511) / 512; /* gdt_count now in units of frames. */

    /* Convert GDT frames to mfns. */
    for ( i = 0; i < gdt_count; ++i )
    {
        pfn = GET_FIELD(vcpu, gdt_frames[i], ctx->x86.pv.width);
        if ( pfn > ctx->x86.pv.max_pfn )
        {
            ERROR("GDT frame %u (pfn %#lx) out of range", i, pfn);
            goto err;
        }

        if ( (ctx->x86.pv.restore.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
        {
            ERROR("GDT frame %u (pfn %#lx) has bad type %u", i, pfn,
                  (ctx->x86.pv.restore.pfn_types[pfn] >>
                   XEN_DOMCTL_PFINFO_LTAB_SHIFT));
            goto err;
        }

        mfn = pfn_to_mfn(ctx, pfn);
        if ( !mfn_in_pseudophysmap(ctx, mfn) )
        {
            ERROR("GDT frame %u has bad mfn", i);
            dump_bad_pseudophysmap_entry(ctx, mfn);
            goto err;
        }

        SET_FIELD(vcpu, gdt_frames[i], mfn, ctx->x86.pv.width);
    }

    /* Convert CR3 to an mfn. */
    pfn = cr3_to_mfn(ctx, GET_FIELD(vcpu, ctrlreg[3], ctx->x86.pv.width));
    if ( pfn > ctx->x86.pv.max_pfn )
    {
        ERROR("cr3 (pfn %#lx) out of range", pfn);
        goto err;
    }

    if ( (ctx->x86.pv.restore.pfn_types[pfn] &
          XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
         (((xen_pfn_t)ctx->x86.pv.levels) << XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
    {
        ERROR("cr3 (pfn %#lx) has bad type %u, expected %u", pfn,
              (ctx->x86.pv.restore.pfn_types[pfn] >>
               XEN_DOMCTL_PFINFO_LTAB_SHIFT),
              ctx->x86.pv.levels);
        goto err;
    }

    mfn = pfn_to_mfn(ctx, pfn);
    if ( !mfn_in_pseudophysmap(ctx, mfn) )
    {
        ERROR("cr3 has bad mfn");
        dump_bad_pseudophysmap_entry(ctx, mfn);
        goto err;
    }

    SET_FIELD(vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86.pv.width);

    /* 64bit guests: Convert CR1 (guest pagetables) to mfn. */
    if ( ctx->x86.pv.levels == 4 && (vcpu->x64.ctrlreg[1] & 1) )
    {
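        /* Bit 0 of ctrlreg[1] acts as a valid flag in the stream; the upper
         * bits hold the frame number. */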
        pfn = vcpu->x64.ctrlreg[1] >> PAGE_SHIFT;

        if ( pfn > ctx->x86.pv.max_pfn )
        {
            ERROR("cr1 (pfn %#lx) out of range", pfn);
            goto err;
        }

        if ( (ctx->x86.pv.restore.pfn_types[pfn] &
              XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
             (((xen_pfn_t)ctx->x86.pv.levels) << XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
        {
            ERROR("cr1 (pfn %#lx) has bad type %u, expected %u", pfn,
                  (ctx->x86.pv.restore.pfn_types[pfn] >>
                   XEN_DOMCTL_PFINFO_LTAB_SHIFT),
                  ctx->x86.pv.levels);
            goto err;
        }

        mfn = pfn_to_mfn(ctx, pfn);
        if ( !mfn_in_pseudophysmap(ctx, mfn) )
        {
            ERROR("cr1 has bad mfn");
            dump_bad_pseudophysmap_entry(ctx, mfn);
            goto err;
        }

        vcpu->x64.ctrlreg[1] = 1 | ((uint64_t)mfn << PAGE_SHIFT);
    }

    if ( xc_vcpu_setcontext(xch, ctx->domid, vcpuid, vcpu) )
    {
        PERROR("Failed to set vcpu%u's basic info", vcpuid);
        goto err;
    }

    rc = 0;

 err:
    return rc;
}

/*
 * Process one stashed vcpu's worth of extended state and send it to Xen.
 */
static int process_vcpu_extended(struct xc_sr_context *ctx,
                                 unsigned int vcpuid)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_x86_pv_restore_vcpu *vcpu =
        &ctx->x86.pv.restore.vcpus[vcpuid];
    DECLARE_DOMCTL;

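    /* The blob was capped at 128 bytes on receipt, matching the padded size
     * of the domctl union, so this memcpy cannot overrun. */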
    domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
    domctl.domain = ctx->domid;
    memcpy(&domctl.u.ext_vcpucontext, vcpu->extd.ptr, vcpu->extd.size);

    if ( xc_domctl(xch, &domctl) != 0 )
    {
        PERROR("Failed to set vcpu%u's extended info", vcpuid);
        return -1;
    }

    return 0;
}

/*
 * Process one stashed vcpu's worth of xsave state and send it to Xen.
 */
static int process_vcpu_xsave(struct xc_sr_context *ctx,
                              unsigned int vcpuid)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_x86_pv_restore_vcpu *vcpu =
        &ctx->x86.pv.restore.vcpus[vcpuid];
    int rc;
    DECLARE_DOMCTL;
    DECLARE_HYPERCALL_BUFFER(void, buffer);

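    /* xsave state is passed by pointer, so it must be bounced through a
     * hypercall-safe buffer rather than handed to Xen directly. */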
    buffer = xc_hypercall_buffer_alloc(xch, buffer, vcpu->xsave.size);
    if ( !buffer )
    {
        ERROR("Unable to allocate %zu bytes for xsave hypercall buffer",
              vcpu->xsave.size);
        return -1;
    }

    domctl.cmd = XEN_DOMCTL_setvcpuextstate;
    domctl.domain = ctx->domid;
    domctl.u.vcpuextstate.vcpu = vcpuid;
    domctl.u.vcpuextstate.size = vcpu->xsave.size;
    set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);

    memcpy(buffer, vcpu->xsave.ptr, vcpu->xsave.size);

    rc = xc_domctl(xch, &domctl);
    if ( rc )
        PERROR("Failed to set vcpu%u's xsave info", vcpuid);

    xc_hypercall_buffer_free(xch, buffer);

    return rc;
}

/*
 * Process one stashed vcpu's worth of msr state and send it to Xen.
 */
static int process_vcpu_msrs(struct xc_sr_context *ctx,
                             unsigned int vcpuid)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_x86_pv_restore_vcpu *vcpu =
        &ctx->x86.pv.restore.vcpus[vcpuid];
    int rc;
    DECLARE_DOMCTL;
    DECLARE_HYPERCALL_BUFFER(void, buffer);

    buffer = xc_hypercall_buffer_alloc(xch, buffer, vcpu->msr.size);
    if ( !buffer )
    {
        ERROR("Unable to allocate %zu bytes for msr hypercall buffer",
              vcpu->msr.size);
        return -1;
    }

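    /* The blob size was validated as a whole multiple of
     * xen_domctl_vcpu_msr_t when the record arrived. */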
    domctl.cmd = XEN_DOMCTL_set_vcpu_msrs;
    domctl.domain = ctx->domid;
    domctl.u.vcpu_msrs.vcpu = vcpuid;
    domctl.u.vcpu_msrs.msr_count = vcpu->msr.size /
                                   sizeof(xen_domctl_vcpu_msr_t);
    set_xen_guest_handle(domctl.u.vcpu_msrs.msrs, buffer);

    memcpy(buffer, vcpu->msr.ptr, vcpu->msr.size);

    rc = xc_domctl(xch, &domctl);
    if ( rc )
        PERROR("Failed to set vcpu%u's msrs", vcpuid);

    xc_hypercall_buffer_free(xch, buffer);

    return rc;
}

/*
 * Process all stashed vcpu context and send to Xen.
 */
static int update_vcpu_context(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_x86_pv_restore_vcpu *vcpu;
    unsigned int i;
    int rc = 0;

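    /* vcpu0's basic state is mandatory (it carries the start_info frame);
     * the extended/xsave/msr blobs are optional per vcpu. */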
    for ( i = 0; i < ctx->x86.pv.restore.nr_vcpus; ++i )
    {
        vcpu = &ctx->x86.pv.restore.vcpus[i];

        if ( vcpu->basic.ptr )
        {
            rc = process_vcpu_basic(ctx, i);
            if ( rc )
                return rc;
        }
        else if ( i == 0 )
        {
            ERROR("Sender didn't send vcpu0's basic state");
            return -1;
        }

        if ( vcpu->extd.ptr )
        {
            rc = process_vcpu_extended(ctx, i);
            if ( rc )
                return rc;
        }

        if ( vcpu->xsave.ptr )
        {
            rc = process_vcpu_xsave(ctx, i);
            if ( rc )
                return rc;
        }

        if ( vcpu->msr.ptr )
        {
            rc = process_vcpu_msrs(ctx, i);
            if ( rc )
                return rc;
        }
    }

    return rc;
}

/*
 * Copy the p2m, which has been constructed locally as memory was allocated,
 * over the p2m in the guest, so the guest can find its memory again on
 * resume.
 */
static int update_guest_p2m(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t mfn, pfn, *guest_p2m = NULL;
    unsigned int i;
    int rc = -1;

    for ( i = 0; i < ctx->x86.pv.p2m_frames; ++i )
    {
        pfn = ctx->x86.pv.p2m_pfns[i];

        if ( pfn > ctx->x86.pv.max_pfn )
        {
            ERROR("pfn (%#lx) for p2m_frame_list[%u] out of range",
                  pfn, i);
            goto err;
        }

        if ( (ctx->x86.pv.restore.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
        {
            ERROR("pfn (%#lx) for p2m_frame_list[%u] has bad type %u", pfn, i,
                  (ctx->x86.pv.restore.pfn_types[pfn] >>
                   XEN_DOMCTL_PFINFO_LTAB_SHIFT));
            goto err;
        }

        mfn = pfn_to_mfn(ctx, pfn);
        if ( !mfn_in_pseudophysmap(ctx, mfn) )
        {
            ERROR("p2m_frame_list[%u] has bad mfn", i);
            dump_bad_pseudophysmap_entry(ctx, mfn);
            goto err;
        }

        ctx->x86.pv.p2m_pfns[i] = mfn;
    }

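    /* Map the guest's p2m frames (now expressed as mfns) as one
     * virtually-contiguous region and overwrite them with the rebuilt p2m. */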
    guest_p2m = xc_map_foreign_pages(xch, ctx->domid, PROT_WRITE,
                                     ctx->x86.pv.p2m_pfns,
                                     ctx->x86.pv.p2m_frames);
    if ( !guest_p2m )
    {
        PERROR("Failed to map p2m frames");
        goto err;
    }

    memcpy(guest_p2m, ctx->x86.pv.p2m,
           (ctx->x86.pv.max_pfn + 1) * ctx->x86.pv.width);
    rc = 0;

 err:
    if ( guest_p2m )
        munmap(guest_p2m, ctx->x86.pv.p2m_frames * PAGE_SIZE);

    return rc;
}

/*
 * The valid width/pt_levels values in X86_PV_INFO are inextricably linked.
 * Cross-check the legitimate combinations.
 */
static bool valid_x86_pv_info_combination(
    const struct xc_sr_rec_x86_pv_info *info)
{
    switch ( info->guest_width )
    {
    case 4: return info->pt_levels == 3;
    case 8: return info->pt_levels == 4;
    default: return false;
    }
}

/*
 * Process an X86_PV_INFO record.
 */
static int handle_x86_pv_info(struct xc_sr_context *ctx,
                              struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_x86_pv_info *info = rec->data;

    if ( ctx->x86.pv.restore.seen_pv_info )
    {
        ERROR("Already received X86_PV_INFO record");
        return -1;
    }

    if ( rec->length < sizeof(*info) )
    {
        ERROR("X86_PV_INFO record truncated: length %u, expected %zu",
              rec->length, sizeof(*info));
        return -1;
    }

    if ( !valid_x86_pv_info_combination(info) )
    {
        ERROR("Invalid X86_PV_INFO combination: width %u, pt_levels %u",
              info->guest_width, info->pt_levels);
        return -1;
    }

    /*
     * PV domains default to native width.  For an incoming compat domain, we
     * will typically be the first entity to inform Xen.
     */
    if ( info->guest_width != ctx->x86.pv.width )
    {
        struct xen_domctl domctl = {
            .domain = ctx->domid,
            .cmd = XEN_DOMCTL_set_address_size,
            .u.address_size.size = info->guest_width * 8,
        };
        int rc = do_domctl(xch, &domctl);

        if ( rc != 0 )
        {
            ERROR("Failed to update d%d address size to %u",
                  ctx->domid, info->guest_width * 8);
            return -1;
        }

        /* The domain's information changed; refresh our cached copy. */
        rc = x86_pv_domain_info(ctx);
        if ( rc != 0 )
        {
            ERROR("Unable to refresh guest information");
            return -1;
        }
    }

    /* Sanity check (possibly new) domain settings. */
    if ( (info->guest_width != ctx->x86.pv.width) ||
         (info->pt_levels != ctx->x86.pv.levels) )
    {
        ERROR("X86_PV_INFO width/pt_levels settings %u/%u mismatch with d%d %u/%u",
              info->guest_width, info->pt_levels, ctx->domid,
              ctx->x86.pv.width, ctx->x86.pv.levels);
        return -1;
    }

    ctx->x86.pv.restore.seen_pv_info = true;
    return 0;
}

/*
 * Process an X86_PV_P2M_FRAMES record.  Takes care of expanding the local
 * p2m state if needed.
 */
static int handle_x86_pv_p2m_frames(struct xc_sr_context *ctx,
                                    struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_x86_pv_p2m_frames *data = rec->data;
    unsigned int start, end, x, fpp = PAGE_SIZE / ctx->x86.pv.width;
    int rc;

    /* v2 compat.  Infer the position of STATIC_DATA_END. */
    if ( ctx->restore.format_version < 3 && !ctx->restore.seen_static_data_end )
    {
        rc = handle_static_data_end(ctx);
        if ( rc )
        {
            ERROR("Inferred STATIC_DATA_END record failed");
            return rc;
        }
    }

    if ( !ctx->restore.seen_static_data_end )
    {
        ERROR("No STATIC_DATA_END seen");
        return -1;
    }

    if ( !ctx->x86.pv.restore.seen_pv_info )
    {
        ERROR("Not yet received X86_PV_INFO record");
        return -1;
    }

    if ( rec->length < sizeof(*data) )
    {
        ERROR("X86_PV_P2M_FRAMES record truncated: length %u, min %zu",
              rec->length, sizeof(*data));
        return -1;
    }

    if ( data->start_pfn > data->end_pfn )
    {
        ERROR("Start pfn in stream (%#x) exceeds end pfn (%#x)",
              data->start_pfn, data->end_pfn);
        return -1;
    }

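    /* Convert the pfn bounds into indices into the p2m frame list (fpp
     * entries per frame). */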
    start = data->start_pfn / fpp;
    end = data->end_pfn / fpp + 1;

    if ( rec->length != sizeof(*data) + ((end - start) * sizeof(uint64_t)) )
    {
        ERROR("X86_PV_P2M_FRAMES record wrong size: start_pfn %#x"
              ", end_pfn %#x, length %u, expected %zu + (%u - %u) * %zu",
              data->start_pfn, data->end_pfn, rec->length,
              sizeof(*data), end, start, sizeof(uint64_t));
        return -1;
    }

    if ( data->end_pfn > ctx->x86.pv.max_pfn )
    {
        rc = expand_p2m(ctx, data->end_pfn);
        if ( rc )
            return rc;
    }

    for ( x = 0; x < (end - start); ++x )
        ctx->x86.pv.p2m_pfns[start + x] = data->p2m_pfns[x];

    return 0;
}

/*
 * Processes X86_PV_VCPU_{BASIC,EXTENDED,XSAVE,MSRS} records from the stream.
 * The blobs are all stashed to one side, as they need to be deferred until
 * the very end of the stream rather than being sent to Xen at the point
 * they arrive.  Performs all pre-hypercall size validation.
 */
static int handle_x86_pv_vcpu_blob(struct xc_sr_context *ctx,
                                   struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_x86_pv_vcpu_hdr *vhdr = rec->data;
    struct xc_sr_x86_pv_restore_vcpu *vcpu;
    const char *rec_name;
    size_t blobsz;
    struct xc_sr_blob *blob = NULL;
    int rc = -1;

    switch ( rec->type )
    {
    case REC_TYPE_X86_PV_VCPU_BASIC:
        rec_name = "X86_PV_VCPU_BASIC";
        break;

    case REC_TYPE_X86_PV_VCPU_EXTENDED:
        rec_name = "X86_PV_VCPU_EXTENDED";
        break;

    case REC_TYPE_X86_PV_VCPU_XSAVE:
        rec_name = "X86_PV_VCPU_XSAVE";
        break;

    case REC_TYPE_X86_PV_VCPU_MSRS:
        rec_name = "X86_PV_VCPU_MSRS";
        break;

    default:
        ERROR("Unrecognised vcpu blob record %s (%u)",
              rec_type_to_str(rec->type), rec->type);
        goto out;
    }

    /* Confirm that there is a complete header. */
    if ( rec->length < sizeof(*vhdr) )
    {
        ERROR("%s record truncated: length %u, header size %zu",
              rec_name, rec->length, sizeof(*vhdr));
        goto out;
    }

    blobsz = rec->length - sizeof(*vhdr);

    /*
     * Tolerate empty records.  Older sending sides used to accidentally
     * generate them.
     */
    if ( blobsz == 0 )
    {
        DBGPRINTF("Skipping empty %s record for vcpu %u\n",
                  rec_type_to_str(rec->type), vhdr->vcpu_id);
        rc = 0;
        goto out;
    }

    /* Check that the vcpu id is within range. */
    if ( vhdr->vcpu_id >= ctx->x86.pv.restore.nr_vcpus )
    {
        ERROR("%s record vcpu_id (%u) exceeds domain max (%u)",
              rec_name, vhdr->vcpu_id, ctx->x86.pv.restore.nr_vcpus - 1);
        goto out;
    }

    vcpu = &ctx->x86.pv.restore.vcpus[vhdr->vcpu_id];

    /* Further per-record checks, where possible. */
    switch ( rec->type )
    {
    case REC_TYPE_X86_PV_VCPU_BASIC:
    {
        size_t vcpusz = ctx->x86.pv.width == 8 ?
            sizeof(vcpu_guest_context_x86_64_t) :
            sizeof(vcpu_guest_context_x86_32_t);

        if ( blobsz != vcpusz )
        {
            ERROR("%s record wrong size: expected %zu, got %u",
                  rec_name, sizeof(*vhdr) + vcpusz, rec->length);
            goto out;
        }
        blob = &vcpu->basic;
        break;
    }

    case REC_TYPE_X86_PV_VCPU_EXTENDED:
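        /* 128 bytes matches the padded size of the domctl union which
         * process_vcpu_extended() memcpy()s this blob into. */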
        if ( blobsz > 128 )
        {
            ERROR("%s record too long: max %zu, got %u",
                  rec_name, sizeof(*vhdr) + 128, rec->length);
            goto out;
        }
        blob = &vcpu->extd;
        break;

    case REC_TYPE_X86_PV_VCPU_XSAVE:
        if ( blobsz < 16 )
        {
            ERROR("%s record too short: min %zu, got %u",
                  rec_name, sizeof(*vhdr) + 16, rec->length);
            goto out;
        }
        blob = &vcpu->xsave;
        break;

    case REC_TYPE_X86_PV_VCPU_MSRS:
        if ( blobsz % sizeof(xen_domctl_vcpu_msr_t) != 0 )
        {
            ERROR("%s record payload size %zu expected to be a multiple of %zu",
                  rec_name, blobsz, sizeof(xen_domctl_vcpu_msr_t));
            goto out;
        }
        blob = &vcpu->msr;
        break;
    }

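    /* Stash a copy of the blob; it is replayed to Xen by
     * update_vcpu_context() once the whole stream has arrived. */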
    rc = update_blob(blob, vhdr->context, blobsz);
    if ( rc )
        ERROR("Unable to allocate %zu bytes for vcpu%u %s blob",
              blobsz, vhdr->vcpu_id, rec_name);

 out:
    return rc;
}

/*
 * Process a SHARED_INFO record from the stream.
 */
static int handle_shared_info(struct xc_sr_context *ctx,
                              struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    unsigned int i;
    int rc = -1;
    shared_info_any_t *guest_shinfo = NULL;
    const shared_info_any_t *old_shinfo = rec->data;

    if ( !ctx->x86.pv.restore.seen_pv_info )
    {
        ERROR("Not yet received X86_PV_INFO record");
        return -1;
    }

    if ( rec->length != PAGE_SIZE )
    {
        ERROR("X86_PV_SHARED_INFO record wrong size: length %u"
              ", expected 4096", rec->length);
        goto err;
    }

    guest_shinfo = xc_map_foreign_range(
        xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
        ctx->dominfo.shared_info_frame);
    if ( !guest_shinfo )
    {
        PERROR("Failed to map Shared Info at mfn %#lx",
               ctx->dominfo.shared_info_frame);
        goto err;
    }

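    /* Copy the stashed vcpu_info and arch fields, then reset event-channel
     * state: the stale frame-list pointer and pending bits are cleared, and
     * all event channels start masked. */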
    MEMCPY_FIELD(guest_shinfo, old_shinfo, vcpu_info, ctx->x86.pv.width);
    MEMCPY_FIELD(guest_shinfo, old_shinfo, arch, ctx->x86.pv.width);

    SET_FIELD(guest_shinfo, arch.pfn_to_mfn_frame_list_list,
              0, ctx->x86.pv.width);

    MEMSET_ARRAY_FIELD(guest_shinfo, evtchn_pending, 0, ctx->x86.pv.width);
    for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
        SET_FIELD(guest_shinfo, vcpu_info[i].evtchn_pending_sel,
                  0, ctx->x86.pv.width);

    MEMSET_ARRAY_FIELD(guest_shinfo, evtchn_mask, 0xff, ctx->x86.pv.width);

    rc = 0;

 err:
    if ( guest_shinfo )
        munmap(guest_shinfo, PAGE_SIZE);

    return rc;
}

/* restore_ops function. */
static bool x86_pv_pfn_is_valid(const struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    return pfn <= ctx->x86.pv.max_pfn;
}

/* restore_ops function. */
static void x86_pv_set_page_type(struct xc_sr_context *ctx, xen_pfn_t pfn,
                                 unsigned long type)
{
    assert(pfn <= ctx->x86.pv.max_pfn);

    ctx->x86.pv.restore.pfn_types[pfn] = type;
}

/* restore_ops function. */
static void x86_pv_set_gfn(struct xc_sr_context *ctx, xen_pfn_t pfn,
                           xen_pfn_t mfn)
{
    assert(pfn <= ctx->x86.pv.max_pfn);

    if ( ctx->x86.pv.width == sizeof(uint64_t) )
        /* 64 bit guest.  Need to expand INVALID_MFN for 32 bit toolstacks. */
        ((uint64_t *)ctx->x86.pv.p2m)[pfn] = mfn == INVALID_MFN ? ~0ULL : mfn;
    else
        /* 32 bit guest.  Can truncate INVALID_MFN for 64 bit toolstacks. */
        ((uint32_t *)ctx->x86.pv.p2m)[pfn] = mfn;
}

/*
 * restore_ops function.  Convert pfns back to mfns in pagetables.  Possibly
 * needs to populate new frames if a PTE is found referring to a frame which
 * hasn't yet been seen from PAGE_DATA records.
 */
static int x86_pv_localise_page(struct xc_sr_context *ctx,
                                uint32_t type, void *page)
{
    xc_interface *xch = ctx->xch;
    uint64_t *table = page;
    uint64_t pte;
    unsigned int i, to_populate;
    xen_pfn_t pfns[(PAGE_SIZE / sizeof(uint64_t))];

    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;

    /* Only page tables need localisation. */
    if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
        return 0;

    /* Check to see whether we need to populate any new frames. */
    for ( i = 0, to_populate = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
    {
        pte = table[i];

        if ( pte & _PAGE_PRESENT )
        {
            xen_pfn_t pfn = pte_to_frame(pte);

#ifdef __i386__
            if ( pfn == INVALID_MFN )
            {
                ERROR("PTE truncation detected.  L%u[%u] = %016"PRIx64,
                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
                errno = E2BIG;
                return -1;
            }
#endif

            if ( pfn_to_mfn(ctx, pfn) == INVALID_MFN )
                pfns[to_populate++] = pfn;
        }
    }

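    /* Allocate guest frames for all referenced-but-unpopulated pfns in a
     * single batch before rewriting the PTEs. */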
    if ( to_populate && populate_pfns(ctx, to_populate, pfns, NULL) )
        return -1;

    for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
    {
        pte = table[i];

        if ( pte & _PAGE_PRESENT )
        {
            xen_pfn_t mfn, pfn;

            pfn = pte_to_frame(pte);
            mfn = pfn_to_mfn(ctx, pfn);

            if ( !mfn_in_pseudophysmap(ctx, mfn) )
            {
                ERROR("Bad mfn for L%u[%u] - pte %"PRIx64,
                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
                dump_bad_pseudophysmap_entry(ctx, mfn);
                errno = ERANGE;
                return -1;
            }

            table[i] = merge_pte(pte, mfn);
        }
    }

    return 0;
}

/*
 * restore_ops function.  Confirm that the incoming stream matches the type
 * of domain we are attempting to restore into.
 */
static int x86_pv_setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;

    if ( ctx->restore.guest_type != DHDR_TYPE_X86_PV )
    {
        ERROR("Unable to restore %s domain into an x86_pv domain",
              dhdr_type_to_str(ctx->restore.guest_type));
        return -1;
    }

    if ( ctx->restore.guest_page_size != PAGE_SIZE )
    {
        ERROR("Invalid page size %d for x86_pv domains",
              ctx->restore.guest_page_size);
        return -1;
    }

    rc = x86_pv_domain_info(ctx);
    if ( rc )
        return rc;

    ctx->x86.pv.restore.nr_vcpus = ctx->dominfo.max_vcpu_id + 1;
    ctx->x86.pv.restore.vcpus = calloc(ctx->x86.pv.restore.nr_vcpus,
                                       sizeof(struct xc_sr_x86_pv_restore_vcpu));
    if ( !ctx->x86.pv.restore.vcpus )
    {
        errno = ENOMEM;
        return -1;
    }

    rc = x86_pv_map_m2p(ctx);
    if ( rc )
        return rc;

    return rc;
}

/*
 * restore_ops function.
 */
static int x86_pv_process_record(struct xc_sr_context *ctx,
                                 struct xc_sr_record *rec)
{
    switch ( rec->type )
    {
    case REC_TYPE_X86_PV_INFO:
        return handle_x86_pv_info(ctx, rec);

    case REC_TYPE_X86_PV_P2M_FRAMES:
        return handle_x86_pv_p2m_frames(ctx, rec);

    case REC_TYPE_X86_PV_VCPU_BASIC:
    case REC_TYPE_X86_PV_VCPU_EXTENDED:
    case REC_TYPE_X86_PV_VCPU_XSAVE:
    case REC_TYPE_X86_PV_VCPU_MSRS:
        return handle_x86_pv_vcpu_blob(ctx, rec);

    case REC_TYPE_SHARED_INFO:
        return handle_shared_info(ctx, rec);

    case REC_TYPE_X86_TSC_INFO:
        return handle_x86_tsc_info(ctx, rec);

    case REC_TYPE_X86_CPUID_POLICY:
        return handle_x86_cpuid_policy(ctx, rec);

    case REC_TYPE_X86_MSR_POLICY:
        return handle_x86_msr_policy(ctx, rec);

    default:
        return RECORD_NOT_PROCESSED;
    }
}

/*
 * restore_ops function.  Update the vcpu context in Xen, pin the pagetables,
 * rewrite the p2m and seed the grant table.
 */
static int x86_pv_stream_complete(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;

    rc = update_vcpu_context(ctx);
    if ( rc )
        return rc;

    rc = pin_pagetables(ctx);
    if ( rc )
        return rc;

    rc = update_guest_p2m(ctx);
    if ( rc )
        return rc;

    rc = xc_dom_gnttab_seed(xch, ctx->domid, false,
                            ctx->restore.console_gfn,
                            ctx->restore.xenstore_gfn,
                            ctx->restore.console_domid,
                            ctx->restore.xenstore_domid);
    if ( rc )
    {
        PERROR("Failed to seed grant table");
        return rc;
    }

    return rc;
}

/*
 * restore_ops function.
 */
static int x86_pv_cleanup(struct xc_sr_context *ctx)
{
    free(ctx->x86.pv.p2m);
    free(ctx->x86.pv.p2m_pfns);

    if ( ctx->x86.pv.restore.vcpus )
    {
        unsigned int i;

        for ( i = 0; i < ctx->x86.pv.restore.nr_vcpus; ++i )
        {
            struct xc_sr_x86_pv_restore_vcpu *vcpu =
                &ctx->x86.pv.restore.vcpus[i];

            free(vcpu->basic.ptr);
            free(vcpu->extd.ptr);
            free(vcpu->xsave.ptr);
            free(vcpu->msr.ptr);
        }

        free(ctx->x86.pv.restore.vcpus);
    }

    free(ctx->x86.pv.restore.pfn_types);

    if ( ctx->x86.pv.m2p )
        munmap(ctx->x86.pv.m2p, ctx->x86.pv.nr_m2p_frames * PAGE_SIZE);

    free(ctx->x86.restore.cpuid.ptr);
    free(ctx->x86.restore.msr.ptr);

    return 0;
}

struct xc_sr_restore_ops restore_ops_x86_pv =
{
    .pfn_is_valid = x86_pv_pfn_is_valid,
    .pfn_to_gfn = pfn_to_mfn,
    .set_page_type = x86_pv_set_page_type,
    .set_gfn = x86_pv_set_gfn,
    .localise_page = x86_pv_localise_page,
    .setup = x86_pv_setup,
    .process_record = x86_pv_process_record,
    .static_data_complete = x86_static_data_complete,
    .stream_complete = x86_pv_stream_complete,
    .cleanup = x86_pv_cleanup,
};

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */