#include <arpa/inet.h>

#include <assert.h>

#include "xc_sr_common.h"

/*
 * Read and validate the Image and Domain headers.
 */
static int read_headers(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_ihdr ihdr;
    struct xc_sr_dhdr dhdr;

    if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Failed to read Image Header from stream");
        return -1;
    }

    ihdr.id      = ntohl(ihdr.id);
    ihdr.version = ntohl(ihdr.version);
    ihdr.options = ntohs(ihdr.options);

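    /*
     * The marker is an endianness-invariant bit pattern, which is why it
     * can be checked without a byte swap; the fields above are fixed
     * network byte order.
     */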
    if ( ihdr.marker != IHDR_MARKER )
    {
        ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
        return -1;
    }

    if ( ihdr.id != IHDR_ID )
    {
        ERROR("Invalid ID: Expected 0x%08x, Got 0x%08x", IHDR_ID, ihdr.id);
        return -1;
    }

    if ( ihdr.version < 2 || ihdr.version > 3 )
    {
        ERROR("Invalid Version: Expected 2 <= ver <= 3, Got %d",
              ihdr.version);
        return -1;
    }

    if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
    {
        ERROR("Unable to handle big endian streams");
        return -1;
    }

    ctx->restore.format_version = ihdr.version;

    if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Failed to read Domain Header from stream");
        return -1;
    }

    ctx->restore.guest_type = dhdr.type;
    ctx->restore.guest_page_size = (1U << dhdr.page_shift);

    if ( dhdr.xen_major == 0 )
    {
        IPRINTF("Found %s domain, converted from legacy stream format",
                dhdr_type_to_str(dhdr.type));
        DPRINTF(" Legacy conversion script version %u", dhdr.xen_minor);
    }
    else
        IPRINTF("Found %s domain from Xen %u.%u",
                dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);
    return 0;
}

/*
 * Is a pfn populated?
 */
static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    if ( pfn > ctx->restore.max_populated_pfn )
        return false;
    return test_bit(pfn, ctx->restore.populated_pfns);
}

/*
 * Set a pfn as populated, expanding the tracking structures if needed. To
 * avoid realloc()ing too excessively, the size is increased to the nearest
 * power of two large enough to contain the required pfn.
 */
static int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    xc_interface *xch = ctx->xch;

    if ( pfn > ctx->restore.max_populated_pfn )
    {
        xen_pfn_t new_max;
        size_t old_sz, new_sz;
        unsigned long *p;

        /* Round up to the nearest power of two larger than pfn, less 1. */
        new_max = pfn;
        new_max |= new_max >> 1;
        new_max |= new_max >> 2;
        new_max |= new_max >> 4;
        new_max |= new_max >> 8;
        new_max |= new_max >> 16;
#ifdef __x86_64__
        new_max |= new_max >> 32;
#endif
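        /*
         * Worked example of the smear above: pfn 0x1234 gives
         * 0x1234 | 0x091a = 0x1b3e, then | 0x06cf = 0x1fff; the remaining
         * shifts add nothing, leaving new_max = 0x1fff (2^13 - 1), the
         * smallest 2^n - 1 covering the pfn.
         */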

        old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1);
        new_sz = bitmap_size(new_max + 1);
        p = realloc(ctx->restore.populated_pfns, new_sz);
        if ( !p )
        {
            ERROR("Failed to realloc populated bitmap");
            errno = ENOMEM;
            return -1;
        }

        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);

        ctx->restore.populated_pfns    = p;
        ctx->restore.max_populated_pfn = new_max;
    }

    assert(!test_bit(pfn, ctx->restore.populated_pfns));
    set_bit(pfn, ctx->restore.populated_pfns);

    return 0;
}

/*
 * Given a set of pfns, obtain memory from Xen to fill the physmap for the
 * unpopulated subset. If types is NULL, no page type checking is performed
 * and all unpopulated pfns are populated.
 */
int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
                  const xen_pfn_t *original_pfns, const uint32_t *types)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
        *pfns = malloc(count * sizeof(*pfns));
    unsigned int i, nr_pfns = 0;
    int rc = -1;

    if ( !mfns || !pfns )
    {
        ERROR("Failed to allocate %zu bytes for populating the physmap",
              2 * count * sizeof(*mfns));
        goto err;
    }

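    /*
     * Accumulate the subset of pfns which need a fresh page: skip entries
     * with no backing page (XTAB) or which are marked broken, and anything
     * already populated by an earlier record.
     */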
    for ( i = 0; i < count; ++i )
    {
        if ( (!types ||
              (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
               types[i] != XEN_DOMCTL_PFINFO_BROKEN)) &&
             !pfn_is_populated(ctx, original_pfns[i]) )
        {
            rc = pfn_set_populated(ctx, original_pfns[i]);
            if ( rc )
                goto err;
            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
            ++nr_pfns;
        }
    }

    if ( nr_pfns )
    {
        rc = xc_domain_populate_physmap_exact(
            xch, ctx->domid, nr_pfns, 0, 0, mfns);
        if ( rc )
        {
            PERROR("Failed to populate physmap");
            goto err;
        }

        for ( i = 0; i < nr_pfns; ++i )
        {
            if ( mfns[i] == INVALID_MFN )
            {
                ERROR("Populate physmap failed for pfn %#"PRIpfn" (index %u)",
                      pfns[i], i);
                rc = -1;
                goto err;
            }

            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
        }
    }

    rc = 0;

 err:
    free(pfns);
    free(mfns);

    return rc;
}

/*
 * Given a list of pfns, their types, and a block of page data from the
 * stream, populate and record their types, map the relevant subset and copy
 * the data into the guest.
 */
static int process_page_data(struct xc_sr_context *ctx, unsigned int count,
                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
    int *map_errs = malloc(count * sizeof(*map_errs));
    int rc;
    void *mapping = NULL, *guest_page = NULL;
    unsigned int i, /* i indexes the pfns from the record. */
        j,          /* j indexes the subset of pfns we decide to map. */
        nr_pages = 0;

    if ( !mfns || !map_errs )
    {
        rc = -1;
        ERROR("Failed to allocate %zu bytes to process page data",
              count * (sizeof(*mfns) + sizeof(*map_errs)));
        goto err;
    }

    rc = populate_pfns(ctx, count, pfns, types);
    if ( rc )
    {
        ERROR("Failed to populate pfns for batch of %u pages", count);
        goto err;
    }

    for ( i = 0; i < count; ++i )
    {
        ctx->restore.ops.set_page_type(ctx, pfns[i], types[i]);

        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_NOTAB:

        case XEN_DOMCTL_PFINFO_L1TAB:
        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L2TAB:
        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L3TAB:
        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L4TAB:
        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:

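            /* These types all carry a page of data in the stream;
             * remember the frame so the batch below can map it. */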
            mfns[nr_pages++] = ctx->restore.ops.pfn_to_gfn(ctx, pfns[i]);
            break;
        }
    }

    /* Nothing to do? */
    if ( nr_pages == 0 )
        goto done;

    mapping = guest_page = xenforeignmemory_map(
        xch->fmem, ctx->domid, PROT_READ | PROT_WRITE,
        nr_pages, mfns, map_errs);
    if ( !mapping )
    {
        rc = -1;
        PERROR("Unable to map %u mfns for %u pages of data",
               nr_pages, count);
        goto err;
    }

    for ( i = 0, j = 0; i < count; ++i )
    {
        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_XTAB:
        case XEN_DOMCTL_PFINFO_BROKEN:
        case XEN_DOMCTL_PFINFO_XALLOC:
            /* No page data to deal with. */
            continue;
        }

        if ( map_errs[j] )
        {
            rc = -1;
            ERROR("Mapping pfn %#"PRIpfn" (mfn %#"PRIpfn", type %#"PRIx32") failed with %d",
                  pfns[i], mfns[j], types[i], map_errs[j]);
            goto err;
        }

        /* Undo page normalisation done by the saver. */
        rc = ctx->restore.ops.localise_page(ctx, types[i], page_data);
        if ( rc )
        {
            ERROR("Failed to localise pfn %#"PRIpfn" (type %#"PRIx32")",
                  pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
            goto err;
        }

        if ( ctx->restore.verify )
        {
            /* Verify mode - compare incoming data to what we already have. */
            if ( memcmp(guest_page, page_data, PAGE_SIZE) )
                ERROR("verify pfn %#"PRIpfn" failed (type %#"PRIx32")",
                      pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
        }
        else
        {
            /* Regular mode - copy incoming data into place. */
            memcpy(guest_page, page_data, PAGE_SIZE);
        }

        ++j;
        guest_page += PAGE_SIZE;
        page_data += PAGE_SIZE;
    }

 done:
    rc = 0;

 err:
    if ( mapping )
        xenforeignmemory_unmap(xch->fmem, mapping, nr_pages);

    free(map_errs);
    free(mfns);

    return rc;
}

/*
 * Validate a PAGE_DATA record from the stream, and pass the results to
 * process_page_data() to actually perform the legwork.
 */
static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_page_data_header *pages = rec->data;
    unsigned int i, pages_of_data = 0;
    int rc = -1;

    xen_pfn_t *pfns = NULL, pfn;
    uint32_t *types = NULL, type;

    /*
     * v2 compatibility only exists for x86 streams. This is a bit of a
     * bodge, but it is less bad than duplicating handle_page_data() between
     * different architectures.
     */
#if defined(__i386__) || defined(__x86_64__)
    /* v2 compat. Infer the position of STATIC_DATA_END. */
    if ( ctx->restore.format_version < 3 && !ctx->restore.seen_static_data_end )
    {
        rc = handle_static_data_end(ctx);
        if ( rc )
        {
            ERROR("Inferred STATIC_DATA_END record failed");
            goto err;
        }
        rc = -1;
    }

    if ( !ctx->restore.seen_static_data_end )
    {
        ERROR("No STATIC_DATA_END seen");
        goto err;
    }
#endif

    if ( rec->length < sizeof(*pages) )
    {
        ERROR("PAGE_DATA record truncated: length %u, min %zu",
              rec->length, sizeof(*pages));
        goto err;
    }

    if ( pages->count < 1 )
    {
        ERROR("Expected at least 1 pfn in PAGE_DATA record");
        goto err;
    }

    if ( rec->length < sizeof(*pages) + (pages->count * sizeof(uint64_t)) )
    {
        ERROR("PAGE_DATA record (length %u) too short to contain %u"
              " pfns worth of information", rec->length, pages->count);
        goto err;
    }

    pfns = malloc(pages->count * sizeof(*pfns));
    types = malloc(pages->count * sizeof(*types));
    if ( !pfns || !types )
    {
        ERROR("Unable to allocate enough memory for %u pfns",
              pages->count);
        goto err;
    }

    for ( i = 0; i < pages->count; ++i )
    {
        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
        if ( !ctx->restore.ops.pfn_is_valid(ctx, pfn) )
        {
            ERROR("pfn %#"PRIpfn" (index %u) outside domain maximum", pfn, i);
            goto err;
        }

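        /*
         * The type lives in the top four bits of the 64-bit stream entry;
         * shifting right by 32 leaves it at the position given by
         * XEN_DOMCTL_PFINFO_LTAB_SHIFT, as used by the rest of the
         * toolstack.
         */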
        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
        if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
             ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
        {
            ERROR("Invalid type %#"PRIx32" for pfn %#"PRIpfn" (index %u)",
                  type, pfn, i);
            goto err;
        }

        if ( type < XEN_DOMCTL_PFINFO_BROKEN )
            /* NOTAB and all L1 through L4 tables (including pinned) should
             * have a page worth of data in the record. */
            pages_of_data++;

        pfns[i] = pfn;
        types[i] = type;
    }

    if ( rec->length != (sizeof(*pages) +
                         (sizeof(uint64_t) * pages->count) +
                         (PAGE_SIZE * pages_of_data)) )
    {
        ERROR("PAGE_DATA record wrong size: length %u, expected "
              "%zu + %zu + %lu", rec->length, sizeof(*pages),
              (sizeof(uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
        goto err;
    }

    rc = process_page_data(ctx, pages->count, pfns, types,
                           &pages->pfn[pages->count]);
 err:
    free(types);
    free(pfns);

    return rc;
}

/*
 * Send checkpoint dirty pfn list to primary.
 */
static int send_checkpoint_dirty_pfn_list(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = -1;
    unsigned int count, written;
    uint64_t i, *pfns = NULL;
    struct iovec *iov = NULL;
    xc_shadow_op_stats_t stats = { 0, ctx->restore.p2m_size };
    struct xc_sr_record rec = {
        .type = REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST,
    };
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( xc_shadow_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
             HYPERCALL_BUFFER(dirty_bitmap), ctx->restore.p2m_size,
             NULL, 0, &stats) != ctx->restore.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        goto err;
    }

    for ( i = 0, count = 0; i < ctx->restore.p2m_size; i++ )
    {
        if ( test_bit(i, dirty_bitmap) )
            count++;
    }

    pfns = malloc(count * sizeof(*pfns));
    if ( !pfns )
    {
        ERROR("Unable to allocate %zu bytes of memory for dirty pfn list",
              count * sizeof(*pfns));
        goto err;
    }

    for ( i = 0, written = 0; i < ctx->restore.p2m_size; ++i )
    {
        if ( !test_bit(i, dirty_bitmap) )
            continue;

        if ( written >= count )
        {
            ERROR("Dirty pfn list exceeds the %u entries counted above",
                  count);
            goto err;
        }

        pfns[written++] = i;
    }

    /* iovec[] for writev(). */
    iov = malloc(3 * sizeof(*iov));
    if ( !iov )
    {
        ERROR("Unable to allocate memory for sending dirty bitmap");
        goto err;
    }

    rec.length = count * sizeof(*pfns);

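    /*
     * Hand-roll the record as three iovecs: the type and length fields of
     * the record header, followed by the pfn list as the payload. This
     * keeps the usual record framing while avoiding a copy of the
     * (potentially large) pfn list; as the entries are uint64_t, the
     * length is already a multiple of 8 and needs no padding.
     */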
    iov[0].iov_base = &rec.type;
    iov[0].iov_len = sizeof(rec.type);

    iov[1].iov_base = &rec.length;
    iov[1].iov_len = sizeof(rec.length);

    iov[2].iov_base = pfns;
    iov[2].iov_len = count * sizeof(*pfns);

    if ( writev_exact(ctx->restore.send_back_fd, iov, 3) )
    {
        PERROR("Failed to write dirty bitmap to stream");
        goto err;
    }

    rc = 0;
 err:
    free(pfns);
    free(iov);
    return rc;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
static int handle_checkpoint(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = 0, ret;
    unsigned int i;

    if ( ctx->stream_type == XC_STREAM_PLAIN )
    {
        ERROR("Found checkpoint in non-checkpointed stream");
        rc = -1;
        goto err;
    }

    ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data);
    switch ( ret )
    {
    case XGR_CHECKPOINT_SUCCESS:
        break;

    case XGR_CHECKPOINT_FAILOVER:
        if ( ctx->restore.buffer_all_records )
            rc = BROKEN_CHANNEL;
        else
            /* We don't have a consistent state */
            rc = -1;
        goto err;

    default: /* Other fatal error */
        rc = -1;
        goto err;
    }

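    /*
     * The first CHECKPOINT record merely switches us into buffering mode;
     * thereafter, buffered records are only applied once the next
     * checkpoint arrives, so failover always lands on a consistent state.
     */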
    if ( ctx->restore.buffer_all_records )
    {
        IPRINTF("All records buffered");

        for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        {
            rc = process_record(ctx, &ctx->restore.buffered_records[i]);
            if ( rc )
                goto err;
        }
        ctx->restore.buffered_rec_num = 0;
        IPRINTF("All records processed");
    }
    else
        ctx->restore.buffer_all_records = true;

    if ( ctx->stream_type == XC_STREAM_COLO )
    {
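        /*
         * The COLO callbacks below return 1 on success and 2 when the
         * channel to the primary has broken (triggering failover);
         * anything else is treated as a fatal error.
         */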
#define HANDLE_CALLBACK_RETURN_VALUE(ret)                   \
    do {                                                    \
        if ( ret == 1 )                                     \
            rc = 0; /* Success */                           \
        else                                                \
        {                                                   \
            if ( ret == 2 )                                 \
                rc = BROKEN_CHANNEL;                        \
            else                                            \
                rc = -1; /* Some unspecified error */       \
            goto err;                                       \
        }                                                   \
    } while (0)

        /* COLO */

        /* We need to resume the guest. */
        rc = ctx->restore.ops.stream_complete(ctx);
        if ( rc )
            goto err;

        ctx->restore.callbacks->restore_results(ctx->restore.xenstore_gfn,
                                                ctx->restore.console_gfn,
                                                ctx->restore.callbacks->data);

        /* Resume the secondary vm. */
        ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* Wait for a new checkpoint. */
        ret = ctx->restore.callbacks->wait_checkpoint(
            ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* Suspend the secondary vm. */
        ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

#undef HANDLE_CALLBACK_RETURN_VALUE

        rc = send_checkpoint_dirty_pfn_list(ctx);
        if ( rc )
            goto err;
    }

 err:
    return rc;
}

static int buffer_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    unsigned int new_alloc_num;
    struct xc_sr_record *p;

    if ( ctx->restore.buffered_rec_num >= ctx->restore.allocated_rec_num )
    {
        new_alloc_num = ctx->restore.allocated_rec_num + DEFAULT_BUF_RECORDS;
        p = realloc(ctx->restore.buffered_records,
                    new_alloc_num * sizeof(struct xc_sr_record));
        if ( !p )
        {
            ERROR("Failed to realloc memory for buffered records");
            return -1;
        }

        ctx->restore.buffered_records = p;
        ctx->restore.allocated_rec_num = new_alloc_num;
    }

    memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
           rec, sizeof(*rec));
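
    /*
     * Note: this is a shallow copy, so ownership of rec->data transfers
     * to the buffered record; it is freed when process_record()
     * eventually consumes the buffered copy.
     */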

    return 0;
}

int handle_static_data_end(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned int missing = 0;
    int rc = 0;

    if ( ctx->restore.seen_static_data_end )
    {
        ERROR("Multiple STATIC_DATA_END records found");
        return -1;
    }

    ctx->restore.seen_static_data_end = true;

    rc = ctx->restore.ops.static_data_complete(ctx, &missing);
    if ( rc )
        return rc;

    if ( ctx->restore.callbacks->static_data_done &&
         (rc = ctx->restore.callbacks->static_data_done(
             missing, ctx->restore.callbacks->data)) != 0 )
        ERROR("static_data_done() callback failed: %d\n", rc);

    return rc;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    int rc = 0;

    switch ( rec->type )
    {
    case REC_TYPE_END:
        break;

    case REC_TYPE_PAGE_DATA:
        rc = handle_page_data(ctx, rec);
        break;

    case REC_TYPE_VERIFY:
        DPRINTF("Verify mode enabled");
        ctx->restore.verify = true;
        break;

    case REC_TYPE_CHECKPOINT:
        rc = handle_checkpoint(ctx);
        break;

    case REC_TYPE_STATIC_DATA_END:
        rc = handle_static_data_end(ctx);
        break;

    default:
        rc = ctx->restore.ops.process_record(ctx, rec);
        break;
    }

    free(rec->data);
    rec->data = NULL;

    return rc;
}

static int setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( ctx->stream_type == XC_STREAM_COLO )
    {
        dirty_bitmap = xc_hypercall_buffer_alloc_pages(
            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));

        if ( !dirty_bitmap )
        {
            ERROR("Unable to allocate memory for dirty bitmap");
            rc = -1;
            goto err;
        }
    }

    rc = ctx->restore.ops.setup(ctx);
    if ( rc )
        goto err;

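    /*
     * Start with an 8k-pfn bitmap (covering 32MB of guest memory at 4k
     * pages); pfn_set_populated() grows it on demand.
     */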
    ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1;
    ctx->restore.populated_pfns = bitmap_alloc(
        ctx->restore.max_populated_pfn + 1);
    if ( !ctx->restore.populated_pfns )
    {
        ERROR("Unable to allocate memory for populated_pfns bitmap");
        rc = -1;
        goto err;
    }

    ctx->restore.buffered_records = malloc(
        DEFAULT_BUF_RECORDS * sizeof(struct xc_sr_record));
    if ( !ctx->restore.buffered_records )
    {
        ERROR("Unable to allocate memory for buffered records");
        rc = -1;
        goto err;
    }
    ctx->restore.allocated_rec_num = DEFAULT_BUF_RECORDS;

 err:
    return rc;
}

static void cleanup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned int i;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        free(ctx->restore.buffered_records[i].data);

    if ( ctx->stream_type == XC_STREAM_COLO )
        xc_hypercall_buffer_free_pages(
            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));

    free(ctx->restore.buffered_records);
    free(ctx->restore.populated_pfns);

    if ( ctx->restore.ops.cleanup(ctx) )
        PERROR("Failed to clean up");
}

/*
 * Restore a domain.
 */
static int restore(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_record rec;
    int rc, saved_rc = 0, saved_errno = 0;

    IPRINTF("Restoring domain");

    rc = setup(ctx);
    if ( rc )
        goto err;

    do
    {
        rc = read_record(ctx, ctx->fd, &rec);
        if ( rc )
        {
            if ( ctx->restore.buffer_all_records )
                goto remus_failover;
            else
                goto err;
        }

        if ( ctx->restore.buffer_all_records &&
             rec.type != REC_TYPE_END &&
             rec.type != REC_TYPE_CHECKPOINT )
        {
            rc = buffer_record(ctx, &rec);
            if ( rc )
                goto err;
        }
        else
        {
            rc = process_record(ctx, &rec);
            if ( rc == RECORD_NOT_PROCESSED )
            {
                if ( rec.type & REC_TYPE_OPTIONAL )
                    DPRINTF("Ignoring optional record %#x (%s)",
                            rec.type, rec_type_to_str(rec.type));
                else
                {
                    ERROR("Mandatory record %#x (%s) not handled",
                          rec.type, rec_type_to_str(rec.type));
                    rc = -1;
                    goto err;
                }
            }
            else if ( rc == BROKEN_CHANNEL )
                goto remus_failover;
            else if ( rc )
                goto err;
        }

    } while ( rec.type != REC_TYPE_END );

 remus_failover:
    if ( ctx->stream_type == XC_STREAM_COLO )
    {
        /* With COLO, we have already called stream_complete */
        rc = 0;
        IPRINTF("COLO Failover");
        goto done;
    }

    /*
     * With Remus, reaching here means something failed on the primary;
     * fail over from the last checkpoint state.
     */
    rc = ctx->restore.ops.stream_complete(ctx);
    if ( rc )
        goto err;

    IPRINTF("Restore successful");
    goto done;

 err:
    saved_errno = errno;
    saved_rc = rc;
    PERROR("Restore failed");

 done:
    cleanup(ctx);

    if ( saved_rc )
    {
        rc = saved_rc;
        errno = saved_errno;
    }

    return rc;
}

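/*
 * Illustrative call into the entry point below (a hedged sketch; real
 * callers live in libxl and wire up real event channels, domids, and
 * callbacks, so every parameter value here is an assumption):
 *
 *     unsigned long store_mfn = 0, console_gfn = 0;
 *     struct restore_callbacks cb = { 0 };
 *     int rc = xc_domain_restore(xch, io_fd, domid,
 *                                store_evtchn, &store_mfn, store_domid,
 *                                console_evtchn, &console_gfn, console_domid,
 *                                XC_STREAM_PLAIN, &cb, -1);
 *
 * For XC_STREAM_PLAIN no checkpoint callbacks are asserted, and
 * send_back_fd is only used by checkpointed streams, hence the empty
 * callback struct and the -1 fd in this sketch.
 */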
int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                      unsigned int store_evtchn, unsigned long *store_mfn,
                      uint32_t store_domid, unsigned int console_evtchn,
                      unsigned long *console_gfn, uint32_t console_domid,
                      xc_stream_type_t stream_type,
                      struct restore_callbacks *callbacks, int send_back_fd)
{
    xen_pfn_t nr_pfns;
    struct xc_sr_context ctx = {
        .xch = xch,
        .fd = io_fd,
        .stream_type = stream_type,
    };

    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
    ctx.restore.console_evtchn = console_evtchn;
    ctx.restore.console_domid = console_domid;
    ctx.restore.xenstore_evtchn = store_evtchn;
    ctx.restore.xenstore_domid = store_domid;
    ctx.restore.callbacks = callbacks;
    ctx.restore.send_back_fd = send_back_fd;

    /* Sanity check stream_type-related parameters */
    switch ( stream_type )
    {
    case XC_STREAM_COLO:
        assert(callbacks->suspend &&
               callbacks->postcopy &&
               callbacks->wait_checkpoint &&
               callbacks->restore_results);
        /* Fallthrough */
    case XC_STREAM_REMUS:
        assert(callbacks->checkpoint);
        /* Fallthrough */
    case XC_STREAM_PLAIN:
        break;

    default:
        assert(!"Bad stream_type");
        break;
    }

    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
    {
        PERROR("Failed to get domain info");
        return -1;
    }

    if ( ctx.dominfo.domid != dom )
    {
        ERROR("Domain %u does not exist", dom);
        return -1;
    }

    DPRINTF("fd %d, dom %u, hvm %u, stream_type %d",
            io_fd, dom, ctx.dominfo.hvm, stream_type);

    ctx.domid = dom;

    if ( read_headers(&ctx) )
        return -1;

    if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 )
    {
        PERROR("Unable to obtain the guest p2m size");
        return -1;
    }

    ctx.restore.p2m_size = nr_pfns;
    ctx.restore.ops = ctx.dominfo.hvm
        ? restore_ops_x86_hvm : restore_ops_x86_pv;

    if ( restore(&ctx) )
        return -1;

    IPRINTF("XenStore: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.xenstore_gfn,
            ctx.restore.xenstore_domid,
            ctx.restore.xenstore_evtchn);

    IPRINTF("Console: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.console_gfn,
            ctx.restore.console_domid,
            ctx.restore.console_evtchn);

    *console_gfn = ctx.restore.console_gfn;
    *store_mfn = ctx.restore.xenstore_gfn;

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */