#include <arpa/inet.h>

#include <assert.h>

#include "xc_sr_common.h"

/*
 * Read and validate the Image and Domain headers.
 */
static int read_headers(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_ihdr ihdr;
    struct xc_sr_dhdr dhdr;

    if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Failed to read Image Header from stream");
        return -1;
    }

    ihdr.id      = ntohl(ihdr.id);
    ihdr.version = ntohl(ihdr.version);
    ihdr.options = ntohs(ihdr.options);

    if ( ihdr.marker != IHDR_MARKER )
    {
        ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
        return -1;
    }

    if ( ihdr.id != IHDR_ID )
    {
        ERROR("Invalid ID: Expected 0x%08x, Got 0x%08x", IHDR_ID, ihdr.id);
        return -1;
    }

    if ( ihdr.version < 2 || ihdr.version > 3 )
    {
        ERROR("Invalid Version: Expected 2 <= ver <= 3, Got %u",
              ihdr.version);
        return -1;
    }

    if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
    {
        ERROR("Unable to handle big endian streams");
        return -1;
    }

    ctx->restore.format_version = ihdr.version;

    if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Failed to read Domain Header from stream");
        return -1;
    }

    ctx->restore.guest_type = dhdr.type;
    ctx->restore.guest_page_size = (1U << dhdr.page_shift);

    if ( dhdr.xen_major == 0 )
    {
        IPRINTF("Found %s domain, converted from legacy stream format",
                dhdr_type_to_str(dhdr.type));
        DPRINTF("  Legacy conversion script version %u", dhdr.xen_minor);
    }
    else
        IPRINTF("Found %s domain from Xen %u.%u",
                dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);
    return 0;
}

/*
 * Is a pfn populated?
 */
static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    if ( pfn > ctx->restore.max_populated_pfn )
        return false;
    return test_bit(pfn, ctx->restore.populated_pfns);
}

/*
 * Set a pfn as populated, expanding the tracking structures if needed.  To
 * avoid realloc()ing excessively, the size is increased to the nearest power
 * of two large enough to contain the required pfn.
 */
static int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    xc_interface *xch = ctx->xch;

    if ( pfn > ctx->restore.max_populated_pfn )
    {
        xen_pfn_t new_max;
        size_t old_sz, new_sz;
        unsigned long *p;

        /* Round up to the nearest power of two larger than pfn, less 1. */
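        /*
         * The bit-smearing below propagates the highest set bit into every
         * lower position, e.g. pfn 0x1234 yields new_max 0x1fff, i.e.
         * (2^13 - 1), so the bitmap always covers a power-of-two range.
         */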
        new_max = pfn;
        new_max |= new_max >> 1;
        new_max |= new_max >> 2;
        new_max |= new_max >> 4;
        new_max |= new_max >> 8;
        new_max |= new_max >> 16;
#ifdef __x86_64__
        new_max |= new_max >> 32;
#endif

        old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1);
        new_sz = bitmap_size(new_max + 1);
        p = realloc(ctx->restore.populated_pfns, new_sz);
        if ( !p )
        {
            ERROR("Failed to realloc populated bitmap");
            errno = ENOMEM;
            return -1;
        }

        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);

        ctx->restore.populated_pfns    = p;
        ctx->restore.max_populated_pfn = new_max;
    }

    assert(!test_bit(pfn, ctx->restore.populated_pfns));
    set_bit(pfn, ctx->restore.populated_pfns);

    return 0;
}

/*
 * Given a set of pfns, obtain memory from Xen to fill the physmap for the
 * unpopulated subset.  If types is NULL, no page type checking is performed
 * and all unpopulated pfns are populated.
 */
int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
                  const xen_pfn_t *original_pfns, const uint32_t *types)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
        *pfns = malloc(count * sizeof(*pfns));
    unsigned int i, nr_pfns = 0;
    int rc = -1;

    if ( !mfns || !pfns )
    {
        ERROR("Failed to allocate %zu bytes for populating the physmap",
              2 * count * sizeof(*mfns));
        goto err;
    }

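    /*
     * Ballooned-out (XTAB) and broken pages have no backing memory and are
     * skipped; every other pfn not yet populated gets a fresh page from
     * Xen below.
     */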
    for ( i = 0; i < count; ++i )
    {
        if ( (!types ||
              (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
               types[i] != XEN_DOMCTL_PFINFO_BROKEN)) &&
             !pfn_is_populated(ctx, original_pfns[i]) )
        {
            rc = pfn_set_populated(ctx, original_pfns[i]);
            if ( rc )
                goto err;
            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
            ++nr_pfns;
        }
    }

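    /*
     * Note: xc_domain_populate_physmap_exact() treats mfns[] as an in/out
     * array: gfns to populate on input, the allocated frames (or
     * INVALID_MFN on failure) on output, hence the separate pfns[] copy
     * kept above.
     */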
    if ( nr_pfns )
    {
        rc = xc_domain_populate_physmap_exact(
            xch, ctx->domid, nr_pfns, 0, 0, mfns);
        if ( rc )
        {
            PERROR("Failed to populate physmap");
            goto err;
        }

        for ( i = 0; i < nr_pfns; ++i )
        {
            if ( mfns[i] == INVALID_MFN )
            {
                ERROR("Populate physmap failed for pfn %u", i);
                rc = -1;
                goto err;
            }

            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
        }
    }

    rc = 0;

 err:
    free(pfns);
    free(mfns);

    return rc;
}

/*
 * Given a list of pfns, their types, and a block of page data from the
 * stream, populate and record their types, map the relevant subset and copy
 * the data into the guest.
 */
static int process_page_data(struct xc_sr_context *ctx, unsigned int count,
                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
    int *map_errs = malloc(count * sizeof(*map_errs));
    int rc;
    void *mapping = NULL, *guest_page = NULL;
    unsigned int i, /* i indexes the pfns from the record. */
        j,          /* j indexes the subset of pfns we decide to map. */
        nr_pages = 0;

    if ( !mfns || !map_errs )
    {
        rc = -1;
        ERROR("Failed to allocate %zu bytes to process page data",
              count * (sizeof(*mfns) + sizeof(*map_errs)));
        goto err;
    }

    rc = populate_pfns(ctx, count, pfns, types);
    if ( rc )
    {
        ERROR("Failed to populate pfns for batch of %u pages", count);
        goto err;
    }

    for ( i = 0; i < count; ++i )
    {
        ctx->restore.ops.set_page_type(ctx, pfns[i], types[i]);

        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_NOTAB:

        case XEN_DOMCTL_PFINFO_L1TAB:
        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L2TAB:
        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L3TAB:
        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L4TAB:
        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:

            mfns[nr_pages++] = ctx->restore.ops.pfn_to_gfn(ctx, pfns[i]);
            break;
        }
    }

    /* Nothing to do? */
    if ( nr_pages == 0 )
        goto done;

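    /*
     * mfns[] now holds the gfns of just those pfns which carry data in the
     * stream.  Map them all as one virtually contiguous block and walk it
     * a page at a time below.
     */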
    mapping = guest_page = xenforeignmemory_map(
        xch->fmem, ctx->domid, PROT_READ | PROT_WRITE,
        nr_pages, mfns, map_errs);
    if ( !mapping )
    {
        rc = -1;
        PERROR("Unable to map %u mfns for %u pages of data",
               nr_pages, count);
        goto err;
    }

    for ( i = 0, j = 0; i < count; ++i )
    {
        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_XTAB:
        case XEN_DOMCTL_PFINFO_BROKEN:
        case XEN_DOMCTL_PFINFO_XALLOC:
            /* No page data to deal with. */
            continue;
        }

        if ( map_errs[j] )
        {
            rc = -1;
            ERROR("Mapping pfn %#"PRIpfn" (mfn %#"PRIpfn", type %#"PRIx32") failed with %d",
                  pfns[i], mfns[j], types[i], map_errs[j]);
            goto err;
        }

        /* Undo page normalisation done by the saver. */
        rc = ctx->restore.ops.localise_page(ctx, types[i], page_data);
        if ( rc )
        {
            ERROR("Failed to localise pfn %#"PRIpfn" (type %#"PRIx32")",
                  pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
            goto err;
        }

        if ( ctx->restore.verify )
        {
            /* Verify mode - compare incoming data to what we already have. */
            if ( memcmp(guest_page, page_data, PAGE_SIZE) )
                ERROR("verify pfn %#"PRIpfn" failed (type %#"PRIx32")",
                      pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
        }
        else
        {
            /* Regular mode - copy incoming data into place. */
            memcpy(guest_page, page_data, PAGE_SIZE);
        }

        ++j;
        guest_page += PAGE_SIZE;
        page_data += PAGE_SIZE;
    }

 done:
    rc = 0;

 err:
    if ( mapping )
        xenforeignmemory_unmap(xch->fmem, mapping, nr_pages);

    free(map_errs);
    free(mfns);

    return rc;
}

/*
 * Validate a PAGE_DATA record from the stream, and pass the results to
 * process_page_data() to actually perform the legwork.
 */
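/*
 * The record body laid out on the wire (see xc_sr_stream_format.h) is:
 *   - the fixed header (pages->count plus padding),
 *   - count 64-bit words, each a pfn in the low bits with its
 *     XEN_DOMCTL_PFINFO type in the high bits,
 *   - one page of data for each pfn whose type says it carries data.
 */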
static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_page_data_header *pages = rec->data;
    unsigned int i, pages_of_data = 0;
    int rc = -1;

    xen_pfn_t *pfns = NULL, pfn;
    uint32_t *types = NULL, type;

    /*
     * v2 compatibility only exists for x86 streams.  This is a bit of a
     * bodge, but it is less bad than duplicating handle_page_data() between
     * different architectures.
     */
#if defined(__i386__) || defined(__x86_64__)
    /* v2 compat.  Infer the position of STATIC_DATA_END. */
    if ( ctx->restore.format_version < 3 && !ctx->restore.seen_static_data_end )
    {
        rc = handle_static_data_end(ctx);
        if ( rc )
        {
            ERROR("Inferred STATIC_DATA_END record failed");
            goto err;
        }
        rc = -1;
    }

    if ( !ctx->restore.seen_static_data_end )
    {
        ERROR("No STATIC_DATA_END seen");
        goto err;
    }
#endif

    if ( rec->length < sizeof(*pages) )
    {
        ERROR("PAGE_DATA record truncated: length %u, min %zu",
              rec->length, sizeof(*pages));
        goto err;
    }

    if ( pages->count < 1 )
    {
        ERROR("Expected at least 1 pfn in PAGE_DATA record");
        goto err;
    }

    if ( rec->length < sizeof(*pages) + (pages->count * sizeof(uint64_t)) )
    {
        ERROR("PAGE_DATA record (length %u) too short to contain %u"
              " pfns worth of information", rec->length, pages->count);
        goto err;
    }

    pfns = malloc(pages->count * sizeof(*pfns));
    types = malloc(pages->count * sizeof(*types));
    if ( !pfns || !types )
    {
        ERROR("Unable to allocate enough memory for %u pfns",
              pages->count);
        goto err;
    }

    for ( i = 0; i < pages->count; ++i )
    {
        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
        if ( !ctx->restore.ops.pfn_is_valid(ctx, pfn) )
        {
            ERROR("pfn %#"PRIpfn" (index %u) outside domain maximum", pfn, i);
            goto err;
        }

        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
        if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
             ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
        {
            ERROR("Invalid type %#"PRIx32" for pfn %#"PRIpfn" (index %u)",
                  type, pfn, i);
            goto err;
        }
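
        /*
         * After the LTAB shift: 0 is NOTAB, 1-4 are the L1-L4 tables, 9-12
         * their pinned variants, and 13-15 BROKEN/XALLOC/XTAB.  The band
         * 5-8 rejected above corresponds to no valid PFINFO type.
         */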

        if ( type < XEN_DOMCTL_PFINFO_BROKEN )
            /* NOTAB and all L1 through L4 tables (including pinned) should
             * have a page worth of data in the record. */
            pages_of_data++;

        pfns[i] = pfn;
        types[i] = type;
    }

    if ( rec->length != (sizeof(*pages) +
                         (sizeof(uint64_t) * pages->count) +
                         (PAGE_SIZE * pages_of_data)) )
    {
        ERROR("PAGE_DATA record wrong size: length %u, expected "
              "%zu + %zu + %lu", rec->length, sizeof(*pages),
              (sizeof(uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
        goto err;
    }

    rc = process_page_data(ctx, pages->count, pfns, types,
                           &pages->pfn[pages->count]);
 err:
    free(types);
    free(pfns);

    return rc;
}

/*
 * Send checkpoint dirty pfn list to primary.
 */
static int send_checkpoint_dirty_pfn_list(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = -1;
    unsigned int count, written;
    uint64_t i, *pfns = NULL;
    struct iovec *iov = NULL;
    xc_shadow_op_stats_t stats = { 0, ctx->restore.p2m_size };
    struct xc_sr_record rec = {
        .type = REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST,
    };
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( xc_shadow_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
             HYPERCALL_BUFFER(dirty_bitmap), ctx->restore.p2m_size,
             NULL, 0, &stats) != ctx->restore.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        goto err;
    }

    for ( i = 0, count = 0; i < ctx->restore.p2m_size; i++ )
    {
        if ( test_bit(i, dirty_bitmap) )
            count++;
    }

    pfns = malloc(count * sizeof(*pfns));
    if ( !pfns )
    {
        ERROR("Unable to allocate %zu bytes of memory for dirty pfn list",
              count * sizeof(*pfns));
        goto err;
    }

    for ( i = 0, written = 0; i < ctx->restore.p2m_size; ++i )
    {
        if ( !test_bit(i, dirty_bitmap) )
            continue;

        if ( written >= count )
        {
            ERROR("Dirty pfn list overflow");
            goto err;
        }

        pfns[written++] = i;
    }

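    /*
     * A record on the wire is type, length, then payload.  The pfns are
     * uint64_t, so rec.length is already a multiple of 8 and no padding
     * iovec is required; writev() sends header and payload in one go.
     */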
    /* iovec[] for writev(). */
    iov = malloc(3 * sizeof(*iov));
    if ( !iov )
    {
        ERROR("Unable to allocate memory for sending dirty bitmap");
        goto err;
    }

    rec.length = count * sizeof(*pfns);

    iov[0].iov_base = &rec.type;
    iov[0].iov_len = sizeof(rec.type);

    iov[1].iov_base = &rec.length;
    iov[1].iov_len = sizeof(rec.length);

    iov[2].iov_base = pfns;
    iov[2].iov_len = count * sizeof(*pfns);

    if ( writev_exact(ctx->restore.send_back_fd, iov, 3) )
    {
        PERROR("Failed to write dirty bitmap to stream");
        goto err;
    }

    rc = 0;
 err:
    free(pfns);
    free(iov);
    return rc;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
static int handle_checkpoint(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = 0, ret;
    unsigned int i;

    if ( ctx->stream_type == XC_STREAM_PLAIN )
    {
        ERROR("Found checkpoint in non-checkpointed stream");
        rc = -1;
        goto err;
    }

    ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data);
    switch ( ret )
    {
    case XGR_CHECKPOINT_SUCCESS:
        break;

    case XGR_CHECKPOINT_FAILOVER:
        if ( ctx->restore.buffer_all_records )
            rc = BROKEN_CHANNEL;
        else
            /* We don't have a consistent state */
            rc = -1;
        goto err;

    default: /* Other fatal error */
        rc = -1;
        goto err;
    }

    if ( ctx->restore.buffer_all_records )
    {
        IPRINTF("All records buffered");

        for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        {
            rc = process_record(ctx, &ctx->restore.buffered_records[i]);
            if ( rc )
                goto err;
        }
        ctx->restore.buffered_rec_num = 0;
        IPRINTF("All records processed");
    }
    else
        ctx->restore.buffer_all_records = true;

    if ( ctx->stream_type == XC_STREAM_COLO )
    {
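        /*
         * The COLO callbacks below share a return convention: 1 means
         * success, 2 means the channel to the primary is broken (trigger
         * failover), anything else is a fatal error.
         */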
#define HANDLE_CALLBACK_RETURN_VALUE(ret)                   \
    do {                                                    \
        if ( ret == 1 )                                     \
            rc = 0; /* Success */                           \
        else                                                \
        {                                                   \
            if ( ret == 2 )                                 \
                rc = BROKEN_CHANNEL;                        \
            else                                            \
                rc = -1; /* Some unspecified error */       \
            goto err;                                       \
        }                                                   \
    } while (0)

        /* COLO */

        /* We need to resume guest */
        rc = ctx->restore.ops.stream_complete(ctx);
        if ( rc )
            goto err;

        ctx->restore.callbacks->restore_results(ctx->restore.xenstore_gfn,
                                                ctx->restore.console_gfn,
                                                ctx->restore.callbacks->data);

        /* Resume secondary vm */
        ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* Wait for a new checkpoint */
        ret = ctx->restore.callbacks->wait_checkpoint(
            ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* suspend secondary vm */
        ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

#undef HANDLE_CALLBACK_RETURN_VALUE

        rc = send_checkpoint_dirty_pfn_list(ctx);
        if ( rc )
            goto err;
    }

 err:
    return rc;
}

static int buffer_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    unsigned int new_alloc_num;
    struct xc_sr_record *p;

    if ( ctx->restore.buffered_rec_num >= ctx->restore.allocated_rec_num )
    {
        new_alloc_num = ctx->restore.allocated_rec_num + DEFAULT_BUF_RECORDS;
        p = realloc(ctx->restore.buffered_records,
                    new_alloc_num * sizeof(struct xc_sr_record));
        if ( !p )
        {
            ERROR("Failed to realloc memory for buffered records");
            return -1;
        }

        ctx->restore.buffered_records = p;
        ctx->restore.allocated_rec_num = new_alloc_num;
    }

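    /*
     * Shallow copy: ownership of rec->data passes to the buffer.  It is
     * freed later, either by process_record() when the record is replayed
     * or by cleanup() if it never is.
     */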
    memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
           rec, sizeof(*rec));

    return 0;
}

int handle_static_data_end(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned int missing = 0;
    int rc = 0;

    if ( ctx->restore.seen_static_data_end )
    {
        ERROR("Multiple STATIC_DATA_END records found");
        return -1;
    }

    ctx->restore.seen_static_data_end = true;

    rc = ctx->restore.ops.static_data_complete(ctx, &missing);
    if ( rc )
        return rc;

    if ( ctx->restore.callbacks->static_data_done &&
         (rc = ctx->restore.callbacks->static_data_done(
             missing, ctx->restore.callbacks->data)) != 0 )
        ERROR("static_data_done() callback failed: %d", rc);

    return rc;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    int rc = 0;

    switch ( rec->type )
    {
    case REC_TYPE_END:
        break;

    case REC_TYPE_PAGE_DATA:
        rc = handle_page_data(ctx, rec);
        break;

    case REC_TYPE_VERIFY:
        DPRINTF("Verify mode enabled");
        ctx->restore.verify = true;
        break;

    case REC_TYPE_CHECKPOINT:
        rc = handle_checkpoint(ctx);
        break;

    case REC_TYPE_STATIC_DATA_END:
        rc = handle_static_data_end(ctx);
        break;

    default:
        rc = ctx->restore.ops.process_record(ctx, rec);
        break;
    }

    free(rec->data);
    rec->data = NULL;

    return rc;
}

static int setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( ctx->stream_type == XC_STREAM_COLO )
    {
        dirty_bitmap = xc_hypercall_buffer_alloc_pages(
            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));

        if ( !dirty_bitmap )
        {
            ERROR("Unable to allocate memory for dirty bitmap");
            rc = -1;
            goto err;
        }
    }

    rc = ctx->restore.ops.setup(ctx);
    if ( rc )
        goto err;

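    /*
     * Start with a bitmap covering 8k pfns (32MiB of 4KiB pages);
     * pfn_set_populated() grows it on demand as higher pfns appear.
     */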
    ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1;
    ctx->restore.populated_pfns = bitmap_alloc(
        ctx->restore.max_populated_pfn + 1);
    if ( !ctx->restore.populated_pfns )
    {
        ERROR("Unable to allocate memory for populated_pfns bitmap");
        rc = -1;
        goto err;
    }

    ctx->restore.buffered_records = malloc(
        DEFAULT_BUF_RECORDS * sizeof(struct xc_sr_record));
    if ( !ctx->restore.buffered_records )
    {
        ERROR("Unable to allocate memory for buffered records");
        rc = -1;
        goto err;
    }
    ctx->restore.allocated_rec_num = DEFAULT_BUF_RECORDS;

 err:
    return rc;
}

static void cleanup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned int i;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        free(ctx->restore.buffered_records[i].data);

    if ( ctx->stream_type == XC_STREAM_COLO )
        xc_hypercall_buffer_free_pages(
            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));

    free(ctx->restore.buffered_records);
    free(ctx->restore.populated_pfns);

    if ( ctx->restore.ops.cleanup(ctx) )
        PERROR("Failed to clean up");
}

/*
 * Restore a domain.
 */
static int restore(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_record rec;
    int rc, saved_rc = 0, saved_errno = 0;

    IPRINTF("Restoring domain");

    rc = setup(ctx);
    if ( rc )
        goto err;

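    /*
     * In checkpointed (Remus/COLO) streams, records arriving after a
     * CHECKPOINT are buffered rather than applied, so that if the primary
     * dies mid-checkpoint we can still fail over to the last complete
     * state.
     */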
    do
    {
        rc = read_record(ctx, ctx->fd, &rec);
        if ( rc )
        {
            if ( ctx->restore.buffer_all_records )
                goto remus_failover;
            else
                goto err;
        }

        if ( ctx->restore.buffer_all_records &&
             rec.type != REC_TYPE_END &&
             rec.type != REC_TYPE_CHECKPOINT )
        {
            rc = buffer_record(ctx, &rec);
            if ( rc )
                goto err;
        }
        else
        {
            rc = process_record(ctx, &rec);
            if ( rc == RECORD_NOT_PROCESSED )
            {
                if ( rec.type & REC_TYPE_OPTIONAL )
                    DPRINTF("Ignoring optional record %#x (%s)",
                            rec.type, rec_type_to_str(rec.type));
                else
                {
                    ERROR("Mandatory record %#x (%s) not handled",
                          rec.type, rec_type_to_str(rec.type));
                    rc = -1;
                    goto err;
                }
            }
            else if ( rc == BROKEN_CHANNEL )
                goto remus_failover;
            else if ( rc )
                goto err;
        }

    } while ( rec.type != REC_TYPE_END );

 remus_failover:
    if ( ctx->stream_type == XC_STREAM_COLO )
    {
        /* With COLO, we have already called stream_complete */
        rc = 0;
        IPRINTF("COLO Failover");
        goto done;
    }

    /*
     * With Remus, reaching here means something went wrong on the primary;
     * fail over from the last complete checkpoint state.
     */
    rc = ctx->restore.ops.stream_complete(ctx);
    if ( rc )
        goto err;

    IPRINTF("Restore successful");
    goto done;

 err:
    saved_errno = errno;
    saved_rc = rc;
    PERROR("Restore failed");

 done:
    cleanup(ctx);

    if ( saved_rc )
    {
        rc = saved_rc;
        errno = saved_errno;
    }

    return rc;
}

int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                      unsigned int store_evtchn, unsigned long *store_mfn,
                      uint32_t store_domid, unsigned int console_evtchn,
                      unsigned long *console_gfn, uint32_t console_domid,
                      xc_stream_type_t stream_type,
                      struct restore_callbacks *callbacks, int send_back_fd)
{
    xen_pfn_t nr_pfns;
    struct xc_sr_context ctx = {
        .xch = xch,
        .fd = io_fd,
        .stream_type = stream_type,
    };

    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
    ctx.restore.console_evtchn = console_evtchn;
    ctx.restore.console_domid = console_domid;
    ctx.restore.xenstore_evtchn = store_evtchn;
    ctx.restore.xenstore_domid = store_domid;
    ctx.restore.callbacks = callbacks;
    ctx.restore.send_back_fd = send_back_fd;

    /* Sanity check stream_type-related parameters */
    switch ( stream_type )
    {
    case XC_STREAM_COLO:
        assert(callbacks->suspend &&
               callbacks->postcopy &&
               callbacks->wait_checkpoint &&
               callbacks->restore_results);
        /* Fallthrough */
    case XC_STREAM_REMUS:
        assert(callbacks->checkpoint);
        /* Fallthrough */
    case XC_STREAM_PLAIN:
        break;

    default:
        assert(!"Bad stream_type");
        break;
    }

    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
    {
        PERROR("Failed to get domain info");
        return -1;
    }

    if ( ctx.dominfo.domid != dom )
    {
        ERROR("Domain %u does not exist", dom);
        return -1;
    }

    DPRINTF("fd %d, dom %u, hvm %u, stream_type %d",
            io_fd, dom, ctx.dominfo.hvm, stream_type);

    ctx.domid = dom;

    if ( read_headers(&ctx) )
        return -1;

    if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 )
    {
        PERROR("Unable to obtain the guest p2m size");
        return -1;
    }

    ctx.restore.p2m_size = nr_pfns;
    ctx.restore.ops = ctx.dominfo.hvm
        ? restore_ops_x86_hvm : restore_ops_x86_pv;

    if ( restore(&ctx) )
        return -1;

    IPRINTF("XenStore: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.xenstore_gfn,
            ctx.restore.xenstore_domid,
            ctx.restore.xenstore_evtchn);

    IPRINTF("Console: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.console_gfn,
            ctx.restore.console_domid,
            ctx.restore.console_evtchn);

    *console_gfn = ctx.restore.console_gfn;
    *store_mfn = ctx.restore.xenstore_gfn;

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */