1 #ifndef __COMMON__H
2 #define __COMMON__H
3 
4 #include <stdbool.h>
5 
6 #include "xg_private.h"
7 #include "xg_save_restore.h"
8 #include "xc_dom.h"
9 #include "xc_bitops.h"
10 
11 #include "xc_sr_stream_format.h"
12 
13 /* String representation of Domain Header types. */
14 const char *dhdr_type_to_str(uint32_t type);
15 
16 /* String representation of Record types. */
17 const char *rec_type_to_str(uint32_t type);
18 
19 struct xc_sr_context;
20 struct xc_sr_record;
21 
/**
 * Save operations.  To be implemented for each type of guest, for use by the
 * common save algorithm.
 *
 * Every function must be implemented, even if only with a no-op stub.
 */
struct xc_sr_save_ops
{
    /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
    xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);

    /**
     * Optionally transform the contents of a page from being specific to the
     * sending environment, to being generic for the stream.
     *
     * The page of data at the end of 'page' may be a read-only mapping of a
     * running guest; it must not be modified.  If no transformation is
     * required, the callee should leave '*page' untouched.
     *
     * If a transformation is required, the callee should allocate themselves
     * a local page using malloc() and return it via '*page'.
     *
     * The caller shall free() '*page' in all cases.  In the case that the
     * callee encounters an error, it should *NOT* free() the memory it
     * allocated for '*page'.
     *
     * It is valid to fail with EAGAIN if the transformation is not able to be
     * completed at this point.  The page shall be retried later.
     *
     * @returns 0 for success, -1 for failure, with errno appropriately set.
     */
    int (*normalise_page)(struct xc_sr_context *ctx, xen_pfn_t type,
                          void **page);

    /**
     * Set up local environment to save a domain. (Typically querying
     * running domain state, setting up mappings etc.)
     *
     * This is called once before any common setup has occurred, allowing for
     * guest-specific adjustments to be made to common state.
     */
    int (*setup)(struct xc_sr_context *ctx);

    /**
     * Send static records at the head of the stream.  This is called once,
     * after the Image and Domain headers are written.
     */
    int (*static_data)(struct xc_sr_context *ctx);

    /**
     * Send dynamic records which need to be at the start of the stream.  This
     * is called after the STATIC_DATA_END record is written.
     */
    int (*start_of_stream)(struct xc_sr_context *ctx);

    /**
     * Send records which need to be at the start of a checkpoint.  This is
     * called once, or once per checkpoint in a checkpointed stream, and is
     * ahead of memory data.
     */
    int (*start_of_checkpoint)(struct xc_sr_context *ctx);

    /**
     * Send records which need to be at the end of the checkpoint.  This is
     * called once, or once per checkpoint in a checkpointed stream, and is
     * after the memory data.
     */
    int (*end_of_checkpoint)(struct xc_sr_context *ctx);

    /**
     * Check state of guest to decide whether it makes sense to continue
     * migration.  This is called in each iteration or checkpoint to check
     * whether all criteria for the migration are still met.  If that's not
     * the case either migration is cancelled via a bad rc or the situation
     * is handled, e.g. by sending appropriate records.
     */
    int (*check_vm_state)(struct xc_sr_context *ctx);

    /**
     * Clean up the local environment.  Will be called exactly once, either
     * after a successful save, or upon encountering an error.
     */
    int (*cleanup)(struct xc_sr_context *ctx);
};
106 
107 
/**
 * Restore operations.  To be implemented for each type of guest, for use by
 * the common restore algorithm.
 *
 * Every function must be implemented, even if only with a no-op stub.
 */
struct xc_sr_restore_ops
{
    /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
    xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);

    /* Check to see whether a PFN is valid. */
    bool (*pfn_is_valid)(const struct xc_sr_context *ctx, xen_pfn_t pfn);

    /* Set the GFN of a PFN. */
    void (*set_gfn)(struct xc_sr_context *ctx, xen_pfn_t pfn, xen_pfn_t gfn);

    /* Set the type of a PFN. */
    void (*set_page_type)(struct xc_sr_context *ctx, xen_pfn_t pfn,
                          xen_pfn_t type);

    /**
     * Optionally transform the contents of a page from being generic in the
     * stream, to being specific to the restoring environment.
     *
     * 'page' is expected to be modified in-place if a transformation is
     * required.
     *
     * @returns 0 for success, -1 for failure, with errno appropriately set.
     */
    int (*localise_page)(struct xc_sr_context *ctx, uint32_t type, void *page);

    /**
     * Set up local environment to restore a domain.
     *
     * This is called once before any common setup has occurred, allowing for
     * guest-specific adjustments to be made to common state.
     */
    int (*setup)(struct xc_sr_context *ctx);

    /**
     * Process an individual record from the stream.  The caller shall take
     * care of processing common records (e.g. END, PAGE_DATA).
     *
     * @return 0 for success, -1 for failure, or the following sentinels:
     *  - RECORD_NOT_PROCESSED
     *  - BROKEN_CHANNEL: under Remus/COLO, this means master may be dead, and
     *    a failover is needed.
     */
#define RECORD_NOT_PROCESSED 1
#define BROKEN_CHANNEL 2
    int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec);

    /**
     * Perform any actions required after the static data has arrived.  Called
     * when the STATIC_DATA_COMPLETE record has been received/inferred.
     * 'missing' should be filled in for any data item the higher level
     * toolstack needs to provide compatibility for.
     */
    int (*static_data_complete)(struct xc_sr_context *ctx,
                                unsigned int *missing);

    /**
     * Perform any actions required after the stream has been finished. Called
     * after the END record has been received.
     */
    int (*stream_complete)(struct xc_sr_context *ctx);

    /**
     * Clean up the local environment.  Will be called exactly once, either
     * after a successful restore, or upon encountering an error.
     */
    int (*cleanup)(struct xc_sr_context *ctx);
};
182 
/* Wrapper for blobs of data heading Xen-wards. */
struct xc_sr_blob
{
    void *ptr;
    size_t size;
};

/*
 * Update a blob.  Duplicate src/size, freeing the old blob if necessary.  May
 * fail due to memory allocation, in which case the previous contents of the
 * blob are left untouched.
 */
static inline int update_blob(struct xc_sr_blob *blob,
                              const void *src, size_t size)
{
    void *copy;

    /* A blob update with no payload is a caller error. */
    if ( src == NULL || size == 0 )
    {
        errno = EINVAL;
        return -1;
    }

    copy = malloc(size);
    if ( copy == NULL )
        return -1;

    memcpy(copy, src, size);

    /* Release the previous contents only once the new copy is in hand. */
    free(blob->ptr);
    blob->ptr = copy;
    blob->size = size;

    return 0;
}
214 
/* Shared state for one save or restore operation. */
struct xc_sr_context
{
    xc_interface *xch;          /* Hypercall interface handle. */
    uint32_t domid;             /* Domain being saved/restored. */
    int fd;                     /* Migration stream fd. */

    /* Plain VM, or checkpoints over time. */
    xc_stream_type_t stream_type;

    xc_dominfo_t dominfo;

    union /* Common save or restore data. */
    {
        struct /* Save data. */
        {
            /* Back channel from the restore side (checkpointed streams). */
            int recv_fd;

            struct xc_sr_save_ops ops;
            struct save_callbacks *callbacks;

            /* Live migrate vs non live suspend. */
            bool live;

            /* Further debugging information in the stream. */
            bool debug;

            unsigned long p2m_size;

            struct precopy_stats stats;

            /* Accumulated batch of PFNs awaiting transmission. */
            xen_pfn_t *batch_pfns;
            unsigned int nr_batch_pfns;
            /* Pages deferred (e.g. EAGAIN from normalise_page) for a later pass. */
            unsigned long *deferred_pages;
            unsigned long nr_deferred_pages;
            xc_hypercall_buffer_t dirty_bitmap_hbuf;
        } save;

        struct /* Restore data. */
        {
            struct xc_sr_restore_ops ops;
            struct restore_callbacks *callbacks;

            /* Back channel to the save side (checkpointed streams). */
            int send_back_fd;
            unsigned long p2m_size;
            xc_hypercall_buffer_t dirty_bitmap_hbuf;

            /* From Image Header. */
            uint32_t format_version;

            /* From Domain Header. */
            uint32_t guest_type;
            uint32_t guest_page_size;

            /* Currently buffering records between a checkpoint */
            bool buffer_all_records;

            /* Whether a STATIC_DATA_END record has been seen/inferred. */
            bool seen_static_data_end;

/*
 * With Remus/COLO, we buffer the records sent by the primary at a checkpoint
 * so that, should the primary fail, we can recover from the last complete
 * checkpoint state.
 * The default below should be enough for most cases, because the primary
 * only sends dirty pages at a checkpoint.
 */
#define DEFAULT_BUF_RECORDS 1024
            struct xc_sr_record *buffered_records;
            unsigned int allocated_rec_num;
            unsigned int buffered_rec_num;

            /*
             * Xenstore and Console parameters.
             * INPUT:  evtchn & domid
             * OUTPUT: gfn
             */
            xen_pfn_t    xenstore_gfn,    console_gfn;
            unsigned int xenstore_evtchn, console_evtchn;
            uint32_t     xenstore_domid,  console_domid;

            /* Bitmap of currently populated PFNs during restore. */
            unsigned long *populated_pfns;
            xen_pfn_t max_populated_pfn;

            /* Sender has invoked verify mode on the stream. */
            bool verify;
        } restore;
    };

    union /* Guest-arch specific data. */
    {
        struct /* x86 */
        {
            /* Common save/restore data. */
            union
            {
                struct
                {
                    /* X86_{CPUID,MSR}_DATA blobs for CPU Policy. */
                    struct xc_sr_blob cpuid, msr;
                } restore;
            };

            struct /* x86 PV guest. */
            {
                /* 4 or 8; 32 or 64 bit domain */
                unsigned int width;
                /* 3 or 4 pagetable levels */
                unsigned int levels;

                /* Maximum Xen frame */
                xen_pfn_t max_mfn;
                /* Read-only machine to phys map */
                xen_pfn_t *m2p;
                /* first mfn of the compat m2p (Only needed for 32bit PV guests) */
                xen_pfn_t compat_m2p_mfn0;
                /* Number of m2p frames mapped */
                unsigned long nr_m2p_frames;

                /* Maximum guest frame */
                xen_pfn_t max_pfn;

                /* Number of frames making up the p2m */
                unsigned int p2m_frames;
                /* Guest's phys to machine map.  Mapped read-only (save) or
                 * allocated locally (restore).  Uses guest unsigned longs. */
                void *p2m;
                /* The guest pfns containing the p2m leaves */
                xen_pfn_t *p2m_pfns;

                /* Read-only mapping of guests shared info page */
                shared_info_any_t *shinfo;

                /* p2m generation count for verifying validity of local p2m. */
                uint64_t p2m_generation;

                union
                {
                    struct
                    {
                        /* State machine for the order of received records. */
                        bool seen_pv_info;

                        /* Types for each page (bounded by max_pfn). */
                        uint32_t *pfn_types;

                        /* x86 PV per-vcpu storage structure for blobs. */
                        struct xc_sr_x86_pv_restore_vcpu
                        {
                            struct xc_sr_blob basic, extd, xsave, msr;
                        } *vcpus;
                        unsigned int nr_vcpus;
                    } restore;
                };
            } pv;

            struct /* x86 HVM guest. */
            {
                union
                {
                    struct
                    {
                        /* Whether qemu enabled logdirty mode, and we should
                         * disable on cleanup. */
                        bool qemu_enabled_logdirty;
                    } save;

                    struct
                    {
                        /* HVM context blob. */
                        struct xc_sr_blob context;
                    } restore;
                };
            } hvm;

        } x86;
    };
};
393 
394 extern struct xc_sr_save_ops save_ops_x86_pv;
395 extern struct xc_sr_save_ops save_ops_x86_hvm;
396 
397 extern struct xc_sr_restore_ops restore_ops_x86_pv;
398 extern struct xc_sr_restore_ops restore_ops_x86_hvm;
399 
/* A single record of the migration stream. */
struct xc_sr_record
{
    uint32_t type;      /* One of the REC_TYPE_* stream format values. */
    uint32_t length;    /* Length of 'data' in bytes. */
    void *data;         /* Payload; NULL when 'length' is 0 (see read_record()). */
};
406 
407 /*
408  * Writes a split record to the stream, applying correct padding where
409  * appropriate.  It is common when sending records containing blobs from Xen
410  * that the header and blob data are separate.  This function accepts a second
411  * buffer and length, and will merge it with the main record when sending.
412  *
413  * Records with a non-zero length must provide a valid data field; records
414  * with a 0 length shall have their data field ignored.
415  *
 * Returns 0 on success and non-0 on failure.
417  */
418 int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec,
419                        void *buf, size_t sz);
420 
/*
 * Writes a record to the stream, applying correct padding where appropriate.
 * Records with a non-zero length must provide a valid data field; records
 * with a 0 length shall have their data field ignored.
 *
 * Returns 0 on success and non-0 on failure.
 */
static inline int write_record(struct xc_sr_context *ctx,
                               struct xc_sr_record *rec)
{
    /* A plain record is simply a split record with no secondary buffer. */
    return write_split_record(ctx, rec, NULL, 0);
}
433 
434 /*
435  * Reads a record from the stream, and fills in the record structure.
436  *
437  * Returns 0 on success and non-0 on failure.
438  *
439  * On success, the records type and size shall be valid.
440  * - If size is 0, data shall be NULL.
441  * - If size is non-0, data shall be a buffer allocated by malloc() which must
442  *   be passed to free() by the caller.
443  *
444  * On failure, the contents of the record structure are undefined.
445  */
446 int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec);
447 
448 /*
449  * This would ideally be private in restore.c, but is needed by
 * x86_pv_localise_page() if we receive pagetable frames ahead of the
451  * contents of the frames they point at.
452  */
453 int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
454                   const xen_pfn_t *original_pfns, const uint32_t *types);
455 
456 /* Handle a STATIC_DATA_END record. */
457 int handle_static_data_end(struct xc_sr_context *ctx);
458 
459 #endif
460 /*
461  * Local variables:
462  * mode: C
463  * c-file-style: "BSD"
464  * c-basic-offset: 4
465  * tab-width: 4
466  * indent-tabs-mode: nil
467  * End:
468  */
469