#ifndef __COMMON__H
#define __COMMON__H

#include <stdbool.h>

#include "xg_private.h"
#include "xg_save_restore.h"
#include "xc_dom.h"
#include "xc_bitops.h"

#include "xc_sr_stream_format.h"

/* String representation of Domain Header types. */
const char *dhdr_type_to_str(uint32_t type);

/* String representation of Record types. */
const char *rec_type_to_str(uint32_t type);

struct xc_sr_context;
struct xc_sr_record;

/**
 * Save operations.  To be implemented for each type of guest, for use by the
 * common save algorithm.
 *
 * Every function must be implemented, even if only with a no-op stub.
 */
struct xc_sr_save_ops
{
    /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
    xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);

    /**
     * Optionally transform the contents of a page from being specific to the
     * sending environment, to being generic for the stream.
     *
     * The page of data at the end of 'page' may be a read-only mapping of a
     * running guest; it must not be modified.  If no transformation is
     * required, the callee should leave '*page' untouched.
     *
     * If a transformation is required, the callee should allocate itself
     * a local page using malloc() and return it via '*page'.
     *
     * The caller shall free() '*page' in all cases.  In the case that the
     * callee encounters an error, it should *NOT* free() the memory it
     * allocated for '*page'.
     *
     * It is valid to fail with EAGAIN if the transformation cannot be
     * completed at this point.  The page shall be retried later.
     *
     * @returns 0 for success, -1 for failure, with errno appropriately set.
     */
    int (*normalise_page)(struct xc_sr_context *ctx, xen_pfn_t type,
                          void **page);

    /**
     * Set up the local environment to save a domain.  (Typically querying
     * running domain state, setting up mappings, etc.)
     *
     * This is called once before any common setup has occurred, allowing for
     * guest-specific adjustments to be made to common state.
     */
    int (*setup)(struct xc_sr_context *ctx);

    /**
     * Send static records at the head of the stream.  This is called once,
     * after the Image and Domain headers are written.
     */
    int (*static_data)(struct xc_sr_context *ctx);

    /**
     * Send dynamic records which need to be at the start of the stream.  This
     * is called after the STATIC_DATA_END record is written.
     */
    int (*start_of_stream)(struct xc_sr_context *ctx);

    /**
     * Send records which need to be at the start of a checkpoint.  This is
     * called once, or once per checkpoint in a checkpointed stream, and is
     * ahead of memory data.
     */
    int (*start_of_checkpoint)(struct xc_sr_context *ctx);

    /**
     * Send records which need to be at the end of the checkpoint.  This is
     * called once, or once per checkpoint in a checkpointed stream, and is
     * after the memory data.
     */
    int (*end_of_checkpoint)(struct xc_sr_context *ctx);

    /**
     * Check the state of the guest to decide whether it makes sense to
     * continue migration.  This is called in each iteration or checkpoint to
     * check whether all criteria for the migration are still met.  If not,
     * either the migration is cancelled via a bad rc, or the situation is
     * handled, e.g. by sending appropriate records.
     */
    int (*check_vm_state)(struct xc_sr_context *ctx);

    /**
     * Clean up the local environment.  Will be called exactly once, either
     * after a successful save, or upon encountering an error.
     */
    int (*cleanup)(struct xc_sr_context *ctx);
};
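
/*
 * Illustrative sketch only (compiled out, not part of this interface): a
 * minimal implementation satisfying the "every function must be implemented"
 * requirement above.  All names here (nop_hook, example_*) are hypothetical.
 */
#if 0
/* Shared no-op stub for hooks with nothing to do for this guest type. */
static int nop_hook(struct xc_sr_context *ctx)
{
    return 0;
}

static struct xc_sr_save_ops save_ops_example =
{
    .pfn_to_gfn          = example_pfn_to_gfn,     /* Real translation required. */
    .normalise_page      = example_normalise_page, /* May simply return 0. */
    .setup               = nop_hook,
    .static_data         = nop_hook,
    .start_of_stream     = nop_hook,
    .start_of_checkpoint = nop_hook,
    .end_of_checkpoint   = nop_hook,
    .check_vm_state      = nop_hook,
    .cleanup             = nop_hook,
};
#endif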


/**
 * Restore operations.  To be implemented for each type of guest, for use by
 * the common restore algorithm.
 *
 * Every function must be implemented, even if only with a no-op stub.
 */
struct xc_sr_restore_ops
{
    /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
    xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);

    /* Check to see whether a PFN is valid. */
    bool (*pfn_is_valid)(const struct xc_sr_context *ctx, xen_pfn_t pfn);

    /* Set the GFN of a PFN. */
    void (*set_gfn)(struct xc_sr_context *ctx, xen_pfn_t pfn, xen_pfn_t gfn);

    /* Set the type of a PFN. */
    void (*set_page_type)(struct xc_sr_context *ctx, xen_pfn_t pfn,
                          xen_pfn_t type);

    /**
     * Optionally transform the contents of a page from being generic in the
     * stream, to being specific to the restoring environment.
     *
     * 'page' is expected to be modified in-place if a transformation is
     * required.
     *
     * @returns 0 for success, -1 for failure, with errno appropriately set.
     */
    int (*localise_page)(struct xc_sr_context *ctx, uint32_t type, void *page);

    /**
     * Set up the local environment to restore a domain.
     *
     * This is called once before any common setup has occurred, allowing for
     * guest-specific adjustments to be made to common state.
     */
    int (*setup)(struct xc_sr_context *ctx);

    /**
     * Process an individual record from the stream.  The caller shall take
     * care of processing common records (e.g. END, PAGE_DATA).
     *
     * @return 0 for success, -1 for failure, or the following sentinels:
     *  - RECORD_NOT_PROCESSED
     *  - BROKEN_CHANNEL: under Remus/COLO, this means the master may be dead,
     *    and a failover is needed.
     */
#define RECORD_NOT_PROCESSED 1
#define BROKEN_CHANNEL 2
    int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec);

    /**
     * Perform any actions required after the static data has arrived.  Called
     * when the STATIC_DATA_END record has been received/inferred.
     * 'missing' should be filled in for any data item the higher level
     * toolstack needs to provide compatibility for.
     */
    int (*static_data_complete)(struct xc_sr_context *ctx,
                                unsigned int *missing);

    /**
     * Perform any actions required after the stream has been finished.  Called
     * after the END record has been received.
     */
    int (*stream_complete)(struct xc_sr_context *ctx);

    /**
     * Clean up the local environment.  Will be called exactly once, either
     * after a successful restore, or upon encountering an error.
     */
    int (*cleanup)(struct xc_sr_context *ctx);
};
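
/*
 * Illustrative sketch only (compiled out): a process_record() hook which
 * handles one guest-specific record type and returns RECORD_NOT_PROCESSED
 * for anything else, leaving the caller to deal with (or reject) the record.
 * handle_x86_pv_info() is a hypothetical helper; the REC_TYPE_* constants
 * come from xc_sr_stream_format.h.
 */
#if 0
static int example_process_record(struct xc_sr_context *ctx,
                                  struct xc_sr_record *rec)
{
    switch ( rec->type )
    {
    case REC_TYPE_X86_PV_INFO:
        return handle_x86_pv_info(ctx, rec);

    default:
        return RECORD_NOT_PROCESSED;
    }
}
#endif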

/* Wrapper for blobs of data heading Xen-wards. */
struct xc_sr_blob
{
    void *ptr;
    size_t size;
};

/*
 * Update a blob.  Duplicate src/size, freeing the old blob if necessary.  May
 * fail due to memory allocation.
 */
static inline int update_blob(struct xc_sr_blob *blob,
                              const void *src, size_t size)
{
    void *ptr;

    if ( !src || !size )
    {
        errno = EINVAL;
        return -1;
    }

    if ( (ptr = malloc(size)) == NULL )
        return -1;

    free(blob->ptr);
    blob->ptr = memcpy(ptr, src, size);
    blob->size = size;

    return 0;
}
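
/*
 * Example usage (sketch, compiled out): duplicating a record's payload into
 * a blob.  The blob owns its copy, which must eventually be handed to
 * free(), e.g. from the cleanup() hook.  'rec' is a hypothetical incoming
 * record.
 */
#if 0
    if ( update_blob(&ctx->x86.restore.cpuid, rec->data, rec->length) )
        return -1; /* errno set by update_blob(). */
#endif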

struct xc_sr_context
{
    xc_interface *xch;
    uint32_t domid;
    int fd;

    /* Plain VM, or checkpoints over time. */
    xc_stream_type_t stream_type;

    xc_dominfo_t dominfo;

    union /* Common save or restore data. */
    {
        struct /* Save data. */
        {
            int recv_fd;

            struct xc_sr_save_ops ops;
            struct save_callbacks *callbacks;

            /* Live migrate vs non-live suspend. */
            bool live;

            /* Further debugging information in the stream. */
            bool debug;

            unsigned long p2m_size;

            struct precopy_stats stats;

            xen_pfn_t *batch_pfns;
            unsigned int nr_batch_pfns;
            unsigned long *deferred_pages;
            unsigned long nr_deferred_pages;
            xc_hypercall_buffer_t dirty_bitmap_hbuf;
        } save;

        struct /* Restore data. */
        {
            struct xc_sr_restore_ops ops;
            struct restore_callbacks *callbacks;

            int send_back_fd;
            unsigned long p2m_size;
            xc_hypercall_buffer_t dirty_bitmap_hbuf;

            /* From Image Header. */
            uint32_t format_version;

            /* From Domain Header. */
            uint32_t guest_type;
            uint32_t guest_page_size;

            /* Currently buffering records between checkpoints. */
            bool buffer_all_records;

            /* Whether a STATIC_DATA_END record has been seen/inferred. */
            bool seen_static_data_end;

/*
 * With Remus/COLO, we buffer the records sent by the primary at a checkpoint,
 * so that if the primary fails, we can recover from the last checkpoint
 * state.  This should be enough for most cases because the primary only
 * sends dirty pages at each checkpoint.
 */
#define DEFAULT_BUF_RECORDS 1024
            struct xc_sr_record *buffered_records;
            unsigned int allocated_rec_num;
            unsigned int buffered_rec_num;

            /*
             * Xenstore and Console parameters.
             * INPUT:  evtchn & domid
             * OUTPUT: gfn
             */
            xen_pfn_t    xenstore_gfn,    console_gfn;
            unsigned int xenstore_evtchn, console_evtchn;
            uint32_t     xenstore_domid,  console_domid;

            /* Bitmap of currently populated PFNs during restore. */
            unsigned long *populated_pfns;
            xen_pfn_t max_populated_pfn;

            /* Sender has invoked verify mode on the stream. */
            bool verify;
        } restore;
    };

    union /* Guest-arch specific data. */
    {
        struct /* x86 */
        {
            /* Common save/restore data. */
            union
            {
                struct
                {
                    /* X86_{CPUID,MSR}_DATA blobs for CPU Policy. */
                    struct xc_sr_blob cpuid, msr;
                } restore;
            };

            struct /* x86 PV guest. */
            {
                /* 4 or 8; 32 or 64 bit domain. */
                unsigned int width;
                /* 3 or 4 pagetable levels. */
                unsigned int levels;

                /* Maximum Xen frame. */
                xen_pfn_t max_mfn;
                /* Read-only machine to phys map. */
                xen_pfn_t *m2p;
                /* First mfn of the compat m2p (only needed for 32bit PV guests). */
                xen_pfn_t compat_m2p_mfn0;
                /* Number of m2p frames mapped. */
                unsigned long nr_m2p_frames;

                /* Maximum guest frame. */
                xen_pfn_t max_pfn;

                /* Number of frames making up the p2m. */
                unsigned int p2m_frames;
                /* Guest's phys to machine map.  Mapped read-only (save) or
                 * allocated locally (restore).  Uses guest unsigned longs. */
                void *p2m;
                /* The guest pfns containing the p2m leaves. */
                xen_pfn_t *p2m_pfns;

                /* Read-only mapping of the guest's shared info page. */
                shared_info_any_t *shinfo;

                /* p2m generation count for verifying validity of local p2m. */
                uint64_t p2m_generation;

                union
                {
                    struct
                    {
                        /* State machine for the order of received records. */
                        bool seen_pv_info;

                        /* Types for each page (bounded by max_pfn). */
                        uint32_t *pfn_types;

                        /* x86 PV per-vcpu storage structure for blobs. */
                        struct xc_sr_x86_pv_restore_vcpu
                        {
                            struct xc_sr_blob basic, extd, xsave, msr;
                        } *vcpus;
                        unsigned int nr_vcpus;
                    } restore;
                };
            } pv;

            struct /* x86 HVM guest. */
            {
                union
                {
                    struct
                    {
                        /* Whether qemu enabled logdirty mode, and we should
                         * disable it on cleanup. */
                        bool qemu_enabled_logdirty;
                    } save;

                    struct
                    {
                        /* HVM context blob. */
                        struct xc_sr_blob context;
                    } restore;
                };
            } hvm;

        } x86;
    };
};

extern struct xc_sr_save_ops save_ops_x86_pv;
extern struct xc_sr_save_ops save_ops_x86_hvm;

extern struct xc_sr_restore_ops restore_ops_x86_pv;
extern struct xc_sr_restore_ops restore_ops_x86_hvm;
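
/*
 * Sketch (assumption, compiled out) of how the common restore code might
 * select one of the implementations above from the Domain Header's guest
 * type; the DHDR_TYPE_* constants come from xc_sr_stream_format.h.
 */
#if 0
    switch ( ctx->restore.guest_type )
    {
    case DHDR_TYPE_X86_PV:
        ctx->restore.ops = restore_ops_x86_pv;
        break;

    case DHDR_TYPE_X86_HVM:
        ctx->restore.ops = restore_ops_x86_hvm;
        break;

    default:
        errno = EINVAL;
        return -1;
    }
#endif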

struct xc_sr_record
{
    uint32_t type;
    uint32_t length;
    void *data;
};

/*
 * Writes a split record to the stream, applying correct padding where
 * appropriate.  It is common when sending records containing blobs from Xen
 * that the header and blob data are separate.  This function accepts a second
 * buffer and length, and will merge it with the main record when sending.
 *
 * Records with a non-zero length must provide a valid data field; records
 * with a 0 length shall have their data field ignored.
 *
 * Returns 0 on success and non-0 on failure.
 */
int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec,
                       void *buf, size_t sz);

/*
 * Writes a record to the stream, applying correct padding where appropriate.
 * Records with a non-zero length must provide a valid data field; records
 * with a 0 length shall have their data field ignored.
 *
 * Returns 0 on success and non-0 on failure.
 */
static inline int write_record(struct xc_sr_context *ctx,
                               struct xc_sr_record *rec)
{
    return write_split_record(ctx, rec, NULL, 0);
}
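
/*
 * Example (sketch, compiled out): writing a record with no payload, such as
 * a VERIFY record (type constant from xc_sr_stream_format.h).  length and
 * data stay 0/NULL and are ignored, per the rules above.
 */
#if 0
    struct xc_sr_record rec =
    {
        .type = REC_TYPE_VERIFY,
    };

    if ( write_record(ctx, &rec) )
        return -1;
#endif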

/*
 * Reads a record from the stream, and fills in the record structure.
 *
 * Returns 0 on success and non-0 on failure.
 *
 * On success, the record's type and size shall be valid.
 * - If size is 0, data shall be NULL.
 * - If size is non-0, data shall be a buffer allocated by malloc() which must
 *   be passed to free() by the caller.
 *
 * On failure, the contents of the record structure are undefined.
 */
int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec);
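
/*
 * Example (sketch, compiled out): a read loop honouring the ownership rules
 * above.  For non-zero lengths rec.data was malloc()'d by read_record() and
 * must be freed by the caller; dispatch() is a hypothetical handler.
 */
#if 0
    struct xc_sr_record rec;
    int rc;

    do
    {
        if ( read_record(ctx, ctx->fd, &rec) )
            return -1;

        rc = dispatch(ctx, &rec);
        free(rec.data);
    } while ( rc == 0 && rec.type != REC_TYPE_END );
#endif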

/*
 * This would ideally be private in restore.c, but is needed by
 * x86_pv_localise_page() if we receive pagetable frames ahead of the
 * contents of the frames they point at.
 */
int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
                  const xen_pfn_t *original_pfns, const uint32_t *types);

/* Handle a STATIC_DATA_END record. */
int handle_static_data_end(struct xc_sr_context *ctx);

#endif
/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */