#ifndef XC_SR_COMMON_H
#define XC_SR_COMMON_H

#include <stdbool.h>

#include "xg_private.h"
#include "xg_save_restore.h"
#include "xc_dom.h"
#include "xc_bitops.h"

#include "xc_sr_stream_format.h"

/* String representation of Domain Header types. */
const char *dhdr_type_to_str(uint32_t type);

/* String representation of Record types. */
const char *rec_type_to_str(uint32_t type);

struct xc_sr_context;
struct xc_sr_record;

/**
 * Save operations.  To be implemented for each type of guest, for use by the
 * common save algorithm.
 *
 * Every function must be implemented, even if only with a no-op stub.
 */
28 struct xc_sr_save_ops
29 {
30 /* Convert a PFN to GFN. May return ~0UL for an invalid mapping. */
31 xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
32
33 /**
34 * Optionally transform the contents of a page from being specific to the
35 * sending environment, to being generic for the stream.
36 *
37 * The page of data at the end of 'page' may be a read-only mapping of a
38 * running guest; it must not be modified. If no transformation is
39 * required, the callee should leave '*pages' untouched.
40 *
41 * If a transformation is required, the callee should allocate themselves
42 * a local page using malloc() and return it via '*page'.
43 *
44 * The caller shall free() '*page' in all cases. In the case that the
45 * callee encounters an error, it should *NOT* free() the memory it
46 * allocated for '*page'.
47 *
48 * It is valid to fail with EAGAIN if the transformation is not able to be
49 * completed at this point. The page shall be retried later.
50 *
51 * @returns 0 for success, -1 for failure, with errno appropriately set.
52 */
53 int (*normalise_page)(struct xc_sr_context *ctx, xen_pfn_t type,
54 void **page);
55
56 /**
57 * Set up local environment to save a domain. (Typically querying
58 * running domain state, setting up mappings etc.)
59 *
60 * This is called once before any common setup has occurred, allowing for
61 * guest-specific adjustments to be made to common state.
62 */
63 int (*setup)(struct xc_sr_context *ctx);
64
65 /**
66 * Send static records at the head of the stream. This is called once,
67 * after the Image and Domain headers are written.
68 */
69 int (*static_data)(struct xc_sr_context *ctx);
70
71 /**
72 * Send dynamic records which need to be at the start of the stream. This
73 * is called after the STATIC_DATA_END record is written.
74 */
75 int (*start_of_stream)(struct xc_sr_context *ctx);
76
77 /**
78 * Send records which need to be at the start of a checkpoint. This is
79 * called once, or once per checkpoint in a checkpointed stream, and is
80 * ahead of memory data.
81 */
82 int (*start_of_checkpoint)(struct xc_sr_context *ctx);
83
84 /**
85 * Send records which need to be at the end of the checkpoint. This is
86 * called once, or once per checkpoint in a checkpointed stream, and is
87 * after the memory data.
88 */
89 int (*end_of_checkpoint)(struct xc_sr_context *ctx);
90
91 /**
92 * Check state of guest to decide whether it makes sense to continue
93 * migration. This is called in each iteration or checkpoint to check
94 * whether all criteria for the migration are still met. If that's not
95 * the case either migration is cancelled via a bad rc or the situation
96 * is handled, e.g. by sending appropriate records.
97 */
98 int (*check_vm_state)(struct xc_sr_context *ctx);
99
100 /**
101 * Clean up the local environment. Will be called exactly once, either
102 * after a successful save, or upon encountering an error.
103 */
104 int (*cleanup)(struct xc_sr_context *ctx);
105 };


/**
 * Restore operations.  To be implemented for each type of guest, for use by
 * the common restore algorithm.
 *
 * Every function must be implemented, even if only with a no-op stub.
 */
114 struct xc_sr_restore_ops
115 {
116 /* Convert a PFN to GFN. May return ~0UL for an invalid mapping. */
117 xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
118
119 /* Check to see whether a PFN is valid. */
120 bool (*pfn_is_valid)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
121
122 /* Set the GFN of a PFN. */
123 void (*set_gfn)(struct xc_sr_context *ctx, xen_pfn_t pfn, xen_pfn_t gfn);
124
125 /* Set the type of a PFN. */
126 void (*set_page_type)(struct xc_sr_context *ctx, xen_pfn_t pfn,
127 xen_pfn_t type);
128
129 /**
130 * Optionally transform the contents of a page from being generic in the
131 * stream, to being specific to the restoring environment.
132 *
133 * 'page' is expected to be modified in-place if a transformation is
134 * required.
135 *
136 * @returns 0 for success, -1 for failure, with errno appropriately set.
137 */
138 int (*localise_page)(struct xc_sr_context *ctx, uint32_t type, void *page);
139
140 /**
141 * Set up local environment to restore a domain.
142 *
143 * This is called once before any common setup has occurred, allowing for
144 * guest-specific adjustments to be made to common state.
145 */
146 int (*setup)(struct xc_sr_context *ctx);
147
148 /**
149 * Process an individual record from the stream. The caller shall take
150 * care of processing common records (e.g. END, PAGE_DATA).
151 *
152 * @return 0 for success, -1 for failure, or the following sentinels:
153 * - RECORD_NOT_PROCESSED
154 * - BROKEN_CHANNEL: under Remus/COLO, this means master may be dead, and
155 * a failover is needed.
156 */
157 #define RECORD_NOT_PROCESSED 1
158 #define BROKEN_CHANNEL 2
159 int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec);
160
161 /**
162 * Perform any actions required after the static data has arrived. Called
163 * when the STATIC_DATA_COMPLETE record has been recieved/inferred.
164 * 'missing' should be filled in for any data item the higher level
165 * toolstack needs to provide compatiblity for.
166 */
167 int (*static_data_complete)(struct xc_sr_context *ctx,
168 unsigned int *missing);
169
170 /**
171 * Perform any actions required after the stream has been finished. Called
172 * after the END record has been received.
173 */
174 int (*stream_complete)(struct xc_sr_context *ctx);
175
176 /**
177 * Clean up the local environment. Will be called exactly once, either
178 * after a successful restore, or upon encountering an error.
179 */
180 int (*cleanup)(struct xc_sr_context *ctx);
181 };
/* Wrapper for blobs of data heading Xen-wards. */
struct xc_sr_blob
{
    void *ptr;   /* Heap buffer owned by the blob; freed/replaced by update_blob(). */
    size_t size; /* Number of valid bytes at ptr. */
};

/*
 * Update a blob.  Duplicate src/size, freeing the old blob if necessary.  May
 * fail due to memory allocation.
 *
 * @returns 0 on success.  -1 on failure (EINVAL for a NULL/empty source,
 * or malloc()'s errno), leaving the existing blob contents untouched.
 */
static inline int update_blob(struct xc_sr_blob *blob,
                              const void *src, size_t size)
{
    void *ptr;

    if ( !src || !size )
    {
        errno = EINVAL;
        return -1;
    }

    if ( (ptr = malloc(size)) == NULL )
        return -1;

    /* Only replace the old buffer once the new copy is safely allocated. */
    free(blob->ptr);
    blob->ptr = memcpy(ptr, src, size);
    blob->size = size;

    return 0;
}
215 struct xc_sr_context
216 {
217 xc_interface *xch;
218 uint32_t domid;
219 int fd;
220
221 /* Plain VM, or checkpoints over time. */
222 xc_stream_type_t stream_type;
223
224 xc_dominfo_t dominfo;
225
226 union /* Common save or restore data. */
227 {
228 struct /* Save data. */
229 {
230 int recv_fd;
231
232 struct xc_sr_save_ops ops;
233 struct save_callbacks *callbacks;
234
235 /* Live migrate vs non live suspend. */
236 bool live;
237
238 /* Further debugging information in the stream. */
239 bool debug;
240
241 unsigned long p2m_size;
242
243 struct precopy_stats stats;
244
245 xen_pfn_t *batch_pfns;
246 unsigned int nr_batch_pfns;
247 unsigned long *deferred_pages;
248 unsigned long nr_deferred_pages;
249 xc_hypercall_buffer_t dirty_bitmap_hbuf;
250 } save;
251
252 struct /* Restore data. */
253 {
254 struct xc_sr_restore_ops ops;
255 struct restore_callbacks *callbacks;
256
257 int send_back_fd;
258 unsigned long p2m_size;
259 xc_hypercall_buffer_t dirty_bitmap_hbuf;
260
261 /* From Image Header. */
262 uint32_t format_version;
263
264 /* From Domain Header. */
265 uint32_t guest_type;
266 uint32_t guest_page_size;
267
268 /* Currently buffering records between a checkpoint */
269 bool buffer_all_records;
270
271 /* Whether a STATIC_DATA_END record has been seen/inferred. */
272 bool seen_static_data_end;
273
274 /*
275 * With Remus/COLO, we buffer the records sent by the primary at checkpoint,
276 * in case the primary will fail, we can recover from the last
277 * checkpoint state.
278 * This should be enough for most of the cases because primary only send
279 * dirty pages at checkpoint.
280 */
281 #define DEFAULT_BUF_RECORDS 1024
282 struct xc_sr_record *buffered_records;
283 unsigned int allocated_rec_num;
284 unsigned int buffered_rec_num;
285
286 /*
287 * Xenstore and Console parameters.
288 * INPUT: evtchn & domid
289 * OUTPUT: gfn
290 */
291 xen_pfn_t xenstore_gfn, console_gfn;
292 unsigned int xenstore_evtchn, console_evtchn;
293 uint32_t xenstore_domid, console_domid;
294
295 /* Bitmap of currently populated PFNs during restore. */
296 unsigned long *populated_pfns;
297 xen_pfn_t max_populated_pfn;
298
299 /* Sender has invoked verify mode on the stream. */
300 bool verify;
301 } restore;
302 };
303
304 union /* Guest-arch specific data. */
305 {
306 struct /* x86 */
307 {
308 /* Common save/restore data. */
309 union
310 {
311 struct
312 {
313 /* X86_{CPUID,MSR}_DATA blobs for CPU Policy. */
314 struct xc_sr_blob cpuid, msr;
315 } restore;
316 };
317
318 struct /* x86 PV guest. */
319 {
320 /* 4 or 8; 32 or 64 bit domain */
321 unsigned int width;
322 /* 3 or 4 pagetable levels */
323 unsigned int levels;
324
325 /* Maximum Xen frame */
326 xen_pfn_t max_mfn;
327 /* Read-only machine to phys map */
328 xen_pfn_t *m2p;
329 /* first mfn of the compat m2p (Only needed for 32bit PV guests) */
330 xen_pfn_t compat_m2p_mfn0;
331 /* Number of m2p frames mapped */
332 unsigned long nr_m2p_frames;
333
334 /* Maximum guest frame */
335 xen_pfn_t max_pfn;
336
337 /* Number of frames making up the p2m */
338 unsigned int p2m_frames;
339 /* Guest's phys to machine map. Mapped read-only (save) or
340 * allocated locally (restore). Uses guest unsigned longs. */
341 void *p2m;
342 /* The guest pfns containing the p2m leaves */
343 xen_pfn_t *p2m_pfns;
344
345 /* Read-only mapping of guests shared info page */
346 shared_info_any_t *shinfo;
347
348 /* p2m generation count for verifying validity of local p2m. */
349 uint64_t p2m_generation;
350
351 union
352 {
353 struct
354 {
355 /* State machine for the order of received records. */
356 bool seen_pv_info;
357
358 /* Types for each page (bounded by max_pfn). */
359 uint32_t *pfn_types;
360
361 /* x86 PV per-vcpu storage structure for blobs. */
362 struct xc_sr_x86_pv_restore_vcpu
363 {
364 struct xc_sr_blob basic, extd, xsave, msr;
365 } *vcpus;
366 unsigned int nr_vcpus;
367 } restore;
368 };
369 } pv;
370
371 struct /* x86 HVM guest. */
372 {
373 union
374 {
375 struct
376 {
377 /* Whether qemu enabled logdirty mode, and we should
378 * disable on cleanup. */
379 bool qemu_enabled_logdirty;
380 } save;
381
382 struct
383 {
384 /* HVM context blob. */
385 struct xc_sr_blob context;
386 } restore;
387 };
388 } hvm;
389
390 } x86;
391 };
392 };

extern struct xc_sr_save_ops save_ops_x86_pv;
extern struct xc_sr_save_ops save_ops_x86_hvm;

extern struct xc_sr_restore_ops restore_ops_x86_pv;
extern struct xc_sr_restore_ops restore_ops_x86_hvm;

/* An in-memory stream record: fixed header fields plus optional payload. */
struct xc_sr_record
{
    uint32_t type;   /* One of the REC_TYPE_* stream record types. */
    uint32_t length; /* Payload length in bytes; 0 means no payload. */
    void *data;      /* Payload buffer (malloc()'d by read_record()), or NULL. */
};

/*
 * Writes a split record to the stream, applying correct padding where
 * appropriate.  It is common when sending records containing blobs from Xen
 * that the header and blob data are separate.  This function accepts a second
 * buffer and length, and will merge it with the main record when sending.
 *
 * Records with a non-zero length must provide a valid data field; records
 * with a 0 length shall have their data field ignored.
 *
 * Returns 0 on success and non-0 on failure.
 */
int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec,
                       void *buf, size_t sz);

/*
 * Writes a record to the stream, applying correct padding where appropriate.
 * Records with a non-zero length must provide a valid data field; records
 * with a 0 length shall have their data field ignored.
 *
 * Returns 0 on success and non-0 on failure.
 */
write_record(struct xc_sr_context * ctx,struct xc_sr_record * rec)428 static inline int write_record(struct xc_sr_context *ctx,
429 struct xc_sr_record *rec)
430 {
431 return write_split_record(ctx, rec, NULL, 0);
432 }

/*
 * Reads a record from the stream, and fills in the record structure.
 *
 * Returns 0 on success and non-0 on failure.
 *
 * On success, the records type and size shall be valid.
 * - If size is 0, data shall be NULL.
 * - If size is non-0, data shall be a buffer allocated by malloc() which must
 *   be passed to free() by the caller.
 *
 * On failure, the contents of the record structure are undefined.
 */
int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec);

/*
 * This would ideally be private in restore.c, but is needed by
 * x86_pv_localise_page() if we receive pagetables frames ahead of the
 * contents of the frames they point at.
 */
int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
                  const xen_pfn_t *original_pfns, const uint32_t *types);

/* Handle a STATIC_DATA_END record. */
int handle_static_data_end(struct xc_sr_context *ctx);

#endif
/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */