1 #ifndef __COMMON__H 2 #define __COMMON__H 3 4 #include <stdbool.h> 5 6 #include "xg_private.h" 7 #include "xg_save_restore.h" 8 #include "xc_dom.h" 9 #include "xc_bitops.h" 10 11 #include "xc_sr_stream_format.h" 12 13 /* String representation of Domain Header types. */ 14 const char *dhdr_type_to_str(uint32_t type); 15 16 /* String representation of Record types. */ 17 const char *rec_type_to_str(uint32_t type); 18 19 struct xc_sr_context; 20 struct xc_sr_record; 21 22 /** 23 * Save operations. To be implemented for each type of guest, for use by the 24 * common save algorithm. 25 * 26 * Every function must be implemented, even if only with a no-op stub. 27 */ 28 struct xc_sr_save_ops 29 { 30 /* Convert a PFN to GFN. May return ~0UL for an invalid mapping. */ 31 xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn); 32 33 /** 34 * Optionally transform the contents of a page from being specific to the 35 * sending environment, to being generic for the stream. 36 * 37 * The page of data at the end of 'page' may be a read-only mapping of a 38 * running guest; it must not be modified. If no transformation is 39 * required, the callee should leave '*pages' untouched. 40 * 41 * If a transformation is required, the callee should allocate themselves 42 * a local page using malloc() and return it via '*page'. 43 * 44 * The caller shall free() '*page' in all cases. In the case that the 45 * callee encounters an error, it should *NOT* free() the memory it 46 * allocated for '*page'. 47 * 48 * It is valid to fail with EAGAIN if the transformation is not able to be 49 * completed at this point. The page shall be retried later. 50 * 51 * @returns 0 for success, -1 for failure, with errno appropriately set. 52 */ 53 int (*normalise_page)(struct xc_sr_context *ctx, xen_pfn_t type, 54 void **page); 55 56 /** 57 * Set up local environment to save a domain. (Typically querying 58 * running domain state, setting up mappings etc.) 59 * 60 * This is called once before any common setup has occurred, allowing for 61 * guest-specific adjustments to be made to common state. 62 */ 63 int (*setup)(struct xc_sr_context *ctx); 64 65 /** 66 * Send static records at the head of the stream. This is called once, 67 * after the Image and Domain headers are written. 68 */ 69 int (*static_data)(struct xc_sr_context *ctx); 70 71 /** 72 * Send dynamic records which need to be at the start of the stream. This 73 * is called after the STATIC_DATA_END record is written. 74 */ 75 int (*start_of_stream)(struct xc_sr_context *ctx); 76 77 /** 78 * Send records which need to be at the start of a checkpoint. This is 79 * called once, or once per checkpoint in a checkpointed stream, and is 80 * ahead of memory data. 81 */ 82 int (*start_of_checkpoint)(struct xc_sr_context *ctx); 83 84 /** 85 * Send records which need to be at the end of the checkpoint. This is 86 * called once, or once per checkpoint in a checkpointed stream, and is 87 * after the memory data. 88 */ 89 int (*end_of_checkpoint)(struct xc_sr_context *ctx); 90 91 /** 92 * Check state of guest to decide whether it makes sense to continue 93 * migration. This is called in each iteration or checkpoint to check 94 * whether all criteria for the migration are still met. If that's not 95 * the case either migration is cancelled via a bad rc or the situation 96 * is handled, e.g. by sending appropriate records. 97 */ 98 int (*check_vm_state)(struct xc_sr_context *ctx); 99 100 /** 101 * Clean up the local environment. Will be called exactly once, either 102 * after a successful save, or upon encountering an error. 103 */ 104 int (*cleanup)(struct xc_sr_context *ctx); 105 }; 106 107 108 /** 109 * Restore operations. To be implemented for each type of guest, for use by 110 * the common restore algorithm. 111 * 112 * Every function must be implemented, even if only with a no-op stub. 113 */ 114 struct xc_sr_restore_ops 115 { 116 /* Convert a PFN to GFN. May return ~0UL for an invalid mapping. */ 117 xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn); 118 119 /* Check to see whether a PFN is valid. */ 120 bool (*pfn_is_valid)(const struct xc_sr_context *ctx, xen_pfn_t pfn); 121 122 /* Set the GFN of a PFN. */ 123 void (*set_gfn)(struct xc_sr_context *ctx, xen_pfn_t pfn, xen_pfn_t gfn); 124 125 /* Set the type of a PFN. */ 126 void (*set_page_type)(struct xc_sr_context *ctx, xen_pfn_t pfn, 127 xen_pfn_t type); 128 129 /** 130 * Optionally transform the contents of a page from being generic in the 131 * stream, to being specific to the restoring environment. 132 * 133 * 'page' is expected to be modified in-place if a transformation is 134 * required. 135 * 136 * @returns 0 for success, -1 for failure, with errno appropriately set. 137 */ 138 int (*localise_page)(struct xc_sr_context *ctx, uint32_t type, void *page); 139 140 /** 141 * Set up local environment to restore a domain. 142 * 143 * This is called once before any common setup has occurred, allowing for 144 * guest-specific adjustments to be made to common state. 145 */ 146 int (*setup)(struct xc_sr_context *ctx); 147 148 /** 149 * Process an individual record from the stream. The caller shall take 150 * care of processing common records (e.g. END, PAGE_DATA). 151 * 152 * @return 0 for success, -1 for failure, or the following sentinels: 153 * - RECORD_NOT_PROCESSED 154 * - BROKEN_CHANNEL: under Remus/COLO, this means master may be dead, and 155 * a failover is needed. 156 */ 157 #define RECORD_NOT_PROCESSED 1 158 #define BROKEN_CHANNEL 2 159 int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec); 160 161 /** 162 * Perform any actions required after the static data has arrived. Called 163 * when the STATIC_DATA_COMPLETE record has been recieved/inferred. 164 * 'missing' should be filled in for any data item the higher level 165 * toolstack needs to provide compatiblity for. 166 */ 167 int (*static_data_complete)(struct xc_sr_context *ctx, 168 unsigned int *missing); 169 170 /** 171 * Perform any actions required after the stream has been finished. Called 172 * after the END record has been received. 173 */ 174 int (*stream_complete)(struct xc_sr_context *ctx); 175 176 /** 177 * Clean up the local environment. Will be called exactly once, either 178 * after a successful restore, or upon encountering an error. 179 */ 180 int (*cleanup)(struct xc_sr_context *ctx); 181 }; 182 183 /* Wrapper for blobs of data heading Xen-wards. */ 184 struct xc_sr_blob 185 { 186 void *ptr; 187 size_t size; 188 }; 189 190 /* 191 * Update a blob. Duplicate src/size, freeing the old blob if necessary. May 192 * fail due to memory allocation. 193 */ update_blob(struct xc_sr_blob * blob,const void * src,size_t size)194 static inline int update_blob(struct xc_sr_blob *blob, 195 const void *src, size_t size) 196 { 197 void *ptr; 198 199 if ( !src || !size ) 200 { 201 errno = EINVAL; 202 return -1; 203 } 204 205 if ( (ptr = malloc(size)) == NULL ) 206 return -1; 207 208 free(blob->ptr); 209 blob->ptr = memcpy(ptr, src, size); 210 blob->size = size; 211 212 return 0; 213 } 214 215 struct xc_sr_context 216 { 217 xc_interface *xch; 218 uint32_t domid; 219 int fd; 220 221 /* Plain VM, or checkpoints over time. */ 222 xc_stream_type_t stream_type; 223 224 xc_dominfo_t dominfo; 225 226 union /* Common save or restore data. */ 227 { 228 struct /* Save data. */ 229 { 230 int recv_fd; 231 232 struct xc_sr_save_ops ops; 233 struct save_callbacks *callbacks; 234 235 /* Live migrate vs non live suspend. */ 236 bool live; 237 238 /* Further debugging information in the stream. */ 239 bool debug; 240 241 unsigned long p2m_size; 242 243 struct precopy_stats stats; 244 245 xen_pfn_t *batch_pfns; 246 unsigned int nr_batch_pfns; 247 unsigned long *deferred_pages; 248 unsigned long nr_deferred_pages; 249 xc_hypercall_buffer_t dirty_bitmap_hbuf; 250 } save; 251 252 struct /* Restore data. */ 253 { 254 struct xc_sr_restore_ops ops; 255 struct restore_callbacks *callbacks; 256 257 int send_back_fd; 258 unsigned long p2m_size; 259 xc_hypercall_buffer_t dirty_bitmap_hbuf; 260 261 /* From Image Header. */ 262 uint32_t format_version; 263 264 /* From Domain Header. */ 265 uint32_t guest_type; 266 uint32_t guest_page_size; 267 268 /* Currently buffering records between a checkpoint */ 269 bool buffer_all_records; 270 271 /* Whether a STATIC_DATA_END record has been seen/inferred. */ 272 bool seen_static_data_end; 273 274 /* 275 * With Remus/COLO, we buffer the records sent by the primary at checkpoint, 276 * in case the primary will fail, we can recover from the last 277 * checkpoint state. 278 * This should be enough for most of the cases because primary only send 279 * dirty pages at checkpoint. 280 */ 281 #define DEFAULT_BUF_RECORDS 1024 282 struct xc_sr_record *buffered_records; 283 unsigned int allocated_rec_num; 284 unsigned int buffered_rec_num; 285 286 /* 287 * Xenstore and Console parameters. 288 * INPUT: evtchn & domid 289 * OUTPUT: gfn 290 */ 291 xen_pfn_t xenstore_gfn, console_gfn; 292 unsigned int xenstore_evtchn, console_evtchn; 293 uint32_t xenstore_domid, console_domid; 294 295 /* Bitmap of currently populated PFNs during restore. */ 296 unsigned long *populated_pfns; 297 xen_pfn_t max_populated_pfn; 298 299 /* Sender has invoked verify mode on the stream. */ 300 bool verify; 301 } restore; 302 }; 303 304 union /* Guest-arch specific data. */ 305 { 306 struct /* x86 */ 307 { 308 /* Common save/restore data. */ 309 union 310 { 311 struct 312 { 313 /* X86_{CPUID,MSR}_DATA blobs for CPU Policy. */ 314 struct xc_sr_blob cpuid, msr; 315 } restore; 316 }; 317 318 struct /* x86 PV guest. */ 319 { 320 /* 4 or 8; 32 or 64 bit domain */ 321 unsigned int width; 322 /* 3 or 4 pagetable levels */ 323 unsigned int levels; 324 325 /* Maximum Xen frame */ 326 xen_pfn_t max_mfn; 327 /* Read-only machine to phys map */ 328 xen_pfn_t *m2p; 329 /* first mfn of the compat m2p (Only needed for 32bit PV guests) */ 330 xen_pfn_t compat_m2p_mfn0; 331 /* Number of m2p frames mapped */ 332 unsigned long nr_m2p_frames; 333 334 /* Maximum guest frame */ 335 xen_pfn_t max_pfn; 336 337 /* Number of frames making up the p2m */ 338 unsigned int p2m_frames; 339 /* Guest's phys to machine map. Mapped read-only (save) or 340 * allocated locally (restore). Uses guest unsigned longs. */ 341 void *p2m; 342 /* The guest pfns containing the p2m leaves */ 343 xen_pfn_t *p2m_pfns; 344 345 /* Read-only mapping of guests shared info page */ 346 shared_info_any_t *shinfo; 347 348 /* p2m generation count for verifying validity of local p2m. */ 349 uint64_t p2m_generation; 350 351 union 352 { 353 struct 354 { 355 /* State machine for the order of received records. */ 356 bool seen_pv_info; 357 358 /* Types for each page (bounded by max_pfn). */ 359 uint32_t *pfn_types; 360 361 /* x86 PV per-vcpu storage structure for blobs. */ 362 struct xc_sr_x86_pv_restore_vcpu 363 { 364 struct xc_sr_blob basic, extd, xsave, msr; 365 } *vcpus; 366 unsigned int nr_vcpus; 367 } restore; 368 }; 369 } pv; 370 371 struct /* x86 HVM guest. */ 372 { 373 union 374 { 375 struct 376 { 377 /* Whether qemu enabled logdirty mode, and we should 378 * disable on cleanup. */ 379 bool qemu_enabled_logdirty; 380 } save; 381 382 struct 383 { 384 /* HVM context blob. */ 385 struct xc_sr_blob context; 386 } restore; 387 }; 388 } hvm; 389 390 } x86; 391 }; 392 }; 393 394 extern struct xc_sr_save_ops save_ops_x86_pv; 395 extern struct xc_sr_save_ops save_ops_x86_hvm; 396 397 extern struct xc_sr_restore_ops restore_ops_x86_pv; 398 extern struct xc_sr_restore_ops restore_ops_x86_hvm; 399 400 struct xc_sr_record 401 { 402 uint32_t type; 403 uint32_t length; 404 void *data; 405 }; 406 407 /* 408 * Writes a split record to the stream, applying correct padding where 409 * appropriate. It is common when sending records containing blobs from Xen 410 * that the header and blob data are separate. This function accepts a second 411 * buffer and length, and will merge it with the main record when sending. 412 * 413 * Records with a non-zero length must provide a valid data field; records 414 * with a 0 length shall have their data field ignored. 415 * 416 * Returns 0 on success and non0 on failure. 417 */ 418 int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec, 419 void *buf, size_t sz); 420 421 /* 422 * Writes a record to the stream, applying correct padding where appropriate. 423 * Records with a non-zero length must provide a valid data field; records 424 * with a 0 length shall have their data field ignored. 425 * 426 * Returns 0 on success and non0 on failure. 427 */ write_record(struct xc_sr_context * ctx,struct xc_sr_record * rec)428 static inline int write_record(struct xc_sr_context *ctx, 429 struct xc_sr_record *rec) 430 { 431 return write_split_record(ctx, rec, NULL, 0); 432 } 433 434 /* 435 * Reads a record from the stream, and fills in the record structure. 436 * 437 * Returns 0 on success and non-0 on failure. 438 * 439 * On success, the records type and size shall be valid. 440 * - If size is 0, data shall be NULL. 441 * - If size is non-0, data shall be a buffer allocated by malloc() which must 442 * be passed to free() by the caller. 443 * 444 * On failure, the contents of the record structure are undefined. 445 */ 446 int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec); 447 448 /* 449 * This would ideally be private in restore.c, but is needed by 450 * x86_pv_localise_page() if we receive pagetables frames ahead of the 451 * contents of the frames they point at. 452 */ 453 int populate_pfns(struct xc_sr_context *ctx, unsigned int count, 454 const xen_pfn_t *original_pfns, const uint32_t *types); 455 456 /* Handle a STATIC_DATA_END record. */ 457 int handle_static_data_end(struct xc_sr_context *ctx); 458 459 #endif 460 /* 461 * Local variables: 462 * mode: C 463 * c-file-style: "BSD" 464 * c-basic-offset: 4 465 * tab-width: 4 466 * indent-tabs-mode: nil 467 * End: 468 */ 469