1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published
7 * by the Free Software Foundation; version 2.1 only. with the special
8 * exception on linking described in file LICENSE.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 */
15
16 #include "libxl_osdeps.h" /* must come before any other headers */
17
18 #include "libxl_internal.h"
19
20 #include <xen/errno.h>
21
22 /*========================= Domain save ============================*/
23
24 static void stream_done(libxl__egc *egc,
25 libxl__stream_write_state *sws, int rc);
26 static void domain_save_done(libxl__egc *egc,
27 libxl__domain_save_state *dss, int rc);
28
29 /*----- complicated callback, called by xc_domain_save -----*/
30
/*
 * We implement the other end of the protocol for controlling qemu-dm's
 * logdirty.  There is no documentation for this protocol, but our
 * counterparty's implementation is in
 * qemu-xen-traditional.git:xenstore.c in the function
 * xenstore_process_logdirty_event
 */
38
39 static void domain_suspend_switch_qemu_xen_traditional_logdirty
40 (libxl__egc *egc, int domid, unsigned enable,
41 libxl__logdirty_switch *lds);
42 static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*,
43 const char *watch_path, const char *event_path);
44 static void domain_suspend_switch_qemu_xen_logdirty
45 (libxl__egc *egc, int domid, unsigned enable,
46 libxl__logdirty_switch *lds);
47 static void switch_qemu_xen_logdirty_done(libxl__egc *egc,
48 libxl__ev_qmp *qmp,
49 const libxl__json_object *,
50 int rc);
51 static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
52 const struct timeval *requested_abs,
53 int rc);
54 static void switch_logdirty_done(libxl__egc *egc,
55 libxl__logdirty_switch *lds, int rc);
56
/*
 * Prepare a logdirty switch state for first use: clear the cached command
 * path and run the init helper of each embedded event (xenstore watch,
 * timeout, QMP).
 */
void libxl__logdirty_init(libxl__logdirty_switch *lds)
{
    /* A null cmd_path means the xenstore paths have not been computed yet. */
    lds->cmd_path = NULL;

    libxl__ev_xswatch_init(&lds->watch);
    libxl__ev_time_init(&lds->timeout);
    libxl__ev_qmp_init(&lds->qmp);
}
64
/*
 * Start a logdirty enable/disable request for 'domid', dispatching on the
 * device model version that is running for the domain.
 *
 * The outcome is reported through lds->callback.  If the running device
 * model version is neither qemu-xen-traditional nor qemu-xen, the callback
 * is invoked immediately with ERROR_FAIL.
 */
void libxl__domain_common_switch_qemu_logdirty(libxl__egc *egc,
                                               int domid, unsigned enable,
                                               libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);

    switch (libxl__device_model_version_running(gc, domid)) {
    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
        domain_suspend_switch_qemu_xen_logdirty(egc, domid, enable, lds);
        return;

    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
        domain_suspend_switch_qemu_xen_traditional_logdirty(egc, domid,
                                                            enable, lds);
        return;

    default:
        break;
    }

    /* No usable device model: fail the request straight away. */
    LOGD(ERROR, domid, "logdirty switch failed"
         ", no valid device model version found, abandoning suspend");
    lds->callback(egc, lds, ERROR_FAIL);
}
85
/*
 * Ask a qemu-xen-traditional device model to enable or disable logdirty
 * via its xenstore "logdirty/cmd" and "logdirty/ret" nodes.
 *
 * A watch on the return node and a timeout are registered first; then, in
 * a xenstore transaction, any previous command that has already been
 * answered is cleared, the return node is removed and the new command is
 * written.  On success the function returns and completion arrives later
 * through switch_logdirty_xswatch() or switch_logdirty_timeout().  On any
 * failure the transaction is aborted and switch_logdirty_done() is called
 * with the error.
 */
static void domain_suspend_switch_qemu_xen_traditional_logdirty
                               (libxl__egc *egc, int domid, unsigned enable,
                                libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    xs_transaction_t xst = 0;
    const char *prev_cmd;
    int rc;

    /* The xenstore paths are computed only once per switch state. */
    if (!lds->cmd_path) {
        uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);

        lds->cmd_path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid,
                                             "/logdirty/cmd");
        lds->ret_path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid,
                                             "/logdirty/ret");
    }
    lds->cmd = enable ? "enable" : "disable";

    rc = libxl__ev_xswatch_register(gc, &lds->watch,
                                    switch_logdirty_xswatch, lds->ret_path);
    if (rc) goto out;

    rc = libxl__ev_time_register_rel(ao, &lds->timeout,
                                     switch_logdirty_timeout, 10*1000);
    if (rc) goto out;

    do {
        rc = libxl__xs_transaction_start(gc, &xst);
        if (rc) goto out;

        rc = libxl__xs_read_checked(gc, xst, lds->cmd_path, &prev_cmd);
        if (rc) goto out;

        if (prev_cmd) {
            const char *prev_ret;

            /*
             * A command is already present.  That is only acceptable if
             * the return node shows the very same value.
             */
            rc = libxl__xs_read_checked(gc, xst, lds->ret_path, &prev_ret);
            if (rc) goto out;

            if (!prev_ret || strcmp(prev_cmd, prev_ret) != 0) {
                LOGD(ERROR, domid, "controlling logdirty: qemu was already sent"
                     " command `%s' (xenstore path `%s') but result is `%s'",
                     prev_cmd, lds->cmd_path, prev_ret ? prev_ret : "<none>");
                rc = ERROR_FAIL;
                goto out;
            }

            rc = libxl__xs_rm_checked(gc, xst, lds->cmd_path);
            if (rc) goto out;
        }

        rc = libxl__xs_rm_checked(gc, xst, lds->ret_path);
        if (rc) goto out;

        rc = libxl__xs_write_checked(gc, xst, lds->cmd_path, lds->cmd);
        if (rc) goto out;

        rc = libxl__xs_transaction_commit(gc, &xst);
        if (rc < 0) goto out;

        /* A positive result from the commit means: run it all again. */
    } while (rc);

    /* OK, wait for some callback */
    return;

 out:
    LOGD(ERROR, domid, "logdirty switch failed (rc=%d), abandoning suspend",rc);
    libxl__xs_transaction_abort(gc, &xst);
    switch_logdirty_done(egc,lds,rc);
}
154
/*
 * xenstore watch callback for the logdirty return node.
 *
 * Reads the return node inside a transaction.  If it is absent, nothing
 * is finished and the function returns so that waiting continues.  If it
 * holds something other than the command we sent, the switch fails with
 * ERROR_FAIL.  If it matches, both the command and return nodes are
 * removed and the switch completes successfully.
 */
static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch *watch,
                            const char *watch_path, const char *event_path)
{
    libxl__logdirty_switch *lds = CONTAINER_OF(watch, *lds, watch);
    STATE_AO_GC(lds->ao);
    xs_transaction_t xst = 0;
    const char *reply;
    int rc;

    do {
        rc = libxl__xs_transaction_start(gc, &xst);
        if (rc) goto out;

        rc = libxl__xs_read_checked(gc, xst, lds->ret_path, &reply);
        if (rc) goto out;

        if (!reply) {
            /* No answer yet. */
            rc = +1;
            goto out;
        }

        if (strcmp(reply, lds->cmd) != 0) {
            LOG(ERROR,"logdirty switch: sent command `%s' but got reply `%s'"
                " (xenstore paths `%s' / `%s')", lds->cmd, reply,
                lds->cmd_path, lds->ret_path);
            rc = ERROR_FAIL;
            goto out;
        }

        rc = libxl__xs_rm_checked(gc, xst, lds->cmd_path);
        if (rc) goto out;

        rc = libxl__xs_rm_checked(gc, xst, lds->ret_path);
        if (rc) goto out;

        rc = libxl__xs_transaction_commit(gc, &xst);

        /* Only a positive commit result sends us round the loop again. */
    } while (rc > 0);

 out:
    /* rc < 0:   error
     * rc == 0:  ok, we are done
     * rc == +1: need to keep waiting
     */
    libxl__xs_transaction_abort(gc, &xst);

    if (rc > 0)
        return;

    if (rc < 0)
        LOG(ERROR,"logdirty switch: failed (rc=%d)",rc);

    switch_logdirty_done(egc,lds,rc);
}
208
/*
 * Ask a qemu-xen device model to enable or disable logdirty by sending the
 * QMP command "xen-set-global-dirty-log" with a boolean "enable" argument.
 *
 * A timeout is registered before the command is sent.  If either the
 * timeout registration or the send fails, switch_qemu_xen_logdirty_done()
 * is called directly with the error; otherwise it is installed as the QMP
 * callback and the function returns to wait for it.
 */
static void domain_suspend_switch_qemu_xen_logdirty
                               (libxl__egc *egc, int domid, unsigned enable,
                                libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    libxl__json_object *qmp_args = NULL;
    libxl__ev_qmp *const qmp = &lds->qmp;
    int rc;

    rc = libxl__ev_time_register_rel(ao, &lds->timeout,
                                     switch_logdirty_timeout, 10 * 1000);
    if (!rc) {
        qmp->ao = ao;
        qmp->domid = domid;
        qmp->payload_fd = -1;
        qmp->callback = switch_qemu_xen_logdirty_done;

        libxl__qmp_param_add_bool(gc, &qmp_args, "enable", enable);
        rc = libxl__ev_qmp_send(egc, qmp, "xen-set-global-dirty-log",
                                qmp_args);
    }

    /* On success the QMP callback (or the timeout) finishes the switch. */
    if (rc)
        switch_qemu_xen_logdirty_done(egc, qmp, NULL, rc);
}
236
/*
 * Completion of the QMP logdirty request (also called directly when the
 * request could not be started).  Logs a failure, if any, and hands the
 * result to the common completion path.  The QMP response 'r' is not
 * inspected.
 */
static void switch_qemu_xen_logdirty_done(libxl__egc *egc,
                                          libxl__ev_qmp *qmp,
                                          const libxl__json_object *r,
                                          int rc)
{
    libxl__logdirty_switch *lds = CONTAINER_OF(qmp, *lds, qmp);
    EGC_GC;

    if (rc) {
        LOGD(ERROR, qmp->domid,
             "logdirty switch failed (rc=%d), abandoning suspend",rc);
    }

    switch_logdirty_done(egc, lds, rc);
}
250
/*
 * Timeout callback for a pending logdirty switch: logs the timeout and
 * finishes the switch with ERROR_FAIL.
 *
 * Note that neither 'requested_abs' nor the 'rc' passed in by the event
 * machinery is examined; the switch is always reported as ERROR_FAIL.
 */
static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
                                    const struct timeval *requested_abs,
                                    int rc)
{
    libxl__logdirty_switch *lds = CONTAINER_OF(ev, *lds, timeout);
    STATE_AO_GC(lds->ao);

    LOG(ERROR,"logdirty switch: wait for device model timed out");

    switch_logdirty_done(egc, lds, ERROR_FAIL);
}
260
/*
 * Common end point of every logdirty switch attempt: release the xenstore
 * watch, the timeout and the QMP event, then report 'rc' to the owner
 * through lds->callback.
 */
static void switch_logdirty_done(libxl__egc *egc,
                                 libxl__logdirty_switch *lds,
                                 int rc)
{
    STATE_AO_GC(lds->ao);

    /* Tear down every event source a starter may have armed. */
    libxl__ev_xswatch_deregister(gc, &lds->watch);
    libxl__ev_time_deregister(gc, &lds->timeout);
    libxl__ev_qmp_dispose(gc, &lds->qmp);

    lds->callback(egc, lds, rc);
}
273
274 static void domain_suspend_switch_qemu_logdirty_done
275 (libxl__egc *egc, libxl__logdirty_switch *lds, int rc);
276
libxl__domain_suspend_common_switch_qemu_logdirty(uint32_t domid,unsigned enable,void * user)277 void libxl__domain_suspend_common_switch_qemu_logdirty
278 (uint32_t domid, unsigned enable, void *user)
279 {
280 libxl__save_helper_state *shs = user;
281 libxl__egc *egc = shs->egc;
282 libxl__domain_save_state *dss = shs->caller_state;
283
284 /* Convenience aliases. */
285 libxl__logdirty_switch *const lds = &dss->logdirty;
286
287 if (dss->type == LIBXL_DOMAIN_TYPE_PVH) {
288 domain_suspend_switch_qemu_logdirty_done(egc, lds, 0);
289 return;
290 }
291
292 lds->callback = domain_suspend_switch_qemu_logdirty_done;
293 libxl__domain_common_switch_qemu_logdirty(egc, domid, enable, lds);
294 }
295
/*
 * Completion of a logdirty switch requested by the save helper.
 *
 * On failure the error is recorded in dss->rc and the helper's async
 * callback is completed with -1; on success it is completed with 0.
 */
static void domain_suspend_switch_qemu_logdirty_done
                        (libxl__egc *egc, libxl__logdirty_switch *lds, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(lds, *dss, logdirty);

    /* dss->rc is only touched when the switch failed. */
    if (rc)
        dss->rc = rc;

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
                                                     rc ? -1 : 0);
}
308
309 /*----- callbacks, called by xc_domain_save -----*/
310
311 /*
312 * Expand the buffer 'buf' of length 'len', to append 'str' including its NUL
313 * terminator.
314 */
append_string(libxl__gc * gc,char ** buf,uint32_t * len,const char * str)315 static void append_string(libxl__gc *gc, char **buf, uint32_t *len,
316 const char *str)
317 {
318 size_t extralen = strlen(str) + 1;
319 char *new = libxl__realloc(gc, *buf, *len + extralen);
320
321 *buf = new;
322 memcpy(new + *len, str, extralen);
323 *len += extralen;
324 }
325
/*
 * Collect the device model's "physmap" xenstore entries for the domain
 * being saved into one flat buffer.
 *
 * For every directory entry under <dm root>/physmap, the subkeys
 * "start_addr", "size" and "name" are read.  Each one is appended to the
 * buffer as two consecutive NUL-terminated strings: the key relative to
 * the device model root ("physmap/<entry>/<subkey>") and its value.
 *
 * Returns 0 on success and stores the buffer and its length in
 * '*callee_buf' / '*callee_len'; when there are no physmap entries these
 * are NULL and 0.  Returns ERROR_FAIL, leaving the outputs untouched, if
 * any subkey cannot be read.
 */
int libxl__save_emulator_xenstore_data(libxl__domain_save_state *dss,
                                       char **callee_buf,
                                       uint32_t *callee_len)
{
    static const char *const physmap_subkeys[] = {
        "start_addr", "size", "name"
    };

    STATE_AO_GC(dss->ao);
    const uint32_t domid = dss->domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
    const char *xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");
    unsigned int nr_entries, e, k;
    uint32_t len = 0;
    char *buf = NULL;
    char **entries;

    entries = libxl__xs_directory(gc, 0, GCSPRINTF("%s/physmap", xs_root),
                                  &nr_entries);

    if (entries && nr_entries != 0) {
        for (e = 0; e < nr_entries; ++e) {
            for (k = 0; k < ARRAY_SIZE(physmap_subkeys); ++k) {
                const char *key = GCSPRINTF("physmap/%s/%s",
                                            entries[e], physmap_subkeys[k]);
                const char *val =
                    libxl__xs_read(gc, XBT_NULL,
                                   GCSPRINTF("%s/%s", xs_root, key));

                /* Every subkey is mandatory. */
                if (!val)
                    return ERROR_FAIL;

                append_string(gc, &buf, &len, key);
                append_string(gc, &buf, &len, val);
            }
        }
    }

    *callee_buf = buf;
    *callee_len = len;
    return 0;
}
375
376 /*----- main code for saving, in order of execution -----*/
377
/*
 * Start saving the domain described by 'dss'.
 *
 * Steps, in order:
 *  - reject a checkpointed stream that comes without checkpoint (Remus)
 *    info (ERROR_INVAL);
 *  - initialise the logdirty switch state and the suspend state;
 *  - compute the xc save flags from 'live' and 'debug';
 *  - refuse to save any guest that has vNUMA enabled (ERROR_FAIL);
 *  - fill in the save helper callbacks and the stream writer state, then
 *    start the stream writer.
 *
 * Failures detected here are reported through domain_save_done().  Once
 * the stream writer has been started, completion arrives via
 * stream_done().
 */
void libxl__domain_save(libxl__egc *egc, libxl__domain_save_state *dss)
{
    STATE_AO_GC(dss->ao);
    int rc, ret;

    /* Convenience aliases */
    const uint32_t domid = dss->domid;
    const libxl_domain_type type = dss->type;
    const int live = dss->live;
    const int debug = dss->debug;
    const libxl_domain_remus_info *const r_info = dss->remus;
    libxl__srm_save_autogen_callbacks *const callbacks =
        &dss->sws.shs.callbacks.save.a;
    unsigned int nr_vnodes = 0, nr_vmemranges = 0, nr_vcpus = 0;
    libxl__domain_suspend_state *dsps = &dss->dsps;

    if (dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE && !r_info) {
        LOGD(ERROR, domid, "Migration stream is checkpointed, but there's no "
                           "checkpoint info!");
        rc = ERROR_INVAL;
        goto out;
    }

    dss->rc = 0;
    libxl__logdirty_init(&dss->logdirty);
    dss->logdirty.ao = ao;

    dsps->ao = ao;
    dsps->domid = domid;
    dsps->live = !!live;
    rc = libxl__domain_suspend_init(egc, dsps, type);
    if (rc) goto out;

    dss->xcflags = (live ? XCFLAGS_LIVE : 0)
          | (debug ? XCFLAGS_DEBUG : 0);

    /* Disallow saving a guest with vNUMA configured because migration
     * stream does not preserve node information.
     *
     * Reject any domain which has vnuma enabled, even if the
     * configuration is empty. Only domains which have no vnuma
     * configuration at all are supported.
     *
     * The only acceptable outcome of the probe is therefore a failure
     * with "operation not supported".  That is compared against Xen's
     * ABI value, XEN_EOPNOTSUPP from <xen/errno.h> (already included by
     * this file), rather than the host libc's EOPNOTSUPP, whose numeric
     * value differs on some operating systems (e.g. the BSDs).
     */
    ret = xc_domain_getvnuma(CTX->xch, domid, &nr_vnodes, &nr_vmemranges,
                             &nr_vcpus, NULL, NULL, NULL);
    if (ret != -1 || errno != XEN_EOPNOTSUPP) {
        LOGD(ERROR, domid, "Cannot save a guest with vNUMA configured");
        rc = ERROR_FAIL;
        goto out;
    }

    /* The suspend callback is installed only for non-checkpointed streams. */
    if (dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_NONE)
        callbacks->suspend = libxl__domain_suspend_callback;

    callbacks->switch_qemu_logdirty = libxl__domain_suspend_common_switch_qemu_logdirty;

    dss->sws.ao  = dss->ao;
    dss->sws.dss = dss;
    dss->sws.fd  = dss->fd;
    dss->sws.back_channel = false;
    dss->sws.completion_callback = stream_done;

    libxl__stream_write_start(egc, &dss->sws);
    return;

 out:
    domain_save_done(egc, dss, rc);
}
446
/*
 * Stream writer completion callback: forwards the result to
 * domain_save_done() for the save state that owns the stream.
 */
static void stream_done(libxl__egc *egc,
                        libxl__stream_write_state *sws, int rc)
{
    libxl__domain_save_state *const dss = sws->dss;

    domain_save_done(egc, dss, rc);
}
452
/*
 * Final stage of a domain save.
 *
 * Cancels the guest suspend event channel wait and, if a port is held
 * (port > 0), releases it.  Without Remus/COLO info the caller's callback
 * is invoked with 'rc' right away; otherwise the COLO or Remus teardown is
 * started and given 'rc' instead.
 */
static void domain_save_done(libxl__egc *egc,
                             libxl__domain_save_state *dss, int rc)
{
    STATE_AO_GC(dss->ao);
    libxl__domain_suspend_state *dsps = &dss->dsps;
    const uint32_t domid = dss->domid;

    libxl__ev_evtchn_cancel(gc, &dsps->guest_evtchn);

    if (dsps->guest_evtchn.port > 0) {
        xc_suspend_evtchn_release(CTX->xch, CTX->xce, domid,
                                  dsps->guest_evtchn.port,
                                  &dsps->guest_evtchn_lockfd);
    }

    /* Plain save: nothing more to tear down. */
    if (!dss->remus) {
        dss->callback(egc, dss, rc);
        return;
    }

    /*
     * With Remus/COLO, if we reach this point, it means either
     * backup died or some network error occurred preventing us
     * from sending checkpoints. Teardown the network buffers and
     * release netlink resources.  This is an async op.
     */
    if (libxl_defbool_val(dss->remus->colo))
        libxl__colo_save_teardown(egc, &dss->css, rc);
    else
        libxl__remus_teardown(egc, &dss->rs, rc);
}
484
485 /*========================= Domain restore ============================*/
486
/*
 * Look for the NUL terminator of the string beginning at 'start' without
 * reading at or beyond 'end'.  Returns a pointer to the byte just after
 * that terminator, or NULL when 'start' is not before 'end' or when no
 * terminator occurs before 'end'.
 */
static const char *next_string(const char *start, const char *end)
{
    const char *nul;

    if (start >= end)
        return NULL;

    nul = memchr(start, '\0', (size_t)(end - start));

    return nul ? nul + 1 : NULL;
}
504
/*
 * Write back device model xenstore data that arrived as a flat buffer of
 * 'size' bytes at 'ptr'.
 *
 * The buffer is read as consecutive (key, value) pairs of NUL-terminated
 * strings.  Every key must be terminated inside the buffer, non-empty and
 * relative (not starting with '/'); every value must be terminated inside
 * the buffer.  Each pair is written to "<device model root>/<key>".
 *
 * Returns 0 once the whole buffer has been consumed, or ERROR_FAIL at the
 * first malformed key or value.  Pairs written before the malformed one
 * are not undone.
 */
int libxl__restore_emulator_xenstore_data(libxl__domain_create_state *dcs,
                                          const char *ptr, uint32_t size)
{
    STATE_AO_GC(dcs->ao);
    const uint32_t domid = dcs->guest_domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
    const char *xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");
    const char *const end = ptr + size;
    const char *cursor = ptr;

    while (cursor < end) {
        const char *const key = cursor;
        const char *val;

        /* Sanitise 'key'. */
        cursor = next_string(key, end);
        if (!cursor) {
            LOGD(ERROR, domid, "Key in xenstore data not NUL terminated");
            return ERROR_FAIL;
        }
        if (key[0] == '\0') {
            LOGD(ERROR, domid, "empty key found in xenstore data");
            return ERROR_FAIL;
        }
        if (key[0] == '/') {
            LOGD(ERROR, domid, "Key in xenstore data not relative");
            return ERROR_FAIL;
        }

        /* Sanitise 'val'. */
        val = cursor;
        cursor = next_string(val, end);
        if (!cursor) {
            LOGD(ERROR, domid, "Val in xenstore data not NUL terminated");
            return ERROR_FAIL;
        }

        /* NOTE(review): the result of this write is not checked. */
        libxl__xs_printf(gc, XBT_NULL,
                         GCSPRINTF("%s/%s", xs_root, key),
                         "%s", val);
    }

    return 0;
}
557
558 /*
559 * Local variables:
560 * mode: C
561 * c-basic-offset: 4
562 * indent-tabs-mode: nil
563 * End:
564 */
565