1 /*
2  * Copyright (C) 2009      Citrix Ltd.
3  * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published
7  * by the Free Software Foundation; version 2.1 only. with the special
8  * exception on linking described in file LICENSE.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  */
15 
16 #include "libxl_osdeps.h" /* must come before any other headers */
17 
18 #include "libxl_internal.h"
19 
20 #include <xen/errno.h>
21 
22 /*========================= Domain save ============================*/
23 
24 static void stream_done(libxl__egc *egc,
25                         libxl__stream_write_state *sws, int rc);
26 static void domain_save_done(libxl__egc *egc,
27                              libxl__domain_save_state *dss, int rc);
28 
29 /*----- complicated callback, called by xc_domain_save -----*/
30 
/*
 * We implement the other end of the protocol for controlling qemu-dm's
 * logdirty.  There is no documentation for this protocol, but our
 * counterparty's implementation is in
 * qemu-xen-traditional.git:xenstore.c in the function
 * xenstore_process_logdirty_event.
 */
38 
39 static void domain_suspend_switch_qemu_xen_traditional_logdirty
40                                (libxl__egc *egc, int domid, unsigned enable,
41                                 libxl__logdirty_switch *lds);
42 static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*,
43                             const char *watch_path, const char *event_path);
44 static void domain_suspend_switch_qemu_xen_logdirty
45                                (libxl__egc *egc, int domid, unsigned enable,
46                                 libxl__logdirty_switch *lds);
47 static void switch_qemu_xen_logdirty_done(libxl__egc *egc,
48                                           libxl__ev_qmp *qmp,
49                                           const libxl__json_object *,
50                                           int rc);
51 static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
52                                     const struct timeval *requested_abs,
53                                     int rc);
54 static void switch_logdirty_done(libxl__egc *egc,
55                                  libxl__logdirty_switch *lds, int rc);
56 
/*
 * Put a logdirty switch into its initial state: the watch, timeout and
 * QMP event members are initialised through their *_init helpers and the
 * xenstore command path is marked as not yet computed.
 */
void libxl__logdirty_init(libxl__logdirty_switch *lds)
{
    libxl__ev_xswatch_init(&lds->watch);
    libxl__ev_time_init(&lds->timeout);
    libxl__ev_qmp_init(&lds->qmp);

    /* A null cmd_path tells the xenstore variant to build its paths. */
    lds->cmd_path = NULL;
}
64 
/*
 * Ask the device model of 'domid' to enable or disable dirty logging.
 *
 * Dispatches on the running device model version.  For an unrecognised
 * version an error is logged and lds->callback is invoked directly with
 * ERROR_FAIL.
 */
void libxl__domain_common_switch_qemu_logdirty(libxl__egc *egc,
                                               int domid, unsigned enable,
                                               libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    const int dm_version = libxl__device_model_version_running(gc, domid);

    if (dm_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL) {
        domain_suspend_switch_qemu_xen_traditional_logdirty(egc, domid, enable,
                                                            lds);
        return;
    }

    if (dm_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
        domain_suspend_switch_qemu_xen_logdirty(egc, domid, enable, lds);
        return;
    }

    /* Neither known device model: nothing was registered, report failure. */
    LOGD(ERROR, domid, "logdirty switch failed"
         ", no valid device model version found, abandoning suspend");
    lds->callback(egc, lds, ERROR_FAIL);
}
85 
/*
 * qemu-xen-traditional variant of the logdirty switch.
 *
 * Registers a watch on the result node and a timeout, then, within a
 * xenstore transaction, removes any earlier command (provided its result
 * matches it), removes the result node and writes the new
 * "enable"/"disable" command.  On success the function simply returns and
 * completion is reported from the watch or timeout callback; on failure
 * switch_logdirty_done() is called with the error code.
 */
static void domain_suspend_switch_qemu_xen_traditional_logdirty
                               (libxl__egc *egc, int domid, unsigned enable,
                                libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    xs_transaction_t xst = 0;
    const char *prev_cmd;
    int rc;

    /* The two xenstore paths are only built when cmd_path is still unset. */
    if (lds->cmd_path == NULL) {
        uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);

        lds->cmd_path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid,
                                             "/logdirty/cmd");
        lds->ret_path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid,
                                             "/logdirty/ret");
    }

    if (enable)
        lds->cmd = "enable";
    else
        lds->cmd = "disable";

    rc = libxl__ev_xswatch_register(gc, &lds->watch,
                                    switch_logdirty_xswatch, lds->ret_path);
    if (rc) goto out;

    rc = libxl__ev_time_register_rel(ao, &lds->timeout,
                                     switch_logdirty_timeout, 10*1000);
    if (rc) goto out;

    /* Retry the whole transaction while the commit returns a positive rc. */
    do {
        rc = libxl__xs_transaction_start(gc, &xst);
        if (rc) goto out;

        rc = libxl__xs_read_checked(gc, xst, lds->cmd_path, &prev_cmd);
        if (rc) goto out;

        if (prev_cmd != NULL) {
            const char *prev_ret;

            rc = libxl__xs_read_checked(gc, xst, lds->ret_path, &prev_ret);
            if (rc) goto out;

            /* An earlier command must already have a matching result. */
            if (prev_ret == NULL || strcmp(prev_cmd, prev_ret) != 0) {
                LOGD(ERROR, domid, "controlling logdirty: qemu was already sent"
                     " command `%s' (xenstore path `%s') but result is `%s'",
                     prev_cmd, lds->cmd_path, prev_ret ? prev_ret : "<none>");
                rc = ERROR_FAIL;
                goto out;
            }

            rc = libxl__xs_rm_checked(gc, xst, lds->cmd_path);
            if (rc) goto out;
        }

        rc = libxl__xs_rm_checked(gc, xst, lds->ret_path);
        if (rc) goto out;

        rc = libxl__xs_write_checked(gc, xst, lds->cmd_path, lds->cmd);
        if (rc) goto out;

        rc = libxl__xs_transaction_commit(gc, &xst);
        if (rc < 0) goto out;
    } while (rc != 0);

    /* Command written; the watch or the timeout will finish the job. */
    return;

 out:
    LOGD(ERROR, domid, "logdirty switch failed (rc=%d), abandoning suspend",rc);
    libxl__xs_transaction_abort(gc, &xst);
    switch_logdirty_done(egc, lds, rc);
}
154 
/*
 * Watch callback for the logdirty result node.
 *
 * Reads the result inside a transaction.  If the node is absent we keep
 * waiting; if it differs from the command we sent the switch fails;
 * otherwise both the command and result nodes are removed and the switch
 * completes successfully.
 */
static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch *watch,
                            const char *watch_path, const char *event_path)
{
    libxl__logdirty_switch *lds = CONTAINER_OF(watch, *lds, watch);
    STATE_AO_GC(lds->ao);
    xs_transaction_t xst = 0;
    const char *reply;
    int rc;

    do {
        rc = libxl__xs_transaction_start(gc, &xst);
        if (rc) goto out;

        rc = libxl__xs_read_checked(gc, xst, lds->ret_path, &reply);
        if (rc) goto out;

        if (reply == NULL) {
            /* No answer yet. */
            rc = +1;
            goto out;
        }

        if (strcmp(reply, lds->cmd) != 0) {
            LOG(ERROR,"logdirty switch: sent command `%s' but got reply `%s'"
                " (xenstore paths `%s' / `%s')", lds->cmd, reply,
                lds->cmd_path, lds->ret_path);
            rc = ERROR_FAIL;
            goto out;
        }

        rc = libxl__xs_rm_checked(gc, xst, lds->cmd_path);
        if (rc) goto out;

        rc = libxl__xs_rm_checked(gc, xst, lds->ret_path);
        if (rc) goto out;

        rc = libxl__xs_transaction_commit(gc, &xst);
        if (rc < 0) goto out;
    } while (rc != 0);

 out:
    /*
     * rc <  0: error
     * rc == 0: ok, we are done
     * rc == +1: need to keep waiting
     */
    libxl__xs_transaction_abort(gc, &xst);

    if (rc > 0)
        return;

    if (rc < 0)
        LOG(ERROR,"logdirty switch: failed (rc=%d)",rc);
    switch_logdirty_done(egc, lds, rc);
}
208 
/*
 * qemu-xen variant of the logdirty switch.
 *
 * Arms the timeout and sends the "xen-set-global-dirty-log" QMP command
 * with a boolean "enable" argument.  If either step fails, the QMP
 * completion handler is invoked directly with the error; otherwise it is
 * left to the QMP event (or the timeout) to finish the operation.
 */
static void domain_suspend_switch_qemu_xen_logdirty
                               (libxl__egc *egc, int domid, unsigned enable,
                                libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    libxl__ev_qmp *const qmp = &lds->qmp;
    libxl__json_object *args = NULL;
    int rc;

    rc = libxl__ev_time_register_rel(ao, &lds->timeout,
                                     switch_logdirty_timeout, 10 * 1000);
    if (!rc) {
        /* Describe the QMP request, then fire it off. */
        qmp->ao = ao;
        qmp->domid = domid;
        qmp->payload_fd = -1;
        qmp->callback = switch_qemu_xen_logdirty_done;
        libxl__qmp_param_add_bool(gc, &args, "enable", enable);
        rc = libxl__ev_qmp_send(egc, qmp, "xen-set-global-dirty-log", args);
    }

    if (rc)
        switch_qemu_xen_logdirty_done(egc, qmp, NULL, rc);
}
236 
/*
 * Completion handler for the QMP logdirty command (also called directly
 * when sending it failed).  Logs a non-zero rc and hands over to
 * switch_logdirty_done().  The QMP response 'r' is not inspected.
 */
static void switch_qemu_xen_logdirty_done(libxl__egc *egc,
                                          libxl__ev_qmp *qmp,
                                          const libxl__json_object *r,
                                          int rc)
{
    EGC_GC;
    libxl__logdirty_switch *lds = CONTAINER_OF(qmp, *lds, qmp);

    if (rc != 0) {
        LOGD(ERROR, qmp->domid,
             "logdirty switch failed (rc=%d), abandoning suspend",rc);
    }

    switch_logdirty_done(egc, lds, rc);
}
250 
/*
 * Timeout callback for a pending logdirty switch.  Logs the timeout and
 * completes the switch with ERROR_FAIL; the incoming rc and
 * requested_abs are not used.
 */
static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
                                    const struct timeval *requested_abs,
                                    int rc)
{
    libxl__logdirty_switch *lds = CONTAINER_OF(ev, *lds, timeout);
    STATE_AO_GC(lds->ao);

    LOG(ERROR,"logdirty switch: wait for device model timed out");

    switch_logdirty_done(egc, lds, ERROR_FAIL);
}
260 
/*
 * Common exit point of a logdirty switch: releases the watch, the timeout
 * and the QMP event, then reports 'rc' through lds->callback.
 */
static void switch_logdirty_done(libxl__egc *egc,
                                 libxl__logdirty_switch *lds,
                                 int rc)
{
    STATE_AO_GC(lds->ao);

    /* Tear down every event source either variant may have set up. */
    libxl__ev_xswatch_deregister(gc, &lds->watch);
    libxl__ev_time_deregister(gc, &lds->timeout);
    libxl__ev_qmp_dispose(gc, &lds->qmp);

    /* Only now notify whoever requested the switch. */
    lds->callback(egc, lds, rc);
}
273 
274 static void domain_suspend_switch_qemu_logdirty_done
275                         (libxl__egc *egc, libxl__logdirty_switch *lds, int rc);
276 
libxl__domain_suspend_common_switch_qemu_logdirty(uint32_t domid,unsigned enable,void * user)277 void libxl__domain_suspend_common_switch_qemu_logdirty
278                                (uint32_t domid, unsigned enable, void *user)
279 {
280     libxl__save_helper_state *shs = user;
281     libxl__egc *egc = shs->egc;
282     libxl__domain_save_state *dss = shs->caller_state;
283 
284     /* Convenience aliases. */
285     libxl__logdirty_switch *const lds = &dss->logdirty;
286 
287     if (dss->type == LIBXL_DOMAIN_TYPE_PVH) {
288         domain_suspend_switch_qemu_logdirty_done(egc, lds, 0);
289         return;
290     }
291 
292     lds->callback = domain_suspend_switch_qemu_logdirty_done;
293     libxl__domain_common_switch_qemu_logdirty(egc, domid, enable, lds);
294 }
295 
/*
 * Completion of a logdirty switch requested by the save helper.  A
 * non-zero rc is recorded in dss->rc and reported to the helper as -1;
 * success is reported as 0.
 */
static void domain_suspend_switch_qemu_logdirty_done
                        (libxl__egc *egc, libxl__logdirty_switch *lds, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(lds, *dss, logdirty);
    int helper_result = 0;

    if (rc) {
        dss->rc = rc;
        helper_result = -1;
    }

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
                                                     helper_result);
}
308 
309 /*----- callbacks, called by xc_domain_save -----*/
310 
311 /*
312  * Expand the buffer 'buf' of length 'len', to append 'str' including its NUL
313  * terminator.
314  */
static void append_string(libxl__gc *gc, char **buf, uint32_t *len,
                          const char *str)
{
    /* Bytes to add: the string itself plus its NUL terminator. */
    const size_t nbytes = strlen(str) + 1;
    const uint32_t old_len = *len;

    *buf = libxl__realloc(gc, *buf, old_len + nbytes);

    /* The new string starts right after the existing contents. */
    memcpy(*buf + old_len, str, nbytes);
    *len = old_len + nbytes;
}
325 
/*
 * Collect the device model's "physmap" xenstore entries for the domain
 * being saved into one buffer.
 *
 * For every entry under <dm root>/physmap the subkeys "start_addr",
 * "size" and "name" are read and appended to the buffer as consecutive
 * NUL-terminated strings: the relative key ("physmap/<entry>/<subkey>")
 * followed by its value.
 *
 * On success (return 0) *callee_buf and *callee_len receive the buffer
 * and its length; if there are no physmap entries they are set to NULL
 * and 0.  If any subkey cannot be read, ERROR_FAIL is returned and the
 * output parameters are left untouched.
 */
int libxl__save_emulator_xenstore_data(libxl__domain_save_state *dss,
                                       char **callee_buf,
                                       uint32_t *callee_len)
{
    STATE_AO_GC(dss->ao);
    const char *xs_root;
    char **entries, *buf = NULL;
    unsigned int nr_entries, i, j;
    /* Must be uint32_t: its address is handed to append_string(). */
    uint32_t len = 0;
    int rc;

    const uint32_t domid = dss->domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);

    xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");

    entries = libxl__xs_directory(gc, 0, GCSPRINTF("%s/physmap", xs_root),
                                  &nr_entries);
    /* Nothing to save is not an error: report an empty buffer. */
    if (!entries || nr_entries == 0) { rc = 0; goto out; }

    for (i = 0; i < nr_entries; ++i) {
        static const char *const physmap_subkeys[] = {
            "start_addr", "size", "name"
        };

        for (j = 0; j < ARRAY_SIZE(physmap_subkeys); ++j) {
            /* Key is stored relative to the device model's root. */
            const char *key = GCSPRINTF("physmap/%s/%s",
                                        entries[i], physmap_subkeys[j]);

            const char *val =
                libxl__xs_read(gc, XBT_NULL,
                               GCSPRINTF("%s/%s", xs_root, key));

            if (!val) { rc = ERROR_FAIL; goto out; }

            append_string(gc, &buf, &len, key);
            append_string(gc, &buf, &len, val);
        }
    }

    rc = 0;

 out:
    /* Only publish the result when everything was read successfully. */
    if (!rc) {
        *callee_buf = buf;
        *callee_len = len;
    }

    return rc;
}
375 
376 /*----- main code for saving, in order of execution -----*/
377 
/*
 * Start saving a domain.
 *
 * Validates the checkpointing configuration, initialises the logdirty
 * and suspend state, computes the xc flags, refuses guests for which
 * xc_domain_getvnuma() does anything other than fail with EOPNOTSUPP,
 * installs the save-helper callbacks and finally starts the stream
 * writer.  Any failure before the stream is started is reported through
 * domain_save_done().
 */
void libxl__domain_save(libxl__egc *egc, libxl__domain_save_state *dss)
{
    STATE_AO_GC(dss->ao);
    int rc, vnuma_ret;

    /* Convenience aliases */
    const uint32_t domid = dss->domid;
    const libxl_domain_type type = dss->type;
    const int live = dss->live;
    const int debug = dss->debug;
    const libxl_domain_remus_info *const remus_info = dss->remus;
    libxl__srm_save_autogen_callbacks *const cbs =
        &dss->sws.shs.callbacks.save.a;
    libxl__domain_suspend_state *dsps = &dss->dsps;
    unsigned int nr_vnodes = 0, nr_vmemranges = 0, nr_vcpus = 0;

    if (dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE &&
        !remus_info) {
        LOGD(ERROR, domid, "Migration stream is checkpointed, but there's no "
                           "checkpoint info!");
        rc = ERROR_INVAL;
        goto out;
    }

    dss->rc = 0;
    libxl__logdirty_init(&dss->logdirty);
    dss->logdirty.ao = ao;

    dsps->ao = ao;
    dsps->domid = domid;
    dsps->live = !!live;
    rc = libxl__domain_suspend_init(egc, dsps, type);
    if (rc) goto out;

    dss->xcflags = 0;
    if (live)
        dss->xcflags |= XCFLAGS_LIVE;
    if (debug)
        dss->xcflags |= XCFLAGS_DEBUG;

    /* Disallow saving a guest with vNUMA configured because migration
     * stream does not preserve node information.
     *
     * Reject any domain which has vnuma enabled, even if the
     * configuration is empty. Only domains which have no vnuma
     * configuration at all are supported.
     */
    vnuma_ret = xc_domain_getvnuma(CTX->xch, domid, &nr_vnodes,
                                   &nr_vmemranges, &nr_vcpus,
                                   NULL, NULL, NULL);
    if (!(vnuma_ret == -1 && errno == EOPNOTSUPP)) {
        LOGD(ERROR, domid, "Cannot save a guest with vNUMA configured");
        rc = ERROR_FAIL;
        goto out;
    }

    /* The suspend callback is only installed for plain streams. */
    if (dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_NONE)
        cbs->suspend = libxl__domain_suspend_callback;

    cbs->switch_qemu_logdirty =
        libxl__domain_suspend_common_switch_qemu_logdirty;

    /* Hand the stream writer everything it needs, then start it. */
    dss->sws.dss = dss;
    dss->sws.ao  = dss->ao;
    dss->sws.fd  = dss->fd;
    dss->sws.back_channel = false;
    dss->sws.completion_callback = stream_done;

    libxl__stream_write_start(egc, &dss->sws);
    return;

 out:
    domain_save_done(egc, dss, rc);
}
446 
/* Stream writer completion: forward the result to domain_save_done(). */
static void stream_done(libxl__egc *egc,
                        libxl__stream_write_state *sws, int rc)
{
    libxl__domain_save_state *dss = sws->dss;

    domain_save_done(egc, dss, rc);
}
452 
/*
 * Final stage of a domain save.
 *
 * Cancels the guest event channel wait and, if a port is held, releases
 * the suspend event channel.  Without Remus/COLO the caller's callback is
 * invoked right away; with it, the matching teardown is started instead.
 */
static void domain_save_done(libxl__egc *egc,
                             libxl__domain_save_state *dss, int rc)
{
    STATE_AO_GC(dss->ao);

    /* Convenience aliases */
    libxl__domain_suspend_state *dsps = &dss->dsps;
    const uint32_t domid = dss->domid;

    libxl__ev_evtchn_cancel(gc, &dsps->guest_evtchn);

    if (dsps->guest_evtchn.port > 0) {
        xc_suspend_evtchn_release(CTX->xch, CTX->xce, domid,
                                  dsps->guest_evtchn.port,
                                  &dsps->guest_evtchn_lockfd);
    }

    if (!dss->remus) {
        dss->callback(egc, dss, rc);
        return;
    }

    /*
     * With Remus/COLO, if we reach this point, it means either
     * backup died or some network error occurred preventing us
     * from sending checkpoints. Teardown the network buffers and
     * release netlink resources.  This is an async op.
     */
    if (libxl_defbool_val(dss->remus->colo))
        libxl__colo_save_teardown(egc, &dss->css, rc);
    else
        libxl__remus_teardown(egc, &dss->rs, rc);
}
484 
485 /*========================= Domain restore ============================*/
486 
487 /*
488  * Inspect the buffer between start and end, and return a pointer to the
489  * character following the NUL terminator of start, or NULL if start is not
490  * terminated before end.
491  */
static const char *next_string(const char *start, const char *end)
{
    const char *nul;

    /* An empty (or inverted) range cannot contain a terminator. */
    if (start >= end)
        return NULL;

    /*
     * Look for the terminator within [start, end) only.  memchr() is
     * plain ISO C, unlike strnlen(), and gives the position directly.
     */
    nul = memchr(start, '\0', (size_t)(end - start));

    return nul ? nul + 1 : NULL;
}
504 
/*
 * Write emulator xenstore data from a migration stream back into
 * xenstore under the device model's root for the new guest.
 *
 * 'ptr'/'size' describe a buffer of consecutive NUL-terminated strings
 * forming key/value pairs.  Each key must be terminated within the
 * buffer, non-empty and relative (no leading '/'); each value must be
 * terminated within the buffer.
 *
 * Returns 0 when every pair has been written, or ERROR_FAIL when the
 * buffer is malformed or a xenstore write fails.  Pairs preceding the
 * failing one have already been written by then.
 */
int libxl__restore_emulator_xenstore_data(libxl__domain_create_state *dcs,
                                          const char *ptr, uint32_t size)
{
    STATE_AO_GC(dcs->ao);
    const char *next = ptr, *end = ptr + size, *key, *val;
    int rc;

    const uint32_t domid = dcs->guest_domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
    const char *xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");

    while (next < end) {
        key = next;
        next = next_string(next, end);

        /* Sanitise 'key'. */
        if (!next) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Key in xenstore data not NUL terminated");
            goto out;
        }
        if (key[0] == '\0') {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "empty key found in xenstore data");
            goto out;
        }
        if (key[0] == '/') {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Key in xenstore data not relative");
            goto out;
        }

        val = next;
        next = next_string(next, end);

        /* Sanitise 'val'. */
        if (!next) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Val in xenstore data not NUL terminated");
            goto out;
        }

        /*
         * Do not report success if the write did not happen.
         * NOTE(review): assumes libxl__xs_printf() returns 0 on success
         * and non-zero on failure - confirm against its declaration.
         */
        if (libxl__xs_printf(gc, XBT_NULL,
                             GCSPRINTF("%s/%s", xs_root, key),
                             "%s", val)) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Failed to write xenstore key `%s'", key);
            goto out;
        }
    }

    rc = 0;

 out:
    return rc;
}
557 
558 /*
559  * Local variables:
560  * mode: C
561  * c-basic-offset: 4
562  * indent-tabs-mode: nil
563  * End:
564  */
565