1 /*
2  * Copyright (C) 2009      Citrix Ltd.
3  * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4  *        Yang Hongyang <hongyang.yang@easystack.cn>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; version 2.1 only. with the special
9  * exception on linking described in file LICENSE.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  */
16 
17 #include "libxl_osdeps.h" /* must come before any other headers */
18 
19 #include "libxl_internal.h"
20 
21 extern const libxl__checkpoint_device_instance_ops remus_device_nic;
22 extern const libxl__checkpoint_device_instance_ops remus_device_drbd_disk;
23 static const libxl__checkpoint_device_instance_ops *remus_ops[] = {
24     &remus_device_nic,
25     &remus_device_drbd_disk,
26     NULL,
27 };
28 
29 /*----- helper functions -----*/
30 
/*
 * Initialise the device subkind-specific state in the libxl ctx.
 *
 * The NIC subkind is initialised only when libxl__netbuffer_enabled()
 * reports true; the DRBD disk subkind is always initialised.
 *
 * Returns 0 on success, otherwise the non-zero value returned by the
 * first initialiser that failed.
 */
static int init_device_subkind(libxl__checkpoint_devices_state *cds)
{
    STATE_AO_GC(cds->ao);

    if (libxl__netbuffer_enabled(gc)) {
        int nic_rc = init_subkind_nic(cds);

        if (nic_rc)
            return nic_rc;
    }

    return init_subkind_drbd_disk(cds);
}
49 
/*
 * Release the device subkind-specific state in the libxl ctx.
 *
 * Uses the same condition as init_device_subkind(): the NIC subkind is
 * cleaned up only when network buffering is enabled, the DRBD disk
 * subkind always.
 */
static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
{
    STATE_AO_GC(cds->ao);

    if (libxl__netbuffer_enabled(gc)) {
        cleanup_subkind_nic(cds);
    }

    cleanup_subkind_drbd_disk(cds);
}
60 
61 /*-------------------- Remus setup and teardown ---------------------*/
62 
63 static void remus_setup_done(libxl__egc *egc,
64                              libxl__checkpoint_devices_state *cds, int rc);
65 static void remus_setup_failed(libxl__egc *egc,
66                                libxl__checkpoint_devices_state *cds, int rc);
67 static void remus_checkpoint_stream_written(
68     libxl__egc *egc, libxl__stream_write_state *sws, int rc);
69 static void libxl__remus_domain_suspend_callback(void *data);
70 static void libxl__remus_domain_resume_callback(void *data);
71 static void libxl__remus_domain_save_checkpoint_callback(void *data);
72 
/*
 * Prepare a domain save operation for Remus.
 *
 * Selects the device kinds to checkpoint from dss->remus (netbuf and
 * diskbuf), fills in the checkpoint devices state, initialises the
 * device subkinds, installs the Remus save callbacks and finally starts
 * checkpoint device setup.  Setup completion is reported through
 * remus_setup_done().
 *
 * If network buffering is requested but not supported, or if device
 * subkind initialisation fails, dss->callback is invoked with
 * ERROR_FAIL and nothing else is started.
 */
void libxl__remus_setup(libxl__egc *egc, libxl__remus_state *rs)
{
    libxl__domain_save_state *dss = CONTAINER_OF(rs, *dss, rs);

    /* Shorthands into the save state */
    libxl__checkpoint_devices_state *const cds = &dss->cds;
    const libxl_domain_remus_info *const info = dss->remus;
    libxl__srm_save_autogen_callbacks *const cbs =
        &dss->sws.shs.callbacks.save.a;

    STATE_AO_GC(dss->ao);

    if (libxl_defbool_val(info->netbuf)) {
        if (libxl__netbuffer_enabled(gc)) {
            cds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VIF);
        } else {
            LOGD(ERROR, dss->domid,
                 "Remus: No support for network buffering");
            goto fail;
        }
    }

    if (libxl_defbool_val(info->diskbuf)) {
        cds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VBD);
    }

    /* Describe this operation to the checkpoint device layer. */
    cds->ao = ao;
    cds->domid = dss->domid;
    cds->ops = remus_ops;
    cds->concrete_data = rs;
    cds->callback = remus_setup_done;
    rs->interval = info->interval;

    if (init_device_subkind(cds) != 0) {
        LOGD(ERROR, dss->domid,
             "Remus: failed to init device subkind");
        goto fail;
    }

    /* Hook Remus into the stream writer and the save helper. */
    dss->sws.checkpoint_callback = remus_checkpoint_stream_written;
    cbs->suspend = libxl__remus_domain_suspend_callback;
    cbs->postcopy = libxl__remus_domain_resume_callback;
    cbs->checkpoint = libxl__remus_domain_save_checkpoint_callback;

    libxl__checkpoint_devices_setup(egc, cds);
    return;

fail:
    dss->callback(egc, dss, ERROR_FAIL);
}
122 
/*
 * Checkpoint device setup has finished.
 *
 * On success (rc == 0) the domain save is started.  On failure the
 * error is logged and the checkpoint devices are torn down again, with
 * remus_setup_failed() as the completion callback.
 */
static void remus_setup_done(libxl__egc *egc,
                             libxl__checkpoint_devices_state *cds, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);
    STATE_AO_GC(dss->ao);

    if (rc) {
        LOGD(ERROR, dss->domid, "Remus: failed to setup device, rc %d", rc);
        cds->callback = remus_setup_failed;
        libxl__checkpoint_devices_teardown(egc, cds);
        return;
    }

    libxl__domain_save(egc, dss);
}
138 
/*
 * Completion callback for the device teardown that follows a failed
 * checkpoint device setup.
 *
 * This callback is only installed by remus_setup_done() on its failure
 * path, so the overall operation has failed regardless of how the
 * teardown went.  `rc` is the result of the teardown only: if it is
 * non-zero it is logged and reported; if the teardown succeeded,
 * ERROR_FAIL is reported instead so that dss->callback is never told
 * that a failed setup succeeded.
 */
static void remus_setup_failed(libxl__egc *egc,
                               libxl__checkpoint_devices_state *cds, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);
    STATE_AO_GC(dss->ao);

    if (rc)
        LOGD(ERROR, dss->domid,
             "Remus: failed to teardown device after setup failed, rc %d", rc);
    else
        rc = ERROR_FAIL; /* teardown was clean, but setup still failed */

    cleanup_device_subkind(cds);

    dss->callback(egc, dss, rc);
}
153 
154 static void remus_teardown_done(libxl__egc *egc,
155                                 libxl__checkpoint_devices_state *cds,
156                                 int rc);
/*
 * Start tearing down the Remus checkpoint devices.
 *
 * `rc` is the result the domain suspend terminated with; it is only
 * logged here.  Completion of the teardown is delivered to
 * remus_teardown_done().
 */
void libxl__remus_teardown(libxl__egc *egc, libxl__remus_state *rs, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(rs, *dss, rs);

    EGC_GC;

    LOGD(WARN, dss->domid,
         "Remus: Domain suspend terminated with rc %d,"
         " teardown Remus devices...",
         rc);

    dss->cds.callback = remus_teardown_done;
    libxl__checkpoint_devices_teardown(egc, &dss->cds);
}
173 
/*
 * The checkpoint device teardown has completed.
 *
 * Logs a teardown failure (non-zero rc), releases the device subkind
 * state and passes the teardown rc on to dss->callback.
 */
static void remus_teardown_done(libxl__egc *egc,
                                libxl__checkpoint_devices_state *cds,
                                int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);
    STATE_AO_GC(dss->ao);

    if (rc != 0) {
        LOGD(ERROR, dss->domid, "Remus: failed to teardown device,"
            " rc %d", rc);
    }

    cleanup_device_subkind(cds);
    dss->callback(egc, dss, rc);
}
189 
190 /*---------------------- remus callbacks (save) -----------------------*/
191 
192 static void remus_domain_suspend_callback_common_done(libxl__egc *egc,
193                                 libxl__domain_suspend_state *dsps, int ok);
194 static void remus_devices_postsuspend_cb(libxl__egc *egc,
195                                          libxl__checkpoint_devices_state *cds,
196                                          int rc);
197 static void remus_devices_preresume_cb(libxl__egc *egc,
198                                        libxl__checkpoint_devices_state *cds,
199                                        int rc);
200 
libxl__remus_domain_suspend_callback(void * data)201 static void libxl__remus_domain_suspend_callback(void *data)
202 {
203     libxl__save_helper_state *shs = data;
204     libxl__egc *egc = shs->egc;
205     libxl__domain_save_state *dss = shs->caller_state;
206     libxl__domain_suspend_state *dsps = &dss->dsps;
207 
208     dsps->callback_common_done = remus_domain_suspend_callback_common_done;
209     libxl__domain_suspend(egc, dsps);
210 }
211 
/*
 * The domain suspend has finished.
 *
 * On failure the rc is stored in dss->rc and 0 is handed back through
 * the save helper's async callback.  On success the checkpoint devices'
 * postsuspend step is started, completing in
 * remus_devices_postsuspend_cb().
 */
static void remus_domain_suspend_callback_common_done(libxl__egc *egc,
                                libxl__domain_suspend_state *dsps, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(dsps, *dss, dsps);

    if (rc) {
        dss->rc = rc;
        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
                                                         0);
        return;
    }

    dss->cds.callback = remus_devices_postsuspend_cb;
    libxl__checkpoint_devices_postsuspend(egc, &dss->cds);
}
229 
/*
 * The checkpoint devices' postsuspend step has finished.
 *
 * A non-zero rc is recorded in dss->rc.  The save helper's async
 * callback is then completed with 1 on success and 0 on failure.
 */
static void remus_devices_postsuspend_cb(libxl__egc *egc,
                                         libxl__checkpoint_devices_state *cds,
                                         int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);
    const int succeeded = (rc == 0);

    if (!succeeded)
        dss->rc = rc;

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
                                                     succeeded);
}
246 
libxl__remus_domain_resume_callback(void * data)247 static void libxl__remus_domain_resume_callback(void *data)
248 {
249     libxl__save_helper_state *shs = data;
250     libxl__egc *egc = shs->egc;
251     libxl__domain_save_state *dss = shs->caller_state;
252     STATE_AO_GC(dss->ao);
253 
254     libxl__checkpoint_devices_state *const cds = &dss->cds;
255     cds->callback = remus_devices_preresume_cb;
256     libxl__checkpoint_devices_preresume(egc, cds);
257 }
258 
/*
 * The checkpoint devices' preresume step has finished.
 *
 * If it succeeded, the domain and its device model are resumed.  Any
 * failure, from the preresume step or from the resume itself, is
 * recorded in dss->rc.  The save helper's async callback is completed
 * with 1 on success and 0 on failure.
 */
static void remus_devices_preresume_cb(libxl__egc *egc,
                                       libxl__checkpoint_devices_state *cds,
                                       int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);
    STATE_AO_GC(dss->ao);

    if (!rc) {
        /* Resumes the domain and the device model */
        rc = libxl__domain_resume_deprecated(gc, dss->domid,
                                             /* Fast Suspend */1);
    }

    if (rc)
        dss->rc = rc;

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, !rc);
}
281 
282 /*----- remus asynchronous checkpoint callback -----*/
283 
284 static void remus_devices_commit_cb(libxl__egc *egc,
285                                     libxl__checkpoint_devices_state *cds,
286                                     int rc);
287 static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev,
288                                   const struct timeval *requested_abs,
289                                   int rc);
290 
libxl__remus_domain_save_checkpoint_callback(void * data)291 static void libxl__remus_domain_save_checkpoint_callback(void *data)
292 {
293     libxl__save_helper_state *shs = data;
294     libxl__domain_save_state *dss = shs->caller_state;
295     libxl__egc *egc = shs->egc;
296     STATE_AO_GC(dss->ao);
297 
298     libxl__stream_write_start_checkpoint(egc, &dss->sws);
299 }
300 
/*
 * The stream writer has finished its part of the checkpoint.
 *
 * On success the checkpoint devices' commit step is started, completing
 * in remus_devices_commit_cb().  On failure the error is logged, the rc
 * is recorded in dss->rc (as the other save-side callbacks in this file
 * do) and 0 is handed back through the save helper's async callback.
 */
static void remus_checkpoint_stream_written(
    libxl__egc *egc, libxl__stream_write_state *sws, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(sws, *dss, sws);

    /* Convenience aliases */
    libxl__checkpoint_devices_state *const cds = &dss->cds;

    STATE_AO_GC(dss->ao);

    if (rc) {
        LOGD(ERROR, dss->domid, "Failed to save device model."
             " Terminating Remus..");
        goto out;
    }

    cds->callback = remus_devices_commit_cb;
    libxl__checkpoint_devices_commit(egc, cds);

    return;

out:
    /* Keep the specific failure code instead of silently dropping it. */
    dss->rc = rc;
    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, 0);
}
325 
/*
 * The checkpoint devices' commit step has finished.
 *
 * On success a relative timer of dss->rs.interval is registered; when
 * it fires, remus_next_checkpoint() lets the save helper continue with
 * the next checkpoint.  If the commit failed, or the timer could not be
 * registered, the rc is recorded in dss->rc (as the other save-side
 * callbacks in this file do) and 0 is handed back through the save
 * helper's async callback.
 */
static void remus_devices_commit_cb(libxl__egc *egc,
                                    libxl__checkpoint_devices_state *cds,
                                    int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);

    STATE_AO_GC(dss->ao);

    if (rc) {
        LOGD(ERROR, dss->domid, "Failed to do device commit op."
            " Terminating Remus..");
        goto out;
    }

    /*
     * At this point, we have successfully checkpointed the guest and
     * committed it at the backup. We'll come back after the checkpoint
     * interval to checkpoint the guest again. Until then, let the guest
     * continue execution.
     */

    /* Set checkpoint interval timeout */
    rc = libxl__ev_time_register_rel(ao, &dss->rs.checkpoint_timeout,
                                     remus_next_checkpoint,
                                     dss->rs.interval);

    if (rc)
        goto out;

    return;

out:
    /* Keep the specific failure code instead of silently dropping it. */
    dss->rc = rc;
    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, 0);
}
360 
/*
 * Checkpoint interval timer callback.
 *
 * ERROR_TIMEDOUT is the expected result of the timer and is treated as
 * success; any other non-zero rc is recorded in dss->rc.  The save
 * helper's async callback is completed with 1 on success, so that libxc
 * (xc_domain_save.c) keeps executing its suspend/checkpoint/resume loop
 * in xc_domain_save(), and with 0 on failure.
 */
static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev,
                                  const struct timeval *requested_abs,
                                  int rc)
{
    libxl__domain_save_state *dss =
        CONTAINER_OF(ev, *dss, rs.checkpoint_timeout);

    STATE_AO_GC(dss->ao);

    /* The timer expiring is what we asked for, not an error. */
    const int failure = (rc == ERROR_TIMEDOUT) ? 0 : rc;

    if (failure)
        dss->rc = failure;

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
                                                     !failure);
}
384 
385 /*---------------------- remus callbacks (restore) -----------------------*/
386 
387 /*----- remus asynchronous checkpoint callback -----*/
388 
389 static void remus_checkpoint_stream_done(
390     libxl__egc *egc, libxl__stream_read_state *srs, int rc);
391 
libxl__remus_domain_restore_checkpoint_callback(void * data)392 static void libxl__remus_domain_restore_checkpoint_callback(void *data)
393 {
394     libxl__save_helper_state *shs = data;
395     libxl__domain_create_state *dcs = shs->caller_state;
396     libxl__egc *egc = shs->egc;
397     STATE_AO_GC(dcs->ao);
398 
399     libxl__stream_read_start_checkpoint(egc, &dcs->srs);
400 }
401 
/*
 * The stream reader has finished its part of the checkpoint.
 *
 * The value received from the stream reader is handed to the restore
 * helper's async callback unchanged (it is not negated, unlike on the
 * save side).
 * NOTE(review): presumably the reader already supplies the value the
 * helper expects -- confirm against the stream reader.
 */
static void remus_checkpoint_stream_done(
    libxl__egc *egc, libxl__stream_read_state *stream, int rc)
{
    libxl__save_helper_state *const shs = &stream->shs;

    libxl__xc_domain_saverestore_async_callback_done(egc, shs, rc);
}
407 
/*
 * Install the Remus checkpoint hooks for a domain restore: the stream
 * reader's checkpoint callback and the restore helper's "checkpoint"
 * callback.
 */
void libxl__remus_restore_setup(libxl__egc *egc,
                                libxl__domain_create_state *dcs)
{
    libxl__stream_read_state *const srs = &dcs->srs;

    srs->checkpoint_callback = remus_checkpoint_stream_done;
    srs->shs.callbacks.restore.a.checkpoint =
        libxl__remus_domain_restore_checkpoint_callback;
}
418 
419 /*
420  * Local variables:
421  * mode: C
422  * c-basic-offset: 4
423  * indent-tabs-mode: nil
424  * End:
425  */
426