1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 * Yang Hongyang <hongyang.yang@easystack.cn>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; version 2.1 only. with the special
9 * exception on linking described in file LICENSE.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 */
16
17 #include "libxl_osdeps.h" /* must come before any other headers */
18
19 #include "libxl_internal.h"
20
21 extern const libxl__checkpoint_device_instance_ops remus_device_nic;
22 extern const libxl__checkpoint_device_instance_ops remus_device_drbd_disk;
23 static const libxl__checkpoint_device_instance_ops *remus_ops[] = {
24 &remus_device_nic,
25 &remus_device_drbd_disk,
26 NULL,
27 };
28
29 /*----- helper functions -----*/
30
/*
 * Initialise the device subkind-specific state in the libxl ctx.
 *
 * The NIC subkind is only initialised when libxl__netbuffer_enabled()
 * says so; the DRBD disk subkind is always initialised.
 *
 * Returns 0 on success, otherwise the non-zero rc of the first
 * initialisation step that failed.
 */
static int init_device_subkind(libxl__checkpoint_devices_state *cds)
{
    STATE_AO_GC(cds->ao);

    if (libxl__netbuffer_enabled(gc)) {
        int nic_rc = init_subkind_nic(cds);

        if (nic_rc)
            return nic_rc;
    }

    /* Last step: its result (0 or an error) is the overall result. */
    return init_subkind_drbd_disk(cds);
}
49
/*
 * Release the device subkind-specific state in the libxl ctx.
 *
 * Mirrors init_device_subkind(): NIC state is only cleaned up when
 * libxl__netbuffer_enabled() is true, DRBD disk state always is.
 */
static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
{
    STATE_AO_GC(cds->ao);

    if (libxl__netbuffer_enabled(gc)) {
        cleanup_subkind_nic(cds);
    }

    cleanup_subkind_drbd_disk(cds);
}
60
61 /*-------------------- Remus setup and teardown ---------------------*/
62
63 static void remus_setup_done(libxl__egc *egc,
64 libxl__checkpoint_devices_state *cds, int rc);
65 static void remus_setup_failed(libxl__egc *egc,
66 libxl__checkpoint_devices_state *cds, int rc);
67 static void remus_checkpoint_stream_written(
68 libxl__egc *egc, libxl__stream_write_state *sws, int rc);
69 static void libxl__remus_domain_suspend_callback(void *data);
70 static void libxl__remus_domain_resume_callback(void *data);
71 static void libxl__remus_domain_save_checkpoint_callback(void *data);
72
libxl__remus_setup(libxl__egc * egc,libxl__remus_state * rs)73 void libxl__remus_setup(libxl__egc *egc, libxl__remus_state *rs)
74 {
75 libxl__domain_save_state *dss = CONTAINER_OF(rs, *dss, rs);
76
77 /* Convenience aliases */
78 libxl__checkpoint_devices_state *const cds = &dss->cds;
79 const libxl_domain_remus_info *const info = dss->remus;
80 libxl__srm_save_autogen_callbacks *const callbacks =
81 &dss->sws.shs.callbacks.save.a;
82
83 STATE_AO_GC(dss->ao);
84
85 if (libxl_defbool_val(info->netbuf)) {
86 if (!libxl__netbuffer_enabled(gc)) {
87 LOGD(ERROR, dss->domid,
88 "Remus: No support for network buffering");
89 goto out;
90 }
91 cds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VIF);
92 }
93
94 if (libxl_defbool_val(info->diskbuf))
95 cds->device_kind_flags |= (1 << LIBXL__DEVICE_KIND_VBD);
96
97 cds->ao = ao;
98 cds->domid = dss->domid;
99 cds->callback = remus_setup_done;
100 cds->ops = remus_ops;
101 cds->concrete_data = rs;
102 rs->interval = info->interval;
103
104 if (init_device_subkind(cds)) {
105 LOGD(ERROR, dss->domid,
106 "Remus: failed to init device subkind");
107 goto out;
108 }
109
110 dss->sws.checkpoint_callback = remus_checkpoint_stream_written;
111
112 callbacks->suspend = libxl__remus_domain_suspend_callback;
113 callbacks->postcopy = libxl__remus_domain_resume_callback;
114 callbacks->checkpoint = libxl__remus_domain_save_checkpoint_callback;
115
116 libxl__checkpoint_devices_setup(egc, cds);
117 return;
118
119 out:
120 dss->callback(egc, dss, ERROR_FAIL);
121 }
122
/*
 * Completion callback for libxl__checkpoint_devices_setup().
 *
 * With rc == 0 the domain save is started.  Otherwise the error is
 * logged and the checkpoint devices are torn down, with
 * remus_setup_failed() registered as the teardown completion callback.
 */
static void remus_setup_done(libxl__egc *egc,
                             libxl__checkpoint_devices_state *cds, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);
    STATE_AO_GC(dss->ao);

    if (rc) {
        LOGD(ERROR, dss->domid, "Remus: failed to setup device, rc %d", rc);
        cds->callback = remus_setup_failed;
        libxl__checkpoint_devices_teardown(egc, cds);
        return;
    }

    libxl__domain_save(egc, dss);
}
138
/*
 * Completion callback for the device teardown that remus_setup_done()
 * starts after checkpoint device setup has failed.
 *
 * rc is the result of that teardown, not of the failed setup.  This
 * function is only ever reached on the setup-failure path, so the
 * operation as a whole has failed even when the teardown went fine:
 * a zero teardown rc is therefore replaced by ERROR_FAIL before
 * dss->callback is called, so that the caller never sees rc 0 here.
 * A non-zero teardown rc is logged and reported as is.
 */
static void remus_setup_failed(libxl__egc *egc,
                               libxl__checkpoint_devices_state *cds, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);
    STATE_AO_GC(dss->ao);

    if (rc) {
        LOGD(ERROR, dss->domid,
             "Remus: failed to teardown device after setup failed, rc %d", rc);
    } else {
        /* Teardown succeeded, but setup did not: still a failure. */
        rc = ERROR_FAIL;
    }

    cleanup_device_subkind(cds);

    dss->callback(egc, dss, rc);
}
153
154 static void remus_teardown_done(libxl__egc *egc,
155 libxl__checkpoint_devices_state *cds,
156 int rc);
libxl__remus_teardown(libxl__egc * egc,libxl__remus_state * rs,int rc)157 void libxl__remus_teardown(libxl__egc *egc,
158 libxl__remus_state *rs,
159 int rc)
160 {
161 libxl__domain_save_state *dss = CONTAINER_OF(rs, *dss, rs);
162
163 /* Convenience aliases */
164 libxl__checkpoint_devices_state *const cds = &dss->cds;
165
166 EGC_GC;
167
168 LOGD(WARN, dss->domid, "Remus: Domain suspend terminated with rc %d,"
169 " teardown Remus devices...", rc);
170 cds->callback = remus_teardown_done;
171 libxl__checkpoint_devices_teardown(egc, cds);
172 }
173
/*
 * Completion callback for the teardown started by libxl__remus_teardown().
 *
 * Logs a teardown failure, releases the device subkind state and calls
 * dss->callback with the teardown rc.
 */
static void remus_teardown_done(libxl__egc *egc,
                                libxl__checkpoint_devices_state *cds,
                                int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);
    STATE_AO_GC(dss->ao);

    if (rc) {
        LOGD(ERROR, dss->domid, "Remus: failed to teardown device,"
             " rc %d", rc);
    }

    cleanup_device_subkind(cds);

    dss->callback(egc, dss, rc);
}
189
190 /*---------------------- remus callbacks (save) -----------------------*/
191
192 static void remus_domain_suspend_callback_common_done(libxl__egc *egc,
193 libxl__domain_suspend_state *dsps, int ok);
194 static void remus_devices_postsuspend_cb(libxl__egc *egc,
195 libxl__checkpoint_devices_state *cds,
196 int rc);
197 static void remus_devices_preresume_cb(libxl__egc *egc,
198 libxl__checkpoint_devices_state *cds,
199 int rc);
200
libxl__remus_domain_suspend_callback(void * data)201 static void libxl__remus_domain_suspend_callback(void *data)
202 {
203 libxl__save_helper_state *shs = data;
204 libxl__egc *egc = shs->egc;
205 libxl__domain_save_state *dss = shs->caller_state;
206 libxl__domain_suspend_state *dsps = &dss->dsps;
207
208 dsps->callback_common_done = remus_domain_suspend_callback_common_done;
209 libxl__domain_suspend(egc, dsps);
210 }
211
/*
 * Completion callback for the domain suspend started by
 * libxl__remus_domain_suspend_callback().
 *
 * On failure the rc is stored in dss->rc and the save helper callback is
 * completed with 0.  On success the checkpoint devices' postsuspend step
 * is started, with remus_devices_postsuspend_cb() as its callback.
 */
static void remus_domain_suspend_callback_common_done(libxl__egc *egc,
                                libxl__domain_suspend_state *dsps, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(dsps, *dss, dsps);
    libxl__checkpoint_devices_state *const cds = &dss->cds;

    if (rc) {
        dss->rc = rc;
        /* rc is non-zero here, so the helper is told 0 (i.e. !rc). */
        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
                                                         0);
        return;
    }

    cds->callback = remus_devices_postsuspend_cb;
    libxl__checkpoint_devices_postsuspend(egc, cds);
}
229
/*
 * Completion callback for libxl__checkpoint_devices_postsuspend().
 *
 * Records a non-zero rc in dss->rc and completes the save helper's
 * suspend callback with !rc (1 on success, 0 on failure).
 */
static void remus_devices_postsuspend_cb(libxl__egc *egc,
                                         libxl__checkpoint_devices_state *cds,
                                         int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);

    if (rc) {
        dss->rc = rc;
    }

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, !rc);
}
246
libxl__remus_domain_resume_callback(void * data)247 static void libxl__remus_domain_resume_callback(void *data)
248 {
249 libxl__save_helper_state *shs = data;
250 libxl__egc *egc = shs->egc;
251 libxl__domain_save_state *dss = shs->caller_state;
252 STATE_AO_GC(dss->ao);
253
254 libxl__checkpoint_devices_state *const cds = &dss->cds;
255 cds->callback = remus_devices_preresume_cb;
256 libxl__checkpoint_devices_preresume(egc, cds);
257 }
258
/*
 * Completion callback for libxl__checkpoint_devices_preresume().
 *
 * If the preresume step succeeded, the domain is resumed.  Any non-zero
 * rc (from preresume or from the resume itself) is recorded in dss->rc,
 * and the save helper callback is completed with !rc.
 */
static void remus_devices_preresume_cb(libxl__egc *egc,
                                       libxl__checkpoint_devices_state *cds,
                                       int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);
    STATE_AO_GC(dss->ao);

    if (!rc) {
        /* Resumes the domain and the device model */
        rc = libxl__domain_resume_deprecated(gc, dss->domid,
                                             /* Fast Suspend */1);
    }

    if (rc) {
        dss->rc = rc;
    }

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, !rc);
}
281
282 /*----- remus asynchronous checkpoint callback -----*/
283
284 static void remus_devices_commit_cb(libxl__egc *egc,
285 libxl__checkpoint_devices_state *cds,
286 int rc);
287 static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev,
288 const struct timeval *requested_abs,
289 int rc);
290
libxl__remus_domain_save_checkpoint_callback(void * data)291 static void libxl__remus_domain_save_checkpoint_callback(void *data)
292 {
293 libxl__save_helper_state *shs = data;
294 libxl__domain_save_state *dss = shs->caller_state;
295 libxl__egc *egc = shs->egc;
296 STATE_AO_GC(dss->ao);
297
298 libxl__stream_write_start_checkpoint(egc, &dss->sws);
299 }
300
/*
 * Stream writer checkpoint callback (installed by libxl__remus_setup()).
 *
 * On success the checkpoint devices' commit step is started, with
 * remus_devices_commit_cb() as its completion callback.  On failure the
 * error is logged and the save helper callback is completed with 0.
 *
 * NOTE(review): unlike the suspend/resume callbacks, rc is not stored in
 * dss->rc on this failure path - confirm that this is intended.
 */
static void remus_checkpoint_stream_written(
    libxl__egc *egc, libxl__stream_write_state *sws, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(sws, *dss, sws);

    /* Convenience aliases */
    libxl__checkpoint_devices_state *const cds = &dss->cds;

    STATE_AO_GC(dss->ao);

    if (!rc) {
        cds->callback = remus_devices_commit_cb;
        libxl__checkpoint_devices_commit(egc, cds);
        return;
    }

    LOGD(ERROR, dss->domid, "Failed to save device model."
         " Terminating Remus..");
    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, 0);
}
325
/*
 * Completion callback for libxl__checkpoint_devices_commit().
 *
 * On success a relative timer of dss->rs.interval is registered, with
 * remus_next_checkpoint() as its handler; the save helper callback is
 * not completed until that handler runs.  If the commit failed, or the
 * timer cannot be registered, the save helper callback is completed
 * with 0 straight away.
 *
 * NOTE(review): unlike the suspend/resume callbacks, rc is not stored in
 * dss->rc on the failure paths - confirm that this is intended.
 */
static void remus_devices_commit_cb(libxl__egc *egc,
                                    libxl__checkpoint_devices_state *cds,
                                    int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);

    STATE_AO_GC(dss->ao);

    if (rc) {
        LOGD(ERROR, dss->domid, "Failed to do device commit op."
             " Terminating Remus..");
    } else {
        /*
         * At this point, we have successfully checkpointed the guest and
         * committed it at the backup.  We'll come back after the checkpoint
         * interval to checkpoint the guest again.  Until then, let the
         * guest continue execution.
         */

        /* Set checkpoint interval timeout */
        rc = libxl__ev_time_register_rel(ao, &dss->rs.checkpoint_timeout,
                                         remus_next_checkpoint,
                                         dss->rs.interval);
        if (!rc)
            return;
    }

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, 0);
}
360
/*
 * Handler for the checkpoint interval timer registered by
 * remus_devices_commit_cb().
 *
 * ERROR_TIMEDOUT is the expected outcome and is treated as success.
 * Any other non-zero rc is recorded in dss->rc.  The save helper
 * callback is then completed with 1 on success and 0 on failure.
 */
static void remus_next_checkpoint(libxl__egc *egc, libxl__ev_time *ev,
                                  const struct timeval *requested_abs,
                                  int rc)
{
    libxl__domain_save_state *dss =
        CONTAINER_OF(ev, *dss, rs.checkpoint_timeout);

    STATE_AO_GC(dss->ao);

    /* The timer expiring is what we were waiting for. */
    const int err = (rc == ERROR_TIMEDOUT) ? 0 : rc;

    /*
     * Time to checkpoint the guest again.  We return 1 to libxc
     * (xc_domain_save.c) in order to continue executing the infinite loop
     * (suspend, checkpoint, resume) in xc_domain_save().
     */
    if (err) {
        dss->rc = err;
    }

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
                                                     !err);
}
384
385 /*---------------------- remus callbacks (restore) -----------------------*/
386
387 /*----- remus asynchronous checkpoint callback -----*/
388
389 static void remus_checkpoint_stream_done(
390 libxl__egc *egc, libxl__stream_read_state *srs, int rc);
391
libxl__remus_domain_restore_checkpoint_callback(void * data)392 static void libxl__remus_domain_restore_checkpoint_callback(void *data)
393 {
394 libxl__save_helper_state *shs = data;
395 libxl__domain_create_state *dcs = shs->caller_state;
396 libxl__egc *egc = shs->egc;
397 STATE_AO_GC(dcs->ao);
398
399 libxl__stream_read_start_checkpoint(egc, &dcs->srs);
400 }
401
/*
 * Stream reader checkpoint callback (installed by
 * libxl__remus_restore_setup()).
 *
 * Completes the restore helper's checkpoint callback, forwarding rc
 * unchanged.
 * NOTE(review): the save-side callbacks pass !rc instead - presumably
 * rc already follows the helper's return convention here; confirm.
 */
static void remus_checkpoint_stream_done(
    libxl__egc *egc, libxl__stream_read_state *stream, int rc)
{
    libxl__xc_domain_saverestore_async_callback_done(egc, &stream->shs,
                                                     rc);
}
407
/*
 * Install the Remus checkpoint handlers for a domain restore.
 *
 * Sets the restore helper's checkpoint callback and the stream reader's
 * checkpoint_callback in dcs.  egc is not used.
 */
void libxl__remus_restore_setup(libxl__egc *egc,
                                libxl__domain_create_state *dcs)
{
    dcs->srs.shs.callbacks.restore.a.checkpoint =
        libxl__remus_domain_restore_checkpoint_callback;
    dcs->srs.checkpoint_callback = remus_checkpoint_stream_done;
}
418
419 /*
420 * Local variables:
421 * mode: C
422 * c-basic-offset: 4
423 * indent-tabs-mode: nil
424 * End:
425 */
426