1 /*
2 * Copyright 2009-2017 Citrix Ltd and other contributors
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published
6 * by the Free Software Foundation; version 2.1 only. with the special
7 * exception on linking described in file LICENSE.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 */
14
15 #include <fcntl.h>
16 #include <inttypes.h>
17 #include <signal.h>
18 #include <stdlib.h>
19 #include <sys/stat.h>
20 #include <sys/types.h>
21 #include <sys/utsname.h>
22 #include <time.h>
23 #include <unistd.h>
24
25 #include <libxl.h>
26 #include <libxl_utils.h>
27 #include <libxlutil.h>
28
29 #include "xl.h"
30 #include "xl_utils.h"
31 #include "xl_parse.h"
32
33 #ifndef LIBXL_HAVE_NO_SUSPEND_RESUME
34
create_migration_child(const char * rune,int * send_fd,int * recv_fd)35 static pid_t create_migration_child(const char *rune, int *send_fd,
36 int *recv_fd)
37 {
38 int sendpipe[2], recvpipe[2];
39 pid_t child;
40
41 if (!rune || !send_fd || !recv_fd)
42 return -1;
43
44 MUST( libxl_pipe(ctx, sendpipe) );
45 MUST( libxl_pipe(ctx, recvpipe) );
46
47 child = xl_fork(child_migration, "migration transport process");
48
49 if (!child) {
50 dup2(sendpipe[0], 0);
51 dup2(recvpipe[1], 1);
52 close(sendpipe[0]); close(sendpipe[1]);
53 close(recvpipe[0]); close(recvpipe[1]);
54 execlp("sh","sh","-c",rune,(char*)0);
55 perror("failed to exec sh");
56 exit(EXIT_FAILURE);
57 }
58
59 close(sendpipe[0]);
60 close(recvpipe[1]);
61 *send_fd = sendpipe[1];
62 *recv_fd = recvpipe[0];
63
64 /* if receiver dies, we get an error and can clean up
65 rather than just dying */
66 signal(SIGPIPE, SIG_IGN);
67
68 return child;
69 }
70
migrate_read_fixedmessage(int fd,const void * msg,int msgsz,const char * what,const char * rune)71 static int migrate_read_fixedmessage(int fd, const void *msg, int msgsz,
72 const char *what, const char *rune) {
73 char buf[msgsz];
74 const char *stream;
75 int rc;
76
77 stream = rune ? "migration receiver stream" : "migration stream";
78 rc = libxl_read_exactly(ctx, fd, buf, msgsz, stream, what);
79 if (rc) return 1;
80
81 if (memcmp(buf, msg, msgsz)) {
82 fprintf(stderr, "%s contained unexpected data instead of %s\n",
83 stream, what);
84 if (rune)
85 fprintf(stderr, "(command run was: %s )\n", rune);
86 return 1;
87 }
88 return 0;
89 }
90
migration_child_report(int recv_fd)91 static void migration_child_report(int recv_fd) {
92 pid_t child;
93 int status, sr;
94 struct timeval now, waituntil, timeout;
95 static const struct timeval pollinterval = { 0, 1000 }; /* 1ms */
96
97 if (!xl_child_pid(child_migration)) return;
98
99 CHK_SYSCALL(gettimeofday(&waituntil, 0));
100 waituntil.tv_sec += 2;
101
102 for (;;) {
103 pid_t migration_child = xl_child_pid(child_migration);
104 child = xl_waitpid(child_migration, &status, WNOHANG);
105
106 if (child == migration_child) {
107 if (status)
108 xl_report_child_exitstatus(XTL_INFO, child_migration,
109 migration_child, status);
110 break;
111 }
112 if (child == -1) {
113 fprintf(stderr, "wait for migration child [%ld] failed: %s\n",
114 (long)migration_child, strerror(errno));
115 break;
116 }
117 assert(child == 0);
118
119 CHK_SYSCALL(gettimeofday(&now, 0));
120 if (timercmp(&now, &waituntil, >)) {
121 fprintf(stderr, "migration child [%ld] not exiting, no longer"
122 " waiting (exit status will be unreported)\n",
123 (long)migration_child);
124 break;
125 }
126 timersub(&waituntil, &now, &timeout);
127
128 if (recv_fd >= 0) {
129 fd_set readfds, exceptfds;
130 FD_ZERO(&readfds);
131 FD_ZERO(&exceptfds);
132 FD_SET(recv_fd, &readfds);
133 FD_SET(recv_fd, &exceptfds);
134 sr = select(recv_fd+1, &readfds,0,&exceptfds, &timeout);
135 } else {
136 if (timercmp(&timeout, &pollinterval, >))
137 timeout = pollinterval;
138 sr = select(0,0,0,0, &timeout);
139 }
140 if (sr > 0) {
141 recv_fd = -1;
142 } else if (sr == 0) {
143 } else if (sr == -1) {
144 if (errno != EINTR) {
145 fprintf(stderr, "migration child [%ld] exit wait select"
146 " failed unexpectedly: %s\n",
147 (long)migration_child, strerror(errno));
148 break;
149 }
150 }
151 }
152 }
153
migrate_do_preamble(int send_fd,int recv_fd,pid_t child,uint8_t * config_data,int config_len,const char * rune)154 static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child,
155 uint8_t *config_data, int config_len,
156 const char *rune)
157 {
158 int rc = 0;
159
160 if (send_fd < 0 || recv_fd < 0) {
161 fprintf(stderr, "migrate_do_preamble: invalid file descriptors\n");
162 exit(EXIT_FAILURE);
163 }
164
165 rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_banner,
166 sizeof(migrate_receiver_banner)-1,
167 "banner", rune);
168 if (rc) {
169 close(send_fd);
170 migration_child_report(recv_fd);
171 exit(EXIT_FAILURE);
172 }
173
174 save_domain_core_writeconfig(send_fd, "migration stream",
175 config_data, config_len);
176
177 }
178
migrate_domain(uint32_t domid,int preserve_domid,const char * rune,int debug,const char * override_config_file)179 static void migrate_domain(uint32_t domid, int preserve_domid,
180 const char *rune, int debug,
181 const char *override_config_file)
182 {
183 pid_t child = -1;
184 int rc;
185 int send_fd = -1, recv_fd = -1;
186 char *away_domname;
187 char rc_buf;
188 uint8_t *config_data;
189 int config_len, flags = LIBXL_SUSPEND_LIVE;
190
191 save_domain_core_begin(domid, preserve_domid, override_config_file,
192 &config_data, &config_len);
193
194 if (!config_len) {
195 fprintf(stderr, "No config file stored for running domain and "
196 "none supplied - cannot migrate.\n");
197 exit(EXIT_FAILURE);
198 }
199
200 child = create_migration_child(rune, &send_fd, &recv_fd);
201
202 migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len,
203 rune);
204
205 xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0);
206
207 if (debug)
208 flags |= LIBXL_SUSPEND_DEBUG;
209 rc = libxl_domain_suspend(ctx, domid, send_fd, flags, NULL);
210 if (rc) {
211 fprintf(stderr, "migration sender: libxl_domain_suspend failed"
212 " (rc=%d)\n", rc);
213 if (rc == ERROR_GUEST_TIMEDOUT)
214 goto failed_suspend;
215 else
216 goto failed_resume;
217 }
218
219 //fprintf(stderr, "migration sender: Transfer complete.\n");
220 // Should only be printed when debugging as it's a bit messy with
221 // progress indication.
222
223 rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_ready,
224 sizeof(migrate_receiver_ready),
225 "ready message", rune);
226 if (rc) goto failed_resume;
227
228 xtl_stdiostream_adjust_flags(logger, 0, XTL_STDIOSTREAM_HIDE_PROGRESS);
229
230 /* right, at this point we are about give the destination
231 * permission to rename and resume, so we must first rename the
232 * domain away ourselves */
233
234 fprintf(stderr, "migration sender: Target has acknowledged transfer.\n");
235
236 if (common_domname) {
237 xasprintf(&away_domname, "%s--migratedaway", common_domname);
238 rc = libxl_domain_rename(ctx, domid, common_domname, away_domname);
239 if (rc) goto failed_resume;
240 }
241
242 /* point of no return - as soon as we have tried to say
243 * "go" to the receiver, it's not safe to carry on. We leave
244 * the domain renamed to %s--migratedaway in case that's helpful.
245 */
246
247 fprintf(stderr, "migration sender: Giving target permission to start.\n");
248
249 rc = libxl_write_exactly(ctx, send_fd,
250 migrate_permission_to_go,
251 sizeof(migrate_permission_to_go),
252 "migration stream", "GO message");
253 if (rc) goto failed_badly;
254
255 rc = migrate_read_fixedmessage(recv_fd, migrate_report,
256 sizeof(migrate_report),
257 "success/failure report message", rune);
258 if (rc) goto failed_badly;
259
260 rc = libxl_read_exactly(ctx, recv_fd,
261 &rc_buf, 1,
262 "migration ack stream", "success/failure status");
263 if (rc) goto failed_badly;
264
265 if (rc_buf) {
266 fprintf(stderr, "migration sender: Target reports startup failure"
267 " (status code %d).\n", rc_buf);
268
269 rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go,
270 sizeof(migrate_permission_to_go),
271 "permission for sender to resume",
272 rune);
273 if (rc) goto failed_badly;
274
275 fprintf(stderr, "migration sender: Trying to resume at our end.\n");
276
277 if (common_domname) {
278 libxl_domain_rename(ctx, domid, away_domname, common_domname);
279 }
280 rc = libxl_domain_resume(ctx, domid, 1, 0);
281 if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n");
282
283 fprintf(stderr, "Migration failed due to problems at target.\n");
284 exit(EXIT_FAILURE);
285 }
286
287 fprintf(stderr, "migration sender: Target reports successful startup.\n");
288 libxl_domain_destroy(ctx, domid, 0); /* bang! */
289 fprintf(stderr, "Migration successful.\n");
290 exit(EXIT_SUCCESS);
291
292 failed_suspend:
293 close(send_fd);
294 migration_child_report(recv_fd);
295 fprintf(stderr, "Migration failed, failed to suspend at sender.\n");
296 exit(EXIT_FAILURE);
297
298 failed_resume:
299 close(send_fd);
300 migration_child_report(recv_fd);
301 fprintf(stderr, "Migration failed, resuming at sender.\n");
302 libxl_domain_resume(ctx, domid, 1, 0);
303 exit(EXIT_FAILURE);
304
305 failed_badly:
306 fprintf(stderr,
307 "** Migration failed during final handshake **\n"
308 "Domain state is now undefined !\n"
309 "Please CHECK AT BOTH ENDS for running instances, before renaming and\n"
310 " resuming at most one instance. Two simultaneous instances of the domain\n"
311 " would probably result in SEVERE DATA LOSS and it is now your\n"
312 " responsibility to avoid that. Sorry.\n");
313
314 close(send_fd);
315 migration_child_report(recv_fd);
316 exit(EXIT_FAILURE);
317 }
318
/*
 * Receiver side of a migration / Remus / COLO stream.  Never returns:
 * every path ends in exit().
 *
 *   debug, daemonize, monitor  copied into the domain_create request
 *   pause_after_migration      non-zero leaves the domain paused at the end
 *   send_fd                    descriptor for messages back to the sender
 *   recv_fd                    descriptor the incoming stream is read from
 *   checkpointed               kind of stream (none, Remus or COLO)
 *   colo_proxy_script,
 *   userspace_colo_proxy       copied into the domain_create request
 *
 * For a plain migration the handshake is: banner out, domain in, "ready"
 * out, "GO" in, rename + unpause, report + status byte out and, on
 * failure, destroy our copy and send the sender permission to resume.
 * For Remus/COLO, returning from create_domain() is treated as failover.
 */
static void migrate_receive(int debug, int daemonize, int monitor,
                            int pause_after_migration,
                            int send_fd, int recv_fd,
                            libxl_checkpointed_stream checkpointed,
                            char *colo_proxy_script,
                            bool userspace_colo_proxy)
{
    uint32_t domid;
    int rc, rc2;
    char rc_buf;
    /* Handed to create_domain() via migration_domname_r and tested
     * against NULL below; start from NULL so those tests are well
     * defined even if create_domain() does not store a name. */
    char *migration_domname = NULL;
    struct domain_create dom_info;

    signal(SIGPIPE, SIG_IGN);
    /* if we get SIGPIPE we'd rather just have it as an error */

    fprintf(stderr, "migration target: Ready to receive domain.\n");

    CHK_ERRNOVAL(libxl_write_exactly(
                     ctx, send_fd, migrate_receiver_banner,
                     sizeof(migrate_receiver_banner)-1,
                     "migration ack stream", "banner") );

    memset(&dom_info, 0, sizeof(dom_info));
    dom_info.debug = debug;
    dom_info.daemonize = daemonize;
    dom_info.monitor = monitor;
    dom_info.paused = 1;
    dom_info.migrate_fd = recv_fd;
    dom_info.send_back_fd = send_fd;
    dom_info.migration_domname_r = &migration_domname;
    dom_info.checkpointed_stream = checkpointed;
    dom_info.colo_proxy_script = colo_proxy_script;
    dom_info.userspace_colo_proxy = userspace_colo_proxy;

    rc = create_domain(&dom_info);
    if (rc < 0) {
        fprintf(stderr, "migration target: Domain creation failed"
                " (code %d).\n", rc);
        exit(EXIT_FAILURE);
    }

    domid = rc;

    switch (checkpointed) {
    case LIBXL_CHECKPOINTED_STREAM_REMUS:
    case LIBXL_CHECKPOINTED_STREAM_COLO:
    {
        const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ?
                         "COLO" : "Remus";
        /* If we are here, it means that the sender (primary) has crashed.
         * TODO: Split-Brain Check.
         */
        fprintf(stderr, "migration target: %s Failover for domain %u\n",
                ha, domid);

        /*
         * If domain renaming fails, lets just continue (as we need the domain
         * to be up & dom names may not matter much, as long as its reachable
         * over network).
         *
         * If domain unpausing fails, destroy domain ? Or is it better to have
         * a consistent copy of the domain (memory, cpu state, disk)
         * on atleast one physical host ? Right now, lets just leave the domain
         * as is and let the Administrator decide (or troubleshoot).
         */

        /* rc still holds the domid at this point.  Clear it so that the
         * COLO exit status below reflects only a failed rename and not a
         * non-zero domid when there is nothing to rename. */
        rc = 0;
        if (migration_domname) {
            rc = libxl_domain_rename(ctx, domid, migration_domname,
                                     common_domname);
            if (rc)
                fprintf(stderr, "migration target (%s): "
                        "Failed to rename domain from %s to %s:%d\n",
                        ha, migration_domname, common_domname, rc);
        }

        if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO)
            /* The guest is running after failover in COLO mode */
            exit(rc ? -ERROR_FAIL: 0);

        rc = libxl_domain_unpause(ctx, domid, NULL);
        if (rc)
            fprintf(stderr, "migration target (%s): "
                    "Failed to unpause domain %s (id: %u):%d\n",
                    ha, common_domname, domid, rc);

        exit(rc ? EXIT_FAILURE : EXIT_SUCCESS);
    }
    default:
        /* do nothing */
        break;
    }

    fprintf(stderr, "migration target: Transfer complete,"
            " requesting permission to start domain.\n");

    rc = libxl_write_exactly(ctx, send_fd,
                             migrate_receiver_ready,
                             sizeof(migrate_receiver_ready),
                             "migration ack stream", "ready message");
    if (rc) exit(EXIT_FAILURE);

    rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go,
                                   sizeof(migrate_permission_to_go),
                                   "GO message", 0);
    if (rc) goto perhaps_destroy_notify_rc;

    fprintf(stderr, "migration target: Got permission, starting domain.\n");

    if (migration_domname) {
        rc = libxl_domain_rename(ctx, domid, migration_domname, common_domname);
        if (rc) goto perhaps_destroy_notify_rc;
    }

    if (!pause_after_migration) {
        rc = libxl_domain_unpause(ctx, domid, NULL);
        if (rc) goto perhaps_destroy_notify_rc;
    }

    fprintf(stderr, "migration target: Domain started successsfully.\n");
    rc = 0;

 perhaps_destroy_notify_rc:
    /* Reached on success (rc == 0) and on failure (rc != 0) alike:
     * send the report header followed by a one-byte status. */
    rc2 = libxl_write_exactly(ctx, send_fd,
                              migrate_report, sizeof(migrate_report),
                              "migration ack stream",
                              "success/failure report");
    if (rc2) exit(EXIT_FAILURE);

    rc_buf = -rc;
    /* the status byte must be non-zero exactly when rc is */
    assert(!!rc_buf == !!rc);
    rc2 = libxl_write_exactly(ctx, send_fd, &rc_buf, 1,
                              "migration ack stream",
                              "success/failure code");
    if (rc2) exit(EXIT_FAILURE);

    if (rc) {
        fprintf(stderr, "migration target: Failure, destroying our copy.\n");

        rc2 = libxl_domain_destroy(ctx, domid, 0);
        if (rc2) {
            fprintf(stderr, "migration target: Failed to destroy our copy"
                    " (code %d).\n", rc2);
            exit(EXIT_FAILURE);
        }

        fprintf(stderr, "migration target: Cleanup OK, granting sender"
                " permission to resume.\n");

        rc2 = libxl_write_exactly(ctx, send_fd,
                                  migrate_permission_to_go,
                                  sizeof(migrate_permission_to_go),
                                  "migration ack stream",
                                  "permission to sender to have domain back");
        if (rc2) exit(EXIT_FAILURE);
    }

    exit(EXIT_SUCCESS);
}
477
478
main_migrate_receive(int argc,char ** argv)479 int main_migrate_receive(int argc, char **argv)
480 {
481 int debug = 0, daemonize = 1, monitor = 1, pause_after_migration = 0;
482 libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE;
483 int opt;
484 bool userspace_colo_proxy = false;
485 char *script = NULL;
486 static struct option opts[] = {
487 {"colo", 0, 0, 0x100},
488 /* It is a shame that the management code for disk is not here. */
489 {"coloft-script", 1, 0, 0x200},
490 {"userspace-colo-proxy", 0, 0, 0x300},
491 COMMON_LONG_OPTS
492 };
493
494 SWITCH_FOREACH_OPT(opt, "Fedrp", opts, "migrate-receive", 0) {
495 case 'F':
496 daemonize = 0;
497 break;
498 case 'e':
499 daemonize = 0;
500 monitor = 0;
501 break;
502 case 'd':
503 debug = 1;
504 break;
505 case 'r':
506 checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS;
507 break;
508 case 0x100:
509 checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO;
510 break;
511 case 0x200:
512 script = optarg;
513 break;
514 case 0x300:
515 userspace_colo_proxy = true;
516 break;
517 case 'p':
518 pause_after_migration = 1;
519 break;
520 }
521
522 if (argc-optind != 0) {
523 help("migrate-receive");
524 return EXIT_FAILURE;
525 }
526 migrate_receive(debug, daemonize, monitor, pause_after_migration,
527 STDOUT_FILENO, STDIN_FILENO,
528 checkpointed, script, userspace_colo_proxy);
529
530 return EXIT_SUCCESS;
531 }
532
main_migrate(int argc,char ** argv)533 int main_migrate(int argc, char **argv)
534 {
535 uint32_t domid;
536 const char *config_filename = NULL;
537 const char *ssh_command = "ssh";
538 char *rune = NULL;
539 char *host;
540 int opt, daemonize = 1, monitor = 1, debug = 0, pause_after_migration = 0;
541 int preserve_domid = 0;
542 static struct option opts[] = {
543 {"debug", 0, 0, 0x100},
544 {"live", 0, 0, 0x200},
545 COMMON_LONG_OPTS
546 };
547
548 SWITCH_FOREACH_OPT(opt, "FC:s:epD", opts, "migrate", 2) {
549 case 'C':
550 config_filename = optarg;
551 break;
552 case 's':
553 ssh_command = optarg;
554 break;
555 case 'F':
556 daemonize = 0;
557 break;
558 case 'e':
559 daemonize = 0;
560 monitor = 0;
561 break;
562 case 'p':
563 pause_after_migration = 1;
564 break;
565 case 'D':
566 preserve_domid = 1;
567 break;
568 case 0x100: /* --debug */
569 debug = 1;
570 break;
571 case 0x200: /* --live */
572 /* ignored for compatibility with xm */
573 break;
574 }
575
576 domid = find_domain(argv[optind]);
577 host = argv[optind + 1];
578
579 bool pass_tty_arg = progress_use_cr || (isatty(2) > 0);
580
581 if (!ssh_command[0]) {
582 rune= host;
583 } else {
584 char verbose_buf[minmsglevel_default+3];
585 int verbose_len;
586 verbose_buf[0] = ' ';
587 verbose_buf[1] = '-';
588 memset(verbose_buf+2, 'v', minmsglevel_default);
589 verbose_buf[sizeof(verbose_buf)-1] = 0;
590 if (minmsglevel == minmsglevel_default) {
591 verbose_len = 0;
592 } else {
593 verbose_len = (minmsglevel_default - minmsglevel) + 2;
594 }
595 xasprintf(&rune, "exec %s %s xl%s%.*s migrate-receive%s%s%s",
596 ssh_command, host,
597 pass_tty_arg ? " -t" : "",
598 verbose_len, verbose_buf,
599 daemonize ? "" : " -e",
600 debug ? " -d" : "",
601 pause_after_migration ? " -p" : "");
602 }
603
604 migrate_domain(domid, preserve_domid, rune, debug, config_filename);
605 return EXIT_SUCCESS;
606 }
607
main_remus(int argc,char ** argv)608 int main_remus(int argc, char **argv)
609 {
610 uint32_t domid;
611 int opt, rc, daemonize = 1;
612 const char *ssh_command = "ssh";
613 char *host = NULL, *rune = NULL;
614 libxl_domain_remus_info r_info;
615 int send_fd = -1, recv_fd = -1;
616 pid_t child = -1;
617 uint8_t *config_data;
618 int config_len;
619
620 memset(&r_info, 0, sizeof(libxl_domain_remus_info));
621
622 SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ecp", NULL, "remus", 2) {
623 case 'i':
624 r_info.interval = atoi(optarg);
625 break;
626 case 'F':
627 libxl_defbool_set(&r_info.allow_unsafe, true);
628 break;
629 case 'b':
630 libxl_defbool_set(&r_info.blackhole, true);
631 break;
632 case 'u':
633 libxl_defbool_set(&r_info.compression, false);
634 break;
635 case 'n':
636 libxl_defbool_set(&r_info.netbuf, false);
637 break;
638 case 'N':
639 r_info.netbufscript = optarg;
640 break;
641 case 'd':
642 libxl_defbool_set(&r_info.diskbuf, false);
643 break;
644 case 's':
645 ssh_command = optarg;
646 break;
647 case 'e':
648 daemonize = 0;
649 break;
650 case 'c':
651 libxl_defbool_set(&r_info.colo, true);
652 break;
653 case 'p':
654 libxl_defbool_set(&r_info.userspace_colo_proxy, true);
655 }
656
657 domid = find_domain(argv[optind]);
658 host = argv[optind + 1];
659
660 /* Defaults */
661 libxl_defbool_setdefault(&r_info.blackhole, false);
662 libxl_defbool_setdefault(&r_info.colo, false);
663 libxl_defbool_setdefault(&r_info.userspace_colo_proxy, false);
664
665 if (!libxl_defbool_val(r_info.colo) && !r_info.interval)
666 r_info.interval = 200;
667
668 if (libxl_defbool_val(r_info.userspace_colo_proxy) &&
669 !libxl_defbool_val(r_info.colo)) {
670 fprintf(stderr, "Option -p must be used in conjunction with -c");
671 exit(-1);
672 }
673
674 if (libxl_defbool_val(r_info.colo)) {
675 if (r_info.interval || libxl_defbool_val(r_info.blackhole) ||
676 !libxl_defbool_is_default(r_info.netbuf) ||
677 !libxl_defbool_is_default(r_info.diskbuf)) {
678 perror("option -c is conflict with -i, -d, -n or -b");
679 exit(-1);
680 }
681
682 if (libxl_defbool_is_default(r_info.compression)) {
683 perror("COLO can't be used with memory compression. "
684 "Disable memory checkpoint compression now...");
685 libxl_defbool_set(&r_info.compression, false);
686 }
687 }
688
689 if (!r_info.netbufscript) {
690 if (libxl_defbool_val(r_info.colo))
691 r_info.netbufscript = default_colo_proxy_script;
692 else
693 r_info.netbufscript = default_remus_netbufscript;
694 }
695
696 if (libxl_defbool_val(r_info.blackhole)) {
697 send_fd = open("/dev/null", O_RDWR, 0644);
698 if (send_fd < 0) {
699 perror("failed to open /dev/null");
700 exit(EXIT_FAILURE);
701 }
702 } else {
703
704 if (!ssh_command[0]) {
705 rune = host;
706 } else {
707 if (!libxl_defbool_val(r_info.colo)) {
708 xasprintf(&rune, "exec %s %s xl migrate-receive %s %s",
709 ssh_command, host,
710 "-r",
711 daemonize ? "" : " -e");
712 } else {
713 xasprintf(&rune, "exec %s %s xl migrate-receive %s %s %s %s %s",
714 ssh_command, host,
715 "--colo",
716 r_info.netbufscript ? "--coloft-script" : "",
717 r_info.netbufscript ? r_info.netbufscript : "",
718 libxl_defbool_val(r_info.userspace_colo_proxy) ?
719 "--userspace-colo-proxy" : "",
720 daemonize ? "" : " -e");
721 }
722 }
723
724 save_domain_core_begin(domid, 0, NULL, &config_data, &config_len);
725
726 if (!config_len) {
727 fprintf(stderr, "No config file stored for running domain and "
728 "none supplied - cannot start remus.\n");
729 exit(EXIT_FAILURE);
730 }
731
732 child = create_migration_child(rune, &send_fd, &recv_fd);
733
734 migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len,
735 rune);
736
737 if (ssh_command[0])
738 free(rune);
739 }
740
741 /* Point of no return */
742 rc = libxl_domain_remus_start(ctx, &r_info, domid, send_fd, recv_fd, 0);
743
744 /* check if the domain exists. User may have xl destroyed the
745 * domain to force failover
746 */
747 if (libxl_domain_info(ctx, 0, domid)) {
748 fprintf(stderr, "%s: Primary domain has been destroyed.\n",
749 libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
750 close(send_fd);
751 return EXIT_SUCCESS;
752 }
753
754 /* If we are here, it means remus setup/domain suspend/backup has
755 * failed. Try to resume the domain and exit gracefully.
756 * TODO: Split-Brain check.
757 */
758 if (rc == ERROR_GUEST_TIMEDOUT)
759 fprintf(stderr, "Failed to suspend domain at primary.\n");
760 else {
761 fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n",
762 libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
763 libxl_domain_resume(ctx, domid, 1, 0);
764 }
765
766 close(send_fd);
767 return EXIT_FAILURE;
768 }
769 #endif
770
771
772 /*
773 * Local variables:
774 * mode: C
775 * c-basic-offset: 4
776 * indent-tabs-mode: nil
777 * End:
778 */
779