1 /*
2  * Copyright 2009-2017 Citrix Ltd and other contributors
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License as published
6  * by the Free Software Foundation; version 2.1 only. with the special
7  * exception on linking described in file LICENSE.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  */
14 
15 #include <fcntl.h>
16 #include <inttypes.h>
17 #include <signal.h>
18 #include <stdlib.h>
19 #include <sys/stat.h>
20 #include <sys/types.h>
21 #include <sys/utsname.h>
22 #include <time.h>
23 #include <unistd.h>
24 
25 #include <libxl.h>
26 #include <libxl_utils.h>
27 #include <libxlutil.h>
28 
29 #include "xl.h"
30 #include "xl_utils.h"
31 #include "xl_parse.h"
32 
33 #ifndef LIBXL_HAVE_NO_SUSPEND_RESUME
34 
create_migration_child(const char * rune,int * send_fd,int * recv_fd)35 static pid_t create_migration_child(const char *rune, int *send_fd,
36                                         int *recv_fd)
37 {
38     int sendpipe[2], recvpipe[2];
39     pid_t child;
40 
41     if (!rune || !send_fd || !recv_fd)
42         return -1;
43 
44     MUST( libxl_pipe(ctx, sendpipe) );
45     MUST( libxl_pipe(ctx, recvpipe) );
46 
47     child = xl_fork(child_migration, "migration transport process");
48 
49     if (!child) {
50         dup2(sendpipe[0], 0);
51         dup2(recvpipe[1], 1);
52         close(sendpipe[0]); close(sendpipe[1]);
53         close(recvpipe[0]); close(recvpipe[1]);
54         execlp("sh","sh","-c",rune,(char*)0);
55         perror("failed to exec sh");
56         exit(EXIT_FAILURE);
57     }
58 
59     close(sendpipe[0]);
60     close(recvpipe[1]);
61     *send_fd = sendpipe[1];
62     *recv_fd = recvpipe[0];
63 
64     /* if receiver dies, we get an error and can clean up
65        rather than just dying */
66     signal(SIGPIPE, SIG_IGN);
67 
68     return child;
69 }
70 
migrate_read_fixedmessage(int fd,const void * msg,int msgsz,const char * what,const char * rune)71 static int migrate_read_fixedmessage(int fd, const void *msg, int msgsz,
72                                      const char *what, const char *rune) {
73     char buf[msgsz];
74     const char *stream;
75     int rc;
76 
77     stream = rune ? "migration receiver stream" : "migration stream";
78     rc = libxl_read_exactly(ctx, fd, buf, msgsz, stream, what);
79     if (rc) return 1;
80 
81     if (memcmp(buf, msg, msgsz)) {
82         fprintf(stderr, "%s contained unexpected data instead of %s\n",
83                 stream, what);
84         if (rune)
85             fprintf(stderr, "(command run was: %s )\n", rune);
86         return 1;
87     }
88     return 0;
89 }
90 
migration_child_report(int recv_fd)91 static void migration_child_report(int recv_fd) {
92     pid_t child;
93     int status, sr;
94     struct timeval now, waituntil, timeout;
95     static const struct timeval pollinterval = { 0, 1000 }; /* 1ms */
96 
97     if (!xl_child_pid(child_migration)) return;
98 
99     CHK_SYSCALL(gettimeofday(&waituntil, 0));
100     waituntil.tv_sec += 2;
101 
102     for (;;) {
103         pid_t migration_child = xl_child_pid(child_migration);
104         child = xl_waitpid(child_migration, &status, WNOHANG);
105 
106         if (child == migration_child) {
107             if (status)
108                 xl_report_child_exitstatus(XTL_INFO, child_migration,
109                                            migration_child, status);
110             break;
111         }
112         if (child == -1) {
113             fprintf(stderr, "wait for migration child [%ld] failed: %s\n",
114                     (long)migration_child, strerror(errno));
115             break;
116         }
117         assert(child == 0);
118 
119         CHK_SYSCALL(gettimeofday(&now, 0));
120         if (timercmp(&now, &waituntil, >)) {
121             fprintf(stderr, "migration child [%ld] not exiting, no longer"
122                     " waiting (exit status will be unreported)\n",
123                     (long)migration_child);
124             break;
125         }
126         timersub(&waituntil, &now, &timeout);
127 
128         if (recv_fd >= 0) {
129             fd_set readfds, exceptfds;
130             FD_ZERO(&readfds);
131             FD_ZERO(&exceptfds);
132             FD_SET(recv_fd, &readfds);
133             FD_SET(recv_fd, &exceptfds);
134             sr = select(recv_fd+1, &readfds,0,&exceptfds, &timeout);
135         } else {
136             if (timercmp(&timeout, &pollinterval, >))
137                 timeout = pollinterval;
138             sr = select(0,0,0,0, &timeout);
139         }
140         if (sr > 0) {
141             recv_fd = -1;
142         } else if (sr == 0) {
143         } else if (sr == -1) {
144             if (errno != EINTR) {
145                 fprintf(stderr, "migration child [%ld] exit wait select"
146                         " failed unexpectedly: %s\n",
147                         (long)migration_child, strerror(errno));
148                 break;
149             }
150         }
151     }
152 }
153 
migrate_do_preamble(int send_fd,int recv_fd,pid_t child,uint8_t * config_data,int config_len,const char * rune)154 static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child,
155                                 uint8_t *config_data, int config_len,
156                                 const char *rune)
157 {
158     int rc = 0;
159 
160     if (send_fd < 0 || recv_fd < 0) {
161         fprintf(stderr, "migrate_do_preamble: invalid file descriptors\n");
162         exit(EXIT_FAILURE);
163     }
164 
165     rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_banner,
166                                    sizeof(migrate_receiver_banner)-1,
167                                    "banner", rune);
168     if (rc) {
169         close(send_fd);
170         migration_child_report(recv_fd);
171         exit(EXIT_FAILURE);
172     }
173 
174     save_domain_core_writeconfig(send_fd, "migration stream",
175                                  config_data, config_len);
176 
177 }
178 
migrate_domain(uint32_t domid,int preserve_domid,const char * rune,int debug,const char * override_config_file)179 static void migrate_domain(uint32_t domid, int preserve_domid,
180                            const char *rune, int debug,
181                            const char *override_config_file)
182 {
183     pid_t child = -1;
184     int rc;
185     int send_fd = -1, recv_fd = -1;
186     char *away_domname;
187     char rc_buf;
188     uint8_t *config_data;
189     int config_len, flags = LIBXL_SUSPEND_LIVE;
190 
191     save_domain_core_begin(domid, preserve_domid, override_config_file,
192                            &config_data, &config_len);
193 
194     if (!config_len) {
195         fprintf(stderr, "No config file stored for running domain and "
196                 "none supplied - cannot migrate.\n");
197         exit(EXIT_FAILURE);
198     }
199 
200     child = create_migration_child(rune, &send_fd, &recv_fd);
201 
202     migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len,
203                         rune);
204 
205     xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0);
206 
207     if (debug)
208         flags |= LIBXL_SUSPEND_DEBUG;
209     rc = libxl_domain_suspend(ctx, domid, send_fd, flags, NULL);
210     if (rc) {
211         fprintf(stderr, "migration sender: libxl_domain_suspend failed"
212                 " (rc=%d)\n", rc);
213         if (rc == ERROR_GUEST_TIMEDOUT)
214             goto failed_suspend;
215         else
216             goto failed_resume;
217     }
218 
219     //fprintf(stderr, "migration sender: Transfer complete.\n");
220     // Should only be printed when debugging as it's a bit messy with
221     // progress indication.
222 
223     rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_ready,
224                                    sizeof(migrate_receiver_ready),
225                                    "ready message", rune);
226     if (rc) goto failed_resume;
227 
228     xtl_stdiostream_adjust_flags(logger, 0, XTL_STDIOSTREAM_HIDE_PROGRESS);
229 
230     /* right, at this point we are about give the destination
231      * permission to rename and resume, so we must first rename the
232      * domain away ourselves */
233 
234     fprintf(stderr, "migration sender: Target has acknowledged transfer.\n");
235 
236     if (common_domname) {
237         xasprintf(&away_domname, "%s--migratedaway", common_domname);
238         rc = libxl_domain_rename(ctx, domid, common_domname, away_domname);
239         if (rc) goto failed_resume;
240     }
241 
242     /* point of no return - as soon as we have tried to say
243      * "go" to the receiver, it's not safe to carry on.  We leave
244      * the domain renamed to %s--migratedaway in case that's helpful.
245      */
246 
247     fprintf(stderr, "migration sender: Giving target permission to start.\n");
248 
249     rc = libxl_write_exactly(ctx, send_fd,
250                              migrate_permission_to_go,
251                              sizeof(migrate_permission_to_go),
252                              "migration stream", "GO message");
253     if (rc) goto failed_badly;
254 
255     rc = migrate_read_fixedmessage(recv_fd, migrate_report,
256                                    sizeof(migrate_report),
257                                    "success/failure report message", rune);
258     if (rc) goto failed_badly;
259 
260     rc = libxl_read_exactly(ctx, recv_fd,
261                             &rc_buf, 1,
262                             "migration ack stream", "success/failure status");
263     if (rc) goto failed_badly;
264 
265     if (rc_buf) {
266         fprintf(stderr, "migration sender: Target reports startup failure"
267                 " (status code %d).\n", rc_buf);
268 
269         rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go,
270                                        sizeof(migrate_permission_to_go),
271                                        "permission for sender to resume",
272                                        rune);
273         if (rc) goto failed_badly;
274 
275         fprintf(stderr, "migration sender: Trying to resume at our end.\n");
276 
277         if (common_domname) {
278             libxl_domain_rename(ctx, domid, away_domname, common_domname);
279         }
280         rc = libxl_domain_resume(ctx, domid, 1, 0);
281         if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n");
282 
283         fprintf(stderr, "Migration failed due to problems at target.\n");
284         exit(EXIT_FAILURE);
285     }
286 
287     fprintf(stderr, "migration sender: Target reports successful startup.\n");
288     libxl_domain_destroy(ctx, domid, 0); /* bang! */
289     fprintf(stderr, "Migration successful.\n");
290     exit(EXIT_SUCCESS);
291 
292  failed_suspend:
293     close(send_fd);
294     migration_child_report(recv_fd);
295     fprintf(stderr, "Migration failed, failed to suspend at sender.\n");
296     exit(EXIT_FAILURE);
297 
298  failed_resume:
299     close(send_fd);
300     migration_child_report(recv_fd);
301     fprintf(stderr, "Migration failed, resuming at sender.\n");
302     libxl_domain_resume(ctx, domid, 1, 0);
303     exit(EXIT_FAILURE);
304 
305  failed_badly:
306     fprintf(stderr,
307  "** Migration failed during final handshake **\n"
308  "Domain state is now undefined !\n"
309  "Please CHECK AT BOTH ENDS for running instances, before renaming and\n"
310  " resuming at most one instance.  Two simultaneous instances of the domain\n"
311  " would probably result in SEVERE DATA LOSS and it is now your\n"
312  " responsibility to avoid that.  Sorry.\n");
313 
314     close(send_fd);
315     migration_child_report(recv_fd);
316     exit(EXIT_FAILURE);
317 }
318 
/*
 * Receiver side of a migration / Remus / COLO stream.  Never returns:
 * every path ends in exit().
 *
 *   debug, daemonize, monitor  copied into the domain_create request
 *   pause_after_migration      if set, the domain is left paused after a
 *                              successful plain migration
 *   send_fd                    descriptor used to talk back to the sender
 *   recv_fd                    descriptor the domain image is read from
 *   checkpointed               stream type; for REMUS/COLO, returning from
 *                              create_domain() is treated as a failover
 *   colo_proxy_script,
 *   userspace_colo_proxy       copied into the domain_create request
 *
 * For a plain migration the handshake is: write banner, create the domain
 * (paused), write "ready", read "GO", rename/unpause, then write the
 * report plus one status byte (the negated rc).  On failure the local copy
 * is destroyed and the sender is given permission to resume.
 */
static void migrate_receive(int debug, int daemonize, int monitor,
                            int pause_after_migration,
                            int send_fd, int recv_fd,
                            libxl_checkpointed_stream checkpointed,
                            char *colo_proxy_script,
                            bool userspace_colo_proxy)
{
    uint32_t domid;
    int rc, rc2;
    char rc_buf;
    /*
     * Filled in through dom_info.migration_domname_r.  Start from NULL so
     * that the "if (migration_domname)" checks below are well defined even
     * if create_domain() does not store a name.
     */
    char *migration_domname = NULL;
    struct domain_create dom_info;

    signal(SIGPIPE, SIG_IGN);
    /* if we get SIGPIPE we'd rather just have it as an error */

    fprintf(stderr, "migration target: Ready to receive domain.\n");

    CHK_ERRNOVAL(libxl_write_exactly(
                     ctx, send_fd, migrate_receiver_banner,
                     sizeof(migrate_receiver_banner)-1,
                     "migration ack stream", "banner") );

    memset(&dom_info, 0, sizeof(dom_info));
    dom_info.debug = debug;
    dom_info.daemonize = daemonize;
    dom_info.monitor = monitor;
    dom_info.paused = 1;
    dom_info.migrate_fd = recv_fd;
    dom_info.send_back_fd = send_fd;
    dom_info.migration_domname_r = &migration_domname;
    dom_info.checkpointed_stream = checkpointed;
    dom_info.colo_proxy_script = colo_proxy_script;
    dom_info.userspace_colo_proxy = userspace_colo_proxy;

    rc = create_domain(&dom_info);
    if (rc < 0) {
        fprintf(stderr, "migration target: Domain creation failed"
                " (code %d).\n", rc);
        exit(EXIT_FAILURE);
    }

    /* a non-negative return value is the new domain's id */
    domid = rc;

    switch (checkpointed) {
    case LIBXL_CHECKPOINTED_STREAM_REMUS:
    case LIBXL_CHECKPOINTED_STREAM_COLO:
    {
        const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ?
                         "COLO" : "Remus";
        /* If we are here, it means that the sender (primary) has crashed.
         * TODO: Split-Brain Check.
         */
        fprintf(stderr, "migration target: %s Failover for domain %u\n",
                ha, domid);

        /*
         * If domain renaming fails, lets just continue (as we need the domain
         * to be up & dom names may not matter much, as long as its reachable
         * over network).
         *
         * If domain unpausing fails, destroy domain ? Or is it better to have
         * a consistent copy of the domain (memory, cpu state, disk)
         * on atleast one physical host ? Right now, lets just leave the domain
         * as is and let the Administrator decide (or troubleshoot).
         */

        /*
         * rc still holds the domid at this point; clear it so that the
         * COLO exit status below reflects only the outcome of the rename.
         */
        rc = 0;
        if (migration_domname) {
            rc = libxl_domain_rename(ctx, domid, migration_domname,
                                     common_domname);
            if (rc)
                fprintf(stderr, "migration target (%s): "
                        "Failed to rename domain from %s to %s:%d\n",
                        ha, migration_domname, common_domname, rc);
        }

        if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO)
            /* The guest is running after failover in COLO mode */
            exit(rc ? -ERROR_FAIL: 0);

        rc = libxl_domain_unpause(ctx, domid, NULL);
        if (rc)
            fprintf(stderr, "migration target (%s): "
                    "Failed to unpause domain %s (id: %u):%d\n",
                    ha, common_domname, domid, rc);

        exit(rc ? EXIT_FAILURE : EXIT_SUCCESS);
    }
    default:
        /* do nothing */
        break;
    }

    fprintf(stderr, "migration target: Transfer complete,"
            " requesting permission to start domain.\n");

    rc = libxl_write_exactly(ctx, send_fd,
                             migrate_receiver_ready,
                             sizeof(migrate_receiver_ready),
                             "migration ack stream", "ready message");
    if (rc) exit(EXIT_FAILURE);

    rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go,
                                   sizeof(migrate_permission_to_go),
                                   "GO message", 0);
    if (rc) goto perhaps_destroy_notify_rc;

    fprintf(stderr, "migration target: Got permission, starting domain.\n");

    if (migration_domname) {
        rc = libxl_domain_rename(ctx, domid, migration_domname, common_domname);
        if (rc) goto perhaps_destroy_notify_rc;
    }

    if (!pause_after_migration) {
        rc = libxl_domain_unpause(ctx, domid, NULL);
        if (rc) goto perhaps_destroy_notify_rc;
    }

    fprintf(stderr, "migration target: Domain started successsfully.\n");
    rc = 0;

 perhaps_destroy_notify_rc:
    /* reached on success (rc == 0) and on failure (rc != 0) alike */
    rc2 = libxl_write_exactly(ctx, send_fd,
                              migrate_report, sizeof(migrate_report),
                              "migration ack stream",
                              "success/failure report");
    if (rc2) exit(EXIT_FAILURE);

    /* the status travels as a single byte; make sure non-zero stays so */
    rc_buf = -rc;
    assert(!!rc_buf == !!rc);
    rc2 = libxl_write_exactly(ctx, send_fd, &rc_buf, 1,
                              "migration ack stream",
                              "success/failure code");
    if (rc2) exit(EXIT_FAILURE);

    if (rc) {
        fprintf(stderr, "migration target: Failure, destroying our copy.\n");

        rc2 = libxl_domain_destroy(ctx, domid, 0);
        if (rc2) {
            fprintf(stderr, "migration target: Failed to destroy our copy"
                    " (code %d).\n", rc2);
            exit(EXIT_FAILURE);
        }

        fprintf(stderr, "migration target: Cleanup OK, granting sender"
                " permission to resume.\n");

        rc2 = libxl_write_exactly(ctx, send_fd,
                                  migrate_permission_to_go,
                                  sizeof(migrate_permission_to_go),
                                  "migration ack stream",
                                  "permission to sender to have domain back");
        if (rc2) exit(EXIT_FAILURE);
    }

    exit(EXIT_SUCCESS);
}
477 
478 
main_migrate_receive(int argc,char ** argv)479 int main_migrate_receive(int argc, char **argv)
480 {
481     int debug = 0, daemonize = 1, monitor = 1, pause_after_migration = 0;
482     libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE;
483     int opt;
484     bool userspace_colo_proxy = false;
485     char *script = NULL;
486     static struct option opts[] = {
487         {"colo", 0, 0, 0x100},
488         /* It is a shame that the management code for disk is not here. */
489         {"coloft-script", 1, 0, 0x200},
490         {"userspace-colo-proxy", 0, 0, 0x300},
491         COMMON_LONG_OPTS
492     };
493 
494     SWITCH_FOREACH_OPT(opt, "Fedrp", opts, "migrate-receive", 0) {
495     case 'F':
496         daemonize = 0;
497         break;
498     case 'e':
499         daemonize = 0;
500         monitor = 0;
501         break;
502     case 'd':
503         debug = 1;
504         break;
505     case 'r':
506         checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS;
507         break;
508     case 0x100:
509         checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO;
510         break;
511     case 0x200:
512         script = optarg;
513         break;
514     case 0x300:
515         userspace_colo_proxy = true;
516         break;
517     case 'p':
518         pause_after_migration = 1;
519         break;
520     }
521 
522     if (argc-optind != 0) {
523         help("migrate-receive");
524         return EXIT_FAILURE;
525     }
526     migrate_receive(debug, daemonize, monitor, pause_after_migration,
527                     STDOUT_FILENO, STDIN_FILENO,
528                     checkpointed, script, userspace_colo_proxy);
529 
530     return EXIT_SUCCESS;
531 }
532 
main_migrate(int argc,char ** argv)533 int main_migrate(int argc, char **argv)
534 {
535     uint32_t domid;
536     const char *config_filename = NULL;
537     const char *ssh_command = "ssh";
538     char *rune = NULL;
539     char *host;
540     int opt, daemonize = 1, monitor = 1, debug = 0, pause_after_migration = 0;
541     int preserve_domid = 0;
542     static struct option opts[] = {
543         {"debug", 0, 0, 0x100},
544         {"live", 0, 0, 0x200},
545         COMMON_LONG_OPTS
546     };
547 
548     SWITCH_FOREACH_OPT(opt, "FC:s:epD", opts, "migrate", 2) {
549     case 'C':
550         config_filename = optarg;
551         break;
552     case 's':
553         ssh_command = optarg;
554         break;
555     case 'F':
556         daemonize = 0;
557         break;
558     case 'e':
559         daemonize = 0;
560         monitor = 0;
561         break;
562     case 'p':
563         pause_after_migration = 1;
564         break;
565     case 'D':
566         preserve_domid = 1;
567         break;
568     case 0x100: /* --debug */
569         debug = 1;
570         break;
571     case 0x200: /* --live */
572         /* ignored for compatibility with xm */
573         break;
574     }
575 
576     domid = find_domain(argv[optind]);
577     host = argv[optind + 1];
578 
579     bool pass_tty_arg = progress_use_cr || (isatty(2) > 0);
580 
581     if (!ssh_command[0]) {
582         rune= host;
583     } else {
584         char verbose_buf[minmsglevel_default+3];
585         int verbose_len;
586         verbose_buf[0] = ' ';
587         verbose_buf[1] = '-';
588         memset(verbose_buf+2, 'v', minmsglevel_default);
589         verbose_buf[sizeof(verbose_buf)-1] = 0;
590         if (minmsglevel == minmsglevel_default) {
591             verbose_len = 0;
592         } else {
593             verbose_len = (minmsglevel_default - minmsglevel) + 2;
594         }
595         xasprintf(&rune, "exec %s %s xl%s%.*s migrate-receive%s%s%s",
596                   ssh_command, host,
597                   pass_tty_arg ? " -t" : "",
598                   verbose_len, verbose_buf,
599                   daemonize ? "" : " -e",
600                   debug ? " -d" : "",
601                   pause_after_migration ? " -p" : "");
602     }
603 
604     migrate_domain(domid, preserve_domid, rune, debug, config_filename);
605     return EXIT_SUCCESS;
606 }
607 
main_remus(int argc,char ** argv)608 int main_remus(int argc, char **argv)
609 {
610     uint32_t domid;
611     int opt, rc, daemonize = 1;
612     const char *ssh_command = "ssh";
613     char *host = NULL, *rune = NULL;
614     libxl_domain_remus_info r_info;
615     int send_fd = -1, recv_fd = -1;
616     pid_t child = -1;
617     uint8_t *config_data;
618     int config_len;
619 
620     memset(&r_info, 0, sizeof(libxl_domain_remus_info));
621 
622     SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ecp", NULL, "remus", 2) {
623     case 'i':
624         r_info.interval = atoi(optarg);
625         break;
626     case 'F':
627         libxl_defbool_set(&r_info.allow_unsafe, true);
628         break;
629     case 'b':
630         libxl_defbool_set(&r_info.blackhole, true);
631         break;
632     case 'u':
633         libxl_defbool_set(&r_info.compression, false);
634         break;
635     case 'n':
636         libxl_defbool_set(&r_info.netbuf, false);
637         break;
638     case 'N':
639         r_info.netbufscript = optarg;
640         break;
641     case 'd':
642         libxl_defbool_set(&r_info.diskbuf, false);
643         break;
644     case 's':
645         ssh_command = optarg;
646         break;
647     case 'e':
648         daemonize = 0;
649         break;
650     case 'c':
651         libxl_defbool_set(&r_info.colo, true);
652         break;
653     case 'p':
654         libxl_defbool_set(&r_info.userspace_colo_proxy, true);
655     }
656 
657     domid = find_domain(argv[optind]);
658     host = argv[optind + 1];
659 
660     /* Defaults */
661     libxl_defbool_setdefault(&r_info.blackhole, false);
662     libxl_defbool_setdefault(&r_info.colo, false);
663     libxl_defbool_setdefault(&r_info.userspace_colo_proxy, false);
664 
665     if (!libxl_defbool_val(r_info.colo) && !r_info.interval)
666         r_info.interval = 200;
667 
668     if (libxl_defbool_val(r_info.userspace_colo_proxy) &&
669         !libxl_defbool_val(r_info.colo)) {
670         fprintf(stderr, "Option -p must be used in conjunction with -c");
671         exit(-1);
672     }
673 
674     if (libxl_defbool_val(r_info.colo)) {
675         if (r_info.interval || libxl_defbool_val(r_info.blackhole) ||
676             !libxl_defbool_is_default(r_info.netbuf) ||
677             !libxl_defbool_is_default(r_info.diskbuf)) {
678             perror("option -c is conflict with -i, -d, -n or -b");
679             exit(-1);
680         }
681 
682         if (libxl_defbool_is_default(r_info.compression)) {
683             perror("COLO can't be used with memory compression. "
684                    "Disable memory checkpoint compression now...");
685             libxl_defbool_set(&r_info.compression, false);
686         }
687     }
688 
689     if (!r_info.netbufscript) {
690         if (libxl_defbool_val(r_info.colo))
691             r_info.netbufscript = default_colo_proxy_script;
692         else
693             r_info.netbufscript = default_remus_netbufscript;
694     }
695 
696     if (libxl_defbool_val(r_info.blackhole)) {
697         send_fd = open("/dev/null", O_RDWR, 0644);
698         if (send_fd < 0) {
699             perror("failed to open /dev/null");
700             exit(EXIT_FAILURE);
701         }
702     } else {
703 
704         if (!ssh_command[0]) {
705             rune = host;
706         } else {
707             if (!libxl_defbool_val(r_info.colo)) {
708                 xasprintf(&rune, "exec %s %s xl migrate-receive %s %s",
709                           ssh_command, host,
710                           "-r",
711                           daemonize ? "" : " -e");
712             } else {
713                 xasprintf(&rune, "exec %s %s xl migrate-receive %s %s %s %s %s",
714                           ssh_command, host,
715                           "--colo",
716                           r_info.netbufscript ? "--coloft-script" : "",
717                           r_info.netbufscript ? r_info.netbufscript : "",
718                           libxl_defbool_val(r_info.userspace_colo_proxy) ?
719                           "--userspace-colo-proxy" : "",
720                           daemonize ? "" : " -e");
721             }
722         }
723 
724         save_domain_core_begin(domid, 0, NULL, &config_data, &config_len);
725 
726         if (!config_len) {
727             fprintf(stderr, "No config file stored for running domain and "
728                     "none supplied - cannot start remus.\n");
729             exit(EXIT_FAILURE);
730         }
731 
732         child = create_migration_child(rune, &send_fd, &recv_fd);
733 
734         migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len,
735                             rune);
736 
737         if (ssh_command[0])
738             free(rune);
739     }
740 
741     /* Point of no return */
742     rc = libxl_domain_remus_start(ctx, &r_info, domid, send_fd, recv_fd, 0);
743 
744     /* check if the domain exists. User may have xl destroyed the
745      * domain to force failover
746      */
747     if (libxl_domain_info(ctx, 0, domid)) {
748         fprintf(stderr, "%s: Primary domain has been destroyed.\n",
749                 libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
750         close(send_fd);
751         return EXIT_SUCCESS;
752     }
753 
754     /* If we are here, it means remus setup/domain suspend/backup has
755      * failed. Try to resume the domain and exit gracefully.
756      * TODO: Split-Brain check.
757      */
758     if (rc == ERROR_GUEST_TIMEDOUT)
759         fprintf(stderr, "Failed to suspend domain at primary.\n");
760     else {
761         fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n",
762                 libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
763         libxl_domain_resume(ctx, domid, 1, 0);
764     }
765 
766     close(send_fd);
767     return EXIT_FAILURE;
768 }
769 #endif
770 
771 
772 /*
773  * Local variables:
774  * mode: C
775  * c-basic-offset: 4
776  * indent-tabs-mode: nil
777  * End:
778  */
779