1 /*
2  * Copyright (C) 2014
3  * Author Shriram Rajagopalan <rshriram@cs.ubc.ca>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published
7  * by the Free Software Foundation; version 2.1 only. with the special
8  * exception on linking described in file LICENSE.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  */
15 
16 #include "libxl_osdeps.h" /* must come before any other headers */
17 
18 #include "libxl_internal.h"
19 
20 #include <netlink/cache.h>
21 #include <netlink/socket.h>
22 #include <netlink/attr.h>
23 #include <netlink/route/link.h>
24 #include <netlink/route/route.h>
25 #include <netlink/route/qdisc.h>
26 #include <netlink/route/qdisc/plug.h>
27 
/*
 * Remus bookkeeping for a single guest network interface.
 */
typedef struct libxl__remus_device_nic {
    /* devid of the nic, copied from the libxl_device_nic at setup time */
    int devid;

    /* name of the vif handed to the netbuf script as $vifname */
    const char *vif;
    /* name of the REMUS_IFB device, read from xenstore after setup */
    const char *ifb;
    /* reference to the root qdisc looked up on the ifb device */
    struct rtnl_qdisc *qdisc;
} libxl__remus_device_nic;
35 
/*
 * Report whether network buffering support is available.
 *
 * This implementation unconditionally answers yes (1); gc is not used.
 */
int libxl__netbuffer_enabled(libxl__gc *gc)
{
    return 1;
}
40 
/*
 * Prepare the netlink state shared by the Remus nic devices of a domain.
 *
 * Allocates a netlink socket, connects it to NETLINK_ROUTE, allocates the
 * qdisc cache and records which netbuf script to run: the one configured
 * in dss->remus->netbufscript if set, otherwise "remus-netbuf-setup" in
 * the default script directory.
 *
 * Returns 0 on success and ERROR_FAIL otherwise.  On failure the netlink
 * socket allocated here is closed and freed again and rs->nlsock is reset
 * to NULL, so nothing is leaked even if the caller does not go on to call
 * cleanup_subkind_nic() (calling it anyway remains safe, since it skips a
 * NULL socket).
 */
int init_subkind_nic(libxl__checkpoint_devices_state *cds)
{
    int rc, ret;
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);
    libxl__remus_state *rs = cds->concrete_data;

    STATE_AO_GC(cds->ao);

    rs->nlsock = nl_socket_alloc();
    if (!rs->nlsock) {
        LOGD(ERROR, dss->domid, "cannot allocate nl socket");
        rc = ERROR_FAIL;
        goto out;
    }

    ret = nl_connect(rs->nlsock, NETLINK_ROUTE);
    if (ret) {
        LOGD(ERROR, dss->domid, "failed to open netlink socket: %s",
             nl_geterror(ret));
        rc = ERROR_FAIL;
        goto out;
    }

    /* get list of all qdiscs installed on network devs. */
    ret = rtnl_qdisc_alloc_cache(rs->nlsock, &rs->qdisc_cache);
    if (ret) {
        LOGD(ERROR, dss->domid, "failed to allocate qdisc cache: %s",
             nl_geterror(ret));
        rc = ERROR_FAIL;
        goto out;
    }

    if (dss->remus->netbufscript) {
        rs->netbufscript = libxl__strdup(gc, dss->remus->netbufscript);
    } else {
        rs->netbufscript = GCSPRINTF("%s/remus-netbuf-setup",
                                     libxl__xen_script_dir_path());
    }

    rc = 0;

out:
    /*
     * The qdisc cache is the last thing that can fail, so on any error
     * path the socket is the only resource this function still holds.
     */
    if (rc && rs->nlsock) {
        nl_close(rs->nlsock);
        nl_socket_free(rs->nlsock);
        rs->nlsock = NULL;
    }

    return rc;
}
85 
/*
 * Release the netlink state set up by init_subkind_nic().
 *
 * Each resource is skipped when its pointer is NULL and the pointer is
 * reset to NULL once the resource has been released.
 */
void cleanup_subkind_nic(libxl__checkpoint_devices_state *cds)
{
    libxl__remus_state *rs = cds->concrete_data;

    STATE_AO_GC(cds->ao);

    /* empty the qdisc cache, then drop it */
    if (rs->qdisc_cache != NULL) {
        nl_cache_clear(rs->qdisc_cache);
        nl_cache_free(rs->qdisc_cache);
        rs->qdisc_cache = NULL;
    }

    /* close the netlink socket, then free it */
    if (rs->nlsock != NULL) {
        nl_close(rs->nlsock);
        nl_socket_free(rs->nlsock);
        rs->nlsock = NULL;
    }
}
106 
107 /*----- setup() and teardown() -----*/
108 
109 /* helper functions */
110 
111 /*
112  * If the device has a vifname, then use that instead of
113  * the vifX.Y format.
114  * it must ONLY be used for remus because if driver domains
115  * were in use it would constitute a security vulnerability.
116  */
get_vifname(libxl__checkpoint_device * dev,const libxl_device_nic * nic)117 static const char *get_vifname(libxl__checkpoint_device *dev,
118                                const libxl_device_nic *nic)
119 {
120     const char *vifname = NULL;
121     const char *path;
122     int rc;
123 
124     STATE_AO_GC(dev->cds->ao);
125 
126     /* Convenience aliases */
127     const uint32_t domid = dev->cds->domid;
128 
129     path = GCSPRINTF("%s/vifname",
130                      libxl__domain_device_backend_path(gc, 0, domid,
131                      nic->devid, LIBXL__DEVICE_KIND_VIF));
132 
133     rc = libxl__xs_read_checked(gc, XBT_NULL, path, &vifname);
134     if (!rc && !vifname) {
135         vifname = libxl__device_nic_devname(gc, domid,
136                                             nic->devid,
137                                             nic->nictype);
138     }
139 
140     return vifname;
141 }
142 
/*
 * Drop the qdisc reference held by remus_nic, if any, and clear the
 * pointer so that a repeated call does nothing.
 */
static void free_qdisc(libxl__remus_device_nic *remus_nic)
{
    if (remus_nic->qdisc != NULL) {
        nl_object_put((struct nl_object *)remus_nic->qdisc);
        remus_nic->qdisc = NULL;
    }
}
151 
/*
 * Look up the root qdisc of remus_nic->ifb and store it in
 * remus_nic->qdisc.
 *
 * The qdisc cache is refilled first, since the REMUS_IFB device and its
 * plug qdisc were only brought up after the cache had been allocated.
 * The root qdisc must be of kind "plug", otherwise the lookup is treated
 * as a failure.
 *
 * Returns 0 on success, ERROR_FAIL otherwise.  The link handle is always
 * released before returning.  The qdisc reference is released here on
 * failure; on success it stays in remus_nic->qdisc.
 */
static int init_qdisc(libxl__checkpoint_devices_state *cds,
                      libxl__remus_device_nic *remus_nic)
{
    int rc = ERROR_FAIL;
    int err, ifindex;
    const char *kind;
    struct rtnl_link *link = NULL;
    struct rtnl_qdisc *root = NULL;
    libxl__remus_state *rs = cds->concrete_data;

    STATE_AO_GC(cds->ao);

    /* pick up the qdiscs installed since the cache was last filled */
    err = nl_cache_refill(rs->nlsock, rs->qdisc_cache);
    if (err) {
        LOGD(ERROR, cds->domid,
             "cannot refill qdisc cache: %s", nl_geterror(err));
        goto out;
    }

    /* get a handle to the REMUS_IFB interface */
    err = rtnl_link_get_kernel(rs->nlsock, 0, remus_nic->ifb, &link);
    if (err) {
        LOGD(ERROR, cds->domid,
             "cannot obtain handle for %s: %s", remus_nic->ifb,
             nl_geterror(err));
        goto out;
    }

    ifindex = rtnl_link_get_ifindex(link);
    if (!ifindex) {
        LOGD(ERROR, cds->domid,
             "interface %s has no index", remus_nic->ifb);
        goto out;
    }

    /*
     * The netbufscript sets up the plug qdisc as the root qdisc, so only
     * the root of the REMUS_IFB dev is queried rather than its entire
     * qdisc tree.
     */
    root = rtnl_qdisc_get_by_parent(rs->qdisc_cache, ifindex, TC_H_ROOT);
    if (!root) {
        LOGD(ERROR, cds->domid,
             "Cannot get qdisc handle from ifb %s", remus_nic->ifb);
        goto out;
    }

    /* Sanity check: Ensure that the root qdisc is a plug qdisc. */
    kind = rtnl_tc_get_kind(TC_CAST(root));
    if (!kind || strcmp(kind, "plug")) {
        LOGD(ERROR, cds->domid,
             "plug qdisc is not installed on %s", remus_nic->ifb);
        goto out;
    }

    remus_nic->qdisc = root;
    rc = 0;

out:
    if (link)
        rtnl_link_put(link);

    /* on failure nobody keeps the qdisc, so give the reference back */
    if (rc && root)
        nl_object_put((struct nl_object *)root);

    return rc;
}
228 
229 /* callbacks */
230 
231 static void netbuf_setup_script_cb(libxl__egc *egc,
232                                    libxl__async_exec_state *aes,
233                                    int rc, int status);
234 static void netbuf_teardown_script_cb(libxl__egc *egc,
235                                       libxl__async_exec_state *aes,
236                                       int rc, int status);
237 
238 /*
239  * the script needs the following env & args
240  * $vifname
241  * $XENBUS_PATH (/libxl/<domid>/remus/netbuf/<devid>/)
242  * $REMUS_IFB (for teardown)
243  * setup/teardown as command line arg.
244  */
setup_async_exec(libxl__checkpoint_device * dev,char * op)245 static void setup_async_exec(libxl__checkpoint_device *dev, char *op)
246 {
247     int arraysize, nr = 0;
248     char **env = NULL, **args = NULL;
249     libxl__remus_device_nic *remus_nic = dev->concrete_data;
250     libxl__checkpoint_devices_state *cds = dev->cds;
251     libxl__async_exec_state *aes = &dev->aodev.aes;
252     libxl__remus_state *rs = cds->concrete_data;
253 
254     STATE_AO_GC(cds->ao);
255 
256     /* Convenience aliases */
257     char *const script = libxl__strdup(gc, rs->netbufscript);
258     const uint32_t domid = cds->domid;
259     const int dev_id = remus_nic->devid;
260     const char *const vif = remus_nic->vif;
261     const char *const ifb = remus_nic->ifb;
262 
263     arraysize = 7;
264     GCNEW_ARRAY(env, arraysize);
265     env[nr++] = "vifname";
266     env[nr++] = libxl__strdup(gc, vif);
267     env[nr++] = "XENBUS_PATH";
268     env[nr++] = GCSPRINTF("%s/remus/netbuf/%d",
269                           libxl__xs_libxl_path(gc, domid), dev_id);
270     if (!strcmp(op, "teardown") && ifb) {
271         env[nr++] = "REMUS_IFB";
272         env[nr++] = libxl__strdup(gc, ifb);
273     }
274     env[nr++] = NULL;
275     assert(nr <= arraysize);
276 
277     arraysize = 3; nr = 0;
278     GCNEW_ARRAY(args, arraysize);
279     args[nr++] = script;
280     args[nr++] = op;
281     args[nr++] = NULL;
282     assert(nr == arraysize);
283 
284     aes->ao = dev->cds->ao;
285     aes->what = GCSPRINTF("%s %s", args[0], args[1]);
286     aes->env = env;
287     aes->args = args;
288     aes->timeout_ms = LIBXL_HOTPLUG_TIMEOUT * 1000;
289     aes->stdfds[0] = -1;
290     aes->stdfds[1] = -1;
291     aes->stdfds[2] = -1;
292 
293     if (!strcmp(op, "teardown"))
294         aes->callback = netbuf_teardown_script_cb;
295     else
296         aes->callback = netbuf_setup_script_cb;
297 }
298 
299 /* setup() and teardown() */
300 
/*
 * Begin setting up network buffering for one nic: allocate the per-nic
 * state, determine the vif name and start the netbuf script with the
 * "setup" operation.
 *
 * If the script was started this returns without signalling completion;
 * in that case the callback configured by setup_async_exec() is
 * expected to do so.  If anything fails beforehand, the error is stored
 * in dev->aodev.rc and dev->aodev.callback is invoked right away.
 */
static void nic_setup(libxl__egc *egc, libxl__checkpoint_device *dev)
{
    const libxl_device_nic *nic = dev->backend_dev;
    libxl__remus_device_nic *remus_nic;
    int rc;

    STATE_AO_GC(dev->cds->ao);

    /*
     * there's no subkind of nic devices, so nic ops is always matched
     * with nic devices
     */
    dev->matched = true;

    GCNEW(remus_nic);
    remus_nic->devid = nic->devid;
    dev->concrete_data = remus_nic;

    remus_nic->vif = get_vifname(dev, nic);
    if (remus_nic->vif) {
        setup_async_exec(dev, "setup");
        rc = libxl__async_exec_start(&dev->aodev.aes);
        if (!rc)
            return;
    } else {
        rc = ERROR_FAIL;
    }

    /* nothing was started: report the failure ourselves */
    dev->aodev.rc = rc;
    dev->aodev.callback(egc, &dev->aodev);
}
335 
336 /*
337  * In return, the script writes the name of REMUS_IFB device (during setup)
338  * to be used for output buffering into XENBUS_PATH/ifb
339  */
netbuf_setup_script_cb(libxl__egc * egc,libxl__async_exec_state * aes,int rc,int status)340 static void netbuf_setup_script_cb(libxl__egc *egc,
341                                    libxl__async_exec_state *aes,
342                                    int rc, int status)
343 {
344     libxl__ao_device *aodev = CONTAINER_OF(aes, *aodev, aes);
345     libxl__checkpoint_device *dev = CONTAINER_OF(aodev, *dev, aodev);
346     libxl__remus_device_nic *remus_nic = dev->concrete_data;
347     libxl__checkpoint_devices_state *cds = dev->cds;
348     libxl__remus_state *rs = cds->concrete_data;
349     const char *out_path_base, *hotplug_error = NULL;
350 
351     STATE_AO_GC(cds->ao);
352 
353     /* Convenience aliases */
354     const uint32_t domid = cds->domid;
355     const int devid = remus_nic->devid;
356     const char *const vif = remus_nic->vif;
357     const char **const ifb = &remus_nic->ifb;
358 
359     if (status && !rc)
360         rc = ERROR_FAIL;
361     if (rc)
362         goto out;
363 
364     /*
365      * we need to get ifb first because it's needed for teardown
366      */
367     rc = libxl__xs_read_checked(gc, XBT_NULL,
368                                 GCSPRINTF("%s/remus/netbuf/%d/ifb",
369                                           libxl__xs_libxl_path(gc, domid),
370                                           devid),
371                                 ifb);
372     if (rc)
373         goto out;
374 
375     if (!(*ifb)) {
376         LOGD(ERROR, domid, "Cannot get ifb dev name for domain %u dev %s",
377              domid, vif);
378         rc = ERROR_FAIL;
379         goto out;
380     }
381 
382     out_path_base = GCSPRINTF("%s/remus/netbuf/%d",
383                               libxl__xs_libxl_path(gc, domid), devid);
384 
385     rc = libxl__xs_read_checked(gc, XBT_NULL,
386                                 GCSPRINTF("%s/hotplug-error", out_path_base),
387                                 &hotplug_error);
388     if (rc)
389         goto out;
390 
391     if (hotplug_error) {
392         LOGD(ERROR, domid, "netbuf script %s setup failed for vif %s: %s",
393              rs->netbufscript, vif, hotplug_error);
394         rc = ERROR_FAIL;
395         goto out;
396     }
397 
398     if (status) {
399         rc = ERROR_FAIL;
400         goto out;
401     }
402 
403     LOGD(DEBUG, domid, "%s will buffer packets from vif %s", *ifb, vif);
404     rc = init_qdisc(cds, remus_nic);
405 
406 out:
407     aodev->rc = rc;
408     aodev->callback(egc, aodev);
409 }
410 
/*
 * Start the netbuf script with the "teardown" operation for this nic.
 *
 * If the script cannot be started, the error is stored in dev->aodev.rc
 * and dev->aodev.callback is invoked immediately; otherwise this returns
 * without signalling completion.
 */
static void nic_teardown(libxl__egc *egc, libxl__checkpoint_device *dev)
{
    int rc;

    STATE_AO_GC(dev->cds->ao);

    setup_async_exec(dev, "teardown");

    rc = libxl__async_exec_start(&dev->aodev.aes);
    if (!rc)
        return;

    /* the script never ran, so report the failure from here */
    dev->aodev.rc = rc;
    dev->aodev.callback(egc, &dev->aodev);
}
428 
/*
 * Completion callback for the netbuf script's "teardown" operation.
 *
 * The qdisc reference of the nic is dropped regardless of how the script
 * fared.  A non-zero status with rc == 0 is reported as ERROR_FAIL; any
 * other rc is passed through unchanged.  The result is stored in
 * aodev->rc and aodev->callback is invoked.
 */
static void netbuf_teardown_script_cb(libxl__egc *egc,
                                      libxl__async_exec_state *aes,
                                      int rc, int status)
{
    libxl__ao_device *aodev = CONTAINER_OF(aes, *aodev, aes);
    libxl__checkpoint_device *dev = CONTAINER_OF(aodev, *dev, aodev);
    libxl__remus_device_nic *remus_nic = dev->concrete_data;

    free_qdisc(remus_nic);

    aodev->rc = (!rc && status) ? ERROR_FAIL : rc;
    aodev->callback(egc, aodev);
}
445 
446 /*----- checkpointing APIs -----*/
447 
/*
 * Selectors for the buffer_op argument of remus_netbuf_op().  These are
 * local values, not the values passed to the kernel.
 */
enum {
    tc_buffer_start = 0,
    tc_buffer_release = 1
};
453 
454 /* API implementations */
455 
/*
 * Apply a buffer operation to the qdisc of a nic.
 *
 * tc_buffer_start selects rtnl_qdisc_plug_buffer(); any other value of
 * buffer_op selects rtnl_qdisc_plug_release_one().  If that call
 * succeeds, the qdisc is then submitted over the netlink socket with
 * rtnl_qdisc_add().
 *
 * Returns 0 on success; on failure the netlink error is logged and
 * ERROR_FAIL is returned.
 */
static int remus_netbuf_op(libxl__remus_device_nic *remus_nic,
                           libxl__checkpoint_devices_state *cds,
                           int buffer_op)
{
    libxl__remus_state *rs = cds->concrete_data;
    const int starting = (buffer_op == tc_buffer_start);
    int err;

    STATE_AO_GC(cds->ao);

    err = starting ? rtnl_qdisc_plug_buffer(remus_nic->qdisc)
                   : rtnl_qdisc_plug_release_one(remus_nic->qdisc);

    /* only talk to the kernel if the qdisc update itself worked */
    if (!err)
        err = rtnl_qdisc_add(rs->nlsock, remus_nic->qdisc, NLM_F_REQUEST);

    if (!err)
        return 0;

    LOGD(ERROR, cds->domid, "Remus: cannot do netbuf op %s on %s:%s",
         starting ? "start_new_epoch" : "release_prev_epoch",
         remus_nic->ifb, nl_geterror(err));
    return ERROR_FAIL;
}
491 
/*
 * postsuspend hook: run the tc_buffer_start operation on the nic's
 * qdisc, store the result in dev->aodev.rc and invoke
 * dev->aodev.callback.
 */
static void nic_postsuspend(libxl__egc *egc, libxl__checkpoint_device *dev)
{
    libxl__remus_device_nic *nic_state = dev->concrete_data;
    libxl__ao_device *aodev = &dev->aodev;

    STATE_AO_GC(dev->cds->ao);

    aodev->rc = remus_netbuf_op(nic_state, dev->cds, tc_buffer_start);
    aodev->callback(egc, aodev);
}
504 
/*
 * commit hook: run the tc_buffer_release operation on the nic's qdisc,
 * store the result in dev->aodev.rc and invoke dev->aodev.callback.
 */
static void nic_commit(libxl__egc *egc, libxl__checkpoint_device *dev)
{
    libxl__remus_device_nic *nic_state = dev->concrete_data;
    libxl__ao_device *aodev = &dev->aodev;

    STATE_AO_GC(dev->cds->ao);

    aodev->rc = remus_netbuf_op(nic_state, dev->cds, tc_buffer_release);
    aodev->callback(egc, aodev);
}
517 
518 const libxl__checkpoint_device_instance_ops remus_device_nic = {
519     .kind = LIBXL__DEVICE_KIND_VIF,
520     .setup = nic_setup,
521     .teardown = nic_teardown,
522     .postsuspend = nic_postsuspend,
523     .commit = nic_commit,
524 };
525 
526 /*
527  * Local variables:
528  * mode: C
529  * c-basic-offset: 4
530  * indent-tabs-mode: nil
531  * End:
532  */
533