1 /*
2 * Copyright (C) 2014
3 * Author Shriram Rajagopalan <rshriram@cs.ubc.ca>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published
7 * by the Free Software Foundation; version 2.1 only. with the special
8 * exception on linking described in file LICENSE.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 */
15
16 #include "libxl_osdeps.h" /* must come before any other headers */
17
18 #include "libxl_internal.h"
19
20 #include <netlink/cache.h>
21 #include <netlink/socket.h>
22 #include <netlink/attr.h>
23 #include <netlink/route/link.h>
24 #include <netlink/route/route.h>
25 #include <netlink/route/qdisc.h>
26 #include <netlink/route/qdisc/plug.h>
27
/*
 * Per-NIC state for a checkpointed network device.  An instance is
 * allocated in nic_setup() and hung off the checkpoint device's
 * concrete_data pointer.
 */
typedef struct libxl__remus_device_nic {
    int devid;                  /* devid copied from the libxl_device_nic */

    const char *vif;            /* interface name obtained from get_vifname() */
    const char *ifb;            /* name read from .../remus/netbuf/<devid>/ifb */
    struct rtnl_qdisc *qdisc;   /* root plug qdisc on ifb, set by init_qdisc() */
} libxl__remus_device_nic;
35
/*
 * Report whether network buffering support is available.
 *
 * This implementation unconditionally answers yes (returns 1); the gc
 * argument is not consulted.
 */
int libxl__netbuffer_enabled(libxl__gc *gc)
{
    const int enabled = 1;

    return enabled;
}
40
/*
 * Prepare the netlink state shared by all buffered NICs of a domain.
 *
 * Allocates and connects a NETLINK_ROUTE socket, builds the qdisc cache
 * and records which netbuf script to run (the one configured in
 * dss->remus->netbufscript, or "remus-netbuf-setup" in the Xen script
 * directory when none is configured).
 *
 * Returns 0 on success, ERROR_FAIL otherwise.  Nothing acquired so far is
 * released here on failure; cleanup_subkind_nic() is the routine that
 * frees the socket and the cache.
 */
int init_subkind_nic(libxl__checkpoint_devices_state *cds)
{
    libxl__domain_save_state *dss = CONTAINER_OF(cds, *dss, cds);
    libxl__remus_state *rs = cds->concrete_data;
    int nlerr;

    STATE_AO_GC(cds->ao);

    rs->nlsock = nl_socket_alloc();
    if (rs->nlsock == NULL) {
        LOGD(ERROR, dss->domid, "cannot allocate nl socket");
        return ERROR_FAIL;
    }

    nlerr = nl_connect(rs->nlsock, NETLINK_ROUTE);
    if (nlerr != 0) {
        LOGD(ERROR, dss->domid, "failed to open netlink socket: %s",
             nl_geterror(nlerr));
        return ERROR_FAIL;
    }

    /* Snapshot the qdiscs currently installed on the network devices. */
    nlerr = rtnl_qdisc_alloc_cache(rs->nlsock, &rs->qdisc_cache);
    if (nlerr != 0) {
        LOGD(ERROR, dss->domid, "failed to allocate qdisc cache: %s",
             nl_geterror(nlerr));
        return ERROR_FAIL;
    }

    /* A script named in the configuration wins over the stock one. */
    rs->netbufscript = dss->remus->netbufscript
        ? libxl__strdup(gc, dss->remus->netbufscript)
        : GCSPRINTF("%s/remus-netbuf-setup", libxl__xen_script_dir_path());

    return 0;
}
85
/*
 * Release the netlink resources set up by init_subkind_nic().
 *
 * Either resource may be absent (NULL); each pointer in the remus state
 * is left NULL afterwards.
 */
void cleanup_subkind_nic(libxl__checkpoint_devices_state *cds)
{
    libxl__remus_state *rs = cds->concrete_data;
    struct nl_cache *cache = rs->qdisc_cache;
    struct nl_sock *sock = rs->nlsock;

    STATE_AO_GC(cds->ao);

    /* Drop the qdisc cache first ... */
    rs->qdisc_cache = NULL;
    if (cache != NULL) {
        nl_cache_clear(cache);
        nl_cache_free(cache);
    }

    /* ... then close and free the netlink socket. */
    rs->nlsock = NULL;
    if (sock != NULL) {
        nl_close(sock);
        nl_socket_free(sock);
    }
}
106
107 /*----- setup() and teardown() -----*/
108
109 /* helper functions */
110
111 /*
112 * If the device has a vifname, then use that instead of
113 * the vifX.Y format.
114 * it must ONLY be used for remus because if driver domains
115 * were in use it would constitute a security vulnerability.
116 */
get_vifname(libxl__checkpoint_device * dev,const libxl_device_nic * nic)117 static const char *get_vifname(libxl__checkpoint_device *dev,
118 const libxl_device_nic *nic)
119 {
120 const char *vifname = NULL;
121 const char *path;
122 int rc;
123
124 STATE_AO_GC(dev->cds->ao);
125
126 /* Convenience aliases */
127 const uint32_t domid = dev->cds->domid;
128
129 path = GCSPRINTF("%s/vifname",
130 libxl__domain_device_backend_path(gc, 0, domid,
131 nic->devid, LIBXL__DEVICE_KIND_VIF));
132
133 rc = libxl__xs_read_checked(gc, XBT_NULL, path, &vifname);
134 if (!rc && !vifname) {
135 vifname = libxl__device_nic_devname(gc, domid,
136 nic->devid,
137 nic->nictype);
138 }
139
140 return vifname;
141 }
142
/*
 * Drop the qdisc reference held by remus_nic, if there is one, and clear
 * the pointer so a second call is harmless.
 */
static void free_qdisc(libxl__remus_device_nic *remus_nic)
{
    struct rtnl_qdisc *held = remus_nic->qdisc;

    if (held != NULL) {
        nl_object_put((struct nl_object *)held);
        remus_nic->qdisc = NULL;
    }
}
151
/*
 * Locate the plug qdisc on the NIC's ifb device and store it in
 * remus_nic->qdisc.
 *
 * Expects remus_nic->ifb to name the interface.  Returns 0 on success
 * and ERROR_FAIL on any failure; remus_nic->qdisc is only written on
 * success.
 */
static int init_qdisc(libxl__checkpoint_devices_state *cds,
                      libxl__remus_device_nic *remus_nic)
{
    libxl__remus_state *rs = cds->concrete_data;
    struct rtnl_link *link = NULL;
    struct rtnl_qdisc *root = NULL;
    const char *kind;
    int rc, nlerr, ifindex;

    STATE_AO_GC(cds->ao);

    /*
     * The ifb device and its plug qdisc were brought up after the cache
     * was first filled, so the cache has to be refreshed to see them.
     */
    nlerr = nl_cache_refill(rs->nlsock, rs->qdisc_cache);
    if (nlerr != 0) {
        LOGD(ERROR, cds->domid,
             "cannot refill qdisc cache: %s", nl_geterror(nlerr));
        rc = ERROR_FAIL;
        goto out;
    }

    /* Look the ifb interface up by name. */
    nlerr = rtnl_link_get_kernel(rs->nlsock, 0, remus_nic->ifb, &link);
    if (nlerr != 0) {
        LOGD(ERROR, cds->domid,
             "cannot obtain handle for %s: %s", remus_nic->ifb,
             nl_geterror(nlerr));
        rc = ERROR_FAIL;
        goto out;
    }

    ifindex = rtnl_link_get_ifindex(link);
    if (ifindex == 0) {
        LOGD(ERROR, cds->domid,
             "interface %s has no index", remus_nic->ifb);
        rc = ERROR_FAIL;
        goto out;
    }

    /*
     * The netbuf script installs the plug qdisc as the root qdisc, so
     * only the root entry of the cached list needs to be inspected; the
     * rest of the qdisc tree on the ifb is not searched.
     *
     * The object obtained here is treated as a held reference: it is
     * put again below on failure, and by free_qdisc() once it has been
     * stored in remus_nic.
     */
    root = rtnl_qdisc_get_by_parent(rs->qdisc_cache, ifindex, TC_H_ROOT);
    if (root == NULL) {
        LOGD(ERROR, cds->domid,
             "Cannot get qdisc handle from ifb %s", remus_nic->ifb);
        rc = ERROR_FAIL;
        goto out;
    }

    /* Sanity check: the root qdisc really has to be a plug qdisc. */
    kind = rtnl_tc_get_kind(TC_CAST(root));
    if (kind == NULL || strcmp(kind, "plug") != 0) {
        LOGD(ERROR, cds->domid,
             "plug qdisc is not installed on %s", remus_nic->ifb);
        rc = ERROR_FAIL;
        goto out;
    }

    remus_nic->qdisc = root;
    rc = 0;

out:
    if (link != NULL)
        rtnl_link_put(link);

    if (rc != 0 && root != NULL)
        nl_object_put((struct nl_object *)root);

    return rc;
}
228
229 /* callbacks */
230
/* Completion callback installed by setup_async_exec() for the "setup" op. */
static void netbuf_setup_script_cb(libxl__egc *egc,
                                   libxl__async_exec_state *aes,
                                   int rc, int status);
/* Completion callback installed by setup_async_exec() for "teardown". */
static void netbuf_teardown_script_cb(libxl__egc *egc,
                                      libxl__async_exec_state *aes,
                                      int rc, int status);
237
238 /*
239 * the script needs the following env & args
240 * $vifname
241 * $XENBUS_PATH (/libxl/<domid>/remus/netbuf/<devid>/)
242 * $REMUS_IFB (for teardown)
243 * setup/teardown as command line arg.
244 */
setup_async_exec(libxl__checkpoint_device * dev,char * op)245 static void setup_async_exec(libxl__checkpoint_device *dev, char *op)
246 {
247 int arraysize, nr = 0;
248 char **env = NULL, **args = NULL;
249 libxl__remus_device_nic *remus_nic = dev->concrete_data;
250 libxl__checkpoint_devices_state *cds = dev->cds;
251 libxl__async_exec_state *aes = &dev->aodev.aes;
252 libxl__remus_state *rs = cds->concrete_data;
253
254 STATE_AO_GC(cds->ao);
255
256 /* Convenience aliases */
257 char *const script = libxl__strdup(gc, rs->netbufscript);
258 const uint32_t domid = cds->domid;
259 const int dev_id = remus_nic->devid;
260 const char *const vif = remus_nic->vif;
261 const char *const ifb = remus_nic->ifb;
262
263 arraysize = 7;
264 GCNEW_ARRAY(env, arraysize);
265 env[nr++] = "vifname";
266 env[nr++] = libxl__strdup(gc, vif);
267 env[nr++] = "XENBUS_PATH";
268 env[nr++] = GCSPRINTF("%s/remus/netbuf/%d",
269 libxl__xs_libxl_path(gc, domid), dev_id);
270 if (!strcmp(op, "teardown") && ifb) {
271 env[nr++] = "REMUS_IFB";
272 env[nr++] = libxl__strdup(gc, ifb);
273 }
274 env[nr++] = NULL;
275 assert(nr <= arraysize);
276
277 arraysize = 3; nr = 0;
278 GCNEW_ARRAY(args, arraysize);
279 args[nr++] = script;
280 args[nr++] = op;
281 args[nr++] = NULL;
282 assert(nr == arraysize);
283
284 aes->ao = dev->cds->ao;
285 aes->what = GCSPRINTF("%s %s", args[0], args[1]);
286 aes->env = env;
287 aes->args = args;
288 aes->timeout_ms = LIBXL_HOTPLUG_TIMEOUT * 1000;
289 aes->stdfds[0] = -1;
290 aes->stdfds[1] = -1;
291 aes->stdfds[2] = -1;
292
293 if (!strcmp(op, "teardown"))
294 aes->callback = netbuf_teardown_script_cb;
295 else
296 aes->callback = netbuf_setup_script_cb;
297 }
298
299 /* setup() and teardown() */
300
/*
 * Checkpoint-device "setup" operation for a NIC.
 *
 * Allocates the per-NIC state, resolves the interface name and launches
 * the netbuf script with the "setup" argument.  When the script has been
 * started, the result is reported later from netbuf_setup_script_cb();
 * otherwise the failure is reported through dev->aodev right away.
 */
static void nic_setup(libxl__egc *egc, libxl__checkpoint_device *dev)
{
    const libxl_device_nic *nic = dev->backend_dev;
    libxl__remus_device_nic *remus_nic;
    int rc;

    STATE_AO_GC(dev->cds->ao);

    /*
     * There is no subkind of nic devices, so the nic ops always match a
     * nic device.
     */
    dev->matched = true;

    GCNEW(remus_nic);
    dev->concrete_data = remus_nic;
    remus_nic->devid = nic->devid;
    remus_nic->vif = get_vifname(dev, nic);

    if (remus_nic->vif != NULL) {
        setup_async_exec(dev, "setup");
        rc = libxl__async_exec_start(&dev->aodev.aes);
        if (rc == 0)
            return;     /* the script callback finishes the operation */
    } else {
        rc = ERROR_FAIL;
    }

    /* Could not even start: complete the operation with the error. */
    dev->aodev.rc = rc;
    dev->aodev.callback(egc, &dev->aodev);
}
335
336 /*
337 * In return, the script writes the name of REMUS_IFB device (during setup)
338 * to be used for output buffering into XENBUS_PATH/ifb
339 */
netbuf_setup_script_cb(libxl__egc * egc,libxl__async_exec_state * aes,int rc,int status)340 static void netbuf_setup_script_cb(libxl__egc *egc,
341 libxl__async_exec_state *aes,
342 int rc, int status)
343 {
344 libxl__ao_device *aodev = CONTAINER_OF(aes, *aodev, aes);
345 libxl__checkpoint_device *dev = CONTAINER_OF(aodev, *dev, aodev);
346 libxl__remus_device_nic *remus_nic = dev->concrete_data;
347 libxl__checkpoint_devices_state *cds = dev->cds;
348 libxl__remus_state *rs = cds->concrete_data;
349 const char *out_path_base, *hotplug_error = NULL;
350
351 STATE_AO_GC(cds->ao);
352
353 /* Convenience aliases */
354 const uint32_t domid = cds->domid;
355 const int devid = remus_nic->devid;
356 const char *const vif = remus_nic->vif;
357 const char **const ifb = &remus_nic->ifb;
358
359 if (status && !rc)
360 rc = ERROR_FAIL;
361 if (rc)
362 goto out;
363
364 /*
365 * we need to get ifb first because it's needed for teardown
366 */
367 rc = libxl__xs_read_checked(gc, XBT_NULL,
368 GCSPRINTF("%s/remus/netbuf/%d/ifb",
369 libxl__xs_libxl_path(gc, domid),
370 devid),
371 ifb);
372 if (rc)
373 goto out;
374
375 if (!(*ifb)) {
376 LOGD(ERROR, domid, "Cannot get ifb dev name for domain %u dev %s",
377 domid, vif);
378 rc = ERROR_FAIL;
379 goto out;
380 }
381
382 out_path_base = GCSPRINTF("%s/remus/netbuf/%d",
383 libxl__xs_libxl_path(gc, domid), devid);
384
385 rc = libxl__xs_read_checked(gc, XBT_NULL,
386 GCSPRINTF("%s/hotplug-error", out_path_base),
387 &hotplug_error);
388 if (rc)
389 goto out;
390
391 if (hotplug_error) {
392 LOGD(ERROR, domid, "netbuf script %s setup failed for vif %s: %s",
393 rs->netbufscript, vif, hotplug_error);
394 rc = ERROR_FAIL;
395 goto out;
396 }
397
398 if (status) {
399 rc = ERROR_FAIL;
400 goto out;
401 }
402
403 LOGD(DEBUG, domid, "%s will buffer packets from vif %s", *ifb, vif);
404 rc = init_qdisc(cds, remus_nic);
405
406 out:
407 aodev->rc = rc;
408 aodev->callback(egc, aodev);
409 }
410
/*
 * Checkpoint-device "teardown" operation for a NIC.
 *
 * Launches the netbuf script with the "teardown" argument.  If it starts,
 * netbuf_teardown_script_cb() completes the operation; if it cannot be
 * started, the error is reported through dev->aodev immediately.
 */
static void nic_teardown(libxl__egc *egc, libxl__checkpoint_device *dev)
{
    libxl__ao_device *aodev = &dev->aodev;
    int rc;

    STATE_AO_GC(dev->cds->ao);

    setup_async_exec(dev, "teardown");

    rc = libxl__async_exec_start(&aodev->aes);
    if (rc == 0)
        return;

    aodev->rc = rc;
    aodev->callback(egc, aodev);
}
428
/*
 * Completion callback for the "teardown" run of the netbuf script.
 *
 * The qdisc reference held for the NIC is dropped regardless of how the
 * script fared.  A non-zero exit status with rc == 0 is reported as
 * ERROR_FAIL; otherwise rc is passed on unchanged.
 */
static void netbuf_teardown_script_cb(libxl__egc *egc,
                                      libxl__async_exec_state *aes,
                                      int rc, int status)
{
    libxl__ao_device *aodev = CONTAINER_OF(aes, *aodev, aes);
    libxl__checkpoint_device *dev = CONTAINER_OF(aodev, *dev, aodev);
    libxl__remus_device_nic *remus_nic = dev->concrete_data;
    const int result = (rc == 0 && status != 0) ? ERROR_FAIL : rc;

    free_qdisc(remus_nic);

    aodev->rc = result;
    aodev->callback(egc, aodev);
}
445
446 /*----- checkpointing APIs -----*/
447
/*
 * Values accepted as the buffer_op argument of remus_netbuf_op().
 * These are local selectors, not the values passed to the kernel.
 */
enum {
    tc_buffer_start = 0,    /* begin buffering a new epoch */
    tc_buffer_release = 1   /* release the previously buffered epoch */
};
453
454 /* API implementations */
455
/*
 * Apply one buffering operation to the NIC's plug qdisc.
 *
 * buffer_op selects the operation: tc_buffer_start queues a "buffer"
 * request, anything else queues a "release one" request.  The updated
 * qdisc is then submitted over the netlink socket.
 *
 * Returns 0 on success; on failure logs the libnl error and returns
 * ERROR_FAIL.
 */
static int remus_netbuf_op(libxl__remus_device_nic *remus_nic,
                           libxl__checkpoint_devices_state *cds,
                           int buffer_op)
{
    libxl__remus_state *rs = cds->concrete_data;
    const int starting = (buffer_op == tc_buffer_start);
    int nlerr;

    STATE_AO_GC(cds->ao);

    /* Record the requested action in the qdisc object ... */
    nlerr = starting ? rtnl_qdisc_plug_buffer(remus_nic->qdisc)
                     : rtnl_qdisc_plug_release_one(remus_nic->qdisc);

    /* ... and, if that worked, send it. */
    if (nlerr == 0)
        nlerr = rtnl_qdisc_add(rs->nlsock, remus_nic->qdisc, NLM_F_REQUEST);

    if (nlerr == 0)
        return 0;

    LOGD(ERROR, cds->domid, "Remus: cannot do netbuf op %s on %s:%s",
         starting ? "start_new_epoch" : "release_prev_epoch",
         remus_nic->ifb, nl_geterror(nlerr));
    return ERROR_FAIL;
}
491
/*
 * Checkpoint-device "postsuspend" operation for a NIC: issue the
 * tc_buffer_start operation on the NIC's qdisc and report the result
 * through dev->aodev straight away.
 */
static void nic_postsuspend(libxl__egc *egc, libxl__checkpoint_device *dev)
{
    libxl__remus_device_nic *remus_nic = dev->concrete_data;
    libxl__ao_device *aodev = &dev->aodev;

    STATE_AO_GC(dev->cds->ao);

    aodev->rc = remus_netbuf_op(remus_nic, dev->cds, tc_buffer_start);
    aodev->callback(egc, aodev);
}
504
/*
 * Checkpoint-device "commit" operation for a NIC: issue the
 * tc_buffer_release operation on the NIC's qdisc and report the result
 * through dev->aodev straight away.
 */
static void nic_commit(libxl__egc *egc, libxl__checkpoint_device *dev)
{
    libxl__remus_device_nic *remus_nic = dev->concrete_data;
    libxl__ao_device *aodev = &dev->aodev;

    STATE_AO_GC(dev->cds->ao);

    aodev->rc = remus_netbuf_op(remus_nic, dev->cds, tc_buffer_release);
    aodev->callback(egc, aodev);
}
517
/*
 * Checkpoint device operations for VIF devices: binds the setup,
 * teardown, postsuspend and commit hooks to the NIC handlers defined in
 * this file.
 */
const libxl__checkpoint_device_instance_ops remus_device_nic = {
    .kind = LIBXL__DEVICE_KIND_VIF,
    .setup = nic_setup,
    .teardown = nic_teardown,
    .postsuspend = nic_postsuspend,
    .commit = nic_commit,
};
525
526 /*
527 * Local variables:
528 * mode: C
529 * c-basic-offset: 4
530 * indent-tabs-mode: nil
531 * End:
532 */
533