1 /*
2 Domain communications for Xen Store Daemon.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include <stdio.h>
20 #include <sys/mman.h>
21 #include <unistd.h>
22 #include <stdlib.h>
23 #include <stdarg.h>
24 #include <time.h>
25 #include <syslog.h>
26
27 #include "utils.h"
28 #include "talloc.h"
29 #include "xenstored_core.h"
30 #include "xenstored_domain.h"
31 #include "xenstored_transaction.h"
32 #include "xenstored_watch.h"
33
34 #include <xenevtchn.h>
35 #include <xenctrl.h>
36 #include <xen/grant_table.h>
37
/* Hypervisor control handle; allocated and opened by domain_init(). */
static xc_interface **xc_handle;
/* Grant-table handle used by map_interface()/unmap_interface(). */
xengnttab_handle **xgt_handle;
/* Local port bound to VIRQ_DOM_EXC; handle_event() runs domain_cleanup()
   when this port is pending. */
static evtchn_port_t virq_port;

/* Event channel handle, opened by domain_init(). */
xenevtchn_handle *xce_handle = NULL;

/* Permissions of the special "@releaseDomain" and "@introduceDomain"
   nodes, see get_perms_special(). */
static struct node_perms dom_release_perms;
static struct node_perms dom_introduce_perms;
46
/* Per-domain bookkeeping; every instance is linked into "domains". */
struct domain
{
	/* Linkage in the file-scope "domains" list. */
	struct list_head list;

	/* The id of this domain */
	unsigned int domid;

	/* Local event channel port; 0 while no channel is bound. */
	evtchn_port_t port;

	/* The remote end of the event channel, used only to validate
	   repeated domain introductions. */
	evtchn_port_t remote_port;

	/* Domain path in store ("/local/domain/<domid>"). */
	char *path;

	/* Shared page holding the request/response rings. */
	struct xenstore_domain_interface *interface;

	/* The connection associated with this. */
	struct connection *conn;

	/* Generation count at domain introduction time. */
	uint64_t generation;

	/* Have we noticed that this domain is shutdown? */
	bool shutdown;

	/* Has domain been officially introduced? */
	bool introduced;

	/* Number of store entries accounted to this domain. */
	int nbentry;

	/* Number of watches accounted to this domain. */
	int nbwatch;

	/* write rate limit */
	wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */
	/* Time of the last credit update. */
	struct wrl_timestampt wrl_timestamp;
	/* Set once the "is affected" warning was logged for this domain. */
	bool wrl_delay_logged;
};
90
/* All known domains, introduced or not; entries are added by
   alloc_domain() and removed by destroy_domain(). */
static LIST_HEAD(domains);
92
/*
 * Sanity check a consumer/producer index pair taken from the shared page.
 * Returns false when the producer is further ahead of the consumer than
 * the ring can hold.
 */
static bool check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{
	if ((prod - cons) > XENSTORE_RING_SIZE)
		return false;

	return true;
}
97
/*
 * Locate the next writable stretch of a ring buffer.
 *
 * Returns a pointer to the slot at the producer position inside buf and
 * stores in *len how many bytes may be written there: the smaller of the
 * distance to the end of the buffer and the free space left in the ring.
 */
static void *get_output_chunk(XENSTORE_RING_IDX cons,
			      XENSTORE_RING_IDX prod,
			      char *buf, uint32_t *len)
{
	uint32_t until_end = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
	uint32_t unused = XENSTORE_RING_SIZE - (prod - cons);

	*len = (unused < until_end) ? unused : until_end;

	return buf + MASK_XENSTORE_IDX(prod);
}
107
/*
 * Locate the next readable stretch of a ring buffer.
 *
 * Returns a pointer to the slot at the consumer position inside buf and
 * stores in *len how many bytes may be read there: the smaller of the
 * distance to the end of the buffer and the amount of pending data.
 */
static const void *get_input_chunk(XENSTORE_RING_IDX cons,
				   XENSTORE_RING_IDX prod,
				   const char *buf, uint32_t *len)
{
	uint32_t until_end = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
	uint32_t pending = prod - cons;

	*len = (pending < until_end) ? pending : until_end;

	return buf + MASK_XENSTORE_IDX(cons);
}
117
writechn(struct connection * conn,const void * data,unsigned int len)118 static int writechn(struct connection *conn,
119 const void *data, unsigned int len)
120 {
121 uint32_t avail;
122 void *dest;
123 struct xenstore_domain_interface *intf = conn->domain->interface;
124 XENSTORE_RING_IDX cons, prod;
125
126 /* Must read indexes once, and before anything else, and verified. */
127 cons = intf->rsp_cons;
128 prod = intf->rsp_prod;
129 xen_mb();
130
131 if (!check_indexes(cons, prod)) {
132 errno = EIO;
133 return -1;
134 }
135
136 dest = get_output_chunk(cons, prod, intf->rsp, &avail);
137 if (avail < len)
138 len = avail;
139
140 memcpy(dest, data, len);
141 xen_mb();
142 intf->rsp_prod += len;
143
144 xenevtchn_notify(xce_handle, conn->domain->port);
145
146 return len;
147 }
148
readchn(struct connection * conn,void * data,unsigned int len)149 static int readchn(struct connection *conn, void *data, unsigned int len)
150 {
151 uint32_t avail;
152 const void *src;
153 struct xenstore_domain_interface *intf = conn->domain->interface;
154 XENSTORE_RING_IDX cons, prod;
155
156 /* Must read indexes once, and before anything else, and verified. */
157 cons = intf->req_cons;
158 prod = intf->req_prod;
159 xen_mb();
160
161 if (!check_indexes(cons, prod)) {
162 errno = EIO;
163 return -1;
164 }
165
166 src = get_input_chunk(cons, prod, intf->req, &avail);
167 if (avail < len)
168 len = avail;
169
170 memcpy(data, src, len);
171 xen_mb();
172 intf->req_cons += len;
173
174 xenevtchn_notify(xce_handle, conn->domain->port);
175
176 return len;
177 }
178
/*
 * Map the page behind grant reference GNTTAB_RESERVED_XENSTORE of domid
 * read/write.  Returns whatever xengnttab_map_grant_ref() returns.
 */
static void *map_interface(domid_t domid)
{
	void *page;

	page = xengnttab_map_grant_ref(*xgt_handle, domid,
				       GNTTAB_RESERVED_XENSTORE,
				       PROT_READ|PROT_WRITE);
	return page;
}
185
unmap_interface(void * interface)186 static void unmap_interface(void *interface)
187 {
188 xengnttab_unmap(*xgt_handle, interface, 1);
189 }
190
destroy_domain(void * _domain)191 static int destroy_domain(void *_domain)
192 {
193 struct domain *domain = _domain;
194
195 list_del(&domain->list);
196
197 if (!domain->introduced)
198 return 0;
199
200 if (domain->port) {
201 if (xenevtchn_unbind(xce_handle, domain->port) == -1)
202 eprintf("> Unbinding port %i failed!\n", domain->port);
203 }
204
205 if (domain->interface) {
206 /* Domain 0 was mapped by dom0_init, so it must be unmapped
207 using munmap() and not the grant unmap call. */
208 if (domain->domid == 0)
209 unmap_xenbus(domain->interface);
210 else
211 unmap_interface(domain->interface);
212 }
213
214 fire_watches(NULL, domain, "@releaseDomain", NULL, false, NULL);
215
216 wrl_domain_destroy(domain);
217
218 return 0;
219 }
220
/*
 * Query the hypervisor for domid.  Returns true only if exactly one
 * record was returned and it describes the requested domain.
 */
static bool get_domain_info(unsigned int domid, xc_dominfo_t *dominfo)
{
	if (xc_domain_getinfo(*xc_handle, domid, 1, dominfo) != 1)
		return false;

	return dominfo->domid == domid;
}
226
/*
 * Reconcile the "domains" list with the hypervisor's view.
 *
 * Not-introduced entries whose domain no longer exists are freed.
 * Introduced domains that crashed or shut down are flagged and cause a
 * single "@releaseDomain" watch event at the end.  Introduced domains
 * that are dying or gone have their connection released.
 */
static void domain_cleanup(void)
{
	xc_dominfo_t dominfo;
	struct domain *domain;
	struct connection *conn;
	int notify = 0;
	bool dom_valid;

	/* The scan restarts from the top after every removal, since
	   freeing a domain unlinks it from the list being walked. */
again:
	list_for_each_entry(domain, &domains, list) {
		dom_valid = get_domain_info(domain->domid, &dominfo);
		if (!domain->introduced) {
			/* Placeholder entry: drop it once the domain is gone. */
			if (!dom_valid) {
				talloc_free(domain);
				goto again;
			}
			continue;
		}
		if (dom_valid) {
			/* Note a crash/shutdown only the first time. */
			if ((dominfo.crashed || dominfo.shutdown)
			    && !domain->shutdown) {
				domain->shutdown = true;
				notify = 1;
			}
			/* Still alive (or merely shut down): keep it. */
			if (!dominfo.dying)
				continue;
		}
		if (domain->conn) {
			/* domain is a talloc child of domain->conn. */
			conn = domain->conn;
			domain->conn = NULL;
			talloc_unlink(talloc_autofree_context(), conn);
			notify = 0; /* destroy_domain() fires the watch */
			goto again;
		}
	}

	if (notify)
		fire_watches(NULL, NULL, "@releaseDomain", NULL, false, NULL);
}
267
268 /* We scan all domains rather than use the information given here. */
handle_event(void)269 void handle_event(void)
270 {
271 evtchn_port_t port;
272
273 if ((port = xenevtchn_pending(xce_handle)) == -1)
274 barf_perror("Failed to read from event fd");
275
276 if (port == virq_port)
277 domain_cleanup();
278
279 if (xenevtchn_unmask(xce_handle, port) == -1)
280 barf_perror("Failed to write to event fd");
281 }
282
domain_can_read(struct connection * conn)283 bool domain_can_read(struct connection *conn)
284 {
285 struct xenstore_domain_interface *intf = conn->domain->interface;
286
287 if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0)
288 return false;
289
290 if (conn->is_ignored)
291 return false;
292
293 return (intf->req_cons != intf->req_prod);
294 }
295
domid_is_unprivileged(unsigned int domid)296 static bool domid_is_unprivileged(unsigned int domid)
297 {
298 return domid != 0 && domid != priv_domid;
299 }
300
domain_is_unprivileged(struct connection * conn)301 bool domain_is_unprivileged(struct connection *conn)
302 {
303 return conn && conn->domain &&
304 domid_is_unprivileged(conn->domain->domid);
305 }
306
domain_can_write(struct connection * conn)307 bool domain_can_write(struct connection *conn)
308 {
309 struct xenstore_domain_interface *intf = conn->domain->interface;
310
311 if (conn->is_ignored)
312 return false;
313
314 return ((intf->rsp_prod - intf->rsp_cons) != XENSTORE_RING_SIZE);
315 }
316
/*
 * Build "/local/domain/<domid>" as a talloc string under context.
 * Returns the result of talloc_asprintf().
 */
static char *talloc_domain_path(void *context, unsigned int domid)
{
	char *path = talloc_asprintf(context, "/local/domain/%u", domid);

	return path;
}
321
find_domain_struct(unsigned int domid)322 static struct domain *find_domain_struct(unsigned int domid)
323 {
324 struct domain *i;
325
326 list_for_each_entry(i, &domains, list) {
327 if (i->domid == domid)
328 return i;
329 }
330 return NULL;
331 }
332
alloc_domain(void * context,unsigned int domid)333 static struct domain *alloc_domain(void *context, unsigned int domid)
334 {
335 struct domain *domain;
336
337 domain = talloc(context, struct domain);
338 if (!domain) {
339 errno = ENOMEM;
340 return NULL;
341 }
342
343 domain->domid = domid;
344 domain->generation = generation;
345 domain->introduced = false;
346
347 talloc_set_destructor(domain, destroy_domain);
348
349 list_add(&domain->list, &domains);
350
351 return domain;
352 }
353
new_domain(struct domain * domain,int port)354 static int new_domain(struct domain *domain, int port)
355 {
356 int rc;
357
358 domain->port = 0;
359 domain->shutdown = false;
360 domain->path = talloc_domain_path(domain, domain->domid);
361 if (!domain->path) {
362 errno = ENOMEM;
363 return errno;
364 }
365
366 wrl_domain_new(domain);
367
368 /* Tell kernel we're interested in this event. */
369 rc = xenevtchn_bind_interdomain(xce_handle, domain->domid, port);
370 if (rc == -1)
371 return errno;
372 domain->port = rc;
373
374 domain->introduced = true;
375
376 domain->conn = new_connection(writechn, readchn);
377 if (!domain->conn) {
378 errno = ENOMEM;
379 return errno;
380 }
381
382 domain->conn->domain = domain;
383 domain->conn->id = domain->domid;
384
385 domain->remote_port = port;
386 domain->nbentry = 0;
387 domain->nbwatch = 0;
388
389 return 0;
390 }
391
392
find_domain_by_domid(unsigned int domid)393 static struct domain *find_domain_by_domid(unsigned int domid)
394 {
395 struct domain *d;
396
397 d = find_domain_struct(domid);
398
399 return (d && d->introduced) ? d : NULL;
400 }
401
domain_conn_reset(struct domain * domain)402 static void domain_conn_reset(struct domain *domain)
403 {
404 struct connection *conn = domain->conn;
405 struct buffered_data *out;
406
407 conn_delete_all_watches(conn);
408 conn_delete_all_transactions(conn);
409
410 while ((out = list_top(&conn->out_list, struct buffered_data, list))) {
411 list_del(&out->list);
412 talloc_free(out);
413 }
414
415 talloc_free(conn->in);
416
417 domain->interface->req_cons = domain->interface->req_prod = 0;
418 domain->interface->rsp_cons = domain->interface->rsp_prod = 0;
419 }
420
421 /* domid, gfn, evtchn, path */
do_introduce(struct connection * conn,struct buffered_data * in)422 int do_introduce(struct connection *conn, struct buffered_data *in)
423 {
424 struct domain *domain;
425 char *vec[3];
426 unsigned int domid;
427 evtchn_port_t port;
428 int rc;
429 struct xenstore_domain_interface *interface;
430
431 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
432 return EINVAL;
433
434 if (!conn->can_write)
435 return EACCES;
436
437 domid = atoi(vec[0]);
438 /* Ignore the gfn, we don't need it. */
439 port = atoi(vec[2]);
440
441 /* Sanity check args. */
442 if (port <= 0)
443 return EINVAL;
444
445 domain = find_domain_struct(domid);
446
447 if (domain == NULL) {
448 /* Hang domain off "in" until we're finished. */
449 domain = alloc_domain(in, domid);
450 if (domain == NULL)
451 return ENOMEM;
452 }
453
454 if (!domain->introduced) {
455 interface = map_interface(domid);
456 if (!interface)
457 return errno;
458 /* Hang domain off "in" until we're finished. */
459 if (new_domain(domain, port)) {
460 rc = errno;
461 unmap_interface(interface);
462 return rc;
463 }
464 domain->interface = interface;
465
466 /* Now domain belongs to its connection. */
467 talloc_steal(domain->conn, domain);
468
469 fire_watches(NULL, in, "@introduceDomain", NULL, false, NULL);
470 } else {
471 /* Use XS_INTRODUCE for recreating the xenbus event-channel. */
472 if (domain->port)
473 xenevtchn_unbind(xce_handle, domain->port);
474 rc = xenevtchn_bind_interdomain(xce_handle, domid, port);
475 domain->port = (rc == -1) ? 0 : rc;
476 domain->remote_port = port;
477 }
478
479 domain_conn_reset(domain);
480
481 send_ack(conn, XS_INTRODUCE);
482
483 return 0;
484 }
485
find_connected_domain(unsigned int domid)486 static struct domain *find_connected_domain(unsigned int domid)
487 {
488 struct domain *domain;
489
490 domain = find_domain_by_domid(domid);
491 if (!domain)
492 return ERR_PTR(-ENOENT);
493 if (!domain->conn)
494 return ERR_PTR(-EINVAL);
495 return domain;
496 }
497
do_set_target(struct connection * conn,struct buffered_data * in)498 int do_set_target(struct connection *conn, struct buffered_data *in)
499 {
500 char *vec[2];
501 unsigned int domid, tdomid;
502 struct domain *domain, *tdomain;
503 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
504 return EINVAL;
505
506 if (!conn->can_write)
507 return EACCES;
508
509 domid = atoi(vec[0]);
510 tdomid = atoi(vec[1]);
511
512 domain = find_connected_domain(domid);
513 if (IS_ERR(domain))
514 return -PTR_ERR(domain);
515
516 tdomain = find_connected_domain(tdomid);
517 if (IS_ERR(tdomain))
518 return -PTR_ERR(tdomain);
519
520 talloc_reference(domain->conn, tdomain->conn);
521 domain->conn->target = tdomain->conn;
522
523 send_ack(conn, XS_SET_TARGET);
524
525 return 0;
526 }
527
/*
 * Parse the single domid argument of a request and look up the matching
 * connected domain.  A missing argument or a domid of 0 yields
 * ERR_PTR(-EINVAL); otherwise the result of find_connected_domain().
 */
static struct domain *onearg_domain(struct connection *conn,
				    struct buffered_data *in)
{
	const char *arg = onearg(in);
	unsigned int domid;

	if (arg == NULL)
		return ERR_PTR(-EINVAL);

	domid = atoi(arg);
	if (domid == 0)
		return ERR_PTR(-EINVAL);

	return find_connected_domain(domid);
}
543
544 /* domid */
do_release(struct connection * conn,struct buffered_data * in)545 int do_release(struct connection *conn, struct buffered_data *in)
546 {
547 struct domain *domain;
548
549 domain = onearg_domain(conn, in);
550 if (IS_ERR(domain))
551 return -PTR_ERR(domain);
552
553 talloc_free(domain->conn);
554
555 send_ack(conn, XS_RELEASE);
556
557 return 0;
558 }
559
do_resume(struct connection * conn,struct buffered_data * in)560 int do_resume(struct connection *conn, struct buffered_data *in)
561 {
562 struct domain *domain;
563
564 domain = onearg_domain(conn, in);
565 if (IS_ERR(domain))
566 return -PTR_ERR(domain);
567
568 domain->shutdown = false;
569
570 send_ack(conn, XS_RESUME);
571
572 return 0;
573 }
574
do_get_domain_path(struct connection * conn,struct buffered_data * in)575 int do_get_domain_path(struct connection *conn, struct buffered_data *in)
576 {
577 char *path;
578 const char *domid_str = onearg(in);
579
580 if (!domid_str)
581 return EINVAL;
582
583 path = talloc_domain_path(conn, atoi(domid_str));
584 if (!path)
585 return errno;
586
587 send_reply(conn, XS_GET_DOMAIN_PATH, path, strlen(path) + 1);
588
589 talloc_free(path);
590
591 return 0;
592 }
593
do_is_domain_introduced(struct connection * conn,struct buffered_data * in)594 int do_is_domain_introduced(struct connection *conn, struct buffered_data *in)
595 {
596 int result;
597 unsigned int domid;
598 const char *domid_str = onearg(in);
599
600 if (!domid_str)
601 return EINVAL;
602
603 domid = atoi(domid_str);
604 if (domid == DOMID_SELF)
605 result = 1;
606 else
607 result = (find_domain_by_domid(domid) != NULL);
608
609 send_reply(conn, XS_IS_DOMAIN_INTRODUCED, result ? "T" : "F", 2);
610
611 return 0;
612 }
613
614 /* Allow guest to reset all watches */
do_reset_watches(struct connection * conn,struct buffered_data * in)615 int do_reset_watches(struct connection *conn, struct buffered_data *in)
616 {
617 conn_delete_all_watches(conn);
618 conn_delete_all_transactions(conn);
619
620 send_ack(conn, XS_RESET_WATCHES);
621
622 return 0;
623 }
624
close_xc_handle(void * _handle)625 static int close_xc_handle(void *_handle)
626 {
627 xc_interface_close(*(xc_interface**)_handle);
628 return 0;
629 }
630
close_xgt_handle(void * _handle)631 static int close_xgt_handle(void *_handle)
632 {
633 xengnttab_close(*(xengnttab_handle **)_handle);
634 return 0;
635 }
636
637 /* Returns the implicit path of a connection (only domains have this) */
get_implicit_path(const struct connection * conn)638 const char *get_implicit_path(const struct connection *conn)
639 {
640 if (!conn->domain)
641 return "/local/domain/0";
642 return conn->domain->path;
643 }
644
/* Restore existing connections.  Intentionally a no-op in this file. */
void restore_existing_connections(void)
{
}
649
set_dom_perms_default(struct node_perms * perms)650 static int set_dom_perms_default(struct node_perms *perms)
651 {
652 perms->num = 1;
653 perms->p = talloc_array(NULL, struct xs_permissions, perms->num);
654 if (!perms->p)
655 return -1;
656 perms->p->id = 0;
657 perms->p->perms = XS_PERM_NONE;
658
659 return 0;
660 }
661
get_perms_special(const char * name)662 static struct node_perms *get_perms_special(const char *name)
663 {
664 if (!strcmp(name, "@releaseDomain"))
665 return &dom_release_perms;
666 if (!strcmp(name, "@introduceDomain"))
667 return &dom_introduce_perms;
668 return NULL;
669 }
670
set_perms_special(struct connection * conn,const char * name,struct node_perms * perms)671 int set_perms_special(struct connection *conn, const char *name,
672 struct node_perms *perms)
673 {
674 struct node_perms *p;
675
676 p = get_perms_special(name);
677 if (!p)
678 return EINVAL;
679
680 if ((perm_for_conn(conn, p) & (XS_PERM_WRITE | XS_PERM_OWNER)) !=
681 (XS_PERM_WRITE | XS_PERM_OWNER))
682 return EACCES;
683
684 p->num = perms->num;
685 talloc_free(p->p);
686 p->p = perms->p;
687 talloc_steal(NULL, perms->p);
688
689 return 0;
690 }
691
check_perms_special(const char * name,struct connection * conn)692 bool check_perms_special(const char *name, struct connection *conn)
693 {
694 struct node_perms *p;
695
696 p = get_perms_special(name);
697 if (!p)
698 return false;
699
700 return perm_for_conn(conn, p) & XS_PERM_READ;
701 }
702
dom0_init(void)703 static int dom0_init(void)
704 {
705 evtchn_port_t port;
706 struct domain *dom0;
707
708 port = xenbus_evtchn();
709 if (port == -1)
710 return -1;
711
712 dom0 = alloc_domain(NULL, xenbus_master_domid());
713 if (!dom0)
714 return -1;
715 if (new_domain(dom0, port))
716 return -1;
717
718 dom0->interface = xenbus_map();
719 if (dom0->interface == NULL)
720 return -1;
721
722 talloc_steal(dom0->conn, dom0);
723
724 xenevtchn_notify(xce_handle, dom0->port);
725
726 if (set_dom_perms_default(&dom_release_perms) ||
727 set_dom_perms_default(&dom_introduce_perms))
728 return -1;
729
730 return 0;
731 }
732
domain_init(void)733 void domain_init(void)
734 {
735 int rc;
736
737 xc_handle = talloc(talloc_autofree_context(), xc_interface*);
738 if (!xc_handle)
739 barf_perror("Failed to allocate domain handle");
740
741 *xc_handle = xc_interface_open(0,0,0);
742 if (!*xc_handle)
743 barf_perror("Failed to open connection to hypervisor");
744
745 talloc_set_destructor(xc_handle, close_xc_handle);
746
747 xgt_handle = talloc(talloc_autofree_context(), xengnttab_handle*);
748 if (!xgt_handle)
749 barf_perror("Failed to allocate domain gnttab handle");
750
751 *xgt_handle = xengnttab_open(NULL, 0);
752 if (*xgt_handle == NULL)
753 barf_perror("Failed to open connection to gnttab");
754
755 talloc_set_destructor(xgt_handle, close_xgt_handle);
756
757 xce_handle = xenevtchn_open(NULL, 0);
758
759 if (xce_handle == NULL)
760 barf_perror("Failed to open evtchn device");
761
762 if (dom0_init() != 0)
763 barf_perror("Failed to initialize dom0 state");
764
765 if ((rc = xenevtchn_bind_virq(xce_handle, VIRQ_DOM_EXC)) == -1)
766 barf_perror("Failed to bind to domain exception virq port");
767 virq_port = rc;
768 }
769
domain_entry_inc(struct connection * conn,struct node * node)770 void domain_entry_inc(struct connection *conn, struct node *node)
771 {
772 struct domain *d;
773
774 if (!conn)
775 return;
776
777 if (node->perms.p && node->perms.p[0].id != conn->id) {
778 if (conn->transaction) {
779 transaction_entry_inc(conn->transaction,
780 node->perms.p[0].id);
781 } else {
782 d = find_domain_by_domid(node->perms.p[0].id);
783 if (d)
784 d->nbentry++;
785 }
786 } else if (conn->domain) {
787 if (conn->transaction) {
788 transaction_entry_inc(conn->transaction,
789 conn->domain->domid);
790 } else {
791 conn->domain->nbentry++;
792 }
793 }
794 }
795
796 /*
797 * Check whether a domain was created before or after a specific generation
798 * count (used for testing whether a node permission is older than a domain).
799 *
800 * Return values:
801 * -1: error
802 * 0: domain has higher generation count (it is younger than a node with the
803 * given count), or domain isn't existing any longer
804 * 1: domain is older than the node
805 */
chk_domain_generation(unsigned int domid,uint64_t gen)806 static int chk_domain_generation(unsigned int domid, uint64_t gen)
807 {
808 struct domain *d;
809 xc_dominfo_t dominfo;
810
811 if (!xc_handle && domid == 0)
812 return 1;
813
814 d = find_domain_struct(domid);
815 if (d)
816 return (d->generation <= gen) ? 1 : 0;
817
818 if (!get_domain_info(domid, &dominfo))
819 return 0;
820
821 d = alloc_domain(NULL, domid);
822 return d ? 1 : -1;
823 }
824
825 /*
826 * Remove permissions for no longer existing domains in order to avoid a new
827 * domain with the same domid inheriting the permissions.
828 */
domain_adjust_node_perms(struct node * node)829 int domain_adjust_node_perms(struct node *node)
830 {
831 unsigned int i;
832 int ret;
833
834 ret = chk_domain_generation(node->perms.p[0].id, node->generation);
835 if (ret < 0)
836 return errno;
837
838 /* If the owner doesn't exist any longer give it to priv domain. */
839 if (!ret)
840 node->perms.p[0].id = priv_domid;
841
842 for (i = 1; i < node->perms.num; i++) {
843 if (node->perms.p[i].perms & XS_PERM_IGNORE)
844 continue;
845 ret = chk_domain_generation(node->perms.p[i].id,
846 node->generation);
847 if (ret < 0)
848 return errno;
849 if (!ret)
850 node->perms.p[i].perms |= XS_PERM_IGNORE;
851 }
852
853 return 0;
854 }
855
domain_entry_dec(struct connection * conn,struct node * node)856 void domain_entry_dec(struct connection *conn, struct node *node)
857 {
858 struct domain *d;
859
860 if (!conn)
861 return;
862
863 if (node->perms.p && node->perms.p[0].id != conn->id) {
864 if (conn->transaction) {
865 transaction_entry_dec(conn->transaction,
866 node->perms.p[0].id);
867 } else {
868 d = find_domain_by_domid(node->perms.p[0].id);
869 if (d && d->nbentry)
870 d->nbentry--;
871 }
872 } else if (conn->domain && conn->domain->nbentry) {
873 if (conn->transaction) {
874 transaction_entry_dec(conn->transaction,
875 conn->domain->domid);
876 } else {
877 conn->domain->nbentry--;
878 }
879 }
880 }
881
domain_entry_fix(unsigned int domid,int num,bool update)882 int domain_entry_fix(unsigned int domid, int num, bool update)
883 {
884 struct domain *d;
885 int cnt;
886
887 d = find_domain_by_domid(domid);
888 if (!d)
889 return 0;
890
891 cnt = d->nbentry + num;
892 if (cnt < 0)
893 cnt = 0;
894
895 if (update)
896 d->nbentry = cnt;
897
898 return domid_is_unprivileged(domid) ? cnt : 0;
899 }
900
domain_entry(struct connection * conn)901 int domain_entry(struct connection *conn)
902 {
903 return (domain_is_unprivileged(conn))
904 ? conn->domain->nbentry
905 : 0;
906 }
907
domain_watch_inc(struct connection * conn)908 void domain_watch_inc(struct connection *conn)
909 {
910 if (!conn || !conn->domain)
911 return;
912 conn->domain->nbwatch++;
913 }
914
domain_watch_dec(struct connection * conn)915 void domain_watch_dec(struct connection *conn)
916 {
917 if (!conn || !conn->domain)
918 return;
919 if (conn->domain->nbwatch)
920 conn->domain->nbwatch--;
921 }
922
domain_watch(struct connection * conn)923 int domain_watch(struct connection *conn)
924 {
925 return (domain_is_unprivileged(conn))
926 ? conn->domain->nbwatch
927 : 0;
928 }
929
/* Write rate limit tunables, all pre-scaled by WRL_FACTOR. */
/* Credit charged per rate-limited write. */
static wrl_creditt wrl_config_writecost      = WRL_FACTOR;
/* Credit replenishment rate, shared between all domains. */
static wrl_creditt wrl_config_rate           = WRL_RATE   * WRL_FACTOR;
/* Upper bound of a single domain's credit. */
static wrl_creditt wrl_config_dburst         = WRL_DBURST * WRL_FACTOR;
/* Upper bound of the reserve when surplus is returned to it. */
static wrl_creditt wrl_config_gburst         = WRL_GBURST * WRL_FACTOR;
/* How far new domains may drive the reserve negative. */
static wrl_creditt wrl_config_newdoms_dburst =
	                         WRL_DBURST * WRL_NEWDOMS * WRL_FACTOR;

/* Transaction counter consulted by the wrl_apply_debit_*() functions. */
long wrl_ntransactions;

/* Number of domains registered via wrl_domain_new(). */
static long wrl_ndomains;
static wrl_creditt wrl_reserve; /* [-wrl_config_newdoms_dburst, +_gburst ] */
static time_t wrl_log_last_warning; /* 0: no previous warning */
942
wrl_gettime_now(struct wrl_timestampt * now_wt)943 void wrl_gettime_now(struct wrl_timestampt *now_wt)
944 {
945 struct timespec now_ts;
946 int r;
947
948 r = clock_gettime(CLOCK_MONOTONIC, &now_ts);
949 if (r)
950 barf_perror("Could not find time (clock_gettime failed)");
951
952 now_wt->sec = now_ts.tv_sec;
953 now_wt->msec = now_ts.tv_nsec / 1000000;
954 }
955
/*
 * Transfers zero or more credit from "debit" to "credit".
 * Transfers as much as possible while maintaining
 *    debit >= debit_floor and credit <= credit_ceil.
 * (If that's violated already, does nothing.)
 *
 * Sufficient conditions to avoid overflow, either of:
 *  |every argument| <= 0x3fffffff
 *  |every argument| <= 1E9
 *  |every argument| <= WRL_CREDIT_MAX
 * (And this condition is preserved.)
 */
static void wrl_xfer_credit(wrl_creditt *debit, wrl_creditt debit_floor,
			    wrl_creditt *credit, wrl_creditt credit_ceil)
{
	/* What the source can give and what the destination can take. */
	wrl_creditt available = *debit - debit_floor;
	wrl_creditt headroom = credit_ceil - *credit;
	wrl_creditt amount = MIN(available, headroom);

	if (amount <= 0)
		return;

	*debit -= amount;
	*credit += amount;
}
978
wrl_domain_new(struct domain * domain)979 void wrl_domain_new(struct domain *domain)
980 {
981 domain->wrl_credit = 0;
982 wrl_gettime_now(&domain->wrl_timestamp);
983 wrl_ndomains++;
984 /* Steal up to DBURST from the reserve */
985 wrl_xfer_credit(&wrl_reserve, -wrl_config_newdoms_dburst,
986 &domain->wrl_credit, wrl_config_dburst);
987 }
988
wrl_domain_destroy(struct domain * domain)989 void wrl_domain_destroy(struct domain *domain)
990 {
991 wrl_ndomains--;
992 /*
993 * Don't bother recalculating domain's credit - this just
994 * means we don't give the reserve the ending domain's credit
995 * for time elapsed since last update.
996 */
997 wrl_xfer_credit(&domain->wrl_credit, 0,
998 &wrl_reserve, wrl_config_dburst);
999 }
1000
wrl_credit_update(struct domain * domain,struct wrl_timestampt now)1001 void wrl_credit_update(struct domain *domain, struct wrl_timestampt now)
1002 {
1003 /*
1004 * We want to calculate
1005 * credit += (now - timestamp) * RATE / ndoms;
1006 * But we want it to saturate, and to avoid floating point.
1007 * To avoid rounding errors from constantly adding small
1008 * amounts of credit, we only add credit for whole milliseconds.
1009 */
1010 long seconds = now.sec - domain->wrl_timestamp.sec;
1011 long milliseconds = now.msec - domain->wrl_timestamp.msec;
1012 long msec;
1013 int64_t denom, num;
1014 wrl_creditt surplus;
1015
1016 seconds = MIN(seconds, 1000*1000); /* arbitrary, prevents overflow */
1017 msec = seconds * 1000 + milliseconds;
1018
1019 if (msec < 0)
1020 /* shouldn't happen with CLOCK_MONOTONIC */
1021 msec = 0;
1022
1023 /* 32x32 -> 64 cannot overflow */
1024 denom = (int64_t)msec * wrl_config_rate;
1025 num = (int64_t)wrl_ndomains * 1000;
1026 /* denom / num <= 1E6 * wrl_config_rate, so with
1027 reasonable wrl_config_rate, denom / num << 2^64 */
1028
1029 /* at last! */
1030 domain->wrl_credit = MIN( (int64_t)domain->wrl_credit + denom / num,
1031 WRL_CREDIT_MAX );
1032 /* (maybe briefly violating the DBURST cap on wrl_credit) */
1033
1034 /* maybe take from the reserve to make us nonnegative */
1035 wrl_xfer_credit(&wrl_reserve, 0,
1036 &domain->wrl_credit, 0);
1037
1038 /* return any surplus (over DBURST) to the reserve */
1039 surplus = 0;
1040 wrl_xfer_credit(&domain->wrl_credit, wrl_config_dburst,
1041 &surplus, WRL_CREDIT_MAX);
1042 wrl_xfer_credit(&surplus, 0,
1043 &wrl_reserve, wrl_config_gburst);
1044 /* surplus is now implicitly discarded */
1045
1046 domain->wrl_timestamp = now;
1047
1048 trace("wrl: dom %4d %6ld msec %9ld credit %9ld reserve"
1049 " %9ld discard\n",
1050 domain->domid,
1051 msec,
1052 (long)domain->wrl_credit, (long)wrl_reserve,
1053 (long)surplus);
1054 }
1055
wrl_check_timeout(struct domain * domain,struct wrl_timestampt now,int * ptimeout)1056 void wrl_check_timeout(struct domain *domain,
1057 struct wrl_timestampt now,
1058 int *ptimeout)
1059 {
1060 uint64_t num, denom;
1061 int wakeup;
1062
1063 wrl_credit_update(domain, now);
1064
1065 if (domain->wrl_credit >= 0)
1066 /* not blocked */
1067 return;
1068
1069 if (!*ptimeout)
1070 /* already decided on immediate wakeup,
1071 so no need to calculate our timeout */
1072 return;
1073
1074 /* calculate wakeup = now + -credit / (RATE / ndoms); */
1075
1076 /* credit cannot go more -ve than one transaction,
1077 * so the first multiplication cannot overflow even 32-bit */
1078 num = (uint64_t)(-domain->wrl_credit * 1000) * wrl_ndomains;
1079 denom = wrl_config_rate;
1080
1081 wakeup = MIN( num / denom /* uint64_t */, INT_MAX );
1082 if (*ptimeout==-1 || wakeup < *ptimeout)
1083 *ptimeout = wakeup;
1084
1085 trace("wrl: domain %u credit=%ld (reserve=%ld) SLEEPING for %d\n",
1086 domain->domid,
1087 (long)domain->wrl_credit, (long)wrl_reserve,
1088 wakeup);
1089 }
1090
/*
 * Log a rate-limit warning to syslog, prefixed with "write rate limit: ".
 * The format string must be a literal, since it is pasted onto the
 * prefix.  The "now" argument is currently unused.
 */
#define WRL_LOG(now, ...) \
	(syslog(LOG_WARNING, "write rate limit: " __VA_ARGS__))
1093
wrl_apply_debit_actual(struct domain * domain)1094 void wrl_apply_debit_actual(struct domain *domain)
1095 {
1096 struct wrl_timestampt now;
1097
1098 if (!domain || !domid_is_unprivileged(domain->domid))
1099 /* sockets and privileged domain escape the write rate limit */
1100 return;
1101
1102 wrl_gettime_now(&now);
1103 wrl_credit_update(domain, now);
1104
1105 domain->wrl_credit -= wrl_config_writecost;
1106 trace("wrl: domain %u credit=%ld (reserve=%ld)\n",
1107 domain->domid,
1108 (long)domain->wrl_credit, (long)wrl_reserve);
1109
1110 if (domain->wrl_credit < 0) {
1111 if (!domain->wrl_delay_logged) {
1112 domain->wrl_delay_logged = true;
1113 WRL_LOG(now, "domain %ld is affected",
1114 (long)domain->domid);
1115 } else if (!wrl_log_last_warning) {
1116 WRL_LOG(now, "rate limiting restarts");
1117 }
1118 wrl_log_last_warning = now.sec;
1119 }
1120 }
1121
wrl_log_periodic(struct wrl_timestampt now)1122 void wrl_log_periodic(struct wrl_timestampt now)
1123 {
1124 if (wrl_log_last_warning &&
1125 (now.sec - wrl_log_last_warning) > WRL_LOGEVERY) {
1126 WRL_LOG(now, "not in force recently");
1127 wrl_log_last_warning = 0;
1128 }
1129 }
1130
wrl_apply_debit_direct(struct connection * conn)1131 void wrl_apply_debit_direct(struct connection *conn)
1132 {
1133 if (!conn)
1134 /* some writes are generated internally */
1135 return;
1136
1137 if (conn->transaction)
1138 /* these are accounted for when the transaction ends */
1139 return;
1140
1141 if (!wrl_ntransactions)
1142 /* we don't conflict with anyone */
1143 return;
1144
1145 wrl_apply_debit_actual(conn->domain);
1146 }
1147
wrl_apply_debit_trans_commit(struct connection * conn)1148 void wrl_apply_debit_trans_commit(struct connection *conn)
1149 {
1150 if (wrl_ntransactions <= 1)
1151 /* our own transaction appears in the counter */
1152 return;
1153
1154 wrl_apply_debit_actual(conn->domain);
1155 }
1156
1157 /*
1158 * Local variables:
1159 * mode: C
1160 * c-file-style: "linux"
1161 * indent-tabs-mode: t
1162 * c-basic-offset: 8
1163 * tab-width: 8
1164 * End:
1165 */
1166