1 /*
2     Simple prototype Xen Store Daemon providing simple tree-like database.
3     Copyright (C) 2005 Rusty Russell IBM Corporation
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 #include <inttypes.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <poll.h>
23 #ifndef NO_SOCKETS
24 #include <sys/socket.h>
25 #include <sys/un.h>
26 #endif
27 #include <sys/time.h>
28 #include <time.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <stdbool.h>
32 #include <stdio.h>
33 #include <stdarg.h>
34 #include <stdlib.h>
35 #include <syslog.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <dirent.h>
39 #include <getopt.h>
40 #include <signal.h>
41 #include <assert.h>
42 #include <setjmp.h>
43 
44 #include <xenevtchn.h>
45 
46 #include "utils.h"
47 #include "list.h"
48 #include "talloc.h"
49 #include "xenstore_lib.h"
50 #include "xenstored_core.h"
51 #include "xenstored_watch.h"
52 #include "xenstored_transaction.h"
53 #include "xenstored_domain.h"
54 #include "xenstored_control.h"
55 #include "tdb.h"
56 
57 #ifndef NO_SOCKETS
58 #if defined(HAVE_SYSTEMD)
59 #define XEN_SYSTEMD_ENABLED 1
60 #endif
61 #endif
62 
63 #if defined(XEN_SYSTEMD_ENABLED)
64 #include <systemd/sd-daemon.h>
65 #endif
66 
extern xenevtchn_handle *xce_handle; /* in xenstored_domain.c */
/* Index of the event channel fd inside fds[], as returned by set_fd(). */
static int xce_pollfd_idx = -1;
/* pollfd array handed to poll(); grown on demand by set_fd(). */
static struct pollfd *fds;
/* Number of pollfd slots currently allocated in fds[]. */
static unsigned int current_array_size;
/* Number of slots of fds[] currently in use. */
static unsigned int nr_fds;

/*
 * Descriptors polled for input by initialize_fds() whenever they are not -1.
 * NOTE(review): presumably the read-write and read-only listening sockets;
 * they are opened outside this part of the file - confirm.
 */
static int sock = -1;
static int ro_sock = -1;

/* Round _x up to the next multiple of 2^_w. */
#define ROUNDUP(_x, _w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))

/* When set, write_messages() prints every outgoing message via xprintf(). */
static bool verbose = false;
/* Head of the list of connections walked by initialize_fds(). */
LIST_HEAD(connections);
/* Descriptor trace output is written to; a negative value disables tracing. */
int tracefd = -1;
/* NOTE(review): not referenced in this part of the file. */
static bool recovery = true;
/*
 * The SIGHUP handler writes one byte to reopen_log_pipe[1]; the read end is
 * added to the poll set by initialize_fds().
 */
static int reopen_log_pipe[2];
/* Index of reopen_log_pipe[0] inside fds[], as returned by set_fd(). */
static int reopen_log_pipe0_pollfd_idx = -1;
/* Path opened by reopen_log(); NULL means no trace file is configured. */
char *tracefile = NULL;
/* TDB handle used for the fetch/store/delete calls in this file. */
TDB_CONTEXT *tdb_ctx = NULL;

/* Defined further down the file; used by the tracing code. */
static const char *sockmsg_string(enum xsd_sockmsg_type type);
88 
/*
 * Format a printf-style message with talloc_asprintf() and send it, followed
 * by a newline, to both the trace output and syslog at LOG_ERR.  If the
 * formatting allocation fails, a fixed fallback message is logged instead.
 */
#define log(...)							\
	do {								\
		char *s = talloc_asprintf(NULL, __VA_ARGS__);		\
		if (s) {						\
			trace("%s\n", s);				\
			syslog(LOG_ERR, "%s\n",  s);			\
			talloc_free(s);					\
		} else {						\
			trace("talloc failure during logging\n");	\
			syslog(LOG_ERR, "talloc failure during logging\n"); \
		}							\
	} while (0)
101 
102 
/*
 * Default quota limits.
 *
 * quota_nb_entry_per_domain is compared against domain_entry() in
 * create_node(); quota_max_entry_size bounds the stored record size for
 * unprivileged connections in write_node_raw().
 * NOTE(review): the remaining quotas are not referenced in this part of the
 * file; their meaning is taken from their names only.
 */
int quota_nb_entry_per_domain = 1000;
int quota_nb_watch_per_domain = 128;
int quota_max_entry_size = 2048; /* 2K */
int quota_max_transaction = 10;
int quota_nb_perms_per_node = 5;
108 
trace(const char * fmt,...)109 void trace(const char *fmt, ...)
110 {
111 	va_list arglist;
112 	char *str;
113 	char sbuf[1024];
114 	int ret, dummy;
115 
116 	if (tracefd < 0)
117 		return;
118 
119 	/* try to use a static buffer */
120 	va_start(arglist, fmt);
121 	ret = vsnprintf(sbuf, 1024, fmt, arglist);
122 	va_end(arglist);
123 
124 	if (ret <= 1024) {
125 		dummy = write(tracefd, sbuf, ret);
126 		return;
127 	}
128 
129 	/* fail back to dynamic allocation */
130 	va_start(arglist, fmt);
131 	str = talloc_vasprintf(NULL, fmt, arglist);
132 	va_end(arglist);
133 	if (str) {
134 		dummy = write(tracefd, str, strlen(str));
135 		talloc_free(str);
136 	}
137 }
138 
trace_io(const struct connection * conn,const struct buffered_data * data,int out)139 static void trace_io(const struct connection *conn,
140 		     const struct buffered_data *data,
141 		     int out)
142 {
143 	unsigned int i;
144 	time_t now;
145 	struct tm *tm;
146 
147 #ifdef HAVE_DTRACE
148 	dtrace_io(conn, data, out);
149 #endif
150 
151 	if (tracefd < 0)
152 		return;
153 
154 	now = time(NULL);
155 	tm = localtime(&now);
156 
157 	trace("%s %p %04d%02d%02d %02d:%02d:%02d %s (",
158 	      out ? "OUT" : "IN", conn,
159 	      tm->tm_year + 1900, tm->tm_mon + 1,
160 	      tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
161 	      sockmsg_string(data->hdr.msg.type));
162 
163 	for (i = 0; i < data->hdr.msg.len; i++)
164 		trace("%c", (data->buffer[i] != '\0') ? data->buffer[i] : ' ');
165 	trace(")\n");
166 }
167 
/* Trace the creation of an object: its type name and its address. */
void trace_create(const void *data, const char *type)
{
	const char *kind = type;

	trace("CREATE %s %p\n", kind, data);
}
172 
/* Trace the destruction of an object: its type name and its address. */
void trace_destroy(const void *data, const char *type)
{
	const char *kind = type;

	trace("DESTROY %s %p\n", kind, data);
}
177 
178 /**
179  * Signal handler for SIGHUP, which requests that the trace log is reopened
180  * (in the main loop).  A single byte is written to reopen_log_pipe, to awaken
181  * the poll() in the main loop.
182  */
trigger_reopen_log(int signal)183 static void trigger_reopen_log(int signal __attribute__((unused)))
184 {
185 	char c = 'A';
186 	int dummy;
187 	dummy = write(reopen_log_pipe[1], &c, 1);
188 }
189 
close_log(void)190 void close_log(void)
191 {
192 	if (tracefd >= 0)
193 		close(tracefd);
194 	tracefd = -1;
195 }
196 
reopen_log(void)197 void reopen_log(void)
198 {
199 	if (tracefile) {
200 		close_log();
201 
202 		tracefd = open(tracefile, O_WRONLY|O_CREAT|O_APPEND, 0600);
203 
204 		if (tracefd < 0)
205 			perror("Could not open tracefile");
206 		else
207 			trace("\n***\n");
208 	}
209 }
210 
write_messages(struct connection * conn)211 static bool write_messages(struct connection *conn)
212 {
213 	int ret;
214 	struct buffered_data *out;
215 
216 	out = list_top(&conn->out_list, struct buffered_data, list);
217 	if (out == NULL)
218 		return true;
219 
220 	if (out->inhdr) {
221 		if (verbose)
222 			xprintf("Writing msg %s (%.*s) out to %p\n",
223 				sockmsg_string(out->hdr.msg.type),
224 				out->hdr.msg.len,
225 				out->buffer, conn);
226 		ret = conn->write(conn, out->hdr.raw + out->used,
227 				  sizeof(out->hdr) - out->used);
228 		if (ret < 0)
229 			return false;
230 
231 		out->used += ret;
232 		if (out->used < sizeof(out->hdr))
233 			return true;
234 
235 		out->inhdr = false;
236 		out->used = 0;
237 
238 		/* Second write might block if non-zero. */
239 		if (out->hdr.msg.len && !conn->domain)
240 			return true;
241 	}
242 
243 	ret = conn->write(conn, out->buffer + out->used,
244 			  out->hdr.msg.len - out->used);
245 	if (ret < 0)
246 		return false;
247 
248 	out->used += ret;
249 	if (out->used != out->hdr.msg.len)
250 		return true;
251 
252 	trace_io(conn, out, 1);
253 
254 	list_del(&out->list);
255 	talloc_free(out);
256 
257 	return true;
258 }
259 
destroy_conn(void * _conn)260 static int destroy_conn(void *_conn)
261 {
262 	struct connection *conn = _conn;
263 
264 	/* Flush outgoing if possible, but don't block. */
265 	if (!conn->domain) {
266 		struct pollfd pfd;
267 		pfd.fd = conn->fd;
268 		pfd.events = POLLOUT;
269 
270 		while (!list_empty(&conn->out_list)
271 		       && poll(&pfd, 1, 0) == 1)
272 			if (!write_messages(conn))
273 				break;
274 		close(conn->fd);
275 	}
276         if (conn->target)
277                 talloc_unlink(conn, conn->target);
278 	list_del(&conn->list);
279 	trace_destroy(conn, "connection");
280 	return 0;
281 }
282 
283 /* This function returns index inside the array if succeed, -1 if fail */
set_fd(int fd,short events)284 static int set_fd(int fd, short events)
285 {
286 	int ret;
287 	if (current_array_size < nr_fds + 1) {
288 		struct pollfd *new_fds = NULL;
289 		unsigned long newsize;
290 
291 		/* Round up to 2^8 boundary, in practice this just
292 		 * make newsize larger than current_array_size.
293 		 */
294 		newsize = ROUNDUP(nr_fds + 1, 8);
295 
296 		new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
297 		if (!new_fds)
298 			goto fail;
299 		fds = new_fds;
300 
301 		memset(&fds[0] + current_array_size, 0,
302 		       sizeof(struct pollfd ) * (newsize-current_array_size));
303 		current_array_size = newsize;
304 	}
305 
306 	fds[nr_fds].fd = fd;
307 	fds[nr_fds].events = events;
308 	ret = nr_fds;
309 	nr_fds++;
310 
311 	return ret;
312 fail:
313 	syslog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
314 	return -1;
315 }
316 
initialize_fds(int * p_sock_pollfd_idx,int * p_ro_sock_pollfd_idx,int * ptimeout)317 static void initialize_fds(int *p_sock_pollfd_idx, int *p_ro_sock_pollfd_idx,
318 			   int *ptimeout)
319 {
320 	struct connection *conn;
321 	struct wrl_timestampt now;
322 
323 	if (fds)
324 		memset(fds, 0, sizeof(struct pollfd) * current_array_size);
325 	nr_fds = 0;
326 
327 	*ptimeout = -1;
328 
329 	if (sock != -1)
330 		*p_sock_pollfd_idx = set_fd(sock, POLLIN|POLLPRI);
331 	if (ro_sock != -1)
332 		*p_ro_sock_pollfd_idx = set_fd(ro_sock, POLLIN|POLLPRI);
333 	if (reopen_log_pipe[0] != -1)
334 		reopen_log_pipe0_pollfd_idx =
335 			set_fd(reopen_log_pipe[0], POLLIN|POLLPRI);
336 
337 	if (xce_handle != NULL)
338 		xce_pollfd_idx = set_fd(xenevtchn_fd(xce_handle),
339 					POLLIN|POLLPRI);
340 
341 	wrl_gettime_now(&now);
342 	wrl_log_periodic(now);
343 
344 	list_for_each_entry(conn, &connections, list) {
345 		if (conn->domain) {
346 			wrl_check_timeout(conn->domain, now, ptimeout);
347 			if (domain_can_read(conn) ||
348 			    (domain_can_write(conn) &&
349 			     !list_empty(&conn->out_list)))
350 				*ptimeout = 0;
351 		} else {
352 			short events = POLLIN|POLLPRI;
353 			if (!list_empty(&conn->out_list))
354 				events |= POLLOUT;
355 			conn->pollfd_idx = set_fd(conn->fd, events);
356 		}
357 	}
358 }
359 
360 /*
361  * If it fails, returns NULL and sets errno.
362  * Temporary memory allocations will be done with ctx.
363  */
read_node(struct connection * conn,const void * ctx,const char * name)364 struct node *read_node(struct connection *conn, const void *ctx,
365 		       const char *name)
366 {
367 	TDB_DATA key, data;
368 	struct xs_tdb_record_hdr *hdr;
369 	struct node *node;
370 
371 	node = talloc(ctx, struct node);
372 	if (!node) {
373 		errno = ENOMEM;
374 		return NULL;
375 	}
376 	node->name = talloc_strdup(node, name);
377 	if (!node->name) {
378 		talloc_free(node);
379 		errno = ENOMEM;
380 		return NULL;
381 	}
382 
383 	if (transaction_prepend(conn, name, &key))
384 		return NULL;
385 
386 	data = tdb_fetch(tdb_ctx, key);
387 
388 	if (data.dptr == NULL) {
389 		if (tdb_error(tdb_ctx) == TDB_ERR_NOEXIST) {
390 			node->generation = NO_GENERATION;
391 			access_node(conn, node, NODE_ACCESS_READ, NULL);
392 			errno = ENOENT;
393 		} else {
394 			log("TDB error on read: %s", tdb_errorstr(tdb_ctx));
395 			errno = EIO;
396 		}
397 		talloc_free(node);
398 		return NULL;
399 	}
400 
401 	node->parent = NULL;
402 	talloc_steal(node, data.dptr);
403 
404 	/* Datalen, childlen, number of permissions */
405 	hdr = (void *)data.dptr;
406 	node->generation = hdr->generation;
407 	node->perms.num = hdr->num_perms;
408 	node->datalen = hdr->datalen;
409 	node->childlen = hdr->childlen;
410 
411 	/* Permissions are struct xs_permissions. */
412 	node->perms.p = hdr->perms;
413 	if (domain_adjust_node_perms(node)) {
414 		talloc_free(node);
415 		return NULL;
416 	}
417 
418 	/* Data is binary blob (usually ascii, no nul). */
419 	node->data = node->perms.p + hdr->num_perms;
420 	/* Children is strings, nul separated. */
421 	node->children = node->data + node->datalen;
422 
423 	access_node(conn, node, NODE_ACCESS_READ, NULL);
424 
425 	return node;
426 }
427 
/*
 * Serialise a node and store it in the database under the given key.
 *
 * The record consists of the header, the permission array, the data blob
 * and the child list, in that order.  Unless no_quota_check is set, an
 * unprivileged connection may not store a record of quota_max_entry_size
 * bytes or more.
 *
 * Returns 0 on success; on failure returns the error code, which is also
 * left in errno (ENOSPC for the size quota, ENOMEM if the record buffer
 * cannot be allocated, EIO if the store fails, or whatever
 * domain_adjust_node_perms() set).
 */
int write_node_raw(struct connection *conn, TDB_DATA *key, struct node *node,
		   bool no_quota_check)
{
	TDB_DATA data;
	char *p;
	struct xs_tdb_record_hdr *hdr;

	if (domain_adjust_node_perms(node))
		return errno;

	data.dsize = sizeof(*hdr)
		+ node->perms.num * sizeof(node->perms.p[0])
		+ node->datalen + node->childlen;

	if (!no_quota_check && domain_is_unprivileged(conn) &&
	    data.dsize >= quota_max_entry_size) {
		errno = ENOSPC;
		return errno;
	}

	/* The record buffer is a child of the node, freed along with it. */
	data.dptr = talloc_size(node, data.dsize);
	if (!data.dptr) {
		errno = ENOMEM;
		return errno;
	}

	hdr = (void *)data.dptr;
	hdr->generation = node->generation;
	hdr->num_perms = node->perms.num;
	hdr->datalen = node->datalen;
	hdr->childlen = node->childlen;

	memcpy(hdr->perms, node->perms.p,
	       node->perms.num * sizeof(*node->perms.p));
	/* Data and children follow the permission array back to back. */
	p = (char *)(hdr->perms + node->perms.num);
	memcpy(p, node->data, node->datalen);
	p += node->datalen;
	memcpy(p, node->children, node->childlen);

	/* TDB should set errno, but doesn't even set ecode AFAICT. */
	if (tdb_store(tdb_ctx, *key, data, TDB_REPLACE) != 0) {
		corrupt(conn, "Write of %s failed", key->dptr);
		errno = EIO;
		return errno;
	}
	return 0;
}
470 
write_node(struct connection * conn,struct node * node,bool no_quota_check)471 static int write_node(struct connection *conn, struct node *node,
472 		      bool no_quota_check)
473 {
474 	TDB_DATA key;
475 
476 	if (access_node(conn, node, NODE_ACCESS_WRITE, &key))
477 		return errno;
478 
479 	return write_node_raw(conn, &key, node, no_quota_check);
480 }
481 
perm_for_conn(struct connection * conn,const struct node_perms * perms)482 enum xs_perm_type perm_for_conn(struct connection *conn,
483 				const struct node_perms *perms)
484 {
485 	unsigned int i;
486 	enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
487 
488 	if (!conn->can_write)
489 		mask &= ~XS_PERM_WRITE;
490 
491 	/* Owners and tools get it all... */
492 	if (!domain_is_unprivileged(conn) || perms->p[0].id == conn->id
493                 || (conn->target && perms->p[0].id == conn->target->id))
494 		return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask;
495 
496 	for (i = 1; i < perms->num; i++)
497 		if (!(perms->p[i].perms & XS_PERM_IGNORE) &&
498 		    (perms->p[i].id == conn->id ||
499 		     (conn->target && perms->p[i].id == conn->target->id)))
500 			return perms->p[i].perms & mask;
501 
502 	return perms->p[0].perms & mask;
503 }
504 
/*
 * Build the name of a node's parent: everything before the last '/', or
 * "/" when the only slash is the leading one.
 * The result is allocated with ctx; on allocation failure NULL is returned
 * and errno is set to ENOMEM.
 */
char *get_parent(const void *ctx, const char *node)
{
	/* Skip the first character so a leading '/' is never the match. */
	const char *last = strrchr(node + 1, '/');
	char *result;

	if (last)
		result = talloc_asprintf(ctx, "%.*s", (int)(last - node), node);
	else
		result = talloc_strdup(ctx, "/");

	if (result == NULL)
		errno = ENOMEM;

	return result;
}
521 
522 /*
523  * What do parents say?
524  * Temporary memory allocations are done with ctx.
525  */
ask_parents(struct connection * conn,const void * ctx,const char * name,enum xs_perm_type * perm)526 static int ask_parents(struct connection *conn, const void *ctx,
527 		       const char *name, enum xs_perm_type *perm)
528 {
529 	struct node *node;
530 
531 	do {
532 		name = get_parent(ctx, name);
533 		if (!name)
534 			return errno;
535 		node = read_node(conn, ctx, name);
536 		if (node)
537 			break;
538 		if (errno == ENOMEM)
539 			return errno;
540 	} while (!streq(name, "/"));
541 
542 	/* No permission at root?  We're in trouble. */
543 	if (!node) {
544 		corrupt(conn, "No permissions file at root");
545 		*perm = XS_PERM_NONE;
546 		return 0;
547 	}
548 
549 	*perm = perm_for_conn(conn, &node->perms);
550 	return 0;
551 }
552 
553 /*
554  * We have a weird permissions system.  You can allow someone into a
555  * specific node without allowing it in the parents.  If it's going to
556  * fail, however, we don't want the errno to indicate any information
557  * about the node.
558  * Temporary memory allocations are done with ctx.
559  */
errno_from_parents(struct connection * conn,const void * ctx,const char * node,int errnum,enum xs_perm_type perm)560 static int errno_from_parents(struct connection *conn, const void *ctx,
561 			      const char *node, int errnum,
562 			      enum xs_perm_type perm)
563 {
564 	enum xs_perm_type parent_perm = XS_PERM_NONE;
565 
566 	/* We always tell them about memory failures. */
567 	if (errnum == ENOMEM)
568 		return errnum;
569 
570 	if (ask_parents(conn, ctx, node, &parent_perm))
571 		return errno;
572 	if (parent_perm & perm)
573 		return errnum;
574 	return EACCES;
575 }
576 
577 /*
578  * If it fails, returns NULL and sets errno.
579  * Temporary memory allocations are done with ctx.
580  */
get_node(struct connection * conn,const void * ctx,const char * name,enum xs_perm_type perm)581 static struct node *get_node(struct connection *conn,
582 			     const void *ctx,
583 			     const char *name,
584 			     enum xs_perm_type perm)
585 {
586 	struct node *node;
587 
588 	if (!name || !is_valid_nodename(name)) {
589 		errno = EINVAL;
590 		return NULL;
591 	}
592 	node = read_node(conn, ctx, name);
593 	/* If we don't have permission, we don't have node. */
594 	if (node) {
595 		if ((perm_for_conn(conn, &node->perms) & perm) != perm) {
596 			errno = EACCES;
597 			node = NULL;
598 		}
599 	}
600 	/* Clean up errno if they weren't supposed to know. */
601 	if (!node && errno != ENOMEM)
602 		errno = errno_from_parents(conn, ctx, name, errno, perm);
603 	return node;
604 }
605 
new_buffer(void * ctx)606 static struct buffered_data *new_buffer(void *ctx)
607 {
608 	struct buffered_data *data;
609 
610 	data = talloc_zero(ctx, struct buffered_data);
611 	if (data == NULL)
612 		return NULL;
613 
614 	data->inhdr = true;
615 	return data;
616 }
617 
618 /* Return length of string (including nul) at this offset.
619  * If there is no nul, returns 0 for failure.
620  */
get_string(const struct buffered_data * data,unsigned int offset)621 static unsigned int get_string(const struct buffered_data *data,
622 			       unsigned int offset)
623 {
624 	const char *nul;
625 
626 	if (offset >= data->used)
627 		return 0;
628 
629 	nul = memchr(data->buffer + offset, 0, data->used - offset);
630 	if (!nul)
631 		return 0;
632 
633 	return nul - (data->buffer + offset) + 1;
634 }
635 
636 /* Break input into vectors, return the number, fill in up to num of them.
637  * Always returns the actual number of nuls in the input.  Stores the
638  * positions of the starts of the nul-terminated strings in vec.
639  * Callers who use this and then rely only on vec[] will
640  * ignore any data after the final nul.
641  */
get_strings(struct buffered_data * data,char * vec[],unsigned int num)642 unsigned int get_strings(struct buffered_data *data,
643 			 char *vec[], unsigned int num)
644 {
645 	unsigned int off, i, len;
646 
647 	off = i = 0;
648 	while ((len = get_string(data, off)) != 0) {
649 		if (i < num)
650 			vec[i] = data->buffer + off;
651 		i++;
652 		off += len;
653 	}
654 	return i;
655 }
656 
send_error(struct connection * conn,int error)657 static void send_error(struct connection *conn, int error)
658 {
659 	unsigned int i;
660 
661 	for (i = 0; error != xsd_errors[i].errnum; i++) {
662 		if (i == ARRAY_SIZE(xsd_errors) - 1) {
663 			eprintf("xenstored: error %i untranslatable", error);
664 			i = 0; /* EINVAL */
665 			break;
666 		}
667 	}
668 	send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
669 			  strlen(xsd_errors[i].errstring) + 1);
670 }
671 
send_reply(struct connection * conn,enum xsd_sockmsg_type type,const void * data,unsigned int len)672 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
673 		const void *data, unsigned int len)
674 {
675 	struct buffered_data *bdata;
676 
677 	if ( len > XENSTORE_PAYLOAD_MAX ) {
678 		send_error(conn, E2BIG);
679 		return;
680 	}
681 
682 	/* Replies reuse the request buffer, events need a new one. */
683 	if (type != XS_WATCH_EVENT) {
684 		bdata = conn->in;
685 		/* Drop asynchronous responses, e.g. errors for watch events. */
686 		if (!bdata)
687 			return;
688 		bdata->inhdr = true;
689 		bdata->used = 0;
690 		conn->in = NULL;
691 	} else {
692 		/* Message is a child of the connection for auto-cleanup. */
693 		bdata = new_buffer(conn);
694 
695 		/*
696 		 * Allocation failure here is unfortunate: we have no way to
697 		 * tell anybody about it.
698 		 */
699 		if (!bdata)
700 			return;
701 	}
702 	if (len <= DEFAULT_BUFFER_SIZE)
703 		bdata->buffer = bdata->default_buffer;
704 	else
705 		bdata->buffer = talloc_array(bdata, char, len);
706 	if (!bdata->buffer) {
707 		if (type == XS_WATCH_EVENT) {
708 			/* Same as above: no way to tell someone. */
709 			talloc_free(bdata);
710 			return;
711 		}
712 		/* re-establish request buffer for sending ENOMEM. */
713 		conn->in = bdata;
714 		send_error(conn, ENOMEM);
715 		return;
716 	}
717 
718 	/* Update relevant header fields and fill in the message body. */
719 	bdata->hdr.msg.type = type;
720 	bdata->hdr.msg.len = len;
721 	memcpy(bdata->buffer, data, len);
722 
723 	/* Queue for later transmission. */
724 	list_add_tail(&bdata->list, &conn->out_list);
725 
726 	return;
727 }
728 
729 /* Some routines (write, mkdir, etc) just need a non-error return */
send_ack(struct connection * conn,enum xsd_sockmsg_type type)730 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
731 {
732 	send_reply(conn, type, "OK", sizeof("OK"));
733 }
734 
/*
 * Check that a node name consists only of ASCII letters, digits and the
 * characters '-', '/', '_' and '@'.  The empty string passes.
 */
static bool valid_chars(const char *node)
{
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* Valid iff the run of allowed characters reaches the terminator. */
	return node[strspn(node, allowed)] == '\0';
}
743 
is_valid_nodename(const char * node)744 bool is_valid_nodename(const char *node)
745 {
746 	/* Must start in /. */
747 	if (!strstarts(node, "/"))
748 		return false;
749 
750 	/* Cannot end in / (unless it's just "/"). */
751 	if (strends(node, "/") && !streq(node, "/"))
752 		return false;
753 
754 	/* No double //. */
755 	if (strstr(node, "//"))
756 		return false;
757 
758 	if (strlen(node) > XENSTORE_ABS_PATH_MAX)
759 		return false;
760 
761 	return valid_chars(node);
762 }
763 
764 /* We expect one arg in the input: return NULL otherwise.
765  * The payload must contain exactly one nul, at the end.
766  */
onearg(struct buffered_data * in)767 const char *onearg(struct buffered_data *in)
768 {
769 	if (!in->used || get_string(in, 0) != in->used)
770 		return NULL;
771 	return in->buffer;
772 }
773 
perms_to_strings(const void * ctx,const struct node_perms * perms,unsigned int * len)774 static char *perms_to_strings(const void *ctx, const struct node_perms *perms,
775 			      unsigned int *len)
776 {
777 	unsigned int i;
778 	char *strings = NULL;
779 	char buffer[MAX_STRLEN(unsigned int) + 1];
780 
781 	for (*len = 0, i = 0; i < perms->num; i++) {
782 		if (!xs_perm_to_string(&perms->p[i], buffer, sizeof(buffer)))
783 			return NULL;
784 
785 		strings = talloc_realloc(ctx, strings, char,
786 					 *len + strlen(buffer) + 1);
787 		if (!strings)
788 			return NULL;
789 		strcpy(strings + *len, buffer);
790 		*len += strlen(buffer) + 1;
791 	}
792 	return strings;
793 }
794 
/*
 * Turn a possibly relative node name into an absolute one.
 *
 * NULL and names starting with '/' or '@' are returned unchanged.  Any
 * other name is prefixed with the connection's implicit path in a string
 * allocated with ctx (NULL if that allocation fails); when the connection
 * has no implicit path the name is returned unchanged.
 */
char *canonicalize(struct connection *conn, const void *ctx, const char *node)
{
	const bool relative = node && node[0] != '/' && node[0] != '@';

	if (relative) {
		const char *prefix = get_implicit_path(conn);

		if (prefix)
			return talloc_asprintf(ctx, "%s/%s", prefix, node);
	}

	return (char *)node;
}
806 
/*
 * Canonicalize a node name and fetch the node with the required
 * permissions.
 *
 * The canonical name is stored in *canonical_name when the caller passes a
 * non-NULL pointer.  Returns NULL with errno set on failure.
 * Temporary memory allocations are done with ctx.
 *
 * canonicalize() returns NULL for a non-NULL name only when its allocation
 * failed; that case is reported as ENOMEM here, since passing the NULL on
 * to get_node() would turn it into a misleading EINVAL.
 */
static struct node *get_node_canonicalized(struct connection *conn,
					   const void *ctx,
					   const char *name,
					   char **canonical_name,
					   enum xs_perm_type perm)
{
	char *tmp_name;

	if (!canonical_name)
		canonical_name = &tmp_name;

	*canonical_name = canonicalize(conn, ctx, name);
	if (name && !*canonical_name) {
		errno = ENOMEM;
		return NULL;
	}

	return get_node(conn, ctx, *canonical_name, perm);
}
820 
send_directory(struct connection * conn,struct buffered_data * in)821 static int send_directory(struct connection *conn, struct buffered_data *in)
822 {
823 	struct node *node;
824 
825 	node = get_node_canonicalized(conn, in, onearg(in), NULL, XS_PERM_READ);
826 	if (!node)
827 		return errno;
828 
829 	send_reply(conn, XS_DIRECTORY, node->children, node->childlen);
830 
831 	return 0;
832 }
833 
send_directory_part(struct connection * conn,struct buffered_data * in)834 static int send_directory_part(struct connection *conn,
835 			       struct buffered_data *in)
836 {
837 	unsigned int off, len, maxlen, genlen;
838 	char *child, *data;
839 	struct node *node;
840 	char gen[24];
841 
842 	if (xs_count_strings(in->buffer, in->used) != 2)
843 		return EINVAL;
844 
845 	/* First arg is node name. */
846 	node = get_node_canonicalized(conn, in, in->buffer, NULL, XS_PERM_READ);
847 	if (!node)
848 		return errno;
849 
850 	/* Second arg is childlist offset. */
851 	off = atoi(in->buffer + strlen(in->buffer) + 1);
852 
853 	genlen = snprintf(gen, sizeof(gen), "%"PRIu64, node->generation) + 1;
854 
855 	/* Offset behind list: just return a list with an empty string. */
856 	if (off >= node->childlen) {
857 		gen[genlen] = 0;
858 		send_reply(conn, XS_DIRECTORY_PART, gen, genlen + 1);
859 		return 0;
860 	}
861 
862 	len = 0;
863 	maxlen = XENSTORE_PAYLOAD_MAX - genlen - 1;
864 	child = node->children + off;
865 
866 	while (len + strlen(child) < maxlen) {
867 		len += strlen(child) + 1;
868 		child += strlen(child) + 1;
869 		if (off + len == node->childlen)
870 			break;
871 	}
872 
873 	data = talloc_array(in, char, genlen + len + 1);
874 	if (!data)
875 		return ENOMEM;
876 
877 	memcpy(data, gen, genlen);
878 	memcpy(data + genlen, node->children + off, len);
879 	if (off + len == node->childlen) {
880 		data[genlen + len] = 0;
881 		len++;
882 	}
883 
884 	send_reply(conn, XS_DIRECTORY_PART, data, genlen + len);
885 
886 	return 0;
887 }
888 
do_read(struct connection * conn,struct buffered_data * in)889 static int do_read(struct connection *conn, struct buffered_data *in)
890 {
891 	struct node *node;
892 
893 	node = get_node_canonicalized(conn, in, onearg(in), NULL, XS_PERM_READ);
894 	if (!node)
895 		return errno;
896 
897 	send_reply(conn, XS_READ, node->data, node->datalen);
898 
899 	return 0;
900 }
901 
delete_node_single(struct connection * conn,struct node * node)902 static void delete_node_single(struct connection *conn, struct node *node)
903 {
904 	TDB_DATA key;
905 
906 	if (access_node(conn, node, NODE_ACCESS_DELETE, &key))
907 		return;
908 
909 	if (tdb_delete(tdb_ctx, key) != 0) {
910 		corrupt(conn, "Could not delete '%s'", node->name);
911 		return;
912 	}
913 
914 	domain_entry_dec(conn, node);
915 }
916 
/*
 * Return the last component of a node name, i.e. the part following the
 * final '/'.  The name must contain a '/' and must not be "/" itself.
 */
static char *basename(const char *name)
{
	char *last_slash = strrchr(name, '/');

	return &last_slash[1];
}
922 
construct_node(struct connection * conn,const void * ctx,const char * name)923 static struct node *construct_node(struct connection *conn, const void *ctx,
924 				   const char *name)
925 {
926 	const char *base;
927 	unsigned int baselen;
928 	struct node *parent, *node;
929 	char *children, *parentname = get_parent(ctx, name);
930 
931 	if (!parentname)
932 		return NULL;
933 
934 	/* If parent doesn't exist, create it. */
935 	parent = read_node(conn, parentname, parentname);
936 	if (!parent)
937 		parent = construct_node(conn, ctx, parentname);
938 	if (!parent)
939 		return NULL;
940 
941 	/* Add child to parent. */
942 	base = basename(name);
943 	baselen = strlen(base) + 1;
944 	children = talloc_array(ctx, char, parent->childlen + baselen);
945 	if (!children)
946 		goto nomem;
947 	memcpy(children, parent->children, parent->childlen);
948 	memcpy(children + parent->childlen, base, baselen);
949 	parent->children = children;
950 	parent->childlen += baselen;
951 
952 	/* Allocate node */
953 	node = talloc(ctx, struct node);
954 	if (!node)
955 		goto nomem;
956 	node->name = talloc_strdup(node, name);
957 	if (!node->name)
958 		goto nomem;
959 
960 	/* Inherit permissions, except unprivileged domains own what they create */
961 	node->perms.num = parent->perms.num;
962 	node->perms.p = talloc_memdup(node, parent->perms.p,
963 				      node->perms.num * sizeof(*node->perms.p));
964 	if (!node->perms.p)
965 		goto nomem;
966 	if (domain_is_unprivileged(conn))
967 		node->perms.p[0].id = conn->id;
968 
969 	/* No children, no data */
970 	node->children = node->data = NULL;
971 	node->childlen = node->datalen = 0;
972 	node->parent = parent;
973 	return node;
974 
975 nomem:
976 	errno = ENOMEM;
977 	return NULL;
978 }
979 
destroy_node(void * _node)980 static int destroy_node(void *_node)
981 {
982 	struct node *node = _node;
983 	TDB_DATA key;
984 
985 	if (streq(node->name, "/"))
986 		corrupt(NULL, "Destroying root node!");
987 
988 	key.dptr = (void *)node->name;
989 	key.dsize = strlen(node->name);
990 
991 	tdb_delete(tdb_ctx, key);
992 
993 	domain_entry_dec(talloc_parent(node), node);
994 
995 	return 0;
996 }
997 
create_node(struct connection * conn,const void * ctx,const char * name,void * data,unsigned int datalen)998 static struct node *create_node(struct connection *conn, const void *ctx,
999 				const char *name,
1000 				void *data, unsigned int datalen)
1001 {
1002 	struct node *node, *i;
1003 
1004 	node = construct_node(conn, ctx, name);
1005 	if (!node)
1006 		return NULL;
1007 
1008 	node->data = data;
1009 	node->datalen = datalen;
1010 
1011 	/*
1012 	 * We write out the nodes bottom up.
1013 	 * All new created nodes will have i->parent set, while the final
1014 	 * node will be already existing and won't have i->parent set.
1015 	 * New nodes are subject to quota handling.
1016 	 * Initially set a destructor for all new nodes removing them from
1017 	 * TDB again and undoing quota accounting for the case of an error
1018 	 * during the write loop.
1019 	 */
1020 	for (i = node; i; i = i->parent) {
1021 		/* i->parent is set for each new node, so check quota. */
1022 		if (i->parent &&
1023 		    domain_entry(conn) >= quota_nb_entry_per_domain) {
1024 			errno = ENOSPC;
1025 			return NULL;
1026 		}
1027 		if (write_node(conn, i, false))
1028 			return NULL;
1029 
1030 		/* Account for new node, set destructor for error case. */
1031 		if (i->parent) {
1032 			domain_entry_inc(conn, i);
1033 			talloc_set_destructor(i, destroy_node);
1034 		}
1035 	}
1036 
1037 	/* OK, now remove destructors so they stay around */
1038 	for (i = node; i->parent; i = i->parent)
1039 		talloc_set_destructor(i, NULL);
1040 	return node;
1041 }
1042 
1043 /* path, data... */
do_write(struct connection * conn,struct buffered_data * in)1044 static int do_write(struct connection *conn, struct buffered_data *in)
1045 {
1046 	unsigned int offset, datalen;
1047 	struct node *node;
1048 	char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
1049 	char *name;
1050 
1051 	/* Extra "strings" can be created by binary data. */
1052 	if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec))
1053 		return EINVAL;
1054 
1055 	offset = strlen(vec[0]) + 1;
1056 	datalen = in->used - offset;
1057 
1058 	node = get_node_canonicalized(conn, in, vec[0], &name, XS_PERM_WRITE);
1059 	if (!node) {
1060 		/* No permissions, invalid input? */
1061 		if (errno != ENOENT)
1062 			return errno;
1063 		node = create_node(conn, in, name, in->buffer + offset,
1064 				   datalen);
1065 		if (!node)
1066 			return errno;
1067 	} else {
1068 		node->data = in->buffer + offset;
1069 		node->datalen = datalen;
1070 		if (write_node(conn, node, false))
1071 			return errno;
1072 	}
1073 
1074 	fire_watches(conn, in, name, node, false, NULL);
1075 	send_ack(conn, XS_WRITE);
1076 
1077 	return 0;
1078 }
1079 
do_mkdir(struct connection * conn,struct buffered_data * in)1080 static int do_mkdir(struct connection *conn, struct buffered_data *in)
1081 {
1082 	struct node *node;
1083 	char *name;
1084 
1085 	node = get_node_canonicalized(conn, in, onearg(in), &name,
1086 				      XS_PERM_WRITE);
1087 
1088 	/* If it already exists, fine. */
1089 	if (!node) {
1090 		/* No permissions? */
1091 		if (errno != ENOENT)
1092 			return errno;
1093 		node = create_node(conn, in, name, NULL, 0);
1094 		if (!node)
1095 			return errno;
1096 		fire_watches(conn, in, name, node, false, NULL);
1097 	}
1098 	send_ack(conn, XS_MKDIR);
1099 
1100 	return 0;
1101 }
1102 
/*
 * Delete memory using memmove.
 *
 * Removes the `len` bytes starting at offset `off` from the `total`-byte
 * buffer `mem` by sliding the tail (everything from off + len up to total)
 * down to `off`.  The buffer is not resized; the last `len` bytes keep their
 * old contents.  The caller must ensure off + len <= total.
 */
static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
{
	/* Arithmetic on void * is a GNU extension; use a byte pointer. */
	char *bytes = mem;

	memmove(bytes + off, bytes + off + len, total - off - len);
}
1108 
remove_child_entry(struct connection * conn,struct node * node,size_t offset)1109 static void remove_child_entry(struct connection *conn, struct node *node,
1110 			       size_t offset)
1111 {
1112 	size_t childlen = strlen(node->children + offset);
1113 
1114 	memdel(node->children, offset, childlen + 1, node->childlen);
1115 	node->childlen -= childlen + 1;
1116 	if (write_node(conn, node, true))
1117 		corrupt(conn, "Can't update parent node '%s'", node->name);
1118 }
1119 
delete_child(struct connection * conn,struct node * node,const char * childname)1120 static void delete_child(struct connection *conn,
1121 			 struct node *node, const char *childname)
1122 {
1123 	unsigned int i;
1124 
1125 	for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
1126 		if (streq(node->children+i, childname)) {
1127 			remove_child_entry(conn, node, i);
1128 			return;
1129 		}
1130 	}
1131 	corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
1132 }
1133 
delete_node(struct connection * conn,const void * ctx,struct node * parent,struct node * node)1134 static int delete_node(struct connection *conn, const void *ctx,
1135 		       struct node *parent, struct node *node)
1136 {
1137 	char *name;
1138 
1139 	/* Delete children. */
1140 	while (node->childlen) {
1141 		struct node *child;
1142 
1143 		name = talloc_asprintf(node, "%s/%s", node->name,
1144 				       node->children);
1145 		child = name ? read_node(conn, node, name) : NULL;
1146 		if (child) {
1147 			if (delete_node(conn, ctx, node, child))
1148 				return errno;
1149 		} else {
1150 			trace("delete_node: Error deleting child '%s/%s'!\n",
1151 			      node->name, node->children);
1152 			/* Quit deleting. */
1153 			errno = ENOMEM;
1154 			return errno;
1155 		}
1156 		talloc_free(name);
1157 	}
1158 
1159 	fire_watches(conn, ctx, node->name, node, true, NULL);
1160 	delete_node_single(conn, node);
1161 	delete_child(conn, parent, basename(node->name));
1162 	talloc_free(node);
1163 
1164 	return 0;
1165 }
1166 
_rm(struct connection * conn,const void * ctx,struct node * node,const char * name)1167 static int _rm(struct connection *conn, const void *ctx, struct node *node,
1168 	       const char *name)
1169 {
1170 	/*
1171 	 * Deleting node by node, so the result is always consistent even in
1172 	 * case of a failure.
1173 	 */
1174 	struct node *parent;
1175 	char *parentname = get_parent(ctx, name);
1176 
1177 	if (!parentname)
1178 		return errno;
1179 
1180 	parent = read_node(conn, ctx, parentname);
1181 	if (!parent)
1182 		return (errno == ENOMEM) ? ENOMEM : EINVAL;
1183 	node->parent = parent;
1184 
1185 	/*
1186 	 * Fire the watches now, when we can still see the node permissions.
1187 	 * This fine as we are single threaded and the next possible read will
1188 	 * be handled only after the node has been really removed.
1189 	 */
1190 	fire_watches(conn, ctx, name, node, false, NULL);
1191 	return delete_node(conn, ctx, parent, node);
1192 }
1193 
1194 
do_rm(struct connection * conn,struct buffered_data * in)1195 static int do_rm(struct connection *conn, struct buffered_data *in)
1196 {
1197 	struct node *node;
1198 	int ret;
1199 	char *name;
1200 	char *parentname;
1201 
1202 	node = get_node_canonicalized(conn, in, onearg(in), &name,
1203 				      XS_PERM_WRITE);
1204 	if (!node) {
1205 		/* Didn't exist already?  Fine, if parent exists. */
1206 		if (errno == ENOENT) {
1207 			parentname = get_parent(in, name);
1208 			if (!parentname)
1209 				return errno;
1210 			node = read_node(conn, in, parentname);
1211 			if (node) {
1212 				send_ack(conn, XS_RM);
1213 				return 0;
1214 			}
1215 			/* Restore errno, just in case. */
1216 			if (errno != ENOMEM)
1217 				errno = ENOENT;
1218 		}
1219 		return errno;
1220 	}
1221 
1222 	if (streq(name, "/"))
1223 		return EINVAL;
1224 
1225 	ret = _rm(conn, in, node, name);
1226 	if (ret)
1227 		return ret;
1228 
1229 	send_ack(conn, XS_RM);
1230 
1231 	return 0;
1232 }
1233 
1234 
do_get_perms(struct connection * conn,struct buffered_data * in)1235 static int do_get_perms(struct connection *conn, struct buffered_data *in)
1236 {
1237 	struct node *node;
1238 	char *strings;
1239 	unsigned int len;
1240 
1241 	node = get_node_canonicalized(conn, in, onearg(in), NULL, XS_PERM_READ);
1242 	if (!node)
1243 		return errno;
1244 
1245 	strings = perms_to_strings(node, &node->perms, &len);
1246 	if (!strings)
1247 		return errno;
1248 
1249 	send_reply(conn, XS_GET_PERMS, strings, len);
1250 
1251 	return 0;
1252 }
1253 
do_set_perms(struct connection * conn,struct buffered_data * in)1254 static int do_set_perms(struct connection *conn, struct buffered_data *in)
1255 {
1256 	struct node_perms perms, old_perms;
1257 	char *name, *permstr;
1258 	struct node *node;
1259 
1260 	perms.num = xs_count_strings(in->buffer, in->used);
1261 	if (perms.num < 2)
1262 		return EINVAL;
1263 
1264 	perms.num--;
1265 	if (domain_is_unprivileged(conn) &&
1266 	    perms.num > quota_nb_perms_per_node)
1267 		return ENOSPC;
1268 
1269 	permstr = in->buffer + strlen(in->buffer) + 1;
1270 
1271 	perms.p = talloc_array(in, struct xs_permissions, perms.num);
1272 	if (!perms.p)
1273 		return ENOMEM;
1274 	if (!xs_strings_to_perms(perms.p, perms.num, permstr))
1275 		return errno;
1276 
1277 	/* First arg is node name. */
1278 	if (strstarts(in->buffer, "@")) {
1279 		if (set_perms_special(conn, in->buffer, &perms))
1280 			return errno;
1281 		send_ack(conn, XS_SET_PERMS);
1282 		return 0;
1283 	}
1284 
1285 	/* We must own node to do this (tools can do this too). */
1286 	node = get_node_canonicalized(conn, in, in->buffer, &name,
1287 				      XS_PERM_WRITE | XS_PERM_OWNER);
1288 	if (!node)
1289 		return errno;
1290 
1291 	/* Unprivileged domains may not change the owner. */
1292 	if (domain_is_unprivileged(conn) &&
1293 	    perms.p[0].id != node->perms.p[0].id)
1294 		return EPERM;
1295 
1296 	old_perms = node->perms;
1297 	domain_entry_dec(conn, node);
1298 	node->perms = perms;
1299 	domain_entry_inc(conn, node);
1300 
1301 	if (write_node(conn, node, false))
1302 		return errno;
1303 
1304 	fire_watches(conn, in, name, node, false, &old_perms);
1305 	send_ack(conn, XS_SET_PERMS);
1306 
1307 	return 0;
1308 }
1309 
1310 static struct {
1311 	const char *str;
1312 	int (*func)(struct connection *conn, struct buffered_data *in);
1313 	unsigned int flags;
1314 #define XS_FLAG_NOTID		(1U << 0)	/* Ignore transaction id. */
1315 #define XS_FLAG_PRIV		(1U << 1)	/* Privileged domain only. */
1316 } const wire_funcs[XS_TYPE_COUNT] = {
1317 	[XS_CONTROL]           =
1318 	    { "CONTROL",       do_control,      XS_FLAG_PRIV },
1319 	[XS_DIRECTORY]         = { "DIRECTORY",         send_directory },
1320 	[XS_READ]              = { "READ",              do_read },
1321 	[XS_GET_PERMS]         = { "GET_PERMS",         do_get_perms },
1322 	[XS_WATCH]             =
1323 	    { "WATCH",         do_watch,        XS_FLAG_NOTID },
1324 	[XS_UNWATCH]           =
1325 	    { "UNWATCH",       do_unwatch,      XS_FLAG_NOTID },
1326 	[XS_TRANSACTION_START] = { "TRANSACTION_START", do_transaction_start },
1327 	[XS_TRANSACTION_END]   = { "TRANSACTION_END",   do_transaction_end },
1328 	[XS_INTRODUCE]         =
1329 	    { "INTRODUCE",     do_introduce,    XS_FLAG_PRIV },
1330 	[XS_RELEASE]           =
1331 	    { "RELEASE",       do_release,      XS_FLAG_PRIV },
1332 	[XS_GET_DOMAIN_PATH]   = { "GET_DOMAIN_PATH",   do_get_domain_path },
1333 	[XS_WRITE]             = { "WRITE",             do_write },
1334 	[XS_MKDIR]             = { "MKDIR",             do_mkdir },
1335 	[XS_RM]                = { "RM",                do_rm },
1336 	[XS_SET_PERMS]         = { "SET_PERMS",         do_set_perms },
1337 	[XS_WATCH_EVENT]       = { "WATCH_EVENT",       NULL },
1338 	[XS_ERROR]             = { "ERROR",             NULL },
1339 	[XS_IS_DOMAIN_INTRODUCED] =
1340 	    { "IS_DOMAIN_INTRODUCED", do_is_domain_introduced, XS_FLAG_PRIV },
1341 	[XS_RESUME]            =
1342 	    { "RESUME",        do_resume,       XS_FLAG_PRIV },
1343 	[XS_SET_TARGET]        =
1344 	    { "SET_TARGET",    do_set_target,   XS_FLAG_PRIV },
1345 	[XS_RESET_WATCHES]     = { "RESET_WATCHES",     do_reset_watches },
1346 	[XS_DIRECTORY_PART]    = { "DIRECTORY_PART",    send_directory_part },
1347 };
1348 
1349 /*
1350  * Keep the connection alive but stop processing any new request or sending
1351  * reponse. This is to allow sending @releaseDomain watch event at the correct
1352  * moment and/or to allow the connection to restart (not yet implemented).
1353  *
1354  * All watches, transactions, buffers will be freed.
1355  */
ignore_connection(struct connection * conn)1356 static void ignore_connection(struct connection *conn)
1357 {
1358 	struct buffered_data *out, *tmp;
1359 
1360 	trace("CONN %p ignored\n", conn);
1361 
1362 	conn->is_ignored = true;
1363 	conn_delete_all_watches(conn);
1364 	conn_delete_all_transactions(conn);
1365 
1366 	list_for_each_entry_safe(out, tmp, &conn->out_list, list) {
1367 		list_del(&out->list);
1368 		talloc_free(out);
1369 	}
1370 
1371 	talloc_free(conn->in);
1372 	conn->in = NULL;
1373 }
1374 
sockmsg_string(enum xsd_sockmsg_type type)1375 static const char *sockmsg_string(enum xsd_sockmsg_type type)
1376 {
1377 	if ((unsigned int)type < ARRAY_SIZE(wire_funcs) && wire_funcs[type].str)
1378 		return wire_funcs[type].str;
1379 
1380 	return "**UNKNOWN**";
1381 }
1382 
1383 /* Process "in" for conn: "in" will vanish after this conversation, so
1384  * we can talloc off it for temporary variables.  May free "conn".
1385  */
process_message(struct connection * conn,struct buffered_data * in)1386 static void process_message(struct connection *conn, struct buffered_data *in)
1387 {
1388 	struct transaction *trans;
1389 	enum xsd_sockmsg_type type = in->hdr.msg.type;
1390 	int ret;
1391 
1392 	if ((unsigned int)type >= XS_TYPE_COUNT || !wire_funcs[type].func) {
1393 		eprintf("Client unknown operation %i", type);
1394 		send_error(conn, ENOSYS);
1395 		return;
1396 	}
1397 
1398 	if ((wire_funcs[type].flags & XS_FLAG_PRIV) &&
1399 	    domain_is_unprivileged(conn)) {
1400 		send_error(conn, EACCES);
1401 		return;
1402 	}
1403 
1404 	trans = (wire_funcs[type].flags & XS_FLAG_NOTID)
1405 		? NULL : transaction_lookup(conn, in->hdr.msg.tx_id);
1406 	if (IS_ERR(trans)) {
1407 		send_error(conn, -PTR_ERR(trans));
1408 		return;
1409 	}
1410 
1411 	assert(conn->transaction == NULL);
1412 	conn->transaction = trans;
1413 
1414 	ret = wire_funcs[type].func(conn, in);
1415 	if (ret)
1416 		send_error(conn, ret);
1417 
1418 	conn->transaction = NULL;
1419 }
1420 
consider_message(struct connection * conn)1421 static void consider_message(struct connection *conn)
1422 {
1423 	if (verbose)
1424 		xprintf("Got message %s len %i from %p\n",
1425 			sockmsg_string(conn->in->hdr.msg.type),
1426 			conn->in->hdr.msg.len, conn);
1427 
1428 	process_message(conn, conn->in);
1429 
1430 	assert(conn->in == NULL);
1431 }
1432 
/*
 * Read as much of the next request as is currently available on conn.
 *
 * A request is received in two phases tracked by in->inhdr: first the fixed
 * size header, then in->hdr.msg.len bytes of payload.  in->used counts the
 * bytes received so far in the current phase.  The function returns early
 * whenever a phase is still incomplete and is expected to be called again
 * when more input is available.  Once the payload is complete the message
 * is traced and passed to consider_message().
 *
 * Errors in reading, or an oversized payload length, mean we get out of sync
 * with the client, so the connection is marked as ignored.  Allocation
 * failures are not treated as errors: the function just returns and the
 * same step is retried on the next call.
 */
static void handle_input(struct connection *conn)
{
	int bytes;
	struct buffered_data *in;

	if (!conn->in) {
		conn->in = new_buffer(conn);
		/* In case of no memory just try it again next time. */
		if (!conn->in)
			return;
	}
	in = conn->in;

	/* Not finished header yet? */
	if (in->inhdr) {
		/*
		 * The header may already be complete here if a previous call
		 * failed to allocate the payload buffer below; in that case
		 * skip the read and only retry the allocation.
		 */
		if (in->used != sizeof(in->hdr)) {
			bytes = conn->read(conn, in->hdr.raw + in->used,
					   sizeof(in->hdr) - in->used);
			if (bytes < 0)
				goto bad_client;
			in->used += bytes;
			/* Header still incomplete: wait for more input. */
			if (in->used != sizeof(in->hdr))
				return;

			/* Refuse payloads larger than the protocol allows. */
			if (in->hdr.msg.len > XENSTORE_PAYLOAD_MAX) {
				syslog(LOG_ERR, "Client tried to feed us %i",
				       in->hdr.msg.len);
				goto bad_client;
			}
		}

		/* Small payloads use the embedded buffer, larger ones talloc. */
		if (in->hdr.msg.len <= DEFAULT_BUFFER_SIZE)
			in->buffer = in->default_buffer;
		else
			in->buffer = talloc_array(in, char, in->hdr.msg.len);
		/* In case of no memory just try it again next time. */
		if (!in->buffer)
			return;
		/* Switch to the payload phase; used now counts payload bytes. */
		in->used = 0;
		in->inhdr = false;
	}

	bytes = conn->read(conn, in->buffer + in->used,
			   in->hdr.msg.len - in->used);
	if (bytes < 0)
		goto bad_client;

	in->used += bytes;
	/* Payload still incomplete: wait for more input. */
	if (in->used != in->hdr.msg.len)
		return;

	trace_io(conn, in, 0);
	consider_message(conn);
	return;

bad_client:
	ignore_connection(conn);
}
1495 
/*
 * Flush queued replies for conn via write_messages(); if that reports
 * failure the connection is put into the ignored state.
 */
static void handle_output(struct connection *conn)
{
	if (write_messages(conn))
		return;

	/* Ignore the connection if an error occurred. */
	ignore_connection(conn);
}
1502 
/*
 * Allocate and register a new connection using the given write/read
 * callbacks.
 *
 * The connection is zero-initialised, starts without a file descriptor
 * (fd == -1) and without a poll slot, is writable and not ignored.  It is
 * appended to the global connection list and gets destroy_conn() as talloc
 * destructor.
 *
 * Returns the new connection, or NULL if the allocation failed.
 */
struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
{
	struct connection *conn;

	conn = talloc_zero(talloc_autofree_context(), struct connection);
	if (!conn)
		return NULL;

	/* I/O callbacks and descriptor state. */
	conn->write = write;
	conn->read = read;
	conn->fd = -1;
	conn->pollfd_idx = -1;

	/* Initial connection state. */
	conn->can_write = true;
	conn->is_ignored = false;
	conn->transaction_started = 0;

	INIT_LIST_HEAD(&conn->out_list);
	INIT_LIST_HEAD(&conn->watches);
	INIT_LIST_HEAD(&conn->transaction_list);

	list_add_tail(&conn->list, &connections);
	talloc_set_destructor(conn, destroy_conn);
	trace_create(conn, "connection");

	return conn;
}
1527 
1528 #ifdef NO_SOCKETS
/*
 * NO_SOCKETS build: there are no listening sockets, so accepting a
 * connection is a no-op.  Both arguments are ignored.
 */
static void accept_connection(int sock, bool canwrite)
{
}
1532 #else
writefd(struct connection * conn,const void * data,unsigned int len)1533 static int writefd(struct connection *conn, const void *data, unsigned int len)
1534 {
1535 	int rc;
1536 
1537 	while ((rc = write(conn->fd, data, len)) < 0) {
1538 		if (errno == EAGAIN) {
1539 			rc = 0;
1540 			break;
1541 		}
1542 		if (errno != EINTR)
1543 			break;
1544 	}
1545 
1546 	return rc;
1547 }
1548 
readfd(struct connection * conn,void * data,unsigned int len)1549 static int readfd(struct connection *conn, void *data, unsigned int len)
1550 {
1551 	int rc;
1552 
1553 	while ((rc = read(conn->fd, data, len)) < 0) {
1554 		if (errno == EAGAIN) {
1555 			rc = 0;
1556 			break;
1557 		}
1558 		if (errno != EINTR)
1559 			break;
1560 	}
1561 
1562 	/* Reading zero length means we're done with this connection. */
1563 	if ((rc == 0) && (len != 0)) {
1564 		errno = EBADF;
1565 		rc = -1;
1566 	}
1567 
1568 	return rc;
1569 }
1570 
accept_connection(int sock,bool canwrite)1571 static void accept_connection(int sock, bool canwrite)
1572 {
1573 	int fd;
1574 	struct connection *conn;
1575 
1576 	fd = accept(sock, NULL, NULL);
1577 	if (fd < 0)
1578 		return;
1579 
1580 	conn = new_connection(writefd, readfd);
1581 	if (conn) {
1582 		conn->fd = fd;
1583 		conn->can_write = canwrite;
1584 	} else
1585 		close(fd);
1586 }
1587 #endif
1588 
/*
 * Flags passed to tdb_open_ex() by setup_structure().  Zero by default;
 * main() sets TDB_INTERNAL|TDB_NOLOCK for the internal-db option.
 */
static int tdb_flags;
1590 
1591 /* We create initial nodes manually. */
manual_node(const char * name,const char * child)1592 static void manual_node(const char *name, const char *child)
1593 {
1594 	struct node *node;
1595 	struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
1596 
1597 	node = talloc_zero(NULL, struct node);
1598 	if (!node)
1599 		barf_perror("Could not allocate initial node %s", name);
1600 
1601 	node->name = name;
1602 	node->perms.p = &perms;
1603 	node->perms.num = 1;
1604 	node->children = (char *)child;
1605 	if (child)
1606 		node->childlen = strlen(child) + 1;
1607 
1608 	if (write_node(NULL, node, false))
1609 		barf_perror("Could not create initial node %s", name);
1610 	talloc_free(node);
1611 }
1612 
/*
 * Log callback handed to tdb_open_ex().
 *
 * Formats the message and sends it to the trace log and syslog (LOG_ERR),
 * and additionally to xprintf() in verbose mode.  `tdb` and `level` are not
 * used.  If the message cannot be allocated, a fixed failure notice is
 * logged instead.
 */
static void tdb_logger(TDB_CONTEXT *tdb, int level, const char * fmt, ...)
{
	va_list args;
	char *msg;

	va_start(args, fmt);
	msg = talloc_vasprintf(NULL, fmt, args);
	va_end(args);

	if (!msg) {
		trace("talloc failure during logging\n");
		syslog(LOG_ERR, "talloc failure during logging\n");
		return;
	}

	trace("TDB: %s\n", msg);
	syslog(LOG_ERR, "TDB: %s", msg);
	if (verbose)
		xprintf("TDB: %s", msg);

	talloc_free(msg);
}
1633 
setup_structure(void)1634 static void setup_structure(void)
1635 {
1636 	char *tdbname;
1637 	tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
1638 	if (!tdbname)
1639 		barf_perror("Could not create tdbname");
1640 
1641 	if (!(tdb_flags & TDB_INTERNAL))
1642 		unlink(tdbname);
1643 
1644 	tdb_ctx = tdb_open_ex(tdbname, 7919, tdb_flags, O_RDWR|O_CREAT|O_EXCL,
1645 			      0640, &tdb_logger, NULL);
1646 	if (!tdb_ctx)
1647 		barf_perror("Could not create tdb file %s", tdbname);
1648 
1649 	manual_node("/", "tool");
1650 	manual_node("/tool", "xenstored");
1651 	manual_node("/tool/xenstored", NULL);
1652 
1653 	check_store();
1654 }
1655 
1656 
/*
 * Hash callback for string-keyed hashtables.
 *
 * `k` is a NUL-terminated string.  Starting from 5381, every character is
 * folded in as hash = hash * 33 + c (unsigned arithmetic, wrapping).
 */
static unsigned int hash_from_key_fn(void *k)
{
	const char *p;
	unsigned int hash = 5381;

	for (p = k; *p != '\0'; p++)
		hash = hash * 33 + (unsigned int)*p;

	return hash;
}
1668 
1669 
/*
 * Key comparison callback for string-keyed hashtables: returns 1 if both
 * NUL-terminated strings are equal, 0 otherwise.
 */
static int keys_equal_fn(void *key1, void *key2)
{
	const char *lhs = key1;
	const char *rhs = key2;

	return !strcmp(lhs, rhs);
}
1674 
1675 
/*
 * Build the full path of child `s2` below parent path `s1`.
 *
 * The root "/" is special-cased so that the result is "/child" rather than
 * "//child".  The string is allocated with talloc (NULL context); NULL is
 * returned if the allocation fails.
 */
static char *child_name(const char *s1, const char *s2)
{
	if (strcmp(s1, "/") == 0)
		return talloc_asprintf(NULL, "/%s", s2);

	return talloc_asprintf(NULL, "%s/%s", s1, s2);
}
1685 
1686 
/*
 * Record `str` as a key in `hash` (the stored value is always (void *)1).
 *
 * The key is a private malloc()ed copy of `str`.  Returns the result of
 * hashtable_insert() (non-zero on success) or 0 if memory ran out.
 * If the insertion fails the copy is released again instead of being leaked.
 */
int remember_string(struct hashtable *hash, const char *str)
{
	size_t size = strlen(str) + 1;
	char *k = malloc(size);
	int rc;

	if (!k)
		return 0;
	memcpy(k, str, size);

	rc = hashtable_insert(hash, k, (void *)1);
	/* On failure the table did not take ownership of the key. */
	if (!rc)
		free(k);

	return rc;
}
1696 
1697 
1698 /**
1699  * A node has a children field that names the children of the node, separated
1700  * by NULs.  We check whether there are entries in there that are duplicated
1701  * (and if so, delete the second one), and whether there are any that do not
1702  * have a corresponding child node (and if so, delete them).  Each valid child
1703  * is then recursively checked.
1704  *
1705  * No deleting is performed if the recovery flag is cleared (i.e. -R was
1706  * passed on the command line).
1707  *
1708  * As we go, we record each node in the given reachable hashtable.  These
1709  * entries will be used later in clean_store.
1710  */
check_store_(const char * name,struct hashtable * reachable)1711 static int check_store_(const char *name, struct hashtable *reachable)
1712 {
1713 	struct node *node = read_node(NULL, name, name);
1714 	int ret = 0;
1715 
1716 	if (node) {
1717 		size_t i = 0;
1718 
1719 		struct hashtable * children =
1720 			create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1721 
1722 		if (!remember_string(reachable, name)) {
1723 			hashtable_destroy(children, 0);
1724 			log("check_store: ENOMEM");
1725 			return ENOMEM;
1726 		}
1727 
1728 		while (i < node->childlen && !ret) {
1729 			struct node *childnode;
1730 			size_t childlen = strlen(node->children + i);
1731 			char * childname = child_name(node->name,
1732 						      node->children + i);
1733 
1734 			if (!childname) {
1735 				log("check_store: ENOMEM");
1736 				ret = ENOMEM;
1737 				break;
1738 			}
1739 			childnode = read_node(NULL, childname, childname);
1740 
1741 			if (childnode) {
1742 				if (hashtable_search(children, childname)) {
1743 					log("check_store: '%s' is duplicated!",
1744 					    childname);
1745 
1746 					if (recovery) {
1747 						remove_child_entry(NULL, node,
1748 								   i);
1749 						i -= childlen + 1;
1750 					}
1751 				}
1752 				else {
1753 					if (!remember_string(children,
1754 							     childname)) {
1755 						log("check_store: ENOMEM");
1756 						talloc_free(childnode);
1757 						talloc_free(childname);
1758 						ret = ENOMEM;
1759 						break;
1760 					}
1761 					ret = check_store_(childname,
1762 							   reachable);
1763 				}
1764 			} else if (errno != ENOMEM) {
1765 				log("check_store: No child '%s' found!\n",
1766 				    childname);
1767 
1768 				if (recovery) {
1769 					remove_child_entry(NULL, node, i);
1770 					i -= childlen + 1;
1771 				}
1772 			} else {
1773 				log("check_store: ENOMEM");
1774 				ret = ENOMEM;
1775 			}
1776 
1777 			talloc_free(childnode);
1778 			talloc_free(childname);
1779 			i += childlen + 1;
1780 		}
1781 
1782 		hashtable_destroy(children, 0 /* Don't free values (they are
1783 						 all (void *)1) */);
1784 		talloc_free(node);
1785 	} else if (errno != ENOMEM) {
1786 		/* Impossible, because no database should ever be without the
1787 		   root, and otherwise, we've just checked in our caller
1788 		   (which made a recursive call to get here). */
1789 
1790 		log("check_store: No child '%s' found: impossible!", name);
1791 	} else {
1792 		log("check_store: ENOMEM");
1793 		ret = ENOMEM;
1794 	}
1795 
1796 	return ret;
1797 }
1798 
1799 
1800 /**
1801  * Helper to clean_store below.
1802  */
clean_store_(TDB_CONTEXT * tdb,TDB_DATA key,TDB_DATA val,void * private)1803 static int clean_store_(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA val,
1804 			void *private)
1805 {
1806 	struct hashtable *reachable = private;
1807 	char *slash;
1808 	char * name = talloc_strndup(NULL, key.dptr, key.dsize);
1809 
1810 	if (!name) {
1811 		log("clean_store: ENOMEM");
1812 		return 1;
1813 	}
1814 
1815 	if (name[0] != '/') {
1816 		slash = strchr(name, '/');
1817 		if (slash)
1818 			*slash = 0;
1819 	}
1820 	if (!hashtable_search(reachable, name)) {
1821 		log("clean_store: '%s' is orphaned!", name);
1822 		if (recovery) {
1823 			tdb_delete(tdb, key);
1824 		}
1825 	}
1826 
1827 	talloc_free(name);
1828 
1829 	return 0;
1830 }
1831 
1832 
1833 /**
1834  * Given the list of reachable nodes, iterate over the whole store, and
1835  * remove any that were not reached.
1836  */
clean_store(struct hashtable * reachable)1837 static void clean_store(struct hashtable *reachable)
1838 {
1839 	tdb_traverse(tdb_ctx, &clean_store_, reachable);
1840 }
1841 
1842 
check_store(void)1843 void check_store(void)
1844 {
1845 	char * root = talloc_strdup(NULL, "/");
1846 	struct hashtable * reachable =
1847 		create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1848 
1849 	if (!reachable) {
1850 		log("check_store: ENOMEM");
1851 		return;
1852 	}
1853 
1854 	log("Checking store ...");
1855 	if (!check_store_(root, reachable) &&
1856 	    !check_transactions(reachable))
1857 		clean_store(reachable);
1858 	log("Checking store complete.");
1859 
1860 	hashtable_destroy(reachable, 0 /* Don't free values (they are all
1861 					  (void *)1) */);
1862 	talloc_free(root);
1863 }
1864 
1865 
1866 /* Something is horribly wrong: check the store. */
corrupt(struct connection * conn,const char * fmt,...)1867 void corrupt(struct connection *conn, const char *fmt, ...)
1868 {
1869 	va_list arglist;
1870 	char *str;
1871 	int saved_errno = errno;
1872 
1873 	va_start(arglist, fmt);
1874 	str = talloc_vasprintf(NULL, fmt, arglist);
1875 	va_end(arglist);
1876 
1877 	log("corruption detected by connection %i: err %s: %s",
1878 	    conn ? (int)conn->id : -1, strerror(saved_errno), str);
1879 
1880 	check_store();
1881 }
1882 
1883 #ifndef NO_SOCKETS
destroy_fds(void)1884 static void destroy_fds(void)
1885 {
1886 	if (sock >= 0)
1887 		close(sock);
1888 	if (ro_sock >= 0)
1889 		close(ro_sock);
1890 }
1891 
init_sockets(void)1892 static void init_sockets(void)
1893 {
1894 	struct sockaddr_un addr;
1895 	const char *soc_str = xs_daemon_socket();
1896 	const char *soc_str_ro = xs_daemon_socket_ro();
1897 
1898 	/* Create sockets for them to listen to. */
1899 	atexit(destroy_fds);
1900 	sock = socket(PF_UNIX, SOCK_STREAM, 0);
1901 	if (sock < 0)
1902 		barf_perror("Could not create socket");
1903 	ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1904 	if (ro_sock < 0)
1905 		barf_perror("Could not create socket");
1906 
1907 	/* FIXME: Be more sophisticated, don't mug running daemon. */
1908 	unlink(soc_str);
1909 	unlink(soc_str_ro);
1910 
1911 	addr.sun_family = AF_UNIX;
1912 
1913 	if(strlen(soc_str) >= sizeof(addr.sun_path))
1914 		barf_perror("socket string '%s' too long", soc_str);
1915 	strcpy(addr.sun_path, soc_str);
1916 	if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1917 		barf_perror("Could not bind socket to %s", soc_str);
1918 
1919 	if(strlen(soc_str_ro) >= sizeof(addr.sun_path))
1920 		barf_perror("socket string '%s' too long", soc_str_ro);
1921 	strcpy(addr.sun_path, soc_str_ro);
1922 	if (bind(ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1923 		barf_perror("Could not bind socket to %s", soc_str_ro);
1924 
1925 	if (chmod(soc_str, 0600) != 0
1926 	    || chmod(soc_str_ro, 0660) != 0)
1927 		barf_perror("Could not chmod sockets");
1928 
1929 	if (listen(sock, 1) != 0 || listen(ro_sock, 1) != 0)
1930 		barf_perror("Could not listen on sockets");
1931 }
1932 #endif
1933 
/*
 * Print the command line help text to stderr.
 */
static void usage(void)
{
	static const char help_text[] =
"Usage:\n"
"\n"
"  xenstored <options>\n"
"\n"
"where options may include:\n"
"\n"
"  -D, --no-domain-init    to state that xenstored should not initialise dom0,\n"
"  -F, --pid-file <file>   giving a file for the daemon's pid to be written,\n"
"  -H, --help              to output this message,\n"
"  -N, --no-fork           to request that the daemon does not fork,\n"
"  -P, --output-pid        to request that the pid of the daemon is output,\n"
"  -T, --trace-file <file> giving the file for logging, and\n"
"  -E, --entry-nb <nb>     limit the number of entries per domain,\n"
"  -S, --entry-size <size> limit the size of entry per domain, and\n"
"  -W, --watch-nb <nb>     limit the number of watches per domain,\n"
"  -t, --transaction <nb>  limit the number of transaction allowed per domain,\n"
"  -A, --perm-nb <nb>      limit the number of permissions per node,\n"
"  -R, --no-recovery       to request that no recovery should be attempted when\n"
"                          the store is corrupted (debug only),\n"
"  -I, --internal-db       store database in memory, not on disk\n"
"  -V, --verbose           to request verbose execution.\n";

	/* The text contains no conversion specifiers, so emit it verbatim. */
	fputs(help_text, stderr);
}
1959 
1960 
/*
 * Long option table passed to getopt_long() in main().  Each entry maps a
 * long option name to the option character handled in main()'s switch;
 * the second field says whether the option takes an argument.
 */
static struct option options[] = {
	{ "no-domain-init", no_argument,       NULL, 'D' },
	{ "entry-nb",       required_argument, NULL, 'E' },
	{ "pid-file",       required_argument, NULL, 'F' },
	{ "event",          required_argument, NULL, 'e' },
	{ "master-domid",   required_argument, NULL, 'm' },
	{ "help",           no_argument,       NULL, 'H' },
	{ "no-fork",        no_argument,       NULL, 'N' },
	{ "priv-domid",     required_argument, NULL, 'p' },
	{ "output-pid",     no_argument,       NULL, 'P' },
	{ "entry-size",     required_argument, NULL, 'S' },
	{ "trace-file",     required_argument, NULL, 'T' },
	{ "transaction",    required_argument, NULL, 't' },
	{ "perm-nb",        required_argument, NULL, 'A' },
	{ "no-recovery",    no_argument,       NULL, 'R' },
	{ "internal-db",    no_argument,       NULL, 'I' },
	{ "verbose",        no_argument,       NULL, 'V' },
	{ "watch-nb",       required_argument, NULL, 'W' },
	/* Terminating all-zero entry. */
	{ NULL, 0, NULL, 0 }
};
1980 
/* Declared here, defined elsewhere (not visible in this part of the file). */
extern void dump_conn(struct connection *conn);
/* Set from the "master-domid" option ('m') in main(); defaults to 0. */
int dom0_domid = 0;
/* Set from the "event" option ('e') in main(); defaults to 0. */
int dom0_event = 0;
/* Set from the "priv-domid" option ('p') in main(); defaults to 0. */
int priv_domid = 0;
1985 
main(int argc,char * argv[])1986 int main(int argc, char *argv[])
1987 {
1988 	int opt;
1989 	int sock_pollfd_idx = -1, ro_sock_pollfd_idx = -1;
1990 	bool dofork = true;
1991 	bool outputpid = false;
1992 	bool no_domain_init = false;
1993 	const char *pidfile = NULL;
1994 	int timeout;
1995 
1996 
1997 	while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:A:T:RVW:", options,
1998 				  NULL)) != -1) {
1999 		switch (opt) {
2000 		case 'D':
2001 			no_domain_init = true;
2002 			break;
2003 		case 'E':
2004 			quota_nb_entry_per_domain = strtol(optarg, NULL, 10);
2005 			break;
2006 		case 'F':
2007 			pidfile = optarg;
2008 			break;
2009 		case 'H':
2010 			usage();
2011 			return 0;
2012 		case 'N':
2013 			dofork = false;
2014 			break;
2015 		case 'P':
2016 			outputpid = true;
2017 			break;
2018 		case 'R':
2019 			recovery = false;
2020 			break;
2021 		case 'S':
2022 			quota_max_entry_size = strtol(optarg, NULL, 10);
2023 			break;
2024 		case 't':
2025 			quota_max_transaction = strtol(optarg, NULL, 10);
2026 			break;
2027 		case 'T':
2028 			tracefile = optarg;
2029 			break;
2030 		case 'I':
2031 			tdb_flags = TDB_INTERNAL|TDB_NOLOCK;
2032 			break;
2033 		case 'V':
2034 			verbose = true;
2035 			break;
2036 		case 'W':
2037 			quota_nb_watch_per_domain = strtol(optarg, NULL, 10);
2038 			break;
2039 		case 'A':
2040 			quota_nb_perms_per_node = strtol(optarg, NULL, 10);
2041 			break;
2042 		case 'e':
2043 			dom0_event = strtol(optarg, NULL, 10);
2044 			break;
2045 		case 'm':
2046 			dom0_domid = strtol(optarg, NULL, 10);
2047 			break;
2048 		case 'p':
2049 			priv_domid = strtol(optarg, NULL, 10);
2050 			break;
2051 		}
2052 	}
2053 	if (optind != argc)
2054 		barf("%s: No arguments desired", argv[0]);
2055 
2056 	reopen_log();
2057 
2058 	/* make sure xenstored directories exist */
2059 	/* Errors ignored here, will be reported when we open files */
2060 	mkdir(xs_daemon_rundir(), 0755);
2061 	mkdir(xs_daemon_rootdir(), 0755);
2062 
2063 	if (dofork) {
2064 		openlog("xenstored", 0, LOG_DAEMON);
2065 		daemonize();
2066 	}
2067 	if (pidfile)
2068 		write_pidfile(pidfile);
2069 
2070 	/* Talloc leak reports go to stderr, which is closed if we fork. */
2071 	if (!dofork)
2072 		talloc_enable_leak_report_full();
2073 
2074 	/* Don't kill us with SIGPIPE. */
2075 	signal(SIGPIPE, SIG_IGN);
2076 
2077 	talloc_enable_null_tracking();
2078 
2079 #ifndef NO_SOCKETS
2080 	init_sockets();
2081 #endif
2082 
2083 	init_pipe(reopen_log_pipe);
2084 
2085 	/* Setup the database */
2086 	setup_structure();
2087 
2088 	/* Listen to hypervisor. */
2089 	if (!no_domain_init)
2090 		domain_init();
2091 
2092 	/* Restore existing connections. */
2093 	restore_existing_connections();
2094 
2095 	if (outputpid) {
2096 		printf("%ld\n", (long)getpid());
2097 		fflush(stdout);
2098 	}
2099 
2100 	/* redirect to /dev/null now we're ready to accept connections */
2101 	if (dofork)
2102 		finish_daemonize();
2103 
2104 	signal(SIGHUP, trigger_reopen_log);
2105 	if (tracefile)
2106 		tracefile = talloc_strdup(NULL, tracefile);
2107 
2108 	/* Get ready to listen to the tools. */
2109 	initialize_fds(&sock_pollfd_idx, &ro_sock_pollfd_idx, &timeout);
2110 
2111 	/* Tell the kernel we're up and running. */
2112 	xenbus_notify_running();
2113 
2114 #if defined(XEN_SYSTEMD_ENABLED)
2115 	sd_notify(1, "READY=1");
2116 	fprintf(stderr, SD_NOTICE "xenstored is ready\n");
2117 #endif
2118 
2119 	/* Main loop. */
2120 	for (;;) {
2121 		struct connection *conn, *next;
2122 
2123 		if (poll(fds, nr_fds, timeout) < 0) {
2124 			if (errno == EINTR)
2125 				continue;
2126 			barf_perror("Poll failed");
2127 		}
2128 
2129 		if (reopen_log_pipe0_pollfd_idx != -1) {
2130 			if (fds[reopen_log_pipe0_pollfd_idx].revents
2131 			    & ~POLLIN) {
2132 				close(reopen_log_pipe[0]);
2133 				close(reopen_log_pipe[1]);
2134 				init_pipe(reopen_log_pipe);
2135 			} else if (fds[reopen_log_pipe0_pollfd_idx].revents
2136 				   & POLLIN) {
2137 				char c;
2138 				if (read(reopen_log_pipe[0], &c, 1) != 1)
2139 					barf_perror("read failed");
2140 				reopen_log();
2141 			}
2142 			reopen_log_pipe0_pollfd_idx = -1;
2143 		}
2144 
2145 		if (sock_pollfd_idx != -1) {
2146 			if (fds[sock_pollfd_idx].revents & ~POLLIN) {
2147 				barf_perror("sock poll failed");
2148 				break;
2149 			} else if (fds[sock_pollfd_idx].revents & POLLIN) {
2150 				accept_connection(sock, true);
2151 				sock_pollfd_idx = -1;
2152 			}
2153 		}
2154 
2155 		if (ro_sock_pollfd_idx != -1) {
2156 			if (fds[ro_sock_pollfd_idx].revents & ~POLLIN) {
2157 				barf_perror("ro sock poll failed");
2158 				break;
2159 			} else if (fds[ro_sock_pollfd_idx].revents & POLLIN) {
2160 				accept_connection(ro_sock, false);
2161 				ro_sock_pollfd_idx = -1;
2162 			}
2163 		}
2164 
2165 		if (xce_pollfd_idx != -1) {
2166 			if (fds[xce_pollfd_idx].revents & ~POLLIN) {
2167 				barf_perror("xce_handle poll failed");
2168 				break;
2169 			} else if (fds[xce_pollfd_idx].revents & POLLIN) {
2170 				handle_event();
2171 				xce_pollfd_idx = -1;
2172 			}
2173 		}
2174 
2175 		next = list_entry(connections.next, typeof(*conn), list);
2176 		if (&next->list != &connections)
2177 			talloc_increase_ref_count(next);
2178 		while (&next->list != &connections) {
2179 			conn = next;
2180 
2181 			next = list_entry(conn->list.next,
2182 					  typeof(*conn), list);
2183 			if (&next->list != &connections)
2184 				talloc_increase_ref_count(next);
2185 
2186 			if (conn->domain) {
2187 				if (domain_can_read(conn))
2188 					handle_input(conn);
2189 				if (talloc_free(conn) == 0)
2190 					continue;
2191 
2192 				talloc_increase_ref_count(conn);
2193 				if (domain_can_write(conn) &&
2194 				    !list_empty(&conn->out_list))
2195 					handle_output(conn);
2196 				if (talloc_free(conn) == 0)
2197 					continue;
2198 			} else {
2199 				if (conn->pollfd_idx != -1) {
2200 					if (fds[conn->pollfd_idx].revents
2201 					    & ~(POLLIN|POLLOUT))
2202 						talloc_free(conn);
2203 					else if ((fds[conn->pollfd_idx].revents
2204 						  & POLLIN) &&
2205 						 !conn->is_ignored)
2206 						handle_input(conn);
2207 				}
2208 				if (talloc_free(conn) == 0)
2209 					continue;
2210 
2211 				talloc_increase_ref_count(conn);
2212 
2213 				if (conn->pollfd_idx != -1) {
2214 					if (fds[conn->pollfd_idx].revents
2215 					    & ~(POLLIN|POLLOUT))
2216 						talloc_free(conn);
2217 					else if ((fds[conn->pollfd_idx].revents
2218 						  & POLLOUT) &&
2219 						 !conn->is_ignored)
2220 						handle_output(conn);
2221 				}
2222 				if (talloc_free(conn) == 0)
2223 					continue;
2224 
2225 				conn->pollfd_idx = -1;
2226 			}
2227 		}
2228 
2229 		initialize_fds(&sock_pollfd_idx, &ro_sock_pollfd_idx, &timeout);
2230 	}
2231 }
2232 
2233 /*
2234  * Local variables:
2235  *  mode: C
2236  *  c-file-style: "linux"
2237  *  indent-tabs-mode: t
2238  *  c-basic-offset: 8
2239  *  tab-width: 8
2240  * End:
2241  */
2242