1 /******************************************************************************
2 * Argo : Hypervisor-Mediated data eXchange
3 *
4 * Derived from v4v, the version 2 of v2v.
5 *
6 * Copyright (c) 2010, Citrix Systems
7 * Copyright (c) 2018-2019 BAE Systems
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 #include <xen/argo.h>
20 #include <xen/domain.h>
21 #include <xen/domain_page.h>
22 #include <xen/errno.h>
23 #include <xen/event.h>
24 #include <xen/guest_access.h>
25 #include <xen/lib.h>
26 #include <xen/nospec.h>
27 #include <xen/param.h>
28 #include <xen/sched.h>
29 #include <xen/time.h>
30 #include <xsm/xsm.h>
31
32 #include <public/argo.h>
33
34 #ifdef CONFIG_COMPAT
35 #include <compat/argo.h>
36 CHECK_argo_addr;
37 #undef CHECK_argo_addr
38 #define CHECK_argo_addr struct xen_argo_addr
39 CHECK_argo_register_ring;
40 CHECK_argo_ring;
41 CHECK_argo_ring_data_ent;
42 #undef CHECK_argo_ring_data_ent
43 #define CHECK_argo_ring_data_ent struct xen_argo_ring_data_ent
44 CHECK_argo_ring_data;
45 CHECK_argo_ring_message_header;
46 CHECK_argo_unregister_ring;
47 CHECK_argo_send_addr;
48 #endif
49
50 #define MAX_RINGS_PER_DOMAIN 128U
51 #define MAX_NOTIFY_COUNT 256U
52 #define MAX_PENDING_PER_RING 32U
53
54 /* All messages on the ring are padded to a multiple of the slot size. */
55 #define ROUNDUP_MESSAGE(a) ROUNDUP((a), XEN_ARGO_MSG_SLOT_SIZE)
56
57 /* The maximum size of a message that may be sent on the largest Argo ring. */
58 #define MAX_ARGO_MESSAGE_SIZE ((XEN_ARGO_MAX_RING_SIZE) - \
59 (sizeof(struct xen_argo_ring_message_header)) - ROUNDUP_MESSAGE(1))
60
61 /* Number of PAGEs needed to hold a ring of a given size in bytes */
62 #define NPAGES_RING(ring_len) \
63 (ROUNDUP((ROUNDUP_MESSAGE(ring_len) + sizeof(xen_argo_ring_t)), PAGE_SIZE) \
64 >> PAGE_SHIFT)
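/*
 * Illustrative worked example (an assumption for clarity, not normative):
 * with a 4096-byte PAGE_SIZE and a requested ring_len of one page,
 * NPAGES_RING evaluates to 2, since the xen_argo_ring_t header prepended to
 * the ring data (assumed here to be larger than zero and no larger than a
 * page) pushes the rounded-up total past a single page.
 */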
65
66 DEFINE_XEN_GUEST_HANDLE(xen_argo_addr_t);
67 DEFINE_XEN_GUEST_HANDLE(xen_argo_gfn_t);
68 DEFINE_XEN_GUEST_HANDLE(xen_argo_iov_t);
69 DEFINE_XEN_GUEST_HANDLE(xen_argo_register_ring_t);
70 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_t);
71 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_t);
72 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t);
73 DEFINE_XEN_GUEST_HANDLE(xen_argo_send_addr_t);
74 DEFINE_XEN_GUEST_HANDLE(xen_argo_unregister_ring_t);
75 #ifdef CONFIG_COMPAT
76 DEFINE_COMPAT_HANDLE(compat_argo_iov_t);
77 #endif
78
79 static bool __read_mostly opt_argo;
80 static bool __read_mostly opt_argo_mac_permissive;
81
82 static int __init parse_argo(const char *s)
83 {
84 const char *ss;
85 int val, rc = 0;
86
87 do {
88 ss = strchr(s, ',');
89 if ( !ss )
90 ss = strchr(s, '\0');
91
92 if ( (val = parse_bool(s, ss)) >= 0 )
93 opt_argo = val;
94 else if ( (val = parse_boolean("mac-permissive", s, ss)) >= 0 )
95 opt_argo_mac_permissive = val;
96 else
97 rc = -EINVAL;
98
99 s = ss + 1;
100 } while ( *ss );
101
102 return rc;
103 }
104 custom_param("argo", parse_argo);
105
106 typedef struct argo_ring_id
107 {
108 xen_argo_port_t aport;
109 domid_t partner_id;
110 domid_t domain_id;
111 } argo_ring_id;
112
113 /* Data about a domain's own ring that it has registered */
114 struct argo_ring_info
115 {
116 /* next node in the hash, protected by rings_L2 */
117 struct list_head node;
118 /* this ring's id, protected by rings_L2 */
119 struct argo_ring_id id;
120 /* L3, the ring_info lock: protects the members of this struct below */
121 spinlock_t L3_lock;
122 /* length of the ring, protected by L3 */
123 unsigned int len;
124 /* number of pages translated into mfns, protected by L3 */
125 unsigned int nmfns;
126 /* cached tx pointer location, protected by L3 */
127 unsigned int tx_ptr;
128 /* mapped ring pages protected by L3 */
129 void **mfn_mapping;
130 /* list of mfns of guest ring, protected by L3 */
131 mfn_t *mfns;
132 /* list of struct pending_ent for this ring, protected by L3 */
133 struct list_head pending;
134 /* number of pending entries queued for this ring, protected by L3 */
135 unsigned int npending;
136 };
137
138 /* Data about a single-sender ring, held by the sender (partner) domain */
139 struct argo_send_info
140 {
141 /* next node in the hash, protected by send_L2 */
142 struct list_head node;
143 /* this ring's id, protected by send_L2 */
144 struct argo_ring_id id;
145 };
146
147 /* A space-available notification that is awaiting sufficient space */
148 struct pending_ent
149 {
150 /* List node within argo_ring_info's pending list */
151 struct list_head node;
152 /*
153 * List node within argo_domain's wildcard_pend_list. Only used if the
154 * ring is one with a wildcard partner (ie. that any domain may send to)
155 * to enable cancelling signals on wildcard rings on domain destroy.
156 */
157 struct list_head wildcard_node;
158 /*
159 * Pointer to the ring_info that this ent pertains to. Used to ensure that
160 * ring_info->npending is decremented when ents for wildcard rings are
161 * cancelled for domain destroy.
162 * Caution: Must hold the correct locks before accessing ring_info via this.
163 */
164 struct argo_ring_info *ring_info;
165 /* minimum ring space available that this signal is waiting upon */
166 unsigned int len;
167 /* domain to be notified when space is available */
168 domid_t domain_id;
169 };
170
171 /*
172 * The value of the argo element in a struct domain is
173 * protected by L1_global_argo_rwlock
174 */
175 #define ARGO_HASHTABLE_SIZE 32
176 struct argo_domain
177 {
178 /* rings_L2 */
179 rwlock_t rings_L2_rwlock;
180 /*
181 * Hash table of argo_ring_info about rings this domain has registered.
182 * Protected by rings_L2.
183 */
184 struct list_head ring_hash[ARGO_HASHTABLE_SIZE];
185 /* Counter of rings registered by this domain. Protected by rings_L2. */
186 unsigned int ring_count;
187
188 /* send_L2 */
189 spinlock_t send_L2_lock;
190 /*
191 * Hash table of argo_send_info about rings other domains have registered
192 * for this domain to send to. Single partner, non-wildcard rings.
193 * Protected by send_L2.
194 */
195 struct list_head send_hash[ARGO_HASHTABLE_SIZE];
196
197 /* wildcard_L2 */
198 spinlock_t wildcard_L2_lock;
199 /*
200 * List of pending space-available signals for this domain about wildcard
201 * rings registered by other domains. Protected by wildcard_L2.
202 */
203 struct list_head wildcard_pend_list;
204 };
205
206 /*
207 * Locking is organized as follows:
208 *
209 * Terminology: R(<lock>) means taking a read lock on the specified lock;
210 * W(<lock>) means taking a write lock on it.
211 *
212 * == L1 : The global read/write lock: L1_global_argo_rwlock
213 * Protects the argo elements of all struct domain *d in the system.
214 *
215 * R(L1) does not protect any of the elements of d->argo; it protects their
216 * addresses. W(L1) protects those and more since it implies W on all the lower
217 * level locks - see the notes on those locks below.
218 *
219 * The destruction of an argo-enabled domain, which must have a non-NULL d->argo
220 * pointer, will need to free that d->argo pointer, which requires W(L1).
221 * Since holding R(L1) will block acquiring W(L1), it will ensure that
222 * no domain pointers that argo is interested in become invalid while either
223 * W(L1) or R(L1) is held.
224 */
225
226 static DEFINE_RWLOCK(L1_global_argo_rwlock); /* L1 */
227
228 /*
229 * == rings_L2 : The per-domain ring hash lock: d->argo->rings_L2_rwlock
230 *
231 * Holding a read lock on rings_L2 protects the ring hash table and
232 * the elements in the hash_table d->argo->ring_hash, and
233 * the node and id fields in struct argo_ring_info in the
234 * hash table.
235 * Holding a write lock on rings_L2 protects all of the elements of all the
236 * struct argo_ring_info belonging to this domain.
237 *
238 * To take rings_L2 you must already have R(L1). W(L1) implies W(rings_L2) and
239 * L3.
240 *
241 * == L3 : The individual ring_info lock: ring_info->L3_lock
242 *
243 * Protects all the fields within the argo_ring_info, aside from the ones that
244 * rings_L2 already protects: node, id, lock.
245 *
246 * To acquire L3 you must already have R(rings_L2). W(rings_L2) implies L3.
247 *
248 * == send_L2 : The per-domain single-sender partner rings lock:
249 * d->argo->send_L2_lock
250 *
251 * Protects the per-domain send hash table : d->argo->send_hash
252 * and the elements in the hash table, and the node and id fields
253 * in struct argo_send_info in the hash table.
254 *
255 * To take send_L2, you must already have R(L1). W(L1) implies send_L2.
256 * Do not attempt to acquire a rings_L2 on any domain after taking and while
257 * holding a send_L2 lock -- acquire the rings_L2 (if one is needed) beforehand.
258 *
259 * == wildcard_L2 : The per-domain wildcard pending list lock:
260 * d->argo->wildcard_L2_lock
261 *
262 * Protects the per-domain list of outstanding signals for space availability
263 * on wildcard rings.
264 *
265 * To take wildcard_L2, you must already have R(L1). W(L1) implies wildcard_L2.
266 * No other locks are acquired after obtaining wildcard_L2.
267 */
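/*
 * Illustrative sketch of the nesting described above (not functional code;
 * it only restates the rules in this comment block for a read-side path
 * that operates on a single ring owned by domain d):
 *
 *   read_lock(&L1_global_argo_rwlock);        R(L1): d->argo stays valid
 *   read_lock(&d->argo->rings_L2_rwlock);     R(rings_L2): ring hash stable
 *   spin_lock(&ring_info->L3_lock);           L3: ring_info fields usable
 *   ... access the ring ...
 *   spin_unlock(&ring_info->L3_lock);
 *   read_unlock(&d->argo->rings_L2_rwlock);
 *   read_unlock(&L1_global_argo_rwlock);
 */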
268
269 /*
270 * Lock state validations macros
271 *
272 * These macros encode the logic to verify that the locking has adhered to the
273 * locking discipline above.
274 * eg. On entry to logic that requires holding at least R(rings_L2), this:
275 * ASSERT(LOCKING_Read_rings_L2(d));
276 *
277 * checks that the lock state is sufficient, validating that one of the
278 * following must be true when executed: R(rings_L2) && R(L1)
279 * or: W(rings_L2) && R(L1)
280 * or: W(L1)
281 *
282 * The LOCKING macros defined below here are for use at verification points.
283 */
284 #define LOCKING_Write_L1 (rw_is_write_locked(&L1_global_argo_rwlock))
285 /*
286 * While LOCKING_Read_L1 will return true even if the lock is write-locked,
287 * that's OK because everywhere that a Read lock is needed with these macros,
288 * holding a Write lock there instead is OK too: we're checking that _at least_
289 * the specified level of locks are held.
290 */
291 #define LOCKING_Read_L1 (rw_is_locked(&L1_global_argo_rwlock))
292
293 #define LOCKING_Write_rings_L2(d) \
294 ((LOCKING_Read_L1 && rw_is_write_locked(&(d)->argo->rings_L2_rwlock)) || \
295 LOCKING_Write_L1)
296 /*
297 * Skip checking LOCKING_Write_rings_L2(d) within this LOCKING_Read_rings_L2
298 * definition because the first clause that is testing R(L1) && R(L2) will also
299 * return true if R(L1) && W(L2) is true, because of the way that rw_is_locked
300 * behaves. This results in a slightly shorter and faster implementation.
301 */
302 #define LOCKING_Read_rings_L2(d) \
303 ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock)) || \
304 LOCKING_Write_L1)
305 /*
306 * Skip checking LOCKING_Write_L1 within this LOCKING_L3 definition because
307 * LOCKING_Write_rings_L2(d) will return true for that condition.
308 */
309 #define LOCKING_L3(d, r) \
310 ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock) \
311 && spin_is_locked(&(r)->L3_lock)) || LOCKING_Write_rings_L2(d))
312
313 #define LOCKING_send_L2(d) \
314 ((LOCKING_Read_L1 && spin_is_locked(&(d)->argo->send_L2_lock)) || \
315 LOCKING_Write_L1)
316
317 /* Change this to #define ARGO_DEBUG here to enable more debug messages */
318 #undef ARGO_DEBUG
319
320 #ifdef ARGO_DEBUG
321 #define argo_dprintk(format, args...) printk("argo: " format, ## args )
322 #else
323 #define argo_dprintk(format, ... ) ((void)0)
324 #endif
325
326 /*
327 * This hash function is used to distribute rings within the per-domain
328 * hash tables (d->argo->ring_hash and d->argo->send_hash). The hash table
329 * will provide a struct if a match is found with an 'argo_ring_id' key:
330 * ie. the key is a (domain id, argo port, partner domain id) tuple.
331 * The algorithm approximates the string hashing function 'djb2'.
332 */
333 static unsigned int
334 hash_index(const struct argo_ring_id *id)
335 {
336 unsigned int hash = 5381; /* prime constant from djb2 */
337
338 /* For each input: hash = hash * 33 + <new input character value> */
339 hash = ((hash << 5) + hash) + (id->aport & 0xff);
340 hash = ((hash << 5) + hash) + ((id->aport >> 8) & 0xff);
341 hash = ((hash << 5) + hash) + ((id->aport >> 16) & 0xff);
342 hash = ((hash << 5) + hash) + ((id->aport >> 24) & 0xff);
343 hash = ((hash << 5) + hash) + (id->domain_id & 0xff);
344 hash = ((hash << 5) + hash) + ((id->domain_id >> 8) & 0xff);
345 hash = ((hash << 5) + hash) + (id->partner_id & 0xff);
346 hash = ((hash << 5) + hash) + ((id->partner_id >> 8) & 0xff);
347
348 /*
349 * Since ARGO_HASHTABLE_SIZE is small, use higher-order bits of the
350 * hash to contribute to the lower-order bits before masking off.
351 */
352 return (hash ^ (hash >> 15)) & (ARGO_HASHTABLE_SIZE - 1);
353 }
354
355 static struct argo_ring_info *
356 find_ring_info(const struct domain *d, const struct argo_ring_id *id)
357 {
358 struct argo_ring_info *ring_info;
359 const struct list_head *bucket;
360
361 ASSERT(LOCKING_Read_rings_L2(d));
362
363 /* List is not modified here. Search and return the match if found. */
364 bucket = &d->argo->ring_hash[hash_index(id)];
365
366 list_for_each_entry(ring_info, bucket, node)
367 {
368 const struct argo_ring_id *cmpid = &ring_info->id;
369
370 if ( cmpid->aport == id->aport &&
371 cmpid->domain_id == id->domain_id &&
372 cmpid->partner_id == id->partner_id )
373 {
374 argo_dprintk("found ring_info for ring(%u:%x %u)\n",
375 id->domain_id, id->aport, id->partner_id);
376 return ring_info;
377 }
378 }
379 argo_dprintk("no ring_info for ring(%u:%x %u)\n",
380 id->domain_id, id->aport, id->partner_id);
381
382 return NULL;
383 }
384
385 static struct argo_ring_info *
386 find_ring_info_by_match(const struct domain *d, xen_argo_port_t aport,
387 domid_t partner_id)
388 {
389 struct argo_ring_id id;
390 struct argo_ring_info *ring_info;
391
392 ASSERT(LOCKING_Read_rings_L2(d));
393
394 id.aport = aport;
395 id.domain_id = d->domain_id;
396 id.partner_id = partner_id;
397
398 ring_info = find_ring_info(d, &id);
399 if ( ring_info )
400 return ring_info;
401
402 id.partner_id = XEN_ARGO_DOMID_ANY;
403
404 return find_ring_info(d, &id);
405 }
406
407 static struct argo_send_info *
408 find_send_info(const struct domain *d, const struct argo_ring_id *id)
409 {
410 struct argo_send_info *send_info;
411 const struct list_head *bucket;
412
413 ASSERT(LOCKING_send_L2(d));
414
415 /* List is not modified here. Search and return the match if found. */
416 bucket = &d->argo->send_hash[hash_index(id)];
417
418 list_for_each_entry(send_info, bucket, node)
419 {
420 const struct argo_ring_id *cmpid = &send_info->id;
421
422 if ( cmpid->aport == id->aport &&
423 cmpid->domain_id == id->domain_id &&
424 cmpid->partner_id == id->partner_id )
425 {
426 argo_dprintk("found send_info for ring(%u:%x %u)\n",
427 id->domain_id, id->aport, id->partner_id);
428 return send_info;
429 }
430 }
431 argo_dprintk("no send_info for ring(%u:%x %u)\n",
432 id->domain_id, id->aport, id->partner_id);
433
434 return NULL;
435 }
436
437 static void
438 signal_domain(struct domain *d)
439 {
440 argo_dprintk("signalling domid:%u\n", d->domain_id);
441
442 send_guest_global_virq(d, VIRQ_ARGO);
443 }
444
445 static void
446 signal_domid(domid_t domain_id)
447 {
448 struct domain *d = get_domain_by_id(domain_id);
449
450 if ( !d )
451 return;
452
453 signal_domain(d);
454 put_domain(d);
455 }
456
457 static void
458 ring_unmap(const struct domain *d, struct argo_ring_info *ring_info)
459 {
460 unsigned int i;
461
462 ASSERT(LOCKING_L3(d, ring_info));
463
464 if ( !ring_info->mfn_mapping )
465 return;
466
467 ASSERT(!ring_info->nmfns || ring_info->mfns);
468
469 for ( i = 0; i < ring_info->nmfns; i++ )
470 {
471 if ( !ring_info->mfn_mapping[i] )
472 continue;
473
474 ASSERT(!mfn_eq(ring_info->mfns[i], INVALID_MFN));
475 argo_dprintk(XENLOG_ERR "argo: unmapping page %"PRI_mfn" from %p\n",
476 mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
477
478 unmap_domain_page_global(ring_info->mfn_mapping[i]);
479 ring_info->mfn_mapping[i] = NULL;
480 }
481 }
482
483 static int
484 ring_map_page(const struct domain *d, struct argo_ring_info *ring_info,
485 unsigned int i, void **out_ptr)
486 {
487 ASSERT(LOCKING_L3(d, ring_info));
488
489 /*
490 * FIXME: Investigate using vmap to create a single contiguous virtual
491 * address space mapping of the ring instead of using the array of single
492 * page mappings.
493 * Affects logic in memcpy_to_guest_ring, the mfn_mapping array data
494 * structure, and places where ring mappings are added or removed.
495 */
496
497 if ( i >= ring_info->nmfns )
498 {
499 gprintk(XENLOG_ERR,
500 "argo: ring (vm%u:%x vm%u) %p attempted to map page %u of %u\n",
501 ring_info->id.domain_id, ring_info->id.aport,
502 ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
503 return -ENOMEM;
504 }
505 i = array_index_nospec(i, ring_info->nmfns);
506
507 if ( !ring_info->mfns || !ring_info->mfn_mapping )
508 {
509 ASSERT_UNREACHABLE();
510 ring_info->len = 0;
511 return -ENOMEM;
512 }
513
514 if ( !ring_info->mfn_mapping[i] )
515 {
516 ring_info->mfn_mapping[i] = map_domain_page_global(ring_info->mfns[i]);
517 if ( !ring_info->mfn_mapping[i] )
518 {
519 gprintk(XENLOG_ERR, "argo: ring (vm%u:%x vm%u) %p attempted to map "
520 "page %u of %u\n",
521 ring_info->id.domain_id, ring_info->id.aport,
522 ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
523 return -ENOMEM;
524 }
525 argo_dprintk("mapping page %"PRI_mfn" to %p\n",
526 mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
527 }
528
529 if ( out_ptr )
530 *out_ptr = ring_info->mfn_mapping[i];
531
532 return 0;
533 }
534
535 static void
536 update_tx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
537 uint32_t tx_ptr)
538 {
539 xen_argo_ring_t *ringp;
540
541 ASSERT(LOCKING_L3(d, ring_info));
542 ASSERT(ring_info->mfn_mapping[0]);
543
544 ring_info->tx_ptr = tx_ptr;
545 ringp = ring_info->mfn_mapping[0];
546
547 write_atomic(&ringp->tx_ptr, tx_ptr);
548 smp_wmb();
549 }
550
551 static int
552 memcpy_to_guest_ring(const struct domain *d, struct argo_ring_info *ring_info,
553 unsigned int offset,
554 const void *src, XEN_GUEST_HANDLE(uint8) src_hnd,
555 unsigned int len)
556 {
557 unsigned int mfns_index = offset >> PAGE_SHIFT;
558 void *dst;
559 int ret;
560 unsigned int src_offset = 0;
561
562 ASSERT(LOCKING_L3(d, ring_info));
563
564 offset &= ~PAGE_MASK;
565
566 if ( len + offset > XEN_ARGO_MAX_RING_SIZE )
567 return -EFAULT;
568
569 while ( len )
570 {
571 unsigned int head_len = (offset + len) > PAGE_SIZE ? PAGE_SIZE - offset
572 : len;
573
574 ret = ring_map_page(d, ring_info, mfns_index, &dst);
575 if ( ret )
576 return ret;
577
578 if ( src )
579 {
580 memcpy(dst + offset, src + src_offset, head_len);
581 src_offset += head_len;
582 }
583 else
584 {
585 if ( copy_from_guest(dst + offset, src_hnd, head_len) )
586 return -EFAULT;
587
588 guest_handle_add_offset(src_hnd, head_len);
589 }
590
591 mfns_index++;
592 len -= head_len;
593 offset = 0;
594 }
595
596 return 0;
597 }
598
599 /*
600 * Use this with caution: rx_ptr is under guest control and may be bogus.
601 * See get_sanitized_ring for a safer alternative.
602 */
603 static int
604 get_rx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
605 uint32_t *rx_ptr)
606 {
607 void *src;
608 xen_argo_ring_t *ringp;
609 int ret;
610
611 ASSERT(LOCKING_L3(d, ring_info));
612
613 if ( !ring_info->nmfns || ring_info->nmfns < NPAGES_RING(ring_info->len) )
614 return -EINVAL;
615
616 ret = ring_map_page(d, ring_info, 0, &src);
617 if ( ret )
618 return ret;
619
620 ringp = (xen_argo_ring_t *)src;
621
622 *rx_ptr = read_atomic(&ringp->rx_ptr);
623
624 return 0;
625 }
626
627 /*
628 * get_sanitized_ring creates a modified copy of the ring pointers where
629 * the rx_ptr is rounded up to ensure it is aligned, and then ring
630 * wrap is handled. Simplifies safe use of the rx_ptr for available
631 * space calculation.
632 */
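/*
 * Example of the sanitization below, assuming the 16-byte message slot size
 * implied by ROUNDUP_MESSAGE: a guest-supplied rx_ptr of 5 is rounded up to
 * 16, and any rx_ptr at or beyond ring_info->len is wrapped to 0.
 */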
633 static int
634 get_sanitized_ring(const struct domain *d, xen_argo_ring_t *ring,
635 struct argo_ring_info *ring_info)
636 {
637 uint32_t rx_ptr;
638 int ret;
639
640 ASSERT(LOCKING_L3(d, ring_info));
641
642 ret = get_rx_ptr(d, ring_info, &rx_ptr);
643 if ( ret )
644 return ret;
645
646 ring->tx_ptr = ring_info->tx_ptr;
647
648 rx_ptr = ROUNDUP_MESSAGE(rx_ptr);
649 if ( rx_ptr >= ring_info->len )
650 rx_ptr = 0;
651
652 ring->rx_ptr = rx_ptr;
653
654 return 0;
655 }
656
657 static unsigned int
658 ringbuf_payload_space(const struct domain *d, struct argo_ring_info *ring_info)
659 {
660 xen_argo_ring_t ring;
661 unsigned int len;
662 int ret;
663
664 ASSERT(LOCKING_L3(d, ring_info));
665
666 len = ring_info->len;
667 if ( !len )
668 return 0;
669
670 if ( get_sanitized_ring(d, &ring, ring_info) )
671 return 0;
672
673 argo_dprintk("sanitized ringbuf_payload_space: tx_ptr=%u rx_ptr=%u\n",
674 ring.tx_ptr, ring.rx_ptr);
675
676 /*
677 * rx_ptr == tx_ptr means that the ring has been emptied.
678 * See message size checking logic in the entry to ringbuf_insert which
679 * ensures that there is always one message slot of size ROUNDUP_MESSAGE(1)
680 * left available, preventing a ring from being entirely filled.
681 * This ensures that matching ring indexes always indicate an empty ring
682 * and never a full one.
683 */
684 ret = ring.rx_ptr - ring.tx_ptr;
685 if ( ret <= 0 )
686 ret += len;
687
688 /*
689 * In a sanitized ring, we can rely on:
690 * (rx_ptr < ring_info->len) &&
691 * (tx_ptr < ring_info->len) &&
692 * (ring_info->len <= XEN_ARGO_MAX_RING_SIZE)
693 *
694 * and since: XEN_ARGO_MAX_RING_SIZE < INT32_MAX
695 * therefore right here: ret < INT32_MAX
696 * and we are safe to return it as an unsigned value from this function.
697 * The subtractions below cannot increase its value.
698 */
699
700 /*
701 * The maximum size payload for a message that will be accepted is:
702 * (the available space between the ring indexes)
703 * minus (space for a message header)
704 * minus (space for one message slot)
705 * since ringbuf_insert requires that one message slot be left
706 * unfilled, to avoid filling the ring to capacity and confusing a full
707 * ring with an empty one.
708 * Since the ring indexes are sanitized, the value in ret is aligned, so
709 * the simple subtraction here works to return the aligned value needed:
710 */
711 ret -= sizeof(struct xen_argo_ring_message_header);
712 ret -= ROUNDUP_MESSAGE(1);
713
714 return (ret < 0) ? 0 : ret;
715 }
716
717 /*
718 * iov_count returns its count on success via an out variable to avoid
719 * potential for a negative return value to be used incorrectly
720 * (eg. coerced into an unsigned variable resulting in a large incorrect value)
721 */
722 static int
723 iov_count(const xen_argo_iov_t *piov, unsigned int niov,
724 unsigned int *count)
725 {
726 unsigned int sum_iov_lens = 0;
727
728 if ( niov > XEN_ARGO_MAXIOV )
729 return -EINVAL;
730
731 for ( ; niov--; piov++ )
732 {
733 /* valid iovs must have the padding field set to zero */
734 if ( piov->pad )
735 {
736 argo_dprintk("invalid iov: padding is not zero\n");
737 return -EINVAL;
738 }
739
740 /* check each to protect sum against integer overflow */
741 if ( piov->iov_len > MAX_ARGO_MESSAGE_SIZE )
742 {
743 argo_dprintk("invalid iov_len: too big (%u)>%llu\n",
744 piov->iov_len, MAX_ARGO_MESSAGE_SIZE);
745 return -EINVAL;
746 }
747
748 sum_iov_lens += piov->iov_len;
749
750 /*
751 * Again protect sum from integer overflow
752 * and ensure total msg size will be within bounds.
753 */
754 if ( sum_iov_lens > MAX_ARGO_MESSAGE_SIZE )
755 {
756 argo_dprintk("invalid iov series: total message too big\n");
757 return -EMSGSIZE;
758 }
759 }
760
761 *count = sum_iov_lens;
762
763 return 0;
764 }
765
766 static int
767 ringbuf_insert(const struct domain *d, struct argo_ring_info *ring_info,
768 const struct argo_ring_id *src_id, xen_argo_iov_t *iovs,
769 unsigned int niov, uint32_t message_type, unsigned int len)
770 {
771 xen_argo_ring_t ring;
772 struct xen_argo_ring_message_header mh = { };
773 int sp, ret;
774 xen_argo_iov_t *piov;
775 XEN_GUEST_HANDLE(uint8) NULL_hnd = { };
776
777 ASSERT(LOCKING_L3(d, ring_info));
778
779 /*
780 * Enforced below: no more than 'len' bytes of guest data
781 * (plus the message header) will be sent in this operation.
782 */
783
784 /*
785 * Upper bound check the message len against the ring size.
786 * The message must not fill the ring; there must be at least one slot
787 * remaining so we can distinguish a full ring from an empty one.
788 * iov_count has already verified: len <= MAX_ARGO_MESSAGE_SIZE.
789 */
790 if ( ring_info->len <= (sizeof(struct xen_argo_ring_message_header) +
791 ROUNDUP_MESSAGE(len)) )
792 return -EMSGSIZE;
793
794 ret = get_sanitized_ring(d, &ring, ring_info);
795 if ( ret )
796 return ret;
797
798 argo_dprintk("ring.tx_ptr=%u ring.rx_ptr=%u ring len=%u"
799 " ring_info->tx_ptr=%u\n",
800 ring.tx_ptr, ring.rx_ptr, ring_info->len, ring_info->tx_ptr);
801
802 if ( ring.rx_ptr == ring.tx_ptr )
803 sp = ring_info->len;
804 else
805 {
806 sp = ring.rx_ptr - ring.tx_ptr;
807 if ( sp < 0 )
808 sp += ring_info->len;
809 }
810
811 /*
812 * Size bounds check against currently available space in the ring.
813 * Again: the message must not fill the ring leaving no space remaining.
814 */
815 if ( (ROUNDUP_MESSAGE(len) +
816 sizeof(struct xen_argo_ring_message_header)) >= sp )
817 {
818 argo_dprintk("EAGAIN\n");
819 return -EAGAIN;
820 }
821
822 mh.len = len + sizeof(struct xen_argo_ring_message_header);
823 mh.source.aport = src_id->aport;
824 mh.source.domain_id = src_id->domain_id;
825 mh.message_type = message_type;
826
827 /*
828 * For this copy to the guest ring, tx_ptr is always 16-byte aligned
829 * and the message header is 16 bytes long.
830 */
831 BUILD_BUG_ON(
832 sizeof(struct xen_argo_ring_message_header) != ROUNDUP_MESSAGE(1));
833
834 /*
835 * First data write into the destination ring: fixed size, message header.
836 * This cannot overrun because the available free space (value in 'sp')
837 * is checked above and must be at least this size.
838 */
839 ret = memcpy_to_guest_ring(d, ring_info,
840 ring.tx_ptr + sizeof(xen_argo_ring_t),
841 &mh, NULL_hnd, sizeof(mh));
842 if ( ret )
843 {
844 gprintk(XENLOG_ERR,
845 "argo: failed to write message header to ring (vm%u:%x vm%u)\n",
846 ring_info->id.domain_id, ring_info->id.aport,
847 ring_info->id.partner_id);
848
849 return ret;
850 }
851
852 ring.tx_ptr += sizeof(mh);
853 if ( ring.tx_ptr == ring_info->len )
854 ring.tx_ptr = 0;
855
856 for ( piov = iovs; niov--; piov++ )
857 {
858 XEN_GUEST_HANDLE(uint8) buf_hnd = piov->iov_hnd;
859 unsigned int iov_len = piov->iov_len;
860
861 /* If no data is provided in this iov, moan and skip on to the next */
862 if ( !iov_len )
863 {
864 gprintk(XENLOG_WARNING,
865 "argo: no data iov_len=0 iov_hnd=%p ring (vm%u:%x vm%u)\n",
866 buf_hnd.p, ring_info->id.domain_id, ring_info->id.aport,
867 ring_info->id.partner_id);
868
869 continue;
870 }
871
872 if ( unlikely(!guest_handle_okay(buf_hnd, iov_len)) )
873 {
874 gprintk(XENLOG_ERR,
875 "argo: bad iov handle [%p, %u] (vm%u:%x vm%u)\n",
876 buf_hnd.p, iov_len,
877 ring_info->id.domain_id, ring_info->id.aport,
878 ring_info->id.partner_id);
879
880 return -EFAULT;
881 }
882
883 sp = ring_info->len - ring.tx_ptr;
884
885 /* Check: iov data size versus free space at the tail of the ring */
886 if ( iov_len > sp )
887 {
888 /*
889 * Second possible data write: ring-tail-wrap-write.
890 * Populate the ring tail and update the internal tx_ptr to handle
891 * wrapping at the end of ring.
892 * Size of data written here: sp
893 * which is the exact full amount of free space available at the
894 * tail of the ring, so this cannot overrun.
895 */
896 ret = memcpy_to_guest_ring(d, ring_info,
897 ring.tx_ptr + sizeof(xen_argo_ring_t),
898 NULL, buf_hnd, sp);
899 if ( ret )
900 {
901 gprintk(XENLOG_ERR,
902 "argo: failed to copy {%p, %d} (vm%u:%x vm%u)\n",
903 buf_hnd.p, sp,
904 ring_info->id.domain_id, ring_info->id.aport,
905 ring_info->id.partner_id);
906
907 return ret;
908 }
909
910 ring.tx_ptr = 0;
911 iov_len -= sp;
912 guest_handle_add_offset(buf_hnd, sp);
913
914 ASSERT(iov_len <= ring_info->len);
915 }
916
917 /*
918 * Third possible data write: all data remaining for this iov.
919 * Size of data written here: iov_len
920 *
921 * Case 1: if the ring-tail-wrap-write above was performed, then
922 * iov_len has been decreased by 'sp' and ring.tx_ptr is zero.
923 *
924 * We know from checking the result of iov_count:
925 * len + sizeof(message_header) <= ring_info->len
926 * We also know that len is the total of summing all iov_lens, so:
927 * iov_len <= len
928 * so by transitivity:
929 * iov_len <= len <= (ring_info->len - sizeof(msgheader))
930 * and therefore:
931 * (iov_len + sizeof(msgheader) <= ring_info->len) &&
932 * (ring.tx_ptr == 0)
933 * so this write cannot overrun here.
934 *
935 * Case 2: ring-tail-wrap-write above was not performed
936 * -> so iov_len is the guest-supplied value and: (iov_len <= sp)
937 * ie. less than available space at the tail of the ring:
938 * so this write cannot overrun.
939 */
940 ret = memcpy_to_guest_ring(d, ring_info,
941 ring.tx_ptr + sizeof(xen_argo_ring_t),
942 NULL, buf_hnd, iov_len);
943 if ( ret )
944 {
945 gprintk(XENLOG_ERR,
946 "argo: failed to copy [%p, %u] (vm%u:%x vm%u)\n",
947 buf_hnd.p, iov_len, ring_info->id.domain_id,
948 ring_info->id.aport, ring_info->id.partner_id);
949
950 return ret;
951 }
952
953 ring.tx_ptr += iov_len;
954
955 if ( ring.tx_ptr == ring_info->len )
956 ring.tx_ptr = 0;
957 }
958
959 /*
960 * Finished writing data from all iovs into the ring: now need to round up
961 * tx_ptr to align to the next message boundary, and then wrap if necessary.
962 */
963 ring.tx_ptr = ROUNDUP_MESSAGE(ring.tx_ptr);
964
965 if ( ring.tx_ptr >= ring_info->len )
966 ring.tx_ptr -= ring_info->len;
967
968 update_tx_ptr(d, ring_info, ring.tx_ptr);
969
970 /*
971 * At this point (and also on the error exit paths from this function) it is
972 * possible to unmap the ring_info, ie:
973 * ring_unmap(d, ring_info);
974 * but performance should be improved by not doing so, and retaining
975 * the mapping.
976 * An XSM policy control over level of confidentiality required
977 * versus performance cost could be added to decide that here.
978 */
979
980 return ret;
981 }
982
983 static void
984 wildcard_pending_list_remove(domid_t domain_id, struct pending_ent *ent)
985 {
986 struct domain *d = get_domain_by_id(domain_id);
987
988 if ( !d )
989 return;
990
991 ASSERT(LOCKING_Read_L1);
992
993 if ( d->argo )
994 {
995 spin_lock(&d->argo->wildcard_L2_lock);
996 list_del(&ent->wildcard_node);
997 spin_unlock(&d->argo->wildcard_L2_lock);
998 }
999 put_domain(d);
1000 }
1001
1002 static void
1003 wildcard_pending_list_insert(domid_t domain_id, struct pending_ent *ent)
1004 {
1005 struct domain *d = get_domain_by_id(domain_id);
1006
1007 if ( !d )
1008 return;
1009
1010 ASSERT(LOCKING_Read_L1);
1011
1012 if ( d->argo )
1013 {
1014 spin_lock(&d->argo->wildcard_L2_lock);
1015 list_add(&ent->wildcard_node, &d->argo->wildcard_pend_list);
1016 spin_unlock(&d->argo->wildcard_L2_lock);
1017 }
1018 put_domain(d);
1019 }
1020
1021 static void
1022 pending_remove_all(const struct domain *d, struct argo_ring_info *ring_info)
1023 {
1024 struct pending_ent *ent;
1025
1026 ASSERT(LOCKING_L3(d, ring_info));
1027
1028 /* Delete all pending notifications from this ring's list. */
1029 while ( (ent = list_first_entry_or_null(&ring_info->pending,
1030 struct pending_ent, node)) )
1031 {
1032 /* For wildcard rings, remove each from their wildcard list too. */
1033 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1034 wildcard_pending_list_remove(ent->domain_id, ent);
1035 list_del(&ent->node);
1036 xfree(ent);
1037 }
1038 ring_info->npending = 0;
1039 }
1040
1041 static void
1042 pending_notify(struct list_head *to_notify)
1043 {
1044 struct pending_ent *ent;
1045
1046 ASSERT(LOCKING_Read_L1);
1047
1048 /* Sending signals for all ents in this list, draining until it is empty. */
1049 while ( (ent = list_first_entry_or_null(to_notify, struct pending_ent,
1050 node)) )
1051 {
1052 list_del(&ent->node);
1053 signal_domid(ent->domain_id);
1054 xfree(ent);
1055 }
1056 }
1057
1058 static void
1059 pending_find(const struct domain *d, struct argo_ring_info *ring_info,
1060 unsigned int payload_space, struct list_head *to_notify)
1061 {
1062 struct pending_ent *ent, *next;
1063
1064 ASSERT(LOCKING_Read_rings_L2(d));
1065
1066 /*
1067 * TODO: Current policy here is to signal _all_ of the waiting domains
1068 * interested in sending a message of size less than payload_space.
1069 *
1070 * This is likely to be suboptimal, since once one of them has added
1071 * their message to the ring, there may well be insufficient room
1072 * available for any of the others to transmit, meaning that they were
1073 * woken in vain, which created extra work just to requeue their wait.
1074 *
1075 * Retain this simple policy for now since it at least avoids starving a
1076 * domain of available space notifications because of a policy that only
1077 * notified other domains instead. Improvement may be possible;
1078 * investigation required.
1079 */
1080 spin_lock(&ring_info->L3_lock);
1081
1082 /* Remove matching ents from the ring list, and add them to "to_notify" */
1083 list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1084 {
1085 if ( payload_space >= ent->len )
1086 {
1087 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1088 wildcard_pending_list_remove(ent->domain_id, ent);
1089
1090 list_del(&ent->node);
1091 ring_info->npending--;
1092 list_add(&ent->node, to_notify);
1093 }
1094 }
1095
1096 spin_unlock(&ring_info->L3_lock);
1097 }
1098
1099 static int
1100 pending_queue(const struct domain *d, struct argo_ring_info *ring_info,
1101 domid_t src_id, unsigned int len)
1102 {
1103 struct pending_ent *ent;
1104
1105 ASSERT(LOCKING_L3(d, ring_info));
1106
1107 if ( ring_info->npending >= MAX_PENDING_PER_RING )
1108 return -EBUSY;
1109
1110 ent = xmalloc(struct pending_ent);
1111 if ( !ent )
1112 return -ENOMEM;
1113
1114 ent->len = len;
1115 ent->domain_id = src_id;
1116 ent->ring_info = ring_info;
1117
1118 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1119 wildcard_pending_list_insert(src_id, ent);
1120 list_add(&ent->node, &ring_info->pending);
1121 ring_info->npending++;
1122
1123 return 0;
1124 }
1125
1126 static int
1127 pending_requeue(const struct domain *d, struct argo_ring_info *ring_info,
1128 domid_t src_id, unsigned int len)
1129 {
1130 struct pending_ent *ent;
1131
1132 ASSERT(LOCKING_L3(d, ring_info));
1133
1134 /* List structure is not modified here. Update len in a match if found. */
1135 list_for_each_entry(ent, &ring_info->pending, node)
1136 {
1137 if ( ent->domain_id == src_id )
1138 {
1139 /*
1140 * Reuse an existing queue entry for a notification rather than add
1141 * another. If the existing entry is waiting for a smaller size than
1142 * the current message then adjust the record to wait for the
1143 * current (larger) size to be available before triggering a
1144 * notification.
1145 * This assists the waiting sender by ensuring that whenever a
1146 * notification is triggered, there is sufficient space available
1147 * for (at least) any one of the messages awaiting transmission.
1148 */
1149 if ( ent->len < len )
1150 ent->len = len;
1151
1152 return 0;
1153 }
1154 }
1155
1156 return pending_queue(d, ring_info, src_id, len);
1157 }
1158
1159 static void
1160 pending_cancel(const struct domain *d, struct argo_ring_info *ring_info,
1161 domid_t src_id)
1162 {
1163 struct pending_ent *ent, *next;
1164
1165 ASSERT(LOCKING_L3(d, ring_info));
1166
1167 /* Remove all ents where domain_id matches src_id from the ring's list. */
1168 list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1169 {
1170 if ( ent->domain_id == src_id )
1171 {
1172 /* For wildcard rings, remove each from their wildcard list too. */
1173 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1174 wildcard_pending_list_remove(ent->domain_id, ent);
1175 list_del(&ent->node);
1176 xfree(ent);
1177 ring_info->npending--;
1178 }
1179 }
1180 }
1181
1182 static void
1183 wildcard_rings_pending_remove(struct domain *d)
1184 {
1185 struct pending_ent *ent;
1186
1187 ASSERT(LOCKING_Write_L1);
1188
1189 /* Delete all pending signals to the domain about wildcard rings. */
1190 while ( (ent = list_first_entry_or_null(&d->argo->wildcard_pend_list,
1191 struct pending_ent, node)) )
1192 {
1193 /*
1194 * The ent->node deleted here, and the npending value decreased,
1195 * belong to the ring_info of another domain, which is why this
1196 * function requires holding W(L1):
1197 * it implies the L3 lock that protects that ring_info struct.
1198 */
1199 ent->ring_info->npending--;
1200 list_del(&ent->node);
1201 list_del(&ent->wildcard_node);
1202 xfree(ent);
1203 }
1204 }
1205
1206 static void
1207 ring_remove_mfns(const struct domain *d, struct argo_ring_info *ring_info)
1208 {
1209 unsigned int i;
1210
1211 ASSERT(LOCKING_Write_rings_L2(d));
1212
1213 if ( !ring_info->mfns )
1214 return;
1215
1216 if ( !ring_info->mfn_mapping )
1217 {
1218 ASSERT_UNREACHABLE();
1219 return;
1220 }
1221
1222 ring_unmap(d, ring_info);
1223
1224 for ( i = 0; i < ring_info->nmfns; i++ )
1225 if ( !mfn_eq(ring_info->mfns[i], INVALID_MFN) )
1226 put_page_and_type(mfn_to_page(ring_info->mfns[i]));
1227
1228 ring_info->nmfns = 0;
1229 XFREE(ring_info->mfns);
1230 XFREE(ring_info->mfn_mapping);
1231 }
1232
1233 static void
1234 ring_remove_info(const struct domain *d, struct argo_ring_info *ring_info)
1235 {
1236 ASSERT(LOCKING_Write_rings_L2(d));
1237
1238 pending_remove_all(d, ring_info);
1239 list_del(&ring_info->node);
1240 ring_remove_mfns(d, ring_info);
1241 xfree(ring_info);
1242 }
1243
1244 static void
1245 domain_rings_remove_all(struct domain *d)
1246 {
1247 unsigned int i;
1248
1249 ASSERT(LOCKING_Write_rings_L2(d));
1250
1251 for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1252 {
1253 struct argo_ring_info *ring_info;
1254 struct list_head *bucket = &d->argo->ring_hash[i];
1255
1256 while ( (ring_info = list_first_entry_or_null(bucket,
1257 struct argo_ring_info,
1258 node)) )
1259 ring_remove_info(d, ring_info);
1260 }
1261 d->argo->ring_count = 0;
1262 }
1263
1264 /*
1265 * Tear down all rings of other domains where src_d domain is the partner.
1266 * (ie. it is the single domain that can send to those rings.)
1267 * This will also cancel any pending notifications about those rings.
1268 */
1269 static void
1270 partner_rings_remove(struct domain *src_d)
1271 {
1272 unsigned int i;
1273
1274 ASSERT(LOCKING_Write_L1);
1275
1276 for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1277 {
1278 struct argo_send_info *send_info;
1279 struct list_head *bucket = &src_d->argo->send_hash[i];
1280
1281 /* Remove all ents from the send list. Take each off their ring list. */
1282 while ( (send_info = list_first_entry_or_null(bucket,
1283 struct argo_send_info,
1284 node)) )
1285 {
1286 struct domain *dst_d = get_domain_by_id(send_info->id.domain_id);
1287
1288 if ( dst_d && dst_d->argo )
1289 {
1290 struct argo_ring_info *ring_info =
1291 find_ring_info(dst_d, &send_info->id);
1292
1293 if ( ring_info )
1294 {
1295 ring_remove_info(dst_d, ring_info);
1296 dst_d->argo->ring_count--;
1297 }
1298 else
1299 ASSERT_UNREACHABLE();
1300 }
1301 else
1302 ASSERT_UNREACHABLE();
1303
1304 if ( dst_d )
1305 put_domain(dst_d);
1306
1307 list_del(&send_info->node);
1308 xfree(send_info);
1309 }
1310 }
1311 }
1312
1313 static int
1314 fill_ring_data(const struct domain *currd,
1315 XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) data_ent_hnd)
1316 {
1317 xen_argo_ring_data_ent_t ent;
1318 struct domain *dst_d;
1319 struct argo_ring_info *ring_info;
1320 int ret = 0;
1321
1322 ASSERT(currd == current->domain);
1323 ASSERT(LOCKING_Read_L1);
1324
1325 if ( __copy_from_guest(&ent, data_ent_hnd, 1) )
1326 return -EFAULT;
1327
1328 argo_dprintk("fill_ring_data: ent.ring.domain=%u,ent.ring.aport=%x\n",
1329 ent.ring.domain_id, ent.ring.aport);
1330
1331 ent.flags = 0;
1332
1333 dst_d = get_domain_by_id(ent.ring.domain_id);
1334 if ( !dst_d || !dst_d->argo )
1335 goto out;
1336
1337 /*
1338 * Don't supply information about rings that a guest is not
1339 * allowed to send to.
1340 */
1341 ret = xsm_argo_send(currd, dst_d);
1342 if ( ret )
1343 {
1344 put_domain(dst_d);
1345 return ret;
1346 }
1347
1348 read_lock(&dst_d->argo->rings_L2_rwlock);
1349
1350 ring_info = find_ring_info_by_match(dst_d, ent.ring.aport,
1351 currd->domain_id);
1352 if ( ring_info )
1353 {
1354 unsigned int space_avail;
1355
1356 ent.flags |= XEN_ARGO_RING_EXISTS;
1357
1358 spin_lock(&ring_info->L3_lock);
1359
1360 ent.max_message_size = ring_info->len -
1361 sizeof(struct xen_argo_ring_message_header) -
1362 ROUNDUP_MESSAGE(1);
1363
1364 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1365 ent.flags |= XEN_ARGO_RING_SHARED;
1366
1367 space_avail = ringbuf_payload_space(dst_d, ring_info);
1368
1369 argo_dprintk("fill_ring_data: aport=%x space_avail=%u"
1370 " space_wanted=%u\n",
1371 ring_info->id.aport, space_avail, ent.space_required);
1372
1373 /* Do not queue a notification for an unachievable size */
1374 if ( ent.space_required > ent.max_message_size )
1375 ent.flags |= XEN_ARGO_RING_EMSGSIZE;
1376 else if ( space_avail >= ent.space_required )
1377 {
1378 pending_cancel(dst_d, ring_info, currd->domain_id);
1379 ent.flags |= XEN_ARGO_RING_SUFFICIENT;
1380 }
1381 else
1382 {
1383 ret = pending_requeue(dst_d, ring_info, currd->domain_id,
1384 ent.space_required);
1385 if ( ret == -EBUSY )
1386 {
1387 /*
1388 * Too many other domains are already awaiting notification
1389 * about available space on this ring. Indicate this state via
1390 * flag. No need to return an error to the caller; allow the
1391 * processing of queries about other rings to continue.
1392 */
1393 ent.flags |= XEN_ARGO_RING_EBUSY;
1394 ret = 0;
1395 }
1396 }
1397
1398 spin_unlock(&ring_info->L3_lock);
1399
1400 if ( space_avail == ent.max_message_size )
1401 ent.flags |= XEN_ARGO_RING_EMPTY;
1402
1403 }
1404 read_unlock(&dst_d->argo->rings_L2_rwlock);
1405
1406 out:
1407 if ( dst_d )
1408 put_domain(dst_d);
1409
1410 if ( !ret && (__copy_field_to_guest(data_ent_hnd, &ent, flags) ||
1411 __copy_field_to_guest(data_ent_hnd, &ent, max_message_size)) )
1412 return -EFAULT;
1413
1414 return ret;
1415 }
1416
1417 static int
1418 find_ring_mfn(struct domain *d, gfn_t gfn, mfn_t *mfn)
1419 {
1420 struct page_info *page;
1421 p2m_type_t p2mt;
1422 int ret;
1423
1424 ret = check_get_page_from_gfn(d, gfn, false, &p2mt, &page);
1425 if ( unlikely(ret) )
1426 return ret;
1427
1428 *mfn = page_to_mfn(page);
1429 if ( !mfn_valid(*mfn) )
1430 ret = -EINVAL;
1431 #ifdef CONFIG_X86
1432 else if ( p2mt == p2m_ram_logdirty )
1433 ret = -EAGAIN;
1434 #endif
1435 else if ( (p2mt != p2m_ram_rw) ||
1436 !get_page_and_type(page, d, PGT_writable_page) )
1437 ret = -EINVAL;
1438
1439 put_page(page);
1440
1441 return ret;
1442 }
1443
1444 static int
1445 find_ring_mfns(struct domain *d, struct argo_ring_info *ring_info,
1446 const unsigned int npage,
1447 XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
1448 const unsigned int len)
1449 {
1450 unsigned int i;
1451 int ret = 0;
1452 mfn_t *mfns;
1453 void **mfn_mapping;
1454
1455 ASSERT(LOCKING_Write_rings_L2(d));
1456
1457 if ( ring_info->mfns )
1458 {
1459 /* Ring already existed: drop the previous mapping. */
1460 argo_dprintk("argo: vm%u re-register existing ring "
1461 "(vm%u:%x vm%u) clears mapping\n",
1462 d->domain_id, ring_info->id.domain_id,
1463 ring_info->id.aport, ring_info->id.partner_id);
1464
1465 ring_remove_mfns(d, ring_info);
1466 ASSERT(!ring_info->mfns);
1467 }
1468
1469 mfns = xmalloc_array(mfn_t, npage);
1470 if ( !mfns )
1471 return -ENOMEM;
1472
1473 for ( i = 0; i < npage; i++ )
1474 mfns[i] = INVALID_MFN;
1475
1476 mfn_mapping = xzalloc_array(void *, npage);
1477 if ( !mfn_mapping )
1478 {
1479 xfree(mfns);
1480 return -ENOMEM;
1481 }
1482
1483 ring_info->mfns = mfns;
1484 ring_info->mfn_mapping = mfn_mapping;
1485
1486 for ( i = 0; i < npage; i++ )
1487 {
1488 mfn_t mfn;
1489 xen_argo_gfn_t argo_gfn;
1490
1491 ret = __copy_from_guest_offset(&argo_gfn, gfn_hnd, i, 1) ? -EFAULT : 0;
1492 if ( ret )
1493 break;
1494
1495 ret = find_ring_mfn(d, _gfn(argo_gfn), &mfn);
1496 if ( ret )
1497 {
1498 gprintk(XENLOG_ERR, "argo: vm%u: invalid gfn %"PRI_gfn" "
1499 "r:(vm%u:%x vm%u) %p %u/%u\n",
1500 d->domain_id, gfn_x(_gfn(argo_gfn)),
1501 ring_info->id.domain_id, ring_info->id.aport,
1502 ring_info->id.partner_id, ring_info, i, npage);
1503 break;
1504 }
1505
1506 ring_info->mfns[i] = mfn;
1507
1508 argo_dprintk("%u: %"PRI_gfn" -> %"PRI_mfn"\n",
1509 i, gfn_x(_gfn(argo_gfn)), mfn_x(ring_info->mfns[i]));
1510 }
1511
1512 ring_info->nmfns = i;
1513
1514 if ( ret )
1515 ring_remove_mfns(d, ring_info);
1516 else
1517 {
1518 ASSERT(ring_info->nmfns == NPAGES_RING(len));
1519
1520 argo_dprintk("argo: vm%u ring (vm%u:%x vm%u) %p "
1521 "mfn_mapping %p len %u nmfns %u\n",
1522 d->domain_id, ring_info->id.domain_id,
1523 ring_info->id.aport, ring_info->id.partner_id, ring_info,
1524 ring_info->mfn_mapping, ring_info->len, ring_info->nmfns);
1525 }
1526
1527 return ret;
1528 }
1529
1530 static long
1531 unregister_ring(struct domain *currd,
1532 XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd)
1533 {
1534 xen_argo_unregister_ring_t unreg;
1535 struct argo_ring_id ring_id;
1536 struct argo_ring_info *ring_info = NULL;
1537 struct argo_send_info *send_info = NULL;
1538 struct domain *dst_d = NULL;
1539
1540 ASSERT(currd == current->domain);
1541
1542 if ( copy_from_guest(&unreg, unreg_hnd, 1) )
1543 return -EFAULT;
1544
1545 if ( unreg.pad )
1546 return -EINVAL;
1547
1548 ring_id.partner_id = unreg.partner_id;
1549 ring_id.aport = unreg.aport;
1550 ring_id.domain_id = currd->domain_id;
1551
1552 read_lock(&L1_global_argo_rwlock);
1553
1554 if ( unlikely(!currd->argo) )
1555 {
1556 read_unlock(&L1_global_argo_rwlock);
1557 return -ENODEV;
1558 }
1559
1560 write_lock(&currd->argo->rings_L2_rwlock);
1561
1562 ring_info = find_ring_info(currd, &ring_id);
1563 if ( !ring_info )
1564 goto out;
1565
1566 ring_remove_info(currd, ring_info);
1567 currd->argo->ring_count--;
1568
1569 if ( ring_id.partner_id == XEN_ARGO_DOMID_ANY )
1570 goto out;
1571
1572 dst_d = get_domain_by_id(ring_id.partner_id);
1573 if ( !dst_d || !dst_d->argo )
1574 {
1575 ASSERT_UNREACHABLE();
1576 goto out;
1577 }
1578
1579 spin_lock(&dst_d->argo->send_L2_lock);
1580
1581 send_info = find_send_info(dst_d, &ring_id);
1582 if ( send_info )
1583 list_del(&send_info->node);
1584 else
1585 ASSERT_UNREACHABLE();
1586
1587 spin_unlock(&dst_d->argo->send_L2_lock);
1588
1589 out:
1590 write_unlock(&currd->argo->rings_L2_rwlock);
1591
1592 read_unlock(&L1_global_argo_rwlock);
1593
1594 if ( dst_d )
1595 put_domain(dst_d);
1596
1597 xfree(send_info);
1598
1599 if ( !ring_info )
1600 {
1601 argo_dprintk("unregister_ring: no ring_info found for ring(%u:%x %u)\n",
1602 ring_id.domain_id, ring_id.aport, ring_id.partner_id);
1603 return -ENOENT;
1604 }
1605
1606 return 0;
1607 }
1608
1609 static long
1610 register_ring(struct domain *currd,
1611 XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd,
1612 XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
1613 unsigned int npage, unsigned int flags)
1614 {
1615 xen_argo_register_ring_t reg;
1616 struct argo_ring_id ring_id;
1617 void *map_ringp;
1618 xen_argo_ring_t *ringp;
1619 struct argo_ring_info *ring_info, *new_ring_info = NULL;
1620 struct argo_send_info *send_info = NULL;
1621 struct domain *dst_d = NULL;
1622 int ret = 0;
1623 unsigned int private_tx_ptr;
1624
1625 ASSERT(currd == current->domain);
1626
1627 /* flags: reserve currently-undefined bits, require zero. */
1628 if ( unlikely(flags & ~XEN_ARGO_REGISTER_FLAG_MASK) )
1629 return -EINVAL;
1630
1631 if ( copy_from_guest(&reg, reg_hnd, 1) )
1632 return -EFAULT;
1633
1634 /*
1635 * A ring must be large enough to transmit messages, so requires space for:
1636 * * 1 message header, plus
1637 * * 1 payload slot (payload is always rounded to a multiple of 16 bytes)
1638 * for the message payload to be written into, plus
1639 * * 1 more slot, so that the ring cannot be filled to capacity with a
1640 * single minimum-size message -- see the logic in ringbuf_insert --
1641 * allowing for this ensures that there can be space remaining when a
1642 * message is present.
1643 * The above determines the minimum acceptable ring size.
1644 */
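/*
 * Hedged worked example of the check below, assuming the 16-byte message
 * header and 16-byte slot size that the BUILD_BUG_ON in ringbuf_insert
 * relies upon: the smallest acceptable reg.len is
 *   16 (header) + 16 (one payload slot) + 16 (one spare slot) = 48 bytes,
 * and reg.len must also be a multiple of the slot size.
 */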
1645 if ( (reg.len < (sizeof(struct xen_argo_ring_message_header)
1646 + ROUNDUP_MESSAGE(1) + ROUNDUP_MESSAGE(1))) ||
1647 (reg.len > XEN_ARGO_MAX_RING_SIZE) ||
1648 (reg.len != ROUNDUP_MESSAGE(reg.len)) ||
1649 (NPAGES_RING(reg.len) != npage) ||
1650 (reg.pad != 0) )
1651 return -EINVAL;
1652
1653 ring_id.partner_id = reg.partner_id;
1654 ring_id.aport = reg.aport;
1655 ring_id.domain_id = currd->domain_id;
1656
1657 if ( reg.partner_id == XEN_ARGO_DOMID_ANY )
1658 {
1659 ret = opt_argo_mac_permissive ? xsm_argo_register_any_source(currd) :
1660 -EPERM;
1661 if ( ret )
1662 return ret;
1663 }
1664 else
1665 {
1666 dst_d = get_domain_by_id(reg.partner_id);
1667 if ( !dst_d )
1668 {
1669 argo_dprintk("!dst_d, ESRCH\n");
1670 return -ESRCH;
1671 }
1672
1673 ret = xsm_argo_register_single_source(currd, dst_d);
1674 if ( ret )
1675 goto out;
1676
1677 send_info = xzalloc(struct argo_send_info);
1678 if ( !send_info )
1679 {
1680 ret = -ENOMEM;
1681 goto out;
1682 }
1683 send_info->id = ring_id;
1684 }
1685
1686 /*
1687 * Common case is that the ring doesn't already exist, so do the alloc here
1688 * before picking up any locks.
1689 */
1690 new_ring_info = xzalloc(struct argo_ring_info);
1691 if ( !new_ring_info )
1692 {
1693 ret = -ENOMEM;
1694 goto out;
1695 }
1696
1697 read_lock(&L1_global_argo_rwlock);
1698
1699 if ( !currd->argo )
1700 {
1701 ret = -ENODEV;
1702 goto out_unlock;
1703 }
1704
1705 if ( dst_d && !dst_d->argo )
1706 {
1707 argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
1708 ret = -ECONNREFUSED;
1709 goto out_unlock;
1710 }
1711
1712 write_lock(&currd->argo->rings_L2_rwlock);
1713
1714 if ( currd->argo->ring_count >= MAX_RINGS_PER_DOMAIN )
1715 {
1716 ret = -ENOSPC;
1717 goto out_unlock2;
1718 }
1719
1720 ring_info = find_ring_info(currd, &ring_id);
1721 if ( !ring_info )
1722 {
1723 ring_info = new_ring_info;
1724 new_ring_info = NULL;
1725
1726 spin_lock_init(&ring_info->L3_lock);
1727
1728 ring_info->id = ring_id;
1729 INIT_LIST_HEAD(&ring_info->pending);
1730
1731 list_add(&ring_info->node,
1732 &currd->argo->ring_hash[hash_index(&ring_info->id)]);
1733
1734 argo_dprintk("argo: vm%u registering ring (vm%u:%x vm%u)\n",
1735 currd->domain_id, ring_id.domain_id, ring_id.aport,
1736 ring_id.partner_id);
1737 }
1738 else if ( ring_info->len )
1739 {
1740 /*
1741 * If the caller specified that the ring must not already exist,
1742 * fail the attempt to add a completed ring which already exists.
1743 */
1744 if ( flags & XEN_ARGO_REGISTER_FLAG_FAIL_EXIST )
1745 {
1746 gprintk(XENLOG_ERR, "argo: vm%u disallowed reregistration of "
1747 "existing ring (vm%u:%x vm%u)\n",
1748 currd->domain_id, ring_id.domain_id, ring_id.aport,
1749 ring_id.partner_id);
1750 ret = -EEXIST;
1751 goto out_unlock2;
1752 }
1753
1754 if ( ring_info->len != reg.len )
1755 {
1756 /*
1757 * Change of ring size could result in entries on the pending
1758 * notifications list that will never trigger.
1759 * Simple blunt solution: disallow ring resize for now.
1760 * TODO: investigate enabling ring resize.
1761 */
1762 gprintk(XENLOG_ERR, "argo: vm%u attempted to change ring size "
1763 "(vm%u:%x vm%u)\n",
1764 currd->domain_id, ring_id.domain_id, ring_id.aport,
1765 ring_id.partner_id);
1766 /*
1767 * Could return EINVAL here, but if the ring didn't already
1768 * exist then the arguments would have been valid, so: EEXIST.
1769 */
1770 ret = -EEXIST;
1771 goto out_unlock2;
1772 }
1773
1774 argo_dprintk("argo: vm%u re-registering existing ring (vm%u:%x vm%u)\n",
1775 currd->domain_id, ring_id.domain_id, ring_id.aport,
1776 ring_id.partner_id);
1777 }
1778
1779 ret = find_ring_mfns(currd, ring_info, npage, gfn_hnd, reg.len);
1780 if ( ret )
1781 {
1782 gprintk(XENLOG_ERR,
1783 "argo: vm%u failed to find ring mfns (vm%u:%x vm%u)\n",
1784 currd->domain_id, ring_id.domain_id, ring_id.aport,
1785 ring_id.partner_id);
1786
1787 ring_remove_info(currd, ring_info);
1788 goto out_unlock2;
1789 }
1790
1791 /*
1792 * The first page of the memory supplied for the ring has the xen_argo_ring
1793 * structure at its head, which is where the ring indexes reside.
1794 */
1795 ret = ring_map_page(currd, ring_info, 0, &map_ringp);
1796 if ( ret )
1797 {
1798 gprintk(XENLOG_ERR,
1799 "argo: vm%u failed to map ring mfn 0 (vm%u:%x vm%u)\n",
1800 currd->domain_id, ring_id.domain_id, ring_id.aport,
1801 ring_id.partner_id);
1802
1803 ring_remove_info(currd, ring_info);
1804 goto out_unlock2;
1805 }
1806 ringp = map_ringp;
1807
1808 private_tx_ptr = read_atomic(&ringp->tx_ptr);
1809
1810 if ( (private_tx_ptr >= reg.len) ||
1811 (ROUNDUP_MESSAGE(private_tx_ptr) != private_tx_ptr) )
1812 {
1813 /*
1814 * Since the ring is a mess, attempt to flush the contents of it
1815 * here by setting the tx_ptr to the next aligned message slot past
1816 * the latest rx_ptr we have observed. Handle ring wrap correctly.
1817 */
1818 private_tx_ptr = ROUNDUP_MESSAGE(read_atomic(&ringp->rx_ptr));
1819
1820 if ( private_tx_ptr >= reg.len )
1821 private_tx_ptr = 0;
1822
1823 update_tx_ptr(currd, ring_info, private_tx_ptr);
1824 }
1825
1826 ring_info->tx_ptr = private_tx_ptr;
1827 ring_info->len = reg.len;
1828 currd->argo->ring_count++;
1829
1830 if ( send_info )
1831 {
1832 spin_lock(&dst_d->argo->send_L2_lock);
1833
1834 list_add(&send_info->node,
1835 &dst_d->argo->send_hash[hash_index(&send_info->id)]);
1836
1837 spin_unlock(&dst_d->argo->send_L2_lock);
1838 }
1839
1840 out_unlock2:
1841 write_unlock(&currd->argo->rings_L2_rwlock);
1842
1843 out_unlock:
1844 read_unlock(&L1_global_argo_rwlock);
1845
1846 out:
1847 if ( dst_d )
1848 put_domain(dst_d);
1849
1850 if ( ret )
1851 xfree(send_info);
1852
1853 xfree(new_ring_info);
1854
1855 return ret;
1856 }
1857
1858 static void
1859 notify_ring(const struct domain *d, struct argo_ring_info *ring_info,
1860 struct list_head *to_notify)
1861 {
1862 unsigned int space;
1863
1864 ASSERT(LOCKING_Read_rings_L2(d));
1865
1866 spin_lock(&ring_info->L3_lock);
1867
1868 if ( ring_info->len )
1869 space = ringbuf_payload_space(d, ring_info);
1870 else
1871 space = 0;
1872
1873 spin_unlock(&ring_info->L3_lock);
1874
1875 if ( space )
1876 pending_find(d, ring_info, space, to_notify);
1877 }
1878
1879 static void
1880 notify_check_pending(struct domain *d)
1881 {
1882 unsigned int i;
1883 LIST_HEAD(to_notify);
1884
1885 ASSERT(LOCKING_Read_L1);
1886
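/*
 * Lock nesting follows this file's hierarchy: L1 (global, held for read per
 * the ASSERT above), then this domain's rings_L2 for read, then each ring's
 * L3 lock taken inside notify_ring().
 */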
1887 read_lock(&d->argo->rings_L2_rwlock);
1888
1889 /* Walk all rings, calling notify_ring on each to populate the to_notify list */
1890 for ( i = 0; i < ARGO_HASHTABLE_SIZE; i++ )
1891 {
1892 struct argo_ring_info *ring_info, *next;
1893 struct list_head *bucket = &d->argo->ring_hash[i];
1894
1895 list_for_each_entry_safe(ring_info, next, bucket, node)
1896 notify_ring(d, ring_info, &to_notify);
1897 }
1898
1899 read_unlock(&d->argo->rings_L2_rwlock);
1900
1901 if ( !list_empty(&to_notify) )
1902 pending_notify(&to_notify);
1903 }
1904
1905 static long
1906 notify(struct domain *currd,
1907 XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd)
1908 {
1909 XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) ent_hnd;
1910 xen_argo_ring_data_t ring_data;
1911 int ret = 0;
1912
1913 ASSERT(currd == current->domain);
1914
1915 read_lock(&L1_global_argo_rwlock);
1916
1917 if ( !currd->argo )
1918 {
1919 argo_dprintk("!d->argo, ENODEV\n");
1920 ret = -ENODEV;
1921 goto out;
1922 }
1923
1924 notify_check_pending(currd);
1925
1926 if ( guest_handle_is_null(ring_data_hnd) )
1927 goto out;
1928
1929 ret = copy_from_guest(&ring_data, ring_data_hnd, 1) ? -EFAULT : 0;
1930 if ( ret )
1931 goto out;
1932
1933 if ( ring_data.nent > MAX_NOTIFY_COUNT )
1934 {
1935 gprintk(XENLOG_ERR, "argo: notify entry count(%u) exceeds max(%u)\n",
1936 ring_data.nent, MAX_NOTIFY_COUNT);
1937 ret = -EACCES;
1938 goto out;
1939 }
1940
1941 ent_hnd = guest_handle_for_field(ring_data_hnd,
1942 xen_argo_ring_data_ent_t, data[0]);
1943 if ( unlikely(!guest_handle_okay(ent_hnd, ring_data.nent)) )
1944 {
1945 ret = -EFAULT;
1946 goto out;
1947 }
1948
1949 while ( !ret && ring_data.nent-- )
1950 {
1951 ret = fill_ring_data(currd, ent_hnd);
1952 guest_handle_add_offset(ent_hnd, 1);
1953 }
1954
1955 out:
1956 read_unlock(&L1_global_argo_rwlock);
1957
1958 return ret;
1959 }
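/*
 * Illustrative guest-side shape of a XEN_ARGO_OP_notify query for a single
 * destination ring -- a sketch only: the HYPERVISOR_argo_op wrapper name and
 * the dst_domid/msg_len values are assumptions about the guest environment,
 * and error handling is omitted.
 *
 *   uint8_t buf[sizeof(xen_argo_ring_data_t) +
 *               sizeof(xen_argo_ring_data_ent_t)];
 *   xen_argo_ring_data_t *rd = (xen_argo_ring_data_t *)buf;
 *
 *   memset(buf, 0, sizeof(buf));
 *   rd->nent = 1;
 *   rd->data[0].ring = (xen_argo_addr_t){ .aport = 0x4242,
 *                                         .domain_id = dst_domid };
 *   rd->data[0].space_required = msg_len;
 *   HYPERVISOR_argo_op(XEN_ARGO_OP_notify, rd, NULL, 0, 0);
 *
 * On return, rd->data[0].flags reports whether the ring exists and whether
 * the requested space is available; if it is not, a later VIRQ_ARGO signals
 * when it becomes so.
 */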
1960
1961 static long
1962 sendv(struct domain *src_d, xen_argo_addr_t *src_addr,
1963 const xen_argo_addr_t *dst_addr, xen_argo_iov_t *iovs, unsigned int niov,
1964 uint32_t message_type)
1965 {
1966 struct domain *dst_d = NULL;
1967 struct argo_ring_id src_id;
1968 struct argo_ring_info *ring_info;
1969 int ret = 0;
1970 unsigned int len = 0;
1971
1972 argo_dprintk("sendv: (%u:%x)->(%u:%x) niov:%u type:%x\n",
1973 src_addr->domain_id, src_addr->aport, dst_addr->domain_id,
1974 dst_addr->aport, niov, message_type);
1975
1976 /* Check padding is zeroed. */
1977 if ( unlikely(src_addr->pad || dst_addr->pad) )
1978 return -EINVAL;
1979
1980 if ( src_addr->domain_id == XEN_ARGO_DOMID_ANY )
1981 src_addr->domain_id = src_d->domain_id;
1982
1983 /* No domain is currently authorized to send on behalf of another */
1984 if ( unlikely(src_addr->domain_id != src_d->domain_id) )
1985 return -EPERM;
1986
1987 src_id.aport = src_addr->aport;
1988 src_id.domain_id = src_d->domain_id;
1989 src_id.partner_id = dst_addr->domain_id;
1990
1991 dst_d = get_domain_by_id(dst_addr->domain_id);
1992 if ( !dst_d )
1993 return -ESRCH;
1994
1995 ret = xsm_argo_send(src_d, dst_d);
1996 if ( ret )
1997 {
1998 gprintk(XENLOG_ERR, "argo: XSM REJECTED %i -> %i\n",
1999 src_d->domain_id, dst_d->domain_id);
2000
2001 put_domain(dst_d);
2002
2003 return ret;
2004 }
2005
2006 read_lock(&L1_global_argo_rwlock);
2007
2008 if ( !src_d->argo )
2009 {
2010 ret = -ENODEV;
2011 goto out_unlock;
2012 }
2013
2014 if ( !dst_d->argo )
2015 {
2016 argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
2017 ret = -ECONNREFUSED;
2018 goto out_unlock;
2019 }
2020
2021 read_lock(&dst_d->argo->rings_L2_rwlock);
2022
2023 ring_info = find_ring_info_by_match(dst_d, dst_addr->aport,
2024 src_id.domain_id);
2025 if ( !ring_info )
2026 {
2027 gprintk(XENLOG_ERR,
2028 "argo: vm%u connection refused, src (vm%u:%x) dst (vm%u:%x)\n",
2029 current->domain->domain_id, src_id.domain_id, src_id.aport,
2030 dst_addr->domain_id, dst_addr->aport);
2031
2032 ret = -ECONNREFUSED;
2033 }
2034 else
2035 {
2036 spin_lock(&ring_info->L3_lock);
2037
2038 /*
2039 * Obtain the total size of data to transmit -- sets the 'len' variable
2040 * -- and sanity check that the iovs conform to size and number limits.
2041 */
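/*
 * For reference (matching the checks in iov_count and the accounting in
 * ringbuf_insert): the message will occupy
 * ROUNDUP_MESSAGE(sizeof(struct xen_argo_ring_message_header) + len)
 * bytes of ring space, and len is capped at MAX_ARGO_MESSAGE_SIZE.
 */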
2042 ret = iov_count(iovs, niov, &len);
2043 if ( !ret )
2044 {
2045 ret = ringbuf_insert(dst_d, ring_info, &src_id, iovs, niov,
2046 message_type, len);
2047 if ( ret == -EAGAIN )
2048 {
2049 int rc;
2050
2051 argo_dprintk("argo_ringbuf_sendv failed, EAGAIN\n");
2052 /* requeue to issue a notification when space becomes available */
2053 rc = pending_requeue(dst_d, ring_info, src_id.domain_id, len);
2054 if ( rc )
2055 ret = rc;
2056 }
2057 }
2058
2059 spin_unlock(&ring_info->L3_lock);
2060 }
2061
2062 read_unlock(&dst_d->argo->rings_L2_rwlock);
2063
2064 out_unlock:
2065 read_unlock(&L1_global_argo_rwlock);
2066
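/*
 * Signal the destination (VIRQ_ARGO) only if a message was actually
 * delivered; a send queued for space (-EAGAIN) results in the sender being
 * signalled later by the notify path once space becomes available.
 */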
2067 if ( ret >= 0 )
2068 signal_domain(dst_d);
2069
2070 if ( dst_d )
2071 put_domain(dst_d);
2072
2073 return ( ret < 0 ) ? ret : len;
2074 }
2075
2076 long
2077 do_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
2078 XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long raw_arg3,
2079 unsigned long raw_arg4)
2080 {
2081 struct domain *currd = current->domain;
2082 long rc;
2083 unsigned int arg3 = raw_arg3, arg4 = raw_arg4;
2084
2085 argo_dprintk("->do_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
2086 (void *)arg1.p, (void *)arg2.p, raw_arg3, raw_arg4);
2087
2088 /* Reject numeric hypercall args outside 32-bit range */
2089 if ( (arg3 != raw_arg3) || (arg4 != raw_arg4) )
2090 return -EINVAL;
2091
2092 if ( unlikely(!opt_argo) )
2093 return -EOPNOTSUPP;
2094
2095 rc = xsm_argo_enable(currd);
2096 if ( rc )
2097 return rc;
2098
2099 switch ( cmd )
2100 {
2101 case XEN_ARGO_OP_register_ring:
2102 {
2103 XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd =
2104 guest_handle_cast(arg1, xen_argo_register_ring_t);
2105 XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd =
2106 guest_handle_cast(arg2, xen_argo_gfn_t);
2107 /* arg3: npage, arg4: flags */
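/*
 * Illustrative guest-side call shape -- a sketch only: the HYPERVISOR_argo_op
 * wrapper name and the partner_domid/ring_len/npages values are assumptions
 * about the guest environment.
 *
 *   xen_argo_register_ring_t reg = {
 *       .aport = 0x4242,
 *       .partner_id = partner_domid,   // or XEN_ARGO_DOMID_ANY
 *       .len = ring_len,               // a multiple of XEN_ARGO_MSG_SLOT_SIZE
 *   };
 *   xen_argo_gfn_t gfns[npages];       // npages == NPAGES_RING(ring_len)
 *   HYPERVISOR_argo_op(XEN_ARGO_OP_register_ring, &reg, gfns, npages, 0);
 *
 * Passing XEN_ARGO_REGISTER_FLAG_FAIL_EXIST as arg4 instead of 0 rejects
 * re-registration of an existing ring.
 */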
2108
2109 BUILD_BUG_ON(!IS_ALIGNED(XEN_ARGO_MAX_RING_SIZE, PAGE_SIZE));
2110
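/*
 * Worked bound, assuming 4 KiB pages: XEN_ARGO_MAX_RING_SIZE is 16 MiB, so
 * at most 16 MiB >> PAGE_SHIFT == 4096 frames may back a single ring.
 */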
2111 if ( unlikely(arg3 > (XEN_ARGO_MAX_RING_SIZE >> PAGE_SHIFT)) )
2112 {
2113 rc = -EINVAL;
2114 break;
2115 }
2116
2117 /* Check array to allow use of the faster __copy operations later */
2118 if ( unlikely(!guest_handle_okay(gfn_hnd, arg3)) )
2119 {
2120 rc = -EFAULT;
2121 break;
2122 }
2123
2124 rc = register_ring(currd, reg_hnd, gfn_hnd, arg3, arg4);
2125 break;
2126 }
2127
2128 case XEN_ARGO_OP_unregister_ring:
2129 {
2130 XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd =
2131 guest_handle_cast(arg1, xen_argo_unregister_ring_t);
2132
2133 if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
2134 {
2135 rc = -EINVAL;
2136 break;
2137 }
2138
2139 rc = unregister_ring(currd, unreg_hnd);
2140 break;
2141 }
2142
2143 case XEN_ARGO_OP_sendv:
2144 {
2145 xen_argo_send_addr_t send_addr;
2146 xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
2147 unsigned int niov;
2148
2149 XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd =
2150 guest_handle_cast(arg1, xen_argo_send_addr_t);
2151 XEN_GUEST_HANDLE_PARAM(xen_argo_iov_t) iovs_hnd =
2152 guest_handle_cast(arg2, xen_argo_iov_t);
2153 /* arg3 is niov */
2154 /* arg4 is message_type. Must be a 32-bit value. */
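/*
 * Illustrative guest-side call shape -- a sketch only: the HYPERVISOR_argo_op
 * wrapper name and the dst_domid/buf_hnd/buf_len values are assumptions
 * about the guest environment.
 *
 *   xen_argo_send_addr_t addr = {
 *       .src = { .aport = 0x1,    .domain_id = XEN_ARGO_DOMID_ANY },
 *       .dst = { .aport = 0x4242, .domain_id = dst_domid },
 *   };
 *   xen_argo_iov_t iov = { .iov_hnd = buf_hnd, .iov_len = buf_len };
 *   HYPERVISOR_argo_op(XEN_ARGO_OP_sendv, &addr, &iov, 1, msg_type);
 *
 * A non-negative return is the number of payload bytes accepted onto the
 * destination ring; -EAGAIN means wait for VIRQ_ARGO and retry.
 */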
2155
2156 /* XEN_ARGO_MAXIOV value determines size of iov array on stack */
2157 BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
2158
2159 rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
2160 if ( rc )
2161 {
2162 rc = -EFAULT;
2163 break;
2164 }
2165
2166 /*
2167 * Reject niov values above the maximum limit, or a message_type outside
2168 * the 32-bit range.
2169 */
2170 if ( unlikely((arg3 > XEN_ARGO_MAXIOV) || (arg4 != (uint32_t)arg4)) )
2171 {
2172 rc = -EINVAL;
2173 break;
2174 }
2175 niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
2176
2177 rc = copy_from_guest(iovs, iovs_hnd, niov) ? -EFAULT : 0;
2178 if ( rc )
2179 {
2180 rc = -EFAULT;
2181 break;
2182 }
2183
2184 rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
2185 break;
2186 }
2187
2188 case XEN_ARGO_OP_notify:
2189 {
2190 XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd =
2191 guest_handle_cast(arg1, xen_argo_ring_data_t);
2192
2193 if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
2194 {
2195 rc = -EINVAL;
2196 break;
2197 }
2198
2199 rc = notify(currd, ring_data_hnd);
2200 break;
2201 }
2202
2203 default:
2204 rc = -EOPNOTSUPP;
2205 break;
2206 }
2207
2208 argo_dprintk("<-do_argo_op(%u)=%ld\n", cmd, rc);
2209
2210 return rc;
2211 }
2212
2213 #ifdef CONFIG_COMPAT
2214 long
2215 compat_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
2216 XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long arg3,
2217 unsigned long arg4)
2218 {
2219 struct domain *currd = current->domain;
2220 long rc;
2221 xen_argo_send_addr_t send_addr;
2222 xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
2223 compat_argo_iov_t compat_iovs[XEN_ARGO_MAXIOV];
2224 unsigned int i, niov;
2225 XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd;
2226
2227 /* check XEN_ARGO_MAXIOV as it sizes stack arrays: iovs, compat_iovs */
2228 BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
2229
2230 /* Forward all ops besides sendv to the native handler. */
2231 if ( cmd != XEN_ARGO_OP_sendv )
2232 return do_argo_op(cmd, arg1, arg2, arg3, arg4);
2233
2234 if ( unlikely(!opt_argo) )
2235 return -EOPNOTSUPP;
2236
2237 rc = xsm_argo_enable(currd);
2238 if ( rc )
2239 return rc;
2240
2241 argo_dprintk("->compat_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
2242 (void *)arg1.p, (void *)arg2.p, arg3, arg4);
2243
2244 send_addr_hnd = guest_handle_cast(arg1, xen_argo_send_addr_t);
2245 /* arg2: iovs, arg3: niov, arg4: message_type */
2246
2247 rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
2248 if ( rc )
2249 goto out;
2250
2251 if ( unlikely(arg3 > XEN_ARGO_MAXIOV) )
2252 {
2253 rc = -EINVAL;
2254 goto out;
2255 }
2256 niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
2257
2258 rc = copy_from_guest(compat_iovs, arg2, niov) ? -EFAULT : 0;
2259 if ( rc )
2260 goto out;
2261
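/*
 * Translate each 32-bit compat iov into the native layout: the generated
 * XLAT_argo_iov copies the fixed-size fields, while the override below
 * widens the iov_hnd guest handle via guest_from_compat_handle.
 */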
2262 for ( i = 0; i < niov; i++ )
2263 {
2264 #define XLAT_argo_iov_HNDL_iov_hnd(_d_, _s_) \
2265 guest_from_compat_handle((_d_)->iov_hnd, (_s_)->iov_hnd)
2266
2267 XLAT_argo_iov(&iovs[i], &compat_iovs[i]);
2268
2269 #undef XLAT_argo_iov_HNDL_iov_hnd
2270 }
2271
2272 rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
2273 out:
2274 argo_dprintk("<-compat_argo_op(%u)=%ld\n", cmd, rc);
2275
2276 return rc;
2277 }
2278 #endif
2279
2280 static void
2281 argo_domain_init(struct argo_domain *argo)
2282 {
2283 unsigned int i;
2284
2285 rwlock_init(&argo->rings_L2_rwlock);
2286 spin_lock_init(&argo->send_L2_lock);
2287 spin_lock_init(&argo->wildcard_L2_lock);
2288
2289 for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
2290 {
2291 INIT_LIST_HEAD(&argo->ring_hash[i]);
2292 INIT_LIST_HEAD(&argo->send_hash[i]);
2293 }
2294 INIT_LIST_HEAD(&argo->wildcard_pend_list);
2295 }
2296
2297 int
2298 argo_init(struct domain *d)
2299 {
2300 struct argo_domain *argo;
2301
2302 if ( !opt_argo || xsm_argo_enable(d) )
2303 {
2304 argo_dprintk("argo disabled, domid: %u\n", d->domain_id);
2305 return 0;
2306 }
2307
2308 argo_dprintk("init: domid: %u\n", d->domain_id);
2309
2310 argo = xzalloc(struct argo_domain);
2311 if ( !argo )
2312 return -ENOMEM;
2313
2314 argo_domain_init(argo);
2315
2316 write_lock(&L1_global_argo_rwlock);
2317
2318 d->argo = argo;
2319
2320 write_unlock(&L1_global_argo_rwlock);
2321
2322 return 0;
2323 }
2324
2325 void
2326 argo_destroy(struct domain *d)
2327 {
2328 BUG_ON(!d->is_dying);
2329
2330 write_lock(&L1_global_argo_rwlock);
2331
2332 argo_dprintk("destroy: domid %u d->argo=%p\n", d->domain_id, d->argo);
2333
2334 if ( d->argo )
2335 {
2336 domain_rings_remove_all(d);
2337 partner_rings_remove(d);
2338 wildcard_rings_pending_remove(d);
2339 XFREE(d->argo);
2340 }
2341
2342 write_unlock(&L1_global_argo_rwlock);
2343 }
2344
2345 void
2346 argo_soft_reset(struct domain *d)
2347 {
2348 write_lock(&L1_global_argo_rwlock);
2349
2350 argo_dprintk("soft reset d=%u d->argo=%p\n", d->domain_id, d->argo);
2351
2352 if ( d->argo )
2353 {
2354 domain_rings_remove_all(d);
2355 partner_rings_remove(d);
2356 wildcard_rings_pending_remove(d);
2357
2358 /*
2359 * Since neither opt_argo nor xsm_argo_enable(d) can change at runtime,
2360 * if d->argo is true then both opt_argo and xsm_argo_enable(d) must be
2361 * true, and we can assume that init is allowed to proceed again here.
2362 */
2363 argo_domain_init(d->argo);
2364 }
2365
2366 write_unlock(&L1_global_argo_rwlock);
2367 }
2368