1 /******************************************************************************
2  * Argo : Hypervisor-Mediated data eXchange
3  *
4  * Derived from v4v, version 2 of v2v.
5  *
6  * Copyright (c) 2010, Citrix Systems
7  * Copyright (c) 2018-2019 BAE Systems
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17  */
18 
19 #include <xen/argo.h>
20 #include <xen/domain.h>
21 #include <xen/domain_page.h>
22 #include <xen/errno.h>
23 #include <xen/event.h>
24 #include <xen/guest_access.h>
25 #include <xen/lib.h>
26 #include <xen/nospec.h>
27 #include <xen/param.h>
28 #include <xen/sched.h>
29 #include <xen/time.h>
30 #include <xsm/xsm.h>
31 
32 #include <public/argo.h>
33 
34 #ifdef CONFIG_COMPAT
35 #include <compat/argo.h>
36 CHECK_argo_addr;
37 #undef CHECK_argo_addr
38 #define CHECK_argo_addr struct xen_argo_addr
39 CHECK_argo_register_ring;
40 CHECK_argo_ring;
41 CHECK_argo_ring_data_ent;
42 #undef CHECK_argo_ring_data_ent
43 #define CHECK_argo_ring_data_ent struct xen_argo_ring_data_ent
44 CHECK_argo_ring_data;
45 CHECK_argo_ring_message_header;
46 CHECK_argo_unregister_ring;
47 CHECK_argo_send_addr;
48 #endif
49 
50 #define MAX_RINGS_PER_DOMAIN            128U
51 #define MAX_NOTIFY_COUNT                256U
52 #define MAX_PENDING_PER_RING             32U
53 
54 /* All messages on the ring are padded to a multiple of the slot size. */
55 #define ROUNDUP_MESSAGE(a) ROUNDUP((a), XEN_ARGO_MSG_SLOT_SIZE)
56 
57 /* The maximum size of a message that may be sent on the largest Argo ring. */
58 #define MAX_ARGO_MESSAGE_SIZE ((XEN_ARGO_MAX_RING_SIZE) - \
59         (sizeof(struct xen_argo_ring_message_header)) - ROUNDUP_MESSAGE(1))
60 
61 /* Number of PAGEs needed to hold a ring of a given size in bytes */
62 #define NPAGES_RING(ring_len) \
63     (ROUNDUP((ROUNDUP_MESSAGE(ring_len) + sizeof(xen_argo_ring_t)), PAGE_SIZE) \
64      >> PAGE_SHIFT)
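/*
 * Illustrative arithmetic (not part of the upstream code; assumes 4096-byte
 * pages, PAGE_SHIFT of 12, and the 64-byte xen_argo_ring_t defined in the
 * public header): registering a ring with ring_len of 4096 yields
 *   NPAGES_RING(4096) = ROUNDUP(4096 + 64, 4096) >> 12 = 2 pages,
 * since the shared ring structure precedes the ring data itself.
 */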
65 
66 DEFINE_XEN_GUEST_HANDLE(xen_argo_addr_t);
67 DEFINE_XEN_GUEST_HANDLE(xen_argo_gfn_t);
68 DEFINE_XEN_GUEST_HANDLE(xen_argo_iov_t);
69 DEFINE_XEN_GUEST_HANDLE(xen_argo_register_ring_t);
70 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_t);
71 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_t);
72 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t);
73 DEFINE_XEN_GUEST_HANDLE(xen_argo_send_addr_t);
74 DEFINE_XEN_GUEST_HANDLE(xen_argo_unregister_ring_t);
75 #ifdef CONFIG_COMPAT
76 DEFINE_COMPAT_HANDLE(compat_argo_iov_t);
77 #endif
78 
79 static bool __read_mostly opt_argo;
80 static bool __read_mostly opt_argo_mac_permissive;
81 
82 static int __init parse_argo(const char *s)
83 {
84     const char *ss;
85     int val, rc = 0;
86 
87     do {
88         ss = strchr(s, ',');
89         if ( !ss )
90             ss = strchr(s, '\0');
91 
92         if ( (val = parse_bool(s, ss)) >= 0 )
93             opt_argo = val;
94         else if ( (val = parse_boolean("mac-permissive", s, ss)) >= 0 )
95             opt_argo_mac_permissive = val;
96         else
97             rc = -EINVAL;
98 
99         s = ss + 1;
100     } while ( *ss );
101 
102     return rc;
103 }
104 custom_param("argo", parse_argo);
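/*
 * Example usage (a sketch, not mandated by this file): Argo is disabled by
 * default, so enabling it together with the permissive MAC option on the
 * hypervisor command line could look like:
 *   argo=1,mac-permissive
 * parse_bool() accepts the usual boolean spellings for the first token, and
 * parse_boolean() also accepts a "no-" prefix or an "=<bool>" suffix on the
 * "mac-permissive" sub-option.
 */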
105 
106 typedef struct argo_ring_id
107 {
108     xen_argo_port_t aport;
109     domid_t partner_id;
110     domid_t domain_id;
111 } argo_ring_id;
112 
113 /* Data about a domain's own ring that it has registered */
114 struct argo_ring_info
115 {
116     /* next node in the hash, protected by rings_L2 */
117     struct list_head node;
118     /* this ring's id, protected by rings_L2 */
119     struct argo_ring_id id;
120     /* L3, the ring_info lock: protects the members of this struct below */
121     spinlock_t L3_lock;
122     /* length of the ring, protected by L3 */
123     unsigned int len;
124     /* number of pages translated into mfns, protected by L3 */
125     unsigned int nmfns;
126     /* cached tx pointer location, protected by L3 */
127     unsigned int tx_ptr;
128     /* mapped ring pages protected by L3 */
129     void **mfn_mapping;
130     /* list of mfns of guest ring, protected by L3 */
131     mfn_t *mfns;
132     /* list of struct pending_ent for this ring, protected by L3 */
133     struct list_head pending;
134     /* number of pending entries queued for this ring, protected by L3 */
135     unsigned int npending;
136 };
137 
138 /* Data about a single-sender ring, held by the sender (partner) domain */
139 struct argo_send_info
140 {
141     /* next node in the hash, protected by send_L2 */
142     struct list_head node;
143     /* this ring's id, protected by send_L2 */
144     struct argo_ring_id id;
145 };
146 
147 /* A space-available notification that is awaiting sufficient space */
148 struct pending_ent
149 {
150     /* List node within argo_ring_info's pending list */
151     struct list_head node;
152     /*
153      * List node within argo_domain's wildcard_pend_list. Only used if the
154      * ring is one with a wildcard partner (ie. that any domain may send to)
155      * to enable cancelling signals on wildcard rings on domain destroy.
156      */
157     struct list_head wildcard_node;
158     /*
159      * Pointer to the ring_info that this ent pertains to. Used to ensure that
160      * ring_info->npending is decremented when ents for wildcard rings are
161      * cancelled for domain destroy.
162      * Caution: Must hold the correct locks before accessing ring_info via this.
163      */
164     struct argo_ring_info *ring_info;
165     /* minimum ring space available that this signal is waiting upon */
166     unsigned int len;
167     /* domain to be notified when space is available */
168     domid_t domain_id;
169 };
170 
171 /*
172  * The value of the argo element in a struct domain is
173  * protected by L1_global_argo_rwlock
174  */
175 #define ARGO_HASHTABLE_SIZE 32
176 struct argo_domain
177 {
178     /* rings_L2 */
179     rwlock_t rings_L2_rwlock;
180     /*
181      * Hash table of argo_ring_info about rings this domain has registered.
182      * Protected by rings_L2.
183      */
184     struct list_head ring_hash[ARGO_HASHTABLE_SIZE];
185     /* Counter of rings registered by this domain. Protected by rings_L2. */
186     unsigned int ring_count;
187 
188     /* send_L2 */
189     spinlock_t send_L2_lock;
190     /*
191      * Hash table of argo_send_info about rings other domains have registered
192      * for this domain to send to. Single partner, non-wildcard rings.
193      * Protected by send_L2.
194      */
195     struct list_head send_hash[ARGO_HASHTABLE_SIZE];
196 
197     /* wildcard_L2 */
198     spinlock_t wildcard_L2_lock;
199     /*
200      * List of pending space-available signals for this domain about wildcard
201      * rings registered by other domains. Protected by wildcard_L2.
202      */
203     struct list_head wildcard_pend_list;
204 };
205 
206 /*
207  * Locking is organized as follows:
208  *
209  * Terminology: R(<lock>) means taking a read lock on the specified lock;
210  *              W(<lock>) means taking a write lock on it.
211  *
212  * == L1 : The global read/write lock: L1_global_argo_rwlock
213  * Protects the argo elements of all struct domain *d in the system.
214  *
215  * R(L1) does not protect any of the elements of d->argo; it protects their
216  * addresses. W(L1) protects those and more since it implies W on all the lower
217  * level locks - see the notes on those locks below.
218  *
219  * The destruction of an argo-enabled domain, which must have a non-NULL d->argo
220  * pointer, will need to free that d->argo pointer, which requires W(L1).
221  * Since holding R(L1) will block acquiring W(L1), it will ensure that
222  * no domain pointers that argo is interested in become invalid while either
223  * W(L1) or R(L1) is held.
224  */
225 
226 static DEFINE_RWLOCK(L1_global_argo_rwlock); /* L1 */
227 
228 /*
229  * == rings_L2 : The per-domain ring hash lock: d->argo->rings_L2_rwlock
230  *
231  * Holding a read lock on rings_L2 protects the ring hash table and
232  * the elements in the hash_table d->argo->ring_hash, and
233  * the node and id fields in struct argo_ring_info in the
234  * hash table.
235  * Holding a write lock on rings_L2 protects all of the elements of all the
236  * struct argo_ring_info belonging to this domain.
237  *
238  * To take rings_L2 you must already have R(L1). W(L1) implies W(rings_L2) and
239  * L3.
240  *
241  * == L3 : The individual ring_info lock: ring_info->L3_lock
242  *
243  * Protects all the fields within the argo_ring_info, aside from the ones that
244  * rings_L2 already protects: node, id, lock.
245  *
246  * To acquire L3 you must already have R(rings_L2). W(rings_L2) implies L3.
247  *
248  * == send_L2 : The per-domain single-sender partner rings lock:
249  *              d->argo->send_L2_lock
250  *
251  * Protects the per-domain send hash table : d->argo->send_hash
252  * and the elements in the hash table, and the node and id fields
253  * in struct argo_send_info in the hash table.
254  *
255  * To take send_L2, you must already have R(L1). W(L1) implies send_L2.
256  * Do not attempt to acquire a rings_L2 on any domain after taking and while
257  * holding a send_L2 lock -- acquire the rings_L2 (if one is needed) beforehand.
258  *
259  * == wildcard_L2 : The per-domain wildcard pending list lock:
260  *                  d->argo->wildcard_L2_lock
261  *
262  * Protects the per-domain list of outstanding signals for space availability
263  * on wildcard rings.
264  *
265  * To take wildcard_L2, you must already have R(L1). W(L1) implies wildcard_L2.
266  * No other locks are acquired after obtaining wildcard_L2.
267  */
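/*
 * Worked example of the discipline above (illustrative only): code that
 * operates on a single ring belonging to domain d takes the locks in
 * strictly descending order, eg:
 *
 *     read_lock(&L1_global_argo_rwlock);             R(L1)
 *     read_lock(&d->argo->rings_L2_rwlock);          R(rings_L2)
 *     spin_lock(&ring_info->L3_lock);                L3
 *     ... access the ring_info fields that L3 protects ...
 *     spin_unlock(&ring_info->L3_lock);
 *     read_unlock(&d->argo->rings_L2_rwlock);
 *     read_unlock(&L1_global_argo_rwlock);
 *
 * whereas domain destruction takes W(L1) alone, since W(L1) implies all of
 * the lower-level locks.
 */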
268 
269 /*
270  * Lock state validations macros
271  *
272  * These macros encode the logic to verify that the locking has adhered to the
273  * locking discipline above.
274  * eg. On entry to logic that requires holding at least R(rings_L2), this:
275  *      ASSERT(LOCKING_Read_rings_L2(d));
276  *
277  * checks that the lock state is sufficient, validating that one of the
278  * following must be true when executed:       R(rings_L2) && R(L1)
279  *                                        or:  W(rings_L2) && R(L1)
280  *                                        or:  W(L1)
281  *
282  * The LOCKING macros defined below here are for use at verification points.
283  */
284 #define LOCKING_Write_L1 (rw_is_write_locked(&L1_global_argo_rwlock))
285 /*
286  * While LOCKING_Read_L1 will return true even if the lock is write-locked,
287  * that's OK because everywhere that a Read lock is needed with these macros,
288  * holding a Write lock there instead is OK too: we're checking that _at least_
289  * the specified level of locks are held.
290  */
291 #define LOCKING_Read_L1 (rw_is_locked(&L1_global_argo_rwlock))
292 
293 #define LOCKING_Write_rings_L2(d) \
294     ((LOCKING_Read_L1 && rw_is_write_locked(&(d)->argo->rings_L2_rwlock)) || \
295      LOCKING_Write_L1)
296 /*
297  * Skip checking LOCKING_Write_rings_L2(d) within this LOCKING_Read_rings_L2
298  * definition because the first clause that is testing R(L1) && R(L2) will also
299  * return true if R(L1) && W(L2) is true, because of the way that rw_is_locked
300  * behaves. This results in a slightly shorter and faster implementation.
301  */
302 #define LOCKING_Read_rings_L2(d) \
303     ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock)) || \
304      LOCKING_Write_L1)
305 /*
306  * Skip checking LOCKING_Write_L1 within this LOCKING_L3 definition because
307  * LOCKING_Write_rings_L2(d) will return true for that condition.
308  */
309 #define LOCKING_L3(d, r) \
310     ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock) \
311       && spin_is_locked(&(r)->L3_lock)) || LOCKING_Write_rings_L2(d))
312 
313 #define LOCKING_send_L2(d) \
314     ((LOCKING_Read_L1 && spin_is_locked(&(d)->argo->send_L2_lock)) || \
315      LOCKING_Write_L1)
316 
317 /* Change this to #define ARGO_DEBUG here to enable more debug messages */
318 #undef ARGO_DEBUG
319 
320 #ifdef ARGO_DEBUG
321 #define argo_dprintk(format, args...) printk("argo: " format, ## args )
322 #else
323 #define argo_dprintk(format, ... ) ((void)0)
324 #endif
325 
326 /*
327  * This hash function is used to distribute rings within the per-domain
328  * hash tables (d->argo->ring_hash and d->argo->send_hash). The hash table
329  * will provide a struct if a match is found with an 'argo_ring_id' key:
330  * ie. the key is a (domain id, argo port, partner domain id) tuple.
331  * The algorithm approximates the string hashing function 'djb2'.
332  */
333 static unsigned int
334 hash_index(const struct argo_ring_id *id)
335 {
336     unsigned int hash = 5381; /* prime constant from djb2 */
337 
338     /* For each input: hash = hash * 33 + <new input character value> */
339     hash = ((hash << 5) + hash) +  (id->aport            & 0xff);
340     hash = ((hash << 5) + hash) + ((id->aport      >> 8) & 0xff);
341     hash = ((hash << 5) + hash) + ((id->aport     >> 16) & 0xff);
342     hash = ((hash << 5) + hash) + ((id->aport     >> 24) & 0xff);
343     hash = ((hash << 5) + hash) +  (id->domain_id        & 0xff);
344     hash = ((hash << 5) + hash) + ((id->domain_id  >> 8) & 0xff);
345     hash = ((hash << 5) + hash) +  (id->partner_id       & 0xff);
346     hash = ((hash << 5) + hash) + ((id->partner_id >> 8) & 0xff);
347 
348     /*
349      * Since ARGO_HASHTABLE_SIZE is small, use higher-order bits of the
350      * hash to contribute to the lower-order bits before masking off.
351      */
352     return (hash ^ (hash >> 15)) & (ARGO_HASHTABLE_SIZE - 1);
353 }
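/*
 * Worked example (illustrative only): for an id of
 * (domain_id 1, aport 0x1234, partner_id 2), the eight bytes fed into the
 * hash above, in order, are:
 *   0x34, 0x12, 0x00, 0x00   (aport, least significant byte first)
 *   0x01, 0x00               (domain_id)
 *   0x02, 0x00               (partner_id)
 * so rings differing in any field of the tuple will typically land in
 * different buckets.
 */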
354 
355 static struct argo_ring_info *
356 find_ring_info(const struct domain *d, const struct argo_ring_id *id)
357 {
358     struct argo_ring_info *ring_info;
359     const struct list_head *bucket;
360 
361     ASSERT(LOCKING_Read_rings_L2(d));
362 
363     /* List is not modified here. Search and return the match if found. */
364     bucket = &d->argo->ring_hash[hash_index(id)];
365 
366     list_for_each_entry(ring_info, bucket, node)
367     {
368         const struct argo_ring_id *cmpid = &ring_info->id;
369 
370         if ( cmpid->aport == id->aport &&
371              cmpid->domain_id == id->domain_id &&
372              cmpid->partner_id == id->partner_id )
373         {
374             argo_dprintk("found ring_info for ring(%u:%x %u)\n",
375                          id->domain_id, id->aport, id->partner_id);
376             return ring_info;
377         }
378     }
379     argo_dprintk("no ring_info for ring(%u:%x %u)\n",
380                  id->domain_id, id->aport, id->partner_id);
381 
382     return NULL;
383 }
384 
385 static struct argo_ring_info *
386 find_ring_info_by_match(const struct domain *d, xen_argo_port_t aport,
387                         domid_t partner_id)
388 {
389     struct argo_ring_id id;
390     struct argo_ring_info *ring_info;
391 
392     ASSERT(LOCKING_Read_rings_L2(d));
393 
394     id.aport = aport;
395     id.domain_id = d->domain_id;
396     id.partner_id = partner_id;
397 
398     ring_info = find_ring_info(d, &id);
399     if ( ring_info )
400         return ring_info;
401 
402     id.partner_id = XEN_ARGO_DOMID_ANY;
403 
404     return find_ring_info(d, &id);
405 }
406 
407 static struct argo_send_info *
408 find_send_info(const struct domain *d, const struct argo_ring_id *id)
409 {
410     struct argo_send_info *send_info;
411     const struct list_head *bucket;
412 
413     ASSERT(LOCKING_send_L2(d));
414 
415     /* List is not modified here. Search and return the match if found. */
416     bucket = &d->argo->send_hash[hash_index(id)];
417 
418     list_for_each_entry(send_info, bucket, node)
419     {
420         const struct argo_ring_id *cmpid = &send_info->id;
421 
422         if ( cmpid->aport == id->aport &&
423              cmpid->domain_id == id->domain_id &&
424              cmpid->partner_id == id->partner_id )
425         {
426             argo_dprintk("found send_info for ring(%u:%x %u)\n",
427                          id->domain_id, id->aport, id->partner_id);
428             return send_info;
429         }
430     }
431     argo_dprintk("no send_info for ring(%u:%x %u)\n",
432                  id->domain_id, id->aport, id->partner_id);
433 
434     return NULL;
435 }
436 
437 static void
438 signal_domain(struct domain *d)
439 {
440     argo_dprintk("signalling domid:%u\n", d->domain_id);
441 
442     send_guest_global_virq(d, VIRQ_ARGO);
443 }
444 
445 static void
446 signal_domid(domid_t domain_id)
447 {
448     struct domain *d = get_domain_by_id(domain_id);
449 
450     if ( !d )
451         return;
452 
453     signal_domain(d);
454     put_domain(d);
455 }
456 
457 static void
458 ring_unmap(const struct domain *d, struct argo_ring_info *ring_info)
459 {
460     unsigned int i;
461 
462     ASSERT(LOCKING_L3(d, ring_info));
463 
464     if ( !ring_info->mfn_mapping )
465         return;
466 
467     ASSERT(!ring_info->nmfns || ring_info->mfns);
468 
469     for ( i = 0; i < ring_info->nmfns; i++ )
470     {
471         if ( !ring_info->mfn_mapping[i] )
472             continue;
473 
474         ASSERT(!mfn_eq(ring_info->mfns[i], INVALID_MFN));
475         argo_dprintk("unmapping page %"PRI_mfn" from %p\n",
476                      mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
477 
478         unmap_domain_page_global(ring_info->mfn_mapping[i]);
479         ring_info->mfn_mapping[i] = NULL;
480     }
481 }
482 
483 static int
484 ring_map_page(const struct domain *d, struct argo_ring_info *ring_info,
485               unsigned int i, void **out_ptr)
486 {
487     ASSERT(LOCKING_L3(d, ring_info));
488 
489     /*
490      * FIXME: Investigate using vmap to create a single contiguous virtual
491      * address space mapping of the ring instead of using the array of single
492      * page mappings.
493      * Affects logic in memcpy_to_guest_ring, the mfn_mapping array data
494      * structure, and places where ring mappings are added or removed.
495      */
496 
497     if ( i >= ring_info->nmfns )
498     {
499         gprintk(XENLOG_ERR,
500                "argo: ring (vm%u:%x vm%u) %p attempted to map page %u of %u\n",
501                 ring_info->id.domain_id, ring_info->id.aport,
502                 ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
503         return -ENOMEM;
504     }
505     i = array_index_nospec(i, ring_info->nmfns);
506 
507     if ( !ring_info->mfns || !ring_info->mfn_mapping )
508     {
509         ASSERT_UNREACHABLE();
510         ring_info->len = 0;
511         return -ENOMEM;
512     }
513 
514     if ( !ring_info->mfn_mapping[i] )
515     {
516         ring_info->mfn_mapping[i] = map_domain_page_global(ring_info->mfns[i]);
517         if ( !ring_info->mfn_mapping[i] )
518         {
519             gprintk(XENLOG_ERR, "argo: ring (vm%u:%x vm%u) %p attempted to map "
520                     "page %u of %u\n",
521                     ring_info->id.domain_id, ring_info->id.aport,
522                     ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
523             return -ENOMEM;
524         }
525         argo_dprintk("mapping page %"PRI_mfn" to %p\n",
526                      mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
527     }
528 
529     if ( out_ptr )
530         *out_ptr = ring_info->mfn_mapping[i];
531 
532     return 0;
533 }
534 
535 static void
536 update_tx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
537               uint32_t tx_ptr)
538 {
539     xen_argo_ring_t *ringp;
540 
541     ASSERT(LOCKING_L3(d, ring_info));
542     ASSERT(ring_info->mfn_mapping[0]);
543 
544     ring_info->tx_ptr = tx_ptr;
545     ringp = ring_info->mfn_mapping[0];
546 
547     write_atomic(&ringp->tx_ptr, tx_ptr);
548     smp_wmb();
549 }
550 
551 static int
552 memcpy_to_guest_ring(const struct domain *d, struct argo_ring_info *ring_info,
553                      unsigned int offset,
554                      const void *src, XEN_GUEST_HANDLE(uint8) src_hnd,
555                      unsigned int len)
556 {
557     unsigned int mfns_index = offset >> PAGE_SHIFT;
558     void *dst;
559     int ret;
560     unsigned int src_offset = 0;
561 
562     ASSERT(LOCKING_L3(d, ring_info));
563 
564     offset &= ~PAGE_MASK;
565 
566     if ( len + offset > XEN_ARGO_MAX_RING_SIZE )
567         return -EFAULT;
568 
569     while ( len )
570     {
571         unsigned int head_len = (offset + len) > PAGE_SIZE ? PAGE_SIZE - offset
572                                                            : len;
573 
574         ret = ring_map_page(d, ring_info, mfns_index, &dst);
575         if ( ret )
576             return ret;
577 
578         if ( src )
579         {
580             memcpy(dst + offset, src + src_offset, head_len);
581             src_offset += head_len;
582         }
583         else
584         {
585             if ( copy_from_guest(dst + offset, src_hnd, head_len) )
586                 return -EFAULT;
587 
588             guest_handle_add_offset(src_hnd, head_len);
589         }
590 
591         mfns_index++;
592         len -= head_len;
593         offset = 0;
594     }
595 
596     return 0;
597 }
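/*
 * Illustrative example of the copy loop above (assuming 4096-byte pages;
 * the numbers are hypothetical): a copy with offset=4000 and len=200 is
 * split across two mapped pages:
 *   head_len = 4096 - 4000 = 96 bytes into page 0 at offset 4000,
 * then the remaining 104 bytes into page 1 at offset 0.
 */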
598 
599 /*
600  * Use this with caution: rx_ptr is under guest control and may be bogus.
601  * See get_sanitized_ring for a safer alternative.
602  */
603 static int
604 get_rx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
605            uint32_t *rx_ptr)
606 {
607     void *src;
608     xen_argo_ring_t *ringp;
609     int ret;
610 
611     ASSERT(LOCKING_L3(d, ring_info));
612 
613     if ( !ring_info->nmfns || ring_info->nmfns < NPAGES_RING(ring_info->len) )
614         return -EINVAL;
615 
616     ret = ring_map_page(d, ring_info, 0, &src);
617     if ( ret )
618         return ret;
619 
620     ringp = (xen_argo_ring_t *)src;
621 
622     *rx_ptr = read_atomic(&ringp->rx_ptr);
623 
624     return 0;
625 }
626 
627 /*
628  * get_sanitized_ring creates a modified copy of the ring pointers where
629  * the rx_ptr is rounded up to ensure it is aligned, and then ring
630  * wrap is handled. Simplifies safe use of the rx_ptr for available
631  * space calculation.
632  */
633 static int
634 get_sanitized_ring(const struct domain *d, xen_argo_ring_t *ring,
635                    struct argo_ring_info *ring_info)
636 {
637     uint32_t rx_ptr;
638     int ret;
639 
640     ASSERT(LOCKING_L3(d, ring_info));
641 
642     ret = get_rx_ptr(d, ring_info, &rx_ptr);
643     if ( ret )
644         return ret;
645 
646     ring->tx_ptr = ring_info->tx_ptr;
647 
648     rx_ptr = ROUNDUP_MESSAGE(rx_ptr);
649     if ( rx_ptr >= ring_info->len )
650         rx_ptr = 0;
651 
652     ring->rx_ptr = rx_ptr;
653 
654     return 0;
655 }
656 
657 static unsigned int
658 ringbuf_payload_space(const struct domain *d, struct argo_ring_info *ring_info)
659 {
660     xen_argo_ring_t ring;
661     unsigned int len;
662     int ret;
663 
664     ASSERT(LOCKING_L3(d, ring_info));
665 
666     len = ring_info->len;
667     if ( !len )
668         return 0;
669 
670     if ( get_sanitized_ring(d, &ring, ring_info) )
671         return 0;
672 
673     argo_dprintk("sanitized ringbuf_payload_space: tx_ptr=%u rx_ptr=%u\n",
674                  ring.tx_ptr, ring.rx_ptr);
675 
676     /*
677      * rx_ptr == tx_ptr means that the ring has been emptied.
678      * See message size checking logic in the entry to ringbuf_insert which
679      * ensures that there is always one message slot of size ROUNDUP_MESSAGE(1)
680      * left available, preventing a ring from being entirely filled.
681      * This ensures that matching ring indexes always indicate an empty ring
682      * and never a full one.
683      */
684     ret = ring.rx_ptr - ring.tx_ptr;
685     if ( ret <= 0 )
686         ret += len;
687 
688     /*
689      * In a sanitized ring, we can rely on:
690      *              (rx_ptr < ring_info->len)           &&
691      *              (tx_ptr < ring_info->len)           &&
692      *      (ring_info->len <= XEN_ARGO_MAX_RING_SIZE)
693      *
694      * and since: XEN_ARGO_MAX_RING_SIZE < INT32_MAX
695      * therefore right here: ret < INT32_MAX
696      * and we are safe to return it as an unsigned value from this function.
697      * The subtractions below cannot increase its value.
698      */
699 
700     /*
701      * The maximum size payload for a message that will be accepted is:
702      * (the available space between the ring indexes)
703      *    minus (space for a message header)
704      *    minus (space for one message slot)
705      * since ringbuf_insert requires that one message slot be left
706      * unfilled, to avoid filling the ring to capacity and confusing a full
707      * ring with an empty one.
708      * Since the ring indexes are sanitized, the value in ret is aligned, so
709      * the simple subtraction here works to return the aligned value needed:
710      */
711     ret -= sizeof(struct xen_argo_ring_message_header);
712     ret -= ROUNDUP_MESSAGE(1);
713 
714     return (ret < 0) ? 0 : ret;
715 }
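/*
 * Worked example (illustrative only, assuming the 16-byte message slot
 * size): for a freshly-emptied ring with ring_info->len == 4096 and
 * rx_ptr == tx_ptr, the logic above yields ret = 0 + 4096, then subtracts
 * the 16-byte message header and the 16-byte reserved slot, reporting
 * 4064 bytes of available payload space.
 */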
716 
717 /*
718  * iov_count returns its count on success via an out variable to avoid
719  * potential for a negative return value to be used incorrectly
720  * (eg. coerced into an unsigned variable resulting in a large incorrect value)
721  */
722 static int
723 iov_count(const xen_argo_iov_t *piov, unsigned int niov,
724           unsigned int *count)
725 {
726     unsigned int sum_iov_lens = 0;
727 
728     if ( niov > XEN_ARGO_MAXIOV )
729         return -EINVAL;
730 
731     for ( ; niov--; piov++ )
732     {
733         /* valid iovs must have the padding field set to zero */
734         if ( piov->pad )
735         {
736             argo_dprintk("invalid iov: padding is not zero\n");
737             return -EINVAL;
738         }
739 
740         /* check each to protect sum against integer overflow */
741         if ( piov->iov_len > MAX_ARGO_MESSAGE_SIZE )
742         {
743             argo_dprintk("invalid iov_len: too big (%u)>%llu\n",
744                          piov->iov_len, MAX_ARGO_MESSAGE_SIZE);
745             return -EINVAL;
746         }
747 
748         sum_iov_lens += piov->iov_len;
749 
750         /*
751          * Again protect sum from integer overflow
752          * and ensure total msg size will be within bounds.
753          */
754         if ( sum_iov_lens > MAX_ARGO_MESSAGE_SIZE )
755         {
756             argo_dprintk("invalid iov series: total message too big\n");
757             return -EMSGSIZE;
758         }
759     }
760 
761     *count = sum_iov_lens;
762 
763     return 0;
764 }
765 
766 static int
767 ringbuf_insert(const struct domain *d, struct argo_ring_info *ring_info,
768                const struct argo_ring_id *src_id, xen_argo_iov_t *iovs,
769                unsigned int niov, uint32_t message_type, unsigned int len)
770 {
771     xen_argo_ring_t ring;
772     struct xen_argo_ring_message_header mh = { };
773     int sp, ret;
774     xen_argo_iov_t *piov;
775     XEN_GUEST_HANDLE(uint8) NULL_hnd = { };
776 
777     ASSERT(LOCKING_L3(d, ring_info));
778 
779     /*
780      * Enforced below: no more than 'len' bytes of guest data
781      * (plus the message header) will be sent in this operation.
782      */
783 
784     /*
785      * Upper bound check the message len against the ring size.
786      * The message must not fill the ring; there must be at least one slot
787      * remaining so we can distinguish a full ring from an empty one.
788      * iov_count has already verified: len <= MAX_ARGO_MESSAGE_SIZE.
789      */
790     if ( ring_info->len <= (sizeof(struct xen_argo_ring_message_header) +
791                             ROUNDUP_MESSAGE(len)) )
792         return -EMSGSIZE;
793 
794     ret = get_sanitized_ring(d, &ring, ring_info);
795     if ( ret )
796         return ret;
797 
798     argo_dprintk("ring.tx_ptr=%u ring.rx_ptr=%u ring len=%u"
799                  " ring_info->tx_ptr=%u\n",
800                  ring.tx_ptr, ring.rx_ptr, ring_info->len, ring_info->tx_ptr);
801 
802     if ( ring.rx_ptr == ring.tx_ptr )
803         sp = ring_info->len;
804     else
805     {
806         sp = ring.rx_ptr - ring.tx_ptr;
807         if ( sp < 0 )
808             sp += ring_info->len;
809     }
810 
811     /*
812      * Size bounds check against currently available space in the ring.
813      * Again: the message must not fill the ring leaving no space remaining.
814      */
815     if ( (ROUNDUP_MESSAGE(len) +
816             sizeof(struct xen_argo_ring_message_header)) >= sp )
817     {
818         argo_dprintk("EAGAIN\n");
819         return -EAGAIN;
820     }
821 
822     mh.len = len + sizeof(struct xen_argo_ring_message_header);
823     mh.source.aport = src_id->aport;
824     mh.source.domain_id = src_id->domain_id;
825     mh.message_type = message_type;
826 
827     /*
828      * For this copy to the guest ring, tx_ptr is always 16-byte aligned
829      * and the message header is 16 bytes long.
830      */
831     BUILD_BUG_ON(
832         sizeof(struct xen_argo_ring_message_header) != ROUNDUP_MESSAGE(1));
833 
834     /*
835      * First data write into the destination ring: fixed size, message header.
836      * This cannot overrun because the available free space (value in 'sp')
837      * is checked above and must be at least this size.
838      */
839     ret = memcpy_to_guest_ring(d, ring_info,
840                                ring.tx_ptr + sizeof(xen_argo_ring_t),
841                                &mh, NULL_hnd, sizeof(mh));
842     if ( ret )
843     {
844         gprintk(XENLOG_ERR,
845                 "argo: failed to write message header to ring (vm%u:%x vm%u)\n",
846                 ring_info->id.domain_id, ring_info->id.aport,
847                 ring_info->id.partner_id);
848 
849         return ret;
850     }
851 
852     ring.tx_ptr += sizeof(mh);
853     if ( ring.tx_ptr == ring_info->len )
854         ring.tx_ptr = 0;
855 
856     for ( piov = iovs; niov--; piov++ )
857     {
858         XEN_GUEST_HANDLE(uint8) buf_hnd = piov->iov_hnd;
859         unsigned int iov_len = piov->iov_len;
860 
861         /* If no data is provided in this iov, moan and skip on to the next */
862         if ( !iov_len )
863         {
864             gprintk(XENLOG_WARNING,
865                     "argo: no data iov_len=0 iov_hnd=%p ring (vm%u:%x vm%u)\n",
866                     buf_hnd.p, ring_info->id.domain_id, ring_info->id.aport,
867                     ring_info->id.partner_id);
868 
869             continue;
870         }
871 
872         if ( unlikely(!guest_handle_okay(buf_hnd, iov_len)) )
873         {
874             gprintk(XENLOG_ERR,
875                     "argo: bad iov handle [%p, %u] (vm%u:%x vm%u)\n",
876                     buf_hnd.p, iov_len,
877                     ring_info->id.domain_id, ring_info->id.aport,
878                     ring_info->id.partner_id);
879 
880             return -EFAULT;
881         }
882 
883         sp = ring_info->len - ring.tx_ptr;
884 
885         /* Check: iov data size versus free space at the tail of the ring */
886         if ( iov_len > sp )
887         {
888             /*
889              * Second possible data write: ring-tail-wrap-write.
890              * Populate the ring tail and update the internal tx_ptr to handle
891              * wrapping at the end of ring.
892              * Size of data written here: sp
893              * which is the exact full amount of free space available at the
894              * tail of the ring, so this cannot overrun.
895              */
896             ret = memcpy_to_guest_ring(d, ring_info,
897                                        ring.tx_ptr + sizeof(xen_argo_ring_t),
898                                        NULL, buf_hnd, sp);
899             if ( ret )
900             {
901                 gprintk(XENLOG_ERR,
902                         "argo: failed to copy {%p, %d} (vm%u:%x vm%u)\n",
903                         buf_hnd.p, sp,
904                         ring_info->id.domain_id, ring_info->id.aport,
905                         ring_info->id.partner_id);
906 
907                 return ret;
908             }
909 
910             ring.tx_ptr = 0;
911             iov_len -= sp;
912             guest_handle_add_offset(buf_hnd, sp);
913 
914             ASSERT(iov_len <= ring_info->len);
915         }
916 
917         /*
918          * Third possible data write: all data remaining for this iov.
919          * Size of data written here: iov_len
920          *
921          * Case 1: if the ring-tail-wrap-write above was performed, then
922          *         iov_len has been decreased by 'sp' and ring.tx_ptr is zero.
923          *
924          *    We know from checking the result of iov_count:
925          *      len + sizeof(message_header) <= ring_info->len
926          *    We also know that len is the total of summing all iov_lens, so:
927          *       iov_len <= len
928          *    so by transitivity:
929          *       iov_len <= len <= (ring_info->len - sizeof(msgheader))
930          *    and therefore:
931          *       (iov_len + sizeof(msgheader) <= ring_info->len) &&
932          *       (ring.tx_ptr == 0)
933          *    so this write cannot overrun here.
934          *
935          * Case 2: ring-tail-wrap-write above was not performed
936          *    -> so iov_len is the guest-supplied value and: (iov_len <= sp)
937          *    ie. less than available space at the tail of the ring:
938          *        so this write cannot overrun.
939          */
940         ret = memcpy_to_guest_ring(d, ring_info,
941                                    ring.tx_ptr + sizeof(xen_argo_ring_t),
942                                    NULL, buf_hnd, iov_len);
943         if ( ret )
944         {
945             gprintk(XENLOG_ERR,
946                     "argo: failed to copy [%p, %u] (vm%u:%x vm%u)\n",
947                     buf_hnd.p, iov_len, ring_info->id.domain_id,
948                     ring_info->id.aport, ring_info->id.partner_id);
949 
950             return ret;
951         }
952 
953         ring.tx_ptr += iov_len;
954 
955         if ( ring.tx_ptr == ring_info->len )
956             ring.tx_ptr = 0;
957     }
958 
959     /*
960      * Finished writing data from all iovs into the ring: now need to round up
961      * tx_ptr to align to the next message boundary, and then wrap if necessary.
962      */
963     ring.tx_ptr = ROUNDUP_MESSAGE(ring.tx_ptr);
964 
965     if ( ring.tx_ptr >= ring_info->len )
966         ring.tx_ptr -= ring_info->len;
967 
968     update_tx_ptr(d, ring_info, ring.tx_ptr);
969 
970     /*
971      * At this point (and also on the error exit paths from this function) it is
972      * possible to unmap the ring_info, ie:
973      *   ring_unmap(d, ring_info);
974      * but performance should be improved by not doing so, and retaining
975      * the mapping.
976      * An XSM policy control over level of confidentiality required
977      * versus performance cost could be added to decide that here.
978      */
979 
980     return ret;
981 }
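/*
 * Illustrative layout of a single message as written by ringbuf_insert
 * (the offsets assume the 16-byte header layout checked by the
 * BUILD_BUG_ON above, with the public header's field ordering):
 *
 *   tx_ptr + 0  : len           (header plus payload length, 4 bytes)
 *   tx_ptr + 4  : source        (aport and domain_id of the sender)
 *   tx_ptr + 12 : message_type  (4 bytes)
 *   tx_ptr + 16 : payload       (copied from the iovs)
 *   ...           padding up to the next 16-byte message boundary
 *
 * all relative to the start of the ring data, ie. after the
 * sizeof(xen_argo_ring_t) prefix holding the shared rx/tx indexes.
 */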
982 
983 static void
984 wildcard_pending_list_remove(domid_t domain_id, struct pending_ent *ent)
985 {
986     struct domain *d = get_domain_by_id(domain_id);
987 
988     if ( !d )
989         return;
990 
991     ASSERT(LOCKING_Read_L1);
992 
993     if ( d->argo )
994     {
995         spin_lock(&d->argo->wildcard_L2_lock);
996         list_del(&ent->wildcard_node);
997         spin_unlock(&d->argo->wildcard_L2_lock);
998     }
999     put_domain(d);
1000 }
1001 
1002 static void
1003 wildcard_pending_list_insert(domid_t domain_id, struct pending_ent *ent)
1004 {
1005     struct domain *d = get_domain_by_id(domain_id);
1006 
1007     if ( !d )
1008         return;
1009 
1010     ASSERT(LOCKING_Read_L1);
1011 
1012     if ( d->argo )
1013     {
1014         spin_lock(&d->argo->wildcard_L2_lock);
1015         list_add(&ent->wildcard_node, &d->argo->wildcard_pend_list);
1016         spin_unlock(&d->argo->wildcard_L2_lock);
1017     }
1018     put_domain(d);
1019 }
1020 
1021 static void
1022 pending_remove_all(const struct domain *d, struct argo_ring_info *ring_info)
1023 {
1024     struct pending_ent *ent;
1025 
1026     ASSERT(LOCKING_L3(d, ring_info));
1027 
1028     /* Delete all pending notifications from this ring's list. */
1029     while ( (ent = list_first_entry_or_null(&ring_info->pending,
1030                                             struct pending_ent, node)) )
1031     {
1032         /* For wildcard rings, remove each from their wildcard list too. */
1033         if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1034             wildcard_pending_list_remove(ent->domain_id, ent);
1035         list_del(&ent->node);
1036         xfree(ent);
1037     }
1038     ring_info->npending = 0;
1039 }
1040 
1041 static void
1042 pending_notify(struct list_head *to_notify)
1043 {
1044     struct pending_ent *ent;
1045 
1046     ASSERT(LOCKING_Read_L1);
1047 
1048     /* Sending signals for all ents in this list, draining until it is empty. */
1049     while ( (ent = list_first_entry_or_null(to_notify, struct pending_ent,
1050                                             node)) )
1051     {
1052         list_del(&ent->node);
1053         signal_domid(ent->domain_id);
1054         xfree(ent);
1055     }
1056 }
1057 
1058 static void
1059 pending_find(const struct domain *d, struct argo_ring_info *ring_info,
1060              unsigned int payload_space, struct list_head *to_notify)
1061 {
1062     struct pending_ent *ent, *next;
1063 
1064     ASSERT(LOCKING_Read_rings_L2(d));
1065 
1066     /*
1067      * TODO: Current policy here is to signal _all_ of the waiting domains
1068      *       interested in sending a message of size less than payload_space.
1069      *
1070      * This is likely to be suboptimal, since once one of them has added
1071      * their message to the ring, there may well be insufficient room
1072      * available for any of the others to transmit, meaning that they were
1073      * woken in vain, which created extra work just to requeue their wait.
1074      *
1075      * Retain this simple policy for now since it at least avoids starving a
1076      * domain of available space notifications because of a policy that only
1077      * notified other domains instead. Improvement may be possible;
1078      * investigation required.
1079      */
1080     spin_lock(&ring_info->L3_lock);
1081 
1082     /* Remove matching ents from the ring list, and add them to "to_notify" */
1083     list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1084     {
1085         if ( payload_space >= ent->len )
1086         {
1087             if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1088                 wildcard_pending_list_remove(ent->domain_id, ent);
1089 
1090             list_del(&ent->node);
1091             ring_info->npending--;
1092             list_add(&ent->node, to_notify);
1093         }
1094     }
1095 
1096     spin_unlock(&ring_info->L3_lock);
1097 }
1098 
1099 static int
1100 pending_queue(const struct domain *d, struct argo_ring_info *ring_info,
1101               domid_t src_id, unsigned int len)
1102 {
1103     struct pending_ent *ent;
1104 
1105     ASSERT(LOCKING_L3(d, ring_info));
1106 
1107     if ( ring_info->npending >= MAX_PENDING_PER_RING )
1108         return -EBUSY;
1109 
1110     ent = xmalloc(struct pending_ent);
1111     if ( !ent )
1112         return -ENOMEM;
1113 
1114     ent->len = len;
1115     ent->domain_id = src_id;
1116     ent->ring_info = ring_info;
1117 
1118     if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1119         wildcard_pending_list_insert(src_id, ent);
1120     list_add(&ent->node, &ring_info->pending);
1121     ring_info->npending++;
1122 
1123     return 0;
1124 }
1125 
1126 static int
1127 pending_requeue(const struct domain *d, struct argo_ring_info *ring_info,
1128                 domid_t src_id, unsigned int len)
1129 {
1130     struct pending_ent *ent;
1131 
1132     ASSERT(LOCKING_L3(d, ring_info));
1133 
1134     /* List structure is not modified here. Update len in a match if found. */
1135     list_for_each_entry(ent, &ring_info->pending, node)
1136     {
1137         if ( ent->domain_id == src_id )
1138         {
1139             /*
1140              * Reuse an existing queue entry for a notification rather than add
1141              * another. If the existing entry is waiting for a smaller size than
1142              * the current message then adjust the record to wait for the
1143              * current (larger) size to be available before triggering a
1144              * notification.
1145              * This assists the waiting sender by ensuring that whenever a
1146              * notification is triggered, there is sufficient space available
1147              * for (at least) any one of the messages awaiting transmission.
1148              */
1149             if ( ent->len < len )
1150                 ent->len = len;
1151 
1152             return 0;
1153         }
1154     }
1155 
1156     return pending_queue(d, ring_info, src_id, len);
1157 }
1158 
1159 static void
1160 pending_cancel(const struct domain *d, struct argo_ring_info *ring_info,
1161                domid_t src_id)
1162 {
1163     struct pending_ent *ent, *next;
1164 
1165     ASSERT(LOCKING_L3(d, ring_info));
1166 
1167     /* Remove all ents where domain_id matches src_id from the ring's list. */
1168     list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1169     {
1170         if ( ent->domain_id == src_id )
1171         {
1172             /* For wildcard rings, remove each from their wildcard list too. */
1173             if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1174                 wildcard_pending_list_remove(ent->domain_id, ent);
1175             list_del(&ent->node);
1176             xfree(ent);
1177             ring_info->npending--;
1178         }
1179     }
1180 }
1181 
1182 static void
1183 wildcard_rings_pending_remove(struct domain *d)
1184 {
1185     struct pending_ent *ent;
1186 
1187     ASSERT(LOCKING_Write_L1);
1188 
1189     /* Delete all pending signals to the domain about wildcard rings. */
1190     while ( (ent = list_first_entry_or_null(&d->argo->wildcard_pend_list,
1191                                             struct pending_ent, node)) )
1192     {
1193         /*
1194          * The ent->node deleted here, and the npending value decreased,
1195          * belong to the ring_info of another domain, which is why this
1196          * function requires holding W(L1):
1197          * it implies the L3 lock that protects that ring_info struct.
1198          */
1199         ent->ring_info->npending--;
1200         list_del(&ent->node);
1201         list_del(&ent->wildcard_node);
1202         xfree(ent);
1203     }
1204 }
1205 
1206 static void
1207 ring_remove_mfns(const struct domain *d, struct argo_ring_info *ring_info)
1208 {
1209     unsigned int i;
1210 
1211     ASSERT(LOCKING_Write_rings_L2(d));
1212 
1213     if ( !ring_info->mfns )
1214         return;
1215 
1216     if ( !ring_info->mfn_mapping )
1217     {
1218         ASSERT_UNREACHABLE();
1219         return;
1220     }
1221 
1222     ring_unmap(d, ring_info);
1223 
1224     for ( i = 0; i < ring_info->nmfns; i++ )
1225         if ( !mfn_eq(ring_info->mfns[i], INVALID_MFN) )
1226             put_page_and_type(mfn_to_page(ring_info->mfns[i]));
1227 
1228     ring_info->nmfns = 0;
1229     XFREE(ring_info->mfns);
1230     XFREE(ring_info->mfn_mapping);
1231 }
1232 
1233 static void
1234 ring_remove_info(const struct domain *d, struct argo_ring_info *ring_info)
1235 {
1236     ASSERT(LOCKING_Write_rings_L2(d));
1237 
1238     pending_remove_all(d, ring_info);
1239     list_del(&ring_info->node);
1240     ring_remove_mfns(d, ring_info);
1241     xfree(ring_info);
1242 }
1243 
1244 static void
1245 domain_rings_remove_all(struct domain *d)
1246 {
1247     unsigned int i;
1248 
1249     ASSERT(LOCKING_Write_rings_L2(d));
1250 
1251     for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1252     {
1253         struct argo_ring_info *ring_info;
1254         struct list_head *bucket = &d->argo->ring_hash[i];
1255 
1256         while ( (ring_info = list_first_entry_or_null(bucket,
1257                                                       struct argo_ring_info,
1258                                                       node)) )
1259             ring_remove_info(d, ring_info);
1260     }
1261     d->argo->ring_count = 0;
1262 }
1263 
1264 /*
1265  * Tear down all rings of other domains where src_d domain is the partner.
1266  * (ie. it is the single domain that can send to those rings.)
1267  * This will also cancel any pending notifications about those rings.
1268  */
1269 static void
1270 partner_rings_remove(struct domain *src_d)
1271 {
1272     unsigned int i;
1273 
1274     ASSERT(LOCKING_Write_L1);
1275 
1276     for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1277     {
1278         struct argo_send_info *send_info;
1279         struct list_head *bucket = &src_d->argo->send_hash[i];
1280 
1281         /* Remove all ents from the send list. Take each off their ring list. */
1282         while ( (send_info = list_first_entry_or_null(bucket,
1283                                                       struct argo_send_info,
1284                                                       node)) )
1285         {
1286             struct domain *dst_d = get_domain_by_id(send_info->id.domain_id);
1287 
1288             if ( dst_d && dst_d->argo )
1289             {
1290                 struct argo_ring_info *ring_info =
1291                     find_ring_info(dst_d, &send_info->id);
1292 
1293                 if ( ring_info )
1294                 {
1295                     ring_remove_info(dst_d, ring_info);
1296                     dst_d->argo->ring_count--;
1297                 }
1298                 else
1299                     ASSERT_UNREACHABLE();
1300             }
1301             else
1302                 ASSERT_UNREACHABLE();
1303 
1304             if ( dst_d )
1305                 put_domain(dst_d);
1306 
1307             list_del(&send_info->node);
1308             xfree(send_info);
1309         }
1310     }
1311 }
1312 
1313 static int
1314 fill_ring_data(const struct domain *currd,
1315                XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) data_ent_hnd)
1316 {
1317     xen_argo_ring_data_ent_t ent;
1318     struct domain *dst_d;
1319     struct argo_ring_info *ring_info;
1320     int ret = 0;
1321 
1322     ASSERT(currd == current->domain);
1323     ASSERT(LOCKING_Read_L1);
1324 
1325     if ( __copy_from_guest(&ent, data_ent_hnd, 1) )
1326         return -EFAULT;
1327 
1328     argo_dprintk("fill_ring_data: ent.ring.domain=%u,ent.ring.aport=%x\n",
1329                  ent.ring.domain_id, ent.ring.aport);
1330 
1331     ent.flags = 0;
1332 
1333     dst_d = get_domain_by_id(ent.ring.domain_id);
1334     if ( !dst_d || !dst_d->argo )
1335         goto out;
1336 
1337     /*
1338      * Don't supply information about rings that a guest is not
1339      * allowed to send to.
1340      */
1341     ret = xsm_argo_send(currd, dst_d);
1342     if ( ret )
1343     {
1344         put_domain(dst_d);
1345         return ret;
1346     }
1347 
1348     read_lock(&dst_d->argo->rings_L2_rwlock);
1349 
1350     ring_info = find_ring_info_by_match(dst_d, ent.ring.aport,
1351                                         currd->domain_id);
1352     if ( ring_info )
1353     {
1354         unsigned int space_avail;
1355 
1356         ent.flags |= XEN_ARGO_RING_EXISTS;
1357 
1358         spin_lock(&ring_info->L3_lock);
1359 
1360         ent.max_message_size = ring_info->len -
1361                                    sizeof(struct xen_argo_ring_message_header) -
1362                                    ROUNDUP_MESSAGE(1);
1363 
1364         if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1365             ent.flags |= XEN_ARGO_RING_SHARED;
1366 
1367         space_avail = ringbuf_payload_space(dst_d, ring_info);
1368 
1369         argo_dprintk("fill_ring_data: aport=%x space_avail=%u"
1370                      " space_wanted=%u\n",
1371                      ring_info->id.aport, space_avail, ent.space_required);
1372 
1373         /* Do not queue a notification for an unachievable size */
1374         if ( ent.space_required > ent.max_message_size )
1375             ent.flags |= XEN_ARGO_RING_EMSGSIZE;
1376         else if ( space_avail >= ent.space_required )
1377         {
1378             pending_cancel(dst_d, ring_info, currd->domain_id);
1379             ent.flags |= XEN_ARGO_RING_SUFFICIENT;
1380         }
1381         else
1382         {
1383             ret = pending_requeue(dst_d, ring_info, currd->domain_id,
1384                                   ent.space_required);
1385             if ( ret == -EBUSY )
1386             {
1387                 /*
1388                  * Too many other domains are already awaiting notification
1389                  * about available space on this ring. Indicate this state via
1390                  * flag. No need to return an error to the caller; allow the
1391                  * processing of queries about other rings to continue.
1392                  */
1393                 ent.flags |= XEN_ARGO_RING_EBUSY;
1394                 ret = 0;
1395             }
1396         }
1397 
1398         spin_unlock(&ring_info->L3_lock);
1399 
1400         if ( space_avail == ent.max_message_size )
1401             ent.flags |= XEN_ARGO_RING_EMPTY;
1402 
1403     }
1404     read_unlock(&dst_d->argo->rings_L2_rwlock);
1405 
1406  out:
1407     if ( dst_d )
1408         put_domain(dst_d);
1409 
1410     if ( !ret && (__copy_field_to_guest(data_ent_hnd, &ent, flags) ||
1411                   __copy_field_to_guest(data_ent_hnd, &ent, max_message_size)) )
1412         return -EFAULT;
1413 
1414     return ret;
1415 }
1416 
1417 static int
1418 find_ring_mfn(struct domain *d, gfn_t gfn, mfn_t *mfn)
1419 {
1420     struct page_info *page;
1421     p2m_type_t p2mt;
1422     int ret;
1423 
1424     ret = check_get_page_from_gfn(d, gfn, false, &p2mt, &page);
1425     if ( unlikely(ret) )
1426         return ret;
1427 
1428     *mfn = page_to_mfn(page);
1429     if ( !mfn_valid(*mfn) )
1430         ret = -EINVAL;
1431 #ifdef CONFIG_X86
1432     else if ( p2mt == p2m_ram_logdirty )
1433         ret = -EAGAIN;
1434 #endif
1435     else if ( (p2mt != p2m_ram_rw) ||
1436               !get_page_and_type(page, d, PGT_writable_page) )
1437         ret = -EINVAL;
1438 
1439     put_page(page);
1440 
1441     return ret;
1442 }
1443 
1444 static int
1445 find_ring_mfns(struct domain *d, struct argo_ring_info *ring_info,
1446                const unsigned int npage,
1447                XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
1448                const unsigned int len)
1449 {
1450     unsigned int i;
1451     int ret = 0;
1452     mfn_t *mfns;
1453     void **mfn_mapping;
1454 
1455     ASSERT(LOCKING_Write_rings_L2(d));
1456 
1457     if ( ring_info->mfns )
1458     {
1459         /* Ring already existed: drop the previous mapping. */
1460         argo_dprintk("argo: vm%u re-register existing ring "
1461                      "(vm%u:%x vm%u) clears mapping\n",
1462                      d->domain_id, ring_info->id.domain_id,
1463                      ring_info->id.aport, ring_info->id.partner_id);
1464 
1465         ring_remove_mfns(d, ring_info);
1466         ASSERT(!ring_info->mfns);
1467     }
1468 
1469     mfns = xmalloc_array(mfn_t, npage);
1470     if ( !mfns )
1471         return -ENOMEM;
1472 
1473     for ( i = 0; i < npage; i++ )
1474         mfns[i] = INVALID_MFN;
1475 
1476     mfn_mapping = xzalloc_array(void *, npage);
1477     if ( !mfn_mapping )
1478     {
1479         xfree(mfns);
1480         return -ENOMEM;
1481     }
1482 
1483     ring_info->mfns = mfns;
1484     ring_info->mfn_mapping = mfn_mapping;
1485 
1486     for ( i = 0; i < npage; i++ )
1487     {
1488         mfn_t mfn;
1489         xen_argo_gfn_t argo_gfn;
1490 
1491         ret = __copy_from_guest_offset(&argo_gfn, gfn_hnd, i, 1) ? -EFAULT : 0;
1492         if ( ret )
1493             break;
1494 
1495         ret = find_ring_mfn(d, _gfn(argo_gfn), &mfn);
1496         if ( ret )
1497         {
1498             gprintk(XENLOG_ERR, "argo: vm%u: invalid gfn %"PRI_gfn" "
1499                     "r:(vm%u:%x vm%u) %p %u/%u\n",
1500                     d->domain_id, gfn_x(_gfn(argo_gfn)),
1501                     ring_info->id.domain_id, ring_info->id.aport,
1502                     ring_info->id.partner_id, ring_info, i, npage);
1503             break;
1504         }
1505 
1506         ring_info->mfns[i] = mfn;
1507 
1508         argo_dprintk("%u: %"PRI_gfn" -> %"PRI_mfn"\n",
1509                      i, gfn_x(_gfn(argo_gfn)), mfn_x(ring_info->mfns[i]));
1510     }
1511 
1512     ring_info->nmfns = i;
1513 
1514     if ( ret )
1515         ring_remove_mfns(d, ring_info);
1516     else
1517     {
1518         ASSERT(ring_info->nmfns == NPAGES_RING(len));
1519 
1520         argo_dprintk("argo: vm%u ring (vm%u:%x vm%u) %p "
1521                      "mfn_mapping %p len %u nmfns %u\n",
1522                      d->domain_id, ring_info->id.domain_id,
1523                      ring_info->id.aport, ring_info->id.partner_id, ring_info,
1524                      ring_info->mfn_mapping, ring_info->len, ring_info->nmfns);
1525     }
1526 
1527     return ret;
1528 }
1529 
1530 static long
1531 unregister_ring(struct domain *currd,
1532                 XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd)
1533 {
1534     xen_argo_unregister_ring_t unreg;
1535     struct argo_ring_id ring_id;
1536     struct argo_ring_info *ring_info = NULL;
1537     struct argo_send_info *send_info = NULL;
1538     struct domain *dst_d = NULL;
1539 
1540     ASSERT(currd == current->domain);
1541 
1542     if ( copy_from_guest(&unreg, unreg_hnd, 1) )
1543         return -EFAULT;
1544 
1545     if ( unreg.pad )
1546         return -EINVAL;
1547 
1548     ring_id.partner_id = unreg.partner_id;
1549     ring_id.aport = unreg.aport;
1550     ring_id.domain_id = currd->domain_id;
1551 
1552     read_lock(&L1_global_argo_rwlock);
1553 
1554     if ( unlikely(!currd->argo) )
1555     {
1556         read_unlock(&L1_global_argo_rwlock);
1557         return -ENODEV;
1558     }
1559 
1560     write_lock(&currd->argo->rings_L2_rwlock);
1561 
1562     ring_info = find_ring_info(currd, &ring_id);
1563     if ( !ring_info )
1564         goto out;
1565 
1566     ring_remove_info(currd, ring_info);
1567     currd->argo->ring_count--;
1568 
1569     if ( ring_id.partner_id == XEN_ARGO_DOMID_ANY )
1570         goto out;
1571 
1572     dst_d = get_domain_by_id(ring_id.partner_id);
1573     if ( !dst_d || !dst_d->argo )
1574     {
1575         ASSERT_UNREACHABLE();
1576         goto out;
1577     }
1578 
1579     spin_lock(&dst_d->argo->send_L2_lock);
1580 
1581     send_info = find_send_info(dst_d, &ring_id);
1582     if ( send_info )
1583         list_del(&send_info->node);
1584     else
1585         ASSERT_UNREACHABLE();
1586 
1587     spin_unlock(&dst_d->argo->send_L2_lock);
1588 
1589  out:
1590     write_unlock(&currd->argo->rings_L2_rwlock);
1591 
1592     read_unlock(&L1_global_argo_rwlock);
1593 
1594     if ( dst_d )
1595         put_domain(dst_d);
1596 
1597     xfree(send_info);
1598 
1599     if ( !ring_info )
1600     {
1601         argo_dprintk("unregister_ring: no ring_info found for ring(%u:%x %u)\n",
1602                      ring_id.domain_id, ring_id.aport, ring_id.partner_id);
1603         return -ENOENT;
1604     }
1605 
1606     return 0;
1607 }
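
/*
 * An illustrative guest-side call, sketched under stated assumptions:
 * argo_hypercall() here is a hypothetical wrapper around the five-argument
 * Argo hypercall (it is not defined in this file), reused by the other
 * sketches below:
 *
 *   xen_argo_unregister_ring_t unreg = {
 *       .aport      = 1000,             // hypothetical port number
 *       .partner_id = partner_domid,    // must match the registration
 *   };
 *   ret = argo_hypercall(XEN_ARGO_OP_unregister_ring, &unreg, NULL, 0, 0);
 *
 * arg2 must be a null handle and arg3/arg4 zero, as do_argo_op() below
 * enforces.
 */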
1608 
1609 static long
1610 register_ring(struct domain *currd,
1611               XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd,
1612               XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
1613               unsigned int npage, unsigned int flags)
1614 {
1615     xen_argo_register_ring_t reg;
1616     struct argo_ring_id ring_id;
1617     void *map_ringp;
1618     xen_argo_ring_t *ringp;
1619     struct argo_ring_info *ring_info, *new_ring_info = NULL;
1620     struct argo_send_info *send_info = NULL;
1621     struct domain *dst_d = NULL;
1622     int ret = 0;
1623     unsigned int private_tx_ptr;
1624 
1625     ASSERT(currd == current->domain);
1626 
1627     /* flags: reserve currently-undefined bits by requiring them to be zero. */
1628     if ( unlikely(flags & ~XEN_ARGO_REGISTER_FLAG_MASK) )
1629         return -EINVAL;
1630 
1631     if ( copy_from_guest(&reg, reg_hnd, 1) )
1632         return -EFAULT;
1633 
1634     /*
1635      * A ring must be large enough to transmit messages, so requires space for:
1636      * * 1 message header, plus
1637      * * 1 payload slot (payload is always rounded to a multiple of 16 bytes)
1638      *   for the message payload to be written into, plus
1639      * * 1 more slot, so that the ring cannot be filled to capacity with a
1640      *   single minimum-size message -- see the logic in ringbuf_insert --
1641      *   allowing for this ensures that there can be space remaining when a
1642      *   message is present.
1643      * The above determines the minimum acceptable ring size.
1644      */
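    /*
     * Illustrative arithmetic, assuming a 16-byte message slot and a
     * 16-byte struct xen_argo_ring_message_header: the smallest acceptable
     * reg.len is 16 + 16 + 16 = 48 bytes -- one header, one rounded-up
     * payload slot, and one spare slot.
     */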
1645     if ( (reg.len < (sizeof(struct xen_argo_ring_message_header)
1646                       + ROUNDUP_MESSAGE(1) + ROUNDUP_MESSAGE(1))) ||
1647          (reg.len > XEN_ARGO_MAX_RING_SIZE) ||
1648          (reg.len != ROUNDUP_MESSAGE(reg.len)) ||
1649          (NPAGES_RING(reg.len) != npage) ||
1650          (reg.pad != 0) )
1651         return -EINVAL;
1652 
1653     ring_id.partner_id = reg.partner_id;
1654     ring_id.aport = reg.aport;
1655     ring_id.domain_id = currd->domain_id;
1656 
1657     if ( reg.partner_id == XEN_ARGO_DOMID_ANY )
1658     {
1659         ret = opt_argo_mac_permissive ? xsm_argo_register_any_source(currd) :
1660                                         -EPERM;
1661         if ( ret )
1662             return ret;
1663     }
1664     else
1665     {
1666         dst_d = get_domain_by_id(reg.partner_id);
1667         if ( !dst_d )
1668         {
1669             argo_dprintk("!dst_d, ESRCH\n");
1670             return -ESRCH;
1671         }
1672 
1673         ret = xsm_argo_register_single_source(currd, dst_d);
1674         if ( ret )
1675             goto out;
1676 
1677         send_info = xzalloc(struct argo_send_info);
1678         if ( !send_info )
1679         {
1680             ret = -ENOMEM;
1681             goto out;
1682         }
1683         send_info->id = ring_id;
1684     }
1685 
1686     /*
1687      * The common case is that the ring doesn't already exist, so do the
1688      * allocation here before taking any locks.
1689      */
1690     new_ring_info = xzalloc(struct argo_ring_info);
1691     if ( !new_ring_info )
1692     {
1693         ret = -ENOMEM;
1694         goto out;
1695     }
1696 
1697     read_lock(&L1_global_argo_rwlock);
1698 
1699     if ( !currd->argo )
1700     {
1701         ret = -ENODEV;
1702         goto out_unlock;
1703     }
1704 
1705     if ( dst_d && !dst_d->argo )
1706     {
1707         argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
1708         ret = -ECONNREFUSED;
1709         goto out_unlock;
1710     }
1711 
1712     write_lock(&currd->argo->rings_L2_rwlock);
1713 
1714     if ( currd->argo->ring_count >= MAX_RINGS_PER_DOMAIN )
1715     {
1716         ret = -ENOSPC;
1717         goto out_unlock2;
1718     }
1719 
1720     ring_info = find_ring_info(currd, &ring_id);
1721     if ( !ring_info )
1722     {
1723         ring_info = new_ring_info;
1724         new_ring_info = NULL;
1725 
1726         spin_lock_init(&ring_info->L3_lock);
1727 
1728         ring_info->id = ring_id;
1729         INIT_LIST_HEAD(&ring_info->pending);
1730 
1731         list_add(&ring_info->node,
1732                  &currd->argo->ring_hash[hash_index(&ring_info->id)]);
1733 
1734         argo_dprintk("argo: vm%u registering ring (vm%u:%x vm%u)\n",
1735                      currd->domain_id, ring_id.domain_id, ring_id.aport,
1736                      ring_id.partner_id);
1737     }
1738     else if ( ring_info->len )
1739     {
1740         /*
1741          * If the caller specified that the ring must not already exist,
1742          * fail the attempt to re-register a completed ring that already exists.
1743          */
1744         if ( flags & XEN_ARGO_REGISTER_FLAG_FAIL_EXIST )
1745         {
1746             gprintk(XENLOG_ERR, "argo: vm%u disallowed reregistration of "
1747                     "existing ring (vm%u:%x vm%u)\n",
1748                     currd->domain_id, ring_id.domain_id, ring_id.aport,
1749                     ring_id.partner_id);
1750             ret = -EEXIST;
1751             goto out_unlock2;
1752         }
1753 
1754         if ( ring_info->len != reg.len )
1755         {
1756             /*
1757              * Change of ring size could result in entries on the pending
1758              * notifications list that will never trigger.
1759              * Simple blunt solution: disallow ring resize for now.
1760              * TODO: investigate enabling ring resize.
1761              */
1762             gprintk(XENLOG_ERR, "argo: vm%u attempted to change ring size "
1763                     "(vm%u:%x vm%u)\n",
1764                     currd->domain_id, ring_id.domain_id, ring_id.aport,
1765                     ring_id.partner_id);
1766             /*
1767              * Could return EINVAL here, but if the ring didn't already
1768              * exist then the arguments would have been valid, so: EEXIST.
1769              */
1770             ret = -EEXIST;
1771             goto out_unlock2;
1772         }
1773 
1774         argo_dprintk("argo: vm%u re-registering existing ring (vm%u:%x vm%u)\n",
1775                      currd->domain_id, ring_id.domain_id, ring_id.aport,
1776                      ring_id.partner_id);
1777     }
1778 
1779     ret = find_ring_mfns(currd, ring_info, npage, gfn_hnd, reg.len);
1780     if ( ret )
1781     {
1782         gprintk(XENLOG_ERR,
1783                 "argo: vm%u failed to find ring mfns (vm%u:%x vm%u)\n",
1784                 currd->domain_id, ring_id.domain_id, ring_id.aport,
1785                 ring_id.partner_id);
1786 
1787         ring_remove_info(currd, ring_info);
1788         goto out_unlock2;
1789     }
1790 
1791     /*
1792      * The first page of the memory supplied for the ring has the xen_argo_ring
1793      * structure at its head, which is where the ring indexes reside.
1794      */
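    /*
     * A sketch of the layout this relies on, assuming the public-header
     * definition of xen_argo_ring (two 32-bit indexes padded out before the
     * message area):
     *
     *   offset 0   : uint32_t rx_ptr  -- consumer index, advanced by the guest
     *   offset 4   : uint32_t tx_ptr  -- producer index, advanced by Xen
     *   offset 8.. : reserved padding, then the ring[] message slots,
     *                reg.len bytes in total
     */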
1795     ret = ring_map_page(currd, ring_info, 0, &map_ringp);
1796     if ( ret )
1797     {
1798         gprintk(XENLOG_ERR,
1799                 "argo: vm%u failed to map ring mfn 0 (vm%u:%x vm%u)\n",
1800                 currd->domain_id, ring_id.domain_id, ring_id.aport,
1801                 ring_id.partner_id);
1802 
1803         ring_remove_info(currd, ring_info);
1804         goto out_unlock2;
1805     }
1806     ringp = map_ringp;
1807 
1808     private_tx_ptr = read_atomic(&ringp->tx_ptr);
1809 
1810     if ( (private_tx_ptr >= reg.len) ||
1811          (ROUNDUP_MESSAGE(private_tx_ptr) != private_tx_ptr) )
1812     {
1813         /*
1814          * Since the ring state is inconsistent, attempt to flush its contents
1815          * here by setting the tx_ptr to the next aligned message slot past
1816          * the latest rx_ptr we have observed. Handle ring wrap correctly.
1817          */
1818         private_tx_ptr = ROUNDUP_MESSAGE(read_atomic(&ringp->rx_ptr));
1819 
1820         if ( private_tx_ptr >= reg.len )
1821             private_tx_ptr = 0;
1822 
1823         update_tx_ptr(currd, ring_info, private_tx_ptr);
1824     }
1825 
1826     ring_info->tx_ptr = private_tx_ptr;
1827     ring_info->len = reg.len;
1828     currd->argo->ring_count++;
1829 
1830     if ( send_info )
1831     {
1832         spin_lock(&dst_d->argo->send_L2_lock);
1833 
1834         list_add(&send_info->node,
1835                  &dst_d->argo->send_hash[hash_index(&send_info->id)]);
1836 
1837         spin_unlock(&dst_d->argo->send_L2_lock);
1838     }
1839 
1840  out_unlock2:
1841     write_unlock(&currd->argo->rings_L2_rwlock);
1842 
1843  out_unlock:
1844     read_unlock(&L1_global_argo_rwlock);
1845 
1846  out:
1847     if ( dst_d )
1848         put_domain(dst_d);
1849 
1850     if ( ret )
1851         xfree(send_info);
1852 
1853     xfree(new_ring_info);
1854 
1855     return ret;
1856 }
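
/*
 * A minimal guest-side sketch of ring registration, for illustration only.
 * It assumes the hypothetical argo_hypercall() shim introduced after
 * unregister_ring() above, and a zeroed, page-aligned guest buffer backing
 * the ring:
 *
 *   xen_argo_register_ring_t reg = {
 *       .aport      = 1000,            // hypothetical port number
 *       .partner_id = partner_domid,   // or XEN_ARGO_DOMID_ANY
 *       .len        = ring_len,        // multiple of the 16-byte slot size
 *   };
 *   xen_argo_gfn_t gfns[NPAGES_RING(ring_len)];
 *   // ... fill gfns[] with the frame numbers of the pages backing the ring
 *   ret = argo_hypercall(XEN_ARGO_OP_register_ring, &reg, gfns,
 *                        NPAGES_RING(ring_len), 0);
 *
 * Passing XEN_ARGO_REGISTER_FLAG_FAIL_EXIST as the final argument makes
 * re-registration of an existing ring fail with -EEXIST, and a
 * XEN_ARGO_DOMID_ANY registration is only permitted when
 * opt_argo_mac_permissive is set and XSM approves, per the logic above.
 */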
1857 
1858 static void
1859 notify_ring(const struct domain *d, struct argo_ring_info *ring_info,
1860             struct list_head *to_notify)
1861 {
1862     unsigned int space;
1863 
1864     ASSERT(LOCKING_Read_rings_L2(d));
1865 
1866     spin_lock(&ring_info->L3_lock);
1867 
1868     if ( ring_info->len )
1869         space = ringbuf_payload_space(d, ring_info);
1870     else
1871         space = 0;
1872 
1873     spin_unlock(&ring_info->L3_lock);
1874 
1875     if ( space )
1876         pending_find(d, ring_info, space, to_notify);
1877 }
1878 
1879 static void
1880 notify_check_pending(struct domain *d)
1881 {
1882     unsigned int i;
1883     LIST_HEAD(to_notify);
1884 
1885     ASSERT(LOCKING_Read_L1);
1886 
1887     read_lock(&d->argo->rings_L2_rwlock);
1888 
1889     /* Walk all rings, calling notify_ring() to populate the to_notify list. */
1890     for ( i = 0; i < ARGO_HASHTABLE_SIZE; i++ )
1891     {
1892         struct argo_ring_info *ring_info, *next;
1893         struct list_head *bucket = &d->argo->ring_hash[i];
1894 
1895         list_for_each_entry_safe(ring_info, next, bucket, node)
1896             notify_ring(d, ring_info, &to_notify);
1897     }
1898 
1899     read_unlock(&d->argo->rings_L2_rwlock);
1900 
1901     if ( !list_empty(&to_notify) )
1902         pending_notify(&to_notify);
1903 }
1904 
1905 static long
1906 notify(struct domain *currd,
1907        XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd)
1908 {
1909     XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) ent_hnd;
1910     xen_argo_ring_data_t ring_data;
1911     int ret = 0;
1912 
1913     ASSERT(currd == current->domain);
1914 
1915     read_lock(&L1_global_argo_rwlock);
1916 
1917     if ( !currd->argo )
1918     {
1919         argo_dprintk("!d->argo, ENODEV\n");
1920         ret = -ENODEV;
1921         goto out;
1922     }
1923 
1924     notify_check_pending(currd);
1925 
1926     if ( guest_handle_is_null(ring_data_hnd) )
1927         goto out;
1928 
1929     ret = copy_from_guest(&ring_data, ring_data_hnd, 1) ? -EFAULT : 0;
1930     if ( ret )
1931         goto out;
1932 
1933     if ( ring_data.nent > MAX_NOTIFY_COUNT )
1934     {
1935         gprintk(XENLOG_ERR, "argo: notify entry count(%u) exceeds max(%u)\n",
1936                 ring_data.nent, MAX_NOTIFY_COUNT);
1937         ret = -EACCES;
1938         goto out;
1939     }
1940 
1941     ent_hnd = guest_handle_for_field(ring_data_hnd,
1942                                      xen_argo_ring_data_ent_t, data[0]);
1943     if ( unlikely(!guest_handle_okay(ent_hnd, ring_data.nent)) )
1944     {
1945         ret = -EFAULT;
1946         goto out;
1947     }
1948 
1949     while ( !ret && ring_data.nent-- )
1950     {
1951         ret = fill_ring_data(currd, ent_hnd);
1952         guest_handle_add_offset(ent_hnd, 1);
1953     }
1954 
1955  out:
1956     read_unlock(&L1_global_argo_rwlock);
1957 
1958     return ret;
1959 }
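
/*
 * An illustrative guest-side space query, sketched via the hypothetical
 * argo_hypercall() shim above and assuming the public-header layout in
 * which xen_argo_ring_data_t is followed in-line by its data[] array of
 * xen_argo_ring_data_ent_t entries:
 *
 *   struct {
 *       xen_argo_ring_data_t d;
 *       xen_argo_ring_data_ent_t ent[1];
 *   } rd = { .d.nent = 1 };
 *   rd.ent[0].ring.domain_id = partner_domid;   // ring to query
 *   rd.ent[0].ring.aport     = 1000;            // hypothetical port number
 *   ret = argo_hypercall(XEN_ARGO_OP_notify, &rd, NULL, 0, 0);
 *   // fill_ring_data() reports ring status back into each entry
 *
 * Passing a null arg1 handle is also valid: it only re-checks and delivers
 * pending space notifications, via notify_check_pending() above.
 */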
1960 
1961 static long
1962 sendv(struct domain *src_d, xen_argo_addr_t *src_addr,
1963       const xen_argo_addr_t *dst_addr, xen_argo_iov_t *iovs, unsigned int niov,
1964       uint32_t message_type)
1965 {
1966     struct domain *dst_d = NULL;
1967     struct argo_ring_id src_id;
1968     struct argo_ring_info *ring_info;
1969     int ret = 0;
1970     unsigned int len = 0;
1971 
1972     argo_dprintk("sendv: (%u:%x)->(%u:%x) niov:%u type:%x\n",
1973                  src_addr->domain_id, src_addr->aport, dst_addr->domain_id,
1974                  dst_addr->aport, niov, message_type);
1975 
1976     /* Check padding is zeroed. */
1977     if ( unlikely(src_addr->pad || dst_addr->pad) )
1978         return -EINVAL;
1979 
1980     if ( src_addr->domain_id == XEN_ARGO_DOMID_ANY )
1981          src_addr->domain_id = src_d->domain_id;
1982 
1983     /* No domain is currently authorized to send on behalf of another */
1984     if ( unlikely(src_addr->domain_id != src_d->domain_id) )
1985         return -EPERM;
1986 
1987     src_id.aport = src_addr->aport;
1988     src_id.domain_id = src_d->domain_id;
1989     src_id.partner_id = dst_addr->domain_id;
1990 
1991     dst_d = get_domain_by_id(dst_addr->domain_id);
1992     if ( !dst_d )
1993         return -ESRCH;
1994 
1995     ret = xsm_argo_send(src_d, dst_d);
1996     if ( ret )
1997     {
1998         gprintk(XENLOG_ERR, "argo: XSM REJECTED %i -> %i\n",
1999                 src_d->domain_id, dst_d->domain_id);
2000 
2001         put_domain(dst_d);
2002 
2003         return ret;
2004     }
2005 
2006     read_lock(&L1_global_argo_rwlock);
2007 
2008     if ( !src_d->argo )
2009     {
2010         ret = -ENODEV;
2011         goto out_unlock;
2012     }
2013 
2014     if ( !dst_d->argo )
2015     {
2016         argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
2017         ret = -ECONNREFUSED;
2018         goto out_unlock;
2019     }
2020 
2021     read_lock(&dst_d->argo->rings_L2_rwlock);
2022 
2023     ring_info = find_ring_info_by_match(dst_d, dst_addr->aport,
2024                                         src_id.domain_id);
2025     if ( !ring_info )
2026     {
2027         gprintk(XENLOG_ERR,
2028                 "argo: vm%u connection refused, src (vm%u:%x) dst (vm%u:%x)\n",
2029                 current->domain->domain_id, src_id.domain_id, src_id.aport,
2030                 dst_addr->domain_id, dst_addr->aport);
2031 
2032         ret = -ECONNREFUSED;
2033     }
2034     else
2035     {
2036         spin_lock(&ring_info->L3_lock);
2037 
2038         /*
2039          * Obtain the total size of the data to transmit (setting 'len'), and
2040          * sanity-check that the iovs conform to the size and count limits.
2041          */
2042         ret = iov_count(iovs, niov, &len);
2043         if ( !ret )
2044         {
2045             ret = ringbuf_insert(dst_d, ring_info, &src_id, iovs, niov,
2046                                  message_type, len);
2047             if ( ret == -EAGAIN )
2048             {
2049                 int rc;
2050 
2051                 argo_dprintk("argo_ringbuf_sendv failed, EAGAIN\n");
2052                 /* Requeue to issue a notification when space becomes available. */
2053                 rc = pending_requeue(dst_d, ring_info, src_id.domain_id, len);
2054                 if ( rc )
2055                     ret = rc;
2056             }
2057         }
2058 
2059         spin_unlock(&ring_info->L3_lock);
2060     }
2061 
2062     read_unlock(&dst_d->argo->rings_L2_rwlock);
2063 
2064  out_unlock:
2065     read_unlock(&L1_global_argo_rwlock);
2066 
2067     if ( ret >= 0 )
2068         signal_domain(dst_d);
2069 
2070     if ( dst_d )
2071         put_domain(dst_d);
2072 
2073     return ( ret < 0 ) ? ret : len;
2074 }
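
/*
 * An illustrative guest-side send, sketched via the hypothetical
 * argo_hypercall() shim above:
 *
 *   xen_argo_send_addr_t send = {
 *       .src = { .aport = 2000, .domain_id = XEN_ARGO_DOMID_ANY },
 *       .dst = { .aport = 1000, .domain_id = partner_domid },
 *   };
 *   xen_argo_iov_t iov = { .iov_len = payload_len };
 *   // point iov.iov_hnd at the payload buffer; handle setup is
 *   // guest-OS specific
 *   ret = argo_hypercall(XEN_ARGO_OP_sendv, &send, &iov, 1, msg_type);
 *
 * On success the return value is the total payload length.  -EAGAIN means
 * the destination ring was full and a space-available notification was
 * queued via pending_requeue(), so the sender can retry after receiving
 * the Argo VIRQ once space becomes available.
 */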
2075 
2076 long
2077 do_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
2078            XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long raw_arg3,
2079            unsigned long raw_arg4)
2080 {
2081     struct domain *currd = current->domain;
2082     long rc;
2083     unsigned int arg3 = raw_arg3, arg4 = raw_arg4;
2084 
2085     argo_dprintk("->do_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
2086                  (void *)arg1.p, (void *)arg2.p, raw_arg3, raw_arg4);
2087 
2088     /* Reject numeric hypercall args outside 32-bit range */
2089     if ( (arg3 != raw_arg3) || (arg4 != raw_arg4) )
2090         return -EINVAL;
2091 
2092     if ( unlikely(!opt_argo) )
2093         return -EOPNOTSUPP;
2094 
2095     rc = xsm_argo_enable(currd);
2096     if ( rc )
2097         return rc;
2098 
2099     switch ( cmd )
2100     {
2101     case XEN_ARGO_OP_register_ring:
2102     {
2103         XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd =
2104             guest_handle_cast(arg1, xen_argo_register_ring_t);
2105         XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd =
2106             guest_handle_cast(arg2, xen_argo_gfn_t);
2107         /* arg3: npage, arg4: flags */
2108 
2109         BUILD_BUG_ON(!IS_ALIGNED(XEN_ARGO_MAX_RING_SIZE, PAGE_SIZE));
2110 
2111         if ( unlikely(arg3 > (XEN_ARGO_MAX_RING_SIZE >> PAGE_SHIFT)) )
2112         {
2113             rc = -EINVAL;
2114             break;
2115         }
2116 
2117         /* Check array to allow use of the faster __copy operations later */
2118         if ( unlikely(!guest_handle_okay(gfn_hnd, arg3)) )
2119         {
2120             rc = -EFAULT;
2121             break;
2122         }
2123 
2124         rc = register_ring(currd, reg_hnd, gfn_hnd, arg3, arg4);
2125         break;
2126     }
2127 
2128     case XEN_ARGO_OP_unregister_ring:
2129     {
2130         XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd =
2131             guest_handle_cast(arg1, xen_argo_unregister_ring_t);
2132 
2133         if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
2134         {
2135             rc = -EINVAL;
2136             break;
2137         }
2138 
2139         rc = unregister_ring(currd, unreg_hnd);
2140         break;
2141     }
2142 
2143     case XEN_ARGO_OP_sendv:
2144     {
2145         xen_argo_send_addr_t send_addr;
2146         xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
2147         unsigned int niov;
2148 
2149         XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd =
2150             guest_handle_cast(arg1, xen_argo_send_addr_t);
2151         XEN_GUEST_HANDLE_PARAM(xen_argo_iov_t) iovs_hnd =
2152             guest_handle_cast(arg2, xen_argo_iov_t);
2153         /* arg3 is niov */
2154         /* arg4 is message_type. Must be a 32-bit value. */
2155 
2156         /* XEN_ARGO_MAXIOV value determines size of iov array on stack */
2157         BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
2158 
2159         rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
2160         if ( rc )
2161             break;
2165 
2166         /*
2167          * Reject niov above the maximum limit, and message_type values that
2168          * are outside the 32-bit range.
2169          */
2170         if ( unlikely((arg3 > XEN_ARGO_MAXIOV) || (arg4 != (uint32_t)arg4)) )
2171         {
2172             rc = -EINVAL;
2173             break;
2174         }
2175         niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
2176 
2177         rc = copy_from_guest(iovs, iovs_hnd, niov) ? -EFAULT : 0;
2178         if ( rc )
2179             break;
2183 
2184         rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
2185         break;
2186     }
2187 
2188     case XEN_ARGO_OP_notify:
2189     {
2190         XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd =
2191                    guest_handle_cast(arg1, xen_argo_ring_data_t);
2192 
2193         if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
2194         {
2195             rc = -EINVAL;
2196             break;
2197         }
2198 
2199         rc = notify(currd, ring_data_hnd);
2200         break;
2201     }
2202 
2203     default:
2204         rc = -EOPNOTSUPP;
2205         break;
2206     }
2207 
2208     argo_dprintk("<-do_argo_op(%u)=%ld\n", cmd, rc);
2209 
2210     return rc;
2211 }
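
/*
 * Summary of the argument conventions dispatched above:
 *
 *   cmd              arg1 (handle)               arg2 (handle)     arg3   arg4
 *   register_ring    xen_argo_register_ring_t    xen_argo_gfn_t[]  npage  flags
 *   unregister_ring  xen_argo_unregister_ring_t  null              0      0
 *   sendv            xen_argo_send_addr_t        xen_argo_iov_t[]  niov   message_type
 *   notify           xen_argo_ring_data_t/null   null              0      0
 */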
2212 
2213 #ifdef CONFIG_COMPAT
2214 long
2215 compat_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
2216                XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long arg3,
2217                unsigned long arg4)
2218 {
2219     struct domain *currd = current->domain;
2220     long rc;
2221     xen_argo_send_addr_t send_addr;
2222     xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
2223     compat_argo_iov_t compat_iovs[XEN_ARGO_MAXIOV];
2224     unsigned int i, niov;
2225     XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd;
2226 
2227     /* check XEN_ARGO_MAXIOV as it sizes stack arrays: iovs, compat_iovs */
2228     BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
2229 
2230     /* Forward all ops besides sendv to the native handler. */
2231     if ( cmd != XEN_ARGO_OP_sendv )
2232         return do_argo_op(cmd, arg1, arg2, arg3, arg4);
2233 
2234     if ( unlikely(!opt_argo) )
2235         return -EOPNOTSUPP;
2236 
2237     rc = xsm_argo_enable(currd);
2238     if ( rc )
2239         return rc;
2240 
2241     argo_dprintk("->compat_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
2242                  (void *)arg1.p, (void *)arg2.p, arg3, arg4);
2243 
2244     send_addr_hnd = guest_handle_cast(arg1, xen_argo_send_addr_t);
2245     /* arg2: iovs, arg3: niov, arg4: message_type */
2246 
2247     rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
2248     if ( rc )
2249         goto out;
2250 
2251     if ( unlikely(arg3 > XEN_ARGO_MAXIOV) )
2252     {
2253         rc = -EINVAL;
2254         goto out;
2255     }
2256     niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
2257 
2258     rc = copy_from_guest(compat_iovs, arg2, niov) ? -EFAULT : 0;
2259     if ( rc )
2260         goto out;
2261 
2262     for ( i = 0; i < niov; i++ )
2263     {
2264 #define XLAT_argo_iov_HNDL_iov_hnd(_d_, _s_) \
2265     guest_from_compat_handle((_d_)->iov_hnd, (_s_)->iov_hnd)
2266 
2267         XLAT_argo_iov(&iovs[i], &compat_iovs[i]);
2268 
2269 #undef XLAT_argo_iov_HNDL_iov_hnd
2270     }
2271 
2272     rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
2273  out:
2274     argo_dprintk("<-compat_argo_op(%u)=%ld\n", cmd, rc);
2275 
2276     return rc;
2277 }
2278 #endif
2279 
2280 static void
2281 argo_domain_init(struct argo_domain *argo)
2282 {
2283     unsigned int i;
2284 
2285     rwlock_init(&argo->rings_L2_rwlock);
2286     spin_lock_init(&argo->send_L2_lock);
2287     spin_lock_init(&argo->wildcard_L2_lock);
2288 
2289     for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
2290     {
2291         INIT_LIST_HEAD(&argo->ring_hash[i]);
2292         INIT_LIST_HEAD(&argo->send_hash[i]);
2293     }
2294     INIT_LIST_HEAD(&argo->wildcard_pend_list);
2295 }
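
/*
 * The lock tiers initialised above mirror their names: the global
 * L1_global_argo_rwlock is taken before any per-domain L2 lock
 * (rings_L2_rwlock, send_L2_lock, wildcard_L2_lock), which in turn is taken
 * before any per-ring L3_lock -- the ordering the functions above follow.
 */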
2296 
2297 int
2298 argo_init(struct domain *d)
2299 {
2300     struct argo_domain *argo;
2301 
2302     if ( !opt_argo || xsm_argo_enable(d) )
2303     {
2304         argo_dprintk("argo disabled, domid: %u\n", d->domain_id);
2305         return 0;
2306     }
2307 
2308     argo_dprintk("init: domid: %u\n", d->domain_id);
2309 
2310     argo = xzalloc(struct argo_domain);
2311     if ( !argo )
2312         return -ENOMEM;
2313 
2314     argo_domain_init(argo);
2315 
2316     write_lock(&L1_global_argo_rwlock);
2317 
2318     d->argo = argo;
2319 
2320     write_unlock(&L1_global_argo_rwlock);
2321 
2322     return 0;
2323 }
2324 
2325 void
2326 argo_destroy(struct domain *d)
2327 {
2328     BUG_ON(!d->is_dying);
2329 
2330     write_lock(&L1_global_argo_rwlock);
2331 
2332     argo_dprintk("destroy: domid %u d->argo=%p\n", d->domain_id, d->argo);
2333 
2334     if ( d->argo )
2335     {
2336         domain_rings_remove_all(d);
2337         partner_rings_remove(d);
2338         wildcard_rings_pending_remove(d);
2339         XFREE(d->argo);
2340     }
2341 
2342     write_unlock(&L1_global_argo_rwlock);
2343 }
2344 
2345 void
2346 argo_soft_reset(struct domain *d)
2347 {
2348     write_lock(&L1_global_argo_rwlock);
2349 
2350     argo_dprintk("soft reset d=%u d->argo=%p\n", d->domain_id, d->argo);
2351 
2352     if ( d->argo )
2353     {
2354         domain_rings_remove_all(d);
2355         partner_rings_remove(d);
2356         wildcard_rings_pending_remove(d);
2357 
2358         /*
2359          * Since neither opt_argo nor xsm_argo_enable(d) can change at runtime,
2360          * if d->argo is set then both opt_argo and xsm_argo_enable(d) must have
2361          * been true, so init is allowed to proceed again here.
2362          */
2363         argo_domain_init(d->argo);
2364     }
2365 
2366     write_unlock(&L1_global_argo_rwlock);
2367 }
2368