1 /******************************************************************************
2 * arch/x86/mm/mem_sharing.c
3 *
4 * Memory sharing support.
5 *
6 * Copyright (c) 2011 GridCentric, Inc. (Adin Scannell & Andres Lagar-Cavilla)
7 * Copyright (c) 2009 Citrix Systems, Inc. (Grzegorz Milos)
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; If not, see <http://www.gnu.org/licenses/>.
21 */
22
23 #include <xen/types.h>
24 #include <xen/domain_page.h>
25 #include <xen/event.h>
26 #include <xen/spinlock.h>
27 #include <xen/rwlock.h>
28 #include <xen/mm.h>
29 #include <xen/grant_table.h>
30 #include <xen/sched.h>
31 #include <xen/rcupdate.h>
32 #include <xen/guest_access.h>
33 #include <xen/vm_event.h>
34 #include <asm/page.h>
35 #include <asm/string.h>
36 #include <asm/p2m.h>
37 #include <asm/altp2m.h>
38 #include <asm/atomic.h>
39 #include <asm/event.h>
40 #include <asm/hap.h>
41 #include <asm/hvm/hvm.h>
42 #include <xsm/xsm.h>
43
44 #include <public/hvm/params.h>
45
46 #include "mm-locks.h"
47
48 static shr_handle_t next_handle = 1;
49
50 typedef struct pg_lock_data {
51 int mm_unlock_level;
52 unsigned short recurse_count;
53 } pg_lock_data_t;
54
55 static DEFINE_PER_CPU(pg_lock_data_t, __pld);
56
57 /* Reverse map defines */
58 #define RMAP_HASHTAB_ORDER 0
59 #define RMAP_HASHTAB_SIZE \
60 ((PAGE_SIZE << RMAP_HASHTAB_ORDER) / sizeof(struct list_head))
61 #define RMAP_USES_HASHTAB(page) \
62 ((page)->sharing->hash_table.flag == NULL)
63 #define RMAP_HEAVY_SHARED_PAGE RMAP_HASHTAB_SIZE
64 /*
65 * A bit of hysteresis. We don't want to be mutating between list and hash
66 * table constantly.
67 */
68 #define RMAP_LIGHT_SHARED_PAGE (RMAP_HEAVY_SHARED_PAGE >> 2)
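/*
 * Worked example of the arithmetic above (a sketch, assuming 4 KiB pages and
 * a 16-byte struct list_head, as on x86-64):
 *
 *     RMAP_HASHTAB_SIZE      = (4096 << 0) / 16 = 256 buckets
 *     RMAP_HEAVY_SHARED_PAGE = 256  -> switch from list to hash table
 *     RMAP_LIGHT_SHARED_PAGE = 64   -> switch back from hash table to list
 *
 * The gap between 64 and 256 provides the hysteresis mentioned above.
 */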
69
70 #if MEM_SHARING_AUDIT
71
72 static LIST_HEAD(shr_audit_list);
73 static DEFINE_SPINLOCK(shr_audit_lock);
74 static DEFINE_RCU_READ_LOCK(shr_audit_read_lock);
75
76 /* RCU delayed free of audit list entry */
77 static void _free_pg_shared_info(struct rcu_head *head)
78 {
79 xfree(container_of(head, struct page_sharing_info, rcu_head));
80 }
81
82 static void audit_add_list(struct page_info *page)
83 {
84 INIT_LIST_HEAD(&page->sharing->entry);
85 spin_lock(&shr_audit_lock);
86 list_add_rcu(&page->sharing->entry, &shr_audit_list);
87 spin_unlock(&shr_audit_lock);
88 }
89
90 /* Removes from the audit list and cleans up the page sharing metadata. */
91 static void page_sharing_dispose(struct page_info *page)
92 {
93 /* Unlikely given our thresholds, but we should be careful. */
94 if ( unlikely(RMAP_USES_HASHTAB(page)) )
95 free_xenheap_pages(page->sharing->hash_table.bucket,
96 RMAP_HASHTAB_ORDER);
97
98 spin_lock(&shr_audit_lock);
99 list_del_rcu(&page->sharing->entry);
100 spin_unlock(&shr_audit_lock);
101 INIT_RCU_HEAD(&page->sharing->rcu_head);
102 call_rcu(&page->sharing->rcu_head, _free_pg_shared_info);
103 }
104
105 #else
106
107 #define audit_add_list(p) ((void)0)
108 static void page_sharing_dispose(struct page_info *page)
109 {
110 /* Unlikely given our thresholds, but we should be careful. */
111 if ( unlikely(RMAP_USES_HASHTAB(page)) )
112 free_xenheap_pages(page->sharing->hash_table.bucket,
113 RMAP_HASHTAB_ORDER);
114 xfree(page->sharing);
115 }
116
117 #endif /* MEM_SHARING_AUDIT */
118
119 /*
120 * Private implementations of page_lock/unlock to bypass PV-only
121 * sanity checks not applicable to mem-sharing.
122 *
123 * _page_lock is used in memory sharing to protect addition (share) and removal
124 * (unshare) of (gfn,domain) tuples to a list of gfns that the shared page is
125 * currently backing.
126 * Nesting may happen when sharing (and locking) two pages.
127 * Deadlock is avoided by locking pages in increasing order.
128 * All memory sharing code paths take the p2m lock of the affected gfn before
129 * taking the lock for the underlying page. We enforce ordering between
130 * page_lock and p2m_lock using an mm-locks.h construct.
131 *
132 * TODO: Investigate if PGT_validated is necessary.
133 */
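/*
 * Illustrative example (a sketch, not a code path): locking a validated,
 * shared page whose type_info is (PGT_shared_page | PGT_validated | 2)
 * computes nx = x + (1 | PGT_locked), i.e. the type count becomes 3 and
 * PGT_locked is set in a single cmpxchg. When two pages must be locked,
 * as in share_pages() below, the one with the lower MFN is locked first,
 * so concurrent callers cannot deadlock.
 */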
134 static bool _page_lock(struct page_info *page)
135 {
136 unsigned long x, nx;
137
138 do {
139 while ( (x = page->u.inuse.type_info) & PGT_locked )
140 cpu_relax();
141 nx = x + (1 | PGT_locked);
142 if ( !(x & PGT_validated) ||
143 !(x & PGT_count_mask) ||
144 !(nx & PGT_count_mask) )
145 return false;
146 } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
147
148 return true;
149 }
150
151 static void _page_unlock(struct page_info *page)
152 {
153 unsigned long x, nx, y = page->u.inuse.type_info;
154
155 do {
156 x = y;
157 ASSERT((x & PGT_count_mask) && (x & PGT_locked));
158
159 nx = x - (1 | PGT_locked);
160 /* We must not drop the last reference here. */
161 ASSERT(nx & PGT_count_mask);
162 } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
163 }
164
165 static bool mem_sharing_page_lock(struct page_info *pg)
166 {
167 bool rc;
168 pg_lock_data_t *pld = &(this_cpu(__pld));
169
170 page_sharing_mm_pre_lock();
171 rc = _page_lock(pg);
172 if ( rc )
173 {
174 preempt_disable();
175 page_sharing_mm_post_lock(&pld->mm_unlock_level,
176 &pld->recurse_count);
177 }
178 return rc;
179 }
180
181 static void mem_sharing_page_unlock(struct page_info *pg)
182 {
183 pg_lock_data_t *pld = &(this_cpu(__pld));
184
185 page_sharing_mm_unlock(pld->mm_unlock_level,
186 &pld->recurse_count);
187 preempt_enable();
188 _page_unlock(pg);
189 }
190
191 static shr_handle_t get_next_handle(void)
192 {
193 /* Get the next handle get_page style */
194 uint64_t x, y = next_handle;
195 do {
196 x = y;
197 }
198 while ( (y = cmpxchg(&next_handle, x, x + 1)) != x );
199 return x + 1;
200 }
201
202 static atomic_t nr_saved_mfns = ATOMIC_INIT(0);
203 static atomic_t nr_shared_mfns = ATOMIC_INIT(0);
204
205 /*
206 * Reverse map
207 *
208 * Every shared frame keeps a reverse map (rmap) of <domain, gfn> tuples that
209 * this shared frame backs. For pages with a low degree of sharing, a O(n)
210 * search linked list is good enough. For pages with higher degree of sharing,
211 * we use a hash table instead.
212 */
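/*
 * Illustrative usage sketch (assumes the shared page's lock is held; the
 * helpers referenced here are defined further down in this file):
 *
 *     gfn_info_t *gi = rmap_retrieve(d->domain_id, gfn, page);
 *     if ( !gi )
 *         gi = mem_sharing_gfn_alloc(page, d, gfn);   <- rmap_add() inside
 *     ...
 *     mem_sharing_gfn_destroy(page, d, gi);           <- rmap_del() + xfree()
 */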
213
214 typedef struct gfn_info
215 {
216 unsigned long gfn;
217 domid_t domain;
218 struct list_head list;
219 } gfn_info_t;
220
221 static void rmap_init(struct page_info *page)
222 {
223 /* We always start off as a doubly linked list. */
224 INIT_LIST_HEAD(&page->sharing->gfns);
225 }
226
227 /* Exceedingly simple "hash function" */
228 #define HASH(domain, gfn) \
229 (((gfn) + (domain)) % RMAP_HASHTAB_SIZE)
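/*
 * Example (assuming RMAP_HASHTAB_SIZE == 256): HASH(5, 0x1234) ==
 * (0x1234 + 5) % 256 == 57, so the <domain 5, gfn 0x1234> tuple lands in
 * bucket 57.
 */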
230
231 /*
232 * Conversions. Tuned by the thresholds. Should only happen twice
233 * (once each) during the lifetime of a shared page.
234 */
235 static inline int rmap_list_to_hash_table(struct page_info *page)
236 {
237 unsigned int i;
238 struct list_head *pos, *tmp, *b =
239 alloc_xenheap_pages(RMAP_HASHTAB_ORDER, 0);
240
241 if ( b == NULL )
242 return -ENOMEM;
243
244 for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
245 INIT_LIST_HEAD(b + i);
246
247 list_for_each_safe ( pos, tmp, &page->sharing->gfns )
248 {
249 gfn_info_t *gfn_info = list_entry(pos, gfn_info_t, list);
250 struct list_head *bucket = b + HASH(gfn_info->domain, gfn_info->gfn);
251
252 list_del(pos);
253 list_add(pos, bucket);
254 }
255
256 page->sharing->hash_table.bucket = b;
257 page->sharing->hash_table.flag = NULL;
258
259 return 0;
260 }
261
262 static void rmap_hash_table_to_list(struct page_info *page)
263 {
264 unsigned int i;
265 struct list_head *bucket = page->sharing->hash_table.bucket;
266
267 INIT_LIST_HEAD(&page->sharing->gfns);
268
269 for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
270 {
271 struct list_head *pos, *tmp, *head = bucket + i;
272
273 list_for_each_safe ( pos, tmp, head )
274 {
275 list_del(pos);
276 list_add(pos, &page->sharing->gfns);
277 }
278 }
279
280 free_xenheap_pages(bucket, RMAP_HASHTAB_ORDER);
281 }
282
283 /* Generic accessors to the rmap */
284 static unsigned long rmap_count(const struct page_info *pg)
285 {
286 unsigned long count;
287 unsigned long t = read_atomic(&pg->u.inuse.type_info);
288
289 count = t & PGT_count_mask;
290 if ( t & PGT_locked )
291 count--;
292 return count;
293 }
294
295 /*
296 * The page type count is always decreased after removing from the rmap.
297 * Use a convert flag to avoid mutating the rmap if in the middle of an
298 * iterator, or if the page will be soon destroyed anyways.
299 */
300 static void rmap_del(gfn_info_t *gfn_info, struct page_info *page, int convert)
301 {
302 if ( RMAP_USES_HASHTAB(page) && convert &&
303 (rmap_count(page) <= RMAP_LIGHT_SHARED_PAGE) )
304 rmap_hash_table_to_list(page);
305
306 /* Regardless of rmap type, same removal operation */
307 list_del(&gfn_info->list);
308 }
309
310 /* The page type count is always increased before adding to the rmap. */
311 static void rmap_add(gfn_info_t *gfn_info, struct page_info *page)
312 {
313 struct list_head *head;
314
315 if ( !RMAP_USES_HASHTAB(page) &&
316 (rmap_count(page) >= RMAP_HEAVY_SHARED_PAGE) )
317 /*
318 * The conversion may fail with ENOMEM. We'll be less efficient,
319 * but no reason to panic.
320 */
321 (void)rmap_list_to_hash_table(page);
322
323 head = (RMAP_USES_HASHTAB(page)
324 ? page->sharing->hash_table.bucket + HASH(gfn_info->domain,
325 gfn_info->gfn)
326 : &page->sharing->gfns);
327
328 INIT_LIST_HEAD(&gfn_info->list);
329 list_add(&gfn_info->list, head);
330 }
331
332 static gfn_info_t *rmap_retrieve(uint16_t domain_id, unsigned long gfn,
333 struct page_info *page)
334 {
335 gfn_info_t *gfn_info;
336 struct list_head *le, *head;
337
338 head = (RMAP_USES_HASHTAB(page)
339 ? page->sharing->hash_table.bucket + HASH(domain_id, gfn)
340 : &page->sharing->gfns);
341
342 list_for_each ( le, head )
343 {
344 gfn_info = list_entry(le, gfn_info_t, list);
345 if ( (gfn_info->gfn == gfn) && (gfn_info->domain == domain_id) )
346 return gfn_info;
347 }
348
349 /* Nothing was found */
350 return NULL;
351 }
352
353 /*
354 * The iterator hides the details of how the rmap is implemented. This
355 * involves splitting the list_for_each_safe macro into two steps.
356 */
357 struct rmap_iterator {
358 struct list_head *curr;
359 struct list_head *next;
360 unsigned int bucket;
361 };
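/*
 * Typical traversal, split into the two steps mentioned above (a sketch;
 * audit() and share_pages() below use this pattern):
 *
 *     struct rmap_iterator ri;
 *     gfn_info_t *g;
 *
 *     rmap_seed_iterator(page, &ri);
 *     while ( (g = rmap_iterate(page, &ri)) != NULL )
 *         ... use g->domain and g->gfn ...
 */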
362
363 static void rmap_seed_iterator(struct page_info *page, struct rmap_iterator *ri)
364 {
365 ri->curr = (RMAP_USES_HASHTAB(page)
366 ? page->sharing->hash_table.bucket
367 : &page->sharing->gfns);
368 ri->next = ri->curr->next;
369 ri->bucket = 0;
370 }
371
372 static gfn_info_t *rmap_iterate(struct page_info *page,
373 struct rmap_iterator *ri)
374 {
375 struct list_head *head = (RMAP_USES_HASHTAB(page)
376 ? page->sharing->hash_table.bucket + ri->bucket
377 : &page->sharing->gfns);
378
379 retry:
380 if ( ri->next == head)
381 {
382 if ( RMAP_USES_HASHTAB(page) )
383 {
384 ri->bucket++;
385 if ( ri->bucket >= RMAP_HASHTAB_SIZE )
386 /* No more hash table buckets */
387 return NULL;
388 head = page->sharing->hash_table.bucket + ri->bucket;
389 ri->curr = head;
390 ri->next = ri->curr->next;
391 goto retry;
392 }
393 else
394 /* List exhausted */
395 return NULL;
396 }
397
398 ri->curr = ri->next;
399 ri->next = ri->curr->next;
400
401 return list_entry(ri->curr, gfn_info_t, list);
402 }
403
404 static gfn_info_t *mem_sharing_gfn_alloc(struct page_info *page,
405 struct domain *d, unsigned long gfn)
406 {
407 gfn_info_t *gfn_info = xmalloc(gfn_info_t);
408
409 if ( gfn_info == NULL )
410 return NULL;
411
412 gfn_info->gfn = gfn;
413 gfn_info->domain = d->domain_id;
414
415 rmap_add(gfn_info, page);
416
417 /* Increment our number of shared pages. */
418 atomic_inc(&d->shr_pages);
419
420 return gfn_info;
421 }
422
423 static void mem_sharing_gfn_destroy(struct page_info *page, struct domain *d,
424 gfn_info_t *gfn_info)
425 {
426 /* Decrement the number of pages. */
427 atomic_dec(&d->shr_pages);
428
429 /* Free the gfn_info structure. */
430 rmap_del(gfn_info, page, 1);
431 xfree(gfn_info);
432 }
433
434 static struct page_info *mem_sharing_lookup(unsigned long mfn)
435 {
436 struct page_info *page;
437 unsigned long t;
438
439 if ( !mfn_valid(_mfn(mfn)) )
440 return NULL;
441
442 page = mfn_to_page(_mfn(mfn));
443 if ( page_get_owner(page) != dom_cow )
444 return NULL;
445
446 /*
447 * Count has to be at least two, because we're called
448 * with the mfn locked (1) and this is supposed to be
449 * a shared page (1).
450 */
451 t = read_atomic(&page->u.inuse.type_info);
452 ASSERT((t & PGT_type_mask) == PGT_shared_page);
453 ASSERT((t & PGT_count_mask) >= 2);
454 ASSERT(SHARED_M2P(get_gpfn_from_mfn(mfn)));
455
456 return page;
457 }
458
459 static int audit(void)
460 {
461 #if MEM_SHARING_AUDIT
462 int errors = 0;
463 unsigned long count_expected;
464 unsigned long count_found = 0;
465 struct list_head *ae;
466
467 count_expected = atomic_read(&nr_shared_mfns);
468
469 rcu_read_lock(&shr_audit_read_lock);
470
471 list_for_each_rcu ( ae, &shr_audit_list )
472 {
473 struct page_sharing_info *pg_shared_info;
474 unsigned long nr_gfns = 0;
475 struct page_info *pg;
476 mfn_t mfn;
477 gfn_info_t *g;
478 struct rmap_iterator ri;
479
480 pg_shared_info = list_entry(ae, struct page_sharing_info, entry);
481 pg = pg_shared_info->pg;
482 mfn = page_to_mfn(pg);
483
484 /* If we can't lock it, it's definitely not a shared page */
485 if ( !mem_sharing_page_lock(pg) )
486 {
487 gdprintk(XENLOG_ERR,
488 "mfn %lx in audit list, but cannot be locked (%lx)!\n",
489 mfn_x(mfn), pg->u.inuse.type_info);
490 errors++;
491 continue;
492 }
493
494 /* Check if the MFN has correct type, owner and handle. */
495 if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page )
496 {
497 gdprintk(XENLOG_ERR,
498 "mfn %lx in audit list, but not PGT_shared_page (%lx)!\n",
499 mfn_x(mfn), pg->u.inuse.type_info & PGT_type_mask);
500 errors++;
501 continue;
502 }
503
504 /* Check the page owner. */
505 if ( page_get_owner(pg) != dom_cow )
506 {
507 gdprintk(XENLOG_ERR, "mfn %lx shared, but wrong owner (%pd)!\n",
508 mfn_x(mfn), page_get_owner(pg));
509 errors++;
510 }
511
512 /* Check the m2p entry */
513 if ( !SHARED_M2P(get_gpfn_from_mfn(mfn_x(mfn))) )
514 {
515 gdprintk(XENLOG_ERR, "mfn %lx shared, but wrong m2p entry (%lx)!\n",
516 mfn_x(mfn), get_gpfn_from_mfn(mfn_x(mfn)));
517 errors++;
518 }
519
520 /* Check we have a list */
521 if ( (!pg->sharing) || rmap_count(pg) == 0 )
522 {
523 gdprintk(XENLOG_ERR, "mfn %lx shared, but empty gfn list!\n",
524 mfn_x(mfn));
525 errors++;
526 continue;
527 }
528
529 /* We've found a page that is shared */
530 count_found++;
531
532 /* Check if all GFNs map to the MFN, and the p2m types */
533 rmap_seed_iterator(pg, &ri);
534 while ( (g = rmap_iterate(pg, &ri)) != NULL )
535 {
536 struct domain *d;
537 p2m_type_t t;
538 mfn_t o_mfn;
539
540 d = get_domain_by_id(g->domain);
541 if ( d == NULL )
542 {
543 gdprintk(XENLOG_ERR,
544 "Unknown dom: %d, for PFN=%lx, MFN=%lx\n",
545 g->domain, g->gfn, mfn_x(mfn));
546 errors++;
547 continue;
548 }
549 o_mfn = get_gfn_query_unlocked(d, g->gfn, &t);
550 if ( !mfn_eq(o_mfn, mfn) )
551 {
552 gdprintk(XENLOG_ERR, "Incorrect P2M for %pd, PFN=%lx."
553 "Expecting MFN=%lx, got %lx\n",
554 d, g->gfn, mfn_x(mfn), mfn_x(o_mfn));
555 errors++;
556 }
557 if ( t != p2m_ram_shared )
558 {
559 gdprintk(XENLOG_ERR,
560 "Incorrect P2M type for %pd, PFN=%lx MFN=%lx."
561 "Expecting t=%d, got %d\n",
562 d, g->gfn, mfn_x(mfn), p2m_ram_shared, t);
563 errors++;
564 }
565 put_domain(d);
566 nr_gfns++;
567 }
568 /* The type count has an extra ref because we have locked the page */
569 if ( (nr_gfns + 1) != (pg->u.inuse.type_info & PGT_count_mask) )
570 {
571 gdprintk(XENLOG_ERR, "Mismatched counts for MFN=%lx."
572 "nr_gfns in list %lu, in type_info %lx\n",
573 mfn_x(mfn), nr_gfns,
574 (pg->u.inuse.type_info & PGT_count_mask));
575 errors++;
576 }
577
578 mem_sharing_page_unlock(pg);
579 }
580
581 rcu_read_unlock(&shr_audit_read_lock);
582
583 if ( count_found != count_expected )
584 {
585 gdprintk(XENLOG_ERR, "Expected %ld shared mfns, found %ld.",
586 count_expected, count_found);
587 errors++;
588 }
589
590 return errors;
591 #else
592 return -EOPNOTSUPP;
593 #endif
594 }
595
596 int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn,
597 bool allow_sleep)
598 {
599 struct vcpu *v = current;
600 int rc;
601 vm_event_request_t req = {
602 .reason = VM_EVENT_REASON_MEM_SHARING,
603 .vcpu_id = v->vcpu_id,
604 .u.mem_sharing.gfn = gfn,
605 .u.mem_sharing.p2mt = p2m_ram_shared,
606 };
607
608 if ( (rc = __vm_event_claim_slot(
609 d, d->vm_event_share, allow_sleep)) < 0 )
610 return rc;
611
612 if ( v->domain == d )
613 {
614 req.flags = VM_EVENT_FLAG_VCPU_PAUSED;
615 vm_event_vcpu_pause(v);
616 }
617
618 vm_event_put_request(d, d->vm_event_share, &req);
619
620 return 0;
621 }
622
623 unsigned int mem_sharing_get_nr_saved_mfns(void)
624 {
625 return atomic_read(&nr_saved_mfns);
626 }
627
628 unsigned int mem_sharing_get_nr_shared_mfns(void)
629 {
630 return atomic_read(&nr_shared_mfns);
631 }
632
633 /* Functions that change a page's type and ownership */
634 static int page_make_sharable(struct domain *d,
635 struct page_info *page,
636 unsigned int expected_refcnt,
637 bool validate_only)
638 {
639 int rc = 0;
640 bool drop_dom_ref = false;
641
642 spin_lock_recursive(&d->page_alloc_lock);
643
644 if ( d->is_dying )
645 {
646 rc = -EBUSY;
647 goto out;
648 }
649
650 /* Change page type and count atomically */
651 if ( !get_page_and_type(page, d, PGT_shared_page) )
652 {
653 rc = -EINVAL;
654 goto out;
655 }
656
657 /* Check it wasn't already sharable and undo if it was */
658 if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
659 {
660 put_page_and_type(page);
661 rc = -EEXIST;
662 goto out;
663 }
664
665 /*
666 * Check that the ref count is 2 + expected_refcnt: one from PGC_allocated,
667 * one from get_page_and_type() above, plus the caller's expected references.
668 */
669 if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
670 {
671 /* Return type count back to zero */
672 put_page_and_type(page);
673 rc = -E2BIG;
674 goto out;
675 }
676
677 if ( !validate_only )
678 {
679 page_set_owner(page, dom_cow);
680 drop_dom_ref = !domain_adjust_tot_pages(d, -1);
681 page_list_del(page, &d->page_list);
682 }
683
684 out:
685 spin_unlock_recursive(&d->page_alloc_lock);
686
687 if ( drop_dom_ref )
688 put_domain(d);
689
690 return rc;
691 }
692
693 static int page_make_private(struct domain *d, struct page_info *page)
694 {
695 unsigned long expected_type;
696
697 if ( !get_page(page, dom_cow) )
698 return -EINVAL;
699
700 spin_lock(&d->page_alloc_lock);
701
702 if ( d->is_dying )
703 {
704 spin_unlock(&d->page_alloc_lock);
705 put_page(page);
706 return -EBUSY;
707 }
708
709 expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
710 if ( page->u.inuse.type_info != expected_type )
711 {
712 spin_unlock(&d->page_alloc_lock);
713 put_page(page);
714 return -EEXIST;
715 }
716
717 mem_sharing_page_unlock(page);
718
719 /* Drop the final typecount */
720 put_page_and_type(page);
721
722 /* Change the owner */
723 ASSERT(page_get_owner(page) == dom_cow);
724 page_set_owner(page, d);
725
726 if ( domain_adjust_tot_pages(d, 1) == 1 )
727 get_knownalive_domain(d);
728 page_list_add_tail(page, &d->page_list);
729 spin_unlock(&d->page_alloc_lock);
730
731 put_page(page);
732
733 return 0;
734 }
735
736 static struct page_info *__grab_shared_page(mfn_t mfn)
737 {
738 struct page_info *pg = NULL;
739
740 if ( !mfn_valid(mfn) )
741 return NULL;
742
743 pg = mfn_to_page(mfn);
744
745 /*
746 * If the page is not validated we can't lock it, and if it's
747 * not validated it's obviously not shared.
748 */
749 if ( !mem_sharing_page_lock(pg) )
750 return NULL;
751
752 if ( mem_sharing_lookup(mfn_x(mfn)) == NULL )
753 {
754 mem_sharing_page_unlock(pg);
755 return NULL;
756 }
757
758 return pg;
759 }
760
761 static int debug_mfn(mfn_t mfn)
762 {
763 struct page_info *page;
764 int num_refs;
765
766 if ( (page = __grab_shared_page(mfn)) == NULL)
767 {
768 gdprintk(XENLOG_ERR, "Invalid MFN=%lx\n", mfn_x(mfn));
769 return -EINVAL;
770 }
771
772 gdprintk(XENLOG_ERR,
773 "Debug page: MFN=%lx is ci=%lx, ti=%lx, owner_id=%pd\n",
774 mfn_x(page_to_mfn(page)), page->count_info,
775 page->u.inuse.type_info, page_get_owner(page));
776
777 /* -1 because the page is locked and that's an additional type ref */
778 num_refs = ((int) (page->u.inuse.type_info & PGT_count_mask)) - 1;
779 mem_sharing_page_unlock(page);
780 return num_refs;
781 }
782
783 static int debug_gfn(struct domain *d, gfn_t gfn)
784 {
785 p2m_type_t p2mt;
786 mfn_t mfn;
787 int num_refs;
788
789 mfn = get_gfn_query(d, gfn_x(gfn), &p2mt);
790
791 gdprintk(XENLOG_ERR, "Debug for %pd, gfn=%" PRI_gfn "\n",
792 d, gfn_x(gfn));
793
794 num_refs = debug_mfn(mfn);
795 put_gfn(d, gfn_x(gfn));
796
797 return num_refs;
798 }
799
800 static int debug_gref(struct domain *d, grant_ref_t ref)
801 {
802 int rc;
803 uint16_t status;
804 gfn_t gfn;
805
806 rc = mem_sharing_gref_to_gfn(d->grant_table, ref, &gfn, &status);
807 if ( rc )
808 {
809 gdprintk(XENLOG_ERR, "Asked to debug [%pd,gref=%u]: error %d.\n",
810 d, ref, rc);
811 return rc;
812 }
813
814 gdprintk(XENLOG_ERR, "==> Grant [%pd,ref=%d], status=%x. ",
815 d, ref, status);
816
817 return debug_gfn(d, gfn);
818 }
819
820 static int nominate_page(struct domain *d, gfn_t gfn,
821 unsigned int expected_refcnt, bool validate_only,
822 shr_handle_t *phandle)
823 {
824 struct p2m_domain *hp2m = p2m_get_hostp2m(d);
825 p2m_type_t p2mt;
826 p2m_access_t p2ma;
827 mfn_t mfn;
828 struct page_info *page = NULL; /* gcc... */
829 int ret;
830
831 *phandle = 0UL;
832
833 mfn = get_gfn_type_access(hp2m, gfn_x(gfn), &p2mt, &p2ma, 0, NULL);
834
835 /* Check if mfn is valid */
836 ret = -EINVAL;
837 if ( !mfn_valid(mfn) )
838 goto out;
839
840 /* Return the handle if the page is already shared */
841 if ( p2m_is_shared(p2mt) )
842 {
843 struct page_info *pg = __grab_shared_page(mfn);
844 if ( !pg )
845 BUG();
846
847 *phandle = pg->sharing->handle;
848 ret = 0;
849 mem_sharing_page_unlock(pg);
850 goto out;
851 }
852
853 /* Check p2m type */
854 if ( !p2m_is_sharable(p2mt) )
855 goto out;
856
857 page = mfn_to_page(mfn);
858 if ( !page || is_special_page(page) )
859 goto out;
860
861 /* Check if there are mem_access/remapped altp2m entries for this page */
862 if ( altp2m_active(d) )
863 {
864 unsigned int i;
865 struct p2m_domain *ap2m;
866 mfn_t amfn;
867 p2m_type_t ap2mt;
868 p2m_access_t ap2ma;
869
870 altp2m_list_lock(d);
871
872 for ( i = 0; i < MAX_ALTP2M; i++ )
873 {
874 ap2m = d->arch.altp2m_p2m[i];
875 if ( !ap2m )
876 continue;
877
878 amfn = __get_gfn_type_access(ap2m, gfn_x(gfn), &ap2mt, &ap2ma,
879 0, NULL, false);
880 if ( mfn_valid(amfn) && (!mfn_eq(amfn, mfn) || ap2ma != p2ma) )
881 {
882 altp2m_list_unlock(d);
883 goto out;
884 }
885 }
886
887 altp2m_list_unlock(d);
888 }
889
890 /* Try to convert the mfn to the sharable type */
891 ret = page_make_sharable(d, page, expected_refcnt, validate_only);
892 if ( ret || validate_only )
893 goto out;
894
895 /*
896 * Now that the page is validated, we can lock it. There is no
897 * race because we're holding the p2m entry, so no one else
898 * could be nominating this gfn.
899 */
900 ret = -ENOENT;
901 if ( !mem_sharing_page_lock(page) )
902 goto out;
903
904 /* Initialize the shared state */
905 ret = -ENOMEM;
906 if ( !(page->sharing = xmalloc(struct page_sharing_info)) )
907 {
908 /* Making a page private atomically unlocks it */
909 BUG_ON(page_make_private(d, page));
910 goto out;
911 }
912 page->sharing->pg = page;
913 rmap_init(page);
914
915 /* Create the handle */
916 page->sharing->handle = get_next_handle();
917
918 /* Create the local gfn info */
919 if ( !mem_sharing_gfn_alloc(page, d, gfn_x(gfn)) )
920 {
921 xfree(page->sharing);
922 page->sharing = NULL;
923 BUG_ON(page_make_private(d, page));
924 goto out;
925 }
926
927 /* Change the p2m type, should never fail with p2m locked. */
928 BUG_ON(p2m_change_type_one(d, gfn_x(gfn), p2mt, p2m_ram_shared));
929
930 /* Account for this page. */
931 atomic_inc(&nr_shared_mfns);
932
933 /* Update m2p entry to SHARED_M2P_ENTRY */
934 set_gpfn_from_mfn(mfn_x(mfn), SHARED_M2P_ENTRY);
935
936 *phandle = page->sharing->handle;
937 audit_add_list(page);
938 mem_sharing_page_unlock(page);
939 ret = 0;
940
941 out:
942 put_gfn(d, gfn_x(gfn));
943 return ret;
944 }
945
946 static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
947 struct domain *cd, gfn_t cgfn, shr_handle_t ch)
948 {
949 struct page_info *spage, *cpage, *firstpg, *secondpg;
950 gfn_info_t *gfn;
951 struct domain *d;
952 int ret = -EINVAL;
953 mfn_t smfn, cmfn;
954 p2m_type_t smfn_type, cmfn_type;
955 struct two_gfns tg;
956 struct rmap_iterator ri;
957 unsigned long put_count = 0;
958
959 get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
960 cd, cgfn, &cmfn_type, NULL, &cmfn, 0, &tg, true);
961
962 /*
963 * This tricky business is to avoid two callers deadlocking if
964 * grabbing pages in opposite client/source order.
965 */
966 if ( mfn_eq(smfn, cmfn) )
967 {
968 /*
969 * The pages are already the same. We could return some
970 * kind of error here, but no matter how you look at it,
971 * the pages are already 'shared'. It possibly represents
972 * a big problem somewhere else, but as far as sharing is
973 * concerned: great success!
974 */
975 ret = 0;
976 goto err_out;
977 }
978
979 if ( mfn_x(smfn) < mfn_x(cmfn) )
980 {
981 ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
982 spage = firstpg = __grab_shared_page(smfn);
983 if ( spage == NULL )
984 goto err_out;
985
986 ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
987 cpage = secondpg = __grab_shared_page(cmfn);
988 if ( cpage == NULL )
989 {
990 mem_sharing_page_unlock(spage);
991 goto err_out;
992 }
993 }
994 else
995 {
996 ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
997 cpage = firstpg = __grab_shared_page(cmfn);
998 if ( cpage == NULL )
999 goto err_out;
1000
1001 ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
1002 spage = secondpg = __grab_shared_page(smfn);
1003 if ( spage == NULL )
1004 {
1005 mem_sharing_page_unlock(cpage);
1006 goto err_out;
1007 }
1008 }
1009
1010 ASSERT(smfn_type == p2m_ram_shared);
1011 ASSERT(cmfn_type == p2m_ram_shared);
1012
1013 /* Check that the handles match */
1014 if ( spage->sharing->handle != sh )
1015 {
1016 ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
1017 mem_sharing_page_unlock(secondpg);
1018 mem_sharing_page_unlock(firstpg);
1019 goto err_out;
1020 }
1021
1022 if ( cpage->sharing->handle != ch )
1023 {
1024 ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
1025 mem_sharing_page_unlock(secondpg);
1026 mem_sharing_page_unlock(firstpg);
1027 goto err_out;
1028 }
1029
1030 /* Merge the lists together */
1031 rmap_seed_iterator(cpage, &ri);
1032 while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
1033 {
1034 /*
1035 * Get the source page and type, this should never fail:
1036 * we are under shr lock, and got a successful lookup.
1037 */
1038 BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
1039 /*
1040 * Move the gfn_info from client list to source list.
1041 * Don't change the type of rmap for the client page.
1042 */
1043 rmap_del(gfn, cpage, 0);
1044 rmap_add(gfn, spage);
1045 put_count++;
1046 d = get_domain_by_id(gfn->domain);
1047 BUG_ON(!d);
1048 BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
1049 put_domain(d);
1050 }
1051 ASSERT(list_empty(&cpage->sharing->gfns));
1052 BUG_ON(!put_count);
1053
1054 /* Clear the rest of the shared state */
1055 page_sharing_dispose(cpage);
1056 cpage->sharing = NULL;
1057
1058 mem_sharing_page_unlock(secondpg);
1059 mem_sharing_page_unlock(firstpg);
1060
1061 /* Free the client page */
1062 put_page_alloc_ref(cpage);
1063
1064 while ( put_count-- )
1065 put_page_and_type(cpage);
1066
1067 /* We managed to free a domain page. */
1068 atomic_dec(&nr_shared_mfns);
1069 atomic_inc(&nr_saved_mfns);
1070 ret = 0;
1071
1072 err_out:
1073 put_two_gfns(&tg);
1074 return ret;
1075 }
1076
1077 /*
1078 * This function is intended to be used for plugging a "hole" in the client's
1079 * physmap with a shared memory entry. Unfortunately the definition of a "hole"
1080 * is currently ambiguous. There are two cases in which one can run into a "hole":
1081 * 1) there is no pagetable entry at all
1082 * 2) there is a pagetable entry with a type that passes p2m_is_hole
1083 *
1084 * The intended use-case for this function is case 1.
1085 *
1086 * During 1) the mem_access being returned is p2m_access_n and that is
1087 * incorrect to be applied to the new entry being added to the client physmap,
1088 * thus we make use of the p2m->default_access instead.
1089 * When 2) is true it is possible that the existing pagetable entry also has
1090 * a mem_access permission set, which could be p2m_access_n. Since we can't
1091 * differentiate whether we are in case 1) or 2), we default to using the
1092 * access permission defined as default for the p2m, thus in
1093 * case 2) overwriting any custom mem_access permission the user may have set
1094 * on a hole page. Custom mem_access permissions being set on a hole are
1095 * unheard of but technically possible.
1096 *
1097 * TODO: to properly resolve this issue implement differentiation between the
1098 * two "hole" types.
1099 */
1100 static
1101 int add_to_physmap(struct domain *sd, unsigned long sgfn, shr_handle_t sh,
1102 struct domain *cd, unsigned long cgfn, bool lock)
1103 {
1104 struct page_info *spage;
1105 int ret = -EINVAL;
1106 mfn_t smfn, cmfn;
1107 p2m_type_t smfn_type, cmfn_type;
1108 struct gfn_info *gfn_info;
1109 struct p2m_domain *p2m = p2m_get_hostp2m(cd);
1110 struct two_gfns tg;
1111
1112 get_two_gfns(sd, _gfn(sgfn), &smfn_type, NULL, &smfn,
1113 cd, _gfn(cgfn), &cmfn_type, NULL, &cmfn, 0, &tg, lock);
1114
1115 /* Get the source shared page, check and lock */
1116 ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
1117 spage = __grab_shared_page(smfn);
1118 if ( spage == NULL )
1119 goto err_out;
1120
1121 ASSERT(smfn_type == p2m_ram_shared);
1122
1123 /* Check that the handles match */
1124 if ( spage->sharing->handle != sh )
1125 goto err_unlock;
1126
1127 /*
1128 * Make sure the target page is a hole in the physmap. These are typically
1129 * p2m_mmio_dm, but also accept p2m_invalid and paged out pages. See the
1130 * definition of p2m_is_hole in p2m.h.
1131 */
1132 if ( !p2m_is_hole(cmfn_type) )
1133 {
1134 ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
1135 goto err_unlock;
1136 }
1137
1138 /* This is simpler than regular sharing */
1139 BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
1140 if ( !(gfn_info = mem_sharing_gfn_alloc(spage, cd, cgfn)) )
1141 {
1142 put_page_and_type(spage);
1143 ret = -ENOMEM;
1144 goto err_unlock;
1145 }
1146
1147 ret = p2m_set_entry(p2m, _gfn(cgfn), smfn, PAGE_ORDER_4K,
1148 p2m_ram_shared, p2m->default_access);
1149
1150 /* Tempted to turn this into an assert */
1151 if ( ret )
1152 {
1153 mem_sharing_gfn_destroy(spage, cd, gfn_info);
1154 put_page_and_type(spage);
1155 }
1156 else
1157 {
1158 /*
1159 * There is a chance we're plugging a hole where a paged out
1160 * page was.
1161 */
1162 if ( p2m_is_paging(cmfn_type) && (cmfn_type != p2m_ram_paging_out) )
1163 {
1164 atomic_dec(&cd->paged_pages);
1165 /*
1166 * Further, there is a chance this was a valid page.
1167 * Don't leak it.
1168 */
1169 if ( mfn_valid(cmfn) )
1170 {
1171 struct page_info *cpage = mfn_to_page(cmfn);
1172
1173 if ( !get_page(cpage, cd) )
1174 {
1175 domain_crash(cd);
1176 ret = -EOVERFLOW;
1177 goto err_unlock;
1178 }
1179 put_page_alloc_ref(cpage);
1180 put_page(cpage);
1181 }
1182 }
1183 }
1184
1185 atomic_inc(&nr_saved_mfns);
1186
1187 err_unlock:
1188 mem_sharing_page_unlock(spage);
1189 err_out:
1190 if ( lock )
1191 put_two_gfns(&tg);
1192 return ret;
1193 }
1194
1195
1196 /*
1197 * A note on the rationale for unshare error handling:
1198 * 1. Unshare can only fail with ENOMEM. Any other error condition triggers a BUG_ON().
1199 * 2. We notify a potential dom0 helper through a vm_event ring. But we
1200 * allow the notification to not go to sleep. If the event ring is full
1201 * of ENOMEM warnings, then it's on the ball.
1202 * 3. We cannot go to sleep until the unshare is resolved, because we might
1203 * be buried deep into locks (e.g. something -> copy_to_user -> __hvm_copy)
1204 * 4. So, we make sure we:
1205 * 4.1. return an error
1206 * 4.2. do not corrupt shared memory
1207 * 4.3. do not corrupt guest memory
1208 * 4.4. let the guest deal with it if the error propagation will reach it
1209 */
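/*
 * Sketch of the expected caller pattern for points 2-4 above (the actual
 * call sites live outside this file and may differ in detail):
 *
 *     if ( __mem_sharing_unshare_page(d, gfn, false) < 0 )
 *     {
 *         // Only -ENOMEM can reach here: notify a helper via the vm_event
 *         // ring without sleeping, and let the caller retry later.
 *         mem_sharing_notify_enomem(d, gfn, false);
 *         return -ENOMEM;
 *     }
 */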
1210 int __mem_sharing_unshare_page(struct domain *d,
1211 unsigned long gfn,
1212 bool destroy)
1213 {
1214 p2m_type_t p2mt;
1215 mfn_t mfn;
1216 struct page_info *page, *old_page;
1217 int last_gfn;
1218 gfn_info_t *gfn_info = NULL;
1219
1220 mfn = get_gfn(d, gfn, &p2mt);
1221
1222 /* Has someone already unshared it? */
1223 if ( !p2m_is_shared(p2mt) )
1224 {
1225 put_gfn(d, gfn);
1226 return 0;
1227 }
1228
1229 page = __grab_shared_page(mfn);
1230 if ( page == NULL )
1231 {
1232 gdprintk(XENLOG_ERR, "Domain p2m is shared, but page is not: %lx\n",
1233 gfn);
1234 BUG();
1235 }
1236
1237 gfn_info = rmap_retrieve(d->domain_id, gfn, page);
1238 if ( unlikely(gfn_info == NULL) )
1239 {
1240 gdprintk(XENLOG_ERR, "Could not find gfn_info for shared gfn: %lx\n",
1241 gfn);
1242 BUG();
1243 }
1244
1245 /*
1246 * Do the accounting first. If anything fails below, we have bigger
1247 * fish to fry. First, remove the gfn from the list.
1248 */
1249 last_gfn = rmap_count(page) == 1;
1250 if ( last_gfn )
1251 {
1252 /*
1253 * Clean up shared state. Get rid of the <domid, gfn> tuple
1254 * before destroying the rmap.
1255 */
1256 mem_sharing_gfn_destroy(page, d, gfn_info);
1257 page_sharing_dispose(page);
1258 page->sharing = NULL;
1259 atomic_dec(&nr_shared_mfns);
1260 }
1261 else
1262 atomic_dec(&nr_saved_mfns);
1263
1264 /*
1265 * If the GFN is getting destroyed drop the references to MFN
1266 * (possibly freeing the page), and exit early.
1267 */
1268 if ( destroy )
1269 {
1270 if ( !last_gfn )
1271 mem_sharing_gfn_destroy(page, d, gfn_info);
1272
1273 mem_sharing_page_unlock(page);
1274
1275 if ( last_gfn )
1276 put_page_alloc_ref(page);
1277
1278 put_page_and_type(page);
1279 put_gfn(d, gfn);
1280
1281 return 0;
1282 }
1283
1284 if ( last_gfn )
1285 {
1286 /* Making a page private atomically unlocks it */
1287 BUG_ON(page_make_private(d, page) != 0);
1288 goto private_page_found;
1289 }
1290
1291 old_page = page;
1292 page = alloc_domheap_page(d, 0);
1293 if ( !page )
1294 {
1295 /* Undo dec of nr_saved_mfns, as the retry will decrease again. */
1296 atomic_inc(&nr_saved_mfns);
1297 mem_sharing_page_unlock(old_page);
1298 put_gfn(d, gfn);
1299 /*
1300 * Caller is responsible for placing an event
1301 * in the ring.
1302 */
1303 return -ENOMEM;
1304 }
1305
1306 copy_domain_page(page_to_mfn(page), page_to_mfn(old_page));
1307
1308 BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page)));
1309 mem_sharing_gfn_destroy(old_page, d, gfn_info);
1310 mem_sharing_page_unlock(old_page);
1311 put_page_and_type(old_page);
1312
1313 private_page_found:
1314 if ( p2m_change_type_one(d, gfn, p2m_ram_shared, p2m_ram_rw) )
1315 {
1316 gdprintk(XENLOG_ERR, "Could not change p2m type d %pd gfn %lx.\n",
1317 d, gfn);
1318 BUG();
1319 }
1320
1321 /* Update m2p entry */
1322 set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), gfn);
1323
1324 /*
1325 * Now that the gfn<->mfn map is properly established,
1326 * marking dirty is feasible
1327 */
1328 paging_mark_dirty(d, page_to_mfn(page));
1329 /* We do not need to unlock a private page */
1330 put_gfn(d, gfn);
1331 return 0;
1332 }
1333
1334 int relinquish_shared_pages(struct domain *d)
1335 {
1336 int rc = 0;
1337 struct mem_sharing_domain *msd = &d->arch.hvm.mem_sharing;
1338 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1339 unsigned long gfn, count = 0;
1340
1341 if ( p2m == NULL )
1342 return 0;
1343
1344 p2m_lock(p2m);
1345 for ( gfn = msd->next_shared_gfn_to_relinquish;
1346 gfn <= p2m->max_mapped_pfn; gfn++ )
1347 {
1348 p2m_access_t a;
1349 p2m_type_t t;
1350 mfn_t mfn;
1351 int set_rc;
1352
1353 if ( !atomic_read(&d->shr_pages) )
1354 break;
1355
1356 mfn = p2m->get_entry(p2m, _gfn(gfn), &t, &a, 0, NULL, NULL);
1357 if ( mfn_valid(mfn) && p2m_is_shared(t) )
1358 {
1359 /* Does not fail with ENOMEM given "destroy" is set to true */
1360 BUG_ON(__mem_sharing_unshare_page(d, gfn, true));
1361 /*
1362 * Clear out the p2m entry so no one else may try to
1363 * unshare. Must succeed: we just read the old entry and
1364 * we hold the p2m lock.
1365 */
1366 set_rc = p2m->set_entry(p2m, _gfn(gfn), INVALID_MFN, PAGE_ORDER_4K,
1367 p2m_invalid, p2m_access_rwx, -1);
1368 ASSERT(!set_rc);
1369 count += 0x10;
1370 }
1371 else
1372 ++count;
1373
1374 /* Preempt every 2MiB (shared) or 32MiB (unshared) - arbitrary. */
1375 if ( count >= 0x2000 )
1376 {
1377 if ( hypercall_preempt_check() )
1378 {
1379 msd->next_shared_gfn_to_relinquish = gfn + 1;
1380 rc = -ERESTART;
1381 break;
1382 }
1383 count = 0;
1384 }
1385 }
1386
1387 p2m_unlock(p2m);
1388 return rc;
1389 }
1390
1391 static int range_share(struct domain *d, struct domain *cd,
1392 struct mem_sharing_op_range *range)
1393 {
1394 int rc = 0;
1395 shr_handle_t sh, ch;
1396 unsigned long start = range->opaque ?: range->first_gfn;
1397
1398 while ( range->last_gfn >= start )
1399 {
1400 /*
1401 * We only break out if we run out of memory as individual pages may
1402 * legitimately be unsharable and we just want to skip over those.
1403 */
1404 rc = nominate_page(d, _gfn(start), 0, false, &sh);
1405 if ( rc == -ENOMEM )
1406 break;
1407
1408 if ( !rc )
1409 {
1410 rc = nominate_page(cd, _gfn(start), 0, false, &ch);
1411 if ( rc == -ENOMEM )
1412 break;
1413
1414 if ( !rc )
1415 {
1416 /* If we get here this should be guaranteed to succeed. */
1417 rc = share_pages(d, _gfn(start), sh, cd, _gfn(start), ch);
1418 ASSERT(!rc);
1419 }
1420 }
1421
1422 /* Check for continuation if it's not the last iteration. */
1423 if ( range->last_gfn >= ++start && hypercall_preempt_check() )
1424 {
1425 rc = 1;
1426 break;
1427 }
1428 }
1429
1430 range->opaque = start;
1431
1432 /*
1433 * The last page may fail with -EINVAL, and for range sharing we don't
1434 * care about that.
1435 */
1436 if ( range->last_gfn < start && rc == -EINVAL )
1437 rc = 0;
1438
1439 return rc;
1440 }
1441
1442 static inline int mem_sharing_control(struct domain *d, bool enable,
1443 uint16_t flags)
1444 {
1445 if ( enable )
1446 {
1447 if ( unlikely(!is_hvm_domain(d) || !cpu_has_vmx) )
1448 return -EOPNOTSUPP;
1449
1450 if ( unlikely(!hap_enabled(d)) )
1451 return -ENODEV;
1452
1453 if ( unlikely(is_iommu_enabled(d) &&
1454 !(flags & XENMEM_FORK_WITH_IOMMU_ALLOWED)) )
1455 return -EXDEV;
1456 }
1457
1458 d->arch.hvm.mem_sharing.enabled = enable;
1459 return 0;
1460 }
1461
1462 /*
1463 * Forking a page only gets called when the VM faults due to no entry being
1464 * in the EPT for the access. Depending on the type of access we either
1465 * populate the physmap with a shared entry for read-only access or
1466 * fork the page if its a write access.
1467 *
1468 * The client p2m is already locked so we only need to lock
1469 * the parent's here.
1470 */
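/*
 * Hypothetical caller sketch (the real call sites are in the nested page
 * fault handling paths outside this file and may differ):
 *
 *     if ( mem_sharing_is_fork(d) )
 *         rc = mem_sharing_fork_page(d, gfn, npfec.write_access);
 */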
1471 int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool unsharing)
1472 {
1473 int rc = -ENOENT;
1474 shr_handle_t handle;
1475 struct domain *parent = d->parent;
1476 struct p2m_domain *p2m;
1477 unsigned long gfn_l = gfn_x(gfn);
1478 mfn_t mfn, new_mfn;
1479 p2m_type_t p2mt;
1480 struct page_info *page;
1481
1482 if ( !mem_sharing_is_fork(d) )
1483 return -ENOENT;
1484
1485 if ( !unsharing )
1486 {
1487 /* For read-only accesses we just add a shared entry to the physmap */
1488 while ( parent )
1489 {
1490 if ( !(rc = nominate_page(parent, gfn, 0, false, &handle)) )
1491 break;
1492
1493 parent = parent->parent;
1494 }
1495
1496 if ( !rc )
1497 {
1498 /* The client's p2m is already locked */
1499 p2m = p2m_get_hostp2m(parent);
1500
1501 p2m_lock(p2m);
1502 rc = add_to_physmap(parent, gfn_l, handle, d, gfn_l, false);
1503 p2m_unlock(p2m);
1504
1505 if ( !rc )
1506 return 0;
1507 }
1508 }
1509
1510 /*
1511 * If it's a write access (ie. unsharing) or if adding a shared entry to
1512 * the physmap failed we'll fork the page directly.
1513 */
1514 p2m = p2m_get_hostp2m(d);
1515 parent = d->parent;
1516
1517 while ( parent )
1518 {
1519 mfn = get_gfn_query(parent, gfn_l, &p2mt);
1520
1521 /* We can't fork grant memory from the parent, only regular ram */
1522 if ( mfn_valid(mfn) && p2m_is_ram(p2mt) )
1523 break;
1524
1525 put_gfn(parent, gfn_l);
1526 parent = parent->parent;
1527 }
1528
1529 if ( !parent )
1530 return -ENOENT;
1531
1532 if ( !(page = alloc_domheap_page(d, 0)) )
1533 {
1534 put_gfn(parent, gfn_l);
1535 return -ENOMEM;
1536 }
1537
1538 new_mfn = page_to_mfn(page);
1539 copy_domain_page(new_mfn, mfn);
1540 set_gpfn_from_mfn(mfn_x(new_mfn), gfn_l);
1541
1542 put_gfn(parent, gfn_l);
1543
1544 return p2m->set_entry(p2m, gfn, new_mfn, PAGE_ORDER_4K, p2m_ram_rw,
1545 p2m->default_access, -1);
1546 }
1547
1548 static int bring_up_vcpus(struct domain *cd, struct domain *d)
1549 {
1550 unsigned int i;
1551 int ret = -EINVAL;
1552
1553 if ( d->max_vcpus != cd->max_vcpus ||
1554 (ret = cpupool_move_domain(cd, d->cpupool)) )
1555 return ret;
1556
1557 for ( i = 0; i < cd->max_vcpus; i++ )
1558 {
1559 if ( !d->vcpu[i] || cd->vcpu[i] )
1560 continue;
1561
1562 if ( !vcpu_create(cd, i) )
1563 return -EINVAL;
1564 }
1565
1566 domain_update_node_affinity(cd);
1567 return 0;
1568 }
1569
1570 static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
1571 {
1572 unsigned int i;
1573 struct p2m_domain *p2m = p2m_get_hostp2m(cd);
1574 int ret = -EINVAL;
1575
1576 for ( i = 0; i < cd->max_vcpus; i++ )
1577 {
1578 const struct vcpu *d_vcpu = d->vcpu[i];
1579 struct vcpu *cd_vcpu = cd->vcpu[i];
1580 mfn_t vcpu_info_mfn;
1581
1582 if ( !d_vcpu || !cd_vcpu )
1583 continue;
1584
1585 /* Copy & map in the vcpu_info page if the guest uses one */
1586 vcpu_info_mfn = d_vcpu->vcpu_info_mfn;
1587 if ( !mfn_eq(vcpu_info_mfn, INVALID_MFN) )
1588 {
1589 mfn_t new_vcpu_info_mfn = cd_vcpu->vcpu_info_mfn;
1590
1591 /* Allocate & map the page for it if it hasn't been already */
1592 if ( mfn_eq(new_vcpu_info_mfn, INVALID_MFN) )
1593 {
1594 gfn_t gfn = mfn_to_gfn(d, vcpu_info_mfn);
1595 unsigned long gfn_l = gfn_x(gfn);
1596 struct page_info *page;
1597
1598 if ( !(page = alloc_domheap_page(cd, 0)) )
1599 return -ENOMEM;
1600
1601 new_vcpu_info_mfn = page_to_mfn(page);
1602 set_gpfn_from_mfn(mfn_x(new_vcpu_info_mfn), gfn_l);
1603
1604 ret = p2m->set_entry(p2m, gfn, new_vcpu_info_mfn,
1605 PAGE_ORDER_4K, p2m_ram_rw,
1606 p2m->default_access, -1);
1607 if ( ret )
1608 return ret;
1609
1610 ret = map_vcpu_info(cd_vcpu, gfn_l,
1611 PAGE_OFFSET(d_vcpu->vcpu_info));
1612 if ( ret )
1613 return ret;
1614 }
1615
1616 copy_domain_page(new_vcpu_info_mfn, vcpu_info_mfn);
1617 }
1618
1619 /*
1620 * TODO: to support VMs with PV interfaces copy additional
1621 * settings here, such as PV timers.
1622 */
1623 }
1624
1625 return 0;
1626 }
1627
1628 static int fork_hap_allocation(struct domain *cd, struct domain *d)
1629 {
1630 int rc;
1631 bool preempted;
1632 unsigned long mb = hap_get_allocation(d);
1633
1634 if ( mb == hap_get_allocation(cd) )
1635 return 0;
1636
1637 paging_lock(cd);
1638 rc = hap_set_allocation(cd, mb << (20 - PAGE_SHIFT), &preempted);
1639 paging_unlock(cd);
1640
1641 return preempted ? -ERESTART : rc;
1642 }
1643
1644 static void copy_tsc(struct domain *cd, struct domain *d)
1645 {
1646 uint32_t tsc_mode;
1647 uint32_t gtsc_khz;
1648 uint32_t incarnation;
1649 uint64_t elapsed_nsec;
1650
1651 tsc_get_info(d, &tsc_mode, &elapsed_nsec, &gtsc_khz, &incarnation);
1652 /* Don't bump incarnation on set */
1653 tsc_set_info(cd, tsc_mode, elapsed_nsec, gtsc_khz, incarnation - 1);
1654 }
1655
1656 static int copy_special_pages(struct domain *cd, struct domain *d)
1657 {
1658 mfn_t new_mfn, old_mfn;
1659 gfn_t new_gfn, old_gfn;
1660 struct p2m_domain *p2m = p2m_get_hostp2m(cd);
1661 static const unsigned int params[] =
1662 {
1663 HVM_PARAM_STORE_PFN,
1664 HVM_PARAM_IOREQ_PFN,
1665 HVM_PARAM_BUFIOREQ_PFN,
1666 HVM_PARAM_CONSOLE_PFN
1667 };
1668 unsigned int i;
1669 int rc;
1670
1671 for ( i = 0; i < ARRAY_SIZE(params); i++ )
1672 {
1673 p2m_type_t t;
1674 uint64_t value = 0;
1675 struct page_info *page;
1676
1677 if ( hvm_get_param(d, params[i], &value) || !value )
1678 continue;
1679
1680 old_mfn = get_gfn_query_unlocked(d, value, &t);
1681 new_mfn = get_gfn_query_unlocked(cd, value, &t);
1682
1683 /* Allocate the page and map it in if it's not present */
1684 if ( mfn_eq(new_mfn, INVALID_MFN) )
1685 {
1686 if ( !(page = alloc_domheap_page(cd, 0)) )
1687 return -ENOMEM;
1688
1689 new_mfn = page_to_mfn(page);
1690 set_gpfn_from_mfn(mfn_x(new_mfn), value);
1691
1692 rc = p2m->set_entry(p2m, _gfn(value), new_mfn, PAGE_ORDER_4K,
1693 p2m_ram_rw, p2m->default_access, -1);
1694 if ( rc )
1695 return rc;
1696 }
1697
1698 copy_domain_page(new_mfn, old_mfn);
1699 }
1700
1701 old_mfn = _mfn(virt_to_mfn(d->shared_info));
1702 new_mfn = _mfn(virt_to_mfn(cd->shared_info));
1703 copy_domain_page(new_mfn, old_mfn);
1704
1705 old_gfn = _gfn(get_gpfn_from_mfn(mfn_x(old_mfn)));
1706 new_gfn = _gfn(get_gpfn_from_mfn(mfn_x(new_mfn)));
1707
1708 if ( !gfn_eq(old_gfn, new_gfn) )
1709 {
1710 if ( !gfn_eq(new_gfn, INVALID_GFN) )
1711 {
1712 /* if shared_info is mapped to a different gfn just remove it */
1713 rc = p2m->set_entry(p2m, new_gfn, INVALID_MFN, PAGE_ORDER_4K,
1714 p2m_invalid, p2m->default_access, -1);
1715 if ( rc )
1716 return rc;
1717 }
1718
1719 if ( !gfn_eq(old_gfn, INVALID_GFN) )
1720 {
1721 /* now map it to the same gfn as the parent */
1722 rc = p2m->set_entry(p2m, old_gfn, new_mfn, PAGE_ORDER_4K,
1723 p2m_ram_rw, p2m->default_access, -1);
1724 if ( rc )
1725 return rc;
1726 }
1727 }
1728
1729 return 0;
1730 }
1731
1732 static int copy_settings(struct domain *cd, struct domain *d)
1733 {
1734 int rc;
1735
1736 if ( (rc = copy_vcpu_settings(cd, d)) )
1737 return rc;
1738
1739 if ( (rc = hvm_copy_context_and_params(cd, d)) )
1740 return rc;
1741
1742 if ( (rc = copy_special_pages(cd, d)) )
1743 return rc;
1744
1745 copy_tsc(cd, d);
1746
1747 return rc;
1748 }
1749
1750 static int fork(struct domain *cd, struct domain *d)
1751 {
1752 int rc = -EBUSY;
1753
1754 if ( !cd->controller_pause_count )
1755 return rc;
1756
1757 if ( !cd->parent )
1758 {
1759 if ( !get_domain(d) )
1760 {
1761 ASSERT_UNREACHABLE();
1762 return -EBUSY;
1763 }
1764
1765 domain_pause(d);
1766 cd->max_pages = d->max_pages;
1767 cd->parent = d;
1768 }
1769
1770 /* This is preemptible so it's the first to get done */
1771 if ( (rc = fork_hap_allocation(cd, d)) )
1772 goto done;
1773
1774 if ( (rc = bring_up_vcpus(cd, d)) )
1775 goto done;
1776
1777 rc = copy_settings(cd, d);
1778
1779 done:
1780 if ( rc && rc != -ERESTART )
1781 {
1782 domain_unpause(d);
1783 put_domain(d);
1784 cd->parent = NULL;
1785 }
1786
1787 return rc;
1788 }
1789
1790 /*
1791 * The fork reset operation is intended to be used on short-lived forks only.
1792 * There is no hypercall continuation operation implemented for this reason.
1793 * For forks that obtain a larger memory footprint it is likely going to be
1794 * more performant to create a new fork instead of resetting an existing one.
1795 *
1796 * TODO: In case this hypercall would become useful on forks with larger memory
1797 * footprints the hypercall continuation should be implemented (or if this
1798 * feature needs to be become "stable").
1799 */
1800 static int mem_sharing_fork_reset(struct domain *d, struct domain *pd)
1801 {
1802 int rc;
1803 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1804 struct page_info *page, *tmp;
1805
1806 domain_pause(d);
1807
1808 /* need recursive lock because we will free pages */
1809 spin_lock_recursive(&d->page_alloc_lock);
1810 page_list_for_each_safe(page, tmp, &d->page_list)
1811 {
1812 shr_handle_t sh;
1813 mfn_t mfn = page_to_mfn(page);
1814 gfn_t gfn = mfn_to_gfn(d, mfn);
1815
1816 /*
1817 * We only want to remove pages from the fork here that were copied
1818 * from the parent but could be potentially re-populated using memory
1819 * sharing after the reset. These pages all must be regular pages with
1820 * no extra reference held to them, so it should be possible to make
1821 * them sharable. Unfortunately the p2m_is_sharable check is not sufficient
1822 * to test this as it doesn't check the page's reference count. We thus
1823 * check whether the page is convertible to the shared type using
1824 * nominate_page. In case the page is already shared (ie. a share
1825 * handle is returned) then we don't remove it.
1826 */
1827 if ( (rc = nominate_page(d, gfn, 0, true, &sh)) || sh )
1828 continue;
1829
1830 /* Forked memory is 4k and we don't split large pages, so this must work. */
1831 rc = p2m->set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
1832 p2m_invalid, p2m_access_rwx, -1);
1833 ASSERT(!rc);
1834
1835 put_page_alloc_ref(page);
1836 put_page_and_type(page);
1837 }
1838 spin_unlock_recursive(&d->page_alloc_lock);
1839
1840 rc = copy_settings(d, pd);
1841
1842 domain_unpause(d);
1843
1844 return rc;
1845 }
1846
1847 int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
1848 {
1849 int rc;
1850 xen_mem_sharing_op_t mso;
1851 struct domain *d;
1852
1853 rc = -EFAULT;
1854 if ( copy_from_guest(&mso, arg, 1) )
1855 return rc;
1856
1857 if ( mso.op == XENMEM_sharing_op_audit )
1858 return audit();
1859
1860 rc = rcu_lock_live_remote_domain_by_id(mso.domain, &d);
1861 if ( rc )
1862 return rc;
1863
1864 rc = xsm_mem_sharing(XSM_DM_PRIV, d);
1865 if ( rc )
1866 goto out;
1867
1868 if ( !mem_sharing_enabled(d) &&
1869 (rc = mem_sharing_control(d, true, 0)) )
1870 goto out;
1871
1872 switch ( mso.op )
1873 {
1874 case XENMEM_sharing_op_nominate_gfn:
1875 {
1876 shr_handle_t handle;
1877
1878 rc = nominate_page(d, _gfn(mso.u.nominate.u.gfn), 0, false, &handle);
1879 mso.u.nominate.handle = handle;
1880 }
1881 break;
1882
1883 case XENMEM_sharing_op_nominate_gref:
1884 {
1885 grant_ref_t gref = mso.u.nominate.u.grant_ref;
1886 gfn_t gfn;
1887 shr_handle_t handle;
1888
1889 rc = mem_sharing_gref_to_gfn(d->grant_table, gref, &gfn, NULL);
1890 if ( rc < 0 )
1891 goto out;
1892
1893 rc = nominate_page(d, gfn, 3, false, &handle);
1894 mso.u.nominate.handle = handle;
1895 }
1896 break;
1897
1898 case XENMEM_sharing_op_share:
1899 {
1900 gfn_t sgfn, cgfn;
1901 struct domain *cd;
1902 shr_handle_t sh, ch;
1903
1904 rc = rcu_lock_live_remote_domain_by_id(mso.u.share.client_domain,
1905 &cd);
1906 if ( rc )
1907 goto out;
1908
1909 rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, mso.op);
1910 if ( rc )
1911 {
1912 rcu_unlock_domain(cd);
1913 goto out;
1914 }
1915
1916 if ( !mem_sharing_enabled(cd) )
1917 {
1918 rcu_unlock_domain(cd);
1919 rc = -EINVAL;
1920 goto out;
1921 }
1922
1923 if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.source_gfn) )
1924 {
1925 grant_ref_t gref =
1926 XENMEM_SHARING_OP_FIELD_GET_GREF(mso.u.share.source_gfn);
1927
1928 rc = mem_sharing_gref_to_gfn(d->grant_table, gref, &sgfn,
1929 NULL);
1930 if ( rc < 0 )
1931 {
1932 rcu_unlock_domain(cd);
1933 goto out;
1934 }
1935 }
1936 else
1937 sgfn = _gfn(mso.u.share.source_gfn);
1938
1939 if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.client_gfn) )
1940 {
1941 grant_ref_t gref =
1942 XENMEM_SHARING_OP_FIELD_GET_GREF(mso.u.share.client_gfn);
1943
1944 rc = mem_sharing_gref_to_gfn(cd->grant_table, gref, &cgfn,
1945 NULL);
1946 if ( rc < 0 )
1947 {
1948 rcu_unlock_domain(cd);
1949 goto out;
1950 }
1951 }
1952 else
1953 cgfn = _gfn(mso.u.share.client_gfn);
1954
1955 sh = mso.u.share.source_handle;
1956 ch = mso.u.share.client_handle;
1957
1958 rc = share_pages(d, sgfn, sh, cd, cgfn, ch);
1959
1960 rcu_unlock_domain(cd);
1961 }
1962 break;
1963
1964 case XENMEM_sharing_op_add_physmap:
1965 {
1966 unsigned long sgfn, cgfn;
1967 struct domain *cd;
1968 shr_handle_t sh;
1969
1970 rc = rcu_lock_live_remote_domain_by_id(mso.u.share.client_domain,
1971 &cd);
1972 if ( rc )
1973 goto out;
1974
1975 rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, mso.op);
1976 if ( rc )
1977 {
1978 rcu_unlock_domain(cd);
1979 goto out;
1980 }
1981
1982 if ( !mem_sharing_enabled(cd) )
1983 {
1984 rcu_unlock_domain(cd);
1985 rc = -EINVAL;
1986 goto out;
1987 }
1988
1989 if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.source_gfn) )
1990 {
1991 /* Cannot add a gref to the physmap */
1992 rcu_unlock_domain(cd);
1993 rc = -EINVAL;
1994 goto out;
1995 }
1996
1997 sgfn = mso.u.share.source_gfn;
1998 sh = mso.u.share.source_handle;
1999 cgfn = mso.u.share.client_gfn;
2000
2001 rc = add_to_physmap(d, sgfn, sh, cd, cgfn, true);
2002
2003 rcu_unlock_domain(cd);
2004 }
2005 break;
2006
2007 case XENMEM_sharing_op_range_share:
2008 {
2009 unsigned long max_sgfn, max_cgfn;
2010 struct domain *cd;
2011
2012 rc = -EINVAL;
2013 if ( mso.u.range._pad[0] || mso.u.range._pad[1] ||
2014 mso.u.range._pad[2] )
2015 goto out;
2016
2017 /*
2018 * We use opaque for the hypercall continuation value.
2019 * Ideally the user sets this to 0 in the beginning but
2020 * there is no good way of enforcing that here, so we just check
2021 * that it's at least in range.
2022 */
2023 if ( mso.u.range.opaque &&
2024 (mso.u.range.opaque < mso.u.range.first_gfn ||
2025 mso.u.range.opaque > mso.u.range.last_gfn) )
2026 goto out;
2027
2028 rc = rcu_lock_live_remote_domain_by_id(mso.u.range.client_domain,
2029 &cd);
2030 if ( rc )
2031 goto out;
2032
2033 /*
2034 * We reuse XENMEM_sharing_op_share XSM check here as this is
2035 * essentially the same concept repeated over multiple pages.
2036 */
2037 rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd,
2038 XENMEM_sharing_op_share);
2039 if ( rc )
2040 {
2041 rcu_unlock_domain(cd);
2042 goto out;
2043 }
2044
2045 if ( !mem_sharing_enabled(cd) )
2046 {
2047 rcu_unlock_domain(cd);
2048 rc = -EINVAL;
2049 goto out;
2050 }
2051
2052 /*
2053 * Sanity check only, the client should keep the domains paused for
2054 * the duration of this op.
2055 */
2056 if ( !atomic_read(&d->pause_count) ||
2057 !atomic_read(&cd->pause_count) )
2058 {
2059 rcu_unlock_domain(cd);
2060 rc = -EINVAL;
2061 goto out;
2062 }
2063
2064 max_sgfn = domain_get_maximum_gpfn(d);
2065 max_cgfn = domain_get_maximum_gpfn(cd);
2066
2067 if ( max_sgfn < mso.u.range.first_gfn ||
2068 max_sgfn < mso.u.range.last_gfn ||
2069 max_cgfn < mso.u.range.first_gfn ||
2070 max_cgfn < mso.u.range.last_gfn )
2071 {
2072 rcu_unlock_domain(cd);
2073 rc = -EINVAL;
2074 goto out;
2075 }
2076
2077 rc = range_share(d, cd, &mso.u.range);
2078 rcu_unlock_domain(cd);
2079
2080 if ( rc > 0 )
2081 {
2082 if ( __copy_to_guest(arg, &mso, 1) )
2083 rc = -EFAULT;
2084 else
2085 rc = hypercall_create_continuation(__HYPERVISOR_memory_op,
2086 "lh", XENMEM_sharing_op,
2087 arg);
2088 }
2089 else
2090 mso.u.range.opaque = 0;
2091 }
2092 break;
2093
2094 case XENMEM_sharing_op_debug_gfn:
2095 rc = debug_gfn(d, _gfn(mso.u.debug.u.gfn));
2096 break;
2097
2098 case XENMEM_sharing_op_debug_gref:
2099 rc = debug_gref(d, mso.u.debug.u.gref);
2100 break;
2101
2102 case XENMEM_sharing_op_fork:
2103 {
2104 struct domain *pd;
2105
2106 rc = -EINVAL;
2107 if ( mso.u.fork.pad )
2108 goto out;
2109 if ( mso.u.fork.flags &
2110 ~(XENMEM_FORK_WITH_IOMMU_ALLOWED | XENMEM_FORK_BLOCK_INTERRUPTS) )
2111 goto out;
2112
2113 rc = rcu_lock_live_remote_domain_by_id(mso.u.fork.parent_domain,
2114 &pd);
2115 if ( rc )
2116 goto out;
2117
2118 rc = -EINVAL;
2119 if ( pd->max_vcpus != d->max_vcpus )
2120 {
2121 rcu_unlock_domain(pd);
2122 goto out;
2123 }
2124
2125 if ( !mem_sharing_enabled(pd) &&
2126 (rc = mem_sharing_control(pd, true, mso.u.fork.flags)) )
2127 {
2128 rcu_unlock_domain(pd);
2129 goto out;
2130 }
2131
2132 rc = fork(d, pd);
2133
2134 if ( rc == -ERESTART )
2135 rc = hypercall_create_continuation(__HYPERVISOR_memory_op,
2136 "lh", XENMEM_sharing_op,
2137 arg);
2138 else if ( !rc && (mso.u.fork.flags & XENMEM_FORK_BLOCK_INTERRUPTS) )
2139 d->arch.hvm.mem_sharing.block_interrupts = true;
2140
2141 rcu_unlock_domain(pd);
2142 break;
2143 }
2144
2145 case XENMEM_sharing_op_fork_reset:
2146 {
2147 struct domain *pd;
2148
2149 rc = -EINVAL;
2150 if ( mso.u.fork.pad || mso.u.fork.flags )
2151 goto out;
2152
2153 rc = -ENOSYS;
2154 if ( !d->parent )
2155 goto out;
2156
2157 rc = rcu_lock_live_remote_domain_by_id(d->parent->domain_id, &pd);
2158 if ( rc )
2159 goto out;
2160
2161 rc = mem_sharing_fork_reset(d, pd);
2162
2163 rcu_unlock_domain(pd);
2164 break;
2165 }
2166
2167 default:
2168 rc = -ENOSYS;
2169 break;
2170 }
2171
2172 if ( !rc && __copy_to_guest(arg, &mso, 1) )
2173 rc = -EFAULT;
2174
2175 out:
2176 rcu_unlock_domain(d);
2177 return rc;
2178 }
2179
2180 int mem_sharing_domctl(struct domain *d, struct xen_domctl_mem_sharing_op *mec)
2181 {
2182 int rc;
2183
2184 switch ( mec->op )
2185 {
2186 case XEN_DOMCTL_MEM_SHARING_CONTROL:
2187 rc = mem_sharing_control(d, mec->u.enable, 0);
2188 break;
2189
2190 default:
2191 rc = -ENOSYS;
2192 break;
2193 }
2194
2195 return rc;
2196 }
2197