1 /******************************************************************************
2 * memory.c
3 *
4 * Code to handle memory-related requests.
5 *
6 * Copyright (c) 2003-2004, B Dragovic
7 * Copyright (c) 2003-2005, K A Fraser
8 */
9
10 #include <xen/domain_page.h>
11 #include <xen/types.h>
12 #include <xen/lib.h>
13 #include <xen/mm.h>
14 #include <xen/param.h>
15 #include <xen/perfc.h>
16 #include <xen/sched.h>
17 #include <xen/event.h>
18 #include <xen/paging.h>
19 #include <xen/iocap.h>
20 #include <xen/guest_access.h>
21 #include <xen/hypercall.h>
22 #include <xen/errno.h>
23 #include <xen/numa.h>
24 #include <xen/mem_access.h>
25 #include <xen/trace.h>
26 #include <xen/grant_table.h>
27 #include <asm/current.h>
28 #include <asm/hardirq.h>
29 #include <asm/p2m.h>
30 #include <public/memory.h>
31 #include <xsm/xsm.h>
32
33 #ifdef CONFIG_X86
34 #include <asm/guest.h>
35 #endif
36
37 struct memop_args {
38 /* INPUT */
39 struct domain *domain; /* Domain to be affected. */
40 XEN_GUEST_HANDLE(xen_pfn_t) extent_list; /* List of extent base addrs. */
41 unsigned int nr_extents; /* Number of extents to allocate or free. */
42 unsigned int extent_order; /* Size of each extent. */
43 unsigned int memflags; /* Allocation flags. */
44
45 /* INPUT/OUTPUT */
46 unsigned int nr_done; /* Number of extents processed so far. */
47 int preempted; /* Was the hypercall preempted? */
48 };
49
50 #ifndef CONFIG_CTLDOM_MAX_ORDER
51 #define CONFIG_CTLDOM_MAX_ORDER CONFIG_PAGEALLOC_MAX_ORDER
52 #endif
53 #ifndef CONFIG_PTDOM_MAX_ORDER
54 #define CONFIG_PTDOM_MAX_ORDER CONFIG_HWDOM_MAX_ORDER
55 #endif
56
57 static unsigned int __read_mostly domu_max_order = CONFIG_DOMU_MAX_ORDER;
58 static unsigned int __read_mostly ctldom_max_order = CONFIG_CTLDOM_MAX_ORDER;
59 static unsigned int __read_mostly hwdom_max_order = CONFIG_HWDOM_MAX_ORDER;
60 #ifdef CONFIG_HAS_PASSTHROUGH
61 static unsigned int __read_mostly ptdom_max_order = CONFIG_PTDOM_MAX_ORDER;
62 #endif
63
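/*
 * Parse the "memop-max-order" command line option: a comma-separated list
 * of the maximum allocation orders permitted for ordinary domUs, the
 * control domain, the hardware domain and (with passthrough support)
 * domains with passthrough devices.  Empty fields keep the built-in
 * defaults.
 */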
64 static int __init parse_max_order(const char *s)
65 {
66 if ( *s != ',' )
67 domu_max_order = simple_strtoul(s, &s, 0);
68 if ( *s == ',' && *++s != ',' )
69 ctldom_max_order = simple_strtoul(s, &s, 0);
70 if ( *s == ',' && *++s != ',' )
71 hwdom_max_order = simple_strtoul(s, &s, 0);
72 #ifdef CONFIG_HAS_PASSTHROUGH
73 if ( *s == ',' && *++s != ',' )
74 ptdom_max_order = simple_strtoul(s, &s, 0);
75 #endif
76
77 return *s ? -EINVAL : 0;
78 }
79 custom_param("memop-max-order", parse_max_order);
80
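/*
 * Return the maximum allocation order domain d may request, picking the
 * largest of the domU, passthrough, control-domain and hardware-domain
 * limits that apply to it, capped at MAX_ORDER.
 */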
81 static unsigned int max_order(const struct domain *d)
82 {
83 unsigned int order = domu_max_order;
84
85 #ifdef CONFIG_HAS_PASSTHROUGH
86 if ( cache_flush_permitted(d) && order < ptdom_max_order )
87 order = ptdom_max_order;
88 #endif
89
90 if ( is_control_domain(d) && order < ctldom_max_order )
91 order = ctldom_max_order;
92
93 if ( is_hardware_domain(d) && order < hwdom_max_order )
94 order = hwdom_max_order;
95
96 return min(order, MAX_ORDER + 0U);
97 }
98
99 /* Helper to copy a typesafe MFN to guest */
100 static inline
101 unsigned long __copy_mfn_to_guest_offset(XEN_GUEST_HANDLE(xen_pfn_t) hnd,
102 size_t off, mfn_t mfn)
103 {
104 xen_pfn_t mfn_ = mfn_x(mfn);
105
106 return __copy_to_guest_offset(hnd, off, &mfn_, 1);
107 }
108
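/*
 * XENMEM_increase_reservation: allocate a->nr_extents extents of order
 * a->extent_order to a->domain and, for non-translated guests, report the
 * machine addresses back through a->extent_list.
 */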
109 static void increase_reservation(struct memop_args *a)
110 {
111 struct page_info *page;
112 unsigned long i;
113 struct domain *d = a->domain;
114
115 if ( !guest_handle_is_null(a->extent_list) &&
116 !guest_handle_subrange_okay(a->extent_list, a->nr_done,
117 a->nr_extents-1) )
118 return;
119
120 if ( a->extent_order > max_order(current->domain) )
121 return;
122
123 for ( i = a->nr_done; i < a->nr_extents; i++ )
124 {
125 if ( i != a->nr_done && hypercall_preempt_check() )
126 {
127 a->preempted = 1;
128 goto out;
129 }
130
131 page = alloc_domheap_pages(d, a->extent_order, a->memflags);
132 if ( unlikely(page == NULL) )
133 {
134 gdprintk(XENLOG_INFO, "Could not allocate order=%u extent: "
135 "id=%d memflags=%#x (%lu of %u)\n",
136 a->extent_order, d->domain_id, a->memflags,
137 i, a->nr_extents);
138 goto out;
139 }
140
141 /* Inform the domain of the new page's machine address. */
142 if ( !paging_mode_translate(d) &&
143 !guest_handle_is_null(a->extent_list) )
144 {
145 mfn_t mfn = page_to_mfn(page);
146
147 if ( unlikely(__copy_mfn_to_guest_offset(a->extent_list, i, mfn)) )
148 goto out;
149 }
150 }
151
152 out:
153 a->nr_done = i;
154 }
155
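/*
 * XENMEM_populate_physmap: for each GFN in a->extent_list either mark it
 * populate-on-demand or back it with memory (freshly allocated pages, or
 * the domain's own pages for direct-mapped domains) and insert it into
 * the guest physmap.
 */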
156 static void populate_physmap(struct memop_args *a)
157 {
158 struct page_info *page;
159 unsigned int i, j;
160 xen_pfn_t gpfn;
161 struct domain *d = a->domain, *curr_d = current->domain;
162 bool need_tlbflush = false;
163 uint32_t tlbflush_timestamp = 0;
164
165 if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
166 a->nr_extents-1) )
167 return;
168
169 if ( a->extent_order > (a->memflags & MEMF_populate_on_demand ? MAX_ORDER :
170 max_order(curr_d)) )
171 return;
172
173 if ( unlikely(!d->creation_finished) )
174 {
175 /*
176 * With MEMF_no_tlbflush set, alloc_heap_pages() will ignore
177 * TLB-flushes. After VM creation, this is a security issue (it can
178 * make pages accessible to guest B, when guest A may still have a
179 * cached mapping to them). So we do this only during domain creation,
180 * when the domain itself has not yet been unpaused for the first
181 * time.
182 */
183 a->memflags |= MEMF_no_tlbflush;
184 /*
185 * With MEMF_no_icache_flush, alloc_heap_pages() will skip
186 * performing icache flushes. We do it only before domain
187 * creation as once the domain is running there is a danger of
188 * executing instructions from stale caches if icache flush is
189 * delayed.
190 */
191 a->memflags |= MEMF_no_icache_flush;
192 }
193
194 for ( i = a->nr_done; i < a->nr_extents; i++ )
195 {
196 mfn_t mfn;
197
198 if ( i != a->nr_done && hypercall_preempt_check() )
199 {
200 a->preempted = 1;
201 goto out;
202 }
203
204 if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
205 goto out;
206
207 if ( a->memflags & MEMF_populate_on_demand )
208 {
209 /* Disallow populating PoD pages on oneself. */
210 if ( d == curr_d )
211 goto out;
212
213 if ( is_hvm_domain(d) &&
214 guest_physmap_mark_populate_on_demand(d, gpfn,
215 a->extent_order) < 0 )
216 goto out;
217 }
218 else
219 {
220 if ( is_domain_direct_mapped(d) )
221 {
222 mfn = _mfn(gpfn);
223
224 for ( j = 0; j < (1U << a->extent_order); j++,
225 mfn = mfn_add(mfn, 1) )
226 {
227 if ( !mfn_valid(mfn) )
228 {
229 gdprintk(XENLOG_INFO, "Invalid mfn %#"PRI_mfn"\n",
230 mfn_x(mfn));
231 goto out;
232 }
233
234 page = mfn_to_page(mfn);
235 if ( !get_page(page, d) )
236 {
237 gdprintk(XENLOG_INFO,
238 "mfn %#"PRI_mfn" doesn't belong to d%d\n",
239 mfn_x(mfn), d->domain_id);
240 goto out;
241 }
242 put_page(page);
243 }
244
245 mfn = _mfn(gpfn);
246 }
247 else
248 {
249 page = alloc_domheap_pages(d, a->extent_order, a->memflags);
250
251 if ( unlikely(!page) )
252 {
253 gdprintk(XENLOG_INFO,
254 "Could not allocate order=%u extent: id=%d memflags=%#x (%u of %u)\n",
255 a->extent_order, d->domain_id, a->memflags,
256 i, a->nr_extents);
257 goto out;
258 }
259
260 if ( unlikely(a->memflags & MEMF_no_tlbflush) )
261 {
262 for ( j = 0; j < (1U << a->extent_order); j++ )
263 accumulate_tlbflush(&need_tlbflush, &page[j],
264 &tlbflush_timestamp);
265 }
266
267 mfn = page_to_mfn(page);
268 }
269
270 guest_physmap_add_page(d, _gfn(gpfn), mfn, a->extent_order);
271
272 if ( !paging_mode_translate(d) &&
273 /* Inform the domain of the new page's machine address. */
274 unlikely(__copy_mfn_to_guest_offset(a->extent_list, i, mfn)) )
275 goto out;
276 }
277 }
278
279 out:
280 if ( need_tlbflush )
281 filtered_flush_tlb_mask(tlbflush_timestamp);
282
283 if ( a->memflags & MEMF_no_icache_flush )
284 invalidate_icache();
285
286 a->nr_done = i;
287 }
288
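/*
 * Remove the page backing GFN gmfn from the domain's physmap, coping with
 * paged-out, shared and MMIO entries.  The allocation reference is dropped
 * unless the domain is direct-mapped.  Returns 0 on success or a negative
 * errno value.
 */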
289 int guest_remove_page(struct domain *d, unsigned long gmfn)
290 {
291 struct page_info *page;
292 #ifdef CONFIG_X86
293 p2m_type_t p2mt;
294 #endif
295 mfn_t mfn;
296 bool *dont_flush_p, dont_flush;
297 int rc;
298
299 #ifdef CONFIG_X86
300 mfn = get_gfn_query(d, gmfn, &p2mt);
301 if ( unlikely(p2mt == p2m_invalid) || unlikely(p2mt == p2m_mmio_dm) )
302 {
303 put_gfn(d, gmfn);
304
305 return -ENOENT;
306 }
307
308 if ( unlikely(p2m_is_paging(p2mt)) )
309 {
310 /*
311 * If the page hasn't yet been paged out, there is an
312 * actual page that needs to be released.
313 */
314 if ( p2mt == p2m_ram_paging_out )
315 {
316 ASSERT(mfn_valid(mfn));
317 goto obtain_page;
318 }
319
320 rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0);
321 if ( rc )
322 goto out_put_gfn;
323
324 put_gfn(d, gmfn);
325
326 p2m_mem_paging_drop_page(d, _gfn(gmfn), p2mt);
327
328 return 0;
329 }
330 if ( p2mt == p2m_mmio_direct )
331 {
332 rc = clear_mmio_p2m_entry(d, gmfn, mfn, PAGE_ORDER_4K);
333 goto out_put_gfn;
334 }
335 #else
336 mfn = gfn_to_mfn(d, _gfn(gmfn));
337 #endif
338 if ( unlikely(!mfn_valid(mfn)) )
339 {
340 #ifdef CONFIG_X86
341 put_gfn(d, gmfn);
342 #endif
343 gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
344 d->domain_id, gmfn);
345
346 return -EINVAL;
347 }
348
349 #ifdef CONFIG_X86
350 if ( p2m_is_shared(p2mt) )
351 {
352 /*
353 * Unshare the page, bail out on error. We unshare because we
354 * might be the only one using this shared page, and we need to
355 * trigger proper cleanup. Once done, this is like any other page.
356 */
357 rc = mem_sharing_unshare_page(d, gmfn);
358 if ( rc )
359 {
360 mem_sharing_notify_enomem(d, gmfn, false);
361 goto out_put_gfn;
362 }
363 /* Maybe the mfn changed */
364 mfn = get_gfn_query_unlocked(d, gmfn, &p2mt);
365 ASSERT(!p2m_is_shared(p2mt));
366 }
367 #endif /* CONFIG_X86 */
368
369 obtain_page: __maybe_unused;
370 page = mfn_to_page(mfn);
371 if ( unlikely(!get_page(page, d)) )
372 {
373 #ifdef CONFIG_X86
374 put_gfn(d, gmfn);
375 if ( !p2m_is_paging(p2mt) )
376 #endif
377 gdprintk(XENLOG_INFO, "Bad page free for Dom%u GFN %lx\n",
378 d->domain_id, gmfn);
379
380 return -ENXIO;
381 }
382
383 /*
384 * Since we're likely to free the page below, we need to suspend
385 * xenmem_add_to_physmap()'s suppressing of IOMMU TLB flushes.
386 */
387 dont_flush_p = &this_cpu(iommu_dont_flush_iotlb);
388 dont_flush = *dont_flush_p;
389 *dont_flush_p = false;
390
391 rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0);
392
393 *dont_flush_p = dont_flush;
394
395 /*
396 * Due to the lack of an IOMMU on some platforms, domains with a
397 * DMA-capable device must retrieve the same pfn when the populate_physmap
398 * hypercall is called.
399 *
400 * For this purpose (and to match populate_physmap() behavior), the page
401 * is kept allocated.
402 */
403 if ( !rc && !is_domain_direct_mapped(d) )
404 put_page_alloc_ref(page);
405
406 put_page(page);
407
408 #ifdef CONFIG_X86
409 out_put_gfn:
410 put_gfn(d, gmfn);
411 #endif
412
413 /*
414 * Filter out -ENOENT return values that aren't a result of an empty p2m
415 * entry.
416 */
417 return rc != -ENOENT ? rc : -EINVAL;
418 }
419
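/*
 * XENMEM_decrease_reservation: release the extents listed in
 * a->extent_list, letting populate-on-demand reclaim what it can and
 * removing the remaining pages via guest_remove_page().
 */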
420 static void decrease_reservation(struct memop_args *a)
421 {
422 unsigned long i, j;
423 xen_pfn_t gmfn;
424
425 if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
426 a->nr_extents-1) ||
427 a->extent_order > max_order(current->domain) )
428 return;
429
430 for ( i = a->nr_done; i < a->nr_extents; i++ )
431 {
432 unsigned long pod_done;
433
434 if ( i != a->nr_done && hypercall_preempt_check() )
435 {
436 a->preempted = 1;
437 goto out;
438 }
439
440 if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
441 goto out;
442
443 if ( tb_init_done )
444 {
445 struct {
446 u64 gfn;
447 int d:16,order:16;
448 } t;
449
450 t.gfn = gmfn;
451 t.d = a->domain->domain_id;
452 t.order = a->extent_order;
453
454 __trace_var(TRC_MEM_DECREASE_RESERVATION, 0, sizeof(t), &t);
455 }
456
457 /* See if populate-on-demand wants to handle this */
458 pod_done = is_hvm_domain(a->domain) ?
459 p2m_pod_decrease_reservation(a->domain, _gfn(gmfn),
460 a->extent_order) : 0;
461
462 /*
463 * Look for pages not handled by p2m_pod_decrease_reservation().
464 *
465 * guest_remove_page() will return -ENOENT for pages which have already
466 * been removed by p2m_pod_decrease_reservation(); so expect to see
467 * exactly pod_done failures. Any more means that there were invalid
468 * entries before p2m_pod_decrease_reservation() was called.
469 */
470 for ( j = 0; j + pod_done < (1UL << a->extent_order); j++ )
471 {
472 switch ( guest_remove_page(a->domain, gmfn + j) )
473 {
474 case 0:
475 break;
476 case -ENOENT:
477 if ( !pod_done )
478 goto out;
479 --pod_done;
480 break;
481 default:
482 goto out;
483 }
484 }
485 }
486
487 out:
488 a->nr_done = i;
489 }
490
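/*
 * Translate a XENMEMF_node() request in guest-supplied memory flags into
 * MEMF_node() allocation flags.  Only the hardware and control domains may
 * steer allocations to a specific node; an exact-node request from any
 * other domain, or an out-of-range node, makes this fail.
 */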
491 static bool propagate_node(unsigned int xmf, unsigned int *memflags)
492 {
493 const struct domain *currd = current->domain;
494
495 BUILD_BUG_ON(XENMEMF_get_node(0) != NUMA_NO_NODE);
496 BUILD_BUG_ON(MEMF_get_node(0) != NUMA_NO_NODE);
497
498 if ( XENMEMF_get_node(xmf) == NUMA_NO_NODE )
499 return true;
500
501 if ( is_hardware_domain(currd) || is_control_domain(currd) )
502 {
503 if ( XENMEMF_get_node(xmf) >= MAX_NUMNODES )
504 return false;
505
506 *memflags |= MEMF_node(XENMEMF_get_node(xmf));
507 if ( xmf & XENMEMF_exact_node_request )
508 *memflags |= MEMF_exact_node;
509 }
510 else if ( xmf & XENMEMF_exact_node_request )
511 return false;
512
513 return true;
514 }
515
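/*
 * XENMEM_exchange: trade a guest's existing pages, chunk by chunk, for
 * freshly allocated ones with different properties (e.g. address width),
 * creating a continuation when preempted.  Each chunk either completes
 * fully or is rolled back.
 */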
516 static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
517 {
518 struct xen_memory_exchange exch;
519 PAGE_LIST_HEAD(in_chunk_list);
520 PAGE_LIST_HEAD(out_chunk_list);
521 unsigned long in_chunk_order, out_chunk_order;
522 xen_pfn_t gpfn, gmfn;
523 mfn_t mfn;
524 unsigned long i, j, k;
525 unsigned int memflags = 0;
526 long rc = 0;
527 struct domain *d;
528 struct page_info *page;
529
530 if ( copy_from_guest(&exch, arg, 1) )
531 return -EFAULT;
532
533 if ( max(exch.in.extent_order, exch.out.extent_order) >
534 max_order(current->domain) )
535 {
536 rc = -EPERM;
537 goto fail_early;
538 }
539
540 /* Various sanity checks. */
541 if ( (exch.nr_exchanged > exch.in.nr_extents) ||
542 /* Input and output domain identifiers match? */
543 (exch.in.domid != exch.out.domid) ||
544 /* Sizes of input and output lists do not overflow a long? */
545 ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
546 ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
547 /* Sizes of input and output lists match? */
548 ((exch.in.nr_extents << exch.in.extent_order) !=
549 (exch.out.nr_extents << exch.out.extent_order)) )
550 {
551 rc = -EINVAL;
552 goto fail_early;
553 }
554
555 if ( exch.nr_exchanged == exch.in.nr_extents )
556 return 0;
557
558 if ( !guest_handle_subrange_okay(exch.in.extent_start, exch.nr_exchanged,
559 exch.in.nr_extents - 1) )
560 {
561 rc = -EFAULT;
562 goto fail_early;
563 }
564
565 if ( exch.in.extent_order <= exch.out.extent_order )
566 {
567 in_chunk_order = exch.out.extent_order - exch.in.extent_order;
568 out_chunk_order = 0;
569
570 if ( !guest_handle_subrange_okay(exch.out.extent_start,
571 exch.nr_exchanged >> in_chunk_order,
572 exch.out.nr_extents - 1) )
573 {
574 rc = -EFAULT;
575 goto fail_early;
576 }
577 }
578 else
579 {
580 in_chunk_order = 0;
581 out_chunk_order = exch.in.extent_order - exch.out.extent_order;
582
583 if ( !guest_handle_subrange_okay(exch.out.extent_start,
584 exch.nr_exchanged << out_chunk_order,
585 exch.out.nr_extents - 1) )
586 {
587 rc = -EFAULT;
588 goto fail_early;
589 }
590 }
591
592 if ( unlikely(!propagate_node(exch.out.mem_flags, &memflags)) )
593 {
594 rc = -EINVAL;
595 goto fail_early;
596 }
597
598 d = rcu_lock_domain_by_any_id(exch.in.domid);
599 if ( d == NULL )
600 {
601 rc = -ESRCH;
602 goto fail_early;
603 }
604
605 rc = xsm_memory_exchange(XSM_TARGET, d);
606 if ( rc )
607 {
608 rcu_unlock_domain(d);
609 goto fail_early;
610 }
611
612 memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
613 d,
614 XENMEMF_get_address_bits(exch.out.mem_flags) ? :
615 (BITS_PER_LONG+PAGE_SHIFT)));
616
617 for ( i = (exch.nr_exchanged >> in_chunk_order);
618 i < (exch.in.nr_extents >> in_chunk_order);
619 i++ )
620 {
621 if ( i != (exch.nr_exchanged >> in_chunk_order) &&
622 hypercall_preempt_check() )
623 {
624 exch.nr_exchanged = i << in_chunk_order;
625 rcu_unlock_domain(d);
626 if ( __copy_field_to_guest(arg, &exch, nr_exchanged) )
627 return -EFAULT;
628 return hypercall_create_continuation(
629 __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
630 }
631
632 /* Steal a chunk's worth of input pages from the domain. */
633 for ( j = 0; j < (1UL << in_chunk_order); j++ )
634 {
635 if ( unlikely(__copy_from_guest_offset(
636 &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
637 {
638 rc = -EFAULT;
639 goto fail;
640 }
641
642 for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
643 {
644 #ifdef CONFIG_X86
645 p2m_type_t p2mt;
646
647 /* Shared pages cannot be exchanged */
648 mfn = get_gfn_unshare(d, gmfn + k, &p2mt);
649 if ( p2m_is_shared(p2mt) )
650 {
651 put_gfn(d, gmfn + k);
652 rc = -ENOMEM;
653 goto fail;
654 }
655 #else /* !CONFIG_X86 */
656 mfn = gfn_to_mfn(d, _gfn(gmfn + k));
657 #endif
658 if ( unlikely(!mfn_valid(mfn)) )
659 {
660 #ifdef CONFIG_X86
661 put_gfn(d, gmfn + k);
662 #endif
663 rc = -EINVAL;
664 goto fail;
665 }
666
667 page = mfn_to_page(mfn);
668
669 rc = steal_page(d, page, MEMF_no_refcount);
670 if ( unlikely(rc) )
671 {
672 #ifdef CONFIG_X86
673 put_gfn(d, gmfn + k);
674 #endif
675 goto fail;
676 }
677
678 page_list_add(page, &in_chunk_list);
679 #ifdef CONFIG_X86
680 put_gfn(d, gmfn + k);
681 #endif
682 }
683 }
684
685 /* Allocate a chunk's worth of anonymous output pages. */
686 for ( j = 0; j < (1UL << out_chunk_order); j++ )
687 {
688 page = alloc_domheap_pages(d, exch.out.extent_order,
689 MEMF_no_owner | memflags);
690 if ( unlikely(page == NULL) )
691 {
692 rc = -ENOMEM;
693 goto fail;
694 }
695
696 page_list_add(page, &out_chunk_list);
697 }
698
699 /*
700 * Success! Beyond this point we cannot fail for this chunk.
701 */
702
703 /*
704 * These pages have already had owner and reference cleared.
705 * Do the final two steps: Remove from the physmap, and free
706 * them.
707 */
708 while ( (page = page_list_remove_head(&in_chunk_list)) )
709 {
710 unsigned long gfn;
711
712 mfn = page_to_mfn(page);
713 gfn = mfn_to_gmfn(d, mfn_x(mfn));
714 /* Pages were unshared above */
715 BUG_ON(SHARED_M2P(gfn));
716 if ( guest_physmap_remove_page(d, _gfn(gfn), mfn, 0) )
717 domain_crash(d);
718 free_domheap_page(page);
719 }
720
721 /* Assign each output page to the domain. */
722 for ( j = 0; (page = page_list_remove_head(&out_chunk_list)); ++j )
723 {
724 if ( assign_pages(d, page, exch.out.extent_order,
725 MEMF_no_refcount) )
726 {
727 unsigned long dec_count;
728 bool_t drop_dom_ref;
729
730 /*
731 * Pages in in_chunk_list were stolen without
732 * decreasing tot_pages.  If the domain is dying when we
733 * assign pages, we need to decrease the count.  Pages that
734 * have already been assigned are covered by
735 * domain_relinquish_resources().
736 */
737 dec_count = (((1UL << exch.in.extent_order) *
738 (1UL << in_chunk_order)) -
739 (j * (1UL << exch.out.extent_order)));
740
741 spin_lock(&d->page_alloc_lock);
742 drop_dom_ref = (dec_count &&
743 !domain_adjust_tot_pages(d, -dec_count));
744 spin_unlock(&d->page_alloc_lock);
745
746 if ( drop_dom_ref )
747 put_domain(d);
748
749 free_domheap_pages(page, exch.out.extent_order);
750 goto dying;
751 }
752
753 if ( __copy_from_guest_offset(&gpfn, exch.out.extent_start,
754 (i << out_chunk_order) + j, 1) )
755 {
756 rc = -EFAULT;
757 continue;
758 }
759
760 mfn = page_to_mfn(page);
761 guest_physmap_add_page(d, _gfn(gpfn), mfn,
762 exch.out.extent_order);
763
764 if ( !paging_mode_translate(d) &&
765 __copy_mfn_to_guest_offset(exch.out.extent_start,
766 (i << out_chunk_order) + j,
767 mfn) )
768 rc = -EFAULT;
769 }
770 BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) );
771
772 if ( rc )
773 goto fail;
774 }
775
776 exch.nr_exchanged = exch.in.nr_extents;
777 if ( __copy_field_to_guest(arg, &exch, nr_exchanged) )
778 rc = -EFAULT;
779 rcu_unlock_domain(d);
780 return rc;
781
782 /*
783 * Failed a chunk! Free any partial chunk work. Tell caller how many
784 * chunks succeeded.
785 */
786 fail:
787 /*
788 * Reassign any input pages we managed to steal. NB that if the assign
789 * fails again, we're on the hook for freeing the page, since we've already
790 * cleared PGC_allocated.
791 */
792 while ( (page = page_list_remove_head(&in_chunk_list)) )
793 if ( assign_pages(d, page, 0, MEMF_no_refcount) )
794 {
795 BUG_ON(!d->is_dying);
796 free_domheap_page(page);
797 }
798
799 dying:
800 rcu_unlock_domain(d);
801 /* Free any output pages we managed to allocate. */
802 while ( (page = page_list_remove_head(&out_chunk_list)) )
803 free_domheap_pages(page, exch.out.extent_order);
804
805 exch.nr_exchanged = i << in_chunk_order;
806
807 fail_early:
808 if ( __copy_field_to_guest(arg, &exch, nr_exchanged) )
809 rc = -EFAULT;
810 return rc;
811 }
812
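/*
 * Handle XENMEM_add_to_physmap for a translated domain.  Single-entry
 * spaces are forwarded to xenmem_add_to_physmap_one();
 * XENMAPSPACE_gmfn_range is iterated here with continuation support,
 * batching IOMMU TLB flushes until the end of the batch.
 */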
813 int xenmem_add_to_physmap(struct domain *d, struct xen_add_to_physmap *xatp,
814 unsigned int start)
815 {
816 unsigned int done = 0;
817 long rc = 0;
818 union add_to_physmap_extra extra = {};
819 struct page_info *pages[16];
820
821 ASSERT(paging_mode_translate(d));
822
823 if ( xatp->space == XENMAPSPACE_gmfn_foreign )
824 extra.foreign_domid = DOMID_INVALID;
825
826 if ( xatp->space != XENMAPSPACE_gmfn_range )
827 return xenmem_add_to_physmap_one(d, xatp->space, extra,
828 xatp->idx, _gfn(xatp->gpfn));
829
830 if ( xatp->size < start )
831 return -EILSEQ;
832
833 xatp->idx += start;
834 xatp->gpfn += start;
835 xatp->size -= start;
836
837 if ( is_iommu_enabled(d) )
838 {
839 this_cpu(iommu_dont_flush_iotlb) = 1;
840 extra.ppage = &pages[0];
841 }
842
843 while ( xatp->size > done )
844 {
845 rc = xenmem_add_to_physmap_one(d, XENMAPSPACE_gmfn, extra,
846 xatp->idx, _gfn(xatp->gpfn));
847 if ( rc < 0 )
848 break;
849
850 xatp->idx++;
851 xatp->gpfn++;
852
853 if ( extra.ppage )
854 ++extra.ppage;
855
856 /* Check for continuation if it's not the last iteration. */
857 if ( (++done >= ARRAY_SIZE(pages) && extra.ppage) ||
858 (xatp->size > done && hypercall_preempt_check()) )
859 {
860 rc = start + done;
861 break;
862 }
863 }
864
865 if ( is_iommu_enabled(d) )
866 {
867 int ret;
868 unsigned int i;
869
870 this_cpu(iommu_dont_flush_iotlb) = 0;
871
872 ret = iommu_iotlb_flush(d, _dfn(xatp->idx - done), done,
873 IOMMU_FLUSHF_added | IOMMU_FLUSHF_modified);
874 if ( unlikely(ret) && rc >= 0 )
875 rc = ret;
876
877 /*
878 * Now that the IOMMU TLB flush was done for the original GFN, drop
879 * the page references. The 2nd flush below is fine to make later, as
880 * whoever removes the page again from its new GFN will have to do
881 * another flush anyway.
882 */
883 for ( i = 0; i < done; ++i )
884 put_page(pages[i]);
885
886 ret = iommu_iotlb_flush(d, _dfn(xatp->gpfn - done), done,
887 IOMMU_FLUSHF_added | IOMMU_FLUSHF_modified);
888 if ( unlikely(ret) && rc >= 0 )
889 rc = ret;
890 }
891
892 return rc;
893 }
894
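/*
 * Handle XENMEM_add_to_physmap_batch: map each (idx, gpfn) pair via
 * xenmem_add_to_physmap_one(), writing per-entry error codes back to the
 * guest and returning the next extent to process when preempted.
 */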
895 static int xenmem_add_to_physmap_batch(struct domain *d,
896 struct xen_add_to_physmap_batch *xatpb,
897 unsigned int extent)
898 {
899 union add_to_physmap_extra extra = {};
900
901 if ( unlikely(xatpb->size < extent) )
902 return -EILSEQ;
903
904 if ( unlikely(xatpb->size == extent) )
905 return extent ? -EILSEQ : 0;
906
907 if ( !guest_handle_subrange_okay(xatpb->idxs, extent, xatpb->size - 1) ||
908 !guest_handle_subrange_okay(xatpb->gpfns, extent, xatpb->size - 1) ||
909 !guest_handle_subrange_okay(xatpb->errs, extent, xatpb->size - 1) )
910 return -EFAULT;
911
912 switch ( xatpb->space )
913 {
914 case XENMAPSPACE_dev_mmio:
915 /* res0 is reserved for future use. */
916 if ( xatpb->u.res0 )
917 return -EOPNOTSUPP;
918 break;
919
920 case XENMAPSPACE_gmfn_foreign:
921 extra.foreign_domid = xatpb->u.foreign_domid;
922 break;
923 }
924
925 while ( xatpb->size > extent )
926 {
927 xen_ulong_t idx;
928 xen_pfn_t gpfn;
929 int rc;
930
931 if ( unlikely(__copy_from_guest_offset(&idx, xatpb->idxs,
932 extent, 1)) ||
933 unlikely(__copy_from_guest_offset(&gpfn, xatpb->gpfns,
934 extent, 1)) )
935 return -EFAULT;
936
937 rc = xenmem_add_to_physmap_one(d, xatpb->space, extra,
938 idx, _gfn(gpfn));
939
940 if ( unlikely(__copy_to_guest_offset(xatpb->errs, extent, &rc, 1)) )
941 return -EFAULT;
942
943 /* Check for continuation if it's not the last iteration. */
944 if ( xatpb->size > ++extent && hypercall_preempt_check() )
945 return extent;
946 }
947
948 return 0;
949 }
950
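/*
 * Fill in struct memop_args from a guest-supplied xen_memory_reservation,
 * converting address-width and (v)NUMA placement requests into MEMF_*
 * allocation flags.
 */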
951 static int construct_memop_from_reservation(
952 const struct xen_memory_reservation *r,
953 struct memop_args *a)
954 {
955 unsigned int address_bits;
956
957 a->extent_list = r->extent_start;
958 a->nr_extents = r->nr_extents;
959 a->extent_order = r->extent_order;
960 a->memflags = 0;
961
962 address_bits = XENMEMF_get_address_bits(r->mem_flags);
963 if ( (address_bits != 0) &&
964 (address_bits < (get_order_from_pages(max_page) + PAGE_SHIFT)) )
965 {
966 if ( address_bits <= PAGE_SHIFT )
967 return -EINVAL;
968 a->memflags = MEMF_bits(address_bits);
969 }
970
971 if ( r->mem_flags & XENMEMF_vnode )
972 {
973 nodeid_t vnode, pnode;
974 struct domain *d = a->domain;
975
976 read_lock(&d->vnuma_rwlock);
977 if ( d->vnuma )
978 {
979 vnode = XENMEMF_get_node(r->mem_flags);
980 if ( vnode >= d->vnuma->nr_vnodes )
981 {
982 read_unlock(&d->vnuma_rwlock);
983 return -EINVAL;
984 }
985
986 pnode = d->vnuma->vnode_to_pnode[vnode];
987 if ( pnode != NUMA_NO_NODE )
988 {
989 a->memflags |= MEMF_node(pnode);
990 if ( r->mem_flags & XENMEMF_exact_node_request )
991 a->memflags |= MEMF_exact_node;
992 }
993 }
994 read_unlock(&d->vnuma_rwlock);
995 }
996 else if ( unlikely(!propagate_node(r->mem_flags, &a->memflags)) )
997 return -EINVAL;
998
999 return 0;
1000 }
1001
1002 #ifdef CONFIG_HAS_PASSTHROUGH
1003 struct get_reserved_device_memory {
1004 struct xen_reserved_device_memory_map map;
1005 unsigned int used_entries;
1006 };
1007
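/*
 * Callback for iommu_get_reserved_device_memory(): copy each reserved
 * device memory range that matches the requested device (or all ranges,
 * with XENMEM_RDM_ALL) into the guest buffer, counting entries as we go.
 */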
1008 static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr,
1009 u32 id, void *ctxt)
1010 {
1011 struct get_reserved_device_memory *grdm = ctxt;
1012 uint32_t sbdf = PCI_SBDF3(grdm->map.dev.pci.seg, grdm->map.dev.pci.bus,
1013 grdm->map.dev.pci.devfn).sbdf;
1014
1015 if ( !(grdm->map.flags & XENMEM_RDM_ALL) && (sbdf != id) )
1016 return 0;
1017
1018 if ( grdm->used_entries < grdm->map.nr_entries )
1019 {
1020 struct xen_reserved_device_memory rdm = {
1021 .start_pfn = start, .nr_pages = nr
1022 };
1023
1024 if ( __copy_to_guest_offset(grdm->map.buffer, grdm->used_entries,
1025 &rdm, 1) )
1026 return -EFAULT;
1027 }
1028
1029 ++grdm->used_entries;
1030
1031 return 1;
1032 }
1033 #endif
1034
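/*
 * Common permission checks for XENMEM_add_to_physmap{,_batch}: the target
 * must be a translated domain, XENMAPSPACE_dev_mmio is restricted to the
 * hardware domain mapping into itself, and XSM must approve.
 */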
1035 static long xatp_permission_check(struct domain *d, unsigned int space)
1036 {
1037 if ( !paging_mode_translate(d) )
1038 return -EACCES;
1039
1040 /*
1041 * XENMAPSPACE_dev_mmio mappings are only supported for the hardware
1042 * domain, and only for mapping this kind of space into itself.
1043 */
1044 if ( (space == XENMAPSPACE_dev_mmio) &&
1045 (!is_hardware_domain(d) || (d != current->domain)) )
1046 return -EACCES;
1047
1048 return xsm_add_to_physmap(XSM_TARGET, current->domain, d);
1049 }
1050
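/*
 * XENMEM_resource_grant_table: look up nr_frames shared or status grant
 * table frames of domain d, starting at the given frame offset, and fill
 * mfn_list[] with their MFNs.
 */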
1051 static int acquire_grant_table(struct domain *d, unsigned int id,
1052 unsigned long frame,
1053 unsigned int nr_frames,
1054 xen_pfn_t mfn_list[])
1055 {
1056 unsigned int i = nr_frames;
1057
1058 /* Iterate backwards in case table needs to grow */
1059 while ( i-- != 0 )
1060 {
1061 mfn_t mfn = INVALID_MFN;
1062 int rc;
1063
1064 switch ( id )
1065 {
1066 case XENMEM_resource_grant_table_id_shared:
1067 rc = gnttab_get_shared_frame(d, frame + i, &mfn);
1068 break;
1069
1070 case XENMEM_resource_grant_table_id_status:
1071 rc = gnttab_get_status_frame(d, frame + i, &mfn);
1072 break;
1073
1074 default:
1075 rc = -EINVAL;
1076 break;
1077 }
1078
1079 if ( rc )
1080 return rc;
1081
1082 ASSERT(!mfn_eq(mfn, INVALID_MFN));
1083 mfn_list[i] = mfn_x(mfn);
1084 }
1085
1086 return 0;
1087 }
1088
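/*
 * XENMEM_acquire_resource: translate a guest-specified resource (e.g. a
 * grant table) of the target domain into a list of MFNs, which are either
 * copied back verbatim (non-translated callers) or mapped into the
 * caller's physmap at the supplied GFNs (translated callers).
 */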
1089 static int acquire_resource(
1090 XEN_GUEST_HANDLE_PARAM(xen_mem_acquire_resource_t) arg)
1091 {
1092 struct domain *d, *currd = current->domain;
1093 xen_mem_acquire_resource_t xmar;
1094 /*
1095 * The mfn_list and gfn_list (below) arrays are ok on stack for the
1096 * moment since they are small, but if they need to grow in future
1097 * use-cases then per-CPU arrays or heap allocations may be required.
1098 */
1099 xen_pfn_t mfn_list[32];
1100 int rc;
1101
1102 /*
1103 * FIXME: Until foreign pages inserted into the P2M are properly
1104 * reference counted, it is unsafe to allow mapping of
1105 * resource pages unless the caller is the hardware domain.
1106 */
1107 if ( paging_mode_translate(currd) && !is_hardware_domain(currd) )
1108 return -EACCES;
1109
1110 if ( copy_from_guest(&xmar, arg, 1) )
1111 return -EFAULT;
1112
1113 if ( xmar.pad != 0 )
1114 return -EINVAL;
1115
1116 if ( guest_handle_is_null(xmar.frame_list) )
1117 {
1118 if ( xmar.nr_frames )
1119 return -EINVAL;
1120
1121 xmar.nr_frames = ARRAY_SIZE(mfn_list);
1122
1123 if ( __copy_field_to_guest(arg, &xmar, nr_frames) )
1124 return -EFAULT;
1125
1126 return 0;
1127 }
1128
1129 if ( xmar.nr_frames > ARRAY_SIZE(mfn_list) )
1130 return -E2BIG;
1131
1132 rc = rcu_lock_remote_domain_by_id(xmar.domid, &d);
1133 if ( rc )
1134 return rc;
1135
1136 rc = xsm_domain_resource_map(XSM_DM_PRIV, d);
1137 if ( rc )
1138 goto out;
1139
1140 switch ( xmar.type )
1141 {
1142 case XENMEM_resource_grant_table:
1143 rc = acquire_grant_table(d, xmar.id, xmar.frame, xmar.nr_frames,
1144 mfn_list);
1145 break;
1146
1147 default:
1148 rc = arch_acquire_resource(d, xmar.type, xmar.id, xmar.frame,
1149 xmar.nr_frames, mfn_list);
1150 break;
1151 }
1152
1153 if ( rc )
1154 goto out;
1155
1156 if ( !paging_mode_translate(currd) )
1157 {
1158 if ( copy_to_guest(xmar.frame_list, mfn_list, xmar.nr_frames) )
1159 rc = -EFAULT;
1160 }
1161 else
1162 {
1163 xen_pfn_t gfn_list[ARRAY_SIZE(mfn_list)];
1164 unsigned int i;
1165
1166 if ( copy_from_guest(gfn_list, xmar.frame_list, xmar.nr_frames) )
1167 rc = -EFAULT;
1168
1169 for ( i = 0; !rc && i < xmar.nr_frames; i++ )
1170 {
1171 rc = set_foreign_p2m_entry(currd, gfn_list[i],
1172 _mfn(mfn_list[i]));
1173 /* rc should be -EIO for any iteration other than the first */
1174 if ( rc && i )
1175 rc = -EIO;
1176 }
1177 }
1178
1179 out:
1180 rcu_unlock_domain(d);
1181
1182 return rc;
1183 }
1184
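/* Top-level dispatcher for the XENMEM_* hypercall sub-ops. */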
1185 long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
1186 {
1187 struct domain *d, *curr_d = current->domain;
1188 long rc;
1189 struct xen_memory_reservation reservation;
1190 struct memop_args args;
1191 domid_t domid;
1192 unsigned long start_extent = cmd >> MEMOP_EXTENT_SHIFT;
1193 int op = cmd & MEMOP_CMD_MASK;
1194
1195 switch ( op )
1196 {
1197 case XENMEM_increase_reservation:
1198 case XENMEM_decrease_reservation:
1199 case XENMEM_populate_physmap:
1200 if ( copy_from_guest(&reservation, arg, 1) )
1201 return start_extent;
1202
1203 /* Is size too large for us to encode a continuation? */
1204 if ( reservation.nr_extents > (UINT_MAX >> MEMOP_EXTENT_SHIFT) )
1205 return start_extent;
1206
1207 if ( unlikely(start_extent >= reservation.nr_extents) )
1208 return start_extent;
1209
1210 d = rcu_lock_domain_by_any_id(reservation.domid);
1211 if ( d == NULL )
1212 return start_extent;
1213 args.domain = d;
1214
1215 if ( construct_memop_from_reservation(&reservation, &args) )
1216 {
1217 rcu_unlock_domain(d);
1218 return start_extent;
1219 }
1220
1221 args.nr_done = start_extent;
1222 args.preempted = 0;
1223
1224 if ( op == XENMEM_populate_physmap
1225 && (reservation.mem_flags & XENMEMF_populate_on_demand) )
1226 args.memflags |= MEMF_populate_on_demand;
1227
1228 if ( xsm_memory_adjust_reservation(XSM_TARGET, curr_d, d) )
1229 {
1230 rcu_unlock_domain(d);
1231 return start_extent;
1232 }
1233
1234 #ifdef CONFIG_X86
1235 if ( pv_shim && op != XENMEM_decrease_reservation && !start_extent )
1236 /* Avoid calling pv_shim_online_memory when in a continuation. */
1237 pv_shim_online_memory(args.nr_extents, args.extent_order);
1238 #endif
1239
1240 switch ( op )
1241 {
1242 case XENMEM_increase_reservation:
1243 increase_reservation(&args);
1244 break;
1245 case XENMEM_decrease_reservation:
1246 decrease_reservation(&args);
1247 break;
1248 default: /* XENMEM_populate_physmap */
1249 populate_physmap(&args);
1250 break;
1251 }
1252
1253 rcu_unlock_domain(d);
1254
1255 rc = args.nr_done;
1256
1257 if ( args.preempted )
1258 return hypercall_create_continuation(
1259 __HYPERVISOR_memory_op, "lh",
1260 op | (rc << MEMOP_EXTENT_SHIFT), arg);
1261
1262 #ifdef CONFIG_X86
1263 if ( pv_shim && op == XENMEM_decrease_reservation )
1264 /*
1265 * Only call pv_shim_offline_memory when the hypercall has
1266 * finished. Note that nr_done is used to cope in case the
1267 * hypercall has failed and only part of the extents were
1268 * processed.
1269 */
1270 pv_shim_offline_memory(args.nr_done, args.extent_order);
1271 #endif
1272
1273 break;
1274
1275 case XENMEM_exchange:
1276 if ( unlikely(start_extent) )
1277 return -EINVAL;
1278
1279 rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
1280 break;
1281
1282 case XENMEM_maximum_ram_page:
1283 if ( unlikely(start_extent) )
1284 return -EINVAL;
1285
1286 rc = max_page;
1287 break;
1288
1289 case XENMEM_current_reservation:
1290 case XENMEM_maximum_reservation:
1291 case XENMEM_maximum_gpfn:
1292 if ( unlikely(start_extent) )
1293 return -EINVAL;
1294
1295 if ( copy_from_guest(&domid, arg, 1) )
1296 return -EFAULT;
1297
1298 d = rcu_lock_domain_by_any_id(domid);
1299 if ( d == NULL )
1300 return -ESRCH;
1301
1302 rc = xsm_memory_stat_reservation(XSM_TARGET, curr_d, d);
1303 if ( rc )
1304 {
1305 rcu_unlock_domain(d);
1306 return rc;
1307 }
1308
1309 switch ( op )
1310 {
1311 case XENMEM_current_reservation:
1312 rc = domain_tot_pages(d);
1313 break;
1314 case XENMEM_maximum_reservation:
1315 rc = d->max_pages;
1316 break;
1317 default:
1318 ASSERT(op == XENMEM_maximum_gpfn);
1319 rc = domain_get_maximum_gpfn(d);
1320 break;
1321 }
1322
1323 rcu_unlock_domain(d);
1324
1325 break;
1326
1327 case XENMEM_add_to_physmap:
1328 {
1329 struct xen_add_to_physmap xatp;
1330
1331 BUILD_BUG_ON((typeof(xatp.size))-1 > (UINT_MAX >> MEMOP_EXTENT_SHIFT));
1332
1333 /* Check for malicious or buggy input. */
1334 if ( start_extent != (typeof(xatp.size))start_extent )
1335 return -EDOM;
1336
1337 if ( copy_from_guest(&xatp, arg, 1) )
1338 return -EFAULT;
1339
1340 /* Foreign mapping is only possible via add_to_physmap_batch. */
1341 if ( xatp.space == XENMAPSPACE_gmfn_foreign )
1342 return -ENOSYS;
1343
1344 d = rcu_lock_domain_by_any_id(xatp.domid);
1345 if ( d == NULL )
1346 return -ESRCH;
1347
1348 rc = xatp_permission_check(d, xatp.space);
1349 if ( rc )
1350 {
1351 rcu_unlock_domain(d);
1352 return rc;
1353 }
1354
1355 rc = xenmem_add_to_physmap(d, &xatp, start_extent);
1356
1357 rcu_unlock_domain(d);
1358
1359 if ( xatp.space == XENMAPSPACE_gmfn_range && rc > 0 )
1360 rc = hypercall_create_continuation(
1361 __HYPERVISOR_memory_op, "lh",
1362 op | (rc << MEMOP_EXTENT_SHIFT), arg);
1363
1364 return rc;
1365 }
1366
1367 case XENMEM_add_to_physmap_batch:
1368 {
1369 struct xen_add_to_physmap_batch xatpb;
1370
1371 BUILD_BUG_ON((typeof(xatpb.size))-1 >
1372 (UINT_MAX >> MEMOP_EXTENT_SHIFT));
1373
1374 /* Check for malicious or buggy input. */
1375 if ( start_extent != (typeof(xatpb.size))start_extent )
1376 return -EDOM;
1377
1378 if ( copy_from_guest(&xatpb, arg, 1) )
1379 return -EFAULT;
1380
1381 /* This mapspace is unsupported for this hypercall. */
1382 if ( xatpb.space == XENMAPSPACE_gmfn_range )
1383 return -EOPNOTSUPP;
1384
1385 d = rcu_lock_domain_by_any_id(xatpb.domid);
1386 if ( d == NULL )
1387 return -ESRCH;
1388
1389 rc = xatp_permission_check(d, xatpb.space);
1390 if ( rc )
1391 {
1392 rcu_unlock_domain(d);
1393 return rc;
1394 }
1395
1396 rc = xenmem_add_to_physmap_batch(d, &xatpb, start_extent);
1397
1398 rcu_unlock_domain(d);
1399
1400 if ( rc > 0 )
1401 rc = hypercall_create_continuation(
1402 __HYPERVISOR_memory_op, "lh",
1403 op | (rc << MEMOP_EXTENT_SHIFT), arg);
1404
1405 return rc;
1406 }
1407
1408 case XENMEM_remove_from_physmap:
1409 {
1410 struct xen_remove_from_physmap xrfp;
1411 struct page_info *page;
1412
1413 if ( unlikely(start_extent) )
1414 return -EINVAL;
1415
1416 if ( copy_from_guest(&xrfp, arg, 1) )
1417 return -EFAULT;
1418
1419 d = rcu_lock_domain_by_any_id(xrfp.domid);
1420 if ( d == NULL )
1421 return -ESRCH;
1422
1423 rc = paging_mode_translate(d)
1424 ? xsm_remove_from_physmap(XSM_TARGET, curr_d, d)
1425 : -EACCES;
1426 if ( rc )
1427 {
1428 rcu_unlock_domain(d);
1429 return rc;
1430 }
1431
1432 page = get_page_from_gfn(d, xrfp.gpfn, NULL, P2M_ALLOC);
1433 if ( page )
1434 {
1435 rc = guest_physmap_remove_page(d, _gfn(xrfp.gpfn),
1436 page_to_mfn(page), 0);
1437 put_page(page);
1438 }
1439 else
1440 rc = -ENOENT;
1441
1442 rcu_unlock_domain(d);
1443
1444 break;
1445 }
1446
1447 case XENMEM_access_op:
1448 rc = mem_access_memop(cmd, guest_handle_cast(arg, xen_mem_access_op_t));
1449 break;
1450
1451 case XENMEM_claim_pages:
1452 if ( unlikely(start_extent) )
1453 return -EINVAL;
1454
1455 if ( copy_from_guest(&reservation, arg, 1) )
1456 return -EFAULT;
1457
1458 if ( !guest_handle_is_null(reservation.extent_start) )
1459 return -EINVAL;
1460
1461 if ( reservation.extent_order != 0 )
1462 return -EINVAL;
1463
1464 if ( reservation.mem_flags != 0 )
1465 return -EINVAL;
1466
1467 d = rcu_lock_domain_by_id(reservation.domid);
1468 if ( d == NULL )
1469 return -EINVAL;
1470
1471 rc = xsm_claim_pages(XSM_PRIV, d);
1472
1473 if ( !rc )
1474 rc = domain_set_outstanding_pages(d, reservation.nr_extents);
1475
1476 rcu_unlock_domain(d);
1477
1478 break;
1479
1480 case XENMEM_get_vnumainfo:
1481 {
1482 struct xen_vnuma_topology_info topology;
1483 unsigned int dom_vnodes, dom_vranges, dom_vcpus;
1484 struct vnuma_info tmp;
1485
1486 if ( unlikely(start_extent) )
1487 return -EINVAL;
1488
1489 /*
1490 * The guest passes nr_vnodes, the number of memory ranges and nr_vcpus,
1491 * so we know how much memory the guest has allocated.
1492 */
1493 if ( copy_from_guest(&topology, arg, 1 ))
1494 return -EFAULT;
1495
1496 if ( topology.pad != 0 )
1497 return -EINVAL;
1498
1499 if ( (d = rcu_lock_domain_by_any_id(topology.domid)) == NULL )
1500 return -ESRCH;
1501
1502 rc = xsm_get_vnumainfo(XSM_TARGET, d);
1503 if ( rc )
1504 {
1505 rcu_unlock_domain(d);
1506 return rc;
1507 }
1508
1509 read_lock(&d->vnuma_rwlock);
1510
1511 if ( d->vnuma == NULL )
1512 {
1513 read_unlock(&d->vnuma_rwlock);
1514 rcu_unlock_domain(d);
1515 return -EOPNOTSUPP;
1516 }
1517
1518 dom_vnodes = d->vnuma->nr_vnodes;
1519 dom_vranges = d->vnuma->nr_vmemranges;
1520 dom_vcpus = d->max_vcpus;
1521
1522 /*
1523 * Values copied from the guest may differ from the domain's vNUMA config.
1524 * Check the guest parameters here to make sure we don't overflow.
1525 * Additionally check the padding.
1526 */
1527 if ( topology.nr_vnodes < dom_vnodes ||
1528 topology.nr_vcpus < dom_vcpus ||
1529 topology.nr_vmemranges < dom_vranges )
1530 {
1531 read_unlock(&d->vnuma_rwlock);
1532 rcu_unlock_domain(d);
1533
1534 topology.nr_vnodes = dom_vnodes;
1535 topology.nr_vcpus = dom_vcpus;
1536 topology.nr_vmemranges = dom_vranges;
1537
1538 /* Copy back needed values. */
1539 return __copy_to_guest(arg, &topology, 1) ? -EFAULT : -ENOBUFS;
1540 }
1541
1542 read_unlock(&d->vnuma_rwlock);
1543
1544 tmp.vdistance = xmalloc_array(unsigned int, dom_vnodes * dom_vnodes);
1545 tmp.vmemrange = xmalloc_array(xen_vmemrange_t, dom_vranges);
1546 tmp.vcpu_to_vnode = xmalloc_array(unsigned int, dom_vcpus);
1547
1548 if ( tmp.vdistance == NULL ||
1549 tmp.vmemrange == NULL ||
1550 tmp.vcpu_to_vnode == NULL )
1551 {
1552 rc = -ENOMEM;
1553 goto vnumainfo_out;
1554 }
1555
1556 /*
1557 * Check whether the vNUMA info has changed, in which case the
1558 * allocated arrays may no longer be big enough.
1559 */
1560 read_lock(&d->vnuma_rwlock);
1561
1562 if ( dom_vnodes < d->vnuma->nr_vnodes ||
1563 dom_vranges < d->vnuma->nr_vmemranges ||
1564 dom_vcpus < d->max_vcpus )
1565 {
1566 read_unlock(&d->vnuma_rwlock);
1567 rc = -EAGAIN;
1568 goto vnumainfo_out;
1569 }
1570
1571 dom_vnodes = d->vnuma->nr_vnodes;
1572 dom_vranges = d->vnuma->nr_vmemranges;
1573 dom_vcpus = d->max_vcpus;
1574
1575 memcpy(tmp.vmemrange, d->vnuma->vmemrange,
1576 sizeof(*d->vnuma->vmemrange) * dom_vranges);
1577 memcpy(tmp.vdistance, d->vnuma->vdistance,
1578 sizeof(*d->vnuma->vdistance) * dom_vnodes * dom_vnodes);
1579 memcpy(tmp.vcpu_to_vnode, d->vnuma->vcpu_to_vnode,
1580 sizeof(*d->vnuma->vcpu_to_vnode) * dom_vcpus);
1581
1582 read_unlock(&d->vnuma_rwlock);
1583
1584 rc = -EFAULT;
1585
1586 if ( copy_to_guest(topology.vmemrange.h, tmp.vmemrange,
1587 dom_vranges) != 0 )
1588 goto vnumainfo_out;
1589
1590 if ( copy_to_guest(topology.vdistance.h, tmp.vdistance,
1591 dom_vnodes * dom_vnodes) != 0 )
1592 goto vnumainfo_out;
1593
1594 if ( copy_to_guest(topology.vcpu_to_vnode.h, tmp.vcpu_to_vnode,
1595 dom_vcpus) != 0 )
1596 goto vnumainfo_out;
1597
1598 topology.nr_vnodes = dom_vnodes;
1599 topology.nr_vcpus = dom_vcpus;
1600 topology.nr_vmemranges = dom_vranges;
1601
1602 rc = __copy_to_guest(arg, &topology, 1) ? -EFAULT : 0;
1603
1604 vnumainfo_out:
1605 rcu_unlock_domain(d);
1606
1607 xfree(tmp.vdistance);
1608 xfree(tmp.vmemrange);
1609 xfree(tmp.vcpu_to_vnode);
1610 break;
1611 }
1612
1613 #ifdef CONFIG_HAS_PASSTHROUGH
1614 case XENMEM_reserved_device_memory_map:
1615 {
1616 struct get_reserved_device_memory grdm;
1617
1618 if ( unlikely(start_extent) )
1619 return -EINVAL;
1620
1621 if ( copy_from_guest(&grdm.map, arg, 1) ||
1622 !guest_handle_okay(grdm.map.buffer, grdm.map.nr_entries) )
1623 return -EFAULT;
1624
1625 if ( grdm.map.flags & ~XENMEM_RDM_ALL )
1626 return -EINVAL;
1627
1628 grdm.used_entries = 0;
1629 rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
1630 &grdm);
1631
1632 if ( !rc && grdm.map.nr_entries < grdm.used_entries )
1633 rc = -ENOBUFS;
1634 grdm.map.nr_entries = grdm.used_entries;
1635 if ( __copy_to_guest(arg, &grdm.map, 1) )
1636 rc = -EFAULT;
1637
1638 break;
1639 }
1640 #endif
1641
1642 case XENMEM_acquire_resource:
1643 rc = acquire_resource(
1644 guest_handle_cast(arg, xen_mem_acquire_resource_t));
1645 break;
1646
1647 default:
1648 rc = arch_memory_op(cmd, arg);
1649 break;
1650 }
1651
1652 return rc;
1653 }
1654
1655 void clear_domain_page(mfn_t mfn)
1656 {
1657 void *ptr = map_domain_page(mfn);
1658
1659 clear_page(ptr);
1660 unmap_domain_page(ptr);
1661 }
1662
1663 void copy_domain_page(mfn_t dest, mfn_t source)
1664 {
1665 const void *src = map_domain_page(source);
1666 void *dst = map_domain_page(dest);
1667
1668 copy_page(dst, src);
1669 unmap_domain_page(dst);
1670 unmap_domain_page(src);
1671 }
1672
1673 void destroy_ring_for_helper(
1674 void **_va, struct page_info *page)
1675 {
1676 void *va = *_va;
1677
1678 if ( va != NULL )
1679 {
1680 unmap_domain_page_global(va);
1681 put_page_and_type(page);
1682 *_va = NULL;
1683 }
1684 }
1685
1686 /*
1687 * Acquire a pointer to struct page_info for a specified domain and GFN,
1688 * checking whether the page has been paged out, or needs unsharing.
1689 * If the function succeeds then zero is returned, page_p is written
1690 * with a pointer to the struct page_info with a reference taken, and
1691 * p2mt_p is written with the P2M type of the page. The caller is
1692 * responsible for dropping the reference.
1693 * If the function fails then an appropriate errno is returned and the
1694 * values referenced by page_p and p2mt_p are undefined.
1695 */
1696 int check_get_page_from_gfn(struct domain *d, gfn_t gfn, bool readonly,
1697 p2m_type_t *p2mt_p, struct page_info **page_p)
1698 {
1699 p2m_query_t q = readonly ? P2M_ALLOC : P2M_UNSHARE;
1700 p2m_type_t p2mt;
1701 struct page_info *page;
1702
1703 page = get_page_from_gfn(d, gfn_x(gfn), &p2mt, q);
1704
1705 #ifdef CONFIG_HAS_MEM_PAGING
1706 if ( p2m_is_paging(p2mt) )
1707 {
1708 if ( page )
1709 put_page(page);
1710
1711 p2m_mem_paging_populate(d, gfn);
1712 return -EAGAIN;
1713 }
1714 #endif
1715 #ifdef CONFIG_MEM_SHARING
1716 if ( (q & P2M_UNSHARE) && p2m_is_shared(p2mt) )
1717 {
1718 if ( page )
1719 put_page(page);
1720
1721 return -EAGAIN;
1722 }
1723 #endif
1724
1725 if ( !page )
1726 return -EINVAL;
1727
1728 *p2mt_p = p2mt;
1729 *page_p = page;
1730 return 0;
1731 }
1732
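/*
 * Map the guest frame at gmfn as a writable, globally mapped ring page,
 * returning both the struct page_info and the mapping.  Undo with
 * destroy_ring_for_helper().
 */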
1733 int prepare_ring_for_helper(
1734 struct domain *d, unsigned long gmfn, struct page_info **_page,
1735 void **_va)
1736 {
1737 p2m_type_t p2mt;
1738 struct page_info *page;
1739 void *va;
1740 int rc;
1741
1742 rc = check_get_page_from_gfn(d, _gfn(gmfn), false, &p2mt, &page);
1743 if ( rc )
1744 return (rc == -EAGAIN) ? -ENOENT : rc;
1745
1746 if ( !get_page_type(page, PGT_writable_page) )
1747 {
1748 put_page(page);
1749 return -EINVAL;
1750 }
1751
1752 va = __map_domain_page_global(page);
1753 if ( va == NULL )
1754 {
1755 put_page_and_type(page);
1756 return -ENOMEM;
1757 }
1758
1759 *_va = va;
1760 *_page = page;
1761
1762 return 0;
1763 }
1764
1765 /*
1766 * Local variables:
1767 * mode: C
1768 * c-file-style: "BSD"
1769 * c-basic-offset: 4
1770 * tab-width: 4
1771 * indent-tabs-mode: nil
1772 * End:
1773 */
1774