/******************************************************************************
 * arch/x86/paging.c
 *
 * x86 specific paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Copyright (c) 2007 XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/init.h>
#include <xen/guest_access.h>
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
#include <asm/hap.h>
#include <asm/event.h>
#include <asm/hvm/nestedhvm.h>
#include <xen/numa.h>
#include <xsm/xsm.h>
#include <public/sched.h> /* SHUTDOWN_suspend */

#include "mm-locks.h"

/* Printouts */
#define PAGING_PRINTK(_f, _a...)                                     \
    debugtrace_printk("pg: %s(): " _f, __func__, ##_a)
#define PAGING_ERROR(_f, _a...)                                      \
    printk("pg error: %s(): " _f, __func__, ##_a)
#define PAGING_DEBUG(flag, _f, _a...)                                \
    do {                                                             \
        if (PAGING_DEBUG_ ## flag)                                   \
            debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \
    } while (0)

/* Per-CPU variable for enforcing the lock ordering */
DEFINE_PER_CPU(int, mm_lock_level);

/************************************************/
/*              LOG DIRTY SUPPORT               */
/************************************************/

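/*
 * The log-dirty bitmap is kept as a trie rooted at
 * d->arch.paging.log_dirty.top: L4/L3/L2 node pages each holding
 * LOGDIRTY_NODE_ENTRIES MFNs, with L1 leaf pages used as plain bitmaps
 * (one bit per guest pfn).  Nodes and leaves are allocated on demand (see
 * paging_mark_pfn_dirty() below) and absent entries are INVALID_MFN.
 */
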
static mfn_t paging_new_log_dirty_page(struct domain *d)
{
    struct page_info *page;

    page = d->arch.paging.alloc_page(d);
    if ( unlikely(page == NULL) )
    {
        d->arch.paging.log_dirty.failed_allocs++;
        return INVALID_MFN;
    }

    d->arch.paging.log_dirty.allocs++;

    return page_to_mfn(page);
}

/* Alloc and init a new leaf node */
static mfn_t paging_new_log_dirty_leaf(struct domain *d)
{
    mfn_t mfn = paging_new_log_dirty_page(d);

    if ( mfn_valid(mfn) )
        clear_domain_page(mfn);

    return mfn;
}

/* Alloc and init a new non-leaf node */
static mfn_t paging_new_log_dirty_node(struct domain *d)
{
    mfn_t mfn = paging_new_log_dirty_page(d);
    if ( mfn_valid(mfn) )
    {
        int i;
        mfn_t *node = map_domain_page(mfn);
        for ( i = 0; i < LOGDIRTY_NODE_ENTRIES; i++ )
            node[i] = INVALID_MFN;
        unmap_domain_page(node);
    }
    return mfn;
}

/* get the top of the log-dirty bitmap trie */
static mfn_t *paging_map_log_dirty_bitmap(struct domain *d)
{
    if ( likely(mfn_valid(d->arch.paging.log_dirty.top)) )
        return map_domain_page(d->arch.paging.log_dirty.top);
    return NULL;
}

static void paging_free_log_dirty_page(struct domain *d, mfn_t mfn)
{
    d->arch.paging.log_dirty.allocs--;
    d->arch.paging.free_page(d, mfn_to_page(mfn));
}

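/*
 * Free every page of the log-dirty trie.  The walk may be preempted:
 * progress (the i3/i4 indices) is parked in d->arch.paging.preempt so a
 * later XEN_DOMCTL_SHADOW_OP_OFF continuation can pick up where it left
 * off.  'rc' carries the result of a preceding ops->disable() call and is
 * handed back once the trie has been freed completely.
 */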
static int paging_free_log_dirty_bitmap(struct domain *d, int rc)
{
    mfn_t *l4, *l3, *l2;
    int i4, i3, i2;

    paging_lock(d);

    if ( !mfn_valid(d->arch.paging.log_dirty.top) )
    {
        paging_unlock(d);
        return 0;
    }

    if ( !d->arch.paging.preempt.dom )
    {
        memset(&d->arch.paging.preempt.log_dirty, 0,
               sizeof(d->arch.paging.preempt.log_dirty));
        ASSERT(rc <= 0);
        d->arch.paging.preempt.log_dirty.done = -rc;
    }
    else if ( d->arch.paging.preempt.dom != current->domain ||
              d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF )
    {
        paging_unlock(d);
        return -EBUSY;
    }

    l4 = map_domain_page(d->arch.paging.log_dirty.top);
    i4 = d->arch.paging.preempt.log_dirty.i4;
    i3 = d->arch.paging.preempt.log_dirty.i3;
    rc = 0;

    for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 )
    {
        if ( !mfn_valid(l4[i4]) )
            continue;

        l3 = map_domain_page(l4[i4]);

        for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
        {
            if ( !mfn_valid(l3[i3]) )
                continue;

            l2 = map_domain_page(l3[i3]);

            for ( i2 = 0; i2 < LOGDIRTY_NODE_ENTRIES; i2++ )
                if ( mfn_valid(l2[i2]) )
                    paging_free_log_dirty_page(d, l2[i2]);

            unmap_domain_page(l2);
            paging_free_log_dirty_page(d, l3[i3]);
            l3[i3] = INVALID_MFN;

            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
            {
                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
                d->arch.paging.preempt.log_dirty.i4 = i4;
                rc = -ERESTART;
                break;
            }
        }

        unmap_domain_page(l3);
        if ( rc )
            break;
        paging_free_log_dirty_page(d, l4[i4]);
        l4[i4] = INVALID_MFN;

        if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
        {
            d->arch.paging.preempt.log_dirty.i3 = 0;
            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
            rc = -ERESTART;
            break;
        }
    }

    unmap_domain_page(l4);

    if ( !rc )
    {
        paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
        d->arch.paging.log_dirty.top = INVALID_MFN;

        ASSERT(d->arch.paging.log_dirty.allocs == 0);
        d->arch.paging.log_dirty.failed_allocs = 0;

        rc = -d->arch.paging.preempt.log_dirty.done;
        d->arch.paging.preempt.dom = NULL;
    }
    else
    {
        d->arch.paging.preempt.dom = current->domain;
        d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF;
    }

    paging_unlock(d);

    return rc;
}

int paging_log_dirty_enable(struct domain *d, bool log_global)
{
    int ret;

    if ( has_arch_pdevs(d) && log_global )
    {
        /*
         * Refuse to turn on global log-dirty mode
         * if the domain is sharing the P2M with the IOMMU.
         */
        return -EINVAL;
    }

    if ( paging_mode_log_dirty(d) )
        return -EINVAL;

    domain_pause(d);
    ret = d->arch.paging.log_dirty.ops->enable(d, log_global);
    domain_unpause(d);

    return ret;
}

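/*
 * Disabling may be preempted while the log-dirty trie is being freed: in
 * that case -ERESTART is returned with the domain still paused, and the
 * caller re-enters with 'resuming' set so the mode-specific disable hook
 * is not run a second time.
 */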
static int paging_log_dirty_disable(struct domain *d, bool_t resuming)
{
    int ret = 1;

    if ( !resuming )
    {
        domain_pause(d);
        /* Safe because the domain is paused. */
        if ( paging_mode_log_dirty(d) )
        {
            ret = d->arch.paging.log_dirty.ops->disable(d);
            ASSERT(ret <= 0);
        }
    }

    ret = paging_free_log_dirty_bitmap(d, ret);
    if ( ret == -ERESTART )
        return ret;

    domain_unpause(d);

    return ret;
}

/* Mark a page as dirty, taking the guest pfn as parameter. */
void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn)
{
    bool changed;
    mfn_t mfn, *l4, *l3, *l2;
    unsigned long *l1;
    unsigned int i1, i2, i3, i4;

    if ( !paging_mode_log_dirty(d) )
        return;

    /* Shared MFNs should NEVER be marked dirty */
    BUG_ON(paging_mode_translate(d) && SHARED_M2P(pfn_x(pfn)));

    /*
     * Values with the MSB set denote MFNs that aren't really part of the
     * domain's pseudo-physical memory map (e.g., the shared info frame).
     * Nothing to do here...
     */
    if ( unlikely(!VALID_M2P(pfn_x(pfn))) )
        return;

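    /*
     * Split the pfn into its position in the trie: one bit within a leaf
     * page (i1) and one entry in each of the L2/L3/L4 node levels above it.
     */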
    i1 = L1_LOGDIRTY_IDX(pfn);
    i2 = L2_LOGDIRTY_IDX(pfn);
    i3 = L3_LOGDIRTY_IDX(pfn);
    i4 = L4_LOGDIRTY_IDX(pfn);

    /* Recursive: this is called from inside the shadow code */
    paging_lock_recursive(d);

    if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
    {
        d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
        if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
            goto out;
    }

    l4 = paging_map_log_dirty_bitmap(d);
    mfn = l4[i4];
    if ( !mfn_valid(mfn) )
        l4[i4] = mfn = paging_new_log_dirty_node(d);
    unmap_domain_page(l4);
    if ( !mfn_valid(mfn) )
        goto out;

    l3 = map_domain_page(mfn);
    mfn = l3[i3];
    if ( !mfn_valid(mfn) )
        l3[i3] = mfn = paging_new_log_dirty_node(d);
    unmap_domain_page(l3);
    if ( !mfn_valid(mfn) )
        goto out;

    l2 = map_domain_page(mfn);
    mfn = l2[i2];
    if ( !mfn_valid(mfn) )
        l2[i2] = mfn = paging_new_log_dirty_leaf(d);
    unmap_domain_page(l2);
    if ( !mfn_valid(mfn) )
        goto out;

    l1 = map_domain_page(mfn);
    changed = !__test_and_set_bit(i1, l1);
    unmap_domain_page(l1);
    if ( changed )
    {
        PAGING_DEBUG(LOGDIRTY,
                     "d%d: marked mfn %" PRI_mfn " (pfn %" PRI_pfn ")\n",
                     d->domain_id, mfn_x(mfn), pfn_x(pfn));
        d->arch.paging.log_dirty.dirty_count++;
    }

 out:
    /* We've already recorded any failed allocations */
    paging_unlock(d);
    return;
}

/* Mark a page as dirty */
void paging_mark_dirty(struct domain *d, mfn_t gmfn)
{
    pfn_t pfn;

    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) ||
         page_get_owner(mfn_to_page(gmfn)) != d )
        return;

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn)));

    paging_mark_pfn_dirty(d, pfn);
}


/* Is this guest page dirty? */
int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn)
{
    pfn_t pfn;
    mfn_t mfn, *l4, *l3, *l2;
    unsigned long *l1;
    int rv;

    ASSERT(paging_locked_by_me(d));
    ASSERT(paging_mode_log_dirty(d));

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn)));
    /* Invalid pages can't be dirty. */
    if ( unlikely(!VALID_M2P(pfn_x(pfn))) )
        return 0;

    mfn = d->arch.paging.log_dirty.top;
    if ( !mfn_valid(mfn) )
        return 0;

    l4 = map_domain_page(mfn);
    mfn = l4[L4_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l4);
    if ( !mfn_valid(mfn) )
        return 0;

    l3 = map_domain_page(mfn);
    mfn = l3[L3_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l3);
    if ( !mfn_valid(mfn) )
        return 0;

    l2 = map_domain_page(mfn);
    mfn = l2[L2_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l2);
    if ( !mfn_valid(mfn) )
        return 0;

    l1 = map_domain_page(mfn);
    rv = test_bit(L1_LOGDIRTY_IDX(pfn), l1);
    unmap_domain_page(l1);
    return rv;
}


/* Read a domain's log-dirty bitmap and stats. If the operation is a CLEAN,
 * clear the bitmap and stats as well. */
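/*
 * The bitmap is copied out in page-sized chunks and the operation may be
 * preempted: progress (the i3/i4 indices and the number of bits already
 * handled) is parked in d->arch.paging.preempt, and the caller re-invokes
 * us with 'resuming' set to continue.  The domain stays paused across the
 * whole, possibly multi-part, operation.
 */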
static int paging_log_dirty_op(struct domain *d,
                               struct xen_domctl_shadow_op *sc,
                               bool_t resuming)
{
    int rv = 0, clean = 0, peek = 1;
    unsigned long pages = 0;
    mfn_t *l4 = NULL, *l3 = NULL, *l2 = NULL;
    unsigned long *l1 = NULL;
    int i4, i3, i2;

    if ( !resuming )
    {
        /*
         * Mark dirty all currently write-mapped pages on e.g. the
         * final iteration of a save operation.
         */
        if ( is_hvm_domain(d) &&
             (sc->mode & XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL) )
            hvm_mapped_guest_frames_mark_dirty(d);

        domain_pause(d);

        /*
         * Flush dirty GFNs potentially cached by hardware.  This is only
         * needed when not resuming: in the resuming case the domain is
         * already paused, so no new dirty pages can have appeared.
         */
        p2m_flush_hardware_cached_dirty(d);
    }

    paging_lock(d);

    if ( !d->arch.paging.preempt.dom )
        memset(&d->arch.paging.preempt.log_dirty, 0,
               sizeof(d->arch.paging.preempt.log_dirty));
    else if ( d->arch.paging.preempt.dom != current->domain ||
              d->arch.paging.preempt.op != sc->op )
    {
        paging_unlock(d);
        ASSERT(!resuming);
        domain_unpause(d);
        return -EBUSY;
    }

    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);

    PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
                 (clean) ? "clean" : "peek",
                 d->domain_id,
                 d->arch.paging.log_dirty.fault_count,
                 d->arch.paging.log_dirty.dirty_count);

    sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
    sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;

    if ( guest_handle_is_null(sc->dirty_bitmap) )
        /* caller may have wanted just to clean the state or access stats. */
        peek = 0;

    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) )
    {
        printk(XENLOG_WARNING
               "%u failed page allocs while logging dirty pages of d%d\n",
               d->arch.paging.log_dirty.failed_allocs, d->domain_id);
        rv = -ENOMEM;
        goto out;
    }

    l4 = paging_map_log_dirty_bitmap(d);
    i4 = d->arch.paging.preempt.log_dirty.i4;
    i3 = d->arch.paging.preempt.log_dirty.i3;
    pages = d->arch.paging.preempt.log_dirty.done;

    for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 )
    {
        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(l4[i4]) : NULL;
        for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ )
        {
            l2 = ((l3 && mfn_valid(l3[i3])) ?
                  map_domain_page(l3[i3]) : NULL);
            for ( i2 = 0;
                  (pages < sc->pages) && (i2 < LOGDIRTY_NODE_ENTRIES);
                  i2++ )
            {
                unsigned int bytes = PAGE_SIZE;
                l1 = ((l2 && mfn_valid(l2[i2])) ?
                      map_domain_page(l2[i2]) : NULL);
                if ( unlikely(((sc->pages - pages + 7) >> 3) < bytes) )
                    bytes = (unsigned int)((sc->pages - pages + 7) >> 3);
                if ( likely(peek) )
                {
                    if ( (l1 ? copy_to_guest_offset(sc->dirty_bitmap,
                                                    pages >> 3, (uint8_t *)l1,
                                                    bytes)
                             : clear_guest_offset(sc->dirty_bitmap,
                                                  pages >> 3, bytes)) != 0 )
                    {
                        rv = -EFAULT;
                        goto out;
                    }
                }
                pages += bytes << 3;
                if ( l1 )
                {
                    if ( clean )
                        clear_page(l1);
                    unmap_domain_page(l1);
                }
            }
            if ( l2 )
                unmap_domain_page(l2);

            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
            {
                d->arch.paging.preempt.log_dirty.i4 = i4;
                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
                rv = -ERESTART;
                break;
            }
        }
        if ( l3 )
            unmap_domain_page(l3);

        if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 &&
             hypercall_preempt_check() )
        {
            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
            d->arch.paging.preempt.log_dirty.i3 = 0;
            rv = -ERESTART;
        }
        if ( rv )
            break;
    }
    if ( l4 )
        unmap_domain_page(l4);

    if ( !rv )
    {
        d->arch.paging.preempt.dom = NULL;
        if ( clean )
        {
            d->arch.paging.log_dirty.fault_count = 0;
            d->arch.paging.log_dirty.dirty_count = 0;
        }
    }
    else
    {
        d->arch.paging.preempt.dom = current->domain;
        d->arch.paging.preempt.op = sc->op;
        d->arch.paging.preempt.log_dirty.done = pages;
    }

    paging_unlock(d);

    if ( rv )
    {
        /* Never leave the domain paused on real errors. */
        ASSERT(rv == -ERESTART);
        return rv;
    }

    if ( pages < sc->pages )
        sc->pages = pages;
    if ( clean )
    {
        /* We still need to call the clean_dirty_bitmap() function of the
         * specific paging mode (shadow or hap).  Safe because the domain
         * is paused. */
        d->arch.paging.log_dirty.ops->clean(d);
    }
    domain_unpause(d);
    return rv;

 out:
    d->arch.paging.preempt.dom = NULL;
    paging_unlock(d);
    domain_unpause(d);

    if ( l1 )
        unmap_domain_page(l1);
    if ( l2 )
        unmap_domain_page(l2);
    if ( l3 )
        unmap_domain_page(l3);
    if ( l4 )
        unmap_domain_page(l4);

    return rv;
}

void paging_log_dirty_range(struct domain *d,
                            unsigned long begin_pfn,
                            unsigned long nr,
                            uint8_t *dirty_bitmap)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int i;
    unsigned long pfn;

    /*
     * Set l1e entries of P2M table to be read-only.
     *
     * On first write, it page faults, its entry is changed to read-write,
     * and on retry the write succeeds.
     *
     * We populate dirty_bitmap by looking for entries that have been
     * switched to read-write.
     */

    p2m_lock(p2m);

    for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
        if ( !p2m_change_type_one(d, pfn, p2m_ram_rw, p2m_ram_logdirty) )
            dirty_bitmap[i >> 3] |= (1 << (i & 7));

    p2m_unlock(p2m);

    guest_flush_tlb_mask(d, d->dirty_cpumask);
}

/*
 * Callers must supply log_dirty_ops for the log-dirty code to call.  This
 * function is usually invoked when paging is enabled; see shadow_enable()
 * and hap_enable() for reference.
 *
 * These function pointers must not be followed with the log-dirty lock held.
 */
void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops)
{
    d->arch.paging.log_dirty.ops = ops;
}

/************************************************/
/*           CODE FOR PAGING SUPPORT            */
/************************************************/
/* Domain paging struct initialization. */
int paging_domain_init(struct domain *d)
{
    int rc;

    if ( (rc = p2m_init(d)) != 0 )
        return rc;

    mm_lock_init(&d->arch.paging.lock);

    /* This must be initialized separately from the rest of the
     * log-dirty init code as that can be called more than once and we
     * don't want to leak any active log-dirty bitmaps */
    d->arch.paging.log_dirty.top = INVALID_MFN;

    /*
     * Shadow pagetables are the default, but we will use
     * hardware assistance if it's available and enabled.
     */
    if ( hap_enabled(d) )
        hap_domain_init(d);
    else
        rc = shadow_domain_init(d);

    return rc;
}

/* vcpu paging struct initialization goes here */
void paging_vcpu_init(struct vcpu *v)
{
    if ( hap_enabled(v->domain) )
        hap_vcpu_init(v);
    else
        shadow_vcpu_init(v);
}


int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                  XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                  bool_t resuming)
{
    int rc;

    if ( unlikely(d == current->domain) )
    {
        gdprintk(XENLOG_INFO, "Tried to do a paging op on itself.\n");
        return -EINVAL;
    }

    if ( unlikely(d->is_dying) )
    {
        gdprintk(XENLOG_INFO, "Ignoring paging op on dying domain %u\n",
                 d->domain_id);
        return 0;
    }

    if ( unlikely(d->vcpu == NULL) || unlikely(d->vcpu[0] == NULL) )
    {
        gdprintk(XENLOG_DEBUG, "Paging op on a domain (%u) with no vcpus\n",
                 d->domain_id);
        return -EINVAL;
    }

    if ( resuming
         ? (d->arch.paging.preempt.dom != current->domain ||
            d->arch.paging.preempt.op != sc->op)
         : (d->arch.paging.preempt.dom &&
            sc->op != XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION) )
    {
        printk(XENLOG_G_DEBUG
               "%pv: Paging op %#x on Dom%u with unfinished prior op %#x by Dom%u\n",
               current, sc->op, d->domain_id, d->arch.paging.preempt.op,
               d->arch.paging.preempt.dom
               ? d->arch.paging.preempt.dom->domain_id : DOMID_INVALID);
        return -EBUSY;
    }

    rc = xsm_shadow_control(XSM_HOOK, d, sc->op);
    if ( rc )
        return rc;

    /*
     * Code to handle log-dirty.  Note that some log-dirty operations
     * piggy-back on shadow operations.  For example, when
     * XEN_DOMCTL_SHADOW_OP_OFF is called, it first checks whether log-dirty
     * mode is enabled; if it is, we disable log-dirty and continue with the
     * shadow code.  For this reason, we need to further dispatch the domctl
     * to the next-level paging code (shadow or hap).
     */
    switch ( sc->op )
    {

    case XEN_DOMCTL_SHADOW_OP_ENABLE:
        if ( !(sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY) )
            break;
        /* Else fall through... */
    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
        return paging_log_dirty_enable(d, true);

    case XEN_DOMCTL_SHADOW_OP_OFF:
        if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 )
            return rc;
        break;

    case XEN_DOMCTL_SHADOW_OP_CLEAN:
    case XEN_DOMCTL_SHADOW_OP_PEEK:
        if ( sc->mode & ~XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL )
            return -EINVAL;
        return paging_log_dirty_op(d, sc, resuming);
    }

    /* Here, dispatch domctl to the appropriate paging code */
    if ( hap_enabled(d) )
        return hap_domctl(d, sc, u_domctl);
    else
        return shadow_domctl(d, sc, u_domctl);
}

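/*
 * Continuation handler for a preempted XEN_DOMCTL_shadow_op: it
 * re-validates the domctl, re-enters paging_domctl() with 'resuming' set,
 * and re-creates the __HYPERVISOR_arch_1 continuation if the operation
 * still isn't finished.
 */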
long paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    struct xen_domctl op;
    struct domain *d;
    int ret;

    if ( copy_from_guest(&op, u_domctl, 1) )
        return -EFAULT;

    if ( op.interface_version != XEN_DOMCTL_INTERFACE_VERSION ||
         op.cmd != XEN_DOMCTL_shadow_op )
        return -EOPNOTSUPP;

    d = rcu_lock_domain_by_id(op.domain);
    if ( d == NULL )
        return -ESRCH;

    ret = xsm_domctl(XSM_OTHER, d, op.cmd);
    if ( !ret )
    {
        if ( domctl_lock_acquire() )
        {
            ret = paging_domctl(d, &op.u.shadow_op, u_domctl, 1);

            domctl_lock_release();
        }
        else
            ret = -ERESTART;
    }

    rcu_unlock_domain(d);

    if ( ret == -ERESTART )
        ret = hypercall_create_continuation(__HYPERVISOR_arch_1,
                                            "h", u_domctl);
    else if ( __copy_field_to_guest(u_domctl, &op, u.shadow_op) )
        ret = -EFAULT;

    return ret;
}

/* Call when destroying a domain */
int paging_teardown(struct domain *d)
{
    int rc;
    bool preempted = false;

    if ( hap_enabled(d) )
        hap_teardown(d, &preempted);
    else
        shadow_teardown(d, &preempted);

    if ( preempted )
        return -ERESTART;

    /* clean up log dirty resources. */
    rc = paging_free_log_dirty_bitmap(d, 0);
    if ( rc == -ERESTART )
        return rc;

    /* Move populate-on-demand cache back to domain_list for destruction */
    rc = p2m_pod_empty_cache(d);

    return rc;
}

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d)
{
    if ( hap_enabled(d) )
        hap_final_teardown(d);
    else
        shadow_final_teardown(d);

    p2m_final_teardown(d);
}

/* Enable an arbitrary paging-assistance mode. Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode)
{
    /* Unrecognised paging mode? */
    if ( mode & ~PG_MASK )
        return -EINVAL;

    /* All of external|translate|refcounts, or none. */
    switch ( mode & (PG_external | PG_translate | PG_refcounts) )
    {
    case 0:
#if PG_external | PG_translate | PG_refcounts
    case PG_external | PG_translate | PG_refcounts:
#endif
        break;
    default:
        return -EINVAL;
    }

    if ( hap_enabled(d) )
        return hap_enable(d, mode);
    else
        return shadow_enable(d, mode);
}

#ifdef CONFIG_HVM
/* Called from the guest to indicate that a process is being torn down
 * and therefore its pagetables will soon be discarded */
void pagetable_dying(paddr_t gpa)
{
#ifdef CONFIG_SHADOW_PAGING
    struct vcpu *curr = current;

    ASSERT(paging_mode_shadow(curr->domain));

    curr->arch.paging.mode->shadow.pagetable_dying(gpa);
#else
    BUG();
#endif
}
#endif /* CONFIG_HVM */

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d)
{
    if ( paging_mode_enabled(d) )
    {
        printk("    paging assistance: ");
        if ( paging_mode_shadow(d) )
            printk("shadow ");
        if ( paging_mode_sh_forced(d) )
            printk("forced ");
        if ( paging_mode_hap(d) )
            printk("hap ");
        if ( paging_mode_refcounts(d) )
            printk("refcounts ");
        if ( paging_mode_log_dirty(d) )
            printk("log_dirty ");
        if ( paging_mode_translate(d) )
            printk("translate ");
        if ( paging_mode_external(d) )
            printk("external ");
        printk("\n");
    }
}

void paging_dump_vcpu_info(struct vcpu *v)
{
    if ( paging_mode_enabled(v->domain) )
    {
        printk("    paging assistance: ");
        if ( paging_mode_shadow(v->domain) )
        {
            if ( paging_get_hostmode(v) )
                printk("shadowed %u-on-%u\n",
                       paging_get_hostmode(v)->guest_levels,
                       paging_get_hostmode(v)->shadow.shadow_levels);
            else
                printk("not shadowed\n");
        }
        else if ( paging_mode_hap(v->domain) && paging_get_hostmode(v) )
            printk("hap, %u levels\n",
                   paging_get_hostmode(v)->guest_levels);
        else
            printk("none\n");
    }
}

const struct paging_mode *paging_get_mode(struct vcpu *v)
{
    if ( !nestedhvm_is_n2(v) )
        return paging_get_hostmode(v);

    return paging_get_nestedmode(v);
}

#ifdef CONFIG_HVM
void paging_update_nestedmode(struct vcpu *v)
{
    ASSERT(nestedhvm_enabled(v->domain));
    if ( nestedhvm_paging_mode_hap(v) )
        /* nested-on-nested */
        v->arch.paging.nestedmode = hap_paging_get_mode(v);
    else
        /* TODO: shadow-on-shadow */
        v->arch.paging.nestedmode = NULL;
    hvm_asid_flush_vcpu(v);
}
#endif

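/*
 * Write a p2m entry on behalf of a (possibly foreign) domain: if the
 * current vcpu doesn't belong to the target domain, fall back to the
 * target's vcpu 0 so the correct per-domain paging hook is used; without a
 * usable vcpu or an enabled paging mode the entry is written directly.
 */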
int paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
                           l1_pgentry_t *p, l1_pgentry_t new,
                           unsigned int level)
{
    struct domain *d = p2m->domain;
    struct vcpu *v = current;
    int rc = 0;

    if ( v->domain != d )
        v = d->vcpu ? d->vcpu[0] : NULL;
    if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v) != NULL) )
        rc = paging_get_hostmode(v)->write_p2m_entry(p2m, gfn, p, new, level);
    else
        safe_write_pte(p, new);

    return rc;
}

#ifdef CONFIG_HVM
int __init paging_set_allocation(struct domain *d, unsigned int pages,
                                 bool *preempted)
{
    int rc;

    ASSERT(paging_mode_enabled(d));

    paging_lock(d);
    if ( hap_enabled(d) )
        rc = hap_set_allocation(d, pages, preempted);
    else
        rc = shadow_set_allocation(d, pages, preempted);
    paging_unlock(d);

    return rc;
}
#endif

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */