/******************************************************************************
 * arch/x86/paging.c
 *
 * x86 specific paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Copyright (c) 2007 XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/init.h>
#include <xen/guest_access.h>
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
#include <asm/hap.h>
#include <asm/event.h>
#include <asm/hvm/nestedhvm.h>
#include <xen/numa.h>
#include <xsm/xsm.h>
#include <public/sched.h> /* SHUTDOWN_suspend */

#include "mm-locks.h"

/* Printouts */
#define PAGING_PRINTK(_f, _a...)                                     \
    debugtrace_printk("pg: %s(): " _f, __func__, ##_a)
#define PAGING_ERROR(_f, _a...)                                      \
    printk("pg error: %s(): " _f, __func__, ##_a)
#define PAGING_DEBUG(flag, _f, _a...)                                \
    do {                                                             \
        if (PAGING_DEBUG_ ## flag)                                   \
            debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \
    } while (0)

/* Per-CPU variable for enforcing the lock ordering */
DEFINE_PER_CPU(int, mm_lock_level);

/************************************************/
/*              LOG DIRTY SUPPORT               */
/************************************************/

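/*
 * The log-dirty state is kept as a trie of pages: a top-level node and
 * intermediate nodes hold arrays of LOGDIRTY_NODE_ENTRIES MFNs pointing at
 * the next level down, while leaf pages hold one bit per guest pfn.  Nodes
 * are allocated on demand from the paging pool via
 * d->arch.paging.alloc_page() and handed back with free_page().
 */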
static mfn_t paging_new_log_dirty_page(struct domain *d)
{
    struct page_info *page;

    page = d->arch.paging.alloc_page(d);
    if ( unlikely(page == NULL) )
    {
        d->arch.paging.log_dirty.failed_allocs++;
        return INVALID_MFN;
    }

    d->arch.paging.log_dirty.allocs++;

    return page_to_mfn(page);
}

/* Alloc and init a new leaf node */
static mfn_t paging_new_log_dirty_leaf(struct domain *d)
{
    mfn_t mfn = paging_new_log_dirty_page(d);

    if ( mfn_valid(mfn) )
        clear_domain_page(mfn);

    return mfn;
}

/* Alloc and init a new non-leaf node */
static mfn_t paging_new_log_dirty_node(struct domain *d)
{
    mfn_t mfn = paging_new_log_dirty_page(d);
    if ( mfn_valid(mfn) )
    {
        int i;
        mfn_t *node = map_domain_page(mfn);
        for ( i = 0; i < LOGDIRTY_NODE_ENTRIES; i++ )
            node[i] = INVALID_MFN;
        unmap_domain_page(node);
    }
    return mfn;
}

/* get the top of the log-dirty bitmap trie */
static mfn_t *paging_map_log_dirty_bitmap(struct domain *d)
{
    if ( likely(mfn_valid(d->arch.paging.log_dirty.top)) )
        return map_domain_page(d->arch.paging.log_dirty.top);
    return NULL;
}

static void paging_free_log_dirty_page(struct domain *d, mfn_t mfn)
{
    d->arch.paging.log_dirty.allocs--;
    d->arch.paging.free_page(d, mfn_to_page(mfn));
}

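/*
 * Tear the log-dirty trie down, freeing leaves and nodes bottom-up.  The
 * walk checks for pending preemption between entries and, if it has to
 * stop, records its position (i3/i4) in d->arch.paging.preempt so a later
 * continuation can resume from the same point.  The mode-specific disable
 * result passed in as 'rc' is preserved across restarts in
 * preempt.log_dirty.done and handed back once the trie is fully freed.
 */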
static int paging_free_log_dirty_bitmap(struct domain *d, int rc)
{
    mfn_t *l4, *l3, *l2;
    int i4, i3, i2;

    paging_lock(d);

    if ( !mfn_valid(d->arch.paging.log_dirty.top) )
    {
        paging_unlock(d);
        return 0;
    }

    if ( !d->arch.paging.preempt.dom )
    {
        memset(&d->arch.paging.preempt.log_dirty, 0,
               sizeof(d->arch.paging.preempt.log_dirty));
        ASSERT(rc <= 0);
        d->arch.paging.preempt.log_dirty.done = -rc;
    }
    else if ( d->arch.paging.preempt.dom != current->domain ||
              d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF )
    {
        paging_unlock(d);
        return -EBUSY;
    }

    l4 = map_domain_page(d->arch.paging.log_dirty.top);
    i4 = d->arch.paging.preempt.log_dirty.i4;
    i3 = d->arch.paging.preempt.log_dirty.i3;
    rc = 0;

    for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 )
    {
        if ( !mfn_valid(l4[i4]) )
            continue;

        l3 = map_domain_page(l4[i4]);

        for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
        {
            if ( !mfn_valid(l3[i3]) )
                continue;

            l2 = map_domain_page(l3[i3]);

            for ( i2 = 0; i2 < LOGDIRTY_NODE_ENTRIES; i2++ )
                if ( mfn_valid(l2[i2]) )
                    paging_free_log_dirty_page(d, l2[i2]);

            unmap_domain_page(l2);
            paging_free_log_dirty_page(d, l3[i3]);
            l3[i3] = INVALID_MFN;

            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
            {
                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
                d->arch.paging.preempt.log_dirty.i4 = i4;
                rc = -ERESTART;
                break;
            }
        }

        unmap_domain_page(l3);
        if ( rc )
            break;
        paging_free_log_dirty_page(d, l4[i4]);
        l4[i4] = INVALID_MFN;

        if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
        {
            d->arch.paging.preempt.log_dirty.i3 = 0;
            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
            rc = -ERESTART;
            break;
        }
    }

    unmap_domain_page(l4);

    if ( !rc )
    {
        paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
        d->arch.paging.log_dirty.top = INVALID_MFN;

        ASSERT(d->arch.paging.log_dirty.allocs == 0);
        d->arch.paging.log_dirty.failed_allocs = 0;

        rc = -d->arch.paging.preempt.log_dirty.done;
        d->arch.paging.preempt.dom = NULL;
    }
    else
    {
        d->arch.paging.preempt.dom = current->domain;
        d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF;
    }

    paging_unlock(d);

    return rc;
}

int paging_log_dirty_enable(struct domain *d, bool log_global)
{
    int ret;

    if ( has_arch_pdevs(d) && log_global )
    {
        /*
         * Refuse to turn on global log-dirty mode
         * if the domain is sharing the P2M with the IOMMU.
         */
        return -EINVAL;
    }

    if ( paging_mode_log_dirty(d) )
        return -EINVAL;

    domain_pause(d);
    ret = d->arch.paging.log_dirty.ops->enable(d, log_global);
    domain_unpause(d);

    return ret;
}

static int paging_log_dirty_disable(struct domain *d, bool_t resuming)
{
    int ret = 1;

    if ( !resuming )
    {
        domain_pause(d);
        /* Safe because the domain is paused. */
        if ( paging_mode_log_dirty(d) )
        {
            ret = d->arch.paging.log_dirty.ops->disable(d);
            ASSERT(ret <= 0);
        }
    }

    ret = paging_free_log_dirty_bitmap(d, ret);
    if ( ret == -ERESTART )
        return ret;

    domain_unpause(d);

    return ret;
}

/* Mark a page as dirty, taking the guest pfn as parameter */
void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn)
{
    bool changed;
    mfn_t mfn, *l4, *l3, *l2;
    unsigned long *l1;
    unsigned int i1, i2, i3, i4;

    if ( !paging_mode_log_dirty(d) )
        return;

    /* Shared MFNs should NEVER be marked dirty */
    BUG_ON(paging_mode_translate(d) && SHARED_M2P(pfn_x(pfn)));

    /*
     * Values with the MSB set denote MFNs that aren't really part of the
     * domain's pseudo-physical memory map (e.g., the shared info frame).
     * Nothing to do here...
     */
    if ( unlikely(!VALID_M2P(pfn_x(pfn))) )
        return;

    i1 = L1_LOGDIRTY_IDX(pfn);
    i2 = L2_LOGDIRTY_IDX(pfn);
    i3 = L3_LOGDIRTY_IDX(pfn);
    i4 = L4_LOGDIRTY_IDX(pfn);

    /* Recursive: this is called from inside the shadow code */
    paging_lock_recursive(d);

    if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
    {
         d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
         if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
             goto out;
    }

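    /*
     * Walk the trie from the top level down to the leaf, allocating any
     * missing nodes on the way.  If an allocation fails we simply give up:
     * the failure has already been counted in log_dirty.failed_allocs and
     * is reported back to the toolstack by paging_log_dirty_op().
     */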
    l4 = paging_map_log_dirty_bitmap(d);
    mfn = l4[i4];
    if ( !mfn_valid(mfn) )
        l4[i4] = mfn = paging_new_log_dirty_node(d);
    unmap_domain_page(l4);
    if ( !mfn_valid(mfn) )
        goto out;

    l3 = map_domain_page(mfn);
    mfn = l3[i3];
    if ( !mfn_valid(mfn) )
        l3[i3] = mfn = paging_new_log_dirty_node(d);
    unmap_domain_page(l3);
    if ( !mfn_valid(mfn) )
        goto out;

    l2 = map_domain_page(mfn);
    mfn = l2[i2];
    if ( !mfn_valid(mfn) )
        l2[i2] = mfn = paging_new_log_dirty_leaf(d);
    unmap_domain_page(l2);
    if ( !mfn_valid(mfn) )
        goto out;

    l1 = map_domain_page(mfn);
    changed = !__test_and_set_bit(i1, l1);
    unmap_domain_page(l1);
    if ( changed )
    {
        PAGING_DEBUG(LOGDIRTY,
                     "d%d: marked mfn %" PRI_mfn " (pfn %" PRI_pfn ")\n",
                     d->domain_id, mfn_x(mfn), pfn_x(pfn));
        d->arch.paging.log_dirty.dirty_count++;
    }

out:
    /* We've already recorded any failed allocations */
    paging_unlock(d);
    return;
}

/* Mark a page as dirty */
void paging_mark_dirty(struct domain *d, mfn_t gmfn)
{
    pfn_t pfn;

    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) ||
         page_get_owner(mfn_to_page(gmfn)) != d )
        return;

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn)));

    paging_mark_pfn_dirty(d, pfn);
}


/* Is this guest page dirty? */
int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn)
{
    pfn_t pfn;
    mfn_t mfn, *l4, *l3, *l2;
    unsigned long *l1;
    int rv;

    ASSERT(paging_locked_by_me(d));
    ASSERT(paging_mode_log_dirty(d));

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn)));
    /* Invalid pages can't be dirty. */
    if ( unlikely(!VALID_M2P(pfn_x(pfn))) )
        return 0;

    mfn = d->arch.paging.log_dirty.top;
    if ( !mfn_valid(mfn) )
        return 0;

    l4 = map_domain_page(mfn);
    mfn = l4[L4_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l4);
    if ( !mfn_valid(mfn) )
        return 0;

    l3 = map_domain_page(mfn);
    mfn = l3[L3_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l3);
    if ( !mfn_valid(mfn) )
        return 0;

    l2 = map_domain_page(mfn);
    mfn = l2[L2_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l2);
    if ( !mfn_valid(mfn) )
        return 0;

    l1 = map_domain_page(mfn);
    rv = test_bit(L1_LOGDIRTY_IDX(pfn), l1);
    unmap_domain_page(l1);
    return rv;
}


/* Read a domain's log-dirty bitmap and stats.  If the operation is a CLEAN,
 * clear the bitmap and stats as well. */
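/*
 * The scan may be preempted: progress (the next i3/i4 indices and the number
 * of bits already processed) is stashed in d->arch.paging.preempt and
 * -ERESTART is returned, in which case the caller arranges for the domctl to
 * be continued and re-enters this function with 'resuming' set.
 */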
static int paging_log_dirty_op(struct domain *d,
                               struct xen_domctl_shadow_op *sc,
                               bool_t resuming)
{
    int rv = 0, clean = 0, peek = 1;
    unsigned long pages = 0;
    mfn_t *l4 = NULL, *l3 = NULL, *l2 = NULL;
    unsigned long *l1 = NULL;
    int i4, i3, i2;

    if ( !resuming )
    {
        /*
         * Mark dirty all currently write-mapped pages on e.g. the
         * final iteration of a save operation.
         */
        if ( is_hvm_domain(d) &&
             (sc->mode & XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL) )
            hvm_mapped_guest_frames_mark_dirty(d);

        domain_pause(d);

        /*
         * Flush dirty GFNs potentially cached by hardware.  This only needs
         * doing when not resuming: in the resuming case the domain was
         * already paused, so no new dirty pages can have appeared since.
         */
        p2m_flush_hardware_cached_dirty(d);
    }

    paging_lock(d);

    if ( !d->arch.paging.preempt.dom )
        memset(&d->arch.paging.preempt.log_dirty, 0,
               sizeof(d->arch.paging.preempt.log_dirty));
    else if ( d->arch.paging.preempt.dom != current->domain ||
              d->arch.paging.preempt.op != sc->op )
    {
        paging_unlock(d);
        ASSERT(!resuming);
        domain_unpause(d);
        return -EBUSY;
    }

    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);

    PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
                 (clean) ? "clean" : "peek",
                 d->domain_id,
                 d->arch.paging.log_dirty.fault_count,
                 d->arch.paging.log_dirty.dirty_count);

    sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
    sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;

    if ( guest_handle_is_null(sc->dirty_bitmap) )
        /* caller may have wanted just to clean the state or access stats. */
        peek = 0;

    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
        printk(XENLOG_WARNING
               "%u failed page allocs while logging dirty pages of d%d\n",
               d->arch.paging.log_dirty.failed_allocs, d->domain_id);
        rv = -ENOMEM;
        goto out;
    }

    l4 = paging_map_log_dirty_bitmap(d);
    i4 = d->arch.paging.preempt.log_dirty.i4;
    i3 = d->arch.paging.preempt.log_dirty.i3;
    pages = d->arch.paging.preempt.log_dirty.done;

    for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 )
    {
        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(l4[i4]) : NULL;
        for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ )
        {
            l2 = ((l3 && mfn_valid(l3[i3])) ?
                  map_domain_page(l3[i3]) : NULL);
            for ( i2 = 0;
                  (pages < sc->pages) && (i2 < LOGDIRTY_NODE_ENTRIES);
                  i2++ )
            {
                unsigned int bytes = PAGE_SIZE;
                l1 = ((l2 && mfn_valid(l2[i2])) ?
                      map_domain_page(l2[i2]) : NULL);
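                /*
                 * Don't copy more bytes than are needed to cover the
                 * remaining sc->pages bits (rounded up to whole bytes).
                 */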
                if ( unlikely(((sc->pages - pages + 7) >> 3) < bytes) )
                    bytes = (unsigned int)((sc->pages - pages + 7) >> 3);
                if ( likely(peek) )
                {
                    if ( (l1 ? copy_to_guest_offset(sc->dirty_bitmap,
                                                    pages >> 3, (uint8_t *)l1,
                                                    bytes)
                             : clear_guest_offset(sc->dirty_bitmap,
                                                  pages >> 3, bytes)) != 0 )
                    {
                        rv = -EFAULT;
                        goto out;
                    }
                }
                pages += bytes << 3;
                if ( l1 )
                {
                    if ( clean )
                        clear_page(l1);
                    unmap_domain_page(l1);
                }
            }
            if ( l2 )
                unmap_domain_page(l2);

            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
            {
                d->arch.paging.preempt.log_dirty.i4 = i4;
                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
                rv = -ERESTART;
                break;
            }
        }
        if ( l3 )
            unmap_domain_page(l3);

        if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 &&
             hypercall_preempt_check() )
        {
            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
            d->arch.paging.preempt.log_dirty.i3 = 0;
            rv = -ERESTART;
        }
        if ( rv )
            break;
    }
    if ( l4 )
        unmap_domain_page(l4);

    if ( !rv )
    {
        d->arch.paging.preempt.dom = NULL;
        if ( clean )
        {
            d->arch.paging.log_dirty.fault_count = 0;
            d->arch.paging.log_dirty.dirty_count = 0;
        }
    }
    else
    {
        d->arch.paging.preempt.dom = current->domain;
        d->arch.paging.preempt.op = sc->op;
        d->arch.paging.preempt.log_dirty.done = pages;
    }

    paging_unlock(d);

    if ( rv )
    {
        /* Never leave the domain paused on real errors. */
        ASSERT(rv == -ERESTART);
        return rv;
    }

    if ( pages < sc->pages )
        sc->pages = pages;
    if ( clean )
    {
        /* We also need to call the clean_dirty_bitmap() function of the
         * specific paging mode (shadow or hap).  Safe because the domain
         * is paused. */
        d->arch.paging.log_dirty.ops->clean(d);
    }
    domain_unpause(d);
    return rv;

 out:
    d->arch.paging.preempt.dom = NULL;
    paging_unlock(d);
    domain_unpause(d);

    if ( l1 )
        unmap_domain_page(l1);
    if ( l2 )
        unmap_domain_page(l2);
    if ( l3 )
        unmap_domain_page(l3);
    if ( l4 )
        unmap_domain_page(l4);

    return rv;
}

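/*
 * Note that bits are only ever set here, never cleared, so callers normally
 * pass in a zero-filled dirty_bitmap covering at least (nr + 7) / 8 bytes.
 */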
void paging_log_dirty_range(struct domain *d,
                            unsigned long begin_pfn,
                            unsigned long nr,
                            uint8_t *dirty_bitmap)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int i;
    unsigned long pfn;

    /*
     * Set l1e entries of P2M table to be read-only.
     *
     * On the first write, the guest takes a page fault, the entry is
     * changed to read-write, and on retry the write succeeds.
     *
     * We populate dirty_bitmap by looking for entries that have been
     * switched to read-write.
     */

    p2m_lock(p2m);

    for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
        if ( !p2m_change_type_one(d, pfn, p2m_ram_rw, p2m_ram_logdirty) )
            dirty_bitmap[i >> 3] |= (1 << (i & 7));

    p2m_unlock(p2m);

    guest_flush_tlb_mask(d, d->dirty_cpumask);
}

/*
 * Callers must supply log_dirty_ops for the log-dirty code to call.  This
 * function is usually invoked when paging is enabled; see shadow_enable()
 * and hap_enable() for reference.
 *
 * These function pointers must not be followed with the log-dirty lock held.
 */
void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops)
{
    d->arch.paging.log_dirty.ops = ops;
}

/************************************************/
/*           CODE FOR PAGING SUPPORT            */
/************************************************/
/* Domain paging struct initialization. */
int paging_domain_init(struct domain *d)
{
    int rc;

    if ( (rc = p2m_init(d)) != 0 )
        return rc;

    mm_lock_init(&d->arch.paging.lock);

    /* This must be initialized separately from the rest of the
     * log-dirty init code as that can be called more than once and we
     * don't want to leak any active log-dirty bitmaps */
    d->arch.paging.log_dirty.top = INVALID_MFN;

    /*
     * Shadow pagetables are the default, but we will use
     * hardware assistance if it's available and enabled.
     */
    if ( hap_enabled(d) )
        hap_domain_init(d);
    else
        rc = shadow_domain_init(d);

    return rc;
}

/* vcpu paging struct initialization goes here */
void paging_vcpu_init(struct vcpu *v)
{
    if ( hap_enabled(v->domain) )
        hap_vcpu_init(v);
    else
        shadow_vcpu_init(v);
}


int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                  XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                  bool_t resuming)
{
    int rc;

    if ( unlikely(d == current->domain) )
    {
        gdprintk(XENLOG_INFO, "Tried to do a paging op on itself.\n");
        return -EINVAL;
    }

    if ( unlikely(d->is_dying) )
    {
        gdprintk(XENLOG_INFO, "Ignoring paging op on dying domain %u\n",
                 d->domain_id);
        return 0;
    }

    if ( unlikely(d->vcpu == NULL) || unlikely(d->vcpu[0] == NULL) )
    {
        gdprintk(XENLOG_DEBUG, "Paging op on a domain (%u) with no vcpus\n",
                 d->domain_id);
        return -EINVAL;
    }

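    /*
     * A continuation must come from the same domain that started the
     * preempted operation and must name the same op; conversely, while an
     * operation is pending, new requests (other than GET_ALLOCATION) are
     * refused with -EBUSY.
     */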
    if ( resuming
         ? (d->arch.paging.preempt.dom != current->domain ||
            d->arch.paging.preempt.op != sc->op)
         : (d->arch.paging.preempt.dom &&
            sc->op != XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION) )
    {
        printk(XENLOG_G_DEBUG
               "%pv: Paging op %#x on Dom%u with unfinished prior op %#x by Dom%u\n",
               current, sc->op, d->domain_id, d->arch.paging.preempt.op,
               d->arch.paging.preempt.dom
               ? d->arch.paging.preempt.dom->domain_id : DOMID_INVALID);
        return -EBUSY;
    }

    rc = xsm_shadow_control(XSM_HOOK, d, sc->op);
    if ( rc )
        return rc;

    /* Code to handle log-dirty. Note that some log-dirty operations
     * piggy-back on shadow operations. For example, when
     * XEN_DOMCTL_SHADOW_OP_OFF is called, it first checks whether log-dirty
     * mode is enabled. If it is, we disable log-dirty mode and continue with
     * the shadow code. For this reason, we need to further dispatch the
     * domctl to the next-level paging code (shadow or hap).
     */
    switch ( sc->op )
    {

    case XEN_DOMCTL_SHADOW_OP_ENABLE:
        if ( !(sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY) )
            break;
        /* Else fall through... */
    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
        return paging_log_dirty_enable(d, true);

    case XEN_DOMCTL_SHADOW_OP_OFF:
        if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 )
            return rc;
        break;

    case XEN_DOMCTL_SHADOW_OP_CLEAN:
    case XEN_DOMCTL_SHADOW_OP_PEEK:
        if ( sc->mode & ~XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL )
            return -EINVAL;
        return paging_log_dirty_op(d, sc, resuming);
    }

    /* Here, dispatch domctl to the appropriate paging code */
    if ( hap_enabled(d) )
        return hap_domctl(d, sc, u_domctl);
    else
        return shadow_domctl(d, sc, u_domctl);
}

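/*
 * Re-entered via the __HYPERVISOR_arch_1 continuation when a shadow op was
 * preempted: revalidate the domctl and re-enter paging_domctl() with
 * 'resuming' set.
 */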
long paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    struct xen_domctl op;
    struct domain *d;
    int ret;

    if ( copy_from_guest(&op, u_domctl, 1) )
        return -EFAULT;

    if ( op.interface_version != XEN_DOMCTL_INTERFACE_VERSION ||
         op.cmd != XEN_DOMCTL_shadow_op )
        return -EOPNOTSUPP;

    d = rcu_lock_domain_by_id(op.domain);
    if ( d == NULL )
        return -ESRCH;

    ret = xsm_domctl(XSM_OTHER, d, op.cmd);
    if ( !ret )
    {
        if ( domctl_lock_acquire() )
        {
            ret = paging_domctl(d, &op.u.shadow_op, u_domctl, 1);

            domctl_lock_release();
        }
        else
            ret = -ERESTART;
    }

    rcu_unlock_domain(d);

    if ( ret == -ERESTART )
        ret = hypercall_create_continuation(__HYPERVISOR_arch_1,
                                            "h", u_domctl);
    else if ( __copy_field_to_guest(u_domctl, &op, u.shadow_op) )
        ret = -EFAULT;

    return ret;
}

/* Call when destroying a domain */
int paging_teardown(struct domain *d)
{
    int rc;
    bool preempted = false;

    if ( hap_enabled(d) )
        hap_teardown(d, &preempted);
    else
        shadow_teardown(d, &preempted);

    if ( preempted )
        return -ERESTART;

    /* clean up log dirty resources. */
    rc = paging_free_log_dirty_bitmap(d, 0);
    if ( rc == -ERESTART )
        return rc;

    /* Move populate-on-demand cache back to domain_list for destruction */
    rc = p2m_pod_empty_cache(d);

    return rc;
}

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d)
{
    if ( hap_enabled(d) )
        hap_final_teardown(d);
    else
        shadow_final_teardown(d);

    p2m_final_teardown(d);
}

/* Enable an arbitrary paging-assistance mode.  Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode)
{
    /* Unrecognised paging mode? */
    if ( mode & ~PG_MASK )
        return -EINVAL;

    /* All of external|translate|refcounts, or none. */
    switch ( mode & (PG_external | PG_translate | PG_refcounts) )
    {
    case 0:
#if PG_external | PG_translate | PG_refcounts
    case PG_external | PG_translate | PG_refcounts:
#endif
        break;
    default:
        return -EINVAL;
    }

    if ( hap_enabled(d) )
        return hap_enable(d, mode);
    else
        return shadow_enable(d, mode);
}

#ifdef CONFIG_HVM
/* Called from the guest to indicate that a process is being torn down
 * and therefore its pagetables will soon be discarded */
void pagetable_dying(paddr_t gpa)
{
#ifdef CONFIG_SHADOW_PAGING
    struct vcpu *curr = current;

    ASSERT(paging_mode_shadow(curr->domain));

    curr->arch.paging.mode->shadow.pagetable_dying(gpa);
#else
    BUG();
#endif
}
#endif /* CONFIG_HVM */

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d)
{
    if ( paging_mode_enabled(d) )
    {
        printk("    paging assistance: ");
        if ( paging_mode_shadow(d) )
            printk("shadow ");
        if ( paging_mode_sh_forced(d) )
            printk("forced ");
        if ( paging_mode_hap(d) )
            printk("hap ");
        if ( paging_mode_refcounts(d) )
            printk("refcounts ");
        if ( paging_mode_log_dirty(d) )
            printk("log_dirty ");
        if ( paging_mode_translate(d) )
            printk("translate ");
        if ( paging_mode_external(d) )
            printk("external ");
        printk("\n");
    }
}

void paging_dump_vcpu_info(struct vcpu *v)
{
    if ( paging_mode_enabled(v->domain) )
    {
        printk("    paging assistance: ");
        if ( paging_mode_shadow(v->domain) )
        {
            if ( paging_get_hostmode(v) )
                printk("shadowed %u-on-%u\n",
                       paging_get_hostmode(v)->guest_levels,
                       paging_get_hostmode(v)->shadow.shadow_levels);
            else
                printk("not shadowed\n");
        }
        else if ( paging_mode_hap(v->domain) && paging_get_hostmode(v) )
            printk("hap, %u levels\n",
                   paging_get_hostmode(v)->guest_levels);
        else
            printk("none\n");
    }
}

const struct paging_mode *paging_get_mode(struct vcpu *v)
{
    if ( !nestedhvm_is_n2(v) )
        return paging_get_hostmode(v);

    return paging_get_nestedmode(v);
}

#ifdef CONFIG_HVM
void paging_update_nestedmode(struct vcpu *v)
{
    ASSERT(nestedhvm_enabled(v->domain));
    if ( nestedhvm_paging_mode_hap(v) )
        /* nested-on-nested */
        v->arch.paging.nestedmode = hap_paging_get_mode(v);
    else
        /* TODO: shadow-on-shadow */
        v->arch.paging.nestedmode = NULL;
    hvm_asid_flush_vcpu(v);
}
#endif

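/*
 * Write a new p2m entry.  Route the write through the paging mode's
 * write_p2m_entry() hook when one is in place, so shadow/HAP code can see
 * and react to p2m changes; otherwise just write the entry directly.
 */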
int paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
                           l1_pgentry_t *p, l1_pgentry_t new,
                           unsigned int level)
{
    struct domain *d = p2m->domain;
    struct vcpu *v = current;
    int rc = 0;

    if ( v->domain != d )
        v = d->vcpu ? d->vcpu[0] : NULL;
    if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v) != NULL) )
        rc = paging_get_hostmode(v)->write_p2m_entry(p2m, gfn, p, new, level);
    else
        safe_write_pte(p, new);

    return rc;
}

#ifdef CONFIG_HVM
int __init paging_set_allocation(struct domain *d, unsigned int pages,
                                 bool *preempted)
{
    int rc;

    ASSERT(paging_mode_enabled(d));

    paging_lock(d);
    if ( hap_enabled(d) )
        rc = hap_set_allocation(d, pages, preempted);
    else
        rc = shadow_set_allocation(d, pages, preempted);
    paging_unlock(d);

    return rc;
}
#endif

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */