/******************************************************************************
 * arch/x86/mm/p2m-pt.c
 *
 * Implementation of p2m datastructures as pagetables, for use by
 * NPT and shadow-pagetable code
 *
 * Parts of this code are Copyright (c) 2009-2011 by Citrix Systems, Inc.
 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/vm_event.h>
#include <xen/event.h>
#include <xen/trace.h>
#include <public/vm_event.h>
#include <asm/altp2m.h>
#include <asm/domain.h>
#include <asm/page.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/mem_sharing.h>
#include <asm/hvm/nestedhvm.h>

#include "mm-locks.h"

/*
 * We may store INVALID_MFN in PTEs.  We need to clip this to avoid trampling
 * over higher-order bits (NX, p2m type). We seem to not need to unclip on the
 * read path, as callers are concerned only with p2m type in such cases.
 */
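/*
 * Note: the 2M/1G variants additionally mask out _PAGE_PSE_PAT, since for
 * superpage entries the low bit of the frame field doubles as the PAT bit;
 * clipping INVALID_MFN must not accidentally set it.
 */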
#define p2m_l1e_from_pfn(pfn, flags)    \
    l1e_from_pfn((pfn) & (PADDR_MASK >> PAGE_SHIFT), (flags))
#define p2m_l2e_from_pfn(pfn, flags)    \
    l2e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
                          >> PAGE_SHIFT), (flags) | _PAGE_PSE)
#define p2m_l3e_from_pfn(pfn, flags)    \
    l3e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
                          >> PAGE_SHIFT), (flags) | _PAGE_PSE)

/* PTE flags for the various types of p2m entry */
#define P2M_BASE_FLAGS \
        (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)

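/*
 * An entry is marked for lazy type re-calculation by clearing _PAGE_USER
 * (and, with it, _PAGE_ACCESSED): needs_recalc() tests the U bit, while
 * valid_recalc() sanity-checks that both bits were cleared together.  Marked
 * entries have their p2m type recomputed by do_recalc() before normal use
 * resumes (for NPT this is typically driven from the nested page fault path
 * via p2m_pt_handle_deferred_changes()).
 */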
#define RECALC_FLAGS (_PAGE_USER|_PAGE_ACCESSED)
#define set_recalc(level, ent) level##e_remove_flags(ent, RECALC_FLAGS)
#define clear_recalc(level, ent) level##e_add_flags(ent, RECALC_FLAGS)
#define _needs_recalc(flags) (!((flags) & _PAGE_USER))
#define needs_recalc(level, ent) _needs_recalc(level##e_get_flags(ent))
#define valid_recalc(level, ent) (!(level##e_get_flags(ent) & _PAGE_ACCESSED))

static unsigned long p2m_type_to_flags(const struct p2m_domain *p2m,
                                       p2m_type_t t,
                                       mfn_t mfn,
                                       unsigned int level)
{
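    /*
     * The p2m type is encoded in the flag bits from 12 upwards (seven bits,
     * hence the 0x7f mask); the l*e_from_pfn() helpers place these into the
     * PTE's software-available bits, out of the way of the low AVL bits that
     * AMD IOMMU page tables re-use (cf. p2m_flags_to_type()).
     */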
    unsigned long flags = (unsigned long)(t & 0x7f) << 12;

    switch(t)
    {
    case p2m_invalid:
    case p2m_mmio_dm:
    case p2m_populate_on_demand:
    case p2m_ram_paging_out:
    case p2m_ram_paged:
    case p2m_ram_paging_in:
    default:
        return flags | _PAGE_NX_BIT;
    case p2m_grant_map_ro:
        return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT;
    case p2m_ioreq_server:
        flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
        if ( p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE )
            return flags & ~_PAGE_RW;
        return flags;
    case p2m_ram_ro:
    case p2m_ram_logdirty:
    case p2m_ram_shared:
        return flags | P2M_BASE_FLAGS;
    case p2m_ram_rw:
        return flags | P2M_BASE_FLAGS | _PAGE_RW;
    case p2m_grant_map_rw:
    case p2m_map_foreign:
        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
    case p2m_mmio_direct:
        if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
            flags |= _PAGE_RW;
        else
        {
            flags |= _PAGE_PWT;
            ASSERT(!level);
        }
        return flags | P2M_BASE_FLAGS | _PAGE_PCD;
    }
}


// Find the next level's P2M entry, checking for out-of-range gfns...
// Returns NULL on error.
//
static l1_pgentry_t *
p2m_find_entry(void *table, unsigned long *gfn_remainder,
                   unsigned long gfn, uint32_t shift, uint32_t max)
{
    u32 index;

    index = *gfn_remainder >> shift;
    if ( index >= max )
    {
        P2M_DEBUG("gfn=%#lx out of range "
                  "(gfn_remainder=%#lx shift=%d index=%#x max=%#x)\n",
                  gfn, *gfn_remainder, shift, index, max);
        return NULL;
    }
    *gfn_remainder &= (1 << shift) - 1;
    return (l1_pgentry_t *)table + index;
}

/* Free intermediate tables from a p2m sub-tree */
static void
p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order)
{
    /* End if the entry is a leaf entry. */
    if ( page_order == PAGE_ORDER_4K
         || !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT)
         || (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
        return;

    if ( page_order > PAGE_ORDER_2M )
    {
        l1_pgentry_t *l3_table = map_domain_page(l1e_get_mfn(*p2m_entry));

        for ( int i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
            p2m_free_entry(p2m, l3_table + i, page_order - 9);
        unmap_domain_page(l3_table);
    }

    p2m_free_ptp(p2m, l1e_get_page(*p2m_entry));
}

/*
 * Walk one level of the P2M table, allocating a new table if required.
 *
 * Returns: 0 for success, -errno for failure.
 */
static int
p2m_next_level(struct p2m_domain *p2m, void **table,
               unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
               u32 max, unsigned int level, bool_t unmap)
{
    l1_pgentry_t *p2m_entry, new_entry;
    void *next;
    unsigned int flags;
    int rc;
    mfn_t mfn;

    if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
                                      shift, max)) )
        return -ENOENT;

    flags = l1e_get_flags(*p2m_entry);

    /* PoD/paging: Not present doesn't imply empty. */
    if ( !flags )
    {
        mfn = p2m_alloc_ptp(p2m, level);

        if ( mfn_eq(mfn, INVALID_MFN) )
            return -ENOMEM;

        new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);

        rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1);
        if ( rc )
            goto error;
    }
    else if ( flags & _PAGE_PSE )
    {
        /* Split superpages into smaller ones. */
        unsigned long pfn = l1e_get_pfn(*p2m_entry);
        l1_pgentry_t *l1_entry;
        unsigned int i;

        switch ( level )
        {
        case 2:
            break;

        case 1:
            /*
             * New splintered mappings inherit the flags of the old superpage,
             * with a little reorganisation for the _PAGE_PSE_PAT bit.
             */
            if ( pfn & 1 )           /* ==> _PAGE_PSE_PAT was set */
                pfn -= 1;            /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
            else
                flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
            break;

        default:
            ASSERT_UNREACHABLE();
            return -EINVAL;
        }

        mfn = p2m_alloc_ptp(p2m, level);
        if ( mfn_eq(mfn, INVALID_MFN) )
            return -ENOMEM;

        l1_entry = map_domain_page(mfn);

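        /*
         * Fill the new table: entry i maps the i-th piece of the old
         * superpage, inheriting the (PAT-adjusted) flags from above.
         */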
        for ( i = 0; i < (1u << PAGETABLE_ORDER); i++ )
        {
            new_entry = l1e_from_pfn(pfn | (i << ((level - 1) * PAGETABLE_ORDER)),
                                     flags);
            rc = p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, level);
            if ( rc )
            {
                unmap_domain_page(l1_entry);
                goto error;
            }
        }

        unmap_domain_page(l1_entry);

        new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
        rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry,
                                  level + 1);
        if ( rc )
            goto error;
    }
    else
        ASSERT(flags & _PAGE_PRESENT);

    next = map_domain_page(l1e_get_mfn(*p2m_entry));
    if ( unmap )
        unmap_domain_page(*table);
    *table = next;

    return 0;

 error:
    ASSERT(rc && mfn_valid(mfn));
    ASSERT_UNREACHABLE();
    p2m_free_ptp(p2m, mfn_to_page(mfn));
    return rc;
}

/*
 * Mark (by clearing the U flag) all valid, present entries at the targeted
 * level as needing P2M type re-calculation, for the passed in GFN range.
 * The range is guaranteed not to cross a page (table) boundary at that
 * level.
 */
static int p2m_pt_set_recalc_range(struct p2m_domain *p2m,
                                   unsigned int level,
                                   unsigned long first_gfn,
                                   unsigned long last_gfn)
{
    void *table;
    unsigned long gfn_remainder = first_gfn, remainder;
    unsigned int i;
    l1_pgentry_t *pent, *plast;
    int err = 0;

    table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
    for ( i = 4; i-- > level; )
    {
        remainder = gfn_remainder;
        pent = p2m_find_entry(table, &remainder, first_gfn,
                              i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
        if ( !pent )
        {
            err = -EINVAL;
            goto out;
        }

        if ( !(l1e_get_flags(*pent) & _PAGE_PRESENT) )
            goto out;

        err = p2m_next_level(p2m, &table, &gfn_remainder, first_gfn,
                             i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER,
                             i, 1);
        if ( err )
            goto out;
    }

    remainder = gfn_remainder + (last_gfn - first_gfn);
    pent = p2m_find_entry(table, &gfn_remainder, first_gfn,
                          i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
    plast = p2m_find_entry(table, &remainder, last_gfn,
                           i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
    if ( pent && plast )
        for ( ; pent <= plast; ++pent )
        {
            l1_pgentry_t e = *pent;

            if ( (l1e_get_flags(e) & _PAGE_PRESENT) && !needs_recalc(l1, e) )
            {
                set_recalc(l1, e);
                err = p2m->write_p2m_entry(p2m, first_gfn, pent, e, level);
                if ( err )
                {
                    ASSERT_UNREACHABLE();
                    goto out;
                }
            }
            first_gfn += 1UL << (i * PAGETABLE_ORDER);
        }
    else
        err = -EIO;

 out:
    unmap_domain_page(table);

    return err;
}

/*
 * Handle possibly necessary P2M type re-calculation (U flag clear for a
 * present entry) for the entries in the page table hierarchy for the given
 * GFN. Propagate the re-calculation flag down to the next page table level
 * for entries not involved in the translation of the given GFN.
 */
static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
{
    void *table;
    unsigned long gfn_remainder = gfn;
    unsigned int level = 4;
    l1_pgentry_t *pent;
    int err = 0;
    bool recalc_done = false;

    table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
    while ( --level )
    {
        unsigned long remainder = gfn_remainder;

        pent = p2m_find_entry(table, &remainder, gfn,
                              level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
        if ( !pent || !(l1e_get_flags(*pent) & _PAGE_PRESENT) )
            goto out;

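        /*
         * A superpage marked for re-calculation only needs splitting (done
         * by p2m_next_level() below) if its type is changeable and the GFN
         * range is only partially covered by the log-dirty ranges; otherwise
         * it can be handled as a single leaf further down.
         */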
        if ( l1e_get_flags(*pent) & _PAGE_PSE )
        {
            unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);

            ASSERT(p2m_flags_to_type(l1e_get_flags(*pent)) != p2m_ioreq_server);
            if ( !needs_recalc(l1, *pent) ||
                 !p2m_is_changeable(p2m_flags_to_type(l1e_get_flags(*pent))) ||
                 p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask) >= 0 )
                break;
        }

        err = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
                             level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER,
                             level, 0);
        if ( err )
            goto out;

        if ( needs_recalc(l1, *pent) )
        {
            l1_pgentry_t e = *pent, *ptab = table;
            unsigned int i;

            if ( !valid_recalc(l1, e) )
                P2M_DEBUG("bogus recalc state at d%d:%lx:%u\n",
                          p2m->domain->domain_id, gfn, level);
            remainder = gfn_remainder;
            for ( i = 0; i < (1 << PAGETABLE_ORDER); ++i )
            {
                l1_pgentry_t ent = ptab[i];

                if ( (l1e_get_flags(ent) & _PAGE_PRESENT) &&
                     !needs_recalc(l1, ent) )
                {
                    set_recalc(l1, ent);
                    err = p2m->write_p2m_entry(p2m, gfn - remainder, &ptab[i],
                                               ent, level);
                    if ( err )
                    {
                        ASSERT_UNREACHABLE();
                        break;
                    }
                }
                remainder -= 1UL << ((level - 1) * PAGETABLE_ORDER);
            }
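            /*
             * Make sure the re-calc marks propagated into the lower level
             * table are visible before the parent entry's own mark gets
             * cleared below.
             */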
            smp_wmb();
            if ( !err )
            {
                clear_recalc(l1, e);
                err = p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
                ASSERT(!err);

                recalc_done = true;
            }
        }
        unmap_domain_page((void *)((unsigned long)pent & PAGE_MASK));
        if ( unlikely(err) )
            goto out;
    }

    pent = p2m_find_entry(table, &gfn_remainder, gfn,
                          level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
    if ( pent && (l1e_get_flags(*pent) & _PAGE_PRESENT) &&
         needs_recalc(l1, *pent) )
    {
        l1_pgentry_t e = *pent;
        p2m_type_t ot, nt;
        unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);

        if ( !valid_recalc(l1, e) )
            P2M_DEBUG("bogus recalc leaf at d%d:%lx:%u\n",
                      p2m->domain->domain_id, gfn, level);
        ot = p2m_flags_to_type(l1e_get_flags(e));
        nt = p2m_recalc_type_range(true, ot, p2m, gfn & mask, gfn | ~mask);
        if ( nt != ot )
        {
            unsigned long mfn = l1e_get_pfn(e);
            unsigned long flags = p2m_type_to_flags(p2m, nt,
                                                    _mfn(mfn), level);

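            /*
             * For superpage leaves, re-encode the PAT bit: in the flags it
             * sits at the _PAGE_PAT (== _PAGE_PSE) position, whereas in the
             * entry it occupies the low bit of the frame field
             * (_PAGE_PSE_PAT), and _PAGE_PSE must be (re)asserted.
             */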
            if ( level )
            {
                if ( flags & _PAGE_PAT )
                {
                     BUILD_BUG_ON(_PAGE_PAT != _PAGE_PSE);
                     mfn |= _PAGE_PSE_PAT >> PAGE_SHIFT;
                }
                else
                     mfn &= ~((unsigned long)_PAGE_PSE_PAT >> PAGE_SHIFT);
                flags |= _PAGE_PSE;
            }

            e = l1e_from_pfn(mfn, flags);
            ASSERT(!needs_recalc(l1, e));
        }
        else
            clear_recalc(l1, e);
        err = p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
        ASSERT(!err);

        recalc_done = true;
    }

 out:
    unmap_domain_page(table);

    return err ?: recalc_done;
}

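/*
 * Flush out any deferred type re-calculation for the page containing @gpa,
 * e.g. when a nested page fault hits a recalc-marked (U-clear) entry.
 *
 * Returns: the (boolean) do_recalc() result on success, -errno on failure.
 */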
int p2m_pt_handle_deferred_changes(uint64_t gpa)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(current->domain);
    int rc;

    /*
     * Should altp2m ever be enabled for NPT / shadow use, this code
     * should be updated to make use of the active altp2m, like
     * ept_handle_misconfig().
     */
    ASSERT(!altp2m_active(current->domain));

    p2m_lock(p2m);
    rc = do_recalc(p2m, PFN_DOWN(gpa));
    p2m_unlock(p2m);

    return rc;
}

/* Checks only applicable to entries with order > PAGE_ORDER_4K */
static void check_entry(mfn_t mfn, p2m_type_t new, p2m_type_t old,
                        unsigned int order)
{
    ASSERT(order > PAGE_ORDER_4K);
    ASSERT(old != p2m_ioreq_server);
    if ( new == p2m_mmio_direct )
        ASSERT(!mfn_eq(mfn, INVALID_MFN) &&
               !rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
                                        mfn_x(mfn) + (1ul << order)));
    else if ( p2m_allows_invalid_mfn(new) || new == p2m_invalid ||
              new == p2m_mmio_dm )
        ASSERT(mfn_valid(mfn) || mfn_eq(mfn, INVALID_MFN));
    else
        ASSERT(mfn_valid(mfn));
}

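/*
 * Insert (or replace) the mapping of the GFN range [gfn, gfn + 2^page_order)
 * with the given MFN and type in the p2m page tables, updating IOMMU
 * mappings as needed and freeing any intermediate table that a superpage
 * replaces.
 */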
/* Returns: 0 for success, -errno for failure */
static int
p2m_pt_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn,
                 unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma,
                 int sve)
{
    struct domain *d = p2m->domain;
    /* XXX -- this might be able to be faster iff current->domain == d */
    void *table;
    unsigned long gfn = gfn_x(gfn_);
    unsigned long gfn_remainder = gfn;
    l1_pgentry_t *p2m_entry, entry_content;
    /* Intermediate table to free if we're replacing it with a superpage. */
    l1_pgentry_t intermediate_entry = l1e_empty();
    l2_pgentry_t l2e_content;
    l3_pgentry_t l3e_content;
    int rc;
    unsigned int iommu_pte_flags = p2m_get_iommu_flags(p2mt, mfn);
    /*
     * old_mfn and iommu_old_flags control possible flush/update needs on the
     * IOMMU: We need to flush when MFN or flags (i.e. permissions) change.
     * iommu_old_flags being initialized to zero covers the case of the entry
     * getting replaced being a non-present (leaf or intermediate) one. For
     * present leaf entries the real value will get calculated below, while
     * for present intermediate entries ~0 (guaranteed != iommu_pte_flags)
     * will be used (to cover all cases of what the leaf entries underneath
     * the intermediate one might be).
     */
    unsigned int flags, iommu_old_flags = 0;
    unsigned long old_mfn = mfn_x(INVALID_MFN);

    if ( !sve )
        return -EOPNOTSUPP;

    if ( tb_init_done )
    {
        struct {
            u64 gfn, mfn;
            int p2mt;
            int d:16, order:16;
        } t;

        t.gfn = gfn;
        t.mfn = mfn_x(mfn);
        t.p2mt = p2mt;
        t.d = d->domain_id;
        t.order = page_order;

        __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
    }

    /* Carry out any still-pending earlier changes first. */
    rc = do_recalc(p2m, gfn);
    if ( rc < 0 )
        return rc;

    table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
    rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
                        L4_PAGETABLE_SHIFT - PAGE_SHIFT,
                        L4_PAGETABLE_ENTRIES, 3, 1);
    if ( rc )
        goto out;

    /*
     * Try to install a 1GB page mapping if this feature is supported.
     */
    if ( page_order == PAGE_ORDER_1G )
    {
        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                                   L3_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);
        flags = l1e_get_flags(*p2m_entry);
        if ( flags & _PAGE_PRESENT )
        {
            if ( flags & _PAGE_PSE )
            {
                old_mfn = l1e_get_pfn(*p2m_entry);
                iommu_old_flags =
                    p2m_get_iommu_flags(p2m_flags_to_type(flags),
                                        _mfn(old_mfn));
            }
            else
            {
                iommu_old_flags = ~0;
                intermediate_entry = *p2m_entry;
            }
        }

        check_entry(mfn, p2mt, p2m_flags_to_type(flags), page_order);
        l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
            ? p2m_l3e_from_pfn(mfn_x(mfn),
                               p2m_type_to_flags(p2m, p2mt, mfn, 2))
            : l3e_empty();
        entry_content.l1 = l3e_content.l3;

        rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 3);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
        if ( rc )
            goto out;
    }
    else
    {
        rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
                            L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                            L3_PAGETABLE_ENTRIES, 2, 1);
        if ( rc )
            goto out;
    }

    if ( page_order == PAGE_ORDER_4K )
    {
        rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
                            L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                            L2_PAGETABLE_ENTRIES, 1, 1);
        if ( rc )
            goto out;

        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   0, L1_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);
        old_mfn = l1e_get_pfn(*p2m_entry);
        iommu_old_flags =
            p2m_get_iommu_flags(p2m_flags_to_type(l1e_get_flags(*p2m_entry)),
                                _mfn(old_mfn));

        if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
            entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
                                         p2m_type_to_flags(p2m, p2mt, mfn, 0));
        else
            entry_content = l1e_empty();

        /* level 1 entry */
        rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 1);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
        if ( rc )
            goto out;
    }
    else if ( page_order == PAGE_ORDER_2M )
    {
        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                                   L2_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);
        flags = l1e_get_flags(*p2m_entry);
        if ( flags & _PAGE_PRESENT )
        {
            if ( flags & _PAGE_PSE )
            {
                old_mfn = l1e_get_pfn(*p2m_entry);
                iommu_old_flags =
                    p2m_get_iommu_flags(p2m_flags_to_type(flags),
                                        _mfn(old_mfn));
            }
            else
            {
                iommu_old_flags = ~0;
                intermediate_entry = *p2m_entry;
            }
        }

        check_entry(mfn, p2mt, p2m_flags_to_type(flags), page_order);
        l2e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
            ? p2m_l2e_from_pfn(mfn_x(mfn),
                               p2m_type_to_flags(p2m, p2mt, mfn, 1))
            : l2e_empty();
        entry_content.l1 = l2e_content.l2;

        rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 2);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
        if ( rc )
            goto out;
    }

    /* Track the highest gfn for which we have ever had a valid mapping */
    if ( p2mt != p2m_invalid
         && (gfn + (1UL << page_order) - 1 > p2m->max_mapped_pfn) )
        p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;

    if ( need_iommu_pt_sync(p2m->domain) &&
         (iommu_old_flags != iommu_pte_flags || old_mfn != mfn_x(mfn)) )
        rc = iommu_pte_flags
             ? iommu_legacy_map(d, _dfn(gfn), mfn, page_order,
                                iommu_pte_flags)
             : iommu_legacy_unmap(d, _dfn(gfn), page_order);

    /*
     * Free old intermediate tables if necessary.  This has to be the
     * last thing we do, after removal from the IOMMU tables, so as to
     * avoid a potential use-after-free.
     */
    if ( l1e_get_flags(intermediate_entry) & _PAGE_PRESENT )
        p2m_free_entry(p2m, &intermediate_entry, page_order);

 out:
    unmap_domain_page(table);
    return rc;
}

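/*
 * Look up the MFN and type for the given GFN.  *t and *a always get written
 * (access is a constant p2m_access_rwx here); with P2M_ALLOC the lookup may
 * populate PoD entries on the way.  Returns INVALID_MFN when no valid
 * mapping exists.
 */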
static mfn_t
p2m_pt_get_entry(struct p2m_domain *p2m, gfn_t gfn_,
                 p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
                 unsigned int *page_order, bool_t *sve)
{
    mfn_t mfn;
    unsigned long gfn = gfn_x(gfn_);
    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
    l2_pgentry_t *l2e;
    l1_pgentry_t *l1e;
    unsigned int flags;
    p2m_type_t l1t;
    bool_t recalc;

    ASSERT(paging_mode_translate(p2m->domain));

    if ( sve )
        *sve = 1;

    /* XXX This is for compatibility with the old model, where anything not
     * XXX marked as RAM was considered to be emulated MMIO space.
     * XXX Once we start explicitly registering MMIO regions in the p2m
     * XXX we will return p2m_invalid for unmapped gfns */
    *t = p2m_mmio_dm;
    /* Not implemented except with EPT */
    *a = p2m_access_rwx;

    if ( gfn > p2m->max_mapped_pfn )
    {
        /* This pfn is higher than the highest the p2m map currently holds */
        if ( page_order )
        {
            for ( *page_order = 3 * PAGETABLE_ORDER; *page_order;
                  *page_order -= PAGETABLE_ORDER )
                if ( (gfn & ~((1UL << *page_order) - 1)) >
                     p2m->max_mapped_pfn )
                    break;
        }
        return INVALID_MFN;
    }

    mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));

    {
        l4_pgentry_t *l4e = map_domain_page(mfn);
        l4e += l4_table_offset(addr);
        if ( page_order )
            *page_order = 3 * PAGETABLE_ORDER;
        if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
        {
            unmap_domain_page(l4e);
            return INVALID_MFN;
        }
        mfn = l4e_get_mfn(*l4e);
        recalc = needs_recalc(l4, *l4e);
        unmap_domain_page(l4e);
    }
    {
        l3_pgentry_t *l3e = map_domain_page(mfn);
        l3e += l3_table_offset(addr);
        if ( page_order )
            *page_order = 2 * PAGETABLE_ORDER;

pod_retry_l3:
        flags = l3e_get_flags(*l3e);
        if ( !(flags & _PAGE_PRESENT) )
        {
            if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
            {
                if ( q & P2M_ALLOC )
                {
                    if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_1G) )
                        goto pod_retry_l3;
                    gdprintk(XENLOG_ERR, "%s: 1GB PoD allocation failed\n", __func__);
                }
                else
                    *t = p2m_populate_on_demand;
            }
            unmap_domain_page(l3e);
            return INVALID_MFN;
        }
        if ( flags & _PAGE_PSE )
        {
            mfn = _mfn(l3e_get_pfn(*l3e) +
                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
                       l1_table_offset(addr));
            *t = p2m_recalc_type(recalc || _needs_recalc(flags),
                                 p2m_flags_to_type(flags), p2m, gfn);
            unmap_domain_page(l3e);

            ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
            return (p2m_is_valid(*t)) ? mfn : INVALID_MFN;
        }

        mfn = l3e_get_mfn(*l3e);
        if ( _needs_recalc(flags) )
            recalc = 1;
        unmap_domain_page(l3e);
    }

    l2e = map_domain_page(mfn);
    l2e += l2_table_offset(addr);
    if ( page_order )
        *page_order = PAGETABLE_ORDER;

pod_retry_l2:
    flags = l2e_get_flags(*l2e);
    if ( !(flags & _PAGE_PRESENT) )
    {
        /* PoD: Try to populate a 2-meg chunk */
        if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
        {
            if ( q & P2M_ALLOC ) {
                if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_2M) )
                    goto pod_retry_l2;
            } else
                *t = p2m_populate_on_demand;
        }

        unmap_domain_page(l2e);
        return INVALID_MFN;
    }
    if ( flags & _PAGE_PSE )
    {
        mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
        *t = p2m_recalc_type(recalc || _needs_recalc(flags),
                             p2m_flags_to_type(flags), p2m, gfn);
        unmap_domain_page(l2e);

        ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
        return (p2m_is_valid(*t)) ? mfn : INVALID_MFN;
    }

    mfn = l2e_get_mfn(*l2e);
    if ( needs_recalc(l2, *l2e) )
        recalc = 1;
    unmap_domain_page(l2e);

    l1e = map_domain_page(mfn);
    l1e += l1_table_offset(addr);
    if ( page_order )
        *page_order = 0;

pod_retry_l1:
    flags = l1e_get_flags(*l1e);
    l1t = p2m_flags_to_type(flags);
    if ( !(flags & _PAGE_PRESENT) && !p2m_is_paging(l1t) )
    {
        /* PoD: Try to populate */
        if ( l1t == p2m_populate_on_demand )
        {
            if ( q & P2M_ALLOC ) {
                if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_4K) )
                    goto pod_retry_l1;
            } else
                *t = p2m_populate_on_demand;
        }

        unmap_domain_page(l1e);
        return INVALID_MFN;
    }
    mfn = l1e_get_mfn(*l1e);
    *t = p2m_recalc_type(recalc || _needs_recalc(flags), l1t, p2m, gfn);
    unmap_domain_page(l1e);

    ASSERT(mfn_valid(mfn) || !p2m_is_any_ram(*t) || p2m_is_paging(*t));
    return (p2m_is_valid(*t) || p2m_is_any_ram(*t)) ? mfn : INVALID_MFN;
}

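/*
 * Lazily change entry types across the whole p2m: mark every present top
 * level entry as needing re-calculation, so types get recomputed on later
 * use (the ot/nt arguments are not consumed here; the new type is derived at
 * recalc time), and flush guest TLBs if anything was marked.
 */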
static void p2m_pt_change_entry_type_global(struct p2m_domain *p2m,
                                            p2m_type_t ot, p2m_type_t nt)
{
    l1_pgentry_t *tab;
    unsigned long gfn = 0;
    unsigned int i, changed;
    const struct domain *d = p2m->domain;

    if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) == 0 )
        return;

    ASSERT(hap_enabled(d));

    tab = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
    for ( changed = i = 0; i < (1 << PAGETABLE_ORDER); ++i )
    {
        l1_pgentry_t e = tab[i];

        if ( (l1e_get_flags(e) & _PAGE_PRESENT) &&
             !needs_recalc(l1, e) )
        {
            int rc;

            set_recalc(l1, e);
            rc = p2m->write_p2m_entry(p2m, gfn, &tab[i], e, 4);
            if ( rc )
            {
                ASSERT_UNREACHABLE();
                break;
            }
            ++changed;
        }
        gfn += 1UL << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
    }
    unmap_domain_page(tab);

    if ( changed )
         guest_flush_tlb_mask(d, d->dirty_cpumask);
}

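/*
 * Same as above, but restricted to [first_gfn, last_gfn]: walk the range
 * level by level, marking the largest naturally aligned sub-ranges at each
 * level for re-calculation via p2m_pt_set_recalc_range().
 */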
static int p2m_pt_change_entry_type_range(struct p2m_domain *p2m,
                                          p2m_type_t ot, p2m_type_t nt,
                                          unsigned long first_gfn,
                                          unsigned long last_gfn)
{
    unsigned long mask = (1 << PAGETABLE_ORDER) - 1;
    unsigned int i;
    int err = 0;

    ASSERT(hap_enabled(p2m->domain));

    for ( i = 1; i <= 4; )
    {
        if ( first_gfn & mask )
        {
            unsigned long end_gfn = min(first_gfn | mask, last_gfn);

            err = p2m_pt_set_recalc_range(p2m, i, first_gfn, end_gfn);
            if ( err || end_gfn >= last_gfn )
                break;
            first_gfn = end_gfn + 1;
        }
        else if ( (last_gfn & mask) != mask )
        {
            unsigned long start_gfn = max(first_gfn, last_gfn & ~mask);

            err = p2m_pt_set_recalc_range(p2m, i, start_gfn, last_gfn);
            if ( err || start_gfn <= first_gfn )
                break;
            last_gfn = start_gfn - 1;
        }
        else
        {
            ++i;
            mask |= mask << PAGETABLE_ORDER;
        }
    }

    return err;
}

#if P2M_AUDIT && defined(CONFIG_HVM)
long p2m_pt_audit_p2m(struct p2m_domain *p2m)
{
    unsigned long entry_count = 0, pmbad = 0;
    unsigned long mfn, gfn, m2pfn;

    ASSERT(p2m_locked_by_me(p2m));
    ASSERT(pod_locked_by_me(p2m));

    /* Audit part one: walk the domain's p2m table, checking the entries. */
    if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
    {
        l2_pgentry_t *l2e;
        l1_pgentry_t *l1e;
        int i1, i2;

        l4_pgentry_t *l4e;
        l3_pgentry_t *l3e;
        int i4, i3;
        l4e = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));

        gfn = 0;
        for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
        {
            if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
            {
                gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
                continue;
            }
            l3e = map_l3t_from_l4e(l4e[i4]);
            for ( i3 = 0;
                  i3 < L3_PAGETABLE_ENTRIES;
                  i3++ )
            {
                if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
                {
                    gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                    continue;
                }

                /* check for 1GB super page */
                if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE )
                {
                    mfn = l3e_get_pfn(l3e[i3]);
                    ASSERT(mfn_valid(_mfn(mfn)));
                    /* we have to cover 512x512 4K pages */
                    for ( i2 = 0;
                          i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES);
                          i2++)
                    {
                        m2pfn = get_gpfn_from_mfn(mfn+i2);
                        if ( m2pfn != (gfn + i2) )
                        {
                            pmbad++;
                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                       " -> gfn %#lx\n", gfn+i2, mfn+i2,
                                       m2pfn);
                            BUG();
                        }
                        gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }
                }

                l2e = map_l2t_from_l3e(l3e[i3]);
                for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
                {
                    if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
                    {
                        if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
                             && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
                                  == p2m_populate_on_demand ) )
                            entry_count+=SUPERPAGE_PAGES;
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }

                    /* check for super page */
                    if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
                    {
                        mfn = l2e_get_pfn(l2e[i2]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
                        {
                            m2pfn = get_gpfn_from_mfn(mfn+i1);
                            /* Allow shared M2Ps */
                            if ( (m2pfn != (gfn + i1)) && !SHARED_M2P(m2pfn) )
                            {
                                pmbad++;
                                P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                           " -> gfn %#lx\n", gfn+i1, mfn+i1,
                                           m2pfn);
                                BUG();
                            }
                        }
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }

                    l1e = map_l1t_from_l2e(l2e[i2]);

                    for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                    {
                        p2m_type_t type;

                        type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
                        if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
                        {
                            if ( type == p2m_populate_on_demand )
                                entry_count++;
                            continue;
                        }
                        mfn = l1e_get_pfn(l1e[i1]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        m2pfn = get_gpfn_from_mfn(mfn);
                        if ( m2pfn != gfn &&
                             type != p2m_mmio_direct &&
                             !p2m_is_grant(type) &&
                             !p2m_is_shared(type) )
                        {
                            pmbad++;
                            printk("mismatch: gfn %#lx -> mfn %#lx"
                                   " -> gfn %#lx\n", gfn, mfn, m2pfn);
                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                       " -> gfn %#lx\n", gfn, mfn, m2pfn);
                            BUG();
                        }
                    }
                    unmap_domain_page(l1e);
                }
                unmap_domain_page(l2e);
            }
            unmap_domain_page(l3e);
        }

        unmap_domain_page(l4e);
    }

    if ( entry_count != p2m->pod.entry_count )
    {
        printk("%s: refcounted entry count %ld, audit count %lu!\n",
               __func__,
               p2m->pod.entry_count,
               entry_count);
        BUG();
    }

    return pmbad;
}
#else
# define p2m_pt_audit_p2m NULL
#endif /* P2M_AUDIT */

/* Set up the p2m function pointers for pagetable format */
void p2m_pt_init(struct p2m_domain *p2m)
{
    p2m->set_entry = p2m_pt_set_entry;
    p2m->get_entry = p2m_pt_get_entry;
    p2m->recalc = do_recalc;
    p2m->change_entry_type_global = p2m_pt_change_entry_type_global;
    p2m->change_entry_type_range = p2m_pt_change_entry_type_range;
    p2m->write_p2m_entry = paging_write_p2m_entry;
#if P2M_AUDIT
    p2m->audit_p2m = p2m_pt_audit_p2m;
#else
    p2m->audit_p2m = NULL;
#endif
}