1 /*
2  * ept-p2m.c: use the EPT page table as p2m
3  * Copyright (c) 2007, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <xen/domain_page.h>
19 #include <xen/sched.h>
20 #include <asm/altp2m.h>
21 #include <asm/current.h>
22 #include <asm/paging.h>
23 #include <asm/types.h>
24 #include <asm/domain.h>
25 #include <asm/p2m.h>
26 #include <asm/hvm/vmx/vmx.h>
27 #include <asm/hvm/vmx/vmcs.h>
28 #include <asm/hvm/nestedhvm.h>
29 #include <xen/iommu.h>
30 #include <asm/mtrr.h>
31 #include <asm/hvm/cacheattr.h>
32 #include <xen/keyhandler.h>
33 #include <xen/softirq.h>
34 
35 #include "mm-locks.h"
36 
37 #define atomic_read_ept_entry(__pepte)                              \
38     ( (ept_entry_t) { .epte = read_atomic(&(__pepte)->epte) } )
39 
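/*
 * An EPT entry is considered "present" if any of its R/W/X permission bits
 * (bits 0-2 of the entry) is set.
 */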
40 #define is_epte_present(ept_entry)      ((ept_entry)->epte & 0x7)
41 #define is_epte_superpage(ept_entry)    ((ept_entry)->sp)
42 static inline bool_t is_epte_valid(ept_entry_t *e)
43 {
44     /* suppress_ve alone is not considered valid, so mask it off */
45     return ((e->epte & ~(1ul << 63)) != 0 && e->sa_p2mt != p2m_invalid);
46 }
47 
48 /* returns : 0 for success, -errno otherwise */
49 static int atomic_write_ept_entry(struct p2m_domain *p2m,
50                                   ept_entry_t *entryptr, ept_entry_t new,
51                                   int level)
52 {
53     int rc = p2m_entry_modify(p2m, new.sa_p2mt, entryptr->sa_p2mt,
54                               _mfn(new.mfn), _mfn(entryptr->mfn), level + 1);
55 
56     if ( rc )
57         return rc;
58 
59     write_atomic(&entryptr->epte, new.epte);
60 
61     /*
62      * The recalc field on the EPT is used to signal either that a
63      * recalculation of the EMT field is required (which doesn't affect the
64      * IOMMU), or a type change. Type changes can only be between ram_rw,
65      * logdirty and ioreq_server: changes to/from logdirty won't work well with
66      * an IOMMU anyway, as IOMMU #PFs are not synchronous and will lead to
67      * aborts, and changes to/from ioreq_server are already fully flushed
68      * before returning to guest context (see
69      * XEN_DMOP_map_mem_type_to_ioreq_server).
70      */
71     if ( !new.recalc && iommu_use_hap_pt(p2m->domain) )
72         iommu_sync_cache(entryptr, sizeof(*entryptr));
73 
74     return 0;
75 }
76 
77 static void ept_p2m_type_to_flags(const struct p2m_domain *p2m,
78                                   ept_entry_t *entry)
79 {
80     /*
81      * First apply type permissions.
82      *
83      * The A/D bits are also set manually, to avoid the overhead of the MMU
84      * having to set them later. Both bits are safe to update directly, as
85      * they are ignored by the processor if EPT A/D support is not enabled.
86      *
87      * The A bit is set for all present p2m types in middle and leaf EPT
88      * entries. The D bit is set for all writable types in leaf EPT entries,
89      * except for the log-dirty type when PML is in use.
90      */
91     switch ( entry->sa_p2mt )
92     {
93         case p2m_invalid:
94         case p2m_mmio_dm:
95         case p2m_populate_on_demand:
96         case p2m_ram_paging_out:
97         case p2m_ram_paged:
98         case p2m_ram_paging_in:
99         default:
100             entry->r = entry->w = entry->x = 0;
101             break;
102         case p2m_ram_rw:
103             entry->r = entry->w = entry->x = 1;
104             entry->a = entry->d = !!cpu_has_vmx_ept_ad;
105             break;
106         case p2m_ioreq_server:
107             entry->r = 1;
108             entry->w = !(p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE);
109             entry->x = 0;
110             entry->a = !!cpu_has_vmx_ept_ad;
111             entry->d = entry->w && entry->a;
112             break;
113         case p2m_mmio_direct:
114             entry->r = entry->x = 1;
115             entry->w = !rangeset_contains_singleton(mmio_ro_ranges,
116                                                     entry->mfn);
117             ASSERT(entry->w || !is_epte_superpage(entry));
118             entry->a = !!cpu_has_vmx_ept_ad;
119             entry->d = entry->w && cpu_has_vmx_ept_ad;
120             break;
121         case p2m_ram_logdirty:
122             entry->r = entry->x = 1;
123             /*
124              * With PML we don't need to write-protect 4K pages; clearing the
125              * D bit is sufficient. Superpages still need to be write-protected
126              * so that an EPT violation can split them into 4K pages.
128              */
129             if ( vmx_domain_pml_enabled(p2m->domain) &&
130                  !is_epte_superpage(entry) )
131                 entry->w = 1;
132             else
133                 entry->w = 0;
134             entry->a = !!cpu_has_vmx_ept_ad;
135             /* In both the PML and non-PML cases we clear the D bit anyway. */
136             entry->d = 0;
137             break;
138         case p2m_ram_ro:
139         case p2m_ram_shared:
140             entry->r = entry->x = 1;
141             entry->w = 0;
142             entry->a = !!cpu_has_vmx_ept_ad;
143             entry->d = 0;
144             break;
145         case p2m_grant_map_rw:
146         case p2m_map_foreign:
147             entry->r = entry->w = 1;
148             entry->x = 0;
149             entry->a = entry->d = !!cpu_has_vmx_ept_ad;
150             break;
151         case p2m_grant_map_ro:
152             entry->r = 1;
153             entry->w = entry->x = 0;
154             entry->a = !!cpu_has_vmx_ept_ad;
155             entry->d = 0;
156             break;
157     }
158 
159     /* Then restrict with access permissions */
160     switch ( entry->access )
161     {
162         case p2m_access_n:
163         case p2m_access_n2rwx:
164             entry->r = entry->w = entry->x = 0;
165             break;
166         case p2m_access_r:
167             entry->w = entry->x = 0;
168             break;
169         case p2m_access_w:
170             entry->r = entry->x = 0;
171             break;
172         case p2m_access_x:
173             entry->r = entry->w = 0;
174             break;
175         case p2m_access_rx:
176         case p2m_access_rx2rw:
177             entry->w = 0;
178             break;
179         case p2m_access_wx:
180             entry->r = 0;
181             break;
182         case p2m_access_rw:
183             entry->x = 0;
184             break;
185         case p2m_access_rwx:
186             break;
187     }
188 
189     /*
190      * Don't create executable superpages if we need to shatter them to
191      * protect against CVE-2018-12207.
192      */
193     if ( !p2m->domain->arch.hvm.vmx.exec_sp && is_epte_superpage(entry) )
194         entry->x = 0;
195 }
196 
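/* Return values of ept_next_level(). */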
197 #define GUEST_TABLE_MAP_FAILED  0
198 #define GUEST_TABLE_NORMAL_PAGE 1
199 #define GUEST_TABLE_SUPER_PAGE  2
200 #define GUEST_TABLE_POD_PAGE    3
201 
202 /* Fill in middle level of ept table; return pointer to mapped new table. */
203 static ept_entry_t *ept_set_middle_entry(struct p2m_domain *p2m,
204                                          ept_entry_t *ept_entry)
205 {
206     mfn_t mfn;
207     ept_entry_t *table;
208     unsigned int i;
209 
210     mfn = p2m_alloc_ptp(p2m, 0);
211     if ( mfn_eq(mfn, INVALID_MFN) )
212         return NULL;
213 
214     table = map_domain_page(mfn);
215 
216     for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
217         table[i].suppress_ve = 1;
218 
219     ept_entry->epte = 0;
220     ept_entry->mfn = mfn_x(mfn);
221     ept_entry->access = p2m->default_access;
222 
223     ept_entry->r = ept_entry->w = ept_entry->x = 1;
224     /* Manually set A bit to avoid overhead of MMU having to write it later. */
225     ept_entry->a = !!cpu_has_vmx_ept_ad;
226 
227     ept_entry->suppress_ve = 1;
228 
229     return table;
230 }
231 
232 /* Free the EPT subtree behind an entry. */
233 static void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
234 {
235     /* End if the entry is a leaf entry. */
236     if ( level == 0 || !is_epte_present(ept_entry) ||
237          is_epte_superpage(ept_entry) )
238         return;
239 
240     if ( level > 1 )
241     {
242         ept_entry_t *epte = map_domain_page(_mfn(ept_entry->mfn));
243         for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
244             ept_free_entry(p2m, epte + i, level - 1);
245         unmap_domain_page(epte);
246     }
247 
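    /*
     * Make sure any deferred TLB flush has been carried out before freeing
     * the page, so no stale translations referencing it can remain.
     */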
248     p2m_tlb_flush_sync(p2m);
249     p2m_free_ptp(p2m, mfn_to_page(_mfn(ept_entry->mfn)));
250 }
251 
252 static bool_t ept_split_super_page(struct p2m_domain *p2m,
253                                    ept_entry_t *ept_entry,
254                                    unsigned int level, unsigned int target)
255 {
256     ept_entry_t new_ept, *table;
257     uint64_t trunk;
258     unsigned int i;
259     bool_t rv = 1;
260 
261     /* End if the entry is a leaf entry or reaches the target level. */
262     if ( level <= target )
263         return 1;
264 
265     ASSERT(is_epte_superpage(ept_entry));
266 
267     table = ept_set_middle_entry(p2m, &new_ept);
268     if ( !table )
269         return 0;
270 
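    /* Number of 4k frames covered by each entry one level below this one. */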
271     trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
272 
273     for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
274     {
275         ept_entry_t *epte = table + i;
276 
277         *epte = *ept_entry;
278         epte->sp = (level > 1);
279         epte->mfn += i * trunk;
280         epte->snp = is_iommu_enabled(p2m->domain) && iommu_snoop;
281 
282         ept_p2m_type_to_flags(p2m, epte);
283 
284         if ( (level - 1) == target )
285             continue;
286 
287         ASSERT(is_epte_superpage(epte));
288 
289         if ( !(rv = ept_split_super_page(p2m, epte, level - 1, target)) )
290             break;
291     }
292 
293     if ( iommu_use_hap_pt(p2m->domain) )
294         iommu_sync_cache(table, EPT_PAGETABLE_ENTRIES * sizeof(ept_entry_t));
295 
296     unmap_domain_page(table);
297 
298     /* Even on failure we should install the newly allocated EPT page. */
299     *ept_entry = new_ept;
300 
301     return rv;
302 }
303 
304 /* Take the currently mapped table, find the corresponding gfn entry,
305  * and map the next table, if available.  If the entry is empty and
306  * read_only is set, no new table is allocated.
307  * Return values:
308  *  GUEST_TABLE_MAP_FAILED: Failed to map.  Either read_only was set and the
309  *   entry was empty, or allocating a new page failed.
310  *  GUEST_TABLE_NORMAL_PAGE: next level mapped normally
311  *  GUEST_TABLE_SUPER_PAGE:
312  *   The next entry points to a superpage, and caller indicates
313  *   that they are going to the superpage level, or are only doing
314  *   a read.
315  *  GUEST_TABLE_POD_PAGE:
316  *   The next entry is marked populate-on-demand.
317  */
318 static int ept_next_level(struct p2m_domain *p2m, bool_t read_only,
319                           ept_entry_t **table, unsigned long *gfn_remainder,
320                           int next_level)
321 {
322     ept_entry_t *ept_entry, *next = NULL, e;
323     u32 shift, index;
324 
325     ASSERT(next_level);
326 
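    /* Each page table level translates EPT_TABLE_ORDER (9) bits of the gfn. */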
327     shift = next_level * EPT_TABLE_ORDER;
328 
329     index = *gfn_remainder >> shift;
330 
331     /* The index must fall within the page. */
332     ASSERT(index < EPT_PAGETABLE_ENTRIES);
333 
334     ept_entry = (*table) + index;
335 
336     /* ept_next_level() is called (sometimes) without a lock.  Read
337      * the entry once, and act on the "cached" entry after that to
338      * avoid races. */
339     e = atomic_read_ept_entry(ept_entry);
340 
341     if ( !is_epte_present(&e) )
342     {
343         int rc;
344 
345         if ( e.sa_p2mt == p2m_populate_on_demand )
346             return GUEST_TABLE_POD_PAGE;
347 
348         if ( read_only )
349             return GUEST_TABLE_MAP_FAILED;
350 
351         next = ept_set_middle_entry(p2m, &e);
352         if ( !next )
353             return GUEST_TABLE_MAP_FAILED;
354 
355         if ( iommu_use_hap_pt(p2m->domain) )
356             iommu_sync_cache(next, EPT_PAGETABLE_ENTRIES * sizeof(ept_entry_t));
357 
358         rc = atomic_write_ept_entry(p2m, ept_entry, e, next_level);
359         ASSERT(rc == 0);
360     }
361     /* The only time sp would be set here is if we had hit a superpage */
362     else if ( is_epte_superpage(&e) )
363         return GUEST_TABLE_SUPER_PAGE;
364 
365     unmap_domain_page(*table);
366     *table = next ?: map_domain_page(_mfn(e.mfn));
367     *gfn_remainder &= (1UL << shift) - 1;
368     return GUEST_TABLE_NORMAL_PAGE;
369 }
370 
371 /*
372  * Invalidate (via setting the EMT field to an invalid value) all valid
373  * present entries in the given page table, optionally marking the entries
374  * also for their subtrees needing P2M type re-calculation.
375  */
376 static bool ept_invalidate_emt_subtree(struct p2m_domain *p2m, mfn_t mfn,
377                                        bool recalc, unsigned int level)
378 {
379     int rc;
380     ept_entry_t *epte = map_domain_page(mfn);
381     unsigned int i;
382     bool changed = false;
383 
384     if ( !level )
385     {
386         ASSERT_UNREACHABLE();
387         return false;
388     }
389 
390     for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
391     {
392         ept_entry_t e = atomic_read_ept_entry(&epte[i]);
393 
394         if ( !is_epte_valid(&e) || !is_epte_present(&e) ||
395              (e.emt == MTRR_NUM_TYPES && (e.recalc || !recalc)) )
396             continue;
397 
398         e.emt = MTRR_NUM_TYPES;
399         if ( recalc )
400             e.recalc = 1;
401         rc = atomic_write_ept_entry(p2m, &epte[i], e, level - 1);
402         ASSERT(rc == 0);
403         changed = true;
404     }
405 
406     unmap_domain_page(epte);
407 
408     return changed;
409 }
410 
411 /*
412  * Just like ept_invalidate_emt_subtree() except that
413  * - not all entries at the targeted level may need processing,
414  * - the re-calculation flag gets always set.
415  * The passed in range is guaranteed to not cross a page (table)
416  * boundary at the targeted level.
417  */
418 static int ept_invalidate_emt_range(struct p2m_domain *p2m,
419                                     unsigned int target,
420                                     unsigned long first_gfn,
421                                     unsigned long last_gfn)
422 {
423     ept_entry_t *table;
424     unsigned long gfn_remainder = first_gfn;
425     unsigned int i, index;
426     int wrc, rc = 0;
427 
428     table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
429     for ( i = p2m->ept.wl; i > target; --i )
430     {
431         int ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
432 
433         if ( ret == GUEST_TABLE_MAP_FAILED )
434             goto out;
435         if ( ret != GUEST_TABLE_NORMAL_PAGE )
436             break;
437     }
438 
439     if ( i > target )
440     {
441         /* We need to split the original page. */
442         ept_entry_t split_ept_entry;
443 
444         index = gfn_remainder >> (i * EPT_TABLE_ORDER);
445         split_ept_entry = atomic_read_ept_entry(&table[index]);
446         ASSERT(is_epte_superpage(&split_ept_entry));
447         if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) )
448         {
449             ept_free_entry(p2m, &split_ept_entry, i);
450             rc = -ENOMEM;
451             goto out;
452         }
453         wrc = atomic_write_ept_entry(p2m, &table[index], split_ept_entry, i);
454         ASSERT(wrc == 0);
455 
456         for ( ; i > target; --i )
457             if ( ept_next_level(p2m, 1, &table, &gfn_remainder, i) !=
458                  GUEST_TABLE_NORMAL_PAGE )
459                 break;
460         /* We just installed the pages we need. */
461         ASSERT(i == target);
462     }
463 
464     index = gfn_remainder >> (i * EPT_TABLE_ORDER);
465     i = (last_gfn >> (i * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
466     for ( ; index <= i; ++index )
467     {
468         ept_entry_t e = atomic_read_ept_entry(&table[index]);
469 
470         if ( is_epte_valid(&e) && is_epte_present(&e) &&
471              (e.emt != MTRR_NUM_TYPES || !e.recalc) )
472         {
473             e.emt = MTRR_NUM_TYPES;
474             e.recalc = 1;
475             wrc = atomic_write_ept_entry(p2m, &table[index], e, target);
476             ASSERT(wrc == 0);
477             rc = 1;
478         }
479     }
480 
481  out:
482     unmap_domain_page(table);
483 
484     return rc;
485 }
486 
487 /*
488  * Resolve deliberately mis-configured (EMT field set to an invalid value)
489  * entries in the page table hierarchy for the given GFN:
490  * - calculate the correct value for the EMT field,
491  * - if marked so, re-calculate the P2M type,
492  * - propagate EMT and re-calculation flag down to the next page table level
493  *   for entries not involved in the translation of the given GFN.
494  * Returns:
495  * - a negative errno value on error,
496  * - zero if no adjustment was done,
497  * - a positive value if at least one adjustment was done.
498  */
499 static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
500 {
501     struct ept_data *ept = &p2m->ept;
502     unsigned int level = ept->wl;
503     unsigned long mfn = ept->mfn;
504     ept_entry_t *epte;
505     int wrc, rc = 0;
506 
507     if ( !mfn )
508         return 0;
509 
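    /*
     * Walk down the entry path for the given gfn.  An EMT value of
     * MTRR_NUM_TYPES marks an entry as deliberately mis-configured: its EMT
     * (and, if requested, its p2m type) gets re-calculated here, while the
     * invalidation is pushed one level down for entries not on the path.
     */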
510     for ( ; ; --level )
511     {
512         ept_entry_t e;
513         unsigned int i;
514 
515         epte = map_domain_page(_mfn(mfn));
516         i = (gfn >> (level * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
517         e = atomic_read_ept_entry(&epte[i]);
518 
519         if ( level == 0 || is_epte_superpage(&e) )
520         {
521             uint8_t ipat = 0;
522 
523             if ( e.emt != MTRR_NUM_TYPES )
524                 break;
525 
526             if ( level == 0 )
527             {
528                 for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i )
529                 {
530                     p2m_type_t nt;
531 
532                     e = atomic_read_ept_entry(&epte[i]);
533                     if ( e.emt == MTRR_NUM_TYPES )
534                         e.emt = 0;
535                     if ( !is_epte_valid(&e) || !is_epte_present(&e) )
536                         continue;
537                     e.emt = epte_get_entry_emt(p2m->domain, gfn + i,
538                                                _mfn(e.mfn), 0, &ipat,
539                                                e.sa_p2mt == p2m_mmio_direct);
540                     e.ipat = ipat;
541 
542                     nt = p2m_recalc_type(e.recalc, e.sa_p2mt, p2m, gfn + i);
543                     if ( nt != e.sa_p2mt )
544                     {
545                         e.sa_p2mt = nt;
546                         ept_p2m_type_to_flags(p2m, &e);
547                     }
548                     e.recalc = 0;
549                     wrc = atomic_write_ept_entry(p2m, &epte[i], e, level);
550                     ASSERT(wrc == 0);
551                 }
552             }
553             else
554             {
555                 int emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
556                                              level * EPT_TABLE_ORDER, &ipat,
557                                              e.sa_p2mt == p2m_mmio_direct);
558                 bool_t recalc = e.recalc;
559 
560                 if ( recalc && p2m_is_changeable(e.sa_p2mt) )
561                 {
562                     unsigned long mask = ~0UL << (level * EPT_TABLE_ORDER);
563 
564                     ASSERT(e.sa_p2mt != p2m_ioreq_server);
565                     switch ( p2m_is_logdirty_range(p2m, gfn & mask,
566                                                    gfn | ~mask) )
567                     {
568                     case 0:
569                          e.sa_p2mt = p2m_ram_rw;
570                          e.recalc = 0;
571                          break;
572                     case 1:
573                          e.sa_p2mt = p2m_ram_logdirty;
574                          e.recalc = 0;
575                          break;
576                     default: /* Force split. */
577                          emt = -1;
578                          break;
579                     }
580                 }
581                 if ( unlikely(emt < 0) )
582                 {
583                     if ( ept_split_super_page(p2m, &e, level, level - 1) )
584                     {
585                         wrc = atomic_write_ept_entry(p2m, &epte[i], e, level);
586                         ASSERT(wrc == 0);
587                         unmap_domain_page(epte);
588                         mfn = e.mfn;
589                         continue;
590                     }
591                     ept_free_entry(p2m, &e, level);
592                     rc = -ENOMEM;
593                     break;
594                 }
595                 e.emt = emt;
596                 e.ipat = ipat;
597                 e.recalc = 0;
598                 if ( recalc && p2m_is_changeable(e.sa_p2mt) )
599                     ept_p2m_type_to_flags(p2m, &e);
600                 wrc = atomic_write_ept_entry(p2m, &epte[i], e, level);
601                 ASSERT(wrc == 0);
602             }
603 
604             rc = 1;
605             break;
606         }
607 
608         if ( e.emt == MTRR_NUM_TYPES )
609         {
610             ASSERT(is_epte_present(&e));
611             ept_invalidate_emt_subtree(p2m, _mfn(e.mfn), e.recalc, level);
612             smp_wmb();
613             e.emt = 0;
614             e.recalc = 0;
615             wrc = atomic_write_ept_entry(p2m, &epte[i], e, level);
616             ASSERT(wrc == 0);
617             unmap_domain_page(epte);
618             rc = 1;
619         }
620         else if ( is_epte_present(&e) && !e.emt )
621             unmap_domain_page(epte);
622         else
623             break;
624 
625         mfn = e.mfn;
626     }
627 
628     unmap_domain_page(epte);
629     if ( rc )
630     {
631         struct vcpu *v;
632 
633         for_each_vcpu ( p2m->domain, v )
634             v->arch.hvm.vmx.ept_spurious_misconfig = 1;
635     }
636 
637     return rc;
638 }
639 
640 bool_t ept_handle_misconfig(uint64_t gpa)
641 {
642     struct vcpu *curr = current;
643     struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
644     bool_t spurious;
645     int rc;
646 
647     if ( altp2m_active(curr->domain) )
648         p2m = p2m_get_altp2m(curr);
649 
650     p2m_lock(p2m);
651 
652     spurious = curr->arch.hvm.vmx.ept_spurious_misconfig;
653     rc = resolve_misconfig(p2m, PFN_DOWN(gpa));
654     curr->arch.hvm.vmx.ept_spurious_misconfig = 0;
655 
656     p2m_unlock(p2m);
657 
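    /*
     * If this vCPU had already observed a resolved misconfiguration
     * (spurious), finding nothing left to adjust (rc == 0) still counts as
     * success; otherwise an actual adjustment must have been made.
     */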
658     return spurious ? (rc >= 0) : (rc > 0);
659 }
660 
661 /*
662  * ept_set_entry() computes 'need_modify_vtd_table' for itself,
663  * by observing whether any gfn->mfn translations are modified.
664  *
665  * Returns: 0 for success, -errno for failure
666  */
667 static int
668 ept_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn,
669               unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma,
670               int sve)
671 {
672     ept_entry_t *table, *ept_entry = NULL;
673     unsigned long gfn = gfn_x(gfn_);
674     unsigned long gfn_remainder = gfn;
675     unsigned int i, target = order / EPT_TABLE_ORDER;
676     unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? (gfn | mfn_x(mfn)) : gfn;
677     int ret, rc = 0;
678     bool_t entry_written = 0;
679     bool_t direct_mmio = (p2mt == p2m_mmio_direct);
680     uint8_t ipat = 0;
681     bool_t need_modify_vtd_table = 1;
682     bool_t vtd_pte_present = 0;
683     unsigned int iommu_flags = p2m_get_iommu_flags(p2mt, mfn);
684     bool_t needs_sync = 1;
685     ept_entry_t old_entry = { .epte = 0 };
686     ept_entry_t new_entry = { .epte = 0 };
687     struct ept_data *ept = &p2m->ept;
688     struct domain *d = p2m->domain;
689 
690     ASSERT(ept);
691 
692     /*
693      * The caller must make sure that:
694      * 1. the gfn and mfn passed in are valid and aligned to the order boundary,
695      * 2. the gfn does not exceed the guest physical address width,
696      * 3. the order passed in is valid.
697      */
698     if ( (fn_mask & ((1UL << order) - 1)) ||
699          ((u64)gfn >> ((ept->wl + 1) * EPT_TABLE_ORDER)) ||
700          (order % EPT_TABLE_ORDER) )
701         return -EINVAL;
702 
703     /* Carry out any still-pending earlier changes first. */
704     ret = resolve_misconfig(p2m, gfn);
705     if ( ret < 0 )
706         return ret;
707 
708     ASSERT((target == 2 && hap_has_1gb) ||
709            (target == 1 && hap_has_2mb) ||
710            (target == 0));
711     ASSERT(!p2m_is_foreign(p2mt) || target == 0);
712 
713     table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
714 
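    /*
     * Walk down from the EPT root towards the target level (0 = 4k, 1 = 2M,
     * 2 = 1G), allocating intermediate tables as necessary.
     */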
715     ret = GUEST_TABLE_MAP_FAILED;
716     for ( i = ept->wl; i > target; i-- )
717     {
718         ret = ept_next_level(p2m, 0, &table, &gfn_remainder, i);
719         if ( ret == GUEST_TABLE_MAP_FAILED )
720         {
721             rc = -ENOMEM;
722             goto out;
723         }
724         if ( ret != GUEST_TABLE_NORMAL_PAGE )
725             break;
726     }
727 
728     ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target);
729 
730     ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
731 
732     /* In case VT-d shares the EPT page table, this flag is needed by VT-d. */
733     vtd_pte_present = is_epte_present(ept_entry);
734 
735     /*
736      * If we're here with i > target, we must be at a leaf node, and
737      * we need to break up the superpage.
738      *
739      * If we're here with i == target and i > 0, we need to check to see
740      * if we're replacing a non-leaf entry (i.e., pointing to an N-1 table)
741      * with a leaf entry (a 1GiB or 2MiB page), and handle things appropriately.
742      */
743 
744     if ( i == target )
745     {
746         /* We reached the target level. */
747 
748         /* No need to flush if the old entry wasn't valid */
749         if ( !is_epte_present(ept_entry) )
750             needs_sync = 0;
751 
752         /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
753          * the intermediate tables will be freed below after the EPT flush.
754          *
755          * Read-then-write is OK because we hold the p2m lock. */
756         old_entry = *ept_entry;
757     }
758     else
759     {
760         /* We need to split the original page. */
761         ept_entry_t split_ept_entry;
762 
763         ASSERT(is_epte_superpage(ept_entry));
764 
765         split_ept_entry = atomic_read_ept_entry(ept_entry);
766 
767         if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) )
768         {
769             ept_free_entry(p2m, &split_ept_entry, i);
770             rc = -ENOMEM;
771             goto out;
772         }
773 
774         /* now install the newly split ept sub-tree */
775         /* NB: make sure the domain is paused and there is no in-flight VT-d DMA. */
776         rc = atomic_write_ept_entry(p2m, ept_entry, split_ept_entry, i);
777         ASSERT(rc == 0);
778 
779         /* then move to the level we want to make real changes */
780         for ( ; i > target; i-- )
781             if ( ept_next_level(p2m, 0, &table, &gfn_remainder, i) !=
782                  GUEST_TABLE_NORMAL_PAGE )
783                 break;
784         /* We just installed the pages we need. */
785         ASSERT(i == target);
786 
787         ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
788     }
789 
790     if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
791     {
792         int emt = epte_get_entry_emt(p2m->domain, gfn, mfn,
793                                      i * EPT_TABLE_ORDER, &ipat, direct_mmio);
794 
795         if ( emt >= 0 )
796             new_entry.emt = emt;
797         else /* ept_handle_misconfig() will need to take care of this. */
798             new_entry.emt = MTRR_NUM_TYPES;
799 
800         new_entry.ipat = ipat;
801         new_entry.sp = !!i;
802         new_entry.sa_p2mt = p2mt;
803         new_entry.access = p2ma;
804         new_entry.snp = is_iommu_enabled(d) && iommu_snoop;
805 
806         /* the caller should take care of the previous page */
807         new_entry.mfn = mfn_x(mfn);
808 
809         /* Safe to read-then-write because we hold the p2m lock */
810         if ( ept_entry->mfn == new_entry.mfn &&
811              p2m_get_iommu_flags(ept_entry->sa_p2mt, _mfn(ept_entry->mfn)) ==
812              iommu_flags )
813             need_modify_vtd_table = 0;
814 
815         ept_p2m_type_to_flags(p2m, &new_entry);
816     }
817 
818     if ( sve != -1 )
819         new_entry.suppress_ve = !!sve;
820     else
821         new_entry.suppress_ve = is_epte_valid(&old_entry) ?
822                                     old_entry.suppress_ve : 1;
823 
824     rc = atomic_write_ept_entry(p2m, ept_entry, new_entry, target);
825     if ( unlikely(rc) )
826         old_entry.epte = 0;
827     else
828     {
829         entry_written = 1;
830 
831         if ( p2mt != p2m_invalid &&
832              (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) )
833             /* Track the highest gfn for which we have ever had a valid mapping */
834             p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
835     }
836 
837 out:
838     if ( needs_sync )
839         ept_sync_domain(p2m);
840 
841     /* For the host p2m, we may need to change the VT-d page table. */
842     if ( rc == 0 && p2m_is_hostp2m(p2m) &&
843          need_modify_vtd_table )
844     {
845         if ( iommu_use_hap_pt(d) )
846             rc = iommu_iotlb_flush(d, _dfn(gfn), (1u << order),
847                                    (iommu_flags ? IOMMU_FLUSHF_added : 0) |
848                                    (vtd_pte_present ? IOMMU_FLUSHF_modified
849                                                     : 0));
850         else if ( need_iommu_pt_sync(d) )
851             rc = iommu_flags ?
852                 iommu_legacy_map(d, _dfn(gfn), mfn, order, iommu_flags) :
853                 iommu_legacy_unmap(d, _dfn(gfn), order);
854     }
855 
856     unmap_domain_page(table);
857 
858     /* Release the old intermediate tables, if any.  This has to be the
859        last thing we do, after the ept_sync_domain() and removal
860        from the iommu tables, so as to avoid a potential
861        use-after-free. */
862     if ( is_epte_present(&old_entry) )
863         ept_free_entry(p2m, &old_entry, target);
864 
865     if ( entry_written && p2m_is_hostp2m(p2m) )
866     {
867         ret = p2m_altp2m_propagate_change(d, _gfn(gfn), mfn, order, p2mt, p2ma);
868         if ( !rc )
869             rc = ret;
870     }
871 
872     return rc;
873 }
874 
875 /* Read ept p2m entries */
876 static mfn_t ept_get_entry(struct p2m_domain *p2m,
877                            gfn_t gfn_, p2m_type_t *t, p2m_access_t* a,
878                            p2m_query_t q, unsigned int *page_order,
879                            bool_t *sve)
880 {
881     ept_entry_t *table =
882         map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
883     unsigned long gfn = gfn_x(gfn_);
884     unsigned long gfn_remainder = gfn;
885     ept_entry_t *ept_entry;
886     u32 index;
887     int i;
888     bool_t recalc = 0;
889     mfn_t mfn = INVALID_MFN;
890     struct ept_data *ept = &p2m->ept;
891 
892     *t = p2m_mmio_dm;
893     *a = p2m_access_n;
894     if ( sve )
895         *sve = 1;
896 
897     /* This gfn is higher than the highest the p2m map currently holds. */
898     if ( gfn > p2m->max_mapped_pfn )
899     {
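        /*
         * Find the largest order for which the gfn-aligned region lies
         * entirely above max_mapped_pfn, so the page_order returned tells
         * the caller how large the unpopulated range is known to be.
         */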
900         for ( i = ept->wl; i > 0; --i )
901             if ( (gfn & ~((1UL << (i * EPT_TABLE_ORDER)) - 1)) >
902                  p2m->max_mapped_pfn )
903                 break;
904         goto out;
905     }
906 
907     /* Should check if gfn obeys GAW here. */
908 
909     for ( i = ept->wl; i > 0; i-- )
910     {
911         int ret;
912 
913     retry:
914         if ( table[gfn_remainder >> (i * EPT_TABLE_ORDER)].recalc )
915             recalc = 1;
916         ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
917         if ( ret == GUEST_TABLE_MAP_FAILED )
918             goto out;
919         if ( ret == GUEST_TABLE_POD_PAGE )
920         {
921             if ( !(q & P2M_ALLOC) )
922             {
923                 *t = p2m_populate_on_demand;
924                 goto out;
925             }
926 
927             /* Populate this superpage */
928             ASSERT(i <= 2);
929 
930             index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
931             ept_entry = table + index;
932 
933             if ( p2m_pod_demand_populate(p2m, gfn_, i * EPT_TABLE_ORDER) )
934                 goto retry;
935             goto out;
936         }
937         if ( ret == GUEST_TABLE_SUPER_PAGE )
938             break;
939     }
940 
941     index = gfn_remainder >> (i * EPT_TABLE_ORDER);
942     ept_entry = table + index;
943 
944     if ( ept_entry->sa_p2mt == p2m_populate_on_demand )
945     {
946         if ( !(q & P2M_ALLOC) )
947         {
948             *t = p2m_populate_on_demand;
949             goto out;
950         }
951 
952         ASSERT(i == 0);
953 
954         if ( !p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_4K) )
955             goto out;
956     }
957 
958     if ( is_epte_valid(ept_entry) )
959     {
960         *t = p2m_recalc_type(recalc || ept_entry->recalc,
961                              ept_entry->sa_p2mt, p2m, gfn);
962         *a = ept_entry->access;
963         if ( sve )
964             *sve = ept_entry->suppress_ve;
965 
966         mfn = _mfn(ept_entry->mfn);
967         if ( i )
968         {
969             /*
970              * We may have hit a superpage; adjust the mfn to refer to the
971              * 4k page within it that corresponds to the requested gfn.
972              */
973             unsigned long split_mfn = mfn_x(mfn) +
974                 (gfn_remainder &
975                  ((1 << (i * EPT_TABLE_ORDER)) - 1));
976             mfn = _mfn(split_mfn);
977         }
978     }
979 
980  out:
981     if ( page_order )
982         *page_order = i * EPT_TABLE_ORDER;
983 
984     unmap_domain_page(table);
985     return mfn;
986 }
987 
988 void ept_walk_table(struct domain *d, unsigned long gfn)
989 {
990     struct p2m_domain *p2m = p2m_get_hostp2m(d);
991     struct ept_data *ept = &p2m->ept;
992     ept_entry_t *table =
993         map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
994     unsigned long gfn_remainder = gfn;
995 
996     int i;
997 
998     gprintk(XENLOG_ERR, "Walking EPT tables for GFN %lx:\n", gfn);
999 
1000     /* This gfn is higher than the highest the p2m map currently holds. */
1001     if ( gfn > p2m->max_mapped_pfn )
1002     {
1003         gprintk(XENLOG_ERR, " gfn exceeds max_mapped_pfn %lx\n",
1004                 p2m->max_mapped_pfn);
1005         goto out;
1006     }
1007 
1008     for ( i = ept->wl; i >= 0; i-- )
1009     {
1010         ept_entry_t *ept_entry, *next;
1011         u32 index;
1012 
1013         /* Stolen from ept_next_level */
1014         index = gfn_remainder >> (i*EPT_TABLE_ORDER);
1015         ept_entry = table + index;
1016 
1017         gprintk(XENLOG_ERR, " epte %"PRIx64"\n", ept_entry->epte);
1018 
1019         if ( (i == 0) || !is_epte_present(ept_entry) ||
1020              is_epte_superpage(ept_entry) )
1021             goto out;
1022         else
1023         {
1024             gfn_remainder &= (1UL << (i*EPT_TABLE_ORDER)) - 1;
1025 
1026             next = map_domain_page(_mfn(ept_entry->mfn));
1027 
1028             unmap_domain_page(table);
1029 
1030             table = next;
1031         }
1032     }
1033 
1034 out:
1035     unmap_domain_page(table);
1036     return;
1037 }
1038 
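/*
 * Mark every entry for p2m type (and EMT) re-calculation; the actual
 * changes are then applied lazily, via EPT misconfiguration exits.
 */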
1039 static void ept_change_entry_type_global(struct p2m_domain *p2m,
1040                                          p2m_type_t ot, p2m_type_t nt)
1041 {
1042     unsigned long mfn = p2m->ept.mfn;
1043 
1044     if ( !mfn )
1045         return;
1046 
1047     if ( ept_invalidate_emt_subtree(p2m, _mfn(mfn), 1, p2m->ept.wl) )
1048         ept_sync_domain(p2m);
1049 }
1050 
1051 static int ept_change_entry_type_range(struct p2m_domain *p2m,
1052                                        p2m_type_t ot, p2m_type_t nt,
1053                                        unsigned long first_gfn,
1054                                        unsigned long last_gfn)
1055 {
1056     unsigned int i, wl = p2m->ept.wl;
1057     unsigned long mask = (1 << EPT_TABLE_ORDER) - 1;
1058     int rc = 0, sync = 0;
1059 
1060     if ( !p2m->ept.mfn )
1061         return -EINVAL;
1062 
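    /*
     * Work inwards from both ends of the range: invalidate a misaligned
     * head and/or tail chunk at the current level, and only once both ends
     * are aligned move up a level, widening the mask by another 9 bits.
     */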
1063     for ( i = 0; i <= wl; )
1064     {
1065         if ( first_gfn & mask )
1066         {
1067             unsigned long end_gfn = min(first_gfn | mask, last_gfn);
1068 
1069             rc = ept_invalidate_emt_range(p2m, i, first_gfn, end_gfn);
1070             sync |= rc;
1071             if ( rc < 0 || end_gfn >= last_gfn )
1072                 break;
1073             first_gfn = end_gfn + 1;
1074         }
1075         else if ( (last_gfn & mask) != mask )
1076         {
1077             unsigned long start_gfn = max(first_gfn, last_gfn & ~mask);
1078 
1079             rc = ept_invalidate_emt_range(p2m, i, start_gfn, last_gfn);
1080             sync |= rc;
1081             if ( rc < 0 || start_gfn <= first_gfn )
1082                 break;
1083             last_gfn = start_gfn - 1;
1084         }
1085         else
1086         {
1087             ++i;
1088             mask |= mask << EPT_TABLE_ORDER;
1089         }
1090     }
1091 
1092     if ( sync )
1093         ept_sync_domain(p2m);
1094 
1095     return rc < 0 ? rc : 0;
1096 }
1097 
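/*
 * Mark all EMT fields as stale (without requesting a p2m type
 * re-calculation), so that they are re-derived from the updated memory
 * type settings via EPT misconfiguration exits.
 */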
1098 static void ept_memory_type_changed(struct p2m_domain *p2m)
1099 {
1100     unsigned long mfn = p2m->ept.mfn;
1101 
1102     if ( !mfn )
1103         return;
1104 
1105     if ( ept_invalidate_emt_subtree(p2m, _mfn(mfn), 0, p2m->ept.wl) )
1106         ept_sync_domain(p2m);
1107 }
1108 
1109 static void __ept_sync_domain(void *info)
1110 {
1111     /*
1112      * The invalidation will be done before VMENTER (see
1113      * vmx_vmenter_helper()).
1114      */
1115 }
1116 
1117 static void ept_sync_domain_prepare(struct p2m_domain *p2m)
1118 {
1119     struct domain *d = p2m->domain;
1120     struct ept_data *ept = &p2m->ept;
1121 
1122     if ( nestedhvm_enabled(d) )
1123     {
1124         if ( p2m_is_nestedp2m(p2m) )
1125             ept = &p2m_get_hostp2m(d)->ept;
1126         else
1127             p2m_flush_nestedp2m(d);
1128     }
1129 
1130     /*
1131      * Need to invalidate on all PCPUs because either:
1132      *
1133      * a) A VCPU has run and some translations may be cached.
1134      * b) A VCPU has not run and the initial invalidation in case
1135      *    of an EP4TA reuse is still needed.
1136      */
1137     cpumask_setall(ept->invalidate);
1138 }
1139 
1140 static void ept_sync_domain_mask(struct p2m_domain *p2m, const cpumask_t *mask)
1141 {
1142     on_selected_cpus(mask, __ept_sync_domain, p2m, 1);
1143 }
1144 
1145 void ept_sync_domain(struct p2m_domain *p2m)
1146 {
1147     struct domain *d = p2m->domain;
1148 
1149     /* Only if using EPT and this domain has some VCPUs to dirty. */
1150     if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
1151         return;
1152 
1153     ept_sync_domain_prepare(p2m);
1154 
1155     if ( p2m->defer_flush )
1156     {
1157         p2m->need_flush = 1;
1158         return;
1159     }
1160 
1161     ept_sync_domain_mask(p2m, d->dirty_cpumask);
1162 }
1163 
1164 static void ept_tlb_flush(struct p2m_domain *p2m)
1165 {
1166     ept_sync_domain_mask(p2m, p2m->domain->dirty_cpumask);
1167 }
1168 
1169 static void ept_set_ad_sync(struct domain *d, bool value)
1170 {
1171     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
1172 
1173     ASSERT(p2m_locked_by_me(hostp2m));
1174 
1175     hostp2m->ept.ad = value;
1176 
1177     if ( unlikely(altp2m_active(d)) )
1178     {
1179         unsigned int i;
1180 
1181         for ( i = 0; i < MAX_ALTP2M; i++ )
1182         {
1183             struct p2m_domain *p2m;
1184 
1185             if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
1186                 continue;
1187 
1188             p2m = d->arch.altp2m_p2m[i];
1189 
1190             p2m_lock(p2m);
1191             p2m->ept.ad = value;
1192             p2m_unlock(p2m);
1193         }
1194     }
1195 }
1196 
1197 static void ept_enable_pml(struct p2m_domain *p2m)
1198 {
1199     /* Domain must have been paused */
1200     ASSERT(atomic_read(&p2m->domain->pause_count));
1201 
1202     /*
1203      * No need to return whether vmx_domain_enable_pml has succeeded, as
1204      * ept_p2m_type_to_flags will do the check, and write protection will be
1205      * used if PML is not enabled.
1206      */
1207     if ( vmx_domain_enable_pml(p2m->domain) )
1208         return;
1209 
1210     /* Enable EPT A/D bit for PML */
1211     ept_set_ad_sync(p2m->domain, true);
1212     vmx_domain_update_eptp(p2m->domain);
1213 }
1214 
1215 static void ept_disable_pml(struct p2m_domain *p2m)
1216 {
1217     /* Domain must have been paused */
1218     ASSERT(atomic_read(&p2m->domain->pause_count));
1219 
1220     vmx_domain_disable_pml(p2m->domain);
1221 
1222     /* Disable EPT A/D bit */
1223     ept_set_ad_sync(p2m->domain, false);
1224     vmx_domain_update_eptp(p2m->domain);
1225 }
1226 
1227 static void ept_enable_hardware_log_dirty(struct p2m_domain *p2m)
1228 {
1229     struct p2m_domain *hostp2m = p2m_get_hostp2m(p2m->domain);
1230 
1231     p2m_lock(hostp2m);
1232     ept_enable_pml(hostp2m);
1233     p2m_unlock(hostp2m);
1234 }
1235 
1236 static void ept_disable_hardware_log_dirty(struct p2m_domain *p2m)
1237 {
1238     struct p2m_domain *hostp2m = p2m_get_hostp2m(p2m->domain);
1239 
1240     p2m_lock(hostp2m);
1241     ept_disable_pml(hostp2m);
1242     p2m_unlock(hostp2m);
1243 }
1244 
1245 static void ept_flush_pml_buffers(struct p2m_domain *p2m)
1246 {
1247     /* Domain must have been paused */
1248     ASSERT(atomic_read(&p2m->domain->pause_count));
1249 
1250     vmx_domain_flush_pml_buffers(p2m->domain);
1251 }
1252 
1253 int ept_p2m_init(struct p2m_domain *p2m)
1254 {
1255     struct ept_data *ept = &p2m->ept;
1256 
1257     p2m->set_entry = ept_set_entry;
1258     p2m->get_entry = ept_get_entry;
1259     p2m->recalc = resolve_misconfig;
1260     p2m->change_entry_type_global = ept_change_entry_type_global;
1261     p2m->change_entry_type_range = ept_change_entry_type_range;
1262     p2m->memory_type_changed = ept_memory_type_changed;
1263     p2m->audit_p2m = NULL;
1264     p2m->tlb_flush = ept_tlb_flush;
1265 
1266     /* Set the memory type used when accessing EPT paging structures. */
1267     ept->mt = EPT_DEFAULT_MT;
1268 
1269     /* Set the EPT page-walk length; stored as the actual walk length - 1, i.e. 3. */
1270     ept->wl = 3;
1271 
1272     if ( cpu_has_vmx_pml )
1273     {
1274         p2m->enable_hardware_log_dirty = ept_enable_hardware_log_dirty;
1275         p2m->disable_hardware_log_dirty = ept_disable_hardware_log_dirty;
1276         p2m->flush_hardware_cached_dirty = ept_flush_pml_buffers;
1277     }
1278 
1279     if ( !zalloc_cpumask_var(&ept->invalidate) )
1280         return -ENOMEM;
1281 
1282     /*
1283      * Assume an initial invalidation is required, in case an EP4TA is
1284      * reused.
1285      */
1286     cpumask_setall(ept->invalidate);
1287 
1288     return 0;
1289 }
1290 
1291 void ept_p2m_uninit(struct p2m_domain *p2m)
1292 {
1293     struct ept_data *ept = &p2m->ept;
1294     free_cpumask_var(ept->invalidate);
1295 }
1296 
1297 static const char *memory_type_to_str(unsigned int x)
1298 {
1299     static const char memory_types[8][3] = {
1300         [MTRR_TYPE_UNCACHABLE]     = "UC",
1301         [MTRR_TYPE_WRCOMB]         = "WC",
1302         [MTRR_TYPE_WRTHROUGH]      = "WT",
1303         [MTRR_TYPE_WRPROT]         = "WP",
1304         [MTRR_TYPE_WRBACK]         = "WB",
1305         [MTRR_NUM_TYPES]           = "??"
1306     };
1307 
1308     ASSERT(x < ARRAY_SIZE(memory_types));
1309     return memory_types[x][0] ? memory_types[x] : "?";
1310 }
1311 
1312 static void ept_dump_p2m_table(unsigned char key)
1313 {
1314     struct domain *d;
1315     ept_entry_t *table, *ept_entry;
1316     int order;
1317     int i;
1318     unsigned long gfn, gfn_remainder;
1319     unsigned long record_counter = 0;
1320     struct p2m_domain *p2m;
1321     struct ept_data *ept;
1322 
1323     rcu_read_lock(&domlist_read_lock);
1324 
1325     for_each_domain(d)
1326     {
1327         if ( !hap_enabled(d) )
1328             continue;
1329 
1330         p2m = p2m_get_hostp2m(d);
1331         ept = &p2m->ept;
1332         printk("\ndomain%d EPT p2m table:\n", d->domain_id);
1333 
1334         for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += 1UL << order )
1335         {
1336             char c = 0;
1337             int ret = GUEST_TABLE_MAP_FAILED;
1338 
1339             gfn_remainder = gfn;
1340             table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
1341 
1342             for ( i = ept->wl; i > 0; i-- )
1343             {
1344                 ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
1345                 if ( ept_entry->emt == MTRR_NUM_TYPES )
1346                     c = '?';
1347                 ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
1348                 if ( ret != GUEST_TABLE_NORMAL_PAGE )
1349                     break;
1350             }
1351 
1352             order = i * EPT_TABLE_ORDER;
1353             ept_entry = table + (gfn_remainder >> order);
1354             if ( ret != GUEST_TABLE_MAP_FAILED && is_epte_valid(ept_entry) )
1355             {
1356                 if ( ept_entry->sa_p2mt == p2m_populate_on_demand )
1357                     printk("gfn: %13lx order: %2d PoD\n", gfn, order);
1358                 else
1359                     printk("gfn: %13lx order: %2d mfn: %13lx %c%c%c %c%c%c\n",
1360                            gfn, order, ept_entry->mfn + 0UL,
1361                            ept_entry->r ? 'r' : ' ',
1362                            ept_entry->w ? 'w' : ' ',
1363                            ept_entry->x ? 'x' : ' ',
1364                            memory_type_to_str(ept_entry->emt)[0],
1365                            memory_type_to_str(ept_entry->emt)[1]
1366                            ?: ept_entry->emt + '0',
1367                            c ?: ept_entry->ipat ? '!' : ' ');
1368 
1369                 if ( !(record_counter++ % 100) )
1370                     process_pending_softirqs();
1371             }
1372             unmap_domain_page(table);
1373         }
1374     }
1375 
1376     rcu_read_unlock(&domlist_read_lock);
1377 }
1378 
1379 void setup_ept_dump(void)
1380 {
1381     register_keyhandler('D', ept_dump_p2m_table, "dump VT-x EPT tables", 0);
1382 }
1383 
1384 void p2m_init_altp2m_ept(struct domain *d, unsigned int i)
1385 {
1386     struct p2m_domain *p2m = array_access_nospec(d->arch.altp2m_p2m, i);
1387     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
1388     struct ept_data *ept;
1389 
1390     p2m->ept.ad = hostp2m->ept.ad;
1391     ept = &p2m->ept;
1392     ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
1393     d->arch.altp2m_eptp[array_index_nospec(i, MAX_EPTP)] = ept->eptp;
1394     d->arch.altp2m_visible_eptp[array_index_nospec(i, MAX_EPTP)] = ept->eptp;
1395 }
1396 
1397 unsigned int p2m_find_altp2m_by_eptp(struct domain *d, uint64_t eptp)
1398 {
1399     struct p2m_domain *p2m;
1400     struct ept_data *ept;
1401     unsigned int i;
1402 
1403     altp2m_list_lock(d);
1404 
1405     for ( i = 0; i < MAX_ALTP2M; i++ )
1406     {
1407         if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
1408             continue;
1409 
1410         p2m = d->arch.altp2m_p2m[i];
1411         ept = &p2m->ept;
1412 
1413         if ( eptp == ept->eptp )
1414             goto out;
1415     }
1416 
1417     i = INVALID_ALTP2M;
1418 
1419  out:
1420     altp2m_list_unlock(d);
1421     return i;
1422 }
1423 
1424 /*
1425  * Local variables:
1426  * mode: C
1427  * c-file-style: "BSD"
1428  * c-basic-offset: 4
1429  * tab-width: 4
1430  * indent-tabs-mode: nil
1431  * End:
1432  */
1433