1 /*
2 * ept-p2m.c: use the EPT page table as p2m
3 * Copyright (c) 2007, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <xen/domain_page.h>
19 #include <xen/sched.h>
20 #include <asm/altp2m.h>
21 #include <asm/current.h>
22 #include <asm/paging.h>
23 #include <asm/types.h>
24 #include <asm/domain.h>
25 #include <asm/p2m.h>
26 #include <asm/hvm/vmx/vmx.h>
27 #include <asm/hvm/vmx/vmcs.h>
28 #include <asm/hvm/nestedhvm.h>
29 #include <xen/iommu.h>
30 #include <asm/mtrr.h>
31 #include <asm/hvm/cacheattr.h>
32 #include <xen/keyhandler.h>
33 #include <xen/softirq.h>
34
35 #include "mm-locks.h"
36
37 #define atomic_read_ept_entry(__pepte) \
38 ( (ept_entry_t) { .epte = read_atomic(&(__pepte)->epte) } )
39
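/*
 * Bits 0-2 of an EPT entry are the R/W/X permissions; an entry is
 * considered present if any of the three is set.
 */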
40 #define is_epte_present(ept_entry) ((ept_entry)->epte & 0x7)
41 #define is_epte_superpage(ept_entry) ((ept_entry)->sp)
42 static inline bool_t is_epte_valid(ept_entry_t *e)
43 {
44 /* suppress_ve alone is not considered valid, so mask it off */
45 return ((e->epte & ~(1ul << 63)) != 0 && e->sa_p2mt != p2m_invalid);
46 }
47
48 /* Returns: 0 for success, -errno otherwise. */
49 static int atomic_write_ept_entry(struct p2m_domain *p2m,
50 ept_entry_t *entryptr, ept_entry_t new,
51 int level)
52 {
53 int rc = p2m_entry_modify(p2m, new.sa_p2mt, entryptr->sa_p2mt,
54 _mfn(new.mfn), _mfn(entryptr->mfn), level + 1);
55
56 if ( rc )
57 return rc;
58
59 write_atomic(&entryptr->epte, new.epte);
60
61 /*
62 * The recalc field on the EPT is used to signal either that a
63 * recalculation of the EMT field is required (which doesn't affect the
64 * IOMMU), or a type change. Type changes can only be between ram_rw,
65 * logdirty and ioreq_server: changes to/from logdirty won't work well with
66 * an IOMMU anyway, as IOMMU #PFs are not synchronous and will lead to
67 * aborts, and changes to/from ioreq_server are already fully flushed
68 * before returning to guest context (see
69 * XEN_DMOP_map_mem_type_to_ioreq_server).
70 */
71 if ( !new.recalc && iommu_use_hap_pt(p2m->domain) )
72 iommu_sync_cache(entryptr, sizeof(*entryptr));
73
74 return 0;
75 }
76
77 static void ept_p2m_type_to_flags(const struct p2m_domain *p2m,
78 ept_entry_t *entry)
79 {
80 /*
81 * First apply type permissions.
82 *
83 * A/D bits are also manually set to avoid overhead of MMU having to set
84 * them later. Both A/D bits are safe to update directly as they are
85 * ignored by the processor if EPT A/D support is not enabled.
86 *
87 * A bit is set for all present p2m types in middle and leaf EPT entries.
88 * D bit is set for all writable types in EPT leaf entry, except for
89 * log-dirty type with PML.
90 */
91 switch ( entry->sa_p2mt )
92 {
93 case p2m_invalid:
94 case p2m_mmio_dm:
95 case p2m_populate_on_demand:
96 case p2m_ram_paging_out:
97 case p2m_ram_paged:
98 case p2m_ram_paging_in:
99 default:
100 entry->r = entry->w = entry->x = 0;
101 break;
102 case p2m_ram_rw:
103 entry->r = entry->w = entry->x = 1;
104 entry->a = entry->d = !!cpu_has_vmx_ept_ad;
105 break;
106 case p2m_ioreq_server:
107 entry->r = 1;
108 entry->w = !(p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE);
109 entry->x = 0;
110 entry->a = !!cpu_has_vmx_ept_ad;
111 entry->d = entry->w && entry->a;
112 break;
113 case p2m_mmio_direct:
114 entry->r = entry->x = 1;
115 entry->w = !rangeset_contains_singleton(mmio_ro_ranges,
116 entry->mfn);
117 ASSERT(entry->w || !is_epte_superpage(entry));
118 entry->a = !!cpu_has_vmx_ept_ad;
119 entry->d = entry->w && cpu_has_vmx_ept_ad;
120 break;
121 case p2m_ram_logdirty:
122 entry->r = entry->x = 1;
123 /*
124 * With PML we don't have to write-protect a 4K page; clearing
125 * its D-bit is enough. Superpages still need to be write
126 * protected, so that the resulting EPT violation lets us split
127 * them into 4K pages.
128 */
129 if ( vmx_domain_pml_enabled(p2m->domain) &&
130 !is_epte_superpage(entry) )
131 entry->w = 1;
132 else
133 entry->w = 0;
134 entry->a = !!cpu_has_vmx_ept_ad;
135 /* For both the PML and non-PML cases we clear the D bit anyway. */
136 entry->d = 0;
137 break;
138 case p2m_ram_ro:
139 case p2m_ram_shared:
140 entry->r = entry->x = 1;
141 entry->w = 0;
142 entry->a = !!cpu_has_vmx_ept_ad;
143 entry->d = 0;
144 break;
145 case p2m_grant_map_rw:
146 case p2m_map_foreign:
147 entry->r = entry->w = 1;
148 entry->x = 0;
149 entry->a = entry->d = !!cpu_has_vmx_ept_ad;
150 break;
151 case p2m_grant_map_ro:
152 entry->r = 1;
153 entry->w = entry->x = 0;
154 entry->a = !!cpu_has_vmx_ept_ad;
155 entry->d = 0;
156 break;
157 }
158
159 /* Then restrict with access permissions */
160 switch ( entry->access )
161 {
162 case p2m_access_n:
163 case p2m_access_n2rwx:
164 entry->r = entry->w = entry->x = 0;
165 break;
166 case p2m_access_r:
167 entry->w = entry->x = 0;
168 break;
169 case p2m_access_w:
170 entry->r = entry->x = 0;
171 break;
172 case p2m_access_x:
173 entry->r = entry->w = 0;
174 break;
175 case p2m_access_rx:
176 case p2m_access_rx2rw:
177 entry->w = 0;
178 break;
179 case p2m_access_wx:
180 entry->r = 0;
181 break;
182 case p2m_access_rw:
183 entry->x = 0;
184 break;
185 case p2m_access_rwx:
186 break;
187 }
188
189 /*
190 * Don't create executable superpages if we need to shatter them to
191 * protect against CVE-2018-12207.
192 */
193 if ( !p2m->domain->arch.hvm.vmx.exec_sp && is_epte_superpage(entry) )
194 entry->x = 0;
195 }
196
197 #define GUEST_TABLE_MAP_FAILED 0
198 #define GUEST_TABLE_NORMAL_PAGE 1
199 #define GUEST_TABLE_SUPER_PAGE 2
200 #define GUEST_TABLE_POD_PAGE 3
201
202 /* Fill in middle level of ept table; return pointer to mapped new table. */
203 static ept_entry_t *ept_set_middle_entry(struct p2m_domain *p2m,
204 ept_entry_t *ept_entry)
205 {
206 mfn_t mfn;
207 ept_entry_t *table;
208 unsigned int i;
209
210 mfn = p2m_alloc_ptp(p2m, 0);
211 if ( mfn_eq(mfn, INVALID_MFN) )
212 return NULL;
213
214 table = map_domain_page(mfn);
215
216 for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
217 table[i].suppress_ve = 1;
218
219 ept_entry->epte = 0;
220 ept_entry->mfn = mfn_x(mfn);
221 ept_entry->access = p2m->default_access;
222
223 ept_entry->r = ept_entry->w = ept_entry->x = 1;
224 /* Manually set A bit to avoid overhead of MMU having to write it later. */
225 ept_entry->a = !!cpu_has_vmx_ept_ad;
226
227 ept_entry->suppress_ve = 1;
228
229 return table;
230 }
231
232 /* Free the EPT subtree behind an entry. */
233 static void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
234 {
235 /* End if the entry is a leaf entry. */
236 if ( level == 0 || !is_epte_present(ept_entry) ||
237 is_epte_superpage(ept_entry) )
238 return;
239
240 if ( level > 1 )
241 {
242 ept_entry_t *epte = map_domain_page(_mfn(ept_entry->mfn));
243 for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
244 ept_free_entry(p2m, epte + i, level - 1);
245 unmap_domain_page(epte);
246 }
247
248 p2m_tlb_flush_sync(p2m);
249 p2m_free_ptp(p2m, mfn_to_page(_mfn(ept_entry->mfn)));
250 }
251
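/*
 * Split the superpage referenced by *ept_entry (at 'level') into smaller
 * mappings down to 'target' level, allocating intermediate tables and
 * propagating the existing type and access to every new entry.
 * Returns 0 on allocation failure, 1 on success.
 */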
252 static bool_t ept_split_super_page(struct p2m_domain *p2m,
253 ept_entry_t *ept_entry,
254 unsigned int level, unsigned int target)
255 {
256 ept_entry_t new_ept, *table;
257 uint64_t trunk;
258 unsigned int i;
259 bool_t rv = 1;
260
261 /* End if the entry is a leaf entry or reaches the target level. */
262 if ( level <= target )
263 return 1;
264
265 ASSERT(is_epte_superpage(ept_entry));
266
267 table = ept_set_middle_entry(p2m, &new_ept);
268 if ( !table )
269 return 0;
270
271 trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
272
273 for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
274 {
275 ept_entry_t *epte = table + i;
276
277 *epte = *ept_entry;
278 epte->sp = (level > 1);
279 epte->mfn += i * trunk;
280 epte->snp = is_iommu_enabled(p2m->domain) && iommu_snoop;
281
282 ept_p2m_type_to_flags(p2m, epte);
283
284 if ( (level - 1) == target )
285 continue;
286
287 ASSERT(is_epte_superpage(epte));
288
289 if ( !(rv = ept_split_super_page(p2m, epte, level - 1, target)) )
290 break;
291 }
292
293 if ( iommu_use_hap_pt(p2m->domain) )
294 iommu_sync_cache(table, EPT_PAGETABLE_ENTRIES * sizeof(ept_entry_t));
295
296 unmap_domain_page(table);
297
298 /* Even on failure we should install the newly allocated EPT page. */
299 *ept_entry = new_ept;
300
301 return rv;
302 }
303
304 /* Take the currently mapped table, find the entry corresponding to the gfn,
305 * and map the next-level table, if available. If the entry is empty and
306 * read_only is set, no new table is allocated.
307 * Return values:
308 * GUEST_TABLE_MAP_FAILED: Failed to map. Either read_only was set and the
309 * entry was empty, or allocating a new page failed.
310 * GUEST_TABLE_NORMAL_PAGE: next level mapped normally
311 * GUEST_TABLE_SUPER_PAGE:
312 * The next entry points to a superpage, and caller indicates
313 * that they are going to the superpage level, or are only doing
314 * a read.
315 * GUEST_TABLE_POD_PAGE:
316 * The next entry is marked populate-on-demand.
317 */
318 static int ept_next_level(struct p2m_domain *p2m, bool_t read_only,
319 ept_entry_t **table, unsigned long *gfn_remainder,
320 int next_level)
321 {
322 ept_entry_t *ept_entry, *next = NULL, e;
323 u32 shift, index;
324
325 ASSERT(next_level);
326
327 shift = next_level * EPT_TABLE_ORDER;
328
329 index = *gfn_remainder >> shift;
330
331 /* The index must fall within the page. */
332 ASSERT(index < EPT_PAGETABLE_ENTRIES);
333
334 ept_entry = (*table) + index;
335
336 /* ept_next_level() is called (sometimes) without a lock. Read
337 * the entry once, and act on the "cached" entry after that to
338 * avoid races. */
339 e = atomic_read_ept_entry(ept_entry);
340
341 if ( !is_epte_present(&e) )
342 {
343 int rc;
344
345 if ( e.sa_p2mt == p2m_populate_on_demand )
346 return GUEST_TABLE_POD_PAGE;
347
348 if ( read_only )
349 return GUEST_TABLE_MAP_FAILED;
350
351 next = ept_set_middle_entry(p2m, &e);
352 if ( !next )
353 return GUEST_TABLE_MAP_FAILED;
354
355 if ( iommu_use_hap_pt(p2m->domain) )
356 iommu_sync_cache(next, EPT_PAGETABLE_ENTRIES * sizeof(ept_entry_t));
357
358 rc = atomic_write_ept_entry(p2m, ept_entry, e, next_level);
359 ASSERT(rc == 0);
360 }
361 /* The only time sp would be set here is if we had hit a superpage */
362 else if ( is_epte_superpage(&e) )
363 return GUEST_TABLE_SUPER_PAGE;
364
365 unmap_domain_page(*table);
366 *table = next ?: map_domain_page(_mfn(e.mfn));
367 *gfn_remainder &= (1UL << shift) - 1;
368 return GUEST_TABLE_NORMAL_PAGE;
369 }
370
371 /*
372 * Invalidate (via setting the EMT field to an invalid value) all valid
373 * present entries in the given page table, optionally marking the entries
374 * also for their subtrees needing P2M type re-calculation.
375 */
376 static bool ept_invalidate_emt_subtree(struct p2m_domain *p2m, mfn_t mfn,
377 bool recalc, unsigned int level)
378 {
379 int rc;
380 ept_entry_t *epte = map_domain_page(mfn);
381 unsigned int i;
382 bool changed = false;
383
384 if ( !level )
385 {
386 ASSERT_UNREACHABLE();
387 return false;
388 }
389
390 for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
391 {
392 ept_entry_t e = atomic_read_ept_entry(&epte[i]);
393
394 if ( !is_epte_valid(&e) || !is_epte_present(&e) ||
395 (e.emt == MTRR_NUM_TYPES && (e.recalc || !recalc)) )
396 continue;
397
398 e.emt = MTRR_NUM_TYPES;
399 if ( recalc )
400 e.recalc = 1;
401 rc = atomic_write_ept_entry(p2m, &epte[i], e, level - 1);
402 ASSERT(rc == 0);
403 changed = true;
404 }
405
406 unmap_domain_page(epte);
407
408 return changed;
409 }
410
411 /*
412 * Just like ept_invalidate_emt_subtree() except that
413 * - not all entries at the targeted level may need processing,
414 * - the re-calculation flag always gets set.
415 * The passed in range is guaranteed to not cross a page (table)
416 * boundary at the targeted level.
417 */
418 static int ept_invalidate_emt_range(struct p2m_domain *p2m,
419 unsigned int target,
420 unsigned long first_gfn,
421 unsigned long last_gfn)
422 {
423 ept_entry_t *table;
424 unsigned long gfn_remainder = first_gfn;
425 unsigned int i, index;
426 int wrc, rc = 0;
427
428 table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
429 for ( i = p2m->ept.wl; i > target; --i )
430 {
431 int ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
432
433 if ( ret == GUEST_TABLE_MAP_FAILED )
434 goto out;
435 if ( ret != GUEST_TABLE_NORMAL_PAGE )
436 break;
437 }
438
439 if ( i > target )
440 {
441 /* We need to split the original page. */
442 ept_entry_t split_ept_entry;
443
444 index = gfn_remainder >> (i * EPT_TABLE_ORDER);
445 split_ept_entry = atomic_read_ept_entry(&table[index]);
446 ASSERT(is_epte_superpage(&split_ept_entry));
447 if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) )
448 {
449 ept_free_entry(p2m, &split_ept_entry, i);
450 rc = -ENOMEM;
451 goto out;
452 }
453 wrc = atomic_write_ept_entry(p2m, &table[index], split_ept_entry, i);
454 ASSERT(wrc == 0);
455
456 for ( ; i > target; --i )
457 if ( ept_next_level(p2m, 1, &table, &gfn_remainder, i) !=
458 GUEST_TABLE_NORMAL_PAGE )
459 break;
460 /* We just installed the pages we need. */
461 ASSERT(i == target);
462 }
463
464 index = gfn_remainder >> (i * EPT_TABLE_ORDER);
465 i = (last_gfn >> (i * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
466 for ( ; index <= i; ++index )
467 {
468 ept_entry_t e = atomic_read_ept_entry(&table[index]);
469
470 if ( is_epte_valid(&e) && is_epte_present(&e) &&
471 (e.emt != MTRR_NUM_TYPES || !e.recalc) )
472 {
473 e.emt = MTRR_NUM_TYPES;
474 e.recalc = 1;
475 wrc = atomic_write_ept_entry(p2m, &table[index], e, target);
476 ASSERT(wrc == 0);
477 rc = 1;
478 }
479 }
480
481 out:
482 unmap_domain_page(table);
483
484 return rc;
485 }
486
487 /*
488 * Resolve deliberately mis-configured (EMT field set to an invalid value)
489 * entries in the page table hierarchy for the given GFN:
490 * - calculate the correct value for the EMT field,
491 * - if marked so, re-calculate the P2M type,
492 * - propagate EMT and re-calculation flag down to the next page table level
493 * for entries not involved in the translation of the given GFN.
494 * Returns:
495 * - a negative errno value on error,
496 * - zero if no adjustment was done,
497 * - a positive value if at least one adjustment was done.
498 */
499 static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
500 {
501 struct ept_data *ept = &p2m->ept;
502 unsigned int level = ept->wl;
503 unsigned long mfn = ept->mfn;
504 ept_entry_t *epte;
505 int wrc, rc = 0;
506
507 if ( !mfn )
508 return 0;
509
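/*
 * Walk down from the root. At each level read the entry used to translate
 * 'gfn'; entries whose EMT field holds the invalid marker (MTRR_NUM_TYPES)
 * are the deliberately mis-configured ones that get fixed up here.
 */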
510 for ( ; ; --level )
511 {
512 ept_entry_t e;
513 unsigned int i;
514
515 epte = map_domain_page(_mfn(mfn));
516 i = (gfn >> (level * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
517 e = atomic_read_ept_entry(&epte[i]);
518
519 if ( level == 0 || is_epte_superpage(&e) )
520 {
521 uint8_t ipat = 0;
522
523 if ( e.emt != MTRR_NUM_TYPES )
524 break;
525
526 if ( level == 0 )
527 {
528 for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i )
529 {
530 p2m_type_t nt;
531
532 e = atomic_read_ept_entry(&epte[i]);
533 if ( e.emt == MTRR_NUM_TYPES )
534 e.emt = 0;
535 if ( !is_epte_valid(&e) || !is_epte_present(&e) )
536 continue;
537 e.emt = epte_get_entry_emt(p2m->domain, gfn + i,
538 _mfn(e.mfn), 0, &ipat,
539 e.sa_p2mt == p2m_mmio_direct);
540 e.ipat = ipat;
541
542 nt = p2m_recalc_type(e.recalc, e.sa_p2mt, p2m, gfn + i);
543 if ( nt != e.sa_p2mt )
544 {
545 e.sa_p2mt = nt;
546 ept_p2m_type_to_flags(p2m, &e);
547 }
548 e.recalc = 0;
549 wrc = atomic_write_ept_entry(p2m, &epte[i], e, level);
550 ASSERT(wrc == 0);
551 }
552 }
553 else
554 {
555 int emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
556 level * EPT_TABLE_ORDER, &ipat,
557 e.sa_p2mt == p2m_mmio_direct);
558 bool_t recalc = e.recalc;
559
560 if ( recalc && p2m_is_changeable(e.sa_p2mt) )
561 {
562 unsigned long mask = ~0UL << (level * EPT_TABLE_ORDER);
563
564 ASSERT(e.sa_p2mt != p2m_ioreq_server);
565 switch ( p2m_is_logdirty_range(p2m, gfn & mask,
566 gfn | ~mask) )
567 {
568 case 0:
569 e.sa_p2mt = p2m_ram_rw;
570 e.recalc = 0;
571 break;
572 case 1:
573 e.sa_p2mt = p2m_ram_logdirty;
574 e.recalc = 0;
575 break;
576 default: /* Force split. */
577 emt = -1;
578 break;
579 }
580 }
581 if ( unlikely(emt < 0) )
582 {
583 if ( ept_split_super_page(p2m, &e, level, level - 1) )
584 {
585 wrc = atomic_write_ept_entry(p2m, &epte[i], e, level);
586 ASSERT(wrc == 0);
587 unmap_domain_page(epte);
588 mfn = e.mfn;
589 continue;
590 }
591 ept_free_entry(p2m, &e, level);
592 rc = -ENOMEM;
593 break;
594 }
595 e.emt = emt;
596 e.ipat = ipat;
597 e.recalc = 0;
598 if ( recalc && p2m_is_changeable(e.sa_p2mt) )
599 ept_p2m_type_to_flags(p2m, &e);
600 wrc = atomic_write_ept_entry(p2m, &epte[i], e, level);
601 ASSERT(wrc == 0);
602 }
603
604 rc = 1;
605 break;
606 }
607
608 if ( e.emt == MTRR_NUM_TYPES )
609 {
610 ASSERT(is_epte_present(&e));
611 ept_invalidate_emt_subtree(p2m, _mfn(e.mfn), e.recalc, level);
612 smp_wmb();
613 e.emt = 0;
614 e.recalc = 0;
615 wrc = atomic_write_ept_entry(p2m, &epte[i], e, level);
616 ASSERT(wrc == 0);
617 unmap_domain_page(epte);
618 rc = 1;
619 }
620 else if ( is_epte_present(&e) && !e.emt )
621 unmap_domain_page(epte);
622 else
623 break;
624
625 mfn = e.mfn;
626 }
627
628 unmap_domain_page(epte);
629 if ( rc )
630 {
631 struct vcpu *v;
632
633 for_each_vcpu ( p2m->domain, v )
634 v->arch.hvm.vmx.ept_spurious_misconfig = 1;
635 }
636
637 return rc;
638 }
639
640 bool_t ept_handle_misconfig(uint64_t gpa)
641 {
642 struct vcpu *curr = current;
643 struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
644 bool_t spurious;
645 int rc;
646
647 if ( altp2m_active(curr->domain) )
648 p2m = p2m_get_altp2m(curr);
649
650 p2m_lock(p2m);
651
652 spurious = curr->arch.hvm.vmx.ept_spurious_misconfig;
653 rc = resolve_misconfig(p2m, PFN_DOWN(gpa));
654 curr->arch.hvm.vmx.ept_spurious_misconfig = 0;
655
656 p2m_unlock(p2m);
657
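/*
 * If a previous resolution (possibly on another vCPU) may explain this
 * now-spurious misconfig, treat "no adjustment done" (rc == 0) as success.
 */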
658 return spurious ? (rc >= 0) : (rc > 0);
659 }
660
661 /*
662 * ept_set_entry() computes 'need_modify_vtd_table' for itself,
663 * by observing whether any gfn->mfn translations are modified.
664 *
665 * Returns: 0 for success, -errno for failure
666 */
667 static int
668 ept_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn,
669 unsigned int order, p2m_type_t p2mt, p2m_access_t p2ma,
670 int sve)
671 {
672 ept_entry_t *table, *ept_entry = NULL;
673 unsigned long gfn = gfn_x(gfn_);
674 unsigned long gfn_remainder = gfn;
675 unsigned int i, target = order / EPT_TABLE_ORDER;
676 unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? (gfn | mfn_x(mfn)) : gfn;
677 int ret, rc = 0;
678 bool_t entry_written = 0;
679 bool_t direct_mmio = (p2mt == p2m_mmio_direct);
680 uint8_t ipat = 0;
681 bool_t need_modify_vtd_table = 1;
682 bool_t vtd_pte_present = 0;
683 unsigned int iommu_flags = p2m_get_iommu_flags(p2mt, mfn);
684 bool_t needs_sync = 1;
685 ept_entry_t old_entry = { .epte = 0 };
686 ept_entry_t new_entry = { .epte = 0 };
687 struct ept_data *ept = &p2m->ept;
688 struct domain *d = p2m->domain;
689
690 ASSERT(ept);
691
692 /*
693 * The caller must make sure:
694 * 1. The gfn and mfn passed in are valid and aligned to the order boundary.
695 * 2. The gfn does not exceed the guest physical address width.
696 * 3. The order passed in is valid.
697 */
698 if ( (fn_mask & ((1UL << order) - 1)) ||
699 ((u64)gfn >> ((ept->wl + 1) * EPT_TABLE_ORDER)) ||
700 (order % EPT_TABLE_ORDER) )
701 return -EINVAL;
702
703 /* Carry out any pending earlier changes first. */
704 ret = resolve_misconfig(p2m, gfn);
705 if ( ret < 0 )
706 return ret;
707
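/* target: 0 -> 4k mapping, 1 -> 2M superpage, 2 -> 1G superpage. */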
708 ASSERT((target == 2 && hap_has_1gb) ||
709 (target == 1 && hap_has_2mb) ||
710 (target == 0));
711 ASSERT(!p2m_is_foreign(p2mt) || target == 0);
712
713 table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
714
715 ret = GUEST_TABLE_MAP_FAILED;
716 for ( i = ept->wl; i > target; i-- )
717 {
718 ret = ept_next_level(p2m, 0, &table, &gfn_remainder, i);
719 if ( ret == GUEST_TABLE_MAP_FAILED )
720 {
721 rc = -ENOMEM;
722 goto out;
723 }
724 if ( ret != GUEST_TABLE_NORMAL_PAGE )
725 break;
726 }
727
728 ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target);
729
730 ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
731
732 /* In case VT-d uses the same page table, this flag is needed by VT-d. */
733 vtd_pte_present = is_epte_present(ept_entry);
734
735 /*
736 * If we're here with i > target, we must be at a leaf node, and
737 * we need to break up the superpage.
738 *
739 * If we're here with i == target and i > 0, we need to check to see
740 * if we're replacing a non-leaf entry (i.e., pointing to an N-1 table)
741 * with a leaf entry (a 1GiB or 2MiB page), and handle things appropriately.
742 */
743
744 if ( i == target )
745 {
746 /* We reached the target level. */
747
748 /* No need to flush if the old entry wasn't valid */
749 if ( !is_epte_present(ept_entry) )
750 needs_sync = 0;
751
752 /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
753 * the intermediate tables will be freed below after the ept flush
754 *
755 * Read-then-write is OK because we hold the p2m lock. */
756 old_entry = *ept_entry;
757 }
758 else
759 {
760 /* We need to split the original page. */
761 ept_entry_t split_ept_entry;
762
763 ASSERT(is_epte_superpage(ept_entry));
764
765 split_ept_entry = atomic_read_ept_entry(ept_entry);
766
767 if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) )
768 {
769 ept_free_entry(p2m, &split_ept_entry, i);
770 rc = -ENOMEM;
771 goto out;
772 }
773
774 /* now install the newly split ept sub-tree */
775 /* NB: please make sure the domain is paused and there is no in-flight VT-d DMA. */
776 rc = atomic_write_ept_entry(p2m, ept_entry, split_ept_entry, i);
777 ASSERT(rc == 0);
778
779 /* then move to the level we want to make real changes */
780 for ( ; i > target; i-- )
781 if ( ept_next_level(p2m, 0, &table, &gfn_remainder, i) !=
782 GUEST_TABLE_NORMAL_PAGE )
783 break;
784 /* We just installed the pages we need. */
785 ASSERT(i == target);
786
787 ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
788 }
789
790 if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
791 {
792 int emt = epte_get_entry_emt(p2m->domain, gfn, mfn,
793 i * EPT_TABLE_ORDER, &ipat, direct_mmio);
794
795 if ( emt >= 0 )
796 new_entry.emt = emt;
797 else /* ept_handle_misconfig() will need to take care of this. */
798 new_entry.emt = MTRR_NUM_TYPES;
799
800 new_entry.ipat = ipat;
801 new_entry.sp = !!i;
802 new_entry.sa_p2mt = p2mt;
803 new_entry.access = p2ma;
804 new_entry.snp = is_iommu_enabled(d) && iommu_snoop;
805
806 /* the caller should take care of the previous page */
807 new_entry.mfn = mfn_x(mfn);
808
809 /* Safe to read-then-write because we hold the p2m lock */
810 if ( ept_entry->mfn == new_entry.mfn &&
811 p2m_get_iommu_flags(ept_entry->sa_p2mt, _mfn(ept_entry->mfn)) ==
812 iommu_flags )
813 need_modify_vtd_table = 0;
814
815 ept_p2m_type_to_flags(p2m, &new_entry);
816 }
817
818 if ( sve != -1 )
819 new_entry.suppress_ve = !!sve;
820 else
821 new_entry.suppress_ve = is_epte_valid(&old_entry) ?
822 old_entry.suppress_ve : 1;
823
824 rc = atomic_write_ept_entry(p2m, ept_entry, new_entry, target);
825 if ( unlikely(rc) )
826 old_entry.epte = 0;
827 else
828 {
829 entry_written = 1;
830
831 if ( p2mt != p2m_invalid &&
832 (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) )
833 /* Track the highest gfn for which we have ever had a valid mapping */
834 p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
835 }
836
837 out:
838 if ( needs_sync )
839 ept_sync_domain(p2m);
840
841 /* For the host p2m, we may need to change the VT-d page table. */
842 if ( rc == 0 && p2m_is_hostp2m(p2m) &&
843 need_modify_vtd_table )
844 {
845 if ( iommu_use_hap_pt(d) )
846 rc = iommu_iotlb_flush(d, _dfn(gfn), (1u << order),
847 (iommu_flags ? IOMMU_FLUSHF_added : 0) |
848 (vtd_pte_present ? IOMMU_FLUSHF_modified
849 : 0));
850 else if ( need_iommu_pt_sync(d) )
851 rc = iommu_flags ?
852 iommu_legacy_map(d, _dfn(gfn), mfn, order, iommu_flags) :
853 iommu_legacy_unmap(d, _dfn(gfn), order);
854 }
855
856 unmap_domain_page(table);
857
858 /* Release the old intermediate tables, if any. This has to be the
859 last thing we do, after the ept_sync_domain() and removal
860 from the iommu tables, so as to avoid a potential
861 use-after-free. */
862 if ( is_epte_present(&old_entry) )
863 ept_free_entry(p2m, &old_entry, target);
864
865 if ( entry_written && p2m_is_hostp2m(p2m) )
866 {
867 ret = p2m_altp2m_propagate_change(d, _gfn(gfn), mfn, order, p2mt, p2ma);
868 if ( !rc )
869 rc = ret;
870 }
871
872 return rc;
873 }
874
875 /* Read ept p2m entries */
876 static mfn_t ept_get_entry(struct p2m_domain *p2m,
877 gfn_t gfn_, p2m_type_t *t, p2m_access_t* a,
878 p2m_query_t q, unsigned int *page_order,
879 bool_t *sve)
880 {
881 ept_entry_t *table =
882 map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
883 unsigned long gfn = gfn_x(gfn_);
884 unsigned long gfn_remainder = gfn;
885 ept_entry_t *ept_entry;
886 u32 index;
887 int i;
888 bool_t recalc = 0;
889 mfn_t mfn = INVALID_MFN;
890 struct ept_data *ept = &p2m->ept;
891
892 *t = p2m_mmio_dm;
893 *a = p2m_access_n;
894 if ( sve )
895 *sve = 1;
896
897 /* This pfn is higher than the highest the p2m map currently holds */
898 if ( gfn > p2m->max_mapped_pfn )
899 {
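/*
 * Find the coarsest level at which the aligned region around gfn still
 * lies entirely above max_mapped_pfn, so the caller gets a meaningful
 * page_order for the hole.
 */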
900 for ( i = ept->wl; i > 0; --i )
901 if ( (gfn & ~((1UL << (i * EPT_TABLE_ORDER)) - 1)) >
902 p2m->max_mapped_pfn )
903 break;
904 goto out;
905 }
906
907 /* Should check if gfn obeys GAW here. */
908
909 for ( i = ept->wl; i > 0; i-- )
910 {
911 int ret;
912
913 retry:
914 if ( table[gfn_remainder >> (i * EPT_TABLE_ORDER)].recalc )
915 recalc = 1;
916 ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
917 if ( ret == GUEST_TABLE_MAP_FAILED )
918 goto out;
919 if ( ret == GUEST_TABLE_POD_PAGE )
920 {
921 if ( !(q & P2M_ALLOC) )
922 {
923 *t = p2m_populate_on_demand;
924 goto out;
925 }
926
927 /* Populate this superpage */
928 ASSERT(i <= 2);
929
930 index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
931 ept_entry = table + index;
932
933 if ( p2m_pod_demand_populate(p2m, gfn_, i * EPT_TABLE_ORDER) )
934 goto retry;
935 goto out;
936 }
937 if ( ret == GUEST_TABLE_SUPER_PAGE )
938 break;
939 }
940
941 index = gfn_remainder >> (i * EPT_TABLE_ORDER);
942 ept_entry = table + index;
943
944 if ( ept_entry->sa_p2mt == p2m_populate_on_demand )
945 {
946 if ( !(q & P2M_ALLOC) )
947 {
948 *t = p2m_populate_on_demand;
949 goto out;
950 }
951
952 ASSERT(i == 0);
953
954 if ( !p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_4K) )
955 goto out;
956 }
957
958 if ( is_epte_valid(ept_entry) )
959 {
960 *t = p2m_recalc_type(recalc || ept_entry->recalc,
961 ept_entry->sa_p2mt, p2m, gfn);
962 *a = ept_entry->access;
963 if ( sve )
964 *sve = ept_entry->suppress_ve;
965
966 mfn = _mfn(ept_entry->mfn);
967 if ( i )
968 {
969 /*
970 * We may have hit a superpage; add the offset within it to the
971 * mfn so that the caller gets the 4k frame backing this gfn.
972 */
973 unsigned long split_mfn = mfn_x(mfn) +
974 (gfn_remainder &
975 ((1 << (i * EPT_TABLE_ORDER)) - 1));
976 mfn = _mfn(split_mfn);
977 }
978 }
979
980 out:
981 if ( page_order )
982 *page_order = i * EPT_TABLE_ORDER;
983
984 unmap_domain_page(table);
985 return mfn;
986 }
987
988 void ept_walk_table(struct domain *d, unsigned long gfn)
989 {
990 struct p2m_domain *p2m = p2m_get_hostp2m(d);
991 struct ept_data *ept = &p2m->ept;
992 ept_entry_t *table =
993 map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
994 unsigned long gfn_remainder = gfn;
995
996 int i;
997
998 gprintk(XENLOG_ERR, "Walking EPT tables for GFN %lx:\n", gfn);
999
1000 /* This pfn is higher than the highest the p2m map currently holds */
1001 if ( gfn > p2m->max_mapped_pfn )
1002 {
1003 gprintk(XENLOG_ERR, " gfn exceeds max_mapped_pfn %lx\n",
1004 p2m->max_mapped_pfn);
1005 goto out;
1006 }
1007
1008 for ( i = ept->wl; i >= 0; i-- )
1009 {
1010 ept_entry_t *ept_entry, *next;
1011 u32 index;
1012
1013 /* Stolen from ept_next_level */
1014 index = gfn_remainder >> (i*EPT_TABLE_ORDER);
1015 ept_entry = table + index;
1016
1017 gprintk(XENLOG_ERR, " epte %"PRIx64"\n", ept_entry->epte);
1018
1019 if ( (i == 0) || !is_epte_present(ept_entry) ||
1020 is_epte_superpage(ept_entry) )
1021 goto out;
1022 else
1023 {
1024 gfn_remainder &= (1UL << (i*EPT_TABLE_ORDER)) - 1;
1025
1026 next = map_domain_page(_mfn(ept_entry->mfn));
1027
1028 unmap_domain_page(table);
1029
1030 table = next;
1031 }
1032 }
1033
1034 out:
1035 unmap_domain_page(table);
1036 return;
1037 }
1038
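/*
 * Mark the whole EPT tree for P2M type re-calculation; the actual type
 * changes are applied lazily by resolve_misconfig() when the resulting
 * EPT misconfigurations are hit.
 */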
1039 static void ept_change_entry_type_global(struct p2m_domain *p2m,
1040 p2m_type_t ot, p2m_type_t nt)
1041 {
1042 unsigned long mfn = p2m->ept.mfn;
1043
1044 if ( !mfn )
1045 return;
1046
1047 if ( ept_invalidate_emt_subtree(p2m, _mfn(mfn), 1, p2m->ept.wl) )
1048 ept_sync_domain(p2m);
1049 }
1050
1051 static int ept_change_entry_type_range(struct p2m_domain *p2m,
1052 p2m_type_t ot, p2m_type_t nt,
1053 unsigned long first_gfn,
1054 unsigned long last_gfn)
1055 {
1056 unsigned int i, wl = p2m->ept.wl;
1057 unsigned long mask = (1 << EPT_TABLE_ORDER) - 1;
1058 int rc = 0, sync = 0;
1059
1060 if ( !p2m->ept.mfn )
1061 return -EINVAL;
1062
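/*
 * Invalidate the misaligned head and tail of the range at the current
 * level, then move up one level (widening the alignment mask) for the
 * remaining, fully aligned middle portion.
 */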
1063 for ( i = 0; i <= wl; )
1064 {
1065 if ( first_gfn & mask )
1066 {
1067 unsigned long end_gfn = min(first_gfn | mask, last_gfn);
1068
1069 rc = ept_invalidate_emt_range(p2m, i, first_gfn, end_gfn);
1070 sync |= rc;
1071 if ( rc < 0 || end_gfn >= last_gfn )
1072 break;
1073 first_gfn = end_gfn + 1;
1074 }
1075 else if ( (last_gfn & mask) != mask )
1076 {
1077 unsigned long start_gfn = max(first_gfn, last_gfn & ~mask);
1078
1079 rc = ept_invalidate_emt_range(p2m, i, start_gfn, last_gfn);
1080 sync |= rc;
1081 if ( rc < 0 || start_gfn <= first_gfn )
1082 break;
1083 last_gfn = start_gfn - 1;
1084 }
1085 else
1086 {
1087 ++i;
1088 mask |= mask << EPT_TABLE_ORDER;
1089 }
1090 }
1091
1092 if ( sync )
1093 ept_sync_domain(p2m);
1094
1095 return rc < 0 ? rc : 0;
1096 }
1097
1098 static void ept_memory_type_changed(struct p2m_domain *p2m)
1099 {
1100 unsigned long mfn = p2m->ept.mfn;
1101
1102 if ( !mfn )
1103 return;
1104
1105 if ( ept_invalidate_emt_subtree(p2m, _mfn(mfn), 0, p2m->ept.wl) )
1106 ept_sync_domain(p2m);
1107 }
1108
1109 static void __ept_sync_domain(void *info)
1110 {
1111 /*
1112 * The invalidation will be done before VMENTER (see
1113 * vmx_vmenter_helper()).
1114 */
1115 }
1116
1117 static void ept_sync_domain_prepare(struct p2m_domain *p2m)
1118 {
1119 struct domain *d = p2m->domain;
1120 struct ept_data *ept = &p2m->ept;
1121
1122 if ( nestedhvm_enabled(d) )
1123 {
1124 if ( p2m_is_nestedp2m(p2m) )
1125 ept = &p2m_get_hostp2m(d)->ept;
1126 else
1127 p2m_flush_nestedp2m(d);
1128 }
1129
1130 /*
1131 * Need to invalidate on all PCPUs because either:
1132 *
1133 * a) A VCPU has run and some translations may be cached.
1134 * b) A VCPU has not run and the initial invalidation in case
1135 * of an EP4TA reuse is still needed.
1136 */
1137 cpumask_setall(ept->invalidate);
1138 }
1139
1140 static void ept_sync_domain_mask(struct p2m_domain *p2m, const cpumask_t *mask)
1141 {
1142 on_selected_cpus(mask, __ept_sync_domain, p2m, 1);
1143 }
1144
1145 void ept_sync_domain(struct p2m_domain *p2m)
1146 {
1147 struct domain *d = p2m->domain;
1148
1149 /* Only if using EPT and this domain has some VCPUs to dirty. */
1150 if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
1151 return;
1152
1153 ept_sync_domain_prepare(p2m);
1154
1155 if ( p2m->defer_flush )
1156 {
1157 p2m->need_flush = 1;
1158 return;
1159 }
1160
1161 ept_sync_domain_mask(p2m, d->dirty_cpumask);
1162 }
1163
1164 static void ept_tlb_flush(struct p2m_domain *p2m)
1165 {
1166 ept_sync_domain_mask(p2m, p2m->domain->dirty_cpumask);
1167 }
1168
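/*
 * Set the EPT A/D-bit enable flag on the host p2m and, if altp2m is
 * active, on every active altp2m as well.
 */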
1169 static void ept_set_ad_sync(struct domain *d, bool value)
1170 {
1171 struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
1172
1173 ASSERT(p2m_locked_by_me(hostp2m));
1174
1175 hostp2m->ept.ad = value;
1176
1177 if ( unlikely(altp2m_active(d)) )
1178 {
1179 unsigned int i;
1180
1181 for ( i = 0; i < MAX_ALTP2M; i++ )
1182 {
1183 struct p2m_domain *p2m;
1184
1185 if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
1186 continue;
1187
1188 p2m = d->arch.altp2m_p2m[i];
1189
1190 p2m_lock(p2m);
1191 p2m->ept.ad = value;
1192 p2m_unlock(p2m);
1193 }
1194 }
1195 }
1196
1197 static void ept_enable_pml(struct p2m_domain *p2m)
1198 {
1199 /* Domain must have been paused */
1200 ASSERT(atomic_read(&p2m->domain->pause_count));
1201
1202 /*
1203 * No need to return whether vmx_domain_enable_pml has succeeded, as
1204 * ept_p2m_type_to_flags will do the check, and write protection will be
1205 * used if PML is not enabled.
1206 */
1207 if ( vmx_domain_enable_pml(p2m->domain) )
1208 return;
1209
1210 /* Enable EPT A/D bit for PML */
1211 ept_set_ad_sync(p2m->domain, true);
1212 vmx_domain_update_eptp(p2m->domain);
1213 }
1214
1215 static void ept_disable_pml(struct p2m_domain *p2m)
1216 {
1217 /* Domain must have been paused */
1218 ASSERT(atomic_read(&p2m->domain->pause_count));
1219
1220 vmx_domain_disable_pml(p2m->domain);
1221
1222 /* Disable EPT A/D bit */
1223 ept_set_ad_sync(p2m->domain, false);
1224 vmx_domain_update_eptp(p2m->domain);
1225 }
1226
1227 static void ept_enable_hardware_log_dirty(struct p2m_domain *p2m)
1228 {
1229 struct p2m_domain *hostp2m = p2m_get_hostp2m(p2m->domain);
1230
1231 p2m_lock(hostp2m);
1232 ept_enable_pml(hostp2m);
1233 p2m_unlock(hostp2m);
1234 }
1235
1236 static void ept_disable_hardware_log_dirty(struct p2m_domain *p2m)
1237 {
1238 struct p2m_domain *hostp2m = p2m_get_hostp2m(p2m->domain);
1239
1240 p2m_lock(hostp2m);
1241 ept_disable_pml(hostp2m);
1242 p2m_unlock(hostp2m);
1243 }
1244
1245 static void ept_flush_pml_buffers(struct p2m_domain *p2m)
1246 {
1247 /* Domain must have been paused */
1248 ASSERT(atomic_read(&p2m->domain->pause_count));
1249
1250 vmx_domain_flush_pml_buffers(p2m->domain);
1251 }
1252
1253 int ept_p2m_init(struct p2m_domain *p2m)
1254 {
1255 struct ept_data *ept = &p2m->ept;
1256
1257 p2m->set_entry = ept_set_entry;
1258 p2m->get_entry = ept_get_entry;
1259 p2m->recalc = resolve_misconfig;
1260 p2m->change_entry_type_global = ept_change_entry_type_global;
1261 p2m->change_entry_type_range = ept_change_entry_type_range;
1262 p2m->memory_type_changed = ept_memory_type_changed;
1263 p2m->audit_p2m = NULL;
1264 p2m->tlb_flush = ept_tlb_flush;
1265
1266 /* Set the memory type used when accessing EPT paging structures. */
1267 ept->mt = EPT_DEFAULT_MT;
1268
1269 /* Set the EPT page-walk length; the field holds the actual walk length - 1, i.e. 3. */
1270 ept->wl = 3;
1271
1272 if ( cpu_has_vmx_pml )
1273 {
1274 p2m->enable_hardware_log_dirty = ept_enable_hardware_log_dirty;
1275 p2m->disable_hardware_log_dirty = ept_disable_hardware_log_dirty;
1276 p2m->flush_hardware_cached_dirty = ept_flush_pml_buffers;
1277 }
1278
1279 if ( !zalloc_cpumask_var(&ept->invalidate) )
1280 return -ENOMEM;
1281
1282 /*
1283 * Assume an initial invalidation is required, in case an EP4TA is
1284 * reused.
1285 */
1286 cpumask_setall(ept->invalidate);
1287
1288 return 0;
1289 }
1290
1291 void ept_p2m_uninit(struct p2m_domain *p2m)
1292 {
1293 struct ept_data *ept = &p2m->ept;
1294 free_cpumask_var(ept->invalidate);
1295 }
1296
1297 static const char *memory_type_to_str(unsigned int x)
1298 {
1299 static const char memory_types[8][3] = {
1300 [MTRR_TYPE_UNCACHABLE] = "UC",
1301 [MTRR_TYPE_WRCOMB] = "WC",
1302 [MTRR_TYPE_WRTHROUGH] = "WT",
1303 [MTRR_TYPE_WRPROT] = "WP",
1304 [MTRR_TYPE_WRBACK] = "WB",
1305 [MTRR_NUM_TYPES] = "??"
1306 };
1307
1308 ASSERT(x < ARRAY_SIZE(memory_types));
1309 return memory_types[x][0] ? memory_types[x] : "?";
1310 }
1311
1312 static void ept_dump_p2m_table(unsigned char key)
1313 {
1314 struct domain *d;
1315 ept_entry_t *table, *ept_entry;
1316 int order;
1317 int i;
1318 unsigned long gfn, gfn_remainder;
1319 unsigned long record_counter = 0;
1320 struct p2m_domain *p2m;
1321 struct ept_data *ept;
1322
1323 rcu_read_lock(&domlist_read_lock);
1324
1325 for_each_domain(d)
1326 {
1327 if ( !hap_enabled(d) )
1328 continue;
1329
1330 p2m = p2m_get_hostp2m(d);
1331 ept = &p2m->ept;
1332 printk("\ndomain%d EPT p2m table:\n", d->domain_id);
1333
1334 for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += 1UL << order )
1335 {
1336 char c = 0;
1337 int ret = GUEST_TABLE_MAP_FAILED;
1338
1339 gfn_remainder = gfn;
1340 table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
1341
1342 for ( i = ept->wl; i > 0; i-- )
1343 {
1344 ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
1345 if ( ept_entry->emt == MTRR_NUM_TYPES )
1346 c = '?';
1347 ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
1348 if ( ret != GUEST_TABLE_NORMAL_PAGE )
1349 break;
1350 }
1351
1352 order = i * EPT_TABLE_ORDER;
1353 ept_entry = table + (gfn_remainder >> order);
1354 if ( ret != GUEST_TABLE_MAP_FAILED && is_epte_valid(ept_entry) )
1355 {
1356 if ( ept_entry->sa_p2mt == p2m_populate_on_demand )
1357 printk("gfn: %13lx order: %2d PoD\n", gfn, order);
1358 else
1359 printk("gfn: %13lx order: %2d mfn: %13lx %c%c%c %c%c%c\n",
1360 gfn, order, ept_entry->mfn + 0UL,
1361 ept_entry->r ? 'r' : ' ',
1362 ept_entry->w ? 'w' : ' ',
1363 ept_entry->x ? 'x' : ' ',
1364 memory_type_to_str(ept_entry->emt)[0],
1365 memory_type_to_str(ept_entry->emt)[1]
1366 ?: ept_entry->emt + '0',
1367 c ?: ept_entry->ipat ? '!' : ' ');
1368
1369 if ( !(record_counter++ % 100) )
1370 process_pending_softirqs();
1371 }
1372 unmap_domain_page(table);
1373 }
1374 }
1375
1376 rcu_read_unlock(&domlist_read_lock);
1377 }
1378
1379 void setup_ept_dump(void)
1380 {
1381 register_keyhandler('D', ept_dump_p2m_table, "dump VT-x EPT tables", 0);
1382 }
1383
1384 void p2m_init_altp2m_ept(struct domain *d, unsigned int i)
1385 {
1386 struct p2m_domain *p2m = array_access_nospec(d->arch.altp2m_p2m, i);
1387 struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
1388 struct ept_data *ept;
1389
1390 p2m->ept.ad = hostp2m->ept.ad;
1391 ept = &p2m->ept;
1392 ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
1393 d->arch.altp2m_eptp[array_index_nospec(i, MAX_EPTP)] = ept->eptp;
1394 d->arch.altp2m_visible_eptp[array_index_nospec(i, MAX_EPTP)] = ept->eptp;
1395 }
1396
1397 unsigned int p2m_find_altp2m_by_eptp(struct domain *d, uint64_t eptp)
1398 {
1399 struct p2m_domain *p2m;
1400 struct ept_data *ept;
1401 unsigned int i;
1402
1403 altp2m_list_lock(d);
1404
1405 for ( i = 0; i < MAX_ALTP2M; i++ )
1406 {
1407 if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
1408 continue;
1409
1410 p2m = d->arch.altp2m_p2m[i];
1411 ept = &p2m->ept;
1412
1413 if ( eptp == ept->eptp )
1414 goto out;
1415 }
1416
1417 i = INVALID_ALTP2M;
1418
1419 out:
1420 altp2m_list_unlock(d);
1421 return i;
1422 }
1423
1424 /*
1425 * Local variables:
1426 * mode: C
1427 * c-file-style: "BSD"
1428 * c-basic-offset: 4
1429 * tab-width: 4
1430 * indent-tabs-mode: nil
1431 * End:
1432 */
1433