1 /******************************************************************************
2 * arch/x86/mm/p2m-pt.c
3 *
4 * Implementation of p2m datastructures as pagetables, for use by
5 * NPT and shadow-pagetable code
6 *
7 * Parts of this code are Copyright (c) 2009-2011 by Citrix Systems, Inc.
8 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
9 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
10 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
11 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; If not, see <http://www.gnu.org/licenses/>.
25 */
26
27 #include <xen/vm_event.h>
28 #include <xen/event.h>
29 #include <xen/trace.h>
30 #include <public/vm_event.h>
31 #include <asm/altp2m.h>
32 #include <asm/domain.h>
33 #include <asm/page.h>
34 #include <asm/paging.h>
35 #include <asm/p2m.h>
36 #include <asm/mem_sharing.h>
37 #include <asm/hvm/nestedhvm.h>
38
39 #include "mm-locks.h"
40
41 /*
42 * We may store INVALID_MFN in PTEs. We need to clip this to avoid trampling
43 * over higher-order bits (NX, p2m type). We seem not to need to unclip on the
44 * read path, as callers are concerned only with p2m type in such cases.
45 */
46 #define p2m_l1e_from_pfn(pfn, flags) \
47 l1e_from_pfn((pfn) & (PADDR_MASK >> PAGE_SHIFT), (flags))
48 #define p2m_l2e_from_pfn(pfn, flags) \
49 l2e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
50 >> PAGE_SHIFT), (flags) | _PAGE_PSE)
51 #define p2m_l3e_from_pfn(pfn, flags) \
52 l3e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
53 >> PAGE_SHIFT), (flags) | _PAGE_PSE)
54
55 /* PTE flags for the various types of p2m entry */
56 #define P2M_BASE_FLAGS \
57 (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
58
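/*
 * An entry pending P2M type re-calculation is marked by clearing _PAGE_USER
 * (together with _PAGE_ACCESSED): needs_recalc() tests the U bit, while
 * valid_recalc() uses the A bit as a sanity check that the state was set up
 * via set_recalc().
 */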
59 #define RECALC_FLAGS (_PAGE_USER|_PAGE_ACCESSED)
60 #define set_recalc(level, ent) level##e_remove_flags(ent, RECALC_FLAGS)
61 #define clear_recalc(level, ent) level##e_add_flags(ent, RECALC_FLAGS)
62 #define _needs_recalc(flags) (!((flags) & _PAGE_USER))
63 #define needs_recalc(level, ent) _needs_recalc(level##e_get_flags(ent))
64 #define valid_recalc(level, ent) (!(level##e_get_flags(ent) & _PAGE_ACCESSED))
65
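/*
 * Derive the PTE flags representing a given p2m type. The type itself is
 * stored in bits 12 and up of the flags value, which the PTE flag helpers
 * map onto the upper, software-available PTE bits; the low bits encode the
 * access permissions implied by the type.
 */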
66 static unsigned long p2m_type_to_flags(const struct p2m_domain *p2m,
67 p2m_type_t t,
68 mfn_t mfn,
69 unsigned int level)
70 {
71 unsigned long flags = (unsigned long)(t & 0x7f) << 12;
72
73 switch ( t )
74 {
75 case p2m_invalid:
76 case p2m_mmio_dm:
77 case p2m_populate_on_demand:
78 case p2m_ram_paging_out:
79 case p2m_ram_paged:
80 case p2m_ram_paging_in:
81 default:
82 return flags | _PAGE_NX_BIT;
83 case p2m_grant_map_ro:
84 return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT;
85 case p2m_ioreq_server:
86 flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
87 if ( p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE )
88 return flags & ~_PAGE_RW;
89 return flags;
90 case p2m_ram_ro:
91 case p2m_ram_logdirty:
92 case p2m_ram_shared:
93 return flags | P2M_BASE_FLAGS;
94 case p2m_ram_rw:
95 return flags | P2M_BASE_FLAGS | _PAGE_RW;
96 case p2m_grant_map_rw:
97 case p2m_map_foreign:
98 return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
99 case p2m_mmio_direct:
100 if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
101 flags |= _PAGE_RW;
102 else
103 {
104 flags |= _PAGE_PWT;
105 ASSERT(!level);
106 }
107 return flags | P2M_BASE_FLAGS | _PAGE_PCD;
108 }
109 }
110
111
112 // Find the next level's P2M entry, checking for out-of-range GFNs.
113 // Returns NULL on error.
114 //
115 static l1_pgentry_t *
116 p2m_find_entry(void *table, unsigned long *gfn_remainder,
117 unsigned long gfn, uint32_t shift, uint32_t max)
118 {
119 u32 index;
120
121 index = *gfn_remainder >> shift;
122 if ( index >= max )
123 {
124 P2M_DEBUG("gfn=%#lx out of range "
125 "(gfn_remainder=%#lx shift=%d index=%#x max=%#x)\n",
126 gfn, *gfn_remainder, shift, index, max);
127 return NULL;
128 }
129 *gfn_remainder &= (1 << shift) - 1;
130 return (l1_pgentry_t *)table + index;
131 }
132
133 /* Free intermediate tables from a p2m sub-tree */
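/*
 * Only the page-table page(s) referenced by a non-leaf entry are freed; the
 * entry itself is left untouched. p2m_pt_set_entry() passes a saved copy of
 * the old entry, after the live one has already been replaced.
 */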
134 static void
135 p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order)
136 {
137 /* End if the entry is a leaf entry. */
138 if ( page_order == PAGE_ORDER_4K
139 || !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT)
140 || (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
141 return;
142
143 if ( page_order > PAGE_ORDER_2M )
144 {
145 l1_pgentry_t *l3_table = map_domain_page(l1e_get_mfn(*p2m_entry));
146
147 for ( int i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
148 p2m_free_entry(p2m, l3_table + i, page_order - 9);
149 unmap_domain_page(l3_table);
150 }
151
152 p2m_free_ptp(p2m, l1e_get_page(*p2m_entry));
153 }
154
155 /*
156 * Walk one level of the P2M table, allocating a new table if required.
157 *
158 * Returns: 0 for success, -errno for failure.
159 */
160 static int
161 p2m_next_level(struct p2m_domain *p2m, void **table,
162 unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
163 u32 max, unsigned int level, bool_t unmap)
164 {
165 l1_pgentry_t *p2m_entry, new_entry;
166 void *next;
167 unsigned int flags;
168 int rc;
169 mfn_t mfn;
170
171 if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
172 shift, max)) )
173 return -ENOENT;
174
175 flags = l1e_get_flags(*p2m_entry);
176
177 /* PoD/paging: Not present doesn't imply empty. */
178 if ( !flags )
179 {
180 mfn = p2m_alloc_ptp(p2m, level);
181
182 if ( mfn_eq(mfn, INVALID_MFN) )
183 return -ENOMEM;
184
185 new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
186
187 rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1);
188 if ( rc )
189 goto error;
190 }
191 else if ( flags & _PAGE_PSE )
192 {
193 /* Split superpages into smaller ones. */
194 unsigned long pfn = l1e_get_pfn(*p2m_entry);
195 l1_pgentry_t *l1_entry;
196 unsigned int i;
197
198 switch ( level )
199 {
200 case 2:
201 break;
202
203 case 1:
204 /*
205 * New splintered mappings inherit the flags of the old superpage,
206 * with a little reorganisation for the _PAGE_PSE_PAT bit.
207 */
208 if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
209 pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
210 else
211 flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
212 break;
213
214 default:
215 ASSERT_UNREACHABLE();
216 return -EINVAL;
217 }
218
219 mfn = p2m_alloc_ptp(p2m, level);
220 if ( mfn_eq(mfn, INVALID_MFN) )
221 return -ENOMEM;
222
223 l1_entry = map_domain_page(mfn);
224
225 for ( i = 0; i < (1u << PAGETABLE_ORDER); i++ )
226 {
227 new_entry = l1e_from_pfn(pfn | (i << ((level - 1) * PAGETABLE_ORDER)),
228 flags);
229 rc = p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, level);
230 if ( rc )
231 {
232 unmap_domain_page(l1_entry);
233 goto error;
234 }
235 }
236
237 unmap_domain_page(l1_entry);
238
239 new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
240 rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry,
241 level + 1);
242 if ( rc )
243 goto error;
244 }
245 else
246 ASSERT(flags & _PAGE_PRESENT);
247
248 next = map_domain_page(l1e_get_mfn(*p2m_entry));
249 if ( unmap )
250 unmap_domain_page(*table);
251 *table = next;
252
253 return 0;
254
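/*
 * write_p2m_entry() is not expected to fail here; if it does, free the
 * page-table page that was just allocated before propagating the error.
 */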
255 error:
256 ASSERT(rc && mfn_valid(mfn));
257 ASSERT_UNREACHABLE();
258 p2m_free_ptp(p2m, mfn_to_page(mfn));
259 return rc;
260 }
261
262 /*
263 * Mark (via clearing the U flag) as needing P2M type re-calculation all valid
264 * present entries at the targeted level for the passed in GFN range, which is
265 * guaranteed to not cross a page (table) boundary at that level.
266 */
267 static int p2m_pt_set_recalc_range(struct p2m_domain *p2m,
268 unsigned int level,
269 unsigned long first_gfn,
270 unsigned long last_gfn)
271 {
272 void *table;
273 unsigned long gfn_remainder = first_gfn, remainder;
274 unsigned int i;
275 l1_pgentry_t *pent, *plast;
276 int err = 0;
277
278 table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
279 for ( i = 4; i-- > level; )
280 {
281 remainder = gfn_remainder;
282 pent = p2m_find_entry(table, &remainder, first_gfn,
283 i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
284 if ( !pent )
285 {
286 err = -EINVAL;
287 goto out;
288 }
289
290 if ( !(l1e_get_flags(*pent) & _PAGE_PRESENT) )
291 goto out;
292
293 err = p2m_next_level(p2m, &table, &gfn_remainder, first_gfn,
294 i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER,
295 i, 1);
296 if ( err )
297 goto out;
298 }
299
300 remainder = gfn_remainder + (last_gfn - first_gfn);
301 pent = p2m_find_entry(table, &gfn_remainder, first_gfn,
302 i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
303 plast = p2m_find_entry(table, &remainder, last_gfn,
304 i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
305 if ( pent && plast )
306 for ( ; pent <= plast; ++pent )
307 {
308 l1_pgentry_t e = *pent;
309
310 if ( (l1e_get_flags(e) & _PAGE_PRESENT) && !needs_recalc(l1, e) )
311 {
312 set_recalc(l1, e);
313 err = p2m->write_p2m_entry(p2m, first_gfn, pent, e, level);
314 if ( err )
315 {
316 ASSERT_UNREACHABLE();
317 goto out;
318 }
319 }
320 first_gfn += 1UL << (i * PAGETABLE_ORDER);
321 }
322 else
323 err = -EIO;
324
325 out:
326 unmap_domain_page(table);
327
328 return err;
329 }
330
331 /*
332 * Handle possibly necessary P2M type re-calculation (U flag clear for a
333 * present entry) for the entries in the page table hierarchy for the given
334 * GFN. Propagate the re-calculation flag down to the next page table level
335 * for entries not involved in the translation of the given GFN.
336 */
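/*
 * Returns a negative errno on failure, 0 if there was nothing to
 * re-calculate, and 1 if re-calculation was carried out.
 */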
337 static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
338 {
339 void *table;
340 unsigned long gfn_remainder = gfn;
341 unsigned int level = 4;
342 l1_pgentry_t *pent;
343 int err = 0;
344 bool recalc_done = false;
345
346 table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
347 while ( --level )
348 {
349 unsigned long remainder = gfn_remainder;
350
351 pent = p2m_find_entry(table, &remainder, gfn,
352 level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
353 if ( !pent || !(l1e_get_flags(*pent) & _PAGE_PRESENT) )
354 goto out;
355
356 if ( l1e_get_flags(*pent) & _PAGE_PSE )
357 {
358 unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);
359
360 ASSERT(p2m_flags_to_type(l1e_get_flags(*pent)) != p2m_ioreq_server);
361 if ( !needs_recalc(l1, *pent) ||
362 !p2m_is_changeable(p2m_flags_to_type(l1e_get_flags(*pent))) ||
363 p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask) >= 0 )
364 break;
365 }
366
367 err = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
368 level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER,
369 level, 0);
370 if ( err )
371 goto out;
372
373 if ( needs_recalc(l1, *pent) )
374 {
375 l1_pgentry_t e = *pent, *ptab = table;
376 unsigned int i;
377
378 if ( !valid_recalc(l1, e) )
379 P2M_DEBUG("bogus recalc state at d%d:%lx:%u\n",
380 p2m->domain->domain_id, gfn, level);
381 remainder = gfn_remainder;
382 for ( i = 0; i < (1 << PAGETABLE_ORDER); ++i )
383 {
384 l1_pgentry_t ent = ptab[i];
385
386 if ( (l1e_get_flags(ent) & _PAGE_PRESENT) &&
387 !needs_recalc(l1, ent) )
388 {
389 set_recalc(l1, ent);
390 err = p2m->write_p2m_entry(p2m, gfn - remainder, &ptab[i],
391 ent, level);
392 if ( err )
393 {
394 ASSERT_UNREACHABLE();
395 break;
396 }
397 }
398 remainder -= 1UL << ((level - 1) * PAGETABLE_ORDER);
399 }
400 smp_wmb();
401 if ( !err )
402 {
403 clear_recalc(l1, e);
404 err = p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
405 ASSERT(!err);
406
407 recalc_done = true;
408 }
409 }
410 unmap_domain_page((void *)((unsigned long)pent & PAGE_MASK));
411 if ( unlikely(err) )
412 goto out;
413 }
414
415 pent = p2m_find_entry(table, &gfn_remainder, gfn,
416 level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
417 if ( pent && (l1e_get_flags(*pent) & _PAGE_PRESENT) &&
418 needs_recalc(l1, *pent) )
419 {
420 l1_pgentry_t e = *pent;
421 p2m_type_t ot, nt;
422 unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);
423
424 if ( !valid_recalc(l1, e) )
425 P2M_DEBUG("bogus recalc leaf at d%d:%lx:%u\n",
426 p2m->domain->domain_id, gfn, level);
427 ot = p2m_flags_to_type(l1e_get_flags(e));
428 nt = p2m_recalc_type_range(true, ot, p2m, gfn & mask, gfn | ~mask);
429 if ( nt != ot )
430 {
431 unsigned long mfn = l1e_get_pfn(e);
432 unsigned long flags = p2m_type_to_flags(p2m, nt,
433 _mfn(mfn), level);
434
435 if ( level )
436 {
437 if ( flags & _PAGE_PAT )
438 {
439 BUILD_BUG_ON(_PAGE_PAT != _PAGE_PSE);
440 mfn |= _PAGE_PSE_PAT >> PAGE_SHIFT;
441 }
442 else
443 mfn &= ~((unsigned long)_PAGE_PSE_PAT >> PAGE_SHIFT);
444 flags |= _PAGE_PSE;
445 }
446
447 e = l1e_from_pfn(mfn, flags);
448 ASSERT(!needs_recalc(l1, e));
449 }
450 else
451 clear_recalc(l1, e);
452 err = p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
453 ASSERT(!err);
454
455 recalc_done = true;
456 }
457
458 out:
459 unmap_domain_page(table);
460
461 return err ?: recalc_done;
462 }
463
464 int p2m_pt_handle_deferred_changes(uint64_t gpa)
465 {
466 struct p2m_domain *p2m = p2m_get_hostp2m(current->domain);
467 int rc;
468
469 /*
470 * Should altp2m ever be enabled for NPT / shadow use, this code
471 * should be updated to make use of the active altp2m, like
472 * ept_handle_misconfig().
473 */
474 ASSERT(!altp2m_active(current->domain));
475
476 p2m_lock(p2m);
477 rc = do_recalc(p2m, PFN_DOWN(gpa));
478 p2m_unlock(p2m);
479
480 return rc;
481 }
482
483 /* Checks only applicable to entries with order > PAGE_ORDER_4K */
484 static void check_entry(mfn_t mfn, p2m_type_t new, p2m_type_t old,
485 unsigned int order)
486 {
487 ASSERT(order > PAGE_ORDER_4K);
488 ASSERT(old != p2m_ioreq_server);
489 if ( new == p2m_mmio_direct )
490 ASSERT(!mfn_eq(mfn, INVALID_MFN) &&
491 !rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
492 mfn_x(mfn) + (1ul << order)));
493 else if ( p2m_allows_invalid_mfn(new) || new == p2m_invalid ||
494 new == p2m_mmio_dm )
495 ASSERT(mfn_valid(mfn) || mfn_eq(mfn, INVALID_MFN));
496 else
497 ASSERT(mfn_valid(mfn));
498 }
499
500 /* Returns: 0 for success, -errno for failure */
501 static int
502 p2m_pt_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn,
503 unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma,
504 int sve)
505 {
506 struct domain *d = p2m->domain;
507 /* XXX -- this might be made faster if current->domain == d */
508 void *table;
509 unsigned long gfn = gfn_x(gfn_);
510 unsigned long gfn_remainder = gfn;
511 l1_pgentry_t *p2m_entry, entry_content;
512 /* Intermediate table to free if we're replacing it with a superpage. */
513 l1_pgentry_t intermediate_entry = l1e_empty();
514 l2_pgentry_t l2e_content;
515 l3_pgentry_t l3e_content;
516 int rc;
517 unsigned int iommu_pte_flags = p2m_get_iommu_flags(p2mt, mfn);
518 /*
519 * old_mfn and iommu_old_flags control possible flush/update needs on the
520 * IOMMU: We need to flush when MFN or flags (i.e. permissions) change.
521 * iommu_old_flags being initialized to zero covers the case of the entry
522 * getting replaced being a non-present (leaf or intermediate) one. For
523 * present leaf entries the real value will get calculated below, while
524 * for present intermediate entries ~0 (guaranteed != iommu_pte_flags)
525 * will be used (to cover all cases of what the leaf entries underneath
526 * the intermediate one might be).
527 */
528 unsigned int flags, iommu_old_flags = 0;
529 unsigned long old_mfn = mfn_x(INVALID_MFN);
530
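/* The "suppress #VE" (sve) control is supported by EPT only. */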
531 if ( !sve )
532 return -EOPNOTSUPP;
533
534 if ( tb_init_done )
535 {
536 struct {
537 u64 gfn, mfn;
538 int p2mt;
539 int d:16,order:16;
540 } t;
541
542 t.gfn = gfn;
543 t.mfn = mfn_x(mfn);
544 t.p2mt = p2mt;
545 t.d = d->domain_id;
546 t.order = page_order;
547
548 __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
549 }
550
551 /* Carry out any possibly pending earlier changes first. */
552 rc = do_recalc(p2m, gfn);
553 if ( rc < 0 )
554 return rc;
555
556 table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
557 rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
558 L4_PAGETABLE_SHIFT - PAGE_SHIFT,
559 L4_PAGETABLE_ENTRIES, 3, 1);
560 if ( rc )
561 goto out;
562
563 /*
564 * Install a 1GB (L3) superpage mapping if that is the requested order.
565 */
566 if ( page_order == PAGE_ORDER_1G )
567 {
568 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
569 L3_PAGETABLE_SHIFT - PAGE_SHIFT,
570 L3_PAGETABLE_ENTRIES);
571 ASSERT(p2m_entry);
572 flags = l1e_get_flags(*p2m_entry);
573 if ( flags & _PAGE_PRESENT )
574 {
575 if ( flags & _PAGE_PSE )
576 {
577 old_mfn = l1e_get_pfn(*p2m_entry);
578 iommu_old_flags =
579 p2m_get_iommu_flags(p2m_flags_to_type(flags),
580 _mfn(old_mfn));
581 }
582 else
583 {
584 iommu_old_flags = ~0;
585 intermediate_entry = *p2m_entry;
586 }
587 }
588
589 check_entry(mfn, p2mt, p2m_flags_to_type(flags), page_order);
590 l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
591 ? p2m_l3e_from_pfn(mfn_x(mfn),
592 p2m_type_to_flags(p2m, p2mt, mfn, 2))
593 : l3e_empty();
594 entry_content.l1 = l3e_content.l3;
595
596 rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 3);
597 /* NB: paging_write_p2m_entry() handles tlb flushes properly */
598 if ( rc )
599 goto out;
600 }
601 else
602 {
603 rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
604 L3_PAGETABLE_SHIFT - PAGE_SHIFT,
605 L3_PAGETABLE_ENTRIES, 2, 1);
606 if ( rc )
607 goto out;
608 }
609
610 if ( page_order == PAGE_ORDER_4K )
611 {
612 rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
613 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
614 L2_PAGETABLE_ENTRIES, 1, 1);
615 if ( rc )
616 goto out;
617
618 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
619 0, L1_PAGETABLE_ENTRIES);
620 ASSERT(p2m_entry);
621 old_mfn = l1e_get_pfn(*p2m_entry);
622 iommu_old_flags =
623 p2m_get_iommu_flags(p2m_flags_to_type(l1e_get_flags(*p2m_entry)),
624 _mfn(old_mfn));
625
626 if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
627 entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
628 p2m_type_to_flags(p2m, p2mt, mfn, 0));
629 else
630 entry_content = l1e_empty();
631
632 /* level 1 entry */
633 rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 1);
634 /* NB: paging_write_p2m_entry() handles tlb flushes properly */
635 if ( rc )
636 goto out;
637 }
638 else if ( page_order == PAGE_ORDER_2M )
639 {
640 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
641 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
642 L2_PAGETABLE_ENTRIES);
643 ASSERT(p2m_entry);
644 flags = l1e_get_flags(*p2m_entry);
645 if ( flags & _PAGE_PRESENT )
646 {
647 if ( flags & _PAGE_PSE )
648 {
649 old_mfn = l1e_get_pfn(*p2m_entry);
650 iommu_old_flags =
651 p2m_get_iommu_flags(p2m_flags_to_type(flags),
652 _mfn(old_mfn));
653 }
654 else
655 {
656 iommu_old_flags = ~0;
657 intermediate_entry = *p2m_entry;
658 }
659 }
660
661 check_entry(mfn, p2mt, p2m_flags_to_type(flags), page_order);
662 l2e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
663 ? p2m_l2e_from_pfn(mfn_x(mfn),
664 p2m_type_to_flags(p2m, p2mt, mfn, 1))
665 : l2e_empty();
666 entry_content.l1 = l2e_content.l2;
667
668 rc = p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 2);
669 /* NB: paging_write_p2m_entry() handles tlb flushes properly */
670 if ( rc )
671 goto out;
672 }
673
674 /* Track the highest gfn for which we have ever had a valid mapping */
675 if ( p2mt != p2m_invalid
676 && (gfn + (1UL << page_order) - 1 > p2m->max_mapped_pfn) )
677 p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;
678
679 if ( need_iommu_pt_sync(p2m->domain) &&
680 (iommu_old_flags != iommu_pte_flags || old_mfn != mfn_x(mfn)) )
681 rc = iommu_pte_flags
682 ? iommu_legacy_map(d, _dfn(gfn), mfn, page_order,
683 iommu_pte_flags)
684 : iommu_legacy_unmap(d, _dfn(gfn), page_order);
685
686 /*
687 * Free old intermediate tables if necessary. This has to be the
688 * last thing we do, after removal from the IOMMU tables, so as to
689 * avoid a potential use-after-free.
690 */
691 if ( l1e_get_flags(intermediate_entry) & _PAGE_PRESENT )
692 p2m_free_entry(p2m, &intermediate_entry, page_order);
693
694 out:
695 unmap_domain_page(table);
696 return rc;
697 }
698
699 static mfn_t
700 p2m_pt_get_entry(struct p2m_domain *p2m, gfn_t gfn_,
701 p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
702 unsigned int *page_order, bool_t *sve)
703 {
704 mfn_t mfn;
705 unsigned long gfn = gfn_x(gfn_);
706 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
707 l2_pgentry_t *l2e;
708 l1_pgentry_t *l1e;
709 unsigned int flags;
710 p2m_type_t l1t;
711 bool_t recalc;
712
713 ASSERT(paging_mode_translate(p2m->domain));
714
715 if ( sve )
716 *sve = 1;
717
718 /* XXX This is for compatibility with the old model, where anything not
719 * XXX marked as RAM was considered to be emulated MMIO space.
720 * XXX Once we start explicitly registering MMIO regions in the p2m
721 * XXX we will return p2m_invalid for unmapped gfns */
722 *t = p2m_mmio_dm;
723 /* Not implemented except with EPT */
724 *a = p2m_access_rwx;
725
726 if ( gfn > p2m->max_mapped_pfn )
727 {
728 /* This pfn is higher than the highest the p2m map currently holds */
729 if ( page_order )
730 {
731 for ( *page_order = 3 * PAGETABLE_ORDER; *page_order;
732 *page_order -= PAGETABLE_ORDER )
733 if ( (gfn & ~((1UL << *page_order) - 1)) >
734 p2m->max_mapped_pfn )
735 break;
736 }
737 return INVALID_MFN;
738 }
739
740 mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
741
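/*
 * Walk the four page-table levels by hand. Superpage (PSE) entries
 * terminate the walk early, and any "needs re-calculation" state seen on
 * the way down is carried along so the returned type can be adjusted via
 * p2m_recalc_type().
 */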
742 {
743 l4_pgentry_t *l4e = map_domain_page(mfn);
744 l4e += l4_table_offset(addr);
745 if ( page_order )
746 *page_order = 3 * PAGETABLE_ORDER;
747 if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
748 {
749 unmap_domain_page(l4e);
750 return INVALID_MFN;
751 }
752 mfn = l4e_get_mfn(*l4e);
753 recalc = needs_recalc(l4, *l4e);
754 unmap_domain_page(l4e);
755 }
756 {
757 l3_pgentry_t *l3e = map_domain_page(mfn);
758 l3e += l3_table_offset(addr);
759 if ( page_order )
760 *page_order = 2 * PAGETABLE_ORDER;
761
762 pod_retry_l3:
763 flags = l3e_get_flags(*l3e);
764 if ( !(flags & _PAGE_PRESENT) )
765 {
766 if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
767 {
768 if ( q & P2M_ALLOC )
769 {
770 if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_1G) )
771 goto pod_retry_l3;
772 gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__);
773 }
774 else
775 *t = p2m_populate_on_demand;
776 }
777 unmap_domain_page(l3e);
778 return INVALID_MFN;
779 }
780 if ( flags & _PAGE_PSE )
781 {
782 mfn = _mfn(l3e_get_pfn(*l3e) +
783 l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
784 l1_table_offset(addr));
785 *t = p2m_recalc_type(recalc || _needs_recalc(flags),
786 p2m_flags_to_type(flags), p2m, gfn);
787 unmap_domain_page(l3e);
788
789 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
790 return (p2m_is_valid(*t)) ? mfn : INVALID_MFN;
791 }
792
793 mfn = l3e_get_mfn(*l3e);
794 if ( _needs_recalc(flags) )
795 recalc = 1;
796 unmap_domain_page(l3e);
797 }
798
799 l2e = map_domain_page(mfn);
800 l2e += l2_table_offset(addr);
801 if ( page_order )
802 *page_order = PAGETABLE_ORDER;
803
804 pod_retry_l2:
805 flags = l2e_get_flags(*l2e);
806 if ( !(flags & _PAGE_PRESENT) )
807 {
808 /* PoD: Try to populate a 2-meg chunk */
809 if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
810 {
811 if ( q & P2M_ALLOC ) {
812 if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_2M) )
813 goto pod_retry_l2;
814 } else
815 *t = p2m_populate_on_demand;
816 }
817
818 unmap_domain_page(l2e);
819 return INVALID_MFN;
820 }
821 if ( flags & _PAGE_PSE )
822 {
823 mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
824 *t = p2m_recalc_type(recalc || _needs_recalc(flags),
825 p2m_flags_to_type(flags), p2m, gfn);
826 unmap_domain_page(l2e);
827
828 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
829 return (p2m_is_valid(*t)) ? mfn : INVALID_MFN;
830 }
831
832 mfn = l2e_get_mfn(*l2e);
833 if ( needs_recalc(l2, *l2e) )
834 recalc = 1;
835 unmap_domain_page(l2e);
836
837 l1e = map_domain_page(mfn);
838 l1e += l1_table_offset(addr);
839 if ( page_order )
840 *page_order = 0;
841
842 pod_retry_l1:
843 flags = l1e_get_flags(*l1e);
844 l1t = p2m_flags_to_type(flags);
845 if ( !(flags & _PAGE_PRESENT) && !p2m_is_paging(l1t) )
846 {
847 /* PoD: Try to populate */
848 if ( l1t == p2m_populate_on_demand )
849 {
850 if ( q & P2M_ALLOC ) {
851 if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_4K) )
852 goto pod_retry_l1;
853 } else
854 *t = p2m_populate_on_demand;
855 }
856
857 unmap_domain_page(l1e);
858 return INVALID_MFN;
859 }
860 mfn = l1e_get_mfn(*l1e);
861 *t = p2m_recalc_type(recalc || _needs_recalc(flags), l1t, p2m, gfn);
862 unmap_domain_page(l1e);
863
864 ASSERT(mfn_valid(mfn) || !p2m_is_any_ram(*t) || p2m_is_paging(*t));
865 return (p2m_is_valid(*t) || p2m_is_any_ram(*t)) ? mfn : INVALID_MFN;
866 }
867
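/*
 * Mark all present (and not already marked) L4 entries as needing
 * re-calculation. The actual type changes are carried out lazily by
 * do_recalc(), e.g. from p2m_pt_handle_deferred_changes() or
 * p2m_pt_set_entry().
 */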
868 static void p2m_pt_change_entry_type_global(struct p2m_domain *p2m,
869 p2m_type_t ot, p2m_type_t nt)
870 {
871 l1_pgentry_t *tab;
872 unsigned long gfn = 0;
873 unsigned int i, changed;
874 const struct domain *d = p2m->domain;
875
876 if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) == 0 )
877 return;
878
879 ASSERT(hap_enabled(d));
880
881 tab = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
882 for ( changed = i = 0; i < (1 << PAGETABLE_ORDER); ++i )
883 {
884 l1_pgentry_t e = tab[i];
885
886 if ( (l1e_get_flags(e) & _PAGE_PRESENT) &&
887 !needs_recalc(l1, e) )
888 {
889 int rc;
890
891 set_recalc(l1, e);
892 rc = p2m->write_p2m_entry(p2m, gfn, &tab[i], e, 4);
893 if ( rc )
894 {
895 ASSERT_UNREACHABLE();
896 break;
897 }
898 ++changed;
899 }
900 gfn += 1UL << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
901 }
902 unmap_domain_page(tab);
903
904 if ( changed )
905 guest_flush_tlb_mask(d, d->dirty_cpumask);
906 }
907
908 static int p2m_pt_change_entry_type_range(struct p2m_domain *p2m,
909 p2m_type_t ot, p2m_type_t nt,
910 unsigned long first_gfn,
911 unsigned long last_gfn)
912 {
913 unsigned long mask = (1 << PAGETABLE_ORDER) - 1;
914 unsigned int i;
915 int err = 0;
916
917 ASSERT(hap_enabled(p2m->domain));
918
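/*
 * Work upwards from the lowest level: peel off the unaligned head and tail
 * portions of [first_gfn, last_gfn] at the current level, then widen the
 * mask and move up a level for the remaining, fully aligned middle part.
 */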
919 for ( i = 1; i <= 4; )
920 {
921 if ( first_gfn & mask )
922 {
923 unsigned long end_gfn = min(first_gfn | mask, last_gfn);
924
925 err = p2m_pt_set_recalc_range(p2m, i, first_gfn, end_gfn);
926 if ( err || end_gfn >= last_gfn )
927 break;
928 first_gfn = end_gfn + 1;
929 }
930 else if ( (last_gfn & mask) != mask )
931 {
932 unsigned long start_gfn = max(first_gfn, last_gfn & ~mask);
933
934 err = p2m_pt_set_recalc_range(p2m, i, start_gfn, last_gfn);
935 if ( err || start_gfn <= first_gfn )
936 break;
937 last_gfn = start_gfn - 1;
938 }
939 else
940 {
941 ++i;
942 mask |= mask << PAGETABLE_ORDER;
943 }
944 }
945
946 return err;
947 }
948
949 #if P2M_AUDIT && defined(CONFIG_HVM)
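/*
 * Audit the p2m: cross-check every present entry against the M2P table and
 * tally PoD entries for comparison with p2m->pod.entry_count.
 */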
950 long p2m_pt_audit_p2m(struct p2m_domain *p2m)
951 {
952 unsigned long entry_count = 0, pmbad = 0;
953 unsigned long mfn, gfn, m2pfn;
954
955 ASSERT(p2m_locked_by_me(p2m));
956 ASSERT(pod_locked_by_me(p2m));
957
958 /* Audit part one: walk the domain's p2m table, checking the entries. */
959 if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
960 {
961 l2_pgentry_t *l2e;
962 l1_pgentry_t *l1e;
963 int i1, i2;
964
965 l4_pgentry_t *l4e;
966 l3_pgentry_t *l3e;
967 int i4, i3;
968 l4e = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
969
970 gfn = 0;
971 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
972 {
973 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
974 {
975 gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
976 continue;
977 }
978 l3e = map_l3t_from_l4e(l4e[i4]);
979 for ( i3 = 0;
980 i3 < L3_PAGETABLE_ENTRIES;
981 i3++ )
982 {
983 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
984 {
985 gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
986 continue;
987 }
988
989 /* check for 1GB super page */
990 if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE )
991 {
992 mfn = l3e_get_pfn(l3e[i3]);
993 ASSERT(mfn_valid(_mfn(mfn)));
994 /* we have to cover 512x512 4K pages */
995 for ( i2 = 0;
996 i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES);
997 i2++)
998 {
999 m2pfn = get_gpfn_from_mfn(mfn+i2);
1000 if ( m2pfn != (gfn + i2) )
1001 {
1002 pmbad++;
1003 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1004 " -> gfn %#lx\n", gfn+i2, mfn+i2,
1005 m2pfn);
1006 BUG();
1007 }
1008 gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
1009 continue;
1010 }
1011 }
1012
1013 l2e = map_l2t_from_l3e(l3e[i3]);
1014 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
1015 {
1016 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
1017 {
1018 if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
1019 && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
1020 == p2m_populate_on_demand ) )
1021 entry_count+=SUPERPAGE_PAGES;
1022 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1023 continue;
1024 }
1025
1026 /* check for super page */
1027 if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
1028 {
1029 mfn = l2e_get_pfn(l2e[i2]);
1030 ASSERT(mfn_valid(_mfn(mfn)));
1031 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
1032 {
1033 m2pfn = get_gpfn_from_mfn(mfn+i1);
1034 /* Allow shared M2Ps */
1035 if ( (m2pfn != (gfn + i1)) && !SHARED_M2P(m2pfn) )
1036 {
1037 pmbad++;
1038 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1039 " -> gfn %#lx\n", gfn+i1, mfn+i1,
1040 m2pfn);
1041 BUG();
1042 }
1043 }
1044 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1045 continue;
1046 }
1047
1048 l1e = map_l1t_from_l2e(l2e[i2]);
1049
1050 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
1051 {
1052 p2m_type_t type;
1053
1054 type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
1055 if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
1056 {
1057 if ( type == p2m_populate_on_demand )
1058 entry_count++;
1059 continue;
1060 }
1061 mfn = l1e_get_pfn(l1e[i1]);
1062 ASSERT(mfn_valid(_mfn(mfn)));
1063 m2pfn = get_gpfn_from_mfn(mfn);
1064 if ( m2pfn != gfn &&
1065 type != p2m_mmio_direct &&
1066 !p2m_is_grant(type) &&
1067 !p2m_is_shared(type) )
1068 {
1069 pmbad++;
1070 printk("mismatch: gfn %#lx -> mfn %#lx"
1071 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1072 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1073 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1074 BUG();
1075 }
1076 }
1077 unmap_domain_page(l1e);
1078 }
1079 unmap_domain_page(l2e);
1080 }
1081 unmap_domain_page(l3e);
1082 }
1083
1084 unmap_domain_page(l4e);
1085 }
1086
1087 if ( entry_count != p2m->pod.entry_count )
1088 {
1089 printk("%s: refcounted entry count %ld, audit count %lu!\n",
1090 __func__,
1091 p2m->pod.entry_count,
1092 entry_count);
1093 BUG();
1094 }
1095
1096 return pmbad;
1097 }
1098 #else
1099 # define p2m_pt_audit_p2m NULL
1100 #endif /* P2M_AUDIT */
1101
1102 /* Set up the p2m function pointers for pagetable format */
1103 void p2m_pt_init(struct p2m_domain *p2m)
1104 {
1105 p2m->set_entry = p2m_pt_set_entry;
1106 p2m->get_entry = p2m_pt_get_entry;
1107 p2m->recalc = do_recalc;
1108 p2m->change_entry_type_global = p2m_pt_change_entry_type_global;
1109 p2m->change_entry_type_range = p2m_pt_change_entry_type_range;
1110 p2m->write_p2m_entry = paging_write_p2m_entry;
1111 #if P2M_AUDIT
1112 p2m->audit_p2m = p2m_pt_audit_p2m;
1113 #else
1114 p2m->audit_p2m = NULL;
1115 #endif
1116 }
1117
1118
1119