1 #include <xen/cpu.h>
2 #include <xen/domain_page.h>
3 #include <xen/iocap.h>
4 #include <xen/lib.h>
5 #include <xen/sched.h>
6 #include <xen/softirq.h>
7
8 #include <asm/alternative.h>
9 #include <asm/event.h>
10 #include <asm/flushtlb.h>
11 #include <asm/guest_walk.h>
12 #include <asm/page.h>
13
14 #define MAX_VMID_8_BIT (1UL << 8)
15 #define MAX_VMID_16_BIT (1UL << 16)
16
17 #define INVALID_VMID 0 /* VMID 0 is reserved */
18
19 #ifdef CONFIG_ARM_64
20 unsigned int __read_mostly p2m_root_order;
21 unsigned int __read_mostly p2m_root_level;
22 static unsigned int __read_mostly max_vmid = MAX_VMID_8_BIT;
23 /* VMID is by default 8 bit width on AArch64 */
24 #define MAX_VMID max_vmid
25 #else
26 /* VMID is always 8 bit width on AArch32 */
27 #define MAX_VMID MAX_VMID_8_BIT
28 #endif
29
30 #define P2M_ROOT_PAGES (1<<P2M_ROOT_ORDER)
31
32 /*
33 * Set larger than any possible value, so the number of IPA bits can be
 * restricted by an external entity (e.g. the IOMMU).
35 */
36 unsigned int __read_mostly p2m_ipa_bits = 64;
37
38 /* Helpers to lookup the properties of each level */
39 static const paddr_t level_masks[] =
40 { ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK };
41 static const uint8_t level_orders[] =
42 { ZEROETH_ORDER, FIRST_ORDER, SECOND_ORDER, THIRD_ORDER };
43
44 static mfn_t __read_mostly empty_root_mfn;
45
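/*
 * Generate a VTTBR value: the root table base address in the low bits
 * and the VMID in bits [48:63].
 */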
static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn)
47 {
48 return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48));
49 }
50
/* Do any pending P2M TLB flush and release the P2M write lock. */
void p2m_write_unlock(struct p2m_domain *p2m)
53 {
    /*
     * The final flush is done with the P2M write lock taken to avoid
     * someone else modifying the P2M before the TLB invalidation has
     * completed.
     */
59 p2m_tlb_flush_sync(p2m);
60
61 write_unlock(&p2m->lock);
62 }
63
void p2m_dump_info(struct domain *d)
65 {
66 struct p2m_domain *p2m = p2m_get_hostp2m(d);
67
68 p2m_read_lock(p2m);
69 printk("p2m mappings for domain %d (vmid %d):\n",
70 d->domain_id, p2m->vmid);
71 BUG_ON(p2m->stats.mappings[0] || p2m->stats.shattered[0]);
72 printk(" 1G mappings: %ld (shattered %ld)\n",
73 p2m->stats.mappings[1], p2m->stats.shattered[1]);
74 printk(" 2M mappings: %ld (shattered %ld)\n",
75 p2m->stats.mappings[2], p2m->stats.shattered[2]);
76 printk(" 4K mappings: %ld\n", p2m->stats.mappings[3]);
77 p2m_read_unlock(p2m);
78 }
79
void memory_type_changed(struct domain *d)
81 {
82 }
83
void dump_p2m_lookup(struct domain *d, paddr_t addr)
85 {
86 struct p2m_domain *p2m = p2m_get_hostp2m(d);
87
88 printk("dom%d IPA 0x%"PRIpaddr"\n", d->domain_id, addr);
89
90 printk("P2M @ %p mfn:%#"PRI_mfn"\n",
91 p2m->root, mfn_x(page_to_mfn(p2m->root)));
92
93 dump_pt_walk(page_to_maddr(p2m->root), addr,
94 P2M_ROOT_LEVEL, P2M_ROOT_PAGES);
95 }
96
97 /*
 * p2m_save_state and p2m_restore_state work as a pair to work around
99 * ARM64_WORKAROUND_AT_SPECULATE. p2m_save_state will set-up VTTBR to
100 * point to the empty page-tables to stop allocating TLB entries.
101 */
void p2m_save_state(struct vcpu *p)
103 {
104 p->arch.sctlr = READ_SYSREG(SCTLR_EL1);
105
106 if ( cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
107 {
108 WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2);
109 /*
110 * Ensure VTTBR_EL2 is correctly synchronized so we can restore
111 * the next vCPU context without worrying about AT instruction
112 * speculation.
113 */
114 isb();
115 }
116 }
117
void p2m_restore_state(struct vcpu *n)
119 {
120 struct p2m_domain *p2m = p2m_get_hostp2m(n->domain);
121 uint8_t *last_vcpu_ran;
122
123 if ( is_idle_vcpu(n) )
124 return;
125
126 WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1);
127 WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2);
128
129 /*
130 * ARM64_WORKAROUND_AT_SPECULATE: VTTBR_EL2 should be restored after all
     * registers associated with the EL1/EL0 translation regime have been
132 * synchronized.
133 */
134 asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE));
135 WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
136
137 last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()];
138
139 /*
140 * While we are restoring an out-of-context translation regime
141 * we still need to ensure:
142 * - VTTBR_EL2 is synchronized before flushing the TLBs
143 * - All registers for EL1 are synchronized before executing an AT
     *    instruction targeting S1/S2.
145 */
146 isb();
147
148 /*
     * Flush the local TLB for the domain to prevent wrong TLB translations
     * when running multiple vCPUs of the same domain on a single pCPU.
151 */
152 if ( *last_vcpu_ran != INVALID_VCPU_ID && *last_vcpu_ran != n->vcpu_id )
153 flush_guest_tlb_local();
154
155 *last_vcpu_ran = n->vcpu_id;
156 }
157
158 /*
159 * Force a synchronous P2M TLB flush.
160 *
161 * Must be called with the p2m lock held.
162 */
static void p2m_force_tlb_flush_sync(struct p2m_domain *p2m)
164 {
165 unsigned long flags = 0;
166 uint64_t ovttbr;
167
168 ASSERT(p2m_is_write_locked(p2m));
169
170 /*
171 * ARM only provides an instruction to flush TLBs for the current
172 * VMID. So switch to the VTTBR of a given P2M if different.
173 */
174 ovttbr = READ_SYSREG64(VTTBR_EL2);
175 if ( ovttbr != p2m->vttbr )
176 {
177 uint64_t vttbr;
178
179 local_irq_save(flags);
180
181 /*
         * ARM64_WORKAROUND_AT_SPECULATE: We need to stop AT from allocating
         * TLB entries because the context is partially modified. We
184 * only need the VMID for flushing the TLBs, so we can generate
185 * a new VTTBR with the VMID to flush and the empty root table.
186 */
187 if ( !cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
188 vttbr = p2m->vttbr;
189 else
190 vttbr = generate_vttbr(p2m->vmid, empty_root_mfn);
191
192 WRITE_SYSREG64(vttbr, VTTBR_EL2);
193
194 /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */
195 isb();
196 }
197
198 flush_guest_tlb();
199
200 if ( ovttbr != READ_SYSREG64(VTTBR_EL2) )
201 {
202 WRITE_SYSREG64(ovttbr, VTTBR_EL2);
203 /* Ensure VTTBR_EL2 is back in place before continuing. */
204 isb();
205 local_irq_restore(flags);
206 }
207
208 p2m->need_flush = false;
209 }
210
void p2m_tlb_flush_sync(struct p2m_domain *p2m)
212 {
213 if ( p2m->need_flush )
214 p2m_force_tlb_flush_sync(p2m);
215 }
216
217 /*
218 * Find and map the root page table. The caller is responsible for
219 * unmapping the table.
220 *
221 * The function will return NULL if the offset of the root table is
222 * invalid.
223 */
static lpae_t *p2m_get_root_pointer(struct p2m_domain *p2m,
                                    gfn_t gfn)
226 {
227 unsigned long root_table;
228
229 /*
230 * While the root table index is the offset from the previous level,
231 * we can't use (P2M_ROOT_LEVEL - 1) because the root level might be
232 * 0. Yet we still want to check if all the unused bits are zeroed.
233 */
234 root_table = gfn_x(gfn) >> (level_orders[P2M_ROOT_LEVEL] + LPAE_SHIFT);
235 if ( root_table >= P2M_ROOT_PAGES )
236 return NULL;
237
238 return __map_domain_page(p2m->root + root_table);
239 }
240
241 /*
 * Look up the mem access setting for a domain's GFN in the radix tree.
 * The entry associated with the GFN is assumed to be valid.
245 */
static p2m_access_t p2m_mem_access_radix_get(struct p2m_domain *p2m, gfn_t gfn)
247 {
248 void *ptr;
249
250 if ( !p2m->mem_access_enabled )
251 return p2m->default_access;
252
253 ptr = radix_tree_lookup(&p2m->mem_access_settings, gfn_x(gfn));
254 if ( !ptr )
255 return p2m_access_rwx;
256 else
257 return radix_tree_ptr_to_int(ptr);
258 }
259
260 /*
 * In the case of the P2M, the valid bit is used for other purposes. Use
262 * the type to check whether an entry is valid.
263 */
static inline bool p2m_is_valid(lpae_t pte)
265 {
266 return pte.p2m.type != p2m_invalid;
267 }
268
269 /*
270 * lpae_is_* helpers don't check whether the valid bit is set in the
271 * PTE. Provide our own overlay to check the valid bit.
272 */
static inline bool p2m_is_mapping(lpae_t pte, unsigned int level)
274 {
275 return p2m_is_valid(pte) && lpae_is_mapping(pte, level);
276 }
277
static inline bool p2m_is_superpage(lpae_t pte, unsigned int level)
279 {
280 return p2m_is_valid(pte) && lpae_is_superpage(pte, level);
281 }
282
283 #define GUEST_TABLE_MAP_FAILED 0
284 #define GUEST_TABLE_SUPER_PAGE 1
285 #define GUEST_TABLE_NORMAL_PAGE 2
286
287 static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry);
288
289 /*
290 * Take the currently mapped table, find the corresponding GFN entry,
291 * and map the next table, if available. The previous table will be
292 * unmapped if the next level was mapped (e.g GUEST_TABLE_NORMAL_PAGE
293 * returned).
294 *
 * The read_only parameter indicates whether missing intermediate tables
 * may be allocated (they will not be when read_only is set).
297 *
298 * Return values:
299 * GUEST_TABLE_MAP_FAILED: Either read_only was set and the entry
300 * was empty, or allocating a new page failed.
301 * GUEST_TABLE_NORMAL_PAGE: next level mapped normally
302 * GUEST_TABLE_SUPER_PAGE: The next entry points to a superpage.
303 */
static int p2m_next_level(struct p2m_domain *p2m, bool read_only,
                          unsigned int level, lpae_t **table,
                          unsigned int offset)
307 {
308 lpae_t *entry;
309 int ret;
310 mfn_t mfn;
311
312 entry = *table + offset;
313
314 if ( !p2m_is_valid(*entry) )
315 {
316 if ( read_only )
317 return GUEST_TABLE_MAP_FAILED;
318
319 ret = p2m_create_table(p2m, entry);
320 if ( ret )
321 return GUEST_TABLE_MAP_FAILED;
322 }
323
324 /* The function p2m_next_level is never called at the 3rd level */
325 ASSERT(level < 3);
326 if ( p2m_is_mapping(*entry, level) )
327 return GUEST_TABLE_SUPER_PAGE;
328
329 mfn = lpae_get_mfn(*entry);
330
331 unmap_domain_page(*table);
332 *table = map_domain_page(mfn);
333
334 return GUEST_TABLE_NORMAL_PAGE;
335 }
336
337 /*
338 * Get the details of a given gfn.
339 *
340 * If the entry is present, the associated MFN will be returned and the
341 * access and type filled up. The page_order will correspond to the
 * order of the mapping in the page table (i.e. it could be a superpage).
343 *
344 * If the entry is not present, INVALID_MFN will be returned and the
345 * page_order will be set according to the order of the invalid range.
346 *
 * valid will contain the value of bit[0] (i.e. the valid bit) of the
348 * entry.
349 */
mfn_t p2m_get_entry(struct p2m_domain *p2m, gfn_t gfn,
                    p2m_type_t *t, p2m_access_t *a,
                    unsigned int *page_order,
                    bool *valid)
354 {
355 paddr_t addr = gfn_to_gaddr(gfn);
356 unsigned int level = 0;
357 lpae_t entry, *table;
358 int rc;
359 mfn_t mfn = INVALID_MFN;
360 p2m_type_t _t;
361 DECLARE_OFFSETS(offsets, addr);
362
363 ASSERT(p2m_is_locked(p2m));
364 BUILD_BUG_ON(THIRD_MASK != PAGE_MASK);
365
366 /* Allow t to be NULL */
367 t = t ?: &_t;
368
369 *t = p2m_invalid;
370
371 if ( valid )
372 *valid = false;
373
374 /* XXX: Check if the mapping is lower than the mapped gfn */
375
376 /* This gfn is higher than the highest the p2m map currently holds */
377 if ( gfn_x(gfn) > gfn_x(p2m->max_mapped_gfn) )
378 {
379 for ( level = P2M_ROOT_LEVEL; level < 3; level++ )
380 if ( (gfn_x(gfn) & (level_masks[level] >> PAGE_SHIFT)) >
381 gfn_x(p2m->max_mapped_gfn) )
382 break;
383
384 goto out;
385 }
386
387 table = p2m_get_root_pointer(p2m, gfn);
388
    /*
     * The table should always be non-NULL because the gfn is below
     * p2m->max_mapped_gfn and the root table pages are always present.
     */
393 if ( !table )
394 {
395 ASSERT_UNREACHABLE();
396 level = P2M_ROOT_LEVEL;
397 goto out;
398 }
399
400 for ( level = P2M_ROOT_LEVEL; level < 3; level++ )
401 {
402 rc = p2m_next_level(p2m, true, level, &table, offsets[level]);
403 if ( rc == GUEST_TABLE_MAP_FAILED )
404 goto out_unmap;
405 else if ( rc != GUEST_TABLE_NORMAL_PAGE )
406 break;
407 }
408
409 entry = table[offsets[level]];
410
411 if ( p2m_is_valid(entry) )
412 {
413 *t = entry.p2m.type;
414
415 if ( a )
416 *a = p2m_mem_access_radix_get(p2m, gfn);
417
418 mfn = lpae_get_mfn(entry);
419 /*
420 * The entry may point to a superpage. Find the MFN associated
421 * to the GFN.
422 */
423 mfn = mfn_add(mfn, gfn_x(gfn) & ((1UL << level_orders[level]) - 1));
424
425 if ( valid )
426 *valid = lpae_is_valid(entry);
427 }
428
429 out_unmap:
430 unmap_domain_page(table);
431
432 out:
433 if ( page_order )
434 *page_order = level_orders[level];
435
436 return mfn;
437 }
438
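/* Look up the MFN mapped at the given GFN, taking the P2M read lock. */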
mfn_t p2m_lookup(struct domain *d, gfn_t gfn, p2m_type_t *t)
440 {
441 mfn_t mfn;
442 struct p2m_domain *p2m = p2m_get_hostp2m(d);
443
444 p2m_read_lock(p2m);
445 mfn = p2m_get_entry(p2m, gfn, t, NULL, NULL, NULL);
446 p2m_read_unlock(p2m);
447
448 return mfn;
449 }
450
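/*
 * Translate a GFN to the underlying page and take a reference on it.
 * Return NULL if the GFN is not backed by RAM or the reference could
 * not be acquired.
 */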
struct page_info *p2m_get_page_from_gfn(struct domain *d, gfn_t gfn,
                                        p2m_type_t *t)
453 {
454 struct page_info *page;
455 p2m_type_t p2mt;
456 mfn_t mfn = p2m_lookup(d, gfn, &p2mt);
457
458 if ( t )
459 *t = p2mt;
460
461 if ( !p2m_is_any_ram(p2mt) )
462 return NULL;
463
464 if ( !mfn_valid(mfn) )
465 return NULL;
466
467 page = mfn_to_page(mfn);
468
469 /*
470 * get_page won't work on foreign mapping because the page doesn't
471 * belong to the current domain.
472 */
473 if ( p2m_is_foreign(p2mt) )
474 {
475 struct domain *fdom = page_get_owner_and_reference(page);
476 ASSERT(fdom != NULL);
477 ASSERT(fdom != d);
478 return page;
479 }
480
481 return get_page(page, d) ? page : NULL;
482 }
483
int guest_physmap_mark_populate_on_demand(struct domain *d,
                                          unsigned long gfn,
                                          unsigned int order)
487 {
488 return -ENOSYS;
489 }
490
unsigned long p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn,
                                           unsigned int order)
493 {
494 return 0;
495 }
496
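/*
 * Derive the read/write/xn bits of an entry from the P2M type, then
 * restrict them further according to the mem-access permission.
 */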
static void p2m_set_permission(lpae_t *e, p2m_type_t t, p2m_access_t a)
498 {
499 /* First apply type permissions */
500 switch ( t )
501 {
502 case p2m_ram_rw:
503 e->p2m.xn = 0;
504 e->p2m.write = 1;
505 break;
506
507 case p2m_ram_ro:
508 e->p2m.xn = 0;
509 e->p2m.write = 0;
510 break;
511
512 case p2m_iommu_map_rw:
513 case p2m_map_foreign_rw:
514 case p2m_grant_map_rw:
515 case p2m_mmio_direct_dev:
516 case p2m_mmio_direct_nc:
517 case p2m_mmio_direct_c:
518 e->p2m.xn = 1;
519 e->p2m.write = 1;
520 break;
521
522 case p2m_iommu_map_ro:
523 case p2m_map_foreign_ro:
524 case p2m_grant_map_ro:
525 case p2m_invalid:
526 e->p2m.xn = 1;
527 e->p2m.write = 0;
528 break;
529
530 case p2m_max_real_type:
531 BUG();
532 break;
533 }
534
535 /* Then restrict with access permissions */
536 switch ( a )
537 {
538 case p2m_access_rwx:
539 break;
540 case p2m_access_wx:
541 e->p2m.read = 0;
542 break;
543 case p2m_access_rw:
544 e->p2m.xn = 1;
545 break;
546 case p2m_access_w:
547 e->p2m.read = 0;
548 e->p2m.xn = 1;
549 break;
550 case p2m_access_rx:
551 case p2m_access_rx2rw:
552 e->p2m.write = 0;
553 break;
554 case p2m_access_x:
555 e->p2m.write = 0;
556 e->p2m.read = 0;
557 break;
558 case p2m_access_r:
559 e->p2m.write = 0;
560 e->p2m.xn = 1;
561 break;
562 case p2m_access_n:
563 case p2m_access_n2rwx:
564 e->p2m.read = e->p2m.write = 0;
565 e->p2m.xn = 1;
566 break;
567 }
568 }
569
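/*
 * Build a P2M leaf entry for the given MFN, with memory attributes and
 * permissions derived from the type and access.
 */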
static lpae_t mfn_to_p2m_entry(mfn_t mfn, p2m_type_t t, p2m_access_t a)
571 {
572 /*
573 * sh, xn and write bit will be defined in the following switches
574 * based on mattr and t.
575 */
576 lpae_t e = (lpae_t) {
577 .p2m.af = 1,
578 .p2m.read = 1,
579 .p2m.table = 1,
580 .p2m.valid = 1,
581 .p2m.type = t,
582 };
583
584 BUILD_BUG_ON(p2m_max_real_type > (1 << 4));
585
586 switch ( t )
587 {
588 case p2m_mmio_direct_dev:
589 e.p2m.mattr = MATTR_DEV;
590 e.p2m.sh = LPAE_SH_OUTER;
591 break;
592
593 case p2m_mmio_direct_c:
594 e.p2m.mattr = MATTR_MEM;
595 e.p2m.sh = LPAE_SH_OUTER;
596 break;
597
598 /*
     * ARM ARM: Overlaying the shareability attribute (DDI
     * 0406C.b B3-1376 to 1377)
     *
     * A memory region with a resultant memory type attribute of Normal,
     * and a resultant cacheability attribute of Inner Non-cacheable,
     * Outer Non-cacheable, must have a resultant shareability attribute
     * of Outer Shareable, otherwise shareability is UNPREDICTABLE.
     *
     * On ARMv8 shareability is ignored and explicitly treated as Outer
     * Shareable for Normal Inner Non-cacheable, Outer Non-cacheable.
     * See the note for table D4-40, in page 1788 of the ARM DDI 0487A.j.
     */
611 case p2m_mmio_direct_nc:
612 e.p2m.mattr = MATTR_MEM_NC;
613 e.p2m.sh = LPAE_SH_OUTER;
614 break;
615
616 default:
617 e.p2m.mattr = MATTR_MEM;
618 e.p2m.sh = LPAE_SH_INNER;
619 }
620
621 p2m_set_permission(&e, t, a);
622
623 ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));
624
625 lpae_set_mfn(e, mfn);
626
627 return e;
628 }
629
630 /* Generate table entry with correct attributes. */
static lpae_t page_to_p2m_table(struct page_info *page)
632 {
633 /*
634 * The access value does not matter because the hardware will ignore
635 * the permission fields for table entry.
636 *
637 * We use p2m_ram_rw so the entry has a valid type. This is important
638 * for p2m_is_valid() to return valid on table entries.
639 */
640 return mfn_to_p2m_entry(page_to_mfn(page), p2m_ram_rw, p2m_access_rwx);
641 }
642
static inline void p2m_write_pte(lpae_t *p, lpae_t pte, bool clean_pte)
644 {
645 write_pte(p, pte);
646 if ( clean_pte )
647 clean_dcache(*p);
648 }
649
static inline void p2m_remove_pte(lpae_t *p, bool clean_pte)
651 {
652 lpae_t pte;
653
654 memset(&pte, 0x00, sizeof(pte));
655 p2m_write_pte(p, pte, clean_pte);
656 }
657
658 /* Allocate a new page table page and hook it in via the given entry. */
static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry)
660 {
661 struct page_info *page;
662 lpae_t *p;
663
664 ASSERT(!p2m_is_valid(*entry));
665
666 page = alloc_domheap_page(NULL, 0);
667 if ( page == NULL )
668 return -ENOMEM;
669
670 page_list_add(page, &p2m->pages);
671
672 p = __map_domain_page(page);
673 clear_page(p);
674
675 if ( p2m->clean_pte )
676 clean_dcache_va_range(p, PAGE_SIZE);
677
678 unmap_domain_page(p);
679
680 p2m_write_pte(entry, page_to_p2m_table(page), p2m->clean_pte);
681
682 return 0;
683 }
684
static int p2m_mem_access_radix_set(struct p2m_domain *p2m, gfn_t gfn,
                                    p2m_access_t a)
687 {
688 int rc;
689
690 if ( !p2m->mem_access_enabled )
691 return 0;
692
693 if ( p2m_access_rwx == a )
694 {
695 radix_tree_delete(&p2m->mem_access_settings, gfn_x(gfn));
696 return 0;
697 }
698
699 rc = radix_tree_insert(&p2m->mem_access_settings, gfn_x(gfn),
700 radix_tree_int_to_ptr(a));
701 if ( rc == -EEXIST )
702 {
703 /* If a setting already exists, change it to the new one */
704 radix_tree_replace_slot(
705 radix_tree_lookup_slot(
706 &p2m->mem_access_settings, gfn_x(gfn)),
707 radix_tree_int_to_ptr(a));
708 rc = 0;
709 }
710
711 return rc;
712 }
713
714 /*
715 * Put any references on the single 4K page referenced by pte.
716 * TODO: Handle superpages, for now we only take special references for leaf
717 * pages (specifically foreign ones, which can't be super mapped today).
718 */
static void p2m_put_l3_page(const lpae_t pte)
720 {
721 ASSERT(p2m_is_valid(pte));
722
723 /*
724 * TODO: Handle other p2m types
725 *
726 * It's safe to do the put_page here because page_alloc will
727 * flush the TLBs if the page is reallocated before the end of
728 * this loop.
729 */
730 if ( p2m_is_foreign(pte.p2m.type) )
731 {
732 mfn_t mfn = lpae_get_mfn(pte);
733
734 ASSERT(mfn_valid(mfn));
735 put_page(mfn_to_page(mfn));
736 }
737 }
738
739 /* Free lpae sub-tree behind an entry */
static void p2m_free_entry(struct p2m_domain *p2m,
                           lpae_t entry, unsigned int level)
742 {
743 unsigned int i;
744 lpae_t *table;
745 mfn_t mfn;
746 struct page_info *pg;
747
748 /* Nothing to do if the entry is invalid. */
749 if ( !p2m_is_valid(entry) )
750 return;
751
752 /* Nothing to do but updating the stats if the entry is a super-page. */
753 if ( p2m_is_superpage(entry, level) )
754 {
755 p2m->stats.mappings[level]--;
756 return;
757 }
758
759 if ( level == 3 )
760 {
761 p2m->stats.mappings[level]--;
762 p2m_put_l3_page(entry);
763 return;
764 }
765
766 table = map_domain_page(lpae_get_mfn(entry));
767 for ( i = 0; i < LPAE_ENTRIES; i++ )
768 p2m_free_entry(p2m, *(table + i), level + 1);
769
770 unmap_domain_page(table);
771
772 /*
773 * Make sure all the references in the TLB have been removed before
     * freeing the intermediate page table.
775 * XXX: Should we defer the free of the page table to avoid the
776 * flush?
777 */
778 p2m_tlb_flush_sync(p2m);
779
780 mfn = lpae_get_mfn(entry);
781 ASSERT(mfn_valid(mfn));
782
783 pg = mfn_to_page(mfn);
784
785 page_list_del(pg, &p2m->pages);
786 free_domheap_page(pg);
787 }
788
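/*
 * Split a superpage entry into a table of next-level entries, recursing
 * until the target level is reached. Return false if a page allocation
 * failed; the partially populated sub-tree is still installed and must
 * be freed by the caller.
 */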
static bool p2m_split_superpage(struct p2m_domain *p2m, lpae_t *entry,
                                unsigned int level, unsigned int target,
                                const unsigned int *offsets)
792 {
793 struct page_info *page;
794 unsigned int i;
795 lpae_t pte, *table;
796 bool rv = true;
797
798 /* Convenience aliases */
799 mfn_t mfn = lpae_get_mfn(*entry);
800 unsigned int next_level = level + 1;
801 unsigned int level_order = level_orders[next_level];
802
803 /*
804 * This should only be called with target != level and the entry is
805 * a superpage.
806 */
807 ASSERT(level < target);
808 ASSERT(p2m_is_superpage(*entry, level));
809
810 page = alloc_domheap_page(NULL, 0);
811 if ( !page )
812 return false;
813
814 page_list_add(page, &p2m->pages);
815 table = __map_domain_page(page);
816
817 /*
818 * We are either splitting a first level 1G page into 512 second level
819 * 2M pages, or a second level 2M page into 512 third level 4K pages.
820 */
821 for ( i = 0; i < LPAE_ENTRIES; i++ )
822 {
823 lpae_t *new_entry = table + i;
824
825 /*
         * Use the content of the superpage entry and override
         * the necessary fields, so the correct permissions are kept.
828 */
829 pte = *entry;
830 lpae_set_mfn(pte, mfn_add(mfn, i << level_order));
831
832 /*
833 * First and second level pages set p2m.table = 0, but third
834 * level entries set p2m.table = 1.
835 */
836 pte.p2m.table = (next_level == 3);
837
838 write_pte(new_entry, pte);
839 }
840
841 /* Update stats */
842 p2m->stats.shattered[level]++;
843 p2m->stats.mappings[level]--;
844 p2m->stats.mappings[next_level] += LPAE_ENTRIES;
845
846 /*
     * Shatter the superpage further down, to the level at which we want
     * to make the changes.
849 * This is done outside the loop to avoid checking the offset to
850 * know whether the entry should be shattered for every entry.
851 */
852 if ( next_level != target )
853 rv = p2m_split_superpage(p2m, table + offsets[next_level],
854 level + 1, target, offsets);
855
856 if ( p2m->clean_pte )
857 clean_dcache_va_range(table, PAGE_SIZE);
858
859 unmap_domain_page(table);
860
861 /*
862 * Even if we failed, we should install the newly allocated LPAE
     * entry. The caller will be in charge of freeing the sub-tree.
864 */
865 p2m_write_pte(entry, page_to_p2m_table(page), p2m->clean_pte);
866
867 return rv;
868 }
869
870 /*
871 * Insert an entry in the p2m. This should be called with a mapping
872 * equal to a page/superpage (4K, 2M, 1G).
873 */
static int __p2m_set_entry(struct p2m_domain *p2m,
                           gfn_t sgfn,
                           unsigned int page_order,
                           mfn_t smfn,
                           p2m_type_t t,
                           p2m_access_t a)
880 {
881 unsigned int level = 0;
882 unsigned int target = 3 - (page_order / LPAE_SHIFT);
883 lpae_t *entry, *table, orig_pte;
884 int rc;
885 /* A mapping is removed if the MFN is invalid. */
886 bool removing_mapping = mfn_eq(smfn, INVALID_MFN);
887 DECLARE_OFFSETS(offsets, gfn_to_gaddr(sgfn));
888
889 ASSERT(p2m_is_write_locked(p2m));
890
891 /*
892 * Check if the level target is valid: we only support
893 * 4K - 2M - 1G mapping.
894 */
895 ASSERT(target > 0 && target <= 3);
896
897 table = p2m_get_root_pointer(p2m, sgfn);
898 if ( !table )
899 return -EINVAL;
900
901 for ( level = P2M_ROOT_LEVEL; level < target; level++ )
902 {
903 /*
904 * Don't try to allocate intermediate page table if the mapping
905 * is about to be removed.
906 */
907 rc = p2m_next_level(p2m, removing_mapping,
908 level, &table, offsets[level]);
909 if ( rc == GUEST_TABLE_MAP_FAILED )
910 {
911 /*
912 * We are here because p2m_next_level has failed to map
             * the intermediate page table (e.g. the table does not exist
             * and the p2m tree is read-only). It is a valid case
915 * when removing a mapping as it may not exist in the
916 * page table. In this case, just ignore it.
917 */
918 rc = removing_mapping ? 0 : -ENOENT;
919 goto out;
920 }
921 else if ( rc != GUEST_TABLE_NORMAL_PAGE )
922 break;
923 }
924
925 entry = table + offsets[level];
926
927 /*
928 * If we are here with level < target, we must be at a leaf node,
929 * and we need to break up the superpage.
930 */
931 if ( level < target )
932 {
933 /* We need to split the original page. */
934 lpae_t split_pte = *entry;
935
936 ASSERT(p2m_is_superpage(*entry, level));
937
938 if ( !p2m_split_superpage(p2m, &split_pte, level, target, offsets) )
939 {
940 /*
941 * The current super-page is still in-place, so re-increment
942 * the stats.
943 */
944 p2m->stats.mappings[level]++;
945
946 /* Free the allocated sub-tree */
947 p2m_free_entry(p2m, split_pte, level);
948
949 rc = -ENOMEM;
950 goto out;
951 }
952
953 /*
         * Follow the break-before-make sequence to update the entry.
955 * For more details see (D4.7.1 in ARM DDI 0487A.j).
956 */
957 p2m_remove_pte(entry, p2m->clean_pte);
958 p2m_force_tlb_flush_sync(p2m);
959
960 p2m_write_pte(entry, split_pte, p2m->clean_pte);
961
962 /* then move to the level we want to make real changes */
963 for ( ; level < target; level++ )
964 {
965 rc = p2m_next_level(p2m, true, level, &table, offsets[level]);
966
967 /*
968 * The entry should be found and either be a table
969 * or a superpage if level 3 is not targeted
970 */
971 ASSERT(rc == GUEST_TABLE_NORMAL_PAGE ||
972 (rc == GUEST_TABLE_SUPER_PAGE && target < 3));
973 }
974
975 entry = table + offsets[level];
976 }
977
978 /*
979 * We should always be there with the correct level because
980 * all the intermediate tables have been installed if necessary.
981 */
982 ASSERT(level == target);
983
984 orig_pte = *entry;
985
986 /*
     * The radix-tree can only work at 4KB granularity. This is only used when
988 * memaccess is enabled and during shutdown.
989 */
990 ASSERT(!p2m->mem_access_enabled || page_order == 0 ||
991 p2m->domain->is_dying);
992 /*
993 * The access type should always be p2m_access_rwx when the mapping
994 * is removed.
995 */
996 ASSERT(!mfn_eq(INVALID_MFN, smfn) || (a == p2m_access_rwx));
997 /*
     * Update the mem access permission before updating the P2M, so we
     * don't have to revert the mapping if it fails.
1000 */
1001 rc = p2m_mem_access_radix_set(p2m, sgfn, a);
1002 if ( rc )
1003 goto out;
1004
1005 /*
1006 * Always remove the entry in order to follow the break-before-make
1007 * sequence when updating the translation table (D4.7.1 in ARM DDI
1008 * 0487A.j).
1009 */
1010 if ( lpae_is_valid(orig_pte) )
1011 p2m_remove_pte(entry, p2m->clean_pte);
1012
1013 if ( removing_mapping )
1014 /* Flush can be deferred if the entry is removed */
1015 p2m->need_flush |= !!lpae_is_valid(orig_pte);
1016 else
1017 {
1018 lpae_t pte = mfn_to_p2m_entry(smfn, t, a);
1019
1020 if ( level < 3 )
1021 pte.p2m.table = 0; /* Superpage entry */
1022
1023 /*
1024 * It is necessary to flush the TLB before writing the new entry
1025 * to keep coherency when the previous entry was valid.
1026 *
         * However, it can be deferred when only the permissions are
         * changed (e.g. in the case of memaccess).
1029 */
1030 if ( lpae_is_valid(orig_pte) )
1031 {
1032 if ( likely(!p2m->mem_access_enabled) ||
1033 P2M_CLEAR_PERM(pte) != P2M_CLEAR_PERM(orig_pte) )
1034 p2m_force_tlb_flush_sync(p2m);
1035 else
1036 p2m->need_flush = true;
1037 }
1038 else if ( !p2m_is_valid(orig_pte) ) /* new mapping */
1039 p2m->stats.mappings[level]++;
1040
1041 p2m_write_pte(entry, pte, p2m->clean_pte);
1042
1043 p2m->max_mapped_gfn = gfn_max(p2m->max_mapped_gfn,
1044 gfn_add(sgfn, (1UL << page_order) - 1));
1045 p2m->lowest_mapped_gfn = gfn_min(p2m->lowest_mapped_gfn, sgfn);
1046 }
1047
1048 if ( is_iommu_enabled(p2m->domain) &&
1049 (lpae_is_valid(orig_pte) || lpae_is_valid(*entry)) )
1050 {
1051 unsigned int flush_flags = 0;
1052
1053 if ( lpae_is_valid(orig_pte) )
1054 flush_flags |= IOMMU_FLUSHF_modified;
1055 if ( lpae_is_valid(*entry) )
1056 flush_flags |= IOMMU_FLUSHF_added;
1057
1058 rc = iommu_iotlb_flush(p2m->domain, _dfn(gfn_x(sgfn)),
1059 1UL << page_order, flush_flags);
1060 }
1061 else
1062 rc = 0;
1063
1064 /*
1065 * Free the entry only if the original pte was valid and the base
1066 * is different (to avoid freeing when permission is changed).
1067 */
1068 if ( p2m_is_valid(orig_pte) &&
1069 !mfn_eq(lpae_get_mfn(*entry), lpae_get_mfn(orig_pte)) )
1070 p2m_free_entry(p2m, orig_pte, level);
1071
1072 out:
1073 unmap_domain_page(table);
1074
1075 return rc;
1076 }
1077
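/*
 * Set a range of nr pages starting at sgfn in the P2M, breaking it down
 * into the largest possible naturally-aligned mappings (1G, 2M or 4K).
 */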
int p2m_set_entry(struct p2m_domain *p2m,
                  gfn_t sgfn,
                  unsigned long nr,
                  mfn_t smfn,
                  p2m_type_t t,
                  p2m_access_t a)
1084 {
1085 int rc = 0;
1086
1087 while ( nr )
1088 {
1089 unsigned long mask;
1090 unsigned long order;
1091
1092 /*
         * Don't take the MFN into account when removing a mapping (i.e.
         * INVALID_MFN) to calculate the correct target order.
1095 *
1096 * XXX: Support superpage mappings if nr is not aligned to a
1097 * superpage size.
1098 */
1099 mask = !mfn_eq(smfn, INVALID_MFN) ? mfn_x(smfn) : 0;
1100 mask |= gfn_x(sgfn) | nr;
1101
1102 /* Always map 4k by 4k when memaccess is enabled */
1103 if ( unlikely(p2m->mem_access_enabled) )
1104 order = THIRD_ORDER;
1105 else if ( !(mask & ((1UL << FIRST_ORDER) - 1)) )
1106 order = FIRST_ORDER;
1107 else if ( !(mask & ((1UL << SECOND_ORDER) - 1)) )
1108 order = SECOND_ORDER;
1109 else
1110 order = THIRD_ORDER;
1111
1112 rc = __p2m_set_entry(p2m, sgfn, order, smfn, t, a);
1113 if ( rc )
1114 break;
1115
1116 sgfn = gfn_add(sgfn, (1 << order));
1117 if ( !mfn_eq(smfn, INVALID_MFN) )
1118 smfn = mfn_add(smfn, (1 << order));
1119
1120 nr -= (1 << order);
1121 }
1122
1123 return rc;
1124 }
1125
1126 /* Invalidate all entries in the table. The p2m should be write locked. */
static void p2m_invalidate_table(struct p2m_domain *p2m, mfn_t mfn)
1128 {
1129 lpae_t *table;
1130 unsigned int i;
1131
1132 ASSERT(p2m_is_write_locked(p2m));
1133
1134 table = map_domain_page(mfn);
1135
1136 for ( i = 0; i < LPAE_ENTRIES; i++ )
1137 {
1138 lpae_t pte = table[i];
1139
1140 /*
1141 * Writing an entry can be expensive because it may involve
1142 * cleaning the cache. So avoid updating the entry if the valid
1143 * bit is already cleared.
1144 */
1145 if ( !pte.p2m.valid )
1146 continue;
1147
1148 pte.p2m.valid = 0;
1149
1150 p2m_write_pte(&table[i], pte, p2m->clean_pte);
1151 }
1152
1153 unmap_domain_page(table);
1154
1155 p2m->need_flush = true;
1156 }
1157
1158 /*
1159 * Invalidate all entries in the root page-tables. This is
 * useful to get a fault on access so an action can be taken.
1161 */
void p2m_invalidate_root(struct p2m_domain *p2m)
1163 {
1164 unsigned int i;
1165
1166 p2m_write_lock(p2m);
1167
1168 for ( i = 0; i < P2M_ROOT_LEVEL; i++ )
1169 p2m_invalidate_table(p2m, page_to_mfn(p2m->root + i));
1170
1171 p2m_write_unlock(p2m);
1172 }
1173
1174 /*
1175 * Resolve any translation fault due to change in the p2m. This
1176 * includes break-before-make and valid bit cleared.
1177 */
bool p2m_resolve_translation_fault(struct domain *d, gfn_t gfn)
1179 {
1180 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1181 unsigned int level = 0;
1182 bool resolved = false;
1183 lpae_t entry, *table;
1184
1185 /* Convenience aliases */
1186 DECLARE_OFFSETS(offsets, gfn_to_gaddr(gfn));
1187
1188 p2m_write_lock(p2m);
1189
1190 /* This gfn is higher than the highest the p2m map currently holds */
1191 if ( gfn_x(gfn) > gfn_x(p2m->max_mapped_gfn) )
1192 goto out;
1193
1194 table = p2m_get_root_pointer(p2m, gfn);
1195 /*
1196 * The table should always be non-NULL because the gfn is below
1197 * p2m->max_mapped_gfn and the root table pages are always present.
1198 */
1199 if ( !table )
1200 {
1201 ASSERT_UNREACHABLE();
1202 goto out;
1203 }
1204
1205 /*
1206 * Go down the page-tables until an entry has the valid bit unset or
1207 * a block/page entry has been hit.
1208 */
1209 for ( level = P2M_ROOT_LEVEL; level <= 3; level++ )
1210 {
1211 int rc;
1212
1213 entry = table[offsets[level]];
1214
1215 if ( level == 3 )
1216 break;
1217
1218 /* Stop as soon as we hit an entry with the valid bit unset. */
1219 if ( !lpae_is_valid(entry) )
1220 break;
1221
1222 rc = p2m_next_level(p2m, true, level, &table, offsets[level]);
1223 if ( rc == GUEST_TABLE_MAP_FAILED )
1224 goto out_unmap;
1225 else if ( rc != GUEST_TABLE_NORMAL_PAGE )
1226 break;
1227 }
1228
1229 /*
1230 * If the valid bit of the entry is set, it means someone was playing with
1231 * the Stage-2 page table. Nothing to do and mark the fault as resolved.
1232 */
1233 if ( lpae_is_valid(entry) )
1234 {
1235 resolved = true;
1236 goto out_unmap;
1237 }
1238
1239 /*
1240 * The valid bit is unset. If the entry is still not valid then the fault
1241 * cannot be resolved, exit and report it.
1242 */
1243 if ( !p2m_is_valid(entry) )
1244 goto out_unmap;
1245
1246 /*
1247 * Now we have an entry with valid bit unset, but still valid from
1248 * the P2M point of view.
1249 *
1250 * If an entry is pointing to a table, each entry of the table will
     * have their valid bit cleared. This allows a function to clear the
     * full p2m with just a couple of writes. The valid bit will then be
1253 * propagated on the fault.
1254 * If an entry is pointing to a block/page, no work to do for now.
1255 */
1256 if ( lpae_is_table(entry, level) )
1257 p2m_invalidate_table(p2m, lpae_get_mfn(entry));
1258
1259 /*
1260 * Now that the work on the entry is done, set the valid bit to prevent
1261 * another fault on that entry.
1262 */
1263 resolved = true;
1264 entry.p2m.valid = 1;
1265
1266 p2m_write_pte(table + offsets[level], entry, p2m->clean_pte);
1267
1268 /*
1269 * No need to flush the TLBs as the modified entry had the valid bit
1270 * unset.
1271 */
1272
1273 out_unmap:
1274 unmap_domain_page(table);
1275
1276 out:
1277 p2m_write_unlock(p2m);
1278
1279 return resolved;
1280 }
1281
static inline int p2m_insert_mapping(struct domain *d,
                                     gfn_t start_gfn,
                                     unsigned long nr,
                                     mfn_t mfn,
                                     p2m_type_t t)
1287 {
1288 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1289 int rc;
1290
1291 p2m_write_lock(p2m);
1292 rc = p2m_set_entry(p2m, start_gfn, nr, mfn, t, p2m->default_access);
1293 p2m_write_unlock(p2m);
1294
1295 return rc;
1296 }
1297
static inline int p2m_remove_mapping(struct domain *d,
                                     gfn_t start_gfn,
                                     unsigned long nr,
                                     mfn_t mfn)
1302 {
1303 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1304 int rc;
1305
1306 p2m_write_lock(p2m);
1307 rc = p2m_set_entry(p2m, start_gfn, nr, INVALID_MFN,
1308 p2m_invalid, p2m_access_rwx);
1309 p2m_write_unlock(p2m);
1310
1311 return rc;
1312 }
1313
int map_regions_p2mt(struct domain *d,
                     gfn_t gfn,
                     unsigned long nr,
                     mfn_t mfn,
                     p2m_type_t p2mt)
1319 {
1320 return p2m_insert_mapping(d, gfn, nr, mfn, p2mt);
1321 }
1322
int unmap_regions_p2mt(struct domain *d,
                       gfn_t gfn,
                       unsigned long nr,
                       mfn_t mfn)
1327 {
1328 return p2m_remove_mapping(d, gfn, nr, mfn);
1329 }
1330
int map_mmio_regions(struct domain *d,
                     gfn_t start_gfn,
                     unsigned long nr,
                     mfn_t mfn)
1335 {
1336 return p2m_insert_mapping(d, start_gfn, nr, mfn, p2m_mmio_direct_dev);
1337 }
1338
int unmap_mmio_regions(struct domain *d,
                       gfn_t start_gfn,
                       unsigned long nr,
                       mfn_t mfn)
1343 {
1344 return p2m_remove_mapping(d, start_gfn, nr, mfn);
1345 }
1346
int map_dev_mmio_region(struct domain *d,
                        gfn_t gfn,
                        unsigned long nr,
                        mfn_t mfn)
1351 {
1352 int res;
1353
1354 if ( !(nr && iomem_access_permitted(d, mfn_x(mfn), mfn_x(mfn) + nr - 1)) )
1355 return 0;
1356
1357 res = p2m_insert_mapping(d, gfn, nr, mfn, p2m_mmio_direct_c);
1358 if ( res < 0 )
1359 {
        printk(XENLOG_G_ERR "Unable to map MFNs [%#"PRI_mfn" - %#"PRI_mfn"] in Dom%d\n",
1361 mfn_x(mfn), mfn_x(mfn) + nr - 1, d->domain_id);
1362 return res;
1363 }
1364
1365 return 0;
1366 }
1367
int guest_physmap_add_entry(struct domain *d,
                            gfn_t gfn,
                            mfn_t mfn,
                            unsigned long page_order,
                            p2m_type_t t)
1373 {
1374 return p2m_insert_mapping(d, gfn, (1 << page_order), mfn, t);
1375 }
1376
int guest_physmap_remove_page(struct domain *d, gfn_t gfn, mfn_t mfn,
                              unsigned int page_order)
1379 {
1380 return p2m_remove_mapping(d, gfn, (1 << page_order), mfn);
1381 }
1382
static struct page_info *p2m_allocate_root(void)
1384 {
1385 struct page_info *page;
1386 unsigned int i;
1387
1388 page = alloc_domheap_pages(NULL, P2M_ROOT_ORDER, 0);
1389 if ( page == NULL )
1390 return NULL;
1391
    /* Clear all first level (root) pages */
1393 for ( i = 0; i < P2M_ROOT_PAGES; i++ )
1394 clear_and_clean_page(page + i);
1395
1396 return page;
1397 }
1398
static int p2m_alloc_table(struct domain *d)
1400 {
1401 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1402
1403 p2m->root = p2m_allocate_root();
1404 if ( !p2m->root )
1405 return -ENOMEM;
1406
1407 p2m->vttbr = generate_vttbr(p2m->vmid, page_to_mfn(p2m->root));
1408
1409 /*
1410 * Make sure that all TLBs corresponding to the new VMID are flushed
1411 * before using it
1412 */
1413 p2m_write_lock(p2m);
1414 p2m_force_tlb_flush_sync(p2m);
1415 p2m_write_unlock(p2m);
1416
1417 return 0;
1418 }
1419
1420
1421 static spinlock_t vmid_alloc_lock = SPIN_LOCK_UNLOCKED;
1422
1423 /*
1424 * VTTBR_EL2 VMID field is 8 or 16 bits. AArch64 may support 16-bit VMID.
1425 * Using a bitmap here limits us to 256 or 65536 (for AArch64) concurrent
1426 * domains. The bitmap space will be allocated dynamically based on
1427 * whether 8 or 16 bit VMIDs are supported.
1428 */
1429 static unsigned long *vmid_mask;
1430
static void p2m_vmid_allocator_init(void)
1432 {
    /* Allocate space for vmid_mask based on MAX_VMID. */
1436 vmid_mask = xzalloc_array(unsigned long, BITS_TO_LONGS(MAX_VMID));
1437
1438 if ( !vmid_mask )
1439 panic("Could not allocate VMID bitmap space\n");
1440
1441 set_bit(INVALID_VMID, vmid_mask);
1442 }
1443
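/* Allocate a free VMID from the bitmap for the domain's P2M. */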
static int p2m_alloc_vmid(struct domain *d)
1445 {
1446 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1447
1448 int rc, nr;
1449
1450 spin_lock(&vmid_alloc_lock);
1451
1452 nr = find_first_zero_bit(vmid_mask, MAX_VMID);
1453
1454 ASSERT(nr != INVALID_VMID);
1455
1456 if ( nr == MAX_VMID )
1457 {
1458 rc = -EBUSY;
1459 printk(XENLOG_ERR "p2m.c: dom%d: VMID pool exhausted\n", d->domain_id);
1460 goto out;
1461 }
1462
1463 set_bit(nr, vmid_mask);
1464
1465 p2m->vmid = nr;
1466
1467 rc = 0;
1468
1469 out:
1470 spin_unlock(&vmid_alloc_lock);
1471 return rc;
1472 }
1473
static void p2m_free_vmid(struct domain *d)
1475 {
1476 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1477 spin_lock(&vmid_alloc_lock);
1478 if ( p2m->vmid != INVALID_VMID )
1479 clear_bit(p2m->vmid, vmid_mask);
1480
1481 spin_unlock(&vmid_alloc_lock);
1482 }
1483
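/*
 * Tear down the P2M: free the intermediate and root page-tables, release
 * the VMID and destroy the mem-access radix tree.
 */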
void p2m_teardown(struct domain *d)
1485 {
1486 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1487 struct page_info *pg;
1488
1489 /* p2m not actually initialized */
1490 if ( !p2m->domain )
1491 return;
1492
1493 while ( (pg = page_list_remove_head(&p2m->pages)) )
1494 free_domheap_page(pg);
1495
1496 if ( p2m->root )
1497 free_domheap_pages(p2m->root, P2M_ROOT_ORDER);
1498
1499 p2m->root = NULL;
1500
1501 p2m_free_vmid(d);
1502
1503 radix_tree_destroy(&p2m->mem_access_settings, NULL);
1504
1505 p2m->domain = NULL;
1506 }
1507
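/* Initialise the host P2M of a domain: lock, VMID, root tables and defaults. */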
int p2m_init(struct domain *d)
1509 {
1510 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1511 int rc = 0;
1512 unsigned int cpu;
1513
1514 rwlock_init(&p2m->lock);
1515 INIT_PAGE_LIST_HEAD(&p2m->pages);
1516
1517 p2m->vmid = INVALID_VMID;
1518
1519 rc = p2m_alloc_vmid(d);
1520 if ( rc != 0 )
1521 return rc;
1522
1523 p2m->max_mapped_gfn = _gfn(0);
1524 p2m->lowest_mapped_gfn = _gfn(ULONG_MAX);
1525
1526 p2m->default_access = p2m_access_rwx;
1527 p2m->mem_access_enabled = false;
1528 radix_tree_init(&p2m->mem_access_settings);
1529
1530 /*
1531 * Some IOMMUs don't support coherent PT walk. When the p2m is
1532 * shared with the CPU, Xen has to make sure that the PT changes have
1533 * reached the memory
1534 */
1535 p2m->clean_pte = is_iommu_enabled(d) &&
1536 !iommu_has_feature(d, IOMMU_FEAT_COHERENT_WALK);
1537
1538 rc = p2m_alloc_table(d);
1539
1540 /*
     * Make sure that the type chosen is able to store any vCPU ID
     * between 0 and the maximum number of virtual CPUs supported, as
     * well as INVALID_VCPU_ID.
1544 */
1545 BUILD_BUG_ON((1 << (sizeof(p2m->last_vcpu_ran[0]) * 8)) < MAX_VIRT_CPUS);
    BUILD_BUG_ON((1 << (sizeof(p2m->last_vcpu_ran[0]) * 8)) < INVALID_VCPU_ID);
1547
1548 for_each_possible_cpu(cpu)
1549 p2m->last_vcpu_ran[cpu] = INVALID_VCPU_ID;
1550
1551 /*
1552 * Besides getting a domain when we only have the p2m in hand,
1553 * the back pointer to domain is also used in p2m_teardown()
1554 * as an end-of-initialization indicator.
1555 */
1556 p2m->domain = d;
1557
1558 return rc;
1559 }
1560
1561 /*
 * The function will go through the p2m and remove page references where
 * required. The mappings will be removed from the p2m.
1564 *
1565 * XXX: See whether the mapping can be left intact in the p2m.
1566 */
int relinquish_p2m_mapping(struct domain *d)
1568 {
1569 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1570 unsigned long count = 0;
1571 p2m_type_t t;
1572 int rc = 0;
1573 unsigned int order;
1574 gfn_t start, end;
1575
1576 p2m_write_lock(p2m);
1577
1578 start = p2m->lowest_mapped_gfn;
1579 end = gfn_add(p2m->max_mapped_gfn, 1);
1580
1581 for ( ; gfn_x(start) < gfn_x(end);
1582 start = gfn_next_boundary(start, order) )
1583 {
1584 mfn_t mfn = p2m_get_entry(p2m, start, &t, NULL, &order, NULL);
1585
1586 count++;
1587 /*
1588 * Arbitrarily preempt every 512 iterations.
1589 */
1590 if ( !(count % 512) && hypercall_preempt_check() )
1591 {
1592 rc = -ERESTART;
1593 break;
1594 }
1595
1596 /*
1597 * p2m_set_entry will take care of removing reference on page
1598 * when it is necessary and removing the mapping in the p2m.
1599 */
1600 if ( !mfn_eq(mfn, INVALID_MFN) )
1601 {
1602 /*
             * For a valid mapping, the start will always be aligned, as
             * entries are removed whilst relinquishing.
1605 */
1606 rc = __p2m_set_entry(p2m, start, order, INVALID_MFN,
1607 p2m_invalid, p2m_access_rwx);
1608 if ( unlikely(rc) )
1609 {
1610 printk(XENLOG_G_ERR "Unable to remove mapping gfn=%#"PRI_gfn" order=%u from the p2m of domain %d\n", gfn_x(start), order, d->domain_id);
1611 break;
1612 }
1613 }
1614 }
1615
1616 /*
1617 * Update lowest_mapped_gfn so on the next call we still start where
1618 * we stopped.
1619 */
1620 p2m->lowest_mapped_gfn = start;
1621
1622 p2m_write_unlock(p2m);
1623
1624 return rc;
1625 }
1626
int p2m_cache_flush_range(struct domain *d, gfn_t *pstart, gfn_t end)
1628 {
1629 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1630 gfn_t next_block_gfn;
1631 gfn_t start = *pstart;
1632 mfn_t mfn = INVALID_MFN;
1633 p2m_type_t t;
1634 unsigned int order;
1635 int rc = 0;
1636 /* Counter for preemption */
1637 unsigned short count = 0;
1638
1639 /*
     * The cache flush operation will invalidate the RAM assigned to the
1641 * guest in a given range. It will not modify the page table and
1642 * flushing the cache whilst the page is used by another CPU is
1643 * fine. So using read-lock is fine here.
1644 */
1645 p2m_read_lock(p2m);
1646
1647 start = gfn_max(start, p2m->lowest_mapped_gfn);
1648 end = gfn_min(end, gfn_add(p2m->max_mapped_gfn, 1));
1649
1650 next_block_gfn = start;
1651
1652 while ( gfn_x(start) < gfn_x(end) )
1653 {
1654 /*
1655 * Cleaning the cache for the P2M may take a long time. So we
1656 * need to be able to preempt. We will arbitrarily preempt every
         * time the count reaches 512 or above.
1658 *
1659 * The count will be incremented by:
1660 * - 1 on region skipped
1661 * - 10 for each page requiring a flush
1662 */
1663 if ( count >= 512 )
1664 {
1665 if ( softirq_pending(smp_processor_id()) )
1666 {
1667 rc = -ERESTART;
1668 break;
1669 }
1670 count = 0;
1671 }
1672
1673 /*
1674 * We want to flush page by page as:
1675 * - it may not be possible to map the full block (can be up to 1GB)
1676 * in Xen memory
1677 * - we may want to do fine grain preemption as flushing multiple
         *    pages in one go may take a long time
1679 *
1680 * As p2m_get_entry is able to return the size of the mapping
1681 * in the p2m, it is pointless to execute it for each page.
1682 *
1683 * We can optimize it by tracking the gfn of the next
1684 * block. So we will only call p2m_get_entry for each block (can
1685 * be up to 1GB).
1686 */
1687 if ( gfn_eq(start, next_block_gfn) )
1688 {
1689 bool valid;
1690
1691 mfn = p2m_get_entry(p2m, start, &t, NULL, &order, &valid);
1692 next_block_gfn = gfn_next_boundary(start, order);
1693
1694 if ( mfn_eq(mfn, INVALID_MFN) || !p2m_is_any_ram(t) || !valid )
1695 {
1696 count++;
1697 start = next_block_gfn;
1698 continue;
1699 }
1700 }
1701
1702 count += 10;
1703
1704 flush_page_to_ram(mfn_x(mfn), false);
1705
1706 start = gfn_add(start, 1);
1707 mfn = mfn_add(mfn, 1);
1708 }
1709
1710 if ( rc != -ERESTART )
1711 invalidate_icache();
1712
1713 p2m_read_unlock(p2m);
1714
1715 *pstart = start;
1716
1717 return rc;
1718 }
1719
1720 /*
1721 * Clean & invalidate RAM associated to the guest vCPU.
1722 *
1723 * The function can only work with the current vCPU and should be called
1724 * with IRQ enabled as the vCPU could get preempted.
1725 */
void p2m_flush_vm(struct vcpu *v)
1727 {
1728 struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
1729 int rc;
1730 gfn_t start = _gfn(0);
1731
1732 ASSERT(v == current);
1733 ASSERT(local_irq_is_enabled());
1734 ASSERT(v->arch.need_flush_to_ram);
1735
1736 do
1737 {
1738 rc = p2m_cache_flush_range(v->domain, &start, _gfn(ULONG_MAX));
1739 if ( rc == -ERESTART )
1740 do_softirq();
1741 } while ( rc == -ERESTART );
1742
1743 if ( rc != 0 )
1744 gprintk(XENLOG_WARNING,
1745 "P2M has not been correctly cleaned (rc = %d)\n",
1746 rc);
1747
1748 /*
     * Invalidate the p2m to track which pages were modified by the guest
     * between calls of p2m_flush_vm().
1751 */
1752 p2m_invalidate_root(p2m);
1753
1754 v->arch.need_flush_to_ram = false;
1755 }
1756
1757 /*
1758 * See note at ARMv7 ARM B1.14.4 (DDI 0406C.c) (TL;DR: S/W ops are not
1759 * easily virtualized).
1760 *
1761 * Main problems:
1762 * - S/W ops are local to a CPU (not broadcast)
1763 * - We have line migration behind our back (speculation)
1764 * - System caches don't support S/W at all (damn!)
1765 *
1766 * In the face of the above, the best we can do is to try and convert
1767 * S/W ops to VA ops. Because the guest is not allowed to infer the S/W
1768 * to PA mapping, it can only use S/W to nuke the whole cache, which is
1769 * rather a good thing for us.
1770 *
1771 * Also, it is only used when turning caches on/off ("The expected
1772 * usage of the cache maintenance instructions that operate by set/way
1773 * is associated with the powerdown and powerup of caches, if this is
1774 * required by the implementation.").
1775 *
1776 * We use the following policy:
 *   - If we trap a S/W operation, we enable VM trapping to detect
1778 * caches being turned on/off, and do a full clean.
1779 *
1780 * - We flush the caches on both caches being turned on and off.
1781 *
1782 * - Once the caches are enabled, we stop trapping VM ops.
1783 */
void p2m_set_way_flush(struct vcpu *v)
1785 {
1786 /* This function can only work with the current vCPU. */
1787 ASSERT(v == current);
1788
1789 if ( !(v->arch.hcr_el2 & HCR_TVM) )
1790 {
1791 v->arch.need_flush_to_ram = true;
1792 vcpu_hcr_set_flags(v, HCR_TVM);
1793 }
1794 }
1795
void p2m_toggle_cache(struct vcpu *v, bool was_enabled)
1797 {
1798 bool now_enabled = vcpu_has_cache_enabled(v);
1799
1800 /* This function can only work with the current vCPU. */
1801 ASSERT(v == current);
1802
1803 /*
1804 * If switching the MMU+caches on, need to invalidate the caches.
1805 * If switching it off, need to clean the caches.
1806 * Clean + invalidate does the trick always.
1807 */
1808 if ( was_enabled != now_enabled )
1809 v->arch.need_flush_to_ram = true;
1810
1811 /* Caches are now on, stop trapping VM ops (until a S/W op) */
1812 if ( now_enabled )
1813 vcpu_hcr_clear_flags(v, HCR_TVM);
1814 }
1815
mfn_t gfn_to_mfn(struct domain *d, gfn_t gfn)
1817 {
1818 return p2m_lookup(d, gfn, NULL);
1819 }
1820
struct page_info *get_page_from_gva(struct vcpu *v, vaddr_t va,
                                    unsigned long flags)
1823 {
1824 struct domain *d = v->domain;
1825 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1826 struct page_info *page = NULL;
1827 paddr_t maddr = 0;
1828 uint64_t par;
1829 mfn_t mfn;
1830 p2m_type_t t;
1831
1832 /*
1833 * XXX: To support a different vCPU, we would need to load the
1834 * VTTBR_EL2, TTBR0_EL1, TTBR1_EL1 and SCTLR_EL1
1835 */
1836 if ( v != current )
1837 return NULL;
1838
1839 /*
1840 * The lock is here to protect us against the break-before-make
1841 * sequence used when updating the entry.
1842 */
1843 p2m_read_lock(p2m);
1844 par = gvirt_to_maddr(va, &maddr, flags);
1845 p2m_read_unlock(p2m);
1846
1847 /*
1848 * gvirt_to_maddr may fail if the entry does not have the valid bit
1849 * set. Fallback to the second method:
1850 * 1) Translate the VA to IPA using software lookup -> Stage-1 page-table
1851 * may not be accessible because the stage-2 entries may have valid
1852 * bit unset.
1853 * 2) Software lookup of the MFN
1854 *
1855 * Note that when memaccess is enabled, we instead call directly
     * p2m_mem_access_check_and_get_page(...). Because the function is
     * a variant of the methods described above, it will be able to
1858 * handle entries with valid bit unset.
1859 *
1860 * TODO: Integrate more nicely memaccess with the rest of the
1861 * function.
1862 * TODO: Use the fault error in PAR_EL1 to avoid pointless
1863 * translation.
1864 */
1865 if ( par )
1866 {
1867 paddr_t ipa;
1868 unsigned int s1_perms;
1869
1870 /*
1871 * When memaccess is enabled, the translation GVA to MADDR may
1872 * have failed because of a permission fault.
1873 */
1874 if ( p2m->mem_access_enabled )
1875 return p2m_mem_access_check_and_get_page(va, flags, v);
1876
1877 /*
1878 * The software stage-1 table walk can still fail, e.g, if the
1879 * GVA is not mapped.
1880 */
1881 if ( !guest_walk_tables(v, va, &ipa, &s1_perms) )
1882 {
1883 dprintk(XENLOG_G_DEBUG,
1884 "%pv: Failed to walk page-table va %#"PRIvaddr"\n", v, va);
1885 return NULL;
1886 }
1887
1888 mfn = p2m_lookup(d, gaddr_to_gfn(ipa), &t);
1889 if ( mfn_eq(INVALID_MFN, mfn) || !p2m_is_ram(t) )
1890 return NULL;
1891
1892 /*
         * Check the permissions that are assumed by the caller. For instance
1894 * in case of guestcopy, the caller assumes that the translated
1895 * page can be accessed with the requested permissions. If this
1896 * is not the case, we should fail.
1897 *
1898 * Please note that we do not check for the GV2M_EXEC
1899 * permission. This is fine because the hardware-based translation
1900 * instruction does not test for execute permissions.
1901 */
1902 if ( (flags & GV2M_WRITE) && !(s1_perms & GV2M_WRITE) )
1903 return NULL;
1904
1905 if ( (flags & GV2M_WRITE) && t != p2m_ram_rw )
1906 return NULL;
1907 }
1908 else
1909 mfn = maddr_to_mfn(maddr);
1910
1911 if ( !mfn_valid(mfn) )
1912 {
1913 dprintk(XENLOG_G_DEBUG, "%pv: Invalid MFN %#"PRI_mfn"\n",
1914 v, mfn_x(mfn));
1915 return NULL;
1916 }
1917
1918 page = mfn_to_page(mfn);
1919 ASSERT(page);
1920
1921 if ( unlikely(!get_page(page, d)) )
1922 {
1923 dprintk(XENLOG_G_DEBUG, "%pv: Failing to acquire the MFN %#"PRI_mfn"\n",
1924 v, mfn_x(maddr_to_mfn(maddr)));
1925 return NULL;
1926 }
1927
1928 return page;
1929 }
1930
void __init p2m_restrict_ipa_bits(unsigned int ipa_bits)
1932 {
1933 /*
1934 * Calculate the minimum of the maximum IPA bits that any external entity
1935 * can support.
1936 */
1937 if ( ipa_bits < p2m_ipa_bits )
1938 p2m_ipa_bits = ipa_bits;
1939 }
1940
1941 /* VTCR value to be configured by all CPUs. Set only once by the boot CPU */
1942 static uint32_t __read_mostly vtcr;
1943
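/* Program VTCR_EL2 on the current CPU with the value chosen by the boot CPU. */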
static void setup_virt_paging_one(void *data)
1945 {
1946 WRITE_SYSREG32(vtcr, VTCR_EL2);
1947
1948 /*
1949 * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free from
1950 * entries related to EL1/EL0 translation regime until a guest vCPU
1951 * is running. For that, we need to set-up VTTBR to point to an empty
1952 * page-table and turn on stage-2 translation. The TLB entries
1953 * associated with EL1/EL0 translation regime will also be flushed in case
1954 * an AT instruction was speculated before hand.
1955 */
1956 if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
1957 {
1958 WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2);
1959 WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_VM, HCR_EL2);
1960 isb();
1961
1962 flush_all_guests_tlb_local();
1963 }
1964 }
1965
void __init setup_virt_paging(void)
1967 {
1968 /* Setup Stage 2 address translation */
1969 unsigned long val = VTCR_RES1|VTCR_SH0_IS|VTCR_ORGN0_WBWA|VTCR_IRGN0_WBWA;
1970
1971 #ifdef CONFIG_ARM_32
1972 if ( p2m_ipa_bits < 40 )
1973 panic("P2M: Not able to support %u-bit IPA at the moment\n",
1974 p2m_ipa_bits);
1975
1976 printk("P2M: 40-bit IPA\n");
1977 p2m_ipa_bits = 40;
1978 val |= VTCR_T0SZ(0x18); /* 40 bit IPA */
1979 val |= VTCR_SL0(0x1); /* P2M starts at first level */
1980 #else /* CONFIG_ARM_64 */
1981 const struct {
1982 unsigned int pabits; /* Physical Address Size */
1983 unsigned int t0sz; /* Desired T0SZ, minimum in comment */
1984 unsigned int root_order; /* Page order of the root of the p2m */
1985 unsigned int sl0; /* Desired SL0, maximum in comment */
1986 } pa_range_info[] = {
1987 /* T0SZ minimum and SL0 maximum from ARM DDI 0487A.b Table D4-5 */
1988 /* PA size, t0sz(min), root-order, sl0(max) */
1989 [0] = { 32, 32/*32*/, 0, 1 },
1990 [1] = { 36, 28/*28*/, 0, 1 },
1991 [2] = { 40, 24/*24*/, 1, 1 },
1992 [3] = { 42, 22/*22*/, 3, 1 },
1993 [4] = { 44, 20/*20*/, 0, 2 },
1994 [5] = { 48, 16/*16*/, 0, 2 },
1995 [6] = { 0 }, /* Invalid */
1996 [7] = { 0 } /* Invalid */
1997 };
1998
1999 unsigned int i, cpu;
2000 unsigned int pa_range = 0x10; /* Larger than any possible value */
2001 bool vmid_8_bit = false;
2002
2003 for_each_online_cpu ( cpu )
2004 {
2005 const struct cpuinfo_arm *info = &cpu_data[cpu];
2006
2007 /*
2008 * Restrict "p2m_ipa_bits" if needed. As P2M table is always configured
2009 * with IPA bits == PA bits, compare against "pabits".
2010 */
2011 if ( pa_range_info[info->mm64.pa_range].pabits < p2m_ipa_bits )
2012 p2m_ipa_bits = pa_range_info[info->mm64.pa_range].pabits;
2013
2014 /* Set a flag if the current cpu does not support 16 bit VMIDs. */
2015 if ( info->mm64.vmid_bits != MM64_VMID_16_BITS_SUPPORT )
2016 vmid_8_bit = true;
2017 }
2018
2019 /*
2020 * If the flag is not set then it means all CPUs support 16-bit
2021 * VMIDs.
2022 */
2023 if ( !vmid_8_bit )
2024 max_vmid = MAX_VMID_16_BIT;
2025
    /* Choose a suitable "pa_range" according to the resulting "p2m_ipa_bits". */
2027 for ( i = 0; i < ARRAY_SIZE(pa_range_info); i++ )
2028 {
2029 if ( p2m_ipa_bits == pa_range_info[i].pabits )
2030 {
2031 pa_range = i;
2032 break;
2033 }
2034 }
2035
2036 /* pa_range is 4 bits, but the defined encodings are only 3 bits */
2037 if ( pa_range >= ARRAY_SIZE(pa_range_info) || !pa_range_info[pa_range].pabits )
2038 panic("Unknown encoding of ID_AA64MMFR0_EL1.PARange %x\n", pa_range);
2039
2040 val |= VTCR_PS(pa_range);
2041 val |= VTCR_TG0_4K;
2042
2043 /* Set the VS bit only if 16 bit VMID is supported. */
2044 if ( MAX_VMID == MAX_VMID_16_BIT )
2045 val |= VTCR_VS;
2046 val |= VTCR_SL0(pa_range_info[pa_range].sl0);
2047 val |= VTCR_T0SZ(pa_range_info[pa_range].t0sz);
2048
2049 p2m_root_order = pa_range_info[pa_range].root_order;
2050 p2m_root_level = 2 - pa_range_info[pa_range].sl0;
2051 p2m_ipa_bits = 64 - pa_range_info[pa_range].t0sz;
2052
2053 printk("P2M: %d-bit IPA with %d-bit PA and %d-bit VMID\n",
2054 p2m_ipa_bits,
2055 pa_range_info[pa_range].pabits,
2056 ( MAX_VMID == MAX_VMID_16_BIT ) ? 16 : 8);
2057 #endif
2058 printk("P2M: %d levels with order-%d root, VTCR 0x%lx\n",
2059 4 - P2M_ROOT_LEVEL, P2M_ROOT_ORDER, val);
2060
2061 p2m_vmid_allocator_init();
2062
2063 /* It is not allowed to concatenate a level zero root */
2064 BUG_ON( P2M_ROOT_LEVEL == 0 && P2M_ROOT_ORDER > 0 );
2065 vtcr = val;
2066
2067 /*
     * ARM64_WORKAROUND_AT_SPECULATE requires the root table to be
     * allocated with all entries zeroed.
2070 */
2071 if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
2072 {
2073 struct page_info *root;
2074
2075 root = p2m_allocate_root();
2076 if ( !root )
2077 panic("Unable to allocate root table for ARM64_WORKAROUND_AT_SPECULATE\n");
2078
2079 empty_root_mfn = page_to_mfn(root);
2080 }
2081
2082 setup_virt_paging_one(NULL);
2083 smp_call_function(setup_virt_paging_one, NULL, 1);
2084 }
2085
static int cpu_virt_paging_callback(struct notifier_block *nfb,
                                    unsigned long action,
                                    void *hcpu)
2089 {
2090 switch ( action )
2091 {
2092 case CPU_STARTING:
2093 ASSERT(system_state != SYS_STATE_boot);
2094 setup_virt_paging_one(NULL);
2095 break;
2096 default:
2097 break;
2098 }
2099
2100 return NOTIFY_DONE;
2101 }
2102
2103 static struct notifier_block cpu_virt_paging_nfb = {
2104 .notifier_call = cpu_virt_paging_callback,
2105 };
2106
static int __init cpu_virt_paging_init(void)
2108 {
2109 register_cpu_notifier(&cpu_virt_paging_nfb);
2110
2111 return 0;
2112 }
2113 /*
2114 * Initialization of the notifier has to be done at init rather than presmp_init
 * phase because the registered notifier is used to setup virtual paging for
2116 * non-boot CPUs after the initial virtual paging for all CPUs is already setup,
2117 * i.e. when a non-boot CPU is hotplugged after the system has booted. In other
2118 * words, the notifier should be registered after the virtual paging is
2119 * initially setup (setup_virt_paging() is called from start_xen()). This is
2120 * required because vtcr config value has to be set before a notifier can fire.
2121 */
2122 __initcall(cpu_virt_paging_init);
2123
2124 /*
2125 * Local variables:
2126 * mode: C
2127 * c-file-style: "BSD"
2128 * c-basic-offset: 4
2129 * indent-tabs-mode: nil
2130 * End:
2131 */
2132