1 #include <xen/cpu.h>
2 #include <xen/domain_page.h>
3 #include <xen/iocap.h>
4 #include <xen/lib.h>
5 #include <xen/sched.h>
6 #include <xen/softirq.h>
7 
8 #include <asm/alternative.h>
9 #include <asm/event.h>
10 #include <asm/flushtlb.h>
11 #include <asm/guest_walk.h>
12 #include <asm/page.h>
13 
14 #define MAX_VMID_8_BIT  (1UL << 8)
15 #define MAX_VMID_16_BIT (1UL << 16)
16 
17 #define INVALID_VMID 0 /* VMID 0 is reserved */
18 
19 #ifdef CONFIG_ARM_64
20 unsigned int __read_mostly p2m_root_order;
21 unsigned int __read_mostly p2m_root_level;
22 static unsigned int __read_mostly max_vmid = MAX_VMID_8_BIT;
23 /* VMID is by default 8 bit width on AArch64 */
24 #define MAX_VMID       max_vmid
25 #else
26 /* VMID is always 8 bit width on AArch32 */
27 #define MAX_VMID        MAX_VMID_8_BIT
28 #endif
29 
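/*
 * The stage-2 root table may consist of several concatenated pages
 * (the architecture allows up to 16 tables to be concatenated at the
 * initial lookup level).
 */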
30 #define P2M_ROOT_PAGES    (1<<P2M_ROOT_ORDER)
31 
32 /*
33  * Set larger than any possible value, so the number of IPA bits can be
34  * restricted by an external entity (e.g. the IOMMU).
35  */
36 unsigned int __read_mostly p2m_ipa_bits = 64;
37 
38 /* Helpers to lookup the properties of each level */
39 static const paddr_t level_masks[] =
40     { ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK };
41 static const uint8_t level_orders[] =
42     { ZEROETH_ORDER, FIRST_ORDER, SECOND_ORDER, THIRD_ORDER };
43 
44 static mfn_t __read_mostly empty_root_mfn;
45 
46 static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn)
47 {
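    /*
     * VTTBR_EL2 layout: the root table base address lives in the lower
     * bits, the VMID starts at bit 48 (8- or 16-bit wide).
     */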
48     return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48));
49 }
50 
51 /* Release the P2M write lock and do a P2M TLB flush if necessary */
52 void p2m_write_unlock(struct p2m_domain *p2m)
53 {
54     /*
55      * The final flush is done with the P2M write lock taken to avoid
56      * someone else modifying the P2M before the TLB invalidation has
57      * completed.
58      */
59     p2m_tlb_flush_sync(p2m);
60 
61     write_unlock(&p2m->lock);
62 }
63 
64 void p2m_dump_info(struct domain *d)
65 {
66     struct p2m_domain *p2m = p2m_get_hostp2m(d);
67 
68     p2m_read_lock(p2m);
69     printk("p2m mappings for domain %d (vmid %d):\n",
70            d->domain_id, p2m->vmid);
71     BUG_ON(p2m->stats.mappings[0] || p2m->stats.shattered[0]);
72     printk("  1G mappings: %ld (shattered %ld)\n",
73            p2m->stats.mappings[1], p2m->stats.shattered[1]);
74     printk("  2M mappings: %ld (shattered %ld)\n",
75            p2m->stats.mappings[2], p2m->stats.shattered[2]);
76     printk("  4K mappings: %ld\n", p2m->stats.mappings[3]);
77     p2m_read_unlock(p2m);
78 }
79 
80 void memory_type_changed(struct domain *d)
81 {
82 }
83 
84 void dump_p2m_lookup(struct domain *d, paddr_t addr)
85 {
86     struct p2m_domain *p2m = p2m_get_hostp2m(d);
87 
88     printk("dom%d IPA 0x%"PRIpaddr"\n", d->domain_id, addr);
89 
90     printk("P2M @ %p mfn:%#"PRI_mfn"\n",
91            p2m->root, mfn_x(page_to_mfn(p2m->root)));
92 
93     dump_pt_walk(page_to_maddr(p2m->root), addr,
94                  P2M_ROOT_LEVEL, P2M_ROOT_PAGES);
95 }
96 
97 /*
98  * p2m_save_state and p2m_restore_state work as a pair to work around
99  * ARM64_WORKAROUND_AT_SPECULATE. p2m_save_state will set up VTTBR to
100  * point to the empty page-tables to stop allocating TLB entries.
101  */
102 void p2m_save_state(struct vcpu *p)
103 {
104     p->arch.sctlr = READ_SYSREG(SCTLR_EL1);
105 
106     if ( cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
107     {
108         WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2);
109         /*
110          * Ensure VTTBR_EL2 is correctly synchronized so we can restore
111          * the next vCPU context without worrying about AT instruction
112          * speculation.
113          */
114         isb();
115     }
116 }
117 
118 void p2m_restore_state(struct vcpu *n)
119 {
120     struct p2m_domain *p2m = p2m_get_hostp2m(n->domain);
121     uint8_t *last_vcpu_ran;
122 
123     if ( is_idle_vcpu(n) )
124         return;
125 
126     WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1);
127     WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2);
128 
129     /*
130      * ARM64_WORKAROUND_AT_SPECULATE: VTTBR_EL2 should be restored after all
131      * registers associated to EL1/EL0 translations regime have been
132      * synchronized.
133      */
134     asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE));
135     WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
136 
137     last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()];
138 
139     /*
140      * While we are restoring an out-of-context translation regime
141      * we still need to ensure:
142      *  - VTTBR_EL2 is synchronized before flushing the TLBs
143      *  - All registers for EL1 are synchronized before executing an AT
144      *    instruction targeting S1/S2.
145      */
146     isb();
147 
148     /*
149      * Flush local TLB for the domain to prevent wrong TLB translation
150      * when running multiple vCPU of the same domain on a single pCPU.
151      */
152     if ( *last_vcpu_ran != INVALID_VCPU_ID && *last_vcpu_ran != n->vcpu_id )
153         flush_guest_tlb_local();
154 
155     *last_vcpu_ran = n->vcpu_id;
156 }
157 
158 /*
159  * Force a synchronous P2M TLB flush.
160  *
161  * Must be called with the p2m lock held.
162  */
163 static void p2m_force_tlb_flush_sync(struct p2m_domain *p2m)
164 {
165     unsigned long flags = 0;
166     uint64_t ovttbr;
167 
168     ASSERT(p2m_is_write_locked(p2m));
169 
170     /*
171      * ARM only provides an instruction to flush TLBs for the current
172      * VMID. So switch to the VTTBR of a given P2M if different.
173      */
174     ovttbr = READ_SYSREG64(VTTBR_EL2);
175     if ( ovttbr != p2m->vttbr )
176     {
177         uint64_t vttbr;
178 
179         local_irq_save(flags);
180 
181         /*
182          * ARM64_WORKAROUND_AT_SPECULATE: We need to stop AT from allocating
183          * TLB entries because the context is partially modified. We
184          * only need the VMID for flushing the TLBs, so we can generate
185          * a new VTTBR with the VMID to flush and the empty root table.
186          */
187         if ( !cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) )
188             vttbr = p2m->vttbr;
189         else
190             vttbr = generate_vttbr(p2m->vmid, empty_root_mfn);
191 
192         WRITE_SYSREG64(vttbr, VTTBR_EL2);
193 
194         /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */
195         isb();
196     }
197 
198     flush_guest_tlb();
199 
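    /*
     * Only restore the previous VTTBR (and re-enable IRQs) if we
     * switched away from it above.
     */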
200     if ( ovttbr != READ_SYSREG64(VTTBR_EL2) )
201     {
202         WRITE_SYSREG64(ovttbr, VTTBR_EL2);
203         /* Ensure VTTBR_EL2 is back in place before continuing. */
204         isb();
205         local_irq_restore(flags);
206     }
207 
208     p2m->need_flush = false;
209 }
210 
211 void p2m_tlb_flush_sync(struct p2m_domain *p2m)
212 {
213     if ( p2m->need_flush )
214         p2m_force_tlb_flush_sync(p2m);
215 }
216 
217 /*
218  * Find and map the root page table. The caller is responsible for
219  * unmapping the table.
220  *
221  * The function will return NULL if the offset of the root table is
222  * invalid.
223  */
224 static lpae_t *p2m_get_root_pointer(struct p2m_domain *p2m,
225                                     gfn_t gfn)
226 {
227     unsigned long root_table;
228 
229     /*
230      * While the root table index is the offset from the previous level,
231      * we can't use (P2M_ROOT_LEVEL - 1) because the root level might be
232      * 0. Yet we still want to check if all the unused bits are zeroed.
233      */
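    /*
     * With a concatenated root, the bits above the root level's order
     * select which of the P2M_ROOT_PAGES root pages holds the entry.
     */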
234     root_table = gfn_x(gfn) >> (level_orders[P2M_ROOT_LEVEL] + LPAE_SHIFT);
235     if ( root_table >= P2M_ROOT_PAGES )
236         return NULL;
237 
238     return __map_domain_page(p2m->root + root_table);
239 }
240 
241 /*
242  * Lookup the mem access setting corresponding to a domain's GFN
243  * in the radix tree.
244  * The entry associated with the GFN is considered valid.
245  */
246 static p2m_access_t p2m_mem_access_radix_get(struct p2m_domain *p2m, gfn_t gfn)
247 {
248     void *ptr;
249 
250     if ( !p2m->mem_access_enabled )
251         return p2m->default_access;
252 
253     ptr = radix_tree_lookup(&p2m->mem_access_settings, gfn_x(gfn));
254     if ( !ptr )
255         return p2m_access_rwx;
256     else
257         return radix_tree_ptr_to_int(ptr);
258 }
259 
260 /*
261  * In the case of the P2M, the valid bit is used for other purposes. Use
262  * the type to check whether an entry is valid.
263  */
264 static inline bool p2m_is_valid(lpae_t pte)
265 {
266     return pte.p2m.type != p2m_invalid;
267 }
268 
269 /*
270  * lpae_is_* helpers don't check whether the valid bit is set in the
271  * PTE. Provide our own overlay to check the valid bit.
272  */
273 static inline bool p2m_is_mapping(lpae_t pte, unsigned int level)
274 {
275     return p2m_is_valid(pte) && lpae_is_mapping(pte, level);
276 }
277 
278 static inline bool p2m_is_superpage(lpae_t pte, unsigned int level)
279 {
280     return p2m_is_valid(pte) && lpae_is_superpage(pte, level);
281 }
282 
283 #define GUEST_TABLE_MAP_FAILED 0
284 #define GUEST_TABLE_SUPER_PAGE 1
285 #define GUEST_TABLE_NORMAL_PAGE 2
286 
287 static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry);
288 
289 /*
290  * Take the currently mapped table, find the corresponding GFN entry,
291  * and map the next table, if available. The previous table will be
292  * unmapped if the next level was mapped (i.e. GUEST_TABLE_NORMAL_PAGE is
293  * returned).
294  *
295  * The read_only parameter indicates whether intermediate tables should
296  * be allocated when not present (they are not if read_only is set).
297  *
298  * Return values:
299  *  GUEST_TABLE_MAP_FAILED: Either read_only was set and the entry
300  *  was empty, or allocating a new page failed.
301  *  GUEST_TABLE_NORMAL_PAGE: next level mapped normally
302  *  GUEST_TABLE_SUPER_PAGE: The next entry points to a superpage.
303  */
304 static int p2m_next_level(struct p2m_domain *p2m, bool read_only,
305                           unsigned int level, lpae_t **table,
306                           unsigned int offset)
307 {
308     lpae_t *entry;
309     int ret;
310     mfn_t mfn;
311 
312     entry = *table + offset;
313 
314     if ( !p2m_is_valid(*entry) )
315     {
316         if ( read_only )
317             return GUEST_TABLE_MAP_FAILED;
318 
319         ret = p2m_create_table(p2m, entry);
320         if ( ret )
321             return GUEST_TABLE_MAP_FAILED;
322     }
323 
324     /* The function p2m_next_level is never called at the 3rd level */
325     ASSERT(level < 3);
326     if ( p2m_is_mapping(*entry, level) )
327         return GUEST_TABLE_SUPER_PAGE;
328 
329     mfn = lpae_get_mfn(*entry);
330 
331     unmap_domain_page(*table);
332     *table = map_domain_page(mfn);
333 
334     return GUEST_TABLE_NORMAL_PAGE;
335 }
336 
337 /*
338  * Get the details of a given gfn.
339  *
340  * If the entry is present, the associated MFN will be returned and the
341  * access and type filled in. The page_order will correspond to the
342  * order of the mapping in the page table (i.e. it could be a superpage).
343  *
344  * If the entry is not present, INVALID_MFN will be returned and the
345  * page_order will be set according to the order of the invalid range.
346  *
347  * valid will contain the value of bit[0] (i.e. the valid bit) of the
348  * entry.
349  */
350 mfn_t p2m_get_entry(struct p2m_domain *p2m, gfn_t gfn,
351                     p2m_type_t *t, p2m_access_t *a,
352                     unsigned int *page_order,
353                     bool *valid)
354 {
355     paddr_t addr = gfn_to_gaddr(gfn);
356     unsigned int level = 0;
357     lpae_t entry, *table;
358     int rc;
359     mfn_t mfn = INVALID_MFN;
360     p2m_type_t _t;
361     DECLARE_OFFSETS(offsets, addr);
362 
363     ASSERT(p2m_is_locked(p2m));
364     BUILD_BUG_ON(THIRD_MASK != PAGE_MASK);
365 
366     /* Allow t to be NULL */
367     t = t ?: &_t;
368 
369     *t = p2m_invalid;
370 
371     if ( valid )
372         *valid = false;
373 
374     /* XXX: Check if the mapping is lower than the mapped gfn */
375 
376     /* This gfn is higher than the highest the p2m map currently holds */
377     if ( gfn_x(gfn) > gfn_x(p2m->max_mapped_gfn) )
378     {
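        /*
         * Find the biggest level-sized block guaranteed to be unmapped,
         * so the page_order returned below lets the caller skip the
         * hole in one go.
         */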
379         for ( level = P2M_ROOT_LEVEL; level < 3; level++ )
380             if ( (gfn_x(gfn) & (level_masks[level] >> PAGE_SHIFT)) >
381                  gfn_x(p2m->max_mapped_gfn) )
382                 break;
383 
384         goto out;
385     }
386 
387     table = p2m_get_root_pointer(p2m, gfn);
388 
389     /*
390      * The table should always be non-NULL because the gfn is below
391      * p2m->max_mapped_gfn and the root table pages are always present.
392      */
393     if ( !table )
394     {
395         ASSERT_UNREACHABLE();
396         level = P2M_ROOT_LEVEL;
397         goto out;
398     }
399 
400     for ( level = P2M_ROOT_LEVEL; level < 3; level++ )
401     {
402         rc = p2m_next_level(p2m, true, level, &table, offsets[level]);
403         if ( rc == GUEST_TABLE_MAP_FAILED )
404             goto out_unmap;
405         else if ( rc != GUEST_TABLE_NORMAL_PAGE )
406             break;
407     }
408 
409     entry = table[offsets[level]];
410 
411     if ( p2m_is_valid(entry) )
412     {
413         *t = entry.p2m.type;
414 
415         if ( a )
416             *a = p2m_mem_access_radix_get(p2m, gfn);
417 
418         mfn = lpae_get_mfn(entry);
419         /*
420          * The entry may point to a superpage. Find the MFN associated
421          * to the GFN.
422          */
423         mfn = mfn_add(mfn, gfn_x(gfn) & ((1UL << level_orders[level]) - 1));
424 
425         if ( valid )
426             *valid = lpae_is_valid(entry);
427     }
428 
429 out_unmap:
430     unmap_domain_page(table);
431 
432 out:
433     if ( page_order )
434         *page_order = level_orders[level];
435 
436     return mfn;
437 }
438 
439 mfn_t p2m_lookup(struct domain *d, gfn_t gfn, p2m_type_t *t)
440 {
441     mfn_t mfn;
442     struct p2m_domain *p2m = p2m_get_hostp2m(d);
443 
444     p2m_read_lock(p2m);
445     mfn = p2m_get_entry(p2m, gfn, t, NULL, NULL, NULL);
446     p2m_read_unlock(p2m);
447 
448     return mfn;
449 }
450 
451 struct page_info *p2m_get_page_from_gfn(struct domain *d, gfn_t gfn,
452                                         p2m_type_t *t)
453 {
454     struct page_info *page;
455     p2m_type_t p2mt;
456     mfn_t mfn = p2m_lookup(d, gfn, &p2mt);
457 
458     if ( t )
459         *t = p2mt;
460 
461     if ( !p2m_is_any_ram(p2mt) )
462         return NULL;
463 
464     if ( !mfn_valid(mfn) )
465         return NULL;
466 
467     page = mfn_to_page(mfn);
468 
469     /*
470      * get_page won't work on foreign mappings because the page doesn't
471      * belong to the current domain.
472      */
473     if ( p2m_is_foreign(p2mt) )
474     {
475         struct domain *fdom = page_get_owner_and_reference(page);
476         ASSERT(fdom != NULL);
477         ASSERT(fdom != d);
478         return page;
479     }
480 
481     return get_page(page, d) ? page : NULL;
482 }
483 
484 int guest_physmap_mark_populate_on_demand(struct domain *d,
485                                           unsigned long gfn,
486                                           unsigned int order)
487 {
488     return -ENOSYS;
489 }
490 
491 unsigned long p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn,
492                                            unsigned int order)
493 {
494     return 0;
495 }
496 
497 static void p2m_set_permission(lpae_t *e, p2m_type_t t, p2m_access_t a)
498 {
499     /* First apply type permissions */
500     switch ( t )
501     {
502     case p2m_ram_rw:
503         e->p2m.xn = 0;
504         e->p2m.write = 1;
505         break;
506 
507     case p2m_ram_ro:
508         e->p2m.xn = 0;
509         e->p2m.write = 0;
510         break;
511 
512     case p2m_iommu_map_rw:
513     case p2m_map_foreign_rw:
514     case p2m_grant_map_rw:
515     case p2m_mmio_direct_dev:
516     case p2m_mmio_direct_nc:
517     case p2m_mmio_direct_c:
518         e->p2m.xn = 1;
519         e->p2m.write = 1;
520         break;
521 
522     case p2m_iommu_map_ro:
523     case p2m_map_foreign_ro:
524     case p2m_grant_map_ro:
525     case p2m_invalid:
526         e->p2m.xn = 1;
527         e->p2m.write = 0;
528         break;
529 
530     case p2m_max_real_type:
531         BUG();
532         break;
533     }
534 
535     /* Then restrict with access permissions */
536     switch ( a )
537     {
538     case p2m_access_rwx:
539         break;
540     case p2m_access_wx:
541         e->p2m.read = 0;
542         break;
543     case p2m_access_rw:
544         e->p2m.xn = 1;
545         break;
546     case p2m_access_w:
547         e->p2m.read = 0;
548         e->p2m.xn = 1;
549         break;
550     case p2m_access_rx:
551     case p2m_access_rx2rw:
552         e->p2m.write = 0;
553         break;
554     case p2m_access_x:
555         e->p2m.write = 0;
556         e->p2m.read = 0;
557         break;
558     case p2m_access_r:
559         e->p2m.write = 0;
560         e->p2m.xn = 1;
561         break;
562     case p2m_access_n:
563     case p2m_access_n2rwx:
564         e->p2m.read = e->p2m.write = 0;
565         e->p2m.xn = 1;
566         break;
567     }
568 }
569 
570 static lpae_t mfn_to_p2m_entry(mfn_t mfn, p2m_type_t t, p2m_access_t a)
571 {
572     /*
573      * sh, xn and write bit will be defined in the following switches
574      * based on mattr and t.
575      */
576     lpae_t e = (lpae_t) {
577         .p2m.af = 1,
578         .p2m.read = 1,
579         .p2m.table = 1,
580         .p2m.valid = 1,
581         .p2m.type = t,
582     };
583 
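    /* Every real p2m type must fit in the 4-bit type field of the PTE. */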
584     BUILD_BUG_ON(p2m_max_real_type > (1 << 4));
585 
586     switch ( t )
587     {
588     case p2m_mmio_direct_dev:
589         e.p2m.mattr = MATTR_DEV;
590         e.p2m.sh = LPAE_SH_OUTER;
591         break;
592 
593     case p2m_mmio_direct_c:
594         e.p2m.mattr = MATTR_MEM;
595         e.p2m.sh = LPAE_SH_OUTER;
596         break;
597 
598     /*
599      * ARM ARM: Overlaying the shareability attribute (DDI
600      * 0406C.b B3-1376 to 1377)
601      *
602      * A memory region with a resultant memory type attribute of Normal,
603      * and a resultant cacheability attribute of Inner Non-cacheable,
604      * Outer Non-cacheable, must have a resultant shareability attribute
605      * of Outer Shareable, otherwise shareability is UNPREDICTABLE.
606      *
607      * On ARMv8 shareability is ignored and explicitly treated as Outer
608      * Shareable for Normal Inner Non_cacheable, Outer Non-cacheable.
609      * See the note for table D4-40, in page 1788 of the ARM DDI 0487A.j.
610      */
611     case p2m_mmio_direct_nc:
612         e.p2m.mattr = MATTR_MEM_NC;
613         e.p2m.sh = LPAE_SH_OUTER;
614         break;
615 
616     default:
617         e.p2m.mattr = MATTR_MEM;
618         e.p2m.sh = LPAE_SH_INNER;
619     }
620 
621     p2m_set_permission(&e, t, a);
622 
623     ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));
624 
625     lpae_set_mfn(e, mfn);
626 
627     return e;
628 }
629 
630 /* Generate table entry with correct attributes. */
631 static lpae_t page_to_p2m_table(struct page_info *page)
632 {
633     /*
634      * The access value does not matter because the hardware will ignore
635      * the permission fields for table entries.
636      *
637      * We use p2m_ram_rw so the entry has a valid type. This is important
638      * for p2m_is_valid() to return valid on table entries.
639      */
640     return mfn_to_p2m_entry(page_to_mfn(page), p2m_ram_rw, p2m_access_rwx);
641 }
642 
643 static inline void p2m_write_pte(lpae_t *p, lpae_t pte, bool clean_pte)
644 {
645     write_pte(p, pte);
646     if ( clean_pte )
647         clean_dcache(*p);
648 }
649 
650 static inline void p2m_remove_pte(lpae_t *p, bool clean_pte)
651 {
652     lpae_t pte;
653 
654     memset(&pte, 0x00, sizeof(pte));
655     p2m_write_pte(p, pte, clean_pte);
656 }
657 
658 /* Allocate a new page table page and hook it in via the given entry. */
659 static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry)
660 {
661     struct page_info *page;
662     lpae_t *p;
663 
664     ASSERT(!p2m_is_valid(*entry));
665 
666     page = alloc_domheap_page(NULL, 0);
667     if ( page == NULL )
668         return -ENOMEM;
669 
670     page_list_add(page, &p2m->pages);
671 
672     p = __map_domain_page(page);
673     clear_page(p);
674 
675     if ( p2m->clean_pte )
676         clean_dcache_va_range(p, PAGE_SIZE);
677 
678     unmap_domain_page(p);
679 
680     p2m_write_pte(entry, page_to_p2m_table(page), p2m->clean_pte);
681 
682     return 0;
683 }
684 
685 static int p2m_mem_access_radix_set(struct p2m_domain *p2m, gfn_t gfn,
686                                     p2m_access_t a)
687 {
688     int rc;
689 
690     if ( !p2m->mem_access_enabled )
691         return 0;
692 
693     if ( p2m_access_rwx == a )
694     {
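        /*
         * p2m_access_rwx is what the lookup helper returns when no entry
         * is found, so don't store it; deleting any stale entry keeps
         * the tree small.
         */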
695         radix_tree_delete(&p2m->mem_access_settings, gfn_x(gfn));
696         return 0;
697     }
698 
699     rc = radix_tree_insert(&p2m->mem_access_settings, gfn_x(gfn),
700                            radix_tree_int_to_ptr(a));
701     if ( rc == -EEXIST )
702     {
703         /* If a setting already exists, change it to the new one */
704         radix_tree_replace_slot(
705             radix_tree_lookup_slot(
706                 &p2m->mem_access_settings, gfn_x(gfn)),
707             radix_tree_int_to_ptr(a));
708         rc = 0;
709     }
710 
711     return rc;
712 }
713 
714 /*
715  * Put any references on the single 4K page referenced by pte.
716  * TODO: Handle superpages, for now we only take special references for leaf
717  * pages (specifically foreign ones, which can't be super mapped today).
718  */
719 static void p2m_put_l3_page(const lpae_t pte)
720 {
721     ASSERT(p2m_is_valid(pte));
722 
723     /*
724      * TODO: Handle other p2m types
725      *
726      * It's safe to do the put_page here because page_alloc will
727      * flush the TLBs if the page is reallocated before the end of
728      * this loop.
729      */
730     if ( p2m_is_foreign(pte.p2m.type) )
731     {
732         mfn_t mfn = lpae_get_mfn(pte);
733 
734         ASSERT(mfn_valid(mfn));
735         put_page(mfn_to_page(mfn));
736     }
737 }
738 
739 /* Free lpae sub-tree behind an entry */
740 static void p2m_free_entry(struct p2m_domain *p2m,
741                            lpae_t entry, unsigned int level)
742 {
743     unsigned int i;
744     lpae_t *table;
745     mfn_t mfn;
746     struct page_info *pg;
747 
748     /* Nothing to do if the entry is invalid. */
749     if ( !p2m_is_valid(entry) )
750         return;
751 
752     /* Nothing to do but update the stats if the entry is a super-page. */
753     if ( p2m_is_superpage(entry, level) )
754     {
755         p2m->stats.mappings[level]--;
756         return;
757     }
758 
759     if ( level == 3 )
760     {
761         p2m->stats.mappings[level]--;
762         p2m_put_l3_page(entry);
763         return;
764     }
765 
766     table = map_domain_page(lpae_get_mfn(entry));
767     for ( i = 0; i < LPAE_ENTRIES; i++ )
768         p2m_free_entry(p2m, *(table + i), level + 1);
769 
770     unmap_domain_page(table);
771 
772     /*
773      * Make sure all the references in the TLB have been removed before
774      * freeing the intermediate page table.
775      * XXX: Should we defer the free of the page table to avoid the
776      * flush?
777      */
778     p2m_tlb_flush_sync(p2m);
779 
780     mfn = lpae_get_mfn(entry);
781     ASSERT(mfn_valid(mfn));
782 
783     pg = mfn_to_page(mfn);
784 
785     page_list_del(pg, &p2m->pages);
786     free_domheap_page(pg);
787 }
788 
789 static bool p2m_split_superpage(struct p2m_domain *p2m, lpae_t *entry,
790                                 unsigned int level, unsigned int target,
791                                 const unsigned int *offsets)
792 {
793     struct page_info *page;
794     unsigned int i;
795     lpae_t pte, *table;
796     bool rv = true;
797 
798     /* Convenience aliases */
799     mfn_t mfn = lpae_get_mfn(*entry);
800     unsigned int next_level = level + 1;
801     unsigned int level_order = level_orders[next_level];
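    /* Each entry at the next level covers (1 << level_order) 4K pages. */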
802 
803     /*
804      * This should only be called with target != level and the entry is
805      * a superpage.
806      */
807     ASSERT(level < target);
808     ASSERT(p2m_is_superpage(*entry, level));
809 
810     page = alloc_domheap_page(NULL, 0);
811     if ( !page )
812         return false;
813 
814     page_list_add(page, &p2m->pages);
815     table = __map_domain_page(page);
816 
817     /*
818      * We are either splitting a first level 1G page into 512 second level
819      * 2M pages, or a second level 2M page into 512 third level 4K pages.
820      */
821     for ( i = 0; i < LPAE_ENTRIES; i++ )
822     {
823         lpae_t *new_entry = table + i;
824 
825         /*
826          * Use the content of the superpage entry and override
827          * the necessary fields, so the correct permissions are kept.
828          */
829         pte = *entry;
830         lpae_set_mfn(pte, mfn_add(mfn, i << level_order));
831 
832         /*
833          * First and second level pages set p2m.table = 0, but third
834          * level entries set p2m.table = 1.
835          */
836         pte.p2m.table = (next_level == 3);
837 
838         write_pte(new_entry, pte);
839     }
840 
841     /* Update stats */
842     p2m->stats.shattered[level]++;
843     p2m->stats.mappings[level]--;
844     p2m->stats.mappings[next_level] += LPAE_ENTRIES;
845 
846     /*
847      * Shatter the superpage in this page down to the level at which we
848      * want to make the changes.
849      * This is done outside the loop to avoid checking the offset of
850      * every entry to know whether it should be shattered.
851      */
852     if ( next_level != target )
853         rv = p2m_split_superpage(p2m, table + offsets[next_level],
854                                  level + 1, target, offsets);
855 
856     if ( p2m->clean_pte )
857         clean_dcache_va_range(table, PAGE_SIZE);
858 
859     unmap_domain_page(table);
860 
861     /*
862      * Even if we failed, we should install the newly allocated LPAE
863      * entry. The caller will be in charge of freeing the sub-tree.
864      */
865     p2m_write_pte(entry, page_to_p2m_table(page), p2m->clean_pte);
866 
867     return rv;
868 }
869 
870 /*
871  * Insert an entry in the p2m. This should be called with a mapping
872  * equal to a page/superpage (4K, 2M, 1G).
873  */
874 static int __p2m_set_entry(struct p2m_domain *p2m,
875                            gfn_t sgfn,
876                            unsigned int page_order,
877                            mfn_t smfn,
878                            p2m_type_t t,
879                            p2m_access_t a)
880 {
881     unsigned int level = 0;
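    /*
     * page_order is 0, LPAE_SHIFT or 2 * LPAE_SHIFT (4K, 2M, 1G), so
     * target resolves to level 3, 2 or 1 respectively.
     */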
882     unsigned int target = 3 - (page_order / LPAE_SHIFT);
883     lpae_t *entry, *table, orig_pte;
884     int rc;
885     /* A mapping is removed if the MFN is invalid. */
886     bool removing_mapping = mfn_eq(smfn, INVALID_MFN);
887     DECLARE_OFFSETS(offsets, gfn_to_gaddr(sgfn));
888 
889     ASSERT(p2m_is_write_locked(p2m));
890 
891     /*
892      * Check if the target level is valid: we only support
893      * 4K, 2M and 1G mappings.
894      */
895     ASSERT(target > 0 && target <= 3);
896 
897     table = p2m_get_root_pointer(p2m, sgfn);
898     if ( !table )
899         return -EINVAL;
900 
901     for ( level = P2M_ROOT_LEVEL; level < target; level++ )
902     {
903         /*
904          * Don't try to allocate intermediate page table if the mapping
905          * is about to be removed.
906          */
907         rc = p2m_next_level(p2m, removing_mapping,
908                             level, &table, offsets[level]);
909         if ( rc == GUEST_TABLE_MAP_FAILED )
910         {
911             /*
912              * We are here because p2m_next_level has failed to map
913              * the intermediate page table (e.g. the table does not exist
914              * and the p2m tree is read-only). It is a valid case
915              * when removing a mapping as it may not exist in the
916              * page table. In this case, just ignore it.
917              */
918             rc = removing_mapping ? 0 : -ENOENT;
919             goto out;
920         }
921         else if ( rc != GUEST_TABLE_NORMAL_PAGE )
922             break;
923     }
924 
925     entry = table + offsets[level];
926 
927     /*
928      * If we are here with level < target, we must be at a leaf node,
929      * and we need to break up the superpage.
930      */
931     if ( level < target )
932     {
933         /* We need to split the original page. */
934         lpae_t split_pte = *entry;
935 
936         ASSERT(p2m_is_superpage(*entry, level));
937 
938         if ( !p2m_split_superpage(p2m, &split_pte, level, target, offsets) )
939         {
940             /*
941              * The current super-page is still in-place, so re-increment
942              * the stats.
943              */
944             p2m->stats.mappings[level]++;
945 
946             /* Free the allocated sub-tree */
947             p2m_free_entry(p2m, split_pte, level);
948 
949             rc = -ENOMEM;
950             goto out;
951         }
952 
953         /*
954          * Follow the break-before-make sequence to update the entry.
955          * For more details see (D4.7.1 in ARM DDI 0487A.j).
956          */
957         p2m_remove_pte(entry, p2m->clean_pte);
958         p2m_force_tlb_flush_sync(p2m);
959 
960         p2m_write_pte(entry, split_pte, p2m->clean_pte);
961 
962         /* then move to the level we want to make real changes */
963         for ( ; level < target; level++ )
964         {
965             rc = p2m_next_level(p2m, true, level, &table, offsets[level]);
966 
967             /*
968              * The entry should be found and either be a table
969              * or a superpage if level 3 is not targeted
970              */
971             ASSERT(rc == GUEST_TABLE_NORMAL_PAGE ||
972                    (rc == GUEST_TABLE_SUPER_PAGE && target < 3));
973         }
974 
975         entry = table + offsets[level];
976     }
977 
978     /*
979      * We should always be here at the correct level because all the
980      * intermediate tables have been installed if necessary.
981      */
982     ASSERT(level == target);
983 
984     orig_pte = *entry;
985 
986     /*
987      * The radix-tree can only work on 4KB pages. This is only used when
988      * memaccess is enabled and during shutdown.
989      */
990     ASSERT(!p2m->mem_access_enabled || page_order == 0 ||
991            p2m->domain->is_dying);
992     /*
993      * The access type should always be p2m_access_rwx when the mapping
994      * is removed.
995      */
996     ASSERT(!mfn_eq(INVALID_MFN, smfn) || (a == p2m_access_rwx));
997     /*
998      * Update the mem access permission before updating the P2M, so we
999      * don't have to revert the mapping if it fails.
1000      */
1001     rc = p2m_mem_access_radix_set(p2m, sgfn, a);
1002     if ( rc )
1003         goto out;
1004 
1005     /*
1006      * Always remove the entry in order to follow the break-before-make
1007      * sequence when updating the translation table (D4.7.1 in ARM DDI
1008      * 0487A.j).
1009      */
1010     if ( lpae_is_valid(orig_pte) )
1011         p2m_remove_pte(entry, p2m->clean_pte);
1012 
1013     if ( removing_mapping )
1014         /* Flush can be deferred if the entry is removed */
1015         p2m->need_flush |= !!lpae_is_valid(orig_pte);
1016     else
1017     {
1018         lpae_t pte = mfn_to_p2m_entry(smfn, t, a);
1019 
1020         if ( level < 3 )
1021             pte.p2m.table = 0; /* Superpage entry */
1022 
1023         /*
1024          * It is necessary to flush the TLB before writing the new entry
1025          * to keep coherency when the previous entry was valid.
1026          *
1027          * However, it could be deferred when only the permissions are
1028          * changed (e.g. in the case of memaccess).
1029          */
1030         if ( lpae_is_valid(orig_pte) )
1031         {
1032             if ( likely(!p2m->mem_access_enabled) ||
1033                  P2M_CLEAR_PERM(pte) != P2M_CLEAR_PERM(orig_pte) )
1034                 p2m_force_tlb_flush_sync(p2m);
1035             else
1036                 p2m->need_flush = true;
1037         }
1038         else if ( !p2m_is_valid(orig_pte) ) /* new mapping */
1039             p2m->stats.mappings[level]++;
1040 
1041         p2m_write_pte(entry, pte, p2m->clean_pte);
1042 
1043         p2m->max_mapped_gfn = gfn_max(p2m->max_mapped_gfn,
1044                                       gfn_add(sgfn, (1UL << page_order) - 1));
1045         p2m->lowest_mapped_gfn = gfn_min(p2m->lowest_mapped_gfn, sgfn);
1046     }
1047 
1048     if ( is_iommu_enabled(p2m->domain) &&
1049          (lpae_is_valid(orig_pte) || lpae_is_valid(*entry)) )
1050     {
1051         unsigned int flush_flags = 0;
1052 
1053         if ( lpae_is_valid(orig_pte) )
1054             flush_flags |= IOMMU_FLUSHF_modified;
1055         if ( lpae_is_valid(*entry) )
1056             flush_flags |= IOMMU_FLUSHF_added;
1057 
1058         rc = iommu_iotlb_flush(p2m->domain, _dfn(gfn_x(sgfn)),
1059                                1UL << page_order, flush_flags);
1060     }
1061     else
1062         rc = 0;
1063 
1064     /*
1065      * Free the entry only if the original pte was valid and the base
1066      * is different (to avoid freeing when permission is changed).
1067      */
1068     if ( p2m_is_valid(orig_pte) &&
1069          !mfn_eq(lpae_get_mfn(*entry), lpae_get_mfn(orig_pte)) )
1070         p2m_free_entry(p2m, orig_pte, level);
1071 
1072 out:
1073     unmap_domain_page(table);
1074 
1075     return rc;
1076 }
1077 
1078 int p2m_set_entry(struct p2m_domain *p2m,
1079                   gfn_t sgfn,
1080                   unsigned long nr,
1081                   mfn_t smfn,
1082                   p2m_type_t t,
1083                   p2m_access_t a)
1084 {
1085     int rc = 0;
1086 
1087     while ( nr )
1088     {
1089         unsigned long mask;
1090         unsigned long order;
1091 
1092         /*
1093          * Don't take into account the MFN when removing a mapping (i.e.
1094          * INVALID_MFN) to calculate the correct target order.
1095          *
1096          * XXX: Support superpage mappings if nr is not aligned to a
1097          * superpage size.
1098          */
1099         mask = !mfn_eq(smfn, INVALID_MFN) ? mfn_x(smfn) : 0;
1100         mask |= gfn_x(sgfn) | nr;
1101 
1102         /* Always map 4k by 4k when memaccess is enabled */
1103         if ( unlikely(p2m->mem_access_enabled) )
1104             order = THIRD_ORDER;
1105         else if ( !(mask & ((1UL << FIRST_ORDER) - 1)) )
1106             order = FIRST_ORDER;
1107         else if ( !(mask & ((1UL << SECOND_ORDER) - 1)) )
1108             order = SECOND_ORDER;
1109         else
1110             order = THIRD_ORDER;
1111 
1112         rc = __p2m_set_entry(p2m, sgfn, order, smfn, t, a);
1113         if ( rc )
1114             break;
1115 
1116         sgfn = gfn_add(sgfn, (1 << order));
1117         if ( !mfn_eq(smfn, INVALID_MFN) )
1118             smfn = mfn_add(smfn, (1 << order));
1119 
1120         nr -= (1 << order);
1121     }
1122 
1123     return rc;
1124 }
1125 
1126 /* Invalidate all entries in the table. The p2m should be write locked. */
1127 static void p2m_invalidate_table(struct p2m_domain *p2m, mfn_t mfn)
1128 {
1129     lpae_t *table;
1130     unsigned int i;
1131 
1132     ASSERT(p2m_is_write_locked(p2m));
1133 
1134     table = map_domain_page(mfn);
1135 
1136     for ( i = 0; i < LPAE_ENTRIES; i++ )
1137     {
1138         lpae_t pte = table[i];
1139 
1140         /*
1141          * Writing an entry can be expensive because it may involve
1142          * cleaning the cache. So avoid updating the entry if the valid
1143          * bit is already cleared.
1144          */
1145         if ( !pte.p2m.valid )
1146             continue;
1147 
1148         pte.p2m.valid = 0;
1149 
1150         p2m_write_pte(&table[i], pte, p2m->clean_pte);
1151     }
1152 
1153     unmap_domain_page(table);
1154 
1155     p2m->need_flush = true;
1156 }
1157 
1158 /*
1159  * Invalidate all entries in the root page-tables. This is
1160  * useful to get a fault on access and take an action.
1161  */
1162 void p2m_invalidate_root(struct p2m_domain *p2m)
1163 {
1164     unsigned int i;
1165 
1166     p2m_write_lock(p2m);
1167 
1168     for ( i = 0; i < P2M_ROOT_LEVEL; i++ )
1169         p2m_invalidate_table(p2m, page_to_mfn(p2m->root + i));
1170 
1171     p2m_write_unlock(p2m);
1172 }
1173 
1174 /*
1175  * Resolve any translation fault due to a change in the p2m. This
1176  * includes break-before-make and entries with the valid bit cleared.
1177  */
1178 bool p2m_resolve_translation_fault(struct domain *d, gfn_t gfn)
1179 {
1180     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1181     unsigned int level = 0;
1182     bool resolved = false;
1183     lpae_t entry, *table;
1184 
1185     /* Convenience aliases */
1186     DECLARE_OFFSETS(offsets, gfn_to_gaddr(gfn));
1187 
1188     p2m_write_lock(p2m);
1189 
1190     /* This gfn is higher than the highest the p2m map currently holds */
1191     if ( gfn_x(gfn) > gfn_x(p2m->max_mapped_gfn) )
1192         goto out;
1193 
1194     table = p2m_get_root_pointer(p2m, gfn);
1195     /*
1196      * The table should always be non-NULL because the gfn is below
1197      * p2m->max_mapped_gfn and the root table pages are always present.
1198      */
1199     if ( !table )
1200     {
1201         ASSERT_UNREACHABLE();
1202         goto out;
1203     }
1204 
1205     /*
1206      * Go down the page-tables until an entry has the valid bit unset or
1207      * a block/page entry has been hit.
1208      */
1209     for ( level = P2M_ROOT_LEVEL; level <= 3; level++ )
1210     {
1211         int rc;
1212 
1213         entry = table[offsets[level]];
1214 
1215         if ( level == 3 )
1216             break;
1217 
1218         /* Stop as soon as we hit an entry with the valid bit unset. */
1219         if ( !lpae_is_valid(entry) )
1220             break;
1221 
1222         rc = p2m_next_level(p2m, true, level, &table, offsets[level]);
1223         if ( rc == GUEST_TABLE_MAP_FAILED )
1224             goto out_unmap;
1225         else if ( rc != GUEST_TABLE_NORMAL_PAGE )
1226             break;
1227     }
1228 
1229     /*
1230      * If the valid bit of the entry is set, it means someone was playing with
1231      * the Stage-2 page table. Nothing to do, so mark the fault as resolved.
1232      */
1233     if ( lpae_is_valid(entry) )
1234     {
1235         resolved = true;
1236         goto out_unmap;
1237     }
1238 
1239     /*
1240      * The valid bit is unset. If the entry is still not valid then the fault
1241      * cannot be resolved, exit and report it.
1242      */
1243     if ( !p2m_is_valid(entry) )
1244         goto out_unmap;
1245 
1246     /*
1247      * Now we have an entry with valid bit unset, but still valid from
1248      * the P2M point of view.
1249      *
1250      * If an entry is pointing to a table, each entry of the table will
1251      * have their valid bit cleared. This allows a function to clear the
1252      * full p2m with just a couple of writes. The valid bit will then be
1253      * propagated on the fault.
1254      * If an entry is pointing to a block/page, no work to do for now.
1255      */
1256     if ( lpae_is_table(entry, level) )
1257         p2m_invalidate_table(p2m, lpae_get_mfn(entry));
1258 
1259     /*
1260      * Now that the work on the entry is done, set the valid bit to prevent
1261      * another fault on that entry.
1262      */
1263     resolved = true;
1264     entry.p2m.valid = 1;
1265 
1266     p2m_write_pte(table + offsets[level], entry, p2m->clean_pte);
1267 
1268     /*
1269      * No need to flush the TLBs as the modified entry had the valid bit
1270      * unset.
1271      */
1272 
1273 out_unmap:
1274     unmap_domain_page(table);
1275 
1276 out:
1277     p2m_write_unlock(p2m);
1278 
1279     return resolved;
1280 }
1281 
1282 static inline int p2m_insert_mapping(struct domain *d,
1283                                      gfn_t start_gfn,
1284                                      unsigned long nr,
1285                                      mfn_t mfn,
1286                                      p2m_type_t t)
1287 {
1288     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1289     int rc;
1290 
1291     p2m_write_lock(p2m);
1292     rc = p2m_set_entry(p2m, start_gfn, nr, mfn, t, p2m->default_access);
1293     p2m_write_unlock(p2m);
1294 
1295     return rc;
1296 }
1297 
1298 static inline int p2m_remove_mapping(struct domain *d,
1299                                      gfn_t start_gfn,
1300                                      unsigned long nr,
1301                                      mfn_t mfn)
1302 {
1303     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1304     int rc;
1305 
1306     p2m_write_lock(p2m);
1307     rc = p2m_set_entry(p2m, start_gfn, nr, INVALID_MFN,
1308                        p2m_invalid, p2m_access_rwx);
1309     p2m_write_unlock(p2m);
1310 
1311     return rc;
1312 }
1313 
1314 int map_regions_p2mt(struct domain *d,
1315                      gfn_t gfn,
1316                      unsigned long nr,
1317                      mfn_t mfn,
1318                      p2m_type_t p2mt)
1319 {
1320     return p2m_insert_mapping(d, gfn, nr, mfn, p2mt);
1321 }
1322 
1323 int unmap_regions_p2mt(struct domain *d,
1324                        gfn_t gfn,
1325                        unsigned long nr,
1326                        mfn_t mfn)
1327 {
1328     return p2m_remove_mapping(d, gfn, nr, mfn);
1329 }
1330 
1331 int map_mmio_regions(struct domain *d,
1332                      gfn_t start_gfn,
1333                      unsigned long nr,
1334                      mfn_t mfn)
1335 {
1336     return p2m_insert_mapping(d, start_gfn, nr, mfn, p2m_mmio_direct_dev);
1337 }
1338 
1339 int unmap_mmio_regions(struct domain *d,
1340                        gfn_t start_gfn,
1341                        unsigned long nr,
1342                        mfn_t mfn)
1343 {
1344     return p2m_remove_mapping(d, start_gfn, nr, mfn);
1345 }
1346 
1347 int map_dev_mmio_region(struct domain *d,
1348                         gfn_t gfn,
1349                         unsigned long nr,
1350                         mfn_t mfn)
1351 {
1352     int res;
1353 
1354     if ( !(nr && iomem_access_permitted(d, mfn_x(mfn), mfn_x(mfn) + nr - 1)) )
1355         return 0;
1356 
1357     res = p2m_insert_mapping(d, gfn, nr, mfn, p2m_mmio_direct_c);
1358     if ( res < 0 )
1359     {
1360         printk(XENLOG_G_ERR "Unable to map MFNs [%#"PRI_mfn" - %#"PRI_mfn"] in Dom%d\n",
1361                mfn_x(mfn), mfn_x(mfn) + nr - 1, d->domain_id);
1362         return res;
1363     }
1364 
1365     return 0;
1366 }
1367 
1368 int guest_physmap_add_entry(struct domain *d,
1369                             gfn_t gfn,
1370                             mfn_t mfn,
1371                             unsigned long page_order,
1372                             p2m_type_t t)
1373 {
1374     return p2m_insert_mapping(d, gfn, (1 << page_order), mfn, t);
1375 }
1376 
1377 int guest_physmap_remove_page(struct domain *d, gfn_t gfn, mfn_t mfn,
1378                               unsigned int page_order)
1379 {
1380     return p2m_remove_mapping(d, gfn, (1 << page_order), mfn);
1381 }
1382 
1383 static struct page_info *p2m_allocate_root(void)
1384 {
1385     struct page_info *page;
1386     unsigned int i;
1387 
1388     page = alloc_domheap_pages(NULL, P2M_ROOT_ORDER, 0);
1389     if ( page == NULL )
1390         return NULL;
1391 
1392     /* Clear all the root level pages */
1393     for ( i = 0; i < P2M_ROOT_PAGES; i++ )
1394         clear_and_clean_page(page + i);
1395 
1396     return page;
1397 }
1398 
1399 static int p2m_alloc_table(struct domain *d)
1400 {
1401     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1402 
1403     p2m->root = p2m_allocate_root();
1404     if ( !p2m->root )
1405         return -ENOMEM;
1406 
1407     p2m->vttbr = generate_vttbr(p2m->vmid, page_to_mfn(p2m->root));
1408 
1409     /*
1410      * Make sure that all TLBs corresponding to the new VMID are flushed
1411      * before using it
1412      */
1413     p2m_write_lock(p2m);
1414     p2m_force_tlb_flush_sync(p2m);
1415     p2m_write_unlock(p2m);
1416 
1417     return 0;
1418 }
1419 
1420 
1421 static spinlock_t vmid_alloc_lock = SPIN_LOCK_UNLOCKED;
1422 
1423 /*
1424  * VTTBR_EL2 VMID field is 8 or 16 bits. AArch64 may support 16-bit VMID.
1425  * Using a bitmap here limits us to 256 or 65536 (for AArch64) concurrent
1426  * domains. The bitmap space will be allocated dynamically based on
1427  * whether 8 or 16 bit VMIDs are supported.
1428  */
1429 static unsigned long *vmid_mask;
1430 
1431 static void p2m_vmid_allocator_init(void)
1432 {
1433     /*
1434      * allocate space for vmid_mask based on MAX_VMID
1435      */
1436     vmid_mask = xzalloc_array(unsigned long, BITS_TO_LONGS(MAX_VMID));
1437 
1438     if ( !vmid_mask )
1439         panic("Could not allocate VMID bitmap space\n");
1440 
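    /* VMID 0 is reserved (INVALID_VMID), so mark it as always in use. */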
1441     set_bit(INVALID_VMID, vmid_mask);
1442 }
1443 
1444 static int p2m_alloc_vmid(struct domain *d)
1445 {
1446     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1447 
1448     int rc, nr;
1449 
1450     spin_lock(&vmid_alloc_lock);
1451 
1452     nr = find_first_zero_bit(vmid_mask, MAX_VMID);
1453 
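    /*
     * Bit INVALID_VMID (0) was set at init time, so it can never be
     * returned by the search above.
     */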
1454     ASSERT(nr != INVALID_VMID);
1455 
1456     if ( nr == MAX_VMID )
1457     {
1458         rc = -EBUSY;
1459         printk(XENLOG_ERR "p2m.c: dom%d: VMID pool exhausted\n", d->domain_id);
1460         goto out;
1461     }
1462 
1463     set_bit(nr, vmid_mask);
1464 
1465     p2m->vmid = nr;
1466 
1467     rc = 0;
1468 
1469 out:
1470     spin_unlock(&vmid_alloc_lock);
1471     return rc;
1472 }
1473 
1474 static void p2m_free_vmid(struct domain *d)
1475 {
1476     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1477     spin_lock(&vmid_alloc_lock);
1478     if ( p2m->vmid != INVALID_VMID )
1479         clear_bit(p2m->vmid, vmid_mask);
1480 
1481     spin_unlock(&vmid_alloc_lock);
1482 }
1483 
1484 void p2m_teardown(struct domain *d)
1485 {
1486     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1487     struct page_info *pg;
1488 
1489     /* p2m not actually initialized */
1490     if ( !p2m->domain )
1491         return;
1492 
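    /* Free all the intermediate page tables allocated for this p2m. */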
1493     while ( (pg = page_list_remove_head(&p2m->pages)) )
1494         free_domheap_page(pg);
1495 
1496     if ( p2m->root )
1497         free_domheap_pages(p2m->root, P2M_ROOT_ORDER);
1498 
1499     p2m->root = NULL;
1500 
1501     p2m_free_vmid(d);
1502 
1503     radix_tree_destroy(&p2m->mem_access_settings, NULL);
1504 
1505     p2m->domain = NULL;
1506 }
1507 
1508 int p2m_init(struct domain *d)
1509 {
1510     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1511     int rc = 0;
1512     unsigned int cpu;
1513 
1514     rwlock_init(&p2m->lock);
1515     INIT_PAGE_LIST_HEAD(&p2m->pages);
1516 
1517     p2m->vmid = INVALID_VMID;
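    /* Start from an invalid VMID: p2m_free_vmid() only releases valid ones. */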
1518 
1519     rc = p2m_alloc_vmid(d);
1520     if ( rc != 0 )
1521         return rc;
1522 
1523     p2m->max_mapped_gfn = _gfn(0);
1524     p2m->lowest_mapped_gfn = _gfn(ULONG_MAX);
1525 
1526     p2m->default_access = p2m_access_rwx;
1527     p2m->mem_access_enabled = false;
1528     radix_tree_init(&p2m->mem_access_settings);
1529 
1530     /*
1531      * Some IOMMUs don't support coherent PT walk. When the p2m is
1532      * shared with the CPU, Xen has to make sure that the PT changes have
1533      * reached the memory.
1534      */
1535     p2m->clean_pte = is_iommu_enabled(d) &&
1536         !iommu_has_feature(d, IOMMU_FEAT_COHERENT_WALK);
1537 
1538     rc = p2m_alloc_table(d);
1539 
1540     /*
1541      * Make sure that the type chosen is able to store any vCPU ID
1542      * between 0 and the maximum number of virtual CPUs supported, as
1543      * well as INVALID_VCPU_ID.
1544      */
1545     BUILD_BUG_ON((1 << (sizeof(p2m->last_vcpu_ran[0]) * 8)) < MAX_VIRT_CPUS);
1546     BUILD_BUG_ON((1 << (sizeof(p2m->last_vcpu_ran[0]) * 8)) < INVALID_VCPU_ID);
1547 
1548     for_each_possible_cpu(cpu)
1549         p2m->last_vcpu_ran[cpu] = INVALID_VCPU_ID;
1550 
1551     /*
1552      * Besides getting a domain when we only have the p2m in hand,
1553      * the back pointer to domain is also used in p2m_teardown()
1554      * as an end-of-initialization indicator.
1555      */
1556     p2m->domain = d;
1557 
1558     return rc;
1559 }
1560 
1561 /*
1562  * The function will go through the p2m and remove page references when
1563  * required. The mapping will be removed from the p2m.
1564  *
1565  * XXX: See whether the mapping can be left intact in the p2m.
1566  */
1567 int relinquish_p2m_mapping(struct domain *d)
1568 {
1569     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1570     unsigned long count = 0;
1571     p2m_type_t t;
1572     int rc = 0;
1573     unsigned int order;
1574     gfn_t start, end;
1575 
1576     p2m_write_lock(p2m);
1577 
1578     start = p2m->lowest_mapped_gfn;
1579     end = gfn_add(p2m->max_mapped_gfn, 1);
1580 
1581     for ( ; gfn_x(start) < gfn_x(end);
1582           start = gfn_next_boundary(start, order) )
1583     {
1584         mfn_t mfn = p2m_get_entry(p2m, start, &t, NULL, &order, NULL);
1585 
1586         count++;
1587         /*
1588          * Arbitrarily preempt every 512 iterations.
1589          */
1590         if ( !(count % 512) && hypercall_preempt_check() )
1591         {
1592             rc = -ERESTART;
1593             break;
1594         }
1595 
1596         /*
1597          * p2m_set_entry will take care of removing reference on page
1598          * when it is necessary and removing the mapping in the p2m.
1599          */
1600         if ( !mfn_eq(mfn, INVALID_MFN) )
1601         {
1602             /*
1603              * For a valid mapping, the start will always be aligned as
1604              * entries are removed whilst relinquishing.
1605              */
1606             rc = __p2m_set_entry(p2m, start, order, INVALID_MFN,
1607                                  p2m_invalid, p2m_access_rwx);
1608             if ( unlikely(rc) )
1609             {
1610                 printk(XENLOG_G_ERR "Unable to remove mapping gfn=%#"PRI_gfn" order=%u from the p2m of domain %d\n", gfn_x(start), order, d->domain_id);
1611                 break;
1612             }
1613         }
1614     }
1615 
1616     /*
1617      * Update lowest_mapped_gfn so that on the next call we start where
1618      * we stopped.
1619      */
1620     p2m->lowest_mapped_gfn = start;
1621 
1622     p2m_write_unlock(p2m);
1623 
1624     return rc;
1625 }
1626 
1627 int p2m_cache_flush_range(struct domain *d, gfn_t *pstart, gfn_t end)
1628 {
1629     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1630     gfn_t next_block_gfn;
1631     gfn_t start = *pstart;
1632     mfn_t mfn = INVALID_MFN;
1633     p2m_type_t t;
1634     unsigned int order;
1635     int rc = 0;
1636     /* Counter for preemption */
1637     unsigned short count = 0;
1638 
1639     /*
1640      * The cache flush operation will invalidate the RAM assigned to the
1641      * guest in a given range. It will not modify the page table and
1642      * flushing the cache whilst the page is used by another CPU is
1643      * fine. So using the read lock is fine here.
1644      */
1645     p2m_read_lock(p2m);
1646 
1647     start = gfn_max(start, p2m->lowest_mapped_gfn);
1648     end = gfn_min(end, gfn_add(p2m->max_mapped_gfn, 1));
1649 
1650     next_block_gfn = start;
1651 
1652     while ( gfn_x(start) < gfn_x(end) )
1653     {
1654         /*
1655          * Cleaning the cache for the P2M may take a long time. So we
1656          * need to be able to preempt. We will arbitrarily preempt every
1657          * time count reaches 512 or above.
1658          *
1659          * The count will be incremented by:
1660          *  - 1 for each region skipped
1661          *  - 10 for each page requiring a flush
1662          */
1663         if ( count >= 512 )
1664         {
1665             if ( softirq_pending(smp_processor_id()) )
1666             {
1667                 rc = -ERESTART;
1668                 break;
1669             }
1670             count = 0;
1671         }
1672 
1673         /*
1674          * We want to flush page by page as:
1675          *  - it may not be possible to map the full block (can be up to 1GB)
1676          *    in Xen memory
1677          *  - we may want to do fine-grained preemption as flushing multiple
1678          *    pages in one go may take a long time
1679          *
1680          * As p2m_get_entry is able to return the size of the mapping
1681          * in the p2m, it is pointless to execute it for each page.
1682          *
1683          * We can optimize it by tracking the gfn of the next
1684          * block. So we will only call p2m_get_entry for each block (can
1685          * be up to 1GB).
1686          */
1687         if ( gfn_eq(start, next_block_gfn) )
1688         {
1689             bool valid;
1690 
1691             mfn = p2m_get_entry(p2m, start, &t, NULL, &order, &valid);
1692             next_block_gfn = gfn_next_boundary(start, order);
1693 
1694             if ( mfn_eq(mfn, INVALID_MFN) || !p2m_is_any_ram(t) || !valid )
1695             {
1696                 count++;
1697                 start = next_block_gfn;
1698                 continue;
1699             }
1700         }
1701 
1702         count += 10;
1703 
1704         flush_page_to_ram(mfn_x(mfn), false);
1705 
1706         start = gfn_add(start, 1);
1707         mfn = mfn_add(mfn, 1);
1708     }
1709 
1710     if ( rc != -ERESTART )
1711         invalidate_icache();
1712 
1713     p2m_read_unlock(p2m);
1714 
1715     *pstart = start;
1716 
1717     return rc;
1718 }
1719 
1720 /*
1721  * Clean & invalidate the RAM associated with the guest vCPU.
1722  *
1723  * The function can only work with the current vCPU and should be called
1724  * with IRQs enabled as the vCPU could get preempted.
1725  */
1726 void p2m_flush_vm(struct vcpu *v)
1727 {
1728     struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
1729     int rc;
1730     gfn_t start = _gfn(0);
1731 
1732     ASSERT(v == current);
1733     ASSERT(local_irq_is_enabled());
1734     ASSERT(v->arch.need_flush_to_ram);
1735 
1736     do
1737     {
1738         rc = p2m_cache_flush_range(v->domain, &start, _gfn(ULONG_MAX));
1739         if ( rc == -ERESTART )
1740             do_softirq();
1741     } while ( rc == -ERESTART );
1742 
1743     if ( rc != 0 )
1744         gprintk(XENLOG_WARNING,
1745                 "P2M has not been correctly cleaned (rc = %d)\n",
1746                 rc);
1747 
1748     /*
1749      * Invalidate the p2m to track which pages were modified by the guest
1750      * between calls to p2m_flush_vm().
1751      */
1752     p2m_invalidate_root(p2m);
1753 
1754     v->arch.need_flush_to_ram = false;
1755 }
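/*
 * Illustrative sketch (not part of the original file; the exact call site
 * lives outside it): a caller on the path back to the guest would typically
 * consume the flag set by p2m_set_way_flush() or p2m_toggle_cache() along
 * these lines:
 *
 *     if ( current->arch.need_flush_to_ram )
 *         p2m_flush_vm(current);
 */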
1756 
1757 /*
1758  * See note at ARMv7 ARM B1.14.4 (DDI 0406C.c) (TL;DR: S/W ops are not
1759  * easily virtualized).
1760  *
1761  * Main problems:
1762  *  - S/W ops are local to a CPU (not broadcast)
1763  *  - We have line migration behind our back (speculation)
1764  *  - System caches don't support S/W at all (damn!)
1765  *
1766  * In the face of the above, the best we can do is to try and convert
1767  * S/W ops to VA ops. Because the guest is not allowed to infer the S/W
1768  * to PA mapping, it can only use S/W to nuke the whole cache, which is
1769  * rather a good thing for us.
1770  *
1771  * Also, it is only used when turning caches on/off ("The expected
1772  * usage of the cache maintenance instructions that operate by set/way
1773  * is associated with the powerdown and powerup of caches, if this is
1774  * required by the implementation.").
1775  *
1776  * We use the following policy:
1777  *  - If we trap an S/W operation, we enable VM trapping to detect
1778  *    caches being turned on/off, and do a full clean.
1779  *
1780  *  - We flush the caches both when they are turned on and turned off.
1781  *
1782  *  - Once the caches are enabled, we stop trapping VM ops.
1783  */
1784 void p2m_set_way_flush(struct vcpu *v)
1785 {
1786     /* This function can only work with the current vCPU. */
1787     ASSERT(v == current);
1788 
1789     if ( !(v->arch.hcr_el2 & HCR_TVM) )
1790     {
1791         v->arch.need_flush_to_ram = true;
1792         vcpu_hcr_set_flags(v, HCR_TVM);
1793     }
1794 }
1795 
1796 void p2m_toggle_cache(struct vcpu *v, bool was_enabled)
1797 {
1798     bool now_enabled = vcpu_has_cache_enabled(v);
1799 
1800     /* This function can only work with the current vCPU. */
1801     ASSERT(v == current);
1802 
1803     /*
1804      * If switching the MMU+caches on, we need to invalidate the caches.
1805      * If switching them off, we need to clean the caches.
1806      * Clean + invalidate always does the trick.
1807      */
1808     if ( was_enabled != now_enabled )
1809         v->arch.need_flush_to_ram = true;
1810 
1811     /* Caches are now on, stop trapping VM ops (until an S/W op) */
1812     if ( now_enabled )
1813         vcpu_hcr_clear_flags(v, HCR_TVM);
1814 }
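/*
 * Illustrative summary of the policy above (not in the original source; the
 * traps that lead here live outside this file): a guest set/way op first
 * reaches p2m_set_way_flush(), which marks need_flush_to_ram and sets
 * HCR_TVM. When the guest then toggles SCTLR_EL1.C, the TVM trap reaches
 * p2m_toggle_cache(), which keeps need_flush_to_ram set on an on/off
 * transition and clears HCR_TVM once the caches are back on. The actual
 * clean & invalidate is performed by p2m_flush_vm() above.
 */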
1815 
1816 mfn_t gfn_to_mfn(struct domain *d, gfn_t gfn)
1817 {
1818     return p2m_lookup(d, gfn, NULL);
1819 }
1820 
1821 struct page_info *get_page_from_gva(struct vcpu *v, vaddr_t va,
1822                                     unsigned long flags)
1823 {
1824     struct domain *d = v->domain;
1825     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1826     struct page_info *page = NULL;
1827     paddr_t maddr = 0;
1828     uint64_t par;
1829     mfn_t mfn;
1830     p2m_type_t t;
1831 
1832     /*
1833      * XXX: To support a different vCPU, we would need to load the
1834      * VTTBR_EL2, TTBR0_EL1, TTBR1_EL1 and SCTLR_EL1
1835      */
1836     if ( v != current )
1837         return NULL;
1838 
1839     /*
1840      * The lock is here to protect us against the break-before-make
1841      * sequence used when updating the entry.
1842      */
1843     p2m_read_lock(p2m);
1844     par = gvirt_to_maddr(va, &maddr, flags);
1845     p2m_read_unlock(p2m);
1846 
1847     /*
1848      * gvirt_to_maddr may fail if the entry does not have the valid bit
1849      * set. Fall back to the second method:
1850      *  1) Translate the VA to IPA using a software lookup (the stage-1
1851      *     page-table may not be accessible because the stage-2 entries
1852      *     may have the valid bit unset).
1853      *  2) Software lookup of the MFN
1854      *
1855      * Note that when memaccess is enabled, we instead directly call
1856      * p2m_mem_access_check_and_get_page(...). Because the function is
1857      * a variant of the methods described above, it will be able to
1858      * handle entries with the valid bit unset.
1859      *
1860      * TODO: Integrate memaccess more nicely with the rest of the
1861      * function.
1862      * TODO: Use the fault error in PAR_EL1 to avoid pointless
1863      *       translation.
1864      */
1865     if ( par )
1866     {
1867         paddr_t ipa;
1868         unsigned int s1_perms;
1869 
1870         /*
1871          * When memaccess is enabled, the GVA to MADDR translation may
1872          * have failed because of a permission fault.
1873          */
1874         if ( p2m->mem_access_enabled )
1875             return p2m_mem_access_check_and_get_page(va, flags, v);
1876 
1877         /*
1878          * The software stage-1 table walk can still fail, e.g., if the
1879          * GVA is not mapped.
1880          */
1881         if ( !guest_walk_tables(v, va, &ipa, &s1_perms) )
1882         {
1883             dprintk(XENLOG_G_DEBUG,
1884                     "%pv: Failed to walk page-table va %#"PRIvaddr"\n", v, va);
1885             return NULL;
1886         }
1887 
1888         mfn = p2m_lookup(d, gaddr_to_gfn(ipa), &t);
1889         if ( mfn_eq(INVALID_MFN, mfn) || !p2m_is_ram(t) )
1890             return NULL;
1891 
1892         /*
1893          * Check the permissions assumed by the caller. For instance,
1894          * in the case of guestcopy, the caller assumes that the translated
1895          * page can be accessed with the requested permissions. If this
1896          * is not the case, we should fail.
1897          *
1898          * Please note that we do not check for the GV2M_EXEC
1899          * permission. This is fine because the hardware-based translation
1900          * instruction does not test for execute permissions.
1901          */
1902         if ( (flags & GV2M_WRITE) && !(s1_perms & GV2M_WRITE) )
1903             return NULL;
1904 
1905         if ( (flags & GV2M_WRITE) && t != p2m_ram_rw )
1906             return NULL;
1907     }
1908     else
1909         mfn = maddr_to_mfn(maddr);
1910 
1911     if ( !mfn_valid(mfn) )
1912     {
1913         dprintk(XENLOG_G_DEBUG, "%pv: Invalid MFN %#"PRI_mfn"\n",
1914                 v, mfn_x(mfn));
1915         return NULL;
1916     }
1917 
1918     page = mfn_to_page(mfn);
1919     ASSERT(page);
1920 
1921     if ( unlikely(!get_page(page, d)) )
1922     {
1923         dprintk(XENLOG_G_DEBUG, "%pv: Failed to acquire the MFN %#"PRI_mfn"\n",
1924                 v, mfn_x(mfn));
1925         return NULL;
1926     }
1927 
1928     return page;
1929 }
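/*
 * Illustrative sketch (not part of the original file): a guest-copy style
 * caller might use get_page_from_gva() roughly as below. The helper name is
 * hypothetical, error handling is minimal, and the copy is assumed not to
 * cross a page boundary.
 */
#if 0
static int example_copy_from_guest_va(struct vcpu *v, void *dst,
                                       vaddr_t va, size_t len)
{
    struct page_info *page;
    void *p;

    /* Reject copies that would cross a page boundary. */
    if ( len > PAGE_SIZE - (va & ~PAGE_MASK) )
        return -EINVAL;

    /* On success, a reference is taken on the underlying page. */
    page = get_page_from_gva(v, va, GV2M_READ);
    if ( !page )
        return -EFAULT;

    p = map_domain_page(page_to_mfn(page));
    memcpy(dst, (char *)p + (va & ~PAGE_MASK), len);
    unmap_domain_page(p);

    /* Drop the reference taken by get_page_from_gva(). */
    put_page(page);

    return 0;
}
#endif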
1930 
1931 void __init p2m_restrict_ipa_bits(unsigned int ipa_bits)
1932 {
1933     /*
1934      * Calculate the minimum of the maximum IPA bits that any external entity
1935      * can support.
1936      */
1937     if ( ipa_bits < p2m_ipa_bits )
1938         p2m_ipa_bits = ipa_bits;
1939 }
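/*
 * Illustrative usage (not in the original source): an external entity such
 * as an IOMMU driver that can only handle, say, 40 input address bits would
 * call p2m_restrict_ipa_bits(40) during boot, before setup_virt_paging()
 * below derives the final VTCR value from p2m_ipa_bits.
 */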
1940 
1941 /* VTCR value to be configured by all CPUs. Set only once by the boot CPU */
1942 static uint32_t __read_mostly vtcr;
1943 
1944 static void setup_virt_paging_one(void *data)
1945 {
1946     WRITE_SYSREG32(vtcr, VTCR_EL2);
1947 
1948     /*
1949      * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free from
1950      * entries related to the EL1/EL0 translation regime until a guest vCPU
1951      * is running. For that, we need to set up VTTBR to point to an empty
1952      * page-table and turn on stage-2 translation. The TLB entries
1953      * associated with the EL1/EL0 translation regime will also be flushed
1954      * in case an AT instruction was speculated beforehand.
1955      */
1956     if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
1957     {
1958         WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2);
1959         WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_VM, HCR_EL2);
1960         isb();
1961 
1962         flush_all_guests_tlb_local();
1963     }
1964 }
1965 
1966 void __init setup_virt_paging(void)
1967 {
1968     /* Setup Stage 2 address translation */
1969     unsigned long val = VTCR_RES1|VTCR_SH0_IS|VTCR_ORGN0_WBWA|VTCR_IRGN0_WBWA;
1970 
1971 #ifdef CONFIG_ARM_32
1972     if ( p2m_ipa_bits < 40 )
1973         panic("P2M: Not able to support %u-bit IPA at the moment\n",
1974               p2m_ipa_bits);
1975 
1976     printk("P2M: 40-bit IPA\n");
1977     p2m_ipa_bits = 40;
1978     val |= VTCR_T0SZ(0x18); /* 40 bit IPA */
1979     val |= VTCR_SL0(0x1); /* P2M starts at first level */
1980 #else /* CONFIG_ARM_64 */
1981     const struct {
1982         unsigned int pabits; /* Physical Address Size */
1983         unsigned int t0sz;   /* Desired T0SZ, minimum in comment */
1984         unsigned int root_order; /* Page order of the root of the p2m */
1985         unsigned int sl0;    /* Desired SL0, maximum in comment */
1986     } pa_range_info[] = {
1987         /* T0SZ minimum and SL0 maximum from ARM DDI 0487A.b Table D4-5 */
1988         /*      PA size, t0sz(min), root-order, sl0(max) */
1989         [0] = { 32,      32/*32*/,  0,          1 },
1990         [1] = { 36,      28/*28*/,  0,          1 },
1991         [2] = { 40,      24/*24*/,  1,          1 },
1992         [3] = { 42,      22/*22*/,  3,          1 },
1993         [4] = { 44,      20/*20*/,  0,          2 },
1994         [5] = { 48,      16/*16*/,  0,          2 },
1995         [6] = { 0 }, /* Invalid */
1996         [7] = { 0 }  /* Invalid */
1997     };
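    /*
     * Illustrative note (derived from the table above, not in the original
     * source): root_order reflects how many pages must be concatenated for
     * the P2M root. E.g. with a 40-bit PA and SL0 = 1 the walk starts at
     * level 1; a single level-1 page only covers 39 bits, so two pages are
     * concatenated (root_order = 1), while a 42-bit PA needs 2^3 = 8 pages
     * (root_order = 3).
     */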
1998 
1999     unsigned int i, cpu;
2000     unsigned int pa_range = 0x10; /* Larger than any possible value */
2001     bool vmid_8_bit = false;
2002 
2003     for_each_online_cpu ( cpu )
2004     {
2005         const struct cpuinfo_arm *info = &cpu_data[cpu];
2006 
2007         /*
2008          * Restrict "p2m_ipa_bits" if needed. As the P2M table is always
2009          * configured with IPA bits == PA bits, compare against "pabits".
2010          */
2011         if ( pa_range_info[info->mm64.pa_range].pabits < p2m_ipa_bits )
2012             p2m_ipa_bits = pa_range_info[info->mm64.pa_range].pabits;
2013 
2014         /* Set a flag if the current cpu does not support 16 bit VMIDs. */
2015         if ( info->mm64.vmid_bits != MM64_VMID_16_BITS_SUPPORT )
2016             vmid_8_bit = true;
2017     }
2018 
2019     /*
2020      * If the flag is not set then it means all CPUs support 16-bit
2021      * VMIDs.
2022      */
2023     if ( !vmid_8_bit )
2024         max_vmid = MAX_VMID_16_BIT;
2025 
2026     /* Choose a suitable "pa_range" according to the resulting "p2m_ipa_bits". */
2027     for ( i = 0; i < ARRAY_SIZE(pa_range_info); i++ )
2028     {
2029         if ( p2m_ipa_bits == pa_range_info[i].pabits )
2030         {
2031             pa_range = i;
2032             break;
2033         }
2034     }
2035 
2036     /* pa_range is 4 bits, but the defined encodings are only 3 bits */
2037     if ( pa_range >= ARRAY_SIZE(pa_range_info) || !pa_range_info[pa_range].pabits )
2038         panic("Unknown encoding of ID_AA64MMFR0_EL1.PARange %x\n", pa_range);
2039 
2040     val |= VTCR_PS(pa_range);
2041     val |= VTCR_TG0_4K;
2042 
2043     /* Set the VS bit only if 16 bit VMID is supported. */
2044     if ( MAX_VMID == MAX_VMID_16_BIT )
2045         val |= VTCR_VS;
2046     val |= VTCR_SL0(pa_range_info[pa_range].sl0);
2047     val |= VTCR_T0SZ(pa_range_info[pa_range].t0sz);
2048 
2049     p2m_root_order = pa_range_info[pa_range].root_order;
2050     p2m_root_level = 2 - pa_range_info[pa_range].sl0;
2051     p2m_ipa_bits = 64 - pa_range_info[pa_range].t0sz;
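    /*
     * Worked example (not in the original source): with PARange encoding 5
     * (48-bit PA), the table gives t0sz = 16 and sl0 = 2, so
     * p2m_ipa_bits = 64 - 16 = 48, the root starts at level 2 - 2 = 0 and
     * no concatenation is needed (root_order = 0).
     */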
2052 
2053     printk("P2M: %d-bit IPA with %d-bit PA and %d-bit VMID\n",
2054            p2m_ipa_bits,
2055            pa_range_info[pa_range].pabits,
2056            ( MAX_VMID == MAX_VMID_16_BIT ) ? 16 : 8);
2057 #endif
2058     printk("P2M: %d levels with order-%d root, VTCR 0x%lx\n",
2059            4 - P2M_ROOT_LEVEL, P2M_ROOT_ORDER, val);
2060 
2061     p2m_vmid_allocator_init();
2062 
2063     /* It is not allowed to concatenate a level zero root */
2064     BUG_ON( P2M_ROOT_LEVEL == 0 && P2M_ROOT_ORDER > 0 );
2065     vtcr = val;
2066 
2067     /*
2068      * ARM64_WORKAROUND_AT_SPECULATE requires allocating the root table
2069      * with all entries zeroed.
2070      */
2071     if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) )
2072     {
2073         struct page_info *root;
2074 
2075         root = p2m_allocate_root();
2076         if ( !root )
2077             panic("Unable to allocate root table for ARM64_WORKAROUND_AT_SPECULATE\n");
2078 
2079         empty_root_mfn = page_to_mfn(root);
2080     }
2081 
2082     setup_virt_paging_one(NULL);
2083     smp_call_function(setup_virt_paging_one, NULL, 1);
2084 }
2085 
2086 static int cpu_virt_paging_callback(struct notifier_block *nfb,
2087                                     unsigned long action,
2088                                     void *hcpu)
2089 {
2090     switch ( action )
2091     {
2092     case CPU_STARTING:
2093         ASSERT(system_state != SYS_STATE_boot);
2094         setup_virt_paging_one(NULL);
2095         break;
2096     default:
2097         break;
2098     }
2099 
2100     return NOTIFY_DONE;
2101 }
2102 
2103 static struct notifier_block cpu_virt_paging_nfb = {
2104     .notifier_call = cpu_virt_paging_callback,
2105 };
2106 
2107 static int __init cpu_virt_paging_init(void)
2108 {
2109     register_cpu_notifier(&cpu_virt_paging_nfb);
2110 
2111     return 0;
2112 }
2113 /*
2114  * Initialization of the notifier has to be done in the init phase rather
2115  * than the presmp_init phase: the registered notifier is used to set up
2116  * virtual paging for non-boot CPUs after the initial virtual paging for all
2117  * CPUs has already been set up, i.e. when a non-boot CPU is hotplugged after
2118  * the system has booted. In other words, the notifier must be registered
2119  * after virtual paging is initially set up (setup_virt_paging() is called
2120  * from start_xen()), as vtcr has to be set before the notifier can fire.
2121  */
2122 __initcall(cpu_virt_paging_init);
2123 
2124 /*
2125  * Local variables:
2126  * mode: C
2127  * c-file-style: "BSD"
2128  * c-basic-offset: 4
2129  * indent-tabs-mode: nil
2130  * End:
2131  */
2132