/*
 * xen/arch/arm/mm.c
 *
 * MMU code for an ARMv7-A with virt extensions.
 *
 * Tim Deegan <tim@xen.org>
 * Copyright (c) 2011 Citrix Systems.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <xen/compile.h>
#include <xen/types.h>
#include <xen/device_tree.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/preempt.h>
#include <xen/errno.h>
#include <xen/grant_table.h>
#include <xen/softirq.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/domain_page.h>
#include <xen/err.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <public/memory.h>
#include <xen/sched.h>
#include <xen/vmap.h>
#include <xsm/xsm.h>
#include <xen/pfn.h>
#include <xen/sizes.h>
#include <xen/libfdt/libfdt.h>

#include <asm/setup.h>

/* Override macros from asm/page.h to make them work with mfn_t */
#undef virt_to_mfn
#define virt_to_mfn(va) _mfn(__virt_to_mfn(va))
#undef mfn_to_virt
#define mfn_to_virt(mfn) __mfn_to_virt(mfn_x(mfn))

#ifdef NDEBUG
static inline void
__attribute__ ((__format__ (__printf__, 1, 2)))
mm_printk(const char *fmt, ...) {}
#else
#define mm_printk(fmt, args...)             \
    do                                      \
    {                                       \
        dprintk(XENLOG_ERR, fmt, ## args);  \
        WARN();                             \
    } while (0)
#endif

/*
 * Macros to define page-tables:
 *  - DEFINE_BOOT_PAGE_TABLE is used to define page-tables that are used
 *  in assembly code before BSS is zeroed.
 *  - DEFINE_PAGE_TABLE{,S} are used to define one or multiple
 *  page-tables to be used after BSS is zeroed (typically they are only used
 *  in C).
 */
#define DEFINE_BOOT_PAGE_TABLE(name)                                          \
lpae_t __aligned(PAGE_SIZE) __section(".data.page_aligned") name[LPAE_ENTRIES]

#define DEFINE_PAGE_TABLES(name, nr)                    \
lpae_t __aligned(PAGE_SIZE) name[LPAE_ENTRIES * (nr)]

#define DEFINE_PAGE_TABLE(name) DEFINE_PAGE_TABLES(name, 1)
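
/*
 * For illustration only (not from the original sources): with 4K pages
 * LPAE_ENTRIES is 512, so DEFINE_BOOT_PAGE_TABLE(boot_pgtable) expands to
 * roughly
 *
 *   lpae_t __aligned(PAGE_SIZE) __section(".data.page_aligned")
 *       boot_pgtable[512];
 *
 * i.e. one page-sized, page-aligned table placed in .data so it is usable
 * before the BSS (and hence any zero-initialised table) is cleared.
 */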

/* Static start-of-day pagetables that we use before the allocators
 * are up. These are used by all CPUs during bringup before switching
 * to the CPUs' own pagetables.
 *
 * These pagetables have a very simple structure. They include:
 *  - 2MB worth of 4K mappings of xen at XEN_VIRT_START; boot_first and
 *    boot_second are used to populate the tables down to boot_third
 *    which contains the actual mapping.
 *  - a 1:1 mapping of xen at its current physical address. This uses a
 *    section mapping at whichever of boot_{pgtable,first,second}
 *    covers that physical address.
 *
 * For the boot CPU these mappings point to the address where Xen was
 * loaded by the bootloader. For secondary CPUs they point to the
 * relocated copy of Xen.
 *
 * In addition to the above for the boot CPU the device-tree is
 * initially mapped in the boot misc slot. This mapping is not present
 * for secondary CPUs.
 *
 * Finally, if EARLY_PRINTK is enabled then xen_fixmap will be mapped
 * by the CPU once it has moved off the 1:1 mapping.
 */
DEFINE_BOOT_PAGE_TABLE(boot_pgtable);
#ifdef CONFIG_ARM_64
DEFINE_BOOT_PAGE_TABLE(boot_first);
DEFINE_BOOT_PAGE_TABLE(boot_first_id);
#endif
DEFINE_BOOT_PAGE_TABLE(boot_second_id);
DEFINE_BOOT_PAGE_TABLE(boot_third_id);
DEFINE_BOOT_PAGE_TABLE(boot_second);
DEFINE_BOOT_PAGE_TABLE(boot_third);

/* Main runtime page tables */

/*
 * For arm32 xen_pgtable and xen_dommap are per-PCPU and are allocated before
 * bringing up each CPU. For arm64 xen_pgtable is common to all PCPUs.
 *
 * xen_second, xen_fixmap and xen_xenmap are always shared between all
 * PCPUs.
 */

#ifdef CONFIG_ARM_64
#define HYP_PT_ROOT_LEVEL 0
static DEFINE_PAGE_TABLE(xen_pgtable);
static DEFINE_PAGE_TABLE(xen_first);
#define THIS_CPU_PGTABLE xen_pgtable
#else
#define HYP_PT_ROOT_LEVEL 1
/* Per-CPU pagetable pages */
/* xen_pgtable == root of the trie (zeroeth level on 64-bit, first on 32-bit) */
static DEFINE_PER_CPU(lpae_t *, xen_pgtable);
#define THIS_CPU_PGTABLE this_cpu(xen_pgtable)
/* xen_dommap == pages used by map_domain_page, these pages contain
 * the second level pagetables which map the domheap region
 * DOMHEAP_VIRT_START...DOMHEAP_VIRT_END in 2MB chunks. */
static DEFINE_PER_CPU(lpae_t *, xen_dommap);
/* Root of the trie for cpu0, other CPU's PTs are dynamically allocated */
static DEFINE_PAGE_TABLE(cpu0_pgtable);
/* cpu0's domheap page tables */
static DEFINE_PAGE_TABLES(cpu0_dommap, DOMHEAP_SECOND_PAGES);
#endif

#ifdef CONFIG_ARM_64
/* The first page of the first level mapping of the xenheap. The
 * subsequent xenheap first level pages are dynamically allocated, but
 * we need this one to bootstrap ourselves. */
static DEFINE_PAGE_TABLE(xenheap_first_first);
/* The zeroeth level slot which uses xenheap_first_first. Used because
 * setup_xenheap_mappings otherwise relies on mfn_to_virt which isn't
 * valid for a non-xenheap mapping. */
static __initdata int xenheap_first_first_slot = -1;
#endif

/* Common pagetable leaves */
/* Second level page tables.
 *
 * The second-level table is 2 contiguous pages long, and covers all
 * addresses from 0 to 0x7fffffff. Offsets into it are calculated
 * with second_linear_offset(), not second_table_offset().
 */
static DEFINE_PAGE_TABLES(xen_second, 2);
/* First level page table used for fixmap */
DEFINE_BOOT_PAGE_TABLE(xen_fixmap);
/* First level page table used to map Xen itself with the XN bit set
 * as appropriate. */
static DEFINE_PAGE_TABLE(xen_xenmap);

/* Non-boot CPUs use this to find the correct pagetables. */
uint64_t init_ttbr;

static paddr_t phys_offset;

/* Limits of the Xen heap */
mfn_t xenheap_mfn_start __read_mostly = INVALID_MFN_INITIALIZER;
mfn_t xenheap_mfn_end __read_mostly;
vaddr_t xenheap_virt_end __read_mostly;
#ifdef CONFIG_ARM_64
vaddr_t xenheap_virt_start __read_mostly;
unsigned long xenheap_base_pdx __read_mostly;
#endif

unsigned long frametable_base_pdx __read_mostly;
unsigned long frametable_virt_end __read_mostly;

unsigned long max_page;
unsigned long total_pages;

extern char __init_begin[], __init_end[];

/* Checking VA memory layout alignment. */
static void __init __maybe_unused build_assertions(void)
{
    /* 2MB aligned regions */
    BUILD_BUG_ON(XEN_VIRT_START & ~SECOND_MASK);
    BUILD_BUG_ON(FIXMAP_ADDR(0) & ~SECOND_MASK);
    BUILD_BUG_ON(BOOT_RELOC_VIRT_START & ~SECOND_MASK);
    /* 1GB aligned regions */
#ifdef CONFIG_ARM_32
    BUILD_BUG_ON(XENHEAP_VIRT_START & ~FIRST_MASK);
#else
    BUILD_BUG_ON(DIRECTMAP_VIRT_START & ~FIRST_MASK);
#endif
    /* Page table structure constraints */
#ifdef CONFIG_ARM_64
    BUILD_BUG_ON(zeroeth_table_offset(XEN_VIRT_START));
#endif
    BUILD_BUG_ON(first_table_offset(XEN_VIRT_START));
    BUILD_BUG_ON(second_linear_offset(XEN_VIRT_START) >= LPAE_ENTRIES);
#ifdef CONFIG_DOMAIN_PAGE
    BUILD_BUG_ON(DOMHEAP_VIRT_START & ~FIRST_MASK);
#endif
}

void dump_pt_walk(paddr_t ttbr, paddr_t addr,
                  unsigned int root_level,
                  unsigned int nr_root_tables)
{
    static const char *level_strs[4] = { "0TH", "1ST", "2ND", "3RD" };
    const mfn_t root_mfn = maddr_to_mfn(ttbr);
    const unsigned int offsets[4] = {
        zeroeth_table_offset(addr),
        first_table_offset(addr),
        second_table_offset(addr),
        third_table_offset(addr)
    };
    lpae_t pte, *mapping;
    unsigned int level, root_table;

#ifdef CONFIG_ARM_32
    BUG_ON(root_level < 1);
#endif
    BUG_ON(root_level > 3);

    if ( nr_root_tables > 1 )
    {
        /*
         * Concatenated root-level tables. The table number will be
         * the offset at the previous level. It is not possible to
         * concatenate a level-0 root.
         */
        BUG_ON(root_level == 0);
        root_table = offsets[root_level - 1];
        printk("Using concatenated root table %u\n", root_table);
        if ( root_table >= nr_root_tables )
        {
            printk("Invalid root table offset\n");
            return;
        }
    }
    else
        root_table = 0;

    mapping = map_domain_page(mfn_add(root_mfn, root_table));

    for ( level = root_level; ; level++ )
    {
        if ( offsets[level] > LPAE_ENTRIES )
            break;

        pte = mapping[offsets[level]];

        printk("%s[0x%x] = 0x%"PRIpaddr"\n",
               level_strs[level], offsets[level], pte.bits);

        if ( level == 3 || !pte.walk.valid || !pte.walk.table )
            break;

        /* For next iteration */
        unmap_domain_page(mapping);
        mapping = map_domain_page(lpae_get_mfn(pte));
    }

    unmap_domain_page(mapping);
}

void dump_hyp_walk(vaddr_t addr)
{
    uint64_t ttbr = READ_SYSREG64(TTBR0_EL2);
    lpae_t *pgtable = THIS_CPU_PGTABLE;

    printk("Walking Hypervisor VA 0x%"PRIvaddr" "
           "on CPU%d via TTBR 0x%016"PRIx64"\n",
           addr, smp_processor_id(), ttbr);

    if ( smp_processor_id() == 0 )
        BUG_ON( (lpae_t *)(unsigned long)(ttbr - phys_offset) != pgtable );
    else
        BUG_ON( virt_to_maddr(pgtable) != ttbr );
    dump_pt_walk(ttbr, addr, HYP_PT_ROOT_LEVEL, 1);
}

/*
 * Standard entry type that we'll use to build Xen's own pagetables.
 * We put the same permissions at every level, because they're ignored
 * by the walker in non-leaf entries.
 */
static inline lpae_t mfn_to_xen_entry(mfn_t mfn, unsigned attr)
{
    lpae_t e = (lpae_t) {
        .pt = {
            .valid = 1,           /* Mappings are present */
            .table = 0,           /* Set to 1 for links and 4k maps */
            .ai = attr,
            .ns = 1,              /* Hyp mode is in the non-secure world */
            .up = 1,              /* See below */
            .ro = 0,              /* Assume read-write */
            .af = 1,              /* No need for access tracking */
            .ng = 1,              /* Makes TLB flushes easier */
            .contig = 0,          /* Assume non-contiguous */
            .xn = 1,              /* No need to execute outside .text */
            .avail = 0,           /* Reference count for domheap mapping */
        }};
    /*
     * For EL2 stage-1 page table, up (aka AP[1]) is RES1 as the translation
     * regime applies to only one exception level (see D4.4.4 and G4.6.1
     * in ARM DDI 0487B.a). If this changes, remember to update the
     * hard-coded values in head.S too.
     */

    switch ( attr )
    {
    case MT_NORMAL_NC:
        /*
         * ARM ARM: Overlaying the shareability attribute (DDI
         * 0406C.b B3-1376 to 1377)
         *
         * A memory region with a resultant memory type attribute of Normal,
         * and a resultant cacheability attribute of Inner Non-cacheable,
         * Outer Non-cacheable, must have a resultant shareability attribute
         * of Outer Shareable, otherwise shareability is UNPREDICTABLE.
         *
         * On ARMv8 shareability is ignored and explicitly treated as Outer
         * Shareable for Normal Inner Non-cacheable, Outer Non-cacheable.
         */
        e.pt.sh = LPAE_SH_OUTER;
        break;
    case MT_DEVICE_nGnRnE:
    case MT_DEVICE_nGnRE:
        /*
         * Shareability is ignored for non-Normal memory, Outer is as
         * good as anything.
         *
         * On ARMv8 shareability is ignored and explicitly treated as Outer
         * Shareable for any device memory type.
         */
        e.pt.sh = LPAE_SH_OUTER;
        break;
    default:
        e.pt.sh = LPAE_SH_INNER;  /* Xen mappings are SMP coherent */
        break;
    }

    ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));

    lpae_set_mfn(e, mfn);

    return e;
}

/* Map a 4k page in a fixmap entry */
void set_fixmap(unsigned map, mfn_t mfn, unsigned int flags)
{
    int res;

    res = map_pages_to_xen(FIXMAP_ADDR(map), mfn, 1, flags);
    BUG_ON(res != 0);
}

/* Remove a mapping from a fixmap entry */
void clear_fixmap(unsigned map)
{
    int res;

    res = destroy_xen_mappings(FIXMAP_ADDR(map), FIXMAP_ADDR(map) + PAGE_SIZE);
    BUG_ON(res != 0);
}

/* Create Xen's mappings of memory.
 * Mapping_size must be either 2MB or 32MB.
 * Base and virt must be mapping_size aligned.
 * Size must be a multiple of mapping_size.
 * second must be a contiguous set of second level page tables
 * covering the region starting at virt_offset. */
static void __init create_mappings(lpae_t *second,
                                   unsigned long virt_offset,
                                   unsigned long base_mfn,
                                   unsigned long nr_mfns,
                                   unsigned int mapping_size)
{
    unsigned long i, count;
    const unsigned long granularity = mapping_size >> PAGE_SHIFT;
    lpae_t pte, *p;

    ASSERT((mapping_size == MB(2)) || (mapping_size == MB(32)));
    ASSERT(!((virt_offset >> PAGE_SHIFT) % granularity));
    ASSERT(!(base_mfn % granularity));
    ASSERT(!(nr_mfns % granularity));

    count = nr_mfns / LPAE_ENTRIES;
    p = second + second_linear_offset(virt_offset);
    pte = mfn_to_xen_entry(_mfn(base_mfn), MT_NORMAL);
    if ( granularity == 16 * LPAE_ENTRIES )
        pte.pt.contig = 1;  /* These maps are in 16-entry contiguous chunks. */
    for ( i = 0; i < count; i++ )
    {
        write_pte(p + i, pte);
        pte.pt.base += 1 << LPAE_SHIFT;
    }
    flush_xen_tlb_local();
}
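
/*
 * Worked example (for illustration): mapping 32MB from a 2MB-aligned base
 * with mapping_size = MB(2) gives granularity = 512 (= LPAE_ENTRIES),
 * nr_mfns = 8192 and hence count = 16, so the loop writes sixteen 2MB
 * superpage entries, bumping pte.pt.base by 512 frames on each iteration.
 */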

#ifdef CONFIG_DOMAIN_PAGE
void *map_domain_page_global(mfn_t mfn)
{
    return vmap(&mfn, 1);
}

void unmap_domain_page_global(const void *va)
{
    vunmap(va);
}

/* Map a page of domheap memory */
void *map_domain_page(mfn_t mfn)
{
    unsigned long flags;
    lpae_t *map = this_cpu(xen_dommap);
    unsigned long slot_mfn = mfn_x(mfn) & ~LPAE_ENTRY_MASK;
    vaddr_t va;
    lpae_t pte;
    int i, slot;

    local_irq_save(flags);

    /* The map is laid out as an open-addressed hash table where each
     * entry is a 2MB superpage pte.  We use the available bits of each
     * PTE as a reference count; when the refcount is zero the slot can
     * be reused. */
    for ( slot = (slot_mfn >> LPAE_SHIFT) % DOMHEAP_ENTRIES, i = 0;
          i < DOMHEAP_ENTRIES;
          slot = (slot + 1) % DOMHEAP_ENTRIES, i++ )
    {
        if ( map[slot].pt.avail < 0xf &&
             map[slot].pt.base == slot_mfn &&
             map[slot].pt.valid )
        {
            /* This slot already points to the right place; reuse it */
            map[slot].pt.avail++;
            break;
        }
        else if ( map[slot].pt.avail == 0 )
        {
            /* Commandeer this 2MB slot */
            pte = mfn_to_xen_entry(_mfn(slot_mfn), MT_NORMAL);
            pte.pt.avail = 1;
            write_pte(map + slot, pte);
            break;
        }

    }
    /* If the map fills up, the callers have misbehaved. */
    BUG_ON(i == DOMHEAP_ENTRIES);

#ifndef NDEBUG
    /* Searching the hash could get slow if the map starts filling up.
     * Cross that bridge when we come to it */
    {
        static int max_tries = 32;
        if ( i >= max_tries )
        {
            dprintk(XENLOG_WARNING, "Domheap map is filling: %i tries\n", i);
            max_tries *= 2;
        }
    }
#endif

    local_irq_restore(flags);

    va = (DOMHEAP_VIRT_START
          + (slot << SECOND_SHIFT)
          + ((mfn_x(mfn) & LPAE_ENTRY_MASK) << THIRD_SHIFT));

    /*
     * We may not have flushed this specific subpage at map time,
     * since we only flush the 4k page not the superpage
     */
    flush_xen_tlb_range_va_local(va, PAGE_SIZE);

    return (void *)va;
}
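
/*
 * Worked example (for illustration): if the 2MB-aligned part of the mfn
 * hashes to slot 3 and the low bits of the mfn are 0x1a, the returned VA is
 * DOMHEAP_VIRT_START + (3 << SECOND_SHIFT) + (0x1a << THIRD_SHIFT), i.e.
 * 4K page 0x1a inside the 2MB window owned by slot 3.
 */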

/* Release a mapping taken with map_domain_page() */
void unmap_domain_page(const void *va)
{
    unsigned long flags;
    lpae_t *map = this_cpu(xen_dommap);
    int slot = ((unsigned long) va - DOMHEAP_VIRT_START) >> SECOND_SHIFT;

    if ( !va )
        return;

    local_irq_save(flags);

    ASSERT(slot >= 0 && slot < DOMHEAP_ENTRIES);
    ASSERT(map[slot].pt.avail != 0);

    map[slot].pt.avail--;

    local_irq_restore(flags);
}

mfn_t domain_page_map_to_mfn(const void *ptr)
{
    unsigned long va = (unsigned long)ptr;
    lpae_t *map = this_cpu(xen_dommap);
    int slot = (va - DOMHEAP_VIRT_START) >> SECOND_SHIFT;
    unsigned long offset = (va>>THIRD_SHIFT) & LPAE_ENTRY_MASK;

    if ( va >= VMAP_VIRT_START && va < VMAP_VIRT_END )
        return virt_to_mfn(va);

    ASSERT(slot >= 0 && slot < DOMHEAP_ENTRIES);
    ASSERT(map[slot].pt.avail != 0);

    return mfn_add(lpae_get_mfn(map[slot]), offset);
}
#endif

void flush_page_to_ram(unsigned long mfn, bool sync_icache)
{
    void *v = map_domain_page(_mfn(mfn));

    clean_and_invalidate_dcache_va_range(v, PAGE_SIZE);
    unmap_domain_page(v);

    /*
     * For some of the instruction cache (such as VIPT), the entire I-Cache
     * needs to be flushed to guarantee that all the aliases of a given
     * physical address will be removed from the cache.
     * Invalidating the I-Cache by VA highly depends on the behavior of the
     * I-Cache (See D4.9.2 in ARM DDI 0487A.k_iss10775). Instead of using flush
     * by VA on select platforms, we just flush the entire cache here.
     */
    if ( sync_icache )
        invalidate_icache();
}

static inline lpae_t pte_of_xenaddr(vaddr_t va)
{
    paddr_t ma = va + phys_offset;

    return mfn_to_xen_entry(maddr_to_mfn(ma), MT_NORMAL);
}
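
/*
 * Note (added for clarity): phys_offset is the difference between where Xen
 * was loaded in memory and where it was linked, so the entry built here for
 * a link-time address va points at the machine address va + phys_offset.
 */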

void * __init early_fdt_map(paddr_t fdt_paddr)
{
    /* We are using 2MB superpage for mapping the FDT */
    paddr_t base_paddr = fdt_paddr & SECOND_MASK;
    paddr_t offset;
    void *fdt_virt;
    uint32_t size;

    /*
     * Check whether the physical FDT address is set and meets the minimum
     * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be at
     * least 8 bytes so that we always access the magic and size fields
     * of the FDT header after mapping the first chunk, double check if
     * that is indeed the case.
     */
    BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
    if ( !fdt_paddr || fdt_paddr % MIN_FDT_ALIGN )
        return NULL;

    /* The FDT is mapped using 2MB superpage */
    BUILD_BUG_ON(BOOT_FDT_VIRT_START % SZ_2M);

    create_mappings(xen_second, BOOT_FDT_VIRT_START, paddr_to_pfn(base_paddr),
                    SZ_2M >> PAGE_SHIFT, SZ_2M);

    offset = fdt_paddr % SECOND_SIZE;
    fdt_virt = (void *)BOOT_FDT_VIRT_START + offset;

    if ( fdt_magic(fdt_virt) != FDT_MAGIC )
        return NULL;

    size = fdt_totalsize(fdt_virt);
    if ( size > MAX_FDT_SIZE )
        return NULL;

    if ( (offset + size) > SZ_2M )
    {
        create_mappings(xen_second, BOOT_FDT_VIRT_START + SZ_2M,
                        paddr_to_pfn(base_paddr + SZ_2M),
                        SZ_2M >> PAGE_SHIFT, SZ_2M);
    }

    return fdt_virt;
}
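
/*
 * Worked example (for illustration, the address is made up): an FDT at
 * physical address 0x47fff000 gives base_paddr = 0x47e00000 and
 * offset = 0x1ff000, so any blob larger than 4K crosses the first 2MB
 * window and triggers the second create_mappings() call above.
 */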

void __init remove_early_mappings(void)
{
    lpae_t pte = {0};
    write_pte(xen_second + second_table_offset(BOOT_FDT_VIRT_START), pte);
    write_pte(xen_second + second_table_offset(BOOT_FDT_VIRT_START + SZ_2M),
              pte);
    flush_xen_tlb_range_va(BOOT_FDT_VIRT_START, BOOT_FDT_SLOT_SIZE);
}

/*
 * After boot, Xen page-tables should not contain mappings that are both
 * Writable and eXecutable.
 *
 * This should be called on each CPU to enforce the policy.
 */
static void xen_pt_enforce_wnx(void)
{
    WRITE_SYSREG32(READ_SYSREG32(SCTLR_EL2) | SCTLR_Axx_ELx_WXN, SCTLR_EL2);
    /*
     * The TLBs may cache SCTLR_EL2.WXN. So ensure it is synchronized
     * before flushing the TLBs.
     */
    isb();
    flush_xen_tlb_local();
}

extern void switch_ttbr(uint64_t ttbr);

/* Clear a translation table and clean & invalidate the cache */
static void clear_table(void *table)
{
    clear_page(table);
    clean_and_invalidate_dcache_va_range(table, PAGE_SIZE);
}

/* Boot-time pagetable setup.
 * Changes here may need matching changes in head.S */
void __init setup_pagetables(unsigned long boot_phys_offset)
{
    uint64_t ttbr;
    lpae_t pte, *p;
    int i;

    phys_offset = boot_phys_offset;

#ifdef CONFIG_ARM_64
    p = (void *) xen_pgtable;
    p[0] = pte_of_xenaddr((uintptr_t)xen_first);
    p[0].pt.table = 1;
    p[0].pt.xn = 0;
    p = (void *) xen_first;
#else
    p = (void *) cpu0_pgtable;
#endif

    /* Initialise first level entries, to point to second level entries */
    for ( i = 0; i < 2; i++)
    {
        p[i] = pte_of_xenaddr((uintptr_t)(xen_second+i*LPAE_ENTRIES));
        p[i].pt.table = 1;
        p[i].pt.xn = 0;
    }

#ifdef CONFIG_ARM_32
    for ( i = 0; i < DOMHEAP_SECOND_PAGES; i++ )
    {
        p[first_table_offset(DOMHEAP_VIRT_START+i*FIRST_SIZE)]
            = pte_of_xenaddr((uintptr_t)(cpu0_dommap+i*LPAE_ENTRIES));
        p[first_table_offset(DOMHEAP_VIRT_START+i*FIRST_SIZE)].pt.table = 1;
    }
#endif

    /* Break up the Xen mapping into 4k pages and protect them separately. */
    for ( i = 0; i < LPAE_ENTRIES; i++ )
    {
        vaddr_t va = XEN_VIRT_START + (i << PAGE_SHIFT);

        if ( !is_kernel(va) )
            break;
        pte = pte_of_xenaddr(va);
        pte.pt.table = 1; /* 4k mappings always have this bit set */
        if ( is_kernel_text(va) || is_kernel_inittext(va) )
        {
            pte.pt.xn = 0;
            pte.pt.ro = 1;
        }
        if ( is_kernel_rodata(va) )
            pte.pt.ro = 1;
        xen_xenmap[i] = pte;
    }

    /* Initialise xen second level entries ... */
    /* ... Xen's text etc */

    pte = pte_of_xenaddr((vaddr_t)xen_xenmap);
    pte.pt.table = 1;
    xen_second[second_table_offset(XEN_VIRT_START)] = pte;

    /* ... Fixmap */
    pte = pte_of_xenaddr((vaddr_t)xen_fixmap);
    pte.pt.table = 1;
    xen_second[second_table_offset(FIXMAP_ADDR(0))] = pte;

#ifdef CONFIG_ARM_64
    ttbr = (uintptr_t) xen_pgtable + phys_offset;
#else
    ttbr = (uintptr_t) cpu0_pgtable + phys_offset;
#endif

    switch_ttbr(ttbr);

    xen_pt_enforce_wnx();

#ifdef CONFIG_ARM_32
    per_cpu(xen_pgtable, 0) = cpu0_pgtable;
    per_cpu(xen_dommap, 0) = cpu0_dommap;
#endif
}

static void clear_boot_pagetables(void)
{
    /*
     * Clear the copy of the boot pagetables. Each secondary CPU
     * rebuilds these itself (see head.S).
     */
    clear_table(boot_pgtable);
#ifdef CONFIG_ARM_64
    clear_table(boot_first);
    clear_table(boot_first_id);
#endif
    clear_table(boot_second);
    clear_table(boot_third);
}

#ifdef CONFIG_ARM_64
int init_secondary_pagetables(int cpu)
{
    clear_boot_pagetables();

    /* Set init_ttbr for this CPU coming up. All CPUs share a single set of
     * pagetables, but rewrite it each time for consistency with 32 bit. */
    init_ttbr = (uintptr_t) xen_pgtable + phys_offset;
    clean_dcache(init_ttbr);
    return 0;
}
#else
int init_secondary_pagetables(int cpu)
{
    lpae_t *first, *domheap, pte;
    int i;

    first = alloc_xenheap_page(); /* root == first level on 32-bit 3-level trie */
    domheap = alloc_xenheap_pages(get_order_from_pages(DOMHEAP_SECOND_PAGES), 0);

    if ( domheap == NULL || first == NULL )
    {
        printk("Not enough free memory for secondary CPU%d pagetables\n", cpu);
        free_xenheap_pages(domheap, get_order_from_pages(DOMHEAP_SECOND_PAGES));
        free_xenheap_page(first);
        return -ENOMEM;
    }

    /* Initialise root pagetable from root of boot tables */
    memcpy(first, cpu0_pgtable, PAGE_SIZE);

    /* Ensure the domheap has no stray mappings */
    memset(domheap, 0, DOMHEAP_SECOND_PAGES*PAGE_SIZE);

    /* Update the first level mapping to reference the local CPU's
     * domheap mapping pages. */
    for ( i = 0; i < DOMHEAP_SECOND_PAGES; i++ )
    {
        pte = mfn_to_xen_entry(virt_to_mfn(domheap+i*LPAE_ENTRIES),
                               MT_NORMAL);
        pte.pt.table = 1;
        write_pte(&first[first_table_offset(DOMHEAP_VIRT_START+i*FIRST_SIZE)], pte);
    }

    per_cpu(xen_pgtable, cpu) = first;
    per_cpu(xen_dommap, cpu) = domheap;

    clear_boot_pagetables();

    /* Set init_ttbr for this CPU coming up */
    init_ttbr = __pa(first);
    clean_dcache(init_ttbr);

    return 0;
}
#endif

/* MMU setup for secondary CPUS (which already have paging enabled) */
void mmu_init_secondary_cpu(void)
{
    xen_pt_enforce_wnx();
}

#ifdef CONFIG_ARM_32
/* Set up the xenheap: up to 1GB of contiguous, always-mapped memory. */
void __init setup_xenheap_mappings(unsigned long base_mfn,
                                   unsigned long nr_mfns)
{
    create_mappings(xen_second, XENHEAP_VIRT_START, base_mfn, nr_mfns, MB(32));

    /* Record where the xenheap is, for translation routines. */
    xenheap_virt_end = XENHEAP_VIRT_START + nr_mfns * PAGE_SIZE;
    xenheap_mfn_start = _mfn(base_mfn);
    xenheap_mfn_end = _mfn(base_mfn + nr_mfns);
}
#else /* CONFIG_ARM_64 */
void __init setup_xenheap_mappings(unsigned long base_mfn,
                                   unsigned long nr_mfns)
{
    lpae_t *first, pte;
    unsigned long mfn, end_mfn;
    vaddr_t vaddr;

    /* Align to previous 1GB boundary */
    mfn = base_mfn & ~((FIRST_SIZE>>PAGE_SHIFT)-1);

    /* First call sets the xenheap physical and virtual offset. */
    if ( mfn_eq(xenheap_mfn_start, INVALID_MFN) )
    {
        xenheap_mfn_start = _mfn(base_mfn);
        xenheap_base_pdx = mfn_to_pdx(_mfn(base_mfn));
        xenheap_virt_start = DIRECTMAP_VIRT_START +
            (base_mfn - mfn) * PAGE_SIZE;
    }

    if ( base_mfn < mfn_x(xenheap_mfn_start) )
        panic("cannot add xenheap mapping at %lx below heap start %lx\n",
              base_mfn, mfn_x(xenheap_mfn_start));

    end_mfn = base_mfn + nr_mfns;

    /*
     * Virtual address aligned to previous 1GB to match physical
     * address alignment done above.
     */
    vaddr = (vaddr_t)__mfn_to_virt(base_mfn) & FIRST_MASK;

    while ( mfn < end_mfn )
    {
        int slot = zeroeth_table_offset(vaddr);
        lpae_t *p = &xen_pgtable[slot];

        if ( p->pt.valid )
        {
            /* mfn_to_virt is not valid on the mfn of the first first-level
             * table (xenheap_first_first), since it is not within the
             * xenheap. */
            first = slot == xenheap_first_first_slot ?
                xenheap_first_first : mfn_to_virt(lpae_get_mfn(*p));
        }
        else if ( xenheap_first_first_slot == -1)
        {
            /* Use xenheap_first_first to bootstrap the mappings */
            first = xenheap_first_first;

            pte = pte_of_xenaddr((vaddr_t)xenheap_first_first);
            pte.pt.table = 1;
            write_pte(p, pte);

            xenheap_first_first_slot = slot;
        }
        else
        {
            mfn_t first_mfn = alloc_boot_pages(1, 1);

            clear_page(mfn_to_virt(first_mfn));
            pte = mfn_to_xen_entry(first_mfn, MT_NORMAL);
            pte.pt.table = 1;
            write_pte(p, pte);
            first = mfn_to_virt(first_mfn);
        }

        pte = mfn_to_xen_entry(_mfn(mfn), MT_NORMAL);
        /* TODO: Set pte.pt.contig when appropriate. */
        write_pte(&first[first_table_offset(vaddr)], pte);

        mfn += FIRST_SIZE>>PAGE_SHIFT;
        vaddr += FIRST_SIZE;
    }

    flush_xen_tlb_local();
}
#endif

/* Map a frame table to cover physical addresses ps through pe */
void __init setup_frametable_mappings(paddr_t ps, paddr_t pe)
{
    unsigned long nr_pdxs = mfn_to_pdx(mfn_add(maddr_to_mfn(pe), -1)) -
                            mfn_to_pdx(maddr_to_mfn(ps)) + 1;
    unsigned long frametable_size = nr_pdxs * sizeof(struct page_info);
    mfn_t base_mfn;
    const unsigned long mapping_size = frametable_size < MB(32) ? MB(2) : MB(32);
#ifdef CONFIG_ARM_64
    lpae_t *second, pte;
    unsigned long nr_second;
    mfn_t second_base;
    int i;
#endif

    frametable_base_pdx = mfn_to_pdx(maddr_to_mfn(ps));
    /* Round up to 2M or 32M boundary, as appropriate. */
    frametable_size = ROUNDUP(frametable_size, mapping_size);
    base_mfn = alloc_boot_pages(frametable_size >> PAGE_SHIFT, 32<<(20-12));

#ifdef CONFIG_ARM_64
    /* Compute the number of second level pages. */
    nr_second = ROUNDUP(frametable_size, FIRST_SIZE) >> FIRST_SHIFT;
    second_base = alloc_boot_pages(nr_second, 1);
    second = mfn_to_virt(second_base);
    for ( i = 0; i < nr_second; i++ )
    {
        clear_page(mfn_to_virt(mfn_add(second_base, i)));
        pte = mfn_to_xen_entry(mfn_add(second_base, i), MT_NORMAL);
        pte.pt.table = 1;
        write_pte(&xen_first[first_table_offset(FRAMETABLE_VIRT_START)+i], pte);
    }
    create_mappings(second, 0, mfn_x(base_mfn), frametable_size >> PAGE_SHIFT,
                    mapping_size);
#else
    create_mappings(xen_second, FRAMETABLE_VIRT_START, mfn_x(base_mfn),
                    frametable_size >> PAGE_SHIFT, mapping_size);
#endif

    memset(&frame_table[0], 0, nr_pdxs * sizeof(struct page_info));
    memset(&frame_table[nr_pdxs], -1,
           frametable_size - (nr_pdxs * sizeof(struct page_info)));

    frametable_virt_end = FRAMETABLE_VIRT_START + (nr_pdxs * sizeof(struct page_info));
}

void *__init arch_vmap_virt_end(void)
{
    return (void *)VMAP_VIRT_END;
}

/*
 * This function should only be used to remap device address ranges
 * TODO: add a check to verify this assumption
 */
void *ioremap_attr(paddr_t pa, size_t len, unsigned int attributes)
{
    mfn_t mfn = _mfn(PFN_DOWN(pa));
    unsigned int offs = pa & (PAGE_SIZE - 1);
    unsigned int nr = PFN_UP(offs + len);
    void *ptr = __vmap(&mfn, nr, 1, 1, attributes, VMAP_DEFAULT);

    if ( ptr == NULL )
        return NULL;

    return ptr + offs;
}

void *ioremap(paddr_t pa, size_t len)
{
    return ioremap_attr(pa, len, PAGE_HYPERVISOR_NOCACHE);
}

static int create_xen_table(lpae_t *entry)
{
    void *p;
    lpae_t pte;

    p = alloc_xenheap_page();
    if ( p == NULL )
        return -ENOMEM;
    clear_page(p);
    pte = mfn_to_xen_entry(virt_to_mfn(p), MT_NORMAL);
    pte.pt.table = 1;
    write_pte(entry, pte);
    return 0;
}

static lpae_t *xen_map_table(mfn_t mfn)
{
    /*
     * We may need to map the page table before map_domain_page() is
     * usable. The requirement here is that it must be usable as soon as
     * page-tables are allocated dynamically via alloc_boot_pages().
     *
     * We need to do the check on physical address rather than virtual
     * address to avoid truncation on Arm32. Therefore is_kernel() cannot
     * be used.
     */
    if ( system_state == SYS_STATE_early_boot )
    {
        if ( is_xen_fixed_mfn(mfn) )
        {
            /*
             * It is fine to demote the type because the size of Xen
             * will always fit in vaddr_t.
             */
            vaddr_t offset = mfn_to_maddr(mfn) - virt_to_maddr(&_start);

            return (lpae_t *)(XEN_VIRT_START + offset);
        }
    }

    return map_domain_page(mfn);
}

static void xen_unmap_table(const lpae_t *table)
{
    /*
     * During early boot, xen_map_table() will not use map_domain_page()
     * for page-tables residing in Xen binary. So skip the unmap part.
     */
    if ( system_state == SYS_STATE_early_boot && is_kernel(table) )
        return;

    unmap_domain_page(table);
}

#define XEN_TABLE_MAP_FAILED 0
#define XEN_TABLE_SUPER_PAGE 1
#define XEN_TABLE_NORMAL_PAGE 2

/*
 * Take the currently mapped table, find the corresponding entry,
 * and map the next table, if available.
 *
 * The read_only parameter indicates whether intermediate tables should
 * be allocated when not present: if it is set, a missing table is not
 * allocated and the walk fails instead.
 *
 * Return values:
 *  XEN_TABLE_MAP_FAILED: Either read_only was set and the entry
 *  was empty, or allocating a new page failed.
 *  XEN_TABLE_NORMAL_PAGE: next level mapped normally
 *  XEN_TABLE_SUPER_PAGE: The next entry points to a superpage.
 */
static int xen_pt_next_level(bool read_only, unsigned int level,
                             lpae_t **table, unsigned int offset)
{
    lpae_t *entry;
    int ret;
    mfn_t mfn;

    entry = *table + offset;

    if ( !lpae_is_valid(*entry) )
    {
        if ( read_only )
            return XEN_TABLE_MAP_FAILED;

        ret = create_xen_table(entry);
        if ( ret )
            return XEN_TABLE_MAP_FAILED;
    }

    /* The function xen_pt_next_level is never called at the 3rd level */
    if ( lpae_is_mapping(*entry, level) )
        return XEN_TABLE_SUPER_PAGE;

    mfn = lpae_get_mfn(*entry);

    xen_unmap_table(*table);
    *table = xen_map_table(mfn);

    return XEN_TABLE_NORMAL_PAGE;
}

/* Sanity check of the entry */
static bool xen_pt_check_entry(lpae_t entry, mfn_t mfn, unsigned int flags)
{
    /* Sanity check when modifying a page. */
    if ( (flags & _PAGE_PRESENT) && mfn_eq(mfn, INVALID_MFN) )
    {
        /* We don't allow modifying an invalid entry. */
        if ( !lpae_is_valid(entry) )
        {
            mm_printk("Modifying invalid entry is not allowed.\n");
            return false;
        }

        /* We don't allow changing memory attributes. */
        if ( entry.pt.ai != PAGE_AI_MASK(flags) )
        {
            mm_printk("Modifying memory attributes is not allowed (0x%x -> 0x%x).\n",
                      entry.pt.ai, PAGE_AI_MASK(flags));
            return false;
        }

        /* We don't allow modifying entry with contiguous bit set. */
        if ( entry.pt.contig )
        {
            mm_printk("Modifying entry with contiguous bit set is not allowed.\n");
            return false;
        }
    }
    /* Sanity check when inserting a page */
    else if ( flags & _PAGE_PRESENT )
    {
        /* We should be here with a valid MFN. */
        ASSERT(!mfn_eq(mfn, INVALID_MFN));

        /* We don't allow replacing any valid entry. */
        if ( lpae_is_valid(entry) )
        {
            mm_printk("Changing MFN for a valid entry is not allowed (%#"PRI_mfn" -> %#"PRI_mfn").\n",
                      mfn_x(lpae_get_mfn(entry)), mfn_x(mfn));
            return false;
        }
    }
    /* Sanity check when removing a page. */
    else if ( (flags & (_PAGE_PRESENT|_PAGE_POPULATE)) == 0 )
    {
        /* We should be here with an invalid MFN. */
        ASSERT(mfn_eq(mfn, INVALID_MFN));

        /* We don't allow removing page with contiguous bit set. */
        if ( entry.pt.contig )
        {
            mm_printk("Removing entry with contiguous bit set is not allowed.\n");
            return false;
        }
    }
    /* Sanity check when populating the page-table. No check so far. */
    else
    {
        ASSERT(flags & _PAGE_POPULATE);
        /* We should be here with an invalid MFN */
        ASSERT(mfn_eq(mfn, INVALID_MFN));
    }

    return true;
}

static int xen_pt_update_entry(mfn_t root, unsigned long virt,
                               mfn_t mfn, unsigned int flags)
{
    int rc;
    unsigned int level;
    /* We only support 4KB mapping (i.e level 3) for now */
    unsigned int target = 3;
    lpae_t *table;
    /*
     * The intermediate page tables are read-only when the MFN is not valid
     * and we are not populating page table.
     * This means we either modify permissions or remove an entry.
     */
    bool read_only = mfn_eq(mfn, INVALID_MFN) && !(flags & _PAGE_POPULATE);
    lpae_t pte, *entry;

    /* convenience aliases */
    DECLARE_OFFSETS(offsets, (paddr_t)virt);

    /* _PAGE_POPULATE and _PAGE_PRESENT should never be set together. */
    ASSERT((flags & (_PAGE_POPULATE|_PAGE_PRESENT)) != (_PAGE_POPULATE|_PAGE_PRESENT));

    table = xen_map_table(root);
    for ( level = HYP_PT_ROOT_LEVEL; level < target; level++ )
    {
        rc = xen_pt_next_level(read_only, level, &table, offsets[level]);
        if ( rc == XEN_TABLE_MAP_FAILED )
        {
            /*
             * We are here because xen_pt_next_level has failed to map
             * the intermediate page table (e.g. the table does not exist
             * and the pt is read-only). It is a valid case when
             * removing a mapping as it may not exist in the page table.
             * In this case, just ignore it.
             */
            if ( flags & (_PAGE_PRESENT|_PAGE_POPULATE) )
            {
                mm_printk("%s: Unable to map level %u\n", __func__, level);
                rc = -ENOENT;
                goto out;
            }
            else
            {
                rc = 0;
                goto out;
            }
        }
        else if ( rc != XEN_TABLE_NORMAL_PAGE )
            break;
    }

    if ( level != target )
    {
        mm_printk("%s: Shattering superpage is not supported\n", __func__);
        rc = -EOPNOTSUPP;
        goto out;
    }

    entry = table + offsets[level];

    rc = -EINVAL;
    if ( !xen_pt_check_entry(*entry, mfn, flags) )
        goto out;

    /* If we are only populating page-table, then we are done. */
    rc = 0;
    if ( flags & _PAGE_POPULATE )
        goto out;

    /* We are removing the page */
    if ( !(flags & _PAGE_PRESENT) )
        memset(&pte, 0x00, sizeof(pte));
    else
    {
        /* We are inserting a mapping => Create new pte. */
        if ( !mfn_eq(mfn, INVALID_MFN) )
        {
            pte = mfn_to_xen_entry(mfn, PAGE_AI_MASK(flags));

            /* Third level entries set pte.pt.table = 1 */
            pte.pt.table = 1;
        }
        else /* We are updating the permission => Copy the current pte. */
            pte = *entry;

        /* Set permission */
        pte.pt.ro = PAGE_RO_MASK(flags);
        pte.pt.xn = PAGE_XN_MASK(flags);
    }

    write_pte(entry, pte);

    rc = 0;

out:
    xen_unmap_table(table);

    return rc;
}

static DEFINE_SPINLOCK(xen_pt_lock);

static int xen_pt_update(unsigned long virt,
                         mfn_t mfn,
                         unsigned long nr_mfns,
                         unsigned int flags)
{
    int rc = 0;
    unsigned long addr = virt, addr_end = addr + nr_mfns * PAGE_SIZE;

    /*
     * For arm32, page-tables are different on each CPU. Yet, they share
     * some common mappings. It is assumed that only common mappings
     * will be modified with this function.
     *
     * XXX: Add a check.
     */
    const mfn_t root = virt_to_mfn(THIS_CPU_PGTABLE);

    /*
     * The hardware was configured to forbid mappings that are both
     * writeable and executable.
     * When modifying/creating a mapping (i.e. _PAGE_PRESENT is set),
     * prevent any update that would create one.
     */
    if ( (flags & _PAGE_PRESENT) && !PAGE_RO_MASK(flags) &&
         !PAGE_XN_MASK(flags) )
    {
        mm_printk("Mappings should not be both Writeable and Executable.\n");
        return -EINVAL;
    }

    if ( !IS_ALIGNED(virt, PAGE_SIZE) )
    {
        mm_printk("The virtual address is not aligned to the page-size.\n");
        return -EINVAL;
    }

    spin_lock(&xen_pt_lock);

    for ( ; addr < addr_end; addr += PAGE_SIZE )
    {
        rc = xen_pt_update_entry(root, addr, mfn, flags);
        if ( rc )
            break;

        if ( !mfn_eq(mfn, INVALID_MFN) )
            mfn = mfn_add(mfn, 1);
    }

    /*
     * Flush the TLBs even in case of failure because we may have
     * partially modified the PT. This will prevent any unexpected
     * behavior afterwards.
     */
    flush_xen_tlb_range_va(virt, PAGE_SIZE * nr_mfns);

    spin_unlock(&xen_pt_lock);

    return rc;
}

int map_pages_to_xen(unsigned long virt,
                     mfn_t mfn,
                     unsigned long nr_mfns,
                     unsigned int flags)
{
    return xen_pt_update(virt, mfn, nr_mfns, flags);
}

int populate_pt_range(unsigned long virt, unsigned long nr_mfns)
{
    return xen_pt_update(virt, INVALID_MFN, nr_mfns, _PAGE_POPULATE);
}

int destroy_xen_mappings(unsigned long v, unsigned long e)
{
    ASSERT(v <= e);
    return xen_pt_update(v, INVALID_MFN, (e - v) >> PAGE_SHIFT, 0);
}

int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int flags)
{
    ASSERT(s <= e);
    return xen_pt_update(s, INVALID_MFN, (e - s) >> PAGE_SHIFT, flags);
}

/* Release all __init and __initdata ranges to be reused */
void free_init_memory(void)
{
    paddr_t pa = virt_to_maddr(__init_begin);
    unsigned long len = __init_end - __init_begin;
    uint32_t insn;
    unsigned int i, nr = len / sizeof(insn);
    uint32_t *p;
    int rc;

    rc = modify_xen_mappings((unsigned long)__init_begin,
                             (unsigned long)__init_end, PAGE_HYPERVISOR_RW);
    if ( rc )
        panic("Unable to map RW the init section (rc = %d)\n", rc);

    /*
     * From now on, init will not be used for execution anymore,
     * so nuke the instruction cache to remove entries related to init.
     */
    invalidate_icache_local();

#ifdef CONFIG_ARM_32
    /* udf instruction i.e (see A8.8.247 in ARM DDI 0406C.c) */
    insn = 0xe7f000f0;
#else
    insn = AARCH64_BREAK_FAULT;
#endif
    p = (uint32_t *)__init_begin;
    for ( i = 0; i < nr; i++ )
        *(p + i) = insn;

    rc = destroy_xen_mappings((unsigned long)__init_begin,
                              (unsigned long)__init_end);
    if ( rc )
        panic("Unable to remove the init section (rc = %d)\n", rc);

    init_domheap_pages(pa, pa + len);
    printk("Freed %ldkB init memory.\n", (long)(__init_end-__init_begin)>>10);
}

void arch_dump_shared_mem_info(void)
{
}

int steal_page(
    struct domain *d, struct page_info *page, unsigned int memflags)
{
    return -EOPNOTSUPP;
}

int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
{
    ASSERT_UNREACHABLE();
    return 0;
}

unsigned long domain_get_maximum_gpfn(struct domain *d)
{
    return gfn_x(d->arch.p2m.max_mapped_gfn);
}

void share_xen_page_with_guest(struct page_info *page, struct domain *d,
                               enum XENSHARE_flags flags)
{
    if ( page_get_owner(page) == d )
        return;

    spin_lock(&d->page_alloc_lock);

    /* The incremented type count pins as writable or read-only. */
    page->u.inuse.type_info =
        (flags == SHARE_ro ? PGT_none : PGT_writable_page) | 1;

    page_set_owner(page, d);
    smp_wmb(); /* install valid domain ptr before updating refcnt. */
    ASSERT((page->count_info & ~PGC_xen_heap) == 0);

    /* Only add to the allocation list if the domain isn't dying. */
    if ( !d->is_dying )
    {
        page->count_info |= PGC_allocated | 1;
        if ( unlikely(d->xenheap_pages++ == 0) )
            get_knownalive_domain(d);
        page_list_add_tail(page, &d->xenpage_list);
    }

    spin_unlock(&d->page_alloc_lock);
}

int xenmem_add_to_physmap_one(
    struct domain *d,
    unsigned int space,
    union add_to_physmap_extra extra,
    unsigned long idx,
    gfn_t gfn)
{
    mfn_t mfn = INVALID_MFN;
    int rc;
    p2m_type_t t;
    struct page_info *page = NULL;

    switch ( space )
    {
    case XENMAPSPACE_grant_table:
        rc = gnttab_map_frame(d, idx, gfn, &mfn);
        if ( rc )
            return rc;

        t = p2m_ram_rw;

        break;
    case XENMAPSPACE_shared_info:
        if ( idx != 0 )
            return -EINVAL;

        mfn = virt_to_mfn(d->shared_info);
        t = p2m_ram_rw;

        break;
    case XENMAPSPACE_gmfn_foreign:
    {
        struct domain *od;
        p2m_type_t p2mt;

        od = get_pg_owner(extra.foreign_domid);
        if ( od == NULL )
            return -ESRCH;

        if ( od == d )
        {
            put_pg_owner(od);
            return -EINVAL;
        }

        rc = xsm_map_gmfn_foreign(XSM_TARGET, d, od);
        if ( rc )
        {
            put_pg_owner(od);
            return rc;
        }

        /* Take reference to the foreign domain page.
         * Reference will be released in XENMEM_remove_from_physmap */
        page = get_page_from_gfn(od, idx, &p2mt, P2M_ALLOC);
        if ( !page )
        {
            put_pg_owner(od);
            return -EINVAL;
        }

        if ( p2m_is_ram(p2mt) )
            t = (p2mt == p2m_ram_rw) ? p2m_map_foreign_rw : p2m_map_foreign_ro;
        else
        {
            put_page(page);
            put_pg_owner(od);
            return -EINVAL;
        }

        mfn = page_to_mfn(page);

        put_pg_owner(od);
        break;
    }
    case XENMAPSPACE_dev_mmio:
        rc = map_dev_mmio_region(d, gfn, 1, _mfn(idx));
        return rc;

    default:
        return -ENOSYS;
    }

    /* Map at new location. */
    rc = guest_physmap_add_entry(d, gfn, mfn, 0, t);

    /* If we fail to add the mapping, we need to drop the reference we
     * took earlier on foreign pages */
    if ( rc && space == XENMAPSPACE_gmfn_foreign )
    {
        ASSERT(page != NULL);
        put_page(page);
    }

    return rc;
}

long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    switch ( op )
    {
    /* XXX: memsharing not working yet */
    case XENMEM_get_sharing_shared_pages:
    case XENMEM_get_sharing_freed_pages:
        return 0;

    default:
        return -ENOSYS;
    }

    return 0;
}

struct domain *page_get_owner_and_reference(struct page_info *page)
{
    unsigned long x, y = page->count_info;
    struct domain *owner;

    do {
        x = y;
        /*
         * Count ==  0: Page is not allocated, so we cannot take a reference.
         * Count == -1: Reference count would wrap, which is invalid.
         */
        if ( unlikely(((x + 1) & PGC_count_mask) <= 1) )
            return NULL;
    }
    while ( (y = cmpxchg(&page->count_info, x, x + 1)) != x );

    owner = page_get_owner(page);
    ASSERT(owner);

    return owner;
}

void put_page(struct page_info *page)
{
    unsigned long nx, x, y = page->count_info;

    do {
        ASSERT((y & PGC_count_mask) != 0);
        x  = y;
        nx = x - 1;
    }
    while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );

    if ( unlikely((nx & PGC_count_mask) == 0) )
    {
        free_domheap_page(page);
    }
}

int get_page(struct page_info *page, struct domain *domain)
{
    struct domain *owner = page_get_owner_and_reference(page);

    if ( likely(owner == domain) )
        return 1;

    if ( owner != NULL )
        put_page(page);

    return 0;
}

/* Common code requires get_page_type and put_page_type.
 * We don't care about typecounts so we just do the minimum to make it
 * happy. */
int get_page_type(struct page_info *page, unsigned long type)
{
    return 1;
}

void put_page_type(struct page_info *page)
{
    return;
}

int create_grant_host_mapping(unsigned long addr, mfn_t frame,
                              unsigned int flags, unsigned int cache_flags)
{
    int rc;
    p2m_type_t t = p2m_grant_map_rw;

    if ( cache_flags  || (flags & ~GNTMAP_readonly) != GNTMAP_host_map )
        return GNTST_general_error;

    if ( flags & GNTMAP_readonly )
        t = p2m_grant_map_ro;

    rc = guest_physmap_add_entry(current->domain, gaddr_to_gfn(addr),
                                 frame, 0, t);

    if ( rc )
        return GNTST_general_error;
    else
        return GNTST_okay;
}

int replace_grant_host_mapping(unsigned long addr, mfn_t mfn,
                               unsigned long new_addr, unsigned int flags)
{
    gfn_t gfn = gaddr_to_gfn(addr);
    struct domain *d = current->domain;
    int rc;

    if ( new_addr != 0 || (flags & GNTMAP_contains_pte) )
        return GNTST_general_error;

    rc = guest_physmap_remove_page(d, gfn, mfn, 0);

    return rc ? GNTST_general_error : GNTST_okay;
}

bool is_iomem_page(mfn_t mfn)
{
    return !mfn_valid(mfn);
}

void clear_and_clean_page(struct page_info *page)
{
    void *p = __map_domain_page(page);

    clear_page(p);
    clean_dcache_va_range(p, PAGE_SIZE);
    unmap_domain_page(p);
}

unsigned long get_upper_mfn_bound(void)
{
    /* No memory hotplug yet, so current memory limit is the final one. */
    return max_page - 1;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */