/******************************************************************************
 * include/asm-x86/paging.h
 *
 * Common interface for paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _XEN_PAGING_H
#define _XEN_PAGING_H

#include <xen/mm.h>
#include <public/domctl.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/domain_page.h>
#include <asm/flushtlb.h>
#include <asm/domain.h>

/*****************************************************************************
 * Macros to tell which paging mode a domain is in */

#define PG_SH_shift    20
#define PG_HAP_shift   21
#define PG_SHF_shift   22
/* We're in one of the shadow modes */
#ifdef CONFIG_SHADOW_PAGING
#define PG_SH_enable   (1U << PG_SH_shift)
#define PG_SH_forced   (1U << PG_SHF_shift)
#else
#define PG_SH_enable   0
#define PG_SH_forced   0
#endif
#ifdef CONFIG_HVM
#define PG_HAP_enable  (1U << PG_HAP_shift)
#else
#define PG_HAP_enable  0
#endif

/* common paging mode bits */
#define PG_mode_shift  10
#ifdef CONFIG_HVM
/* Refcounts based on shadow tables instead of guest tables */
#define PG_refcounts   (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_mode_shift)
/* Xen does p2m translation, not guest */
#define PG_translate   (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_mode_shift)
/* Xen does not steal address space from the domain for its own bookkeeping;
 * requires VT or similar mechanisms */
#define PG_external    (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift)
#else
#define PG_refcounts   0
#define PG_translate   0
#define PG_external    0
#endif
/* Enable log dirty mode */
#define PG_log_dirty   (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift)

/* All paging modes. */
#define PG_MASK (PG_refcounts | PG_log_dirty | PG_translate | PG_external)

#define paging_mode_enabled(_d)   (!!(_d)->arch.paging.mode)
#define paging_mode_shadow(_d)    (!!((_d)->arch.paging.mode & PG_SH_enable))
#define paging_mode_sh_forced(_d) (!!((_d)->arch.paging.mode & PG_SH_forced))
#define paging_mode_hap(_d)       (!!((_d)->arch.paging.mode & PG_HAP_enable))

#define paging_mode_refcounts(_d) (!!((_d)->arch.paging.mode & PG_refcounts))
#define paging_mode_log_dirty(_d) (!!((_d)->arch.paging.mode & PG_log_dirty))
#define paging_mode_translate(_d) (!!((_d)->arch.paging.mode & PG_translate))
#define paging_mode_external(_d)  (!!((_d)->arch.paging.mode & PG_external))

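/*
 * Illustrative composition (assumed from typical usage, not mandated by
 * this header): d->arch.paging.mode is a bitwise OR of the flags above.
 * An HVM domain running on hardware-assisted paging would be expected to
 * carry something like
 *
 *     PG_HAP_enable | PG_refcounts | PG_translate | PG_external
 *
 * so that paging_mode_hap(), paging_mode_translate() and
 * paging_mode_external() are all true for it, with PG_log_dirty OR'ed in
 * only while dirty logging (e.g. for live migration) is active.
 */
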
/* flags used for paging debug */
#define PAGING_DEBUG_LOGDIRTY 0

/*****************************************************************************
 * Mode-specific entry points into the shadow code.
 *
 * These shouldn't be used directly by callers; rather use the functions
 * below which will indirect through this table as appropriate. */

struct shadow_paging_mode {
#ifdef CONFIG_SHADOW_PAGING
    void          (*detach_old_tables     )(struct vcpu *v);
#ifdef CONFIG_PV
    bool          (*write_guest_entry     )(struct vcpu *v, intpte_t *p,
                                            intpte_t new, mfn_t gmfn);
    bool          (*cmpxchg_guest_entry   )(struct vcpu *v, intpte_t *p,
                                            intpte_t *old, intpte_t new,
                                            mfn_t gmfn);
#endif
#ifdef CONFIG_HVM
    mfn_t         (*make_monitor_table    )(struct vcpu *v);
    void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
    int           (*guess_wrmap           )(struct vcpu *v,
                                            unsigned long vaddr, mfn_t gmfn);
    void          (*pagetable_dying       )(paddr_t gpa);
    void          (*trace_emul_write_val  )(const void *ptr, unsigned long vaddr,
                                            const void *src, unsigned int bytes);
#endif
#endif
    /* For outsiders to tell what mode we're in */
    unsigned int shadow_levels;
};


/************************************************/
/*        common paging interface               */
/************************************************/
struct paging_mode {
    int           (*page_fault            )(struct vcpu *v, unsigned long va,
                                            struct cpu_user_regs *regs);
    bool          (*invlpg                )(struct vcpu *v,
                                            unsigned long linear);
    unsigned long (*gva_to_gfn            )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long va,
                                            uint32_t *pfec);
    unsigned long (*p2m_ga_to_gfn         )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long cr3,
                                            paddr_t ga, uint32_t *pfec,
                                            unsigned int *page_order);
    void          (*update_cr3            )(struct vcpu *v, int do_locking,
                                            bool noflush);
    void          (*update_paging_modes   )(struct vcpu *v);
    int           (*write_p2m_entry       )(struct p2m_domain *p2m,
                                            unsigned long gfn,
                                            l1_pgentry_t *p, l1_pgentry_t new,
                                            unsigned int level);
    bool          (*flush_tlb             )(bool (*flush_vcpu)(void *ctxt,
                                                               struct vcpu *v),
                                            void *ctxt);

    unsigned int guest_levels;

    /* paging support extension */
    struct shadow_paging_mode shadow;
};

/*****************************************************************************
 * Log dirty code */

/* get the dirty bitmap for a specific range of pfns */
void paging_log_dirty_range(struct domain *d,
                            unsigned long begin_pfn,
                            unsigned long nr,
                            uint8_t *dirty_bitmap);

/* enable log dirty */
int paging_log_dirty_enable(struct domain *d, bool log_global);

/* log dirty initialization */
void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops);

/* mark a page as dirty */
void paging_mark_dirty(struct domain *d, mfn_t gmfn);
/* mark a page as dirty, taking the guest pfn as parameter */
void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn);

/* is this guest page dirty?
 * This is called from inside paging code, with the paging lock held. */
int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);

/*
 * Log-dirty radix tree indexing:
 *   All tree nodes are PAGE_SIZE bytes, mapped on-demand.
 *   Leaf nodes are simple bitmaps; 1 bit per guest pfn.
 *   Interior nodes are arrays of LOGDIRTY_NODE_ENTRIES mfns.
 * TODO: Dynamic radix tree height. Most guests will only need 2 levels.
 *       The fourth level is basically unusable on 32-bit Xen.
 * TODO2: Abstract out the radix-tree mechanics?
 */
#define LOGDIRTY_NODE_ENTRIES (1 << PAGETABLE_ORDER)
#define L1_LOGDIRTY_IDX(pfn) (pfn_x(pfn) & ((1 << (PAGE_SHIFT + 3)) - 1))
#define L2_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))
#define L3_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))
#define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))

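/*
 * Worked example (assuming the usual x86 values PAGE_SHIFT == 12 and
 * PAGETABLE_ORDER == 9, i.e. 4 KiB nodes with 512 entries each):
 * a leaf bitmap holds PAGE_SIZE * 8 == 2^15 bits, so L1_LOGDIRTY_IDX
 * selects one of 32768 bits and a single leaf covers 128 MiB of guest
 * memory.  Each L2 node then covers 512 * 128 MiB == 64 GiB, each L3
 * node 32 TiB, and the L4 level extends that by another factor of 512.
 */
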
/* VRAM dirty tracking support */
struct sh_dirty_vram {
    unsigned long begin_pfn;
    unsigned long end_pfn;
    paddr_t *sl1ma;
    uint8_t *dirty_bitmap;
    s_time_t last_dirty;
};

/*****************************************************************************
 * Entry points into the paging-assistance code */

/* Initialize the paging resources for a vcpu struct.  It is called by
 * vcpu_initialise() in domain.c */
void paging_vcpu_init(struct vcpu *v);

/* Set up the paging-assistance-specific parts of a domain struct at
 * start of day.  Called for every domain from arch_domain_create() */
int paging_domain_init(struct domain *d);

/* Handler for paging-control ops: operations from user-space to enable
 * and disable ephemeral shadow modes (test mode and log-dirty mode) and
 * manipulate the log-dirty bitmap. */
int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                  XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                  bool_t resuming);

/* Helper hypercall for dealing with continuations. */
long paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t));

/* Call when destroying a domain */
int paging_teardown(struct domain *d);

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d);

/* Enable an arbitrary paging-assistance mode.  Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode);

#define paging_get_hostmode(v)		((v)->arch.paging.mode)
#define paging_get_nestedmode(v)	((v)->arch.paging.nestedmode)
const struct paging_mode *paging_get_mode(struct vcpu *v);
void paging_update_nestedmode(struct vcpu *v);

/* Page fault handler
 * Called from pagefault handler in Xen, and from the HVM trap handlers
 * for pagefaults.  Returns 1 if this fault was an artefact of the
 * paging code (and the guest should retry) or 0 if it is not (and the
 * fault should be handled elsewhere or passed to the guest).
 *
 * Note: under shadow paging, this function handles all page faults;
 * however, for hardware-assisted paging, this function handles only
 * host page faults (i.e. nested page faults). */
static inline int
paging_fault(unsigned long va, struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    return paging_get_hostmode(v)->page_fault(v, va, regs);
}

/* Handle invlpg requests on vcpus. */
void paging_invlpg(struct vcpu *v, unsigned long va);

/*
 * Translate a guest virtual address to the frame number that the
 * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
 * tables don't map this address for this kind of access.
 * *pfec is used to determine which kind of access this is when
 * walking the tables.  The caller should set the PFEC_page_present bit
 * in *pfec; in the failure case, that bit will be cleared if appropriate.
 *
 * SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS:
 * The PFEC_insn_fetch flag is set only when NX or SMEP are enabled.
 */
unsigned long paging_gva_to_gfn(struct vcpu *v,
                                unsigned long va,
                                uint32_t *pfec);

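/*
 * Illustrative caller sketch (hypothetical, not code used elsewhere in
 * this header): to ask whether a guest linear address is mapped writable
 * by the guest's own pagetables, a caller would do something like
 *
 *     uint32_t pfec = PFEC_page_present | PFEC_write_access;
 *     unsigned long gfn = paging_gva_to_gfn(current, va, &pfec);
 *
 *     if ( gfn == gfn_x(INVALID_GFN) )
 *         ... inject #PF to the guest, using the updated *pfec ...
 *
 * where 'va' is the linear address being probed.
 */
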
/* Translate a guest address using a particular CR3 value.  This is used
 * by the nested HAP code, to walk the guest-supplied NPT tables as if
 * they were pagetables.
 * Use 'paddr_t' for the guest address so it won't overflow when an
 * L1 or L2 guest is in 32-bit PAE mode.
 * If the GFN returned is not INVALID_GFN, *page_order gives
 * the size of the superpage (if any) it was found in. */
static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
                                                 unsigned long cr3,
                                                 paddr_t ga,
                                                 uint32_t *pfec,
                                                 unsigned int *page_order)
{
    struct p2m_domain *p2m = v->domain->arch.p2m;
    return paging_get_hostmode(v)->p2m_ga_to_gfn(v, p2m, cr3, ga, pfec,
        page_order);
}

/* Update all the things that are derived from the guest's CR3.
 * Called when the guest changes CR3; the caller can then use v->arch.cr3
 * as the value to load into the host CR3 to schedule this vcpu */
static inline void paging_update_cr3(struct vcpu *v, bool noflush)
{
    paging_get_hostmode(v)->update_cr3(v, 1, noflush);
}

/* Update all the things that are derived from the guest's CR0/CR3/CR4.
 * Called to initialize paging structures if the paging mode
 * has changed, and when bringing up a VCPU for the first time. */
static inline void paging_update_paging_modes(struct vcpu *v)
{
    paging_get_hostmode(v)->update_paging_modes(v);
}

#ifdef CONFIG_PV

/*
 * Write a new value into the guest pagetable, and update the
 * paging-assistance state appropriately.  Returns false if we page-faulted,
 * true for success.
 */
static inline bool paging_write_guest_entry(
    struct vcpu *v, intpte_t *p, intpte_t new, mfn_t gmfn)
{
#ifdef CONFIG_SHADOW_PAGING
    if ( unlikely(paging_mode_shadow(v->domain)) && paging_get_hostmode(v) )
        return paging_get_hostmode(v)->shadow.write_guest_entry(v, p, new,
                                                                gmfn);
#endif
    return !__copy_to_user(p, &new, sizeof(new));
}


/*
 * Cmpxchg a new value into the guest pagetable, and update the
 * paging-assistance state appropriately.  Returns false if we page-faulted,
 * true if not.  N.B. caller should check the value of "old" to see if the
 * cmpxchg itself was successful.
 */
static inline bool paging_cmpxchg_guest_entry(
    struct vcpu *v, intpte_t *p, intpte_t *old, intpte_t new, mfn_t gmfn)
{
#ifdef CONFIG_SHADOW_PAGING
    if ( unlikely(paging_mode_shadow(v->domain)) && paging_get_hostmode(v) )
        return paging_get_hostmode(v)->shadow.cmpxchg_guest_entry(v, p, old,
                                                                  new, gmfn);
#endif
    return !cmpxchg_user(p, *old, new);
}

#endif /* CONFIG_PV */

/* Helper function that writes a pte in such a way that a concurrent read
 * never sees a half-written entry that has _PAGE_PRESENT set */
static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)
{
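    /*
     * On x86-64 l1_pgentry_t is a single 64-bit quantity, and a naturally
     * aligned 64-bit store is atomic, so a plain assignment suffices:
     * readers see either the old or the new PTE, never a mix of the two.
     */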
    *p = new;
}

/* Atomically write a P2M entry and update the paging-assistance state
 * appropriately.
 * Arguments: the p2m in question, the GFN whose mapping is being updated,
 * a pointer to the entry to be written, the new contents of the entry,
 * and the level in the p2m tree at which we are writing. */
struct p2m_domain;

int paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
                           l1_pgentry_t *p, l1_pgentry_t new,
                           unsigned int level);

/*
 * Called from the guest to indicate that a process is being
 * torn down and its pagetables will soon be discarded.
 */
void pagetable_dying(paddr_t gpa);

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d);
void paging_dump_vcpu_info(struct vcpu *v);

/* Set the pool of shadow pages to the required number of pages.
 * Input may be rounded up to a minimum number of pages, plus
 * space for the p2m table.
 * Returns 0 for success, non-zero for failure. */
int paging_set_allocation(struct domain *d, unsigned int pages,
                          bool *preempted);

/* Is gfn within maxphysaddr for the domain? */
static inline bool gfn_valid(const struct domain *d, gfn_t gfn)
{
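    /*
     * The shift discards the low (maxphysaddr - PAGE_SHIFT) bits, so the
     * result is non-zero exactly when the gfn lies at or above the
     * domain's maximum physical address; e.g. with maxphysaddr == 40 and
     * 4 KiB pages, gfns of 2^28 and up are rejected.
     */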
    return !(gfn_x(gfn) >> (d->arch.cpuid->extd.maxphysaddr - PAGE_SHIFT));
}

/* Maxphysaddr supportable by the paging infrastructure. */
static always_inline unsigned int paging_max_paddr_bits(const struct domain *d)
{
    unsigned int bits = paging_mode_hap(d) ? hap_paddr_bits : paddr_bits;

    if ( !IS_ENABLED(CONFIG_BIGMEM) && paging_mode_shadow(d) &&
         !is_pv_domain(d) )
    {
        /* Shadowed superpages store GFNs in 32-bit page_info fields. */
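        /* With 4 KiB pages this clamps the limit to 32 + PAGE_SHIFT = 44
         * address bits, i.e. 16 TiB of guest-physical address space. */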
        bits = min(bits, 32U + PAGE_SHIFT);
    }

    return bits;
}

static inline bool paging_flush_tlb(bool (*flush_vcpu)(void *ctxt,
                                                       struct vcpu *v),
                                    void *ctxt)
{
    return paging_get_hostmode(current)->flush_tlb(flush_vcpu, ctxt);
}

#endif /* _XEN_PAGING_H */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */