/******************************************************************************
 * include/asm-x86/paging.h
 *
 * Common interface for paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _XEN_PAGING_H
#define _XEN_PAGING_H

#include <xen/mm.h>
#include <public/domctl.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/domain_page.h>
#include <asm/flushtlb.h>
#include <asm/domain.h>

/*****************************************************************************
 * Macros to tell which paging mode a domain is in */

#define PG_SH_shift    20
#define PG_HAP_shift   21
#define PG_SHF_shift   22
/* We're in one of the shadow modes */
#ifdef CONFIG_SHADOW_PAGING
#define PG_SH_enable   (1U << PG_SH_shift)
#define PG_SH_forced   (1U << PG_SHF_shift)
#else
#define PG_SH_enable   0
#define PG_SH_forced   0
#endif
#ifdef CONFIG_HVM
#define PG_HAP_enable  (1U << PG_HAP_shift)
#else
#define PG_HAP_enable  0
#endif

/* common paging mode bits */
#define PG_mode_shift  10
#ifdef CONFIG_HVM
/* Refcounts based on shadow tables instead of guest tables */
#define PG_refcounts   (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_mode_shift)
/* Xen does p2m translation, not guest */
#define PG_translate   (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_mode_shift)
/* Xen does not steal address space from the domain for its own bookkeeping;
 * requires VT or similar mechanisms */
#define PG_external    (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift)
#else
#define PG_refcounts   0
#define PG_translate   0
#define PG_external    0
#endif
/* Enable log dirty mode */
#define PG_log_dirty   (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift)

/* All paging modes. */
#define PG_MASK        (PG_refcounts | PG_log_dirty | PG_translate | PG_external)

#define paging_mode_enabled(_d)   (!!(_d)->arch.paging.mode)
#define paging_mode_shadow(_d)    (!!((_d)->arch.paging.mode & PG_SH_enable))
#define paging_mode_sh_forced(_d) (!!((_d)->arch.paging.mode & PG_SH_forced))
#define paging_mode_hap(_d)       (!!((_d)->arch.paging.mode & PG_HAP_enable))

#define paging_mode_refcounts(_d) (!!((_d)->arch.paging.mode & PG_refcounts))
#define paging_mode_log_dirty(_d) (!!((_d)->arch.paging.mode & PG_log_dirty))
#define paging_mode_translate(_d) (!!((_d)->arch.paging.mode & PG_translate))
#define paging_mode_external(_d)  (!!((_d)->arch.paging.mode & PG_external))
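
/*
 * Illustrative sketch (not part of the interface): a caller that needs to
 * know whether dirty-page tracking is active on a translated (HAP or
 * shadow) guest could combine the predicates above along the lines of:
 *
 *     if ( paging_mode_enabled(d) && paging_mode_translate(d) &&
 *          paging_mode_log_dirty(d) )
 *         handle_dirty_tracking(d);      (hypothetical caller-side function)
 */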

/* flags used for paging debug */
#define PAGING_DEBUG_LOGDIRTY 0

/*****************************************************************************
 * Mode-specific entry points into the shadow code.
 *
 * These shouldn't be used directly by callers; rather use the functions
 * below which will indirect through this table as appropriate. */

struct shadow_paging_mode {
#ifdef CONFIG_SHADOW_PAGING
    void          (*detach_old_tables     )(struct vcpu *v);
#ifdef CONFIG_PV
    bool          (*write_guest_entry     )(struct vcpu *v, intpte_t *p,
                                            intpte_t new, mfn_t gmfn);
    bool          (*cmpxchg_guest_entry   )(struct vcpu *v, intpte_t *p,
                                            intpte_t *old, intpte_t new,
                                            mfn_t gmfn);
#endif
#ifdef CONFIG_HVM
    mfn_t         (*make_monitor_table    )(struct vcpu *v);
    void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
    int           (*guess_wrmap           )(struct vcpu *v,
                                            unsigned long vaddr, mfn_t gmfn);
    void          (*pagetable_dying       )(paddr_t gpa);
    void          (*trace_emul_write_val  )(const void *ptr, unsigned long vaddr,
                                            const void *src, unsigned int bytes);
#endif
#endif
    /* For outsiders to tell what mode we're in */
    unsigned int shadow_levels;
};


/************************************************/
/*          common paging interface             */
/************************************************/
struct paging_mode {
    int           (*page_fault            )(struct vcpu *v, unsigned long va,
                                            struct cpu_user_regs *regs);
    bool          (*invlpg                )(struct vcpu *v,
                                            unsigned long linear);
    unsigned long (*gva_to_gfn            )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long va,
                                            uint32_t *pfec);
    unsigned long (*p2m_ga_to_gfn         )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long cr3,
                                            paddr_t ga, uint32_t *pfec,
                                            unsigned int *page_order);
    void          (*update_cr3            )(struct vcpu *v, int do_locking,
                                            bool noflush);
    void          (*update_paging_modes   )(struct vcpu *v);
    int           (*write_p2m_entry       )(struct p2m_domain *p2m,
                                            unsigned long gfn,
                                            l1_pgentry_t *p, l1_pgentry_t new,
                                            unsigned int level);
    bool          (*flush_tlb             )(bool (*flush_vcpu)(void *ctxt,
                                                               struct vcpu *v),
                                            void *ctxt);

    unsigned int guest_levels;

    /* paging support extension */
    struct shadow_paging_mode shadow;
};

/*****************************************************************************
 * Log dirty code */

/* get the dirty bitmap for a specific range of pfns */
void paging_log_dirty_range(struct domain *d,
                            unsigned long begin_pfn,
                            unsigned long nr,
                            uint8_t *dirty_bitmap);

/* enable log dirty */
int paging_log_dirty_enable(struct domain *d, bool log_global);

/* log dirty initialization */
void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops);

/* mark a page as dirty */
void paging_mark_dirty(struct domain *d, mfn_t gmfn);
/* mark a page as dirty, taking the guest pfn as parameter */
void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn);

/* is this guest page dirty?
 * This is called from inside paging code, with the paging lock held. */
int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
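
/*
 * Illustrative flow (a sketch, not an API contract): once log-dirty mode
 * has been enabled, a guest write that Xen intercepts is typically
 * recorded with
 *
 *     paging_mark_dirty(d, gmfn);              (by machine frame number)
 *     paging_mark_pfn_dirty(d, _pfn(gfn));     (by guest frame number)
 *
 * and the accumulated bitmap is later collected by the toolstack via
 * paging_log_dirty_range() or the log-dirty domctl operations.
 */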

/*
 * Log-dirty radix tree indexing:
 *   All tree nodes are PAGE_SIZE bytes, mapped on-demand.
 *   Leaf nodes are simple bitmaps; 1 bit per guest pfn.
 *   Interior nodes are arrays of LOGDIRTY_NODE_ENTRIES mfns.
 * TODO: Dynamic radix tree height. Most guests will only need 2 levels.
 *       The fourth level is basically unusable on 32-bit Xen.
 * TODO2: Abstract out the radix-tree mechanics?
 */
#define LOGDIRTY_NODE_ENTRIES (1 << PAGETABLE_ORDER)
#define L1_LOGDIRTY_IDX(pfn) (pfn_x(pfn) & ((1 << (PAGE_SHIFT + 3)) - 1))
#define L2_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))
#define L3_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))
#define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))
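
/*
 * Worked example (assuming x86's PAGE_SHIFT == 12 and PAGETABLE_ORDER == 9):
 * a PAGE_SIZE leaf holds PAGE_SIZE * 8 == 1 << 15 bits, so L1_LOGDIRTY_IDX
 * takes the low 15 bits of the pfn and each interior level consumes a
 * further 9 bits.  For pfn 0x12345:
 *
 *     L1_LOGDIRTY_IDX == 0x12345 & 0x7fff        == 0x2345  (bit in leaf)
 *     L2_LOGDIRTY_IDX == (0x12345 >> 15) & 0x1ff == 2
 *     L3_LOGDIRTY_IDX == (0x12345 >> 24) & 0x1ff == 0
 *     L4_LOGDIRTY_IDX == (0x12345 >> 33) & 0x1ff == 0
 *
 * Four levels therefore cover guest pfns up to 1UL << (15 + 3 * 9).
 */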

/* VRAM dirty tracking support */
struct sh_dirty_vram {
    unsigned long begin_pfn;
    unsigned long end_pfn;
    paddr_t *sl1ma;
    uint8_t *dirty_bitmap;
    s_time_t last_dirty;
};

/*****************************************************************************
 * Entry points into the paging-assistance code */

/* Initialize the paging resource for the vcpu struct. It is called by
 * vcpu_initialise() in domain.c */
void paging_vcpu_init(struct vcpu *v);

/* Set up the paging-assistance-specific parts of a domain struct at
 * start of day.  Called for every domain from arch_domain_create() */
int paging_domain_init(struct domain *d);

/* Handler for paging-control ops: operations from user-space to enable
 * and disable ephemeral shadow modes (test mode and log-dirty mode) and
 * manipulate the log-dirty bitmap. */
int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                  XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                  bool_t resuming);

/* Helper hypercall for dealing with continuations. */
long paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t));

/* Call when destroying a domain */
int paging_teardown(struct domain *d);

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d);

/* Enable an arbitrary paging-assistance mode.  Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode);

#define paging_get_hostmode(v)    ((v)->arch.paging.mode)
#define paging_get_nestedmode(v)  ((v)->arch.paging.nestedmode)
const struct paging_mode *paging_get_mode(struct vcpu *v);
void paging_update_nestedmode(struct vcpu *v);

/* Page fault handler
 * Called from pagefault handler in Xen, and from the HVM trap handlers
 * for pagefaults.  Returns 1 if this fault was an artefact of the
 * paging code (and the guest should retry) or 0 if it is not (and the
 * fault should be handled elsewhere or passed to the guest).
 *
 * Note: under shadow paging, this function handles all page faults;
 * however, for hardware-assisted paging, this function handles only
 * host page faults (i.e. nested page faults). */
static inline int
paging_fault(unsigned long va, struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    return paging_get_hostmode(v)->page_fault(v, va, regs);
}
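
/*
 * Illustrative call site (a sketch only; the surrounding control flow is
 * the caller's): a fault handler would typically do something like
 *
 *     if ( paging_fault(addr, regs) )
 *         return;    (the paging code fixed things up; let the guest retry)
 *
 * and otherwise hand the fault on to other handlers or to the guest.
 */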

/* Handle invlpg requests on vcpus. */
void paging_invlpg(struct vcpu *v, unsigned long va);

/*
 * Translate a guest virtual address to the frame number that the
 * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
 * tables don't map this address for this kind of access.
 * *pfec is used to determine which kind of access this is when
 * walking the tables.  The caller should set the PFEC_page_present bit
 * in *pfec; in the failure case, that bit will be cleared if appropriate.
 *
 * SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS:
 * The PFEC_insn_fetch flag is set only when NX or SMEP are enabled.
 */
unsigned long paging_gva_to_gfn(struct vcpu *v,
                                unsigned long va,
                                uint32_t *pfec);
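
/*
 * Illustrative use (a sketch following the rules stated above; the error
 * path helper is the caller's own): probe a guest-virtual write target and
 * bail out if the guest tables do not map it.
 *
 *     uint32_t pfec = PFEC_page_present | PFEC_write_access;
 *     unsigned long gfn = paging_gva_to_gfn(v, va, &pfec);
 *
 *     if ( gfn == gfn_x(INVALID_GFN) )
 *         return inject_page_fault(v, va, pfec);   (hypothetical helper)
 */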

/* Translate a guest address using a particular CR3 value.  This is used
 * by the nested HAP code, to walk the guest-supplied NPT tables as if
 * they were pagetables.
 * Use 'paddr_t' for the guest address so it won't overflow when
 * l1 or l2 guest is in 32-bit PAE mode.
 * If the GFN returned is not INVALID_GFN, *page_order gives
 * the size of the superpage (if any) it was found in. */
static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
                                                 unsigned long cr3,
                                                 paddr_t ga,
                                                 uint32_t *pfec,
                                                 unsigned int *page_order)
{
    struct p2m_domain *p2m = v->domain->arch.p2m;
    return paging_get_hostmode(v)->p2m_ga_to_gfn(v, p2m, cr3, ga, pfec,
                                                 page_order);
}

/* Update all the things that are derived from the guest's CR3.
 * Called when the guest changes CR3; the caller can then use v->arch.cr3
 * as the value to load into the host CR3 to schedule this vcpu */
static inline void paging_update_cr3(struct vcpu *v, bool noflush)
{
    paging_get_hostmode(v)->update_cr3(v, 1, noflush);
}

/* Update all the things that are derived from the guest's CR0/CR3/CR4.
 * Called to initialize paging structures if the paging mode
 * has changed, and when bringing up a VCPU for the first time. */
static inline void paging_update_paging_modes(struct vcpu *v)
{
    paging_get_hostmode(v)->update_paging_modes(v);
}

#ifdef CONFIG_PV

/*
 * Write a new value into the guest pagetable, and update the
 * paging-assistance state appropriately.  Returns false if we page-faulted,
 * true for success.
 */
static inline bool paging_write_guest_entry(
    struct vcpu *v, intpte_t *p, intpte_t new, mfn_t gmfn)
{
#ifdef CONFIG_SHADOW_PAGING
    if ( unlikely(paging_mode_shadow(v->domain)) && paging_get_hostmode(v) )
        return paging_get_hostmode(v)->shadow.write_guest_entry(v, p, new,
                                                                gmfn);
#endif
    return !__copy_to_user(p, &new, sizeof(new));
}


/*
 * Cmpxchg a new value into the guest pagetable, and update the
 * paging-assistance state appropriately.  Returns false if we page-faulted,
 * true if not.  N.B. caller should check the value of "old" to see if the
 * cmpxchg itself was successful.
 */
static inline bool paging_cmpxchg_guest_entry(
    struct vcpu *v, intpte_t *p, intpte_t *old, intpte_t new, mfn_t gmfn)
{
#ifdef CONFIG_SHADOW_PAGING
    if ( unlikely(paging_mode_shadow(v->domain)) && paging_get_hostmode(v) )
        return paging_get_hostmode(v)->shadow.cmpxchg_guest_entry(v, p, old,
                                                                  new, gmfn);
#endif
    return !cmpxchg_user(p, *old, new);
}
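
/*
 * Illustrative caller pattern (sketch only): the return value says whether
 * the guest mapping could be accessed at all; whether the exchange actually
 * happened must be judged by comparing *old with the value the caller
 * expected, e.g.
 *
 *     intpte_t old = expected;
 *
 *     if ( !paging_cmpxchg_guest_entry(v, pte, &old, new, gmfn) )
 *         return -EFAULT;            (we faulted on the guest mapping)
 *     if ( old != expected )
 *         goto retry;                (lost a race; caller decides what next)
 */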

#endif /* CONFIG_PV */

/* Helper function that writes a pte in such a way that a concurrent read
 * never sees a half-written entry that has _PAGE_PRESENT set */
static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)
{
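    /*
     * A single aligned, word-sized store: on x86-64 such a store cannot be
     * observed half-written by another CPU, so no lock or explicit atomic
     * operation is needed here.
     */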
    *p = new;
}

/* Atomically write a P2M entry and update the paging-assistance state
 * appropriately.
 * Arguments: the p2m in question, the GFN whose mapping is being updated,
 * a pointer to the entry to be written, the new contents of the entry,
 * and the level in the p2m tree at which we are writing. */
struct p2m_domain;

int paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
                           l1_pgentry_t *p, l1_pgentry_t new,
                           unsigned int level);

/*
 * Called from the guest to indicate that a process is being
 * torn down and its pagetables will soon be discarded.
 */
void pagetable_dying(paddr_t gpa);

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d);
void paging_dump_vcpu_info(struct vcpu *v);

/* Set the pool of shadow pages to the required number of pages.
 * Input may be rounded up to the minimum number of pages, plus
 * space for the p2m table.
 * Returns 0 for success, non-zero for failure. */
int paging_set_allocation(struct domain *d, unsigned int pages,
                          bool *preempted);

/* Is gfn within maxphysaddr for the domain? */
static inline bool gfn_valid(const struct domain *d, gfn_t gfn)
{
    return !(gfn_x(gfn) >> (d->arch.cpuid->extd.maxphysaddr - PAGE_SHIFT));
}
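
/*
 * For example (assuming x86's PAGE_SHIFT == 12): a domain reporting
 * maxphysaddr == 36 accepts any gfn below 1UL << (36 - 12), i.e. below
 * 0x1000000; gfn_valid() returns false for anything at or above that.
 */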

/* Maxphysaddr supportable by the paging infrastructure. */
static always_inline unsigned int paging_max_paddr_bits(const struct domain *d)
{
    unsigned int bits = paging_mode_hap(d) ? hap_paddr_bits : paddr_bits;

    if ( !IS_ENABLED(CONFIG_BIGMEM) && paging_mode_shadow(d) &&
         !is_pv_domain(d) )
    {
        /* Shadowed superpages store GFNs in 32-bit page_info fields. */
        bits = min(bits, 32U + PAGE_SHIFT);
    }

    return bits;
}

static inline bool paging_flush_tlb(bool (*flush_vcpu)(void *ctxt,
                                                       struct vcpu *v),
                                    void *ctxt)
{
    return paging_get_hostmode(current)->flush_tlb(flush_vcpu, ctxt);
}

#endif /* _XEN_PAGING_H */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */