/******************************************************************************
 * include/asm-x86/p2m.h
 *
 * physical-to-machine mappings for automatically-translated domains.
 *
 * Copyright (c) 2011 GridCentric Inc. (Andres Lagar-Cavilla)
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _XEN_ASM_X86_P2M_H
#define _XEN_ASM_X86_P2M_H

#include <xen/paging.h>
#include <xen/mem_access.h>
#include <asm/mem_sharing.h>
#include <asm/page.h>    /* for pagetable_t */

extern bool_t opt_hap_1gb, opt_hap_2mb;

/*
 * The upper levels of the p2m pagetable always contain full rights; all
 * variation in the access control bits is made in the level-1 PTEs.
 *
 * In addition to the phys-to-machine translation, each p2m PTE contains
 * *type* information about the gfn it translates, helping Xen to decide
 * on the correct course of action when handling a page-fault to that
 * guest frame.  We store the type in the "available" bits of the PTEs
 * in the table, which gives us 8 possible types on 32-bit systems.
 * Further expansions of the type system will only be supported on
 * 64-bit Xen.
 */

/*
 * AMD IOMMU: When we share the p2m table with the IOMMU, bits 52 to 58 of
 * the PTE must be zero; otherwise the hardware generates IO page faults
 * when a device accesses those pages.  Therefore, p2m_ram_rw has to be
 * defined as 0.
 */
typedef enum {
    p2m_ram_rw = 0,             /* Normal read/write guest RAM */
    p2m_invalid = 1,            /* Nothing mapped here */
    p2m_ram_logdirty = 2,       /* Temporarily read-only for log-dirty */
    p2m_ram_ro = 3,             /* Read-only; writes are silently dropped */
    p2m_mmio_dm = 4,            /* Reads and writes go to the device model */
    p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
    p2m_populate_on_demand = 6, /* Place-holder for empty memory */

    /* Although these are defined in all builds, they can only
     * be used in 64-bit builds */
    p2m_grant_map_rw = 7,         /* Read/write grant mapping */
    p2m_grant_map_ro = 8,         /* Read-only grant mapping */
    p2m_ram_paging_out = 9,       /* Memory that is being paged out */
    p2m_ram_paged = 10,           /* Memory that has been paged out */
    p2m_ram_paging_in = 11,       /* Memory that is being paged in */
    p2m_ram_shared = 12,          /* Shared or sharable memory */
    p2m_ram_broken = 13,          /* Broken page; accesses crash the domain */
    p2m_map_foreign  = 14,        /* RAM pages from a foreign domain */
    p2m_ioreq_server = 15,        /* Pages whose accesses are emulated by an ioreq server */
} p2m_type_t;

/* Modifiers to the query */
typedef unsigned int p2m_query_t;
#define P2M_ALLOC    (1u<<0)   /* Populate PoD and paged-out entries */
#define P2M_UNSHARE  (1u<<1)   /* Break CoW sharing */

/* We use bitmaps and masks to handle groups of types */
#define p2m_to_mask(_t) (1UL << (_t))

/* RAM types, which map to real machine frames */
#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw)                \
                       | p2m_to_mask(p2m_ram_logdirty)        \
                       | p2m_to_mask(p2m_ram_ro)              \
                       | p2m_to_mask(p2m_ram_paging_out)      \
                       | p2m_to_mask(p2m_ram_paged)           \
                       | p2m_to_mask(p2m_ram_paging_in)       \
                       | p2m_to_mask(p2m_ram_shared)          \
                       | p2m_to_mask(p2m_ioreq_server))

/* Types that represent a physmap hole that is ok to replace with a shared
 * entry */
#define P2M_HOLE_TYPES (p2m_to_mask(p2m_mmio_dm)        \
                       | p2m_to_mask(p2m_invalid)       \
                       | p2m_to_mask(p2m_ram_paging_in) \
                       | p2m_to_mask(p2m_ram_paged))

/* Grant mapping types, which map to a real machine frame in another
 * VM */
#define P2M_GRANT_TYPES (p2m_to_mask(p2m_grant_map_rw)  \
                         | p2m_to_mask(p2m_grant_map_ro) )

/* MMIO types, which don't have to map to anything in the frametable */
#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm)        \
                        | p2m_to_mask(p2m_mmio_direct))

/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
                      | p2m_to_mask(p2m_ram_ro)         \
                      | p2m_to_mask(p2m_grant_map_ro)   \
                      | p2m_to_mask(p2m_ram_shared))

/* Write-discard types, for which write operations are silently discarded */
#define P2M_DISCARD_WRITE_TYPES (p2m_to_mask(p2m_ram_ro)     \
                      | p2m_to_mask(p2m_grant_map_ro))

/* Types that can be subject to bulk transitions. */
#define P2M_CHANGEABLE_TYPES (p2m_to_mask(p2m_ram_rw) \
                              | p2m_to_mask(p2m_ram_logdirty) \
                              | p2m_to_mask(p2m_ioreq_server) )

#define P2M_POD_TYPES (p2m_to_mask(p2m_populate_on_demand))

/* Pageable types */
#define P2M_PAGEABLE_TYPES (p2m_to_mask(p2m_ram_rw) \
                            | p2m_to_mask(p2m_ram_logdirty) )

#define P2M_PAGING_TYPES (p2m_to_mask(p2m_ram_paging_out)        \
                          | p2m_to_mask(p2m_ram_paged)           \
                          | p2m_to_mask(p2m_ram_paging_in))

#define P2M_PAGED_TYPES (p2m_to_mask(p2m_ram_paged))

/* Shared types */
/* XXX: Sharable types could include p2m_ram_ro too, but we would need to
 * reinit the type correctly after fault */
#define P2M_SHARABLE_TYPES (p2m_to_mask(p2m_ram_rw) \
                            | p2m_to_mask(p2m_ram_logdirty) )
#define P2M_SHARED_TYPES   (p2m_to_mask(p2m_ram_shared))

/* Valid types not necessarily associated with a (valid) MFN. */
#define P2M_INVALID_MFN_TYPES (P2M_POD_TYPES                  \
                               | p2m_to_mask(p2m_mmio_direct) \
                               | P2M_PAGING_TYPES)

/* Broken type: the frame backing this pfn has failed in hardware
 * and must not be touched. */
#define P2M_BROKEN_TYPES (p2m_to_mask(p2m_ram_broken))

/* Useful predicates */
#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
#define p2m_is_hole(_t) (p2m_to_mask(_t) & P2M_HOLE_TYPES)
#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
#define p2m_is_discard_write(_t) (p2m_to_mask(_t) & P2M_DISCARD_WRITE_TYPES)
#define p2m_is_changeable(_t) (p2m_to_mask(_t) & P2M_CHANGEABLE_TYPES)
#define p2m_is_pod(_t) (p2m_to_mask(_t) & P2M_POD_TYPES)
#define p2m_is_grant(_t) (p2m_to_mask(_t) & P2M_GRANT_TYPES)
/* Grant types are *not* considered valid, because they can be
   unmapped at any time and, unless you happen to be the shadow or p2m
   implementations, there's no way of synchronising against that. */
#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
#define p2m_has_emt(_t)  (p2m_to_mask(_t) & (P2M_RAM_TYPES | p2m_to_mask(p2m_mmio_direct)))
#define p2m_is_pageable(_t) (p2m_to_mask(_t) & P2M_PAGEABLE_TYPES)
#define p2m_is_paging(_t)   (p2m_to_mask(_t) & P2M_PAGING_TYPES)
#define p2m_is_paged(_t)    (p2m_to_mask(_t) & P2M_PAGED_TYPES)
#define p2m_is_sharable(_t) (p2m_to_mask(_t) & P2M_SHARABLE_TYPES)
#define p2m_is_shared(_t)   (p2m_to_mask(_t) & P2M_SHARED_TYPES)
#define p2m_is_broken(_t)   (p2m_to_mask(_t) & P2M_BROKEN_TYPES)
#define p2m_is_foreign(_t)  (p2m_to_mask(_t) & p2m_to_mask(p2m_map_foreign))

#define p2m_is_any_ram(_t)  (p2m_to_mask(_t) &                   \
                             (P2M_RAM_TYPES | P2M_GRANT_TYPES |  \
                              p2m_to_mask(p2m_map_foreign)))

#define p2m_allows_invalid_mfn(t) (p2m_to_mask(t) & P2M_INVALID_MFN_TYPES)
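
/*
 * Illustrative (non-normative) example of how the type predicates above are
 * typically consumed after a lookup; everything other than the predicates and
 * get_gfn_query_unlocked() below is hypothetical:
 *
 *     p2m_type_t t;
 *     mfn_t mfn = get_gfn_query_unlocked(d, gfn, &t);
 *
 *     if ( p2m_is_ram(t) && !p2m_is_readonly(t) )
 *         ... treat as writable guest RAM ...
 *     else if ( p2m_is_mmio(t) )
 *         ... hand the access to MMIO emulation ...
 */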

typedef enum {
    p2m_host,
    p2m_nested,
    p2m_alternate,
} p2m_class_t;

/* Per-p2m-table state */
struct p2m_domain {
    /* Lock that protects updates to the p2m */
    mm_rwlock_t           lock;

    /* Shadow translated domain: p2m mapping */
    pagetable_t        phys_table;

    /*
     * Same as a domain's dirty_cpumask but limited to
     * this p2m and those physical cpus whose vcpus are in
     * guest mode.
     */
    cpumask_var_t      dirty_cpumask;

    struct domain     *domain;   /* back pointer to domain */

    p2m_class_t       p2m_class; /* host/nested/alternate */

#ifdef CONFIG_HVM
    /* Nested p2ms only: nested p2m base value that this p2m shadows.
     * This can be cleared to P2M_BASE_EADDR under the per-p2m lock but
     * needs both the per-p2m lock and the per-domain nestedp2m lock
     * to set it to any other value. */
#define P2M_BASE_EADDR     (~0ULL)
    uint64_t           np2m_base;
    uint64_t           np2m_generation;

    /* Nested p2ms: linked list of np2ms allocated to this domain.
     * The host p2m holds the head of the list and the np2ms are
     * threaded on in LRU order. */
    struct list_head   np2m_list;
#endif

    /* Host p2m: Log-dirty ranges registered for the domain. */
    struct rangeset   *logdirty_ranges;

    /* Host p2m: Global log-dirty mode enabled for the domain. */
    bool               global_logdirty;

    /* Host p2m: when this flag is set, don't flush all the nested-p2m
     * tables on every host-p2m change.  The setter of this flag
     * is responsible for performing the full flush before releasing the
     * host p2m's lock. */
    int                defer_nested_flush;

#ifdef CONFIG_HVM
    /* Alternate p2m: count of vcpus currently using this p2m. */
    atomic_t           active_vcpus;
#endif

    /* Pages used to construct the p2m */
    struct page_list_head pages;

    int                (*set_entry)(struct p2m_domain *p2m,
                                    gfn_t gfn,
                                    mfn_t mfn, unsigned int page_order,
                                    p2m_type_t p2mt,
                                    p2m_access_t p2ma,
                                    int sve);
    mfn_t              (*get_entry)(struct p2m_domain *p2m,
                                    gfn_t gfn,
                                    p2m_type_t *p2mt,
                                    p2m_access_t *p2ma,
                                    p2m_query_t q,
                                    unsigned int *page_order,
                                    bool_t *sve);
    int                (*recalc)(struct p2m_domain *p2m,
                                 unsigned long gfn);
    void               (*enable_hardware_log_dirty)(struct p2m_domain *p2m);
    void               (*disable_hardware_log_dirty)(struct p2m_domain *p2m);
    void               (*flush_hardware_cached_dirty)(struct p2m_domain *p2m);
    void               (*change_entry_type_global)(struct p2m_domain *p2m,
                                                   p2m_type_t ot,
                                                   p2m_type_t nt);
    int                (*change_entry_type_range)(struct p2m_domain *p2m,
                                                  p2m_type_t ot, p2m_type_t nt,
                                                  unsigned long first_gfn,
                                                  unsigned long last_gfn);
    void               (*memory_type_changed)(struct p2m_domain *p2m);

    int                (*write_p2m_entry)(struct p2m_domain *p2m,
                                          unsigned long gfn, l1_pgentry_t *p,
                                          l1_pgentry_t new, unsigned int level);
    long               (*audit_p2m)(struct p2m_domain *p2m);

    /*
     * P2M updates may require TLBs to be flushed (invalidated).
     *
     * If 'defer_flush' is set, flushes may be deferred by setting
     * 'need_flush' and then flushing in 'tlb_flush()'.
     *
     * 'tlb_flush()' is only called if 'need_flush' was set.
     *
     * If a flush may be being deferred but an immediate flush is
     * required (e.g., if a page is being freed to a pool other than the
     * domheap), call p2m_tlb_flush_sync().
     */
    void (*tlb_flush)(struct p2m_domain *p2m);
    unsigned int defer_flush;
    bool_t need_flush;

    /* Default P2M access type for each page in the domain: new pages,
     * swapped in pages, cleared pages, and pages that are ambiguously
     * retyped get this access type.  See definition of p2m_access_t. */
    p2m_access_t default_access;

    /* If true, and an access fault comes in and there is no vm_event listener,
     * pause domain.  Otherwise, remove access restrictions. */
    bool_t       access_required;

    /* Highest guest frame that's ever been mapped in the p2m */
    unsigned long max_mapped_pfn;

    /*
     * Alternate p2ms only: range of gfns for which the underlying
     * mfn may have duplicate mappings
     */
    unsigned long min_remapped_gfn;
    unsigned long max_remapped_gfn;

#ifdef CONFIG_HVM
    /* Populate-on-demand variables
     * All variables are protected with the pod lock. We cannot rely on
     * the p2m lock if it's turned into a fine-grained lock.
     * We only use the domain page_alloc lock for additions and
     * deletions to the domain's page list. Because we use it nested
     * within the PoD lock, we enforce its ordering (by remembering
     * the unlock level in the arch_domain sub struct). */
    struct {
        struct page_list_head super,   /* List of superpages                */
                         single;       /* Non-super lists                   */
        long             count,        /* # of pages in cache lists         */
                         entry_count;  /* # of pages in p2m marked pod      */
        gfn_t            reclaim_single; /* Last gfn of a scan */
        gfn_t            max_guest;    /* gfn of max guest demand-populate */

        /*
         * Tracking of the most recently populated PoD pages, for eager
         * reclamation.
         */
        struct pod_mrp_list {
#define NR_POD_MRP_ENTRIES 32

/* Encode ORDER_2M superpage in top bit of GFN */
#define POD_LAST_SUPERPAGE (gfn_x(INVALID_GFN) & ~(gfn_x(INVALID_GFN) >> 1))

            unsigned long list[NR_POD_MRP_ENTRIES];
            unsigned int idx;
        } mrp;
        mm_lock_t        lock;         /* Locking of private pod structs,   *
                                        * not relying on the p2m lock.      */
    } pod;
#endif

    union {
        struct ept_data ept;
        /* NPT-equivalent structure could be added here. */
    };

    struct {
        spinlock_t lock;
        /*
         * ioreq server that is responsible for the emulation of
         * gfns with a specific p2m type (for now, p2m_ioreq_server).
         */
        struct hvm_ioreq_server *server;
        /*
         * flags specifies whether read, write or both operations
         * are to be emulated by an ioreq server.
         */
        unsigned int flags;
        unsigned long entry_count;
    } ioreq;
};

/* get host p2m table */
#define p2m_get_hostp2m(d)      ((d)->arch.p2m)

/* All common type definitions should live ahead of this inclusion. */
#ifdef _XEN_P2M_COMMON_H
# error "xen/p2m-common.h should not be included directly"
#endif
#include <xen/p2m-common.h>

/*
 * Updates the vCPU's np2m to match its np2m_base in VMCx12 and returns that np2m.
 */
struct p2m_domain *p2m_get_nestedp2m(struct vcpu *v);
/* Similar to the above except that the returned p2m is still write-locked */
struct p2m_domain *p2m_get_nestedp2m_locked(struct vcpu *v);

/* If the vcpu is in host mode then behaviour matches p2m_get_hostp2m().
 * If the vcpu is in guest mode then behaviour matches p2m_get_nestedp2m().
 */
struct p2m_domain *p2m_get_p2m(struct vcpu *v);

#define NP2M_SCHEDLE_IN  0
#define NP2M_SCHEDLE_OUT 1

#ifdef CONFIG_HVM
void np2m_schedule(int dir);
#else
static inline void np2m_schedule(int dir) {}
#endif

static inline bool_t p2m_is_hostp2m(const struct p2m_domain *p2m)
{
    return p2m->p2m_class == p2m_host;
}

static inline bool_t p2m_is_nestedp2m(const struct p2m_domain *p2m)
{
    return p2m->p2m_class == p2m_nested;
}

static inline bool_t p2m_is_altp2m(const struct p2m_domain *p2m)
{
    return p2m->p2m_class == p2m_alternate;
}

#define p2m_get_pagetable(p2m)  ((p2m)->phys_table)

/*
 * Ensure any deferred p2m TLB flush has been completed on all VCPUs.
 */
void p2m_tlb_flush_sync(struct p2m_domain *p2m);
void p2m_unlock_and_tlb_flush(struct p2m_domain *p2m);

/**** p2m query accessors. They lock p2m_lock, and thus serialize
 * lookups wrt modifications. They _do not_ release the lock on exit.
 * After calling any of the variants below, caller needs to use
 * put_gfn. ****/

mfn_t __nonnull(3, 4) __get_gfn_type_access(
    struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t,
    p2m_access_t *a, p2m_query_t q, unsigned int *page_order, bool_t locked);

/* Read a particular P2M table, mapping pages as we go.  Most callers
 * should _not_ call this directly; use the other get_gfn* functions
 * below unless you know you want to walk a p2m that isn't a domain's
 * main one.
 * If the lookup succeeds, the return value is != INVALID_MFN and
 * *page_order is filled in with the order of the superpage (if any) that
 * the entry was found in.  */
static inline mfn_t __nonnull(3, 4) get_gfn_type_access(
    struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t,
    p2m_access_t *a, p2m_query_t q, unsigned int *page_order)
{
    return __get_gfn_type_access(p2m, gfn, t, a, q, page_order, true);
}

/* General conversion function from gfn to mfn */
static inline mfn_t __nonnull(3) get_gfn_type(
    struct domain *d, unsigned long gfn, p2m_type_t *t, p2m_query_t q)
{
    p2m_access_t a;
    return get_gfn_type_access(p2m_get_hostp2m(d), gfn, t, &a, q, NULL);
}

/* Syntactic sugar: most callers will use one of these. */
#define get_gfn(d, g, t)         get_gfn_type((d), (g), (t), P2M_ALLOC)
#define get_gfn_query(d, g, t)   get_gfn_type((d), (g), (t), 0)
#define get_gfn_unshare(d, g, t) get_gfn_type((d), (g), (t), \
                                              P2M_ALLOC | P2M_UNSHARE)

/* Will release the p2m_lock for this gfn entry. */
void __put_gfn(struct p2m_domain *p2m, unsigned long gfn);

#define put_gfn(d, gfn) __put_gfn(p2m_get_hostp2m((d)), (gfn))
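
/*
 * Illustrative sketch (not taken from any real caller) of the locking
 * discipline described above: every get_gfn* call must be paired with a
 * put_gfn on the same gfn, even if the lookup returned INVALID_MFN.
 *
 *     p2m_type_t t;
 *     mfn_t mfn = get_gfn(d, gfn, &t);
 *
 *     if ( mfn_valid(mfn) && p2m_is_ram(t) )
 *         ... operate on the frame while its p2m entry is locked ...
 *
 *     put_gfn(d, gfn);
 */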

/* The intent of the "unlocked" accessors is to have the caller not worry
 * about put_gfn. They apply to very specific situations: debug printk's,
 * dumps during a domain crash, or to peek at a p2m entry/type. The caller
 * is not holding the p2m entry exclusively during or after calling this.
 *
 * This is also used in the shadow code whenever the paging lock is
 * held -- in those cases, the caller is protected against concurrent
 * p2m updates by the fact that shadow_write_p2m_entry() also takes
 * the paging lock.
 *
 * Note that an unlocked accessor only makes sense for a "query" lookup.
 * Any other type of query can cause a change in the p2m and may need to
 * perform locking.
 */
static inline mfn_t get_gfn_query_unlocked(struct domain *d,
                                           unsigned long gfn,
                                           p2m_type_t *t)
{
    p2m_access_t a;
    return __get_gfn_type_access(p2m_get_hostp2m(d), gfn, t, &a, 0, NULL, 0);
}

/* Atomically look up a GFN and take a reference count on the backing page.
 * This makes sure the page doesn't get freed (or shared) underfoot,
 * and should be used by any path that intends to write to the backing page.
 * Returns NULL if the page is not backed by RAM.
 * The caller is responsible for calling put_page() afterwards. */
struct page_info *p2m_get_page_from_gfn(struct p2m_domain *p2m, gfn_t gfn,
                                        p2m_type_t *t, p2m_access_t *a,
                                        p2m_query_t q);

static inline struct page_info *get_page_from_gfn(
    struct domain *d, unsigned long gfn, p2m_type_t *t, p2m_query_t q)
{
    struct page_info *page;

    if ( paging_mode_translate(d) )
        return p2m_get_page_from_gfn(p2m_get_hostp2m(d), _gfn(gfn), t, NULL, q);

    /* Non-translated guests see 1-1 RAM / MMIO mappings everywhere */
    if ( t )
        *t = likely(d != dom_io) ? p2m_ram_rw : p2m_mmio_direct;
    page = mfn_to_page(_mfn(gfn));
    return mfn_valid(_mfn(gfn)) && get_page(page, d) ? page : NULL;
}
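
/*
 * Illustrative sketch (hypothetical caller) of the reference-counting
 * contract above: a non-NULL return must eventually be balanced by
 * put_page().
 *
 *     p2m_type_t t;
 *     struct page_info *page = get_page_from_gfn(d, gfn, &t, P2M_ALLOC);
 *
 *     if ( page )
 *     {
 *         ... access the page; it cannot be freed or shared underfoot ...
 *         put_page(page);
 *     }
 */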

/* General conversion function from mfn to gfn */
static inline gfn_t mfn_to_gfn(const struct domain *d, mfn_t mfn)
{
    if ( paging_mode_translate(d) )
        return _gfn(get_gpfn_from_mfn(mfn_x(mfn)));
    else
        return _gfn(mfn_x(mfn));
}

#ifdef CONFIG_HVM
#define AP2MGET_prepopulate true
#define AP2MGET_query false

/*
 * Looks up the altp2m entry. If the entry is not found it looks up the
 * entry in the hostp2m.
 * The prepopulate param is used to set the found entry in the altp2m.
 */
int altp2m_get_effective_entry(struct p2m_domain *ap2m, gfn_t gfn, mfn_t *mfn,
                               p2m_type_t *t, p2m_access_t *a,
                               bool prepopulate);
#endif

/* Deadlock-avoidance scheme when calling get_gfn on different gfn's */
struct two_gfns {
    struct domain *first_domain, *second_domain;
    gfn_t          first_gfn,     second_gfn;
};

/* Returns mfn, type and access for potential caller consumption, but any
 * of those can be NULL */
static inline void get_two_gfns(struct domain *rd, gfn_t rgfn,
        p2m_type_t *rt, p2m_access_t *ra, mfn_t *rmfn, struct domain *ld,
        gfn_t lgfn, p2m_type_t *lt, p2m_access_t *la, mfn_t *lmfn,
        p2m_query_t q, struct two_gfns *rval, bool lock)
{
    mfn_t           *first_mfn, *second_mfn, scratch_mfn;
    p2m_access_t    *first_a, *second_a, scratch_a;
    p2m_type_t      *first_t, *second_t, scratch_t;

    /* Sort by domain, if same domain by gfn */

#define assign_pointers(dest, source)                   \
do {                                                    \
    rval-> dest ## _domain = source ## d;               \
    rval-> dest ## _gfn = source ## gfn;                \
    dest ## _mfn = (source ## mfn) ?: &scratch_mfn;     \
    dest ## _a   = (source ## a)   ?: &scratch_a;       \
    dest ## _t   = (source ## t)   ?: &scratch_t;       \
} while (0)

    if ( (rd->domain_id <= ld->domain_id) ||
         ((rd == ld) && (gfn_x(rgfn) <= gfn_x(lgfn))) )
    {
        assign_pointers(first, r);
        assign_pointers(second, l);
    } else {
        assign_pointers(first, l);
        assign_pointers(second, r);
    }

#undef assign_pointers

    /* Now do the gets */
    *first_mfn  = __get_gfn_type_access(p2m_get_hostp2m(rval->first_domain),
                                        gfn_x(rval->first_gfn), first_t, first_a, q, NULL, lock);
    *second_mfn = __get_gfn_type_access(p2m_get_hostp2m(rval->second_domain),
                                        gfn_x(rval->second_gfn), second_t, second_a, q, NULL, lock);
}

static inline void put_two_gfns(struct two_gfns *arg)
{
    if ( !arg )
        return;

    put_gfn(arg->second_domain, gfn_x(arg->second_gfn));
    put_gfn(arg->first_domain,  gfn_x(arg->first_gfn));
}
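
/*
 * Illustrative sketch (hypothetical caller, e.g. an operation acting on two
 * gfns at once) of the deadlock-avoidance helpers above: get_two_gfns()
 * acquires both entries in a canonical order, put_two_gfns() releases them.
 *
 *     struct two_gfns tg;
 *     p2m_type_t st, ct;
 *     mfn_t smfn, cmfn;
 *
 *     get_two_gfns(sd, sgfn, &st, NULL, &smfn,
 *                  cd, cgfn, &ct, NULL, &cmfn,
 *                  P2M_ALLOC, &tg, true);
 *
 *     ... work on both frames with their p2m entries locked ...
 *
 *     put_two_gfns(&tg);
 */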

/* Init the datastructures for later use by the p2m code */
int p2m_init(struct domain *d);

/* Allocate a new p2m table for a domain.
 *
 * Returns 0 for success or -errno. */
int p2m_alloc_table(struct p2m_domain *p2m);

/* Return all the p2m resources to Xen. */
void p2m_teardown(struct p2m_domain *p2m);
void p2m_final_teardown(struct domain *d);

/* Add a page to a domain's p2m table */
int guest_physmap_add_entry(struct domain *d, gfn_t gfn,
                            mfn_t mfn, unsigned int page_order,
                            p2m_type_t t);

/* Untyped version for RAM only, for compatibility and PV. */
int guest_physmap_add_page(struct domain *d, gfn_t gfn, mfn_t mfn,
                           unsigned int page_order);

/* Set a p2m range as populate-on-demand */
int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
                                          unsigned int order);

/* Enable hardware-assisted log-dirty. */
void p2m_enable_hardware_log_dirty(struct domain *d);

/* Disable hardware-assisted log-dirty */
void p2m_disable_hardware_log_dirty(struct domain *d);

/* Flush hardware cached dirty GFNs */
void p2m_flush_hardware_cached_dirty(struct domain *d);

/* Change types across all p2m entries in a domain */
void p2m_change_entry_type_global(struct domain *d,
                                  p2m_type_t ot, p2m_type_t nt);

/* Change types across a range of p2m entries (start ... end-1) */
void p2m_change_type_range(struct domain *d,
                           unsigned long start, unsigned long end,
                           p2m_type_t ot, p2m_type_t nt);

/* Compare-exchange the type of a single p2m entry */
int p2m_change_type_one(struct domain *d, unsigned long gfn,
                        p2m_type_t ot, p2m_type_t nt);

/* Synchronously change the p2m type for a range of gfns */
int p2m_finish_type_change(struct domain *d,
                           gfn_t first_gfn,
                           unsigned long max_nr);

int p2m_is_logdirty_range(struct p2m_domain *, unsigned long start,
                          unsigned long end);

/* Set foreign entry in the p2m table (for priv-mapping) */
int set_foreign_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);

/* Set mmio addresses in the p2m table (for pass-through) */
int set_mmio_p2m_entry(struct domain *d, gfn_t gfn, mfn_t mfn,
                       unsigned int order);
int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
                         unsigned int order);

/* Set identity addresses in the p2m table (for pass-through) */
int set_identity_p2m_entry(struct domain *d, unsigned long gfn,
                           p2m_access_t p2ma, unsigned int flag);
int clear_identity_p2m_entry(struct domain *d, unsigned long gfn);

/* Add foreign mapping to the guest's p2m table. */
int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
                    unsigned long gpfn, domid_t foreign_domid);

/*
 * Populate-on-demand
 */

/* Dump PoD information about the domain */
void p2m_pod_dump_data(struct domain *d);

#ifdef CONFIG_HVM

/* Report a change affecting memory types. */
void p2m_memory_type_changed(struct domain *d);

/* Called by p2m code when demand-populating a PoD page */
bool
p2m_pod_demand_populate(struct p2m_domain *p2m, gfn_t gfn, unsigned int order);

/* Move all pages from the populate-on-demand cache to the domain page_list
 * (usually in preparation for domain destruction) */
int p2m_pod_empty_cache(struct domain *d);

/* Set populate-on-demand cache size so that the total memory allocated to a
 * domain matches target */
int p2m_pod_set_mem_target(struct domain *d, unsigned long target);

/* Scan pod cache when offline/broken page triggered */
int
p2m_pod_offline_or_broken_hit(struct page_info *p);

/* Replace pod cache when offline/broken page triggered */
void
p2m_pod_offline_or_broken_replace(struct page_info *p);

static inline long p2m_pod_entry_count(const struct p2m_domain *p2m)
{
    return p2m->pod.entry_count;
}

void p2m_pod_init(struct p2m_domain *p2m);

#else

static inline bool
p2m_pod_demand_populate(struct p2m_domain *p2m, gfn_t gfn, unsigned int order)
{
    return false;
}

static inline int p2m_pod_empty_cache(struct domain *d)
{
    return 0;
}

static inline int p2m_pod_offline_or_broken_hit(struct page_info *p)
{
    return 0;
}

static inline void p2m_pod_offline_or_broken_replace(struct page_info *p)
{
    ASSERT_UNREACHABLE();
}

static inline long p2m_pod_entry_count(const struct p2m_domain *p2m)
{
    return 0;
}

static inline void p2m_pod_init(struct p2m_domain *p2m) {}

#endif


/*
 * Paging to disk and page-sharing
 */

/* Modify p2m table for shared gfn */
int set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);

/* Tell xenpaging to drop a paged out frame */
void p2m_mem_paging_drop_page(struct domain *d, gfn_t gfn, p2m_type_t p2mt);
/* Start populating a paged out frame */
void p2m_mem_paging_populate(struct domain *d, gfn_t gfn);
/* Resume normal operation (in case a domain was paused) */
struct vm_event_st;
void p2m_mem_paging_resume(struct domain *d, struct vm_event_st *rsp);

/*
 * Internal functions, only called by other p2m code
 */

mfn_t p2m_alloc_ptp(struct p2m_domain *p2m, unsigned int level);
void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg);

/* Directly set a p2m entry: only for use by p2m code. Does not need
 * a call to put_gfn afterwards. */
int __must_check p2m_set_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
                               unsigned int page_order, p2m_type_t p2mt,
                               p2m_access_t p2ma);

/* Set up function pointers for PT implementation: only for use by p2m code */
extern void p2m_pt_init(struct p2m_domain *p2m);

void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn,
                     p2m_query_t q, uint32_t *pfec);

/* Debugging and auditing of the P2M code? */
#ifndef NDEBUG
#define P2M_AUDIT     1
#else
#define P2M_AUDIT     0
#endif
#define P2M_DEBUGGING 0

#if P2M_AUDIT
extern void audit_p2m(struct domain *d,
                      uint64_t *orphans,
                      uint64_t *m2p_bad,
                      uint64_t *p2m_bad);
#endif /* P2M_AUDIT */

/* Printouts */
#define P2M_PRINTK(f, a...)                                \
    debugtrace_printk("p2m: %s(): " f, __func__, ##a)
#define P2M_ERROR(f, a...)                                 \
    printk(XENLOG_G_ERR "pg error: %s(): " f, __func__, ##a)
#if P2M_DEBUGGING
#define P2M_DEBUG(f, a...)                                 \
    debugtrace_printk("p2mdebug: %s(): " f, __func__, ##a)
#else
#define P2M_DEBUG(f, a...) do { (void)(f); } while(0)
#endif

/*
 * Functions specific to the p2m-pt implementation
 */

/* Extract the type from the PTE flags that store it */
static inline p2m_type_t p2m_flags_to_type(unsigned int flags)
{
    /* For AMD IOMMUs we need to use type 0 for plain RAM, but we need
     * to make sure that an entirely empty PTE doesn't have RAM type */
    if ( flags == 0 )
        return p2m_invalid;
    /* AMD IOMMUs use bits 9-11 to encode next io page level and bits
     * 59-62 for iommu flags so we can't use them to store p2m type info. */
    return (flags >> 12) & 0x7f;
}

static inline p2m_type_t p2m_recalc_type_range(bool recalc, p2m_type_t t,
                                               struct p2m_domain *p2m,
                                               unsigned long gfn_start,
                                               unsigned long gfn_end)
{
    if ( !recalc || !p2m_is_changeable(t) )
        return t;

    if ( t == p2m_ioreq_server && p2m->ioreq.server != NULL )
        return t;

    return p2m_is_logdirty_range(p2m, gfn_start, gfn_end) ? p2m_ram_logdirty
                                                          : p2m_ram_rw;
}

static inline p2m_type_t p2m_recalc_type(bool recalc, p2m_type_t t,
                                         struct p2m_domain *p2m,
                                         unsigned long gfn)
{
    return p2m_recalc_type_range(recalc, t, p2m, gfn, gfn);
}

int p2m_pt_handle_deferred_changes(uint64_t gpa);

/*
 * Nested p2m: shadow p2m tables used for nested HVM virtualization
 */

/* Flushes specified p2m table */
void p2m_flush(struct vcpu *v, struct p2m_domain *p2m);
/* Flushes all nested p2m tables */
void p2m_flush_nestedp2m(struct domain *d);
/* Flushes the np2m specified by np2m_base (if it exists) */
void np2m_flush_base(struct vcpu *v, unsigned long np2m_base);

int nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
    l1_pgentry_t *p, l1_pgentry_t new, unsigned int level);

/*
 * Alternate p2m: shadow p2m tables used for alternate memory views
 */
#ifdef CONFIG_HVM
/* get current alternate p2m table */
static inline struct p2m_domain *p2m_get_altp2m(struct vcpu *v)
{
    unsigned int index = vcpu_altp2m(v).p2midx;

    if ( index == INVALID_ALTP2M )
        return NULL;

    BUG_ON(index >= MAX_ALTP2M);

    return v->domain->arch.altp2m_p2m[index];
}

/* Switch alternate p2m for a single vcpu */
bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx);

/* Check to see if vcpu should be switched to a different p2m. */
void p2m_altp2m_check(struct vcpu *v, uint16_t idx);

/* Flush all the alternate p2m's for a domain */
void p2m_flush_altp2m(struct domain *d);

/* Alternate p2m paging */
bool p2m_altp2m_get_or_propagate(struct p2m_domain *ap2m, unsigned long gfn_l,
                                 mfn_t *mfn, p2m_type_t *p2mt,
                                 p2m_access_t *p2ma, unsigned int page_order);

/* Make a specific alternate p2m valid */
int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx);

/* Find an available alternate p2m and make it valid */
int p2m_init_next_altp2m(struct domain *d, uint16_t *idx,
                         xenmem_access_t hvmmem_default_access);

/* Make a specific alternate p2m invalid */
int p2m_destroy_altp2m_by_id(struct domain *d, unsigned int idx);

/* Switch alternate p2m for entire domain */
int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx);

/* Change a gfn->mfn mapping */
int p2m_change_altp2m_gfn(struct domain *d, unsigned int idx,
                          gfn_t old_gfn, gfn_t new_gfn);

/* Propagate a host p2m change to all alternate p2m's */
int p2m_altp2m_propagate_change(struct domain *d, gfn_t gfn,
                                mfn_t mfn, unsigned int page_order,
                                p2m_type_t p2mt, p2m_access_t p2ma);

/* Set a specific p2m view visibility */
int p2m_set_altp2m_view_visibility(struct domain *d, unsigned int idx,
                                   uint8_t visible);
#else
struct p2m_domain *p2m_get_altp2m(struct vcpu *v);
static inline void p2m_altp2m_check(struct vcpu *v, uint16_t idx) {}
#endif

/*
 * p2m type to IOMMU flags
 */
static inline unsigned int p2m_get_iommu_flags(p2m_type_t p2mt, mfn_t mfn)
{
    unsigned int flags;

    switch ( p2mt )
    {
    case p2m_ram_rw:
    case p2m_grant_map_rw:
    case p2m_ram_logdirty:
    case p2m_map_foreign:
        flags = IOMMUF_readable | IOMMUF_writable;
        break;
    case p2m_ram_ro:
    case p2m_grant_map_ro:
        flags = IOMMUF_readable;
        break;
    case p2m_mmio_direct:
        flags = IOMMUF_readable;
        if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
            flags |= IOMMUF_writable;
        break;
    default:
        flags = 0;
        break;
    }

    return flags;
}
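
/*
 * Illustrative sketch (hypothetical caller) of how the helper above is meant
 * to be consumed when reflecting a p2m entry into the IOMMU page tables: a
 * zero return means the gfn should not be mapped for DMA at all.
 *
 *     unsigned int flags = p2m_get_iommu_flags(p2mt, mfn);
 *
 *     if ( flags )
 *         ... establish the gfn -> mfn IOMMU mapping with 'flags' ...
 *     else
 *         ... ensure any existing IOMMU mapping for gfn is removed ...
 */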

int p2m_set_ioreq_server(struct domain *d, unsigned int flags,
                         struct hvm_ioreq_server *s);
struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
                                              unsigned int *flags);

static inline int p2m_entry_modify(struct p2m_domain *p2m, p2m_type_t nt,
                                   p2m_type_t ot, mfn_t nfn, mfn_t ofn,
                                   unsigned int level)
{
    BUG_ON(!level);
    BUG_ON(level > 1 && (nt == p2m_ioreq_server || nt == p2m_map_foreign));

    if ( level != 1 || (nt == ot && mfn_eq(nfn, ofn)) )
        return 0;

    switch ( nt )
    {
    case p2m_ioreq_server:
        /*
         * p2m_ioreq_server is only used for 4K pages, so
         * the count is only done for level 1 entries.
         */
        p2m->ioreq.entry_count++;
        break;

    case p2m_map_foreign:
        if ( !mfn_valid(nfn) )
        {
            ASSERT_UNREACHABLE();
            return -EINVAL;
        }

        if ( !page_get_owner_and_reference(mfn_to_page(nfn)) )
            return -EBUSY;

        break;

    default:
        break;
    }

    switch ( ot )
    {
    case p2m_ioreq_server:
        ASSERT(p2m->ioreq.entry_count > 0);
        p2m->ioreq.entry_count--;
        break;

    case p2m_map_foreign:
        if ( !mfn_valid(ofn) )
        {
            ASSERT_UNREACHABLE();
            return -EINVAL;
        }
        put_page(mfn_to_page(ofn));
        break;

    default:
        break;
    }

    return 0;
}

#endif /* _XEN_ASM_X86_P2M_H */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */