1 /******************************************************************************
2  * arch/x86/mm/hap/nested_hap.c
3  *
4  * Code for Nested Virtualization
5  * Copyright (c) 2011 Advanced Micro Devices
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include <xen/vm_event.h>
22 #include <xen/event.h>
23 #include <public/vm_event.h>
24 #include <asm/domain.h>
25 #include <asm/page.h>
26 #include <asm/paging.h>
27 #include <asm/p2m.h>
28 #include <asm/mem_sharing.h>
29 #include <asm/hap.h>
30 #include <asm/hvm/support.h>
31 
32 #include <asm/hvm/nestedhvm.h>
33 
34 #include "private.h"
35 
/* ALGORITHM for NESTED PAGE FAULT
 *
 * NOTATION
 * Levels: L0, L1, L2
 * Guests: L1 guest, L2 guest
 * Hypervisor: L0 hypervisor
 * Addresses: L2-GVA, L2-GPA, L1-GVA, L1-GPA, MPA
 *
 * On L0, when #NPF happens, the handler function should do:
 * hap_page_fault(GPA)
 * {
 *    1. If #NPF is from L1 guest, then we crash the guest VM (same as old
 *       code)
 *    2. If #NPF is from L2 guest, then we continue from (3)
 *    3. Get np2m base from L1 guest. Map np2m base into L0 hypervisor address
 *       space.
 *    4. Walk the np2m's page table
 *    5.    - if not present or permission check failure, then we inject #NPF
 *            back to L1 guest and
 *            re-launch L1 guest (L1 guest will either treat this #NPF as MMIO,
 *            or fix its p2m table for L2 guest)
 *    6.    - if present, then we will get a new translated value L1-GPA
 *            (points to L1 machine memory)
 *    7.        * Use L1-GPA to walk L0 P2M table
 *    8.            - if not present, then crash the guest (should not happen)
 *    9.            - if present, then we get a new translated value MPA
 *                    (points to real machine memory)
 *   10.                * Finally, use GPA and MPA to walk nested_p2m
 *                        and fix the bits.
 * }
 *
 */
68 
69 
70 /********************************************/
71 /*        NESTED VIRT P2M FUNCTIONS         */
72 /********************************************/
73 
74 int
nestedp2m_write_p2m_entry(struct p2m_domain * p2m,unsigned long gfn,l1_pgentry_t * p,l1_pgentry_t new,unsigned int level)75 nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
76     l1_pgentry_t *p, l1_pgentry_t new, unsigned int level)
77 {
78     struct domain *d = p2m->domain;
79     uint32_t old_flags;
80 
81     paging_lock(d);
82 
83     old_flags = l1e_get_flags(*p);
84     safe_write_pte(p, new);
85 
86     if (old_flags & _PAGE_PRESENT)
87         guest_flush_tlb_mask(d, p2m->dirty_cpumask);
88 
89     paging_unlock(d);
90 
91     return 0;
92 }
93 
94 /********************************************/
95 /*          NESTED VIRT FUNCTIONS           */
96 /********************************************/
97 static void
nestedhap_fix_p2m(struct vcpu * v,struct p2m_domain * p2m,paddr_t L2_gpa,paddr_t L0_gpa,unsigned int page_order,p2m_type_t p2mt,p2m_access_t p2ma)98 nestedhap_fix_p2m(struct vcpu *v, struct p2m_domain *p2m,
99                   paddr_t L2_gpa, paddr_t L0_gpa,
100                   unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
101 {
102     int rc = 0;
103     unsigned long gfn, mask;
104     mfn_t mfn;
105 
106     ASSERT(p2m);
107     ASSERT(p2m->set_entry);
108     ASSERT(p2m_locked_by_me(p2m));
109 
110     /*
111      * If this is a superpage mapping, round down both addresses to
112      * the start of the superpage.
113      */
114     mask = ~((1UL << page_order) - 1);
115     gfn = (L2_gpa >> PAGE_SHIFT) & mask;
116     mfn = _mfn((L0_gpa >> PAGE_SHIFT) & mask);
117 
118     rc = p2m_set_entry(p2m, _gfn(gfn), mfn, page_order, p2mt, p2ma);
119 
120     if ( rc )
121     {
122         gdprintk(XENLOG_ERR,
123                  "failed to set entry for %#"PRIx64" -> %#"PRIx64" rc:%d\n",
124                  L2_gpa, L0_gpa, rc);
125         domain_crash(p2m->domain);
126     }
127 }
128 
129 /* This function uses L2_gpa to walk the P2M page table in L1. If the
130  * walk is successful, the translated value is returned in
131  * L1_gpa. The result value tells what to do next.
132  */
133 int
nestedhap_walk_L1_p2m(struct vcpu * v,paddr_t L2_gpa,paddr_t * L1_gpa,unsigned int * page_order,uint8_t * p2m_acc,bool_t access_r,bool_t access_w,bool_t access_x)134 nestedhap_walk_L1_p2m(struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa,
135                       unsigned int *page_order, uint8_t *p2m_acc,
136                       bool_t access_r, bool_t access_w, bool_t access_x)
137 {
138     ASSERT(hvm_funcs.nhvm_hap_walk_L1_p2m);
139 
140     return hvm_funcs.nhvm_hap_walk_L1_p2m(v, L2_gpa, L1_gpa, page_order,
141         p2m_acc, access_r, access_w, access_x);
142 }
143 
144 
145 /* This function uses L1_gpa to walk the P2M table in L0 hypervisor. If the
146  * walk is successful, the translated value is returned in L0_gpa. The return
147  * value tells the upper level what to do.
148  */
149 static int
nestedhap_walk_L0_p2m(struct p2m_domain * p2m,paddr_t L1_gpa,paddr_t * L0_gpa,p2m_type_t * p2mt,p2m_access_t * p2ma,unsigned int * page_order,bool_t access_r,bool_t access_w,bool_t access_x)150 nestedhap_walk_L0_p2m(struct p2m_domain *p2m, paddr_t L1_gpa, paddr_t *L0_gpa,
151                       p2m_type_t *p2mt, p2m_access_t *p2ma,
152                       unsigned int *page_order,
153                       bool_t access_r, bool_t access_w, bool_t access_x)
154 {
155     mfn_t mfn;
156     int rc;
157 
158     /* walk L0 P2M table */
159     mfn = get_gfn_type_access(p2m, L1_gpa >> PAGE_SHIFT, p2mt, p2ma,
160                               0, page_order);
161 
162     rc = NESTEDHVM_PAGEFAULT_DIRECT_MMIO;
163     if ( *p2mt == p2m_mmio_direct )
164         goto direct_mmio_out;
165     rc = NESTEDHVM_PAGEFAULT_MMIO;
166     if ( *p2mt == p2m_mmio_dm )
167         goto out;
168 
169     rc = NESTEDHVM_PAGEFAULT_L0_ERROR;
170     if ( access_w && p2m_is_readonly(*p2mt) )
171         goto out;
172 
173     if ( p2m_is_paging(*p2mt) || p2m_is_shared(*p2mt) || !p2m_is_ram(*p2mt) )
174         goto out;
175 
176     if ( !mfn_valid(mfn) )
177         goto out;
178 
179     rc = NESTEDHVM_PAGEFAULT_DONE;
180 direct_mmio_out:
181     *L0_gpa = (mfn_x(mfn) << PAGE_SHIFT) + (L1_gpa & ~PAGE_MASK);
182 out:
183     __put_gfn(p2m, L1_gpa >> PAGE_SHIFT);
184     return rc;
185 }
186 
187 /*
188  * The following function, nestedhap_page_fault(), is for steps (3)--(10).
189  *
190  * Returns:
191  */
192 int
nestedhvm_hap_nested_page_fault(struct vcpu * v,paddr_t * L2_gpa,bool_t access_r,bool_t access_w,bool_t access_x)193 nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
194     bool_t access_r, bool_t access_w, bool_t access_x)
195 {
196     int rv;
197     paddr_t L1_gpa, L0_gpa;
198     struct domain *d = v->domain;
199     struct p2m_domain *p2m, *nested_p2m;
200     unsigned int page_order_21, page_order_10, page_order_20;
201     p2m_type_t p2mt_10;
202     p2m_access_t p2ma_10 = p2m_access_rwx;
203     uint8_t p2ma_21 = p2m_access_rwx;
204 
205     p2m = p2m_get_hostp2m(d); /* L0 p2m */
206 
207     /* walk the L1 P2M table */
208     rv = nestedhap_walk_L1_p2m(v, *L2_gpa, &L1_gpa, &page_order_21, &p2ma_21,
209         access_r, access_w, access_x);
210 
211     /* let caller to handle these two cases */
212     switch (rv) {
213     case NESTEDHVM_PAGEFAULT_INJECT:
214     case NESTEDHVM_PAGEFAULT_RETRY:
215     case NESTEDHVM_PAGEFAULT_L1_ERROR:
216         return rv;
217     case NESTEDHVM_PAGEFAULT_DONE:
218         break;
219     default:
220         BUG();
221         break;
222     }
223 
224     /* ==> we have to walk L0 P2M */
225     rv = nestedhap_walk_L0_p2m(p2m, L1_gpa, &L0_gpa,
226         &p2mt_10, &p2ma_10, &page_order_10,
227         access_r, access_w, access_x);
228 
229     /* let upper level caller to handle these two cases */
230     switch (rv) {
231     case NESTEDHVM_PAGEFAULT_INJECT:
232         return rv;
233     case NESTEDHVM_PAGEFAULT_L0_ERROR:
234         *L2_gpa = L1_gpa;
235         return rv;
236     case NESTEDHVM_PAGEFAULT_DONE:
237         break;
238     case NESTEDHVM_PAGEFAULT_MMIO:
239         return rv;
240     case NESTEDHVM_PAGEFAULT_DIRECT_MMIO:
241         break;
242     default:
243         BUG();
244         break;
245     }
246 
247     page_order_20 = min(page_order_21, page_order_10);
248 
249     ASSERT(p2ma_10 <= p2m_access_n2rwx);
250     /*NOTE: if assert fails, needs to handle new access type here */
251 
252     switch ( p2ma_10 )
253     {
254     case p2m_access_n ... p2m_access_rwx:
255         break;
256     case p2m_access_rx2rw:
257         p2ma_10 = p2m_access_rx;
258         break;
259     case p2m_access_n2rwx:
260         p2ma_10 = p2m_access_n;
261         break;
262     default:
263         p2ma_10 = p2m_access_n;
264         /* For safety, remove all permissions. */
265         gdprintk(XENLOG_ERR, "Unhandled p2m access type:%d\n", p2ma_10);
266     }
267     /* Use minimal permission for nested p2m. */
268     p2ma_10 &= (p2m_access_t)p2ma_21;
269 
270     /* fix p2m_get_pagetable(nested_p2m) */
271     nested_p2m = p2m_get_nestedp2m_locked(v);
272     nestedhap_fix_p2m(v, nested_p2m, *L2_gpa, L0_gpa, page_order_20,
273         p2mt_10, p2ma_10);
274     p2m_unlock(nested_p2m);
275 
276     return NESTEDHVM_PAGEFAULT_DONE;
277 }
278 
279 /********************************************/
280 /*     NESTED VIRT INITIALIZATION FUNCS     */
281 /********************************************/
282 
283 /*
284  * Local variables:
285  * mode: C
286  * c-file-style: "BSD"
287  * c-basic-offset: 4
288  * tab-width: 4
289  * indent-tabs-mode: nil
290  * End:
291  */
292