/******************************************************************************
 * arch/x86/mm/hap/nested_hap.c
 *
 * Code for Nested Virtualization
 * Copyright (c) 2011 Advanced Micro Devices
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/vm_event.h>
#include <xen/event.h>
#include <public/vm_event.h>
#include <asm/domain.h>
#include <asm/page.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/mem_sharing.h>
#include <asm/hap.h>
#include <asm/hvm/support.h>

#include <asm/hvm/nestedhvm.h>

#include "private.h"
/* ALGORITHM for NESTED PAGE FAULT
 *
 * NOTATION
 * Levels: L0, L1, L2
 * Guests: L1 guest, L2 guest
 * Hypervisor: L0 hypervisor
 * Addresses: L2-GVA, L2-GPA, L1-GVA, L1-GPA, MPA
 *
 * On L0, when a #NPF happens, the handler function should do:
 * hap_page_fault(GPA)
 * {
 *    1. If the #NPF is from the L1 guest, then we crash the guest VM
 *       (same as the old code)
 *    2. If the #NPF is from an L2 guest, then we continue from (3)
 *    3. Get the np2m base from the L1 guest and map it into the L0
 *       hypervisor address space.
 *    4. Walk the np2m's page table
 *    5.    - if not present, or if the permission check fails, then we
 *            inject the #NPF back into the L1 guest and re-launch it
 *            (the L1 guest will either treat this #NPF as MMIO, or fix
 *            its p2m table for the L2 guest)
 *    6.    - if present, then we get a new translated value, L1-GPA
 *            (pointing into L1 machine memory)
 *    7.        * Use L1-GPA to walk the L0 P2M table
 *    8.            - if not present, then crash the guest (should not happen)
 *    9.            - if present, then we get a new translated value, MPA
 *                    (pointing into real machine memory)
 *   10.        * Finally, use L2-GPA and MPA to walk the nested_p2m
 *                and fix up the entry.
 * }
 *
 */


/********************************************/
/*        NESTED VIRT P2M FUNCTIONS         */
/********************************************/
int
nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
                          l1_pgentry_t *p, l1_pgentry_t new, unsigned int level)
{
    struct domain *d = p2m->domain;
    uint32_t old_flags;

    paging_lock(d);

    old_flags = l1e_get_flags(*p);
    safe_write_pte(p, new);

    if (old_flags & _PAGE_PRESENT)
        guest_flush_tlb_mask(d, p2m->dirty_cpumask);

    paging_unlock(d);

    return 0;
}

/********************************************/
/*          NESTED VIRT FUNCTIONS           */
/********************************************/
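/*
 * Enter the L2-GPA -> L0-GPA translation into the nested p2m, rounding
 * both addresses down to the start of the superpage when a large mapping
 * is used.  Failure to set the entry crashes the domain.
 */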
static void
nestedhap_fix_p2m(struct vcpu *v, struct p2m_domain *p2m,
                  paddr_t L2_gpa, paddr_t L0_gpa,
                  unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
{
    int rc = 0;
    unsigned long gfn, mask;
    mfn_t mfn;

    ASSERT(p2m);
    ASSERT(p2m->set_entry);
    ASSERT(p2m_locked_by_me(p2m));

    /*
     * If this is a superpage mapping, round down both addresses to
     * the start of the superpage.
     */
    mask = ~((1UL << page_order) - 1);
    gfn = (L2_gpa >> PAGE_SHIFT) & mask;
    mfn = _mfn((L0_gpa >> PAGE_SHIFT) & mask);

    rc = p2m_set_entry(p2m, _gfn(gfn), mfn, page_order, p2mt, p2ma);

    if ( rc )
    {
        gdprintk(XENLOG_ERR,
                 "failed to set entry for %#"PRIx64" -> %#"PRIx64" rc:%d\n",
                 L2_gpa, L0_gpa, rc);
        domain_crash(p2m->domain);
    }
}

/* This function uses L2_gpa to walk the L1 guest's P2M page table. If the
 * walk is successful, the translated value is returned in L1_gpa. The
 * return value tells the caller what to do next; the walk itself is
 * carried out by the vendor-specific (VMX/SVM) nhvm_hap_walk_L1_p2m hook.
 */
int
nestedhap_walk_L1_p2m(struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa,
                      unsigned int *page_order, uint8_t *p2m_acc,
                      bool_t access_r, bool_t access_w, bool_t access_x)
{
    ASSERT(hvm_funcs.nhvm_hap_walk_L1_p2m);

    return hvm_funcs.nhvm_hap_walk_L1_p2m(v, L2_gpa, L1_gpa, page_order,
                                          p2m_acc, access_r, access_w,
                                          access_x);
}


/* This function uses L1_gpa to walk the P2M table in the L0 hypervisor. If
 * the walk is successful, the translated value is returned in L0_gpa. The
 * return value tells the upper level what to do.
 */
static int
nestedhap_walk_L0_p2m(struct p2m_domain *p2m, paddr_t L1_gpa, paddr_t *L0_gpa,
                      p2m_type_t *p2mt, p2m_access_t *p2ma,
                      unsigned int *page_order,
                      bool_t access_r, bool_t access_w, bool_t access_x)
{
    mfn_t mfn;
    int rc;

    /* walk L0 P2M table */
    mfn = get_gfn_type_access(p2m, L1_gpa >> PAGE_SHIFT, p2mt, p2ma,
                              0, page_order);

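    /*
     * Classify what the L1-GPA maps to: direct MMIO is passed straight
     * through, emulated MMIO is left to the device model, and only
     * ordinary, non-paged, non-shared RAM completes the walk.  Anything
     * else is an L0 error.
     */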
    rc = NESTEDHVM_PAGEFAULT_DIRECT_MMIO;
    if ( *p2mt == p2m_mmio_direct )
        goto direct_mmio_out;
    rc = NESTEDHVM_PAGEFAULT_MMIO;
    if ( *p2mt == p2m_mmio_dm )
        goto out;

    rc = NESTEDHVM_PAGEFAULT_L0_ERROR;
    if ( access_w && p2m_is_readonly(*p2mt) )
        goto out;

    if ( p2m_is_paging(*p2mt) || p2m_is_shared(*p2mt) || !p2m_is_ram(*p2mt) )
        goto out;

    if ( !mfn_valid(mfn) )
        goto out;

    rc = NESTEDHVM_PAGEFAULT_DONE;
 direct_mmio_out:
    *L0_gpa = (mfn_x(mfn) << PAGE_SHIFT) + (L1_gpa & ~PAGE_MASK);
 out:
    __put_gfn(p2m, L1_gpa >> PAGE_SHIFT);
    return rc;
}

/*
 * The following function, nestedhvm_hap_nested_page_fault(), implements
 * steps (3)--(10) of the algorithm described at the top of this file.
 *
 * Returns one of the NESTEDHVM_PAGEFAULT_* codes: DONE on success, or
 * INJECT, RETRY, L1_ERROR, L0_ERROR or MMIO, which the caller has to
 * handle.
 */
int
nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
                                bool_t access_r, bool_t access_w,
                                bool_t access_x)
{
    int rv;
    paddr_t L1_gpa, L0_gpa;
    struct domain *d = v->domain;
    struct p2m_domain *p2m, *nested_p2m;
    unsigned int page_order_21, page_order_10, page_order_20;
    p2m_type_t p2mt_10;
    p2m_access_t p2ma_10 = p2m_access_rwx;
    uint8_t p2ma_21 = p2m_access_rwx;

    p2m = p2m_get_hostp2m(d); /* L0 p2m */

    /* walk the L1 P2M table */
    rv = nestedhap_walk_L1_p2m(v, *L2_gpa, &L1_gpa, &page_order_21, &p2ma_21,
                               access_r, access_w, access_x);

    /* let the caller handle these cases */
    switch (rv) {
    case NESTEDHVM_PAGEFAULT_INJECT:
    case NESTEDHVM_PAGEFAULT_RETRY:
    case NESTEDHVM_PAGEFAULT_L1_ERROR:
        return rv;
    case NESTEDHVM_PAGEFAULT_DONE:
        break;
    default:
        BUG();
        break;
    }

    /* ==> we have to walk L0 P2M */
    rv = nestedhap_walk_L0_p2m(p2m, L1_gpa, &L0_gpa,
                               &p2mt_10, &p2ma_10, &page_order_10,
                               access_r, access_w, access_x);

    /* let the upper-level caller handle these cases */
    switch (rv) {
    case NESTEDHVM_PAGEFAULT_INJECT:
        return rv;
    case NESTEDHVM_PAGEFAULT_L0_ERROR:
        *L2_gpa = L1_gpa;
        return rv;
    case NESTEDHVM_PAGEFAULT_DONE:
        break;
    case NESTEDHVM_PAGEFAULT_MMIO:
        return rv;
    case NESTEDHVM_PAGEFAULT_DIRECT_MMIO:
        break;
    default:
        BUG();
        break;
    }

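    /*
     * The combined L2 -> L0 mapping can be no larger than the smaller of
     * the two translations (L2 -> L1 and L1 -> L0) it is built from.
     */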
    page_order_20 = min(page_order_21, page_order_10);

    ASSERT(p2ma_10 <= p2m_access_n2rwx);
    /* NOTE: if this assertion fails, a new access type needs handling here. */

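    /*
     * p2m_access_rx2rw and p2m_access_n2rwx are self-promoting types that
     * get widened on faults against the host p2m; for the nested p2m,
     * enter the pre-promotion (most restrictive) permission instead.
     */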
    switch ( p2ma_10 )
    {
    case p2m_access_n ... p2m_access_rwx:
        break;
    case p2m_access_rx2rw:
        p2ma_10 = p2m_access_rx;
        break;
    case p2m_access_n2rwx:
        p2ma_10 = p2m_access_n;
        break;
    default:
        /* Log the unhandled type before clobbering it. */
        gdprintk(XENLOG_ERR, "Unhandled p2m access type:%d\n", p2ma_10);
        /* For safety, remove all permissions. */
        p2ma_10 = p2m_access_n;
    }
    /* Use the minimal permission for the nested p2m. */
    p2ma_10 &= (p2m_access_t)p2ma_21;

    /* fix p2m_get_pagetable(nested_p2m) */
    nested_p2m = p2m_get_nestedp2m_locked(v);
    nestedhap_fix_p2m(v, nested_p2m, *L2_gpa, L0_gpa, page_order_20,
                      p2mt_10, p2ma_10);
    p2m_unlock(nested_p2m);

    return NESTEDHVM_PAGEFAULT_DONE;
}

/********************************************/
/*     NESTED VIRT INITIALIZATION FUNCS     */
/********************************************/

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */