1 /*
 * nested_ept.c: Handling virtualized EPT for guest in nested case.
3 *
4 * Copyright (c) 2012, Intel Corporation
5 * Xiantao Zhang <xiantao.zhang@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; If not, see <http://www.gnu.org/licenses/>.
18 */
19 #include <xen/vm_event.h>
20 #include <xen/event.h>
21 #include <public/vm_event.h>
22 #include <asm/domain.h>
23 #include <asm/page.h>
24 #include <asm/paging.h>
25 #include <asm/p2m.h>
26 #include <asm/mem_sharing.h>
27 #include <asm/hap.h>
28 #include <asm/hvm/support.h>
29
30 #include <asm/hvm/nestedhvm.h>
31
32 #include "private.h"
33
34 #include <asm/hvm/vmx/vmx.h>
35 #include <asm/hvm/vmx/vvmx.h>
36
37 /* Must reserved bits in all level entries */
38 #define EPT_MUST_RSV_BITS (((1ull << PADDR_BITS) - 1) & \
39 ~((1ull << paddr_bits) - 1))
40
41 #define NEPT_CAP_BITS \
42 (VMX_EPT_INVEPT_ALL_CONTEXT | VMX_EPT_INVEPT_SINGLE_CONTEXT | \
43 VMX_EPT_INVEPT_INSTRUCTION | VMX_EPT_SUPERPAGE_1GB | \
44 VMX_EPT_SUPERPAGE_2MB | VMX_EPT_MEMORY_TYPE_WB | \
45 VMX_EPT_MEMORY_TYPE_UC | VMX_EPT_WALK_LENGTH_4_SUPPORTED | \
46 VMX_EPT_EXEC_ONLY_SUPPORTED)
47
48 #define NVPID_CAP_BITS \
49 (VMX_VPID_INVVPID_INSTRUCTION | VMX_VPID_INVVPID_INDIVIDUAL_ADDR | \
50 VMX_VPID_INVVPID_SINGLE_CONTEXT | VMX_VPID_INVVPID_ALL_CONTEXT | \
51 VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL)
52
53 #define NEPT_1G_ENTRY_FLAG (1 << 11)
54 #define NEPT_2M_ENTRY_FLAG (1 << 10)
55 #define NEPT_4K_ENTRY_FLAG (1 << 9)
56
nept_rsv_bits_check(ept_entry_t e,uint32_t level)57 static bool_t nept_rsv_bits_check(ept_entry_t e, uint32_t level)
58 {
59 uint64_t rsv_bits = EPT_MUST_RSV_BITS;
60
61 switch ( level )
62 {
63 case 1:
64 break;
65 case 2 ... 3:
66 if ( e.sp )
67 rsv_bits |= ((1ull << (9 * (level - 1))) - 1) << PAGE_SHIFT;
68 else
69 rsv_bits |= EPTE_EMT_MASK | EPTE_IGMT_MASK;
70 break;
71 case 4:
72 rsv_bits |= EPTE_EMT_MASK | EPTE_IGMT_MASK | EPTE_SUPER_PAGE_MASK;
73 break;
74 default:
75 gdprintk(XENLOG_ERR,"Unsupported EPT paging level: %d\n", level);
76 BUG();
77 break;
78 }
79 return !!(e.epte & rsv_bits);
80 }
81
82 /* EMT checking*/
nept_emt_bits_check(ept_entry_t e,uint32_t level)83 static bool_t nept_emt_bits_check(ept_entry_t e, uint32_t level)
84 {
85 if ( e.sp || level == 1 )
86 {
87 if ( e.emt == EPT_EMT_RSV0 || e.emt == EPT_EMT_RSV1 ||
88 e.emt == EPT_EMT_RSV2 )
89 return 1;
90 }
91 return 0;
92 }
93
nept_permission_check(uint32_t rwx_acc,uint32_t rwx_bits)94 static bool_t nept_permission_check(uint32_t rwx_acc, uint32_t rwx_bits)
95 {
96 return !(EPTE_RWX_MASK & rwx_acc & ~rwx_bits);
97 }
98
99 /* nept's non-present check */
nept_non_present_check(ept_entry_t e)100 static bool_t nept_non_present_check(ept_entry_t e)
101 {
102 if ( e.epte & EPTE_RWX_MASK )
103 return 0;
104 return 1;
105 }
106
nept_get_ept_vpid_cap(void)107 uint64_t nept_get_ept_vpid_cap(void)
108 {
109 uint64_t caps = 0;
110
111 if ( cpu_has_vmx_ept )
112 caps |= NEPT_CAP_BITS;
113 if ( !cpu_has_vmx_ept_exec_only_supported )
114 caps &= ~VMX_EPT_EXEC_ONLY_SUPPORTED;
115 if ( cpu_has_vmx_vpid )
116 caps |= NVPID_CAP_BITS;
117
118 return caps;
119 }
120
nept_rwx_bits_check(ept_entry_t e)121 static bool_t nept_rwx_bits_check(ept_entry_t e)
122 {
123 /*write only or write/execute only*/
124 uint8_t rwx_bits = e.epte & EPTE_RWX_MASK;
125
126 if ( rwx_bits == ept_access_w || rwx_bits == ept_access_wx )
127 return 1;
128
129 if ( rwx_bits == ept_access_x &&
130 !(nept_get_ept_vpid_cap() & VMX_EPT_EXEC_ONLY_SUPPORTED) )
131 return 1;
132
133 return 0;
134 }
135
136 /* nept's misconfiguration check */
nept_misconfiguration_check(ept_entry_t e,uint32_t level)137 static bool_t nept_misconfiguration_check(ept_entry_t e, uint32_t level)
138 {
139 return nept_rsv_bits_check(e, level) ||
140 nept_emt_bits_check(e, level) ||
141 nept_rwx_bits_check(e);
142 }
143
ept_lvl_table_offset(unsigned long gpa,int lvl)144 static int ept_lvl_table_offset(unsigned long gpa, int lvl)
145 {
146 return (gpa >> (EPT_L4_PAGETABLE_SHIFT -(4 - lvl) * 9)) &
147 (EPT_PAGETABLE_ENTRIES - 1);
148 }
149
150 static uint32_t
nept_walk_tables(struct vcpu * v,unsigned long l2ga,ept_walk_t * gw)151 nept_walk_tables(struct vcpu *v, unsigned long l2ga, ept_walk_t *gw)
152 {
153 int lvl;
154 uint32_t rc = 0, ret = 0, gflags;
155 struct domain *d = v->domain;
156 struct p2m_domain *p2m = d->arch.p2m;
157 gfn_t base_gfn = _gfn(nhvm_vcpu_p2m_base(v) >> PAGE_SHIFT);
158 mfn_t lxmfn;
159 ept_entry_t *lxp = NULL;
160
161 memset(gw, 0, sizeof(*gw));
162
163 for (lvl = 4; lvl > 0; lvl--)
164 {
165 lxp = map_domain_gfn(p2m, base_gfn, &lxmfn, P2M_ALLOC, &rc);
166 if ( !lxp )
167 goto map_err;
168 gw->lxe[lvl] = lxp[ept_lvl_table_offset(l2ga, lvl)];
169 unmap_domain_page(lxp);
170 put_page(mfn_to_page(lxmfn));
171
172 if ( nept_non_present_check(gw->lxe[lvl]) )
173 goto non_present;
174
175 if ( nept_misconfiguration_check(gw->lxe[lvl], lvl) )
176 goto misconfig_err;
177
178 if ( (lvl == 2 || lvl == 3) && gw->lxe[lvl].sp )
179 {
180 /* Generate a fake l1 table entry so callers don't all
181 * have to understand superpages. */
182 unsigned long gfn_lvl_mask = (1ull << ((lvl - 1) * 9)) - 1;
183 gfn_t start = _gfn(gw->lxe[lvl].mfn);
184 /* Increment the pfn by the right number of 4k pages. */
185 start = _gfn((gfn_x(start) & ~gfn_lvl_mask) +
186 ((l2ga >> PAGE_SHIFT) & gfn_lvl_mask));
187 gflags = (gw->lxe[lvl].epte & EPTE_FLAG_MASK) |
188 (lvl == 3 ? NEPT_1G_ENTRY_FLAG: NEPT_2M_ENTRY_FLAG);
189 gw->lxe[0].epte = (gfn_x(start) << PAGE_SHIFT) | gflags;
190 goto done;
191 }
192 if ( lvl > 1 )
193 base_gfn = _gfn(gw->lxe[lvl].mfn);
194 }
195
196 /* If this is not a super entry, we can reach here. */
197 gflags = (gw->lxe[1].epte & EPTE_FLAG_MASK) | NEPT_4K_ENTRY_FLAG;
198 gw->lxe[0].epte = (gw->lxe[1].epte & PAGE_MASK) | gflags;
199
200 done:
201 ret = EPT_TRANSLATE_SUCCEED;
202 goto out;
203
204 map_err:
205 if ( rc == PFEC_page_paged )
206 {
207 ret = EPT_TRANSLATE_RETRY;
208 goto out;
209 }
210 /* fall through to misconfig error */
211 misconfig_err:
212 ret = EPT_TRANSLATE_MISCONFIG;
213 goto out;
214
215 non_present:
216 ret = EPT_TRANSLATE_VIOLATION;
217 /* fall through. */
218 out:
219 return ret;
220 }
221
222 /* Translate a L2 guest address to L1 gpa via L1 EPT paging structure */
223
nept_translate_l2ga(struct vcpu * v,paddr_t l2ga,unsigned int * page_order,uint32_t rwx_acc,unsigned long * l1gfn,uint8_t * p2m_acc,uint64_t * exit_qual,uint32_t * exit_reason)224 int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
225 unsigned int *page_order, uint32_t rwx_acc,
226 unsigned long *l1gfn, uint8_t *p2m_acc,
227 uint64_t *exit_qual, uint32_t *exit_reason)
228 {
229 uint32_t rc, rwx_bits = 0;
230 ept_walk_t gw;
231 rwx_acc &= EPTE_RWX_MASK;
232
233 *l1gfn = gfn_x(INVALID_GFN);
234
235 rc = nept_walk_tables(v, l2ga, &gw);
236 switch ( rc )
237 {
238 case EPT_TRANSLATE_SUCCEED:
239 if ( likely(gw.lxe[0].epte & NEPT_2M_ENTRY_FLAG) )
240 {
241 rwx_bits = gw.lxe[4].epte & gw.lxe[3].epte & gw.lxe[2].epte &
242 EPTE_RWX_MASK;
243 *page_order = 9;
244 }
245 else if ( gw.lxe[0].epte & NEPT_4K_ENTRY_FLAG )
246 {
247 rwx_bits = gw.lxe[4].epte & gw.lxe[3].epte & gw.lxe[2].epte &
248 gw.lxe[1].epte & EPTE_RWX_MASK;
249 *page_order = 0;
250 }
251 else if ( gw.lxe[0].epte & NEPT_1G_ENTRY_FLAG )
252 {
253 rwx_bits = gw.lxe[4].epte & gw.lxe[3].epte & EPTE_RWX_MASK;
254 *page_order = 18;
255 }
256 else
257 {
258 gdprintk(XENLOG_ERR, "Uncorrect l1 entry!\n");
259 BUG();
260 }
261 if ( nept_permission_check(rwx_acc, rwx_bits) )
262 {
263 *l1gfn = gw.lxe[0].mfn;
264 *p2m_acc = (uint8_t)rwx_bits;
265 break;
266 }
267 rc = EPT_TRANSLATE_VIOLATION;
268 /* Fall through to EPT violation if permission check fails. */
269 case EPT_TRANSLATE_VIOLATION:
270 *exit_qual = (*exit_qual & 0xffffffc0) | (rwx_bits << 3) | rwx_acc;
271 *exit_reason = EXIT_REASON_EPT_VIOLATION;
272 break;
273
274 case EPT_TRANSLATE_MISCONFIG:
275 rc = EPT_TRANSLATE_MISCONFIG;
276 *exit_qual = 0;
277 *exit_reason = EXIT_REASON_EPT_MISCONFIG;
278 break;
279 case EPT_TRANSLATE_RETRY:
280 break;
281 default:
282 gdprintk(XENLOG_ERR, "Unsupported ept translation type!:%d\n", rc);
283 BUG();
284 break;
285 }
286 return rc;
287 }
288