/*
 * nested_ept.c: Handling virtualized EPT for guests in the nested case.
 *
 * Copyright (c) 2012, Intel Corporation
 *  Xiantao Zhang <xiantao.zhang@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */
#include <xen/vm_event.h>
#include <xen/event.h>
#include <public/vm_event.h>
#include <asm/domain.h>
#include <asm/page.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/mem_sharing.h>
#include <asm/hap.h>
#include <asm/hvm/support.h>

#include <asm/hvm/nestedhvm.h>

#include "private.h"

#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vvmx.h>
/* Bits that must be reserved in entries at all levels */
#define EPT_MUST_RSV_BITS (((1ull << PADDR_BITS) - 1) & \
                           ~((1ull << paddr_bits) - 1))
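/*
 * A worked example (assuming a typical setup): with PADDR_BITS == 52 and
 * a CPU-reported physical address width (paddr_bits) of 46, the mask
 * above covers bits 51:46, i.e. address bits beyond what the hardware
 * can map.
 */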

#define NEPT_CAP_BITS       \
        (VMX_EPT_INVEPT_ALL_CONTEXT | VMX_EPT_INVEPT_SINGLE_CONTEXT | \
         VMX_EPT_INVEPT_INSTRUCTION | VMX_EPT_SUPERPAGE_1GB |         \
         VMX_EPT_SUPERPAGE_2MB | VMX_EPT_MEMORY_TYPE_WB |             \
         VMX_EPT_MEMORY_TYPE_UC | VMX_EPT_WALK_LENGTH_4_SUPPORTED |   \
         VMX_EPT_EXEC_ONLY_SUPPORTED)

#define NVPID_CAP_BITS \
        (VMX_VPID_INVVPID_INSTRUCTION | VMX_VPID_INVVPID_INDIVIDUAL_ADDR | \
         VMX_VPID_INVVPID_SINGLE_CONTEXT | VMX_VPID_INVVPID_ALL_CONTEXT |  \
         VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL)
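
/*
 * The two capability masks above are the EPT/VPID features Xen is
 * prepared to expose to an L1 guest; nept_get_ept_vpid_cap() below
 * combines them into the value the guest reads from
 * MSR_IA32_VMX_EPT_VPID_CAP.
 */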

#define NEPT_1G_ENTRY_FLAG (1 << 11)
#define NEPT_2M_ENTRY_FLAG (1 << 10)
#define NEPT_4K_ENTRY_FLAG (1 << 9)
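/*
 * These flags tag the l1 entry that nept_walk_tables() synthesizes in
 * gw->lxe[0] with the page size the walk ended on.  The synthesized
 * entry is consumed only by nept_translate_l2ga(), never by hardware.
 */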

/*
 * Return 1 if entry 'e' sets any bits that are reserved at the given
 * level of the guest's EPT paging structures; e.g. for a 2M superpage
 * entry (level 2, sp set) bits 20:12 must be zero.
 */
static bool_t nept_rsv_bits_check(ept_entry_t e, uint32_t level)
{
    uint64_t rsv_bits = EPT_MUST_RSV_BITS;

    switch ( level )
    {
    case 1:
        break;
    case 2 ... 3:
        if ( e.sp )
            rsv_bits |= ((1ull << (9 * (level - 1))) - 1) << PAGE_SHIFT;
        else
            rsv_bits |= EPTE_EMT_MASK | EPTE_IGMT_MASK;
        break;
    case 4:
        rsv_bits |= EPTE_EMT_MASK | EPTE_IGMT_MASK | EPTE_SUPER_PAGE_MASK;
        break;
    default:
        gdprintk(XENLOG_ERR, "Unsupported EPT paging level: %d\n", level);
        BUG();
        break;
    }
    return !!(e.epte & rsv_bits);
}

/* EMT (EPT memory type) checking: only leaf entries carry an EMT field. */
static bool_t nept_emt_bits_check(ept_entry_t e, uint32_t level)
{
    if ( e.sp || level == 1 )
    {
        if ( e.emt == EPT_EMT_RSV0 || e.emt == EPT_EMT_RSV1 ||
             e.emt == EPT_EMT_RSV2 )
            return 1;
    }
    return 0;
}

/*
 * Return 1 if the access 'rwx_acc' is allowed by the permissions
 * 'rwx_bits' accumulated during the walk, i.e. no requested R/W/X bit
 * is missing from the entry.
 */
static bool_t nept_permission_check(uint32_t rwx_acc, uint32_t rwx_bits)
{
    return !(EPTE_RWX_MASK & rwx_acc & ~rwx_bits);
}
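
/*
 * Example for the check above: a write access (rwx_acc == ept_access_w)
 * to an entry granting only read (rwx_bits == ept_access_r) leaves the
 * W bit set in rwx_acc & ~rwx_bits, so the function returns 0 and the
 * caller reports an EPT violation.
 */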

/* An EPT entry is present iff any of its R/W/X bits is set. */
static bool_t nept_non_present_check(ept_entry_t e)
{
    if ( e.epte & EPTE_RWX_MASK )
        return 0;
    return 1;
}

/*
 * Build the EPT/VPID capability value reported to the L1 guest, based
 * on what the hardware supports.
 */
uint64_t nept_get_ept_vpid_cap(void)
{
    uint64_t caps = 0;

    if ( cpu_has_vmx_ept )
        caps |= NEPT_CAP_BITS;
    if ( !cpu_has_vmx_ept_exec_only_supported )
        caps &= ~VMX_EPT_EXEC_ONLY_SUPPORTED;
    if ( cpu_has_vmx_vpid )
        caps |= NVPID_CAP_BITS;

    return caps;
}

static bool_t nept_rwx_bits_check(ept_entry_t e)
{
    /* Write-only or write/execute-only entries are misconfigured. */
    uint8_t rwx_bits = e.epte & EPTE_RWX_MASK;

    if ( rwx_bits == ept_access_w || rwx_bits == ept_access_wx )
        return 1;

    /* Execute-only is also invalid unless the CPU supports it. */
    if ( rwx_bits == ept_access_x &&
         !(nept_get_ept_vpid_cap() & VMX_EPT_EXEC_ONLY_SUPPORTED) )
        return 1;

    return 0;
}

/* Check whether entry 'e' is misconfigured at the given level. */
static bool_t nept_misconfiguration_check(ept_entry_t e, uint32_t level)
{
    return nept_rsv_bits_check(e, level) ||
           nept_emt_bits_check(e, level) ||
           nept_rwx_bits_check(e);
}

/* Extract the 9-bit table index for the given level from a guest
 * physical address. */
static int ept_lvl_table_offset(unsigned long gpa, int lvl)
{
    return (gpa >> (EPT_L4_PAGETABLE_SHIFT - (4 - lvl) * 9)) &
           (EPT_PAGETABLE_ENTRIES - 1);
}
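
/*
 * For the function above (assuming the usual 4-level layout, where
 * EPT_L4_PAGETABLE_SHIFT is 39): lvl == 4 extracts bits 47:39 of the
 * address, lvl == 3 bits 38:30, lvl == 2 bits 29:21, lvl == 1 bits
 * 20:12.
 */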

/*
 * Walk the L1 EPT paging structures for L2 guest address 'l2ga',
 * filling in '*gw' with the entry seen at each level.  On success,
 * gw->lxe[0] holds a synthesized 4K-granularity leaf entry tagged with
 * one of the NEPT_*_ENTRY_FLAGs.
 */
static uint32_t
nept_walk_tables(struct vcpu *v, unsigned long l2ga, ept_walk_t *gw)
{
    int lvl;
    uint32_t rc = 0, ret = 0, gflags;
    struct domain *d = v->domain;
    struct p2m_domain *p2m = d->arch.p2m;
    gfn_t base_gfn = _gfn(nhvm_vcpu_p2m_base(v) >> PAGE_SHIFT);
    mfn_t lxmfn;
    ept_entry_t *lxp = NULL;

    memset(gw, 0, sizeof(*gw));

    for ( lvl = 4; lvl > 0; lvl-- )
    {
        lxp = map_domain_gfn(p2m, base_gfn, &lxmfn, P2M_ALLOC, &rc);
        if ( !lxp )
            goto map_err;
        gw->lxe[lvl] = lxp[ept_lvl_table_offset(l2ga, lvl)];
        unmap_domain_page(lxp);
        put_page(mfn_to_page(lxmfn));

        if ( nept_non_present_check(gw->lxe[lvl]) )
            goto non_present;

        if ( nept_misconfiguration_check(gw->lxe[lvl], lvl) )
            goto misconfig_err;

        if ( (lvl == 2 || lvl == 3) && gw->lxe[lvl].sp )
        {
            /* Generate a fake l1 table entry so callers don't all
             * have to understand superpages. */
            unsigned long gfn_lvl_mask = (1ull << ((lvl - 1) * 9)) - 1;
            gfn_t start = _gfn(gw->lxe[lvl].mfn);
            /* Increment the pfn by the right number of 4k pages. */
            start = _gfn((gfn_x(start) & ~gfn_lvl_mask) +
                     ((l2ga >> PAGE_SHIFT) & gfn_lvl_mask));
            gflags = (gw->lxe[lvl].epte & EPTE_FLAG_MASK) |
                     (lvl == 3 ? NEPT_1G_ENTRY_FLAG : NEPT_2M_ENTRY_FLAG);
            gw->lxe[0].epte = (gfn_x(start) << PAGE_SHIFT) | gflags;
            goto done;
        }
        if ( lvl > 1 )
            base_gfn = _gfn(gw->lxe[lvl].mfn);
    }

    /* We only reach here if no superpage entry was found: a 4K leaf. */
    gflags = (gw->lxe[1].epte & EPTE_FLAG_MASK) | NEPT_4K_ENTRY_FLAG;
    gw->lxe[0].epte = (gw->lxe[1].epte & PAGE_MASK) | gflags;

done:
    ret = EPT_TRANSLATE_SUCCEED;
    goto out;

map_err:
    if ( rc == PFEC_page_paged )
    {
        ret = EPT_TRANSLATE_RETRY;
        goto out;
    }
    /* Other mapping failures are treated as misconfigurations. */
misconfig_err:
    ret = EPT_TRANSLATE_MISCONFIG;
    goto out;

non_present:
    ret = EPT_TRANSLATE_VIOLATION;
    /* Fall through. */
out:
    return ret;
}

/* Translate an L2 guest address to an L1 gpa via the L1 EPT paging
 * structures. */

int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
                        unsigned int *page_order, uint32_t rwx_acc,
                        unsigned long *l1gfn, uint8_t *p2m_acc,
                        uint64_t *exit_qual, uint32_t *exit_reason)
{
    uint32_t rc, rwx_bits = 0;
    ept_walk_t gw;

    rwx_acc &= EPTE_RWX_MASK;

    *l1gfn = gfn_x(INVALID_GFN);

    rc = nept_walk_tables(v, l2ga, &gw);
    switch ( rc )
    {
    case EPT_TRANSLATE_SUCCEED:
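        /*
         * The effective permissions of a translation are the bitwise
         * AND of the R/W/X bits at every level walked; *page_order
         * encodes the leaf size found by the walk (0: 4K, 9: 2M,
         * 18: 1G).
         */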
        if ( likely(gw.lxe[0].epte & NEPT_2M_ENTRY_FLAG) )
        {
            rwx_bits = gw.lxe[4].epte & gw.lxe[3].epte & gw.lxe[2].epte &
                       EPTE_RWX_MASK;
            *page_order = 9;
        }
        else if ( gw.lxe[0].epte & NEPT_4K_ENTRY_FLAG )
        {
            rwx_bits = gw.lxe[4].epte & gw.lxe[3].epte & gw.lxe[2].epte &
                       gw.lxe[1].epte & EPTE_RWX_MASK;
            *page_order = 0;
        }
        else if ( gw.lxe[0].epte & NEPT_1G_ENTRY_FLAG )
        {
            rwx_bits = gw.lxe[4].epte & gw.lxe[3].epte & EPTE_RWX_MASK;
            *page_order = 18;
        }
        else
        {
            gdprintk(XENLOG_ERR, "Incorrect l1 entry!\n");
            BUG();
        }
        if ( nept_permission_check(rwx_acc, rwx_bits) )
        {
            *l1gfn = gw.lxe[0].mfn;
            *p2m_acc = (uint8_t)rwx_bits;
            break;
        }
        rc = EPT_TRANSLATE_VIOLATION;
    /* Fall through to EPT violation if the permission check fails. */
    case EPT_TRANSLATE_VIOLATION:
        *exit_qual = (*exit_qual & 0xffffffc0) | (rwx_bits << 3) | rwx_acc;
        *exit_reason = EXIT_REASON_EPT_VIOLATION;
        break;

    case EPT_TRANSLATE_MISCONFIG:
        rc = EPT_TRANSLATE_MISCONFIG;
        *exit_qual = 0;
        *exit_reason = EXIT_REASON_EPT_MISCONFIG;
        break;
    case EPT_TRANSLATE_RETRY:
        break;
    default:
        gdprintk(XENLOG_ERR, "Unsupported EPT translation type: %d\n", rc);
        BUG();
        break;
    }
    return rc;
}