1 /******************************************************************************
2  * arch-x86_32.h
3  *
4  * Guest OS interface to x86 Xen.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Copyright (c) 2004-2006, K A Fraser
25  */
26 
27 #ifndef _ASM_X86_XEN_INTERFACE_H
28 #define _ASM_X86_XEN_INTERFACE_H
29 
30 /*
31  * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
32  * in a struct in memory.
 * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
 * hypercall argument.
35  * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
36  * they might not be on other architectures.
37  */
/*
 * __DEFINE_GUEST_HANDLE(name, type) declares the typedef
 * __guest_handle_<name> for a guest pointer to <type>.
 *
 * Without __XEN__ the handle is the bare pointer type; with __XEN__ the
 * pointer is wrapped in a one-member struct and reached through '.p'.
 */
#ifndef __XEN__
#define __DEFINE_GUEST_HANDLE(name, type) \
	typedef type *__guest_handle_ ## name
#else
#define __DEFINE_GUEST_HANDLE(name, type) \
	typedef struct { type *p; } __guest_handle_ ## name
#endif
45 
/* Convenience wrappers around __DEFINE_GUEST_HANDLE(). */

/* Define __guest_handle_<name> as a handle to 'struct <name>'. */
#define DEFINE_GUEST_HANDLE_STRUCT(name) \
	__DEFINE_GUEST_HANDLE(name, struct name)

/* Define __guest_handle_<name> as a handle to the type called <name>. */
#define DEFINE_GUEST_HANDLE(name) \
	__DEFINE_GUEST_HANDLE(name, name)

/* Spell the handle type that was defined for <name>. */
#define GUEST_HANDLE(name) \
	__guest_handle_ ## name
50 
/*
 * set_xen_guest_handle(hnd, val) - store the pointer 'val' in handle 'hnd'.
 *
 * The lvalue that receives the pointer depends on how handles are declared:
 * the '.p' member when __XEN__ is defined, the handle itself otherwise.
 *
 * On i386, a handle that occupies 8 bytes is first zeroed as a whole
 * 64-bit quantity and only then assigned (presumably so the bytes the
 * pointer does not cover read as zero). On x86_64 the pointer is simply
 * assigned. No definition is provided for any other architecture.
 *
 * 'hnd' is expanded more than once, so it must not have side effects.
 */
#ifdef __XEN__
#define __xen_guest_handle_slot(hnd)	((hnd).p)
#else
#define __xen_guest_handle_slot(hnd)	(hnd)
#endif

#if defined(__i386__)
#define set_xen_guest_handle(hnd, val)				\
	do {							\
		if (sizeof(hnd) == 8)				\
			*(uint64_t *)&(hnd) = 0;		\
		__xen_guest_handle_slot(hnd) = (val);		\
	} while (0)
#elif defined(__x86_64__)
#define set_xen_guest_handle(hnd, val)				\
	do {							\
		__xen_guest_handle_slot(hnd) = (val);		\
	} while (0)
#endif
74 
75 #ifndef __ASSEMBLY__
76 /* Explicitly size integers that represent pfns in the public interface
77  * with Xen so that on ARM we can have one ABI that works for 32 and 64
78  * bit guests. */
79 typedef unsigned long xen_pfn_t;
80 #define PRI_xen_pfn "lx"
81 typedef unsigned long xen_ulong_t;
82 #define PRI_xen_ulong "lx"
83 typedef long xen_long_t;
84 #define PRI_xen_long "lx"
85 
86 /* Guest handles for primitive C types. */
87 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
88 __DEFINE_GUEST_HANDLE(uint,  unsigned int);
89 DEFINE_GUEST_HANDLE(char);
90 DEFINE_GUEST_HANDLE(int);
91 DEFINE_GUEST_HANDLE(void);
92 DEFINE_GUEST_HANDLE(uint64_t);
93 DEFINE_GUEST_HANDLE(uint32_t);
94 DEFINE_GUEST_HANDLE(xen_pfn_t);
95 DEFINE_GUEST_HANDLE(xen_ulong_t);
96 #endif
97 
/*
 * Virtual address layout constants. Each raw __* value is passed through
 * mk_unsigned_long(); HYPERVISOR_VIRT_START is only defined here if nothing
 * defined it earlier.
 */
#ifndef HYPERVISOR_VIRT_START
#define HYPERVISOR_VIRT_START	mk_unsigned_long(__HYPERVISOR_VIRT_START)
#endif

#define MACH2PHYS_VIRT_START	mk_unsigned_long(__MACH2PHYS_VIRT_START)
#define MACH2PHYS_VIRT_END	mk_unsigned_long(__MACH2PHYS_VIRT_END)

/* Length of the [START, END) window, shifted right by __MACH2PHYS_SHIFT. */
#define MACH2PHYS_NR_ENTRIES \
	((MACH2PHYS_VIRT_END - MACH2PHYS_VIRT_START) >> __MACH2PHYS_SHIFT)
105 
/* Upper bound on the number of virtual CPUs in a multi-processor guest. */
#define MAX_VIRT_CPUS	32
108 
109 /*
110  * SEGMENT DESCRIPTOR TABLES
111  */
112 /*
113  * A number of GDT entries are reserved by Xen. These are not situated at the
114  * start of the GDT because some stupid OSes export hard-coded selector values
115  * in their ABI. These hard-coded values are always near the start of the GDT,
116  * so Xen places itself out of the way, at the far end of the GDT.
117  *
118  * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
119  */
/* Page index within the GDT at which the Xen-reserved entries begin. */
#define FIRST_RESERVED_GDT_PAGE		14
/* Same position as a byte offset (4096 bytes per page). */
#define FIRST_RESERVED_GDT_BYTE		(FIRST_RESERVED_GDT_PAGE * 4096)
/* Same position as an entry index (8 bytes per entry). */
#define FIRST_RESERVED_GDT_ENTRY	(FIRST_RESERVED_GDT_BYTE / 8)
123 
124 /*
125  * Send an array of these to HYPERVISOR_set_trap_table().
126  * Terminate the array with a sentinel entry, with traps[].address==0.
127  * The privilege level specifies which modes may enter a trap via a software
128  * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
129  * privilege levels as follows:
130  *  Level == 0: No one may enter
131  *  Level == 1: Kernel may enter
132  *  Level == 2: Kernel may enter
133  *  Level == 3: Everyone may enter
134  */
/*
 * Accessors for the 'flags' member reached through a pointer 'ti'.
 * The privilege level lives in the low two bits (mask 0x3) and the IF flag
 * in bit 2 (mask 0x4). TI_GET_IF yields 0 or 4, not 0 or 1.
 *
 * The setters only OR bits into 'flags': they never clear a bit that is
 * already set, and TI_SET_DPL does not mask its argument.
 */
#define TI_GET_DPL(ti)		((ti)->flags & 0x3)
#define TI_GET_IF(ti)		((ti)->flags & 0x4)
#define TI_SET_DPL(ti, dpl)	((ti)->flags |= (dpl))
#define TI_SET_IF(ti, on)	((ti)->flags |= ((on) ? 0x4 : 0x0))
139 
140 #ifndef __ASSEMBLY__
/* One element of the array handed to HYPERVISOR_set_trap_table(). */
struct trap_info {
	uint8_t		vector;		/* exception vector */
	/*
	 * Low two bits: privilege level; bit 2 (mask 4): clear event
	 * enable? Use the TI_GET_* / TI_SET_* macros to access it.
	 */
	uint8_t		flags;
	uint16_t	cs;		/* code selector */
	unsigned long	address;	/* code offset */
};
DEFINE_GUEST_HANDLE_STRUCT(trap_info);
148 
149 struct arch_shared_info {
150 	/*
151 	 * Number of valid entries in the p2m table(s) anchored at
152 	 * pfn_to_mfn_frame_list_list and/or p2m_vaddr.
153 	 */
154 	unsigned long max_pfn;
155 	/*
156 	 * Frame containing list of mfns containing list of mfns containing p2m.
157 	 * A value of 0 indicates it has not yet been set up, ~0 indicates it
158 	 * has been set to invalid e.g. due to the p2m being too large for the
159 	 * 3-level p2m tree. In this case the linear mapper p2m list anchored
160 	 * at p2m_vaddr is to be used.
161 	 */
162 	xen_pfn_t pfn_to_mfn_frame_list_list;
163 	unsigned long nmi_reason;
164 	/*
165 	 * Following three fields are valid if p2m_cr3 contains a value
166 	 * different from 0.
167 	 * p2m_cr3 is the root of the address space where p2m_vaddr is valid.
168 	 * p2m_cr3 is in the same format as a cr3 value in the vcpu register
169 	 * state and holds the folded machine frame number (via xen_pfn_to_cr3)
170 	 * of a L3 or L4 page table.
171 	 * p2m_vaddr holds the virtual address of the linear p2m list. All
172 	 * entries in the range [0...max_pfn[ are accessible via this pointer.
173 	 * p2m_generation will be incremented by the guest before and after each
174 	 * change of the mappings of the p2m list. p2m_generation starts at 0
175 	 * and a value with the least significant bit set indicates that a
176 	 * mapping update is in progress. This allows guest external software
177 	 * (e.g. in Dom0) to verify that read mappings are consistent and
178 	 * whether they have changed since the last check.
179 	 * Modifying a p2m element in the linear p2m list is allowed via an
180 	 * atomic write only.
181 	 */
182 	unsigned long p2m_cr3;		/* cr3 value of the p2m address space */
183 	unsigned long p2m_vaddr;	/* virtual address of the p2m list */
184 	unsigned long p2m_generation;	/* generation count of p2m mapping */
185 #ifdef CONFIG_X86_32
186 	uint32_t wc_sec_hi;
187 #endif
188 };
189 #endif	/* !__ASSEMBLY__ */
190 
191 #ifdef CONFIG_X86_32
192 #include <asm/xen/interface_32.h>
193 #else
194 #include <asm/xen/interface_64.h>
195 #endif
196 
197 #include <asm/pvclock-abi.h>
198 
199 #ifndef __ASSEMBLY__
200 /*
201  * The following is all CPU context. Note that the fpu_ctxt block is filled
202  * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
203  *
204  * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise
205  * for HVM and PVH guests, not all information in this structure is updated:
206  *
 * - For HVM guests, the structures read include: fpu_ctxt (if
 * VGCF_I387_VALID is set), flags, user_regs, debugreg[*]
209  *
210  * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to
211  * set cr3. All other fields not used should be set to 0.
212  */
213 struct vcpu_guest_context {
214     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
215     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
216 #define VGCF_I387_VALID                (1<<0)
217 #define VGCF_IN_KERNEL                 (1<<2)
218 #define _VGCF_i387_valid               0
219 #define VGCF_i387_valid                (1<<_VGCF_i387_valid)
220 #define _VGCF_in_kernel                2
221 #define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
222 #define _VGCF_failsafe_disables_events 3
223 #define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
224 #define _VGCF_syscall_disables_events  4
225 #define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
226 #define _VGCF_online                   5
227 #define VGCF_online                    (1<<_VGCF_online)
228     unsigned long flags;                    /* VGCF_* flags                 */
229     struct cpu_user_regs user_regs;         /* User-level CPU registers     */
230     struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
231     unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
232     unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
233     unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
234     /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
235     unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
236     unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
237 #ifdef __i386__
238     unsigned long event_callback_cs;        /* CS:EIP of event callback     */
239     unsigned long event_callback_eip;
240     unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
241     unsigned long failsafe_callback_eip;
242 #else
243     unsigned long event_callback_eip;
244     unsigned long failsafe_callback_eip;
245     unsigned long syscall_callback_eip;
246 #endif
247     unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
248 #ifdef __x86_64__
249     /* Segment base addresses. */
250     uint64_t      fs_base;
251     uint64_t      gs_base_kernel;
252     uint64_t      gs_base_user;
253 #endif
254 };
255 DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
256 
/* AMD PMU registers and structures */
struct xen_pmu_amd_ctxt {
	/*
	 * Offsets, relative to xen_pmu_arch.c.amd, of the counter MSRs and
	 * of the control MSRs. Read-only for PV(H) guests.
	 */
	uint32_t counters;
	uint32_t ctrls;

	/*
	 * Counter MSRs. Declared as a flexible array member from C99 on,
	 * as a zero-length array on older GNU C, and omitted otherwise.
	 */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
	uint64_t regs[];
#elif defined(__GNUC__)
	uint64_t regs[0];
#endif
};
273 
/* Intel PMU registers and structures */

/* A counter value paired with its control value. */
struct xen_pmu_cntr_pair {
	uint64_t counter;
	uint64_t control;
};
279 
struct xen_pmu_intel_ctxt {
	/*
	 * Offsets, relative to xen_pmu_arch.c.intel, of the fixed counter
	 * MSRs and of the architectural counter MSRs.
	 * Read-only for PV(H) guests.
	 */
	uint32_t fixed_counters;
	uint32_t arch_counters;

	/* PMU registers */
	uint64_t global_ctrl;
	uint64_t global_ovf_ctrl;
	uint64_t global_status;
	uint64_t fixed_ctrl;
	uint64_t ds_area;
	uint64_t pebs_enable;
	uint64_t debugctl;

	/*
	 * Fixed and architectural counter MSRs. Declared as a flexible
	 * array member from C99 on, as a zero-length array on older GNU C,
	 * and omitted otherwise.
	 */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
	uint64_t regs[];
#elif defined(__GNUC__)
	uint64_t regs[0];
#endif
};
305 
/* Registers of the sampled domain */
struct xen_pmu_regs {
	/* 64-bit values */
	uint64_t ip;
	uint64_t sp;
	uint64_t flags;
	/* 16-bit selectors */
	uint16_t cs;
	uint16_t ss;
	uint8_t cpl;
	/* Explicit padding; brings the structure to 32 bytes. */
	uint8_t pad[3];
};
316 
/* PMU flag bits */
#define PMU_CACHED		(1 << 0)	/* PMU MSRs are cached in the context */
#define PMU_SAMPLE_USER		(1 << 1)	/* Sample is from user or kernel mode */
#define PMU_SAMPLE_REAL		(1 << 2)	/* Sample is from realmode */
#define PMU_SAMPLE_PV		(1 << 3)	/* Sample from a PV guest */
322 
323 /*
324  * Architecture-specific information describing state of the processor at
325  * the time of PMU interrupt.
326  * Fields of this structure marked as RW for guest should only be written by
327  * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
328  * hypervisor during PMU interrupt). Hypervisor will read updated data in
329  * XENPMU_flush hypercall and clear PMU_CACHED bit.
330  */
331 struct xen_pmu_arch {
332 	union {
333 		/*
334 		 * Processor's registers at the time of interrupt.
335 		 * WO for hypervisor, RO for guests.
336 		 */
337 		struct xen_pmu_regs regs;
338 		/*
339 		 * Padding for adding new registers to xen_pmu_regs in
340 		 * the future
341 		 */
342 #define XENPMU_REGS_PAD_SZ  64
343 		uint8_t pad[XENPMU_REGS_PAD_SZ];
344 	} r;
345 
346 	/* WO for hypervisor, RO for guest */
347 	uint64_t pmu_flags;
348 
349 	/*
350 	 * APIC LVTPC register.
351 	 * RW for both hypervisor and guest.
352 	 * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
353 	 * during XENPMU_flush or XENPMU_lvtpc_set.
354 	 */
355 	union {
356 		uint32_t lapic_lvtpc;
357 		uint64_t pad;
358 	} l;
359 
360 	/*
361 	 * Vendor-specific PMU registers.
362 	 * RW for both hypervisor and guest (see exceptions above).
363 	 * Guest's updates to this field are verified and then loaded by the
364 	 * hypervisor into hardware during XENPMU_flush
365 	 */
366 	union {
367 		struct xen_pmu_amd_ctxt amd;
368 		struct xen_pmu_intel_ctxt intel;
369 
370 		/*
371 		 * Padding for contexts (fixed parts only, does not include
372 		 * MSR banks that are specified by offsets)
373 		 */
374 #define XENPMU_CTXT_PAD_SZ  128
375 		uint8_t pad[XENPMU_CTXT_PAD_SZ];
376 	} c;
377 };
378 
379 #endif	/* !__ASSEMBLY__ */
380 
381 /*
382  * Prefix forces emulation of some non-trapping instructions.
383  * Currently only CPUID.
384  */
385 #include <asm/emulate_prefix.h>
386 
/* Assembler fragment that emits the __XEN_EMULATE_PREFIX bytes. */
#define XEN_EMULATE_PREFIX	__ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
/* The prefix immediately followed by a cpuid instruction. */
#define XEN_CPUID		XEN_EMULATE_PREFIX __ASM_FORM(cpuid)
389 
390 #endif /* _ASM_X86_XEN_INTERFACE_H */
391