/******************************************************************************
 * current.h
 *
 * Information structure that lives at the bottom of the per-cpu Xen stack.
 */

#ifndef __X86_CURRENT_H__
#define __X86_CURRENT_H__

#include <xen/percpu.h>
#include <public/xen.h>
#include <asm/page.h>

/*
 * Xen's cpu stacks are 8 pages (8-page aligned), arranged as:
 *
 * 7 - Primary stack (with a struct cpu_info at the top)
 * 6 - Primary stack
 * 5 - Primary Shadow Stack (read-only)
 * 4 - #DF IST stack
 * 3 - #DB IST stack
 * 2 - NMI IST stack
 * 1 - #MC IST stack
 * 0 - IST Shadow Stacks (4x 1k, read-only)
 */

/*
 * Identify which stack page the stack pointer is on.  Returns an index
 * as per the comment above.
 */
static inline unsigned int get_stack_page(unsigned long sp)
{
    return (sp & (STACK_SIZE-1)) >> PAGE_SHIFT;
}
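
/*
 * Illustrative example (not part of the original header): with
 * STACK_SIZE == 8 * PAGE_SIZE, any sp within the topmost page of the stack
 * block yields 7 (primary stack), while an sp within the lowest page yields
 * 0 (IST shadow stacks), matching the layout comment above.
 */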

struct vcpu;

struct cpu_info {
    struct cpu_user_regs guest_cpu_user_regs;
    unsigned int processor_id;
    unsigned int verw_sel;
    struct vcpu *current_vcpu;
    unsigned long per_cpu_offset;
    unsigned long cr4;
    /*
     * The following two fields control CR3 switching for 64-bit PV guests:
     * pv_cr3 holds the CR3 value to be loaded on this pCPU whenever 64-bit
     * PV guest context is entered (a value of zero means no CR3 load is to
     * be performed), while xen_cr3 holds the value to restore when
     * re-entering Xen, if any (again, zero means there is nothing to
     * restore).
     */
    unsigned long xen_cr3;
    unsigned long pv_cr3;

    /* See asm-x86/spec_ctrl_asm.h for usage. */
    unsigned int shadow_spec_ctrl;
    uint8_t      xen_spec_ctrl;
    uint8_t      spec_ctrl_flags;

    /*
     * The following field controls copying of the L4 page table of 64-bit
     * PV guests to the per-cpu root page table on entering the guest context.
     * If set the L4 page table is being copied to the root page table and
     * the field will be reset.
     */
    bool         root_pgt_changed;

    /*
     * use_pv_cr3 is set in case the value of pv_cr3 is to be written into
     * CR3 when returning from an interrupt. The main use is when returning
     * from an NMI or MCE to hypervisor code where pv_cr3 was active.
     */
    bool         use_pv_cr3;

    unsigned long __pad;
    /* get_stack_bottom() must be 16-byte aligned */
};

static inline struct cpu_info *get_cpu_info_from_stack(unsigned long sp)
{
    return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
}
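
/*
 * Worked example (illustrative): (sp | (STACK_SIZE - 1)) + 1 rounds sp up to
 * the address just past the top of the 8-page stack block; subtracting one
 * struct cpu_info then yields the cpu_info instance sitting at the top of
 * the primary stack page.
 */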

static inline struct cpu_info *get_cpu_info(void)
{
#ifdef __clang__
    /* Clang complains that sp in the else case is not initialised. */
    unsigned long sp;
    asm ( "mov %%rsp, %0" : "=r" (sp) );
#else
    register unsigned long sp asm("rsp");
#endif

    return get_cpu_info_from_stack(sp);
}

#define get_current()         (get_cpu_info()->current_vcpu)
#define set_current(vcpu)     (get_cpu_info()->current_vcpu = (vcpu))
#define current               (get_current())

#define get_processor_id()    (get_cpu_info()->processor_id)
#define guest_cpu_user_regs() (&get_cpu_info()->guest_cpu_user_regs)
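
/*
 * Usage sketch (illustrative): code servicing a guest can refer to the vCPU
 * whose registers are on this stack simply as 'current', e.g.
 * current->domain.
 */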

/*
 * Get the bottom-of-stack, as stored in the per-CPU TSS. This actually points
 * into the middle of cpu_info.guest_cpu_user_regs, at the section that
 * precisely corresponds to a CPU trap frame.
 */
#define get_stack_bottom()                      \
    ((unsigned long)&get_cpu_info()->guest_cpu_user_regs.es)

/*
 * Get the reasonable stack bounds for stack traces and stack dumps.  Stack
 * dumps have a slightly larger range to include exception frames in the
 * printed information.  The returned word is inside the interesting range.
 */
unsigned long get_stack_trace_bottom(unsigned long sp);
unsigned long get_stack_dump_bottom (unsigned long sp);

#ifdef CONFIG_LIVEPATCH
# define CHECK_FOR_LIVEPATCH_WORK "call check_for_livepatch_work;"
#else
# define CHECK_FOR_LIVEPATCH_WORK ""
#endif

#ifdef CONFIG_XEN_SHSTK
/*
 * We need to unwind the primary shadow stack to its supervisor token, located
 * in the last word of the primary shadow stack.
 *
 * Read the shadow stack pointer, subtract it from the supervisor token
 * position, and divide by 8 to get the number of slots needing popping.
 *
 * INCSSPQ can't pop more than 255 entries.  We shouldn't ever need to pop
 * that many entries, and getting this wrong will cause us to #DF later.  Turn
 * it into a BUG() now for fractionally easier debugging.
 */
# define SHADOW_STACK_WORK                                      \
    "mov $1, %[ssp];"                                           \
    "rdsspd %[ssp];"                                            \
    "cmp $1, %[ssp];"                                           \
    "je .L_shstk_done.%=;" /* CET not active?  Skip. */         \
    "mov $%c[skstk_base], %[val];"                              \
    "and $%c[stack_mask], %[ssp];"                              \
    "sub %[ssp], %[val];"                                       \
    "shr $3, %[val];"                                           \
    "cmp $255, %[val];" /* More than 255 entries?  Crash. */    \
    UNLIKELY_START(a, shstk_adjust)                             \
    _ASM_BUGFRAME_TEXT(0)                                       \
    UNLIKELY_END_SECTION ";"                                    \
    "incsspq %q[val];"                                          \
    ".L_shstk_done.%=:"
#else
# define SHADOW_STACK_WORK ""
#endif
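
/*
 * Worked example (illustrative, assuming PRIMARY_SHSTK_SLOT == 5 as per the
 * layout comment above, and 4k pages): skstk_base is
 * (5 + 1) * PAGE_SIZE - 8 == 0x5ff8, the offset of the supervisor token
 * within the stack block.  If the shadow stack pointer's offset within the
 * block (ssp & stack_mask) is 0x5fd8, then (0x5ff8 - 0x5fd8) >> 3 == 4 slots
 * need popping via INCSSPQ to reach the token.
 */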

#define switch_stack_and_jump(fn, instr, constr)                        \
    ({                                                                  \
        unsigned int tmp;                                               \
        __asm__ __volatile__ (                                          \
            SHADOW_STACK_WORK                                           \
            "mov %[stk], %%rsp;"                                        \
            CHECK_FOR_LIVEPATCH_WORK                                    \
            instr "[fun]"                                               \
            : [val] "=&r" (tmp),                                        \
              [ssp] "=&r" (tmp)                                         \
            : [stk] "r" (guest_cpu_user_regs()),                        \
              [fun] constr (fn),                                        \
              [skstk_base] "i"                                          \
              ((PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8),               \
              [stack_mask] "i" (STACK_SIZE - 1),                        \
              _ASM_BUGFRAME_INFO(BUGFRAME_bug, __LINE__,                \
                                 __FILE__, NULL)                        \
            : "memory" );                                               \
        unreachable();                                                  \
    })

#define reset_stack_and_jump(fn)                                        \
    switch_stack_and_jump(fn, "jmp %c", "i")

/* The constraint may only specify non-call-clobbered registers. */
#define reset_stack_and_jump_ind(fn)                                    \
    switch_stack_and_jump(fn, "INDIRECT_JMP %", "b")
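
/*
 * Usage sketch (illustrative, not from this header): a path which is done
 * with the current stack contents can tail into a new context with e.g.
 * reset_stack_and_jump(idle_loop); a target only known at run time (held in
 * a function pointer) goes via reset_stack_and_jump_ind() instead.
 */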

/*
 * Which VCPU's state is currently running on each CPU?
 * This is not necessarily the same as 'current' as a CPU may be
 * executing a lazy state switch.
 */
DECLARE_PER_CPU(struct vcpu *, curr_vcpu);

#endif /* __X86_CURRENT_H__ */