/*
 *  arch/x86/xstate.c
 *
 *  x86 extended state operations
 *
 */

#include <xen/param.h>
#include <xen/percpu.h>
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/processor.h>
#include <asm/hvm/support.h>
#include <asm/i387.h>
#include <asm/xstate.h>
#include <asm/asm_defns.h>

/*
 * Maximum size (in bytes) of the XSAVE/XRSTOR save area required by all
 * the supported and enabled features on the processor, including the
 * XSAVE.HEADER. We only enable the features covered by XCNTXT_MASK.
 */
static u32 __read_mostly xsave_cntxt_size;

/* A 64-bit bitmask of the XSAVE/XRSTOR features supported by the processor. */
u64 __read_mostly xfeature_mask;

unsigned int *__read_mostly xstate_offsets;
unsigned int *__read_mostly xstate_sizes;
u64 __read_mostly xstate_align;
static unsigned int __read_mostly xstate_features;

uint32_t __read_mostly mxcsr_mask = 0x0000ffbf;

/* Cached xcr0 for fast read */
static DEFINE_PER_CPU(uint64_t, xcr0);

/* Because XCR0 is cached for each CPU, xsetbv() is not exposed. Users should
 * use set_xcr0() instead.
 */
static inline bool xsetbv(u32 index, u64 xfeatures)
{
    u32 hi = xfeatures >> 32;
    u32 lo = (u32)xfeatures;

    asm volatile ( "1: .byte 0x0f,0x01,0xd1\n"
                   "3:                     \n"
                   ".section .fixup,\"ax\" \n"
                   "2: xor %0,%0           \n"
                   "   jmp 3b              \n"
                   ".previous              \n"
                   _ASM_EXTABLE(1b, 2b)
                   : "+a" (lo)
                   : "c" (index), "d" (hi));
    return lo != 0;
}

bool set_xcr0(u64 xfeatures)
{
    if ( !xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures) )
        return false;
    this_cpu(xcr0) = xfeatures;
    return true;
}

uint64_t get_xcr0(void)
{
    return this_cpu(xcr0);
}

/* Cached xss for fast read */
static DEFINE_PER_CPU(uint64_t, xss);

void set_msr_xss(u64 xss)
{
    u64 *this_xss = &this_cpu(xss);

    if ( *this_xss != xss )
    {
        wrmsrl(MSR_IA32_XSS, xss);
        *this_xss = xss;
    }
}

uint64_t get_msr_xss(void)
{
    return this_cpu(xss);
}

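/*
 * Record (on the BSP) or verify (on APs) the size, offset and alignment of
 * each extended state component, as reported by CPUID leaf 0xD sub-leaves
 * 2 and upwards.  Returns -ENOMEM if the tracking arrays cannot be
 * allocated.
 */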
static int setup_xstate_features(bool bsp)
{
    unsigned int leaf, eax, ebx, ecx, edx;

    if ( bsp )
    {
        xstate_features = flsl(xfeature_mask);
        xstate_offsets = xzalloc_array(unsigned int, xstate_features);
        if ( !xstate_offsets )
            return -ENOMEM;

        xstate_sizes = xzalloc_array(unsigned int, xstate_features);
        if ( !xstate_sizes )
            return -ENOMEM;
    }

    for ( leaf = 2; leaf < xstate_features; leaf++ )
    {
        if ( bsp )
        {
            cpuid_count(XSTATE_CPUID, leaf, &xstate_sizes[leaf],
                        &xstate_offsets[leaf], &ecx, &edx);
            if ( ecx & XSTATE_ALIGN64 )
                __set_bit(leaf, &xstate_align);
        }
        else
        {
            cpuid_count(XSTATE_CPUID, leaf, &eax,
                        &ebx, &ecx, &edx);
            BUG_ON(eax != xstate_sizes[leaf]);
            BUG_ON(ebx != xstate_offsets[leaf]);
            BUG_ON(!(ecx & XSTATE_ALIGN64) != !test_bit(leaf, &xstate_align));
        }
    }

    return 0;
}

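/*
 * Compute the offset of each component selected by @xcomp_bv within a
 * compacted (XSAVEC/XSAVES) save area: components are laid out in feature
 * order immediately after the legacy region and the XSAVE header, with
 * 64-byte alignment applied where the hardware requires it.
 *
 * Illustrative sketch (component sizes quoted from the SDM, not from this
 * file): with xcomp_bv = FP|SSE|YMM, the 256-byte YMM block (component 2)
 * starts at FXSAVE_SIZE + XSAVE_HDR_SIZE = 512 + 64 = 576, and a following
 * 64-byte-aligned component would start at ROUNDUP(576 + 256, 64) = 832.
 */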
static void setup_xstate_comp(uint16_t *comp_offsets,
                              const uint64_t xcomp_bv)
{
    unsigned int i;
    unsigned int offset;

    /*
     * The FP xstates and SSE xstates are legacy states. They are always
     * in the fixed offsets in the xsave area in either compacted form
     * or standard form.
     */
    comp_offsets[0] = 0;
    comp_offsets[1] = XSAVE_SSE_OFFSET;

    comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;

    offset = comp_offsets[2];
    for ( i = 2; i < xstate_features; i++ )
    {
        if ( (1ul << i) & xcomp_bv )
        {
            if ( test_bit(i, &xstate_align) )
                offset = ROUNDUP(offset, 64);
            comp_offsets[i] = offset;
            offset += xstate_sizes[i];
        }
    }
    ASSERT(offset <= xsave_cntxt_size);
}

/*
 * Serialise a vcpu's xsave state into a representation suitable for the
 * toolstack.
 *
 * Internally a vcpu's xsave state may be compressed or uncompressed,
 * depending on the features in use, but the ABI with the toolstack is
 * strictly uncompressed.
 *
 * It is the caller's responsibility to ensure that there is xsave state to
 * serialise, and that the provided buffer is exactly the right size.
 */
void expand_xsave_states(struct vcpu *v, void *dest, unsigned int size)
{
    const struct xsave_struct *xsave = v->arch.xsave_area;
    const void *src;
    uint16_t comp_offsets[sizeof(xfeature_mask)*8];
    u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
    u64 valid;

    /* Check there is state to serialise (i.e. at least an XSAVE_HDR) */
    BUG_ON(!v->arch.xcr0_accum);
    /* Check there is the correct room to decompress into. */
    BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum));

    if ( !(xsave->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED) )
    {
        memcpy(dest, xsave, size);
        return;
    }

    ASSERT(xsave_area_compressed(xsave));
    setup_xstate_comp(comp_offsets, xsave->xsave_hdr.xcomp_bv);

    /*
     * Copy legacy XSAVE area and XSAVE hdr area.
     */
    memcpy(dest, xsave, XSTATE_AREA_MIN_SIZE);
    memset(dest + XSTATE_AREA_MIN_SIZE, 0, size - XSTATE_AREA_MIN_SIZE);

    ((struct xsave_struct *)dest)->xsave_hdr.xcomp_bv = 0;

    /*
     * Copy each region from the possibly compacted offset to the
     * non-compacted offset.
     */
    src = xsave;
    valid = xstate_bv & ~XSTATE_FP_SSE;
    while ( valid )
    {
        u64 feature = valid & -valid;
        unsigned int index = fls(feature) - 1;

        /*
         * We previously verified xstate_bv.  If there isn't valid
         * comp_offsets[] information, something is very broken.
         */
        BUG_ON(!comp_offsets[index]);
        BUG_ON((xstate_offsets[index] + xstate_sizes[index]) > size);

        memcpy(dest + xstate_offsets[index], src + comp_offsets[index],
               xstate_sizes[index]);

        valid &= ~feature;
    }
}

/*
 * Deserialise a toolstack's xsave state representation into a form suitable
 * for a vcpu.
 *
 * Internally a vcpu's xsave state may be compressed or uncompressed,
 * depending on the features in use, but the ABI with the toolstack is
 * strictly uncompressed.
 *
 * It is the caller's responsibility to ensure that the source buffer
 * contains xsave state, is uncompressed, and is exactly the right size.
 */
void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size)
{
    struct xsave_struct *xsave = v->arch.xsave_area;
    void *dest;
    uint16_t comp_offsets[sizeof(xfeature_mask)*8];
    u64 xstate_bv, valid;

    BUG_ON(!v->arch.xcr0_accum);
    BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum));
    ASSERT(!xsave_area_compressed(src));

    xstate_bv = ((const struct xsave_struct *)src)->xsave_hdr.xstate_bv;

    if ( !(v->arch.xcr0_accum & XSTATE_XSAVES_ONLY) )
    {
        memcpy(xsave, src, size);
        return;
    }

    /*
     * Copy legacy XSAVE area, to avoid complications with CPUID
     * leaves 0 and 1 in the loop below.
     */
    memcpy(xsave, src, FXSAVE_SIZE);

    /* Set XSTATE_BV and XCOMP_BV.  */
    xsave->xsave_hdr.xstate_bv = xstate_bv;
    xsave->xsave_hdr.xcomp_bv = v->arch.xcr0_accum | XSTATE_COMPACTION_ENABLED;

    setup_xstate_comp(comp_offsets, xsave->xsave_hdr.xcomp_bv);

    /*
     * Copy each region from the non-compacted offset to the
     * possibly compacted offset.
     */
    dest = xsave;
    valid = xstate_bv & ~XSTATE_FP_SSE;
    while ( valid )
    {
        u64 feature = valid & -valid;
        unsigned int index = fls(feature) - 1;

        /*
         * We previously verified xstate_bv.  If we don't have valid
         * comp_offsets[] information, something is very broken.
         */
        BUG_ON(!comp_offsets[index]);
        BUG_ON((xstate_offsets[index] + xstate_sizes[index]) > size);

        memcpy(dest + comp_offsets[index], src + xstate_offsets[index],
               xstate_sizes[index]);

        valid &= ~feature;
    }
}

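/*
 * Save the state components selected by @mask into v's xsave area, using
 * XSAVES when the vcpu may hold supervisor state (XSTATE_XSAVES_ONLY) and
 * XSAVE/XSAVEOPT otherwise.  Most of the function is concerned with
 * recording whether the hardware wrote 32-bit or 64-bit FIP/FDP values, so
 * that xrstor() can pick the matching reload form later.
 */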
void xsave(struct vcpu *v, uint64_t mask)
{
    struct xsave_struct *ptr = v->arch.xsave_area;
    uint32_t hmask = mask >> 32;
    uint32_t lmask = mask;
    unsigned int fip_width = v->domain->arch.x87_fip_width;
#define XSAVE(pfx) \
        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
            asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
                           : "=m" (*ptr) \
                           : "a" (lmask), "d" (hmask), "D" (ptr) ); \
        else \
            alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
                           ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
                           X86_FEATURE_XSAVEOPT, \
                           "=m" (*ptr), \
                           "a" (lmask), "d" (hmask), "D" (ptr))

    if ( fip_width == 8 || !(mask & X86_XCR0_FP) )
    {
        XSAVE("0x48,");
    }
    else if ( fip_width == 4 )
    {
        XSAVE("");
    }
    else
    {
        /*
         * FIP/FDP may not be written in some cases (e.g., if XSAVEOPT/XSAVES
         * is used, or on AMD CPUs if an exception isn't pending).
         *
         * To tell if the hardware writes these fields, poison the FIP field.
         * The poison is
         * a) non-canonical,
         * b) non-zero for the reserved part of a 32-bit FCS:FIP, and
         * c) random, with a vanishingly small probability (1e-19) of matching
         *    a value the hardware may write, even if it did not canonicalize
         *    the 64-bit FIP or zero-extend the 16-bit FCS.
         */
        uint64_t orig_fip = ptr->fpu_sse.fip.addr;
        const uint64_t bad_fip = 0x6a3f5c4b13a533f6;

        ptr->fpu_sse.fip.addr = bad_fip;

        XSAVE("0x48,");

        /* FIP/FDP not updated? Restore the old FIP value. */
        if ( ptr->fpu_sse.fip.addr == bad_fip )
        {
            ptr->fpu_sse.fip.addr = orig_fip;
            return;
        }

        /*
         * If FIP/FDP[63:32] are both zero, it is safe to use the
         * 32-bit restore to also restore the selectors.
         */
        if ( !((ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr) >> 32) )
        {
            struct ix87_env fpu_env;

            asm volatile ( "fnstenv %0" : "=m" (fpu_env) );
            ptr->fpu_sse.fip.sel = fpu_env.fcs;
            ptr->fpu_sse.fdp.sel = fpu_env.fds;
            fip_width = 4;
        }
        else
            fip_width = 8;
    }
#undef XSAVE
    if ( mask & X86_XCR0_FP )
        ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = fip_width;
}

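/*
 * Restore the state components selected by @mask from v's xsave area.
 * XRSTOR(S) can fault on a corrupted image, so the instruction is wrapped
 * in a retry loop which first sanitises the XSAVE header (stage 1), then
 * resets the state entirely (stage 2), and finally crashes the domain if
 * faults persist.
 */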
void xrstor(struct vcpu *v, uint64_t mask)
{
    uint32_t hmask = mask >> 32;
    uint32_t lmask = mask;
    struct xsave_struct *ptr = v->arch.xsave_area;
    unsigned int faults, prev_faults;

    /*
     * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
     * is pending. Clear the x87 state here by setting it to fixed
     * values. The hypervisor data segment can be sometimes 0 and
     * sometimes the new user value. Both should be ok. Use the FPU
     * saved data block as a safe address because it should be in L1.
     */
    if ( cpu_bug_fpu_ptrs &&
         !(ptr->fpu_sse.fsw & ~ptr->fpu_sse.fcw & 0x003f) )
        asm volatile ( "fnclex\n\t"        /* clear exceptions */
                       "ffree %%st(7)\n\t" /* clear stack tag */
                       "fildl %0"          /* load to clear state */
                       : : "m" (ptr->fpu_sse) );

    /*
     * XRSTOR can fault if passed a corrupted data block. We handle this
     * possibility, which may occur if the block was passed to us by control
     * tools or through VCPUOP_initialise, by silently adjusting state.
     */
    for ( prev_faults = faults = 0; ; prev_faults = faults )
    {
        switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
        {
            BUILD_BUG_ON(sizeof(faults) != 4); /* Clang doesn't support %z in asm. */
#define _xrstor(insn) \
        asm volatile ( "1: .byte " insn "\n" \
                       "3:\n" \
                       "   .section .fixup,\"ax\"\n" \
                       "2: incl %[faults]\n" \
                       "   jmp 3b\n" \
                       "   .previous\n" \
                       _ASM_EXTABLE(1b, 2b) \
                       : [mem] "+m" (*ptr), [faults] "+g" (faults) \
                       : [lmask] "a" (lmask), [hmask] "d" (hmask), \
                         [ptr] "D" (ptr) )

#define XRSTOR(pfx) \
        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
        { \
            if ( unlikely(!(ptr->xsave_hdr.xcomp_bv & \
                            XSTATE_COMPACTION_ENABLED)) ) \
            { \
                ASSERT(!ptr->xsave_hdr.xcomp_bv); \
                ptr->xsave_hdr.xcomp_bv = ptr->xsave_hdr.xstate_bv | \
                                          XSTATE_COMPACTION_ENABLED; \
            } \
            _xrstor(pfx "0x0f,0xc7,0x1f"); /* xrstors */ \
        } \
        else \
            _xrstor(pfx "0x0f,0xae,0x2f") /* xrstor */

        default:
            XRSTOR("0x48,");
            break;
        case 4: case 2:
            XRSTOR("");
            break;
#undef XRSTOR
#undef _xrstor
        }
        if ( likely(faults == prev_faults) )
            break;
#ifndef NDEBUG
        gprintk(XENLOG_WARNING, "fault#%u: mxcsr=%08x\n",
                faults, ptr->fpu_sse.mxcsr);
        gprintk(XENLOG_WARNING, "xs=%016lx xc=%016lx\n",
                ptr->xsave_hdr.xstate_bv, ptr->xsave_hdr.xcomp_bv);
        gprintk(XENLOG_WARNING, "r0=%016lx r1=%016lx\n",
                ptr->xsave_hdr.reserved[0], ptr->xsave_hdr.reserved[1]);
        gprintk(XENLOG_WARNING, "r2=%016lx r3=%016lx\n",
                ptr->xsave_hdr.reserved[2], ptr->xsave_hdr.reserved[3]);
        gprintk(XENLOG_WARNING, "r4=%016lx r5=%016lx\n",
                ptr->xsave_hdr.reserved[4], ptr->xsave_hdr.reserved[5]);
#endif
        switch ( faults )
        {
        case 1: /* Stage 1: Reset state to be loaded. */
            ptr->xsave_hdr.xstate_bv &= ~mask;
            /*
             * Also try to eliminate fault reasons, even if this shouldn't be
             * needed here (other code should ensure the sanity of the data).
             */
            if ( ((mask & X86_XCR0_SSE) ||
                  ((mask & X86_XCR0_YMM) &&
                   !(ptr->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED))) )
                ptr->fpu_sse.mxcsr &= mxcsr_mask;
            if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )
            {
                ptr->xsave_hdr.xcomp_bv &= this_cpu(xcr0) | this_cpu(xss);
                ptr->xsave_hdr.xstate_bv &= ptr->xsave_hdr.xcomp_bv;
                ptr->xsave_hdr.xcomp_bv |= XSTATE_COMPACTION_ENABLED;
            }
            else
            {
                ptr->xsave_hdr.xstate_bv &= this_cpu(xcr0);
                ptr->xsave_hdr.xcomp_bv = 0;
            }
            memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
            continue;

        case 2: /* Stage 2: Reset all state. */
            ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
            ptr->xsave_hdr.xstate_bv = 0;
            ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
                                      ? XSTATE_COMPACTION_ENABLED : 0;
            continue;
        }

        domain_crash(current->domain);
        return;
    }
}

bool xsave_enabled(const struct vcpu *v)
{
    if ( !cpu_has_xsave )
        return false;

    ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
    ASSERT(v->arch.xsave_area);

    return !!v->arch.xcr0_accum;
}

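/*
 * Allocate and minimally initialise a vcpu's xsave area.  Idle vcpus on
 * XSAVEC-capable hardware get a reduced-size area (see the comment in the
 * else branch below); all other vcpus get the full xsave_cntxt_size.
 */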
int xstate_alloc_save_area(struct vcpu *v)
{
    struct xsave_struct *save_area;
    unsigned int size;

    if ( !cpu_has_xsave )
        return 0;

    if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
    {
        size = xsave_cntxt_size;
        BUG_ON(size < XSTATE_AREA_MIN_SIZE);
    }
    else
    {
        /*
         * For idle vcpus on XSAVEC-capable CPUs allocate an area large
         * enough to save any individual extended state.
         */
        unsigned int i;

        for ( size = 0, i = 2; i < xstate_features; ++i )
            if ( size < xstate_sizes[i] )
                size = xstate_sizes[i];
        size += XSTATE_AREA_MIN_SIZE;
    }

    /* XSAVE/XRSTOR requires the save area to be 64-byte aligned. */
    BUILD_BUG_ON(__alignof(*save_area) < 64);
    save_area = _xzalloc(size, __alignof(*save_area));
    if ( save_area == NULL )
        return -ENOMEM;

    /*
     * Set the memory image to default values, but don't force the context
     * to be loaded from memory (i.e. keep save_area->xsave_hdr.xstate_bv
     * clear).
     */
    save_area->fpu_sse.fcw = FCW_DEFAULT;
    save_area->fpu_sse.mxcsr = MXCSR_DEFAULT;

    v->arch.xsave_area = save_area;
    v->arch.xcr0 = 0;
    v->arch.xcr0_accum = 0;

    return 0;
}

void xstate_free_save_area(struct vcpu *v)
{
    xfree(v->arch.xsave_area);
    v->arch.xsave_area = NULL;
}

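/*
 * Query the save area size needed for the feature set in @xcr0 by
 * temporarily loading it into XCR0 and reading CPUID leaf 0xD: EBX reports
 * the size required by the currently enabled features, ECX the size for
 * all supported features (hence the EBX <= ECX assertion).
 */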
static unsigned int _xstate_ctxt_size(u64 xcr0)
{
    u64 act_xcr0 = get_xcr0();
    u32 eax, ebx = 0, ecx, edx;
    bool ok = set_xcr0(xcr0);

    ASSERT(ok);
    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
    ASSERT(ebx <= ecx);
    ok = set_xcr0(act_xcr0);
    ASSERT(ok);

    return ebx;
}

/* Fastpath for common xstate size requests, avoiding reloads of xcr0. */
unsigned int xstate_ctxt_size(u64 xcr0)
{
    if ( xcr0 == xfeature_mask )
        return xsave_cntxt_size;

    if ( xcr0 == 0 )
        return 0;

    return _xstate_ctxt_size(xcr0);
}

/* Collect information about the processor's extended state. */
void xstate_init(struct cpuinfo_x86 *c)
{
    /*
     * NB: use_xsave cannot live in initdata because llvm might optimize
     * reading it, see: https://bugs.llvm.org/show_bug.cgi?id=39707
     */
    static bool __read_mostly use_xsave = true;
    boolean_param("xsave", use_xsave);

    bool bsp = c == &boot_cpu_data;
    u32 eax, ebx, ecx, edx;
    u64 feature_mask;

    if ( bsp )
    {
        static typeof(current->arch.xsave_area->fpu_sse) __initdata ctxt;

        asm ( "fxsave %0" : "=m" (ctxt) );
        if ( ctxt.mxcsr_mask )
            mxcsr_mask = ctxt.mxcsr_mask;
    }

    if ( !cpu_has_xsave )
        return;

    if ( (bsp && !use_xsave) ||
         boot_cpu_data.cpuid_level < XSTATE_CPUID )
    {
        BUG_ON(!bsp);
        setup_clear_cpu_cap(X86_FEATURE_XSAVE);
        return;
    }

    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);

    BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE);
    BUG_ON((eax & X86_XCR0_YMM) && !(eax & X86_XCR0_SSE));
    feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK;

    /*
     * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size.
     */
    set_in_cr4(X86_CR4_OSXSAVE);
    if ( !set_xcr0(feature_mask) )
        BUG();

    if ( bsp )
    {
        xfeature_mask = feature_mask;
        /*
         * xsave_cntxt_size is the max size required by enabled features.
         * We know about FP/SSE and YMM from eax, and nothing about edx at
         * present.
         */
        xsave_cntxt_size = _xstate_ctxt_size(feature_mask);
        printk("xstate: size: %#x and states: %#"PRIx64"\n",
               xsave_cntxt_size, xfeature_mask);
    }
    else
    {
        BUG_ON(xfeature_mask != feature_mask);
        BUG_ON(xsave_cntxt_size != _xstate_ctxt_size(feature_mask));
    }

    if ( setup_xstate_features(bsp) && bsp )
        BUG();
}

static bool valid_xcr0(u64 xcr0)
{
    /* FP must be unconditionally set. */
    if ( !(xcr0 & X86_XCR0_FP) )
        return false;

    /* YMM depends on SSE. */
    if ( (xcr0 & X86_XCR0_YMM) && !(xcr0 & X86_XCR0_SSE) )
        return false;

    if ( xcr0 & (X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM) )
    {
        /* OPMASK, ZMM, and HI_ZMM require YMM. */
        if ( !(xcr0 & X86_XCR0_YMM) )
            return false;

        /* OPMASK, ZMM, and HI_ZMM must be the same. */
        if ( ~xcr0 & (X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM) )
            return false;
    }

    /* BNDREGS and BNDCSR must be the same. */
    return !(xcr0 & X86_XCR0_BNDREGS) == !(xcr0 & X86_XCR0_BNDCSR);
}

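/*
 * Sanity-check an (xcr0, xcr0_accum, xsave header) triple coming from a
 * toolstack or guest before acting on it: the accumulated mask must cover
 * both the current XCR0 and XSTATE_BV, must fit within the domain's CPUID
 * policy and the host's capabilities, and the header must be in the
 * uncompressed format with all reserved bits clear.
 */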
int validate_xstate(const struct domain *d, uint64_t xcr0, uint64_t xcr0_accum,
                    const struct xsave_hdr *hdr)
{
    uint64_t xcr0_max = cpuid_policy_xcr0_max(d->arch.cpuid);
    unsigned int i;

    if ( (hdr->xstate_bv & ~xcr0_accum) ||
         (xcr0 & ~xcr0_accum) ||
         (xcr0_accum & ~xcr0_max) ||
         !valid_xcr0(xcr0) ||
         !valid_xcr0(xcr0_accum) )
        return -EINVAL;

    if ( (xcr0_accum & ~xfeature_mask) ||
         hdr->xcomp_bv )
        return -EOPNOTSUPP;

    for ( i = 0; i < ARRAY_SIZE(hdr->reserved); ++i )
        if ( hdr->reserved[i] )
            return -EIO;

    return 0;
}

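/*
 * Emulate a guest XSETBV (ECX = @index, EDX:EAX = @new_bv): validate the
 * requested XCR0 against the domain's CPUID policy, load it into hardware,
 * and reload any newly accumulated components from the vcpu's save area
 * via xrstor() so the registers match the in-memory image.
 */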
int handle_xsetbv(u32 index, u64 new_bv)
{
    struct vcpu *curr = current;
    uint64_t xcr0_max = cpuid_policy_xcr0_max(curr->domain->arch.cpuid);
    u64 mask;

    if ( index != XCR_XFEATURE_ENABLED_MASK )
        return -EOPNOTSUPP;

    /*
     * The CPUID logic shouldn't be able to hand out an XCR0 exceeding Xen's
     * maximum features, but keep the check for robustness.
     */
    if ( unlikely(xcr0_max & ~xfeature_mask) )
    {
        gprintk(XENLOG_ERR,
                "xcr0_max %016" PRIx64 " exceeds hardware max %016" PRIx64 "\n",
                xcr0_max, xfeature_mask);
        domain_crash(curr->domain);

        return -EINVAL;
    }

    if ( (new_bv & ~xcr0_max) || !valid_xcr0(new_bv) )
        return -EINVAL;

    /* By this point, new_bv really should be accepted by hardware. */
    if ( unlikely(!set_xcr0(new_bv)) )
    {
        gprintk(XENLOG_ERR, "new_bv %016" PRIx64 " rejected by hardware\n",
                new_bv);
        domain_crash(curr->domain);

        return -EFAULT;
    }

    mask = new_bv & ~curr->arch.xcr0_accum;
    curr->arch.xcr0 = new_bv;
    curr->arch.xcr0_accum |= new_bv;

    if ( new_bv & XSTATE_NONLAZY )
        curr->arch.nonlazy_xstate_used = 1;

    mask &= curr->fpu_dirtied ? ~XSTATE_FP_SSE : XSTATE_NONLAZY;
    if ( mask )
    {
        unsigned long cr0 = read_cr0();

        clts();
        if ( curr->fpu_dirtied )
            asm ( "stmxcsr %0" : "=m" (curr->arch.xsave_area->fpu_sse.mxcsr) );
        else if ( xstate_all(curr) )
        {
            /* See the comment in i387.c:vcpu_restore_fpu_eager(). */
            mask |= XSTATE_LAZY;
            curr->fpu_initialised = 1;
            curr->fpu_dirtied = 1;
            cr0 &= ~X86_CR0_TS;
        }
        xrstor(curr, mask);
        if ( cr0 & X86_CR0_TS )
            write_cr0(cr0);
    }

    return 0;
}

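/*
 * Read the current BNDCFGU value by saving just the BNDCSR component into
 * the idle vcpu's xsave area (XSAVEC if available, XSAVE otherwise) and
 * extracting the value from the in-memory image.  Returns 0 if the
 * component is in its init state.
 */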
uint64_t read_bndcfgu(void)
{
    unsigned long cr0 = read_cr0();
    struct xsave_struct *xstate
        = idle_vcpu[smp_processor_id()]->arch.xsave_area;
    const struct xstate_bndcsr *bndcsr;

    ASSERT(cpu_has_mpx);
    clts();

    if ( cpu_has_xsavec )
    {
        asm ( ".byte 0x0f,0xc7,0x27\n" /* xsavec */
              : "=m" (*xstate)
              : "a" (X86_XCR0_BNDCSR), "d" (0), "D" (xstate) );

        bndcsr = (void *)(xstate + 1);
    }
    else
    {
        asm ( ".byte 0x0f,0xae,0x27\n" /* xsave */
              : "=m" (*xstate)
              : "a" (X86_XCR0_BNDCSR), "d" (0), "D" (xstate) );

        bndcsr = (void *)xstate + xstate_offsets[X86_XCR0_BNDCSR_POS];
    }

    if ( cr0 & X86_CR0_TS )
        write_cr0(cr0);

    return xstate->xsave_hdr.xstate_bv & X86_XCR0_BNDCSR ? bndcsr->bndcfgu : 0;
}

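/*
 * Force the components in @mask back to their architectural init state by
 * XRSTORing from the idle vcpu's save area with a zeroed XSAVE header
 * (a clear XSTATE_BV bit causes the corresponding component to be
 * initialised).
 */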
void xstate_set_init(uint64_t mask)
{
    unsigned long cr0 = read_cr0();
    unsigned long xcr0 = this_cpu(xcr0);
    struct vcpu *v = idle_vcpu[smp_processor_id()];
    struct xsave_struct *xstate = v->arch.xsave_area;

    if ( ~xfeature_mask & mask )
    {
        ASSERT_UNREACHABLE();
        return;
    }

    if ( (~xcr0 & mask) && !set_xcr0(xcr0 | mask) )
        return;

    clts();

    memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr));
    xrstor(v, mask);

    if ( cr0 & X86_CR0_TS )
        write_cr0(cr0);

    if ( (~xcr0 & mask) && !set_xcr0(xcr0) )
        BUG();
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */