/******************************************************************************
 * arch/x86/spec_ctrl.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (c) 2017-2018 Citrix Systems Ltd.
 */
#include <xen/errno.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/param.h>
#include <xen/warning.h>

#include <asm/microcode.h>
#include <asm/msr.h>
#include <asm/pv/domain.h>
#include <asm/pv/shim.h>
#include <asm/setup.h>
#include <asm/spec_ctrl.h>
#include <asm/spec_ctrl_asm.h>

/* Cmdline controls for Xen's alternative blocks. */
static bool __initdata opt_msr_sc_pv = true;
static bool __initdata opt_msr_sc_hvm = true;
static bool __initdata opt_rsb_pv = true;
static bool __initdata opt_rsb_hvm = true;
static int8_t __initdata opt_md_clear_pv = -1;
static int8_t __initdata opt_md_clear_hvm = -1;

/* Cmdline controls for Xen's speculative settings. */
static enum ind_thunk {
    THUNK_DEFAULT, /* Decide which thunk to use at boot time. */
    THUNK_NONE,    /* Missing compiler support for thunks. */

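    /*
     * Illustrative summary: the retpoline thunk traps indirect-branch
     * speculation with a call/ret construct, the lfence thunk (AMD's
     * recommendation) serialises before a plain indirect jmp, and the jmp
     * thunk is an unprotected indirect jmp.
     */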
    THUNK_RETPOLINE,
    THUNK_LFENCE,
    THUNK_JMP,
} opt_thunk __initdata = THUNK_DEFAULT;
static int8_t __initdata opt_ibrs = -1;
bool __read_mostly opt_ibpb = true;
bool __read_mostly opt_ssbd = false;
int8_t __read_mostly opt_eager_fpu = -1;
int8_t __read_mostly opt_l1d_flush = -1;
bool __read_mostly opt_branch_harden = true;

bool __initdata bsp_delay_spec_ctrl;
uint8_t __read_mostly default_xen_spec_ctrl;
uint8_t __read_mostly default_spec_ctrl_flags;

paddr_t __read_mostly l1tf_addr_mask, __read_mostly l1tf_safe_maddr;
static bool __initdata cpu_has_bug_l1tf;
static unsigned int __initdata l1d_maxphysaddr;

static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */
static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. */

static int8_t __initdata opt_srb_lock = -1;
uint64_t __read_mostly default_xen_mcu_opt_ctrl;

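/*
 * Illustrative examples of command lines accepted by the parser below:
 *   "spec-ctrl=no"                        - global disable of these options
 *   "spec-ctrl=no-xen"                    - disable protections for Xen itself
 *   "spec-ctrl=bti-thunk=lfence,ibpb=no"  - pick a thunk and turn off IBPB
 */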
static int __init parse_spec_ctrl(const char *s)
{
    const char *ss;
    int val, rc = 0;

    do {
        ss = strchr(s, ',');
        if ( !ss )
            ss = strchr(s, '\0');

        /* Global and Xen-wide disable. */
        val = parse_bool(s, ss);
        if ( !val )
        {
            opt_msr_sc_pv = false;
            opt_msr_sc_hvm = false;

            opt_eager_fpu = 0;

            if ( opt_xpti_hwdom < 0 )
                opt_xpti_hwdom = 0;
            if ( opt_xpti_domu < 0 )
                opt_xpti_domu = 0;

            if ( opt_smt < 0 )
                opt_smt = 1;

            if ( opt_pv_l1tf_hwdom < 0 )
                opt_pv_l1tf_hwdom = 0;
            if ( opt_pv_l1tf_domu < 0 )
                opt_pv_l1tf_domu = 0;

            if ( opt_tsx == -1 )
                opt_tsx = -3;

        disable_common:
            opt_rsb_pv = false;
            opt_rsb_hvm = false;
            opt_md_clear_pv = 0;
            opt_md_clear_hvm = 0;

            opt_thunk = THUNK_JMP;
            opt_ibrs = 0;
            opt_ibpb = false;
            opt_ssbd = false;
            opt_l1d_flush = 0;
            opt_branch_harden = false;
            opt_srb_lock = 0;
        }
        else if ( val > 0 )
            rc = -EINVAL;
        else if ( (val = parse_boolean("xen", s, ss)) >= 0 )
        {
            if ( !val )
                goto disable_common;

            rc = -EINVAL;
        }

        /* Xen's alternative blocks. */
        else if ( (val = parse_boolean("pv", s, ss)) >= 0 )
        {
            opt_msr_sc_pv = val;
            opt_rsb_pv = val;
            opt_md_clear_pv = val;
        }
        else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
        {
            opt_msr_sc_hvm = val;
            opt_rsb_hvm = val;
            opt_md_clear_hvm = val;
        }
        else if ( (val = parse_boolean("msr-sc", s, ss)) >= 0 )
        {
            opt_msr_sc_pv = val;
            opt_msr_sc_hvm = val;
        }
        else if ( (val = parse_boolean("rsb", s, ss)) >= 0 )
        {
            opt_rsb_pv = val;
            opt_rsb_hvm = val;
        }
        else if ( (val = parse_boolean("md-clear", s, ss)) >= 0 )
        {
            opt_md_clear_pv = val;
            opt_md_clear_hvm = val;
        }

        /* Xen's speculative sidechannel mitigation settings. */
        else if ( !strncmp(s, "bti-thunk=", 10) )
        {
            s += 10;

            if ( !cmdline_strcmp(s, "retpoline") )
                opt_thunk = THUNK_RETPOLINE;
            else if ( !cmdline_strcmp(s, "lfence") )
                opt_thunk = THUNK_LFENCE;
            else if ( !cmdline_strcmp(s, "jmp") )
                opt_thunk = THUNK_JMP;
            else
                rc = -EINVAL;
        }
        else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 )
            opt_ibrs = val;
        else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
            opt_ibpb = val;
        else if ( (val = parse_boolean("ssbd", s, ss)) >= 0 )
            opt_ssbd = val;
        else if ( (val = parse_boolean("eager-fpu", s, ss)) >= 0 )
            opt_eager_fpu = val;
        else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 )
            opt_l1d_flush = val;
        else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 )
            opt_branch_harden = val;
        else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 )
            opt_srb_lock = val;
        else
            rc = -EINVAL;

        s = ss + 1;
    } while ( *ss );

    return rc;
}
custom_param("spec-ctrl", parse_spec_ctrl);

int8_t __read_mostly opt_xpti_hwdom = -1;
int8_t __read_mostly opt_xpti_domu = -1;

static __init void xpti_init_default(uint64_t caps)
{
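    /*
     * AMD and Hygon processors are not believed to be vulnerable to
     * Meltdown, so treat them as if RDCL_NO were enumerated.
     */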
    if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
        caps = ARCH_CAPS_RDCL_NO;

    if ( caps & ARCH_CAPS_RDCL_NO )
    {
        if ( opt_xpti_hwdom < 0 )
            opt_xpti_hwdom = 0;
        if ( opt_xpti_domu < 0 )
            opt_xpti_domu = 0;
    }
    else
    {
        if ( opt_xpti_hwdom < 0 )
            opt_xpti_hwdom = 1;
        if ( opt_xpti_domu < 0 )
            opt_xpti_domu = 1;
    }
}

static __init int parse_xpti(const char *s)
{
    const char *ss;
    int val, rc = 0;

    /* Interpret 'xpti' alone in its positive boolean form. */
    if ( *s == '\0' )
        opt_xpti_hwdom = opt_xpti_domu = 1;

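    /* e.g. "xpti=no" or "xpti=dom0=false,domu=true" (illustrative). */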
    do {
        ss = strchr(s, ',');
        if ( !ss )
            ss = strchr(s, '\0');

        switch ( parse_bool(s, ss) )
        {
        case 0:
            opt_xpti_hwdom = opt_xpti_domu = 0;
            break;

        case 1:
            opt_xpti_hwdom = opt_xpti_domu = 1;
            break;

        default:
            if ( !strcmp(s, "default") )
                opt_xpti_hwdom = opt_xpti_domu = -1;
            else if ( (val = parse_boolean("dom0", s, ss)) >= 0 )
                opt_xpti_hwdom = val;
            else if ( (val = parse_boolean("domu", s, ss)) >= 0 )
                opt_xpti_domu = val;
            else if ( *s )
                rc = -EINVAL;
            break;
        }

        s = ss + 1;
    } while ( *ss );

    return rc;
}
custom_param("xpti", parse_xpti);

int8_t __read_mostly opt_pv_l1tf_hwdom = -1;
int8_t __read_mostly opt_pv_l1tf_domu = -1;

static __init int parse_pv_l1tf(const char *s)
{
    const char *ss;
    int val, rc = 0;

    /* Interpret 'pv-l1tf' alone in its positive boolean form. */
    if ( *s == '\0' )
        opt_pv_l1tf_hwdom = opt_pv_l1tf_domu = 1;

    do {
        ss = strchr(s, ',');
        if ( !ss )
            ss = strchr(s, '\0');

        switch ( parse_bool(s, ss) )
        {
        case 0:
            opt_pv_l1tf_hwdom = opt_pv_l1tf_domu = 0;
            break;

        case 1:
            opt_pv_l1tf_hwdom = opt_pv_l1tf_domu = 1;
            break;

        default:
            if ( (val = parse_boolean("dom0", s, ss)) >= 0 )
                opt_pv_l1tf_hwdom = val;
            else if ( (val = parse_boolean("domu", s, ss)) >= 0 )
                opt_pv_l1tf_domu = val;
            else if ( *s )
                rc = -EINVAL;
            break;
        }

        s = ss + 1;
    } while ( *ss );

    return rc;
}
custom_param("pv-l1tf", parse_pv_l1tf);

static void __init print_details(enum ind_thunk thunk, uint64_t caps)
{
    unsigned int _7d0 = 0, e8b = 0, tmp;

    /* Collect diagnostics about available mitigations. */
    if ( boot_cpu_data.cpuid_level >= 7 )
        cpuid_count(7, 0, &tmp, &tmp, &tmp, &_7d0);
    if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 )
        cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp);

    printk("Speculative mitigation facilities:\n");

    /* Hardware features which pertain to speculative mitigations. */
    printk("  Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
           (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
           (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP"     : "",
           (_7d0 & cpufeat_mask(X86_FEATURE_L1D_FLUSH)) ? " L1D_FLUSH" : "",
           (_7d0 & cpufeat_mask(X86_FEATURE_SSBD))  ? " SSBD"      : "",
           (_7d0 & cpufeat_mask(X86_FEATURE_MD_CLEAR)) ? " MD_CLEAR" : "",
           (_7d0 & cpufeat_mask(X86_FEATURE_SRBDS_CTRL)) ? " SRBDS_CTRL" : "",
           (e8b  & cpufeat_mask(X86_FEATURE_IBPB))  ? " IBPB"      : "",
           (caps & ARCH_CAPS_IBRS_ALL)              ? " IBRS_ALL"  : "",
           (caps & ARCH_CAPS_RDCL_NO)               ? " RDCL_NO"   : "",
           (caps & ARCH_CAPS_RSBA)                  ? " RSBA"      : "",
           (caps & ARCH_CAPS_SKIP_L1DFL)            ? " SKIP_L1DFL": "",
           (caps & ARCH_CAPS_SSB_NO)                ? " SSB_NO"    : "",
           (caps & ARCH_CAPS_MDS_NO)                ? " MDS_NO"    : "",
           (caps & ARCH_CAPS_TSX_CTRL)              ? " TSX_CTRL"  : "",
           (caps & ARCH_CAPS_TAA_NO)                ? " TAA_NO"    : "");

    /* Compiled-in support which pertains to mitigations. */
    if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) )
        printk("  Compiled-in support:"
#ifdef CONFIG_INDIRECT_THUNK
               " INDIRECT_THUNK"
#endif
#ifdef CONFIG_SHADOW_PAGING
               " SHADOW_PAGING"
#endif
               "\n");

    /* Settings for Xen's protection, irrespective of guests. */
    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s, Other:%s%s%s%s%s\n",
           thunk == THUNK_NONE      ? "N/A" :
           thunk == THUNK_RETPOLINE ? "RETPOLINE" :
           thunk == THUNK_LFENCE    ? "LFENCE" :
           thunk == THUNK_JMP       ? "JMP" : "?",
           !boot_cpu_has(X86_FEATURE_IBRSB)          ? "No" :
           (default_xen_spec_ctrl & SPEC_CTRL_IBRS)  ? "IBRS+" :  "IBRS-",
           !boot_cpu_has(X86_FEATURE_SSBD)           ? "" :
           (default_xen_spec_ctrl & SPEC_CTRL_SSBD)  ? " SSBD+" : " SSBD-",
           !(caps & ARCH_CAPS_TSX_CTRL)              ? "" :
           (opt_tsx & 1)                             ? " TSX+" : " TSX-",
           !boot_cpu_has(X86_FEATURE_SRBDS_CTRL)     ? "" :
           opt_srb_lock                              ? " SRB_LOCK+" : " SRB_LOCK-",
           opt_ibpb                                  ? " IBPB"  : "",
           opt_l1d_flush                             ? " L1D_FLUSH" : "",
           opt_md_clear_pv || opt_md_clear_hvm       ? " VERW"  : "",
           opt_branch_harden                         ? " BRANCH_HARDEN" : "");

    /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
    if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu )
        printk("  L1TF: believed%s vulnerable, maxphysaddr L1D %u, CPUID %u"
371                ", Safe address %"PRIx64"\n",
372                cpu_has_bug_l1tf ? "" : " not",
373                l1d_maxphysaddr, paddr_bits, l1tf_safe_maddr);
374 
375     /*
376      * Alternatives blocks for protecting against and/or virtualising
377      * mitigation support for guests.
378      */
379 #ifdef CONFIG_HVM
380     printk("  Support for HVM VMs:%s%s%s%s%s\n",
381            (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
382             boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ||
383             boot_cpu_has(X86_FEATURE_MD_CLEAR)   ||
384             opt_eager_fpu)                           ? ""               : " None",
385            boot_cpu_has(X86_FEATURE_SC_MSR_HVM)      ? " MSR_SPEC_CTRL" : "",
386            boot_cpu_has(X86_FEATURE_SC_RSB_HVM)      ? " RSB"           : "",
387            opt_eager_fpu                             ? " EAGER_FPU"     : "",
388            boot_cpu_has(X86_FEATURE_MD_CLEAR)        ? " MD_CLEAR"      : "");
389 
390 #endif
391 #ifdef CONFIG_PV
392     printk("  Support for PV VMs:%s%s%s%s%s\n",
393            (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
394             boot_cpu_has(X86_FEATURE_SC_RSB_PV) ||
395             boot_cpu_has(X86_FEATURE_MD_CLEAR)  ||
396             opt_eager_fpu)                           ? ""               : " None",
397            boot_cpu_has(X86_FEATURE_SC_MSR_PV)       ? " MSR_SPEC_CTRL" : "",
398            boot_cpu_has(X86_FEATURE_SC_RSB_PV)       ? " RSB"           : "",
399            opt_eager_fpu                             ? " EAGER_FPU"     : "",
400            boot_cpu_has(X86_FEATURE_MD_CLEAR)        ? " MD_CLEAR"      : "");
401 
402     printk("  XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n",
403            opt_xpti_hwdom ? "enabled" : "disabled",
404            opt_xpti_domu  ? "enabled" : "disabled",
405            xpti_pcid_enabled() ? "" : "out");
406 
407     printk("  PV L1TF shadowing: Dom0 %s, DomU %s\n",
408            opt_pv_l1tf_hwdom ? "enabled"  : "disabled",
409            opt_pv_l1tf_domu  ? "enabled"  : "disabled");
410 #endif
411 }
412 
check_smt_enabled(void)413 static bool __init check_smt_enabled(void)
414 {
415     uint64_t val;
416     unsigned int cpu;
417 
418     /*
419      * x86_num_siblings defaults to 1 in the absence of other information, and
420      * is adjusted based on other topology information found in CPUID leaves.
421      *
422      * On AMD hardware, it will be the current SMT configuration.  On Intel
423      * hardware, it will represent the maximum capability, rather than the
424      * current configuration.
425      */
426     if ( boot_cpu_data.x86_num_siblings < 2 )
427         return false;
428 
429     /*
430      * Intel Nehalem and later hardware does have an MSR which reports the
431      * current count of cores/threads in the package.
432      *
433      * At the time of writing, it is almost completely undocumented, so isn't
434      * virtualised reliably.
435      */
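    /*
     * Worked example: a hypothetical 4-core/8-thread package reports Cores=4
     * and Threads=8 in this MSR, so a mismatch between the two fields
     * indicates that SMT is active.
     */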
    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && !cpu_has_hypervisor &&
         !rdmsr_safe(MSR_INTEL_CORE_THREAD_COUNT, val) )
        return (MASK_EXTR(val, MSR_CTC_CORE_MASK) !=
                MASK_EXTR(val, MSR_CTC_THREAD_MASK));

    /*
     * Search over the CPUs reported in the ACPI tables.  Any whose APIC ID
     * has a non-zero thread id component indicates that SMT is active.
     */
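    /*
     * e.g. with x86_num_siblings == 2, the low APIC ID bit is the thread id,
     * so any present CPU with an odd APIC ID implies an active sibling.
     */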
    for_each_present_cpu ( cpu )
        if ( x86_cpu_to_apicid[cpu] & (boot_cpu_data.x86_num_siblings - 1) )
            return true;

    return false;
}

/* Calculate whether Retpoline is known-safe on this CPU. */
static bool __init retpoline_safe(uint64_t caps)
{
    unsigned int ucode_rev = this_cpu(cpu_sig).rev;

    if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
        return true;

    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
         boot_cpu_data.x86 != 6 )
        return false;

    /*
     * RSBA may be set by a hypervisor to indicate that we may move to a
     * processor which isn't retpoline-safe.
     *
     * Processors offering Enhanced IBRS are not guaranteed to be
     * retpoline-safe.
     */
    if ( caps & (ARCH_CAPS_RSBA | ARCH_CAPS_IBRS_ALL) )
        return false;

    switch ( boot_cpu_data.x86_model )
    {
    case 0x17: /* Penryn */
    case 0x1d: /* Dunnington */
    case 0x1e: /* Nehalem */
    case 0x1f: /* Auburndale / Havendale */
    case 0x1a: /* Nehalem EP */
    case 0x2e: /* Nehalem EX */
    case 0x25: /* Westmere */
    case 0x2c: /* Westmere EP */
    case 0x2f: /* Westmere EX */
    case 0x2a: /* SandyBridge */
    case 0x2d: /* SandyBridge EP/EX */
    case 0x3a: /* IvyBridge */
    case 0x3e: /* IvyBridge EP/EX */
    case 0x3c: /* Haswell */
    case 0x3f: /* Haswell EX/EP */
    case 0x45: /* Haswell D */
    case 0x46: /* Haswell H */
        return true;

        /*
         * Broadwell processors are retpoline-safe after specific microcode
         * versions.
         */
    case 0x3d: /* Broadwell */
        return ucode_rev >= 0x2a;
    case 0x47: /* Broadwell H */
        return ucode_rev >= 0x1d;
    case 0x4f: /* Broadwell EP/EX */
        return ucode_rev >= 0xb000021;
    case 0x56: /* Broadwell D */
        switch ( boot_cpu_data.x86_mask )
        {
        case 2:  return ucode_rev >= 0x15;
        case 3:  return ucode_rev >= 0x7000012;
        case 4:  return ucode_rev >= 0xf000011;
        case 5:  return ucode_rev >= 0xe000009;
        default:
            printk("Unrecognised CPU stepping %#x - assuming not retpoline safe\n",
                   boot_cpu_data.x86_mask);
            return false;
        }
        break;

        /*
         * Skylake, Kabylake and Cannonlake processors are not retpoline-safe.
         */
    case 0x4e: /* Skylake M */
    case 0x55: /* Skylake X */
    case 0x5e: /* Skylake D */
    case 0x66: /* Cannonlake */
    case 0x67: /* Cannonlake? */
    case 0x8e: /* Kabylake M */
    case 0x9e: /* Kabylake D */
        return false;

        /*
         * Atom processors before Goldmont Plus/Gemini Lake are retpoline-safe.
         */
    case 0x1c: /* Pineview */
    case 0x26: /* Lincroft */
    case 0x27: /* Penwell */
    case 0x35: /* Cloverview */
    case 0x36: /* Cedarview */
    case 0x37: /* Baytrail / Valleyview (Silvermont) */
    case 0x4d: /* Avaton / Rangely (Silvermont) */
    case 0x4c: /* Cherrytrail / Brasswell */
    case 0x4a: /* Merrifield */
    case 0x57: /* Knights Landing */
    case 0x5a: /* Moorefield */
    case 0x5c: /* Goldmont */
    case 0x5f: /* Denverton */
    case 0x85: /* Knights Mill */
        return true;

    default:
        printk("Unrecognised CPU model %#x - assuming not retpoline safe\n",
               boot_cpu_data.x86_model);
        return false;
    }
}

/* Calculate whether this CPU speculates past #NM */
static bool __init should_use_eager_fpu(void)
{
    /*
     * Assume all unrecognised processors are ok.  This is only known to
     * affect Intel Family 6 processors.
     */
    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
         boot_cpu_data.x86 != 6 )
        return false;

    switch ( boot_cpu_data.x86_model )
    {
        /*
         * Core processors since at least Nehalem are vulnerable.
         */
    case 0x1e: /* Nehalem */
    case 0x1f: /* Auburndale / Havendale */
    case 0x1a: /* Nehalem EP */
    case 0x2e: /* Nehalem EX */
    case 0x25: /* Westmere */
    case 0x2c: /* Westmere EP */
    case 0x2f: /* Westmere EX */
    case 0x2a: /* SandyBridge */
    case 0x2d: /* SandyBridge EP/EX */
    case 0x3a: /* IvyBridge */
    case 0x3e: /* IvyBridge EP/EX */
    case 0x3c: /* Haswell */
    case 0x3f: /* Haswell EX/EP */
    case 0x45: /* Haswell D */
    case 0x46: /* Haswell H */
    case 0x3d: /* Broadwell */
    case 0x47: /* Broadwell H */
    case 0x4f: /* Broadwell EP/EX */
    case 0x56: /* Broadwell D */
    case 0x4e: /* Skylake M */
    case 0x55: /* Skylake X */
    case 0x5e: /* Skylake D */
    case 0x66: /* Cannonlake */
    case 0x67: /* Cannonlake? */
    case 0x8e: /* Kabylake M */
    case 0x9e: /* Kabylake D */
        return true;

        /*
         * Atom processors are not vulnerable.
         */
    case 0x1c: /* Pineview */
    case 0x26: /* Lincroft */
    case 0x27: /* Penwell */
    case 0x35: /* Cloverview */
    case 0x36: /* Cedarview */
    case 0x37: /* Baytrail / Valleyview (Silvermont) */
    case 0x4d: /* Avaton / Rangely (Silvermont) */
    case 0x4c: /* Cherrytrail / Brasswell */
    case 0x4a: /* Merrifield */
    case 0x5a: /* Moorefield */
    case 0x5c: /* Goldmont */
    case 0x5f: /* Denverton */
    case 0x7a: /* Gemini Lake */
        return false;

        /*
         * Knights processors are not vulnerable.
         */
    case 0x57: /* Knights Landing */
    case 0x85: /* Knights Mill */
        return false;

    default:
        printk("Unrecognised CPU model %#x - assuming vulnerable to LazyFPU\n",
               boot_cpu_data.x86_model);
        return true;
    }
}

/* Calculate whether this CPU is vulnerable to L1TF. */
static __init void l1tf_calculations(uint64_t caps)
{
    bool hit_default = false;

    l1d_maxphysaddr = paddr_bits;

    /* L1TF is only known to affect Intel Family 6 processors at this time. */
    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
         boot_cpu_data.x86 == 6 )
    {
        switch ( boot_cpu_data.x86_model )
        {
            /*
             * Core processors since at least Penryn are vulnerable.
             */
        case 0x17: /* Penryn */
        case 0x1d: /* Dunnington */
            cpu_has_bug_l1tf = true;
            break;

        case 0x1f: /* Auburndale / Havendale */
        case 0x1e: /* Nehalem */
        case 0x1a: /* Nehalem EP */
        case 0x2e: /* Nehalem EX */
        case 0x25: /* Westmere */
        case 0x2c: /* Westmere EP */
        case 0x2f: /* Westmere EX */
            cpu_has_bug_l1tf = true;
            l1d_maxphysaddr = 44;
            break;

        case 0x2a: /* SandyBridge */
        case 0x2d: /* SandyBridge EP/EX */
        case 0x3a: /* IvyBridge */
        case 0x3e: /* IvyBridge EP/EX */
        case 0x3c: /* Haswell */
        case 0x3f: /* Haswell EX/EP */
        case 0x45: /* Haswell D */
        case 0x46: /* Haswell H */
        case 0x3d: /* Broadwell */
        case 0x47: /* Broadwell H */
        case 0x4f: /* Broadwell EP/EX */
        case 0x56: /* Broadwell D */
        case 0x4e: /* Skylake M */
        case 0x55: /* Skylake X */
        case 0x5e: /* Skylake D */
        case 0x66: /* Cannonlake */
        case 0x67: /* Cannonlake? */
        case 0x8e: /* Kabylake M */
        case 0x9e: /* Kabylake D */
            cpu_has_bug_l1tf = true;
            l1d_maxphysaddr = 46;
            break;

            /*
             * Atom processors are not vulnerable.
             */
        case 0x1c: /* Pineview */
        case 0x26: /* Lincroft */
        case 0x27: /* Penwell */
        case 0x35: /* Cloverview */
        case 0x36: /* Cedarview */
        case 0x37: /* Baytrail / Valleyview (Silvermont) */
        case 0x4d: /* Avaton / Rangely (Silvermont) */
        case 0x4c: /* Cherrytrail / Brasswell */
        case 0x4a: /* Merrifield */
        case 0x5a: /* Moorefield */
        case 0x5c: /* Goldmont */
        case 0x5f: /* Denverton */
        case 0x7a: /* Gemini Lake */
            break;

            /*
             * Knights processors are not vulnerable.
             */
        case 0x57: /* Knights Landing */
        case 0x85: /* Knights Mill */
            break;

        default:
            /* Defer printk() until we've accounted for RDCL_NO. */
            hit_default = true;
            cpu_has_bug_l1tf = true;
            break;
        }
    }

    /* Any processor advertising RDCL_NO should not be vulnerable to L1TF. */
    if ( caps & ARCH_CAPS_RDCL_NO )
        cpu_has_bug_l1tf = false;

    if ( cpu_has_bug_l1tf && hit_default )
        printk("Unrecognised CPU model %#x - assuming vulnerable to L1TF\n",
               boot_cpu_data.x86_model);

    /*
     * L1TF safe address heuristics.  These apply to the real hardware we are
     * running on, and are best-effort-only if Xen is virtualised.
     *
     * The address mask which the L1D cache uses, which might be wider than
     * the CPUID-reported maxphysaddr.
     */
    l1tf_addr_mask = ((1ul << l1d_maxphysaddr) - 1) & PAGE_MASK;
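    /*
     * e.g. an l1d_maxphysaddr of 46 yields an l1tf_addr_mask of
     * 0x3ffffffff000 once the in-page bits are masked off.
     */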

    /*
     * To be safe, l1tf_safe_maddr must be above the highest cacheable entity
     * in system physical address space.  However, to preserve space for
     * paged-out metadata, it should be as low as possible above the highest
     * cacheable address, so as to require fewer high-order bits being set.
     *
     * These heuristics are based on some guesswork to improve the likelihood
     * of safety in the common case, including Linux's L1TF mitigation of
     * inverting all address bits in a non-present PTE.
     *
     * - If L1D is wider than CPUID (Nehalem and later mobile/desktop/low end
     *   server), setting any address bit beyond CPUID maxphysaddr guarantees
     *   to make the PTE safe.  This case doesn't require all the high-order
     *   bits being set, and doesn't require any other source of information
     *   for safety.
     *
     * - If L1D is the same as CPUID (Pre-Nehalem, or high end server), we
     *   must sacrifice high order bits from the real address space for
     *   safety.  Therefore, make a blind guess that there is nothing
     *   cacheable in the top quarter of physical address space.
     *
     *   It is exceedingly unlikely for machines to be populated with this
     *   much RAM (likely 512G on pre-Nehalem, 16T on Nehalem/Westmere, 64T on
     *   Sandybridge and later) due to the sheer volume of DIMMs this would
     *   actually take.
     *
     *   However, it is possible to find machines this large, so the "top
     *   quarter" guess is supplemented to push the limit higher if references
     *   to cacheable mappings (E820/SRAT/EFI/etc) are found above the top
     *   quarter boundary.
     *
     *   Finally, this top quarter guess gives us a good chance of being safe
     *   when running virtualised (and the CPUID maxphysaddr hasn't been
     *   levelled for heterogeneous migration safety), where the safety
     *   consideration is still in terms of host details, but all E820/etc
     *   information is in terms of guest physical layout.
     */
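    /*
     * Worked example: with paddr_bits == 46 and a wider L1D, the first arm
     * picks 1ul << 46 == 0x400000000000; with an equal-width L1D it falls
     * back to 3ul << 44 == 0x300000000000, the top-quarter boundary.
     */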
    l1tf_safe_maddr = max(l1tf_safe_maddr, ((l1d_maxphysaddr > paddr_bits)
                                            ? (1ul << paddr_bits)
                                            : (3ul << (paddr_bits - 2))));
}

/* Calculate whether this CPU is vulnerable to MDS. */
static __init void mds_calculations(uint64_t caps)
{
    /* MDS is only known to affect Intel Family 6 processors at this time. */
    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
         boot_cpu_data.x86 != 6 )
        return;

    /* Any processor advertising MDS_NO should not be vulnerable to MDS. */
    if ( caps & ARCH_CAPS_MDS_NO )
        return;

    switch ( boot_cpu_data.x86_model )
    {
        /*
         * Core processors since at least Nehalem are vulnerable.
         */
    case 0x1f: /* Auburndale / Havendale */
    case 0x1e: /* Nehalem */
    case 0x1a: /* Nehalem EP */
    case 0x2e: /* Nehalem EX */
    case 0x25: /* Westmere */
    case 0x2c: /* Westmere EP */
    case 0x2f: /* Westmere EX */
    case 0x2a: /* SandyBridge */
    case 0x2d: /* SandyBridge EP/EX */
    case 0x3a: /* IvyBridge */
    case 0x3e: /* IvyBridge EP/EX */
    case 0x3c: /* Haswell */
    case 0x3f: /* Haswell EX/EP */
    case 0x45: /* Haswell D */
    case 0x46: /* Haswell H */
    case 0x3d: /* Broadwell */
    case 0x47: /* Broadwell H */
    case 0x4f: /* Broadwell EP/EX */
    case 0x56: /* Broadwell D */
    case 0x4e: /* Skylake M */
    case 0x5e: /* Skylake D */
        cpu_has_bug_mds = true;
        break;

        /*
         * Some Core processors have per-stepping vulnerability.
         */
    case 0x55: /* Skylake-X / Cascade Lake */
        if ( boot_cpu_data.x86_mask <= 5 )
            cpu_has_bug_mds = true;
        break;

    case 0x8e: /* Kaby / Coffee / Whiskey Lake M */
        if ( boot_cpu_data.x86_mask <= 0xb )
            cpu_has_bug_mds = true;
        break;

    case 0x9e: /* Kaby / Coffee / Whiskey Lake D */
        if ( boot_cpu_data.x86_mask <= 0xc )
            cpu_has_bug_mds = true;
        break;

        /*
         * Very old and very new Atom processors are not vulnerable.
         */
    case 0x1c: /* Pineview */
    case 0x26: /* Lincroft */
    case 0x27: /* Penwell */
    case 0x35: /* Cloverview */
    case 0x36: /* Cedarview */
    case 0x7a: /* Gemini Lake */
        break;

        /*
         * Middling Atom processors are vulnerable to just the Store Buffer
         * aspect.
         */
    case 0x37: /* Baytrail / Valleyview (Silvermont) */
    case 0x4a: /* Merrifield */
    case 0x4c: /* Cherrytrail / Brasswell */
    case 0x4d: /* Avaton / Rangely (Silvermont) */
    case 0x5a: /* Moorefield */
    case 0x5d: /* SoFIA 3G Granite/ES2.1 */
    case 0x65: /* SoFIA LTE AOSP */
    case 0x6e: /* Cougar Mountain */
    case 0x75: /* Lightning Mountain */
        /*
         * Knights processors (which are based on the Silvermont/Airmont
         * microarchitecture) are similarly only affected by the Store Buffer
         * aspect.
         */
    case 0x57: /* Knights Landing */
    case 0x85: /* Knights Mill */
        cpu_has_bug_msbds_only = true;
        break;

    default:
        printk("Unrecognised CPU model %#x - assuming vulnerable to MDS\n",
               boot_cpu_data.x86_model);
        cpu_has_bug_mds = true;
        break;
    }
}

void __init init_speculation_mitigations(void)
{
    enum ind_thunk thunk = THUNK_DEFAULT;
    bool use_spec_ctrl = false, ibrs = false, hw_smt_enabled;
    bool cpu_has_bug_taa;
    uint64_t caps = 0;

    if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
        rdmsrl(MSR_ARCH_CAPABILITIES, caps);

    hw_smt_enabled = check_smt_enabled();

    /*
     * First, disable the use of retpolines if Xen is using shadow stacks, as
     * they are incompatible.
     */
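    /*
     * (A retpoline deliberately mismatches call and ret, which CET
     * shadow-stack checking would reject.)
     */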
    if ( cpu_has_xen_shstk &&
         (opt_thunk == THUNK_DEFAULT || opt_thunk == THUNK_RETPOLINE) )
        thunk = THUNK_JMP;

    /*
     * Has the user specified any custom BTI mitigations?  If so, follow their
     * instructions exactly and disable all heuristics.
     */
    if ( opt_thunk != THUNK_DEFAULT || opt_ibrs != -1 )
    {
        thunk = opt_thunk;
        ibrs  = !!opt_ibrs;
    }
    else
    {
        /*
         * Evaluate the safest Branch Target Injection mitigations to use.
         * First, begin with compiler-aided mitigations.
         */
        if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) )
        {
            /*
             * AMD's recommended mitigation is to set lfence as being dispatch
             * serialising, and to use IND_THUNK_LFENCE.
             */
            if ( cpu_has_lfence_dispatch )
                thunk = THUNK_LFENCE;
            /*
             * On Intel hardware, we'd like to use retpoline in preference to
             * IBRS, but only if it is safe on this hardware.
             */
            else if ( retpoline_safe(caps) )
                thunk = THUNK_RETPOLINE;
            else if ( boot_cpu_has(X86_FEATURE_IBRSB) )
                ibrs = true;
        }
        /* Without compiler thunk support, use IBRS if available. */
        else if ( boot_cpu_has(X86_FEATURE_IBRSB) )
            ibrs = true;
    }

    /*
     * Supplementary minor adjustments.  Without compiler support, there are
     * no thunks.
     */
    if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) )
        thunk = THUNK_NONE;

    /*
     * If IBRS is in use and thunks are compiled in, there is no point
     * suffering extra overhead.  Switch to the least-overhead thunk.
     */
    if ( ibrs && thunk == THUNK_DEFAULT )
        thunk = THUNK_JMP;

    /*
     * If there are still no thunk preferences, the compiled default is
     * actually retpoline, and it is better than nothing.
     */
    if ( thunk == THUNK_DEFAULT )
        thunk = THUNK_RETPOLINE;

    /* Apply the chosen settings. */
    if ( thunk == THUNK_LFENCE )
        setup_force_cpu_cap(X86_FEATURE_IND_THUNK_LFENCE);
    else if ( thunk == THUNK_JMP )
        setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP);

    /*
     * If we are on hardware supporting MSR_SPEC_CTRL, see about setting up
     * the alternatives blocks so we can virtualise support for guests.
     */
    if ( boot_cpu_has(X86_FEATURE_IBRSB) )
    {
        if ( opt_msr_sc_pv )
        {
            use_spec_ctrl = true;
            setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV);
        }

        if ( opt_msr_sc_hvm )
        {
            use_spec_ctrl = true;
            setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM);
        }

        if ( use_spec_ctrl )
            default_spec_ctrl_flags |= SCF_ist_wrmsr;

        if ( ibrs )
            default_xen_spec_ctrl |= SPEC_CTRL_IBRS;
    }

    /* If we have SSBD available, see whether we should use it. */
    if ( boot_cpu_has(X86_FEATURE_SSBD) && opt_ssbd )
        default_xen_spec_ctrl |= SPEC_CTRL_SSBD;

    /*
     * PV guests can poison the RSB to any virtual address from which
     * they can execute a call instruction.  This is necessarily outside
     * of the Xen supervisor mappings.
     *
     * With SMEP enabled, the processor won't speculate into user mappings.
     * Therefore, in this case, we don't need to worry about poisoned entries
     * from 64bit PV guests.
     *
     * 32bit PV guest kernels run in ring 1, so use supervisor mappings.
     * If a processor speculates to 32bit PV guest kernel mappings, it is
     * speculating in 64bit supervisor mode, and can leak data.
     */
    if ( opt_rsb_pv )
    {
        setup_force_cpu_cap(X86_FEATURE_SC_RSB_PV);
        default_spec_ctrl_flags |= SCF_ist_rsb;
    }
    }

    /*
     * HVM guests can always poison the RSB to point at Xen supervisor
     * mappings.
     */
    if ( opt_rsb_hvm )
        setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);

    /* Check we have hardware IBPB support before using it... */
    if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) )
        opt_ibpb = false;

    /* Check whether Eager FPU should be enabled by default. */
    if ( opt_eager_fpu == -1 )
        opt_eager_fpu = should_use_eager_fpu();

    /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. */
    init_shadow_spec_ctrl_state();

    /* If Xen is using any MSR_SPEC_CTRL settings, adjust the idle path. */
    if ( default_xen_spec_ctrl )
        setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE);

    xpti_init_default(caps);

    l1tf_calculations(caps);

    /*
     * By default, enable PV domU L1TF mitigations on all L1TF-vulnerable
     * hardware, except when running in shim mode.
     *
     * In shim mode, SHADOW is expected to be compiled out, and a malicious
     * guest kernel can only attack the shim Xen, not the host Xen.
     */
    if ( opt_pv_l1tf_hwdom == -1 )
        opt_pv_l1tf_hwdom = 0;
    if ( opt_pv_l1tf_domu == -1 )
        opt_pv_l1tf_domu = !pv_shim && cpu_has_bug_l1tf;

    /*
     * By default, enable L1D_FLUSH on L1TF-vulnerable hardware, unless
     * instructed to skip the flush on vmentry by our outer hypervisor.
     */
    if ( !boot_cpu_has(X86_FEATURE_L1D_FLUSH) )
        opt_l1d_flush = 0;
    else if ( opt_l1d_flush == -1 )
        opt_l1d_flush = cpu_has_bug_l1tf && !(caps & ARCH_CAPS_SKIP_L1DFL);

    if ( opt_branch_harden )
        setup_force_cpu_cap(X86_FEATURE_SC_BRANCH_HARDEN);

    /*
     * We do not disable HT by default on affected hardware.
     *
     * Firstly, if the user intends to use exclusively PV, or HVM shadow
     * guests, HT isn't a concern and should remain fully enabled.  Secondly,
     * safety for HVM HAP guests can be arranged by the toolstack with core
     * parking, pinning or cpupool configurations, including mixed setups.
     *
     * However, if we are on affected hardware, with HT enabled, and the user
     * hasn't explicitly chosen whether to use HT or not, nag them to do so.
     */
    if ( opt_smt == -1 && cpu_has_bug_l1tf && !pv_shim && hw_smt_enabled )
        warning_add(
            "Booted on L1TF-vulnerable hardware with SMT/Hyperthreading\n"
            "enabled.  Please assess your configuration and choose an\n"
            "explicit 'smt=<bool>' setting.  See XSA-273.\n");

    mds_calculations(caps);

    /*
     * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
     * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
     * but it is somewhat better than nothing.
     */
    if ( opt_md_clear_pv == -1 )
        opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
                           boot_cpu_has(X86_FEATURE_MD_CLEAR));
    if ( opt_md_clear_hvm == -1 )
        opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
                            boot_cpu_has(X86_FEATURE_MD_CLEAR));

    /*
     * Enable MDS defences as applicable.  The PV blocks need using all the
     * time, and the Idle blocks need using if either PV or HVM defences are
     * used.
     *
     * HVM is more complicated.  The MD_CLEAR microcode extends L1D_FLUSH with
     * equivalent semantics to avoid needing to perform both flushes on the
     * HVM path.  The HVM blocks don't need activating if our hypervisor told
     * us it was handling L1D_FLUSH, or we are using L1D_FLUSH ourselves.
     */
    if ( opt_md_clear_pv )
        setup_force_cpu_cap(X86_FEATURE_SC_VERW_PV);
    if ( opt_md_clear_pv || opt_md_clear_hvm )
        setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
    if ( opt_md_clear_hvm && !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush )
        setup_force_cpu_cap(X86_FEATURE_SC_VERW_HVM);

    /*
     * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
     * active and no explicit SMT choice.
     */
    if ( opt_smt == -1 && cpu_has_bug_mds && hw_smt_enabled )
        warning_add(
            "Booted on MLPDS/MFBDS-vulnerable hardware with SMT/Hyperthreading\n"
            "enabled.  Mitigations will not be fully effective.  Please\n"
            "choose an explicit smt=<bool> setting.  See XSA-297.\n");

    /*
     * Vulnerability to TAA is a little complicated to quantify.
     *
     * In the pipeline, it is just another way to get speculative access to
     * stale load port, store buffer or fill buffer data, and therefore can be
     * considered a superset of MDS (on TSX-capable parts).  On parts which
     * predate MDS_NO, the existing VERW flushing will mitigate this
     * sidechannel as well.
     *
     * On parts which contain MDS_NO, the lack of VERW flushing means that an
     * attacker can still use TSX to target microarchitectural buffers to leak
     * secrets.  Therefore, we consider TAA to be the set of TSX-capable parts
     * which have MDS_NO but lack TAA_NO.
     *
     * Note: cpu_has_rtm (== hle) could already be hidden by `tsx=0` on the
     *       cmdline.  MSR_TSX_CTRL will only appear on TSX-capable parts, so
     *       we check both to spot TSX in a microcode/cmdline independent way.
     */
    cpu_has_bug_taa =
        (cpu_has_rtm || (caps & ARCH_CAPS_TSX_CTRL)) &&
        (caps & (ARCH_CAPS_MDS_NO | ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO;

    /*
     * On TAA-affected hardware, disabling TSX is the preferred mitigation, vs
     * the MDS mitigation of disabling HT and using VERW flushing.
     *
     * On CPUs which advertise MDS_NO, VERW has no flushing side effect until
     * the TSX_CTRL microcode is loaded, despite the MD_CLEAR CPUID bit being
     * advertised, and there isn't a MD_CLEAR_2 flag to use...
     *
     * If we're on affected hardware, able to do something about it (which
     * implies that VERW now works), no explicit TSX choice and traditional
     * MDS mitigations (no-SMT, VERW) not obviously in use (someone might
     * plausibly value TSX higher than Hyperthreading...), disable TSX to
     * mitigate TAA.
     */
    if ( opt_tsx == -1 && cpu_has_bug_taa && (caps & ARCH_CAPS_TSX_CTRL) &&
         ((hw_smt_enabled && opt_smt) ||
          !boot_cpu_has(X86_FEATURE_SC_VERW_IDLE)) )
    {
        setup_clear_cpu_cap(X86_FEATURE_HLE);
        setup_clear_cpu_cap(X86_FEATURE_RTM);

        opt_tsx = 0;
        tsx_init();
    }

    /* Calculate suitable defaults for MSR_MCU_OPT_CTRL */
    if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) )
    {
        uint64_t val;

        rdmsrl(MSR_MCU_OPT_CTRL, val);

        /*
         * On some SRBDS-affected hardware, it may be safe to relax srb-lock
         * by default.
         *
         * On parts which enumerate MDS_NO and not TAA_NO, TSX is the only way
         * to access the Fill Buffer.  If TSX isn't available (inc. SKU
         * reasons on some models), or TSX is explicitly disabled, then there
         * is no need for the extra overhead to protect RDRAND/RDSEED.
         */
        if ( opt_srb_lock == -1 &&
             (caps & (ARCH_CAPS_MDS_NO|ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO &&
             (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && opt_tsx == 0)) )
            opt_srb_lock = 0;

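        /*
         * Note: setting RNGDS_MITG_DIS opts out of the RDRAND/RDSEED
         * serialisation outside of SGX enclaves, i.e. it relaxes srb-lock.
         */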
        val &= ~MCU_OPT_CTRL_RNGDS_MITG_DIS;
        if ( !opt_srb_lock )
            val |= MCU_OPT_CTRL_RNGDS_MITG_DIS;

        default_xen_mcu_opt_ctrl = val;
    }

    print_details(thunk, caps);

    /*
     * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard
     * any firmware settings.  For performance reasons, when safe to do so, we
     * delay applying non-zero settings until after dom0 has been constructed.
     *
     * "when safe to do so" is based on whether we are virtualised.  A native
     * boot won't have any other code running in a position to mount an
     * attack.
     */
    if ( boot_cpu_has(X86_FEATURE_IBRSB) )
    {
        bsp_delay_spec_ctrl = !cpu_has_hypervisor && default_xen_spec_ctrl;

        /*
         * If delaying MSR_SPEC_CTRL setup, use the same mechanism as
         * spec_ctrl_enter_idle(), by using a shadow value of zero.
         */
        if ( bsp_delay_spec_ctrl )
        {
            struct cpu_info *info = get_cpu_info();

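            /*
             * The shadow value must be written before SCF_use_shadow becomes
             * visible; the barrier()s stop the compiler from reordering the
             * two stores, matching spec_ctrl_enter_idle().
             */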
            info->shadow_spec_ctrl = 0;
            barrier();
            info->spec_ctrl_flags |= SCF_use_shadow;
            barrier();
        }

        wrmsrl(MSR_SPEC_CTRL, bsp_delay_spec_ctrl ? 0 : default_xen_spec_ctrl);
    }

    if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) )
        wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl);
}

static void __init __maybe_unused build_assertions(void)
{
    /* The optimised assembly relies on this alias. */
    BUILD_BUG_ON(SCF_use_shadow != 1);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */