1 /******************************************************************************
2 * arch/x86/spec_ctrl.c
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; If not, see <http://www.gnu.org/licenses/>.
16 *
17 * Copyright (c) 2017-2018 Citrix Systems Ltd.
18 */
19 #include <xen/errno.h>
20 #include <xen/init.h>
21 #include <xen/lib.h>
22 #include <xen/param.h>
23 #include <xen/warning.h>
24
25 #include <asm/microcode.h>
26 #include <asm/msr.h>
27 #include <asm/pv/domain.h>
28 #include <asm/pv/shim.h>
29 #include <asm/setup.h>
30 #include <asm/spec_ctrl.h>
31 #include <asm/spec_ctrl_asm.h>
32
33 /* Cmdline controls for Xen's alternative blocks. */
34 static bool __initdata opt_msr_sc_pv = true;
35 static bool __initdata opt_msr_sc_hvm = true;
36 static bool __initdata opt_rsb_pv = true;
37 static bool __initdata opt_rsb_hvm = true;
38 static int8_t __initdata opt_md_clear_pv = -1;
39 static int8_t __initdata opt_md_clear_hvm = -1;
40
41 /* Cmdline controls for Xen's speculative settings. */
42 static enum ind_thunk {
43 THUNK_DEFAULT, /* Decide which thunk to use at boot time. */
44 THUNK_NONE, /* Missing compiler support for thunks. */
45
46 THUNK_RETPOLINE,
47 THUNK_LFENCE,
48 THUNK_JMP,
49 } opt_thunk __initdata = THUNK_DEFAULT;
50 static int8_t __initdata opt_ibrs = -1;
51 bool __read_mostly opt_ibpb = true;
52 bool __read_mostly opt_ssbd = false;
53 int8_t __read_mostly opt_eager_fpu = -1;
54 int8_t __read_mostly opt_l1d_flush = -1;
55 bool __read_mostly opt_branch_harden = true;
56
57 bool __initdata bsp_delay_spec_ctrl;
58 uint8_t __read_mostly default_xen_spec_ctrl;
59 uint8_t __read_mostly default_spec_ctrl_flags;
60
61 paddr_t __read_mostly l1tf_addr_mask, __read_mostly l1tf_safe_maddr;
62 static bool __initdata cpu_has_bug_l1tf;
63 static unsigned int __initdata l1d_maxphysaddr;
64
65 static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */
66 static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. */
67
68 static int8_t __initdata opt_srb_lock = -1;
69 uint64_t __read_mostly default_xen_mcu_opt_ctrl;
70
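/*
 * Summary of recognised 'spec-ctrl=' tokens, derived from the parser below:
 *   <bool> and xen (accepted only in their disabling forms),
 *   {pv,hvm,msr-sc,rsb,md-clear}=<bool> (alternative block selection),
 *   bti-thunk=retpoline|lfence|jmp,
 *   {ibrs,ibpb,ssbd,eager-fpu,l1d-flush,branch-harden,srb-lock}=<bool>.
 * Multiple tokens may be combined, comma separated.
 */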
static int __init parse_spec_ctrl(const char *s)
72 {
73 const char *ss;
74 int val, rc = 0;
75
76 do {
77 ss = strchr(s, ',');
78 if ( !ss )
79 ss = strchr(s, '\0');
80
81 /* Global and Xen-wide disable. */
82 val = parse_bool(s, ss);
83 if ( !val )
84 {
85 opt_msr_sc_pv = false;
86 opt_msr_sc_hvm = false;
87
88 opt_eager_fpu = 0;
89
90 if ( opt_xpti_hwdom < 0 )
91 opt_xpti_hwdom = 0;
92 if ( opt_xpti_domu < 0 )
93 opt_xpti_domu = 0;
94
95 if ( opt_smt < 0 )
96 opt_smt = 1;
97
98 if ( opt_pv_l1tf_hwdom < 0 )
99 opt_pv_l1tf_hwdom = 0;
100 if ( opt_pv_l1tf_domu < 0 )
101 opt_pv_l1tf_domu = 0;
102
103 if ( opt_tsx == -1 )
104 opt_tsx = -3;
105
106 disable_common:
107 opt_rsb_pv = false;
108 opt_rsb_hvm = false;
109 opt_md_clear_pv = 0;
110 opt_md_clear_hvm = 0;
111
112 opt_thunk = THUNK_JMP;
113 opt_ibrs = 0;
114 opt_ibpb = false;
115 opt_ssbd = false;
116 opt_l1d_flush = 0;
117 opt_branch_harden = false;
118 opt_srb_lock = 0;
119 }
120 else if ( val > 0 )
121 rc = -EINVAL;
122 else if ( (val = parse_boolean("xen", s, ss)) >= 0 )
123 {
124 if ( !val )
125 goto disable_common;
126
127 rc = -EINVAL;
128 }
129
130 /* Xen's alternative blocks. */
131 else if ( (val = parse_boolean("pv", s, ss)) >= 0 )
132 {
133 opt_msr_sc_pv = val;
134 opt_rsb_pv = val;
135 opt_md_clear_pv = val;
136 }
137 else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
138 {
139 opt_msr_sc_hvm = val;
140 opt_rsb_hvm = val;
141 opt_md_clear_hvm = val;
142 }
143 else if ( (val = parse_boolean("msr-sc", s, ss)) >= 0 )
144 {
145 opt_msr_sc_pv = val;
146 opt_msr_sc_hvm = val;
147 }
148 else if ( (val = parse_boolean("rsb", s, ss)) >= 0 )
149 {
150 opt_rsb_pv = val;
151 opt_rsb_hvm = val;
152 }
153 else if ( (val = parse_boolean("md-clear", s, ss)) >= 0 )
154 {
155 opt_md_clear_pv = val;
156 opt_md_clear_hvm = val;
157 }
158
159 /* Xen's speculative sidechannel mitigation settings. */
160 else if ( !strncmp(s, "bti-thunk=", 10) )
161 {
162 s += 10;
163
164 if ( !cmdline_strcmp(s, "retpoline") )
165 opt_thunk = THUNK_RETPOLINE;
166 else if ( !cmdline_strcmp(s, "lfence") )
167 opt_thunk = THUNK_LFENCE;
168 else if ( !cmdline_strcmp(s, "jmp") )
169 opt_thunk = THUNK_JMP;
170 else
171 rc = -EINVAL;
172 }
173 else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 )
174 opt_ibrs = val;
175 else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
176 opt_ibpb = val;
177 else if ( (val = parse_boolean("ssbd", s, ss)) >= 0 )
178 opt_ssbd = val;
179 else if ( (val = parse_boolean("eager-fpu", s, ss)) >= 0 )
180 opt_eager_fpu = val;
181 else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 )
182 opt_l1d_flush = val;
183 else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 )
184 opt_branch_harden = val;
185 else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 )
186 opt_srb_lock = val;
187 else
188 rc = -EINVAL;
189
190 s = ss + 1;
191 } while ( *ss );
192
193 return rc;
194 }
195 custom_param("spec-ctrl", parse_spec_ctrl);
196
197 int8_t __read_mostly opt_xpti_hwdom = -1;
198 int8_t __read_mostly opt_xpti_domu = -1;
199
static __init void xpti_init_default(uint64_t caps)
201 {
202 if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
203 caps = ARCH_CAPS_RDCL_NO;
204
205 if ( caps & ARCH_CAPS_RDCL_NO )
206 {
207 if ( opt_xpti_hwdom < 0 )
208 opt_xpti_hwdom = 0;
209 if ( opt_xpti_domu < 0 )
210 opt_xpti_domu = 0;
211 }
212 else
213 {
214 if ( opt_xpti_hwdom < 0 )
215 opt_xpti_hwdom = 1;
216 if ( opt_xpti_domu < 0 )
217 opt_xpti_domu = 1;
218 }
219 }
220
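/*
 * Accepted 'xpti=' forms, as handled by the parser below: a plain boolean,
 * "default" (defer to the RDCL_NO-based heuristic in xpti_init_default()),
 * and dom0=<bool> / domu=<bool> for independent control of the hardware
 * domain and other PV domains.
 */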
static __init int parse_xpti(const char *s)
222 {
223 const char *ss;
224 int val, rc = 0;
225
226 /* Interpret 'xpti' alone in its positive boolean form. */
227 if ( *s == '\0' )
228 opt_xpti_hwdom = opt_xpti_domu = 1;
229
230 do {
231 ss = strchr(s, ',');
232 if ( !ss )
233 ss = strchr(s, '\0');
234
235 switch ( parse_bool(s, ss) )
236 {
237 case 0:
238 opt_xpti_hwdom = opt_xpti_domu = 0;
239 break;
240
241 case 1:
242 opt_xpti_hwdom = opt_xpti_domu = 1;
243 break;
244
245 default:
246 if ( !strcmp(s, "default") )
247 opt_xpti_hwdom = opt_xpti_domu = -1;
248 else if ( (val = parse_boolean("dom0", s, ss)) >= 0 )
249 opt_xpti_hwdom = val;
250 else if ( (val = parse_boolean("domu", s, ss)) >= 0 )
251 opt_xpti_domu = val;
252 else if ( *s )
253 rc = -EINVAL;
254 break;
255 }
256
257 s = ss + 1;
258 } while ( *ss );
259
260 return rc;
261 }
262 custom_param("xpti", parse_xpti);
263
264 int8_t __read_mostly opt_pv_l1tf_hwdom = -1;
265 int8_t __read_mostly opt_pv_l1tf_domu = -1;
266
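/*
 * Accepted 'pv-l1tf=' forms, as handled by the parser below: a plain
 * boolean, and dom0=<bool> / domu=<bool> for finer-grained control.  Unlike
 * 'xpti=', there is no "default" token.
 */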
static __init int parse_pv_l1tf(const char *s)
268 {
269 const char *ss;
270 int val, rc = 0;
271
272 /* Interpret 'pv-l1tf' alone in its positive boolean form. */
273 if ( *s == '\0' )
274 opt_pv_l1tf_hwdom = opt_pv_l1tf_domu = 1;
275
276 do {
277 ss = strchr(s, ',');
278 if ( !ss )
279 ss = strchr(s, '\0');
280
281 switch ( parse_bool(s, ss) )
282 {
283 case 0:
284 opt_pv_l1tf_hwdom = opt_pv_l1tf_domu = 0;
285 break;
286
287 case 1:
288 opt_pv_l1tf_hwdom = opt_pv_l1tf_domu = 1;
289 break;
290
291 default:
292 if ( (val = parse_boolean("dom0", s, ss)) >= 0 )
293 opt_pv_l1tf_hwdom = val;
294 else if ( (val = parse_boolean("domu", s, ss)) >= 0 )
295 opt_pv_l1tf_domu = val;
296 else if ( *s )
297 rc = -EINVAL;
298 break;
299 }
300
301 s = ss + 1;
302 } while ( *ss );
303
304 return rc;
305 }
306 custom_param("pv-l1tf", parse_pv_l1tf);
307
static void __init print_details(enum ind_thunk thunk, uint64_t caps)
309 {
310 unsigned int _7d0 = 0, e8b = 0, tmp;
311
312 /* Collect diagnostics about available mitigations. */
313 if ( boot_cpu_data.cpuid_level >= 7 )
314 cpuid_count(7, 0, &tmp, &tmp, &tmp, &_7d0);
315 if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 )
316 cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp);
317
318 printk("Speculative mitigation facilities:\n");
319
320 /* Hardware features which pertain to speculative mitigations. */
321 printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
322 (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
323 (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP" : "",
324 (_7d0 & cpufeat_mask(X86_FEATURE_L1D_FLUSH)) ? " L1D_FLUSH" : "",
325 (_7d0 & cpufeat_mask(X86_FEATURE_SSBD)) ? " SSBD" : "",
326 (_7d0 & cpufeat_mask(X86_FEATURE_MD_CLEAR)) ? " MD_CLEAR" : "",
327 (_7d0 & cpufeat_mask(X86_FEATURE_SRBDS_CTRL)) ? " SRBDS_CTRL" : "",
328 (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : "",
329 (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "",
330 (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "",
331 (caps & ARCH_CAPS_RSBA) ? " RSBA" : "",
332 (caps & ARCH_CAPS_SKIP_L1DFL) ? " SKIP_L1DFL": "",
333 (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : "",
334 (caps & ARCH_CAPS_MDS_NO) ? " MDS_NO" : "",
335 (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "",
336 (caps & ARCH_CAPS_TAA_NO) ? " TAA_NO" : "");
337
338 /* Compiled-in support which pertains to mitigations. */
339 if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) )
340 printk(" Compiled-in support:"
341 #ifdef CONFIG_INDIRECT_THUNK
342 " INDIRECT_THUNK"
343 #endif
344 #ifdef CONFIG_SHADOW_PAGING
345 " SHADOW_PAGING"
346 #endif
347 "\n");
348
349 /* Settings for Xen's protection, irrespective of guests. */
350 printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s, Other:%s%s%s%s%s\n",
351 thunk == THUNK_NONE ? "N/A" :
352 thunk == THUNK_RETPOLINE ? "RETPOLINE" :
353 thunk == THUNK_LFENCE ? "LFENCE" :
354 thunk == THUNK_JMP ? "JMP" : "?",
355 !boot_cpu_has(X86_FEATURE_IBRSB) ? "No" :
356 (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? "IBRS+" : "IBRS-",
357 !boot_cpu_has(X86_FEATURE_SSBD) ? "" :
358 (default_xen_spec_ctrl & SPEC_CTRL_SSBD) ? " SSBD+" : " SSBD-",
359 !(caps & ARCH_CAPS_TSX_CTRL) ? "" :
360 (opt_tsx & 1) ? " TSX+" : " TSX-",
361 !boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ? "" :
362 opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-",
363 opt_ibpb ? " IBPB" : "",
364 opt_l1d_flush ? " L1D_FLUSH" : "",
365 opt_md_clear_pv || opt_md_clear_hvm ? " VERW" : "",
366 opt_branch_harden ? " BRANCH_HARDEN" : "");
367
368 /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
369 if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu )
370 printk(" L1TF: believed%s vulnerable, maxphysaddr L1D %u, CPUID %u"
371 ", Safe address %"PRIx64"\n",
372 cpu_has_bug_l1tf ? "" : " not",
373 l1d_maxphysaddr, paddr_bits, l1tf_safe_maddr);
374
375 /*
376 * Alternatives blocks for protecting against and/or virtualising
377 * mitigation support for guests.
378 */
379 #ifdef CONFIG_HVM
380 printk(" Support for HVM VMs:%s%s%s%s%s\n",
381 (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
382 boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ||
383 boot_cpu_has(X86_FEATURE_MD_CLEAR) ||
384 opt_eager_fpu) ? "" : " None",
385 boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "",
386 boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "",
387 opt_eager_fpu ? " EAGER_FPU" : "",
388 boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "");
389
390 #endif
391 #ifdef CONFIG_PV
392 printk(" Support for PV VMs:%s%s%s%s%s\n",
393 (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
394 boot_cpu_has(X86_FEATURE_SC_RSB_PV) ||
395 boot_cpu_has(X86_FEATURE_MD_CLEAR) ||
396 opt_eager_fpu) ? "" : " None",
397 boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "",
398 boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "",
399 opt_eager_fpu ? " EAGER_FPU" : "",
400 boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "");
401
402 printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n",
403 opt_xpti_hwdom ? "enabled" : "disabled",
404 opt_xpti_domu ? "enabled" : "disabled",
405 xpti_pcid_enabled() ? "" : "out");
406
407 printk(" PV L1TF shadowing: Dom0 %s, DomU %s\n",
408 opt_pv_l1tf_hwdom ? "enabled" : "disabled",
409 opt_pv_l1tf_domu ? "enabled" : "disabled");
410 #endif
411 }
412
static bool __init check_smt_enabled(void)
414 {
415 uint64_t val;
416 unsigned int cpu;
417
418 /*
419 * x86_num_siblings defaults to 1 in the absence of other information, and
420 * is adjusted based on other topology information found in CPUID leaves.
421 *
422 * On AMD hardware, it will be the current SMT configuration. On Intel
423 * hardware, it will represent the maximum capability, rather than the
424 * current configuration.
425 */
426 if ( boot_cpu_data.x86_num_siblings < 2 )
427 return false;
428
429 /*
430 * Intel Nehalem and later hardware does have an MSR which reports the
431 * current count of cores/threads in the package.
432 *
433 * At the time of writing, it is almost completely undocumented, so isn't
434 * virtualised reliably.
435 */
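/* Differing core and thread counts reported by the MSR indicate active SMT. */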
436 if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && !cpu_has_hypervisor &&
437 !rdmsr_safe(MSR_INTEL_CORE_THREAD_COUNT, val) )
438 return (MASK_EXTR(val, MSR_CTC_CORE_MASK) !=
439 MASK_EXTR(val, MSR_CTC_THREAD_MASK));
440
441 /*
442 * Search over the CPUs reported in the ACPI tables. Any whose APIC ID
443 * has a non-zero thread id component indicates that SMT is active.
444 */
445 for_each_present_cpu ( cpu )
446 if ( x86_cpu_to_apicid[cpu] & (boot_cpu_data.x86_num_siblings - 1) )
447 return true;
448
449 return false;
450 }
451
452 /* Calculate whether Retpoline is known-safe on this CPU. */
static bool __init retpoline_safe(uint64_t caps)
454 {
455 unsigned int ucode_rev = this_cpu(cpu_sig).rev;
456
457 if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
458 return true;
459
460 if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
461 boot_cpu_data.x86 != 6 )
462 return false;
463
464 /*
465 * RSBA may be set by a hypervisor to indicate that we may move to a
466 * processor which isn't retpoline-safe.
467 *
* Processors offering Enhanced IBRS are not guaranteed to be
* retpoline-safe.
470 */
471 if ( caps & (ARCH_CAPS_RSBA | ARCH_CAPS_IBRS_ALL) )
472 return false;
473
474 switch ( boot_cpu_data.x86_model )
475 {
476 case 0x17: /* Penryn */
477 case 0x1d: /* Dunnington */
478 case 0x1e: /* Nehalem */
479 case 0x1f: /* Auburndale / Havendale */
480 case 0x1a: /* Nehalem EP */
481 case 0x2e: /* Nehalem EX */
482 case 0x25: /* Westmere */
483 case 0x2c: /* Westmere EP */
484 case 0x2f: /* Westmere EX */
485 case 0x2a: /* SandyBridge */
486 case 0x2d: /* SandyBridge EP/EX */
487 case 0x3a: /* IvyBridge */
488 case 0x3e: /* IvyBridge EP/EX */
489 case 0x3c: /* Haswell */
490 case 0x3f: /* Haswell EX/EP */
491 case 0x45: /* Haswell D */
492 case 0x46: /* Haswell H */
493 return true;
494
495 /*
496 * Broadwell processors are retpoline-safe after specific microcode
497 * versions.
498 */
499 case 0x3d: /* Broadwell */
500 return ucode_rev >= 0x2a;
501 case 0x47: /* Broadwell H */
502 return ucode_rev >= 0x1d;
503 case 0x4f: /* Broadwell EP/EX */
504 return ucode_rev >= 0xb000021;
505 case 0x56: /* Broadwell D */
506 switch ( boot_cpu_data.x86_mask )
507 {
508 case 2: return ucode_rev >= 0x15;
509 case 3: return ucode_rev >= 0x7000012;
510 case 4: return ucode_rev >= 0xf000011;
511 case 5: return ucode_rev >= 0xe000009;
512 default:
printk("Unrecognised CPU stepping %#x - assuming not retpoline safe\n",
514 boot_cpu_data.x86_mask);
515 return false;
516 }
517 break;
518
519 /*
520 * Skylake, Kabylake and Cannonlake processors are not retpoline-safe.
521 */
522 case 0x4e: /* Skylake M */
523 case 0x55: /* Skylake X */
524 case 0x5e: /* Skylake D */
525 case 0x66: /* Cannonlake */
526 case 0x67: /* Cannonlake? */
527 case 0x8e: /* Kabylake M */
528 case 0x9e: /* Kabylake D */
529 return false;
530
531 /*
532 * Atom processors before Goldmont Plus/Gemini Lake are retpoline-safe.
533 */
534 case 0x1c: /* Pineview */
535 case 0x26: /* Lincroft */
536 case 0x27: /* Penwell */
537 case 0x35: /* Cloverview */
538 case 0x36: /* Cedarview */
539 case 0x37: /* Baytrail / Valleyview (Silvermont) */
540 case 0x4d: /* Avaton / Rangely (Silvermont) */
541 case 0x4c: /* Cherrytrail / Brasswell */
542 case 0x4a: /* Merrifield */
543 case 0x57: /* Knights Landing */
544 case 0x5a: /* Moorefield */
545 case 0x5c: /* Goldmont */
546 case 0x5f: /* Denverton */
547 case 0x85: /* Knights Mill */
548 return true;
549
550 default:
printk("Unrecognised CPU model %#x - assuming not retpoline safe\n",
552 boot_cpu_data.x86_model);
553 return false;
554 }
555 }
556
557 /* Calculate whether this CPU speculates past #NM */
static bool __init should_use_eager_fpu(void)
559 {
560 /*
* LazyFPU state leakage is only known to affect Intel Family 6
* processors; assume everything else is ok.
563 */
564 if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
565 boot_cpu_data.x86 != 6 )
566 return false;
567
568 switch ( boot_cpu_data.x86_model )
569 {
570 /*
571 * Core processors since at least Nehalem are vulnerable.
572 */
573 case 0x1e: /* Nehalem */
574 case 0x1f: /* Auburndale / Havendale */
575 case 0x1a: /* Nehalem EP */
576 case 0x2e: /* Nehalem EX */
577 case 0x25: /* Westmere */
578 case 0x2c: /* Westmere EP */
579 case 0x2f: /* Westmere EX */
580 case 0x2a: /* SandyBridge */
581 case 0x2d: /* SandyBridge EP/EX */
582 case 0x3a: /* IvyBridge */
583 case 0x3e: /* IvyBridge EP/EX */
584 case 0x3c: /* Haswell */
585 case 0x3f: /* Haswell EX/EP */
586 case 0x45: /* Haswell D */
587 case 0x46: /* Haswell H */
588 case 0x3d: /* Broadwell */
589 case 0x47: /* Broadwell H */
590 case 0x4f: /* Broadwell EP/EX */
591 case 0x56: /* Broadwell D */
592 case 0x4e: /* Skylake M */
593 case 0x55: /* Skylake X */
594 case 0x5e: /* Skylake D */
595 case 0x66: /* Cannonlake */
596 case 0x67: /* Cannonlake? */
597 case 0x8e: /* Kabylake M */
598 case 0x9e: /* Kabylake D */
599 return true;
600
601 /*
602 * Atom processors are not vulnerable.
603 */
604 case 0x1c: /* Pineview */
605 case 0x26: /* Lincroft */
606 case 0x27: /* Penwell */
607 case 0x35: /* Cloverview */
608 case 0x36: /* Cedarview */
609 case 0x37: /* Baytrail / Valleyview (Silvermont) */
610 case 0x4d: /* Avaton / Rangely (Silvermont) */
611 case 0x4c: /* Cherrytrail / Brasswell */
612 case 0x4a: /* Merrifield */
613 case 0x5a: /* Moorefield */
614 case 0x5c: /* Goldmont */
615 case 0x5f: /* Denverton */
616 case 0x7a: /* Gemini Lake */
617 return false;
618
619 /*
620 * Knights processors are not vulnerable.
621 */
622 case 0x57: /* Knights Landing */
623 case 0x85: /* Knights Mill */
624 return false;
625
626 default:
627 printk("Unrecognised CPU model %#x - assuming vulnerable to LazyFPU\n",
628 boot_cpu_data.x86_model);
629 return true;
630 }
631 }
632
633 /* Calculate whether this CPU is vulnerable to L1TF. */
static __init void l1tf_calculations(uint64_t caps)
635 {
636 bool hit_default = false;
637
638 l1d_maxphysaddr = paddr_bits;
639
640 /* L1TF is only known to affect Intel Family 6 processors at this time. */
641 if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
642 boot_cpu_data.x86 == 6 )
643 {
644 switch ( boot_cpu_data.x86_model )
645 {
646 /*
647 * Core processors since at least Penryn are vulnerable.
648 */
649 case 0x17: /* Penryn */
650 case 0x1d: /* Dunnington */
651 cpu_has_bug_l1tf = true;
652 break;
653
654 case 0x1f: /* Auburndale / Havendale */
655 case 0x1e: /* Nehalem */
656 case 0x1a: /* Nehalem EP */
657 case 0x2e: /* Nehalem EX */
658 case 0x25: /* Westmere */
659 case 0x2c: /* Westmere EP */
660 case 0x2f: /* Westmere EX */
661 cpu_has_bug_l1tf = true;
662 l1d_maxphysaddr = 44;
663 break;
664
665 case 0x2a: /* SandyBridge */
666 case 0x2d: /* SandyBridge EP/EX */
667 case 0x3a: /* IvyBridge */
668 case 0x3e: /* IvyBridge EP/EX */
669 case 0x3c: /* Haswell */
670 case 0x3f: /* Haswell EX/EP */
671 case 0x45: /* Haswell D */
672 case 0x46: /* Haswell H */
673 case 0x3d: /* Broadwell */
674 case 0x47: /* Broadwell H */
675 case 0x4f: /* Broadwell EP/EX */
676 case 0x56: /* Broadwell D */
677 case 0x4e: /* Skylake M */
678 case 0x55: /* Skylake X */
679 case 0x5e: /* Skylake D */
680 case 0x66: /* Cannonlake */
681 case 0x67: /* Cannonlake? */
682 case 0x8e: /* Kabylake M */
683 case 0x9e: /* Kabylake D */
684 cpu_has_bug_l1tf = true;
685 l1d_maxphysaddr = 46;
686 break;
687
688 /*
689 * Atom processors are not vulnerable.
690 */
691 case 0x1c: /* Pineview */
692 case 0x26: /* Lincroft */
693 case 0x27: /* Penwell */
694 case 0x35: /* Cloverview */
695 case 0x36: /* Cedarview */
696 case 0x37: /* Baytrail / Valleyview (Silvermont) */
697 case 0x4d: /* Avaton / Rangely (Silvermont) */
698 case 0x4c: /* Cherrytrail / Brasswell */
699 case 0x4a: /* Merrifield */
700 case 0x5a: /* Moorefield */
701 case 0x5c: /* Goldmont */
702 case 0x5f: /* Denverton */
703 case 0x7a: /* Gemini Lake */
704 break;
705
706 /*
707 * Knights processors are not vulnerable.
708 */
709 case 0x57: /* Knights Landing */
710 case 0x85: /* Knights Mill */
711 break;
712
713 default:
714 /* Defer printk() until we've accounted for RDCL_NO. */
715 hit_default = true;
716 cpu_has_bug_l1tf = true;
717 break;
718 }
719 }
720
/* Any processor advertising RDCL_NO should not be vulnerable to L1TF. */
722 if ( caps & ARCH_CAPS_RDCL_NO )
723 cpu_has_bug_l1tf = false;
724
725 if ( cpu_has_bug_l1tf && hit_default )
726 printk("Unrecognised CPU model %#x - assuming vulnerable to L1TF\n",
727 boot_cpu_data.x86_model);
728
729 /*
730 * L1TF safe address heuristics. These apply to the real hardware we are
731 * running on, and are best-effort-only if Xen is virtualised.
732 *
733 * The address mask which the L1D cache uses, which might be wider than
734 * the CPUID-reported maxphysaddr.
735 */
736 l1tf_addr_mask = ((1ul << l1d_maxphysaddr) - 1) & PAGE_MASK;
737
738 /*
739 * To be safe, l1tf_safe_maddr must be above the highest cacheable entity
740 * in system physical address space. However, to preserve space for
741 * paged-out metadata, it should be as low as possible above the highest
742 * cacheable address, so as to require fewer high-order bits being set.
743 *
744 * These heuristics are based on some guesswork to improve the likelihood
745 * of safety in the common case, including Linux's L1TF mitigation of
746 * inverting all address bits in a non-present PTE.
747 *
748 * - If L1D is wider than CPUID (Nehalem and later mobile/desktop/low end
749 * server), setting any address bit beyond CPUID maxphysaddr guarantees
750 * to make the PTE safe. This case doesn't require all the high-order
751 * bits being set, and doesn't require any other source of information
752 * for safety.
753 *
754 * - If L1D is the same as CPUID (Pre-Nehalem, or high end server), we
755 * must sacrifice high order bits from the real address space for
756 * safety. Therefore, make a blind guess that there is nothing
757 * cacheable in the top quarter of physical address space.
758 *
759 * It is exceedingly unlikely for machines to be populated with this
760 * much RAM (likely 512G on pre-Nehalem, 16T on Nehalem/Westmere, 64T on
761 * Sandybridge and later) due to the sheer volume of DIMMs this would
762 * actually take.
763 *
764 * However, it is possible to find machines this large, so the "top
765 * quarter" guess is supplemented to push the limit higher if references
766 * to cacheable mappings (E820/SRAT/EFI/etc) are found above the top
767 * quarter boundary.
768 *
769 * Finally, this top quarter guess gives us a good chance of being safe
770 * when running virtualised (and the CPUID maxphysaddr hasn't been
771 * levelled for heterogeneous migration safety), where the safety
772 * consideration is still in terms of host details, but all E820/etc
773 * information is in terms of guest physical layout.
774 */
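/*
 * Illustrative example (hypothetical numbers): with paddr_bits == 36 and
 * l1d_maxphysaddr == 44 (a Nehalem-class part), the first branch below
 * yields 1ul << 36, i.e. a single bit above CPUID maxphysaddr.  With both
 * at 46 bits, the second branch yields 3ul << 44, the 48TiB start of the
 * top quarter of a 64TiB address space.
 */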
775 l1tf_safe_maddr = max(l1tf_safe_maddr, ((l1d_maxphysaddr > paddr_bits)
776 ? (1ul << paddr_bits)
777 : (3ul << (paddr_bits - 2))));
778 }
779
780 /* Calculate whether this CPU is vulnerable to MDS. */
static __init void mds_calculations(uint64_t caps)
782 {
783 /* MDS is only known to affect Intel Family 6 processors at this time. */
784 if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
785 boot_cpu_data.x86 != 6 )
786 return;
787
/* Any processor advertising MDS_NO should not be vulnerable to MDS. */
789 if ( caps & ARCH_CAPS_MDS_NO )
790 return;
791
792 switch ( boot_cpu_data.x86_model )
793 {
794 /*
795 * Core processors since at least Nehalem are vulnerable.
796 */
797 case 0x1f: /* Auburndale / Havendale */
798 case 0x1e: /* Nehalem */
799 case 0x1a: /* Nehalem EP */
800 case 0x2e: /* Nehalem EX */
801 case 0x25: /* Westmere */
802 case 0x2c: /* Westmere EP */
803 case 0x2f: /* Westmere EX */
804 case 0x2a: /* SandyBridge */
805 case 0x2d: /* SandyBridge EP/EX */
806 case 0x3a: /* IvyBridge */
807 case 0x3e: /* IvyBridge EP/EX */
808 case 0x3c: /* Haswell */
809 case 0x3f: /* Haswell EX/EP */
810 case 0x45: /* Haswell D */
811 case 0x46: /* Haswell H */
812 case 0x3d: /* Broadwell */
813 case 0x47: /* Broadwell H */
814 case 0x4f: /* Broadwell EP/EX */
815 case 0x56: /* Broadwell D */
816 case 0x4e: /* Skylake M */
817 case 0x5e: /* Skylake D */
818 cpu_has_bug_mds = true;
819 break;
820
821 /*
822 * Some Core processors have per-stepping vulnerability.
823 */
824 case 0x55: /* Skylake-X / Cascade Lake */
825 if ( boot_cpu_data.x86_mask <= 5 )
826 cpu_has_bug_mds = true;
827 break;
828
829 case 0x8e: /* Kaby / Coffee / Whiskey Lake M */
830 if ( boot_cpu_data.x86_mask <= 0xb )
831 cpu_has_bug_mds = true;
832 break;
833
834 case 0x9e: /* Kaby / Coffee / Whiskey Lake D */
835 if ( boot_cpu_data.x86_mask <= 0xc )
836 cpu_has_bug_mds = true;
837 break;
838
839 /*
840 * Very old and very new Atom processors are not vulnerable.
841 */
842 case 0x1c: /* Pineview */
843 case 0x26: /* Lincroft */
844 case 0x27: /* Penwell */
845 case 0x35: /* Cloverview */
846 case 0x36: /* Cedarview */
case 0x7a: /* Gemini Lake */
848 break;
849
850 /*
851 * Middling Atom processors are vulnerable to just the Store Buffer
852 * aspect.
853 */
854 case 0x37: /* Baytrail / Valleyview (Silvermont) */
855 case 0x4a: /* Merrifield */
856 case 0x4c: /* Cherrytrail / Brasswell */
857 case 0x4d: /* Avaton / Rangely (Silvermont) */
858 case 0x5a: /* Moorefield */
859 case 0x5d: /* SoFIA 3G Granite/ES2.1 */
860 case 0x65: /* SoFIA LTE AOSP */
861 case 0x6e: /* Cougar Mountain */
862 case 0x75: /* Lightning Mountain */
863 /*
864 * Knights processors (which are based on the Silvermont/Airmont
865 * microarchitecture) are similarly only affected by the Store Buffer
866 * aspect.
867 */
868 case 0x57: /* Knights Landing */
869 case 0x85: /* Knights Mill */
870 cpu_has_bug_msbds_only = true;
871 break;
872
873 default:
874 printk("Unrecognised CPU model %#x - assuming vulnerable to MDS\n",
875 boot_cpu_data.x86_model);
876 cpu_has_bug_mds = true;
877 break;
878 }
879 }
880
void __init init_speculation_mitigations(void)
882 {
883 enum ind_thunk thunk = THUNK_DEFAULT;
884 bool use_spec_ctrl = false, ibrs = false, hw_smt_enabled;
885 bool cpu_has_bug_taa;
886 uint64_t caps = 0;
887
888 if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
889 rdmsrl(MSR_ARCH_CAPABILITIES, caps);
890
891 hw_smt_enabled = check_smt_enabled();
892
893 /*
894 * First, disable the use of retpolines if Xen is using shadow stacks, as
895 * they are incompatible.
896 */
897 if ( cpu_has_xen_shstk &&
898 (opt_thunk == THUNK_DEFAULT || opt_thunk == THUNK_RETPOLINE) )
899 thunk = THUNK_JMP;
900
901 /*
902 * Has the user specified any custom BTI mitigations? If so, follow their
903 * instructions exactly and disable all heuristics.
904 */
905 if ( opt_thunk != THUNK_DEFAULT || opt_ibrs != -1 )
906 {
907 thunk = opt_thunk;
908 ibrs = !!opt_ibrs;
909 }
910 else
911 {
912 /*
913 * Evaluate the safest Branch Target Injection mitigations to use.
914 * First, begin with compiler-aided mitigations.
915 */
916 if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) )
917 {
918 /*
919 * AMD's recommended mitigation is to set lfence as being dispatch
920 * serialising, and to use IND_THUNK_LFENCE.
921 */
922 if ( cpu_has_lfence_dispatch )
923 thunk = THUNK_LFENCE;
924 /*
925 * On Intel hardware, we'd like to use retpoline in preference to
926 * IBRS, but only if it is safe on this hardware.
927 */
928 else if ( retpoline_safe(caps) )
929 thunk = THUNK_RETPOLINE;
930 else if ( boot_cpu_has(X86_FEATURE_IBRSB) )
931 ibrs = true;
932 }
933 /* Without compiler thunk support, use IBRS if available. */
934 else if ( boot_cpu_has(X86_FEATURE_IBRSB) )
935 ibrs = true;
936 }
937
938 /*
* Supplementary minor adjustments. Without compiler support, there are
940 * no thunks.
941 */
942 if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) )
943 thunk = THUNK_NONE;
944
945 /*
946 * If IBRS is in use and thunks are compiled in, there is no point
947 * suffering extra overhead. Switch to the least-overhead thunk.
948 */
949 if ( ibrs && thunk == THUNK_DEFAULT )
950 thunk = THUNK_JMP;
951
952 /*
953 * If there are still no thunk preferences, the compiled default is
954 * actually retpoline, and it is better than nothing.
955 */
956 if ( thunk == THUNK_DEFAULT )
957 thunk = THUNK_RETPOLINE;
958
959 /* Apply the chosen settings. */
960 if ( thunk == THUNK_LFENCE )
961 setup_force_cpu_cap(X86_FEATURE_IND_THUNK_LFENCE);
962 else if ( thunk == THUNK_JMP )
963 setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP);
964
965 /*
966 * If we are on hardware supporting MSR_SPEC_CTRL, see about setting up
967 * the alternatives blocks so we can virtualise support for guests.
968 */
969 if ( boot_cpu_has(X86_FEATURE_IBRSB) )
970 {
971 if ( opt_msr_sc_pv )
972 {
973 use_spec_ctrl = true;
974 setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV);
975 }
976
977 if ( opt_msr_sc_hvm )
978 {
979 use_spec_ctrl = true;
980 setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM);
981 }
982
983 if ( use_spec_ctrl )
984 default_spec_ctrl_flags |= SCF_ist_wrmsr;
985
986 if ( ibrs )
987 default_xen_spec_ctrl |= SPEC_CTRL_IBRS;
988 }
989
990 /* If we have SSBD available, see whether we should use it. */
991 if ( boot_cpu_has(X86_FEATURE_SSBD) && opt_ssbd )
992 default_xen_spec_ctrl |= SPEC_CTRL_SSBD;
993
994 /*
995 * PV guests can poison the RSB to any virtual address from which
996 * they can execute a call instruction. This is necessarily outside
997 * of the Xen supervisor mappings.
998 *
999 * With SMEP enabled, the processor won't speculate into user mappings.
1000 * Therefore, in this case, we don't need to worry about poisoned entries
1001 * from 64bit PV guests.
1002 *
1003 * 32bit PV guest kernels run in ring 1, so use supervisor mappings.
* If a processor speculates to 32bit PV guest kernel mappings, it is
1005 * speculating in 64bit supervisor mode, and can leak data.
1006 */
1007 if ( opt_rsb_pv )
1008 {
1009 setup_force_cpu_cap(X86_FEATURE_SC_RSB_PV);
1010 default_spec_ctrl_flags |= SCF_ist_rsb;
1011 }
1012
1013 /*
1014 * HVM guests can always poison the RSB to point at Xen supervisor
1015 * mappings.
1016 */
1017 if ( opt_rsb_hvm )
1018 setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);
1019
1020 /* Check we have hardware IBPB support before using it... */
1021 if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) )
1022 opt_ibpb = false;
1023
1024 /* Check whether Eager FPU should be enabled by default. */
1025 if ( opt_eager_fpu == -1 )
1026 opt_eager_fpu = should_use_eager_fpu();
1027
1028 /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. */
1029 init_shadow_spec_ctrl_state();
1030
1031 /* If Xen is using any MSR_SPEC_CTRL settings, adjust the idle path. */
1032 if ( default_xen_spec_ctrl )
1033 setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE);
1034
1035 xpti_init_default(caps);
1036
1037 l1tf_calculations(caps);
1038
1039 /*
1040 * By default, enable PV domU L1TF mitigations on all L1TF-vulnerable
1041 * hardware, except when running in shim mode.
1042 *
1043 * In shim mode, SHADOW is expected to be compiled out, and a malicious
1044 * guest kernel can only attack the shim Xen, not the host Xen.
1045 */
1046 if ( opt_pv_l1tf_hwdom == -1 )
1047 opt_pv_l1tf_hwdom = 0;
1048 if ( opt_pv_l1tf_domu == -1 )
1049 opt_pv_l1tf_domu = !pv_shim && cpu_has_bug_l1tf;
1050
1051 /*
1052 * By default, enable L1D_FLUSH on L1TF-vulnerable hardware, unless
1053 * instructed to skip the flush on vmentry by our outer hypervisor.
1054 */
1055 if ( !boot_cpu_has(X86_FEATURE_L1D_FLUSH) )
1056 opt_l1d_flush = 0;
1057 else if ( opt_l1d_flush == -1 )
1058 opt_l1d_flush = cpu_has_bug_l1tf && !(caps & ARCH_CAPS_SKIP_L1DFL);
1059
1060 if ( opt_branch_harden )
1061 setup_force_cpu_cap(X86_FEATURE_SC_BRANCH_HARDEN);
1062
1063 /*
1064 * We do not disable HT by default on affected hardware.
1065 *
1066 * Firstly, if the user intends to use exclusively PV, or HVM shadow
1067 * guests, HT isn't a concern and should remain fully enabled. Secondly,
1068 * safety for HVM HAP guests can be arranged by the toolstack with core
1069 * parking, pinning or cpupool configurations, including mixed setups.
1070 *
1071 * However, if we are on affected hardware, with HT enabled, and the user
1072 * hasn't explicitly chosen whether to use HT or not, nag them to do so.
1073 */
1074 if ( opt_smt == -1 && cpu_has_bug_l1tf && !pv_shim && hw_smt_enabled )
1075 warning_add(
1076 "Booted on L1TF-vulnerable hardware with SMT/Hyperthreading\n"
1077 "enabled. Please assess your configuration and choose an\n"
1078 "explicit 'smt=<bool>' setting. See XSA-273.\n");
1079
1080 mds_calculations(caps);
1081
1082 /*
1083 * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
1084 * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
1085 * but it is somewhat better than nothing.
1086 */
1087 if ( opt_md_clear_pv == -1 )
1088 opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
1089 boot_cpu_has(X86_FEATURE_MD_CLEAR));
1090 if ( opt_md_clear_hvm == -1 )
1091 opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
1092 boot_cpu_has(X86_FEATURE_MD_CLEAR));
1093
1094 /*
1095 * Enable MDS defences as applicable. The PV blocks need using all the
1096 * time, and the Idle blocks need using if either PV or HVM defences are
1097 * used.
1098 *
1099 * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with
* equivalent semantics to avoid needing to perform both flushes on the
1101 * HVM path. The HVM blocks don't need activating if our hypervisor told
1102 * us it was handling L1D_FLUSH, or we are using L1D_FLUSH ourselves.
1103 */
1104 if ( opt_md_clear_pv )
1105 setup_force_cpu_cap(X86_FEATURE_SC_VERW_PV);
1106 if ( opt_md_clear_pv || opt_md_clear_hvm )
1107 setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
1108 if ( opt_md_clear_hvm && !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush )
1109 setup_force_cpu_cap(X86_FEATURE_SC_VERW_HVM);
1110
1111 /*
1112 * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
1113 * active and no explicit SMT choice.
1114 */
1115 if ( opt_smt == -1 && cpu_has_bug_mds && hw_smt_enabled )
1116 warning_add(
1117 "Booted on MLPDS/MFBDS-vulnerable hardware with SMT/Hyperthreading\n"
1118 "enabled. Mitigations will not be fully effective. Please\n"
1119 "choose an explicit smt=<bool> setting. See XSA-297.\n");
1120
1121 /*
1122 * Vulnerability to TAA is a little complicated to quantify.
1123 *
1124 * In the pipeline, it is just another way to get speculative access to
1125 * stale load port, store buffer or fill buffer data, and therefore can be
1126 * considered a superset of MDS (on TSX-capable parts). On parts which
1127 * predate MDS_NO, the existing VERW flushing will mitigate this
1128 * sidechannel as well.
1129 *
1130 * On parts which contain MDS_NO, the lack of VERW flushing means that an
1131 * attacker can still use TSX to target microarchitectural buffers to leak
1132 * secrets. Therefore, we consider TAA to be the set of TSX-capable parts
1133 * which have MDS_NO but lack TAA_NO.
1134 *
1135 * Note: cpu_has_rtm (== hle) could already be hidden by `tsx=0` on the
1136 * cmdline. MSR_TSX_CTRL will only appear on TSX-capable parts, so
1137 * we check both to spot TSX in a microcode/cmdline independent way.
1138 */
1139 cpu_has_bug_taa =
1140 (cpu_has_rtm || (caps & ARCH_CAPS_TSX_CTRL)) &&
1141 (caps & (ARCH_CAPS_MDS_NO | ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO;
1142
1143 /*
1144 * On TAA-affected hardware, disabling TSX is the preferred mitigation, vs
1145 * the MDS mitigation of disabling HT and using VERW flushing.
1146 *
1147 * On CPUs which advertise MDS_NO, VERW has no flushing side effect until
1148 * the TSX_CTRL microcode is loaded, despite the MD_CLEAR CPUID bit being
1149 * advertised, and there isn't a MD_CLEAR_2 flag to use...
1150 *
1151 * If we're on affected hardware, able to do something about it (which
1152 * implies that VERW now works), no explicit TSX choice and traditional
* MDS mitigations (no-SMT, VERW) not obviously in use (someone might
1154 * plausibly value TSX higher than Hyperthreading...), disable TSX to
1155 * mitigate TAA.
1156 */
1157 if ( opt_tsx == -1 && cpu_has_bug_taa && (caps & ARCH_CAPS_TSX_CTRL) &&
1158 ((hw_smt_enabled && opt_smt) ||
1159 !boot_cpu_has(X86_FEATURE_SC_VERW_IDLE)) )
1160 {
1161 setup_clear_cpu_cap(X86_FEATURE_HLE);
1162 setup_clear_cpu_cap(X86_FEATURE_RTM);
1163
1164 opt_tsx = 0;
1165 tsx_init();
1166 }
1167
1168 /* Calculate suitable defaults for MSR_MCU_OPT_CTRL */
1169 if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) )
1170 {
1171 uint64_t val;
1172
1173 rdmsrl(MSR_MCU_OPT_CTRL, val);
1174
1175 /*
1176 * On some SRBDS-affected hardware, it may be safe to relax srb-lock
1177 * by default.
1178 *
1179 * On parts which enumerate MDS_NO and not TAA_NO, TSX is the only way
1180 * to access the Fill Buffer. If TSX isn't available (inc. SKU
1181 * reasons on some models), or TSX is explicitly disabled, then there
1182 * is no need for the extra overhead to protect RDRAND/RDSEED.
1183 */
1184 if ( opt_srb_lock == -1 &&
1185 (caps & (ARCH_CAPS_MDS_NO|ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO &&
1186 (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && opt_tsx == 0)) )
1187 opt_srb_lock = 0;
1188
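/*
 * Note: opt_srb_lock == 0 requests the relaxed behaviour, which is
 * expressed by setting MCU_OPT_CTRL_RNGDS_MITG_DIS below.
 */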
1189 val &= ~MCU_OPT_CTRL_RNGDS_MITG_DIS;
1190 if ( !opt_srb_lock )
1191 val |= MCU_OPT_CTRL_RNGDS_MITG_DIS;
1192
1193 default_xen_mcu_opt_ctrl = val;
1194 }
1195
1196 print_details(thunk, caps);
1197
1198 /*
1199 * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard
1200 * any firmware settings. For performance reasons, when safe to do so, we
1201 * delay applying non-zero settings until after dom0 has been constructed.
1202 *
1203 * "when safe to do so" is based on whether we are virtualised. A native
1204 * boot won't have any other code running in a position to mount an
1205 * attack.
1206 */
1207 if ( boot_cpu_has(X86_FEATURE_IBRSB) )
1208 {
1209 bsp_delay_spec_ctrl = !cpu_has_hypervisor && default_xen_spec_ctrl;
1210
1211 /*
1212 * If delaying MSR_SPEC_CTRL setup, use the same mechanism as
1213 * spec_ctrl_enter_idle(), by using a shadow value of zero.
1214 */
1215 if ( bsp_delay_spec_ctrl )
1216 {
1217 struct cpu_info *info = get_cpu_info();
1218
1219 info->shadow_spec_ctrl = 0;
1220 barrier();
1221 info->spec_ctrl_flags |= SCF_use_shadow;
1222 barrier();
1223 }
1224
1225 wrmsrl(MSR_SPEC_CTRL, bsp_delay_spec_ctrl ? 0 : default_xen_spec_ctrl);
1226 }
1227
1228 if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) )
1229 wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl);
1230 }
1231
static void __init __maybe_unused build_assertions(void)
1233 {
1234 /* The optimised assembly relies on this alias. */
1235 BUILD_BUG_ON(SCF_use_shadow != 1);
1236 }
1237
1238 /*
1239 * Local variables:
1240 * mode: C
1241 * c-file-style: "BSD"
1242 * c-basic-offset: 4
1243 * tab-width: 4
1244 * indent-tabs-mode: nil
1245 * End:
1246 */
1247