#include <xen/init.h>
#include <xen/string.h>
#include <xen/delay.h>
#include <xen/param.h>
#include <xen/smp.h>
#include <asm/current.h>
#include <asm/debugreg.h>
#include <asm/processor.h>
#include <asm/xstate.h>
#include <asm/msr.h>
#include <asm/io.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <asm/random.h>
#include <asm/setup.h>
#include <mach_apic.h>
#include <public/sysctl.h> /* for XEN_INVALID_{SOCKET,CORE}_ID */

#include "cpu.h"
#include "mcheck/x86_mca.h"

bool __read_mostly opt_dom0_cpuid_faulting = true;

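/* arat: Flag governing use of the Always Running APIC Timer (default on). */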
bool_t opt_arat = 1;
boolean_param("arat", opt_arat);

/* pku: Flag to enable Memory Protection Keys (default on). */
static bool_t opt_pku = 1;
boolean_param("pku", opt_pku);

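/*
 * Command line overrides for the CPUID feature masking MSRs, on hardware
 * which has them. All bits set (the default) leaves the corresponding
 * feature word unmasked.
 */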
unsigned int opt_cpuid_mask_ecx = ~0u;
integer_param("cpuid_mask_ecx", opt_cpuid_mask_ecx);
unsigned int opt_cpuid_mask_edx = ~0u;
integer_param("cpuid_mask_edx", opt_cpuid_mask_edx);

unsigned int opt_cpuid_mask_xsave_eax = ~0u;
integer_param("cpuid_mask_xsave_eax", opt_cpuid_mask_xsave_eax);

unsigned int opt_cpuid_mask_ext_ecx = ~0u;
integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx);
unsigned int opt_cpuid_mask_ext_edx = ~0u;
integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);

unsigned int __initdata expected_levelling_cap;
unsigned int __read_mostly levelling_caps;

DEFINE_PER_CPU(struct cpuidmasks, cpuidmasks);
struct cpuidmasks __read_mostly cpuidmask_defaults;

unsigned int paddr_bits __read_mostly = 36;
unsigned int hap_paddr_bits __read_mostly = 36;
unsigned int vaddr_bits __read_mostly = VADDR_BITS;

static unsigned int cleared_caps[NCAPINTS];
static unsigned int forced_caps[NCAPINTS];

DEFINE_PER_CPU(bool, full_gdt_loaded);

void __init setup_clear_cpu_cap(unsigned int cap)
{
	const uint32_t *dfs;
	unsigned int i;

	if (__test_and_set_bit(cap, cleared_caps))
		return;

	if (test_bit(cap, forced_caps))
		printk("%pS clearing previously forced feature %#x\n",
		       __builtin_return_address(0), cap);

	__clear_bit(cap, boot_cpu_data.x86_capability);
	dfs = x86_cpuid_lookup_deep_deps(cap);

	if (!dfs)
		return;

	for (i = 0; i < FSCAPINTS; ++i) {
		cleared_caps[i] |= dfs[i];
		boot_cpu_data.x86_capability[i] &= ~dfs[i];
		if (!(forced_caps[i] & dfs[i]))
			continue;
		printk("%pS implicitly clearing previously forced feature(s) %u:%#x\n",
		       __builtin_return_address(0),
		       i, forced_caps[i] & dfs[i]);
	}
}

void __init setup_force_cpu_cap(unsigned int cap)
{
	if (__test_and_set_bit(cap, forced_caps))
		return;

	if (test_bit(cap, cleared_caps)) {
		printk("%pS tries to force previously cleared feature %#x\n",
		       __builtin_return_address(0), cap);
		return;
	}

	__set_bit(cap, boot_cpu_data.x86_capability);
}

bool __init is_forced_cpu_cap(unsigned int cap)
{
	return test_bit(cap, forced_caps);
}

static void default_init(struct cpuinfo_x86 * c)
{
	/* Not much we can do here... */
	/* Check if at least it has cpuid */
	BUG_ON(c->cpuid_level == -1);
	__clear_bit(X86_FEATURE_SEP, c->x86_capability);
}

static const struct cpu_dev default_cpu = {
	.c_init = default_init,
};
static const struct cpu_dev *this_cpu = &default_cpu;

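/*
 * Per-CPU cache of MSR_INTEL_MISC_FEATURES_ENABLES, used by
 * set_cpuid_faulting() to avoid redundant MSR writes.
 */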
static DEFINE_PER_CPU(uint64_t, msr_misc_features);
void (* __read_mostly ctxt_switch_masking)(const struct vcpu *next);

bool __init probe_cpuid_faulting(void)
{
	uint64_t val;
	int rc;

	/*
	 * Don't bother looking for CPUID faulting if we aren't virtualised on
	 * AMD or Hygon hardware - it won't be present.
	 */
	if ((boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) &&
	    !cpu_has_hypervisor)
		return false;

	if ((rc = rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val)) == 0)
		raw_msr_policy.platform_info.cpuid_faulting =
			val & MSR_PLATFORM_INFO_CPUID_FAULTING;

	if (rc ||
	    !(val & MSR_PLATFORM_INFO_CPUID_FAULTING) ||
	    rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES,
		       this_cpu(msr_misc_features)))
	{
		setup_clear_cpu_cap(X86_FEATURE_CPUID_FAULTING);
		return false;
	}

	expected_levelling_cap |= LCAP_faulting;
	levelling_caps |= LCAP_faulting;
	setup_force_cpu_cap(X86_FEATURE_CPUID_FAULTING);

	return true;
}

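/*
 * Flip the CPUID faulting enable bit in MSR_INTEL_MISC_FEATURES_ENABLES,
 * but only if the requested state differs from the cached one.
 */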
static void set_cpuid_faulting(bool enable)
{
	uint64_t *this_misc_features = &this_cpu(msr_misc_features);
	uint64_t val = *this_misc_features;

	if (!!(val & MSR_MISC_FEATURES_CPUID_FAULTING) == enable)
		return;

	val ^= MSR_MISC_FEATURES_CPUID_FAULTING;

	wrmsrl(MSR_INTEL_MISC_FEATURES_ENABLES, val);
	*this_misc_features = val;
}

void ctxt_switch_levelling(const struct vcpu *next)
{
	const struct domain *nextd = next ? next->domain : NULL;

	if (cpu_has_cpuid_faulting) {
		/*
		 * No need to alter the faulting setting if we are switching
		 * to idle; it won't affect any code running in idle context.
		 */
		if (nextd && is_idle_domain(nextd))
			return;
		/*
		 * We *should* be enabling faulting for PV control domains.
		 *
		 * The domain builder has now been updated to not depend on
		 * seeing host CPUID values. This makes it compatible with
		 * PVH toolstack domains, and lets us enable faulting by
		 * default for all PV domains.
		 *
		 * However, as PV control domains have never had faulting
		 * enforced on them before, there might plausibly be other
		 * dependencies on host CPUID data. Therefore, we have left
		 * an interim escape hatch in the form of
		 * `dom0=no-cpuid-faulting` to restore the older behaviour.
		 */
		set_cpuid_faulting(nextd && (opt_dom0_cpuid_faulting ||
					     !is_control_domain(nextd) ||
					     !is_pv_domain(nextd)) &&
				   (is_pv_domain(nextd) ||
				    next->arch.msrs->
				    misc_features_enables.cpuid_faulting));
		return;
	}

	if (ctxt_switch_masking)
		alternative_vcall(ctxt_switch_masking, next);
}

bool_t opt_cpu_info;
boolean_param("cpuinfo", opt_cpu_info);

int get_model_name(struct cpuinfo_x86 *c)
{
	unsigned int *v;
	char *p, *q;

	if (c->extended_cpuid_level < 0x80000004)
		return 0;

	v = (unsigned int *) c->x86_model_id;
	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
	c->x86_model_id[48] = 0;

	/* Intel chips right-justify this string for some dumb reason;
	   undo that brain damage */
	p = q = &c->x86_model_id[0];
	while ( *p == ' ' )
		p++;
	if ( p != q ) {
		while ( *p )
			*q++ = *p++;
		while ( q <= &c->x86_model_id[48] )
			*q++ = '\0'; /* Zero-pad the rest */
	}

	return 1;
}


void display_cacheinfo(struct cpuinfo_x86 *c)
{
	unsigned int dummy, ecx, edx, l2size;

	if (c->extended_cpuid_level >= 0x80000005) {
		cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
		if (opt_cpu_info)
			printk("CPU: L1 I cache %dK (%d bytes/line),"
			       " D cache %dK (%d bytes/line)\n",
			       edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
		c->x86_cache_size = (ecx>>24)+(edx>>24);
	}

	if (c->extended_cpuid_level < 0x80000006) /* Some chips just have a large L1. */
		return;

	ecx = cpuid_ecx(0x80000006);
	l2size = ecx >> 16;

	c->x86_cache_size = l2size;

	if (opt_cpu_info)
		printk("CPU: L2 Cache: %dK (%d bytes/line)\n",
		       l2size, ecx & 0xFF);
}

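/*
 * Discard the low index_msb bits of an APIC ID to obtain the identifier
 * of the enclosing topology level (e.g. core or package).
 */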
static inline u32 _phys_pkg_id(u32 cpuid_apic, int index_msb)
{
	return cpuid_apic >> index_msb;
}

/*
 * cpuid returns the value latched in the HW at reset, not the APIC ID
 * register's value. For any box whose BIOS changes APIC IDs, like
 * clustered APIC systems, we must use get_apic_id().
 *
 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
 */
static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
{
	return _phys_pkg_id(get_apic_id(), index_msb);
}

/* Do minimum CPU detection early.
   Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
   The others are not touched to avoid unwanted side effects.

   WARNING: this function is only called on the BP. Don't add code here
   that is supposed to run on all CPUs. */
void __init early_cpu_init(void)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;
	u32 eax, ebx, ecx, edx;

	c->x86_cache_alignment = 32;

	/* Get vendor name */
	cpuid(0x00000000, &c->cpuid_level, &ebx, &ecx, &edx);
	*(u32 *)&c->x86_vendor_id[0] = ebx;
	*(u32 *)&c->x86_vendor_id[8] = ecx;
	*(u32 *)&c->x86_vendor_id[4] = edx;

	c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx);
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL: this_cpu = &intel_cpu_dev; break;
	case X86_VENDOR_AMD: this_cpu = &amd_cpu_dev; break;
	case X86_VENDOR_CENTAUR: this_cpu = &centaur_cpu_dev; break;
	case X86_VENDOR_SHANGHAI: this_cpu = &shanghai_cpu_dev; break;
	case X86_VENDOR_HYGON: this_cpu = &hygon_cpu_dev; break;
	default:
		printk(XENLOG_ERR
		       "Unrecognised or unsupported CPU vendor '%.12s'\n",
		       c->x86_vendor_id);
	}

	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
	c->x86 = get_cpu_family(eax, &c->x86_model, &c->x86_mask);

	edx &= ~cleared_caps[cpufeat_word(X86_FEATURE_FPU)];
	ecx &= ~cleared_caps[cpufeat_word(X86_FEATURE_SSE3)];
	if (edx & cpufeat_mask(X86_FEATURE_CLFLUSH))
		c->x86_cache_alignment = ((ebx >> 8) & 0xff) * 8;
	/* Leaf 0x1 capabilities filled in early for Xen. */
	c->x86_capability[cpufeat_word(X86_FEATURE_FPU)] = edx;
	c->x86_capability[cpufeat_word(X86_FEATURE_SSE3)] = ecx;

	printk(XENLOG_INFO
	       "CPU Vendor: %s, Family %u (%#x), Model %u (%#x), Stepping %u (raw %08x)\n",
	       x86_cpuid_vendor_to_str(c->x86_vendor), c->x86, c->x86,
	       c->x86_model, c->x86_model, c->x86_mask, eax);

	if (c->cpuid_level >= 7) {
		cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
		c->x86_capability[cpufeat_word(X86_FEATURE_CET_SS)] = ecx;
	}

	eax = cpuid_eax(0x80000000);
	if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
		eax = cpuid_eax(0x80000008);
		paddr_bits = eax & 0xff;
		if (paddr_bits > PADDR_BITS)
			paddr_bits = PADDR_BITS;
		vaddr_bits = (eax >> 8) & 0xff;
		if (vaddr_bits > VADDR_BITS)
			vaddr_bits = VADDR_BITS;
		hap_paddr_bits = ((eax >> 16) & 0xff) ?: paddr_bits;
		if (hap_paddr_bits > PADDR_BITS)
			hap_paddr_bits = PADDR_BITS;
	}

	if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)))
		park_offline_cpus = opt_mce;

	initialize_cpu_data(0);
}

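/*
 * Fill in the generic, architecturally-defined CPUID information (vendor,
 * family/model/stepping, feature words, extended leaves), giving the
 * vendor hook c_early_init() a chance to adjust levels first.
 */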
static void generic_identify(struct cpuinfo_x86 *c)
{
	u32 eax, ebx, ecx, edx, tmp;

	/* Get vendor name */
	cpuid(0x00000000, &c->cpuid_level, &ebx, &ecx, &edx);
	*(u32 *)&c->x86_vendor_id[0] = ebx;
	*(u32 *)&c->x86_vendor_id[8] = ecx;
	*(u32 *)&c->x86_vendor_id[4] = edx;

	c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx);
	if (boot_cpu_data.x86_vendor != c->x86_vendor)
		printk(XENLOG_ERR "CPU%u vendor %u mismatch against BSP %u\n",
		       smp_processor_id(), c->x86_vendor,
		       boot_cpu_data.x86_vendor);

	/* Initialize the standard set of capabilities */
	/* Note that the vendor-specific code below might override */

	/* Model and family information. */
	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
	c->x86 = get_cpu_family(eax, &c->x86_model, &c->x86_mask);
	c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
	c->phys_proc_id = c->apicid;

	if (this_cpu->c_early_init)
		this_cpu->c_early_init(c);

	/* c_early_init() may have adjusted cpuid levels/features. Reread. */
	c->cpuid_level = cpuid_eax(0);
	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
	c->x86_capability[cpufeat_word(X86_FEATURE_FPU)] = edx;
	c->x86_capability[cpufeat_word(X86_FEATURE_SSE3)] = ecx;

	if ( cpu_has(c, X86_FEATURE_CLFLUSH) )
		c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;

	if ( (c->cpuid_level >= CPUID_PM_LEAF) &&
	     (cpuid_ecx(CPUID_PM_LEAF) & CPUID6_ECX_APERFMPERF_CAPABILITY) )
		set_bit(X86_FEATURE_APERFMPERF, c->x86_capability);

	/* AMD-defined flags: level 0x80000001 */
	c->extended_cpuid_level = cpuid_eax(0x80000000);
	if ((c->extended_cpuid_level >> 16) != 0x8000)
		c->extended_cpuid_level = 0;
	if (c->extended_cpuid_level > 0x80000000)
		cpuid(0x80000001, &tmp, &tmp,
		      &c->x86_capability[cpufeat_word(X86_FEATURE_LAHF_LM)],
		      &c->x86_capability[cpufeat_word(X86_FEATURE_SYSCALL)]);

	if (c->extended_cpuid_level >= 0x80000004)
		get_model_name(c); /* Default name */
	if (c->extended_cpuid_level >= 0x80000007)
		c->x86_capability[cpufeat_word(X86_FEATURE_ITSC)]
			= cpuid_edx(0x80000007);
	if (c->extended_cpuid_level >= 0x80000008)
		c->x86_capability[cpufeat_word(X86_FEATURE_CLZERO)]
			= cpuid_ebx(0x80000008);

	/* Intel-defined flags: level 0x00000007 */
	if ( c->cpuid_level >= 0x00000007 ) {
		cpuid_count(0x00000007, 0, &eax,
			    &c->x86_capability[cpufeat_word(X86_FEATURE_FSGSBASE)],
			    &c->x86_capability[cpufeat_word(X86_FEATURE_PKU)],
			    &c->x86_capability[cpufeat_word(X86_FEATURE_AVX512_4VNNIW)]);
		if (eax > 0)
			cpuid_count(0x00000007, 1,
				    &c->x86_capability[cpufeat_word(X86_FEATURE_AVX512_BF16)],
				    &tmp, &tmp, &tmp);
	}

	if (c->cpuid_level >= 0xd)
		cpuid_count(0xd, 1,
			    &c->x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)],
			    &tmp, &tmp, &tmp);
}

/*
 * This does the hard work of actually picking apart the CPU stuff...
 */
void identify_cpu(struct cpuinfo_x86 *c)
{
	int i;

	c->x86_cache_size = -1;
	c->x86_vendor = X86_VENDOR_UNKNOWN;
	c->cpuid_level = -1;	/* CPUID not detected */
	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
	c->x86_vendor_id[0] = '\0'; /* Unset */
	c->x86_model_id[0] = '\0';  /* Unset */
	c->x86_max_cores = 1;
	c->x86_num_siblings = 1;
	c->x86_clflush_size = 0;
	c->phys_proc_id = XEN_INVALID_SOCKET_ID;
	c->cpu_core_id = XEN_INVALID_CORE_ID;
	c->compute_unit_id = INVALID_CUID;
	memset(&c->x86_capability, 0, sizeof c->x86_capability);

	generic_identify(c);

#ifdef NOISY_CAPS
	printk(KERN_DEBUG "CPU: After vendor identify, caps:");
	for (i = 0; i < NCAPINTS; i++)
		printk(" %08x", c->x86_capability[i]);
	printk("\n");
#endif

	/*
	 * Vendor-specific initialization. In this section we
	 * canonicalize the feature flags, meaning if there are
	 * features a certain CPU supports which CPUID doesn't
	 * tell us, CPUID claiming incorrect flags, or other bugs,
	 * we handle them here.
	 *
	 * At the end of this section, c->x86_capability better
	 * indicate the features this CPU genuinely supports!
	 */
	if (this_cpu->c_init)
		this_cpu->c_init(c);


	if (c == &boot_cpu_data && !opt_pku)
		setup_clear_cpu_cap(X86_FEATURE_PKU);

	/*
	 * The vendor-specific functions might have changed features. Now
	 * we do "generic changes."
	 */
	for (i = 0; i < FSCAPINTS; ++i)
		c->x86_capability[i] &= known_features[i];

	for (i = 0 ; i < NCAPINTS ; ++i) {
		c->x86_capability[i] |= forced_caps[i];
		c->x86_capability[i] &= ~cleared_caps[i];
	}

	/* If the model name is still unset, do table lookup. */
	if ( !c->x86_model_id[0] ) {
		/* Last resort... */
		snprintf(c->x86_model_id, sizeof(c->x86_model_id),
			 "%02x/%02x", c->x86_vendor, c->x86_model);
	}

	/* Now the feature flags better reflect actual CPU features! */

	xstate_init(c);

#ifdef NOISY_CAPS
	printk(KERN_DEBUG "CPU: After all inits, caps:");
	for (i = 0; i < NCAPINTS; i++)
		printk(" %08x", c->x86_capability[i]);
	printk("\n");
#endif

	/*
	 * If RDRAND is available, make an attempt to check that it actually
	 * (still) works.
	 */
	if (cpu_has(c, X86_FEATURE_RDRAND)) {
		unsigned int prev = 0;

		for (i = 0; i < 5; ++i)
		{
			unsigned int cur = arch_get_random();

			if (prev && cur != prev)
				break;
			prev = cur;
		}

		if (i >= 5)
			printk(XENLOG_WARNING "CPU%u: RDRAND appears to not work\n",
			       smp_processor_id());
	}

	if (system_state == SYS_STATE_resume)
		return;

	/*
	 * On SMP, boot_cpu_data holds the common feature set between
	 * all CPUs; so make sure that we indicate which features are
	 * common between the CPUs. The first time this routine gets
	 * executed, c == &boot_cpu_data.
	 */
	if ( c != &boot_cpu_data ) {
		/* AND the already accumulated flags with these */
		for ( i = 0 ; i < NCAPINTS ; i++ )
			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];

		mcheck_init(c, false);
	} else {
		mcheck_init(c, true);

		mtrr_bp_init();
	}
}

/* leaf 0xb SMT level */
#define SMT_LEVEL	0

/* leaf 0xb sub-leaf types */
#define INVALID_TYPE	0
#define SMT_TYPE	1
#define CORE_TYPE	2

#define LEAFB_SUBTYPE(ecx)		(((ecx) >> 8) & 0xff)
#define BITS_SHIFT_NEXT_LEVEL(eax)	((eax) & 0x1f)
#define LEVEL_MAX_SIBLINGS(ebx)		((ebx) & 0xffff)

/*
 * Check for extended topology enumeration cpuid leaf 0xb and if it
 * exists, use it for cpu topology detection.
 */
bool detect_extended_topology(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, sub_index;
	unsigned int ht_mask_width, core_plus_mask_width;
	unsigned int core_select_mask, core_level_siblings;
	unsigned int initial_apicid;

	if ( c->cpuid_level < 0xb )
		return false;

	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);

	/* Check if the cpuid leaf 0xb is actually implemented */
	if ( ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE) )
		return false;

	__set_bit(X86_FEATURE_XTOPOLOGY, c->x86_capability);

	initial_apicid = edx;

	/* Populate HT related information from sub-leaf level 0 */
	core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
	core_level_siblings = c->x86_num_siblings = 1u << ht_mask_width;

	sub_index = 1;
	do {
		cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);

		/* Check for the Core type in the implemented sub leaves */
		if ( LEAFB_SUBTYPE(ecx) == CORE_TYPE ) {
			core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
			core_level_siblings = 1u << core_plus_mask_width;
			break;
		}

		sub_index++;
	} while ( LEAFB_SUBTYPE(ecx) != INVALID_TYPE );

	core_select_mask = (~(~0u << core_plus_mask_width)) >> ht_mask_width;

	c->cpu_core_id = phys_pkg_id(initial_apicid, ht_mask_width)
		& core_select_mask;
	c->phys_proc_id = phys_pkg_id(initial_apicid, core_plus_mask_width);

	c->apicid = phys_pkg_id(initial_apicid, 0);
	c->x86_max_cores = (core_level_siblings / c->x86_num_siblings);

	if ( opt_cpu_info )
	{
		printk("CPU: Physical Processor ID: %d\n",
		       c->phys_proc_id);
		if ( c->x86_max_cores > 1 )
			printk("CPU: Processor Core ID: %d\n",
			       c->cpu_core_id);
	}

	return true;
}

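/*
 * Legacy Hyper-Threading detection using the sibling count in CPUID leaf 1
 * EBX[23:16], for CPUs without the extended topology leaf.
 */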
void detect_ht(struct cpuinfo_x86 *c)
{
	u32 eax, ebx, ecx, edx;
	int index_msb, core_bits;

	if (!cpu_has(c, X86_FEATURE_HTT) ||
	    cpu_has(c, X86_FEATURE_CMP_LEGACY) ||
	    cpu_has(c, X86_FEATURE_XTOPOLOGY))
		return;

	cpuid(1, &eax, &ebx, &ecx, &edx);
	c->x86_num_siblings = (ebx & 0xff0000) >> 16;

	if (c->x86_num_siblings == 1) {
		printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
	} else if (c->x86_num_siblings > 1) {
		index_msb = get_count_order(c->x86_num_siblings);
		c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);

		if (opt_cpu_info)
			printk("CPU: Physical Processor ID: %d\n",
			       c->phys_proc_id);

		c->x86_num_siblings = c->x86_num_siblings / c->x86_max_cores;

		index_msb = get_count_order(c->x86_num_siblings);

		core_bits = get_count_order(c->x86_max_cores);

		c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
			((1 << core_bits) - 1);

		if (opt_cpu_info && c->x86_max_cores > 1)
			printk("CPU: Processor Core ID: %d\n",
			       c->cpu_core_id);
	}
}

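/*
 * Map an APIC ID to its socket (package) number, preferring the extended
 * topology leaf and falling back to the legacy sibling count from leaf 1.
 */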
unsigned int __init apicid_to_socket(unsigned int apicid)
{
	unsigned int dummy;

	if (boot_cpu_has(X86_FEATURE_XTOPOLOGY)) {
		unsigned int eax, ecx, sub_index = 1, core_plus_mask_width;

		cpuid_count(0xb, SMT_LEVEL, &eax, &dummy, &dummy, &dummy);
		core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
		do {
			cpuid_count(0xb, sub_index, &eax, &dummy, &ecx,
				    &dummy);

			if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
				core_plus_mask_width =
					BITS_SHIFT_NEXT_LEVEL(eax);
				break;
			}

			sub_index++;
		} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);

		return _phys_pkg_id(apicid, core_plus_mask_width);
	}

	if (boot_cpu_has(X86_FEATURE_HTT) &&
	    !boot_cpu_has(X86_FEATURE_CMP_LEGACY)) {
		unsigned int num_siblings = (cpuid_ebx(1) & 0xff0000) >> 16;

		if (num_siblings)
			return _phys_pkg_id(apicid,
					    get_count_order(num_siblings));
	}

	return apicid;
}

void print_cpu_info(unsigned int cpu)
{
	const struct cpuinfo_x86 *c = cpu_data + cpu;
	const char *vendor = NULL;

	if (!opt_cpu_info)
		return;

	printk("CPU%u: ", cpu);

	vendor = x86_cpuid_vendor_to_str(c->x86_vendor);
	if (strncmp(c->x86_model_id, vendor, strlen(vendor)))
		printk("%s ", vendor);

	if (!c->x86_model_id[0])
		printk("%d86", c->x86);
	else
		printk("%s", c->x86_model_id);

	printk(" stepping %02x\n", c->x86_mask);
}

static cpumask_t cpu_initialized;

/*
 * Sets up system tables and descriptors.
 *
 * - Sets up TSS with stack pointers, including ISTs
 * - Inserts TSS selector into regular and compat GDTs
 * - Loads GDT, IDT, TR then null LDT
 * - Sets up IST references in the IDT
 */
void load_system_tables(void)
{
	unsigned int i, cpu = smp_processor_id();
	unsigned long stack_bottom = get_stack_bottom(),
		stack_top = stack_bottom & ~(STACK_SIZE - 1);
	/*
	 * NB: define tss_page as a local variable because clang 3.5 doesn't
	 * support using ARRAY_SIZE against per-cpu variables.
	 */
	struct tss_page *tss_page = &this_cpu(tss_page);

	/* The TSS may be live. Dissuade any clever optimisations. */
	volatile struct tss64 *tss = &tss_page->tss;
	seg_desc_t *gdt =
		this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY;

	const struct desc_ptr gdtr = {
		.base = (unsigned long)gdt,
		.limit = LAST_RESERVED_GDT_BYTE,
	};
	const struct desc_ptr idtr = {
		.base = (unsigned long)idt_tables[cpu],
		.limit = (IDT_ENTRIES * sizeof(idt_entry_t)) - 1,
	};

	/*
	 * Set up the TSS. Warning - may be live, and the NMI/#MC must remain
	 * valid on every instruction boundary. (Note: these are all
	 * semantically ACCESS_ONCE() due to tss's volatile qualifier.)
	 *
	 * rsp0 refers to the primary stack. #MC, NMI, #DB and #DF handlers
	 * each get their own stacks. No IO Bitmap.
	 */
	tss->rsp0 = stack_bottom;
	tss->ist[IST_MCE - 1] = stack_top + (1 + IST_MCE) * PAGE_SIZE;
	tss->ist[IST_NMI - 1] = stack_top + (1 + IST_NMI) * PAGE_SIZE;
	tss->ist[IST_DB - 1] = stack_top + (1 + IST_DB) * PAGE_SIZE;
	/*
	 * Gross bodge. The #DF handler uses the vm86 fields of cpu_user_regs
	 * beyond the hardware frame. Adjust the stack entrypoint so this
	 * doesn't manifest as an OoB write which hits the guard page.
	 */
	tss->ist[IST_DF - 1] = stack_top + (1 + IST_DF) * PAGE_SIZE -
		(sizeof(struct cpu_user_regs) - offsetof(struct cpu_user_regs, es));
	tss->bitmap = IOBMP_INVALID_OFFSET;

	/* All other stack pointers poisoned. */
	for ( i = IST_MAX; i < ARRAY_SIZE(tss->ist); ++i )
		tss->ist[i] = 0x8600111111111111ul;
	tss->rsp1 = 0x8600111111111111ul;
	tss->rsp2 = 0x8600111111111111ul;

	/*
	 * Set up the shadow stack IST. Used entries must point at the
	 * supervisor stack token. Unused entries are poisoned.
	 *
	 * This IST Table may be live, and the NMI/#MC entries must
	 * remain valid on every instruction boundary, hence the
	 * volatile qualifier.
	 */
	if (cpu_has_xen_shstk) {
		volatile uint64_t *ist_ssp = tss_page->ist_ssp;

		ist_ssp[0] = 0x8600111111111111ul;
		ist_ssp[IST_MCE] = stack_top + (IST_MCE * IST_SHSTK_SIZE) - 8;
		ist_ssp[IST_NMI] = stack_top + (IST_NMI * IST_SHSTK_SIZE) - 8;
		ist_ssp[IST_DB] = stack_top + (IST_DB * IST_SHSTK_SIZE) - 8;
		ist_ssp[IST_DF] = stack_top + (IST_DF * IST_SHSTK_SIZE) - 8;
		for ( i = IST_DF + 1; i < ARRAY_SIZE(tss_page->ist_ssp); ++i )
			ist_ssp[i] = 0x8600111111111111ul;

		wrmsrl(MSR_INTERRUPT_SSP_TABLE, (unsigned long)ist_ssp);
	}

	BUILD_BUG_ON(sizeof(*tss) <= 0x67); /* Mandated by the architecture. */

	_set_tssldt_desc(gdt + TSS_ENTRY, (unsigned long)tss,
			 sizeof(*tss) - 1, SYS_DESC_tss_avail);
	if ( IS_ENABLED(CONFIG_PV32) )
		_set_tssldt_desc(
			this_cpu(compat_gdt) - FIRST_RESERVED_GDT_ENTRY + TSS_ENTRY,
			(unsigned long)tss, sizeof(*tss) - 1, SYS_DESC_tss_busy);

	per_cpu(full_gdt_loaded, cpu) = false;
	lgdt(&gdtr);
	lidt(&idtr);
	ltr(TSS_SELECTOR);
	lldt(0);

	enable_each_ist(idt_tables[cpu]);

	/*
	 * Bottom-of-stack must be 16-byte aligned!
	 *
	 * Defer checks until exception support is sufficiently set up.
	 */
	BUILD_BUG_ON((sizeof(struct cpu_info) -
		      offsetof(struct cpu_info, guest_cpu_user_regs.es)) & 0xf);
	BUG_ON(system_state != SYS_STATE_early_boot && (stack_bottom & 0xf));
}

/*
 * cpu_init() initializes state that is per-CPU. Some data is already
 * initialized (naturally) in the bootstrap process, such as the GDT
 * and IDT. We reload them nevertheless, this function acts as a
 * 'CPU state barrier', nothing should get across.
 */
void cpu_init(void)
{
	int cpu = smp_processor_id();

	if (cpumask_test_and_set_cpu(cpu, &cpu_initialized)) {
		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
		for (;;) local_irq_enable();
	}
	if (opt_cpu_info)
		printk("Initializing CPU#%d\n", cpu);

	/* Install correct page table. */
	write_ptbase(current);

	/* Ensure FPU gets initialised for each domain. */
	stts();

	/* Reset debug registers: */
	write_debugreg(0, 0);
	write_debugreg(1, 0);
	write_debugreg(2, 0);
	write_debugreg(3, 0);
	write_debugreg(6, X86_DR6_DEFAULT);
	write_debugreg(7, X86_DR7_DEFAULT);

	/* Enable NMIs. Our loader (e.g. Tboot) may have left them disabled. */
	enable_nmis();
}

void cpu_uninit(unsigned int cpu)
{
	cpumask_clear_cpu(cpu, &cpu_initialized);
}

/*
 * x86_match_cpu - match the current CPU against an array of
 * x86_cpu_ids
 * @match: Pointer to array of x86_cpu_ids. Last entry terminated with
 * {}.
 * Return the entry if the current CPU matches the entries in the
 * passed x86_cpu_id match table. Otherwise NULL. The match table
 * contains vendor (X86_VENDOR_*), family, model and feature bits or
 * respective wildcard entries.
 *
 * A typical table entry would be to match a specific CPU
 * { X86_VENDOR_INTEL, 6, 0x12 }
 * or to match a specific CPU feature
 * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
 *
 * This always matches against the boot cpu, assuming models and features are
 * consistent over all CPUs.
 */
const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id table[])
{
	const struct x86_cpu_id *m;
	const struct cpuinfo_x86 *c = &boot_cpu_data;

	for (m = table; m->vendor | m->family | m->model | m->feature; m++) {
		if (c->x86_vendor != m->vendor)
			continue;
		if (c->x86 != m->family)
			continue;
		if (c->x86_model != m->model)
			continue;
		if (!cpu_has(c, m->feature))
			continue;
		return m;
	}
	return NULL;
}
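
/*
 * Illustrative sketch only (not an interface defined here): a hypothetical
 * quirk table and lookup. The names example_quirk_ids and
 * apply_example_quirk() are made up, and X86_FEATURE_ALWAYS is assumed as
 * the "don't care" feature; the entry otherwise mirrors the
 * { X86_VENDOR_INTEL, 6, 0x12 } form from the comment above, with the
 * empty entry terminating the table as x86_match_cpu() requires.
 *
 *     static const struct x86_cpu_id example_quirk_ids[] = {
 *         { X86_VENDOR_INTEL, 6, 0x12, X86_FEATURE_ALWAYS },
 *         { }
 *     };
 *
 *     if (x86_match_cpu(example_quirk_ids))
 *         apply_example_quirk();
 */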