#include <xen/init.h>
#include <xen/string.h>
#include <xen/delay.h>
#include <xen/param.h>
#include <xen/smp.h>
#include <asm/current.h>
#include <asm/debugreg.h>
#include <asm/processor.h>
#include <asm/xstate.h>
#include <asm/msr.h>
#include <asm/io.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <asm/random.h>
#include <asm/setup.h>
#include <mach_apic.h>
#include <public/sysctl.h> /* for XEN_INVALID_{SOCKET,CORE}_ID */

#include "cpu.h"
#include "mcheck/x86_mca.h"

bool __read_mostly opt_dom0_cpuid_faulting = true;

bool_t opt_arat = 1;
boolean_param("arat", opt_arat);

/* pku: Flag to enable Memory Protection Keys (default on). */
static bool_t opt_pku = 1;
boolean_param("pku", opt_pku);

unsigned int opt_cpuid_mask_ecx = ~0u;
integer_param("cpuid_mask_ecx", opt_cpuid_mask_ecx);
unsigned int opt_cpuid_mask_edx = ~0u;
integer_param("cpuid_mask_edx", opt_cpuid_mask_edx);

unsigned int opt_cpuid_mask_xsave_eax = ~0u;
integer_param("cpuid_mask_xsave_eax", opt_cpuid_mask_xsave_eax);

unsigned int opt_cpuid_mask_ext_ecx = ~0u;
integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx);
unsigned int opt_cpuid_mask_ext_edx = ~0u;
integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);

unsigned int __initdata expected_levelling_cap;
unsigned int __read_mostly levelling_caps;

DEFINE_PER_CPU(struct cpuidmasks, cpuidmasks);
struct cpuidmasks __read_mostly cpuidmask_defaults;

unsigned int paddr_bits __read_mostly = 36;
unsigned int hap_paddr_bits __read_mostly = 36;
unsigned int vaddr_bits __read_mostly = VADDR_BITS;

static unsigned int cleared_caps[NCAPINTS];
static unsigned int forced_caps[NCAPINTS];

DEFINE_PER_CPU(bool, full_gdt_loaded);

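/*
 * Clear a feature bit on the boot CPU and record it in cleared_caps so the
 * same bit is stripped from every CPU in identify_cpu().  Features which
 * depend on it (per the deep-dependency table) are cleared as well.
 */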
void __init setup_clear_cpu_cap(unsigned int cap)
{
	const uint32_t *dfs;
	unsigned int i;

	if (__test_and_set_bit(cap, cleared_caps))
		return;

	if (test_bit(cap, forced_caps))
		printk("%pS clearing previously forced feature %#x\n",
		       __builtin_return_address(0), cap);

	__clear_bit(cap, boot_cpu_data.x86_capability);
	dfs = x86_cpuid_lookup_deep_deps(cap);

	if (!dfs)
		return;

	for (i = 0; i < FSCAPINTS; ++i) {
		cleared_caps[i] |= dfs[i];
		boot_cpu_data.x86_capability[i] &= ~dfs[i];
		if (!(forced_caps[i] & dfs[i]))
			continue;
		printk("%pS implicitly clearing previously forced feature(s) %u:%#x\n",
		       __builtin_return_address(0),
		       i, forced_caps[i] & dfs[i]);
	}
}

void __init setup_force_cpu_cap(unsigned int cap)
{
	if (__test_and_set_bit(cap, forced_caps))
		return;

	if (test_bit(cap, cleared_caps)) {
		printk("%pS tries to force previously cleared feature %#x\n",
		       __builtin_return_address(0), cap);
		return;
	}

	__set_bit(cap, boot_cpu_data.x86_capability);
}

bool __init is_forced_cpu_cap(unsigned int cap)
{
	return test_bit(cap, forced_caps);
}

static void default_init(struct cpuinfo_x86 * c)
{
	/* Not much we can do here... */
	/* Check if at least it has cpuid */
	BUG_ON(c->cpuid_level == -1);
	__clear_bit(X86_FEATURE_SEP, c->x86_capability);
}

static const struct cpu_dev default_cpu = {
	.c_init	= default_init,
};
static const struct cpu_dev *this_cpu = &default_cpu;

static DEFINE_PER_CPU(uint64_t, msr_misc_features);
void (* __read_mostly ctxt_switch_masking)(const struct vcpu *next);

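/*
 * Probe for MSR-based CPUID faulting (advertised via MSR_INTEL_PLATFORM_INFO).
 * On success the synthetic CPUID_FAULTING feature is forced and LCAP_faulting
 * recorded; otherwise the feature is cleared.  Returns whether faulting is
 * available on this CPU.
 */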
bool __init probe_cpuid_faulting(void)
{
	uint64_t val;
	int rc;

	/*
	 * Don't bother looking for CPUID faulting if we aren't virtualised on
	 * AMD or Hygon hardware - it won't be present.
	 */
	if ((boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)) &&
	    !cpu_has_hypervisor)
		return false;

	if ((rc = rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val)) == 0)
		raw_msr_policy.platform_info.cpuid_faulting =
			val & MSR_PLATFORM_INFO_CPUID_FAULTING;

	if (rc ||
	    !(val & MSR_PLATFORM_INFO_CPUID_FAULTING) ||
	    rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES,
		       this_cpu(msr_misc_features)))
	{
		setup_clear_cpu_cap(X86_FEATURE_CPUID_FAULTING);
		return false;
	}

	expected_levelling_cap |= LCAP_faulting;
	levelling_caps |= LCAP_faulting;
	setup_force_cpu_cap(X86_FEATURE_CPUID_FAULTING);

	return true;
}

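/*
 * Flip the CPUID faulting enable bit in MSR_INTEL_MISC_FEATURES_ENABLES,
 * using the per-CPU cached MSR value to avoid a redundant write when the
 * requested state is already in effect.
 */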
static void set_cpuid_faulting(bool enable)
{
	uint64_t *this_misc_features = &this_cpu(msr_misc_features);
	uint64_t val = *this_misc_features;

	if (!!(val & MSR_MISC_FEATURES_CPUID_FAULTING) == enable)
		return;

	val ^= MSR_MISC_FEATURES_CPUID_FAULTING;

	wrmsrl(MSR_INTEL_MISC_FEATURES_ENABLES, val);
	*this_misc_features = val;
}

void ctxt_switch_levelling(const struct vcpu *next)
{
	const struct domain *nextd = next ? next->domain : NULL;

	if (cpu_has_cpuid_faulting) {
		/*
		 * No need to alter the faulting setting if we are switching
		 * to idle; it won't affect any code running in idle context.
		 */
		if (nextd && is_idle_domain(nextd))
			return;
		/*
		 * We *should* be enabling faulting for PV control domains.
		 *
		 * The domain builder has now been updated to not depend on
		 * seeing host CPUID values.  This makes it compatible with
		 * PVH toolstack domains, and lets us enable faulting by
		 * default for all PV domains.
		 *
		 * However, as PV control domains have never had faulting
		 * enforced on them before, there might plausibly be other
		 * dependencies on host CPUID data.  Therefore, we have left
		 * an interim escape hatch in the form of
		 * `dom0=no-cpuid-faulting` to restore the older behaviour.
		 */
		set_cpuid_faulting(nextd && (opt_dom0_cpuid_faulting ||
					     !is_control_domain(nextd) ||
					     !is_pv_domain(nextd)) &&
				   (is_pv_domain(nextd) ||
				    next->arch.msrs->
				    misc_features_enables.cpuid_faulting));
		return;
	}

	if (ctxt_switch_masking)
		alternative_vcall(ctxt_switch_masking, next);
}

bool_t opt_cpu_info;
boolean_param("cpuinfo", opt_cpu_info);

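/* Read the 48-byte brand string from leaves 0x80000002-4 into x86_model_id. */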
int get_model_name(struct cpuinfo_x86 *c)
{
	unsigned int *v;
	char *p, *q;

	if (c->extended_cpuid_level < 0x80000004)
		return 0;

	v = (unsigned int *) c->x86_model_id;
	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
	c->x86_model_id[48] = 0;

	/* Intel chips right-justify this string for some dumb reason;
	   undo that brain damage */
	p = q = &c->x86_model_id[0];
	while ( *p == ' ' )
	     p++;
	if ( p != q ) {
	     while ( *p )
		  *q++ = *p++;
	     while ( q <= &c->x86_model_id[48] )
		  *q++ = '\0';	/* Zero-pad the rest */
	}

	return 1;
}

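/*
 * Report L1/L2 cache geometry from extended leaves 0x80000005/0x80000006,
 * where implemented, and record c->x86_cache_size.
 */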
void display_cacheinfo(struct cpuinfo_x86 *c)
{
	unsigned int dummy, ecx, edx, l2size;

	if (c->extended_cpuid_level >= 0x80000005) {
		cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
		if (opt_cpu_info)
			printk("CPU: L1 I cache %dK (%d bytes/line),"
			              " D cache %dK (%d bytes/line)\n",
			       edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
		c->x86_cache_size=(ecx>>24)+(edx>>24);
	}

	if (c->extended_cpuid_level < 0x80000006)	/* Some chips just have a large L1. */
		return;

	ecx = cpuid_ecx(0x80000006);
	l2size = ecx >> 16;

	c->x86_cache_size = l2size;

	if (opt_cpu_info)
		printk("CPU: L2 Cache: %dK (%d bytes/line)\n",
		       l2size, ecx & 0xFF);
}

static inline u32 _phys_pkg_id(u32 cpuid_apic, int index_msb)
{
	return cpuid_apic >> index_msb;
}

/*
 * cpuid returns the value latched in the HW at reset, not the APIC ID
 * register's value.  For any box whose BIOS changes APIC IDs, like
 * clustered APIC systems, we must use get_apic_id().
 *
 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
 */
static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
{
	return _phys_pkg_id(get_apic_id(), index_msb);
}

/* Do minimum CPU detection early.
   Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
   The others are not touched to avoid unwanted side effects.

   WARNING: this function is only called on the BP.  Don't add code here
   that is supposed to run on all CPUs. */
void __init early_cpu_init(void)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;
	u32 eax, ebx, ecx, edx;

	c->x86_cache_alignment = 32;

	/* Get vendor name */
	cpuid(0x00000000, &c->cpuid_level, &ebx, &ecx, &edx);
	*(u32 *)&c->x86_vendor_id[0] = ebx;
	*(u32 *)&c->x86_vendor_id[8] = ecx;
	*(u32 *)&c->x86_vendor_id[4] = edx;

	c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx);
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:	  this_cpu = &intel_cpu_dev;    break;
	case X86_VENDOR_AMD:	  this_cpu = &amd_cpu_dev;      break;
	case X86_VENDOR_CENTAUR:  this_cpu = &centaur_cpu_dev;  break;
	case X86_VENDOR_SHANGHAI: this_cpu = &shanghai_cpu_dev; break;
	case X86_VENDOR_HYGON:    this_cpu = &hygon_cpu_dev;    break;
	default:
		printk(XENLOG_ERR
		       "Unrecognised or unsupported CPU vendor '%.12s'\n",
		       c->x86_vendor_id);
	}

	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
	c->x86 = get_cpu_family(eax, &c->x86_model, &c->x86_mask);

	edx &= ~cleared_caps[cpufeat_word(X86_FEATURE_FPU)];
	ecx &= ~cleared_caps[cpufeat_word(X86_FEATURE_SSE3)];
	if (edx & cpufeat_mask(X86_FEATURE_CLFLUSH))
		c->x86_cache_alignment = ((ebx >> 8) & 0xff) * 8;
	/* Leaf 0x1 capabilities filled in early for Xen. */
	c->x86_capability[cpufeat_word(X86_FEATURE_FPU)] = edx;
	c->x86_capability[cpufeat_word(X86_FEATURE_SSE3)] = ecx;

	printk(XENLOG_INFO
	       "CPU Vendor: %s, Family %u (%#x), Model %u (%#x), Stepping %u (raw %08x)\n",
	       x86_cpuid_vendor_to_str(c->x86_vendor), c->x86, c->x86,
	       c->x86_model, c->x86_model, c->x86_mask, eax);

	if (c->cpuid_level >= 7) {
		cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
		c->x86_capability[cpufeat_word(X86_FEATURE_CET_SS)] = ecx;
	}

	eax = cpuid_eax(0x80000000);
	if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
		eax = cpuid_eax(0x80000008);
		paddr_bits = eax & 0xff;
		if (paddr_bits > PADDR_BITS)
			paddr_bits = PADDR_BITS;
		vaddr_bits = (eax >> 8) & 0xff;
		if (vaddr_bits > VADDR_BITS)
			vaddr_bits = VADDR_BITS;
		hap_paddr_bits = ((eax >> 16) & 0xff) ?: paddr_bits;
		if (hap_paddr_bits > PADDR_BITS)
			hap_paddr_bits = PADDR_BITS;
	}

	if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)))
		park_offline_cpus = opt_mce;

	initialize_cpu_data(0);
}

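/*
 * Vendor-independent identification: (re)read the vendor string,
 * family/model/stepping, the base and extended feature leaves, and the
 * APIC ID.  The vendor c_early_init() hook runs part-way through and may
 * adjust the visible CPUID levels, hence the re-reads afterwards.
 */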
static void generic_identify(struct cpuinfo_x86 *c)
{
	u32 eax, ebx, ecx, edx, tmp;

	/* Get vendor name */
	cpuid(0x00000000, &c->cpuid_level, &ebx, &ecx, &edx);
	*(u32 *)&c->x86_vendor_id[0] = ebx;
	*(u32 *)&c->x86_vendor_id[8] = ecx;
	*(u32 *)&c->x86_vendor_id[4] = edx;

	c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx);
	if (boot_cpu_data.x86_vendor != c->x86_vendor)
		printk(XENLOG_ERR "CPU%u vendor %u mismatch against BSP %u\n",
		       smp_processor_id(), c->x86_vendor,
		       boot_cpu_data.x86_vendor);

	/* Initialize the standard set of capabilities */
	/* Note that the vendor-specific code below might override */

	/* Model and family information. */
	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
	c->x86 = get_cpu_family(eax, &c->x86_model, &c->x86_mask);
	c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
	c->phys_proc_id = c->apicid;

	if (this_cpu->c_early_init)
		this_cpu->c_early_init(c);

	/* c_early_init() may have adjusted cpuid levels/features.  Reread. */
	c->cpuid_level = cpuid_eax(0);
	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
	c->x86_capability[cpufeat_word(X86_FEATURE_FPU)] = edx;
	c->x86_capability[cpufeat_word(X86_FEATURE_SSE3)] = ecx;

	if ( cpu_has(c, X86_FEATURE_CLFLUSH) )
		c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;

	if ( (c->cpuid_level >= CPUID_PM_LEAF) &&
	     (cpuid_ecx(CPUID_PM_LEAF) & CPUID6_ECX_APERFMPERF_CAPABILITY) )
		set_bit(X86_FEATURE_APERFMPERF, c->x86_capability);

	/* AMD-defined flags: level 0x80000001 */
	c->extended_cpuid_level = cpuid_eax(0x80000000);
	if ((c->extended_cpuid_level >> 16) != 0x8000)
		c->extended_cpuid_level = 0;
	if (c->extended_cpuid_level > 0x80000000)
		cpuid(0x80000001, &tmp, &tmp,
		      &c->x86_capability[cpufeat_word(X86_FEATURE_LAHF_LM)],
		      &c->x86_capability[cpufeat_word(X86_FEATURE_SYSCALL)]);

	if (c->extended_cpuid_level >= 0x80000004)
		get_model_name(c); /* Default name */
	if (c->extended_cpuid_level >= 0x80000007)
		c->x86_capability[cpufeat_word(X86_FEATURE_ITSC)]
			= cpuid_edx(0x80000007);
	if (c->extended_cpuid_level >= 0x80000008)
		c->x86_capability[cpufeat_word(X86_FEATURE_CLZERO)]
			= cpuid_ebx(0x80000008);

	/* Intel-defined flags: level 0x00000007 */
	if ( c->cpuid_level >= 0x00000007 ) {
		cpuid_count(0x00000007, 0, &eax,
			    &c->x86_capability[cpufeat_word(X86_FEATURE_FSGSBASE)],
			    &c->x86_capability[cpufeat_word(X86_FEATURE_PKU)],
			    &c->x86_capability[cpufeat_word(X86_FEATURE_AVX512_4VNNIW)]);
		if (eax > 0)
			cpuid_count(0x00000007, 1,
				    &c->x86_capability[cpufeat_word(X86_FEATURE_AVX512_BF16)],
				    &tmp, &tmp, &tmp);
	}

	if (c->cpuid_level >= 0xd)
		cpuid_count(0xd, 1,
			    &c->x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)],
			    &tmp, &tmp, &tmp);
}

/*
 * This does the hard work of actually picking apart the CPU stuff...
 */
void identify_cpu(struct cpuinfo_x86 *c)
{
	int i;

	c->x86_cache_size = -1;
	c->x86_vendor = X86_VENDOR_UNKNOWN;
	c->cpuid_level = -1;	/* CPUID not detected */
	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
	c->x86_vendor_id[0] = '\0'; /* Unset */
	c->x86_model_id[0] = '\0';  /* Unset */
	c->x86_max_cores = 1;
	c->x86_num_siblings = 1;
	c->x86_clflush_size = 0;
	c->phys_proc_id = XEN_INVALID_SOCKET_ID;
	c->cpu_core_id = XEN_INVALID_CORE_ID;
	c->compute_unit_id = INVALID_CUID;
	memset(&c->x86_capability, 0, sizeof c->x86_capability);

	generic_identify(c);

#ifdef NOISY_CAPS
	printk(KERN_DEBUG "CPU: After vendor identify, caps:");
	for (i = 0; i < NCAPINTS; i++)
		printk(" %08x", c->x86_capability[i]);
	printk("\n");
#endif

	/*
	 * Vendor-specific initialization.  In this section we
	 * canonicalize the feature flags, meaning if there are
	 * features a certain CPU supports which CPUID doesn't
	 * tell us, CPUID claiming incorrect flags, or other bugs,
	 * we handle them here.
	 *
	 * At the end of this section, c->x86_capability better
	 * indicate the features this CPU genuinely supports!
	 */
	if (this_cpu->c_init)
		this_cpu->c_init(c);

	if (c == &boot_cpu_data && !opt_pku)
		setup_clear_cpu_cap(X86_FEATURE_PKU);

	/*
	 * The vendor-specific functions might have changed features.  Now
	 * we do "generic changes."
	 */
	for (i = 0; i < FSCAPINTS; ++i)
		c->x86_capability[i] &= known_features[i];

	for (i = 0 ; i < NCAPINTS ; ++i) {
		c->x86_capability[i] |= forced_caps[i];
		c->x86_capability[i] &= ~cleared_caps[i];
	}

	/* If the model name is still unset, do table lookup. */
	if ( !c->x86_model_id[0] ) {
		/* Last resort... */
		snprintf(c->x86_model_id, sizeof(c->x86_model_id),
			"%02x/%02x", c->x86_vendor, c->x86_model);
	}

	/* Now the feature flags better reflect actual CPU features! */

	xstate_init(c);

#ifdef NOISY_CAPS
	printk(KERN_DEBUG "CPU: After all inits, caps:");
	for (i = 0; i < NCAPINTS; i++)
		printk(" %08x", c->x86_capability[i]);
	printk("\n");
#endif

	/*
	 * If RDRAND is available, make an attempt to check that it actually
	 * (still) works.
	 */
	if (cpu_has(c, X86_FEATURE_RDRAND)) {
		unsigned int prev = 0;

		for (i = 0; i < 5; ++i)
		{
			unsigned int cur = arch_get_random();

			if (prev && cur != prev)
				break;
			prev = cur;
		}

		if (i >= 5)
			printk(XENLOG_WARNING "CPU%u: RDRAND appears to not work\n",
			       smp_processor_id());
	}

	if (system_state == SYS_STATE_resume)
		return;

	/*
	 * On SMP, boot_cpu_data holds the common feature set between
	 * all CPUs; so make sure that we indicate which features are
	 * common between the CPUs.  The first time this routine gets
	 * executed, c == &boot_cpu_data.
	 */
	if ( c != &boot_cpu_data ) {
		/* AND the already accumulated flags with these */
		for ( i = 0 ; i < NCAPINTS ; i++ )
			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];

		mcheck_init(c, false);
	} else {
		mcheck_init(c, true);

		mtrr_bp_init();
	}
}

/* leaf 0xb SMT level */
#define SMT_LEVEL       0

/* leaf 0xb sub-leaf types */
#define INVALID_TYPE    0
#define SMT_TYPE        1
#define CORE_TYPE       2

#define LEAFB_SUBTYPE(ecx)          (((ecx) >> 8) & 0xff)
#define BITS_SHIFT_NEXT_LEVEL(eax)  ((eax) & 0x1f)
#define LEVEL_MAX_SIBLINGS(ebx)     ((ebx) & 0xffff)

/*
 * Check for extended topology enumeration cpuid leaf 0xb and if it
 * exists, use it for cpu topology detection.
 */
bool detect_extended_topology(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, sub_index;
	unsigned int ht_mask_width, core_plus_mask_width;
	unsigned int core_select_mask, core_level_siblings;
	unsigned int initial_apicid;

	if ( c->cpuid_level < 0xb )
		return false;

	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);

	/* Check if the cpuid leaf 0xb is actually implemented */
	if ( ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE) )
		return false;

	__set_bit(X86_FEATURE_XTOPOLOGY, c->x86_capability);

	initial_apicid = edx;

	/* Populate HT related information from sub-leaf level 0 */
	core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
	core_level_siblings = c->x86_num_siblings = 1u << ht_mask_width;

	sub_index = 1;
	do {
		cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);

		/* Check for the Core type in the implemented sub leaves */
		if ( LEAFB_SUBTYPE(ecx) == CORE_TYPE ) {
			core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
			core_level_siblings = 1u << core_plus_mask_width;
			break;
		}

		sub_index++;
	} while ( LEAFB_SUBTYPE(ecx) != INVALID_TYPE );

	core_select_mask = (~(~0u << core_plus_mask_width)) >> ht_mask_width;

	c->cpu_core_id = phys_pkg_id(initial_apicid, ht_mask_width)
		& core_select_mask;
	c->phys_proc_id = phys_pkg_id(initial_apicid, core_plus_mask_width);

	c->apicid = phys_pkg_id(initial_apicid, 0);
	c->x86_max_cores = (core_level_siblings / c->x86_num_siblings);

	if ( opt_cpu_info )
	{
		printk("CPU: Physical Processor ID: %d\n",
		       c->phys_proc_id);
		if ( c->x86_max_cores > 1 )
			printk("CPU: Processor Core ID: %d\n",
			       c->cpu_core_id);
	}

	return true;
}

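/*
 * Legacy hyper-threading detection from leaf 1 EBX[23:16], used only when
 * leaf 0xb topology enumeration (XTOPOLOGY) is not available.
 */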
void detect_ht(struct cpuinfo_x86 *c)
{
	u32	eax, ebx, ecx, edx;
	int	index_msb, core_bits;

	if (!cpu_has(c, X86_FEATURE_HTT) ||
	    cpu_has(c, X86_FEATURE_CMP_LEGACY) ||
	    cpu_has(c, X86_FEATURE_XTOPOLOGY))
		return;

	cpuid(1, &eax, &ebx, &ecx, &edx);
	c->x86_num_siblings = (ebx & 0xff0000) >> 16;

	if (c->x86_num_siblings == 1) {
		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
	} else if (c->x86_num_siblings > 1 ) {
		index_msb = get_count_order(c->x86_num_siblings);
		c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);

		if (opt_cpu_info)
			printk("CPU: Physical Processor ID: %d\n",
			       c->phys_proc_id);

		c->x86_num_siblings = c->x86_num_siblings / c->x86_max_cores;

		index_msb = get_count_order(c->x86_num_siblings);

		core_bits = get_count_order(c->x86_max_cores);

		c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
					       ((1 << core_bits) - 1);

		if (opt_cpu_info && c->x86_max_cores > 1)
			printk("CPU: Processor Core ID: %d\n",
			       c->cpu_core_id);
	}
}

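/*
 * Translate an APIC ID to its socket number, preferring leaf 0xb topology
 * information and falling back to the legacy HTT sibling count.
 */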
unsigned int __init apicid_to_socket(unsigned int apicid)
{
	unsigned int dummy;

	if (boot_cpu_has(X86_FEATURE_XTOPOLOGY)) {
		unsigned int eax, ecx, sub_index = 1, core_plus_mask_width;

		cpuid_count(0xb, SMT_LEVEL, &eax, &dummy, &dummy, &dummy);
		core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
		do {
			cpuid_count(0xb, sub_index, &eax, &dummy, &ecx,
			            &dummy);

			if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
				core_plus_mask_width =
					BITS_SHIFT_NEXT_LEVEL(eax);
				break;
			}

			sub_index++;
		} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);

		return _phys_pkg_id(apicid, core_plus_mask_width);
	}

	if (boot_cpu_has(X86_FEATURE_HTT) &&
	    !boot_cpu_has(X86_FEATURE_CMP_LEGACY)) {
		unsigned int num_siblings = (cpuid_ebx(1) & 0xff0000) >> 16;

		if (num_siblings)
			return _phys_pkg_id(apicid,
			                    get_count_order(num_siblings));
	}

	return apicid;
}

void print_cpu_info(unsigned int cpu)
{
	const struct cpuinfo_x86 *c = cpu_data + cpu;
	const char *vendor = NULL;

	if (!opt_cpu_info)
		return;

	printk("CPU%u: ", cpu);

	vendor = x86_cpuid_vendor_to_str(c->x86_vendor);
	if (strncmp(c->x86_model_id, vendor, strlen(vendor)))
		printk("%s ", vendor);

	if (!c->x86_model_id[0])
		printk("%d86", c->x86);
	else
		printk("%s", c->x86_model_id);

	printk(" stepping %02x\n", c->x86_mask);
}

static cpumask_t cpu_initialized;

/*
 * Sets up system tables and descriptors.
 *
 * - Sets up TSS with stack pointers, including ISTs
 * - Inserts TSS selector into regular and compat GDTs
 * - Loads GDT, IDT, TR then null LDT
 * - Sets up IST references in the IDT
 */
void load_system_tables(void)
{
	unsigned int i, cpu = smp_processor_id();
	unsigned long stack_bottom = get_stack_bottom(),
		stack_top = stack_bottom & ~(STACK_SIZE - 1);
	/*
	 * NB: define tss_page as a local variable because clang 3.5 doesn't
	 * support using ARRAY_SIZE against per-cpu variables.
	 */
	struct tss_page *tss_page = &this_cpu(tss_page);

	/* The TSS may be live.  Dissuade any clever optimisations. */
	volatile struct tss64 *tss = &tss_page->tss;
	seg_desc_t *gdt =
		this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY;

	const struct desc_ptr gdtr = {
		.base = (unsigned long)gdt,
		.limit = LAST_RESERVED_GDT_BYTE,
	};
	const struct desc_ptr idtr = {
		.base = (unsigned long)idt_tables[cpu],
		.limit = (IDT_ENTRIES * sizeof(idt_entry_t)) - 1,
	};

	/*
	 * Set up the TSS.  Warning - may be live, and the NMI/#MC must remain
	 * valid on every instruction boundary.  (Note: these are all
	 * semantically ACCESS_ONCE() due to tss's volatile qualifier.)
	 *
	 * rsp0 refers to the primary stack.  #MC, NMI, #DB and #DF handlers
	 * each get their own stacks.  No IO Bitmap.
	 */
	tss->rsp0 = stack_bottom;
	tss->ist[IST_MCE - 1] = stack_top + (1 + IST_MCE) * PAGE_SIZE;
	tss->ist[IST_NMI - 1] = stack_top + (1 + IST_NMI) * PAGE_SIZE;
	tss->ist[IST_DB  - 1] = stack_top + (1 + IST_DB)  * PAGE_SIZE;
	/*
	 * Gross bodge.  The #DF handler uses the vm86 fields of cpu_user_regs
	 * beyond the hardware frame.  Adjust the stack entrypoint so this
	 * doesn't manifest as an OoB write which hits the guard page.
	 */
	tss->ist[IST_DF  - 1] = stack_top + (1 + IST_DF)  * PAGE_SIZE -
		(sizeof(struct cpu_user_regs) - offsetof(struct cpu_user_regs, es));
	tss->bitmap = IOBMP_INVALID_OFFSET;

	/* All other stack pointers poisoned. */
	for ( i = IST_MAX; i < ARRAY_SIZE(tss->ist); ++i )
		tss->ist[i] = 0x8600111111111111ul;
	tss->rsp1 = 0x8600111111111111ul;
	tss->rsp2 = 0x8600111111111111ul;

	/*
	 * Set up the shadow stack IST.  Used entries must point at the
	 * supervisor stack token.  Unused entries are poisoned.
	 *
	 * This IST Table may be live, and the NMI/#MC entries must
	 * remain valid on every instruction boundary, hence the
	 * volatile qualifier.
	 */
	if (cpu_has_xen_shstk) {
		volatile uint64_t *ist_ssp = tss_page->ist_ssp;

		ist_ssp[0] = 0x8600111111111111ul;
		ist_ssp[IST_MCE] = stack_top + (IST_MCE * IST_SHSTK_SIZE) - 8;
		ist_ssp[IST_NMI] = stack_top + (IST_NMI * IST_SHSTK_SIZE) - 8;
		ist_ssp[IST_DB]  = stack_top + (IST_DB  * IST_SHSTK_SIZE) - 8;
		ist_ssp[IST_DF]  = stack_top + (IST_DF  * IST_SHSTK_SIZE) - 8;
		for ( i = IST_DF + 1; i < ARRAY_SIZE(tss_page->ist_ssp); ++i )
			ist_ssp[i] = 0x8600111111111111ul;

		wrmsrl(MSR_INTERRUPT_SSP_TABLE, (unsigned long)ist_ssp);
	}

	BUILD_BUG_ON(sizeof(*tss) <= 0x67); /* Mandated by the architecture. */

	_set_tssldt_desc(gdt + TSS_ENTRY, (unsigned long)tss,
			 sizeof(*tss) - 1, SYS_DESC_tss_avail);
	if ( IS_ENABLED(CONFIG_PV32) )
		_set_tssldt_desc(
			this_cpu(compat_gdt) - FIRST_RESERVED_GDT_ENTRY + TSS_ENTRY,
			(unsigned long)tss, sizeof(*tss) - 1, SYS_DESC_tss_busy);

	per_cpu(full_gdt_loaded, cpu) = false;
	lgdt(&gdtr);
	lidt(&idtr);
	ltr(TSS_SELECTOR);
	lldt(0);

	enable_each_ist(idt_tables[cpu]);

	/*
	 * Bottom-of-stack must be 16-byte aligned!
	 *
	 * Defer checks until exception support is sufficiently set up.
	 */
	BUILD_BUG_ON((sizeof(struct cpu_info) -
		      offsetof(struct cpu_info, guest_cpu_user_regs.es)) & 0xf);
	BUG_ON(system_state != SYS_STATE_early_boot && (stack_bottom & 0xf));
}

/*
 * cpu_init() initializes state that is per-CPU. Some data is already
 * initialized (naturally) in the bootstrap process, such as the GDT
 * and IDT. We reload them nevertheless, this function acts as a
 * 'CPU state barrier', nothing should get across.
 */
void cpu_init(void)
{
	int cpu = smp_processor_id();

	if (cpumask_test_and_set_cpu(cpu, &cpu_initialized)) {
		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
		for (;;) local_irq_enable();
	}
	if (opt_cpu_info)
		printk("Initializing CPU#%d\n", cpu);

	/* Install correct page table. */
	write_ptbase(current);

	/* Ensure FPU gets initialised for each domain. */
	stts();

	/* Reset debug registers: */
	write_debugreg(0, 0);
	write_debugreg(1, 0);
	write_debugreg(2, 0);
	write_debugreg(3, 0);
	write_debugreg(6, X86_DR6_DEFAULT);
	write_debugreg(7, X86_DR7_DEFAULT);

	/* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
	enable_nmis();
}

void cpu_uninit(unsigned int cpu)
{
	cpumask_clear_cpu(cpu, &cpu_initialized);
}

/*
 * x86_match_cpu - match the current CPU against an array of
 * x86_cpu_ids
 * @match: Pointer to array of x86_cpu_ids. Last entry terminated with
 *         {}.
 * Return the entry if the current CPU matches the entries in the
 * passed x86_cpu_id match table. Otherwise NULL.  The match table
 * contains vendor (X86_VENDOR_*), family, model and feature bits or
 * respective wildcard entries.
 *
 * A typical table entry would be to match a specific CPU
 * { X86_VENDOR_INTEL, 6, 0x12 }
 * or to match a specific CPU feature
 * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
 *
 * This always matches against the boot cpu, assuming models and features are
 * consistent over all CPUs.
 */
const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id table[])
{
	const struct x86_cpu_id *m;
	const struct cpuinfo_x86 *c = &boot_cpu_data;

	for (m = table; m->vendor | m->family | m->model | m->feature; m++) {
		if (c->x86_vendor != m->vendor)
			continue;
		if (c->x86 != m->family)
			continue;
		if (c->x86_model != m->model)
			continue;
		if (!cpu_has(c, m->feature))
			continue;
		return m;
	}
	return NULL;
}