#include <xen/init.h>
#include <xen/kernel.h>
#include <xen/string.h>
#include <xen/bitops.h>
#include <xen/smp.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <asm/i387.h>
#include <mach_apic.h>
#include <asm/hvm/support.h>

#include "cpu.h"

/*
 * Processors which have self-snooping capability can handle conflicting
 * memory types across CPUs by snooping their own cache. However, there
 * exist CPU models in which having conflicting memory types still leads
 * to unpredictable behavior, machine check errors, or hangs. Clear this
 * feature to prevent its use on machines with known errata.
 */
static void __init check_memory_type_self_snoop_errata(void)
{
	if (!boot_cpu_has(X86_FEATURE_SS))
		return;

	switch (boot_cpu_data.x86_model) {
	case 0x0f: /* Merom */
	case 0x16: /* Merom L */
	case 0x17: /* Penryn */
	case 0x1d: /* Dunnington */
	case 0x1e: /* Nehalem */
	case 0x1f: /* Auburndale / Havendale */
	case 0x1a: /* Nehalem EP */
	case 0x2e: /* Nehalem EX */
	case 0x25: /* Westmere */
	case 0x2c: /* Westmere EP */
	case 0x2a: /* SandyBridge */
		return;
	}

	setup_force_cpu_cap(X86_FEATURE_XEN_SELFSNOOP);
}

/*
 * Set caps in expected_levelling_cap, probe a specific masking MSR, and set
 * caps in levelling_caps if it is found, or clobber the MSR index if missing.
 * If the MSR is present, return its current (default) value.
 */
static uint64_t __init _probe_mask_msr(unsigned int *msr, uint64_t caps)
{
	uint64_t val = 0;

	expected_levelling_cap |= caps;

	if (rdmsr_safe(*msr, val) || wrmsr_safe(*msr, val))
		*msr = 0;
	else
		levelling_caps |= caps;

	return val;
}

/* Indices of the masking MSRs, or 0 if unavailable. */
static unsigned int __read_mostly msr_basic, __read_mostly msr_ext,
	__read_mostly msr_xsave;

/*
 * Probe for the existence of the expected masking MSRs. They might easily
 * not be available if Xen is running virtualised.
 */
static void __init probe_masking_msrs(void)
{
	const struct cpuinfo_x86 *c = &boot_cpu_data;
	unsigned int exp_msr_basic, exp_msr_ext, exp_msr_xsave;

	/* Only family 6 supports this feature. */
	if (c->x86 != 6)
		return;

	switch (c->x86_model) {
	case 0x17: /* Yorkfield, Wolfdale, Penryn, Harpertown(DP) */
	case 0x1d: /* Dunnington(MP) */
		msr_basic = MSR_INTEL_MASK_V1_CPUID1;
		break;

	case 0x1a: /* Bloomfield, Nehalem-EP(Gainestown) */
	case 0x1e: /* Clarksfield, Lynnfield, Jasper Forest */
	case 0x1f: /* Something Nehalem-based - perhaps Auburndale/Havendale? */
	case 0x25: /* Arrandale, Clarksdale */
	case 0x2c: /* Gulftown, Westmere-EP */
	case 0x2e: /* Nehalem-EX(Beckton) */
	case 0x2f: /* Westmere-EX */
		msr_basic = MSR_INTEL_MASK_V2_CPUID1;
		msr_ext = MSR_INTEL_MASK_V2_CPUID80000001;
		break;

	case 0x2a: /* SandyBridge */
	case 0x2d: /* SandyBridge-E, SandyBridge-EN, SandyBridge-EP */
		msr_basic = MSR_INTEL_MASK_V3_CPUID1;
		msr_ext = MSR_INTEL_MASK_V3_CPUID80000001;
		msr_xsave = MSR_INTEL_MASK_V3_CPUIDD_01;
		break;
	}

	exp_msr_basic = msr_basic;
	exp_msr_ext = msr_ext;
	exp_msr_xsave = msr_xsave;

	if (msr_basic)
		cpuidmask_defaults._1cd = _probe_mask_msr(&msr_basic, LCAP_1cd);

	if (msr_ext)
		cpuidmask_defaults.e1cd = _probe_mask_msr(&msr_ext, LCAP_e1cd);

	if (msr_xsave)
		cpuidmask_defaults.Da1 = _probe_mask_msr(&msr_xsave, LCAP_Da1);

	/*
	 * Don't bother warning about a mismatch if virtualised. These MSRs
	 * are not architectural and almost never virtualised.
	 */
	if ((expected_levelling_cap == levelling_caps) ||
	    cpu_has_hypervisor)
		return;

	printk(XENLOG_WARNING "Mismatch between expected (%#x) "
	       "and real (%#x) levelling caps: missing %#x\n",
	       expected_levelling_cap, levelling_caps,
	       (expected_levelling_cap ^ levelling_caps) & levelling_caps);
	printk(XENLOG_WARNING "Fam %#x, model %#x expected (%#x/%#x/%#x), "
	       "got (%#x/%#x/%#x)\n", c->x86, c->x86_model,
	       exp_msr_basic, exp_msr_ext, exp_msr_xsave,
	       msr_basic, msr_ext, msr_xsave);
	printk(XENLOG_WARNING
	       "If not running virtualised, please report a bug\n");
}

/*
 * Context switch CPUID masking state to the next domain. Only called if
 * CPUID Faulting isn't available, but masking MSRs have been detected. A
 * parameter of NULL is used to context switch to the default host state (by
 * the CPU bringup code, crash path, etc).
 */
static void intel_ctxt_switch_masking(const struct vcpu *next)
{
	struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
	const struct domain *nextd = next ? next->domain : NULL;
	const struct cpuidmasks *masks =
		(nextd && is_pv_domain(nextd) && nextd->arch.pv.cpuidmasks)
		? nextd->arch.pv.cpuidmasks : &cpuidmask_defaults;

	if (msr_basic) {
		uint64_t val = masks->_1cd;

		/*
		 * OSXSAVE defaults to 1, which causes fast-forwarding of
		 * Xen's real setting. Clobber it if disabled by the guest
		 * kernel.
		 */
		if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) &&
		    !(next->arch.pv.ctrlreg[4] & X86_CR4_OSXSAVE))
			val &= ~(uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE);

		if (unlikely(these_masks->_1cd != val)) {
			wrmsrl(msr_basic, val);
			these_masks->_1cd = val;
		}
	}

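/*
 * Lazily update a masking MSR: only write it if the MSR exists and the
 * cached value differs from the wanted one.
 */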
#define LAZY(msr, field)						\
	({								\
		if (unlikely(these_masks->field != masks->field) &&	\
		    (msr))						\
		{							\
			wrmsrl((msr), masks->field);			\
			these_masks->field = masks->field;		\
		}							\
	})

	LAZY(msr_ext, e1cd);
	LAZY(msr_xsave, Da1);

#undef LAZY
}

/*
 * opt_cpuid_mask_ecx/edx: cpuid.1[ecx, edx] feature mask.
 * For example, an E8400 (Intel Core 2 Duo Processor series) normally reports
 * ecx = 0x0008E3FD and edx = 0xBFEBFBFF for CPUID.EAX = 1. To 'rev down' to
 * an E8400, set these values in the corresponding Xen boot parameters.
 */
static void __init noinline intel_init_levelling(void)
{
	if (probe_cpuid_faulting())
		return;

	probe_masking_msrs();

	if (msr_basic) {
		uint32_t ecx, edx, tmp;

		cpuid(0x00000001, &tmp, &tmp, &ecx, &edx);

		ecx &= opt_cpuid_mask_ecx;
		edx &= opt_cpuid_mask_edx;

		/* Fast-forward bits - Must be set. */
		if (ecx & cpufeat_mask(X86_FEATURE_XSAVE))
			ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE);
		edx |= cpufeat_mask(X86_FEATURE_APIC);

		cpuidmask_defaults._1cd &= ((u64)edx << 32) | ecx;
	}

	if (msr_ext) {
		uint32_t ecx, edx, tmp;

		cpuid(0x80000001, &tmp, &tmp, &ecx, &edx);

		ecx &= opt_cpuid_mask_ext_ecx;
		edx &= opt_cpuid_mask_ext_edx;

		cpuidmask_defaults.e1cd &= ((u64)edx << 32) | ecx;
	}

	if (msr_xsave) {
		uint32_t eax, tmp;

		cpuid_count(0x0000000d, 1, &eax, &tmp, &tmp, &tmp);

		eax &= opt_cpuid_mask_xsave_eax;

		cpuidmask_defaults.Da1 &= (~0ULL << 32) | eax;
	}

	if (opt_cpu_info) {
		printk(XENLOG_INFO "Levelling caps: %#x\n", levelling_caps);

		if (!cpu_has_cpuid_faulting)
			printk(XENLOG_INFO
			       "MSR defaults: 1d 0x%08x, 1c 0x%08x, e1d 0x%08x, "
			       "e1c 0x%08x, Da1 0x%08x\n",
			       (uint32_t)(cpuidmask_defaults._1cd >> 32),
			       (uint32_t)cpuidmask_defaults._1cd,
			       (uint32_t)(cpuidmask_defaults.e1cd >> 32),
			       (uint32_t)cpuidmask_defaults.e1cd,
			       (uint32_t)cpuidmask_defaults.Da1);
	}

	if (levelling_caps)
		ctxt_switch_masking = intel_ctxt_switch_masking;
}

static void early_init_intel(struct cpuinfo_x86 *c)
{
	u64 misc_enable, disable;

	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
	if (c->x86 == 15 && c->x86_cache_alignment == 64)
		c->x86_cache_alignment = 128;

	/* Unmask CPUID levels and NX if masked: */
	rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);

	disable = misc_enable & (MSR_IA32_MISC_ENABLE_LIMIT_CPUID |
				 MSR_IA32_MISC_ENABLE_XD_DISABLE);
	if (disable) {
		wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable);
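		/*
		 * Have the boot trampoline (AP startup / wakeup) apply the
		 * same adjustment.
		 */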
		bootsym(trampoline_misc_enable_off) |= disable;
		bootsym(trampoline_efer) |= EFER_NX;
	}

	if (disable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID)
		printk(KERN_INFO "revised cpuid level: %d\n",
		       cpuid_eax(0));
	if (disable & MSR_IA32_MISC_ENABLE_XD_DISABLE) {
		write_efer(read_efer() | EFER_NX);
		printk(KERN_INFO
		       "re-enabled NX (Execute Disable) protection\n");
	}

	/* CPUID workaround for Intel 0F33/0F34 CPU */
	if (boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 3 &&
	    (boot_cpu_data.x86_mask == 3 || boot_cpu_data.x86_mask == 4))
		paddr_bits = 36;

	if (c == &boot_cpu_data) {
		check_memory_type_self_snoop_errata();

		intel_init_levelling();
	}

	ctxt_switch_levelling(NULL);
}

/*
 * Errata BA80, AAK120, AAM108, AAO67, BD59, AAY54: Rapid Core C3/C6 Transition
 * May Cause Unpredictable System Behavior
 *
 * Under a complex set of internal conditions, cores rapidly performing C3/C6
 * transitions in a system with Intel Hyper-Threading Technology enabled may
 * cause a machine check error (IA32_MCi_STATUS.MCACOD = 0x0106), system hang
 * or unpredictable system behavior.
 */
static void probe_c3_errata(const struct cpuinfo_x86 *c)
{
#define INTEL_FAM6_MODEL(m) { X86_VENDOR_INTEL, 6, m, X86_FEATURE_ALWAYS }
    static const struct x86_cpu_id models[] = {
        /* Nehalem */
        INTEL_FAM6_MODEL(0x1a),
        INTEL_FAM6_MODEL(0x1e),
        INTEL_FAM6_MODEL(0x1f),
        INTEL_FAM6_MODEL(0x2e),
        /* Westmere (note Westmere-EX is not affected) */
        INTEL_FAM6_MODEL(0x2c),
        INTEL_FAM6_MODEL(0x25),
        { }
    };
#undef INTEL_FAM6_MODEL

    /* Serialized by the AP bringup code. */
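    /*
     * A non-zero (apicid & (siblings - 1)) identifies a secondary thread,
     * i.e. Hyper-Threading is enabled.
     */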
    if ( max_cstate > 1 && (c->apicid & (c->x86_num_siblings - 1)) &&
         x86_match_cpu(models) )
    {
        printk(XENLOG_WARNING
               "Disabling C-states C3 and C6 due to CPU errata\n");
        max_cstate = 1;
    }
}

/*
 * P4 Xeon errata 037 workaround.
 * Hardware prefetcher may cause stale data to be loaded into the cache.
 *
 * Xeon 7400 erratum AAI65 (and later Xeons):
 * MONITOR/MWAIT may have excessive false wakeups.
 */
static void Intel_errata_workarounds(struct cpuinfo_x86 *c)
{
	unsigned long lo, hi;

	if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
		rdmsr (MSR_IA32_MISC_ENABLE, lo, hi);
		if ((lo & (1<<9)) == 0) {
			printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
			printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
			lo |= (1<<9);	/* Disable hw prefetching */
			wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
		}
	}

	if (c->x86 == 6 && cpu_has_clflush &&
	    (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
		__set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);

	if (cpu_has_tsx_force_abort && opt_rtm_abort)
		wrmsrl(MSR_TSX_FORCE_ABORT, TSX_FORCE_ABORT_RTM);

	probe_c3_errata(c);
}


/*
 * find out the number of processor cores on the die
 */
static int num_cpu_cores(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx;

	if (c->cpuid_level < 4)
		return 1;

	/* Intel has a non-standard dependency on %ecx for this CPUID level. */
	cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
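	/*
	 * EAX[4:0] (cache type) being zero means there is no information for
	 * this index; EAX[31:26] is the maximum number of addressable core
	 * IDs in the package, minus one.
	 */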
	if (eax & 0x1f)
		return ((eax >> 26) + 1);
	else
		return 1;
}

static void intel_log_freq(const struct cpuinfo_x86 *c)
{
    unsigned int eax, ebx, ecx, edx;
    uint64_t msrval;
    uint8_t max_ratio;

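    /*
     * CPUID leaf 0x15: EBX/EAX is the TSC to crystal clock ratio, ECX the
     * crystal clock frequency in Hz (0 if not enumerated).
     */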
    if ( c->cpuid_level >= 0x15 )
    {
        cpuid(0x15, &eax, &ebx, &ecx, &edx);
        if ( ecx && ebx && eax )
        {
            unsigned long long val = ecx;

            val *= ebx;
            do_div(val, eax);
            printk("CPU%u: TSC: %u Hz * %u / %u = %Lu Hz\n",
                   smp_processor_id(), ecx, ebx, eax, val);
        }
        else if ( ecx | eax | ebx )
        {
            printk("CPU%u: TSC:", smp_processor_id());
            if ( ecx )
                printk(" core: %u Hz", ecx);
            if ( ebx && eax )
                printk(" ratio: %u / %u", ebx, eax);
            printk("\n");
        }
    }

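    /*
     * CPUID leaf 0x16: base (EAX), maximum (EBX) and bus/reference (ECX)
     * frequencies, all in MHz.
     */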
    if ( c->cpuid_level >= 0x16 )
    {
        cpuid(0x16, &eax, &ebx, &ecx, &edx);
        if ( ecx | eax | ebx )
        {
            printk("CPU%u:", smp_processor_id());
            if ( ecx )
                printk(" bus: %u MHz", ecx);
            if ( eax )
                printk(" base: %u MHz", eax);
            if ( ebx )
                printk(" max: %u MHz", ebx);
            printk("\n");
        }
    }

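    /*
     * MSR_PLATFORM_INFO: bits 15:8 hold the maximum non-turbo ratio,
     * bits 47:40 the maximum efficiency (minimum operating) ratio.
     */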
    if ( rdmsr_safe(MSR_INTEL_PLATFORM_INFO, msrval) )
        return;
    max_ratio = msrval >> 8;

    if ( max_ratio )
    {
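        /*
         * Bus clock, in 10 kHz units: 100 MHz by default, 133.33 MHz on
         * Nehalem/Westmere.
         */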
        unsigned int factor = 10000;
        uint8_t min_ratio = msrval >> 40;

        if ( c->x86 == 6 )
            switch ( c->x86_model )
            {
            case 0x1a: case 0x1e: case 0x1f: case 0x2e: /* Nehalem */
            case 0x25: case 0x2c: case 0x2f: /* Westmere */
                factor = 13333;
                break;
            }

        printk("CPU%u: ", smp_processor_id());
        if ( min_ratio )
            printk("%u ... ", (factor * min_ratio + 50) / 100);
        printk("%u MHz\n", (factor * max_ratio + 50) / 100);
    }
}

static void init_intel(struct cpuinfo_x86 *c)
{
	/* Detect the extended topology information if available */
	detect_extended_topology(c);

	init_intel_cacheinfo(c);
	if (c->cpuid_level > 9) {
		unsigned eax = cpuid_eax(10);
		/* Check for version and the number of counters */
		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
			__set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
	}

	if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) )
	{
		c->x86_max_cores = num_cpu_cores(c);
		detect_ht(c);
	}

	/* Work around errata */
	Intel_errata_workarounds(c);

	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
	    (c->x86 == 0x6 && c->x86_model >= 0x0e))
		__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
	if (cpu_has(c, X86_FEATURE_ITSC)) {
		__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
		__set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
		__set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
	}
	if ( opt_arat &&
	     ( c->cpuid_level >= 0x00000006 ) &&
	     ( cpuid_eax(0x00000006) & (1u<<2) ) )
		__set_bit(X86_FEATURE_ARAT, c->x86_capability);

	if ((opt_cpu_info && !(c->apicid & (c->x86_num_siblings - 1))) ||
	    c == &boot_cpu_data )
		intel_log_freq(c);
}

const struct cpu_dev intel_cpu_dev = {
	.c_early_init = early_init_intel,
	.c_init = init_intel,
};