#include <xen/init.h>
#include <xen/bitops.h>
#include <xen/mm.h>
#include <xen/param.h>
#include <xen/smp.h>
#include <xen/pci.h>
#include <xen/warning.h>
#include <asm/io.h>
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/amd.h>
#include <asm/hvm/support.h>
#include <asm/spec_ctrl.h>
#include <asm/acpi.h>
#include <asm/apic.h>

#include "cpu.h"

/*
 * Pre-canned values for overriding the CPUID features
 * and extended features masks.
 *
 * Currently supported processors:
 *
 * "fam_0f_rev_c"
 * "fam_0f_rev_d"
 * "fam_0f_rev_e"
 * "fam_0f_rev_f"
 * "fam_0f_rev_g"
 * "fam_10_rev_b"
 * "fam_10_rev_c"
 * "fam_11_rev_b"
 */
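/*
 * e.g. adding "cpuid_mask_cpu=fam_10_rev_c" to the Xen command line selects
 * the Fam 10h revision C pre-canned masks listed above.
 */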
static char __initdata opt_famrev[14];
string_param("cpuid_mask_cpu", opt_famrev);

static unsigned int __initdata opt_cpuid_mask_l7s0_eax = ~0u;
integer_param("cpuid_mask_l7s0_eax", opt_cpuid_mask_l7s0_eax);
static unsigned int __initdata opt_cpuid_mask_l7s0_ebx = ~0u;
integer_param("cpuid_mask_l7s0_ebx", opt_cpuid_mask_l7s0_ebx);

static unsigned int __initdata opt_cpuid_mask_thermal_ecx = ~0u;
integer_param("cpuid_mask_thermal_ecx", opt_cpuid_mask_thermal_ecx);

/* 1 = allow, 0 = don't allow guest creation, -1 = don't allow boot */
s8 __read_mostly opt_allow_unsafe;
boolean_param("allow_unsafe", opt_allow_unsafe);

/* Signal whether the ACPI C1E quirk is required. */
bool __read_mostly amd_acpi_c1e_quirk;

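/*
 * The MSR accessors below pass a fixed 32-bit value in %edi; as noted above
 * _probe_mask_msr(), processors prior to Fam 10h require this password when
 * accessing the CPUID mask MSRs.
 */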
static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo,
				 unsigned int *hi)
{
	int err;

	asm volatile("1: rdmsr\n2:\n"
		     ".section .fixup,\"ax\"\n"
		     "3: movl %6,%2\n"
		     "   jmp 2b\n"
		     ".previous\n"
		     _ASM_EXTABLE(1b, 3b)
		     : "=a" (*lo), "=d" (*hi), "=r" (err)
		     : "c" (msr), "D" (0x9c5a203a), "2" (0), "i" (-EFAULT));

	return err;
}

static inline int wrmsr_amd_safe(unsigned int msr, unsigned int lo,
				 unsigned int hi)
{
	int err;

	asm volatile("1: wrmsr\n2:\n"
		     ".section .fixup,\"ax\"\n"
		     "3: movl %6,%0\n"
		     "   jmp 2b\n"
		     ".previous\n"
		     _ASM_EXTABLE(1b, 3b)
		     : "=r" (err)
		     : "c" (msr), "a" (lo), "d" (hi), "D" (0x9c5a203a),
		       "0" (0), "i" (-EFAULT));

	return err;
}

static void wrmsr_amd(unsigned int msr, uint64_t val)
{
	asm volatile("wrmsr" ::
		     "c" (msr), "a" ((uint32_t)val),
		     "d" (val >> 32), "D" (0x9c5a203a));
}

static const struct cpuidmask {
	uint16_t fam;
	char rev[2];
	unsigned int ecx, edx, ext_ecx, ext_edx;
} pre_canned[] __initconst = {
#define CAN(fam, id, rev) { \
		fam, #rev, \
		AMD_FEATURES_##id##_REV_##rev##_ECX, \
		AMD_FEATURES_##id##_REV_##rev##_EDX, \
		AMD_EXTFEATURES_##id##_REV_##rev##_ECX, \
		AMD_EXTFEATURES_##id##_REV_##rev##_EDX \
	}
#define CAN_FAM(fam, rev) CAN(0x##fam, FAM##fam##h, rev)
#define CAN_K8(rev)       CAN(0x0f, K8, rev)
	CAN_FAM(11, B),
	CAN_FAM(10, C),
	CAN_FAM(10, B),
	CAN_K8(G),
	CAN_K8(F),
	CAN_K8(E),
	CAN_K8(D),
	CAN_K8(C)
#undef CAN
};
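/*
 * For reference, CAN_K8(C) above expands to
 * { 0x0f, "C", AMD_FEATURES_K8_REV_C_ECX, AMD_FEATURES_K8_REV_C_EDX,
 *   AMD_EXTFEATURES_K8_REV_C_ECX, AMD_EXTFEATURES_K8_REV_C_EDX }.
 */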

static const struct cpuidmask *__init noinline get_cpuidmask(const char *opt)
{
	unsigned long fam;
	char rev;
	unsigned int i;

	if (strncmp(opt, "fam_", 4))
		return NULL;
	fam = simple_strtoul(opt + 4, &opt, 16);
	if (strncmp(opt, "_rev_", 5) || !opt[5] || opt[6])
		return NULL;
	rev = toupper(opt[5]);

	for (i = 0; i < ARRAY_SIZE(pre_canned); ++i)
		if (fam == pre_canned[i].fam && rev == *pre_canned[i].rev)
			return &pre_canned[i];

	return NULL;
}

/*
 * Sets caps in expected_levelling_cap, probes for the specified mask MSR, and
 * sets caps in levelling_caps if it is found.  Processors prior to Fam 10h
 * required a 32-bit password for masking MSRs.  Returns the default value.
 */
static uint64_t __init _probe_mask_msr(unsigned int msr, uint64_t caps)
{
	unsigned int hi, lo;

	expected_levelling_cap |= caps;

	if ((rdmsr_amd_safe(msr, &lo, &hi) == 0) &&
	    (wrmsr_amd_safe(msr, lo, hi) == 0))
		levelling_caps |= caps;

	return ((uint64_t)hi << 32) | lo;
}

/*
 * Probe for the existence of the expected masking MSRs.  They might easily
 * not be available if Xen is running virtualised.
 */
static void __init noinline probe_masking_msrs(void)
{
	const struct cpuinfo_x86 *c = &boot_cpu_data;

	/*
	 * First, work out which masking MSRs we should have, based on
	 * revision and cpuid.
	 */

	/* Fam11 doesn't support masking at all. */
	if (c->x86 == 0x11)
		return;

	cpuidmask_defaults._1cd =
		_probe_mask_msr(MSR_K8_FEATURE_MASK, LCAP_1cd);
	cpuidmask_defaults.e1cd =
		_probe_mask_msr(MSR_K8_EXT_FEATURE_MASK, LCAP_e1cd);

	if (c->cpuid_level >= 7)
		cpuidmask_defaults._7ab0 =
			_probe_mask_msr(MSR_AMD_L7S0_FEATURE_MASK, LCAP_7ab0);

	if (c->x86 == 0x15 && c->cpuid_level >= 6 && cpuid_ecx(6))
		cpuidmask_defaults._6c =
			_probe_mask_msr(MSR_AMD_THRM_FEATURE_MASK, LCAP_6c);

	/*
	 * Don't bother warning about a mismatch if virtualised.  These MSRs
	 * are not architectural and almost never virtualised.
	 */
	if ((expected_levelling_cap == levelling_caps) ||
	    cpu_has_hypervisor)
		return;

	printk(XENLOG_WARNING "Mismatch between expected (%#x) "
	       "and real (%#x) levelling caps: missing %#x\n",
	       expected_levelling_cap, levelling_caps,
	       (expected_levelling_cap ^ levelling_caps) & levelling_caps);
	printk(XENLOG_WARNING "Fam %#x, model %#x, level %#x\n",
	       c->x86, c->x86_model, c->cpuid_level);
	printk(XENLOG_WARNING
	       "If not running virtualised, please report a bug\n");
}

/*
 * Context switch CPUID masking state to the next domain.  Only called if
 * CPUID Faulting isn't available, but masking MSRs have been detected.  A
 * parameter of NULL is used to context switch to the default host state (by
 * the cpu bringup-code, crash path, etc).
 */
static void amd_ctxt_switch_masking(const struct vcpu *next)
{
	struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
	const struct domain *nextd = next ? next->domain : NULL;
	const struct cpuidmasks *masks =
		(nextd && is_pv_domain(nextd) && nextd->arch.pv.cpuidmasks)
		? nextd->arch.pv.cpuidmasks : &cpuidmask_defaults;

	if ((levelling_caps & LCAP_1cd) == LCAP_1cd) {
		uint64_t val = masks->_1cd;

		/*
		 * OSXSAVE defaults to 1, which causes fast-forwarding of
		 * Xen's real setting.  Clobber it if disabled by the guest
		 * kernel.
		 */
		if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) &&
		    !(next->arch.pv.ctrlreg[4] & X86_CR4_OSXSAVE))
			val &= ~((uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE) << 32);

		if (unlikely(these_masks->_1cd != val)) {
			wrmsr_amd(MSR_K8_FEATURE_MASK, val);
			these_masks->_1cd = val;
		}
	}

#define LAZY(cap, msr, field)						\
	({								\
		if (unlikely(these_masks->field != masks->field) &&	\
		    ((levelling_caps & cap) == cap))			\
		{							\
			wrmsr_amd(msr, masks->field);			\
			these_masks->field = masks->field;		\
		}							\
	})
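	/*
	 * Each LAZY() invocation below only issues a WRMSR when the cached
	 * per-CPU value differs from the target mask, keeping the common
	 * context-switch path cheap.
	 */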

	LAZY(LCAP_e1cd, MSR_K8_EXT_FEATURE_MASK, e1cd);
	LAZY(LCAP_7ab0, MSR_AMD_L7S0_FEATURE_MASK, _7ab0);
	LAZY(LCAP_6c,   MSR_AMD_THRM_FEATURE_MASK, _6c);

#undef LAZY
}

/*
 * Mask the features and extended features returned by CPUID.  Parameters are
 * set from the boot line via two methods:
 *
 * 1) Specific processor revision string
 * 2) User-defined masks
 *
 * The user-defined masks take precedence.
 *
 * AMD "masking msrs" are actually overrides, making it possible to advertise
 * features which are not supported by the hardware.  Care must be taken to
 * avoid this, as the accidentally-advertised features will not actually
 * function.
 */
static void __init noinline amd_init_levelling(void)
{
	const struct cpuidmask *m = NULL;

	if (probe_cpuid_faulting())
		return;

	probe_masking_msrs();

	if (*opt_famrev != '\0') {
		m = get_cpuidmask(opt_famrev);

		if (!m)
			printk("Invalid processor string: %s\n", opt_famrev);
	}

	if ((levelling_caps & LCAP_1cd) == LCAP_1cd) {
		uint32_t ecx, edx, tmp;

		cpuid(0x00000001, &tmp, &tmp, &ecx, &edx);

		if (~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx)) {
			ecx &= opt_cpuid_mask_ecx;
			edx &= opt_cpuid_mask_edx;
		} else if (m) {
			ecx &= m->ecx;
			edx &= m->edx;
		}

		/* Fast-forward bits - Must be set. */
		if (ecx & cpufeat_mask(X86_FEATURE_XSAVE))
			ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE);
		edx |= cpufeat_mask(X86_FEATURE_APIC);

		cpuidmask_defaults._1cd = ((uint64_t)ecx << 32) | edx;
	}

	if ((levelling_caps & LCAP_e1cd) == LCAP_e1cd) {
		uint32_t ecx, edx, tmp;

		cpuid(0x80000001, &tmp, &tmp, &ecx, &edx);

		if (~(opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx)) {
			ecx &= opt_cpuid_mask_ext_ecx;
			edx &= opt_cpuid_mask_ext_edx;
		} else if (m) {
			ecx &= m->ext_ecx;
			edx &= m->ext_edx;
		}

		/* Fast-forward bits - Must be set. */
		edx |= cpufeat_mask(X86_FEATURE_APIC);

		cpuidmask_defaults.e1cd = ((uint64_t)ecx << 32) | edx;
	}

	if ((levelling_caps & LCAP_7ab0) == LCAP_7ab0) {
		uint32_t eax, ebx, tmp;

		cpuid(0x00000007, &eax, &ebx, &tmp, &tmp);

		if (~(opt_cpuid_mask_l7s0_eax & opt_cpuid_mask_l7s0_ebx)) {
			eax &= opt_cpuid_mask_l7s0_eax;
			ebx &= opt_cpuid_mask_l7s0_ebx;
		}

		cpuidmask_defaults._7ab0 &= ((uint64_t)eax << 32) | ebx;
	}

	if ((levelling_caps & LCAP_6c) == LCAP_6c) {
		uint32_t ecx = cpuid_ecx(6);

		if (~opt_cpuid_mask_thermal_ecx)
			ecx &= opt_cpuid_mask_thermal_ecx;

		cpuidmask_defaults._6c &= (~0ULL << 32) | ecx;
	}

	if (opt_cpu_info) {
		printk(XENLOG_INFO "Levelling caps: %#x\n", levelling_caps);
		printk(XENLOG_INFO
		       "MSR defaults: 1d 0x%08x, 1c 0x%08x, e1d 0x%08x, "
		       "e1c 0x%08x, 7a0 0x%08x, 7b0 0x%08x, 6c 0x%08x\n",
		       (uint32_t)cpuidmask_defaults._1cd,
		       (uint32_t)(cpuidmask_defaults._1cd >> 32),
		       (uint32_t)cpuidmask_defaults.e1cd,
		       (uint32_t)(cpuidmask_defaults.e1cd >> 32),
		       (uint32_t)(cpuidmask_defaults._7ab0 >> 32),
		       (uint32_t)cpuidmask_defaults._7ab0,
		       (uint32_t)cpuidmask_defaults._6c);
	}

	if (levelling_caps)
		ctxt_switch_masking = amd_ctxt_switch_masking;
}

/*
 * Check for the presence of an AMD erratum.  Arguments are defined in amd.h
 * for each known erratum.  Return 1 if erratum is found.
 */
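/*
 * The variable arguments are a zero-terminated list of packed
 * family/model/stepping ranges (decoded via the AMD_MODEL_RANGE_* accessors
 * below); the AMD_ERRATUM_121 check in init_amd() is an example caller.
 */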
int cpu_has_amd_erratum(const struct cpuinfo_x86 *cpu, int osvw_id, ...)
{
	va_list ap;
	u32 range;
	u32 ms;

	if (cpu->x86_vendor != X86_VENDOR_AMD)
		return 0;

	if (osvw_id >= 0 && cpu_has(cpu, X86_FEATURE_OSVW)) {
		u64 osvw_len;

		rdmsrl(MSR_AMD_OSVW_ID_LENGTH, osvw_len);

		if (osvw_id < osvw_len) {
			u64 osvw_bits;

			rdmsrl(MSR_AMD_OSVW_STATUS + (osvw_id >> 6),
			       osvw_bits);

			return (osvw_bits >> (osvw_id & 0x3f)) & 1;
		}
	}

	/* OSVW unavailable or ID unknown, match family-model-stepping range */
	va_start(ap, osvw_id);

	ms = (cpu->x86_model << 4) | cpu->x86_mask;
	while ((range = va_arg(ap, int))) {
		if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
		    (ms >= AMD_MODEL_RANGE_START(range)) &&
		    (ms <= AMD_MODEL_RANGE_END(range))) {
			va_end(ap);
			return 1;
		}
	}

	va_end(ap);
	return 0;
}

/*
 * Disable C1-Clock ramping if enabled in PMM7.CpuLowPwrEnh on 8th-generation
 * cores only.  Assume BIOS has set up all Northbridges equivalently.
 */
static void disable_c1_ramping(void)
{
	u8 pmm7;
	int node, nr_nodes;

	/* Read the number of nodes from the first Northbridge. */
	nr_nodes = ((pci_conf_read32(PCI_SBDF(0, 0, 0x18, 0), 0x60) >> 4) &
		    0x07) + 1;
	for (node = 0; node < nr_nodes; node++) {
		/* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */
		pmm7 = pci_conf_read8(PCI_SBDF(0, 0, 0x18 + node, 3), 0x87);
		/* Invalid read means we've updated every Northbridge. */
		if (pmm7 == 0xFF)
			break;
		pmm7 &= 0xFC; /* clear pmm7[1:0] */
		pci_conf_write8(PCI_SBDF(0, 0, 0x18 + node, 3), 0x87, pmm7);
		printk("AMD: Disabling C1 Clock Ramping Node #%x\n", node);
	}
}

static void disable_c1e(void *unused)
{
	uint64_t msr_content;

	/*
	 * Disable C1E mode, as the APIC timer stops in that mode.
	 * The MSR does not exist in all FamilyF CPUs (only Rev F and above),
	 * but we safely catch the #GP in that case.
	 */
	if ((rdmsr_safe(MSR_K8_ENABLE_C1E, msr_content) == 0) &&
	    (msr_content & (3ULL << 27)) &&
	    (wrmsr_safe(MSR_K8_ENABLE_C1E, msr_content & ~(3ULL << 27)) != 0))
		printk(KERN_ERR "Failed to disable C1E on CPU#%u (%16"PRIx64")\n",
		       smp_processor_id(), msr_content);
}

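/*
 * Expected to be hooked into the I/O port write path: once a write of
 * acpi_enable_value to the ACPI SMI command port is observed (i.e. the OS
 * entering ACPI mode), C1E is re-disabled on every CPU in case firmware
 * turned it back on.
 */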
void amd_check_disable_c1e(unsigned int port, u8 value)
{
	/* C1E is sometimes enabled during entry to ACPI mode. */
	if ((port == acpi_smi_cmd) && (value == acpi_enable_value))
		on_each_cpu(disable_c1e, NULL, 1);
}

/*
 * BIOS is expected to clear the MtrrFixDramModEn bit.  According to the AMD
 * BKDG: "The MtrrFixDramModEn bit should be set to 1 during BIOS
 * initialization of the fixed MTRRs, then cleared to 0 for operation."
 */
static void check_syscfg_dram_mod_en(void)
{
	uint64_t syscfg;
	static bool_t printed = 0;

	if (!((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
	      (boot_cpu_data.x86 >= 0x0f)))
		return;

	rdmsrl(MSR_K8_SYSCFG, syscfg);
	if (!(syscfg & K8_MTRRFIXRANGE_DRAM_MODIFY))
		return;

	if (!test_and_set_bool(printed))
		printk(KERN_ERR "MTRR: SYSCFG[MtrrFixDramModEn] not "
		       "cleared by BIOS, clearing this bit\n");

	syscfg &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
	wrmsrl(MSR_K8_SYSCFG, syscfg);
}

static void amd_get_topology(struct cpuinfo_x86 *c)
{
	int cpu;
	unsigned bits;

	if (c->x86_max_cores <= 1)
		return;
	/*
	 * On an AMD multi-core setup the lower bits of the APIC id
	 * distinguish the cores.
	 */
	cpu = smp_processor_id();
	bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;

	if (bits == 0) {
		while ((1 << bits) < c->x86_max_cores)
			bits++;
	}

	/* Low order bits define the core id */
	c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
	/* Convert local APIC ID into the socket ID */
	c->phys_proc_id >>= bits;
	/* Collect compute unit ID if available */
	if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
		u32 eax, ebx, ecx, edx;

		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
		c->x86_num_siblings = ((ebx >> 8) & 0xff) + 1;

		if (c->x86 < 0x17)
			c->compute_unit_id = ebx & 0xFF;
		else {
			c->cpu_core_id = ebx & 0xFF;
			c->x86_max_cores /= c->x86_num_siblings;
		}

		/*
		 * In case leaf B is available, use it to derive
		 * topology information.
		 */
		if (detect_extended_topology(c))
			return;
	}

	if (opt_cpu_info)
		printk("CPU %d(%d) -> Processor %d, %s %d\n",
		       cpu, c->x86_max_cores, c->phys_proc_id,
		       c->compute_unit_id != INVALID_CUID ? "Compute Unit"
							  : "Core",
		       c->compute_unit_id != INVALID_CUID ? c->compute_unit_id
							  : c->cpu_core_id);
}

void amd_log_freq(const struct cpuinfo_x86 *c)
{
	unsigned int idx = 0, h;
	uint64_t hi, lo, val;

	if (c->x86 < 0x10 || c->x86 > 0x19 ||
	    (c != &boot_cpu_data &&
	     (!opt_cpu_info || (c->apicid & (c->x86_num_siblings - 1)))))
		return;

	if (c->x86 < 0x17) {
		unsigned int node = 0;
		uint64_t nbcfg;

		/*
		 * Make an attempt at determining the node ID, but assume
		 * symmetric setup (using node 0) if this fails.
		 */
		if (c->extended_cpuid_level >= 0x8000001e &&
		    cpu_has(c, X86_FEATURE_TOPOEXT)) {
			node = cpuid_ecx(0x8000001e) & 0xff;
			if (node > 7)
				node = 0;
		} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
			rdmsrl(0xC001100C, val);
			node = val & 7;
		}

		/*
		 * Enable (and use) Extended Config Space accesses, as we
		 * can't be certain that MCFG is available here during boot.
		 */
		rdmsrl(MSR_AMD64_NB_CFG, nbcfg);
		wrmsrl(MSR_AMD64_NB_CFG,
		       nbcfg | (1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT));
#define PCI_ECS_ADDRESS(sbdf, reg) \
    (0x80000000 | ((sbdf).bdf << 8) | ((reg) & 0xfc) | (((reg) & 0xf00) << 16))

		for ( ; ; ) {
			pci_sbdf_t sbdf = PCI_SBDF(0, 0, 0x18 | node, 4);

			switch (pci_conf_read32(sbdf, PCI_VENDOR_ID)) {
			case 0x00000000:
			case 0xffffffff:
				/* No device at this SBDF. */
				if (!node)
					break;
				node = 0;
				continue;

			default:
				/*
				 * Core Performance Boost Control, family
				 * dependent up to 3 bits starting at bit 2.
				 *
				 * Note that boost states operate at a frequency
				 * above the base one, and thus need to be
				 * accounted for in order to correctly fetch the
				 * nominal frequency of the processor.
				 */
				switch (c->x86) {
				case 0x10: idx = 1; break;
				case 0x12: idx = 7; break;
				case 0x14: idx = 7; break;
				case 0x15: idx = 7; break;
				case 0x16: idx = 7; break;
				}
				idx &= pci_conf_read(PCI_ECS_ADDRESS(sbdf,
								     0x15c),
						     0, 4) >> 2;
				break;
			}
			break;
		}

#undef PCI_ECS_ADDRESS
		wrmsrl(MSR_AMD64_NB_CFG, nbcfg);
	}

	lo = 0; /* gcc may not recognize the loop having at least 5 iterations */
	for (h = c->x86 == 0x10 ? 5 : 8; h--; )
		if (!rdmsr_safe(0xC0010064 + h, lo) && (lo >> 63))
			break;
	if (!(lo >> 63))
		return;

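	/*
	 * FREQ() below decodes a P-state MSR (MSRC001_0064 + n) into MHz:
	 * 100 * (CpuFid + 0x10) >> CpuDid for families 10h-16h, and
	 * 200 * CpuFid / CpuDfsId for family 17h and later, following the
	 * BKDG/PPR P-state register layouts.
	 */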
#define FREQ(v) (c->x86 < 0x17 ? ((((v) & 0x3f) + 0x10) * 100) >> (((v) >> 6) & 7) \
			       : (((v) & 0xff) * 25 * 8) / (((v) >> 8) & 0x3f))
	if (idx && idx < h &&
	    !rdmsr_safe(0xC0010064 + idx, val) && (val >> 63) &&
	    !rdmsr_safe(0xC0010064, hi) && (hi >> 63))
		printk("CPU%u: %lu (%lu ... %lu) MHz\n",
		       smp_processor_id(), FREQ(val), FREQ(lo), FREQ(hi));
	else if (h && !rdmsr_safe(0xC0010064, hi) && (hi >> 63))
		printk("CPU%u: %lu ... %lu MHz\n",
		       smp_processor_id(), FREQ(lo), FREQ(hi));
	else
		printk("CPU%u: %lu MHz\n", smp_processor_id(), FREQ(lo));
#undef FREQ
}

void early_init_amd(struct cpuinfo_x86 *c)
{
	if (c == &boot_cpu_data)
		amd_init_levelling();

	ctxt_switch_levelling(NULL);
}

static void init_amd(struct cpuinfo_x86 *c)
{
	u32 l, h;

	unsigned long long value;

	/*
	 * Disable TLB flush filter by setting HWCR.FFDIS on K8
	 * bit 6 of msr C001_0015
	 *
	 * Errata 63 for SH-B3 steppings
	 * Errata 122 for all steppings (F+ have it disabled by default)
	 */
	if (c->x86 == 15) {
		rdmsrl(MSR_K7_HWCR, value);
		value |= 1 << 6;
		wrmsrl(MSR_K7_HWCR, value);
	}

	/*
	 * Some AMD CPUs duplicate the 3DNow bit in base and extended CPUID
	 * leaves.  Unfortunately, this aliases PBE on Intel CPUs.  Clobber the
	 * alias, leaving 3DNow in the extended leaf.
	 */
	__clear_bit(X86_FEATURE_PBE, c->x86_capability);

	if (c->x86 == 0xf && c->x86_model < 0x14
	    && cpu_has(c, X86_FEATURE_LAHF_LM)) {
		/*
		 * Some BIOSes incorrectly force this feature, but only K8
		 * revision D (model = 0x14) and later actually support it.
		 * (AMD Erratum #110, docId: 25759).
		 */
		__clear_bit(X86_FEATURE_LAHF_LM, c->x86_capability);
		if (!rdmsr_amd_safe(0xc001100d, &l, &h))
			wrmsr_amd_safe(0xc001100d, l, h & ~1);
	}

	/*
	 * Older AMD CPUs don't save/load FOP/FIP/FDP unless an FPU exception
	 * is pending.  Xen works around this at (F)XRSTOR time.
	 */
	if (c == &boot_cpu_data && !cpu_has(c, X86_FEATURE_RSTR_FP_ERR_PTRS))
		setup_force_cpu_cap(X86_BUG_FPU_PTRS);

	/*
	 * Attempt to set lfence to be Dispatch Serialising.  This MSR almost
	 * certainly isn't virtualised (and Xen at least will leak the real
	 * value in but silently discard writes), as well as being per-core
	 * rather than per-thread, so do a full safe read/write/readback cycle
	 * in the worst case.
	 */
	if (c->x86 == 0x0f || c->x86 == 0x11)
		/* Always dispatch serialising on this hardware. */
		__set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability);
	else /* Implicitly "== 0x10 || >= 0x12" by being 64bit. */ {
		if (rdmsr_safe(MSR_AMD64_DE_CFG, value))
			/* Unable to read.  Assume the safer default. */
			__clear_bit(X86_FEATURE_LFENCE_DISPATCH,
				    c->x86_capability);
		else if (value & AMD64_DE_CFG_LFENCE_SERIALISE)
			/* Already dispatch serialising. */
			__set_bit(X86_FEATURE_LFENCE_DISPATCH,
				  c->x86_capability);
		else if (wrmsr_safe(MSR_AMD64_DE_CFG,
				    value | AMD64_DE_CFG_LFENCE_SERIALISE) ||
			 rdmsr_safe(MSR_AMD64_DE_CFG, value) ||
			 !(value & AMD64_DE_CFG_LFENCE_SERIALISE))
			/* Attempt to set failed.  Assume the safer default. */
			__clear_bit(X86_FEATURE_LFENCE_DISPATCH,
				    c->x86_capability);
		else
			/* Successfully enabled! */
			__set_bit(X86_FEATURE_LFENCE_DISPATCH,
				  c->x86_capability);
	}

	/*
	 * If the user has explicitly chosen to disable Memory Disambiguation
	 * to mitigate Speculative Store Bypass, poke the appropriate MSR.
	 */
	if (opt_ssbd) {
		int bit = -1;

		switch (c->x86) {
		case 0x15: bit = 54; break;
		case 0x16: bit = 33; break;
		case 0x17: bit = 10; break;
		}

		if (bit >= 0 && !rdmsr_safe(MSR_AMD64_LS_CFG, value)) {
			value |= 1ull << bit;
			wrmsr_safe(MSR_AMD64_LS_CFG, value);
		}
	}
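	/*
	 * The per-family bit numbers above are the non-architectural LS_CFG
	 * controls AMD documented for disabling memory disambiguation on
	 * Fam15h/16h/17h as the Speculative Store Bypass mitigation.
	 */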

	/* MFENCE stops RDTSC speculation */
	if (!cpu_has_lfence_dispatch)
		__set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);

	switch (c->x86)
	{
	case 0xf ... 0x11:
		disable_c1e(NULL);
		if (acpi_smi_cmd && (acpi_enable_value | acpi_disable_value))
			amd_acpi_c1e_quirk = true;
		break;

	case 0x15: case 0x16:
		/*
		 * There are some Fam15/Fam16 systems where upon resume from S3
		 * firmware fails to re-set up properly functioning RDRAND.
		 * By the time we can spot the problem, it is too late to take
		 * action, and there is nothing Xen can do to repair the problem.
		 * Clear the feature unless force-enabled on the command line.
		 */
		if (c == &boot_cpu_data &&
		    cpu_has(c, X86_FEATURE_RDRAND) &&
		    !is_forced_cpu_cap(X86_FEATURE_RDRAND)) {
			static const char __initconst text[] =
				"RDRAND may cease to work on this hardware upon resume from S3.\n"
				"Please choose an explicit cpuid={no-}rdrand setting.\n";

			setup_clear_cpu_cap(X86_FEATURE_RDRAND);
			warning_add(text);
		}
		break;
	}

	display_cacheinfo(c);

	if (c->extended_cpuid_level >= 0x80000008) {
		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
	}

	if (c->extended_cpuid_level >= 0x80000007) {
		if (cpu_has(c, X86_FEATURE_ITSC)) {
			__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
			__set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
			if (c->x86 != 0x11)
				__set_bit(X86_FEATURE_TSC_RELIABLE,
					  c->x86_capability);
		}
	}

	/* Re-enable TopologyExtensions if switched off by BIOS. */
	if ((c->x86 == 0x15) &&
	    (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
	    !cpu_has(c, X86_FEATURE_TOPOEXT) &&
	    !rdmsr_safe(MSR_K8_EXT_FEATURE_MASK, value)) {
		value |= 1ULL << 54;
		wrmsr_safe(MSR_K8_EXT_FEATURE_MASK, value);
		rdmsrl(MSR_K8_EXT_FEATURE_MASK, value);
		if (value & (1ULL << 54)) {
			__set_bit(X86_FEATURE_TOPOEXT, c->x86_capability);
			printk(KERN_INFO "CPU: Re-enabling disabled "
			       "Topology Extensions Support\n");
		}
	}

	/*
	 * The way access filter has a performance penalty on some workloads.
	 * Disable it on the affected CPUs.
	 */
	if (c->x86 == 0x15 && c->x86_model >= 0x02 && c->x86_model < 0x20 &&
	    !rdmsr_safe(MSR_AMD64_IC_CFG, value) && (value & 0x1e) != 0x1e)
		wrmsr_safe(MSR_AMD64_IC_CFG, value | 0x1e);

	amd_get_topology(c);

	/* Pointless to use MWAIT on Family10 as it does not deep sleep. */
	if (c->x86 == 0x10)
		__clear_bit(X86_FEATURE_MONITOR, c->x86_capability);

	if (!cpu_has_amd_erratum(c, AMD_ERRATUM_121))
		opt_allow_unsafe = 1;
	else if (opt_allow_unsafe < 0)
		panic("Xen will not boot on this CPU for security reasons.\n"
		      "Pass \"allow_unsafe\" if you're trusting all your"
		      " (PV) guest kernels.\n");
	else if (!opt_allow_unsafe && c == &boot_cpu_data)
		printk(KERN_WARNING
		       "*** Xen will not allow creation of DomU-s on"
		       " this CPU for security reasons. ***\n"
		       KERN_WARNING
		       "*** Pass \"allow_unsafe\" if you're trusting"
		       " all your (PV) guest kernels. ***\n");

	if (c->x86 == 0x16 && c->x86_model <= 0xf) {
		if (c == &boot_cpu_data) {
			l = pci_conf_read32(PCI_SBDF(0, 0, 0x18, 3), 0x58);
			h = pci_conf_read32(PCI_SBDF(0, 0, 0x18, 3), 0x5c);
			if ((l & 0x1f) | (h & 0x1))
				printk(KERN_WARNING
				       "Applying workaround for erratum 792: %s%s%s\n",
				       (l & 0x1f) ? "clearing D18F3x58[4:0]" : "",
				       ((l & 0x1f) && (h & 0x1)) ? " and " : "",
				       (h & 0x1) ? "clearing D18F3x5C[0]" : "");

			if (l & 0x1f)
				pci_conf_write32(PCI_SBDF(0, 0, 0x18, 3), 0x58,
						 l & ~0x1f);

			if (h & 0x1)
				pci_conf_write32(PCI_SBDF(0, 0, 0x18, 3), 0x5c,
						 h & ~0x1);
		}

		rdmsrl(MSR_AMD64_LS_CFG, value);
		if (!(value & (1 << 15))) {
			static bool_t warned;

			if (c == &boot_cpu_data || opt_cpu_info ||
			    !test_and_set_bool(warned))
				printk(KERN_WARNING
				       "CPU%u: Applying workaround for erratum 793\n",
				       smp_processor_id());
			wrmsrl(MSR_AMD64_LS_CFG, value | (1 << 15));
		}
	} else if (c->x86 == 0x12) {
		rdmsrl(MSR_AMD64_DE_CFG, value);
		if (!(value & (1U << 31))) {
			static bool warned;

			if (c == &boot_cpu_data || opt_cpu_info ||
			    !test_and_set_bool(warned))
				printk(KERN_WARNING
				       "CPU%u: Applying workaround for erratum 665\n",
				       smp_processor_id());
			wrmsrl(MSR_AMD64_DE_CFG, value | (1U << 31));
		}
	}

	/* AMD CPUs do not support SYSENTER outside of legacy mode. */
	__clear_bit(X86_FEATURE_SEP, c->x86_capability);

	if (c->x86 == 0x10) {
		/* do this for boot cpu */
		if (c == &boot_cpu_data)
			check_enable_amd_mmconf_dmi();

		fam10h_check_enable_mmcfg();

		/*
		 * On family 10h BIOS may not have properly enabled WC+
		 * support, causing it to be converted to CD memtype.  This may
		 * result in performance degradation for certain nested-paging
		 * guests.  Prevent this conversion by clearing bit 24 in
		 * MSR_F10_BU_CFG2.
		 */
		rdmsrl(MSR_F10_BU_CFG2, value);
		value &= ~(1ULL << 24);
		wrmsrl(MSR_F10_BU_CFG2, value);
	}

	/*
	 * Family 0x12 and above processors have APIC timer
	 * running in deep C states.
	 */
	if (opt_arat && c->x86 > 0x11)
		__set_bit(X86_FEATURE_ARAT, c->x86_capability);

	/*
	 * Prior to Family 0x14, perf counters are not reset during warm reboot.
	 * We have to reset them manually.
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC && c->x86 < 0x14) {
		wrmsrl(MSR_K7_PERFCTR0, 0);
		wrmsrl(MSR_K7_PERFCTR1, 0);
		wrmsrl(MSR_K7_PERFCTR2, 0);
		wrmsrl(MSR_K7_PERFCTR3, 0);
	}

	if (cpu_has(c, X86_FEATURE_EFRO)) {
		rdmsr(MSR_K7_HWCR, l, h);
		l |= (1 << 27); /* Enable read-only APERF/MPERF bit */
		wrmsr(MSR_K7_HWCR, l, h);
	}

	/* Prevent TSC drift on platforms other than single-processor, single-core ones. */
	if ((smp_processor_id() == 1) && !cpu_has(c, X86_FEATURE_ITSC))
		disable_c1_ramping();

	check_syscfg_dram_mod_en();

	amd_log_freq(c);
}

const struct cpu_dev amd_cpu_dev = {
	.c_early_init = early_init_amd,
	.c_init	      = init_amd,
};
