#include <xen/init.h>
#include <xen/bitops.h>
#include <xen/mm.h>
#include <xen/param.h>
#include <xen/smp.h>
#include <xen/pci.h>
#include <xen/warning.h>
#include <asm/io.h>
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/amd.h>
#include <asm/hvm/support.h>
#include <asm/spec_ctrl.h>
#include <asm/acpi.h>
#include <asm/apic.h>

#include "cpu.h"

/*
 * Pre-canned values for overriding the CPUID features
 * and extended features masks.
 *
 * Currently supported processors:
 *
 * "fam_0f_rev_c"
 * "fam_0f_rev_d"
 * "fam_0f_rev_e"
 * "fam_0f_rev_f"
 * "fam_0f_rev_g"
 * "fam_10_rev_b"
 * "fam_10_rev_c"
 * "fam_11_rev_b"
 */
static char __initdata opt_famrev[14];
string_param("cpuid_mask_cpu", opt_famrev);
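/* Example: "cpuid_mask_cpu=fam_10_rev_c" selects the Fam10h RevC masks. */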

static unsigned int __initdata opt_cpuid_mask_l7s0_eax = ~0u;
integer_param("cpuid_mask_l7s0_eax", opt_cpuid_mask_l7s0_eax);
static unsigned int __initdata opt_cpuid_mask_l7s0_ebx = ~0u;
integer_param("cpuid_mask_l7s0_ebx", opt_cpuid_mask_l7s0_ebx);

static unsigned int __initdata opt_cpuid_mask_thermal_ecx = ~0u;
integer_param("cpuid_mask_thermal_ecx", opt_cpuid_mask_thermal_ecx);

/* 1 = allow, 0 = don't allow guest creation, -1 = don't allow boot */
s8 __read_mostly opt_allow_unsafe;
boolean_param("allow_unsafe", opt_allow_unsafe);

/* Signal whether the ACPI C1E quirk is required. */
bool __read_mostly amd_acpi_c1e_quirk;

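/*
 * MSR access helpers which additionally load the AMD "password"
 * (0x9c5a203a) into %edi; pre-Fam10h parts require it for the CPUID
 * masking MSRs (see the comment ahead of _probe_mask_msr() below).
 */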
static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo,
				 unsigned int *hi)
{
	int err;

	asm volatile("1: rdmsr\n2:\n"
		     ".section .fixup,\"ax\"\n"
		     "3: movl %6,%2\n"
		     "   jmp 2b\n"
		     ".previous\n"
		     _ASM_EXTABLE(1b, 3b)
		     : "=a" (*lo), "=d" (*hi), "=r" (err)
		     : "c" (msr), "D" (0x9c5a203a), "2" (0), "i" (-EFAULT));

	return err;
}

static inline int wrmsr_amd_safe(unsigned int msr, unsigned int lo,
				 unsigned int hi)
{
	int err;

	asm volatile("1: wrmsr\n2:\n"
		     ".section .fixup,\"ax\"\n"
		     "3: movl %6,%0\n"
		     "   jmp 2b\n"
		     ".previous\n"
		     _ASM_EXTABLE(1b, 3b)
		     : "=r" (err)
		     : "c" (msr), "a" (lo), "d" (hi), "D" (0x9c5a203a),
		       "0" (0), "i" (-EFAULT));

	return err;
}

static void wrmsr_amd(unsigned int msr, uint64_t val)
{
	asm volatile("wrmsr" ::
		     "c" (msr), "a" ((uint32_t)val),
		     "d" (val >> 32), "D" (0x9c5a203a));
}

static const struct cpuidmask {
	uint16_t fam;
	char rev[2];
	unsigned int ecx, edx, ext_ecx, ext_edx;
} pre_canned[] __initconst = {
#define CAN(fam, id, rev) { \
		fam, #rev, \
		AMD_FEATURES_##id##_REV_##rev##_ECX, \
		AMD_FEATURES_##id##_REV_##rev##_EDX, \
		AMD_EXTFEATURES_##id##_REV_##rev##_ECX, \
		AMD_EXTFEATURES_##id##_REV_##rev##_EDX \
	}
#define CAN_FAM(fam, rev) CAN(0x##fam, FAM##fam##h, rev)
#define CAN_K8(rev)       CAN(0x0f,    K8,          rev)
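	/*
	 * For example, CAN_FAM(10, C) expands to
	 * { 0x10, "C", AMD_FEATURES_FAM10h_REV_C_ECX, ...,
	 *   AMD_EXTFEATURES_FAM10h_REV_C_EDX }.
	 */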
	CAN_FAM(11, B),
	CAN_FAM(10, C),
	CAN_FAM(10, B),
	CAN_K8(G),
	CAN_K8(F),
	CAN_K8(E),
	CAN_K8(D),
	CAN_K8(C)
#undef CAN
};

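/* Parse a "fam_<hex family>_rev_<letter>" string, e.g. "fam_0f_rev_g". */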
static const struct cpuidmask *__init noinline get_cpuidmask(const char *opt)
{
	unsigned long fam;
	char rev;
	unsigned int i;

	if (strncmp(opt, "fam_", 4))
		return NULL;
	fam = simple_strtoul(opt + 4, &opt, 16);
	if (strncmp(opt, "_rev_", 5) || !opt[5] || opt[6])
		return NULL;
	rev = toupper(opt[5]);

	for (i = 0; i < ARRAY_SIZE(pre_canned); ++i)
		if (fam == pre_canned[i].fam && rev == *pre_canned[i].rev)
			return &pre_canned[i];

	return NULL;
}

/*
 * Sets caps in expected_levelling_cap, probes for the specified mask MSR, and
 * sets caps in levelling_caps if it is found.  Processors prior to Fam 10h
 * required a 32-bit password for masking MSRs.  Returns the default value.
 */
static uint64_t __init _probe_mask_msr(unsigned int msr, uint64_t caps)
{
	unsigned int hi, lo;

	expected_levelling_cap |= caps;

	if ((rdmsr_amd_safe(msr, &lo, &hi) == 0) &&
	    (wrmsr_amd_safe(msr, lo, hi) == 0))
		levelling_caps |= caps;

	return ((uint64_t)hi << 32) | lo;
}

/*
 * Probe for the existence of the expected masking MSRs.  They might easily
 * not be available if Xen is running virtualised.
 */
static void __init noinline probe_masking_msrs(void)
{
	const struct cpuinfo_x86 *c = &boot_cpu_data;

	/*
	 * First, work out which masking MSRs we should have, based on
	 * revision and cpuid.
	 */

	/* Fam11 doesn't support masking at all. */
	if (c->x86 == 0x11)
		return;

	cpuidmask_defaults._1cd =
		_probe_mask_msr(MSR_K8_FEATURE_MASK, LCAP_1cd);
	cpuidmask_defaults.e1cd =
		_probe_mask_msr(MSR_K8_EXT_FEATURE_MASK, LCAP_e1cd);

	if (c->cpuid_level >= 7)
		cpuidmask_defaults._7ab0 =
			_probe_mask_msr(MSR_AMD_L7S0_FEATURE_MASK, LCAP_7ab0);

	if (c->x86 == 0x15 && c->cpuid_level >= 6 && cpuid_ecx(6))
		cpuidmask_defaults._6c =
			_probe_mask_msr(MSR_AMD_THRM_FEATURE_MASK, LCAP_6c);

	/*
	 * Don't bother warning about a mismatch if virtualised.  These MSRs
	 * are not architectural and almost never virtualised.
	 */
	if ((expected_levelling_cap == levelling_caps) ||
	    cpu_has_hypervisor)
		return;

	printk(XENLOG_WARNING "Mismatch between expected (%#x) "
	       "and real (%#x) levelling caps: missing %#x\n",
	       expected_levelling_cap, levelling_caps,
	       (expected_levelling_cap ^ levelling_caps) & levelling_caps);
	printk(XENLOG_WARNING "Fam %#x, model %#x level %#x\n",
	       c->x86, c->x86_model, c->cpuid_level);
	printk(XENLOG_WARNING
	       "If not running virtualised, please report a bug\n");
}

/*
 * Context switch CPUID masking state to the next domain.  Only called if
 * CPUID Faulting isn't available, but masking MSRs have been detected.  A
 * parameter of NULL is used to context switch to the default host state (by
 * the cpu bringup-code, crash path, etc).
 */
static void amd_ctxt_switch_masking(const struct vcpu *next)
{
	struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
	const struct domain *nextd = next ? next->domain : NULL;
	const struct cpuidmasks *masks =
		(nextd && is_pv_domain(nextd) && nextd->arch.pv.cpuidmasks)
		? nextd->arch.pv.cpuidmasks : &cpuidmask_defaults;

	if ((levelling_caps & LCAP_1cd) == LCAP_1cd) {
		uint64_t val = masks->_1cd;

		/*
		 * OSXSAVE defaults to 1, which causes fast-forwarding of
		 * Xen's real setting.  Clobber it if disabled by the guest
		 * kernel.
		 */
		if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) &&
		    !(next->arch.pv.ctrlreg[4] & X86_CR4_OSXSAVE))
			val &= ~((uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE) << 32);

		if (unlikely(these_masks->_1cd != val)) {
			wrmsr_amd(MSR_K8_FEATURE_MASK, val);
			these_masks->_1cd = val;
		}
	}

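	/*
	 * Lazily context switch the remaining masking MSRs: only issue the
	 * WRMSR when the value actually needs to change and the MSR was
	 * detected at boot.
	 */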
#define LAZY(cap, msr, field)						\
	({								\
		if (unlikely(these_masks->field != masks->field) &&	\
		    ((levelling_caps & cap) == cap))			\
		{							\
			wrmsr_amd(msr, masks->field);			\
			these_masks->field = masks->field;		\
		}							\
	})

	LAZY(LCAP_e1cd, MSR_K8_EXT_FEATURE_MASK,   e1cd);
	LAZY(LCAP_7ab0, MSR_AMD_L7S0_FEATURE_MASK, _7ab0);
	LAZY(LCAP_6c,   MSR_AMD_THRM_FEATURE_MASK, _6c);

#undef LAZY
}

/*
 * Mask the features and extended features returned by CPUID.  Parameters are
 * set from the boot line via two methods:
 *
 *   1) Specific processor revision string
 *   2) User-defined masks
 *
 * The user-defined masks take precedence.
 *
 * AMD "masking msrs" are actually overrides, making it possible to advertise
 * features which are not supported by the hardware.  Care must be taken to
 * avoid this, as the accidentally-advertised features will not actually
 * function.
 */
static void __init noinline amd_init_levelling(void)
{
	const struct cpuidmask *m = NULL;

	if (probe_cpuid_faulting())
		return;

	probe_masking_msrs();

	if (*opt_famrev != '\0') {
		m = get_cpuidmask(opt_famrev);

		if (!m)
			printk("Invalid processor string: %s\n", opt_famrev);
	}

	if ((levelling_caps & LCAP_1cd) == LCAP_1cd) {
		uint32_t ecx, edx, tmp;

		cpuid(0x00000001, &tmp, &tmp, &ecx, &edx);

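		/*
		 * The opt_cpuid_mask_* command line values default to ~0u,
		 * so these checks only fire if the user cleared at least one
		 * bit (the same pattern is used for the leaf 7 and thermal
		 * masks below).
		 */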
		if (~(opt_cpuid_mask_ecx & opt_cpuid_mask_edx)) {
			ecx &= opt_cpuid_mask_ecx;
			edx &= opt_cpuid_mask_edx;
		} else if (m) {
			ecx &= m->ecx;
			edx &= m->edx;
		}

		/* Fast-forward bits - Must be set. */
		if (ecx & cpufeat_mask(X86_FEATURE_XSAVE))
			ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE);
		edx |= cpufeat_mask(X86_FEATURE_APIC);

		cpuidmask_defaults._1cd = ((uint64_t)ecx << 32) | edx;
	}

	if ((levelling_caps & LCAP_e1cd) == LCAP_e1cd) {
		uint32_t ecx, edx, tmp;

		cpuid(0x80000001, &tmp, &tmp, &ecx, &edx);

		if (~(opt_cpuid_mask_ext_ecx & opt_cpuid_mask_ext_edx)) {
			ecx &= opt_cpuid_mask_ext_ecx;
			edx &= opt_cpuid_mask_ext_edx;
		} else if (m) {
			ecx &= m->ext_ecx;
			edx &= m->ext_edx;
		}

		/* Fast-forward bits - Must be set. */
		edx |= cpufeat_mask(X86_FEATURE_APIC);

		cpuidmask_defaults.e1cd = ((uint64_t)ecx << 32) | edx;
	}

	if ((levelling_caps & LCAP_7ab0) == LCAP_7ab0) {
		uint32_t eax, ebx, tmp;

		cpuid(0x00000007, &eax, &ebx, &tmp, &tmp);

		if (~(opt_cpuid_mask_l7s0_eax & opt_cpuid_mask_l7s0_ebx)) {
			eax &= opt_cpuid_mask_l7s0_eax;
			ebx &= opt_cpuid_mask_l7s0_ebx;
		}

		cpuidmask_defaults._7ab0 &= ((uint64_t)eax << 32) | ebx;
	}

	if ((levelling_caps & LCAP_6c) == LCAP_6c) {
		uint32_t ecx = cpuid_ecx(6);

		if (~opt_cpuid_mask_thermal_ecx)
			ecx &= opt_cpuid_mask_thermal_ecx;

		cpuidmask_defaults._6c &= (~0ULL << 32) | ecx;
	}

	if (opt_cpu_info) {
		printk(XENLOG_INFO "Levelling caps: %#x\n", levelling_caps);
		printk(XENLOG_INFO
		       "MSR defaults: 1d 0x%08x, 1c 0x%08x, e1d 0x%08x, "
		       "e1c 0x%08x, 7a0 0x%08x, 7b0 0x%08x, 6c 0x%08x\n",
		       (uint32_t)cpuidmask_defaults._1cd,
		       (uint32_t)(cpuidmask_defaults._1cd >> 32),
		       (uint32_t)cpuidmask_defaults.e1cd,
		       (uint32_t)(cpuidmask_defaults.e1cd >> 32),
		       (uint32_t)(cpuidmask_defaults._7ab0 >> 32),
		       (uint32_t)cpuidmask_defaults._7ab0,
		       (uint32_t)cpuidmask_defaults._6c);
	}

	if (levelling_caps)
		ctxt_switch_masking = amd_ctxt_switch_masking;
}

/*
 * Check for the presence of an AMD erratum. Arguments are defined in amd.h
 * for each known erratum. Return 1 if erratum is found.
 */
int cpu_has_amd_erratum(const struct cpuinfo_x86 *cpu, int osvw_id, ...)
{
	va_list ap;
	u32 range;
	u32 ms;

	if (cpu->x86_vendor != X86_VENDOR_AMD)
		return 0;

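	/*
	 * If an OS Visible Workaround (OSVW) ID is provided and the CPU
	 * implements OSVW, use the status MSRs to determine whether the
	 * erratum applies to this particular part.
	 */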
	if (osvw_id >= 0 && cpu_has(cpu, X86_FEATURE_OSVW)) {
		u64 osvw_len;

		rdmsrl(MSR_AMD_OSVW_ID_LENGTH, osvw_len);

		if (osvw_id < osvw_len) {
			u64 osvw_bits;

			rdmsrl(MSR_AMD_OSVW_STATUS + (osvw_id >> 6),
			       osvw_bits);

			return (osvw_bits >> (osvw_id & 0x3f)) & 1;
		}
	}

	/* OSVW unavailable or ID unknown, match family-model-stepping range */
	va_start(ap, osvw_id);

	ms = (cpu->x86_model << 4) | cpu->x86_mask;
	while ((range = va_arg(ap, int))) {
		if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
		    (ms >= AMD_MODEL_RANGE_START(range)) &&
		    (ms <= AMD_MODEL_RANGE_END(range))) {
			va_end(ap);
			return 1;
		}
	}

	va_end(ap);
	return 0;
}

/*
 * Disable C1-Clock ramping if enabled in PMM7.CpuLowPwrEnh on 8th-generation
 * cores only. Assume the BIOS has set up all Northbridges equivalently.
 */
static void disable_c1_ramping(void)
{
	u8 pmm7;
	int node, nr_nodes;

	/* Read the number of nodes from the first Northbridge. */
	nr_nodes = ((pci_conf_read32(PCI_SBDF(0, 0, 0x18, 0), 0x60) >> 4) &
		    0x07) + 1;
	for (node = 0; node < nr_nodes; node++) {
		/* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */
		pmm7 = pci_conf_read8(PCI_SBDF(0, 0, 0x18 + node, 3), 0x87);
		/* Invalid read means we've updated every Northbridge. */
		if (pmm7 == 0xFF)
			break;
		pmm7 &= 0xFC; /* clear pmm7[1:0] */
		pci_conf_write8(PCI_SBDF(0, 0, 0x18 + node, 3), 0x87, pmm7);
		printk("AMD: Disabling C1 Clock Ramping Node #%x\n", node);
	}
}

static void disable_c1e(void *unused)
{
	uint64_t msr_content;

	/*
	 * Disable C1E mode, as the APIC timer stops in that mode.
	 * The MSR does not exist in all FamilyF CPUs (only Rev F and above),
	 * but we safely catch the #GP in that case.
	 */
	if ((rdmsr_safe(MSR_K8_ENABLE_C1E, msr_content) == 0) &&
	    (msr_content & (3ULL << 27)) &&
	    (wrmsr_safe(MSR_K8_ENABLE_C1E, msr_content & ~(3ULL << 27)) != 0))
		printk(KERN_ERR "Failed to disable C1E on CPU#%u (%16"PRIx64")\n",
		       smp_processor_id(), msr_content);
}

void amd_check_disable_c1e(unsigned int port, u8 value)
{
	/* C1E is sometimes enabled during entry to ACPI mode. */
	if ((port == acpi_smi_cmd) && (value == acpi_enable_value))
		on_each_cpu(disable_c1e, NULL, 1);
}

/*
 * BIOS is expected to clear the MtrrFixDramModEn bit. According to the AMD
 * BKDG: "The MtrrFixDramModEn bit should be set to 1 during BIOS
 * initialization of the fixed MTRRs, then cleared to 0 for operation."
 */
static void check_syscfg_dram_mod_en(void)
{
	uint64_t syscfg;
	static bool_t printed = 0;

	if (!((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
		(boot_cpu_data.x86 >= 0x0f)))
		return;

	rdmsrl(MSR_K8_SYSCFG, syscfg);
	if (!(syscfg & K8_MTRRFIXRANGE_DRAM_MODIFY))
		return;

	if (!test_and_set_bool(printed))
		printk(KERN_ERR "MTRR: SYSCFG[MtrrFixDramModEn] not "
			"cleared by BIOS, clearing this bit\n");

	syscfg &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
	wrmsrl(MSR_K8_SYSCFG, syscfg);
}

static void amd_get_topology(struct cpuinfo_x86 *c)
{
        int cpu;
        unsigned bits;

        if (c->x86_max_cores <= 1)
                return;
        /*
         * On an AMD multi core setup the lower bits of the APIC id
         * distinguish the cores.
         */
        cpu = smp_processor_id();
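        /*
         * CPUID 0x80000008 ECX[15:12] (ApicIdCoreIdSize) gives the number of
         * APIC ID bits which distinguish the cores.
         */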
        bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;

        if (bits == 0) {
                while ((1 << bits) < c->x86_max_cores)
                        bits++;
        }

        /* Low order bits define the core id */
        c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
        /* Convert local APIC ID into the socket ID */
        c->phys_proc_id >>= bits;
        /* Collect compute unit ID if available */
        if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
                u32 eax, ebx, ecx, edx;

                cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
                c->x86_num_siblings = ((ebx >> 8) & 0xff) + 1;

                if (c->x86 < 0x17)
                        c->compute_unit_id = ebx & 0xFF;
                else {
                        c->cpu_core_id = ebx & 0xFF;
                        c->x86_max_cores /= c->x86_num_siblings;
                }

                /*
                 * In case leaf B is available, use it to derive
                 * topology information.
                 */
                if (detect_extended_topology(c))
                        return;
        }

        if (opt_cpu_info)
                printk("CPU %d(%d) -> Processor %d, %s %d\n",
                       cpu, c->x86_max_cores, c->phys_proc_id,
                       c->compute_unit_id != INVALID_CUID ? "Compute Unit"
                                                          : "Core",
                       c->compute_unit_id != INVALID_CUID ? c->compute_unit_id
                                                          : c->cpu_core_id);
}

void amd_log_freq(const struct cpuinfo_x86 *c)
{
	unsigned int idx = 0, h;
	uint64_t hi, lo, val;

	if (c->x86 < 0x10 || c->x86 > 0x19 ||
	    (c != &boot_cpu_data &&
	     (!opt_cpu_info || (c->apicid & (c->x86_num_siblings - 1)))))
		return;

	if (c->x86 < 0x17) {
		unsigned int node = 0;
		uint64_t nbcfg;

		/*
		 * Make an attempt at determining the node ID, but assume
		 * symmetric setup (using node 0) if this fails.
		 */
		if (c->extended_cpuid_level >= 0x8000001e &&
		    cpu_has(c, X86_FEATURE_TOPOEXT)) {
			node = cpuid_ecx(0x8000001e) & 0xff;
			if (node > 7)
				node = 0;
		} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
			rdmsrl(0xC001100C, val);
			node = val & 7;
		}

		/*
		 * Enable (and use) Extended Config Space accesses, as we
		 * can't be certain that MCFG is available here during boot.
		 */
		rdmsrl(MSR_AMD64_NB_CFG, nbcfg);
		wrmsrl(MSR_AMD64_NB_CFG,
		       nbcfg | (1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT));
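/*
 * CF8-style Extended Configuration Space address: register bits [11:8] go
 * into address bits [27:24]; the rest is a regular type-1 config address.
 */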
#define PCI_ECS_ADDRESS(sbdf, reg) \
    (0x80000000 | ((sbdf).bdf << 8) | ((reg) & 0xfc) | (((reg) & 0xf00) << 16))

		for ( ; ; ) {
			pci_sbdf_t sbdf = PCI_SBDF(0, 0, 0x18 | node, 4);

			switch (pci_conf_read32(sbdf, PCI_VENDOR_ID)) {
			case 0x00000000:
			case 0xffffffff:
				/* No device at this SBDF. */
				if (!node)
					break;
				node = 0;
				continue;

			default:
				/*
				 * Core Performance Boost Control, family
				 * dependent up to 3 bits starting at bit 2.
				 *
				 * Note that boost states operate at a frequency
				 * above the base one, and thus need to be
				 * accounted for in order to correctly fetch the
				 * nominal frequency of the processor.
				 */
				switch (c->x86) {
				case 0x10: idx = 1; break;
				case 0x12: idx = 7; break;
				case 0x14: idx = 7; break;
				case 0x15: idx = 7; break;
				case 0x16: idx = 7; break;
				}
				idx &= pci_conf_read(PCI_ECS_ADDRESS(sbdf,
				                                     0x15c),
				                     0, 4) >> 2;
				break;
			}
			break;
		}

#undef PCI_ECS_ADDRESS
		wrmsrl(MSR_AMD64_NB_CFG, nbcfg);
	}

	lo = 0; /* gcc may not recognize the loop having at least 5 iterations */
	for (h = c->x86 == 0x10 ? 5 : 8; h--; )
		if (!rdmsr_safe(0xC0010064 + h, lo) && (lo >> 63))
			break;
	if (!(lo >> 63))
		return;

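/*
 * Decode a P-state MSR value into MHz: pre-Fam17h this is
 * 100 * (CpuFid + 0x10) >> CpuDid, while Fam17h and later use
 * (CpuFid * 25) / (CpuDfsId / 8).
 */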
#define FREQ(v) (c->x86 < 0x17 ? ((((v) & 0x3f) + 0x10) * 100) >> (((v) >> 6) & 7) \
		                     : (((v) & 0xff) * 25 * 8) / (((v) >> 8) & 0x3f))
	if (idx && idx < h &&
	    !rdmsr_safe(0xC0010064 + idx, val) && (val >> 63) &&
	    !rdmsr_safe(0xC0010064, hi) && (hi >> 63))
		printk("CPU%u: %lu (%lu ... %lu) MHz\n",
		       smp_processor_id(), FREQ(val), FREQ(lo), FREQ(hi));
	else if (h && !rdmsr_safe(0xC0010064, hi) && (hi >> 63))
		printk("CPU%u: %lu ... %lu MHz\n",
		       smp_processor_id(), FREQ(lo), FREQ(hi));
	else
		printk("CPU%u: %lu MHz\n", smp_processor_id(), FREQ(lo));
#undef FREQ
}

void early_init_amd(struct cpuinfo_x86 *c)
{
	if (c == &boot_cpu_data)
		amd_init_levelling();

	ctxt_switch_levelling(NULL);
}

static void init_amd(struct cpuinfo_x86 *c)
{
	u32 l, h;

	unsigned long long value;

	/* Disable TLB flush filter by setting HWCR.FFDIS on K8
	 * bit 6 of msr C001_0015
	 *
	 * Errata 63 for SH-B3 steppings
	 * Errata 122 for all steppings (F+ have it disabled by default)
	 */
	if (c->x86 == 15) {
		rdmsrl(MSR_K7_HWCR, value);
		value |= 1 << 6;
		wrmsrl(MSR_K7_HWCR, value);
	}

	/*
	 * Some AMD CPUs duplicate the 3DNow bit in base and extended CPUID
	 * leaves.  Unfortunately, this aliases PBE on Intel CPUs. Clobber the
	 * alias, leaving 3DNow in the extended leaf.
	 */
	__clear_bit(X86_FEATURE_PBE, c->x86_capability);

	if (c->x86 == 0xf && c->x86_model < 0x14
	    && cpu_has(c, X86_FEATURE_LAHF_LM)) {
		/*
		 * Some BIOSes incorrectly force this feature, but only K8
		 * revision D (model = 0x14) and later actually support it.
		 * (AMD Erratum #110, docId: 25759).
		 */
		__clear_bit(X86_FEATURE_LAHF_LM, c->x86_capability);
		if (!rdmsr_amd_safe(0xc001100d, &l, &h))
			wrmsr_amd_safe(0xc001100d, l, h & ~1);
	}

	/*
	 * Older AMD CPUs don't save/load FOP/FIP/FDP unless an FPU exception
	 * is pending.  Xen works around this at (F)XRSTOR time.
	 */
	if (c == &boot_cpu_data && !cpu_has(c, X86_FEATURE_RSTR_FP_ERR_PTRS))
		setup_force_cpu_cap(X86_BUG_FPU_PTRS);

	/*
	 * Attempt to set lfence to be Dispatch Serialising.  This MSR almost
	 * certainly isn't virtualised (and Xen at least will leak the real
	 * value in but silently discard writes), as well as being per-core
	 * rather than per-thread, so do a full safe read/write/readback cycle
	 * in the worst case.
	 */
	if (c->x86 == 0x0f || c->x86 == 0x11)
		/* Always dispatch serialising on this hardware. */
		__set_bit(X86_FEATURE_LFENCE_DISPATCH, c->x86_capability);
	else /* Implicitly "== 0x10 || >= 0x12" by being 64bit. */ {
		if (rdmsr_safe(MSR_AMD64_DE_CFG, value))
			/* Unable to read.  Assume the safer default. */
			__clear_bit(X86_FEATURE_LFENCE_DISPATCH,
				    c->x86_capability);
		else if (value & AMD64_DE_CFG_LFENCE_SERIALISE)
			/* Already dispatch serialising. */
			__set_bit(X86_FEATURE_LFENCE_DISPATCH,
				  c->x86_capability);
		else if (wrmsr_safe(MSR_AMD64_DE_CFG,
				    value | AMD64_DE_CFG_LFENCE_SERIALISE) ||
			 rdmsr_safe(MSR_AMD64_DE_CFG, value) ||
			 !(value & AMD64_DE_CFG_LFENCE_SERIALISE))
			/* Attempt to set failed.  Assume the safer default. */
			__clear_bit(X86_FEATURE_LFENCE_DISPATCH,
				    c->x86_capability);
		else
			/* Successfully enabled! */
			__set_bit(X86_FEATURE_LFENCE_DISPATCH,
				  c->x86_capability);
	}

	/*
	 * If the user has explicitly chosen to disable Memory Disambiguation
	 * to mitigate Speculative Store Bypass, poke the appropriate MSR.
	 */
	if (opt_ssbd) {
		int bit = -1;

		switch (c->x86) {
		case 0x15: bit = 54; break;
		case 0x16: bit = 33; break;
		case 0x17: bit = 10; break;
		}

		if (bit >= 0 && !rdmsr_safe(MSR_AMD64_LS_CFG, value)) {
			value |= 1ull << bit;
			wrmsr_safe(MSR_AMD64_LS_CFG, value);
		}
	}

	/* MFENCE stops RDTSC speculation */
	if (!cpu_has_lfence_dispatch)
		__set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);

	switch(c->x86)
	{
	case 0xf ... 0x11:
		disable_c1e(NULL);
		if (acpi_smi_cmd && (acpi_enable_value | acpi_disable_value))
			amd_acpi_c1e_quirk = true;
		break;

	case 0x15: case 0x16:
		/*
		 * There are some Fam15/Fam16 systems where upon resume from S3
		 * firmware fails to re-setup properly functioning RDRAND.
		 * By the time we can spot the problem, it is too late to take
		 * action, and there is nothing Xen can do to repair the problem.
		 * Clear the feature unless force-enabled on the command line.
		 */
		if (c == &boot_cpu_data &&
		    cpu_has(c, X86_FEATURE_RDRAND) &&
		    !is_forced_cpu_cap(X86_FEATURE_RDRAND)) {
			static const char __initconst text[] =
				"RDRAND may cease to work on this hardware upon resume from S3.\n"
				"Please choose an explicit cpuid={no-}rdrand setting.\n";

			setup_clear_cpu_cap(X86_FEATURE_RDRAND);
			warning_add(text);
		}
		break;
	}

	display_cacheinfo(c);

	if (c->extended_cpuid_level >= 0x80000008) {
		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
	}

	if (c->extended_cpuid_level >= 0x80000007) {
		if (cpu_has(c, X86_FEATURE_ITSC)) {
			__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
			__set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
			if (c->x86 != 0x11)
				__set_bit(X86_FEATURE_TSC_RELIABLE,
					  c->x86_capability);
		}
	}

	/* re-enable TopologyExtensions if switched off by BIOS */
	if ((c->x86 == 0x15) &&
	    (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
	    !cpu_has(c, X86_FEATURE_TOPOEXT) &&
	    !rdmsr_safe(MSR_K8_EXT_FEATURE_MASK, value)) {
		value |= 1ULL << 54;
		wrmsr_safe(MSR_K8_EXT_FEATURE_MASK, value);
		rdmsrl(MSR_K8_EXT_FEATURE_MASK, value);
		if (value & (1ULL << 54)) {
			__set_bit(X86_FEATURE_TOPOEXT, c->x86_capability);
			printk(KERN_INFO "CPU: Re-enabling disabled "
			       "Topology Extensions Support\n");
		}
	}

	/*
	 * The "way access filter" has a performance penalty on some workloads.
	 * Disable it on the affected CPUs.
	 */
	if (c->x86 == 0x15 && c->x86_model >= 0x02 && c->x86_model < 0x20 &&
	    !rdmsr_safe(MSR_AMD64_IC_CFG, value) && (value & 0x1e) != 0x1e)
		wrmsr_safe(MSR_AMD64_IC_CFG, value | 0x1e);

	amd_get_topology(c);

	/* Pointless to use MWAIT on Family10 as it does not deep sleep. */
	if (c->x86 == 0x10)
		__clear_bit(X86_FEATURE_MONITOR, c->x86_capability);

	if (!cpu_has_amd_erratum(c, AMD_ERRATUM_121))
		opt_allow_unsafe = 1;
	else if (opt_allow_unsafe < 0)
		panic("Xen will not boot on this CPU for security reasons.\n"
		      "Pass \"allow_unsafe\" if you're trusting all your"
		      " (PV) guest kernels.\n");
	else if (!opt_allow_unsafe && c == &boot_cpu_data)
		printk(KERN_WARNING
		       "*** Xen will not allow creation of DomU-s on"
		       " this CPU for security reasons. ***\n"
		       KERN_WARNING
		       "*** Pass \"allow_unsafe\" if you're trusting"
		       " all your (PV) guest kernels. ***\n");

	if (c->x86 == 0x16 && c->x86_model <= 0xf) {
		if (c == &boot_cpu_data) {
			l = pci_conf_read32(PCI_SBDF(0, 0, 0x18, 3), 0x58);
			h = pci_conf_read32(PCI_SBDF(0, 0, 0x18, 3), 0x5c);
			if ((l & 0x1f) | (h & 0x1))
				printk(KERN_WARNING
				       "Applying workaround for erratum 792: %s%s%s\n",
				       (l & 0x1f) ? "clearing D18F3x58[4:0]" : "",
				       ((l & 0x1f) && (h & 0x1)) ? " and " : "",
				       (h & 0x1) ? "clearing D18F3x5C[0]" : "");

			if (l & 0x1f)
				pci_conf_write32(PCI_SBDF(0, 0, 0x18, 3), 0x58,
						 l & ~0x1f);

			if (h & 0x1)
				pci_conf_write32(PCI_SBDF(0, 0, 0x18, 3), 0x5c,
						 h & ~0x1);
		}

		rdmsrl(MSR_AMD64_LS_CFG, value);
		if (!(value & (1 << 15))) {
			static bool_t warned;

			if (c == &boot_cpu_data || opt_cpu_info ||
			    !test_and_set_bool(warned))
				printk(KERN_WARNING
				       "CPU%u: Applying workaround for erratum 793\n",
				       smp_processor_id());
			wrmsrl(MSR_AMD64_LS_CFG, value | (1 << 15));
		}
	} else if (c->x86 == 0x12) {
		rdmsrl(MSR_AMD64_DE_CFG, value);
		if (!(value & (1U << 31))) {
			static bool warned;

			if (c == &boot_cpu_data || opt_cpu_info ||
			    !test_and_set_bool(warned))
				printk(KERN_WARNING
				       "CPU%u: Applying workaround for erratum 665\n",
				       smp_processor_id());
			wrmsrl(MSR_AMD64_DE_CFG, value | (1U << 31));
		}
	}

	/* AMD CPUs do not support SYSENTER outside of legacy mode. */
	__clear_bit(X86_FEATURE_SEP, c->x86_capability);

	if (c->x86 == 0x10) {
		/* do this for boot cpu */
		if (c == &boot_cpu_data)
			check_enable_amd_mmconf_dmi();

		fam10h_check_enable_mmcfg();

		/*
		 * On family 10h BIOS may not have properly enabled WC+
		 * support, causing it to be converted to CD memtype. This may
		 * result in performance degradation for certain nested-paging
		 * guests. Prevent this conversion by clearing bit 24 in
		 * MSR_F10_BU_CFG2.
		 */
		rdmsrl(MSR_F10_BU_CFG2, value);
		value &= ~(1ULL << 24);
		wrmsrl(MSR_F10_BU_CFG2, value);
	}

	/*
	 * Family 0x12 and above processors have an APIC timer which keeps
	 * running in deep C states.
	 */
	if ( opt_arat && c->x86 > 0x11 )
		__set_bit(X86_FEATURE_ARAT, c->x86_capability);

	/*
	 * Prior to Family 0x14, perf counters are not reset during warm reboot.
	 * We have to reset them manually.
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC && c->x86 < 0x14) {
		wrmsrl(MSR_K7_PERFCTR0, 0);
		wrmsrl(MSR_K7_PERFCTR1, 0);
		wrmsrl(MSR_K7_PERFCTR2, 0);
		wrmsrl(MSR_K7_PERFCTR3, 0);
	}

	if (cpu_has(c, X86_FEATURE_EFRO)) {
		rdmsr(MSR_K7_HWCR, l, h);
		l |= (1 << 27); /* Enable read-only APERF/MPERF bit */
		wrmsr(MSR_K7_HWCR, l, h);
	}

	/* Prevent TSC drift in non single-processor, single-core platforms. */
	if ((smp_processor_id() == 1) && !cpu_has(c, X86_FEATURE_ITSC))
		disable_c1_ramping();

	check_syscfg_dram_mod_en();

	amd_log_freq(c);
}

const struct cpu_dev amd_cpu_dev = {
	.c_early_init	= early_init_amd,
	.c_init		= init_amd,
};