/******************************************************************************
 * arch/x86/msr.c
 *
 * Policy objects for Model-Specific Registers.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (c) 2017 Citrix Systems Ltd.
 */

#include <xen/init.h>
#include <xen/lib.h>
#include <xen/nospec.h>
#include <xen/sched.h>

#include <asm/debugreg.h>
#include <asm/hvm/viridian.h>
#include <asm/msr.h>
#include <asm/setup.h>

#include <public/hvm/params.h>

DEFINE_PER_CPU(uint32_t, tsc_aux);

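/*
 * MSR policy objects, derived in the calculate_*() helpers below:
 *  - raw:  values as observed on the hardware.
 *  - host: the subset Xen itself uses.
 *  - {pv,hvm}_max: the maximum a guest of that type may be offered.
 *  - {pv,hvm}_def: the default for newly created guests of that type.
 */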
struct msr_policy __read_mostly raw_msr_policy,
                  __read_mostly host_msr_policy;
#ifdef CONFIG_PV
struct msr_policy __read_mostly pv_max_msr_policy;
struct msr_policy __read_mostly pv_def_msr_policy;
#endif
#ifdef CONFIG_HVM
struct msr_policy __read_mostly hvm_max_msr_policy;
struct msr_policy __read_mostly hvm_def_msr_policy;
#endif

static void __init calculate_raw_policy(void)
{
    /* 0x000000ce  MSR_INTEL_PLATFORM_INFO */
    /* Was already added by probe_cpuid_faulting() */
}

static void __init calculate_host_policy(void)
{
    struct msr_policy *mp = &host_msr_policy;

    *mp = raw_msr_policy;

    /* 0x000000ce  MSR_INTEL_PLATFORM_INFO */
    /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */
    mp->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
}

static void __init calculate_pv_max_policy(void)
{
    struct msr_policy *mp = &pv_max_msr_policy;

    *mp = host_msr_policy;
}

static void __init calculate_pv_def_policy(void)
{
    struct msr_policy *mp = &pv_def_msr_policy;

    *mp = pv_max_msr_policy;
}

static void __init calculate_hvm_max_policy(void)
{
    struct msr_policy *mp = &hvm_max_msr_policy;

    *mp = host_msr_policy;

    /* It's always possible to emulate CPUID faulting for HVM guests */
    mp->platform_info.cpuid_faulting = true;
}

static void __init calculate_hvm_def_policy(void)
{
    struct msr_policy *mp = &hvm_def_msr_policy;

    *mp = hvm_max_msr_policy;
}

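/* Compute the system-wide MSR policy objects.  Called once during boot. */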
void __init init_guest_msr_policy(void)
{
    calculate_raw_policy();
    calculate_host_policy();

    if ( IS_ENABLED(CONFIG_PV) )
    {
        calculate_pv_max_policy();
        calculate_pv_def_policy();
    }

    if ( hvm_enabled )
    {
        calculate_hvm_max_policy();
        calculate_hvm_def_policy();
    }
}

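/*
 * Allocate and initialise d->arch.msr from the relevant default policy,
 * with adjustments for the hardware/control domain.  Returns 0 on success,
 * -ENOMEM on allocation failure, or -EOPNOTSUPP if support for the domain
 * type isn't compiled in.
 */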
int init_domain_msr_policy(struct domain *d)
{
    struct msr_policy *mp = is_pv_domain(d)
        ? (IS_ENABLED(CONFIG_PV)  ?  &pv_def_msr_policy : NULL)
        : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_msr_policy : NULL);

    if ( !mp )
    {
        ASSERT_UNREACHABLE();
        return -EOPNOTSUPP;
    }

    mp = xmemdup(mp);
    if ( !mp )
        return -ENOMEM;

    /* See comment in ctxt_switch_levelling() */
    if ( !opt_dom0_cpuid_faulting && is_control_domain(d) && is_pv_domain(d) )
        mp->platform_info.cpuid_faulting = false;

    /*
     * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0,
     * so dom0 can turn off workarounds as appropriate.  Temporary, until the
     * domain policy logic gains a better understanding of MSRs.
     */
    if ( is_hardware_domain(d) && boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
    {
        uint64_t val;

        rdmsrl(MSR_ARCH_CAPABILITIES, val);

        mp->arch_caps.raw = val &
            (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA |
             ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_TAA_NO);
    }

    d->arch.msr = mp;

    return 0;
}

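/* Allocate zeroed per-vCPU MSR state and attach it to the vcpu. */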
int init_vcpu_msr_policy(struct vcpu *v)
{
    struct vcpu_msrs *msrs = xzalloc(struct vcpu_msrs);

    if ( !msrs )
        return -ENOMEM;

    v->arch.msrs = msrs;

    return 0;
}

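/*
 * Common handler for guest MSR reads.  Returns X86EMUL_OKAY with *val
 * filled in, X86EMUL_EXCEPTION when the guest should see #GP, or
 * X86EMUL_UNHANDLEABLE for MSRs still handled by the legacy PV/HVM paths.
 *
 * Caller pattern (sketch only; the real callers live in the PV and HVM
 * emulation paths):
 *
 *     uint64_t val;
 *
 *     switch ( guest_rdmsr(v, msr, &val) )
 *     {
 *     case X86EMUL_OKAY:      ... deliver val to the guest ...       break;
 *     case X86EMUL_EXCEPTION: ... inject #GP(0) ...                  break;
 *     default:                ... fall back to the legacy handler ...
 *     }
 */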
int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
{
    const struct vcpu *curr = current;
    const struct domain *d = v->domain;
    const struct cpuid_policy *cp = d->arch.cpuid;
    const struct msr_policy *mp = d->arch.msr;
    const struct vcpu_msrs *msrs = v->arch.msrs;
    int ret = X86EMUL_OKAY;

    switch ( msr )
    {
    case MSR_AMD_PATCHLOADER:
    case MSR_IA32_UCODE_WRITE:
    case MSR_PRED_CMD:
    case MSR_FLUSH_CMD:
        /* Write-only */
    case MSR_TEST_CTRL:
    case MSR_CORE_CAPABILITIES:
    case MSR_TSX_FORCE_ABORT:
    case MSR_TSX_CTRL:
    case MSR_MCU_OPT_CTRL:
    case MSR_RTIT_OUTPUT_BASE ... MSR_RTIT_ADDR_B(7):
    case MSR_RAPL_POWER_UNIT:
    case MSR_PKG_POWER_LIMIT ... MSR_PKG_POWER_INFO:
    case MSR_DRAM_POWER_LIMIT ... MSR_DRAM_POWER_INFO:
    case MSR_PP0_POWER_LIMIT ... MSR_PP0_POLICY:
    case MSR_PP1_POWER_LIMIT ... MSR_PP1_POLICY:
    case MSR_PLATFORM_ENERGY_COUNTER:
    case MSR_PLATFORM_POWER_LIMIT:
    case MSR_U_CET:
    case MSR_S_CET:
    case MSR_PL0_SSP ... MSR_INTERRUPT_SSP_TABLE:
    case MSR_AMD64_LWP_CFG:
    case MSR_AMD64_LWP_CBADDR:
    case MSR_PPIN_CTL:
    case MSR_PPIN:
    case MSR_F15H_CU_POWER ... MSR_F15H_CU_MAX_POWER:
    case MSR_AMD_RAPL_POWER_UNIT ... MSR_AMD_PKG_ENERGY_STATUS:
    case MSR_AMD_PPIN_CTL:
    case MSR_AMD_PPIN:
        /* Not offered to guests. */
        goto gp_fault;

    case MSR_IA32_PLATFORM_ID:
        if ( !(cp->x86_vendor & X86_VENDOR_INTEL) ||
             !(boot_cpu_data.x86_vendor & X86_VENDOR_INTEL) )
            goto gp_fault;
        rdmsrl(MSR_IA32_PLATFORM_ID, *val);
        break;

    case MSR_AMD_PATCHLEVEL:
        BUILD_BUG_ON(MSR_IA32_UCODE_REV != MSR_AMD_PATCHLEVEL);
        /*
         * AMD and Intel use the same MSR for the current microcode version.
         *
         * There is no need to jump through the SDM-provided hoops for Intel.
         * A guest might itself perform the "write 0, CPUID, read" sequence,
         * but servicing the CPUID for the guest typically won't result in
         * actually executing a CPUID instruction.
         *
         * As a guest can't influence the value of this MSR, the value will be
         * from Xen's last microcode load, which can be forwarded straight to
         * the guest.
         */
        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
             !(boot_cpu_data.x86_vendor &
               (X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
             rdmsr_safe(MSR_AMD_PATCHLEVEL, *val) )
            goto gp_fault;
        break;

    case MSR_SPEC_CTRL:
        if ( !cp->feat.ibrsb )
            goto gp_fault;
        *val = msrs->spec_ctrl.raw;
        break;

    case MSR_INTEL_PLATFORM_INFO:
        *val = mp->platform_info.raw;
        break;

    case MSR_ARCH_CAPABILITIES:
        if ( !cp->feat.arch_caps )
            goto gp_fault;
        *val = mp->arch_caps.raw;
        break;

    case MSR_INTEL_MISC_FEATURES_ENABLES:
        *val = msrs->misc_features_enables.raw;
        break;

    /*
     * These MSRs are not enumerated in CPUID.  They have been around
     * since the Pentium 4, and are implemented by other vendors.
     *
     * Some versions of Windows try reading these before setting up a #GP
     * handler, and Linux has several unguarded reads as well.  Provide
     * RAZ semantics, in general, but permit a cpufreq controller dom0 to
     * have full access.
     */
    case MSR_IA32_PERF_STATUS:
    case MSR_IA32_PERF_CTL:
        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
            goto gp_fault;

        *val = 0;
        if ( likely(!is_cpufreq_controller(d)) || rdmsr_safe(msr, *val) == 0 )
            break;
        goto gp_fault;

    case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
        if ( !is_hvm_domain(d) || v != curr )
            goto gp_fault;

        ret = guest_rdmsr_x2apic(v, msr, val);
        break;

    case MSR_IA32_BNDCFGS:
        if ( !cp->feat.mpx || !is_hvm_domain(d) ||
             !hvm_get_guest_bndcfgs(v, val) )
            goto gp_fault;
        break;

    case MSR_IA32_XSS:
        if ( !cp->xstate.xsaves )
            goto gp_fault;

        *val = msrs->xss.raw;
        break;

    case 0x40000000 ... 0x400001ff:
        if ( is_viridian_domain(d) )
        {
            ret = guest_rdmsr_viridian(v, msr, val);
            break;
        }

        /* Fallthrough. */
    case 0x40000200 ... 0x400002ff:
        ret = guest_rdmsr_xen(v, msr, val);
        break;

    case MSR_TSC_AUX:
        if ( !cp->extd.rdtscp && !cp->feat.rdpid )
            goto gp_fault;

        *val = msrs->tsc_aux;
        break;

    case MSR_AMD64_DR0_ADDRESS_MASK:
    case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
        if ( !cp->extd.dbext )
            goto gp_fault;

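        /*
         * Note: MSR_AMD64_DR0_ADDRESS_MASK is not contiguous with the DR1-3
         * mask MSRs, hence the special case when forming the dr_mask[]
         * array index below.
         */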
        /*
         * In HVM context when we've allowed the guest direct access to debug
         * registers, the value in msrs->dr_mask[] may be stale.  Re-read it
         * out of hardware.
         */
#ifdef CONFIG_HVM
        if ( v == current && is_hvm_domain(d) && v->arch.hvm.flag_dr_dirty )
            rdmsrl(msr, *val);
        else
#endif
            *val = msrs->dr_mask[
                array_index_nospec((msr == MSR_AMD64_DR0_ADDRESS_MASK)
                                   ? 0 : (msr - MSR_AMD64_DR1_ADDRESS_MASK + 1),
                                   ARRAY_SIZE(msrs->dr_mask))];
        break;

    /*
     * TODO: Implement when we have better topology representation.
    case MSR_INTEL_CORE_THREAD_COUNT:
     */
    default:
        return X86EMUL_UNHANDLEABLE;
    }

    /*
     * Interim safety check that functions we dispatch to don't alias "Not yet
     * handled by the new MSR infrastructure".
     */
    ASSERT(ret != X86EMUL_UNHANDLEABLE);

    return ret;

 gp_fault:
    return X86EMUL_EXCEPTION;
}

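/*
 * Common handler for guest MSR writes.  Mirrors guest_rdmsr(): returns
 * X86EMUL_OKAY once the write has been applied, X86EMUL_EXCEPTION when the
 * guest should see #GP, or X86EMUL_UNHANDLEABLE for MSRs still handled by
 * the legacy PV/HVM paths.
 */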
int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
{
    const struct vcpu *curr = current;
    struct domain *d = v->domain;
    const struct cpuid_policy *cp = d->arch.cpuid;
    const struct msr_policy *mp = d->arch.msr;
    struct vcpu_msrs *msrs = v->arch.msrs;
    int ret = X86EMUL_OKAY;

    switch ( msr )
    {
        uint64_t rsvd;

    case MSR_IA32_PLATFORM_ID:
    case MSR_CORE_CAPABILITIES:
    case MSR_INTEL_CORE_THREAD_COUNT:
    case MSR_INTEL_PLATFORM_INFO:
    case MSR_ARCH_CAPABILITIES:
    case MSR_IA32_PERF_STATUS:
        /* Read-only */
    case MSR_TEST_CTRL:
    case MSR_TSX_FORCE_ABORT:
    case MSR_TSX_CTRL:
    case MSR_MCU_OPT_CTRL:
    case MSR_RTIT_OUTPUT_BASE ... MSR_RTIT_ADDR_B(7):
    case MSR_RAPL_POWER_UNIT:
    case MSR_PKG_POWER_LIMIT ... MSR_PKG_POWER_INFO:
    case MSR_DRAM_POWER_LIMIT ... MSR_DRAM_POWER_INFO:
    case MSR_PP0_POWER_LIMIT ... MSR_PP0_POLICY:
    case MSR_PP1_POWER_LIMIT ... MSR_PP1_POLICY:
    case MSR_PLATFORM_ENERGY_COUNTER:
    case MSR_PLATFORM_POWER_LIMIT:
    case MSR_U_CET:
    case MSR_S_CET:
    case MSR_PL0_SSP ... MSR_INTERRUPT_SSP_TABLE:
    case MSR_AMD64_LWP_CFG:
    case MSR_AMD64_LWP_CBADDR:
    case MSR_PPIN_CTL:
    case MSR_PPIN:
    case MSR_F15H_CU_POWER ... MSR_F15H_CU_MAX_POWER:
    case MSR_AMD_RAPL_POWER_UNIT ... MSR_AMD_PKG_ENERGY_STATUS:
    case MSR_AMD_PPIN_CTL:
    case MSR_AMD_PPIN:
        /* Not offered to guests. */
        goto gp_fault;

    case MSR_AMD_PATCHLEVEL:
        BUILD_BUG_ON(MSR_IA32_UCODE_REV != MSR_AMD_PATCHLEVEL);
        /*
         * AMD and Intel use the same MSR for the current microcode version.
         *
         * Both document it as read-only.  However, Intel also documents that,
         * for backwards compatibility, the OS should write 0 to it before
         * trying to access the current microcode version.
         */
        if ( d->arch.cpuid->x86_vendor != X86_VENDOR_INTEL || val != 0 )
            goto gp_fault;
        break;

    case MSR_AMD_PATCHLOADER:
        /*
         * See note on MSR_IA32_UCODE_WRITE below, which may or may not apply
         * to AMD CPUs as well (at least the architectural/CPUID part does).
         */
        if ( is_pv_domain(d) ||
             d->arch.cpuid->x86_vendor != X86_VENDOR_AMD )
            goto gp_fault;
        break;

    case MSR_IA32_UCODE_WRITE:
        /*
         * Some versions of Windows, at least on certain hardware, try to
         * load microcode before setting up an IDT.  Therefore we must not
         * inject #GP for such attempts.  Also, the MSR is architectural and
         * not qualified by any CPUID bit.
         */
        if ( is_pv_domain(d) ||
             d->arch.cpuid->x86_vendor != X86_VENDOR_INTEL )
            goto gp_fault;
        break;

    case MSR_SPEC_CTRL:
        if ( !cp->feat.ibrsb )
            goto gp_fault; /* MSR available? */

        /*
         * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored)
         * when STIBP isn't enumerated in hardware.
         */
        rsvd = ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
                 (cp->feat.ssbd ? SPEC_CTRL_SSBD : 0));

        if ( val & rsvd )
            goto gp_fault; /* Rsvd bit set? */

        msrs->spec_ctrl.raw = val;
        break;

    case MSR_PRED_CMD:
        if ( !cp->feat.ibrsb && !cp->extd.ibpb )
            goto gp_fault; /* MSR available? */

        if ( val & ~PRED_CMD_IBPB )
            goto gp_fault; /* Rsvd bit set? */

        if ( v == curr )
            wrmsrl(MSR_PRED_CMD, val);
        break;

    case MSR_FLUSH_CMD:
        if ( !cp->feat.l1d_flush )
            goto gp_fault; /* MSR available? */

        if ( val & ~FLUSH_CMD_L1D )
            goto gp_fault; /* Rsvd bit set? */

        if ( v == curr )
            wrmsrl(MSR_FLUSH_CMD, val);
        break;

    case MSR_INTEL_MISC_FEATURES_ENABLES:
    {
        bool old_cpuid_faulting = msrs->misc_features_enables.cpuid_faulting;

        rsvd = ~0ull;
        if ( mp->platform_info.cpuid_faulting )
            rsvd &= ~MSR_MISC_FEATURES_CPUID_FAULTING;

        if ( val & rsvd )
            goto gp_fault;

        msrs->misc_features_enables.raw = val;

        if ( v == curr && is_hvm_domain(d) && cpu_has_cpuid_faulting &&
             (old_cpuid_faulting ^ msrs->misc_features_enables.cpuid_faulting) )
            ctxt_switch_levelling(v);
        break;
    }

    /*
     * This MSR is not enumerated in CPUID.  It has been around since the
     * Pentium 4, and is implemented by other vendors.
     *
     * To match the RAZ semantics, implement as write-discard, except for
     * a cpufreq controller dom0 which has full access.
     */
    case MSR_IA32_PERF_CTL:
        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
            goto gp_fault;

        if ( likely(!is_cpufreq_controller(d)) || wrmsr_safe(msr, val) == 0 )
            break;
        goto gp_fault;

    case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
        if ( !is_hvm_domain(d) || v != curr )
            goto gp_fault;

        ret = guest_wrmsr_x2apic(v, msr, val);
        break;

    case MSR_IA32_BNDCFGS:
        if ( !cp->feat.mpx || !is_hvm_domain(d) ||
             !hvm_set_guest_bndcfgs(v, val) )
            goto gp_fault;
        break;

    case MSR_IA32_XSS:
        if ( !cp->xstate.xsaves )
            goto gp_fault;

        /* No XSS features currently supported for guests */
        if ( val != 0 )
            goto gp_fault;

        msrs->xss.raw = val;
        break;

    case 0x40000000 ... 0x400001ff:
        if ( is_viridian_domain(d) )
        {
            ret = guest_wrmsr_viridian(v, msr, val);
            break;
        }

        /* Fallthrough. */
    case 0x40000200 ... 0x400002ff:
        ret = guest_wrmsr_xen(v, msr, val);
        break;

    case MSR_TSC_AUX:
        if ( !cp->extd.rdtscp && !cp->feat.rdpid )
            goto gp_fault;
        if ( val != (uint32_t)val )
            goto gp_fault;

        msrs->tsc_aux = val;
        if ( v == curr )
            wrmsr_tsc_aux(val);
        break;

    case MSR_AMD64_DR0_ADDRESS_MASK:
    case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
        if ( !cp->extd.dbext || val != (uint32_t)val )
            goto gp_fault;

        msrs->dr_mask[
            array_index_nospec((msr == MSR_AMD64_DR0_ADDRESS_MASK)
                               ? 0 : (msr - MSR_AMD64_DR1_ADDRESS_MASK + 1),
                               ARRAY_SIZE(msrs->dr_mask))] = val;

        if ( v == curr && (curr->arch.dr7 & DR7_ACTIVE_MASK) )
            wrmsrl(msr, val);
        break;

    default:
        return X86EMUL_UNHANDLEABLE;
    }

    /*
     * Interim safety check that functions we dispatch to don't alias "Not yet
     * handled by the new MSR infrastructure".
     */
    ASSERT(ret != X86EMUL_UNHANDLEABLE);

    return ret;

 gp_fault:
    return X86EMUL_EXCEPTION;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */