/******************************************************************************
 * arch/x86/msr.c
 *
 * Policy objects for Model-Specific Registers.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (c) 2017 Citrix Systems Ltd.
 */

#include <xen/init.h>
#include <xen/lib.h>
#include <xen/nospec.h>
#include <xen/sched.h>

#include <asm/debugreg.h>
#include <asm/hvm/viridian.h>
#include <asm/msr.h>
#include <asm/setup.h>

#include <public/hvm/params.h>

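/*
 * Per-CPU cache of the current MSR_TSC_AUX value, used by wrmsr_tsc_aux()
 * to avoid redundant WRMSRs.
 */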
DEFINE_PER_CPU(uint32_t, tsc_aux);

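/*
 * MSR policy objects, parallel to the CPUID policies:
 *  - raw:          values as observed in hardware,
 *  - host:         the settings Xen itself uses,
 *  - {pv,hvm}_max: the most permissive settings offerable to guests,
 *  - {pv,hvm}_def: the defaults used for newly created domains.
 */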
struct msr_policy __read_mostly     raw_msr_policy,
                  __read_mostly    host_msr_policy;
#ifdef CONFIG_PV
struct msr_policy __read_mostly  pv_max_msr_policy;
struct msr_policy __read_mostly  pv_def_msr_policy;
#endif
#ifdef CONFIG_HVM
struct msr_policy __read_mostly hvm_max_msr_policy;
struct msr_policy __read_mostly hvm_def_msr_policy;
#endif

static void __init calculate_raw_policy(void)
{
    /* 0x000000ce  MSR_INTEL_PLATFORM_INFO */
    /* Was already added by probe_cpuid_faulting() */
}

static void __init calculate_host_policy(void)
{
    struct msr_policy *mp = &host_msr_policy;

    *mp = raw_msr_policy;

    /* 0x000000ce  MSR_INTEL_PLATFORM_INFO */
    /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES */
    mp->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
}

static void __init calculate_pv_max_policy(void)
{
    struct msr_policy *mp = &pv_max_msr_policy;

    *mp = host_msr_policy;
}

static void __init calculate_pv_def_policy(void)
{
    struct msr_policy *mp = &pv_def_msr_policy;

    *mp = pv_max_msr_policy;
}

static void __init calculate_hvm_max_policy(void)
{
    struct msr_policy *mp = &hvm_max_msr_policy;

    *mp = host_msr_policy;

    /* It's always possible to emulate CPUID faulting for HVM guests */
    mp->platform_info.cpuid_faulting = true;
}

static void __init calculate_hvm_def_policy(void)
{
    struct msr_policy *mp = &hvm_def_msr_policy;

    *mp = hvm_max_msr_policy;
}

void __init init_guest_msr_policy(void)
{
    calculate_raw_policy();
    calculate_host_policy();

    if ( IS_ENABLED(CONFIG_PV) )
    {
        calculate_pv_max_policy();
        calculate_pv_def_policy();
    }

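    /*
     * Unlike the compile-time check above, hvm_enabled also reflects
     * run-time detection of virtualisation extensions.
     */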
    if ( hvm_enabled )
    {
        calculate_hvm_max_policy();
        calculate_hvm_def_policy();
    }
}

int init_domain_msr_policy(struct domain *d)
{
    struct msr_policy *mp = is_pv_domain(d)
        ? (IS_ENABLED(CONFIG_PV)  ?  &pv_def_msr_policy : NULL)
        : (IS_ENABLED(CONFIG_HVM) ? &hvm_def_msr_policy : NULL);

    if ( !mp )
    {
        ASSERT_UNREACHABLE();
        return -EOPNOTSUPP;
    }

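    /* Each domain gets a mutable copy of the appropriate default policy. */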
    mp = xmemdup(mp);
    if ( !mp )
        return -ENOMEM;

    /* See comment in ctxt_switch_levelling() */
    if ( !opt_dom0_cpuid_faulting && is_control_domain(d) && is_pv_domain(d) )
        mp->platform_info.cpuid_faulting = false;

    /*
     * Expose the "hardware speculation behaviour" bits of ARCH_CAPS to dom0,
     * so dom0 can turn off workarounds as appropriate.  Temporary, until the
     * domain policy logic gains a better understanding of MSRs.
     */
    if ( is_hardware_domain(d) && boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
    {
        uint64_t val;

        rdmsrl(MSR_ARCH_CAPABILITIES, val);

        mp->arch_caps.raw = val &
            (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA |
             ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_TAA_NO);
    }

    d->arch.msr = mp;

    return 0;
}

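/*
 * Allocate the per-vCPU MSR state.  Zeroed contents match the reset values
 * of the MSRs tracked in struct vcpu_msrs.
 */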
int init_vcpu_msr_policy(struct vcpu *v)
{
    struct vcpu_msrs *msrs = xzalloc(struct vcpu_msrs);

    if ( !msrs )
        return -ENOMEM;

    v->arch.msrs = msrs;

    return 0;
}

int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
{
    const struct vcpu *curr = current;
    const struct domain *d = v->domain;
    const struct cpuid_policy *cp = d->arch.cpuid;
    const struct msr_policy *mp = d->arch.msr;
    const struct vcpu_msrs *msrs = v->arch.msrs;
    int ret = X86EMUL_OKAY;

    switch ( msr )
    {
    case MSR_AMD_PATCHLOADER:
    case MSR_IA32_UCODE_WRITE:
    case MSR_PRED_CMD:
    case MSR_FLUSH_CMD:
        /* Write-only */
    case MSR_TEST_CTRL:
    case MSR_CORE_CAPABILITIES:
    case MSR_TSX_FORCE_ABORT:
    case MSR_TSX_CTRL:
    case MSR_MCU_OPT_CTRL:
    case MSR_RTIT_OUTPUT_BASE ... MSR_RTIT_ADDR_B(7):
    case MSR_RAPL_POWER_UNIT:
    case MSR_PKG_POWER_LIMIT  ... MSR_PKG_POWER_INFO:
    case MSR_DRAM_POWER_LIMIT ... MSR_DRAM_POWER_INFO:
    case MSR_PP0_POWER_LIMIT  ... MSR_PP0_POLICY:
    case MSR_PP1_POWER_LIMIT  ... MSR_PP1_POLICY:
    case MSR_PLATFORM_ENERGY_COUNTER:
    case MSR_PLATFORM_POWER_LIMIT:
    case MSR_U_CET:
    case MSR_S_CET:
    case MSR_PL0_SSP ... MSR_INTERRUPT_SSP_TABLE:
    case MSR_AMD64_LWP_CFG:
    case MSR_AMD64_LWP_CBADDR:
    case MSR_PPIN_CTL:
    case MSR_PPIN:
    case MSR_F15H_CU_POWER ... MSR_F15H_CU_MAX_POWER:
    case MSR_AMD_RAPL_POWER_UNIT ... MSR_AMD_PKG_ENERGY_STATUS:
    case MSR_AMD_PPIN_CTL:
    case MSR_AMD_PPIN:
        /* Not offered to guests. */
        goto gp_fault;

    case MSR_IA32_PLATFORM_ID:
        if ( !(cp->x86_vendor & X86_VENDOR_INTEL) ||
             !(boot_cpu_data.x86_vendor & X86_VENDOR_INTEL) )
            goto gp_fault;
        rdmsrl(MSR_IA32_PLATFORM_ID, *val);
        break;

    case MSR_AMD_PATCHLEVEL:
        BUILD_BUG_ON(MSR_IA32_UCODE_REV != MSR_AMD_PATCHLEVEL);
        /*
         * AMD and Intel use the same MSR for the current microcode version.
         *
         * There is no need to jump through the SDM-provided hoops for Intel.
         * A guest might itself perform the "write 0, CPUID, read" sequence,
         * but servicing the CPUID for the guest typically won't result in
         * actually executing a CPUID instruction.
         *
         * As a guest can't influence the value of this MSR, the value will be
         * from Xen's last microcode load, which can be forwarded straight to
         * the guest.
         */
        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
             !(boot_cpu_data.x86_vendor &
               (X86_VENDOR_INTEL | X86_VENDOR_AMD)) ||
             rdmsr_safe(MSR_AMD_PATCHLEVEL, *val) )
            goto gp_fault;
        break;

    case MSR_SPEC_CTRL:
        if ( !cp->feat.ibrsb )
            goto gp_fault;
        *val = msrs->spec_ctrl.raw;
        break;

    case MSR_INTEL_PLATFORM_INFO:
        *val = mp->platform_info.raw;
        break;

    case MSR_ARCH_CAPABILITIES:
        if ( !cp->feat.arch_caps )
            goto gp_fault;
        *val = mp->arch_caps.raw;
        break;

    case MSR_INTEL_MISC_FEATURES_ENABLES:
        *val = msrs->misc_features_enables.raw;
        break;

        /*
         * These MSRs are not enumerated in CPUID.  They have been around
         * since the Pentium 4, and implemented by other vendors.
         *
         * Some versions of Windows try reading these before setting up a #GP
         * handler, and Linux has several unguarded reads as well.  Provide
         * RAZ semantics, in general, but permit a cpufreq controller dom0 to
         * have full access.
         */
    case MSR_IA32_PERF_STATUS:
    case MSR_IA32_PERF_CTL:
        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
            goto gp_fault;

        *val = 0;
        if ( likely(!is_cpufreq_controller(d)) || rdmsr_safe(msr, *val) == 0 )
            break;
        goto gp_fault;

    case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
        if ( !is_hvm_domain(d) || v != curr )
            goto gp_fault;

        ret = guest_rdmsr_x2apic(v, msr, val);
        break;

    case MSR_IA32_BNDCFGS:
        if ( !cp->feat.mpx || !is_hvm_domain(d) ||
             !hvm_get_guest_bndcfgs(v, val) )
            goto gp_fault;
        break;

    case MSR_IA32_XSS:
        if ( !cp->xstate.xsaves )
            goto gp_fault;

        *val = msrs->xss.raw;
        break;

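        /*
         * 0x40000000-0x400001ff is the Viridian MSR range when Viridian is
         * enabled for the domain, in which case Xen's synthetic MSRs are
         * relocated to 0x40000200-0x400002ff.  Otherwise the lower range
         * falls through to the Xen handlers too.
         */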
    case 0x40000000 ... 0x400001ff:
        if ( is_viridian_domain(d) )
        {
            ret = guest_rdmsr_viridian(v, msr, val);
            break;
        }

        /* Fallthrough. */
    case 0x40000200 ... 0x400002ff:
        ret = guest_rdmsr_xen(v, msr, val);
        break;

    case MSR_TSC_AUX:
        if ( !cp->extd.rdtscp && !cp->feat.rdpid )
            goto gp_fault;

        *val = msrs->tsc_aux;
        break;

    case MSR_AMD64_DR0_ADDRESS_MASK:
    case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
        if ( !cp->extd.dbext )
            goto gp_fault;

        /*
         * In HVM context when we've allowed the guest direct access to debug
         * registers, the value in msrs->dr_mask[] may be stale.  Re-read it
         * out of hardware.
         */
#ifdef CONFIG_HVM
        if ( v == current && is_hvm_domain(d) && v->arch.hvm.flag_dr_dirty )
            rdmsrl(msr, *val);
        else
#endif
            *val = msrs->dr_mask[
                array_index_nospec((msr == MSR_AMD64_DR0_ADDRESS_MASK)
                                   ? 0 : (msr - MSR_AMD64_DR1_ADDRESS_MASK + 1),
                                   ARRAY_SIZE(msrs->dr_mask))];
        break;

        /*
         * TODO: Implement when we have better topology representation.
    case MSR_INTEL_CORE_THREAD_COUNT:
         */
    default:
        return X86EMUL_UNHANDLEABLE;
    }

    /*
     * Interim safety check that functions we dispatch to don't alias "Not yet
     * handled by the new MSR infrastructure".
     */
    ASSERT(ret != X86EMUL_UNHANDLEABLE);

    return ret;

 gp_fault:
    return X86EMUL_EXCEPTION;
}

int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
{
    const struct vcpu *curr = current;
    struct domain *d = v->domain;
    const struct cpuid_policy *cp = d->arch.cpuid;
    const struct msr_policy *mp = d->arch.msr;
    struct vcpu_msrs *msrs = v->arch.msrs;
    int ret = X86EMUL_OKAY;

    switch ( msr )
    {
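        /* Scoped across the whole switch; set and used by multiple cases. */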
        uint64_t rsvd;

    case MSR_IA32_PLATFORM_ID:
    case MSR_CORE_CAPABILITIES:
    case MSR_INTEL_CORE_THREAD_COUNT:
    case MSR_INTEL_PLATFORM_INFO:
    case MSR_ARCH_CAPABILITIES:
    case MSR_IA32_PERF_STATUS:
        /* Read-only */
    case MSR_TEST_CTRL:
    case MSR_TSX_FORCE_ABORT:
    case MSR_TSX_CTRL:
    case MSR_MCU_OPT_CTRL:
    case MSR_RTIT_OUTPUT_BASE ... MSR_RTIT_ADDR_B(7):
    case MSR_RAPL_POWER_UNIT:
    case MSR_PKG_POWER_LIMIT  ... MSR_PKG_POWER_INFO:
    case MSR_DRAM_POWER_LIMIT ... MSR_DRAM_POWER_INFO:
    case MSR_PP0_POWER_LIMIT  ... MSR_PP0_POLICY:
    case MSR_PP1_POWER_LIMIT  ... MSR_PP1_POLICY:
    case MSR_PLATFORM_ENERGY_COUNTER:
    case MSR_PLATFORM_POWER_LIMIT:
    case MSR_U_CET:
    case MSR_S_CET:
    case MSR_PL0_SSP ... MSR_INTERRUPT_SSP_TABLE:
    case MSR_AMD64_LWP_CFG:
    case MSR_AMD64_LWP_CBADDR:
    case MSR_PPIN_CTL:
    case MSR_PPIN:
    case MSR_F15H_CU_POWER ... MSR_F15H_CU_MAX_POWER:
    case MSR_AMD_RAPL_POWER_UNIT ... MSR_AMD_PKG_ENERGY_STATUS:
    case MSR_AMD_PPIN_CTL:
    case MSR_AMD_PPIN:
        /* Not offered to guests. */
        goto gp_fault;

    case MSR_AMD_PATCHLEVEL:
        BUILD_BUG_ON(MSR_IA32_UCODE_REV != MSR_AMD_PATCHLEVEL);
        /*
         * AMD and Intel use the same MSR for the current microcode version.
         *
         * Both document it as read-only.  However Intel also document that,
         * for backwards compatibility, the OS should write 0 to it before
         * trying to access the current microcode version.
         */
        if ( d->arch.cpuid->x86_vendor != X86_VENDOR_INTEL || val != 0 )
            goto gp_fault;
        break;

    case MSR_AMD_PATCHLOADER:
        /*
         * See note on MSR_IA32_UCODE_WRITE below, which may or may not apply
         * to AMD CPUs as well (at least the architectural/CPUID part does).
         */
        if ( is_pv_domain(d) ||
             d->arch.cpuid->x86_vendor != X86_VENDOR_AMD )
            goto gp_fault;
        break;

    case MSR_IA32_UCODE_WRITE:
        /*
         * Some versions of Windows at least on certain hardware try to load
         * microcode before setting up an IDT. Therefore we must not inject #GP
         * for such attempts. Also the MSR is architectural and not qualified
         * by any CPUID bit.
         */
        if ( is_pv_domain(d) ||
             d->arch.cpuid->x86_vendor != X86_VENDOR_INTEL )
            goto gp_fault;
        break;

    case MSR_SPEC_CTRL:
        if ( !cp->feat.ibrsb )
            goto gp_fault; /* MSR available? */

        /*
         * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored)
         * when STIBP isn't enumerated in hardware.
         */
        rsvd = ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
                 (cp->feat.ssbd ? SPEC_CTRL_SSBD : 0));

        if ( val & rsvd )
            goto gp_fault; /* Rsvd bit set? */

        msrs->spec_ctrl.raw = val;
        break;

    case MSR_PRED_CMD:
        if ( !cp->feat.ibrsb && !cp->extd.ibpb )
            goto gp_fault; /* MSR available? */

        if ( val & ~PRED_CMD_IBPB )
            goto gp_fault; /* Rsvd bit set? */

        if ( v == curr )
            wrmsrl(MSR_PRED_CMD, val);
        break;

    case MSR_FLUSH_CMD:
        if ( !cp->feat.l1d_flush )
            goto gp_fault; /* MSR available? */

        if ( val & ~FLUSH_CMD_L1D )
            goto gp_fault; /* Rsvd bit set? */

        if ( v == curr )
            wrmsrl(MSR_FLUSH_CMD, val);
        break;

    case MSR_INTEL_MISC_FEATURES_ENABLES:
    {
        bool old_cpuid_faulting = msrs->misc_features_enables.cpuid_faulting;

        rsvd = ~0ull;
        if ( mp->platform_info.cpuid_faulting )
            rsvd &= ~MSR_MISC_FEATURES_CPUID_FAULTING;

        if ( val & rsvd )
            goto gp_fault;

        msrs->misc_features_enables.raw = val;

        if ( v == curr && is_hvm_domain(d) && cpu_has_cpuid_faulting &&
             (old_cpuid_faulting ^ msrs->misc_features_enables.cpuid_faulting) )
            ctxt_switch_levelling(v);
        break;
    }

        /*
         * This MSR is not enumerated in CPUID.  It has been around since the
         * Pentium 4, and implemented by other vendors.
         *
         * To match the RAZ semantics, implement as write-discard, except for
         * a cpufreq controller dom0 which has full access.
         */
    case MSR_IA32_PERF_CTL:
        if ( !(cp->x86_vendor & (X86_VENDOR_INTEL | X86_VENDOR_CENTAUR)) )
            goto gp_fault;

        if ( likely(!is_cpufreq_controller(d)) || wrmsr_safe(msr, val) == 0 )
            break;
        goto gp_fault;

    case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
        if ( !is_hvm_domain(d) || v != curr )
            goto gp_fault;

        ret = guest_wrmsr_x2apic(v, msr, val);
        break;

    case MSR_IA32_BNDCFGS:
        if ( !cp->feat.mpx || !is_hvm_domain(d) ||
             !hvm_set_guest_bndcfgs(v, val) )
            goto gp_fault;
        break;

    case MSR_IA32_XSS:
        if ( !cp->xstate.xsaves )
            goto gp_fault;

        /* No XSS features currently supported for guests */
        if ( val != 0 )
            goto gp_fault;

        msrs->xss.raw = val;
        break;

    case 0x40000000 ... 0x400001ff:
        if ( is_viridian_domain(d) )
        {
            ret = guest_wrmsr_viridian(v, msr, val);
            break;
        }

        /* Fallthrough. */
    case 0x40000200 ... 0x400002ff:
        ret = guest_wrmsr_xen(v, msr, val);
        break;

    case MSR_TSC_AUX:
        if ( !cp->extd.rdtscp && !cp->feat.rdpid )
            goto gp_fault;
        if ( val != (uint32_t)val )
            goto gp_fault;

        msrs->tsc_aux = val;
        if ( v == curr )
            wrmsr_tsc_aux(val);
        break;

    case MSR_AMD64_DR0_ADDRESS_MASK:
    case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
        if ( !cp->extd.dbext || val != (uint32_t)val )
            goto gp_fault;

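        /*
         * MSR_AMD64_DR0_ADDRESS_MASK is not contiguous with the DR1-3 mask
         * MSRs, hence index 0 being special-cased below.
         */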
        msrs->dr_mask[
            array_index_nospec((msr == MSR_AMD64_DR0_ADDRESS_MASK)
                               ? 0 : (msr - MSR_AMD64_DR1_ADDRESS_MASK + 1),
                               ARRAY_SIZE(msrs->dr_mask))] = val;

        if ( v == curr && (curr->arch.dr7 & DR7_ACTIVE_MASK) )
            wrmsrl(msr, val);
        break;

    default:
        return X86EMUL_UNHANDLEABLE;
    }

    /*
     * Interim safety check that functions we dispatch to don't alias "Not yet
     * handled by the new MSR infrastructure".
     */
    ASSERT(ret != X86EMUL_UNHANDLEABLE);

    return ret;

 gp_fault:
    return X86EMUL_EXCEPTION;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */