/*
 * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain.
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Haitao Shan <haitao.shan@intel.com>
 */

#include <xen/sched.h>
#include <xen/xenoprof.h>
#include <xen/irq.h>
#include <asm/system.h>
#include <asm/regs.h>
#include <asm/types.h>
#include <asm/apic.h>
#include <asm/traps.h>
#include <asm/msr.h>
#include <asm/msr-index.h>
#include <asm/vpmu.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <public/sched.h>
#include <public/hvm/save.h>
#include <public/pmu.h>

/*
 * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for the CPUID
 * instruction.
 * cpuid 0xa - Architectural Performance Monitoring Leaf
 * Register eax
 */
#define PMU_VERSION_SHIFT        0  /* Version ID */
#define PMU_VERSION_BITS         8  /* 8 bits 0..7 */
#define PMU_VERSION_MASK         (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT)

#define PMU_GENERAL_NR_SHIFT     8  /* Number of general pmu registers */
#define PMU_GENERAL_NR_BITS      8  /* 8 bits 8..15 */
#define PMU_GENERAL_NR_MASK      (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT)

#define PMU_GENERAL_WIDTH_SHIFT 16  /* Width of general pmu registers */
#define PMU_GENERAL_WIDTH_BITS   8  /* 8 bits 16..23 */
#define PMU_GENERAL_WIDTH_MASK  (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT)
/* Register edx */
#define PMU_FIXED_NR_SHIFT       0  /* Number of fixed pmu registers */
#define PMU_FIXED_NR_BITS        5  /* 5 bits 0..4 */
#define PMU_FIXED_NR_MASK        (((1 << PMU_FIXED_NR_BITS) - 1) << PMU_FIXED_NR_SHIFT)

#define PMU_FIXED_WIDTH_SHIFT    5  /* Width of fixed pmu registers */
#define PMU_FIXED_WIDTH_BITS     8  /* 8 bits 5..12 */
#define PMU_FIXED_WIDTH_MASK     (((1 << PMU_FIXED_WIDTH_BITS) - 1) << PMU_FIXED_WIDTH_SHIFT)
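/*
 * Illustrative decode (hypothetical values, not from any particular CPU):
 * a CPUID.0xa EAX value of 0x07300404 would report PMU version 4 with
 * four 48-bit general-purpose counters, while an EDX value of 0x00000603
 * would report three fixed counters that are 48 bits wide.
 */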

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK       (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0))
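/*
 * MSR_IA32_PERFCTR0 (0xc1) and MSR_IA32_A_PERFCTR0 (0x4c1) differ only in
 * bit 10, so masking an MSR index with MSR_PMC_ALIAS_MASK folds the
 * full-width alias range onto the legacy PMC range; is_core2_vpmu_msr()
 * relies on this to recognise both ranges with a single comparison.
 */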
static bool_t __read_mostly full_width_write;

/*
 * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed
 * counters. 4 bits for every counter.
 */
#define FIXED_CTR_CTRL_BITS 4
#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
#define FIXED_CTR_CTRL_ANYTHREAD_MASK 0x4
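/*
 * Within MSR_CORE_PERF_FIXED_CTR_CTRL, fixed counter i is configured by
 * bits [4*i+3 : 4*i]: bit 0 enables counting at CPL 0, bit 1 at CPL > 0,
 * bit 2 is AnyThread and bit 3 requests a PMI on overflow.
 */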

#define ARCH_CNTR_ENABLED   (1ULL << 22)
#define ARCH_CNTR_PIN_CONTROL (1ULL << 19)

/* Number of general-purpose and fixed performance counters */
static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;

/* Masks used for testing whether an MSR is valid */
#define ARCH_CTRL_MASK  (~((1ull << 32) - 1) | (1ull << 21) | ARCH_CNTR_PIN_CONTROL)
static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
static uint64_t __read_mostly global_ovf_ctrl_mask, global_ctrl_mask;

/* Total size of PMU registers block (copied to/from PV(H) guest) */
static unsigned int __read_mostly regs_sz;
/* Offset into context of the beginning of PMU register block */
static const unsigned int regs_off =
        sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
        sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
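/*
 * Everything past these two offset fields in struct xen_pmu_intel_ctxt
 * (the control/status registers plus the variable-sized fixed and
 * architectural counter arrays appended to it) forms the regs_sz bytes
 * that are copied to and from the guest's shared area.
 */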

/*
 * QUIRK to work around an issue seen on various family 6 CPUs which can
 * lead to an endless loop of PMC interrupts: if a PMC reaches 0 while the
 * interrupt handler is running, it stays at 0 and immediately triggers
 * another interrupt once the handler finishes.
 * The workaround is to read all flagged counters and, for any counter that
 * reads 0, write a non-zero value (e.g. 1) back to it.
 * No erratum exists for this and the real cause of the behaviour is
 * unknown.
 */
bool_t __read_mostly is_pmc_quirk;

static void check_pmc_quirk(void)
{
    if ( current_cpu_data.x86 == 6 )
        is_pmc_quirk = 1;
    else
        is_pmc_quirk = 0;
}

static void handle_pmc_quirk(u64 msr_content)
{
    int i;
    u64 val;

    if ( !is_pmc_quirk )
        return;

    val = msr_content;
    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        if ( val & 0x1 )
        {
            u64 cnt;
            rdmsrl(MSR_P6_PERFCTR(i), cnt);
            if ( cnt == 0 )
                wrmsrl(MSR_P6_PERFCTR(i), 1);
        }
        val >>= 1;
    }
    val = msr_content >> 32;
    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        if ( val & 0x1 )
        {
            u64 cnt;
            rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt);
            if ( cnt == 0 )
                wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1);
        }
        val >>= 1;
    }
}
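
/*
 * handle_pmc_quirk() is called from core2_vpmu_do_interrupt() with the raw
 * MSR_CORE_PERF_GLOBAL_STATUS value: the low 32 bits flag general-purpose
 * counter overflows and bits 32 and up flag fixed counter overflows, which
 * is why the two loops above walk the value in that order.
 */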

/*
 * Read the number of general counters from CPUID leaf 0xa, EAX bits 8..15.
 */
static int core2_get_arch_pmc_count(void)
{
    u32 eax;

    eax = cpuid_eax(0xa);
    return MASK_EXTR(eax, PMU_GENERAL_NR_MASK);
}

/*
 * Read the number of fixed counters from CPUID leaf 0xa, EDX bits 0..4.
 */
static int core2_get_fixed_pmc_count(void)
{
    u32 edx = cpuid_edx(0xa);

    return MASK_EXTR(edx, PMU_FIXED_NR_MASK);
}

/* edx bits 5-12: Bit width of fixed-function performance counters */
static int core2_get_bitwidth_fix_count(void)
{
    u32 edx;

    edx = cpuid_edx(0xa);
    return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK);
}

static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
{
    u32 msr_index_pmc;

    switch ( msr_index )
    {
    case MSR_CORE_PERF_FIXED_CTR_CTRL:
    case MSR_IA32_DS_AREA:
    case MSR_IA32_PEBS_ENABLE:
        *type = MSR_TYPE_CTRL;
        return 1;

    case MSR_CORE_PERF_GLOBAL_CTRL:
    case MSR_CORE_PERF_GLOBAL_STATUS:
    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        *type = MSR_TYPE_GLOBAL;
        return 1;

    default:

        if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
             (msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) )
        {
            *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
            *type = MSR_TYPE_COUNTER;
            return 1;
        }

        if ( (msr_index >= MSR_P6_EVNTSEL(0)) &&
             (msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) )
        {
            *index = msr_index - MSR_P6_EVNTSEL(0);
            *type = MSR_TYPE_ARCH_CTRL;
            return 1;
        }

        msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
        if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) &&
             (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) )
        {
            *type = MSR_TYPE_ARCH_COUNTER;
            *index = msr_index_pmc - MSR_IA32_PERFCTR0;
            return 1;
        }
        return 0;
    }
}

static void core2_vpmu_set_msr_bitmap(struct vcpu *v)
{
    unsigned int i;

    /* Allow Read/Write PMU Counters MSR Directly. */
    for ( i = 0; i < fixed_pmc_cnt; i++ )
        vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        vmx_clear_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);

        if ( full_width_write )
            vmx_clear_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
    }

    /* Allow Read PMU Non-global Controls Directly. */
    for ( i = 0; i < arch_pmc_cnt; i++ )
        vmx_clear_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);

    vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
    vmx_clear_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
}

static void core2_vpmu_unset_msr_bitmap(struct vcpu *v)
{
    unsigned int i;

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        vmx_set_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);

        if ( full_width_write )
            vmx_set_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
    }

    for ( i = 0; i < arch_pmc_cnt; i++ )
        vmx_set_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);

    vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
    vmx_set_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
}

static inline void __core2_vpmu_save(struct vcpu *v)
{
    int i;
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
    for ( i = 0; i < arch_pmc_cnt; i++ )
        rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);

    if ( !is_hvm_vcpu(v) )
        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
}

static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !is_hvm_vcpu(v) )
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
        return 0;

    __core2_vpmu_save(v);

    /* Unset PMU MSR bitmap to trap lazy load. */
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && is_hvm_vcpu(v) &&
         cpu_has_vmx_msr_bitmap )
        core2_vpmu_unset_msr_bitmap(v);

    if ( to_guest )
    {
        ASSERT(!has_vlapic(v->domain));
        memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
               vpmu->context + regs_off, regs_sz);
    }

    return 1;
}
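
/*
 * Once the vPMU is no longer running, core2_vpmu_save() re-enables
 * interception of the counter MSRs, so the next guest access traps into
 * core2_vpmu_msr_common_check(), which lazily reloads the context and
 * opens up the MSR bitmap again.
 */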

static inline void __core2_vpmu_load(struct vcpu *v)
{
    unsigned int i, pmc_start;
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);

    if ( full_width_write )
        pmc_start = MSR_IA32_A_PERFCTR0;
    else
        pmc_start = MSR_IA32_PERFCTR0;
    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter);
        wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
    }

    wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
    if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) )
        wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area);

    if ( !is_hvm_vcpu(v) )
    {
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl);
        core2_vpmu_cxt->global_ovf_ctrl = 0;
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
    }
}

static int core2_vpmu_verify(struct vcpu *v)
{
    unsigned int i;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
    uint64_t fixed_ctrl;
    uint64_t *priv_context = vpmu->priv_context;
    uint64_t enabled_cntrs = 0;

    if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
        return -EINVAL;
    if ( core2_vpmu_cxt->global_ctrl & global_ctrl_mask )
        return -EINVAL;
    if ( core2_vpmu_cxt->pebs_enable )
        return -EINVAL;

    fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
    if ( fixed_ctrl & fixed_ctrl_mask )
        return -EINVAL;

    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        if ( fixed_counters[i] & fixed_counters_mask )
            return -EINVAL;
        if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
            enabled_cntrs |= (1ULL << i);
    }
    enabled_cntrs <<= 32;
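
    /*
     * enabled_cntrs mirrors the MSR_CORE_PERF_GLOBAL_CTRL bit layout:
     * general-purpose counter enables occupy the low bits, while the
     * fixed counter enables collected above sit at bit 32 and up after
     * the shift.
     */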

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        uint64_t control = xen_pmu_cntr_pair[i].control;

        if ( control & ARCH_CTRL_MASK )
            return -EINVAL;
        if ( control & ARCH_CNTR_ENABLED )
            enabled_cntrs |= (1ULL << i);
    }

    if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) &&
         !(is_hvm_vcpu(v)
           ? is_canonical_address(core2_vpmu_cxt->ds_area)
           : __addr_ok(core2_vpmu_cxt->ds_area)) )
        return -EINVAL;

    if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
         (core2_vpmu_cxt->ds_area != 0) )
        vpmu_set(vpmu, VPMU_RUNNING);
    else
        vpmu_reset(vpmu, VPMU_RUNNING);

    *priv_context = enabled_cntrs;

    return 0;
}

static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        return 0;

    if ( from_guest )
    {
        int ret;

        ASSERT(!has_vlapic(v->domain));

        memcpy(vpmu->context + regs_off,
               (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
               regs_sz);

        ret = core2_vpmu_verify(v);
        if ( ret )
        {
            /*
             * Not necessary since we should never load the context until
             * guest provides valid values. But just to be safe.
             */
            memset(vpmu->context + regs_off, 0, regs_sz);
            return ret;
        }
    }

    vpmu_set(vpmu, VPMU_CONTEXT_LOADED);

    __core2_vpmu_load(v);

    return 0;
}

static int core2_vpmu_alloc_resource(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL;
    uint64_t *p = NULL;

    if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
        return 0;

    if ( is_hvm_vcpu(v) )
    {
        if ( vmx_add_host_load_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, 0) )
            goto out_err;

        if ( vmx_add_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, 0) )
            goto out_err;
    }

    core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
                                   sizeof(uint64_t) * fixed_pmc_cnt +
                                   sizeof(struct xen_pmu_cntr_pair) *
                                   arch_pmc_cnt);
    p = xzalloc(uint64_t);
    if ( !core2_vpmu_cxt || !p )
        goto out_err;

    core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt);
    core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters +
                                    sizeof(uint64_t) * fixed_pmc_cnt;
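
    /*
     * The two values stored above are byte offsets from the start of the
     * context structure; vpmu_reg_pointer() adds them to the structure
     * pointer to reach the variable-sized fixed and architectural counter
     * arrays allocated immediately after the fixed-size fields.
     */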

    vpmu->context = core2_vpmu_cxt;
    vpmu->priv_context = p;

    if ( !has_vlapic(v->domain) )
    {
        /* Copy fixed/arch register offsets to shared area */
        ASSERT(vpmu->xenpmu_data);
        memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
    }

    vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);

    return 1;

 out_err:
    release_pmu_ownership(PMU_OWNER_HVM);

    xfree(core2_vpmu_cxt);
    xfree(p);

    printk("Failed to allocate VPMU resources for domain %u vcpu %u\n",
           v->domain->domain_id, v->vcpu_id);

    return 0;
}

static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(current);

    if ( !is_core2_vpmu_msr(msr_index, type, index) )
        return 0;

    if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) &&
         !core2_vpmu_alloc_resource(current) )
        return 0;

    /* Do the lazy load stuff. */
    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
    {
        __core2_vpmu_load(current);
        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);

        if ( is_hvm_vcpu(current) && cpu_has_vmx_msr_bitmap )
            core2_vpmu_set_msr_bitmap(current);
    }
    return 1;
}

static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
                               uint64_t supported)
{
    int i, tmp;
    int type = -1, index = -1;
    struct vcpu *v = current;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
    uint64_t *enabled_cntrs;

    if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
        return -EINVAL;

    ASSERT(!supported);

    if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
        /* Writing unsupported bits to a fixed counter */
        return -EINVAL;

    core2_vpmu_cxt = vpmu->context;
    enabled_cntrs = vpmu->priv_context;
    switch ( msr )
    {
    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        if ( msr_content & global_ovf_ctrl_mask )
            return -EINVAL;
        core2_vpmu_cxt->global_status &= ~msr_content;
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
        return 0;
    case MSR_CORE_PERF_GLOBAL_STATUS:
        gdprintk(XENLOG_INFO, "Cannot write read-only MSR: "
                 "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
        return -EINVAL;
    case MSR_IA32_PEBS_ENABLE:
        if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
                              XENPMU_FEATURE_ARCH_ONLY) )
            return -EINVAL;
        if ( msr_content )
            /* PEBS is reported as unavailable in MSR_IA32_MISC_ENABLE */
            return -EINVAL;
        return 0;
    case MSR_IA32_DS_AREA:
        if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
            return -EINVAL;
        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
        {
            if ( !(is_hvm_vcpu(v) ? is_canonical_address(msr_content)
                                  : __addr_ok(msr_content)) )
            {
                gdprintk(XENLOG_WARNING,
                         "Illegal address for IA32_DS_AREA: %#" PRIx64 "\n",
                         msr_content);
                return -EINVAL;
            }
            core2_vpmu_cxt->ds_area = msr_content;
            break;
        }
        gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
        return 0;
    case MSR_CORE_PERF_GLOBAL_CTRL:
        if ( msr_content & global_ctrl_mask )
            return -EINVAL;
        core2_vpmu_cxt->global_ctrl = msr_content;
        break;
    case MSR_CORE_PERF_FIXED_CTR_CTRL:
        if ( msr_content & fixed_ctrl_mask )
            return -EINVAL;

        if ( is_hvm_vcpu(v) )
            vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
                               &core2_vpmu_cxt->global_ctrl);
        else
            rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
        *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
        if ( msr_content != 0 )
        {
            u64 val = msr_content;
            for ( i = 0; i < fixed_pmc_cnt; i++ )
            {
                if ( val & 3 )
                    *enabled_cntrs |= (1ULL << 32) << i;
                val >>= FIXED_CTR_CTRL_BITS;
            }
        }

        core2_vpmu_cxt->fixed_ctrl = msr_content;
        break;
    default:
        tmp = msr - MSR_P6_EVNTSEL(0);
        if ( tmp >= 0 && tmp < arch_pmc_cnt )
        {
            bool_t blocked = 0;
            uint64_t umaskevent = msr_content & MSR_IA32_CMT_EVTSEL_UE_MASK;
            struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
                vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

            if ( msr_content & ARCH_CTRL_MASK )
                return -EINVAL;

            /* PMC filters */
            if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
                                  XENPMU_FEATURE_ARCH_ONLY) )
            {
                blocked = 1;
                switch ( umaskevent )
                {
                /*
                 * See the Pre-Defined Architectural Performance Events table
                 * from the Intel 64 and IA-32 Architectures Software
                 * Developer's Manual, Volume 3B, System Programming Guide,
                 * Part 2.
                 */
                case 0x003c:    /* UnHalted Core Cycles */
                case 0x013c:    /* UnHalted Reference Cycles */
                case 0x00c0:    /* Instructions Retired */
                    blocked = 0;
                    break;
                }
            }

            if ( vpmu_features & XENPMU_FEATURE_ARCH_ONLY )
            {
                /* Additional counters beyond IPC only; blocked already set. */
                switch ( umaskevent )
                {
                case 0x4f2e:    /* Last Level Cache References */
                case 0x412e:    /* Last Level Cache Misses */
                case 0x00c4:    /* Branch Instructions Retired */
                case 0x00c5:    /* All Branch Mispredict Retired */
                    blocked = 0;
                    break;
                }
            }

            if ( blocked )
                return -EINVAL;

            if ( is_hvm_vcpu(v) )
                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
                                   &core2_vpmu_cxt->global_ctrl);
            else
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);

            if ( msr_content & ARCH_CNTR_ENABLED )
                *enabled_cntrs |= 1ULL << tmp;
            else
                *enabled_cntrs &= ~(1ULL << tmp);

            xen_pmu_cntr_pair[tmp].control = msr_content;
        }
    }

    if ( type != MSR_TYPE_GLOBAL )
        wrmsrl(msr, msr_content);
    else
    {
        if ( is_hvm_vcpu(v) )
            vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
        else
            wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
    }

    if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
         (core2_vpmu_cxt->ds_area != 0) )
        vpmu_set(vpmu, VPMU_RUNNING);
    else
        vpmu_reset(vpmu, VPMU_RUNNING);

    return 0;
}

static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
{
    int type = -1, index = -1;
    struct vcpu *v = current;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;

    if ( core2_vpmu_msr_common_check(msr, &type, &index) )
    {
        core2_vpmu_cxt = vpmu->context;
        switch ( msr )
        {
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
            *msr_content = 0;
            break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
            *msr_content = core2_vpmu_cxt->global_status;
            break;
        case MSR_CORE_PERF_GLOBAL_CTRL:
            if ( is_hvm_vcpu(v) )
                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
            else
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
            break;
        default:
            rdmsrl(msr, *msr_content);
        }
    }
    else if ( msr == MSR_IA32_MISC_ENABLE )
    {
        /* Extension for BTS */
        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
            *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
        *msr_content |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
    }

    return 0;
}

/* Dump vpmu info on console, called in the context of keyhandler 'q'. */
static void core2_vpmu_dump(const struct vcpu *v)
{
    const struct vpmu_struct *vpmu = vcpu_vpmu(v);
    unsigned int i;
    const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
    u64 val;
    uint64_t *fixed_counters;
    struct xen_pmu_cntr_pair *cntr_pair;

    if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return;

    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
    {
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
            printk("    vPMU loaded\n");
        else
            printk("    vPMU allocated\n");
        return;
    }

    printk("    vPMU running\n");

    cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
    fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);

    /* Print the contents of the counter and its configuration msr. */
    for ( i = 0; i < arch_pmc_cnt; i++ )
        printk("      general_%d: 0x%016lx ctrl: 0x%016lx\n",
               i, cntr_pair[i].counter, cntr_pair[i].control);

    /*
     * The configuration of the fixed counter is 4 bits each in the
     * MSR_CORE_PERF_FIXED_CTR_CTRL.
     */
    val = core2_vpmu_cxt->fixed_ctrl;
    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        printk("      fixed_%d:   0x%016lx ctrl: %#lx\n",
               i, fixed_counters[i],
               val & FIXED_CTR_CTRL_MASK);
        val >>= FIXED_CTR_CTRL_BITS;
    }
}
static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    u64 msr_content;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;

    rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
    if ( msr_content )
    {
        if ( is_pmc_quirk )
            handle_pmc_quirk(msr_content);
        core2_vpmu_cxt->global_status |= msr_content;
        msr_content &= ~global_ovf_ctrl_mask;
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
    }
    else
    {
        /* No PMC overflow but perhaps a Trace Message interrupt. */
        __vmread(GUEST_IA32_DEBUGCTL, &msr_content);
        if ( !(msr_content & IA32_DEBUGCTLMSR_TR) )
            return 0;
    }

    return 1;
}
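
/*
 * Returning 1 above indicates to the common vpmu_do_interrupt() code that
 * the PMI was caused by this vCPU's counters (an overflow or a Trace
 * Message with branch tracing enabled); returning 0 means the interrupt
 * was not a vPMU event.
 */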

static void core2_vpmu_destroy(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    xfree(vpmu->context);
    vpmu->context = NULL;
    xfree(vpmu->priv_context);
    vpmu->priv_context = NULL;
    if ( is_hvm_vcpu(v) && cpu_has_vmx_msr_bitmap )
        core2_vpmu_unset_msr_bitmap(v);
    release_pmu_ownership(PMU_OWNER_HVM);
    vpmu_clear(vpmu);
}

static const struct arch_vpmu_ops core2_vpmu_ops = {
    .do_wrmsr = core2_vpmu_do_wrmsr,
    .do_rdmsr = core2_vpmu_do_rdmsr,
    .do_interrupt = core2_vpmu_do_interrupt,
    .arch_vpmu_destroy = core2_vpmu_destroy,
    .arch_vpmu_save = core2_vpmu_save,
    .arch_vpmu_load = core2_vpmu_load,
    .arch_vpmu_dump = core2_vpmu_dump
};

int vmx_vpmu_initialise(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    u64 msr_content;
    static bool_t ds_warned;

    if ( vpmu_mode == XENPMU_MODE_OFF )
        return 0;

    if ( v->domain->arch.cpuid->basic.pmu_version <= 1 ||
         v->domain->arch.cpuid->basic.pmu_version >= 5 )
        return -EINVAL;

    if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
        return -EINVAL;

    if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
        goto func_out;
    /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */
    while ( boot_cpu_has(X86_FEATURE_DS) )
    {
        if ( !boot_cpu_has(X86_FEATURE_DTES64) )
        {
            if ( !ds_warned )
                printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
                       " - Debug Store disabled for guests\n");
            break;
        }
        vpmu_set(vpmu, VPMU_CPU_HAS_DS);
        rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
        if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
        {
            /* If BTS_UNAVAIL is set reset the DS feature. */
            vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
            if ( !ds_warned )
                printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
                       " - Debug Store disabled for guests\n");
            break;
        }

        vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
        if ( !ds_warned )
        {
            if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
                printk(XENLOG_G_INFO
                       "vpmu: CPU doesn't support CPL-Qualified BTS\n");
            printk("******************************************************\n");
            printk("** WARNING: Emulation of BTS Feature is switched on **\n");
            printk("** Using this processor feature in a virtualized    **\n");
            printk("** environment is not 100%% safe.                    **\n");
            printk("** Setting the DS buffer address with wrong values  **\n");
            printk("** may lead to hypervisor hangs or crashes.         **\n");
            printk("** It is NOT recommended for production use!        **\n");
            printk("******************************************************\n");
        }
        break;
    }
    ds_warned = 1;
 func_out:

    /* PV domains can allocate resources immediately */
    if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
        return -EIO;

    vpmu->arch_vpmu_ops = &core2_vpmu_ops;

    return 0;
}

int __init core2_vpmu_init(void)
{
    unsigned int version = 0;
    unsigned int i;

    if ( current_cpu_data.cpuid_level >= 0xa )
        version = MASK_EXTR(cpuid_eax(0xa), PMU_VERSION_MASK);

    switch ( version )
    {
    case 4:
        printk(XENLOG_INFO "VPMU: PMU version 4 is not fully supported. "
               "Emulating version 3\n");
        /* FALLTHROUGH */

    case 2:
    case 3:
        break;

    default:
        printk(XENLOG_WARNING "VPMU: PMU version %u is not supported\n",
               version);
        return -EINVAL;
    }

    if ( current_cpu_data.x86 != 6 )
    {
        printk(XENLOG_WARNING "VPMU: only family 6 is supported\n");
        return -EINVAL;
    }

    arch_pmc_cnt = core2_get_arch_pmc_count();
    fixed_pmc_cnt = core2_get_fixed_pmc_count();

    if ( cpu_has_pdcm )
    {
        uint64_t caps;

        rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
        full_width_write = (caps >> 13) & 1;
    }

    fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
    /* mask .AnyThread bits for all fixed counters */
    for ( i = 0; i < fixed_pmc_cnt; i++ )
        fixed_ctrl_mask |=
            (FIXED_CTR_CTRL_ANYTHREAD_MASK << (FIXED_CTR_CTRL_BITS * i));

    fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
    global_ctrl_mask = ~((((1ULL << fixed_pmc_cnt) - 1) << 32) |
                         ((1ULL << arch_pmc_cnt) - 1));
    global_ovf_ctrl_mask = ~(0xC000000000000000 |
                             (((1ULL << fixed_pmc_cnt) - 1) << 32) |
                             ((1ULL << arch_pmc_cnt) - 1));
    if ( version > 2 )
        /*
         * Even though we don't support Uncore counters, guests should be
         * able to clear all available overflows.
         */
        global_ovf_ctrl_mask &= ~(1ULL << 61);
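
    /*
     * Illustrative example (hypothetical counts): with 4 general-purpose
     * and 3 fixed counters, global_ctrl_mask ends up as ~0x70000000f and
     * global_ovf_ctrl_mask as ~0xc00000070000000f, relaxed to
     * ~0xe00000070000000f by the version > 2 adjustment above.
     */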

    regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
              sizeof(uint64_t) * fixed_pmc_cnt +
              sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;

    check_pmc_quirk();

    if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
         sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
    {
        printk(XENLOG_WARNING
               "VPMU: Register bank does not fit into VPMU share page\n");
        arch_pmc_cnt = fixed_pmc_cnt = 0;
        return -ENOSPC;
    }

    return 0;
}