/*
 * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain.
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Haitao Shan <haitao.shan@intel.com>
 */

#include <xen/sched.h>
#include <xen/xenoprof.h>
#include <xen/irq.h>
#include <asm/system.h>
#include <asm/regs.h>
#include <asm/types.h>
#include <asm/apic.h>
#include <asm/traps.h>
#include <asm/msr.h>
#include <asm/msr-index.h>
#include <asm/vpmu.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <public/sched.h>
#include <public/hvm/save.h>
#include <public/pmu.h>

/*
 * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID
 * instruction.
 * cpuid 0xa - Architectural Performance Monitoring Leaf
 * Register eax
 */
#define PMU_VERSION_SHIFT 0 /* Version ID */
#define PMU_VERSION_BITS 8 /* 8 bits 0..7 */
#define PMU_VERSION_MASK (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT)

#define PMU_GENERAL_NR_SHIFT 8 /* Number of general pmu registers */
#define PMU_GENERAL_NR_BITS 8 /* 8 bits 8..15 */
#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT)

#define PMU_GENERAL_WIDTH_SHIFT 16 /* Width of general pmu registers */
#define PMU_GENERAL_WIDTH_BITS 8 /* 8 bits 16..23 */
#define PMU_GENERAL_WIDTH_MASK (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT)
/* Register edx */
#define PMU_FIXED_NR_SHIFT 0 /* Number of fixed pmu registers */
#define PMU_FIXED_NR_BITS 5 /* 5 bits 0..4 */
#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) - 1) << PMU_FIXED_NR_SHIFT)

#define PMU_FIXED_WIDTH_SHIFT 5 /* Width of fixed pmu registers */
#define PMU_FIXED_WIDTH_BITS 8 /* 8 bits 5..12 */
#define PMU_FIXED_WIDTH_MASK (((1 << PMU_FIXED_WIDTH_BITS) - 1) << PMU_FIXED_WIDTH_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
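/*
 * MSR_IA32_PERFCTR0 (0xc1) and MSR_IA32_A_PERFCTR0 (0x4c1) differ only in
 * bit 10, so clearing that bit via this mask folds the full-width alias
 * range onto the legacy PMC range.
 */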
#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0))
static bool_t __read_mostly full_width_write;

/*
 * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed
 * counters. 4 bits for every counter.
 */
#define FIXED_CTR_CTRL_BITS 4
#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
#define FIXED_CTR_CTRL_ANYTHREAD_MASK 0x4

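/* Bit 22 (EN) and bit 19 (PC, pin control) of the IA32_PERFEVTSELx MSRs. */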
#define ARCH_CNTR_ENABLED (1ULL << 22)
#define ARCH_CNTR_PIN_CONTROL (1ULL << 19)

/* Number of general-purpose and fixed performance counters */
static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;

/* Masks used for testing whether an MSR is valid */
#define ARCH_CTRL_MASK (~((1ull << 32) - 1) | (1ull << 21) | ARCH_CNTR_PIN_CONTROL)
static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
static uint64_t __read_mostly global_ovf_ctrl_mask, global_ctrl_mask;

/* Total size of PMU registers block (copied to/from PV(H) guest) */
static unsigned int __read_mostly regs_sz;
/* Offset into context of the beginning of PMU register block */
static const unsigned int regs_off =
    sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
    sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
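/*
 * fixed_counters and arch_counters are the leading members of
 * struct xen_pmu_intel_ctxt and hold offsets to the counter arrays (see
 * core2_vpmu_alloc_resource()), so the register block proper starts right
 * after them.
 */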

/*
 * QUIRK to work around an issue on various family 6 CPUs.
 * The issue leads to endless PMC interrupt loops on the processor.
 * If the interrupt handler is running and a PMC reaches the value 0, this
 * value remains forever and immediately triggers a new interrupt once the
 * handler finishes.
 * The workaround is to read all flagged counters and, if the value is 0,
 * write 1 (or any other value != 0) into them.
 * No erratum exists for this, and the real cause of the behaviour is unknown.
 */
bool_t __read_mostly is_pmc_quirk;

static void check_pmc_quirk(void)
{
    if ( current_cpu_data.x86 == 6 )
        is_pmc_quirk = 1;
    else
        is_pmc_quirk = 0;
}

static void handle_pmc_quirk(u64 msr_content)
{
    int i;
    u64 val;

    if ( !is_pmc_quirk )
        return;

    val = msr_content;
    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        if ( val & 0x1 )
        {
            u64 cnt;
            rdmsrl(MSR_P6_PERFCTR(i), cnt);
            if ( cnt == 0 )
                wrmsrl(MSR_P6_PERFCTR(i), 1);
        }
        val >>= 1;
    }
    val = msr_content >> 32;
    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        if ( val & 0x1 )
        {
            u64 cnt;
            rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt);
            if ( cnt == 0 )
                wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1);
        }
        val >>= 1;
    }
}

/*
 * Read the number of general counters via CPUID.0xa:EAX[15:8].
 */
static int core2_get_arch_pmc_count(void)
{
    u32 eax;

    eax = cpuid_eax(0xa);
    return MASK_EXTR(eax, PMU_GENERAL_NR_MASK);
}

/*
 * Read the number of fixed counters via CPUID.0xa:EDX[4:0].
 */
static int core2_get_fixed_pmc_count(void)
{
    u32 edx = cpuid_edx(0xa);

    return MASK_EXTR(edx, PMU_FIXED_NR_MASK);
}

/* edx bits 5-12: Bit width of fixed-function performance counters */
static int core2_get_bitwidth_fix_count(void)
{
    u32 edx;

    edx = cpuid_edx(0xa);
    return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK);
}

static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
{
    u32 msr_index_pmc;

    switch ( msr_index )
    {
    case MSR_CORE_PERF_FIXED_CTR_CTRL:
    case MSR_IA32_DS_AREA:
    case MSR_IA32_PEBS_ENABLE:
        *type = MSR_TYPE_CTRL;
        return 1;

    case MSR_CORE_PERF_GLOBAL_CTRL:
    case MSR_CORE_PERF_GLOBAL_STATUS:
    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        *type = MSR_TYPE_GLOBAL;
        return 1;

    default:

        if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
             (msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) )
        {
            *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
            *type = MSR_TYPE_COUNTER;
            return 1;
        }

        if ( (msr_index >= MSR_P6_EVNTSEL(0)) &&
             (msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) )
        {
            *index = msr_index - MSR_P6_EVNTSEL(0);
            *type = MSR_TYPE_ARCH_CTRL;
            return 1;
        }

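        /*
         * Fold the full-width alias MSRs (MSR_IA32_A_PERFCTRn) onto the
         * legacy PMC range before the range check below.
         */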
        msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
        if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) &&
             (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) )
        {
            *type = MSR_TYPE_ARCH_COUNTER;
            *index = msr_index_pmc - MSR_IA32_PERFCTR0;
            return 1;
        }
        return 0;
    }
}

static void core2_vpmu_set_msr_bitmap(struct vcpu *v)
{
    unsigned int i;

    /* Allow Read/Write PMU Counters MSR Directly. */
    for ( i = 0; i < fixed_pmc_cnt; i++ )
        vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        vmx_clear_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);

        if ( full_width_write )
            vmx_clear_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
    }

    /* Allow Read PMU Non-global Controls Directly. */
    for ( i = 0; i < arch_pmc_cnt; i++ )
        vmx_clear_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);

    vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
    vmx_clear_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
}

static void core2_vpmu_unset_msr_bitmap(struct vcpu *v)
{
    unsigned int i;

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        vmx_set_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);

        if ( full_width_write )
            vmx_set_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
    }

    for ( i = 0; i < arch_pmc_cnt; i++ )
        vmx_set_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);

    vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
    vmx_set_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
}

static inline void __core2_vpmu_save(struct vcpu *v)
{
    int i;
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
    for ( i = 0; i < arch_pmc_cnt; i++ )
        rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);

    if ( !is_hvm_vcpu(v) )
        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
}

static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !is_hvm_vcpu(v) )
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
        return 0;

    __core2_vpmu_save(v);

    /* Unset PMU MSR bitmap to trap lazy load. */
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && is_hvm_vcpu(v) &&
         cpu_has_vmx_msr_bitmap )
        core2_vpmu_unset_msr_bitmap(v);

    if ( to_guest )
    {
        ASSERT(!has_vlapic(v->domain));
        memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
               vpmu->context + regs_off, regs_sz);
    }

    return 1;
}

static inline void __core2_vpmu_load(struct vcpu *v)
{
    unsigned int i, pmc_start;
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);

    if ( full_width_write )
        pmc_start = MSR_IA32_A_PERFCTR0;
    else
        pmc_start = MSR_IA32_PERFCTR0;
    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter);
        wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
    }

    wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
    if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) )
        wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area);

    if ( !is_hvm_vcpu(v) )
    {
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl);
        core2_vpmu_cxt->global_ovf_ctrl = 0;
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
    }
}

static int core2_vpmu_verify(struct vcpu *v)
{
    unsigned int i;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
    uint64_t fixed_ctrl;
    uint64_t *priv_context = vpmu->priv_context;
    uint64_t enabled_cntrs = 0;

    if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
        return -EINVAL;
    if ( core2_vpmu_cxt->global_ctrl & global_ctrl_mask )
        return -EINVAL;
    if ( core2_vpmu_cxt->pebs_enable )
        return -EINVAL;

    fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
    if ( fixed_ctrl & fixed_ctrl_mask )
        return -EINVAL;

    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        if ( fixed_counters[i] & fixed_counters_mask )
            return -EINVAL;
        if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
            enabled_cntrs |= (1ULL << i);
    }
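    /*
     * Fixed-counter enable bits are kept in the upper half of enabled_cntrs,
     * mirroring the MSR_CORE_PERF_GLOBAL_CTRL layout, where fixed counters
     * start at bit 32.
     */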
    enabled_cntrs <<= 32;

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        uint64_t control = xen_pmu_cntr_pair[i].control;

        if ( control & ARCH_CTRL_MASK )
            return -EINVAL;
        if ( control & ARCH_CNTR_ENABLED )
            enabled_cntrs |= (1ULL << i);
    }

    if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) &&
         !(is_hvm_vcpu(v)
           ? is_canonical_address(core2_vpmu_cxt->ds_area)
           : __addr_ok(core2_vpmu_cxt->ds_area)) )
        return -EINVAL;

    if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
         (core2_vpmu_cxt->ds_area != 0) )
        vpmu_set(vpmu, VPMU_RUNNING);
    else
        vpmu_reset(vpmu, VPMU_RUNNING);

    *priv_context = enabled_cntrs;

    return 0;
}

static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        return 0;

    if ( from_guest )
    {
        int ret;

        ASSERT(!has_vlapic(v->domain));

        memcpy(vpmu->context + regs_off,
               (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
               regs_sz);

        ret = core2_vpmu_verify(v);
        if ( ret )
        {
            /*
             * Not necessary since we should never load the context until
             * the guest has provided valid values. But just to be safe.
             */
            memset(vpmu->context + regs_off, 0, regs_sz);
            return ret;
        }
    }

    vpmu_set(vpmu, VPMU_CONTEXT_LOADED);

    __core2_vpmu_load(v);

    return 0;
}

static int core2_vpmu_alloc_resource(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL;
    uint64_t *p = NULL;

    if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
        return 0;

    if ( is_hvm_vcpu(v) )
    {
        if ( vmx_add_host_load_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, 0) )
            goto out_err;

        if ( vmx_add_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, 0) )
            goto out_err;
    }

    core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
                                   sizeof(uint64_t) * fixed_pmc_cnt +
                                   sizeof(struct xen_pmu_cntr_pair) *
                                   arch_pmc_cnt);
    p = xzalloc(uint64_t);
    if ( !core2_vpmu_cxt || !p )
        goto out_err;

    core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt);
    core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters +
                                    sizeof(uint64_t) * fixed_pmc_cnt;
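    /*
     * The fixed and architectural counter arrays are laid out immediately
     * after the fixed-size part of the context; the two offsets stored
     * above are what vpmu_reg_pointer() uses to locate them.
     */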

    vpmu->context = core2_vpmu_cxt;
    vpmu->priv_context = p;

    if ( !has_vlapic(v->domain) )
    {
        /* Copy fixed/arch register offsets to shared area */
        ASSERT(vpmu->xenpmu_data);
        memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
    }

    vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);

    return 1;

 out_err:
    release_pmu_ownership(PMU_OWNER_HVM);

    xfree(core2_vpmu_cxt);
    xfree(p);

    printk("Failed to allocate VPMU resources for domain %u vcpu %u\n",
           v->domain->domain_id, v->vcpu_id);

    return 0;
}

static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(current);

    if ( !is_core2_vpmu_msr(msr_index, type, index) )
        return 0;

    if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) &&
         !core2_vpmu_alloc_resource(current) )
        return 0;

    /* Do the lazy load stuff. */
    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
    {
        __core2_vpmu_load(current);
        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);

        if ( is_hvm_vcpu(current) && cpu_has_vmx_msr_bitmap )
            core2_vpmu_set_msr_bitmap(current);
    }
    return 1;
}

static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
                               uint64_t supported)
{
    int i, tmp;
    int type = -1, index = -1;
    struct vcpu *v = current;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
    uint64_t *enabled_cntrs;

    if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
        return -EINVAL;

    ASSERT(!supported);

    if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
        /* Writing unsupported bits to a fixed counter */
        return -EINVAL;

    core2_vpmu_cxt = vpmu->context;
    enabled_cntrs = vpmu->priv_context;
    switch ( msr )
    {
    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        if ( msr_content & global_ovf_ctrl_mask )
            return -EINVAL;
        core2_vpmu_cxt->global_status &= ~msr_content;
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
        return 0;
    case MSR_CORE_PERF_GLOBAL_STATUS:
        gdprintk(XENLOG_INFO, "Cannot write read-only MSR: "
                 "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
        return -EINVAL;
    case MSR_IA32_PEBS_ENABLE:
        if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
                              XENPMU_FEATURE_ARCH_ONLY) )
            return -EINVAL;
        if ( msr_content )
            /* PEBS is reported as unavailable in MSR_IA32_MISC_ENABLE */
            return -EINVAL;
        return 0;
    case MSR_IA32_DS_AREA:
        if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
            return -EINVAL;
        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
        {
            if ( !(is_hvm_vcpu(v) ? is_canonical_address(msr_content)
                                  : __addr_ok(msr_content)) )
            {
                gdprintk(XENLOG_WARNING,
                         "Illegal address for IA32_DS_AREA: %#" PRIx64 "\n",
                         msr_content);
                return -EINVAL;
            }
            core2_vpmu_cxt->ds_area = msr_content;
            break;
        }
        gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
        return 0;
    case MSR_CORE_PERF_GLOBAL_CTRL:
        if ( msr_content & global_ctrl_mask )
            return -EINVAL;
        core2_vpmu_cxt->global_ctrl = msr_content;
        break;
    case MSR_CORE_PERF_FIXED_CTR_CTRL:
        if ( msr_content & fixed_ctrl_mask )
            return -EINVAL;

        if ( is_hvm_vcpu(v) )
            vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
                               &core2_vpmu_cxt->global_ctrl);
        else
            rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
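        /*
         * Refresh the fixed-counter enable bits, which live in the upper
         * half of *enabled_cntrs.
         */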
        *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
        if ( msr_content != 0 )
        {
            u64 val = msr_content;
            for ( i = 0; i < fixed_pmc_cnt; i++ )
            {
                if ( val & 3 )
                    *enabled_cntrs |= (1ULL << 32) << i;
                val >>= FIXED_CTR_CTRL_BITS;
            }
        }

        core2_vpmu_cxt->fixed_ctrl = msr_content;
        break;
    default:
        tmp = msr - MSR_P6_EVNTSEL(0);
        if ( tmp >= 0 && tmp < arch_pmc_cnt )
        {
            bool_t blocked = 0;
            uint64_t umaskevent = msr_content & MSR_IA32_CMT_EVTSEL_UE_MASK;
            struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
                vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

            if ( msr_content & ARCH_CTRL_MASK )
                return -EINVAL;

            /* PMC filters */
            if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
                                  XENPMU_FEATURE_ARCH_ONLY) )
            {
                blocked = 1;
                switch ( umaskevent )
                {
                /*
                 * See the Pre-Defined Architectural Performance Events table
                 * from the Intel 64 and IA-32 Architectures Software
                 * Developer's Manual, Volume 3B, System Programming Guide,
                 * Part 2.
                 */
                case 0x003c: /* UnHalted Core Cycles */
                case 0x013c: /* UnHalted Reference Cycles */
                case 0x00c0: /* Instructions Retired */
                    blocked = 0;
                    break;
                }
            }

            if ( vpmu_features & XENPMU_FEATURE_ARCH_ONLY )
            {
                /* Additional counters beyond IPC only; blocked already set. */
                switch ( umaskevent )
                {
                case 0x4f2e: /* Last Level Cache References */
                case 0x412e: /* Last Level Cache Misses */
                case 0x00c4: /* Branch Instructions Retired */
                case 0x00c5: /* All Branch Mispredict Retired */
                    blocked = 0;
                    break;
                }
            }

            if ( blocked )
                return -EINVAL;

            if ( is_hvm_vcpu(v) )
                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
                                   &core2_vpmu_cxt->global_ctrl);
            else
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);

            if ( msr_content & ARCH_CNTR_ENABLED )
                *enabled_cntrs |= 1ULL << tmp;
            else
                *enabled_cntrs &= ~(1ULL << tmp);

            xen_pmu_cntr_pair[tmp].control = msr_content;
        }
    }

    if ( type != MSR_TYPE_GLOBAL )
        wrmsrl(msr, msr_content);
    else
    {
        if ( is_hvm_vcpu(v) )
            vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
        else
            wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
    }

    if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
         (core2_vpmu_cxt->ds_area != 0) )
        vpmu_set(vpmu, VPMU_RUNNING);
    else
        vpmu_reset(vpmu, VPMU_RUNNING);

    return 0;
}

static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
{
    int type = -1, index = -1;
    struct vcpu *v = current;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;

    if ( core2_vpmu_msr_common_check(msr, &type, &index) )
    {
        core2_vpmu_cxt = vpmu->context;
        switch ( msr )
        {
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
            *msr_content = 0;
            break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
            *msr_content = core2_vpmu_cxt->global_status;
            break;
        case MSR_CORE_PERF_GLOBAL_CTRL:
            if ( is_hvm_vcpu(v) )
                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
            else
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
            break;
        default:
            rdmsrl(msr, *msr_content);
        }
    }
    else if ( msr == MSR_IA32_MISC_ENABLE )
    {
        /* Extension for BTS */
        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
            *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
        *msr_content |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
    }

    return 0;
}

/* Dump vpmu info on console, called in the context of keyhandler 'q'. */
static void core2_vpmu_dump(const struct vcpu *v)
{
    const struct vpmu_struct *vpmu = vcpu_vpmu(v);
    unsigned int i;
    const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
    u64 val;
    uint64_t *fixed_counters;
    struct xen_pmu_cntr_pair *cntr_pair;

    if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return;

    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
    {
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
            printk(" vPMU loaded\n");
        else
            printk(" vPMU allocated\n");
        return;
    }

    printk(" vPMU running\n");

    cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
    fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);

    /* Print the contents of each counter and its configuration MSR. */
    for ( i = 0; i < arch_pmc_cnt; i++ )
        printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n",
               i, cntr_pair[i].counter, cntr_pair[i].control);

    /*
     * The configuration of each fixed counter is 4 bits wide, packed into
     * MSR_CORE_PERF_FIXED_CTR_CTRL.
     */
    val = core2_vpmu_cxt->fixed_ctrl;
    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        printk(" fixed_%d: 0x%016lx ctrl: %#lx\n",
               i, fixed_counters[i],
               val & FIXED_CTR_CTRL_MASK);
        val >>= FIXED_CTR_CTRL_BITS;
    }
}

static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    u64 msr_content;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;

    rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
    if ( msr_content )
    {
        if ( is_pmc_quirk )
            handle_pmc_quirk(msr_content);
        core2_vpmu_cxt->global_status |= msr_content;
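        /*
         * Acknowledge the overflows in hardware, clearing only bits that
         * are writable in MSR_CORE_PERF_GLOBAL_OVF_CTRL.
         */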
        msr_content &= ~global_ovf_ctrl_mask;
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
    }
    else
    {
        /* No PMC overflow but perhaps a Trace Message interrupt. */
        __vmread(GUEST_IA32_DEBUGCTL, &msr_content);
        if ( !(msr_content & IA32_DEBUGCTLMSR_TR) )
            return 0;
    }

    return 1;
}

static void core2_vpmu_destroy(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    xfree(vpmu->context);
    vpmu->context = NULL;
    xfree(vpmu->priv_context);
    vpmu->priv_context = NULL;
    if ( is_hvm_vcpu(v) && cpu_has_vmx_msr_bitmap )
        core2_vpmu_unset_msr_bitmap(v);
    release_pmu_ownership(PMU_OWNER_HVM);
    vpmu_clear(vpmu);
}

static const struct arch_vpmu_ops core2_vpmu_ops = {
    .do_wrmsr = core2_vpmu_do_wrmsr,
    .do_rdmsr = core2_vpmu_do_rdmsr,
    .do_interrupt = core2_vpmu_do_interrupt,
    .arch_vpmu_destroy = core2_vpmu_destroy,
    .arch_vpmu_save = core2_vpmu_save,
    .arch_vpmu_load = core2_vpmu_load,
    .arch_vpmu_dump = core2_vpmu_dump
};

int vmx_vpmu_initialise(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    u64 msr_content;
    static bool_t ds_warned;

    if ( vpmu_mode == XENPMU_MODE_OFF )
        return 0;

    if ( v->domain->arch.cpuid->basic.pmu_version <= 1 ||
         v->domain->arch.cpuid->basic.pmu_version >= 5 )
        return -EINVAL;

    if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
        return -EINVAL;

    if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
        goto func_out;
    /* Check the 'Debug Store' feature in CPUID.1:EDX[21]. */
    while ( boot_cpu_has(X86_FEATURE_DS) )
    {
        if ( !boot_cpu_has(X86_FEATURE_DTES64) )
        {
            if ( !ds_warned )
                printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
                       " - Debug Store disabled for guests\n");
            break;
        }
        vpmu_set(vpmu, VPMU_CPU_HAS_DS);
        rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
        if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
        {
            /* If BTS_UNAVAIL is set, reset the DS feature. */
            vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
            if ( !ds_warned )
                printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
                       " - Debug Store disabled for guests\n");
            break;
        }

        vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
        if ( !ds_warned )
        {
            if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
                printk(XENLOG_G_INFO
                       "vpmu: CPU doesn't support CPL-Qualified BTS\n");
            printk("******************************************************\n");
            printk("** WARNING: Emulation of BTS Feature is switched on **\n");
            printk("** Using this processor feature in a virtualized **\n");
            printk("** environment is not 100%% safe. **\n");
            printk("** Setting the DS buffer address with wrong values **\n");
            printk("** may lead to hypervisor hangs or crashes. **\n");
            printk("** It is NOT recommended for production use! **\n");
            printk("******************************************************\n");
        }
        break;
    }
    ds_warned = 1;
 func_out:

    /* PV domains can allocate resources immediately */
    if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
        return -EIO;

    vpmu->arch_vpmu_ops = &core2_vpmu_ops;

    return 0;
}

int __init core2_vpmu_init(void)
{
    unsigned int version = 0;
    unsigned int i;

    if ( current_cpu_data.cpuid_level >= 0xa )
        version = MASK_EXTR(cpuid_eax(0xa), PMU_VERSION_MASK);

    switch ( version )
    {
    case 4:
        printk(XENLOG_INFO "VPMU: PMU version 4 is not fully supported. "
               "Emulating version 3\n");
        /* FALLTHROUGH */

    case 2:
    case 3:
        break;

    default:
        printk(XENLOG_WARNING "VPMU: PMU version %u is not supported\n",
               version);
        return -EINVAL;
    }

    if ( current_cpu_data.x86 != 6 )
    {
        printk(XENLOG_WARNING "VPMU: only family 6 is supported\n");
        return -EINVAL;
    }

    arch_pmc_cnt = core2_get_arch_pmc_count();
    fixed_pmc_cnt = core2_get_fixed_pmc_count();

    if ( cpu_has_pdcm )
    {
        uint64_t caps;

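        /* Bit 13 of IA32_PERF_CAPABILITIES: full-width writable counters. */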
        rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
        full_width_write = (caps >> 13) & 1;
    }

    fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
    /* Mask AnyThread bits for all fixed counters. */
    for ( i = 0; i < fixed_pmc_cnt; i++ )
        fixed_ctrl_mask |=
            (FIXED_CTR_CTRL_ANYTHREAD_MASK << (FIXED_CTR_CTRL_BITS * i));

    fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
    global_ctrl_mask = ~((((1ULL << fixed_pmc_cnt) - 1) << 32) |
                         ((1ULL << arch_pmc_cnt) - 1));
    global_ovf_ctrl_mask = ~(0xC000000000000000 |
                             (((1ULL << fixed_pmc_cnt) - 1) << 32) |
                             ((1ULL << arch_pmc_cnt) - 1));
    if ( version > 2 )
        /*
         * Even though we don't support Uncore counters, guests should be
         * able to clear all available overflows.
         */
        global_ovf_ctrl_mask &= ~(1ULL << 61);

    regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
              sizeof(uint64_t) * fixed_pmc_cnt +
              sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;

    check_pmc_quirk();

    if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
         sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
    {
        printk(XENLOG_WARNING
               "VPMU: Register bank does not fit into VPMU shared page\n");
        arch_pmc_cnt = fixed_pmc_cnt = 0;
        return -ENOSPC;
    }

    return 0;
}