/*
 * mtrr.c: MTRR/PAT virtualization
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/domain_page.h>
#include <asm/e820.h>
#include <asm/iocap.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/mtrr.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
#include <public/hvm/e820.h>

/* Get page attribute fields (PAn) from PAT MSR. */
#define pat_cr_2_paf(pat_cr,n)  ((((uint64_t)pat_cr) >> ((n)<<3)) & 0xff)
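/*
 * Illustrative example (using the power-on default PAT layout
 * 0x0007040600070406): pat_cr_2_paf(0x0007040600070406ULL, 1) extracts
 * bits 15:8 and yields 0x04, i.e. PA1 = write-through.
 */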

/* PAT entry to PTE flags (PAT, PCD, PWT bits). */
static const uint8_t pat_entry_2_pte_flags[8] = {
    0,           _PAGE_PWT,
    _PAGE_PCD,   _PAGE_PCD | _PAGE_PWT,
    _PAGE_PAT,   _PAGE_PAT | _PAGE_PWT,
    _PAGE_PAT | _PAGE_PCD, _PAGE_PAT | _PAGE_PCD | _PAGE_PWT };

/* Effective mm type lookup table, according to MTRR and PAT. */
static const uint8_t mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = {
#define RS MEMORY_NUM_TYPES
#define UC MTRR_TYPE_UNCACHABLE
#define WB MTRR_TYPE_WRBACK
#define WC MTRR_TYPE_WRCOMB
#define WP MTRR_TYPE_WRPROT
#define WT MTRR_TYPE_WRTHROUGH

/*          PAT(UC, WC, RS, RS, WT, WP, WB, UC-) */
/* MTRR(UC) */ {UC, WC, RS, RS, UC, UC, UC, UC},
/* MTRR(WC) */ {UC, WC, RS, RS, UC, UC, WC, WC},
/* MTRR(RS) */ {RS, RS, RS, RS, RS, RS, RS, RS},
/* MTRR(RS) */ {RS, RS, RS, RS, RS, RS, RS, RS},
/* MTRR(WT) */ {UC, WC, RS, RS, WT, WP, WT, UC},
/* MTRR(WP) */ {UC, WC, RS, RS, WT, WP, WP, WC},
/* MTRR(WB) */ {UC, WC, RS, RS, WT, WP, WB, UC}

#undef UC
#undef WC
#undef WT
#undef WP
#undef WB
#undef RS
};
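/*
 * Worked example (reading the table above): a page whose guest MTRR type is
 * WT and whose PAT type is WB gets effective type
 * mm_type_tbl[MTRR_TYPE_WRTHROUGH][PAT_TYPE_WRBACK] == MTRR_TYPE_WRTHROUGH.
 */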

/*
 * Reverse lookup table, to find a PAT type according to MTRR and effective
 * memory type. This table is dynamically generated.
 */
static uint8_t __read_mostly mtrr_epat_tbl[MTRR_NUM_TYPES][MEMORY_NUM_TYPES] =
    { [0 ... MTRR_NUM_TYPES-1] =
        { [0 ... MEMORY_NUM_TYPES-1] = INVALID_MEM_TYPE }
    };

/* Lookup table for the first host PAT entry holding a given memory type. */
static uint8_t __read_mostly pat_entry_tbl[PAT_TYPE_NUMS] =
    { [0 ... PAT_TYPE_NUMS-1] = INVALID_MEM_TYPE };

static int __init hvm_mtrr_pat_init(void)
{
    unsigned int i, j;

    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
    {
        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
        {
            unsigned int tmp = mm_type_tbl[i][j];

            if ( tmp < MEMORY_NUM_TYPES )
                mtrr_epat_tbl[i][tmp] = j;
        }
    }

    for ( i = 0; i < PAT_TYPE_NUMS; i++ )
    {
        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
        {
            if ( pat_cr_2_paf(XEN_MSR_PAT, j) == i )
            {
                pat_entry_tbl[i] = j;
                break;
            }
        }
    }

    return 0;
}
__initcall(hvm_mtrr_pat_init);

uint8_t pat_type_2_pte_flags(uint8_t pat_type)
{
    unsigned int pat_entry = pat_entry_tbl[pat_type];

    /*
     * INVALID_MEM_TYPE means no PAT entry holding pat_type was found in the
     * host PAT. This cannot happen if the host PAT covers all PAT types.
     */
    if ( unlikely(pat_entry == INVALID_MEM_TYPE) )
        pat_entry = pat_entry_tbl[PAT_TYPE_UNCACHABLE];

    return pat_entry_2_pte_flags[pat_entry];
}

int hvm_vcpu_cacheattr_init(struct vcpu *v)
{
    struct mtrr_state *m = &v->arch.hvm.mtrr;
    unsigned int num_var_ranges =
        is_hardware_domain(v->domain) ? MASK_EXTR(mtrr_state.mtrr_cap,
                                                  MTRRcap_VCNT)
                                      : MTRR_VCNT;

    if ( num_var_ranges > MTRR_VCNT_MAX )
    {
        ASSERT(is_hardware_domain(v->domain));
        printk("WARNING: limited Dom%u variable range MTRRs from %u to %u\n",
               v->domain->domain_id, num_var_ranges, MTRR_VCNT_MAX);
        num_var_ranges = MTRR_VCNT_MAX;
    }

    memset(m, 0, sizeof(*m));

    m->var_ranges = xzalloc_array(struct mtrr_var_range, num_var_ranges);
    if ( m->var_ranges == NULL )
        return -ENOMEM;

    /* MTRRcap: WC supported (bit 10), fixed ranges supported (bit 8), VCNT. */
    m->mtrr_cap = (1u << 10) | (1u << 8) | num_var_ranges;

    v->arch.hvm.pat_cr =
        ((uint64_t)PAT_TYPE_WRBACK) |               /* PAT0: WB  */
        ((uint64_t)PAT_TYPE_WRTHROUGH << 8) |       /* PAT1: WT  */
        ((uint64_t)PAT_TYPE_UC_MINUS << 16) |       /* PAT2: UC- */
        ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |     /* PAT3: UC  */
        ((uint64_t)PAT_TYPE_WRBACK << 32) |         /* PAT4: WB  */
        ((uint64_t)PAT_TYPE_WRTHROUGH << 40) |      /* PAT5: WT  */
        ((uint64_t)PAT_TYPE_UC_MINUS << 48) |       /* PAT6: UC- */
        ((uint64_t)PAT_TYPE_UNCACHABLE << 56);      /* PAT7: UC  */

    if ( is_hardware_domain(v->domain) )
    {
        /* Copy values from the host. */
        struct domain *d = v->domain;
        unsigned int i;

        if ( mtrr_state.have_fixed )
            for ( i = 0; i < NUM_FIXED_MSR; i++ )
                mtrr_fix_range_msr_set(d, m, i,
                                       ((uint64_t *)mtrr_state.fixed_ranges)[i]);

        for ( i = 0; i < num_var_ranges; i++ )
        {
            mtrr_var_range_msr_set(d, m, MSR_IA32_MTRR_PHYSBASE(i),
                                   mtrr_state.var_ranges[i].base);
            mtrr_var_range_msr_set(d, m, MSR_IA32_MTRR_PHYSMASK(i),
                                   mtrr_state.var_ranges[i].mask);
        }

        mtrr_def_type_msr_set(d, m,
                              mtrr_state.def_type |
                              MASK_INSR(mtrr_state.fixed_enabled,
                                        MTRRdefType_FE) |
                              MASK_INSR(mtrr_state.enabled, MTRRdefType_E));
    }

    return 0;
}

void hvm_vcpu_cacheattr_destroy(struct vcpu *v)
{
    xfree(v->arch.hvm.mtrr.var_ranges);
}

/*
 * Get MTRR memory type for physical address pa.
 *
 * May return a negative value when order > 0, indicating to the caller
 * that the respective mapping needs splitting.
 */
static int get_mtrr_type(const struct mtrr_state *m,
                         paddr_t pa, unsigned int order)
{
    uint8_t overlap_mtrr = 0;
    uint8_t overlap_mtrr_pos = 0;
    uint64_t mask = -(uint64_t)PAGE_SIZE << order;
    unsigned int seg, num_var_ranges = MASK_EXTR(m->mtrr_cap, MTRRcap_VCNT);

    if ( unlikely(!m->enabled) )
        return MTRR_TYPE_UNCACHABLE;

    pa &= mask;
    if ( (pa < 0x100000) && m->fixed_enabled )
    {
        /* Fixed range MTRR takes effect. */
        uint32_t addr = (uint32_t)pa, index;

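        /*
         * Fixed-range layout: one 64-Kbyte-granular register covers
         * 0x00000-0x7FFFF, two 16-Kbyte-granular registers cover
         * 0x80000-0xBFFFF, and eight 4-Kbyte-granular registers cover
         * 0xC0000-0xFFFFF. Each register holds eight type bytes, hence the
         * index/seg arithmetic below.
         */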
        if ( addr < 0x80000 )
        {
            /* 0x00000 ... 0x7FFFF in 64k steps */
            if ( order > 4 )
                return -1;
            seg = (addr >> 16);
            return m->fixed_ranges[seg];
        }
        else if ( addr < 0xc0000 )
        {
            /* 0x80000 ... 0xBFFFF in 16k steps */
            if ( order > 2 )
                return -1;
            seg = (addr - 0x80000) >> 14;
            index = (seg >> 3) + 1;
            seg &= 7;            /* select 0-7 segments */
            return m->fixed_ranges[index*8 + seg];
        }
        else
        {
            /* 0xC0000 ... 0xFFFFF in 4k steps */
            if ( order )
                return -1;
            seg = (addr - 0xc0000) >> 12;
            index = (seg >> 3) + 3;
            seg &= 7;            /* select 0-7 segments */
            return m->fixed_ranges[index*8 + seg];
        }
    }

    /* Match with variable MTRRs. */
    for ( seg = 0; seg < num_var_ranges; seg++ )
    {
        uint64_t phys_base = m->var_ranges[seg].base;
        uint64_t phys_mask = m->var_ranges[seg].mask;

        if ( phys_mask & MTRR_PHYSMASK_VALID )
        {
            phys_mask &= mask;
            if ( (pa & phys_mask) == (phys_base & phys_mask) )
            {
                if ( unlikely(m->overlapped) || order )
                {
                    overlap_mtrr |= 1 << (phys_base & MTRR_PHYSBASE_TYPE_MASK);
                    overlap_mtrr_pos = phys_base & MTRR_PHYSBASE_TYPE_MASK;
                }
                else
                {
                    /* If no overlap, return the found one */
                    return (phys_base & MTRR_PHYSBASE_TYPE_MASK);
                }
            }
        }
    }

    /* Not found? */
    if ( unlikely(overlap_mtrr == 0) )
        return m->def_type;

    /* One match, or multiple identical ones? */
    if ( likely(overlap_mtrr == (1 << overlap_mtrr_pos)) )
        return overlap_mtrr_pos;

    if ( order )
        return -1;

    /* Two or more matches, one being UC? */
    if ( overlap_mtrr & (1 << MTRR_TYPE_UNCACHABLE) )
        return MTRR_TYPE_UNCACHABLE;

    /* Two or more matches, all of them WT and WB? */
    if ( overlap_mtrr ==
         ((1 << MTRR_TYPE_WRTHROUGH) | (1 << MTRR_TYPE_WRBACK)) )
        return MTRR_TYPE_WRTHROUGH;

    /* Behaviour is undefined, but return the last overlapped type. */
    return overlap_mtrr_pos;
}

/*
 * Return the memory type from PAT.
 * NOTE: valid only when paging is enabled.
 *       Only 4K page PTE is supported now.
 */
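/*
 * For example, a PTE with _PAGE_PCD | _PAGE_PWT set (and _PAGE_PAT clear)
 * selects PAT entry 3.
 */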
static uint8_t page_pat_type(uint64_t pat_cr, uint32_t pte_flags)
{
    int32_t pat_entry;

    /* PCD/PWT -> bit 1/0 of PAT entry */
    pat_entry = ( pte_flags >> 3 ) & 0x3;
    /* PAT bits as bit 2 of PAT entry */
    if ( pte_flags & _PAGE_PAT )
        pat_entry |= 4;

    return (uint8_t)pat_cr_2_paf(pat_cr, pat_entry);
}

/*
 * Effective memory type for leaf page.
 */
static uint8_t effective_mm_type(struct mtrr_state *m,
                                 uint64_t pat,
                                 paddr_t gpa,
                                 uint32_t pte_flags,
                                 uint8_t gmtrr_mtype)
{
    uint8_t mtrr_mtype, pat_value;

    /* If get_pat_flags() gives a dedicated MTRR type, just use it. */
    if ( gmtrr_mtype == NO_HARDCODE_MEM_TYPE )
        mtrr_mtype = get_mtrr_type(m, gpa, 0);
    else
        mtrr_mtype = gmtrr_mtype;

    pat_value = page_pat_type(pat, pte_flags);

    return mm_type_tbl[mtrr_mtype][pat_value];
}

uint32_t get_pat_flags(struct vcpu *v,
                       uint32_t gl1e_flags,
                       paddr_t gpaddr,
                       paddr_t spaddr,
                       uint8_t gmtrr_mtype)
{
    uint8_t guest_eff_mm_type;
    uint8_t shadow_mtrr_type;
    uint8_t pat_entry_value;
    uint64_t pat = v->arch.hvm.pat_cr;
    struct mtrr_state *g = &v->arch.hvm.mtrr;

    /*
     * 1. Get the effective memory type of the guest physical address, using
     *    the guest MTRR and PAT pair.
     */
    guest_eff_mm_type = effective_mm_type(g, pat, gpaddr,
                                          gl1e_flags, gmtrr_mtype);

    /* 2. Get the memory type of the host physical address, using host MTRRs. */
    shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr, 0);

    /*
     * 3. Find the memory type in PAT, using the host MTRR memory type and
     *    the guest effective memory type.
     */
    pat_entry_value = mtrr_epat_tbl[shadow_mtrr_type][guest_eff_mm_type];

    /*
     * If a conflict occurs (e.g. the host MTRR type is UC but the guest
     * memory type is WB), use UC as the effective memory type. Returning
     * PAT_TYPE_UNCACHABLE here always yields an effective type of UC.
     */
    if ( pat_entry_value == INVALID_MEM_TYPE )
    {
        struct domain *d = v->domain;
        p2m_type_t p2mt;

        get_gfn_query_unlocked(d, paddr_to_pfn(gpaddr), &p2mt);
        if ( p2m_is_ram(p2mt) )
            gdprintk(XENLOG_WARNING,
                     "Conflict occurs for a given guest l1e flags:%x "
                     "at %"PRIx64" (the effective mm type:%d), "
                     "because the host mtrr type is:%d\n",
                     gl1e_flags, (uint64_t)gpaddr, guest_eff_mm_type,
                     shadow_mtrr_type);
        pat_entry_value = PAT_TYPE_UNCACHABLE;
    }

    /* 4. Get the PTE flags. */
    return pat_type_2_pte_flags(pat_entry_value);
}

static inline bool_t valid_mtrr_type(uint8_t type)
{
    switch ( type )
    {
    case MTRR_TYPE_UNCACHABLE:
    case MTRR_TYPE_WRBACK:
    case MTRR_TYPE_WRCOMB:
    case MTRR_TYPE_WRPROT:
    case MTRR_TYPE_WRTHROUGH:
        return 1;
    }
    return 0;
}

bool_t mtrr_def_type_msr_set(struct domain *d, struct mtrr_state *m,
                             uint64_t msr_content)
{
    uint8_t def_type = msr_content & 0xff;
    bool fixed_enabled = MASK_EXTR(msr_content, MTRRdefType_FE);
    bool enabled = MASK_EXTR(msr_content, MTRRdefType_E);

    if ( unlikely(!valid_mtrr_type(def_type)) )
    {
        HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid MTRR def type:%x\n", def_type);
        return 0;
    }

    if ( unlikely(msr_content && (msr_content & ~0xcffUL)) )
    {
        HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
                    msr_content);
        return 0;
    }

    if ( m->enabled != enabled || m->fixed_enabled != fixed_enabled ||
         m->def_type != def_type )
    {
        m->enabled = enabled;
        m->def_type = def_type;
        m->fixed_enabled = fixed_enabled;
        memory_type_changed(d);
    }

    return 1;
}

bool_t mtrr_fix_range_msr_set(struct domain *d, struct mtrr_state *m,
                              uint32_t row, uint64_t msr_content)
{
    uint64_t *fixed_range_base = (uint64_t *)m->fixed_ranges;

    if ( fixed_range_base[row] != msr_content )
    {
        uint8_t *range = (uint8_t *)&msr_content;
        unsigned int i;

        for ( i = 0; i < 8; i++ )
            if ( unlikely(!valid_mtrr_type(range[i])) )
                return 0;

        fixed_range_base[row] = msr_content;

        if ( m->enabled && m->fixed_enabled )
            memory_type_changed(d);
    }

    return 1;
}

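/*
 * Variable range MSRs are interleaved: MSR_IA32_MTRR_PHYSBASE(n) and
 * MSR_IA32_MTRR_PHYSMASK(n) map to even/odd values of index below, so
 * index/2 selects the range and index&1 distinguishes base from mask.
 */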
bool_t mtrr_var_range_msr_set(
    struct domain *d, struct mtrr_state *m, uint32_t msr, uint64_t msr_content)
{
    uint32_t index, phys_addr;
    uint64_t msr_mask;
    uint64_t *var_range_base = (uint64_t *)m->var_ranges;

    index = msr - MSR_IA32_MTRR_PHYSBASE(0);
    if ( (index / 2) >= MASK_EXTR(m->mtrr_cap, MTRRcap_VCNT) )
    {
        ASSERT_UNREACHABLE();
        return 0;
    }

    if ( var_range_base[index] == msr_content )
        return 1;

    if ( unlikely(!valid_mtrr_type((uint8_t)msr_content)) )
        return 0;

    if ( d == current->domain )
        phys_addr = d->arch.cpuid->extd.maxphysaddr;
    else
        phys_addr = paddr_bits;

    /*
     * Reject writes which set reserved bits: anything at or above the
     * address width, plus bits 10:0 of PHYSMASK (below the valid bit) or
     * bits 11:8 of PHYSBASE (between the type and the base address).
     */
    msr_mask = ~((((uint64_t)1) << phys_addr) - 1);
    msr_mask |= (index & 1) ? 0x7ffUL : 0xf00UL;
    if ( unlikely(msr_content & msr_mask) )
    {
        HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
                    msr_content);
        return 0;
    }

    var_range_base[index] = msr_content;

    m->overlapped = is_var_mtrr_overlapped(m);

    if ( m->enabled )
        memory_type_changed(d);

    return 1;
}

bool mtrr_pat_not_equal(const struct vcpu *vd, const struct vcpu *vs)
{
    const struct mtrr_state *md = &vd->arch.hvm.mtrr;
    const struct mtrr_state *ms = &vs->arch.hvm.mtrr;

    if ( md->enabled != ms->enabled )
        return true;

    if ( md->enabled )
    {
        unsigned int num_var_ranges = MASK_EXTR(md->mtrr_cap, MTRRcap_VCNT);

        /* Test default type MSR. */
        if ( md->def_type != ms->def_type )
            return true;

        /* Test fixed ranges. */
        if ( md->fixed_enabled != ms->fixed_enabled )
            return true;

        if ( md->fixed_enabled &&
             memcmp(md->fixed_ranges, ms->fixed_ranges,
                    sizeof(md->fixed_ranges)) )
            return true;

        /* Test variable ranges. */
        if ( num_var_ranges != MASK_EXTR(ms->mtrr_cap, MTRRcap_VCNT) ||
             memcmp(md->var_ranges, ms->var_ranges,
                    num_var_ranges * sizeof(*md->var_ranges)) )
            return true;
    }

    /* Test PAT. */
    return vd->arch.hvm.pat_cr != vs->arch.hvm.pat_cr;
}

struct hvm_mem_pinned_cacheattr_range {
    struct list_head list;
    uint64_t start, end;
    uint32_t type;
    struct rcu_head rcu;
};

static DEFINE_RCU_READ_LOCK(pinned_cacheattr_rcu_lock);

void hvm_init_cacheattr_region_list(struct domain *d)
{
    INIT_LIST_HEAD(&d->arch.hvm.pinned_cacheattr_ranges);
}

void hvm_destroy_cacheattr_region_list(struct domain *d)
{
    struct list_head *head = &d->arch.hvm.pinned_cacheattr_ranges;
    struct hvm_mem_pinned_cacheattr_range *range;

    while ( !list_empty(head) )
    {
        range = list_entry(head->next,
                           struct hvm_mem_pinned_cacheattr_range,
                           list);
        list_del(&range->list);
        xfree(range);
    }
}

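/*
 * Returns the pinned cache attribute if the 2^order range at gfn lies
 * entirely within a pinned range, -EADDRNOTAVAIL if it only partially
 * overlaps one (so the caller needs to split the mapping), or -ENXIO if no
 * pinned range is touched at all.
 */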
int hvm_get_mem_pinned_cacheattr(struct domain *d, gfn_t gfn,
                                 unsigned int order)
{
    struct hvm_mem_pinned_cacheattr_range *range;
    uint64_t mask = ~(uint64_t)0 << order;
    int rc = -ENXIO;

    ASSERT(is_hvm_domain(d));

    rcu_read_lock(&pinned_cacheattr_rcu_lock);
    list_for_each_entry_rcu ( range,
                              &d->arch.hvm.pinned_cacheattr_ranges,
                              list )
    {
        if ( ((gfn_x(gfn) & mask) >= range->start) &&
             ((gfn_x(gfn) | ~mask) <= range->end) )
        {
            rc = range->type;
            break;
        }
        if ( ((gfn_x(gfn) & mask) <= range->end) &&
             ((gfn_x(gfn) | ~mask) >= range->start) )
        {
            rc = -EADDRNOTAVAIL;
            break;
        }
    }
    rcu_read_unlock(&pinned_cacheattr_rcu_lock);

    return rc;
}

static void free_pinned_cacheattr_entry(struct rcu_head *rcu)
{
    xfree(container_of(rcu, struct hvm_mem_pinned_cacheattr_range, rcu));
}

int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
                                 uint64_t gfn_end, uint32_t type)
{
    struct hvm_mem_pinned_cacheattr_range *range;
    int rc = 1;

    if ( !is_hvm_domain(d) )
        return -EOPNOTSUPP;

    if ( gfn_end < gfn_start || (gfn_start | gfn_end) >> paddr_bits )
        return -EINVAL;

    switch ( type )
    {
    case XEN_DOMCTL_DELETE_MEM_CACHEATTR:
        /* Remove the requested range. */
        rcu_read_lock(&pinned_cacheattr_rcu_lock);
        list_for_each_entry_rcu ( range,
                                  &d->arch.hvm.pinned_cacheattr_ranges,
                                  list )
            if ( range->start == gfn_start && range->end == gfn_end )
            {
                rcu_read_unlock(&pinned_cacheattr_rcu_lock);
                list_del_rcu(&range->list);
                type = range->type;
                call_rcu(&range->rcu, free_pinned_cacheattr_entry);
                p2m_memory_type_changed(d);
                switch ( type )
                {
                case PAT_TYPE_UC_MINUS:
                    /*
                     * For EPT we can also avoid the flush in this case;
                     * see epte_get_entry_emt().
                     */
                    if ( hap_enabled(d) && cpu_has_vmx )
                case PAT_TYPE_UNCACHABLE:
                        break;
                    /* fall through */
                default:
                    flush_all(FLUSH_CACHE);
                    break;
                }
                return 0;
            }
        rcu_read_unlock(&pinned_cacheattr_rcu_lock);
        return -ENOENT;

    case PAT_TYPE_UC_MINUS:
    case PAT_TYPE_UNCACHABLE:
    case PAT_TYPE_WRBACK:
    case PAT_TYPE_WRCOMB:
    case PAT_TYPE_WRPROT:
    case PAT_TYPE_WRTHROUGH:
        break;

    default:
        return -EINVAL;
    }

    rcu_read_lock(&pinned_cacheattr_rcu_lock);
    list_for_each_entry_rcu ( range,
                              &d->arch.hvm.pinned_cacheattr_ranges,
                              list )
    {
        if ( range->start == gfn_start && range->end == gfn_end )
        {
            range->type = type;
            rc = 0;
            break;
        }
        if ( range->start <= gfn_end && gfn_start <= range->end )
        {
            rc = -EBUSY;
            break;
        }
    }
    rcu_read_unlock(&pinned_cacheattr_rcu_lock);
    if ( rc <= 0 )
        return rc;

    range = xzalloc(struct hvm_mem_pinned_cacheattr_range);
    if ( range == NULL )
        return -ENOMEM;

    range->start = gfn_start;
    range->end = gfn_end;
    range->type = type;

    list_add_rcu(&range->list, &d->arch.hvm.pinned_cacheattr_ranges);
    p2m_memory_type_changed(d);
    if ( type != PAT_TYPE_WRBACK )
        flush_all(FLUSH_CACHE);

    return 0;
}

static int hvm_save_mtrr_msr(struct vcpu *v, hvm_domain_context_t *h)
{
    const struct mtrr_state *mtrr_state = &v->arch.hvm.mtrr;
    struct hvm_hw_mtrr hw_mtrr = {
        .msr_mtrr_def_type = mtrr_state->def_type |
                             MASK_INSR(mtrr_state->fixed_enabled,
                                       MTRRdefType_FE) |
                             MASK_INSR(mtrr_state->enabled, MTRRdefType_E),
        .msr_mtrr_cap = mtrr_state->mtrr_cap,
    };
    unsigned int i;

    if ( MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT) >
         (ARRAY_SIZE(hw_mtrr.msr_mtrr_var) / 2) )
    {
        dprintk(XENLOG_G_ERR,
                "HVM save: %pv: too many (%lu) variable range MTRRs\n",
                v, MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT));
        return -EINVAL;
    }

    hvm_get_guest_pat(v, &hw_mtrr.msr_pat_cr);

    for ( i = 0; i < MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT); i++ )
    {
        hw_mtrr.msr_mtrr_var[i * 2] = mtrr_state->var_ranges[i].base;
        hw_mtrr.msr_mtrr_var[i * 2 + 1] = mtrr_state->var_ranges[i].mask;
    }

    BUILD_BUG_ON(sizeof(hw_mtrr.msr_mtrr_fixed) !=
                 sizeof(mtrr_state->fixed_ranges));

    memcpy(hw_mtrr.msr_mtrr_fixed, mtrr_state->fixed_ranges,
           sizeof(hw_mtrr.msr_mtrr_fixed));

    return hvm_save_entry(MTRR, v->vcpu_id, h, &hw_mtrr);
}

static int hvm_load_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
{
    unsigned int vcpuid, i;
    struct vcpu *v;
    struct mtrr_state *mtrr_state;
    struct hvm_hw_mtrr hw_mtrr;

    vcpuid = hvm_load_instance(h);
    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
    {
        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n",
                d->domain_id, vcpuid);
        return -EINVAL;
    }

    if ( hvm_load_entry(MTRR, h, &hw_mtrr) != 0 )
        return -EINVAL;

    if ( MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT) > MTRR_VCNT )
    {
        dprintk(XENLOG_G_ERR,
                "HVM restore: %pv: too many (%lu) variable range MTRRs\n",
                v, MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT));
        return -EINVAL;
    }

    mtrr_state = &v->arch.hvm.mtrr;

    hvm_set_guest_pat(v, hw_mtrr.msr_pat_cr);

    mtrr_state->mtrr_cap = hw_mtrr.msr_mtrr_cap;

    for ( i = 0; i < NUM_FIXED_MSR; i++ )
        mtrr_fix_range_msr_set(d, mtrr_state, i, hw_mtrr.msr_mtrr_fixed[i]);

    for ( i = 0; i < MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT); i++ )
    {
        mtrr_var_range_msr_set(d, mtrr_state,
                               MSR_IA32_MTRR_PHYSBASE(i),
                               hw_mtrr.msr_mtrr_var[i * 2]);
        mtrr_var_range_msr_set(d, mtrr_state,
                               MSR_IA32_MTRR_PHYSMASK(i),
                               hw_mtrr.msr_mtrr_var[i * 2 + 1]);
    }

    mtrr_def_type_msr_set(d, mtrr_state, hw_mtrr.msr_mtrr_def_type);

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr, 1,
                          HVMSR_PER_VCPU);

void memory_type_changed(struct domain *d)
{
    if ( (is_iommu_enabled(d) || cache_flush_permitted(d)) &&
         d->vcpu && d->vcpu[0] )
    {
        p2m_memory_type_changed(d);
        flush_all(FLUSH_CACHE);
    }
}

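/*
 * Compute the EPT memory type for the given gfn/mfn mapping. Sets *ipat when
 * guest PAT should be ignored for the entry, and may return a negative value
 * when order > 0 to tell the caller that the mapping needs splitting.
 */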
int epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
                       unsigned int order, uint8_t *ipat, bool_t direct_mmio)
{
    int gmtrr_mtype, hmtrr_mtype;
    struct vcpu *v = current;
    unsigned long i;

    *ipat = 0;

    if ( v->domain != d )
        v = d->vcpu ? d->vcpu[0] : NULL;

    /* Mask, not add, for order so it works with INVALID_MFN on unmapping */
    if ( rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
                                 mfn_x(mfn) | ((1UL << order) - 1)) )
    {
        if ( !order || rangeset_contains_range(mmio_ro_ranges, mfn_x(mfn),
                                               mfn_x(mfn) | ((1UL << order) - 1)) )
        {
            *ipat = 1;
            return MTRR_TYPE_UNCACHABLE;
        }
        /* Force invalid memory type so resolve_misconfig() will split it */
        return -1;
    }

    if ( direct_mmio )
    {
        if ( (mfn_x(mfn) ^ mfn_x(d->arch.hvm.vmx.apic_access_mfn)) >> order )
            return MTRR_TYPE_UNCACHABLE;
        if ( order )
            return -1;
        *ipat = 1;
        return MTRR_TYPE_WRBACK;
    }

    if ( !mfn_valid(mfn) )
    {
        *ipat = 1;
        return MTRR_TYPE_UNCACHABLE;
    }

    if ( !is_iommu_enabled(d) && !cache_flush_permitted(d) )
    {
        *ipat = 1;
        return MTRR_TYPE_WRBACK;
    }

    for ( i = 0; i < (1ul << order); i++ )
    {
        if ( is_special_page(mfn_to_page(mfn_add(mfn, i))) )
        {
            if ( order )
                return -1;
            *ipat = 1;
            return MTRR_TYPE_WRBACK;
        }
    }

    gmtrr_mtype = hvm_get_mem_pinned_cacheattr(d, _gfn(gfn), order);
    if ( gmtrr_mtype >= 0 )
    {
        *ipat = 1;
        return gmtrr_mtype != PAT_TYPE_UC_MINUS ? gmtrr_mtype
                                                : MTRR_TYPE_UNCACHABLE;
    }
    if ( gmtrr_mtype == -EADDRNOTAVAIL )
        return -1;

    gmtrr_mtype = is_hvm_domain(d) && v ?
                  get_mtrr_type(&v->arch.hvm.mtrr,
                                gfn << PAGE_SHIFT, order) :
                  MTRR_TYPE_WRBACK;
    hmtrr_mtype = get_mtrr_type(&mtrr_state, mfn_x(mfn) << PAGE_SHIFT, order);
    if ( gmtrr_mtype < 0 || hmtrr_mtype < 0 )
        return -1;

    /* If both types match we're fine. */
    if ( likely(gmtrr_mtype == hmtrr_mtype) )
        return hmtrr_mtype;

    /* If either type is UC, we have to go with that one. */
    if ( gmtrr_mtype == MTRR_TYPE_UNCACHABLE ||
         hmtrr_mtype == MTRR_TYPE_UNCACHABLE )
        return MTRR_TYPE_UNCACHABLE;

    /* If either type is WB, we have to go with the other one. */
    if ( gmtrr_mtype == MTRR_TYPE_WRBACK )
        return hmtrr_mtype;
    if ( hmtrr_mtype == MTRR_TYPE_WRBACK )
        return gmtrr_mtype;

    /*
     * At this point we have disagreeing WC, WT, or WP types. The only
     * combination that can be cleanly resolved is WT:WP. The ones involving
     * WC need to be converted to UC, both due to the memory ordering
     * differences and because WC disallows reads to be cached (WT and WP
     * permit this), while WT and WP require writes to go straight to memory
     * (WC can buffer them).
     */
    if ( (gmtrr_mtype == MTRR_TYPE_WRTHROUGH &&
          hmtrr_mtype == MTRR_TYPE_WRPROT) ||
         (gmtrr_mtype == MTRR_TYPE_WRPROT &&
          hmtrr_mtype == MTRR_TYPE_WRTHROUGH) )
        return MTRR_TYPE_WRPROT;

    return MTRR_TYPE_UNCACHABLE;
}


/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */