/*
 * mtrr.c: MTRR/PAT virtualization
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/domain_page.h>
#include <asm/e820.h>
#include <asm/iocap.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/mtrr.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
#include <public/hvm/e820.h>

/* Get page attribute fields (PAn) from PAT MSR. */
#define pat_cr_2_paf(pat_cr,n)  ((((uint64_t)pat_cr) >> ((n)<<3)) & 0xff)
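/* E.g. pat_cr_2_paf(pat, 2) extracts bits 23:16, i.e. the PA2 field. */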

/* PAT entry to PTE flags (PAT, PCD, PWT bits). */
static const uint8_t pat_entry_2_pte_flags[8] = {
    0,           _PAGE_PWT,
    _PAGE_PCD,   _PAGE_PCD | _PAGE_PWT,
    _PAGE_PAT,   _PAGE_PAT | _PAGE_PWT,
    _PAGE_PAT | _PAGE_PCD, _PAGE_PAT | _PAGE_PCD | _PAGE_PWT };

/* Effective mm type lookup table, according to MTRR and PAT. */
static const uint8_t mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = {
#define RS MEMORY_NUM_TYPES
#define UC MTRR_TYPE_UNCACHABLE
#define WB MTRR_TYPE_WRBACK
#define WC MTRR_TYPE_WRCOMB
#define WP MTRR_TYPE_WRPROT
#define WT MTRR_TYPE_WRTHROUGH

/*          PAT(UC, WC, RS, RS, WT, WP, WB, UC-) */
/* MTRR(UC) */ {UC, WC, RS, RS, UC, UC, UC, UC},
/* MTRR(WC) */ {UC, WC, RS, RS, UC, UC, WC, WC},
/* MTRR(RS) */ {RS, RS, RS, RS, RS, RS, RS, RS},
/* MTRR(RS) */ {RS, RS, RS, RS, RS, RS, RS, RS},
/* MTRR(WT) */ {UC, WC, RS, RS, WT, WP, WT, UC},
/* MTRR(WP) */ {UC, WC, RS, RS, WT, WP, WP, WC},
/* MTRR(WB) */ {UC, WC, RS, RS, WT, WP, WB, UC}

#undef UC
#undef WC
#undef WT
#undef WP
#undef WB
#undef RS
};

/*
 * Reverse lookup table, to find a pat type according to MTRR and effective
 * memory type. This table is dynamically generated.
 */
static uint8_t __read_mostly mtrr_epat_tbl[MTRR_NUM_TYPES][MEMORY_NUM_TYPES] =
    { [0 ... MTRR_NUM_TYPES-1] =
        { [0 ... MEMORY_NUM_TYPES-1] = INVALID_MEM_TYPE }
    };

/* Lookup table for PAT entry of a given PAT value in host PAT. */
static uint8_t __read_mostly pat_entry_tbl[PAT_TYPE_NUMS] =
    { [0 ... PAT_TYPE_NUMS-1] = INVALID_MEM_TYPE };

static int __init hvm_mtrr_pat_init(void)
{
    unsigned int i, j;

    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
    {
        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
        {
            unsigned int tmp = mm_type_tbl[i][j];

            if ( tmp < MEMORY_NUM_TYPES )
                mtrr_epat_tbl[i][tmp] = j;
        }
    }

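    /*
     * For each memory type, record the first PAT entry in Xen's own PAT
     * holding that type; used by pat_type_2_pte_flags().
     */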
    for ( i = 0; i < PAT_TYPE_NUMS; i++ )
    {
        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
        {
            if ( pat_cr_2_paf(XEN_MSR_PAT, j) == i )
            {
                pat_entry_tbl[i] = j;
                break;
            }
        }
    }

    return 0;
}
__initcall(hvm_mtrr_pat_init);

uint8_t pat_type_2_pte_flags(uint8_t pat_type)
{
    unsigned int pat_entry = pat_entry_tbl[pat_type];

    /*
     * INVALID_MEM_TYPE means no PAT entry holding the given pat_type was
     * found in the host PAT.  This cannot happen as long as the host PAT
     * covers all PAT types.
     */
    if ( unlikely(pat_entry == INVALID_MEM_TYPE) )
        pat_entry = pat_entry_tbl[PAT_TYPE_UNCACHABLE];

    return pat_entry_2_pte_flags[pat_entry];
}

int hvm_vcpu_cacheattr_init(struct vcpu *v)
{
    struct mtrr_state *m = &v->arch.hvm.mtrr;
    unsigned int num_var_ranges =
        is_hardware_domain(v->domain) ? MASK_EXTR(mtrr_state.mtrr_cap,
                                                  MTRRcap_VCNT)
                                      : MTRR_VCNT;

    if ( num_var_ranges > MTRR_VCNT_MAX )
    {
        ASSERT(is_hardware_domain(v->domain));
        printk("WARNING: limited Dom%u variable range MTRRs from %u to %u\n",
               v->domain->domain_id, num_var_ranges, MTRR_VCNT_MAX);
        num_var_ranges = MTRR_VCNT_MAX;
    }

    memset(m, 0, sizeof(*m));

    m->var_ranges = xzalloc_array(struct mtrr_var_range, num_var_ranges);
    if ( m->var_ranges == NULL )
        return -ENOMEM;

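    /* MTRRcap: WC supported (bit 10), fixed ranges supported (bit 8), VCNT. */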
    m->mtrr_cap = (1u << 10) | (1u << 8) | num_var_ranges;

    v->arch.hvm.pat_cr =
        ((uint64_t)PAT_TYPE_WRBACK) |               /* PAT0: WB */
        ((uint64_t)PAT_TYPE_WRTHROUGH << 8) |       /* PAT1: WT */
        ((uint64_t)PAT_TYPE_UC_MINUS << 16) |       /* PAT2: UC- */
        ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |     /* PAT3: UC */
        ((uint64_t)PAT_TYPE_WRBACK << 32) |         /* PAT4: WB */
        ((uint64_t)PAT_TYPE_WRTHROUGH << 40) |      /* PAT5: WT */
        ((uint64_t)PAT_TYPE_UC_MINUS << 48) |       /* PAT6: UC- */
        ((uint64_t)PAT_TYPE_UNCACHABLE << 56);      /* PAT7: UC */

    if ( is_hardware_domain(v->domain) )
    {
        /* Copy values from the host. */
        struct domain *d = v->domain;
        unsigned int i;

        if ( mtrr_state.have_fixed )
            for ( i = 0; i < NUM_FIXED_MSR; i++ )
                mtrr_fix_range_msr_set(d, m, i,
                                      ((uint64_t *)mtrr_state.fixed_ranges)[i]);

        for ( i = 0; i < num_var_ranges; i++ )
        {
            mtrr_var_range_msr_set(d, m, MSR_IA32_MTRR_PHYSBASE(i),
                                   mtrr_state.var_ranges[i].base);
            mtrr_var_range_msr_set(d, m, MSR_IA32_MTRR_PHYSMASK(i),
                                   mtrr_state.var_ranges[i].mask);
        }

        mtrr_def_type_msr_set(d, m,
                              mtrr_state.def_type |
                              MASK_INSR(mtrr_state.fixed_enabled,
                                        MTRRdefType_FE) |
                              MASK_INSR(mtrr_state.enabled, MTRRdefType_E));
    }

    return 0;
}

void hvm_vcpu_cacheattr_destroy(struct vcpu *v)
{
    xfree(v->arch.hvm.mtrr.var_ranges);
}

/*
 * Get MTRR memory type for physical address pa.
 *
 * May return a negative value when order > 0, indicating to the caller
 * that the respective mapping needs splitting.
 */
static int get_mtrr_type(const struct mtrr_state *m,
                         paddr_t pa, unsigned int order)
{
    uint8_t     overlap_mtrr = 0;
    uint8_t     overlap_mtrr_pos = 0;
    uint64_t    mask = -(uint64_t)PAGE_SIZE << order;
    unsigned int seg, num_var_ranges = MASK_EXTR(m->mtrr_cap, MTRRcap_VCNT);

    if ( unlikely(!m->enabled) )
        return MTRR_TYPE_UNCACHABLE;

    pa &= mask;
    if ( (pa < 0x100000) && m->fixed_enabled )
    {
        /* Fixed range MTRR takes effect. */
        uint32_t addr = (uint32_t)pa, index;

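        /*
         * The 2^order page range must fit in a single fixed-range slot
         * (64k, 16k or 4k wide); otherwise return -1 so that the caller
         * splits the mapping.
         */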
        if ( addr < 0x80000 )
        {
            /* 0x00000 ... 0x7FFFF in 64k steps */
            if ( order > 4 )
                return -1;
            seg = (addr >> 16);
            return m->fixed_ranges[seg];
        }
        else if ( addr < 0xc0000 )
        {
            /* 0x80000 ... 0xBFFFF in 16k steps */
            if ( order > 2 )
                return -1;
            seg = (addr - 0x80000) >> 14;
            index = (seg >> 3) + 1;
            seg &= 7;            /* select 0-7 segments */
            return m->fixed_ranges[index*8 + seg];
        }
        else
        {
            /* 0xC0000 ... 0xFFFFF in 4k steps */
            if ( order )
                return -1;
            seg = (addr - 0xc0000) >> 12;
            index = (seg >> 3) + 3;
            seg &= 7;            /* select 0-7 segments */
            return m->fixed_ranges[index*8 + seg];
        }
    }

    /* Match with variable MTRRs. */
    for ( seg = 0; seg < num_var_ranges; seg++ )
    {
        uint64_t phys_base = m->var_ranges[seg].base;
        uint64_t phys_mask = m->var_ranges[seg].mask;

        if ( phys_mask & MTRR_PHYSMASK_VALID )
        {
            phys_mask &= mask;
            if ( (pa & phys_mask) == (phys_base & phys_mask) )
            {
                if ( unlikely(m->overlapped) || order )
                {
                    overlap_mtrr |= 1 << (phys_base & MTRR_PHYSBASE_TYPE_MASK);
                    overlap_mtrr_pos = phys_base & MTRR_PHYSBASE_TYPE_MASK;
                }
                else
                {
                    /* If no overlap, return the found one */
                    return (phys_base & MTRR_PHYSBASE_TYPE_MASK);
                }
            }
        }
    }

    /* Not found? */
    if ( unlikely(overlap_mtrr == 0) )
        return m->def_type;

    /* One match, or multiple identical ones? */
    if ( likely(overlap_mtrr == (1 << overlap_mtrr_pos)) )
        return overlap_mtrr_pos;

    if ( order )
        return -1;

    /* Two or more matches, one being UC? */
    if ( overlap_mtrr & (1 << MTRR_TYPE_UNCACHABLE) )
        return MTRR_TYPE_UNCACHABLE;

    /* Two or more matches, all of them WT and WB? */
    if ( overlap_mtrr ==
         ((1 << MTRR_TYPE_WRTHROUGH) | (1 << MTRR_TYPE_WRBACK)) )
        return MTRR_TYPE_WRTHROUGH;

    /* Behaviour is undefined, but return the last overlapped type. */
    return overlap_mtrr_pos;
}

/*
 * Return the memory type from PAT.
 * NOTE: valid only when paging is enabled.
 *       Only 4K page PTEs are supported now.
 */
static uint8_t page_pat_type(uint64_t pat_cr, uint32_t pte_flags)
{
    int32_t pat_entry;

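    /* In a 4K PTE, PWT is bit 3, PCD bit 4 and PAT bit 7. */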
    /* PCD/PWT -> bit 1/0 of PAT entry */
    pat_entry = ( pte_flags >> 3 ) & 0x3;
    /* PAT bits as bit 2 of PAT entry */
    if ( pte_flags & _PAGE_PAT )
        pat_entry |= 4;

    return (uint8_t)pat_cr_2_paf(pat_cr, pat_entry);
}

/*
 * Effective memory type for leaf page.
 */
static uint8_t effective_mm_type(struct mtrr_state *m,
                                 uint64_t pat,
                                 paddr_t gpa,
                                 uint32_t pte_flags,
                                 uint8_t gmtrr_mtype)
{
    uint8_t mtrr_mtype, pat_value;

    /* If get_pat_flags() gives a dedicated MTRR type, just use it. */
    if ( gmtrr_mtype == NO_HARDCODE_MEM_TYPE )
        mtrr_mtype = get_mtrr_type(m, gpa, 0);
    else
        mtrr_mtype = gmtrr_mtype;

    pat_value = page_pat_type(pat, pte_flags);

    return mm_type_tbl[mtrr_mtype][pat_value];
}

uint32_t get_pat_flags(struct vcpu *v,
                       uint32_t gl1e_flags,
                       paddr_t gpaddr,
                       paddr_t spaddr,
                       uint8_t gmtrr_mtype)
{
    uint8_t guest_eff_mm_type;
    uint8_t shadow_mtrr_type;
    uint8_t pat_entry_value;
    uint64_t pat = v->arch.hvm.pat_cr;
    struct mtrr_state *g = &v->arch.hvm.mtrr;

    /*
     * 1. Get the effective memory type of the guest physical address, using
     *    the guest MTRR and PAT pair.
     */
    guest_eff_mm_type = effective_mm_type(g, pat, gpaddr,
                                          gl1e_flags, gmtrr_mtype);
    /* 2. Get the memory type of the host physical address, using MTRR. */
    shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr, 0);

    /*
     * 3. Find the memory type in PAT, from the host MTRR memory type and
     *    the guest effective memory type.
     */
    pat_entry_value = mtrr_epat_tbl[shadow_mtrr_type][guest_eff_mm_type];
    /*
     * If a conflict occurs (e.g. the host MTRR type is UC while the guest
     * memory type is WB), set UC as the effective memory type.  Returning
     * PAT_TYPE_UNCACHABLE here always yields an effective memory type of UC.
     */
    if ( pat_entry_value == INVALID_MEM_TYPE )
    {
        struct domain *d = v->domain;
        p2m_type_t p2mt;

        get_gfn_query_unlocked(d, paddr_to_pfn(gpaddr), &p2mt);
        if ( p2m_is_ram(p2mt) )
            gdprintk(XENLOG_WARNING,
                     "Conflict occurs for a given guest l1e flags:%x "
                     "at %"PRIx64" (the effective mm type:%d), "
                     "because the host mtrr type is:%d\n",
                     gl1e_flags, (uint64_t)gpaddr, guest_eff_mm_type,
                     shadow_mtrr_type);
        pat_entry_value = PAT_TYPE_UNCACHABLE;
    }
    /* 4. Get the PTE flags. */
    return pat_type_2_pte_flags(pat_entry_value);
}

static inline bool_t valid_mtrr_type(uint8_t type)
{
    switch ( type )
    {
    case MTRR_TYPE_UNCACHABLE:
    case MTRR_TYPE_WRBACK:
    case MTRR_TYPE_WRCOMB:
    case MTRR_TYPE_WRPROT:
    case MTRR_TYPE_WRTHROUGH:
        return 1;
    }
    return 0;
}

bool_t mtrr_def_type_msr_set(struct domain *d, struct mtrr_state *m,
                             uint64_t msr_content)
{
    uint8_t def_type = msr_content & 0xff;
    bool fixed_enabled = MASK_EXTR(msr_content, MTRRdefType_FE);
    bool enabled = MASK_EXTR(msr_content, MTRRdefType_E);

    if ( unlikely(!valid_mtrr_type(def_type)) )
    {
        HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid MTRR def type:%x\n", def_type);
        return 0;
    }

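    /* Bits 9:8 and 63:12 of MTRRdefType are reserved. */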
    if ( unlikely(msr_content && (msr_content & ~0xcffUL)) )
    {
        HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
                    msr_content);
        return 0;
    }

    if ( m->enabled != enabled || m->fixed_enabled != fixed_enabled ||
         m->def_type != def_type )
    {
        m->enabled = enabled;
        m->def_type = def_type;
        m->fixed_enabled = fixed_enabled;
        memory_type_changed(d);
    }

    return 1;
}

bool_t mtrr_fix_range_msr_set(struct domain *d, struct mtrr_state *m,
                              uint32_t row, uint64_t msr_content)
{
    uint64_t *fixed_range_base = (uint64_t *)m->fixed_ranges;

    if ( fixed_range_base[row] != msr_content )
    {
        uint8_t *range = (uint8_t*)&msr_content;
        unsigned int i;

        for ( i = 0; i < 8; i++ )
            if ( unlikely(!valid_mtrr_type(range[i])) )
                return 0;

        fixed_range_base[row] = msr_content;

        if ( m->enabled && m->fixed_enabled )
            memory_type_changed(d);
    }

    return 1;
}

bool_t mtrr_var_range_msr_set(
    struct domain *d, struct mtrr_state *m, uint32_t msr, uint64_t msr_content)
{
    uint32_t index, phys_addr;
    uint64_t msr_mask;
    uint64_t *var_range_base = (uint64_t*)m->var_ranges;

    index = msr - MSR_IA32_MTRR_PHYSBASE(0);
    if ( (index / 2) >= MASK_EXTR(m->mtrr_cap, MTRRcap_VCNT) )
    {
        ASSERT_UNREACHABLE();
        return 0;
    }

    if ( var_range_base[index] == msr_content )
        return 1;

    if ( unlikely(!valid_mtrr_type((uint8_t)msr_content)) )
        return 0;

    if ( d == current->domain )
        phys_addr = d->arch.cpuid->extd.maxphysaddr;
    else
        phys_addr = paddr_bits;
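    /*
     * Reject writes to reserved bits: bits 10:0 of PHYSMASK, bits 11:8 of
     * PHYSBASE, and anything at or above the address width.
     */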
    msr_mask = ~((((uint64_t)1) << phys_addr) - 1);
    msr_mask |= (index & 1) ? 0x7ffUL : 0xf00UL;
    if ( unlikely(msr_content & msr_mask) )
    {
        HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
                    msr_content);
        return 0;
    }

    var_range_base[index] = msr_content;

    m->overlapped = is_var_mtrr_overlapped(m);

    if ( m->enabled )
        memory_type_changed(d);

    return 1;
}

bool mtrr_pat_not_equal(const struct vcpu *vd, const struct vcpu *vs)
{
    const struct mtrr_state *md = &vd->arch.hvm.mtrr;
    const struct mtrr_state *ms = &vs->arch.hvm.mtrr;

    if ( md->enabled != ms->enabled )
        return true;

    if ( md->enabled )
    {
        unsigned int num_var_ranges = MASK_EXTR(md->mtrr_cap, MTRRcap_VCNT);

        /* Test default type MSR. */
        if ( md->def_type != ms->def_type )
            return true;

        /* Test fixed ranges. */
        if ( md->fixed_enabled != ms->fixed_enabled )
            return true;

        if ( md->fixed_enabled &&
             memcmp(md->fixed_ranges, ms->fixed_ranges,
                    sizeof(md->fixed_ranges)) )
            return true;

        /* Test variable ranges. */
        if ( num_var_ranges != MASK_EXTR(ms->mtrr_cap, MTRRcap_VCNT) ||
             memcmp(md->var_ranges, ms->var_ranges,
                    num_var_ranges * sizeof(*md->var_ranges)) )
            return true;
    }

    /* Test PAT. */
    return vd->arch.hvm.pat_cr != vs->arch.hvm.pat_cr;
}

struct hvm_mem_pinned_cacheattr_range {
    struct list_head list;
    uint64_t start, end;
    uint32_t type;
    struct rcu_head rcu;
};

static DEFINE_RCU_READ_LOCK(pinned_cacheattr_rcu_lock);

void hvm_init_cacheattr_region_list(struct domain *d)
{
    INIT_LIST_HEAD(&d->arch.hvm.pinned_cacheattr_ranges);
}

void hvm_destroy_cacheattr_region_list(struct domain *d)
{
    struct list_head *head = &d->arch.hvm.pinned_cacheattr_ranges;
    struct hvm_mem_pinned_cacheattr_range *range;

    while ( !list_empty(head) )
    {
        range = list_entry(head->next,
                           struct hvm_mem_pinned_cacheattr_range,
                           list);
        list_del(&range->list);
        xfree(range);
    }
}

int hvm_get_mem_pinned_cacheattr(struct domain *d, gfn_t gfn,
                                 unsigned int order)
{
    struct hvm_mem_pinned_cacheattr_range *range;
    uint64_t mask = ~(uint64_t)0 << order;
    int rc = -ENXIO;

    ASSERT(is_hvm_domain(d));

    rcu_read_lock(&pinned_cacheattr_rcu_lock);
    list_for_each_entry_rcu ( range,
                              &d->arch.hvm.pinned_cacheattr_ranges,
                              list )
    {
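        /* Page range fully inside this pinned range: return its type. */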
        if ( ((gfn_x(gfn) & mask) >= range->start) &&
             ((gfn_x(gfn) | ~mask) <= range->end) )
        {
            rc = range->type;
            break;
        }
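        /*
         * Partial overlap only: tell the caller (via -EADDRNOTAVAIL) that
         * the mapping needs splitting.
         */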
        if ( ((gfn_x(gfn) & mask) <= range->end) &&
             ((gfn_x(gfn) | ~mask) >= range->start) )
        {
            rc = -EADDRNOTAVAIL;
            break;
        }
    }
    rcu_read_unlock(&pinned_cacheattr_rcu_lock);

    return rc;
}

static void free_pinned_cacheattr_entry(struct rcu_head *rcu)
{
    xfree(container_of(rcu, struct hvm_mem_pinned_cacheattr_range, rcu));
}

int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
                                 uint64_t gfn_end, uint32_t type)
{
    struct hvm_mem_pinned_cacheattr_range *range;
    int rc = 1;

    if ( !is_hvm_domain(d) )
        return -EOPNOTSUPP;

    if ( gfn_end < gfn_start || (gfn_start | gfn_end) >> paddr_bits )
        return -EINVAL;

    switch ( type )
    {
    case XEN_DOMCTL_DELETE_MEM_CACHEATTR:
        /* Remove the requested range. */
        rcu_read_lock(&pinned_cacheattr_rcu_lock);
        list_for_each_entry_rcu ( range,
                                  &d->arch.hvm.pinned_cacheattr_ranges,
                                  list )
            if ( range->start == gfn_start && range->end == gfn_end )
            {
                rcu_read_unlock(&pinned_cacheattr_rcu_lock);
                list_del_rcu(&range->list);
                type = range->type;
                call_rcu(&range->rcu, free_pinned_cacheattr_entry);
                p2m_memory_type_changed(d);
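                /*
                 * Avoid the cache flush when the removed range could not
                 * have been cached (UC, and UC- in the EPT case below).
                 */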
                switch ( type )
                {
                case PAT_TYPE_UC_MINUS:
                    /*
                     * For EPT we can also avoid the flush in this case;
                     * see epte_get_entry_emt().
                     */
                    if ( hap_enabled(d) && cpu_has_vmx )
                case PAT_TYPE_UNCACHABLE:
                        break;
                    /* fall through */
                default:
                    flush_all(FLUSH_CACHE);
                    break;
                }
                return 0;
            }
        rcu_read_unlock(&pinned_cacheattr_rcu_lock);
        return -ENOENT;

    case PAT_TYPE_UC_MINUS:
    case PAT_TYPE_UNCACHABLE:
    case PAT_TYPE_WRBACK:
    case PAT_TYPE_WRCOMB:
    case PAT_TYPE_WRPROT:
    case PAT_TYPE_WRTHROUGH:
        break;

    default:
        return -EINVAL;
    }

    rcu_read_lock(&pinned_cacheattr_rcu_lock);
    list_for_each_entry_rcu ( range,
                              &d->arch.hvm.pinned_cacheattr_ranges,
                              list )
    {
        if ( range->start == gfn_start && range->end == gfn_end )
        {
            range->type = type;
            rc = 0;
            break;
        }
        if ( range->start <= gfn_end && gfn_start <= range->end )
        {
            rc = -EBUSY;
            break;
        }
    }
    rcu_read_unlock(&pinned_cacheattr_rcu_lock);
    if ( rc <= 0 )
        return rc;

    range = xzalloc(struct hvm_mem_pinned_cacheattr_range);
    if ( range == NULL )
        return -ENOMEM;

    range->start = gfn_start;
    range->end = gfn_end;
    range->type = type;

    list_add_rcu(&range->list, &d->arch.hvm.pinned_cacheattr_ranges);
    p2m_memory_type_changed(d);
    if ( type != PAT_TYPE_WRBACK )
        flush_all(FLUSH_CACHE);

    return 0;
}

static int hvm_save_mtrr_msr(struct vcpu *v, hvm_domain_context_t *h)
{
    const struct mtrr_state *mtrr_state = &v->arch.hvm.mtrr;
    struct hvm_hw_mtrr hw_mtrr = {
        .msr_mtrr_def_type = mtrr_state->def_type |
                             MASK_INSR(mtrr_state->fixed_enabled,
                                       MTRRdefType_FE) |
                             MASK_INSR(mtrr_state->enabled, MTRRdefType_E),
        .msr_mtrr_cap      = mtrr_state->mtrr_cap,
    };
    unsigned int i;

    if ( MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT) >
         (ARRAY_SIZE(hw_mtrr.msr_mtrr_var) / 2) )
    {
        dprintk(XENLOG_G_ERR,
                "HVM save: %pv: too many (%lu) variable range MTRRs\n",
                v, MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT));
        return -EINVAL;
    }

    hvm_get_guest_pat(v, &hw_mtrr.msr_pat_cr);

    for ( i = 0; i < MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT); i++ )
    {
        hw_mtrr.msr_mtrr_var[i * 2] = mtrr_state->var_ranges[i].base;
        hw_mtrr.msr_mtrr_var[i * 2 + 1] = mtrr_state->var_ranges[i].mask;
    }

    BUILD_BUG_ON(sizeof(hw_mtrr.msr_mtrr_fixed) !=
                 sizeof(mtrr_state->fixed_ranges));

    memcpy(hw_mtrr.msr_mtrr_fixed, mtrr_state->fixed_ranges,
           sizeof(hw_mtrr.msr_mtrr_fixed));

    return hvm_save_entry(MTRR, v->vcpu_id, h, &hw_mtrr);
}

static int hvm_load_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
{
    unsigned int vcpuid, i;
    struct vcpu *v;
    struct mtrr_state *mtrr_state;
    struct hvm_hw_mtrr hw_mtrr;

    vcpuid = hvm_load_instance(h);
    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
    {
        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n",
                d->domain_id, vcpuid);
        return -EINVAL;
    }

    if ( hvm_load_entry(MTRR, h, &hw_mtrr) != 0 )
        return -EINVAL;

    if ( MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT) > MTRR_VCNT )
    {
        dprintk(XENLOG_G_ERR,
                "HVM restore: %pv: too many (%lu) variable range MTRRs\n",
                v, MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT));
        return -EINVAL;
    }

    mtrr_state = &v->arch.hvm.mtrr;

    hvm_set_guest_pat(v, hw_mtrr.msr_pat_cr);

    mtrr_state->mtrr_cap = hw_mtrr.msr_mtrr_cap;

    for ( i = 0; i < NUM_FIXED_MSR; i++ )
        mtrr_fix_range_msr_set(d, mtrr_state, i, hw_mtrr.msr_mtrr_fixed[i]);

    for ( i = 0; i < MASK_EXTR(hw_mtrr.msr_mtrr_cap, MTRRcap_VCNT); i++ )
    {
        mtrr_var_range_msr_set(d, mtrr_state,
                               MSR_IA32_MTRR_PHYSBASE(i),
                               hw_mtrr.msr_mtrr_var[i * 2]);
        mtrr_var_range_msr_set(d, mtrr_state,
                               MSR_IA32_MTRR_PHYSMASK(i),
                               hw_mtrr.msr_mtrr_var[i * 2 + 1]);
    }

    mtrr_def_type_msr_set(d, mtrr_state, hw_mtrr.msr_mtrr_def_type);

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr, 1,
                          HVMSR_PER_VCPU);

void memory_type_changed(struct domain *d)
{
    if ( (is_iommu_enabled(d) || cache_flush_permitted(d)) &&
         d->vcpu && d->vcpu[0] )
    {
        p2m_memory_type_changed(d);
        flush_all(FLUSH_CACHE);
    }
}

int epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
                       unsigned int order, uint8_t *ipat, bool_t direct_mmio)
{
    int gmtrr_mtype, hmtrr_mtype;
    struct vcpu *v = current;
    unsigned long i;

    *ipat = 0;

    if ( v->domain != d )
        v = d->vcpu ? d->vcpu[0] : NULL;

    /* Mask, not add, for order so it works with INVALID_MFN on unmapping */
    if ( rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
                                 mfn_x(mfn) | ((1UL << order) - 1)) )
    {
        if ( !order || rangeset_contains_range(mmio_ro_ranges, mfn_x(mfn),
                                               mfn_x(mfn) | ((1UL << order) - 1)) )
        {
            *ipat = 1;
            return MTRR_TYPE_UNCACHABLE;
        }
        /* Force invalid memory type so resolve_misconfig() will split it */
        return -1;
    }

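    /*
     * Direct MMIO is always UC, with the sole exception of the local APIC
     * access page, which is mapped WB.  The XOR/shift below checks whether
     * the 2^order range covers that page; if so and order > 0, the mapping
     * is split first.
     */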
    if ( direct_mmio )
    {
        if ( (mfn_x(mfn) ^ mfn_x(d->arch.hvm.vmx.apic_access_mfn)) >> order )
            return MTRR_TYPE_UNCACHABLE;
        if ( order )
            return -1;
        *ipat = 1;
        return MTRR_TYPE_WRBACK;
    }

    if ( !mfn_valid(mfn) )
    {
        *ipat = 1;
        return MTRR_TYPE_UNCACHABLE;
    }

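    /*
     * Without an IOMMU and without cache-flush control, the guest cannot
     * make any mapping non-coherent, so WB can safely be used regardless
     * of the guest's MTRR/PAT settings.
     */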
    if ( !is_iommu_enabled(d) && !cache_flush_permitted(d) )
    {
        *ipat = 1;
        return MTRR_TYPE_WRBACK;
    }

    for ( i = 0; i < (1ul << order); i++ )
    {
        if ( is_special_page(mfn_to_page(mfn_add(mfn, i))) )
        {
            if ( order )
                return -1;
            *ipat = 1;
            return MTRR_TYPE_WRBACK;
        }
    }

    gmtrr_mtype = hvm_get_mem_pinned_cacheattr(d, _gfn(gfn), order);
    if ( gmtrr_mtype >= 0 )
    {
        *ipat = 1;
        return gmtrr_mtype != PAT_TYPE_UC_MINUS ? gmtrr_mtype
                                                : MTRR_TYPE_UNCACHABLE;
    }
    if ( gmtrr_mtype == -EADDRNOTAVAIL )
        return -1;

    gmtrr_mtype = is_hvm_domain(d) && v ?
                  get_mtrr_type(&v->arch.hvm.mtrr,
                                gfn << PAGE_SHIFT, order) :
                  MTRR_TYPE_WRBACK;
    hmtrr_mtype = get_mtrr_type(&mtrr_state, mfn_x(mfn) << PAGE_SHIFT, order);
    if ( gmtrr_mtype < 0 || hmtrr_mtype < 0 )
        return -1;

    /* If both types match we're fine. */
    if ( likely(gmtrr_mtype == hmtrr_mtype) )
        return hmtrr_mtype;

    /* If either type is UC, we have to go with that one. */
    if ( gmtrr_mtype == MTRR_TYPE_UNCACHABLE ||
         hmtrr_mtype == MTRR_TYPE_UNCACHABLE )
        return MTRR_TYPE_UNCACHABLE;

    /* If either type is WB, we have to go with the other one. */
    if ( gmtrr_mtype == MTRR_TYPE_WRBACK )
        return hmtrr_mtype;
    if ( hmtrr_mtype == MTRR_TYPE_WRBACK )
        return gmtrr_mtype;

    /*
     * At this point we have disagreeing WC, WT, or WP types. The only
     * combination that can be cleanly resolved is WT:WP. The ones involving
     * WC need to be converted to UC, both due to the memory ordering
     * differences and because WC disallows reads to be cached (WT and WP
     * permit this), while WT and WP require writes to go straight to memory
     * (WC can buffer them).
     */
    if ( (gmtrr_mtype == MTRR_TYPE_WRTHROUGH &&
          hmtrr_mtype == MTRR_TYPE_WRPROT) ||
         (gmtrr_mtype == MTRR_TYPE_WRPROT &&
          hmtrr_mtype == MTRR_TYPE_WRTHROUGH) )
        return MTRR_TYPE_WRPROT;

    return MTRR_TYPE_UNCACHABLE;
}


/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */