/*
 * Copyright (c) 2010, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Allen Kay <allen.m.kay@intel.com>
 */

#include <xen/irq.h>
#include <xen/param.h>
#include <xen/sched.h>
#include <xen/xmalloc.h>
#include <xen/domain_page.h>
#include <xen/iommu.h>
#include <xen/numa.h>
#include <xen/softirq.h>
#include <xen/time.h>
#include <xen/pci.h>
#include <xen/pci_ids.h>
#include <xen/pci_regs.h>
#include <xen/keyhandler.h>
#include <asm/msi.h>
#include <asm/irq.h>
#include <asm/pci.h>
#include <mach_apic.h>
#include "iommu.h"
#include "dmar.h"
#include "extern.h"
#include "vtd.h"

#define IOH_DEV      PCI_SBDF(0, 0, 0, 0)
#define IGD_DEV      PCI_SBDF(0, 0, 2, 0)

#define IGD_BAR_MASK 0xFFFFFFFFFFFF0000
#define GGC 0x52
#define GGC_MEMORY_VT_ENABLED  (0x8 << 8)

#define IS_CTG(id)    (id == 0x2a408086)
#define IS_ILK(id)    (id == 0x00408086 || id == 0x00448086 || id == 0x00628086 || id == 0x006A8086)
#define IS_CPT(id)    (id == 0x01008086 || id == 0x01048086)

/* SandyBridge IGD timeouts in milliseconds */
#define SNB_IGD_TIMEOUT_LEGACY    1000
#define SNB_IGD_TIMEOUT            670
static unsigned int snb_igd_timeout;

static u32 __read_mostly ioh_id;
static u32 __initdata igd_id;
bool_t __read_mostly rwbf_quirk;
static bool_t __read_mostly is_cantiga_b3;
static bool_t __read_mostly is_snb_gfx;
static u8 *__read_mostly igd_reg_va;
static spinlock_t igd_lock;
/*
 * QUIRK to work around a Xen boot issue on Calpella/Ironlake OEM BIOSes
 * that do not enable VT-d properly in the IGD.  The workaround is to not
 * enable IGD VT-d translation if VT is not enabled in the IGD.
 */
int is_igd_vt_enabled_quirk(void)
{
    u16 ggc;

    if ( !IS_ILK(ioh_id) )
        return 1;

    /* integrated graphics on Intel platforms is located at 0:2.0 */
    ggc = pci_conf_read16(IGD_DEV, GGC);
    return ( ggc & GGC_MEMORY_VT_ENABLED ? 1 : 0 );
}

/*
 * QUIRK to work around the Cantiga VT-d buffer flush issue.
 * The workaround is to force a write buffer flush even if the
 * VT-d capability indicates it is not required.
 */
static void __init cantiga_b3_errata_init(void)
{
    u16 vid;
    u8 did_hi, rid;

    vid = pci_conf_read16(IGD_DEV, 0);
    if ( vid != 0x8086 )
        return;

    did_hi = pci_conf_read8(IGD_DEV, 3);
    rid = pci_conf_read8(IGD_DEV, 8);

    if ( (did_hi == 0x2A) && (rid == 0x7) )
        is_cantiga_b3 = 1;
}

/* check for Sandybridge IGD device IDs */
static void __init snb_errata_init(void)
{
    is_snb_gfx = IS_SNB_GFX(igd_id);
    spin_lock_init(&igd_lock);
}

/*
 * QUIRK to work around the Cantiga IGD VT-d low power erratum.
 * This erratum impacts IGD assignment on Cantiga systems
 * and can potentially cause VT-d operations to hang.
 * The workaround is to access an IGD PCI config register
 * to get the IGD out of low power state before VT-d translation
 * enable/disable and IOTLB flushes.
 */

/*
 * map IGD MMIO+0x2000 page to allow Xen access to IGD 3D register.
 */
static void __init map_igd_reg(void)
{
    u64 igd_mmio;

    if ( !is_cantiga_b3 && !is_snb_gfx )
        return;

    if ( igd_reg_va )
        return;

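    /*
     * The IGD MMIO BAR is 64 bits wide: the high half is in
     * PCI_BASE_ADDRESS_1 and the low half in PCI_BASE_ADDRESS_0.  Map
     * 0x3000 bytes so that the registers at MMIO+0x2xxx used by the
     * quirks below are all covered.
     */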
    igd_mmio   = pci_conf_read32(IGD_DEV, PCI_BASE_ADDRESS_1);
    igd_mmio <<= 32;
    igd_mmio  += pci_conf_read32(IGD_DEV, PCI_BASE_ADDRESS_0);
    igd_reg_va = ioremap(igd_mmio & IGD_BAR_MASK, 0x3000);
}

/*
 * force IGD to exit low power mode by accessing an IGD 3D register.
 */
static int cantiga_vtd_ops_preamble(struct vtd_iommu *iommu)
{
    struct acpi_drhd_unit *drhd = iommu->drhd;

    if ( !is_igd_drhd(drhd) || !is_cantiga_b3 )
        return 0;

    if ( !igd_reg_va )
        return 0;

    /*
     * Read IGD register at IGD MMIO + 0x20A4 to force IGD
     * to exit low power state.
     */
    return *(volatile int *)(igd_reg_va + 0x20A4);
}

/*
 * Sandybridge RC6 power management inhibit state erratum.
 * This can cause high power consumption.
 * The workaround is to prevent graphics from entering the RC6
 * state while doing VT-d IOTLB operations: inhibit RC6, do the
 * VT-d IOTLB operation, and then re-enable the RC6 state.
 *
 * This quirk is enabled with the snb_igd_quirk command
 * line parameter.  Specifying snb_igd_quirk with no value
 * (or any of the standard boolean values) enables this
 * quirk and sets the timeout to the legacy timeout of
 * 1000 msec.  Setting this parameter to the string
 * "cap" enables this quirk and sets the timeout to
 * the theoretical maximum of 670 msec.  Setting this
 * parameter to a numerical value enables the quirk and
 * sets the timeout to that number of msecs.
 */
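/*
 * Example usages (illustrative values):
 *   snb_igd_quirk        - quirk enabled, 1000 msec legacy timeout
 *   snb_igd_quirk=cap    - quirk enabled, 670 msec timeout
 *   snb_igd_quirk=100    - quirk enabled, 100 msec timeout
 */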
static void snb_vtd_ops_preamble(struct vtd_iommu *iommu)
{
    struct acpi_drhd_unit *drhd = iommu->drhd;
    s_time_t start_time;

    if ( !is_igd_drhd(drhd) || !is_snb_gfx )
        return;

    if ( !igd_reg_va )
        return;

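    /*
     * Ask the GPU not to enter RC6 while the VT-d operation runs, then
     * poll the idle-handshake status below until it clears or
     * snb_igd_timeout expires.  (Register offsets come from the erratum
     * workaround and are not otherwise documented here.)
     */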
    *(volatile u32 *)(igd_reg_va + 0x2054) = 0x000FFFFF;
    *(volatile u32 *)(igd_reg_va + 0x2700) = 0;

    start_time = NOW();
    while ( (*(volatile u32 *)(igd_reg_va + 0x22AC) & 0xF) != 0 )
    {
        if ( NOW() > start_time + snb_igd_timeout )
        {
            dprintk(XENLOG_INFO VTDPREFIX,
                    "snb_vtd_ops_preamble: failed to disable idle handshake\n");
            break;
        }
        cpu_relax();
    }

    *(volatile u32 *)(igd_reg_va + 0x2050) = 0x10001;
}

static void snb_vtd_ops_postamble(struct vtd_iommu *iommu)
{
    struct acpi_drhd_unit *drhd = iommu->drhd;

    if ( !is_igd_drhd(drhd) || !is_snb_gfx )
        return;

    if ( !igd_reg_va )
        return;

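    /* Allow RC6 entry again now that the VT-d operation has completed. */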
    *(volatile u32 *)(igd_reg_va + 0x2054) = 0xA;
    *(volatile u32 *)(igd_reg_va + 0x2050) = 0x10000;
}

/*
 * call before VT-d translation enable and IOTLB flush operations.
 */

void vtd_ops_preamble_quirk(struct vtd_iommu *iommu)
{
    cantiga_vtd_ops_preamble(iommu);
    if ( snb_igd_timeout != 0 )
    {
        spin_lock(&igd_lock);

        /* match unlock in postamble */
        snb_vtd_ops_preamble(iommu);
    }
}

/*
 * call after VT-d translation enable and IOTLB flush operations.
 */
void vtd_ops_postamble_quirk(struct vtd_iommu *iommu)
{
    if ( snb_igd_timeout != 0 )
    {
        snb_vtd_ops_postamble(iommu);

        /* match the lock in preamble */
        spin_unlock(&igd_lock);
    }
}

static int __init parse_snb_timeout(const char *s)
{
    int t;
    const char *q = NULL;

    t = parse_bool(s, NULL);
    if ( t < 0 )
    {
        if ( *s == '\0' )
            t = SNB_IGD_TIMEOUT_LEGACY;
        else if ( strcmp(s, "cap") == 0 )
            t = SNB_IGD_TIMEOUT;
        else
            t = strtoul(s, &q, 0);
    }
    else
        t = t ? SNB_IGD_TIMEOUT_LEGACY : 0;
    snb_igd_timeout = MILLISECS(t);

    return (q && *q) ? -EINVAL : 0;
}
custom_param("snb_igd_quirk", parse_snb_timeout);

/* 5500/5520/X58 Chipset Interrupt remapping errata, for stepping B-3.
 * Fixed in stepping C-2. */
static void __init tylersburg_intremap_quirk(void)
{
    uint32_t bus, device;
    uint8_t rev;

    for ( bus = 0; bus < 0x100; bus++ )
    {
        /* Match on System Management Registers on Device 20 Function 0 */
        device = pci_conf_read32(PCI_SBDF(0, bus, 20, 0), PCI_VENDOR_ID);
        rev = pci_conf_read8(PCI_SBDF(0, bus, 20, 0), PCI_REVISION_ID);

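        /* Device 0x342e/vendor 0x8086; revision 0x13 is the affected B-3 stepping. */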
        if ( rev == 0x13 && device == 0x342e8086 )
        {
            printk(XENLOG_WARNING VTDPREFIX
                   "Disabling IOMMU due to Intel 5500/5520/X58 Chipset errata #47, #53\n");
            iommu_enable = 0;
            break;
        }
    }
}

/* initialize platform identification flags */
void __init platform_quirks_init(void)
{
    ioh_id = pci_conf_read32(IOH_DEV, 0);
    igd_id = pci_conf_read32(IGD_DEV, 0);

    /* Mobile 4 Series Chipset neglects to set RWBF capability. */
    if ( ioh_id == 0x2a408086 )
    {
        dprintk(XENLOG_INFO VTDPREFIX, "DMAR: Forcing write-buffer flush\n");
        rwbf_quirk = 1;
    }

    /* initialize cantiga B3 identification */
    cantiga_b3_errata_init();

    snb_errata_init();

    /* ioremap IGD MMIO+0x2000 page */
    map_igd_reg();

    /* Tylersburg interrupt remap quirk */
    if ( iommu_intremap )
        tylersburg_intremap_quirk();
}

/*
 * QUIRK to work around a wifi direct assignment issue.  This issue
 * impacts only cases where an Intel integrated wifi device is
 * directly assigned to a guest.
 *
 * The workaround is to map ME phantom device 0:3.7 or 0:22.7
 * to the ME VT-d engine if we detect that the user is trying to
 * directly assign the Intel integrated wifi device to a guest.
 */

static int __must_check map_me_phantom_function(struct domain *domain,
                                                u32 dev, int map)
{
    struct acpi_drhd_unit *drhd;
    struct pci_dev *pdev;
    int rc;

    /* find ME VT-d engine based on a real ME device */
    pdev = pci_get_pdev(0, 0, PCI_DEVFN(dev, 0));
    drhd = acpi_find_matched_drhd_unit(pdev);

    /* map or unmap ME phantom function */
    if ( map )
        rc = domain_context_mapping_one(domain, drhd->iommu, 0,
                                        PCI_DEVFN(dev, 7), NULL);
    else
        rc = domain_context_unmap_one(domain, drhd->iommu, 0,
                                      PCI_DEVFN(dev, 7));

    return rc;
}

int me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map)
{
    u32 id;
    int rc = 0;

    id = pci_conf_read32(PCI_SBDF(0, 0, 0, 0), 0);
    if ( IS_CTG(id) )
    {
        /* quit if ME does not exist */
        if ( pci_conf_read32(PCI_SBDF(0, 0, 3, 0), 0) == 0xffffffff )
            return 0;

        /* if device is WLAN device, map ME phantom device 0:3.7 */
        id = pci_conf_read32(PCI_SBDF3(0, bus, devfn), 0);
        switch (id)
        {
            case 0x42328086:
            case 0x42358086:
            case 0x42368086:
            case 0x42378086:
            case 0x423a8086:
            case 0x423b8086:
            case 0x423c8086:
            case 0x423d8086:
                rc = map_me_phantom_function(domain, 3, map);
                break;
            default:
                break;
        }
    }
    else if ( IS_ILK(id) || IS_CPT(id) )
    {
        /* quit if ME does not exist */
        if ( pci_conf_read32(PCI_SBDF(0, 0, 22, 0), 0) == 0xffffffff )
            return 0;

        /* if device is WLAN device, map ME phantom device 0:22.7 */
        id = pci_conf_read32(PCI_SBDF3(0, bus, devfn), 0);
        switch (id)
        {
            case 0x00878086:        /* Kilmer Peak */
            case 0x00898086:
            case 0x00828086:        /* Taylor Peak */
            case 0x00858086:
            case 0x008F8086:        /* Rainbow Peak */
            case 0x00908086:
            case 0x00918086:
            case 0x42388086:        /* Puma Peak */
            case 0x422b8086:
            case 0x422c8086:
                rc = map_me_phantom_function(domain, 22, map);
                break;
            default:
                break;
        }
    }

    return rc;
}

void pci_vtd_quirk(const struct pci_dev *pdev)
{
    int seg = pdev->seg;
    int bus = pdev->bus;
    int dev = PCI_SLOT(pdev->devfn);
    int func = PCI_FUNC(pdev->devfn);
    int pos;
    bool_t ff;
    u32 val, val2;
    u64 bar;
    paddr_t pa;
    const char *action;

    if ( pci_conf_read16(pdev->sbdf, PCI_VENDOR_ID) != PCI_VENDOR_ID_INTEL )
        return;

    switch ( pci_conf_read16(pdev->sbdf, PCI_DEVICE_ID) )
    {
    /*
     * Mask reporting Intel VT-d faults to IOH core logic:
     *   - Some platforms escalate VT-d faults to platform errors.
     *   - This can cause system failure upon non-fatal VT-d faults.
     *   - Potential security issue if a malicious guest triggers VT-d faults.
     */
    case 0x342e: /* Tylersburg chipset (Nehalem / Westmere systems) */
    case 0x3728: /* Xeon C5500/C3500 (JasperForest) */
    case 0x3c28: /* Sandybridge */
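        /*
         * Setting bit 31 of the (chipset specific) config register at
         * offset 0x1AC masks the VT-d error reporting described above.
         */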
        val = pci_conf_read32(pdev->sbdf, 0x1AC);
        pci_conf_write32(pdev->sbdf, 0x1AC, val | (1 << 31));
        printk(XENLOG_INFO "Masked VT-d error signaling on %04x:%02x:%02x.%u\n",
               seg, bus, dev, func);
        break;

    /* Tylersburg (EP)/Boxboro (MP) chipsets (NHM-EP/EX, WSM-EP/EX) */
    case 0x3400 ... 0x3407: /* host bridges */
    case 0x3408 ... 0x3411: case 0x3420 ... 0x3421: /* root ports */
    /* JasperForest (Intel Xeon Processor C5500/C3500) */
    case 0x3700 ... 0x370f: /* host bridges */
    case 0x3720 ... 0x3724: /* root ports */
    /* Sandybridge-EP (Romley) */
    case 0x3c00: /* host bridge */
    case 0x3c01 ... 0x3c0b: /* root ports */
        pos = pci_find_ext_capability(seg, bus, pdev->devfn,
                                      PCI_EXT_CAP_ID_ERR);
        if ( !pos )
        {
            pos = pci_find_ext_capability(seg, bus, pdev->devfn,
                                          PCI_EXT_CAP_ID_VNDR);
            while ( pos )
            {
                val = pci_conf_read32(pdev->sbdf, pos + PCI_VNDR_HEADER);
                if ( PCI_VNDR_HEADER_ID(val) == 4 && PCI_VNDR_HEADER_REV(val) == 1 )
                {
                    pos += PCI_VNDR_HEADER;
                    break;
                }
                pos = pci_find_next_ext_capability(seg, bus, pdev->devfn, pos,
                                                   PCI_EXT_CAP_ID_VNDR);
            }
            ff = 0;
        }
        else
            ff = pcie_aer_get_firmware_first(pdev);
        if ( !pos )
        {
            printk(XENLOG_WARNING "%04x:%02x:%02x.%u without AER capability?\n",
                   seg, bus, dev, func);
            break;
        }

        val = pci_conf_read32(pdev->sbdf, pos + PCI_ERR_UNCOR_MASK);
        val2 = pci_conf_read32(pdev->sbdf, pos + PCI_ERR_COR_MASK);
        if ( (val & PCI_ERR_UNC_UNSUP) && (val2 & PCI_ERR_COR_ADV_NFAT) )
            action = "Found masked";
        else if ( !ff )
        {
            pci_conf_write32(pdev->sbdf, pos + PCI_ERR_UNCOR_MASK,
                             val | PCI_ERR_UNC_UNSUP);
            pci_conf_write32(pdev->sbdf, pos + PCI_ERR_COR_MASK,
                             val2 | PCI_ERR_COR_ADV_NFAT);
            action = "Masked";
        }
        else
            action = "Must not mask";

        /* XPUNCERRMSK Send Completion with Unsupported Request */
        val = pci_conf_read32(pdev->sbdf, 0x20c);
        pci_conf_write32(pdev->sbdf, 0x20c, val | (1 << 4));

        printk(XENLOG_INFO "%s UR signaling on %04x:%02x:%02x.%u\n",
               action, seg, bus, dev, func);
        break;

    case 0x0040: case 0x0044: case 0x0048: /* Nehalem/Westmere */
    case 0x0100: case 0x0104: case 0x0108: /* Sandybridge */
    case 0x0150: case 0x0154: case 0x0158: /* Ivybridge */
    case 0x0a00: case 0x0a04: case 0x0a08: case 0x0a0f: /* Haswell ULT */
    case 0x0c00: case 0x0c04: case 0x0c08: case 0x0c0f: /* Haswell */
    case 0x0d00: case 0x0d04: case 0x0d08: case 0x0d0f: /* Haswell */
    case 0x1600: case 0x1604: case 0x1608: case 0x160f: /* Broadwell */
    case 0x1610: case 0x1614: case 0x1618: /* Broadwell */
    case 0x1900: case 0x1904: case 0x1908: case 0x190c: case 0x190f: /* Skylake */
    case 0x1910: case 0x1918: case 0x191f: /* Skylake */
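        /*
         * Host bridge devices: read the 64-bit DMIBAR from config
         * offsets 0x68/0x6c and, if it points at reserved RAM, set the
         * bit that masks UR signaling (bit 20 of the dword at offset
         * 0x1c8 in the DMI register block).
         */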
        bar = pci_conf_read32(pdev->sbdf, 0x6c);
        bar = (bar << 32) | pci_conf_read32(pdev->sbdf, 0x68);
        pa = bar & 0x7ffffff000UL; /* bits 12...38 */
        if ( (bar & 1) && pa &&
             page_is_ram_type(paddr_to_pfn(pa), RAM_TYPE_RESERVED) )
        {
            u32 __iomem *va = ioremap(pa, PAGE_SIZE);

            if ( va )
            {
                __set_bit(0x1c8 * 8 + 20, va);
                iounmap(va);
                printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n",
                       seg, bus, dev, func);
            }
            else
                printk(XENLOG_ERR "Could not map %"PRIpaddr" for %04x:%02x:%02x.%u\n",
                       pa, seg, bus, dev, func);
        }
        else
            printk(XENLOG_WARNING "Bogus DMIBAR %#"PRIx64" on %04x:%02x:%02x.%u\n",
                   bar, seg, bus, dev, func);
        break;
    }
}

void __init quirk_iommu_caps(struct vtd_iommu *iommu)
{
    /*
     * IOMMU Quirks:
     *
     * SandyBridge IOMMUs claim support for 2M and 1G superpages, but don't
     * implement superpages internally.
     *
     * There are issues changing the walk length under in-flight DMA, which
     * has manifested as incompatibility between EPT/IOMMU sharing and the
     * workaround for CVE-2018-12207 / XSA-304.  Hide the superpages
     * capabilities in the IOMMU, which will prevent Xen from sharing the EPT
     * and IOMMU pagetables.
     *
     * Detection of SandyBridge unfortunately has to be done by processor
     * model because the client parts don't expose their IOMMUs as PCI devices
     * we could match with a Device ID.
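     *
     * The 2M/1G superpage capabilities are advertised in the SLLPS field
     * (Capability register bits 37:34), hence the mask below.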
     */
    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
         boot_cpu_data.x86 == 6 &&
         (boot_cpu_data.x86_model == 0x2a ||
          boot_cpu_data.x86_model == 0x2d) )
        iommu->cap &= ~(0xful << 34);
}