/*
 * io.c: Handling I/O and interrupts.
 *
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005, International Business Machines Corporation.
 * Copyright (c) 2008, Citrix Systems, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/trace.h>
#include <xen/event.h>
#include <xen/hypercall.h>
#include <xen/vpci.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/ioreq.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/trace.h>
#include <asm/hvm/emulate.h>
#include <public/sched.h>
#include <xen/iocap.h>
#include <public/hvm/ioreq.h>

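/*
 * Tell all registered ioreq servers (device models) that the guest's time
 * offset has changed.
 */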
void send_timeoffset_req(unsigned long timeoff)
{
    ioreq_t p = {
        .type = IOREQ_TYPE_TIMEOFFSET,
        .size = 8,
        .count = 1,
        .dir = IOREQ_WRITE,
        .data = timeoff,
        .state = STATE_IOREQ_READY,
    };

    if ( timeoff == 0 )
        return;

    if ( hvm_broadcast_ioreq(&p, true) != 0 )
        gprintk(XENLOG_ERR, "Unsuccessful timeoffset update\n");
}

/* Ask ioemu mapcache to invalidate mappings. */
void send_invalidate_req(void)
{
    ioreq_t p = {
        .type = IOREQ_TYPE_INVALIDATE,
        .size = 4,
        .dir = IOREQ_WRITE,
        .data = ~0UL, /* flush all */
    };

    if ( hvm_broadcast_ioreq(&p, false) != 0 )
        gprintk(XENLOG_ERR, "Unsuccessful map-cache invalidate\n");
}

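/*
 * Emulate a single instruction for the current vCPU.  Returns false if the
 * instruction is unhandleable; the emulation state is dumped in that case.
 */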
bool hvm_emulate_one_insn(hvm_emulate_validate_t *validate, const char *descr)
{
    struct hvm_emulate_ctxt ctxt;
    int rc;

    hvm_emulate_init_once(&ctxt, validate, guest_cpu_user_regs());

    switch ( rc = hvm_emulate_one(&ctxt, HVMIO_no_completion) )
    {
    case X86EMUL_UNHANDLEABLE:
        hvm_dump_emulation_state(XENLOG_G_WARNING, descr, &ctxt, rc);
        return false;

    case X86EMUL_UNRECOGNIZED:
        hvm_dump_emulation_state(XENLOG_G_WARNING, descr, &ctxt, rc);
        hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
        break;

    case X86EMUL_EXCEPTION:
        hvm_inject_event(&ctxt.ctxt.event);
        break;
    }

    hvm_emulate_writeback(&ctxt);

    return true;
}

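/*
 * Record the details of a translated MMIO access (guest linear address, GFN
 * and access kind) before entering the generic MMIO emulation path.
 */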
bool handle_mmio_with_translation(unsigned long gla, unsigned long gpfn,
                                  struct npfec access)
{
    struct hvm_vcpu_io *vio = &current->arch.hvm.hvm_io;

    vio->mmio_access = access.gla_valid &&
                       access.kind == npfec_kind_with_gla
                       ? access : (struct npfec){};
    vio->mmio_gla = gla & PAGE_MASK;
    vio->mmio_gpfn = gpfn;
    return handle_mmio();
}

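/*
 * Handle a port I/O access intercepted from the guest.  Returns false if the
 * access could not be completed and the instruction must not be retired
 * (i.e. RIP should not be advanced).
 */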
bool handle_pio(uint16_t port, unsigned int size, int dir)
{
    struct vcpu *curr = current;
    struct hvm_vcpu_io *vio = &curr->arch.hvm.hvm_io;
    unsigned int data;
    int rc;

    ASSERT((size - 1) < 4 && size != 3);

    if ( dir == IOREQ_WRITE )
        data = guest_cpu_user_regs()->eax;
    else
        data = ~0; /* Avoid any risk of stack rubble. */

    rc = hvmemul_do_pio_buffer(port, size, dir, &data);

    if ( hvm_ioreq_needs_completion(&vio->io_req) )
        vio->io_completion = HVMIO_pio_completion;

    switch ( rc )
    {
    case X86EMUL_OKAY:
        if ( dir == IOREQ_READ )
        {
            if ( size == 4 ) /* Needs zero extension. */
                guest_cpu_user_regs()->rax = data;
            else
                memcpy(&guest_cpu_user_regs()->rax, &data, size);
        }
        break;

    case X86EMUL_RETRY:
        /*
         * We should not advance RIP/EIP if the domain is shutting down or
         * if X86EMUL_RETRY has been returned by an internal handler.
         */
        if ( curr->domain->is_shutting_down || !hvm_io_pending(curr) )
            return false;
        break;

    default:
        gprintk(XENLOG_ERR, "Unexpected PIO status %d, port %#x %s 0x%0*x\n",
                rc, port, dir == IOREQ_WRITE ? "write" : "read",
                size * 2, data & ((1u << (size * 8)) - 1));
        domain_crash(curr->domain);
        return false;
    }

    return true;
}

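/*
 * Guest-to-machine I/O port pass-through: accesses to a port range on the
 * domain's g2m_ioport_list are forwarded to the machine port range it maps
 * onto.
 */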
static bool_t g2m_portio_accept(const struct hvm_io_handler *handler,
                                const ioreq_t *p)
{
    struct vcpu *curr = current;
    const struct hvm_domain *hvm = &curr->domain->arch.hvm;
    struct hvm_vcpu_io *vio = &curr->arch.hvm.hvm_io;
    struct g2m_ioport *g2m_ioport;
    unsigned int start, end;

    list_for_each_entry( g2m_ioport, &hvm->g2m_ioport_list, list )
    {
        start = g2m_ioport->gport;
        end = start + g2m_ioport->np;
        if ( (p->addr >= start) && (p->addr + p->size <= end) )
        {
            vio->g2m_ioport = g2m_ioport;
            return 1;
        }
    }

    return 0;
}

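/* Forward a read of a passed-through guest port to the machine port. */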
static int g2m_portio_read(const struct hvm_io_handler *handler,
                           uint64_t addr, uint32_t size, uint64_t *data)
{
    struct hvm_vcpu_io *vio = &current->arch.hvm.hvm_io;
    const struct g2m_ioport *g2m_ioport = vio->g2m_ioport;
    unsigned int mport = (addr - g2m_ioport->gport) + g2m_ioport->mport;

    switch ( size )
    {
    case 1:
        *data = inb(mport);
        break;
    case 2:
        *data = inw(mport);
        break;
    case 4:
        *data = inl(mport);
        break;
    default:
        BUG();
    }

    return X86EMUL_OKAY;
}

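/* Forward a write to a passed-through guest port to the machine port. */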
static int g2m_portio_write(const struct hvm_io_handler *handler,
                            uint64_t addr, uint32_t size, uint64_t data)
{
    struct hvm_vcpu_io *vio = &current->arch.hvm.hvm_io;
    const struct g2m_ioport *g2m_ioport = vio->g2m_ioport;
    unsigned int mport = (addr - g2m_ioport->gport) + g2m_ioport->mport;

    switch ( size )
    {
    case 1:
        outb(data, mport);
        break;
    case 2:
        outw(data, mport);
        break;
    case 4:
        outl(data, mport);
        break;
    default:
        BUG();
    }

    return X86EMUL_OKAY;
}

static const struct hvm_io_ops g2m_portio_ops = {
    .accept = g2m_portio_accept,
    .read = g2m_portio_read,
    .write = g2m_portio_write
};

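/* Register the guest-to-machine port pass-through handler for a domain. */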
void register_g2m_portio_handler(struct domain *d)
{
    struct hvm_io_handler *handler = hvm_next_io_handler(d);

    if ( handler == NULL )
        return;

    handler->type = IOREQ_TYPE_PIO;
    handler->ops = &g2m_portio_ops;
}

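/*
 * Decode a PCI configuration space access made through the 0xcf8/0xcfc
 * mechanism: fill in the SBDF from the latched CF8 value and return the
 * register offset, including the low bits taken from the data port address.
 */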
unsigned int hvm_pci_decode_addr(unsigned int cf8, unsigned int addr,
                                 pci_sbdf_t *sbdf)
{
    ASSERT(CF8_ENABLED(cf8));

    sbdf->bdf = CF8_BDF(cf8);
    sbdf->seg = 0;
    /*
     * NB: the lower 2 bits of the register address are fetched from the
     * offset into the 0xcfc register when reading/writing to it.
     */
    return CF8_ADDR_LO(cf8) | (addr & 3);
}

/* Sanity-check a config space access: size and alignment. */
static bool vpci_access_allowed(unsigned int reg, unsigned int len)
{
    /* Check access size. */
    if ( len != 1 && len != 2 && len != 4 && len != 8 )
        return false;

    /* Check that access is size aligned. */
    if ( (reg & (len - 1)) )
        return false;

    return true;
}

/* vPCI config space IO ports handlers (0xcf8/0xcfc). */
static bool vpci_portio_accept(const struct hvm_io_handler *handler,
                               const ioreq_t *p)
{
    return (p->addr == 0xcf8 && p->size == 4) || (p->addr & ~3) == 0xcfc;
}

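/*
 * Reads of 0xcf8 return the latched CF8 value; reads of 0xcfc(+0..3) are
 * forwarded to vPCI, provided CF8 has its enable bit set.
 */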
static int vpci_portio_read(const struct hvm_io_handler *handler,
                            uint64_t addr, uint32_t size, uint64_t *data)
{
    const struct domain *d = current->domain;
    unsigned int reg;
    pci_sbdf_t sbdf;
    uint32_t cf8;

    *data = ~(uint64_t)0;

    if ( addr == 0xcf8 )
    {
        ASSERT(size == 4);
        *data = d->arch.hvm.pci_cf8;
        return X86EMUL_OKAY;
    }

    ASSERT((addr & ~3) == 0xcfc);
    cf8 = ACCESS_ONCE(d->arch.hvm.pci_cf8);
    if ( !CF8_ENABLED(cf8) )
        return X86EMUL_UNHANDLEABLE;

    reg = hvm_pci_decode_addr(cf8, addr, &sbdf);

    if ( !vpci_access_allowed(reg, size) )
        return X86EMUL_OKAY;

    *data = vpci_read(sbdf, reg, size);

    return X86EMUL_OKAY;
}

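/* Writes mirror the read path: 0xcf8 latches, 0xcfc is forwarded to vPCI. */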
static int vpci_portio_write(const struct hvm_io_handler *handler,
                             uint64_t addr, uint32_t size, uint64_t data)
{
    struct domain *d = current->domain;
    unsigned int reg;
    pci_sbdf_t sbdf;
    uint32_t cf8;

    if ( addr == 0xcf8 )
    {
        ASSERT(size == 4);
        d->arch.hvm.pci_cf8 = data;
        return X86EMUL_OKAY;
    }

    ASSERT((addr & ~3) == 0xcfc);
    cf8 = ACCESS_ONCE(d->arch.hvm.pci_cf8);
    if ( !CF8_ENABLED(cf8) )
        return X86EMUL_UNHANDLEABLE;

    reg = hvm_pci_decode_addr(cf8, addr, &sbdf);

    if ( !vpci_access_allowed(reg, size) )
        return X86EMUL_OKAY;

    vpci_write(sbdf, reg, size, data);

    return X86EMUL_OKAY;
}

static const struct hvm_io_ops vpci_portio_ops = {
    .accept = vpci_portio_accept,
    .read = vpci_portio_read,
    .write = vpci_portio_write,
};

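/* Register the 0xcf8/0xcfc port intercepts for domains using vPCI. */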
void register_vpci_portio_handler(struct domain *d)
{
    struct hvm_io_handler *handler;

    if ( !has_vpci(d) )
        return;

    handler = hvm_next_io_handler(d);
    if ( !handler )
        return;

    handler->type = IOREQ_TYPE_PIO;
    handler->ops = &vpci_portio_ops;
}

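/*
 * A PCI MMCFG (ECAM) region: [addr, addr + size) covers the configuration
 * space of the buses starting at start_bus on the given segment.
 */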
struct hvm_mmcfg {
    struct list_head next;
    paddr_t addr;
    unsigned int size;
    uint16_t segment;
    uint8_t start_bus;
};

/* Handlers to trap PCI MMCFG config accesses. */
static const struct hvm_mmcfg *vpci_mmcfg_find(const struct domain *d,
                                               paddr_t addr)
{
    const struct hvm_mmcfg *mmcfg;

    list_for_each_entry ( mmcfg, &d->arch.hvm.mmcfg_regions, next )
        if ( addr >= mmcfg->addr && addr < mmcfg->addr + mmcfg->size )
            return mmcfg;

    return NULL;
}

bool vpci_is_mmcfg_address(const struct domain *d, paddr_t addr)
{
    return vpci_mmcfg_find(d, addr);
}

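/*
 * Translate an address inside an MMCFG region into an SBDF plus the offset
 * into that function's configuration space.
 */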
static unsigned int vpci_mmcfg_decode_addr(const struct hvm_mmcfg *mmcfg,
                                           paddr_t addr, pci_sbdf_t *sbdf)
{
    addr -= mmcfg->addr;
    sbdf->bdf = MMCFG_BDF(addr);
    sbdf->bus += mmcfg->start_bus;
    sbdf->seg = mmcfg->segment;

    return addr & (PCI_CFG_SPACE_EXP_SIZE - 1);
}

static int vpci_mmcfg_accept(struct vcpu *v, unsigned long addr)
{
    struct domain *d = v->domain;
    bool found;

    read_lock(&d->arch.hvm.mmcfg_lock);
    found = vpci_mmcfg_find(d, addr);
    read_unlock(&d->arch.hvm.mmcfg_lock);

    return found;
}

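/*
 * The region is looked up again under the lock, as it may have been removed
 * since the ->check hook ran.
 */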
static int vpci_mmcfg_read(struct vcpu *v, unsigned long addr,
                           unsigned int len, unsigned long *data)
{
    struct domain *d = v->domain;
    const struct hvm_mmcfg *mmcfg;
    unsigned int reg;
    pci_sbdf_t sbdf;

    *data = ~0ul;

    read_lock(&d->arch.hvm.mmcfg_lock);
    mmcfg = vpci_mmcfg_find(d, addr);
    if ( !mmcfg )
    {
        read_unlock(&d->arch.hvm.mmcfg_lock);
        return X86EMUL_RETRY;
    }

    reg = vpci_mmcfg_decode_addr(mmcfg, addr, &sbdf);
    read_unlock(&d->arch.hvm.mmcfg_lock);

    if ( !vpci_access_allowed(reg, len) ||
         (reg + len) > PCI_CFG_SPACE_EXP_SIZE )
        return X86EMUL_OKAY;

    /*
     * According to the PCIe 3.1A specification:
     *  - Configuration Reads and Writes must usually be DWORD or smaller
     *    in size.
     *  - Because Root Complex implementations are not required to support
     *    accesses to a RCRB that cross DW boundaries [...] software
     *    should take care not to cause the generation of such accesses
     *    when accessing a RCRB unless the Root Complex will support the
     *    access.
     *  Xen however supports 8byte accesses by splitting them into two
     *  4byte accesses.
     */
    *data = vpci_read(sbdf, reg, min(4u, len));
    if ( len == 8 )
        *data |= (uint64_t)vpci_read(sbdf, reg + 4, 4) << 32;

    return X86EMUL_OKAY;
}

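/*
 * Same structure as the read handler; 8-byte writes are split into two
 * 4-byte vPCI writes.
 */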
static int vpci_mmcfg_write(struct vcpu *v, unsigned long addr,
                            unsigned int len, unsigned long data)
{
    struct domain *d = v->domain;
    const struct hvm_mmcfg *mmcfg;
    unsigned int reg;
    pci_sbdf_t sbdf;

    read_lock(&d->arch.hvm.mmcfg_lock);
    mmcfg = vpci_mmcfg_find(d, addr);
    if ( !mmcfg )
    {
        read_unlock(&d->arch.hvm.mmcfg_lock);
        return X86EMUL_RETRY;
    }

    reg = vpci_mmcfg_decode_addr(mmcfg, addr, &sbdf);
    read_unlock(&d->arch.hvm.mmcfg_lock);

    if ( !vpci_access_allowed(reg, len) ||
         (reg + len) > PCI_CFG_SPACE_EXP_SIZE )
        return X86EMUL_OKAY;

    vpci_write(sbdf, reg, min(4u, len), data);
    if ( len == 8 )
        vpci_write(sbdf, reg + 4, 4, data >> 32);

    return X86EMUL_OKAY;
}

static const struct hvm_mmio_ops vpci_mmcfg_ops = {
    .check = vpci_mmcfg_accept,
    .read = vpci_mmcfg_read,
    .write = vpci_mmcfg_write,
};

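/*
 * Register an MMCFG region for the hardware domain.  Overlapping regions are
 * rejected unless they exactly match an existing registration.
 */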
int register_vpci_mmcfg_handler(struct domain *d, paddr_t addr,
                                unsigned int start_bus, unsigned int end_bus,
                                unsigned int seg)
{
    struct hvm_mmcfg *mmcfg, *new;

    ASSERT(is_hardware_domain(d));

    if ( start_bus > end_bus )
        return -EINVAL;

    new = xmalloc(struct hvm_mmcfg);
    if ( !new )
        return -ENOMEM;

    new->addr = addr + (start_bus << 20);
    new->start_bus = start_bus;
    new->segment = seg;
    new->size = (end_bus - start_bus + 1) << 20;

    write_lock(&d->arch.hvm.mmcfg_lock);
    list_for_each_entry ( mmcfg, &d->arch.hvm.mmcfg_regions, next )
        if ( new->addr < mmcfg->addr + mmcfg->size &&
             mmcfg->addr < new->addr + new->size )
        {
            int ret = -EEXIST;

            if ( new->addr == mmcfg->addr &&
                 new->start_bus == mmcfg->start_bus &&
                 new->segment == mmcfg->segment &&
                 new->size == mmcfg->size )
                ret = 0;
            write_unlock(&d->arch.hvm.mmcfg_lock);
            xfree(new);
            return ret;
        }

    if ( list_empty(&d->arch.hvm.mmcfg_regions) )
        register_mmio_handler(d, &vpci_mmcfg_ops);

    list_add(&new->next, &d->arch.hvm.mmcfg_regions);
    write_unlock(&d->arch.hvm.mmcfg_lock);

    return 0;
}

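/* Free all MMCFG regions registered for the domain. */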
void destroy_vpci_mmcfg(struct domain *d)
{
    struct list_head *mmcfg_regions = &d->arch.hvm.mmcfg_regions;

    write_lock(&d->arch.hvm.mmcfg_lock);
    while ( !list_empty(mmcfg_regions) )
    {
        struct hvm_mmcfg *mmcfg = list_first_entry(mmcfg_regions,
                                                   struct hvm_mmcfg, next);

        list_del(&mmcfg->next);
        xfree(mmcfg);
    }
    write_unlock(&d->arch.hvm.mmcfg_lock);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */