/*
 * io.c: Handling I/O and interrupts.
 *
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2005, International Business Machines Corporation.
 * Copyright (c) 2008, Citrix Systems, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/trace.h>
#include <xen/event.h>
#include <xen/hypercall.h>
#include <xen/vpci.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/ioreq.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vpt.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/trace.h>
#include <asm/hvm/emulate.h>
#include <public/sched.h>
#include <xen/iocap.h>
#include <public/hvm/ioreq.h>

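/*
 * Broadcast the guest's time offset to all registered ioreq servers so
 * that emulated time sources can account for it.  A zero offset is
 * ignored; a failed broadcast is logged but otherwise not acted upon.
 */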
void send_timeoffset_req(unsigned long timeoff)
{
    ioreq_t p = {
        .type = IOREQ_TYPE_TIMEOFFSET,
        .size = 8,
        .count = 1,
        .dir = IOREQ_WRITE,
        .data = timeoff,
        .state = STATE_IOREQ_READY,
    };

    if ( timeoff == 0 )
        return;

    if ( hvm_broadcast_ioreq(&p, true) != 0 )
        gprintk(XENLOG_ERR, "Unsuccessful timeoffset update\n");
}

/* Ask ioemu mapcache to invalidate mappings. */
void send_invalidate_req(void)
{
    ioreq_t p = {
        .type = IOREQ_TYPE_INVALIDATE,
        .size = 4,
        .dir = IOREQ_WRITE,
        .data = ~0UL, /* flush all */
    };

    if ( hvm_broadcast_ioreq(&p, false) != 0 )
        gprintk(XENLOG_ERR, "Unsuccessful map-cache invalidate\n");
}

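/*
 * Emulate a single instruction at the current guest instruction pointer,
 * with @validate restricting which instructions are acceptable.  An
 * unhandleable instruction causes the emulation state to be dumped and
 * false to be returned; an unrecognized one is also dumped but results in
 * #UD being injected, while emulation exceptions are forwarded to the
 * guest.
 */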
bool hvm_emulate_one_insn(hvm_emulate_validate_t *validate, const char *descr)
{
    struct hvm_emulate_ctxt ctxt;
    int rc;

    hvm_emulate_init_once(&ctxt, validate, guest_cpu_user_regs());

    switch ( rc = hvm_emulate_one(&ctxt, HVMIO_no_completion) )
    {
    case X86EMUL_UNHANDLEABLE:
        hvm_dump_emulation_state(XENLOG_G_WARNING, descr, &ctxt, rc);
        return false;

    case X86EMUL_UNRECOGNIZED:
        hvm_dump_emulation_state(XENLOG_G_WARNING, descr, &ctxt, rc);
        hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
        break;

    case X86EMUL_EXCEPTION:
        hvm_inject_event(&ctxt.ctxt.event);
        break;
    }

    hvm_emulate_writeback(&ctxt);

    return true;
}

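/*
 * Stash the guest linear and physical addresses of a faulting MMIO access
 * in the vCPU's I/O state before entering the MMIO emulation path.  The
 * access details are only retained when the fault supplied a valid GLA
 * for the access itself (npfec_kind_with_gla).
 */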
bool handle_mmio_with_translation(unsigned long gla, unsigned long gpfn,
                                  struct npfec access)
{
    struct hvm_vcpu_io *vio = &current->arch.hvm.hvm_io;

    vio->mmio_access = access.gla_valid &&
                       access.kind == npfec_kind_with_gla
                       ? access : (struct npfec){};
    vio->mmio_gla = gla & PAGE_MASK;
    vio->mmio_gpfn = gpfn;
    return handle_mmio();
}

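/*
 * Perform a port I/O access on behalf of the guest.  Writes take their
 * value from the guest's %eax; completed reads are merged back into %rax,
 * with a 32-bit read zero-extending as on real hardware.  Returns false
 * when the current instruction must not be retired: the domain is
 * shutting down, a retry was signalled without pending I/O, or an
 * unexpected emulation status crashed the domain.
 */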
bool handle_pio(uint16_t port, unsigned int size, int dir)
{
    struct vcpu *curr = current;
    struct hvm_vcpu_io *vio = &curr->arch.hvm.hvm_io;
    unsigned int data;
    int rc;

    ASSERT((size - 1) < 4 && size != 3);

    if ( dir == IOREQ_WRITE )
        data = guest_cpu_user_regs()->eax;
    else
        data = ~0; /* Avoid any risk of stack rubble. */

    rc = hvmemul_do_pio_buffer(port, size, dir, &data);

    if ( hvm_ioreq_needs_completion(&vio->io_req) )
        vio->io_completion = HVMIO_pio_completion;

    switch ( rc )
    {
    case X86EMUL_OKAY:
        if ( dir == IOREQ_READ )
        {
            if ( size == 4 ) /* Needs zero extension. */
                guest_cpu_user_regs()->rax = data;
            else
                memcpy(&guest_cpu_user_regs()->rax, &data, size);
        }
        break;

    case X86EMUL_RETRY:
        /*
         * We should not advance RIP/EIP if the domain is shutting down or
         * if X86EMUL_RETRY has been returned by an internal handler.
         */
        if ( curr->domain->is_shutting_down || !hvm_io_pending(curr) )
            return false;
        break;

    default:
        gprintk(XENLOG_ERR, "Unexpected PIO status %d, port %#x %s 0x%0*x\n",
                rc, port, dir == IOREQ_WRITE ? "write" : "read",
                size * 2, data & ((1u << (size * 8)) - 1));
        domain_crash(curr->domain);
        return false;
    }

    return true;
}

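/*
 * Guest-to-machine port passthrough.  Accept an I/O request only if it
 * lies entirely within one of the guest port ranges mapped to machine
 * ports for this domain, caching the matching range for the read/write
 * handlers below.
 */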
static bool_t g2m_portio_accept(const struct hvm_io_handler *handler,
                                const ioreq_t *p)
{
    struct vcpu *curr = current;
    const struct hvm_domain *hvm = &curr->domain->arch.hvm;
    struct hvm_vcpu_io *vio = &curr->arch.hvm.hvm_io;
    struct g2m_ioport *g2m_ioport;
    unsigned int start, end;

    list_for_each_entry( g2m_ioport, &hvm->g2m_ioport_list, list )
    {
        start = g2m_ioport->gport;
        end = start + g2m_ioport->np;
        if ( (p->addr >= start) && (p->addr + p->size <= end) )
        {
            vio->g2m_ioport = g2m_ioport;
            return 1;
        }
    }

    return 0;
}

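/*
 * Forward a 1/2/4-byte guest port read to the corresponding machine port,
 * translating the address via the range cached by g2m_portio_accept().
 */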
static int g2m_portio_read(const struct hvm_io_handler *handler,
                           uint64_t addr, uint32_t size, uint64_t *data)
{
    struct hvm_vcpu_io *vio = &current->arch.hvm.hvm_io;
    const struct g2m_ioport *g2m_ioport = vio->g2m_ioport;
    unsigned int mport = (addr - g2m_ioport->gport) + g2m_ioport->mport;

    switch ( size )
    {
    case 1:
        *data = inb(mport);
        break;
    case 2:
        *data = inw(mport);
        break;
    case 4:
        *data = inl(mport);
        break;
    default:
        BUG();
    }

    return X86EMUL_OKAY;
}

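/*
 * Forward a 1/2/4-byte guest port write to the corresponding machine
 * port, mirroring g2m_portio_read().
 */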
static int g2m_portio_write(const struct hvm_io_handler *handler,
                            uint64_t addr, uint32_t size, uint64_t data)
{
    struct hvm_vcpu_io *vio = &current->arch.hvm.hvm_io;
    const struct g2m_ioport *g2m_ioport = vio->g2m_ioport;
    unsigned int mport = (addr - g2m_ioport->gport) + g2m_ioport->mport;

    switch ( size )
    {
    case 1:
        outb(data, mport);
        break;
    case 2:
        outw(data, mport);
        break;
    case 4:
        outl(data, mport);
        break;
    default:
        BUG();
    }

    return X86EMUL_OKAY;
}

static const struct hvm_io_ops g2m_portio_ops = {
    .accept = g2m_portio_accept,
    .read = g2m_portio_read,
    .write = g2m_portio_write
};

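/*
 * Install the guest-to-machine port passthrough handler for @d.  Failure
 * to obtain a handler slot is silently ignored.
 */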
void register_g2m_portio_handler(struct domain *d)
{
    struct hvm_io_handler *handler = hvm_next_io_handler(d);

    if ( handler == NULL )
        return;

    handler->type = IOREQ_TYPE_PIO;
    handler->ops = &g2m_portio_ops;
}

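/*
 * Decode a PCI configuration access made through the legacy 0xcf8/0xcfc
 * mechanism.  CF8 carries the enable bit (bit 31), bus (23:16), device
 * (15:11), function (10:8) and dword-aligned register (7:2); the low two
 * register bits come from the offset within the 0xcfc data window.
 * Fills in @sbdf (segment 0 for this mechanism) and returns the register
 * offset.
 */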
unsigned int hvm_pci_decode_addr(unsigned int cf8, unsigned int addr,
                                 pci_sbdf_t *sbdf)
{
    ASSERT(CF8_ENABLED(cf8));

    sbdf->bdf = CF8_BDF(cf8);
    sbdf->seg = 0;
    /*
     * NB: the lower 2 bits of the register address are fetched from the
     * offset into the 0xcfc register when reading/writing to it.
     */
    return CF8_ADDR_LO(cf8) | (addr & 3);
}

/* Do some sanity checks. */
static bool vpci_access_allowed(unsigned int reg, unsigned int len)
{
    /* Check access size. */
    if ( len != 1 && len != 2 && len != 4 && len != 8 )
        return false;

    /* Check that access is size aligned. */
    if ( (reg & (len - 1)) )
        return false;

    return true;
}

/* vPCI config space IO ports handlers (0xcf8/0xcfc). */
static bool vpci_portio_accept(const struct hvm_io_handler *handler,
                               const ioreq_t *p)
{
    return (p->addr == 0xcf8 && p->size == 4) || (p->addr & ~3) == 0xcfc;
}

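/*
 * Read from 0xcf8 returns the latched CF8 value; a read of the 0xcfc
 * data window is forwarded to vpci_read() only when CF8 has the enable
 * bit set and the access is of an allowed size and alignment.  Disabled
 * accesses are left to other handlers, malformed ones return all ones.
 */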
static int vpci_portio_read(const struct hvm_io_handler *handler,
                            uint64_t addr, uint32_t size, uint64_t *data)
{
    const struct domain *d = current->domain;
    unsigned int reg;
    pci_sbdf_t sbdf;
    uint32_t cf8;

    *data = ~(uint64_t)0;

    if ( addr == 0xcf8 )
    {
        ASSERT(size == 4);
        *data = d->arch.hvm.pci_cf8;
        return X86EMUL_OKAY;
    }

    ASSERT((addr & ~3) == 0xcfc);
    cf8 = ACCESS_ONCE(d->arch.hvm.pci_cf8);
    if ( !CF8_ENABLED(cf8) )
        return X86EMUL_UNHANDLEABLE;

    reg = hvm_pci_decode_addr(cf8, addr, &sbdf);

    if ( !vpci_access_allowed(reg, size) )
        return X86EMUL_OKAY;

    *data = vpci_read(sbdf, reg, size);

    return X86EMUL_OKAY;
}

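/*
 * Write to 0xcf8 latches the new CF8 value; a write to the 0xcfc data
 * window is forwarded to vpci_write() under the same conditions as for
 * reads.  Disabled accesses are left to other handlers, malformed ones
 * are dropped.
 */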
static int vpci_portio_write(const struct hvm_io_handler *handler,
                             uint64_t addr, uint32_t size, uint64_t data)
{
    struct domain *d = current->domain;
    unsigned int reg;
    pci_sbdf_t sbdf;
    uint32_t cf8;

    if ( addr == 0xcf8 )
    {
        ASSERT(size == 4);
        d->arch.hvm.pci_cf8 = data;
        return X86EMUL_OKAY;
    }

    ASSERT((addr & ~3) == 0xcfc);
    cf8 = ACCESS_ONCE(d->arch.hvm.pci_cf8);
    if ( !CF8_ENABLED(cf8) )
        return X86EMUL_UNHANDLEABLE;

    reg = hvm_pci_decode_addr(cf8, addr, &sbdf);

    if ( !vpci_access_allowed(reg, size) )
        return X86EMUL_OKAY;

    vpci_write(sbdf, reg, size, data);

    return X86EMUL_OKAY;
}

static const struct hvm_io_ops vpci_portio_ops = {
    .accept = vpci_portio_accept,
    .read = vpci_portio_read,
    .write = vpci_portio_write,
};

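/* Install the 0xcf8/0xcfc intercepts for domains using vPCI. */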
void register_vpci_portio_handler(struct domain *d)
{
    struct hvm_io_handler *handler;

    if ( !has_vpci(d) )
        return;

    handler = hvm_next_io_handler(d);
    if ( !handler )
        return;

    handler->type = IOREQ_TYPE_PIO;
    handler->ops = &vpci_portio_ops;
}

struct hvm_mmcfg {
    struct list_head next;
    paddr_t addr;
    unsigned int size;
    uint16_t segment;
    uint8_t start_bus;
};

/* Handlers to trap PCI MMCFG config accesses. */
static const struct hvm_mmcfg *vpci_mmcfg_find(const struct domain *d,
                                               paddr_t addr)
{
    const struct hvm_mmcfg *mmcfg;

    list_for_each_entry ( mmcfg, &d->arch.hvm.mmcfg_regions, next )
        if ( addr >= mmcfg->addr && addr < mmcfg->addr + mmcfg->size )
            return mmcfg;

    return NULL;
}

bool vpci_is_mmcfg_address(const struct domain *d, paddr_t addr)
{
    return vpci_mmcfg_find(d, addr);
}

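/*
 * Translate an address inside an MMCFG window into an (sbdf, register)
 * pair using the standard ECAM layout: bits 27:20 of the offset select
 * the bus (biased by the region's starting bus), 19:15 the device,
 * 14:12 the function, and the low 12 bits the register within the 4K
 * extended configuration space.
 */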
static unsigned int vpci_mmcfg_decode_addr(const struct hvm_mmcfg *mmcfg,
                                           paddr_t addr, pci_sbdf_t *sbdf)
{
    addr -= mmcfg->addr;
    sbdf->bdf = MMCFG_BDF(addr);
    sbdf->bus += mmcfg->start_bus;
    sbdf->seg = mmcfg->segment;

    return addr & (PCI_CFG_SPACE_EXP_SIZE - 1);
}

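/*
 * MMIO intercept callbacks: claim an access if it falls within any MMCFG
 * region registered for the domain.
 */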
static int vpci_mmcfg_accept(struct vcpu *v, unsigned long addr)
{
    struct domain *d = v->domain;
    bool found;

    read_lock(&d->arch.hvm.mmcfg_lock);
    found = vpci_mmcfg_find(d, addr);
    read_unlock(&d->arch.hvm.mmcfg_lock);

    return found;
}

static int vpci_mmcfg_read(struct vcpu *v, unsigned long addr,
                           unsigned int len, unsigned long *data)
{
    struct domain *d = v->domain;
    const struct hvm_mmcfg *mmcfg;
    unsigned int reg;
    pci_sbdf_t sbdf;

    *data = ~0ul;

    read_lock(&d->arch.hvm.mmcfg_lock);
    mmcfg = vpci_mmcfg_find(d, addr);
    if ( !mmcfg )
    {
        read_unlock(&d->arch.hvm.mmcfg_lock);
        return X86EMUL_RETRY;
    }

    reg = vpci_mmcfg_decode_addr(mmcfg, addr, &sbdf);
    read_unlock(&d->arch.hvm.mmcfg_lock);

    if ( !vpci_access_allowed(reg, len) ||
         (reg + len) > PCI_CFG_SPACE_EXP_SIZE )
        return X86EMUL_OKAY;

    /*
     * According to the PCIe 3.1A specification:
     *  - Configuration Reads and Writes must usually be DWORD or smaller
     *    in size.
     *  - Because Root Complex implementations are not required to support
     *    accesses to a RCRB that cross DW boundaries [...] software
     *    should take care not to cause the generation of such accesses
     *    when accessing a RCRB unless the Root Complex will support the
     *    access.
     * Xen however supports 8byte accesses by splitting them into two
     * 4byte accesses.
     */
    *data = vpci_read(sbdf, reg, min(4u, len));
    if ( len == 8 )
        *data |= (uint64_t)vpci_read(sbdf, reg + 4, 4) << 32;

    return X86EMUL_OKAY;
}

static int vpci_mmcfg_write(struct vcpu *v, unsigned long addr,
                            unsigned int len, unsigned long data)
{
    struct domain *d = v->domain;
    const struct hvm_mmcfg *mmcfg;
    unsigned int reg;
    pci_sbdf_t sbdf;

    read_lock(&d->arch.hvm.mmcfg_lock);
    mmcfg = vpci_mmcfg_find(d, addr);
    if ( !mmcfg )
    {
        read_unlock(&d->arch.hvm.mmcfg_lock);
        return X86EMUL_RETRY;
    }

    reg = vpci_mmcfg_decode_addr(mmcfg, addr, &sbdf);
    read_unlock(&d->arch.hvm.mmcfg_lock);

    if ( !vpci_access_allowed(reg, len) ||
         (reg + len) > PCI_CFG_SPACE_EXP_SIZE )
        return X86EMUL_OKAY;

    vpci_write(sbdf, reg, min(4u, len), data);
    if ( len == 8 )
        vpci_write(sbdf, reg + 4, 4, data >> 32);

    return X86EMUL_OKAY;
}

static const struct hvm_mmio_ops vpci_mmcfg_ops = {
    .check = vpci_mmcfg_accept,
    .read = vpci_mmcfg_read,
    .write = vpci_mmcfg_write,
};

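/*
 * Register an MMCFG region for the hardware domain.  Re-registering an
 * identical region is a no-op; any other overlap with an existing region
 * fails with -EEXIST.  The MMIO intercept itself is installed when the
 * first region is added.
 */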
int register_vpci_mmcfg_handler(struct domain *d, paddr_t addr,
                                unsigned int start_bus, unsigned int end_bus,
                                unsigned int seg)
{
    struct hvm_mmcfg *mmcfg, *new;

    ASSERT(is_hardware_domain(d));

    if ( start_bus > end_bus )
        return -EINVAL;

    new = xmalloc(struct hvm_mmcfg);
    if ( !new )
        return -ENOMEM;

    new->addr = addr + (start_bus << 20);
    new->start_bus = start_bus;
    new->segment = seg;
    new->size = (end_bus - start_bus + 1) << 20;

    write_lock(&d->arch.hvm.mmcfg_lock);
    list_for_each_entry ( mmcfg, &d->arch.hvm.mmcfg_regions, next )
        if ( new->addr < mmcfg->addr + mmcfg->size &&
             mmcfg->addr < new->addr + new->size )
        {
            int ret = -EEXIST;

            if ( new->addr == mmcfg->addr &&
                 new->start_bus == mmcfg->start_bus &&
                 new->segment == mmcfg->segment &&
                 new->size == mmcfg->size )
                ret = 0;
            write_unlock(&d->arch.hvm.mmcfg_lock);
            xfree(new);
            return ret;
        }

    if ( list_empty(&d->arch.hvm.mmcfg_regions) )
        register_mmio_handler(d, &vpci_mmcfg_ops);

    list_add(&new->next, &d->arch.hvm.mmcfg_regions);
    write_unlock(&d->arch.hvm.mmcfg_lock);

    return 0;
}

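/* Tear down and free all MMCFG regions registered for @d. */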
void destroy_vpci_mmcfg(struct domain *d)
{
    struct list_head *mmcfg_regions = &d->arch.hvm.mmcfg_regions;

    write_lock(&d->arch.hvm.mmcfg_lock);
    while ( !list_empty(mmcfg_regions) )
    {
        struct hvm_mmcfg *mmcfg = list_first_entry(mmcfg_regions,
                                                   struct hvm_mmcfg, next);

        list_del(&mmcfg->next);
        xfree(mmcfg);
    }
    write_unlock(&d->arch.hvm.mmcfg_lock);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */