/******************************************************************************
 * arch/x86/pv/emul-priv-op.c
 *
 * Emulate privileged instructions for PV guests
 *
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/domain_page.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/iocap.h>

#include <asm/amd.h>
#include <asm/debugreg.h>
#include <asm/hpet.h>
#include <asm/hypercall.h>
#include <asm/mc146818rtc.h>
#include <asm/pv/domain.h>
#include <asm/shared.h>

#include <xsm/xsm.h>

#include "../x86_64/mmconfig.h"
#include "emulate.h"
#include "mm.h"

struct priv_op_ctxt {
    struct x86_emulate_ctxt ctxt;
    struct {
        unsigned long base, limit;
    } cs;
    char *io_emul_stub;
    unsigned int bpmatch;
};

/* I/O emulation helpers. Use non-standard calling conventions. */
void nocall load_guest_gprs(struct cpu_user_regs *);
void nocall save_guest_gprs(void);

typedef void io_emul_stub_t(struct cpu_user_regs *);

static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode,
                                          unsigned int port, unsigned int bytes)
{
    /*
     * Construct a stub for IN/OUT emulation.
     *
     * Some platform drivers communicate with the SMM handler using GPRs as a
     * mailbox. Therefore, we must perform the emulation with the hardware
     * domain's registers in view.
     *
     * We write a stub of the following form, using the guest load/save
     * helpers (non-standard ABI), and one of several possible stubs
     * performing the real I/O.
     */
    static const char prologue[] = {
        0x53,       /* push %rbx */
        0x55,       /* push %rbp */
        0x41, 0x54, /* push %r12 */
        0x41, 0x55, /* push %r13 */
        0x41, 0x56, /* push %r14 */
        0x41, 0x57, /* push %r15 */
        0x57,       /* push %rdi (param for save_guest_gprs) */
    };              /* call load_guest_gprs */
                    /* <I/O stub> */
                    /* call save_guest_gprs */
    static const char epilogue[] = {
        0x5f,       /* pop %rdi */
        0x41, 0x5f, /* pop %r15 */
        0x41, 0x5e, /* pop %r14 */
        0x41, 0x5d, /* pop %r13 */
        0x41, 0x5c, /* pop %r12 */
        0x5d,       /* pop %rbp */
        0x5b,       /* pop %rbx */
        0xc3,       /* ret */
    };

    struct stubs *this_stubs = &this_cpu(stubs);
    unsigned long stub_va = this_stubs->addr + STUB_BUF_SIZE / 2;
    unsigned int quirk_bytes = 0;
    char *p;

    /* Helpers - Read outer scope but only modify p. */
#define APPEND_BUFF(b) ({ memcpy(p, b, sizeof(b)); p += sizeof(b); })
#define APPEND_CALL(f)                                                  \
    ({                                                                  \
        long disp = (long)(f) - (stub_va + p - ctxt->io_emul_stub + 5); \
        BUG_ON((int32_t)disp != disp);                                  \
        *p++ = 0xe8;                                                    \
        *(int32_t *)p = disp; p += 4;                                   \
    })

    if ( !ctxt->io_emul_stub )
        ctxt->io_emul_stub =
            map_domain_page(_mfn(this_stubs->mfn)) + (stub_va & ~PAGE_MASK);

    p = ctxt->io_emul_stub;

    APPEND_BUFF(prologue);
    APPEND_CALL(load_guest_gprs);

    /* Some platforms might need to quirk the stub for specific inputs. */
    if ( unlikely(ioemul_handle_quirk) )
    {
        quirk_bytes = ioemul_handle_quirk(opcode, p, ctxt->ctxt.regs);
        p += quirk_bytes;
    }

    /* Default I/O stub. */
    if ( likely(!quirk_bytes) )
    {
        *p++ = (bytes != 2) ? 0x90 : 0x66;  /* data16 or nop */
        *p++ = opcode;                      /* <opcode> */
        *p++ = !(opcode & 8) ? port : 0x90; /* imm8 or nop */
    }

    APPEND_CALL(save_guest_gprs);
    APPEND_BUFF(epilogue);

    /* Build-time best effort attempt to catch problems. */
    BUILD_BUG_ON(STUB_BUF_SIZE / 2 <
                 (sizeof(prologue) + sizeof(epilogue) + 10 /* 2x call */ +
                  MAX(3 /* default stub */, IOEMUL_QUIRK_STUB_BYTES)));
    /* Runtime confirmation that we haven't clobbered an adjacent stub. */
    BUG_ON(STUB_BUF_SIZE / 2 < (p - ctxt->io_emul_stub));

    /* Handy function-typed pointer to the stub. */
    return (void *)stub_va;

#undef APPEND_CALL
#undef APPEND_BUFF
}
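
/*
 * Illustrative layout of a generated stub (a sketch; the exact bytes depend
 * on operand size and on any platform quirk handler), e.g. for a 1-byte
 * "in $0x71, %al" (opcode 0xe4, port 0x71 chosen purely for illustration):
 *
 *   push %rbx ... push %r15; push %rdi    (prologue)
 *   call load_guest_gprs                  (guest GPRs -> real registers)
 *   nop; in $0x71, %al                    (0x90 0xe4 0x71, the default stub)
 *   call save_guest_gprs                  (real registers -> guest GPRs)
 *   pop %rdi; pop %r15 ... pop %rbx; ret  (epilogue)
 */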


/* Perform IOPL check between the vcpu's shadowed IOPL, and the assumed cpl. */
static bool iopl_ok(const struct vcpu *v, const struct cpu_user_regs *regs)
{
    unsigned int cpl = guest_kernel_mode(v, regs) ?
        (VM_ASSIST(v->domain, architectural_iopl) ? 0 : 1) : 3;

    ASSERT((v->arch.pv.iopl & ~X86_EFLAGS_IOPL) == 0);

    return IOPL(cpl) <= v->arch.pv.iopl;
}

/* Has the guest requested sufficient permission for this I/O access? */
static bool guest_io_okay(unsigned int port, unsigned int bytes,
                          struct vcpu *v, struct cpu_user_regs *regs)
{
    /* If in user mode, switch to kernel mode just to read I/O bitmap. */
    const bool user_mode = !(v->arch.flags & TF_kernel_mode);

    if ( iopl_ok(v, regs) )
        return true;

    if ( (port + bytes) <= v->arch.pv.iobmp_limit )
    {
        union { uint8_t bytes[2]; uint16_t mask; } x;

        /*
         * Grab permission bytes from guest space. Inaccessible bytes are
         * read as 0xff (no access allowed).
         */
        if ( user_mode )
            toggle_guest_pt(v);

        switch ( __copy_from_guest_offset(x.bytes, v->arch.pv.iobmp,
                                          port>>3, 2) )
        {
        default: x.bytes[0] = ~0;
            /* fallthrough */
        case 1:  x.bytes[1] = ~0;
            /* fallthrough */
        case 0:  break;
        }

        if ( user_mode )
            toggle_guest_pt(v);

        if ( (x.mask & (((1 << bytes) - 1) << (port & 7))) == 0 )
            return true;
    }

    return false;
}
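
/*
 * Worked example of guest_io_okay()'s bitmap check: for a 2-byte access to
 * port 0x3f8, two permission bytes are fetched starting at byte offset
 * 0x3f8 >> 3 = 0x7f of the guest's I/O bitmap, and the access is allowed
 * only if the mask ((1 << 2) - 1) << (0x3f8 & 7) = 0x3 finds both covering
 * bits clear.
 */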

/* Has the administrator granted sufficient permission for this I/O access? */
static bool admin_io_okay(unsigned int port, unsigned int bytes,
                          const struct domain *d)
{
    /*
     * Port 0xcf8 (CONFIG_ADDRESS) is only visible for DWORD accesses.
     * We never permit direct access to that register.
     */
    if ( (port == 0xcf8) && (bytes == 4) )
        return false;

    /* We also never permit direct access to the RTC/CMOS registers. */
    if ( ((port & ~1) == RTC_PORT(0)) )
        return false;

    return ioports_access_permitted(d, port, port + bytes - 1);
}

static bool pci_cfg_ok(struct domain *currd, unsigned int start,
                       unsigned int size, uint32_t *write)
{
    uint32_t machine_bdf;

    if ( !is_hardware_domain(currd) )
        return false;

    if ( !CF8_ENABLED(currd->arch.pci_cf8) )
        return true;

    machine_bdf = CF8_BDF(currd->arch.pci_cf8);
    if ( write )
    {
        const unsigned long *ro_map = pci_get_ro_map(0);

        if ( ro_map && test_bit(machine_bdf, ro_map) )
            return false;
    }
    start |= CF8_ADDR_LO(currd->arch.pci_cf8);
    /* AMD extended configuration space access? */
    if ( CF8_ADDR_HI(currd->arch.pci_cf8) &&
         boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
         boot_cpu_data.x86 >= 0x10 && boot_cpu_data.x86 < 0x17 )
    {
        uint64_t msr_val;

        if ( rdmsr_safe(MSR_AMD64_NB_CFG, msr_val) )
            return false;
        if ( msr_val & (1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT) )
            start |= CF8_ADDR_HI(currd->arch.pci_cf8);
    }

    return !write ?
           xsm_pci_config_permission(XSM_HOOK, currd, machine_bdf,
                                     start, start + size - 1, 0) == 0 :
           pci_conf_write_intercept(0, machine_bdf, start, size, write) >= 0;
}

static uint32_t guest_io_read(unsigned int port, unsigned int bytes,
                              struct domain *currd)
{
    uint32_t data = 0;
    unsigned int shift = 0;

    if ( admin_io_okay(port, bytes, currd) )
    {
        switch ( bytes )
        {
        case 1: return inb(port);
        case 2: return inw(port);
        case 4: return inl(port);
        }
    }

    while ( bytes != 0 )
    {
        unsigned int size = 1;
        uint32_t sub_data = ~0;

        if ( (port == 0x42) || (port == 0x43) || (port == 0x61) )
        {
            sub_data = pv_pit_handler(port, 0, 0);
        }
        else if ( port == RTC_PORT(0) || port == RTC_PORT(1) )
        {
            sub_data = rtc_guest_read(port);
        }
        else if ( (port == 0xcf8) && (bytes == 4) )
        {
            size = 4;
            sub_data = currd->arch.pci_cf8;
        }
        else if ( (port & 0xfffc) == 0xcfc )
        {
            size = min(bytes, 4 - (port & 3));
            if ( size == 3 )
                size = 2;
            if ( pci_cfg_ok(currd, port & 3, size, NULL) )
                sub_data = pci_conf_read(currd->arch.pci_cf8, port & 3, size);
        }

        if ( size == 4 )
            return sub_data;

        data |= (sub_data & ((1u << (size * 8)) - 1)) << shift;
        shift += size * 8;
        port += size;
        bytes -= size;
    }

    return data;
}
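
/*
 * Worked example of guest_io_read()'s byte-wise splitting: a 2-byte read at
 * RTC_PORT(0) (0x70) is carried out as two 1-byte iterations, placing
 * rtc_guest_read(0x70) in bits 0-7 and rtc_guest_read(0x71) in bits 8-15 of
 * the returned value; ports with no handler contribute 0xff bytes.
 */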

static unsigned int check_guest_io_breakpoint(struct vcpu *v,
                                              unsigned int port,
                                              unsigned int len)
{
    unsigned int width, i, match = 0;
    unsigned long start;

    if ( !v->arch.pv.dr7_emul || !(v->arch.pv.ctrlreg[4] & X86_CR4_DE) )
        return 0;

    for ( i = 0; i < 4; i++ )
    {
        if ( !(v->arch.pv.dr7_emul & (3 << (i * DR_ENABLE_SIZE))) )
            continue;

        start = v->arch.dr[i];
        width = 0;

        switch ( (v->arch.dr7 >>
                  (DR_CONTROL_SHIFT + i * DR_CONTROL_SIZE)) & 0xc )
        {
        case DR_LEN_1: width = 1; break;
        case DR_LEN_2: width = 2; break;
        case DR_LEN_4: width = 4; break;
        case DR_LEN_8: width = 8; break;
        }

        if ( (start < (port + len)) && ((start + width) > port) )
            match |= 1u << i;
    }

    return match;
}
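
/*
 * Example of the overlap test above: with DR0 = 0x60 programmed as a 1-byte
 * I/O breakpoint, a 1-byte access to port 0x60 satisfies
 * 0x60 < 0x61 && 0x61 > 0x60 and so sets bit 0 of the returned match mask,
 * which the callers later fold into the guest's view of DR6.
 */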

static int read_io(unsigned int port, unsigned int bytes,
                   unsigned long *val, struct x86_emulate_ctxt *ctxt)
{
    struct priv_op_ctxt *poc = container_of(ctxt, struct priv_op_ctxt, ctxt);
    struct vcpu *curr = current;
    struct domain *currd = current->domain;

    /* INS must not come here. */
    ASSERT((ctxt->opcode & ~9) == 0xe4);

    if ( !guest_io_okay(port, bytes, curr, ctxt->regs) )
        return X86EMUL_UNHANDLEABLE;

    poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes);

    if ( admin_io_okay(port, bytes, currd) )
    {
        io_emul_stub_t *io_emul =
            io_emul_stub_setup(poc, ctxt->opcode, port, bytes);

        io_emul(ctxt->regs);
        return X86EMUL_DONE;
    }

    *val = guest_io_read(port, bytes, currd);

    return X86EMUL_OKAY;
}

static void guest_io_write(unsigned int port, unsigned int bytes,
                           uint32_t data, struct domain *currd)
{
    if ( admin_io_okay(port, bytes, currd) )
    {
        switch ( bytes )
        {
        case 1:
            outb((uint8_t)data, port);
            if ( amd_acpi_c1e_quirk )
                amd_check_disable_c1e(port, (uint8_t)data);
            break;
        case 2:
            outw((uint16_t)data, port);
            break;
        case 4:
            outl(data, port);
            break;
        }
        return;
    }

    while ( bytes != 0 )
    {
        unsigned int size = 1;

        if ( (port == 0x42) || (port == 0x43) || (port == 0x61) )
        {
            pv_pit_handler(port, (uint8_t)data, 1);
        }
        else if ( port == RTC_PORT(0) || port == RTC_PORT(1) )
        {
            rtc_guest_write(port, data);
        }
        else if ( (port == 0xcf8) && (bytes == 4) )
        {
            size = 4;
            currd->arch.pci_cf8 = data;
        }
        else if ( (port & 0xfffc) == 0xcfc )
        {
            size = min(bytes, 4 - (port & 3));
            if ( size == 3 )
                size = 2;
            if ( pci_cfg_ok(currd, port & 3, size, &data) )
                pci_conf_write(currd->arch.pci_cf8, port & 3, size, data);
        }

        if ( size == 4 )
            return;

        port += size;
        bytes -= size;
        data >>= size * 8;
    }
}

static int write_io(unsigned int port, unsigned int bytes,
                    unsigned long val, struct x86_emulate_ctxt *ctxt)
{
    struct priv_op_ctxt *poc = container_of(ctxt, struct priv_op_ctxt, ctxt);
    struct vcpu *curr = current;
    struct domain *currd = current->domain;

    /* OUTS must not come here. */
    ASSERT((ctxt->opcode & ~9) == 0xe6);

    if ( !guest_io_okay(port, bytes, curr, ctxt->regs) )
        return X86EMUL_UNHANDLEABLE;

    poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes);

    if ( admin_io_okay(port, bytes, currd) )
    {
        io_emul_stub_t *io_emul =
            io_emul_stub_setup(poc, ctxt->opcode, port, bytes);

        io_emul(ctxt->regs);
        if ( (bytes == 1) && amd_acpi_c1e_quirk )
            amd_check_disable_c1e(port, val);
        return X86EMUL_DONE;
    }

    guest_io_write(port, bytes, val, currd);

    return X86EMUL_OKAY;
}

static int read_segment(enum x86_segment seg,
                        struct segment_register *reg,
                        struct x86_emulate_ctxt *ctxt)
{
    /* Check if this is an attempt to access the I/O bitmap. */
    if ( seg == x86_seg_tr )
    {
        switch ( ctxt->opcode )
        {
        case 0x6c ... 0x6f: /* ins / outs */
        case 0xe4 ... 0xe7: /* in / out (immediate port) */
        case 0xec ... 0xef: /* in / out (port in %dx) */
            /* Defer the check to priv_op_{read,write}_io(). */
            return X86EMUL_DONE;
        }
    }

    if ( ctxt->addr_size < 64 )
    {
        unsigned long limit;
        unsigned int sel, ar;

        switch ( seg )
        {
        case x86_seg_cs: sel = ctxt->regs->cs; break;
        case x86_seg_ds: sel = read_sreg(ds); break;
        case x86_seg_es: sel = read_sreg(es); break;
        case x86_seg_fs: sel = read_sreg(fs); break;
        case x86_seg_gs: sel = read_sreg(gs); break;
        case x86_seg_ss: sel = ctxt->regs->ss; break;
        default: return X86EMUL_UNHANDLEABLE;
        }

        if ( !pv_emul_read_descriptor(sel, current, &reg->base,
                                      &limit, &ar, 0) )
            return X86EMUL_UNHANDLEABLE;

        reg->limit = limit;
        reg->attr = ar >> 8;
    }
    else
    {
        switch ( seg )
        {
        default:
            if ( !is_x86_user_segment(seg) )
                return X86EMUL_UNHANDLEABLE;
            reg->base = 0;
            break;
        case x86_seg_fs:
            reg->base = rdfsbase();
            break;
        case x86_seg_gs:
            reg->base = rdgsbase();
            break;
        }

        reg->limit = ~0U;

        reg->attr = 0;
        reg->type = _SEGMENT_WR >> 8;
        if ( seg == x86_seg_cs )
        {
            reg->type |= _SEGMENT_CODE >> 8;
            reg->l = 1;
        }
        else
            reg->db = 1;
        reg->s = 1;
        reg->dpl = 3;
        reg->p = 1;
        reg->g = 1;
    }

    /*
     * For x86_emulate.c's mode_ring0() to work, fake a DPL of zero.
     * Also do this for consistency for non-conforming code segments.
     */
    if ( (seg == x86_seg_ss ||
          (seg == x86_seg_cs &&
           !(reg->type & (_SEGMENT_EC >> 8)))) &&
         guest_kernel_mode(current, ctxt->regs) )
        reg->dpl = 0;

    return X86EMUL_OKAY;
}

static int pv_emul_virt_to_linear(unsigned long base, unsigned long offset,
                                  unsigned int bytes, unsigned long limit,
                                  enum x86_segment seg,
                                  struct x86_emulate_ctxt *ctxt,
                                  unsigned long *addr)
{
    int rc = X86EMUL_OKAY;

    *addr = base + offset;

    if ( ctxt->addr_size < 64 )
    {
        if ( limit < bytes - 1 || offset > limit - bytes + 1 )
            rc = X86EMUL_EXCEPTION;
        *addr = (uint32_t)*addr;
    }
    else if ( !__addr_ok(*addr) )
        rc = X86EMUL_EXCEPTION;

    if ( unlikely(rc == X86EMUL_EXCEPTION) )
        x86_emul_hw_exception(seg != x86_seg_ss ? TRAP_gp_fault
                                                : TRAP_stack_error,
                              0, ctxt);

    return rc;
}
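
/*
 * Example of the non-64-bit limit check above: with a segment limit of
 * 0xffff, a 4-byte access at offset 0xfffe fails because
 * 0xfffe > 0xffff - 4 + 1, raising #SS for stack-segment accesses and
 * #GP(0) otherwise.
 */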

static int rep_ins(uint16_t port,
                   enum x86_segment seg, unsigned long offset,
                   unsigned int bytes_per_rep, unsigned long *reps,
                   struct x86_emulate_ctxt *ctxt)
{
    struct priv_op_ctxt *poc = container_of(ctxt, struct priv_op_ctxt, ctxt);
    struct vcpu *curr = current;
    struct domain *currd = current->domain;
    unsigned long goal = *reps;
    struct segment_register sreg;
    int rc;

    ASSERT(seg == x86_seg_es);

    *reps = 0;

    if ( !guest_io_okay(port, bytes_per_rep, curr, ctxt->regs) )
        return X86EMUL_UNHANDLEABLE;

    rc = read_segment(x86_seg_es, &sreg, ctxt);
    if ( rc != X86EMUL_OKAY )
        return rc;

    if ( !sreg.p )
        return X86EMUL_UNHANDLEABLE;
    if ( !sreg.s ||
         (sreg.type & (_SEGMENT_CODE >> 8)) ||
         !(sreg.type & (_SEGMENT_WR >> 8)) )
    {
        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
        return X86EMUL_EXCEPTION;
    }

    poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes_per_rep);

    while ( *reps < goal )
    {
        unsigned int data = guest_io_read(port, bytes_per_rep, currd);
        unsigned long addr;

        rc = pv_emul_virt_to_linear(sreg.base, offset, bytes_per_rep,
                                    sreg.limit, x86_seg_es, ctxt, &addr);
        if ( rc != X86EMUL_OKAY )
            return rc;

        if ( (rc = __copy_to_user((void *)addr, &data, bytes_per_rep)) != 0 )
        {
            x86_emul_pagefault(PFEC_write_access,
                               addr + bytes_per_rep - rc, ctxt);
            return X86EMUL_EXCEPTION;
        }

        ++*reps;

        if ( poc->bpmatch || hypercall_preempt_check() )
            break;

        /* x86_emulate() clips the repetition count to ensure we don't wrap. */
        if ( unlikely(ctxt->regs->eflags & X86_EFLAGS_DF) )
            offset -= bytes_per_rep;
        else
            offset += bytes_per_rep;
    }

    return X86EMUL_OKAY;
}

static int rep_outs(enum x86_segment seg, unsigned long offset,
                    uint16_t port,
                    unsigned int bytes_per_rep, unsigned long *reps,
                    struct x86_emulate_ctxt *ctxt)
{
    struct priv_op_ctxt *poc = container_of(ctxt, struct priv_op_ctxt, ctxt);
    struct vcpu *curr = current;
    struct domain *currd = current->domain;
    unsigned long goal = *reps;
    struct segment_register sreg;
    int rc;

    *reps = 0;

    if ( !guest_io_okay(port, bytes_per_rep, curr, ctxt->regs) )
        return X86EMUL_UNHANDLEABLE;

    rc = read_segment(seg, &sreg, ctxt);
    if ( rc != X86EMUL_OKAY )
        return rc;

    if ( !sreg.p )
        return X86EMUL_UNHANDLEABLE;
    if ( !sreg.s ||
         ((sreg.type & (_SEGMENT_CODE >> 8)) &&
          !(sreg.type & (_SEGMENT_WR >> 8))) )
    {
        x86_emul_hw_exception(seg != x86_seg_ss ? TRAP_gp_fault
                                                : TRAP_stack_error,
                              0, ctxt);
        return X86EMUL_EXCEPTION;
    }

    poc->bpmatch = check_guest_io_breakpoint(curr, port, bytes_per_rep);

    while ( *reps < goal )
    {
        unsigned int data = 0;
        unsigned long addr;

        rc = pv_emul_virt_to_linear(sreg.base, offset, bytes_per_rep,
                                    sreg.limit, seg, ctxt, &addr);
        if ( rc != X86EMUL_OKAY )
            return rc;

        if ( (rc = __copy_from_user(&data, (void *)addr, bytes_per_rep)) != 0 )
        {
            x86_emul_pagefault(0, addr + bytes_per_rep - rc, ctxt);
            return X86EMUL_EXCEPTION;
        }

        guest_io_write(port, bytes_per_rep, data, currd);

        ++*reps;

        if ( poc->bpmatch || hypercall_preempt_check() )
            break;

        /* x86_emulate() clips the repetition count to ensure we don't wrap. */
        if ( unlikely(ctxt->regs->eflags & X86_EFLAGS_DF) )
            offset -= bytes_per_rep;
        else
            offset += bytes_per_rep;
    }

    return X86EMUL_OKAY;
}

static int read_cr(unsigned int reg, unsigned long *val,
                   struct x86_emulate_ctxt *ctxt)
{
    const struct vcpu *curr = current;

    switch ( reg )
    {
    case 0: /* Read CR0 */
        *val = (read_cr0() & ~X86_CR0_TS) | curr->arch.pv.ctrlreg[0];
        return X86EMUL_OKAY;

    case 2: /* Read CR2 */
    case 4: /* Read CR4 */
        *val = curr->arch.pv.ctrlreg[reg];
        return X86EMUL_OKAY;

    case 3: /* Read CR3 */
    {
        const struct domain *currd = curr->domain;
        mfn_t mfn;

        if ( !is_pv_32bit_domain(currd) )
        {
            mfn = pagetable_get_mfn(curr->arch.guest_table);
            *val = xen_pfn_to_cr3(gfn_x(mfn_to_gfn(currd, mfn)));
        }
        else
        {
            l4_pgentry_t *pl4e =
                map_domain_page(pagetable_get_mfn(curr->arch.guest_table));

            mfn = l4e_get_mfn(*pl4e);
            unmap_domain_page(pl4e);
            *val = compat_pfn_to_cr3(gfn_x(mfn_to_gfn(currd, mfn)));
        }

        return X86EMUL_OKAY;
    }
    }

    return X86EMUL_UNHANDLEABLE;
}

static int write_cr(unsigned int reg, unsigned long val,
                    struct x86_emulate_ctxt *ctxt)
{
    struct vcpu *curr = current;

    switch ( reg )
    {
    case 0: /* Write CR0 */
        if ( (val ^ read_cr0()) & ~X86_CR0_TS )
        {
            gdprintk(XENLOG_WARNING,
                     "Attempt to change unmodifiable CR0 flags\n");
            break;
        }
        do_fpu_taskswitch(!!(val & X86_CR0_TS));
        return X86EMUL_OKAY;

    case 2: /* Write CR2 */
        curr->arch.pv.ctrlreg[2] = val;
        arch_set_cr2(curr, val);
        return X86EMUL_OKAY;

    case 3: /* Write CR3 */
    {
        struct domain *currd = curr->domain;
        unsigned long gfn;
        struct page_info *page;
        int rc;

        gfn = !is_pv_32bit_domain(currd)
              ? xen_cr3_to_pfn(val) : compat_cr3_to_pfn(val);
        page = get_page_from_gfn(currd, gfn, NULL, P2M_ALLOC);
        if ( !page )
            break;
        rc = new_guest_cr3(page_to_mfn(page));
        put_page(page);

        switch ( rc )
        {
        case 0:
            return X86EMUL_OKAY;
        case -ERESTART: /* retry after preemption */
            return X86EMUL_RETRY;
        }
        break;
    }

    case 4: /* Write CR4 */
        curr->arch.pv.ctrlreg[4] = pv_fixup_guest_cr4(curr, val);
        write_cr4(pv_make_cr4(curr));
        ctxt_switch_levelling(curr);
        return X86EMUL_OKAY;
    }

    return X86EMUL_UNHANDLEABLE;
}

static inline uint64_t guest_misc_enable(uint64_t val)
{
    val &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
             MSR_IA32_MISC_ENABLE_MONITOR_ENABLE);
    val |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
           MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
           MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
    return val;
}

static int read_msr(unsigned int reg, uint64_t *val,
                    struct x86_emulate_ctxt *ctxt)
{
    struct vcpu *curr = current;
    const struct domain *currd = curr->domain;
    bool vpmu_msr = false;
    int ret;

    if ( (ret = guest_rdmsr(curr, reg, val)) != X86EMUL_UNHANDLEABLE )
    {
        if ( ret == X86EMUL_EXCEPTION )
            x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);

        return ret;
    }

    switch ( reg )
    {
        int rc;

    case MSR_FS_BASE:
        if ( is_pv_32bit_domain(currd) )
            break;
        *val = rdfsbase();
        return X86EMUL_OKAY;

    case MSR_GS_BASE:
        if ( is_pv_32bit_domain(currd) )
            break;
        *val = rdgsbase();
        return X86EMUL_OKAY;

    case MSR_SHADOW_GS_BASE:
        if ( is_pv_32bit_domain(currd) )
            break;
        *val = curr->arch.pv.gs_base_user;
        return X86EMUL_OKAY;

    case MSR_IA32_TSC:
        *val = currd->arch.vtsc ? pv_soft_rdtsc(curr, ctxt->regs) : rdtsc();
        return X86EMUL_OKAY;

    case MSR_EFER:
        /* Hide unknown bits, and unconditionally hide SVME from guests. */
        *val = read_efer() & EFER_KNOWN_MASK & ~EFER_SVME;
        /*
         * Hide the 64-bit features from 32-bit guests. SCE has
         * vendor-dependent behaviour.
         */
        if ( is_pv_32bit_domain(currd) )
            *val &= ~(EFER_LME | EFER_LMA |
                      (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
                       ? EFER_SCE : 0));
        return X86EMUL_OKAY;

    case MSR_K7_FID_VID_CTL:
    case MSR_K7_FID_VID_STATUS:
    case MSR_K8_PSTATE_LIMIT:
    case MSR_K8_PSTATE_CTRL:
    case MSR_K8_PSTATE_STATUS:
    case MSR_K8_PSTATE0:
    case MSR_K8_PSTATE1:
    case MSR_K8_PSTATE2:
    case MSR_K8_PSTATE3:
    case MSR_K8_PSTATE4:
    case MSR_K8_PSTATE5:
    case MSR_K8_PSTATE6:
    case MSR_K8_PSTATE7:
        if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
            break;
        if ( unlikely(is_cpufreq_controller(currd)) )
            goto normal;
        *val = 0;
        return X86EMUL_OKAY;

    case MSR_FAM10H_MMIO_CONF_BASE:
        if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
             boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 >= 0x17 )
            break;
        /* fall through */
    case MSR_AMD64_NB_CFG:
        if ( is_hwdom_pinned_vcpu(curr) )
            goto normal;
        *val = 0;
        return X86EMUL_OKAY;

    case MSR_IA32_MISC_ENABLE:
        if ( rdmsr_safe(reg, *val) )
            break;
        *val = guest_misc_enable(*val);
        return X86EMUL_OKAY;

    case MSR_IA32_PERF_CAPABILITIES:
        /* No extra capabilities are supported. */
        *val = 0;
        return X86EMUL_OKAY;

    case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR(7):
    case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL(3):
    case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR2:
    case MSR_CORE_PERF_FIXED_CTR_CTRL ... MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
        {
            vpmu_msr = true;
            /* fall through */
    case MSR_AMD_FAM15H_EVNTSEL0 ... MSR_AMD_FAM15H_PERFCTR5:
    case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
            if ( vpmu_msr || (boot_cpu_data.x86_vendor &
                              (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
            {
                if ( vpmu_do_rdmsr(reg, val) )
                    break;
                return X86EMUL_OKAY;
            }
        }
        /* fall through */
    default:
        rc = vmce_rdmsr(reg, val);
        if ( rc < 0 )
            break;
        if ( rc )
            return X86EMUL_OKAY;
        /* fall through */
    normal:
        /* Everyone can read the MSR space. */
        /* gdprintk(XENLOG_WARNING, "Domain attempted RDMSR %08x\n", reg); */
        if ( rdmsr_safe(reg, *val) )
            break;
        return X86EMUL_OKAY;
    }

    return X86EMUL_UNHANDLEABLE;
}

static int write_msr(unsigned int reg, uint64_t val,
                     struct x86_emulate_ctxt *ctxt)
{
    struct vcpu *curr = current;
    const struct domain *currd = curr->domain;
    bool vpmu_msr = false;
    int ret;

    if ( (ret = guest_wrmsr(curr, reg, val)) != X86EMUL_UNHANDLEABLE )
    {
        if ( ret == X86EMUL_EXCEPTION )
            x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);

        return ret;
    }

    switch ( reg )
    {
        uint64_t temp;
        int rc;

    case MSR_FS_BASE:
        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
            break;
        wrfsbase(val);
        return X86EMUL_OKAY;

    case MSR_GS_BASE:
        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
            break;
        wrgsbase(val);
        return X86EMUL_OKAY;

    case MSR_SHADOW_GS_BASE:
        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
            break;
        wrgsshadow(val);
        curr->arch.pv.gs_base_user = val;
        return X86EMUL_OKAY;

    case MSR_K7_FID_VID_STATUS:
    case MSR_K7_FID_VID_CTL:
    case MSR_K8_PSTATE_LIMIT:
    case MSR_K8_PSTATE_CTRL:
    case MSR_K8_PSTATE_STATUS:
    case MSR_K8_PSTATE0:
    case MSR_K8_PSTATE1:
    case MSR_K8_PSTATE2:
    case MSR_K8_PSTATE3:
    case MSR_K8_PSTATE4:
    case MSR_K8_PSTATE5:
    case MSR_K8_PSTATE6:
    case MSR_K8_PSTATE7:
    case MSR_K8_HWCR:
        if ( !(boot_cpu_data.x86_vendor &
               (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
            break;
        if ( likely(!is_cpufreq_controller(currd)) ||
             wrmsr_safe(reg, val) == 0 )
            return X86EMUL_OKAY;
        break;

    case MSR_AMD64_NB_CFG:
        if ( !is_hwdom_pinned_vcpu(curr) )
            return X86EMUL_OKAY;
        if ( (rdmsr_safe(MSR_AMD64_NB_CFG, temp) != 0) ||
             ((val ^ temp) & ~(1ULL << AMD64_NB_CFG_CF8_EXT_ENABLE_BIT)) )
            goto invalid;
        if ( wrmsr_safe(MSR_AMD64_NB_CFG, val) == 0 )
            return X86EMUL_OKAY;
        break;

    case MSR_FAM10H_MMIO_CONF_BASE:
        if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
             boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 >= 0x17 )
            break;
        if ( !is_hwdom_pinned_vcpu(curr) )
            return X86EMUL_OKAY;
        if ( rdmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, temp) != 0 )
            break;
        if ( (pci_probe & PCI_PROBE_MASK) == PCI_PROBE_MMCONF ?
             temp != val :
             ((temp ^ val) &
              ~(FAM10H_MMIO_CONF_ENABLE |
                (FAM10H_MMIO_CONF_BUSRANGE_MASK <<
                 FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
                ((u64)FAM10H_MMIO_CONF_BASE_MASK <<
                 FAM10H_MMIO_CONF_BASE_SHIFT))) )
            goto invalid;
        if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, val) == 0 )
            return X86EMUL_OKAY;
        break;

    case MSR_IA32_MISC_ENABLE:
        if ( rdmsr_safe(reg, temp) )
            break;
        if ( val != guest_misc_enable(temp) )
            goto invalid;
        return X86EMUL_OKAY;

    case MSR_IA32_MPERF:
    case MSR_IA32_APERF:
        if ( !(boot_cpu_data.x86_vendor &
               (X86_VENDOR_INTEL | X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
            break;
        if ( likely(!is_cpufreq_controller(currd)) ||
             wrmsr_safe(reg, val) == 0 )
            return X86EMUL_OKAY;
        break;

    case MSR_IA32_THERM_CONTROL:
    case MSR_IA32_ENERGY_PERF_BIAS:
        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
            break;
        if ( !is_hwdom_pinned_vcpu(curr) || wrmsr_safe(reg, val) == 0 )
            return X86EMUL_OKAY;
        break;

    case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR(7):
    case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL(3):
    case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR2:
    case MSR_CORE_PERF_FIXED_CTR_CTRL ... MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
        {
            vpmu_msr = true;
    case MSR_AMD_FAM15H_EVNTSEL0 ... MSR_AMD_FAM15H_PERFCTR5:
    case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
            if ( vpmu_msr || (boot_cpu_data.x86_vendor &
                              (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
            {
                if ( (vpmu_mode & XENPMU_MODE_ALL) &&
                     !is_hardware_domain(currd) )
                    return X86EMUL_OKAY;

                if ( vpmu_do_wrmsr(reg, val, 0) )
                    break;
                return X86EMUL_OKAY;
            }
        }
        /* fall through */
    default:
        rc = vmce_wrmsr(reg, val);
        if ( rc < 0 )
            break;
        if ( rc )
            return X86EMUL_OKAY;

        if ( (rdmsr_safe(reg, temp) != 0) || (val != temp) )
    invalid:
            gdprintk(XENLOG_WARNING,
                     "Domain attempted WRMSR %08x from 0x%016"PRIx64" to 0x%016"PRIx64"\n",
                     reg, temp, val);
        return X86EMUL_OKAY;
    }

    return X86EMUL_UNHANDLEABLE;
}

static int cache_op(enum x86emul_cache_op op, enum x86_segment seg,
                    unsigned long offset, struct x86_emulate_ctxt *ctxt)
{
    ASSERT(op == x86emul_wbinvd || op == x86emul_wbnoinvd);

    /* Ignore the instruction if unprivileged. */
    if ( !cache_flush_permitted(current->domain) )
        /*
         * Non-physdev domain attempted WBINVD; ignore for now since
         * newer linux uses this in some start-of-day timing loops.
         */
        ;
    else if ( op == x86emul_wbnoinvd /* && cpu_has_wbnoinvd */ )
        wbnoinvd();
    else
        wbinvd();

    return X86EMUL_OKAY;
}

static int validate(const struct x86_emulate_state *state,
                    struct x86_emulate_ctxt *ctxt)
{
    switch ( ctxt->opcode )
    {
    case 0x6c ... 0x6f: /* ins / outs */
    case 0xe4 ... 0xe7: /* in / out (immediate port) */
    case 0xec ... 0xef: /* in / out (port in %dx) */
    case X86EMUL_OPC(0x0f, 0x06): /* clts */
    case X86EMUL_OPC(0x0f, 0x09): /* wbinvd */
    case X86EMUL_OPC(0x0f, 0x20) ...
         X86EMUL_OPC(0x0f, 0x23): /* mov to/from cr/dr */
    case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */
    case X86EMUL_OPC(0x0f, 0x31): /* rdtsc */
    case X86EMUL_OPC(0x0f, 0x32): /* rdmsr */
    case X86EMUL_OPC(0x0f, 0xa2): /* cpuid */
        return X86EMUL_OKAY;

    case 0xfa: case 0xfb: /* cli / sti */
        if ( !iopl_ok(current, ctxt->regs) )
            break;
        /*
         * This is just too dangerous to allow, in my opinion. Consider if the
         * caller then tries to reenable interrupts using POPF: we can't trap
         * that and we'll end up with hard-to-debug lockups. Fast & loose will
         * do for us. :-)
        vcpu_info(current, evtchn_upcall_mask) = (ctxt->opcode == 0xfa);
         */
        return X86EMUL_DONE;

    case X86EMUL_OPC(0x0f, 0x01):
    {
        unsigned int modrm_rm, modrm_reg;

        if ( x86_insn_modrm(state, &modrm_rm, &modrm_reg) != 3 ||
             (modrm_rm & 7) != 1 )
            break;
        switch ( modrm_reg & 7 )
        {
        case 2: /* xsetbv */
        case 7: /* rdtscp */
            return X86EMUL_OKAY;
        }
        break;
    }
    }

    return X86EMUL_UNHANDLEABLE;
}

static int insn_fetch(enum x86_segment seg,
                      unsigned long offset,
                      void *p_data,
                      unsigned int bytes,
                      struct x86_emulate_ctxt *ctxt)
{
    const struct priv_op_ctxt *poc =
        container_of(ctxt, struct priv_op_ctxt, ctxt);
    unsigned int rc;
    unsigned long addr = poc->cs.base + offset;

    ASSERT(seg == x86_seg_cs);

    /* We don't mean to emulate any branches. */
    if ( !bytes )
        return X86EMUL_UNHANDLEABLE;

    rc = pv_emul_virt_to_linear(poc->cs.base, offset, bytes, poc->cs.limit,
                                x86_seg_cs, ctxt, &addr);
    if ( rc != X86EMUL_OKAY )
        return rc;

    if ( (rc = __copy_from_user(p_data, (void *)addr, bytes)) != 0 )
    {
        /*
         * TODO: This should report PFEC_insn_fetch when goc->insn_fetch &&
         * cpu_has_nx, but we'd then need a "fetch" variant of
         * __copy_from_user() respecting NX, SMEP, and protection keys.
         */
        x86_emul_pagefault(0, addr + bytes - rc, ctxt);
        return X86EMUL_EXCEPTION;
    }

    return X86EMUL_OKAY;
}


static const struct x86_emulate_ops priv_op_ops = {
    .insn_fetch = insn_fetch,
    .read = x86emul_unhandleable_rw,
    .validate = validate,
    .read_io = read_io,
    .write_io = write_io,
    .rep_ins = rep_ins,
    .rep_outs = rep_outs,
    .read_segment = read_segment,
    .read_cr = read_cr,
    .write_cr = write_cr,
    .read_dr = x86emul_read_dr,
    .write_dr = x86emul_write_dr,
    .write_xcr = x86emul_write_xcr,
    .read_msr = read_msr,
    .write_msr = write_msr,
    .cpuid = x86emul_cpuid,
    .cache_op = cache_op,
};

int pv_emulate_privileged_op(struct cpu_user_regs *regs)
{
    struct vcpu *curr = current;
    struct domain *currd = curr->domain;
    struct priv_op_ctxt ctxt = {
        .ctxt.regs = regs,
        .ctxt.cpuid = currd->arch.cpuid,
        .ctxt.lma = !is_pv_32bit_domain(currd),
    };
    int rc;
    unsigned int eflags, ar;

    if ( !pv_emul_read_descriptor(regs->cs, curr, &ctxt.cs.base,
                                  &ctxt.cs.limit, &ar, 1) ||
         !(ar & _SEGMENT_S) ||
         !(ar & _SEGMENT_P) ||
         !(ar & _SEGMENT_CODE) )
        return 0;

    /* Mirror virtualized state into EFLAGS. */
    ASSERT(regs->eflags & X86_EFLAGS_IF);
    if ( vcpu_info(curr, evtchn_upcall_mask) )
        regs->eflags &= ~X86_EFLAGS_IF;
    else
        regs->eflags |= X86_EFLAGS_IF;
    ASSERT(!(regs->eflags & X86_EFLAGS_IOPL));
    regs->eflags |= curr->arch.pv.iopl;
    eflags = regs->eflags;

    ctxt.ctxt.addr_size = ar & _SEGMENT_L ? 64 : ar & _SEGMENT_DB ? 32 : 16;
    /* Leave zero in ctxt.ctxt.sp_size, as it's not needed. */
    rc = x86_emulate(&ctxt.ctxt, &priv_op_ops);

    if ( ctxt.io_emul_stub )
        unmap_domain_page(ctxt.io_emul_stub);

    /*
     * Un-mirror virtualized state from EFLAGS.
     * Nothing we allow to be emulated can change anything other than the
     * arithmetic bits, and the resume flag.
     */
    ASSERT(!((regs->eflags ^ eflags) &
             ~(X86_EFLAGS_RF | X86_EFLAGS_ARITH_MASK)));
    regs->eflags |= X86_EFLAGS_IF;
    regs->eflags &= ~X86_EFLAGS_IOPL;

    switch ( rc )
    {
    case X86EMUL_OKAY:
        if ( ctxt.ctxt.retire.singlestep )
            ctxt.bpmatch |= DR_STEP;
        if ( ctxt.bpmatch )
        {
            curr->arch.dr6 |= ctxt.bpmatch | DR_STATUS_RESERVED_ONE;
            if ( !(curr->arch.pv.trap_bounce.flags & TBF_EXCEPTION) )
                pv_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
        }
        /* fall through */
    case X86EMUL_RETRY:
        return EXCRET_fault_fixed;

    case X86EMUL_EXCEPTION:
        pv_inject_event(&ctxt.ctxt.event);
        return EXCRET_fault_fixed;
    }

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */