/*
 * intr.c: handling I/O, interrupts related VMX entry/exit
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2004-2007, XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/trace.h>
#include <xen/event.h>
#include <asm/apicdef.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/nestedhvm.h>
#include <public/hvm/ioreq.h>
#include <asm/hvm/trace.h>
#include <asm/vm_event.h>

/*
 * A few notes on virtual NMI and INTR delivery, and interactions with
 * interruptibility states:
 *
 * We can only inject an ExtInt if EFLAGS.IF = 1 and there is no blocking by
 * STI or MOV SS. Otherwise the VM entry fails. The 'virtual interrupt
 * pending' control causes a VM exit when all these checks succeed. It will
 * exit immediately after VM entry if the checks succeed at that point.
 *
 * We can only inject an NMI if there is no blocking by MOV SS (and also,
 * depending on the implementation, no blocking by STI). If the pin-based
 * 'virtual NMIs' control is specified then the NMI-blocking interruptibility
 * flag is also checked. The 'virtual NMI pending' control (available only in
 * conjunction with 'virtual NMIs') causes a VM exit when all these checks
 * succeed. It will exit immediately after VM entry if the checks succeed
 * at that point.
 *
 * Because a processor may or may not check blocking-by-STI when injecting
 * a virtual NMI, it is necessary to convert that to blocking-by-MOV-SS
 * before specifying the 'virtual NMI pending' control. Otherwise we could
 * enter an infinite loop where we check blocking-by-STI in software and
 * thus delay delivery of a virtual NMI, but the processor causes immediate
 * VM exit because it does not check blocking-by-STI.
 *
 * Injecting a virtual NMI sets the NMI-blocking interruptibility flag only
 * if the 'virtual NMIs' control is set. Injecting *any* kind of event clears
 * the STI- and MOV-SS-blocking interruptibility-state flags.
 */

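/*
 * Illustrative sketch only (not compiled): the eligibility rules described
 * above, expressed as code. The helper name is hypothetical; the real
 * checks live in hvm_interrupt_blocked() and in the processor's own
 * VM-entry checking.
 *
 *   static bool can_inject_now(bool nmi)
 *   {
 *       unsigned long rflags, intr_shadow;
 *
 *       __vmread(GUEST_RFLAGS, &rflags);
 *       __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
 *
 *       if ( nmi )
 *           // Conservatively treat STI-blocking as blocking NMIs too, and
 *           // honour NMI-blocking only when 'virtual NMIs' is in use.
 *           return !(intr_shadow & (VMX_INTR_SHADOW_MOV_SS |
 *                                   VMX_INTR_SHADOW_STI |
 *                                   (cpu_has_vmx_vnmi ? VMX_INTR_SHADOW_NMI
 *                                                     : 0)));
 *
 *       // ExtInt: need EFLAGS.IF = 1 and neither STI nor MOV-SS blocking.
 *       return (rflags & X86_EFLAGS_IF) &&
 *              !(intr_shadow & (VMX_INTR_SHADOW_STI |
 *                               VMX_INTR_SHADOW_MOV_SS));
 *   }
 */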
static void vmx_enable_intr_window(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctl = CPU_BASED_VIRTUAL_INTR_PENDING;

    ASSERT(intack.source != hvm_intsrc_none);

    if ( unlikely(tb_init_done) )
    {
        unsigned long intr;

        __vmread(VM_ENTRY_INTR_INFO, &intr);
        HVMTRACE_3D(INTR_WINDOW, intack.vector, intack.source,
                    (intr & INTR_INFO_VALID_MASK) ? intr & 0xff : -1);
    }

    if ( (intack.source == hvm_intsrc_nmi) && cpu_has_vmx_vnmi )
    {
        /*
         * We set MOV-SS blocking in lieu of STI blocking when delivering an
         * NMI. This is because it is processor-specific whether STI-blocking
         * blocks NMIs. Hence we *must* check for STI-blocking on NMI delivery
         * (otherwise vmentry will fail on processors that check for STI-
         * blocking) but if the processor does not check for STI-blocking then
         * we may immediately vmexit and hence make no progress!
         * (see SDM 3B 21.3, "Other Causes of VM Exits").
         */
        unsigned long intr_shadow;

        __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
        if ( intr_shadow & VMX_INTR_SHADOW_STI )
        {
            /* Having both STI-blocking and MOV-SS-blocking fails vmentry. */
            intr_shadow &= ~VMX_INTR_SHADOW_STI;
            intr_shadow |= VMX_INTR_SHADOW_MOV_SS;
            __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
        }
        ctl = CPU_BASED_VIRTUAL_NMI_PENDING;
    }

    if ( !(v->arch.hvm.vmx.exec_control & ctl) )
    {
        v->arch.hvm.vmx.exec_control |= ctl;
        vmx_update_cpu_exec_control(v);
    }
}

/*
 * Injecting interrupts for nested virtualization
 *
 * When injecting virtual interrupts (originating from L0), there are
 * two major cases: within L1 context and within L2 context.
 * 1. L1 context (in_nesting == 0)
 *    Everything is the same as in the non-nested case: check RFLAGS.IF
 *    to see whether the injection can be done, and use the VMCS to
 *    inject the interrupt.
 *
 * 2. L2 context (in_nesting == 1)
 *    The interrupt causes a virtual VMExit; RFLAGS.IF is ignored, and
 *    whether to ack the irq is determined by intr_ack_on_exit. This
 *    normally cannot be blocked, except for:
 *    a. context transition
 *       The interrupt needs to be blocked at virtual VMEntry time.
 *    b. L2 idtv reinjection
 *       If an L2 idtv event is handled within L0 (e.g. an L0 shadow page
 *       fault), it needs to be reinjected without exiting to L1, and
 *       interrupt injection must be blocked at this point as well.
 *
 * Unfortunately, interrupt blocking in L2 won't work with a simple
 * intr_window_open check (which depends on L2's IF). To solve this,
 * the following algorithm can be used:
 *   v->arch.hvm.vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
 *   only the L0 control; the physical control may differ from it.
 *   - if in L1, it behaves normally: the intr window is written to the
 *     physical control as it is
 *   - if in L2, replace it with MTF (or the NMI window) if possible
 *   - if the MTF/NMI window is not used, the intr window can still be
 *     used, but may have a negative impact on interrupt performance
 */

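/*
 * Is interrupt injection currently blocked by nested (L1<->L2) state?
 * It is while a virtual VMExit is pending or a vmswitch is in progress,
 * while an event is already queued in VM_ENTRY_INTR_INFO (e.g. an L2 idtv
 * event being reinjected), or while a virtual VMEntry is pending.
 */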
enum hvm_intblk nvmx_intr_blocked(struct vcpu *v)
{
    int r = hvm_intblk_none;
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
        if ( nvcpu->nv_vmexit_pending ||
             nvcpu->nv_vmswitch_in_progress )
            r = hvm_intblk_rflags_ie;
        else
        {
            unsigned long intr_info;

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
                r = hvm_intblk_rflags_ie;
        }
    }
    else if ( nvcpu->nv_vmentry_pending )
        r = hvm_intblk_rflags_ie;

    return r;
}

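/*
 * Try to dispose of a pending interrupt in the context of nested virt.
 * Returns 1 if it was dealt with here: blocked by L1's TPR, deferred via
 * an interrupt window, or injected (for L2 with 'external-interrupt
 * exiting' set in the vVMCS this results in a virtual VMExit to L1).
 * Returns 0 if the normal injection path should handle it.
 */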
static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctrl;

    /* If blocked by L1's tpr, then nothing to do. */
    if ( nestedhvm_vcpu_in_guestmode(v) &&
         hvm_interrupt_blocked(v, intack) == hvm_intblk_tpr )
        return 1;

    if ( nvmx_intr_blocked(v) != hvm_intblk_none )
    {
        vmx_enable_intr_window(v, intack);
        return 1;
    }

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
        ctrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
        if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
            return 0;

        if ( intack.source == hvm_intsrc_pic ||
             intack.source == hvm_intsrc_lapic )
        {
            vmx_inject_extint(intack.vector, intack.source);

            ctrl = get_vvmcs(v, VM_EXIT_CONTROLS);
            if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
            {
                /* for now, duplicate the ack path in vmx_intr_assist */
                hvm_vcpu_ack_pending_irq(v, intack);
                pt_intr_post(v, intack);

                intack = hvm_vcpu_has_pending_irq(v);
                if ( unlikely(intack.source != hvm_intsrc_none) )
                    vmx_enable_intr_window(v, intack);
            }
            else if ( !cpu_has_vmx_virtual_intr_delivery )
                vmx_enable_intr_window(v, intack);

            return 1;
        }
        else if ( intack.source == hvm_intsrc_vector )
        {
            vmx_inject_extint(intack.vector, intack.source);
            return 1;
        }
    }

    return 0;
}

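/*
 * Write any EOI-exit bitmap words that have changed since the last sync
 * into the VMCS.
 */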
void vmx_sync_exit_bitmap(struct vcpu *v)
{
    const unsigned int n = ARRAY_SIZE(v->arch.hvm.vmx.eoi_exit_bitmap);
    unsigned int i;

    while ( (i = find_first_bit(&v->arch.hvm.vmx.eoi_exitmap_changed, n)) < n )
    {
        clear_bit(i, &v->arch.hvm.vmx.eoi_exitmap_changed);
        __vmwrite(EOI_EXIT_BITMAP(i), v->arch.hvm.vmx.eoi_exit_bitmap[i]);
    }
}

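/*
 * Called on the VM-entry path: pick up the highest-priority pending event
 * (NMI, MCE, or external interrupt) and inject it into the guest, or, if
 * injection is currently blocked, arrange to be notified (interrupt/NMI
 * window, TPR threshold) once it becomes possible.
 */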
void vmx_intr_assist(void)
{
    struct hvm_intack intack;
    struct vcpu *v = current;
    unsigned int tpr_threshold = 0;
    enum hvm_intblk intblk;
    int pt_vector;

    /* Block event injection when single step with MTF. */
    if ( unlikely(v->arch.hvm.single_step) )
    {
        v->arch.hvm.vmx.exec_control |= CPU_BASED_MONITOR_TRAP_FLAG;
        vmx_update_cpu_exec_control(v);
        return;
    }

    /* Block event injection while handling a sync vm_event. */
    if ( unlikely(v->arch.vm_event) && v->arch.vm_event->sync_event )
        return;

#ifdef CONFIG_MEM_SHARING
    /* Block event injection for VM fork if requested. */
    if ( unlikely(v->domain->arch.hvm.mem_sharing.block_interrupts) )
        return;
#endif

    /* Crank the handle on interrupt state. */
    pt_vector = pt_update_irq(v);

    do {
        unsigned long intr_info;

        intack = hvm_vcpu_has_pending_irq(v);
        if ( likely(intack.source == hvm_intsrc_none) )
            goto out;

        if ( unlikely(nvmx_intr_intercept(v, intack)) )
            goto out;

        intblk = hvm_interrupt_blocked(v, intack);
        if ( cpu_has_vmx_virtual_intr_delivery )
        {
            /* Set "Interrupt-window exiting" for ExtINT and NMI. */
            if ( (intblk != hvm_intblk_none) &&
                 (intack.source == hvm_intsrc_pic ||
                  intack.source == hvm_intsrc_vector ||
                  intack.source == hvm_intsrc_nmi) )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                if ( (intack.source == hvm_intsrc_pic) ||
                     (intack.source == hvm_intsrc_nmi) ||
                     (intack.source == hvm_intsrc_mce) )
                    vmx_enable_intr_window(v, intack);

                goto out;
            }
        }
        else if ( intblk == hvm_intblk_tpr )
        {
            ASSERT(vlapic_enabled(vcpu_vlapic(v)));
            ASSERT(intack.source == hvm_intsrc_lapic);
            tpr_threshold = intack.vector >> 4;
            goto out;
        }
        else if ( intblk != hvm_intblk_none )
        {
            vmx_enable_intr_window(v, intack);
            goto out;
        }
        else
        {
            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }
        }

        intack = hvm_vcpu_ack_pending_irq(v, intack);
    } while ( intack.source == hvm_intsrc_none );

    if ( intack.source == hvm_intsrc_nmi )
    {
        vmx_inject_nmi();
    }
    else if ( intack.source == hvm_intsrc_mce )
    {
        hvm_inject_hw_exception(TRAP_machine_check, X86_EVENT_NO_EC);
    }
    else if ( cpu_has_vmx_virtual_intr_delivery &&
              intack.source != hvm_intsrc_pic &&
              intack.source != hvm_intsrc_vector )
    {
        unsigned long status;

        /*
         * intack.vector is the highest priority vector. Set eoi_exit_bitmap
         * for intack.vector, to give periodic time interrupts a chance to be
         * posted when they become the highest-priority pending interrupt.
         */
        if ( pt_vector != -1 )
        {
#ifndef NDEBUG
            /*
             * We assert that intack.vector is the highest priority vector,
             * because only an interrupt from the vlapic can reach this point
             * and the highest vector is chosen in hvm_vcpu_has_pending_irq().
             * In fact, the assertion has been observed to fail sometimes. It
             * is suspected that the PIR is not synced to the vIRR, leaving
             * pt_vector behind in the PIR. To help verify this suspicion,
             * dump some information when the assertion fails.
             */
            if ( unlikely(intack.vector < pt_vector) )
            {
                const struct vlapic *vlapic;
                const struct pi_desc *pi_desc;
                const uint32_t *word;
                unsigned int i;

                printk(XENLOG_ERR "%pv: intack: %u:%02x pt: %02x\n",
                       current, intack.source, intack.vector, pt_vector);

                vlapic = vcpu_vlapic(v);
                if ( vlapic && vlapic->regs )
                {
                    word = (const void *)&vlapic->regs->data[APIC_IRR];
                    printk(XENLOG_ERR "vIRR:");
                    for ( i = X86_NR_VECTORS / 32; i--; )
                        printk(" %08x", word[i*4]);
                    printk("\n");
                }

                pi_desc = &v->arch.hvm.vmx.pi_desc;
                if ( pi_desc )
                {
                    word = (const void *)&pi_desc->pir;
                    printk(XENLOG_ERR " PIR:");
                    for ( i = X86_NR_VECTORS / 32; i--; )
                        printk(" %08x", word[i]);
                    printk("\n");
                }
            }
#endif
            ASSERT(intack.vector >= pt_vector);
            vmx_set_eoi_exit_bitmap(v, intack.vector);
        }

        /* Update the RVI field (low byte of the guest interrupt status). */
        __vmread(GUEST_INTR_STATUS, &status);
        status &= ~VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
        status |= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK &
                  intack.vector;
        __vmwrite(GUEST_INTR_STATUS, status);

        vmx_sync_exit_bitmap(v);

        pt_intr_post(v, intack);
    }
    else
    {
        HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
        vmx_inject_extint(intack.vector, intack.source);
        pt_intr_post(v, intack);
    }

    /* Is there another IRQ to queue up behind this one? */
    intack = hvm_vcpu_has_pending_irq(v);
    if ( !cpu_has_vmx_virtual_intr_delivery ||
         intack.source == hvm_intsrc_pic ||
         intack.source == hvm_intsrc_vector )
    {
        if ( unlikely(intack.source != hvm_intsrc_none) )
            vmx_enable_intr_window(v, intack);
    }

 out:
    if ( !nestedhvm_vcpu_in_guestmode(v) &&
         !cpu_has_vmx_virtual_intr_delivery &&
         cpu_has_vmx_tpr_shadow )
        __vmwrite(TPR_THRESHOLD, tpr_threshold);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */