/*
 * intr.c: handling I/O, interrupts related VMX entry/exit
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2004-2007, XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/trace.h>
#include <xen/event.h>
#include <asm/apicdef.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/nestedhvm.h>
#include <public/hvm/ioreq.h>
#include <asm/hvm/trace.h>
#include <asm/vm_event.h>

/*
 * A few notes on virtual NMI and INTR delivery, and interactions with
 * interruptibility states:
 *
 * We can only inject an ExtInt if EFLAGS.IF = 1 and no blocking by
 * STI nor MOV SS. Otherwise the VM entry fails. The 'virtual interrupt
 * pending' control causes a VM exit when all these checks succeed. It will
 * exit immediately after VM entry if the checks succeed at that point.
 *
 * We can only inject an NMI if no blocking by MOV SS (also, depending on
 * implementation, if no blocking by STI). If pin-based 'virtual NMIs'
 * control is specified then the NMI-blocking interruptibility flag is
 * also checked. The 'virtual NMI pending' control (available only in
 * conjunction with 'virtual NMIs') causes a VM exit when all these checks
 * succeed. It will exit immediately after VM entry if the checks succeed
 * at that point.
 *
 * Because a processor may or may not check blocking-by-STI when injecting
 * a virtual NMI, it will be necessary to convert that to block-by-MOV-SS
 * before specifying the 'virtual NMI pending' control. Otherwise we could
 * enter an infinite loop where we check blocking-by-STI in software and
 * thus delay delivery of a virtual NMI, but the processor causes immediate
 * VM exit because it does not check blocking-by-STI.
 *
 * Injecting a virtual NMI sets the NMI-blocking interruptibility flag only
 * if the 'virtual NMIs' control is set. Injecting *any* kind of event clears
 * the STI- and MOV-SS-blocking interruptibility-state flags.
 */

static void vmx_enable_intr_window(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctl = CPU_BASED_VIRTUAL_INTR_PENDING;

    ASSERT(intack.source != hvm_intsrc_none);

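    /* With tracing enabled, record which event we are opening the window for. */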
    if ( unlikely(tb_init_done) )
    {
        unsigned long intr;

        __vmread(VM_ENTRY_INTR_INFO, &intr);
        HVMTRACE_3D(INTR_WINDOW, intack.vector, intack.source,
                    (intr & INTR_INFO_VALID_MASK) ? intr & 0xff : -1);
    }

    if ( (intack.source == hvm_intsrc_nmi) && cpu_has_vmx_vnmi )
    {
        /*
         * We set MOV-SS blocking in lieu of STI blocking when delivering an
         * NMI. This is because it is processor-specific whether STI-blocking
         * blocks NMIs. Hence we *must* check for STI-blocking on NMI delivery
         * (otherwise vmentry will fail on processors that check for STI-
         * blocking), but if the processor does not check for STI-blocking then
         * we may immediately vmexit and hence make no progress!
         * (see SDM 3B 21.3, "Other Causes of VM Exits").
         */
        unsigned long intr_shadow;

        __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
        if ( intr_shadow & VMX_INTR_SHADOW_STI )
        {
            /* Having both STI-blocking and MOV-SS-blocking fails vmentry. */
            intr_shadow &= ~VMX_INTR_SHADOW_STI;
            intr_shadow |= VMX_INTR_SHADOW_MOV_SS;
            __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
        }
        ctl = CPU_BASED_VIRTUAL_NMI_PENDING;
    }

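    /* Only touch the VMCS if the window-exiting control isn't already set. */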
    if ( !(v->arch.hvm.vmx.exec_control & ctl) )
    {
        v->arch.hvm.vmx.exec_control |= ctl;
        vmx_update_cpu_exec_control(v);
    }
}

/*
 * Injecting interrupts for nested virtualization
 *
 *  When injecting virtual interrupts (originating from L0), there are
 *  two major cases: injection in L1 context and injection in L2 context.
 *   1. L1 context (in_nesting == 0)
 *     Everything is the same as without nesting: check RFLAGS.IF to
 *     see whether the injection can be done, and use the VMCS to
 *     inject the interrupt.
 *
 *   2. L2 context (in_nesting == 1)
 *     Causes a virtual VMExit; RFLAGS.IF is ignored, and whether to ack
 *     the irq is determined by intr_ack_on_exit. Injection normally
 *     should not be blocked, except for:
 *    a. context transition
 *     the interrupt needs to be blocked at virtual VMEntry time
 *    b. L2 idtv reinjection
 *     if an L2 idtv event is handled within L0 (e.g. an L0 shadow page
 *     fault), it needs to be reinjected without exiting to L1; interrupt
 *     injection should be blocked at this point as well.
 *
 *  Unfortunately, interrupt blocking in L2 won't work with a simple
 *  intr_window_open check (which depends on L2's IF). To solve this,
 *  the following algorithm can be used:
 *   v->arch.hvm.vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
 *   only the L0 control; the physical control may differ from it.
 *       - if in L1, it behaves normally: the intr window is written
 *         to the physical control as it is
 *       - if in L2, replace it with MTF (or NMI window) if possible
 *       - if MTF/NMI window is not used, the intr window can still be
 *         used, but may have a negative impact on interrupt performance.
 */

enum hvm_intblk nvmx_intr_blocked(struct vcpu *v)
{
    int r = hvm_intblk_none;
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
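        /*
         * A pending virtual VM exit, an in-progress VMCS switch, or an
         * event already queued for injection means we cannot inject now.
         */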
        if ( nvcpu->nv_vmexit_pending ||
             nvcpu->nv_vmswitch_in_progress )
            r = hvm_intblk_rflags_ie;
        else
        {
            unsigned long intr_info;

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
                r = hvm_intblk_rflags_ie;
        }
    }
    else if ( nvcpu->nv_vmentry_pending )
        r = hvm_intblk_rflags_ie;

    return r;
}

static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctrl;

    /* If blocked by L1's tpr, then nothing to do. */
    if ( nestedhvm_vcpu_in_guestmode(v) &&
         hvm_interrupt_blocked(v, intack) == hvm_intblk_tpr )
        return 1;

    if ( nvmx_intr_blocked(v) != hvm_intblk_none )
    {
        vmx_enable_intr_window(v, intack);
        return 1;
    }

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
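        /*
         * If L1 does not intercept external interrupts, fall through to the
         * normal injection path (the event is delivered directly to L2).
         */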
        ctrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
        if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
            return 0;

        if ( intack.source == hvm_intsrc_pic ||
             intack.source == hvm_intsrc_lapic )
        {
            vmx_inject_extint(intack.vector, intack.source);

            ctrl = get_vvmcs(v, VM_EXIT_CONTROLS);
            if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
            {
                /* For now, duplicate the ack path in vmx_intr_assist(). */
                hvm_vcpu_ack_pending_irq(v, intack);
                pt_intr_post(v, intack);

                intack = hvm_vcpu_has_pending_irq(v);
                if ( unlikely(intack.source != hvm_intsrc_none) )
                    vmx_enable_intr_window(v, intack);
            }
            else if ( !cpu_has_vmx_virtual_intr_delivery )
                vmx_enable_intr_window(v, intack);

            return 1;
        }
        else if ( intack.source == hvm_intsrc_vector )
        {
            vmx_inject_extint(intack.vector, intack.source);
            return 1;
        }
    }

    return 0;
}

void vmx_sync_exit_bitmap(struct vcpu *v)
{
    const unsigned int n = ARRAY_SIZE(v->arch.hvm.vmx.eoi_exit_bitmap);
    unsigned int i;

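    /* Flush only the words of the EOI-exit bitmap that have changed. */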
    while ( (i = find_first_bit(&v->arch.hvm.vmx.eoi_exitmap_changed, n)) < n )
    {
        clear_bit(i, &v->arch.hvm.vmx.eoi_exitmap_changed);
        __vmwrite(EOI_EXIT_BITMAP(i), v->arch.hvm.vmx.eoi_exit_bitmap[i]);
    }
}

void vmx_intr_assist(void)
{
    struct hvm_intack intack;
    struct vcpu *v = current;
    unsigned int tpr_threshold = 0;
    enum hvm_intblk intblk;
    int pt_vector;

    /* Block event injection when single step with MTF. */
    if ( unlikely(v->arch.hvm.single_step) )
    {
        v->arch.hvm.vmx.exec_control |= CPU_BASED_MONITOR_TRAP_FLAG;
        vmx_update_cpu_exec_control(v);
        return;
    }

    /* Block event injection while handling a sync vm_event. */
    if ( unlikely(v->arch.vm_event) && v->arch.vm_event->sync_event )
        return;

#ifdef CONFIG_MEM_SHARING
    /* Block event injection for VM fork if requested */
    if ( unlikely(v->domain->arch.hvm.mem_sharing.block_interrupts) )
        return;
#endif

    /* Crank the handle on interrupt state. */
    pt_vector = pt_update_irq(v);

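    /*
     * Find an injectable event. If the event we pick is retracted when we
     * acknowledge it, go round again for the next pending one.
     */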
    do {
        unsigned long intr_info;

        intack = hvm_vcpu_has_pending_irq(v);
        if ( likely(intack.source == hvm_intsrc_none) )
            goto out;

        if ( unlikely(nvmx_intr_intercept(v, intack)) )
            goto out;

        intblk = hvm_interrupt_blocked(v, intack);
        if ( cpu_has_vmx_virtual_intr_delivery )
        {
            /* Set "Interrupt-window exiting" for ExtINT and NMI. */
            if ( (intblk != hvm_intblk_none) &&
                 (intack.source == hvm_intsrc_pic ||
                  intack.source == hvm_intsrc_vector ||
                  intack.source == hvm_intsrc_nmi) )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                if ( (intack.source == hvm_intsrc_pic) ||
                     (intack.source == hvm_intsrc_nmi) ||
                     (intack.source == hvm_intsrc_mce) )
                    vmx_enable_intr_window(v, intack);

                goto out;
            }
        }
        else if ( intblk == hvm_intblk_tpr )
        {
            ASSERT(vlapic_enabled(vcpu_vlapic(v)));
            ASSERT(intack.source == hvm_intsrc_lapic);
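            /*
             * Defer injection: programming the TPR threshold below ensures a
             * VM exit once the guest lowers its TPR beneath the pending
             * vector's priority class.
             */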
            tpr_threshold = intack.vector >> 4;
            goto out;
        }
        else if ( intblk != hvm_intblk_none )
        {
            vmx_enable_intr_window(v, intack);
            goto out;
        }
        else
        {
            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }
        }

        intack = hvm_vcpu_ack_pending_irq(v, intack);
    } while ( intack.source == hvm_intsrc_none );

    if ( intack.source == hvm_intsrc_nmi )
    {
        vmx_inject_nmi();
    }
    else if ( intack.source == hvm_intsrc_mce )
    {
        hvm_inject_hw_exception(TRAP_machine_check, X86_EVENT_NO_EC);
    }
    else if ( cpu_has_vmx_virtual_intr_delivery &&
              intack.source != hvm_intsrc_pic &&
              intack.source != hvm_intsrc_vector )
    {
        unsigned long status;

        /*
         * intack.vector is the highest-priority pending vector, so set
         * eoi_exit_bitmap for intack.vector; this gives periodic timer
         * interrupts a chance to be posted when they become the
         * highest-priority pending vector.
         */
        if ( pt_vector != -1 )
        {
#ifndef NDEBUG
            /*
             * We assert that intack.vector is the highest-priority vector,
             * because only an interrupt from the vlapic can reach this point
             * and the highest vector is chosen in hvm_vcpu_has_pending_irq().
             * In practice, however, the assertion has been seen to fail. The
             * suspicion is that the PIR is not synced into the vIRR, leaving
             * pt_vector behind in the PIR. To help verify this suspicion,
             * dump some information when the assertion fails.
             */
            if ( unlikely(intack.vector < pt_vector) )
            {
                const struct vlapic *vlapic;
                const struct pi_desc *pi_desc;
                const uint32_t *word;
                unsigned int i;

                printk(XENLOG_ERR "%pv: intack: %u:%02x pt: %02x\n",
                       current, intack.source, intack.vector, pt_vector);

                vlapic = vcpu_vlapic(v);
                if ( vlapic && vlapic->regs )
                {
                    word = (const void *)&vlapic->regs->data[APIC_IRR];
                    printk(XENLOG_ERR "vIRR:");
                    for ( i = X86_NR_VECTORS / 32; i-- ; )
                        printk(" %08x", word[i*4]);
                    printk("\n");
                }

                pi_desc = &v->arch.hvm.vmx.pi_desc;
                if ( pi_desc )
                {
                    word = (const void *)&pi_desc->pir;
                    printk(XENLOG_ERR " PIR:");
                    for ( i = X86_NR_VECTORS / 32; i-- ; )
                        printk(" %08x", word[i]);
                    printk("\n");
                }
            }
#endif
            ASSERT(intack.vector >= pt_vector);
            vmx_set_eoi_exit_bitmap(v, intack.vector);
        }

        /* We need to update the RVI field. */
        __vmread(GUEST_INTR_STATUS, &status);
        status &= ~VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
        status |= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK & intack.vector;
        __vmwrite(GUEST_INTR_STATUS, status);

        vmx_sync_exit_bitmap(v);

        pt_intr_post(v, intack);
    }
    else
    {
        HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
        vmx_inject_extint(intack.vector, intack.source);
        pt_intr_post(v, intack);
    }

    /* Is there another IRQ to queue up behind this one? */
    intack = hvm_vcpu_has_pending_irq(v);
    if ( !cpu_has_vmx_virtual_intr_delivery ||
         intack.source == hvm_intsrc_pic ||
         intack.source == hvm_intsrc_vector )
    {
        if ( unlikely(intack.source != hvm_intsrc_none) )
            vmx_enable_intr_window(v, intack);
    }

 out:
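    /*
     * The TPR threshold is only ours to program when no nested guest is
     * running and virtual interrupt delivery is not in use.
     */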
    if ( !nestedhvm_vcpu_in_guestmode(v) &&
         !cpu_has_vmx_virtual_intr_delivery &&
         cpu_has_vmx_tpr_shadow )
        __vmwrite(TPR_THRESHOLD, tpr_threshold);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */