1 /******************************************************************************
2  * arch/x86/irq.c
3  *
4  * Portions of this file are:
5  *  Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
6  */
7 
8 #include <xen/init.h>
9 #include <xen/delay.h>
10 #include <xen/errno.h>
11 #include <xen/event.h>
12 #include <xen/irq.h>
13 #include <xen/param.h>
14 #include <xen/perfc.h>
15 #include <xen/sched.h>
16 #include <xen/keyhandler.h>
17 #include <xen/compat.h>
18 #include <xen/iocap.h>
19 #include <xen/iommu.h>
20 #include <xen/symbols.h>
21 #include <xen/trace.h>
22 #include <xen/softirq.h>
23 #include <xsm/xsm.h>
24 #include <asm/msi.h>
25 #include <asm/current.h>
26 #include <asm/flushtlb.h>
27 #include <asm/mach-generic/mach_apic.h>
28 #include <irq_vectors.h>
29 #include <public/physdev.h>
30 
31 static int parse_irq_vector_map_param(const char *s);
32 
33 /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
34 bool __read_mostly opt_noirqbalance;
35 boolean_param("noirqbalance", opt_noirqbalance);
36 
37 unsigned int __read_mostly nr_irqs_gsi = 16;
38 unsigned int __read_mostly nr_irqs;
39 integer_param("nr_irqs", nr_irqs);
40 
41 /* This default may be changed by the AMD IOMMU code */
42 int __read_mostly opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_DEFAULT;
43 custom_param("irq_vector_map", parse_irq_vector_map_param);
44 
45 vmask_t global_used_vector_map;
46 
47 struct irq_desc __read_mostly *irq_desc = NULL;
48 
49 static DECLARE_BITMAP(used_vectors, X86_NR_VECTORS);
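/*
 * Vectors recorded here are never handed out by the dynamic allocator in
 * _assign_irq_vector(): init_irq_data() below marks everything up to the
 * move-cleanup vector, plus (for PV builds) the hypercall and legacy-syscall
 * vectors, as used.
 */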
50 
51 static DEFINE_SPINLOCK(vector_lock);
52 
53 DEFINE_PER_CPU(vector_irq_t, vector_irq);
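/*
 * Per-CPU vector-to-IRQ translation table. As used in this file: an entry
 * >= 0 names the IRQ currently bound to that vector on this CPU, INT_MIN
 * marks a vector that was never assigned (see init_irq_data() and
 * setup_vector_irq()), and ~irq (a negative value) marks a vector that
 * belonged to 'irq' but has been released or is awaiting move cleanup.
 */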
54 
55 DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs);
56 
57 static LIST_HEAD(irq_ratelimit_list);
58 static DEFINE_SPINLOCK(irq_ratelimit_lock);
59 static struct timer irq_ratelimit_timer;
60 
61 /* irq_ratelimit: maximum number of interrupts allowed per 10ms; set to 0 to disable */
62 static unsigned int __read_mostly irq_ratelimit_threshold = 10000;
63 integer_param("irq_ratelimit", irq_ratelimit_threshold);
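/*
 * Illustrative example: booting with "irq_ratelimit=0" disables the rate
 * limiter entirely; irq_ratelimit_init() below then never even sets up
 * irq_ratelimit_timer.
 */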
64 
65 static int __init parse_irq_vector_map_param(const char *s)
66 {
67     const char *ss;
68     int rc = 0;
69 
70     do {
71         ss = strchr(s, ',');
72         if ( !ss )
73             ss = strchr(s, '\0');
74 
75         if ( !cmdline_strcmp(s, "none") )
76             opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_NONE;
77         else if ( !cmdline_strcmp(s, "global") )
78             opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
79         else if ( !cmdline_strcmp(s, "per-device") )
80             opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
81         else
82             rc = -EINVAL;
83 
84         s = ss + 1;
85     } while ( *ss );
86 
87     return rc;
88 }
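/*
 * Illustrative command line usage: "irq_vector_map=per-device" selects
 * OPT_IRQ_VECTOR_MAP_PERDEV, while "irq_vector_map=global" makes every IRQ
 * share global_used_vector_map (see irq_get_used_vector_mask() below);
 * unknown values cause this parser to return -EINVAL.
 */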
89 
90 /* Must be called with interrupts disabled. */
91 void lock_vector_lock(void)
92 {
93     /* Used to ensure the online set of CPUs does not change
94      * during assign_irq_vector.
95      */
96     spin_lock(&vector_lock);
97 }
98 
99 void unlock_vector_lock(void)
100 {
101     spin_unlock(&vector_lock);
102 }
103 
104 static inline bool valid_irq_vector(unsigned int vector)
105 {
106     return vector >= FIRST_IRQ_VECTOR && vector <= LAST_IRQ_VECTOR;
107 }
108 
109 static void release_old_vec(struct irq_desc *desc)
110 {
111     unsigned int vector = desc->arch.old_vector;
112 
113     desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED;
114     cpumask_clear(desc->arch.old_cpu_mask);
115 
116     if ( !valid_irq_vector(vector) )
117         ASSERT_UNREACHABLE();
118     else if ( desc->arch.used_vectors )
119     {
120         ASSERT(test_bit(vector, desc->arch.used_vectors));
121         clear_bit(vector, desc->arch.used_vectors);
122     }
123 }
124 
125 static void _trace_irq_mask(uint32_t event, int irq, int vector,
126                             const cpumask_t *mask)
127 {
128     struct {
129         unsigned int irq:16, vec:16;
130         unsigned int mask[6];
131     } d = {
132        .irq = irq,
133        .vec = vector,
134     };
135 
136     memcpy(d.mask, mask,
137            min(sizeof(d.mask), BITS_TO_LONGS(nr_cpu_ids) * sizeof(long)));
138     trace_var(event, 1, sizeof(d), &d);
139 }
140 
141 static void trace_irq_mask(uint32_t event, int irq, int vector,
142                            const cpumask_t *mask)
143 {
144     if ( unlikely(tb_init_done) )
145         _trace_irq_mask(event, irq, vector, mask);
146 }
147 
148 static int __init _bind_irq_vector(struct irq_desc *desc, int vector,
149                                    const cpumask_t *cpu_mask)
150 {
151     cpumask_t online_mask;
152     int cpu;
153 
154     BUG_ON((unsigned)vector >= X86_NR_VECTORS);
155 
156     cpumask_and(&online_mask, cpu_mask, &cpu_online_map);
157     if (cpumask_empty(&online_mask))
158         return -EINVAL;
159     if ( (desc->arch.vector == vector) &&
160          cpumask_equal(desc->arch.cpu_mask, &online_mask) )
161         return 0;
162     if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
163         return -EBUSY;
164     trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, desc->irq, vector, &online_mask);
165     for_each_cpu(cpu, &online_mask)
166         per_cpu(vector_irq, cpu)[vector] = desc->irq;
167     desc->arch.vector = vector;
168     cpumask_copy(desc->arch.cpu_mask, &online_mask);
169     if ( desc->arch.used_vectors )
170     {
171         ASSERT(!test_bit(vector, desc->arch.used_vectors));
172         set_bit(vector, desc->arch.used_vectors);
173     }
174     desc->arch.used = IRQ_USED;
175     return 0;
176 }
177 
178 int __init bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
179 {
180     struct irq_desc *desc = irq_to_desc(irq);
181     unsigned long flags;
182     int ret;
183 
184     BUG_ON((unsigned)irq >= nr_irqs);
185 
186     spin_lock_irqsave(&desc->lock, flags);
187     spin_lock(&vector_lock);
188     ret = _bind_irq_vector(desc, vector, cpu_mask);
189     spin_unlock(&vector_lock);
190     spin_unlock_irqrestore(&desc->lock, flags);
191 
192     return ret;
193 }
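/*
 * Note the lock nesting used here and throughout this file: the IRQ
 * descriptor lock is taken first and vector_lock inside it (see also the
 * comment ahead of setup_vector_irq()).
 */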
194 
195 static void _clear_irq_vector(struct irq_desc *desc)
196 {
197     unsigned int cpu, old_vector, irq = desc->irq;
198     unsigned int vector = desc->arch.vector;
199     cpumask_t *tmp_mask = this_cpu(scratch_cpumask);
200 
201     BUG_ON(!valid_irq_vector(vector));
202 
203     /* Always clear desc->arch.vector */
204     cpumask_and(tmp_mask, desc->arch.cpu_mask, &cpu_online_map);
205 
206     for_each_cpu(cpu, tmp_mask)
207     {
208         ASSERT(per_cpu(vector_irq, cpu)[vector] == irq);
209         per_cpu(vector_irq, cpu)[vector] = ~irq;
210     }
211 
212     desc->arch.vector = IRQ_VECTOR_UNASSIGNED;
213     cpumask_clear(desc->arch.cpu_mask);
214 
215     if ( desc->arch.used_vectors )
216     {
217         ASSERT(test_bit(vector, desc->arch.used_vectors));
218         clear_bit(vector, desc->arch.used_vectors);
219     }
220 
221     desc->arch.used = IRQ_UNUSED;
222 
223     trace_irq_mask(TRC_HW_IRQ_CLEAR_VECTOR, irq, vector, tmp_mask);
224 
225     if ( likely(!desc->arch.move_in_progress) )
226         return;
227 
228     /* If we were in motion, also clear desc->arch.old_vector */
229     old_vector = desc->arch.old_vector;
230     cpumask_and(tmp_mask, desc->arch.old_cpu_mask, &cpu_online_map);
231 
232     for_each_cpu(cpu, tmp_mask)
233     {
234         ASSERT(per_cpu(vector_irq, cpu)[old_vector] == irq);
235         TRACE_3D(TRC_HW_IRQ_MOVE_FINISH, irq, old_vector, cpu);
236         per_cpu(vector_irq, cpu)[old_vector] = ~irq;
237     }
238 
239     release_old_vec(desc);
240 
241     desc->arch.move_in_progress = 0;
242 }
243 
244 void __init clear_irq_vector(int irq)
245 {
246     struct irq_desc *desc = irq_to_desc(irq);
247     unsigned long flags;
248 
249     spin_lock_irqsave(&desc->lock, flags);
250     spin_lock(&vector_lock);
251     _clear_irq_vector(desc);
252     spin_unlock(&vector_lock);
253     spin_unlock_irqrestore(&desc->lock, flags);
254 }
255 
256 /*
257  * Dynamic IRQ allocation and deallocation for MSI.
258  */
259 
260 int create_irq(nodeid_t node, bool grant_access)
261 {
262     int irq, ret;
263     struct irq_desc *desc;
264 
265     for (irq = nr_irqs_gsi; irq < nr_irqs; irq++)
266     {
267         desc = irq_to_desc(irq);
268         if (cmpxchg(&desc->arch.used, IRQ_UNUSED, IRQ_RESERVED) == IRQ_UNUSED)
269            break;
270     }
271 
272     if (irq >= nr_irqs)
273          return -ENOSPC;
274 
275     ret = init_one_irq_desc(desc);
276     if (!ret)
277     {
278         cpumask_t *mask = NULL;
279 
280         if ( node != NUMA_NO_NODE )
281         {
282             mask = &node_to_cpumask(node);
283             if (cpumask_empty(mask))
284                 mask = NULL;
285         }
286         ret = assign_irq_vector(irq, mask);
287     }
288 
289     ASSERT(desc->arch.creator_domid == DOMID_INVALID);
290 
291     if (ret < 0)
292     {
293         desc->arch.used = IRQ_UNUSED;
294         irq = ret;
295     }
296     else if ( grant_access )
297     {
298         struct domain *currd = current->domain;
299 
300         ret = irq_permit_access(currd, irq);
301         if ( ret )
302             printk(XENLOG_G_ERR
303                    "Could not grant %pd access to IRQ%d (error %d)\n",
304                    currd, irq, ret);
305         else
306             desc->arch.creator_domid = currd->domain_id;
307     }
308 
309     return irq;
310 }
311 
312 void destroy_irq(unsigned int irq)
313 {
314     struct irq_desc *desc = irq_to_desc(irq);
315     unsigned long flags;
316     struct irqaction *action;
317 
318     BUG_ON(!MSI_IRQ(irq));
319 
320     if ( desc->arch.creator_domid != DOMID_INVALID )
321     {
322         struct domain *d = get_domain_by_id(desc->arch.creator_domid);
323 
324         if ( d )
325         {
326             int err = irq_deny_access(d, irq);
327 
328             if ( err )
329                 printk(XENLOG_G_ERR
330                        "Could not revoke %pd access to IRQ%u (error %d)\n",
331                        d, irq, err);
332 
333             put_domain(d);
334         }
335 
336         desc->arch.creator_domid = DOMID_INVALID;
337     }
338 
339     spin_lock_irqsave(&desc->lock, flags);
340     desc->status  &= ~IRQ_GUEST;
341     desc->handler->shutdown(desc);
342     desc->status |= IRQ_DISABLED;
343     action = desc->action;
344     desc->action  = NULL;
345     desc->msi_desc = NULL;
346     cpumask_setall(desc->affinity);
347     spin_unlock_irqrestore(&desc->lock, flags);
348 
349     /* Wait to make sure it's not being used on another CPU */
350     do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
351 
352     spin_lock_irqsave(&desc->lock, flags);
353     desc->handler = &no_irq_type;
354     spin_lock(&vector_lock);
355     _clear_irq_vector(desc);
356     spin_unlock(&vector_lock);
357     desc->arch.used_vectors = NULL;
358     spin_unlock_irqrestore(&desc->lock, flags);
359 
360     xfree(action);
361 }
362 
363 int irq_to_vector(int irq)
364 {
365     int vector = IRQ_VECTOR_UNASSIGNED;
366     const struct irq_desc *desc;
367 
368     BUG_ON(irq >= nr_irqs || irq < 0);
369     desc = irq_to_desc(irq);
370 
371     if (IO_APIC_IRQ(irq))
372     {
373         vector = desc->arch.vector;
374         /*
375          * Both parts of the condition are needed here during early boot, as
376          * at that time IRQ0 in particular may still have the 8259A chip set,
377          * but has already got its special IRQ0_VECTOR.
378          */
379         if ( desc->handler->enable == enable_8259A_irq &&
380              vector >= FIRST_LEGACY_VECTOR && vector <= LAST_LEGACY_VECTOR )
381             vector = 0;
382     }
383     else if (MSI_IRQ(irq))
384         vector = desc->arch.vector;
385     else
386         vector = LEGACY_VECTOR(irq);
387 
388     return vector;
389 }
390 
391 int arch_init_one_irq_desc(struct irq_desc *desc)
392 {
393     if ( !zalloc_cpumask_var(&desc->arch.cpu_mask) )
394         return -ENOMEM;
395 
396     if ( !alloc_cpumask_var(&desc->arch.old_cpu_mask) )
397     {
398         free_cpumask_var(desc->arch.cpu_mask);
399         return -ENOMEM;
400     }
401 
402     if ( !alloc_cpumask_var(&desc->arch.pending_mask) )
403     {
404         free_cpumask_var(desc->arch.old_cpu_mask);
405         free_cpumask_var(desc->arch.cpu_mask);
406         return -ENOMEM;
407     }
408 
409     desc->arch.vector = IRQ_VECTOR_UNASSIGNED;
410     desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED;
411     desc->arch.creator_domid = DOMID_INVALID;
412 
413     return 0;
414 }
415 
416 int __init init_irq_data(void)
417 {
418     struct irq_desc *desc;
419     int irq, vector;
420 
421     for ( vector = 0; vector < X86_NR_VECTORS; ++vector )
422         this_cpu(vector_irq)[vector] = INT_MIN;
423 
424     irq_desc = xzalloc_array(struct irq_desc, nr_irqs);
425 
426     if ( !irq_desc )
427         return -ENOMEM;
428 
429     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
430     {
431         desc = irq_to_desc(irq);
432         desc->irq = irq;
433         init_one_irq_desc(desc);
434     }
435     for ( ; irq < nr_irqs; irq++ )
436         irq_to_desc(irq)->irq = irq;
437 
438 #ifdef CONFIG_PV
439     /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
440     set_bit(LEGACY_SYSCALL_VECTOR, used_vectors);
441     set_bit(HYPERCALL_VECTOR, used_vectors);
442 #endif
443 
444     /*
445      * Mark vectors up to the cleanup one as used, to prevent an infinite loop
446      * invoking irq_move_cleanup_interrupt.
447      */
448     BUILD_BUG_ON(IRQ_MOVE_CLEANUP_VECTOR < FIRST_DYNAMIC_VECTOR);
449     for ( vector = FIRST_DYNAMIC_VECTOR;
450           vector <= IRQ_MOVE_CLEANUP_VECTOR;
451           vector++ )
452         __set_bit(vector, used_vectors);
453 
454     return 0;
455 }
456 
457 static void ack_none(struct irq_desc *desc)
458 {
459     ack_bad_irq(desc->irq);
460 }
461 
462 hw_irq_controller no_irq_type = {
463     "none",
464     irq_startup_none,
465     irq_shutdown_none,
466     irq_enable_none,
467     irq_disable_none,
468     ack_none,
469 };
470 
471 static vmask_t *irq_get_used_vector_mask(int irq)
472 {
473     vmask_t *ret = NULL;
474 
475     if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_GLOBAL )
476     {
477         struct irq_desc *desc = irq_to_desc(irq);
478 
479         ret = &global_used_vector_map;
480 
481         if ( desc->arch.used_vectors )
482             printk(XENLOG_INFO "Unassigned IRQ %d already has used_vectors\n",
483                    irq);
484         else
485         {
486             int vector;
487 
488             vector = irq_to_vector(irq);
489             if ( valid_irq_vector(vector) )
490             {
491                 printk(XENLOG_INFO "IRQ%d already assigned vector %02x\n",
492                        irq, vector);
493 
494                 ASSERT(!test_bit(vector, ret));
495 
496                 set_bit(vector, ret);
497             }
498             else if ( vector != IRQ_VECTOR_UNASSIGNED )
499                 printk(XENLOG_WARNING "IRQ%d mapped to bogus vector %02x\n",
500                        irq, vector);
501         }
502     }
503     else if ( IO_APIC_IRQ(irq) &&
504               opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_NONE )
505     {
506         ret = io_apic_get_used_vector_map(irq);
507     }
508 
509     return ret;
510 }
511 
512 static int _assign_irq_vector(struct irq_desc *desc, const cpumask_t *mask)
513 {
514     /*
515      * NOTE! The local APIC isn't very good at handling
516      * multiple interrupts at the same interrupt level.
517      * As the interrupt level is determined by taking the
518      * vector number and shifting that right by 4, we
519      * want to spread these out a bit so that they don't
520      * all fall in the same interrupt level.
521      *
522      * Also, we've got to be careful not to trash gate
523      * 0x80, because int 0x80 is hm, kind of importantish. ;)
524      */
525     static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
526     unsigned int cpu;
527     int err, old_vector, irq = desc->irq;
528     vmask_t *irq_used_vectors = NULL;
529 
530     old_vector = irq_to_vector(irq);
531     if ( valid_irq_vector(old_vector) )
532     {
533         cpumask_t tmp_mask;
534 
535         cpumask_and(&tmp_mask, mask, &cpu_online_map);
536         if (cpumask_intersects(&tmp_mask, desc->arch.cpu_mask)) {
537             desc->arch.vector = old_vector;
538             return 0;
539         }
540     }
541 
542     if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count )
543         return -EAGAIN;
544 
545     err = -ENOSPC;
546 
547     /* This is the only place normal IRQs are ever marked
548      * as "in use".  If they're not in use yet, check to see
549      * if we need to assign a global vector mask. */
550     if ( desc->arch.used == IRQ_USED )
551     {
552         irq_used_vectors = desc->arch.used_vectors;
553     }
554     else
555         irq_used_vectors = irq_get_used_vector_mask(irq);
556 
557     for_each_cpu(cpu, mask)
558     {
559         const cpumask_t *vec_mask;
560         int new_cpu;
561         int vector, offset;
562 
563         /* Only try to allocate IRQs on CPUs that are online. */
564         if (!cpu_online(cpu))
565             continue;
566 
567         vec_mask = vector_allocation_cpumask(cpu);
568 
569         vector = current_vector;
570         offset = current_offset;
571 next:
572         vector += 8;
573         if (vector > LAST_DYNAMIC_VECTOR) {
574             /* If out of vectors on large boxen, must share them. */
575             offset = (offset + 1) % 8;
576             vector = FIRST_DYNAMIC_VECTOR + offset;
577         }
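        /*
         * Illustrative walk of the search above: starting from
         * current_vector == FIRST_DYNAMIC_VECTOR and offset 0, candidates
         * are tried 8 apart (FIRST_DYNAMIC_VECTOR + 8, + 16, ...); once past
         * LAST_DYNAMIC_VECTOR the offset is bumped and the scan restarts at
         * FIRST_DYNAMIC_VECTOR + 1, + 9, and so on. Stepping by 8 helps
         * spread consecutive allocations across priority classes
         * (vector >> 4), as noted in the comment at the top of this function.
         */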
578         if (unlikely(current_vector == vector))
579             continue;
580 
581         if (test_bit(vector, used_vectors))
582             goto next;
583 
584         if (irq_used_vectors
585             && test_bit(vector, irq_used_vectors) )
586             goto next;
587 
588         if ( cpumask_test_cpu(0, vec_mask) &&
589              vector >= FIRST_LEGACY_VECTOR && vector <= LAST_LEGACY_VECTOR )
590             goto next;
591 
592         for_each_cpu(new_cpu, vec_mask)
593             if (per_cpu(vector_irq, new_cpu)[vector] >= 0)
594                 goto next;
595         /* Found one! */
596         current_vector = vector;
597         current_offset = offset;
598 
599         if ( valid_irq_vector(old_vector) )
600         {
601             cpumask_and(desc->arch.old_cpu_mask, desc->arch.cpu_mask,
602                         &cpu_online_map);
603             desc->arch.old_vector = desc->arch.vector;
604             if ( !cpumask_empty(desc->arch.old_cpu_mask) )
605                 desc->arch.move_in_progress = 1;
606             else
607                 /* This can happen while offlining a CPU. */
608                 release_old_vec(desc);
609         }
610 
611         trace_irq_mask(TRC_HW_IRQ_ASSIGN_VECTOR, irq, vector, vec_mask);
612 
613         for_each_cpu(new_cpu, vec_mask)
614             per_cpu(vector_irq, new_cpu)[vector] = irq;
615         desc->arch.vector = vector;
616         cpumask_copy(desc->arch.cpu_mask, vec_mask);
617 
618         desc->arch.used = IRQ_USED;
619         ASSERT((desc->arch.used_vectors == NULL)
620                || (desc->arch.used_vectors == irq_used_vectors));
621         desc->arch.used_vectors = irq_used_vectors;
622 
623         if ( desc->arch.used_vectors )
624         {
625             ASSERT(!test_bit(vector, desc->arch.used_vectors));
626 
627             set_bit(vector, desc->arch.used_vectors);
628         }
629 
630         err = 0;
631         break;
632     }
633     return err;
634 }
635 
636 int assign_irq_vector(int irq, const cpumask_t *mask)
637 {
638     int ret;
639     unsigned long flags;
640     struct irq_desc *desc = irq_to_desc(irq);
641 
642     BUG_ON(irq >= nr_irqs || irq < 0);
643 
644     spin_lock_irqsave(&desc->lock, flags);
645 
646     spin_lock(&vector_lock);
647     ret = _assign_irq_vector(desc, mask ?: TARGET_CPUS);
648     spin_unlock(&vector_lock);
649 
650     if ( !ret )
651     {
652         ret = desc->arch.vector;
653         if ( mask )
654             cpumask_copy(desc->affinity, mask);
655         else
656             cpumask_setall(desc->affinity);
657     }
658 
659     spin_unlock_irqrestore(&desc->lock, flags);
660 
661     return ret;
662 }
663 
664 /*
665  * Initialize vector_irq on a new cpu. This function must be called
666  * with vector_lock held.  For this reason it may not itself acquire
667  * the IRQ descriptor locks, as lock nesting is the other way around.
668  */
669 void setup_vector_irq(unsigned int cpu)
670 {
671     unsigned int irq, vector;
672 
673     /* Clear vector_irq */
674     for ( vector = 0; vector < X86_NR_VECTORS; ++vector )
675         per_cpu(vector_irq, cpu)[vector] = INT_MIN;
676     /* Mark the in-use vectors. */
677     for ( irq = 0; irq < nr_irqs; ++irq )
678     {
679         struct irq_desc *desc = irq_to_desc(irq);
680 
681         if ( !irq_desc_initialized(desc) )
682             continue;
683         vector = irq_to_vector(irq);
684         if ( vector >= FIRST_HIPRIORITY_VECTOR &&
685              vector <= LAST_HIPRIORITY_VECTOR )
686             cpumask_set_cpu(cpu, desc->arch.cpu_mask);
687         else if ( !cpumask_test_cpu(cpu, desc->arch.cpu_mask) )
688             continue;
689         per_cpu(vector_irq, cpu)[vector] = irq;
690     }
691 }
692 
693 void move_masked_irq(struct irq_desc *desc)
694 {
695     cpumask_t *pending_mask = desc->arch.pending_mask;
696 
697     if (likely(!(desc->status & IRQ_MOVE_PENDING)))
698         return;
699 
700     desc->status &= ~IRQ_MOVE_PENDING;
701 
702     if (!desc->handler->set_affinity)
703         return;
704 
705     /*
706      * If there is a valid mask to work with, do the disable, re-program,
707      * enable sequence. This is *not* particularly important for level
708      * triggered interrupts, but in the edge triggered case we might be
709      * setting the RTE while an active trigger is coming in, which could
710      * cause some IO-APICs to malfunction. Being paranoid, I guess!
711      *
712      * For correct operation this depends on the caller masking the IRQs.
713      */
714     if ( likely(cpumask_intersects(pending_mask, &cpu_online_map)) )
715         desc->handler->set_affinity(desc, pending_mask);
716 
717     cpumask_clear(pending_mask);
718 }
719 
720 void move_native_irq(struct irq_desc *desc)
721 {
722     if (likely(!(desc->status & IRQ_MOVE_PENDING)))
723         return;
724 
725     if (unlikely(desc->status & IRQ_DISABLED))
726         return;
727 
728     desc->handler->disable(desc);
729     move_masked_irq(desc);
730     desc->handler->enable(desc);
731 }
732 
733 void irq_move_cleanup_interrupt(struct cpu_user_regs *regs)
734 {
735     unsigned vector, me;
736 
737     ack_APIC_irq();
738 
739     me = smp_processor_id();
740     if ( !cpu_online(me) )
741         return;
742 
743     for ( vector = FIRST_DYNAMIC_VECTOR;
744           vector <= LAST_HIPRIORITY_VECTOR; vector++)
745     {
746         unsigned int irq;
747         unsigned int irr;
748         struct irq_desc *desc;
749         irq = per_cpu(vector_irq, me)[vector];
750 
751         if ((int)irq < 0)
752             continue;
753 
754         desc = irq_to_desc(irq);
755         if (!desc)
756             continue;
757 
758         spin_lock(&desc->lock);
759 
760         if (desc->handler->enable == enable_8259A_irq)
761             goto unlock;
762 
763         if (!desc->arch.move_cleanup_count)
764             goto unlock;
765 
766         if ( vector == desc->arch.vector &&
767              cpumask_test_cpu(me, desc->arch.cpu_mask) )
768             goto unlock;
769 
770         irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
771         /*
772          * Check whether the vector that needs to be cleaned up is still
773          * registered in this CPU's IRR. If so, this is not a good time to
774          * clean it up; let's clean it up on the next attempt by sending
775          * another IRQ_MOVE_CLEANUP_VECTOR
776          * to ourselves.
777          */
778         if ( irr & (1u << (vector % 32)) )
779         {
780             if ( vector < IRQ_MOVE_CLEANUP_VECTOR )
781             {
782                 ASSERT_UNREACHABLE();
783                 goto unlock;
784             }
785             send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
786             TRACE_3D(TRC_HW_IRQ_MOVE_CLEANUP_DELAY,
787                      irq, vector, smp_processor_id());
788             goto unlock;
789         }
790 
791         TRACE_3D(TRC_HW_IRQ_MOVE_CLEANUP,
792                  irq, vector, smp_processor_id());
793 
794         per_cpu(vector_irq, me)[vector] = ~irq;
795         desc->arch.move_cleanup_count--;
796 
797         if ( desc->arch.move_cleanup_count == 0 )
798         {
799             ASSERT(vector == desc->arch.old_vector);
800             release_old_vec(desc);
801         }
802 unlock:
803         spin_unlock(&desc->lock);
804     }
805 }
806 
807 static void send_cleanup_vector(struct irq_desc *desc)
808 {
809     cpumask_and(desc->arch.old_cpu_mask, desc->arch.old_cpu_mask,
810                 &cpu_online_map);
811     desc->arch.move_cleanup_count = cpumask_weight(desc->arch.old_cpu_mask);
812 
813     if ( desc->arch.move_cleanup_count )
814         send_IPI_mask(desc->arch.old_cpu_mask, IRQ_MOVE_CLEANUP_VECTOR);
815     else
816         release_old_vec(desc);
817 
818     desc->arch.move_in_progress = 0;
819 }
820 
821 void irq_complete_move(struct irq_desc *desc)
822 {
823     unsigned vector, me;
824 
825     if (likely(!desc->arch.move_in_progress))
826         return;
827 
828     vector = (u8)get_irq_regs()->entry_vector;
829     me = smp_processor_id();
830 
831     if ( vector == desc->arch.vector &&
832          cpumask_test_cpu(me, desc->arch.cpu_mask) )
833         send_cleanup_vector(desc);
834 }
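/*
 * Summary of the vector move protocol implemented above:
 * _assign_irq_vector() records the outgoing vector/CPUs in
 * arch.old_vector/arch.old_cpu_mask and sets move_in_progress; once an
 * interrupt is taken on the new vector and CPU, irq_complete_move() calls
 * send_cleanup_vector(), which IPIs the old CPUs with
 * IRQ_MOVE_CLEANUP_VECTOR; irq_move_cleanup_interrupt() then clears the
 * stale vector_irq entries and finally releases the old vector via
 * release_old_vec().
 */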
835 
836 unsigned int set_desc_affinity(struct irq_desc *desc, const cpumask_t *mask)
837 {
838     int ret;
839     unsigned long flags;
840     cpumask_t dest_mask;
841 
842     if ( mask && !cpumask_intersects(mask, &cpu_online_map) )
843         return BAD_APICID;
844 
845     spin_lock_irqsave(&vector_lock, flags);
846     ret = _assign_irq_vector(desc, mask ?: TARGET_CPUS);
847     spin_unlock_irqrestore(&vector_lock, flags);
848 
849     if ( ret < 0 )
850         return BAD_APICID;
851 
852     if ( mask )
853     {
854         cpumask_copy(desc->affinity, mask);
855         cpumask_and(&dest_mask, mask, desc->arch.cpu_mask);
856     }
857     else
858     {
859         cpumask_setall(desc->affinity);
860         cpumask_copy(&dest_mask, desc->arch.cpu_mask);
861     }
862     cpumask_and(&dest_mask, &dest_mask, &cpu_online_map);
863 
864     return cpu_mask_to_apicid(&dest_mask);
865 }
866 
867 /* For re-setting the interrupt affinity of a specific IRQ. */
868 void irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
869 {
870     if (!desc->handler->set_affinity)
871         return;
872 
873     ASSERT(spin_is_locked(&desc->lock));
874     desc->status &= ~IRQ_MOVE_PENDING;
875     smp_wmb();
876     cpumask_copy(desc->arch.pending_mask, mask);
877     smp_wmb();
878     desc->status |= IRQ_MOVE_PENDING;
879 }
880 
881 void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask)
882 {
883     unsigned long flags;
884     struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
885 
886     if ( !desc )
887         return;
888     irq_set_affinity(desc, mask);
889     spin_unlock_irqrestore(&desc->lock, flags);
890 }
891 
892 DEFINE_PER_CPU(unsigned int, irq_count);
893 static DEFINE_PER_CPU(bool, check_eoi_deferral);
894 
895 uint8_t alloc_hipriority_vector(void)
896 {
897     static uint8_t next = FIRST_HIPRIORITY_VECTOR;
898     BUG_ON(next < FIRST_HIPRIORITY_VECTOR);
899     BUG_ON(next > LAST_HIPRIORITY_VECTOR);
900     return next++;
901 }
902 
903 static void (*direct_apic_vector[X86_NR_VECTORS])(struct cpu_user_regs *);
904 void set_direct_apic_vector(
905     uint8_t vector, void (*handler)(struct cpu_user_regs *))
906 {
907     BUG_ON(direct_apic_vector[vector] != NULL);
908     direct_apic_vector[vector] = handler;
909 }
910 
911 void alloc_direct_apic_vector(
912     uint8_t *vector, void (*handler)(struct cpu_user_regs *))
913 {
914     static DEFINE_SPINLOCK(lock);
915 
916     spin_lock(&lock);
917     if (*vector == 0) {
918         *vector = alloc_hipriority_vector();
919         set_direct_apic_vector(*vector, handler);
920     }
921     spin_unlock(&lock);
922 }
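/*
 * Illustrative (hypothetical) caller: keep a "static uint8_t my_vector;"
 * and call alloc_direct_apic_vector(&my_vector, my_handler) from each place
 * that may need it; only the first call allocates a high-priority vector and
 * registers the handler, later calls see *vector != 0 and do nothing.
 */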
923 
924 static void irq_ratelimit_timer_fn(void *data)
925 {
926     struct irq_desc *desc, *tmp;
927     unsigned long flags;
928 
929     spin_lock_irqsave(&irq_ratelimit_lock, flags);
930 
931     list_for_each_entry_safe ( desc, tmp, &irq_ratelimit_list, rl_link )
932     {
933         spin_lock(&desc->lock);
934         desc->handler->enable(desc);
935         list_del(&desc->rl_link);
936         INIT_LIST_HEAD(&desc->rl_link);
937         spin_unlock(&desc->lock);
938     }
939 
940     spin_unlock_irqrestore(&irq_ratelimit_lock, flags);
941 }
942 
943 static int __init irq_ratelimit_init(void)
944 {
945     if ( irq_ratelimit_threshold )
946         init_timer(&irq_ratelimit_timer, irq_ratelimit_timer_fn, NULL, 0);
947     return 0;
948 }
949 __initcall(irq_ratelimit_init);
950 
951 int __init request_irq(unsigned int irq, unsigned int irqflags,
952         void (*handler)(int, void *, struct cpu_user_regs *),
953         const char * devname, void *dev_id)
954 {
955     struct irqaction * action;
956     int retval;
957 
958     /*
959      * Sanity-check: shared interrupts must pass in a real dev-ID,
960      * otherwise we'll have trouble later trying to figure out
961      * which interrupt is which (messes up the interrupt freeing
962      * logic etc).
963      */
964     if (irq >= nr_irqs)
965         return -EINVAL;
966     if (!handler)
967         return -EINVAL;
968 
969     action = xmalloc(struct irqaction);
970     if (!action)
971         return -ENOMEM;
972 
973     action->handler = handler;
974     action->name = devname;
975     action->dev_id = dev_id;
976     action->free_on_release = 1;
977 
978     retval = setup_irq(irq, irqflags, action);
979     if (retval)
980         xfree(action);
981 
982     return retval;
983 }
984 
985 void __init release_irq(unsigned int irq, const void *dev_id)
986 {
987     struct irq_desc *desc;
988     unsigned long flags;
989     struct irqaction *action;
990 
991     desc = irq_to_desc(irq);
992 
993     spin_lock_irqsave(&desc->lock,flags);
994     action = desc->action;
995     desc->action  = NULL;
996     desc->handler->shutdown(desc);
997     desc->status |= IRQ_DISABLED;
998     spin_unlock_irqrestore(&desc->lock,flags);
999 
1000     /* Wait to make sure it's not being used on another CPU */
1001     do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
1002 
1003     if (action && action->free_on_release)
1004         xfree(action);
1005 }
1006 
1007 int __init setup_irq(unsigned int irq, unsigned int irqflags,
1008                      struct irqaction *new)
1009 {
1010     struct irq_desc *desc;
1011     unsigned long flags;
1012 
1013     ASSERT(irqflags == 0);
1014 
1015     desc = irq_to_desc(irq);
1016 
1017     spin_lock_irqsave(&desc->lock,flags);
1018 
1019     if ( desc->action != NULL )
1020     {
1021         spin_unlock_irqrestore(&desc->lock,flags);
1022         return -EBUSY;
1023     }
1024 
1025     desc->action  = new;
1026     desc->status &= ~IRQ_DISABLED;
1027     desc->handler->startup(desc);
1028 
1029     spin_unlock_irqrestore(&desc->lock,flags);
1030 
1031     return 0;
1032 }
1033 
1034 
1035 /*
1036  * HANDLING OF GUEST-BOUND PHYSICAL IRQS
1037  */
1038 
1039 #define IRQ_MAX_GUESTS 7
1040 typedef struct {
1041     u8 nr_guests;
1042     u8 in_flight;
1043     u8 shareable;
1044     u8 ack_type;
1045 #define ACKTYPE_NONE   0     /* No final acknowledgement is required */
1046 #define ACKTYPE_UNMASK 1     /* Unmask PIC hardware (from any CPU)   */
1047 #define ACKTYPE_EOI    2     /* EOI on the CPU that was interrupted  */
1048     cpumask_var_t cpu_eoi_map; /* CPUs that need to EOI this interrupt */
1049     struct timer eoi_timer;
1050     struct domain *guest[IRQ_MAX_GUESTS];
1051 } irq_guest_action_t;
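/*
 * See irq_acktype() below for how interrupt controller types map onto the
 * ACKTYPE_* values above: edge-triggered IO-APIC/LAPIC and maskable MSI
 * interrupts need no final ack (ACKTYPE_NONE), level-triggered IO-APIC
 * interrupts are EOIed on the CPU that took them (ACKTYPE_EOI) or unmasked,
 * and legacy PIC interrupts are unmasked from any CPU (ACKTYPE_UNMASK).
 */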
1052 
1053 /*
1054  * Stack of interrupts awaiting EOI on each CPU. These must be popped in
1055  * order, as only the current highest-priority pending irq can be EOIed.
1056  */
1057 struct pending_eoi {
1058     u32 ready:1;  /* Ready for EOI now?  */
1059     u32 irq:23;   /* irq of the vector */
1060     u32 vector:8; /* vector awaiting EOI */
1061 };
1062 
1063 static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_DYNAMIC_VECTORS]);
1064 #define pending_eoi_sp(p) ((p)[NR_DYNAMIC_VECTORS-1].vector)
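/*
 * The stack pointer itself is stored in the .vector field of the final array
 * slot, so pending_eoi_sp() == 0 means the per-CPU stack is empty (this is
 * what cpu_has_pending_apic_eoi() below checks).
 */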
1065 
1066 bool cpu_has_pending_apic_eoi(void)
1067 {
1068     return pending_eoi_sp(this_cpu(pending_eoi)) != 0;
1069 }
1070 
1071 void end_nonmaskable_irq(struct irq_desc *desc, uint8_t vector)
1072 {
1073     struct pending_eoi *peoi = this_cpu(pending_eoi);
1074     unsigned int sp = pending_eoi_sp(peoi);
1075 
1076     if ( !this_cpu(check_eoi_deferral) || !sp || peoi[sp - 1].vector < vector )
1077     {
1078         ack_APIC_irq();
1079         return;
1080     }
1081 
1082     /* Defer this vector's EOI until all higher ones have been EOI-ed. */
1083     pending_eoi_sp(peoi) = sp + 1;
1084     do {
1085         peoi[sp] = peoi[sp - 1];
1086     } while ( --sp && peoi[sp - 1].vector > vector );
1087     ASSERT(!sp || peoi[sp - 1].vector < vector);
1088 
1089     peoi[sp].irq = desc->irq;
1090     peoi[sp].vector = vector;
1091     peoi[sp].ready = 1;
1092 }
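/*
 * Illustrative example of the deferral above: if the pending-EOI stack holds
 * vectors { 0x30, 0x40 } (lowest at the bottom) and a non-maskable interrupt
 * arrives on vector 0x38, the 0x40 entry is shifted up and 0x38 is inserted
 * below it, already marked ready, so flush_ready_eoi() will EOI it only
 * after the higher-priority 0x40 has been EOIed.
 */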
1093 
1094 static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
1095 {
1096     if ( d->arch.pirq_eoi_map )
1097     {
1098         ASSERT(irq < PAGE_SIZE * BITS_PER_BYTE);
1099         set_bit(irq, d->arch.pirq_eoi_map);
1100     }
1101 }
1102 
1103 static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
1104 {
1105     if ( d->arch.pirq_eoi_map )
1106     {
1107         ASSERT(irq < PAGE_SIZE * BITS_PER_BYTE);
1108         clear_bit(irq, d->arch.pirq_eoi_map);
1109     }
1110 }
1111 
1112 static void set_eoi_ready(void *data);
1113 
1114 static void irq_guest_eoi_timer_fn(void *data)
1115 {
1116     struct irq_desc *desc = data;
1117     unsigned int i, irq = desc - irq_desc;
1118     irq_guest_action_t *action;
1119 
1120     spin_lock_irq(&desc->lock);
1121 
1122     if ( !(desc->status & IRQ_GUEST) )
1123         goto out;
1124 
1125     action = (irq_guest_action_t *)desc->action;
1126 
1127     ASSERT(action->ack_type != ACKTYPE_NONE);
1128 
1129     /*
1130      * If no IRQ is in flight at all, or another instance of this timer is
1131      * already running, skip everything to avoid forcing an EOI early.
1132      */
1133     if ( !action->in_flight || timer_is_active(&action->eoi_timer) )
1134         goto out;
1135 
1136     for ( i = 0; i < action->nr_guests; i++ )
1137     {
1138         struct domain *d = action->guest[i];
1139         unsigned int pirq = domain_irq_to_pirq(d, irq);
1140 
1141         if ( test_and_clear_bool(pirq_info(d, pirq)->masked) )
1142             action->in_flight--;
1143     }
1144 
1145     if ( action->in_flight )
1146     {
1147         printk(XENLOG_G_WARNING
1148                "IRQ%u: %d/%d handler(s) still in flight at forced EOI\n",
1149                irq, action->in_flight, action->nr_guests);
1150         ASSERT_UNREACHABLE();
1151     }
1152 
1153     switch ( action->ack_type )
1154     {
1155         cpumask_t *cpu_eoi_map;
1156 
1157     case ACKTYPE_UNMASK:
1158         if ( desc->handler->end )
1159             desc->handler->end(desc, 0);
1160         break;
1161 
1162     case ACKTYPE_EOI:
1163         cpu_eoi_map = this_cpu(scratch_cpumask);
1164         cpumask_copy(cpu_eoi_map, action->cpu_eoi_map);
1165         spin_unlock_irq(&desc->lock);
1166         on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 0);
1167         return;
1168     }
1169 
1170  out:
1171     spin_unlock_irq(&desc->lock);
1172 }
1173 
1174 /*
1175  * Retrieve Xen irq-descriptor corresponding to a domain-specific irq.
1176  * The descriptor is returned locked. This function is safe against changes
1177  * to the per-domain irq-to-vector mapping.
1178  */
1179 struct irq_desc *domain_spin_lock_irq_desc(
1180     struct domain *d, int pirq, unsigned long *pflags)
1181 {
1182     const struct pirq *info = pirq_info(d, pirq);
1183 
1184     return info ? pirq_spin_lock_irq_desc(info, pflags) : NULL;
1185 }
1186 
1187 /*
1188  * Same as above, but with the struct pirq already looked up.
1189  */
1190 struct irq_desc *pirq_spin_lock_irq_desc(
1191     const struct pirq *pirq, unsigned long *pflags)
1192 {
1193     struct irq_desc *desc;
1194     unsigned long flags;
1195 
1196     for ( ; ; )
1197     {
1198         int irq = pirq->arch.irq;
1199 
1200         if ( irq <= 0 )
1201             return NULL;
1202 
1203         desc = irq_to_desc(irq);
1204         spin_lock_irqsave(&desc->lock, flags);
1205         if ( irq == pirq->arch.irq )
1206             break;
1207         spin_unlock_irqrestore(&desc->lock, flags);
1208     }
1209 
1210     if ( pflags )
1211         *pflags = flags;
1212 
1213     return desc;
1214 }
1215 
1216 static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq,
1217                                 struct pirq **pinfo)
1218 {
1219     int err = radix_tree_insert(&d->arch.irq_pirq, irq,
1220                                 radix_tree_int_to_ptr(0));
1221     struct pirq *info;
1222 
1223     if ( err && err != -EEXIST )
1224         return err;
1225     info = pirq_get_info(d, pirq);
1226     if ( !info )
1227     {
1228         if ( !err )
1229             radix_tree_delete(&d->arch.irq_pirq, irq);
1230         return -ENOMEM;
1231     }
1232     *pinfo = info;
1233 
1234     return !!err;
1235 }
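/*
 * Return value convention for prepare_domain_irq_pirq(): 0 means a fresh
 * irq -> pirq slot was inserted, a positive value (1) means the slot already
 * existed (radix_tree_insert() returned -EEXIST), and a negative value is an
 * error; *pinfo is only valid in the non-negative cases.
 */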
1236 
1237 static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
1238 {
1239     radix_tree_replace_slot(
1240         radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
1241         radix_tree_int_to_ptr(pirq->pirq));
1242     pirq->arch.irq = irq;
1243 }
1244 
1245 static void clear_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
1246 {
1247     pirq->arch.irq = 0;
1248     radix_tree_replace_slot(
1249         radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
1250         radix_tree_int_to_ptr(0));
1251 }
1252 
1253 static void cleanup_domain_irq_pirq(struct domain *d, int irq,
1254                                     struct pirq *pirq)
1255 {
1256     pirq_cleanup_check(pirq, d);
1257     radix_tree_delete(&d->arch.irq_pirq, irq);
1258 }
1259 
1260 int init_domain_irq_mapping(struct domain *d)
1261 {
1262     unsigned int i;
1263     int err = 0;
1264 
1265     radix_tree_init(&d->arch.irq_pirq);
1266     if ( is_hvm_domain(d) )
1267         radix_tree_init(&d->arch.hvm.emuirq_pirq);
1268 
1269     for ( i = 1; platform_legacy_irq(i); ++i )
1270     {
1271         struct pirq *info;
1272 
1273         if ( IO_APIC_IRQ(i) )
1274             continue;
1275         err = prepare_domain_irq_pirq(d, i, i, &info);
1276         if ( err )
1277         {
1278             ASSERT(err < 0);
1279             break;
1280         }
1281         set_domain_irq_pirq(d, i, info);
1282     }
1283 
1284     if ( err )
1285         cleanup_domain_irq_mapping(d);
1286     return err;
1287 }
1288 
1289 void cleanup_domain_irq_mapping(struct domain *d)
1290 {
1291     radix_tree_destroy(&d->arch.irq_pirq, NULL);
1292     if ( is_hvm_domain(d) )
1293         radix_tree_destroy(&d->arch.hvm.emuirq_pirq, NULL);
1294 }
1295 
1296 struct pirq *alloc_pirq_struct(struct domain *d)
1297 {
1298     size_t sz = is_hvm_domain(d) ? sizeof(struct pirq) :
1299                                    offsetof(struct pirq, arch.hvm);
1300     struct pirq *pirq = xzalloc_bytes(sz);
1301 
1302     if ( pirq )
1303     {
1304         if ( is_hvm_domain(d) )
1305         {
1306             pirq->arch.hvm.emuirq = IRQ_UNBOUND;
1307             pt_pirq_init(d, &pirq->arch.hvm.dpci);
1308         }
1309     }
1310 
1311     return pirq;
1312 }
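/*
 * Note: for non-HVM domains the allocation above is truncated at
 * offsetof(struct pirq, arch.hvm), so the HVM-only fields must not be
 * touched for such domains.
 */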
1313 
1314 void (pirq_cleanup_check)(struct pirq *pirq, struct domain *d)
1315 {
1316     /*
1317      * Check whether all fields have their default values, and delete
1318      * the entry from the tree if so.
1319      *
1320      * NB: Common parts were already checked.
1321      */
1322     if ( pirq->arch.irq )
1323         return;
1324 
1325     if ( is_hvm_domain(d) )
1326     {
1327         if ( pirq->arch.hvm.emuirq != IRQ_UNBOUND )
1328             return;
1329         if ( !pt_pirq_cleanup_check(&pirq->arch.hvm.dpci) )
1330             return;
1331     }
1332 
1333     if ( radix_tree_delete(&d->pirq_tree, pirq->pirq) != pirq )
1334         BUG_ON(!d->is_dying);
1335 }
1336 
1337 /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
1338 static void flush_ready_eoi(void)
1339 {
1340     struct pending_eoi *peoi = this_cpu(pending_eoi);
1341     struct irq_desc         *desc;
1342     int                irq, sp;
1343 
1344     ASSERT(!local_irq_is_enabled());
1345 
1346     sp = pending_eoi_sp(peoi);
1347 
1348     while ( (--sp >= 0) && peoi[sp].ready )
1349     {
1350         irq = peoi[sp].irq;
1351         ASSERT(irq > 0);
1352         desc = irq_to_desc(irq);
1353         spin_lock(&desc->lock);
1354         if ( desc->handler->end )
1355             desc->handler->end(desc, peoi[sp].vector);
1356         spin_unlock(&desc->lock);
1357     }
1358 
1359     pending_eoi_sp(peoi) = sp+1;
1360 }
1361 
1362 static void __set_eoi_ready(struct irq_desc *desc)
1363 {
1364     irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
1365     struct pending_eoi *peoi = this_cpu(pending_eoi);
1366     int                 irq, sp;
1367 
1368     irq = desc - irq_desc;
1369 
1370     if ( !(desc->status & IRQ_GUEST) ||
1371          (action->in_flight != 0) ||
1372          !cpumask_test_and_clear_cpu(smp_processor_id(),
1373                                      action->cpu_eoi_map) )
1374         return;
1375 
1376     sp = pending_eoi_sp(peoi);
1377 
1378     do {
1379         ASSERT(sp > 0);
1380     } while ( peoi[--sp].irq != irq );
1381     ASSERT(!peoi[sp].ready);
1382     peoi[sp].ready = 1;
1383 }
1384 
1385 /* Mark specified IRQ as ready-for-EOI (if it really is) and attempt to EOI. */
1386 static void set_eoi_ready(void *data)
1387 {
1388     struct irq_desc *desc = data;
1389 
1390     ASSERT(!local_irq_is_enabled());
1391 
1392     spin_lock(&desc->lock);
1393     __set_eoi_ready(desc);
1394     spin_unlock(&desc->lock);
1395 
1396     flush_ready_eoi();
1397 }
1398 
1399 void pirq_guest_eoi(struct pirq *pirq)
1400 {
1401     struct irq_desc *desc;
1402 
1403     ASSERT(local_irq_is_enabled());
1404     desc = pirq_spin_lock_irq_desc(pirq, NULL);
1405     if ( desc )
1406         desc_guest_eoi(desc, pirq);
1407 }
1408 
1409 void desc_guest_eoi(struct irq_desc *desc, struct pirq *pirq)
1410 {
1411     irq_guest_action_t *action;
1412     cpumask_t           cpu_eoi_map;
1413     int                 irq;
1414 
1415     if ( !(desc->status & IRQ_GUEST) )
1416     {
1417         spin_unlock_irq(&desc->lock);
1418         return;
1419     }
1420 
1421     action = (irq_guest_action_t *)desc->action;
1422     irq = desc - irq_desc;
1423 
1424     if ( unlikely(!test_and_clear_bool(pirq->masked)) ||
1425          unlikely(--action->in_flight != 0) )
1426     {
1427         spin_unlock_irq(&desc->lock);
1428         return;
1429     }
1430 
1431     stop_timer(&action->eoi_timer);
1432 
1433     if ( action->ack_type == ACKTYPE_UNMASK )
1434     {
1435         ASSERT(cpumask_empty(action->cpu_eoi_map));
1436         if ( desc->handler->end )
1437             desc->handler->end(desc, 0);
1438         spin_unlock_irq(&desc->lock);
1439         return;
1440     }
1441 
1442     ASSERT(action->ack_type == ACKTYPE_EOI);
1443 
1444     cpumask_copy(&cpu_eoi_map, action->cpu_eoi_map);
1445 
1446     if ( __cpumask_test_and_clear_cpu(smp_processor_id(), &cpu_eoi_map) )
1447     {
1448         __set_eoi_ready(desc);
1449         spin_unlock(&desc->lock);
1450         flush_ready_eoi();
1451         local_irq_enable();
1452     }
1453     else
1454     {
1455         spin_unlock_irq(&desc->lock);
1456     }
1457 
1458     if ( !cpumask_empty(&cpu_eoi_map) )
1459         on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
1460 }
1461 
1462 int pirq_guest_unmask(struct domain *d)
1463 {
1464     unsigned int pirq = 0, n, i;
1465     struct pirq *pirqs[16];
1466 
1467     do {
1468         n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
1469                                    ARRAY_SIZE(pirqs));
1470         for ( i = 0; i < n; ++i )
1471         {
1472             pirq = pirqs[i]->pirq;
1473             if ( pirqs[i]->masked &&
1474                  !evtchn_port_is_masked(d, pirqs[i]->evtchn) )
1475                 pirq_guest_eoi(pirqs[i]);
1476         }
1477     } while ( ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
1478 
1479     return 0;
1480 }
1481 
1482 static int irq_acktype(const struct irq_desc *desc)
1483 {
1484     if ( desc->handler == &no_irq_type )
1485         return ACKTYPE_NONE;
1486 
1487     /*
1488      * Edge-triggered IO-APIC and LAPIC interrupts need no final
1489      * acknowledgement: we ACK early during interrupt processing.
1490      */
1491     if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ||
1492          !strcmp(desc->handler->typename, "local-APIC-edge") )
1493         return ACKTYPE_NONE;
1494 
1495     /*
1496      * MSIs are treated as edge-triggered interrupts, except
1497      * when there is no proper way to mask them.
1498      */
1499     if ( desc->msi_desc )
1500         return msi_maskable_irq(desc->msi_desc) ? ACKTYPE_NONE : ACKTYPE_EOI;
1501 
1502     /*
1503      * Level-triggered IO-APIC interrupts need to be acknowledged on the CPU
1504      * on which they were received. This is because we tickle the LAPIC to EOI.
1505      */
1506     if ( !strcmp(desc->handler->typename, "IO-APIC-level") )
1507         return desc->handler->ack == irq_complete_move ?
1508                ACKTYPE_EOI : ACKTYPE_UNMASK;
1509 
1510     /* Legacy PIC interrupts can be acknowledged from any CPU. */
1511     if ( !strcmp(desc->handler->typename, "XT-PIC") )
1512         return ACKTYPE_UNMASK;
1513 
1514     printk("Unknown PIC type '%s' for IRQ%d\n",
1515            desc->handler->typename, desc->irq);
1516     BUG();
1517 
1518     return 0;
1519 }
1520 
1521 int pirq_shared(struct domain *d, int pirq)
1522 {
1523     struct irq_desc         *desc;
1524     irq_guest_action_t *action;
1525     unsigned long       flags;
1526     int                 shared;
1527 
1528     desc = domain_spin_lock_irq_desc(d, pirq, &flags);
1529     if ( desc == NULL )
1530         return 0;
1531 
1532     action = (irq_guest_action_t *)desc->action;
1533     shared = ((desc->status & IRQ_GUEST) && (action->nr_guests > 1));
1534 
1535     spin_unlock_irqrestore(&desc->lock, flags);
1536 
1537     return shared;
1538 }
1539 
1540 int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share)
1541 {
1542     unsigned int        irq;
1543     struct irq_desc         *desc;
1544     irq_guest_action_t *action, *newaction = NULL;
1545     int                 rc = 0;
1546 
1547     WARN_ON(!spin_is_locked(&v->domain->event_lock));
1548     BUG_ON(!local_irq_is_enabled());
1549 
1550  retry:
1551     desc = pirq_spin_lock_irq_desc(pirq, NULL);
1552     if ( desc == NULL )
1553     {
1554         rc = -EINVAL;
1555         goto out;
1556     }
1557 
1558     action = (irq_guest_action_t *)desc->action;
1559     irq = desc - irq_desc;
1560 
1561     if ( !(desc->status & IRQ_GUEST) )
1562     {
1563         if ( desc->action != NULL )
1564         {
1565             printk(XENLOG_G_INFO
1566                    "Cannot bind IRQ%d to dom%d. In use by '%s'.\n",
1567                    pirq->pirq, v->domain->domain_id, desc->action->name);
1568             rc = -EBUSY;
1569             goto unlock_out;
1570         }
1571 
1572         if ( newaction == NULL )
1573         {
1574             spin_unlock_irq(&desc->lock);
1575             if ( (newaction = xmalloc(irq_guest_action_t)) != NULL &&
1576                  zalloc_cpumask_var(&newaction->cpu_eoi_map) )
1577                 goto retry;
1578             xfree(newaction);
1579             printk(XENLOG_G_INFO
1580                    "Cannot bind IRQ%d to dom%d. Out of memory.\n",
1581                    pirq->pirq, v->domain->domain_id);
1582             return -ENOMEM;
1583         }
1584 
1585         action = newaction;
1586         desc->action = (struct irqaction *)action;
1587         newaction = NULL;
1588 
1589         action->nr_guests   = 0;
1590         action->in_flight   = 0;
1591         action->shareable   = will_share;
1592         action->ack_type    = irq_acktype(desc);
1593         init_timer(&action->eoi_timer, irq_guest_eoi_timer_fn, desc, 0);
1594 
1595         desc->status |= IRQ_GUEST;
1596 
1597         /*
1598          * Attempt to bind the interrupt target to the correct (or at least
1599          * some online) CPU.
1600          */
1601         if ( desc->handler->set_affinity )
1602         {
1603             const cpumask_t *affinity = NULL;
1604 
1605             if ( !opt_noirqbalance )
1606                 affinity = cpumask_of(v->processor);
1607             else if ( !cpumask_intersects(desc->affinity, &cpu_online_map) )
1608             {
1609                 cpumask_setall(desc->affinity);
1610                 affinity = &cpumask_all;
1611             }
1612             else if ( !cpumask_intersects(desc->arch.cpu_mask,
1613                                           &cpu_online_map) )
1614                 affinity = desc->affinity;
1615             if ( affinity )
1616                 desc->handler->set_affinity(desc, affinity);
1617         }
1618 
1619         desc->status &= ~IRQ_DISABLED;
1620         desc->handler->startup(desc);
1621     }
1622     else if ( !will_share || !action->shareable )
1623     {
1624         printk(XENLOG_G_INFO "Cannot bind IRQ%d to dom%d. %s.\n",
1625                pirq->pirq, v->domain->domain_id,
1626                will_share ? "Others do not share"
1627                           : "Will not share with others");
1628         rc = -EBUSY;
1629         goto unlock_out;
1630     }
1631     else if ( action->nr_guests == 0 )
1632     {
1633         /*
1634          * Indicates that an ACKTYPE_EOI interrupt is being released.
1635          * Wait for that to happen before continuing.
1636          */
1637         ASSERT(action->ack_type == ACKTYPE_EOI);
1638         ASSERT(desc->status & IRQ_DISABLED);
1639         spin_unlock_irq(&desc->lock);
1640         cpu_relax();
1641         goto retry;
1642     }
1643 
1644     if ( action->nr_guests == IRQ_MAX_GUESTS )
1645     {
1646         printk(XENLOG_G_INFO "Cannot bind IRQ%d to dom%d. "
1647                "Already at max share.\n",
1648                pirq->pirq, v->domain->domain_id);
1649         rc = -EBUSY;
1650         goto unlock_out;
1651     }
1652 
1653     action->guest[action->nr_guests++] = v->domain;
1654 
1655     if ( action->ack_type != ACKTYPE_NONE )
1656         set_pirq_eoi(v->domain, pirq->pirq);
1657     else
1658         clear_pirq_eoi(v->domain, pirq->pirq);
1659 
1660  unlock_out:
1661     spin_unlock_irq(&desc->lock);
1662  out:
1663     if ( newaction != NULL )
1664     {
1665         free_cpumask_var(newaction->cpu_eoi_map);
1666         xfree(newaction);
1667     }
1668     return rc;
1669 }
1670 
1671 static irq_guest_action_t *__pirq_guest_unbind(
1672     struct domain *d, struct pirq *pirq, struct irq_desc *desc)
1673 {
1674     unsigned int        irq;
1675     irq_guest_action_t *action;
1676     cpumask_t           cpu_eoi_map;
1677     int                 i;
1678 
1679     action = (irq_guest_action_t *)desc->action;
1680     irq = desc - irq_desc;
1681 
1682     if ( unlikely(action == NULL) )
1683     {
1684         dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
1685                 d->domain_id, pirq->pirq);
1686         return NULL;
1687     }
1688 
1689     BUG_ON(!(desc->status & IRQ_GUEST));
1690 
1691     for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
1692         continue;
1693     BUG_ON(i == action->nr_guests);
1694     memmove(&action->guest[i], &action->guest[i+1],
1695             (action->nr_guests-i-1) * sizeof(action->guest[0]));
1696     action->nr_guests--;
1697 
1698     switch ( action->ack_type )
1699     {
1700     case ACKTYPE_UNMASK:
1701         if ( test_and_clear_bool(pirq->masked) &&
1702              (--action->in_flight == 0) &&
1703              desc->handler->end )
1704                 desc->handler->end(desc, 0);
1705         break;
1706     case ACKTYPE_EOI:
1707         /* NB. If #guests == 0 then we clear the eoi_map later on. */
1708         if ( test_and_clear_bool(pirq->masked) &&
1709              (--action->in_flight == 0) &&
1710              (action->nr_guests != 0) )
1711         {
1712             cpumask_copy(&cpu_eoi_map, action->cpu_eoi_map);
1713             spin_unlock_irq(&desc->lock);
1714             on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
1715             spin_lock_irq(&desc->lock);
1716         }
1717         break;
1718     }
1719 
1720     /*
1721      * The guest cannot re-bind to this IRQ until this function returns. So,
1722      * when we have flushed this IRQ from ->masked, it should remain flushed.
1723      */
1724     BUG_ON(pirq->masked);
1725 
1726     if ( action->nr_guests != 0 )
1727         return NULL;
1728 
1729     BUG_ON(action->in_flight != 0);
1730 
1731     /* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */
1732     desc->handler->disable(desc);
1733     desc->status |= IRQ_DISABLED;
1734 
1735     /*
1736      * Mark any remaining pending EOIs as ready to flush.
1737      * NOTE: We will need to make this a stronger barrier if in future we allow
1738      * interrupt vectors to be re-bound to a different PIC. In that case we
1739      * would need to flush all ready EOIs before returning as otherwise the
1740      * desc->handler could change and we would call the wrong 'end' hook.
1741      */
1742     cpumask_copy(&cpu_eoi_map, action->cpu_eoi_map);
1743     if ( !cpumask_empty(&cpu_eoi_map) )
1744     {
1745         BUG_ON(action->ack_type != ACKTYPE_EOI);
1746         spin_unlock_irq(&desc->lock);
1747         on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 1);
1748         spin_lock_irq(&desc->lock);
1749     }
1750 
1751     BUG_ON(!cpumask_empty(action->cpu_eoi_map));
1752 
1753     desc->action = NULL;
1754     desc->status &= ~(IRQ_GUEST|IRQ_INPROGRESS);
1755     desc->handler->shutdown(desc);
1756 
1757     /* Caller frees the old guest descriptor block. */
1758     return action;
1759 }
1760 
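/*
 * Unbind 'pirq' from domain 'd'.  If the pirq was force-unbound earlier
 * (its arch.irq is negative) only the domain's irq<->pirq mappings are
 * torn down; otherwise the domain is removed from the underlying guest
 * action and any released action block is freed here.  The caller holds
 * d->event_lock.
 */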
1761 void pirq_guest_unbind(struct domain *d, struct pirq *pirq)
1762 {
1763     irq_guest_action_t *oldaction = NULL;
1764     struct irq_desc *desc;
1765     int irq = 0;
1766 
1767     WARN_ON(!spin_is_locked(&d->event_lock));
1768 
1769     BUG_ON(!local_irq_is_enabled());
1770     desc = pirq_spin_lock_irq_desc(pirq, NULL);
1771 
1772     if ( desc == NULL )
1773     {
1774         irq = -pirq->arch.irq;
1775         BUG_ON(irq <= 0);
1776         desc = irq_to_desc(irq);
1777         spin_lock_irq(&desc->lock);
1778         clear_domain_irq_pirq(d, irq, pirq);
1779     }
1780     else
1781     {
1782         oldaction = __pirq_guest_unbind(d, pirq, desc);
1783     }
1784 
1785     spin_unlock_irq(&desc->lock);
1786 
1787     if ( oldaction != NULL )
1788     {
1789         kill_timer(&oldaction->eoi_timer);
1790         free_cpumask_var(oldaction->cpu_eoi_map);
1791         xfree(oldaction);
1792     }
1793     else if ( irq > 0 )
1794         cleanup_domain_irq_pirq(d, irq, pirq);
1795 }
1796 
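/*
 * Forcibly remove 'd' from the guest list of 'pirq', for use when
 * unmapping a pirq that is still bound.  Returns true if the domain was
 * actually bound to the IRQ.  The caller holds d->event_lock.
 */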
1797 static bool pirq_guest_force_unbind(struct domain *d, struct pirq *pirq)
1798 {
1799     struct irq_desc *desc;
1800     irq_guest_action_t *action, *oldaction = NULL;
1801     unsigned int i;
1802     bool bound = false;
1803 
1804     WARN_ON(!spin_is_locked(&d->event_lock));
1805 
1806     BUG_ON(!local_irq_is_enabled());
1807     desc = pirq_spin_lock_irq_desc(pirq, NULL);
1808     BUG_ON(desc == NULL);
1809 
1810     if ( !(desc->status & IRQ_GUEST) )
1811         goto out;
1812 
1813     action = (irq_guest_action_t *)desc->action;
1814     if ( unlikely(action == NULL) )
1815     {
1816         dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
1817                 d->domain_id, pirq->pirq);
1818         goto out;
1819     }
1820 
1821     for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
1822         continue;
1823     if ( i == action->nr_guests )
1824         goto out;
1825 
1826     bound = true;
1827     oldaction = __pirq_guest_unbind(d, pirq, desc);
1828 
1829  out:
1830     spin_unlock_irq(&desc->lock);
1831 
1832     if ( oldaction != NULL )
1833     {
1834         kill_timer(&oldaction->eoi_timer);
1835         free_cpumask_var(oldaction->cpu_eoi_map);
1836         xfree(oldaction);
1837     }
1838 
1839     return bound;
1840 }
1841 
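/*
 * Deliver a guest-bound interrupt: push a pending EOI for ACKTYPE_EOI
 * IRQs, mark the pirq masked and (re)arm the 1ms EOI timeout for ack
 * types other than ACKTYPE_NONE, and send the event to every bound
 * domain.  Called from do_IRQ() with desc->lock held.
 */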
1842 static void do_IRQ_guest(struct irq_desc *desc, unsigned int vector)
1843 {
1844     irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
1845     unsigned int        i;
1846     struct pending_eoi *peoi = this_cpu(pending_eoi);
1847 
1848     if ( unlikely(!action->nr_guests) )
1849     {
1850         /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. */
1851         ASSERT(action->ack_type == ACKTYPE_EOI);
1852         ASSERT(desc->status & IRQ_DISABLED);
1853         if ( desc->handler->end )
1854             desc->handler->end(desc, vector);
1855         return;
1856     }
1857 
1858     /*
1859      * Stop the timer as soon as we're certain we'll set it again further down,
1860      * to prevent the current timeout (if any) from needlessly expiring.
1861      */
1862     if ( action->ack_type != ACKTYPE_NONE )
1863         stop_timer(&action->eoi_timer);
1864 
1865     if ( action->ack_type == ACKTYPE_EOI )
1866     {
1867         unsigned int sp = pending_eoi_sp(peoi);
1868 
1869         ASSERT(sp < (NR_DYNAMIC_VECTORS - 1));
1870         ASSERT(!sp || (peoi[sp - 1].vector < vector));
1871         peoi[sp].irq = desc->irq;
1872         peoi[sp].vector = vector;
1873         peoi[sp].ready = 0;
1874         pending_eoi_sp(peoi) = sp + 1;
1875         cpumask_set_cpu(smp_processor_id(), action->cpu_eoi_map);
1876     }
1877 
1878     for ( i = 0; i < action->nr_guests; i++ )
1879     {
1880         struct domain *d = action->guest[i];
1881         struct pirq *pirq = pirq_info(d, domain_irq_to_pirq(d, desc->irq));
1882 
1883         if ( (action->ack_type != ACKTYPE_NONE) &&
1884              !test_and_set_bool(pirq->masked) )
1885             action->in_flight++;
1886         if ( !is_hvm_domain(d) || !hvm_do_IRQ_dpci(d, pirq) )
1887             send_guest_pirq(d, pirq);
1888     }
1889 
1890     if ( action->ack_type != ACKTYPE_NONE )
1891     {
1892         migrate_timer(&action->eoi_timer, smp_processor_id());
1893         set_timer(&action->eoi_timer, NOW() + MILLISECS(1));
1894     }
1895 }
1896 
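/*
 * Common interrupt entry point.  Looks up the IRQ behind the vector that
 * fired on this CPU, dispatches guest-bound IRQs via do_IRQ_guest()
 * (subject to the per-IRQ rate limit), and runs the registered action
 * handler for Xen-internal IRQs.  Vectors without a mapped IRQ are handled
 * via direct_apic_vector or reported as spurious.
 */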
1897 void do_IRQ(struct cpu_user_regs *regs)
1898 {
1899     struct irqaction *action;
1900     uint32_t          tsc_in;
1901     struct irq_desc  *desc;
1902     unsigned int      vector = (uint8_t)regs->entry_vector;
1903     int               irq = this_cpu(vector_irq)[vector];
1904     struct cpu_user_regs *old_regs = set_irq_regs(regs);
1905 
1906     perfc_incr(irqs);
1907     this_cpu(irq_count)++;
1908     irq_enter();
1909 
1910     if ( irq < 0 )
1911     {
1912         if ( direct_apic_vector[vector] )
1913             direct_apic_vector[vector](regs);
1914         else
1915         {
1916             const char *kind = ", LAPIC";
1917 
1918             if ( apic_isr_read(vector) )
1919                 ack_APIC_irq();
1920             else
1921                 kind = "";
1922             if ( !(vector >= FIRST_LEGACY_VECTOR &&
1923                    vector <= LAST_LEGACY_VECTOR &&
1924                    !smp_processor_id() &&
1925                    bogus_8259A_irq(vector - FIRST_LEGACY_VECTOR)) )
1926             {
1927                 printk("CPU%u: No irq handler for vector %02x (IRQ %d%s)\n",
1928                        smp_processor_id(), vector, irq, kind);
1929                 desc = irq_to_desc(~irq);
1930                 if ( ~irq < nr_irqs && irq_desc_initialized(desc) )
1931                 {
1932                     spin_lock(&desc->lock);
1933                     printk("IRQ%d a=%04lx[%04lx,%04lx] v=%02x[%02x] t=%s s=%08x\n",
1934                            ~irq, *cpumask_bits(desc->affinity),
1935                            *cpumask_bits(desc->arch.cpu_mask),
1936                            *cpumask_bits(desc->arch.old_cpu_mask),
1937                            desc->arch.vector, desc->arch.old_vector,
1938                            desc->handler->typename, desc->status);
1939                     spin_unlock(&desc->lock);
1940                 }
1941             }
1942             TRACE_1D(TRC_HW_IRQ_UNMAPPED_VECTOR, vector);
1943         }
1944         goto out_no_unlock;
1945     }
1946 
1947     desc = irq_to_desc(irq);
1948 
1949     spin_lock(&desc->lock);
1950     desc->handler->ack(desc);
1951 
1952     if ( likely(desc->status & IRQ_GUEST) )
1953     {
1954         if ( irq_ratelimit_timer.function && /* irq rate limiting enabled? */
1955              unlikely(desc->rl_cnt++ >= irq_ratelimit_threshold) )
1956         {
1957             s_time_t now = NOW();
1958 
1959             if ( now < (desc->rl_quantum_start + MILLISECS(10)) )
1960             {
1961                 desc->handler->disable(desc);
1962                 /*
1963                  * If handler->disable doesn't actually mask the interrupt, a
1964                  * disabled irq can still fire. This check also avoids possible
1965                  * deadlocks if ratelimit_timer_fn runs at the same time.
1966                  */
1967                 if ( likely(list_empty(&desc->rl_link)) )
1968                 {
1969                     spin_lock(&irq_ratelimit_lock);
1970                     if ( list_empty(&irq_ratelimit_list) )
1971                         set_timer(&irq_ratelimit_timer, now + MILLISECS(10));
1972                     list_add(&desc->rl_link, &irq_ratelimit_list);
1973                     spin_unlock(&irq_ratelimit_lock);
1974                 }
1975                 goto out;
1976             }
1977             desc->rl_cnt = 0;
1978             desc->rl_quantum_start = now;
1979         }
1980 
1981         tsc_in = tb_init_done ? get_cycles() : 0;
1982         do_IRQ_guest(desc, vector);
1983         TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
1984         goto out_no_end;
1985     }
1986 
1987     desc->status &= ~IRQ_REPLAY;
1988     desc->status |= IRQ_PENDING;
1989 
1990     /*
1991      * Since we set PENDING, if another processor is handling a different
1992      * instance of this same irq, the other processor will take care of it.
1993      */
1994     if ( desc->status & (IRQ_DISABLED | IRQ_INPROGRESS) )
1995         goto out;
1996 
1997     desc->status |= IRQ_INPROGRESS;
1998 
1999     action = desc->action;
2000     while ( desc->status & IRQ_PENDING )
2001     {
2002         desc->status &= ~IRQ_PENDING;
2003         spin_unlock_irq(&desc->lock);
2004 
2005         tsc_in = tb_init_done ? get_cycles() : 0;
2006         action->handler(irq, action->dev_id, regs);
2007         TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
2008 
2009         spin_lock_irq(&desc->lock);
2010     }
2011 
2012     desc->status &= ~IRQ_INPROGRESS;
2013 
2014  out:
2015     if ( desc->handler->end )
2016     {
2017         /*
2018          * If higher priority vectors still have their EOIs pending, we may
2019          * not issue an EOI here, as this would EOI the highest priority one.
2020          */
2021         this_cpu(check_eoi_deferral) = true;
2022         desc->handler->end(desc, vector);
2023         this_cpu(check_eoi_deferral) = false;
2024 
2025         spin_unlock(&desc->lock);
2026         flush_ready_eoi();
2027         goto out_no_unlock;
2028     }
2029 
2030  out_no_end:
2031     spin_unlock(&desc->lock);
2032  out_no_unlock:
2033     irq_exit();
2034     set_irq_regs(old_regs);
2035 }
2036 
2037 static inline bool is_free_pirq(const struct domain *d,
2038                                 const struct pirq *pirq)
2039 {
2040     return !pirq || (!pirq->arch.irq && (!is_hvm_domain(d) ||
2041         pirq->arch.hvm.emuirq == IRQ_UNBOUND));
2042 }
2043 
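/*
 * Find a free pirq for domain 'd'.  GSI allocations are first attempted
 * in the GSI range (above the legacy ISA IRQs); failing that, and for all
 * other types, pirqs are allocated top-down from the non-GSI range.
 * Returns the pirq or -ENOSPC.  The caller holds d->event_lock.
 */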
2044 int get_free_pirq(struct domain *d, int type)
2045 {
2046     int i;
2047 
2048     ASSERT(spin_is_locked(&d->event_lock));
2049 
2050     if ( type == MAP_PIRQ_TYPE_GSI )
2051     {
2052         for ( i = 16; i < nr_irqs_gsi; i++ )
2053             if ( is_free_pirq(d, pirq_info(d, i)) )
2054             {
2055                 pirq_get_info(d, i);
2056                 return i;
2057             }
2058     }
2059     for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; i-- )
2060         if ( is_free_pirq(d, pirq_info(d, i)) )
2061         {
2062             pirq_get_info(d, i);
2063             return i;
2064         }
2065 
2066     return -ENOSPC;
2067 }
2068 
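/*
 * Find 'nr' consecutive free pirqs in the non-GSI range, scanning
 * top-down.  Returns the lowest pirq of the block or -ENOSPC.  The caller
 * holds d->event_lock.
 */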
2069 int get_free_pirqs(struct domain *d, unsigned int nr)
2070 {
2071     unsigned int i, found = 0;
2072 
2073     ASSERT(spin_is_locked(&d->event_lock));
2074 
2075     for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
2076         if ( is_free_pirq(d, pirq_info(d, i)) )
2077         {
2078             pirq_get_info(d, i);
2079             if ( ++found == nr )
2080                 return i;
2081         }
2082         else
2083             found = 0;
2084 
2085     return -ENOSPC;
2086 }
2087 
2088 #define MAX_MSI_IRQS 32 /* limited by MSI capability struct properties */
2089 
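/*
 * Establish the pirq <-> irq mapping for domain 'd'.  For MSI types this
 * also enables MSI on the device and sets up one IRQ per requested vector
 * (multi-MSI), unwinding any partially set up vectors on failure.  The
 * caller holds d->event_lock, plus the PCI devices lock for MSI.
 */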
2090 int map_domain_pirq(
2091     struct domain *d, int pirq, int irq, int type, void *data)
2092 {
2093     int ret = 0;
2094     int old_irq, old_pirq;
2095     struct pirq *info;
2096     struct irq_desc *desc;
2097     unsigned long flags;
2098     DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {};
2099     DECLARE_BITMAP(granted, MAX_MSI_IRQS) = {};
2100 
2101     ASSERT(spin_is_locked(&d->event_lock));
2102 
2103     if ( !irq_access_permitted(current->domain, irq) )
2104         return -EPERM;
2105 
2106     if ( pirq < 0 || pirq >= d->nr_pirqs || irq <= 0 || irq >= nr_irqs )
2107     {
2108         dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or irq %d\n",
2109                 d->domain_id, pirq, irq);
2110         return -EINVAL;
2111     }
2112 
2113     old_irq = domain_pirq_to_irq(d, pirq);
2114     old_pirq = domain_irq_to_pirq(d, irq);
2115 
2116     if ( (old_irq > 0 && (old_irq != irq) ) ||
2117          (old_pirq && (old_pirq != pirq)) )
2118     {
2119         dprintk(XENLOG_G_WARNING,
2120                 "dom%d: pirq %d or irq %d already mapped (%d,%d)\n",
2121                 d->domain_id, pirq, irq, old_pirq, old_irq);
2122         return 0;
2123     }
2124 
2125     ret = xsm_map_domain_irq(XSM_HOOK, d, irq, data);
2126     if ( ret )
2127     {
2128         dprintk(XENLOG_G_ERR, "dom%d: could not permit access to irq %d mapping to pirq %d\n",
2129                 d->domain_id, irq, pirq);
2130         return ret;
2131     }
2132 
2133     if ( likely(!irq_access_permitted(d, irq)) )
2134     {
2135         ret = irq_permit_access(d, irq);
2136         if ( ret )
2137         {
2138             printk(XENLOG_G_ERR
2139                    "dom%d: could not permit access to IRQ%d (pirq %d)\n",
2140                    d->domain_id, irq, pirq);
2141             return ret;
2142         }
2143         __set_bit(0, granted);
2144     }
2145 
2146     ret = prepare_domain_irq_pirq(d, irq, pirq, &info);
2147     if ( ret < 0 )
2148         goto revoke;
2149     if ( !ret )
2150         __set_bit(0, prepared);
2151 
2152     desc = irq_to_desc(irq);
2153 
2154     if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
2155     {
2156         struct msi_info *msi = (struct msi_info *)data;
2157         struct msi_desc *msi_desc;
2158         struct pci_dev *pdev;
2159         unsigned int nr = 0;
2160 
2161         ASSERT(pcidevs_locked());
2162 
2163         ret = -ENODEV;
2164         if ( !cpu_has_apic )
2165             goto done;
2166 
2167         pdev = pci_get_pdev_by_domain(d, msi->seg, msi->bus, msi->devfn);
2168         if ( !pdev )
2169             goto done;
2170 
2171         ret = pci_enable_msi(msi, &msi_desc);
2172         if ( ret )
2173         {
2174             if ( ret > 0 )
2175             {
2176                 msi->entry_nr = ret;
2177                 ret = -ENFILE;
2178             }
2179             goto done;
2180         }
2181 
2182         spin_lock_irqsave(&desc->lock, flags);
2183 
2184         if ( desc->handler != &no_irq_type )
2185         {
2186             spin_unlock_irqrestore(&desc->lock, flags);
2187             dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
2188                     d->domain_id, irq);
2189             pci_disable_msi(msi_desc);
2190             msi_desc->irq = -1;
2191             msi_free_irq(msi_desc);
2192             ret = -EBUSY;
2193             goto done;
2194         }
2195 
2196         while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
2197         {
2198             if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
2199                  !desc->arch.used_vectors )
2200             {
2201                 desc->arch.used_vectors = &pdev->arch.used_vectors;
2202                 if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
2203                 {
2204                     int vector = desc->arch.vector;
2205 
2206                     ASSERT(!test_bit(vector, desc->arch.used_vectors));
2207                     set_bit(vector, desc->arch.used_vectors);
2208                 }
2209             }
2210             if ( type == MAP_PIRQ_TYPE_MSI ||
2211                  msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
2212                  ++nr == msi->entry_nr )
2213                 break;
2214 
2215             set_domain_irq_pirq(d, irq, info);
2216             spin_unlock_irqrestore(&desc->lock, flags);
2217 
2218             info = NULL;
2219             irq = create_irq(NUMA_NO_NODE, true);
2220             ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
2221                            : irq;
2222             if ( ret < 0 )
2223                 break;
2224             if ( !ret )
2225                 __set_bit(nr, prepared);
2226             msi_desc[nr].irq = irq;
2227 
2228             if ( likely(!irq_access_permitted(d, irq)) )
2229             {
2230                 if ( irq_permit_access(d, irq) )
2231                     printk(XENLOG_G_WARNING
2232                            "dom%d: could not permit access to IRQ%d (pirq %d)\n",
2233                            d->domain_id, irq, pirq);
2234                 else
2235                     __set_bit(nr, granted);
2236             }
2237 
2238             desc = irq_to_desc(irq);
2239             spin_lock_irqsave(&desc->lock, flags);
2240 
2241             if ( desc->handler != &no_irq_type )
2242             {
2243                 dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
2244                         d->domain_id, irq, pirq + nr, desc->handler->typename);
2245                 ret = -EBUSY;
2246                 break;
2247             }
2248         }
2249 
2250         if ( ret )
2251         {
2252             spin_unlock_irqrestore(&desc->lock, flags);
2253             pci_disable_msi(msi_desc);
2254             if ( nr )
2255             {
2256                 ASSERT(msi_desc->irq >= 0);
2257                 desc = irq_to_desc(msi_desc->irq);
2258                 spin_lock_irqsave(&desc->lock, flags);
2259                 desc->handler = &no_irq_type;
2260                 desc->msi_desc = NULL;
2261                 spin_unlock_irqrestore(&desc->lock, flags);
2262             }
2263             while ( nr )
2264             {
2265                 if ( irq >= 0 && test_bit(nr, granted) &&
2266                      irq_deny_access(d, irq) )
2267                     printk(XENLOG_G_ERR
2268                            "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
2269                            d->domain_id, irq, pirq);
2270                 if ( info && test_bit(nr, prepared) )
2271                     cleanup_domain_irq_pirq(d, irq, info);
2272                 info = pirq_info(d, pirq + --nr);
2273                 irq = info->arch.irq;
2274             }
2275             msi_desc->irq = -1;
2276             msi_free_irq(msi_desc);
2277             goto done;
2278         }
2279 
2280         set_domain_irq_pirq(d, irq, info);
2281         spin_unlock_irqrestore(&desc->lock, flags);
2282     }
2283     else
2284     {
2285         spin_lock_irqsave(&desc->lock, flags);
2286         set_domain_irq_pirq(d, irq, info);
2287         spin_unlock_irqrestore(&desc->lock, flags);
2288         ret = 0;
2289     }
2290 
2291  done:
2292     if ( ret )
2293     {
2294         if ( test_bit(0, prepared) )
2295             cleanup_domain_irq_pirq(d, irq, info);
2296  revoke:
2297         if ( test_bit(0, granted) && irq_deny_access(d, irq) )
2298             printk(XENLOG_G_ERR
2299                    "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
2300                    d->domain_id, irq, pirq);
2301     }
2302     return ret;
2303 }
2304 
2305 /* The pirq should have been unbound before this call. */
2306 int unmap_domain_pirq(struct domain *d, int pirq)
2307 {
2308     unsigned long flags;
2309     struct irq_desc *desc;
2310     int irq, ret = 0, rc;
2311     unsigned int i, nr = 1;
2312     bool forced_unbind;
2313     struct pirq *info;
2314     struct msi_desc *msi_desc = NULL;
2315 
2316     if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
2317         return -EINVAL;
2318 
2319     ASSERT(pcidevs_locked());
2320     ASSERT(spin_is_locked(&d->event_lock));
2321 
2322     info = pirq_info(d, pirq);
2323     if ( !info || (irq = info->arch.irq) <= 0 )
2324     {
2325         dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
2326                 d->domain_id, pirq);
2327         ret = -EINVAL;
2328         goto done;
2329     }
2330 
2331     desc = irq_to_desc(irq);
2332     msi_desc = desc->msi_desc;
2333     if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
2334     {
2335         if ( msi_desc->msi_attrib.entry_nr )
2336         {
2337             printk(XENLOG_G_ERR
2338                    "dom%d: trying to unmap secondary MSI pirq %d\n",
2339                    d->domain_id, pirq);
2340             ret = -EBUSY;
2341             goto done;
2342         }
2343         nr = msi_desc->msi.nvec;
2344     }
2345 
2346     ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq,
2347                                msi_desc ? msi_desc->dev : NULL);
2348     if ( ret )
2349         goto done;
2350 
2351     forced_unbind = pirq_guest_force_unbind(d, info);
2352     if ( forced_unbind )
2353         dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n",
2354                 d->domain_id, pirq);
2355 
2356     if ( msi_desc != NULL )
2357         pci_disable_msi(msi_desc);
2358 
2359     spin_lock_irqsave(&desc->lock, flags);
2360 
2361     for ( i = 0; ; )
2362     {
2363         BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
2364 
2365         if ( !forced_unbind )
2366             clear_domain_irq_pirq(d, irq, info);
2367         else
2368         {
2369             info->arch.irq = -irq;
2370             radix_tree_replace_slot(
2371                 radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
2372                 radix_tree_int_to_ptr(-pirq));
2373         }
2374 
2375         if ( msi_desc )
2376         {
2377             desc->handler = &no_irq_type;
2378             desc->msi_desc = NULL;
2379         }
2380 
2381         if ( ++i == nr )
2382             break;
2383 
2384         spin_unlock_irqrestore(&desc->lock, flags);
2385 
2386         if ( !forced_unbind )
2387             cleanup_domain_irq_pirq(d, irq, info);
2388 
2389         rc = irq_deny_access(d, irq);
2390         if ( rc )
2391         {
2392             printk(XENLOG_G_ERR
2393                    "dom%d: could not deny access to IRQ%d (pirq %d)\n",
2394                    d->domain_id, irq, pirq + i);
2395             ret = rc;
2396         }
2397 
2398         do {
2399             info = pirq_info(d, pirq + i);
2400             if ( info && (irq = info->arch.irq) > 0 )
2401                 break;
2402             printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
2403                    d->domain_id, pirq + i);
2404         } while ( ++i < nr );
2405 
2406         if ( i == nr )
2407         {
2408             desc = NULL;
2409             break;
2410         }
2411 
2412         desc = irq_to_desc(irq);
2413         BUG_ON(desc->msi_desc != msi_desc + i);
2414 
2415         spin_lock_irqsave(&desc->lock, flags);
2416     }
2417 
2418     if ( desc )
2419     {
2420         spin_unlock_irqrestore(&desc->lock, flags);
2421 
2422         if ( !forced_unbind )
2423             cleanup_domain_irq_pirq(d, irq, info);
2424 
2425         rc = irq_deny_access(d, irq);
2426         if ( rc )
2427         {
2428             printk(XENLOG_G_ERR
2429                    "dom%d: could not deny access to IRQ%d (pirq %d)\n",
2430                    d->domain_id, irq, pirq + nr - 1);
2431             ret = rc;
2432         }
2433     }
2434 
2435     if ( msi_desc )
2436         msi_free_irq(msi_desc);
2437 
2438  done:
2439     return ret;
2440 }
2441 
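/* Unmap every pirq still mapped for 'd'. */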
2442 void free_domain_pirqs(struct domain *d)
2443 {
2444     int i;
2445 
2446     pcidevs_lock();
2447     spin_lock(&d->event_lock);
2448 
2449     for ( i = 0; i < d->nr_pirqs; i++ )
2450         if ( domain_pirq_to_irq(d, i) > 0 )
2451             unmap_domain_pirq(d, i);
2452 
2453     spin_unlock(&d->event_lock);
2454     pcidevs_unlock();
2455 }
2456 
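/* 'i' debug key handler: print the binding and state of every in-use IRQ. */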
2457 static void dump_irqs(unsigned char key)
2458 {
2459     int i, irq, pirq;
2460     struct irq_desc *desc;
2461     irq_guest_action_t *action;
2462     struct domain *d;
2463     const struct pirq *info;
2464     unsigned long flags;
2465     char *ssid;
2466 
2467     printk("IRQ information:\n");
2468 
2469     for ( irq = 0; irq < nr_irqs; irq++ )
2470     {
2471         if ( !(irq & 0x1f) )
2472             process_pending_softirqs();
2473 
2474         desc = irq_to_desc(irq);
2475 
2476         if ( !irq_desc_initialized(desc) || desc->handler == &no_irq_type )
2477             continue;
2478 
2479         ssid = in_irq() ? NULL : xsm_show_irq_sid(irq);
2480 
2481         spin_lock_irqsave(&desc->lock, flags);
2482 
2483         printk("   IRQ:%4d vec:%02x %-15s status=%03x aff:{%*pbl}/{%*pbl} ",
2484                irq, desc->arch.vector, desc->handler->typename, desc->status,
2485                CPUMASK_PR(desc->affinity), CPUMASK_PR(desc->arch.cpu_mask));
2486 
2487         if ( ssid )
2488             printk("Z=%-25s ", ssid);
2489 
2490         if ( desc->status & IRQ_GUEST )
2491         {
2492             action = (irq_guest_action_t *)desc->action;
2493 
2494             printk("in-flight=%d%c",
2495                    action->in_flight, action->nr_guests ? ' ' : '\n');
2496 
2497             for ( i = 0; i < action->nr_guests; )
2498             {
2499                 struct evtchn *evtchn;
2500                 unsigned int pending = 2, masked = 2;
2501 
2502                 d = action->guest[i++];
2503                 pirq = domain_irq_to_pirq(d, irq);
2504                 info = pirq_info(d, pirq);
2505                 evtchn = evtchn_from_port(d, info->evtchn);
2506                 if ( evtchn_read_trylock(evtchn) )
2507                 {
2508                     pending = evtchn_is_pending(d, evtchn);
2509                     masked = evtchn_is_masked(d, evtchn);
2510                     evtchn_read_unlock(evtchn);
2511                 }
2512                 printk("d%d:%3d(%c%c%c)%c",
2513                        d->domain_id, pirq, "-P?"[pending],
2514                        "-M?"[masked], info->masked ? 'M' : '-',
2515                        i < action->nr_guests ? ',' : '\n');
2516             }
2517         }
2518         else if ( desc->action )
2519             printk("%ps()\n", desc->action->handler);
2520         else
2521             printk("mapped, unbound\n");
2522 
2523         spin_unlock_irqrestore(&desc->lock, flags);
2524 
2525         xfree(ssid);
2526     }
2527 
2528     process_pending_softirqs();
2529     printk("Direct vector information:\n");
2530     for ( i = FIRST_DYNAMIC_VECTOR; i < X86_NR_VECTORS; ++i )
2531         if ( direct_apic_vector[i] )
2532             printk("   %#02x -> %ps()\n", i, direct_apic_vector[i]);
2533 
2534     dump_ioapic_irq_info();
2535 }
2536 
2537 static int __init setup_dump_irqs(void)
2538 {
2539     register_keyhandler('i', dump_irqs, "dump interrupt bindings", 1);
2540     return 0;
2541 }
2542 __initcall(setup_dump_irqs);
2543 
2544 /* Reset irq affinities to match the given CPU mask. */
2545 void fixup_irqs(const cpumask_t *mask, bool verbose)
2546 {
2547     unsigned int irq;
2548     static int warned;
2549     struct irq_desc *desc;
2550 
2551     for ( irq = 0; irq < nr_irqs; irq++ )
2552     {
2553         bool break_affinity = false, set_affinity = true;
2554         unsigned int vector;
2555         cpumask_t *affinity = this_cpu(scratch_cpumask);
2556 
2557         if ( irq == 2 )
2558             continue;
2559 
2560         desc = irq_to_desc(irq);
2561         if ( !irq_desc_initialized(desc) )
2562             continue;
2563 
2564         spin_lock(&desc->lock);
2565 
2566         vector = irq_to_vector(irq);
2567         if ( vector >= FIRST_HIPRIORITY_VECTOR &&
2568              vector <= LAST_HIPRIORITY_VECTOR )
2569         {
2570             cpumask_and(desc->arch.cpu_mask, desc->arch.cpu_mask, mask);
2571 
2572             /*
2573              * This can in particular happen when parking secondary threads
2574              * during boot and when the serial console wants to use a PCI IRQ.
2575              */
2576             if ( desc->handler == &no_irq_type )
2577             {
2578                 spin_unlock(&desc->lock);
2579                 continue;
2580             }
2581         }
2582 
2583         if ( desc->arch.move_cleanup_count )
2584         {
2585             /* The cleanup IPI may have been sent while we were still online. */
2586             cpumask_andnot(affinity, desc->arch.old_cpu_mask,
2587                            &cpu_online_map);
2588             desc->arch.move_cleanup_count -= cpumask_weight(affinity);
2589             if ( !desc->arch.move_cleanup_count )
2590                 release_old_vec(desc);
2591         }
2592 
2593         if ( !desc->action || cpumask_subset(desc->affinity, mask) )
2594         {
2595             spin_unlock(&desc->lock);
2596             continue;
2597         }
2598 
2599         /*
2600          * In order for the affinity adjustment below to be successful, we
2601          * need _assign_irq_vector() to succeed. This in particular means
2602          * clearing desc->arch.move_in_progress if this would otherwise
2603          * prevent the function from succeeding. Since there's no way for the
2604          * flag to get cleared anymore when there's no possible destination
2605          * left (the only possibility then would be the IRQs enabled window
2606          * after this loop), there's then also no race with us doing it here.
2607          *
2608          * Therefore the logic here and there need to remain in sync.
2609          */
2610         if ( desc->arch.move_in_progress &&
2611              !cpumask_intersects(mask, desc->arch.cpu_mask) )
2612         {
2613             unsigned int cpu;
2614 
2615             cpumask_and(affinity, desc->arch.old_cpu_mask, &cpu_online_map);
2616 
2617             spin_lock(&vector_lock);
2618             for_each_cpu(cpu, affinity)
2619                 per_cpu(vector_irq, cpu)[desc->arch.old_vector] = ~irq;
2620             spin_unlock(&vector_lock);
2621 
2622             release_old_vec(desc);
2623             desc->arch.move_in_progress = 0;
2624         }
2625 
2626         if ( !cpumask_intersects(mask, desc->affinity) )
2627         {
2628             break_affinity = true;
2629             cpumask_setall(affinity);
2630         }
2631         else
2632             cpumask_copy(affinity, desc->affinity);
2633 
2634         if ( desc->handler->disable )
2635             desc->handler->disable(desc);
2636 
2637         if ( desc->handler->set_affinity )
2638             desc->handler->set_affinity(desc, affinity);
2639         else if ( !(warned++) )
2640             set_affinity = false;
2641 
2642         if ( desc->handler->enable )
2643             desc->handler->enable(desc);
2644 
2645         cpumask_copy(affinity, desc->affinity);
2646 
2647         spin_unlock(&desc->lock);
2648 
2649         if ( !verbose )
2650             continue;
2651 
2652         if ( !set_affinity )
2653             printk("Cannot set affinity for IRQ%u\n", irq);
2654         else if ( break_affinity )
2655             printk("Broke affinity for IRQ%u, new: %*pb\n",
2656                    irq, CPUMASK_PR(affinity));
2657     }
2658 
2659     /* The affinity changes above don't seem sufficient.  Give it 1ms. */
2660     local_irq_enable();
2661     mdelay(1);
2662     local_irq_disable();
2663 }
2664 
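/*
 * Remove the local CPU from every guest IRQ's cpu_eoi_map and flush any
 * EOIs still stacked on this CPU, so it leaves no pending EOI state behind.
 */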
2665 void fixup_eoi(void)
2666 {
2667     unsigned int irq, sp;
2668     struct irq_desc *desc;
2669     irq_guest_action_t *action;
2670     struct pending_eoi *peoi;
2671 
2672     /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
2673     for ( irq = 0; irq < nr_irqs; irq++ )
2674     {
2675         desc = irq_to_desc(irq);
2676         if ( !(desc->status & IRQ_GUEST) )
2677             continue;
2678         action = (irq_guest_action_t *)desc->action;
2679         cpumask_clear_cpu(smp_processor_id(), action->cpu_eoi_map);
2680     }
2681 
2682     /* Flush the interrupt EOI stack. */
2683     peoi = this_cpu(pending_eoi);
2684     for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
2685         peoi[sp].ready = 1;
2686     flush_ready_eoi();
2687 }
2688 
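/*
 * HVM only: associate 'pirq' with an emulated IRQ, or with IRQ_PT for a
 * passed-through device (in which case no reverse mapping is stored).
 * The caller holds d->event_lock.
 */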
2689 int map_domain_emuirq_pirq(struct domain *d, int pirq, int emuirq)
2690 {
2691     int old_emuirq = IRQ_UNBOUND, old_pirq = IRQ_UNBOUND;
2692     struct pirq *info;
2693 
2694     ASSERT(spin_is_locked(&d->event_lock));
2695 
2696     if ( !is_hvm_domain(d) )
2697         return -EINVAL;
2698 
2699     if ( pirq < 0 || pirq >= d->nr_pirqs ||
2700          emuirq == IRQ_UNBOUND || emuirq >= (int)nr_irqs )
2701     {
2702         dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or emuirq %d\n",
2703                 d->domain_id, pirq, emuirq);
2704         return -EINVAL;
2705     }
2706 
2707     old_emuirq = domain_pirq_to_emuirq(d, pirq);
2708     if ( emuirq != IRQ_PT )
2709         old_pirq = domain_emuirq_to_pirq(d, emuirq);
2710 
2711     if ( (old_emuirq != IRQ_UNBOUND && (old_emuirq != emuirq) ) ||
2712          (old_pirq != IRQ_UNBOUND && (old_pirq != pirq)) )
2713     {
2714         dprintk(XENLOG_G_WARNING, "dom%d: pirq %d or emuirq %d already mapped\n",
2715                 d->domain_id, pirq, emuirq);
2716         return 0;
2717     }
2718 
2719     info = pirq_get_info(d, pirq);
2720     if ( !info )
2721         return -ENOMEM;
2722 
2723     /* do not store emuirq mappings for pt devices */
2724     if ( emuirq != IRQ_PT )
2725     {
2726         int err = radix_tree_insert(&d->arch.hvm.emuirq_pirq, emuirq,
2727                                     radix_tree_int_to_ptr(pirq));
2728 
2729         switch ( err )
2730         {
2731         case 0:
2732             break;
2733         case -EEXIST:
2734             radix_tree_replace_slot(
2735                 radix_tree_lookup_slot(
2736                     &d->arch.hvm.emuirq_pirq, emuirq),
2737                 radix_tree_int_to_ptr(pirq));
2738             break;
2739         default:
2740             pirq_cleanup_check(info, d);
2741             return err;
2742         }
2743     }
2744     info->arch.hvm.emuirq = emuirq;
2745 
2746     return 0;
2747 }
2748 
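/* HVM only: drop the emuirq association established for 'pirq' above. */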
2749 int unmap_domain_pirq_emuirq(struct domain *d, int pirq)
2750 {
2751     int emuirq, ret = 0;
2752     struct pirq *info;
2753 
2754     if ( !is_hvm_domain(d) )
2755         return -EINVAL;
2756 
2757     if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
2758         return -EINVAL;
2759 
2760     ASSERT(spin_is_locked(&d->event_lock));
2761 
2762     emuirq = domain_pirq_to_emuirq(d, pirq);
2763     if ( emuirq == IRQ_UNBOUND )
2764     {
2765         dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
2766                 d->domain_id, pirq);
2767         ret = -EINVAL;
2768         goto done;
2769     }
2770 
2771     info = pirq_info(d, pirq);
2772     if ( info )
2773     {
2774         info->arch.hvm.emuirq = IRQ_UNBOUND;
2775         pirq_cleanup_check(info, d);
2776     }
2777     if ( emuirq != IRQ_PT )
2778         radix_tree_delete(&d->arch.hvm.emuirq_pirq, emuirq);
2779 
2780  done:
2781     return ret;
2782 }
2783 
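/*
 * Arch hook invoked when an event channel is bound to 'pirq': mark HVM
 * passthrough pirqs as IRQ_PT and unmask the underlying MSI, if any.
 */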
2784 void arch_evtchn_bind_pirq(struct domain *d, int pirq)
2785 {
2786     int irq = domain_pirq_to_irq(d, pirq);
2787     struct irq_desc *desc;
2788     unsigned long flags;
2789 
2790     if ( irq <= 0 )
2791         return;
2792 
2793     if ( is_hvm_domain(d) )
2794         map_domain_emuirq_pirq(d, pirq, IRQ_PT);
2795 
2796     desc = irq_to_desc(irq);
2797     spin_lock_irqsave(&desc->lock, flags);
2798     if ( desc->msi_desc )
2799         guest_mask_msi_irq(desc, 0);
2800     spin_unlock_irqrestore(&desc->lock, flags);
2801 }
2802 
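/*
 * Pick (or verify) the pirq to use for 'irq'.  A negative 'pirq' requests
 * automatic allocation; for multi-MSI the vector count in '*nr' is
 * validated and a contiguous block of pirqs is allocated.  Returns the
 * pirq or a -errno value.  The caller holds d->event_lock.
 */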
2803 static int allocate_pirq(struct domain *d, int index, int pirq, int irq,
2804                          int type, int *nr)
2805 {
2806     int current_pirq;
2807 
2808     ASSERT(spin_is_locked(&d->event_lock));
2809     current_pirq = domain_irq_to_pirq(d, irq);
2810     if ( pirq < 0 )
2811     {
2812         if ( current_pirq )
2813         {
2814             dprintk(XENLOG_G_ERR, "dom%d: %d:%d already mapped to %d\n",
2815                     d->domain_id, index, pirq, current_pirq);
2816             if ( current_pirq < 0 )
2817                 return -EBUSY;
2818         }
2819         else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
2820         {
2821             if ( *nr <= 0 || *nr > MAX_MSI_IRQS )
2822                 return -EDOM;
2823             if ( *nr != 1 && !iommu_intremap )
2824                 return -EOPNOTSUPP;
2825 
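            /*
             * MSI only supports power-of-two numbers of vectors; round the
             * request up by repeatedly adding the lowest set bit (e.g. a
             * request for 3 becomes 4).
             */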
2826             while ( *nr & (*nr - 1) )
2827                 *nr += *nr & -*nr;
2828             pirq = get_free_pirqs(d, *nr);
2829             if ( pirq < 0 )
2830             {
2831                 while ( (*nr >>= 1) > 1 )
2832                     if ( get_free_pirqs(d, *nr) > 0 )
2833                         break;
2834                 dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
2835                         d->domain_id, *nr << 1);
2836             }
2837         }
2838         else
2839         {
2840             pirq = get_free_pirq(d, type);
2841             if ( pirq < 0 )
2842                 dprintk(XENLOG_G_ERR, "dom%d: no free pirq\n", d->domain_id);
2843         }
2844     }
2845     else if ( current_pirq && pirq != current_pirq )
2846     {
2847         dprintk(XENLOG_G_ERR, "dom%d: irq %d already mapped to pirq %d\n",
2848                 d->domain_id, irq, current_pirq);
2849         return -EEXIST;
2850     }
2851 
2852     return pirq;
2853 }
2854 
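/*
 * Allocate (or verify) a pirq for a GSI and map it into domain 'd',
 * presumably on behalf of a PHYSDEVOP_map_pirq-style caller.  The GSI must
 * already have an IRQ in the calling domain, unless the caller is the
 * hardware domain.
 */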
2855 int allocate_and_map_gsi_pirq(struct domain *d, int index, int *pirq_p)
2856 {
2857     int irq, pirq, ret;
2858 
2859     if ( index < 0 || index >= nr_irqs_gsi )
2860     {
2861         dprintk(XENLOG_G_ERR, "dom%d: map invalid irq %d\n", d->domain_id,
2862                 index);
2863         return -EINVAL;
2864     }
2865 
2866     irq = domain_pirq_to_irq(current->domain, index);
2867     if ( irq <= 0 )
2868     {
2869         if ( is_hardware_domain(current->domain) )
2870             irq = index;
2871         else
2872         {
2873             dprintk(XENLOG_G_ERR, "dom%d: map pirq with incorrect irq!\n",
2874                     d->domain_id);
2875             return -EINVAL;
2876         }
2877     }
2878 
2879     /* Verify or get pirq. */
2880     spin_lock(&d->event_lock);
2881     pirq = allocate_pirq(d, index, *pirq_p, irq, MAP_PIRQ_TYPE_GSI, NULL);
2882     if ( pirq < 0 )
2883     {
2884         ret = pirq;
2885         goto done;
2886     }
2887 
2888     ret = map_domain_pirq(d, pirq, irq, MAP_PIRQ_TYPE_GSI, NULL);
2889     if ( !ret )
2890         *pirq_p = pirq;
2891 
2892  done:
2893     spin_unlock(&d->event_lock);
2894 
2895     return ret;
2896 }
2897 
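/*
 * Allocate (or verify) a pirq for an MSI (or multi-MSI block) and map it
 * into domain 'd', creating a fresh dynamic IRQ when 'index' is -1 or for
 * multi-MSI.  Any IRQ created here is destroyed again on failure.
 */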
2898 int allocate_and_map_msi_pirq(struct domain *d, int index, int *pirq_p,
2899                               int type, struct msi_info *msi)
2900 {
2901     int irq, pirq, ret;
2902 
2903     switch ( type )
2904     {
2905     case MAP_PIRQ_TYPE_MSI:
2906         if ( !msi->table_base )
2907             msi->entry_nr = 1;
2908         irq = index;
2909         if ( irq == -1 )
2910         {
2911     case MAP_PIRQ_TYPE_MULTI_MSI:
2912             irq = create_irq(NUMA_NO_NODE, true);
2913         }
2914 
2915         if ( irq < nr_irqs_gsi || irq >= nr_irqs )
2916         {
2917             dprintk(XENLOG_G_ERR, "dom%d: can't create irq for msi!\n",
2918                     d->domain_id);
2919             return -EINVAL;
2920         }
2921         break;
2922 
2923     default:
2924         dprintk(XENLOG_G_ERR, "dom%d: wrong pirq type %x\n",
2925                 d->domain_id, type);
2926         ASSERT_UNREACHABLE();
2927         return -EINVAL;
2928     }
2929 
2930     msi->irq = irq;
2931 
2932     pcidevs_lock();
2933     /* Verify or get pirq. */
2934     spin_lock(&d->event_lock);
2935     pirq = allocate_pirq(d, index, *pirq_p, irq, type, &msi->entry_nr);
2936     if ( pirq < 0 )
2937     {
2938         ret = pirq;
2939         goto done;
2940     }
2941 
2942     ret = map_domain_pirq(d, pirq, irq, type, msi);
2943     if ( !ret )
2944         *pirq_p = pirq;
2945 
2946  done:
2947     spin_unlock(&d->event_lock);
2948     pcidevs_unlock();
2949     if ( ret )
2950     {
2951         switch ( type )
2952         {
2953         case MAP_PIRQ_TYPE_MSI:
2954             if ( index == -1 )
2955         case MAP_PIRQ_TYPE_MULTI_MSI:
2956                 destroy_irq(irq);
2957             break;
2958         }
2959     }
2960 
2961     return ret;
2962 }
2963