1 /******************************************************************************
2 * arch/x86/irq.c
3 *
4 * Portions of this file are:
5 * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
6 */
7
8 #include <xen/init.h>
9 #include <xen/delay.h>
10 #include <xen/errno.h>
11 #include <xen/event.h>
12 #include <xen/irq.h>
13 #include <xen/param.h>
14 #include <xen/perfc.h>
15 #include <xen/sched.h>
16 #include <xen/keyhandler.h>
17 #include <xen/compat.h>
18 #include <xen/iocap.h>
19 #include <xen/iommu.h>
20 #include <xen/symbols.h>
21 #include <xen/trace.h>
22 #include <xen/softirq.h>
23 #include <xsm/xsm.h>
24 #include <asm/msi.h>
25 #include <asm/current.h>
26 #include <asm/flushtlb.h>
27 #include <asm/mach-generic/mach_apic.h>
28 #include <irq_vectors.h>
29 #include <public/physdev.h>
30
31 static int parse_irq_vector_map_param(const char *s);
32
33 /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
34 bool __read_mostly opt_noirqbalance;
35 boolean_param("noirqbalance", opt_noirqbalance);
36
37 unsigned int __read_mostly nr_irqs_gsi = 16;
38 unsigned int __read_mostly nr_irqs;
39 integer_param("nr_irqs", nr_irqs);
40
41 /* This default may be changed by the AMD IOMMU code */
42 int __read_mostly opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_DEFAULT;
43 custom_param("irq_vector_map", parse_irq_vector_map_param);
44
45 vmask_t global_used_vector_map;
46
47 struct irq_desc __read_mostly *irq_desc = NULL;
48
49 static DECLARE_BITMAP(used_vectors, X86_NR_VECTORS);
50
51 static DEFINE_SPINLOCK(vector_lock);
52
53 DEFINE_PER_CPU(vector_irq_t, vector_irq);
54
55 DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs);
56
57 static LIST_HEAD(irq_ratelimit_list);
58 static DEFINE_SPINLOCK(irq_ratelimit_lock);
59 static struct timer irq_ratelimit_timer;
60
61 /* irq_ratelimit: the max irq rate allowed in every 10ms, set 0 to disable */
62 static unsigned int __read_mostly irq_ratelimit_threshold = 10000;
63 integer_param("irq_ratelimit", irq_ratelimit_threshold);
64
65 static int __init parse_irq_vector_map_param(const char *s)
66 {
67 const char *ss;
68 int rc = 0;
69
70 do {
71 ss = strchr(s, ',');
72 if ( !ss )
73 ss = strchr(s, '\0');
74
75 if ( !cmdline_strcmp(s, "none") )
76 opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_NONE;
77 else if ( !cmdline_strcmp(s, "global") )
78 opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
79 else if ( !cmdline_strcmp(s, "per-device") )
80 opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
81 else
82 rc = -EINVAL;
83
84 s = ss + 1;
85 } while ( *ss );
86
87 return rc;
88 }
89
90 /* Must be called with IRQs disabled. */
91 void lock_vector_lock(void)
92 {
93 /* Used to ensure that the online set of CPUs does not change
94  * during assign_irq_vector().
95 */
96 spin_lock(&vector_lock);
97 }
98
99 void unlock_vector_lock(void)
100 {
101 spin_unlock(&vector_lock);
102 }
103
104 static inline bool valid_irq_vector(unsigned int vector)
105 {
106 return vector >= FIRST_IRQ_VECTOR && vector <= LAST_IRQ_VECTOR;
107 }
108
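/*
 * Release the vector this IRQ was previously moved away from: reset the
 * old vector/CPU bookkeeping and, if a per-IRQ vector bitmap is in use,
 * clear the old vector's bit in it.
 */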
109 static void release_old_vec(struct irq_desc *desc)
110 {
111 unsigned int vector = desc->arch.old_vector;
112
113 desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED;
114 cpumask_clear(desc->arch.old_cpu_mask);
115
116 if ( !valid_irq_vector(vector) )
117 ASSERT_UNREACHABLE();
118 else if ( desc->arch.used_vectors )
119 {
120 ASSERT(test_bit(vector, desc->arch.used_vectors));
121 clear_bit(vector, desc->arch.used_vectors);
122 }
123 }
124
125 static void _trace_irq_mask(uint32_t event, int irq, int vector,
126 const cpumask_t *mask)
127 {
128 struct {
129 unsigned int irq:16, vec:16;
130 unsigned int mask[6];
131 } d = {
132 .irq = irq,
133 .vec = vector,
134 };
135
136 memcpy(d.mask, mask,
137 min(sizeof(d.mask), BITS_TO_LONGS(nr_cpu_ids) * sizeof(long)));
138 trace_var(event, 1, sizeof(d), &d);
139 }
140
141 static void trace_irq_mask(uint32_t event, int irq, int vector,
142 const cpumask_t *mask)
143 {
144 if ( unlikely(tb_init_done) )
145 _trace_irq_mask(event, irq, vector, mask);
146 }
147
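/*
 * Bind an IRQ to a specific, caller-chosen vector on the given CPUs
 * (boot-time only: the function is __init). Fails with -EBUSY if the
 * descriptor already has a different vector assigned.
 */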
148 static int __init _bind_irq_vector(struct irq_desc *desc, int vector,
149 const cpumask_t *cpu_mask)
150 {
151 cpumask_t online_mask;
152 int cpu;
153
154 BUG_ON((unsigned)vector >= X86_NR_VECTORS);
155
156 cpumask_and(&online_mask, cpu_mask, &cpu_online_map);
157 if (cpumask_empty(&online_mask))
158 return -EINVAL;
159 if ( (desc->arch.vector == vector) &&
160 cpumask_equal(desc->arch.cpu_mask, &online_mask) )
161 return 0;
162 if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
163 return -EBUSY;
164 trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, desc->irq, vector, &online_mask);
165 for_each_cpu(cpu, &online_mask)
166 per_cpu(vector_irq, cpu)[vector] = desc->irq;
167 desc->arch.vector = vector;
168 cpumask_copy(desc->arch.cpu_mask, &online_mask);
169 if ( desc->arch.used_vectors )
170 {
171 ASSERT(!test_bit(vector, desc->arch.used_vectors));
172 set_bit(vector, desc->arch.used_vectors);
173 }
174 desc->arch.used = IRQ_USED;
175 return 0;
176 }
177
178 int __init bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
179 {
180 struct irq_desc *desc = irq_to_desc(irq);
181 unsigned long flags;
182 int ret;
183
184 BUG_ON((unsigned)irq >= nr_irqs);
185
186 spin_lock_irqsave(&desc->lock, flags);
187 spin_lock(&vector_lock);
188 ret = _bind_irq_vector(desc, vector, cpu_mask);
189 spin_unlock(&vector_lock);
190 spin_unlock_irqrestore(&desc->lock, flags);
191
192 return ret;
193 }
194
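/*
 * Undo a vector assignment: clear the per-CPU vector_irq entries for the
 * current vector and, if a move was in progress, for the old vector too.
 * Callers hold both the descriptor lock and vector_lock.
 */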
195 static void _clear_irq_vector(struct irq_desc *desc)
196 {
197 unsigned int cpu, old_vector, irq = desc->irq;
198 unsigned int vector = desc->arch.vector;
199 cpumask_t *tmp_mask = this_cpu(scratch_cpumask);
200
201 BUG_ON(!valid_irq_vector(vector));
202
203 /* Always clear desc->arch.vector */
204 cpumask_and(tmp_mask, desc->arch.cpu_mask, &cpu_online_map);
205
206 for_each_cpu(cpu, tmp_mask)
207 {
208 ASSERT(per_cpu(vector_irq, cpu)[vector] == irq);
209 per_cpu(vector_irq, cpu)[vector] = ~irq;
210 }
211
212 desc->arch.vector = IRQ_VECTOR_UNASSIGNED;
213 cpumask_clear(desc->arch.cpu_mask);
214
215 if ( desc->arch.used_vectors )
216 {
217 ASSERT(test_bit(vector, desc->arch.used_vectors));
218 clear_bit(vector, desc->arch.used_vectors);
219 }
220
221 desc->arch.used = IRQ_UNUSED;
222
223 trace_irq_mask(TRC_HW_IRQ_CLEAR_VECTOR, irq, vector, tmp_mask);
224
225 if ( likely(!desc->arch.move_in_progress) )
226 return;
227
228 /* If we were in motion, also clear desc->arch.old_vector */
229 old_vector = desc->arch.old_vector;
230 cpumask_and(tmp_mask, desc->arch.old_cpu_mask, &cpu_online_map);
231
232 for_each_cpu(cpu, tmp_mask)
233 {
234 ASSERT(per_cpu(vector_irq, cpu)[old_vector] == irq);
235 TRACE_3D(TRC_HW_IRQ_MOVE_FINISH, irq, old_vector, cpu);
236 per_cpu(vector_irq, cpu)[old_vector] = ~irq;
237 }
238
239 release_old_vec(desc);
240
241 desc->arch.move_in_progress = 0;
242 }
243
244 void __init clear_irq_vector(int irq)
245 {
246 struct irq_desc *desc = irq_to_desc(irq);
247 unsigned long flags;
248
249 spin_lock_irqsave(&desc->lock, flags);
250 spin_lock(&vector_lock);
251 _clear_irq_vector(desc);
252 spin_unlock(&vector_lock);
253 spin_unlock_irqrestore(&desc->lock, flags);
254 }
255
256 /*
257 * Dynamic irq allocate and deallocation for MSI
258 */
259
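/*
 * Allocate a dynamic (MSI) IRQ, preferring CPUs of the given NUMA node when
 * assigning its vector. If grant_access is set, the calling domain is
 * granted permission to use the new IRQ.
 */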
260 int create_irq(nodeid_t node, bool grant_access)
261 {
262 int irq, ret;
263 struct irq_desc *desc;
264
265 for (irq = nr_irqs_gsi; irq < nr_irqs; irq++)
266 {
267 desc = irq_to_desc(irq);
268 if (cmpxchg(&desc->arch.used, IRQ_UNUSED, IRQ_RESERVED) == IRQ_UNUSED)
269 break;
270 }
271
272 if (irq >= nr_irqs)
273 return -ENOSPC;
274
275 ret = init_one_irq_desc(desc);
276 if (!ret)
277 {
278 cpumask_t *mask = NULL;
279
280 if ( node != NUMA_NO_NODE )
281 {
282 mask = &node_to_cpumask(node);
283 if (cpumask_empty(mask))
284 mask = NULL;
285 }
286 ret = assign_irq_vector(irq, mask);
287 }
288
289 ASSERT(desc->arch.creator_domid == DOMID_INVALID);
290
291 if (ret < 0)
292 {
293 desc->arch.used = IRQ_UNUSED;
294 irq = ret;
295 }
296 else if ( grant_access )
297 {
298 struct domain *currd = current->domain;
299
300 ret = irq_permit_access(currd, irq);
301 if ( ret )
302 printk(XENLOG_G_ERR
303 "Could not grant %pd access to IRQ%d (error %d)\n",
304 currd, irq, ret);
305 else
306 desc->arch.creator_domid = currd->domain_id;
307 }
308
309 return irq;
310 }
311
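/*
 * Tear down a dynamically created (MSI) IRQ: revoke the creator domain's
 * access, shut the interrupt down, wait for any in-progress handler to
 * finish, then release its vector.
 */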
312 void destroy_irq(unsigned int irq)
313 {
314 struct irq_desc *desc = irq_to_desc(irq);
315 unsigned long flags;
316 struct irqaction *action;
317
318 BUG_ON(!MSI_IRQ(irq));
319
320 if ( desc->arch.creator_domid != DOMID_INVALID )
321 {
322 struct domain *d = get_domain_by_id(desc->arch.creator_domid);
323
324 if ( d )
325 {
326 int err = irq_deny_access(d, irq);
327
328 if ( err )
329 printk(XENLOG_G_ERR
330 "Could not revoke %pd access to IRQ%u (error %d)\n",
331 d, irq, err);
332
333 put_domain(d);
334 }
335
336 desc->arch.creator_domid = DOMID_INVALID;
337 }
338
339 spin_lock_irqsave(&desc->lock, flags);
340 desc->status &= ~IRQ_GUEST;
341 desc->handler->shutdown(desc);
342 desc->status |= IRQ_DISABLED;
343 action = desc->action;
344 desc->action = NULL;
345 desc->msi_desc = NULL;
346 cpumask_setall(desc->affinity);
347 spin_unlock_irqrestore(&desc->lock, flags);
348
349 /* Wait to make sure it's not being used on another CPU */
350 do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
351
352 spin_lock_irqsave(&desc->lock, flags);
353 desc->handler = &no_irq_type;
354 spin_lock(&vector_lock);
355 _clear_irq_vector(desc);
356 spin_unlock(&vector_lock);
357 desc->arch.used_vectors = NULL;
358 spin_unlock_irqrestore(&desc->lock, flags);
359
360 xfree(action);
361 }
362
363 int irq_to_vector(int irq)
364 {
365 int vector = IRQ_VECTOR_UNASSIGNED;
366 const struct irq_desc *desc;
367
368 BUG_ON(irq >= nr_irqs || irq < 0);
369 desc = irq_to_desc(irq);
370
371 if (IO_APIC_IRQ(irq))
372 {
373 vector = desc->arch.vector;
374 /*
375 * Both parts of the condition are needed here during early boot, as
376 * at that time IRQ0 in particular may still have the 8259A chip set,
377 * but has already got its special IRQ0_VECTOR.
378 */
379 if ( desc->handler->enable == enable_8259A_irq &&
380 vector >= FIRST_LEGACY_VECTOR && vector <= LAST_LEGACY_VECTOR )
381 vector = 0;
382 }
383 else if (MSI_IRQ(irq))
384 vector = desc->arch.vector;
385 else
386 vector = LEGACY_VECTOR(irq);
387
388 return vector;
389 }
390
391 int arch_init_one_irq_desc(struct irq_desc *desc)
392 {
393 if ( !zalloc_cpumask_var(&desc->arch.cpu_mask) )
394 return -ENOMEM;
395
396 if ( !alloc_cpumask_var(&desc->arch.old_cpu_mask) )
397 {
398 free_cpumask_var(desc->arch.cpu_mask);
399 return -ENOMEM;
400 }
401
402 if ( !alloc_cpumask_var(&desc->arch.pending_mask) )
403 {
404 free_cpumask_var(desc->arch.old_cpu_mask);
405 free_cpumask_var(desc->arch.cpu_mask);
406 return -ENOMEM;
407 }
408
409 desc->arch.vector = IRQ_VECTOR_UNASSIGNED;
410 desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED;
411 desc->arch.creator_domid = DOMID_INVALID;
412
413 return 0;
414 }
415
416 int __init init_irq_data(void)
417 {
418 struct irq_desc *desc;
419 int irq, vector;
420
421 for ( vector = 0; vector < X86_NR_VECTORS; ++vector )
422 this_cpu(vector_irq)[vector] = INT_MIN;
423
424 irq_desc = xzalloc_array(struct irq_desc, nr_irqs);
425
426 if ( !irq_desc )
427 return -ENOMEM;
428
429 for ( irq = 0; irq < nr_irqs_gsi; irq++ )
430 {
431 desc = irq_to_desc(irq);
432 desc->irq = irq;
433 init_one_irq_desc(desc);
434 }
435 for ( ; irq < nr_irqs; irq++ )
436 irq_to_desc(irq)->irq = irq;
437
438 #ifdef CONFIG_PV
439 /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
440 set_bit(LEGACY_SYSCALL_VECTOR, used_vectors);
441 set_bit(HYPERCALL_VECTOR, used_vectors);
442 #endif
443
444 /*
445 * Mark vectors up to the cleanup one as used, to prevent an infinite loop
446 * invoking irq_move_cleanup_interrupt.
447 */
448 BUILD_BUG_ON(IRQ_MOVE_CLEANUP_VECTOR < FIRST_DYNAMIC_VECTOR);
449 for ( vector = FIRST_DYNAMIC_VECTOR;
450 vector <= IRQ_MOVE_CLEANUP_VECTOR;
451 vector++ )
452 __set_bit(vector, used_vectors);
453
454 return 0;
455 }
456
457 static void ack_none(struct irq_desc *desc)
458 {
459 ack_bad_irq(desc->irq);
460 }
461
462 hw_irq_controller no_irq_type = {
463 "none",
464 irq_startup_none,
465 irq_shutdown_none,
466 irq_enable_none,
467 irq_disable_none,
468 ack_none,
469 };
470
471 static vmask_t *irq_get_used_vector_mask(int irq)
472 {
473 vmask_t *ret = NULL;
474
475 if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_GLOBAL )
476 {
477 struct irq_desc *desc = irq_to_desc(irq);
478
479 ret = &global_used_vector_map;
480
481 if ( desc->arch.used_vectors )
482 printk(XENLOG_INFO "Unassigned IRQ %d already has used_vectors\n",
483 irq);
484 else
485 {
486 int vector;
487
488 vector = irq_to_vector(irq);
489 if ( valid_irq_vector(vector) )
490 {
491 printk(XENLOG_INFO "IRQ%d already assigned vector %02x\n",
492 irq, vector);
493
494 ASSERT(!test_bit(vector, ret));
495
496 set_bit(vector, ret);
497 }
498 else if ( vector != IRQ_VECTOR_UNASSIGNED )
499 printk(XENLOG_WARNING "IRQ%d mapped to bogus vector %02x\n",
500 irq, vector);
501 }
502 }
503 else if ( IO_APIC_IRQ(irq) &&
504 opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_NONE )
505 {
506 ret = io_apic_get_used_vector_map(irq);
507 }
508
509 return ret;
510 }
511
512 static int _assign_irq_vector(struct irq_desc *desc, const cpumask_t *mask)
513 {
514 /*
515 * NOTE! The local APIC isn't very good at handling
516 * multiple interrupts at the same interrupt level.
517 * As the interrupt level is determined by taking the
518 * vector number and shifting that right by 4, we
519 * want to spread these out a bit so that they don't
520 * all fall in the same interrupt level.
521 *
522 * Also, we've got to be careful not to trash gate
523 * 0x80, because int 0x80 is hm, kind of importantish. ;)
524 */
525 static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
526 unsigned int cpu;
527 int err, old_vector, irq = desc->irq;
528 vmask_t *irq_used_vectors = NULL;
529
530 old_vector = irq_to_vector(irq);
531 if ( valid_irq_vector(old_vector) )
532 {
533 cpumask_t tmp_mask;
534
535 cpumask_and(&tmp_mask, mask, &cpu_online_map);
536 if (cpumask_intersects(&tmp_mask, desc->arch.cpu_mask)) {
537 desc->arch.vector = old_vector;
538 return 0;
539 }
540 }
541
542 if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count )
543 return -EAGAIN;
544
545 err = -ENOSPC;
546
547 /* This is the only place normal IRQs are ever marked
548 * as "in use". If they're not in use yet, check to see
549 * if we need to assign a global vector mask. */
550 if ( desc->arch.used == IRQ_USED )
551 {
552 irq_used_vectors = desc->arch.used_vectors;
553 }
554 else
555 irq_used_vectors = irq_get_used_vector_mask(irq);
556
557 for_each_cpu(cpu, mask)
558 {
559 const cpumask_t *vec_mask;
560 int new_cpu;
561 int vector, offset;
562
563 /* Only try and allocate irqs on cpus that are present. */
564 if (!cpu_online(cpu))
565 continue;
566
567 vec_mask = vector_allocation_cpumask(cpu);
568
569 vector = current_vector;
570 offset = current_offset;
571 next:
572 vector += 8;
573 if (vector > LAST_DYNAMIC_VECTOR) {
574 /* If out of vectors on large boxen, must share them. */
575 offset = (offset + 1) % 8;
576 vector = FIRST_DYNAMIC_VECTOR + offset;
577 }
578 if (unlikely(current_vector == vector))
579 continue;
580
581 if (test_bit(vector, used_vectors))
582 goto next;
583
584 if (irq_used_vectors
585 && test_bit(vector, irq_used_vectors) )
586 goto next;
587
588 if ( cpumask_test_cpu(0, vec_mask) &&
589 vector >= FIRST_LEGACY_VECTOR && vector <= LAST_LEGACY_VECTOR )
590 goto next;
591
592 for_each_cpu(new_cpu, vec_mask)
593 if (per_cpu(vector_irq, new_cpu)[vector] >= 0)
594 goto next;
595 /* Found one! */
596 current_vector = vector;
597 current_offset = offset;
598
599 if ( valid_irq_vector(old_vector) )
600 {
601 cpumask_and(desc->arch.old_cpu_mask, desc->arch.cpu_mask,
602 &cpu_online_map);
603 desc->arch.old_vector = desc->arch.vector;
604 if ( !cpumask_empty(desc->arch.old_cpu_mask) )
605 desc->arch.move_in_progress = 1;
606 else
607 /* This can happen while offlining a CPU. */
608 release_old_vec(desc);
609 }
610
611 trace_irq_mask(TRC_HW_IRQ_ASSIGN_VECTOR, irq, vector, vec_mask);
612
613 for_each_cpu(new_cpu, vec_mask)
614 per_cpu(vector_irq, new_cpu)[vector] = irq;
615 desc->arch.vector = vector;
616 cpumask_copy(desc->arch.cpu_mask, vec_mask);
617
618 desc->arch.used = IRQ_USED;
619 ASSERT((desc->arch.used_vectors == NULL)
620 || (desc->arch.used_vectors == irq_used_vectors));
621 desc->arch.used_vectors = irq_used_vectors;
622
623 if ( desc->arch.used_vectors )
624 {
625 ASSERT(!test_bit(vector, desc->arch.used_vectors));
626
627 set_bit(vector, desc->arch.used_vectors);
628 }
629
630 err = 0;
631 break;
632 }
633 return err;
634 }
635
636 int assign_irq_vector(int irq, const cpumask_t *mask)
637 {
638 int ret;
639 unsigned long flags;
640 struct irq_desc *desc = irq_to_desc(irq);
641
642 BUG_ON(irq >= nr_irqs || irq < 0);
643
644 spin_lock_irqsave(&desc->lock, flags);
645
646 spin_lock(&vector_lock);
647 ret = _assign_irq_vector(desc, mask ?: TARGET_CPUS);
648 spin_unlock(&vector_lock);
649
650 if ( !ret )
651 {
652 ret = desc->arch.vector;
653 if ( mask )
654 cpumask_copy(desc->affinity, mask);
655 else
656 cpumask_setall(desc->affinity);
657 }
658
659 spin_unlock_irqrestore(&desc->lock, flags);
660
661 return ret;
662 }
663
664 /*
665 * Initialize vector_irq on a new cpu. This function must be called
666 * with vector_lock held. For this reason it may not itself acquire
667 * the IRQ descriptor locks, as lock nesting is the other way around.
668 */
669 void setup_vector_irq(unsigned int cpu)
670 {
671 unsigned int irq, vector;
672
673 /* Clear vector_irq */
674 for ( vector = 0; vector < X86_NR_VECTORS; ++vector )
675 per_cpu(vector_irq, cpu)[vector] = INT_MIN;
676 /* Mark the in-use vectors. */
677 for ( irq = 0; irq < nr_irqs; ++irq )
678 {
679 struct irq_desc *desc = irq_to_desc(irq);
680
681 if ( !irq_desc_initialized(desc) )
682 continue;
683 vector = irq_to_vector(irq);
684 if ( vector >= FIRST_HIPRIORITY_VECTOR &&
685 vector <= LAST_HIPRIORITY_VECTOR )
686 cpumask_set_cpu(cpu, desc->arch.cpu_mask);
687 else if ( !cpumask_test_cpu(cpu, desc->arch.cpu_mask) )
688 continue;
689 per_cpu(vector_irq, cpu)[vector] = irq;
690 }
691 }
692
693 void move_masked_irq(struct irq_desc *desc)
694 {
695 cpumask_t *pending_mask = desc->arch.pending_mask;
696
697 if (likely(!(desc->status & IRQ_MOVE_PENDING)))
698 return;
699
700 desc->status &= ~IRQ_MOVE_PENDING;
701
702 if (!desc->handler->set_affinity)
703 return;
704
705 /*
706 * If there was a valid mask to work with, please do the disable,
707 * re-program, enable sequence. This is *not* particularly important for
708 * level-triggered interrupts, but in the edge-triggered case we might be
709 * setting the RTE while an active trigger is coming in. This could cause
710 * some IO-APICs to malfunction. Being paranoid, I guess!
711 *
712 * For correct operation this depends on the caller masking the irqs.
713 */
714 if ( likely(cpumask_intersects(pending_mask, &cpu_online_map)) )
715 desc->handler->set_affinity(desc, pending_mask);
716
717 cpumask_clear(pending_mask);
718 }
719
720 void move_native_irq(struct irq_desc *desc)
721 {
722 if (likely(!(desc->status & IRQ_MOVE_PENDING)))
723 return;
724
725 if (unlikely(desc->status & IRQ_DISABLED))
726 return;
727
728 desc->handler->disable(desc);
729 move_masked_irq(desc);
730 desc->handler->enable(desc);
731 }
732
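/*
 * Handler for IRQ_MOVE_CLEANUP_VECTOR: after an IRQ has been migrated to a
 * new vector/CPU, release the old vector on this CPU once it is no longer
 * pending in the local APIC's IRR.
 */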
733 void irq_move_cleanup_interrupt(struct cpu_user_regs *regs)
734 {
735 unsigned vector, me;
736
737 ack_APIC_irq();
738
739 me = smp_processor_id();
740 if ( !cpu_online(me) )
741 return;
742
743 for ( vector = FIRST_DYNAMIC_VECTOR;
744 vector <= LAST_HIPRIORITY_VECTOR; vector++)
745 {
746 unsigned int irq;
747 unsigned int irr;
748 struct irq_desc *desc;
749 irq = per_cpu(vector_irq, me)[vector];
750
751 if ((int)irq < 0)
752 continue;
753
754 desc = irq_to_desc(irq);
755 if (!desc)
756 continue;
757
758 spin_lock(&desc->lock);
759
760 if (desc->handler->enable == enable_8259A_irq)
761 goto unlock;
762
763 if (!desc->arch.move_cleanup_count)
764 goto unlock;
765
766 if ( vector == desc->arch.vector &&
767 cpumask_test_cpu(me, desc->arch.cpu_mask) )
768 goto unlock;
769
770 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
771 /*
772 * Check if the vector that needs to be cleaned up is
773 * registered in the CPU's IRR. If so, this is not the
774 * best time to clean it up. Let's clean it up in the
775 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
776 * to myself.
777 */
778 if ( irr & (1u << (vector % 32)) )
779 {
780 if ( vector < IRQ_MOVE_CLEANUP_VECTOR )
781 {
782 ASSERT_UNREACHABLE();
783 goto unlock;
784 }
785 send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
786 TRACE_3D(TRC_HW_IRQ_MOVE_CLEANUP_DELAY,
787 irq, vector, smp_processor_id());
788 goto unlock;
789 }
790
791 TRACE_3D(TRC_HW_IRQ_MOVE_CLEANUP,
792 irq, vector, smp_processor_id());
793
794 per_cpu(vector_irq, me)[vector] = ~irq;
795 desc->arch.move_cleanup_count--;
796
797 if ( desc->arch.move_cleanup_count == 0 )
798 {
799 ASSERT(vector == desc->arch.old_vector);
800 release_old_vec(desc);
801 }
802 unlock:
803 spin_unlock(&desc->lock);
804 }
805 }
806
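/*
 * Ask the CPUs still associated with the old vector to clean it up, or
 * release it immediately if none of them are online any more.
 */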
807 static void send_cleanup_vector(struct irq_desc *desc)
808 {
809 cpumask_and(desc->arch.old_cpu_mask, desc->arch.old_cpu_mask,
810 &cpu_online_map);
811 desc->arch.move_cleanup_count = cpumask_weight(desc->arch.old_cpu_mask);
812
813 if ( desc->arch.move_cleanup_count )
814 send_IPI_mask(desc->arch.old_cpu_mask, IRQ_MOVE_CLEANUP_VECTOR);
815 else
816 release_old_vec(desc);
817
818 desc->arch.move_in_progress = 0;
819 }
820
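/*
 * Called on interrupt arrival: if a vector move is in progress and this
 * interrupt is now being taken on its new vector on a CPU in the new mask,
 * kick off cleanup of the old vector.
 */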
821 void irq_complete_move(struct irq_desc *desc)
822 {
823 unsigned vector, me;
824
825 if (likely(!desc->arch.move_in_progress))
826 return;
827
828 vector = (u8)get_irq_regs()->entry_vector;
829 me = smp_processor_id();
830
831 if ( vector == desc->arch.vector &&
832 cpumask_test_cpu(me, desc->arch.cpu_mask) )
833 send_cleanup_vector(desc);
834 }
835
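/*
 * (Re-)assign a vector reachable from the requested CPU mask, update the
 * descriptor's affinity, and return the APIC destination ID to program,
 * or BAD_APICID on failure.
 */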
836 unsigned int set_desc_affinity(struct irq_desc *desc, const cpumask_t *mask)
837 {
838 int ret;
839 unsigned long flags;
840 cpumask_t dest_mask;
841
842 if ( mask && !cpumask_intersects(mask, &cpu_online_map) )
843 return BAD_APICID;
844
845 spin_lock_irqsave(&vector_lock, flags);
846 ret = _assign_irq_vector(desc, mask ?: TARGET_CPUS);
847 spin_unlock_irqrestore(&vector_lock, flags);
848
849 if ( ret < 0 )
850 return BAD_APICID;
851
852 if ( mask )
853 {
854 cpumask_copy(desc->affinity, mask);
855 cpumask_and(&dest_mask, mask, desc->arch.cpu_mask);
856 }
857 else
858 {
859 cpumask_setall(desc->affinity);
860 cpumask_copy(&dest_mask, desc->arch.cpu_mask);
861 }
862 cpumask_and(&dest_mask, &dest_mask, &cpu_online_map);
863
864 return cpu_mask_to_apicid(&dest_mask);
865 }
866
867 /* For re-setting irq interrupt affinity for specific irq */
868 void irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
869 {
870 if (!desc->handler->set_affinity)
871 return;
872
873 ASSERT(spin_is_locked(&desc->lock));
874 desc->status &= ~IRQ_MOVE_PENDING;
875 smp_wmb();
876 cpumask_copy(desc->arch.pending_mask, mask);
877 smp_wmb();
878 desc->status |= IRQ_MOVE_PENDING;
879 }
880
881 void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask)
882 {
883 unsigned long flags;
884 struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
885
886 if ( !desc )
887 return;
888 irq_set_affinity(desc, mask);
889 spin_unlock_irqrestore(&desc->lock, flags);
890 }
891
892 DEFINE_PER_CPU(unsigned int, irq_count);
893 static DEFINE_PER_CPU(bool, check_eoi_deferral);
894
895 uint8_t alloc_hipriority_vector(void)
896 {
897 static uint8_t next = FIRST_HIPRIORITY_VECTOR;
898 BUG_ON(next < FIRST_HIPRIORITY_VECTOR);
899 BUG_ON(next > LAST_HIPRIORITY_VECTOR);
900 return next++;
901 }
902
903 static void (*direct_apic_vector[X86_NR_VECTORS])(struct cpu_user_regs *);
904 void set_direct_apic_vector(
905 uint8_t vector, void (*handler)(struct cpu_user_regs *))
906 {
907 BUG_ON(direct_apic_vector[vector] != NULL);
908 direct_apic_vector[vector] = handler;
909 }
910
911 void alloc_direct_apic_vector(
912 uint8_t *vector, void (*handler)(struct cpu_user_regs *))
913 {
914 static DEFINE_SPINLOCK(lock);
915
916 spin_lock(&lock);
917 if (*vector == 0) {
918 *vector = alloc_hipriority_vector();
919 set_direct_apic_vector(*vector, handler);
920 }
921 spin_unlock(&lock);
922 }
923
924 static void irq_ratelimit_timer_fn(void *data)
925 {
926 struct irq_desc *desc, *tmp;
927 unsigned long flags;
928
929 spin_lock_irqsave(&irq_ratelimit_lock, flags);
930
931 list_for_each_entry_safe ( desc, tmp, &irq_ratelimit_list, rl_link )
932 {
933 spin_lock(&desc->lock);
934 desc->handler->enable(desc);
935 list_del(&desc->rl_link);
936 INIT_LIST_HEAD(&desc->rl_link);
937 spin_unlock(&desc->lock);
938 }
939
940 spin_unlock_irqrestore(&irq_ratelimit_lock, flags);
941 }
942
943 static int __init irq_ratelimit_init(void)
944 {
945 if ( irq_ratelimit_threshold )
946 init_timer(&irq_ratelimit_timer, irq_ratelimit_timer_fn, NULL, 0);
947 return 0;
948 }
949 __initcall(irq_ratelimit_init);
950
951 int __init request_irq(unsigned int irq, unsigned int irqflags,
952 void (*handler)(int, void *, struct cpu_user_regs *),
953 const char * devname, void *dev_id)
954 {
955 struct irqaction * action;
956 int retval;
957
958 /*
959 * Sanity-check: shared interrupts must pass in a real dev-ID,
960 * otherwise we'll have trouble later trying to figure out
961 * which interrupt is which (messes up the interrupt freeing
962 * logic etc).
963 */
964 if (irq >= nr_irqs)
965 return -EINVAL;
966 if (!handler)
967 return -EINVAL;
968
969 action = xmalloc(struct irqaction);
970 if (!action)
971 return -ENOMEM;
972
973 action->handler = handler;
974 action->name = devname;
975 action->dev_id = dev_id;
976 action->free_on_release = 1;
977
978 retval = setup_irq(irq, irqflags, action);
979 if (retval)
980 xfree(action);
981
982 return retval;
983 }
984
985 void __init release_irq(unsigned int irq, const void *dev_id)
986 {
987 struct irq_desc *desc;
988 unsigned long flags;
989 struct irqaction *action;
990
991 desc = irq_to_desc(irq);
992
993 spin_lock_irqsave(&desc->lock,flags);
994 action = desc->action;
995 desc->action = NULL;
996 desc->handler->shutdown(desc);
997 desc->status |= IRQ_DISABLED;
998 spin_unlock_irqrestore(&desc->lock,flags);
999
1000 /* Wait to make sure it's not being used on another CPU */
1001 do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
1002
1003 if (action && action->free_on_release)
1004 xfree(action);
1005 }
1006
1007 int __init setup_irq(unsigned int irq, unsigned int irqflags,
1008 struct irqaction *new)
1009 {
1010 struct irq_desc *desc;
1011 unsigned long flags;
1012
1013 ASSERT(irqflags == 0);
1014
1015 desc = irq_to_desc(irq);
1016
1017 spin_lock_irqsave(&desc->lock,flags);
1018
1019 if ( desc->action != NULL )
1020 {
1021 spin_unlock_irqrestore(&desc->lock,flags);
1022 return -EBUSY;
1023 }
1024
1025 desc->action = new;
1026 desc->status &= ~IRQ_DISABLED;
1027 desc->handler->startup(desc);
1028
1029 spin_unlock_irqrestore(&desc->lock,flags);
1030
1031 return 0;
1032 }
1033
1034
1035 /*
1036 * HANDLING OF GUEST-BOUND PHYSICAL IRQS
1037 */
1038
1039 #define IRQ_MAX_GUESTS 7
1040 typedef struct {
1041 u8 nr_guests;
1042 u8 in_flight;
1043 u8 shareable;
1044 u8 ack_type;
1045 #define ACKTYPE_NONE 0 /* No final acknowledgement is required */
1046 #define ACKTYPE_UNMASK 1 /* Unmask PIC hardware (from any CPU) */
1047 #define ACKTYPE_EOI 2 /* EOI on the CPU that was interrupted */
1048 cpumask_var_t cpu_eoi_map; /* CPUs that need to EOI this interrupt */
1049 struct timer eoi_timer;
1050 struct domain *guest[IRQ_MAX_GUESTS];
1051 } irq_guest_action_t;
1052
1053 /*
1054 * Stack of interrupts awaiting EOI on each CPU. These must be popped in
1055 * order, as only the current highest-priority pending irq can be EOIed.
1056 */
1057 struct pending_eoi {
1058 u32 ready:1; /* Ready for EOI now? */
1059 u32 irq:23; /* irq of the vector */
1060 u32 vector:8; /* vector awaiting EOI */
1061 };
1062
1063 static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_DYNAMIC_VECTORS]);
1064 #define pending_eoi_sp(p) ((p)[NR_DYNAMIC_VECTORS-1].vector)
1065
1066 bool cpu_has_pending_apic_eoi(void)
1067 {
1068 return pending_eoi_sp(this_cpu(pending_eoi)) != 0;
1069 }
1070
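/*
 * Issue the local APIC EOI for an interrupt, unless higher-priority vectors
 * on this CPU still have deferred EOIs outstanding, in which case queue this
 * one on the pending-EOI stack so EOIs are issued in priority order.
 */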
1071 void end_nonmaskable_irq(struct irq_desc *desc, uint8_t vector)
1072 {
1073 struct pending_eoi *peoi = this_cpu(pending_eoi);
1074 unsigned int sp = pending_eoi_sp(peoi);
1075
1076 if ( !this_cpu(check_eoi_deferral) || !sp || peoi[sp - 1].vector < vector )
1077 {
1078 ack_APIC_irq();
1079 return;
1080 }
1081
1082 /* Defer this vector's EOI until all higher ones have been EOI-ed. */
1083 pending_eoi_sp(peoi) = sp + 1;
1084 do {
1085 peoi[sp] = peoi[sp - 1];
1086 } while ( --sp && peoi[sp - 1].vector > vector );
1087 ASSERT(!sp || peoi[sp - 1].vector < vector);
1088
1089 peoi[sp].irq = desc->irq;
1090 peoi[sp].vector = vector;
1091 peoi[sp].ready = 1;
1092 }
1093
1094 static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
1095 {
1096 if ( d->arch.pirq_eoi_map )
1097 {
1098 ASSERT(irq < PAGE_SIZE * BITS_PER_BYTE);
1099 set_bit(irq, d->arch.pirq_eoi_map);
1100 }
1101 }
1102
1103 static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
1104 {
1105 if ( d->arch.pirq_eoi_map )
1106 {
1107 ASSERT(irq < PAGE_SIZE * BITS_PER_BYTE);
1108 clear_bit(irq, d->arch.pirq_eoi_map);
1109 }
1110 }
1111
1112 static void set_eoi_ready(void *data);
1113
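/*
 * Timer fired because one or more guests failed to EOI a guest-bound IRQ in
 * time: forcibly clear the in-flight state and issue the final EOI/unmask so
 * the line is not blocked indefinitely.
 */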
1114 static void irq_guest_eoi_timer_fn(void *data)
1115 {
1116 struct irq_desc *desc = data;
1117 unsigned int i, irq = desc - irq_desc;
1118 irq_guest_action_t *action;
1119
1120 spin_lock_irq(&desc->lock);
1121
1122 if ( !(desc->status & IRQ_GUEST) )
1123 goto out;
1124
1125 action = (irq_guest_action_t *)desc->action;
1126
1127 ASSERT(action->ack_type != ACKTYPE_NONE);
1128
1129 /*
1130 * If no IRQ is in flight at all, or another instance of this timer is
1131 * already running, skip everything to avoid forcing an EOI early.
1132 */
1133 if ( !action->in_flight || timer_is_active(&action->eoi_timer) )
1134 goto out;
1135
1136 for ( i = 0; i < action->nr_guests; i++ )
1137 {
1138 struct domain *d = action->guest[i];
1139 unsigned int pirq = domain_irq_to_pirq(d, irq);
1140
1141 if ( test_and_clear_bool(pirq_info(d, pirq)->masked) )
1142 action->in_flight--;
1143 }
1144
1145 if ( action->in_flight )
1146 {
1147 printk(XENLOG_G_WARNING
1148 "IRQ%u: %d/%d handler(s) still in flight at forced EOI\n",
1149 irq, action->in_flight, action->nr_guests);
1150 ASSERT_UNREACHABLE();
1151 }
1152
1153 switch ( action->ack_type )
1154 {
1155 cpumask_t *cpu_eoi_map;
1156
1157 case ACKTYPE_UNMASK:
1158 if ( desc->handler->end )
1159 desc->handler->end(desc, 0);
1160 break;
1161
1162 case ACKTYPE_EOI:
1163 cpu_eoi_map = this_cpu(scratch_cpumask);
1164 cpumask_copy(cpu_eoi_map, action->cpu_eoi_map);
1165 spin_unlock_irq(&desc->lock);
1166 on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 0);
1167 return;
1168 }
1169
1170 out:
1171 spin_unlock_irq(&desc->lock);
1172 }
1173
1174 /*
1175 * Retrieve Xen irq-descriptor corresponding to a domain-specific irq.
1176 * The descriptor is returned locked. This function is safe against changes
1177 * to the per-domain irq-to-vector mapping.
1178 */
1179 struct irq_desc *domain_spin_lock_irq_desc(
1180 struct domain *d, int pirq, unsigned long *pflags)
1181 {
1182 const struct pirq *info = pirq_info(d, pirq);
1183
1184 return info ? pirq_spin_lock_irq_desc(info, pflags) : NULL;
1185 }
1186
1187 /*
1188 * Same with struct pirq already looked up.
1189 */
1190 struct irq_desc *pirq_spin_lock_irq_desc(
1191 const struct pirq *pirq, unsigned long *pflags)
1192 {
1193 struct irq_desc *desc;
1194 unsigned long flags;
1195
1196 for ( ; ; )
1197 {
1198 int irq = pirq->arch.irq;
1199
1200 if ( irq <= 0 )
1201 return NULL;
1202
1203 desc = irq_to_desc(irq);
1204 spin_lock_irqsave(&desc->lock, flags);
1205 if ( irq == pirq->arch.irq )
1206 break;
1207 spin_unlock_irqrestore(&desc->lock, flags);
1208 }
1209
1210 if ( pflags )
1211 *pflags = flags;
1212
1213 return desc;
1214 }
1215
1216 static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq,
1217 struct pirq **pinfo)
1218 {
1219 int err = radix_tree_insert(&d->arch.irq_pirq, irq,
1220 radix_tree_int_to_ptr(0));
1221 struct pirq *info;
1222
1223 if ( err && err != -EEXIST )
1224 return err;
1225 info = pirq_get_info(d, pirq);
1226 if ( !info )
1227 {
1228 if ( !err )
1229 radix_tree_delete(&d->arch.irq_pirq, irq);
1230 return -ENOMEM;
1231 }
1232 *pinfo = info;
1233
1234 return !!err;
1235 }
1236
1237 static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
1238 {
1239 radix_tree_replace_slot(
1240 radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
1241 radix_tree_int_to_ptr(pirq->pirq));
1242 pirq->arch.irq = irq;
1243 }
1244
1245 static void clear_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
1246 {
1247 pirq->arch.irq = 0;
1248 radix_tree_replace_slot(
1249 radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
1250 radix_tree_int_to_ptr(0));
1251 }
1252
1253 static void cleanup_domain_irq_pirq(struct domain *d, int irq,
1254 struct pirq *pirq)
1255 {
1256 pirq_cleanup_check(pirq, d);
1257 radix_tree_delete(&d->arch.irq_pirq, irq);
1258 }
1259
1260 int init_domain_irq_mapping(struct domain *d)
1261 {
1262 unsigned int i;
1263 int err = 0;
1264
1265 radix_tree_init(&d->arch.irq_pirq);
1266 if ( is_hvm_domain(d) )
1267 radix_tree_init(&d->arch.hvm.emuirq_pirq);
1268
1269 for ( i = 1; platform_legacy_irq(i); ++i )
1270 {
1271 struct pirq *info;
1272
1273 if ( IO_APIC_IRQ(i) )
1274 continue;
1275 err = prepare_domain_irq_pirq(d, i, i, &info);
1276 if ( err )
1277 {
1278 ASSERT(err < 0);
1279 break;
1280 }
1281 set_domain_irq_pirq(d, i, info);
1282 }
1283
1284 if ( err )
1285 cleanup_domain_irq_mapping(d);
1286 return err;
1287 }
1288
1289 void cleanup_domain_irq_mapping(struct domain *d)
1290 {
1291 radix_tree_destroy(&d->arch.irq_pirq, NULL);
1292 if ( is_hvm_domain(d) )
1293 radix_tree_destroy(&d->arch.hvm.emuirq_pirq, NULL);
1294 }
1295
1296 struct pirq *alloc_pirq_struct(struct domain *d)
1297 {
1298 size_t sz = is_hvm_domain(d) ? sizeof(struct pirq) :
1299 offsetof(struct pirq, arch.hvm);
1300 struct pirq *pirq = xzalloc_bytes(sz);
1301
1302 if ( pirq )
1303 {
1304 if ( is_hvm_domain(d) )
1305 {
1306 pirq->arch.hvm.emuirq = IRQ_UNBOUND;
1307 pt_pirq_init(d, &pirq->arch.hvm.dpci);
1308 }
1309 }
1310
1311 return pirq;
1312 }
1313
1314 void (pirq_cleanup_check)(struct pirq *pirq, struct domain *d)
1315 {
1316 /*
1317 * Check whether all fields have their default values, and delete
1318 * the entry from the tree if so.
1319 *
1320 * NB: Common parts were already checked.
1321 */
1322 if ( pirq->arch.irq )
1323 return;
1324
1325 if ( is_hvm_domain(d) )
1326 {
1327 if ( pirq->arch.hvm.emuirq != IRQ_UNBOUND )
1328 return;
1329 if ( !pt_pirq_cleanup_check(&pirq->arch.hvm.dpci) )
1330 return;
1331 }
1332
1333 if ( radix_tree_delete(&d->pirq_tree, pirq->pirq) != pirq )
1334 BUG_ON(!d->is_dying);
1335 }
1336
1337 /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
1338 static void flush_ready_eoi(void)
1339 {
1340 struct pending_eoi *peoi = this_cpu(pending_eoi);
1341 struct irq_desc *desc;
1342 int irq, sp;
1343
1344 ASSERT(!local_irq_is_enabled());
1345
1346 sp = pending_eoi_sp(peoi);
1347
1348 while ( (--sp >= 0) && peoi[sp].ready )
1349 {
1350 irq = peoi[sp].irq;
1351 ASSERT(irq > 0);
1352 desc = irq_to_desc(irq);
1353 spin_lock(&desc->lock);
1354 if ( desc->handler->end )
1355 desc->handler->end(desc, peoi[sp].vector);
1356 spin_unlock(&desc->lock);
1357 }
1358
1359 pending_eoi_sp(peoi) = sp+1;
1360 }
1361
1362 static void __set_eoi_ready(struct irq_desc *desc)
1363 {
1364 irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
1365 struct pending_eoi *peoi = this_cpu(pending_eoi);
1366 int irq, sp;
1367
1368 irq = desc - irq_desc;
1369
1370 if ( !(desc->status & IRQ_GUEST) ||
1371 (action->in_flight != 0) ||
1372 !cpumask_test_and_clear_cpu(smp_processor_id(),
1373 action->cpu_eoi_map) )
1374 return;
1375
1376 sp = pending_eoi_sp(peoi);
1377
1378 do {
1379 ASSERT(sp > 0);
1380 } while ( peoi[--sp].irq != irq );
1381 ASSERT(!peoi[sp].ready);
1382 peoi[sp].ready = 1;
1383 }
1384
1385 /* Mark specified IRQ as ready-for-EOI (if it really is) and attempt to EOI. */
1386 static void set_eoi_ready(void *data)
1387 {
1388 struct irq_desc *desc = data;
1389
1390 ASSERT(!local_irq_is_enabled());
1391
1392 spin_lock(&desc->lock);
1393 __set_eoi_ready(desc);
1394 spin_unlock(&desc->lock);
1395
1396 flush_ready_eoi();
1397 }
1398
1399 void pirq_guest_eoi(struct pirq *pirq)
1400 {
1401 struct irq_desc *desc;
1402
1403 ASSERT(local_irq_is_enabled());
1404 desc = pirq_spin_lock_irq_desc(pirq, NULL);
1405 if ( desc )
1406 desc_guest_eoi(desc, pirq);
1407 }
1408
1409 void desc_guest_eoi(struct irq_desc *desc, struct pirq *pirq)
1410 {
1411 irq_guest_action_t *action;
1412 cpumask_t cpu_eoi_map;
1413 int irq;
1414
1415 if ( !(desc->status & IRQ_GUEST) )
1416 {
1417 spin_unlock_irq(&desc->lock);
1418 return;
1419 }
1420
1421 action = (irq_guest_action_t *)desc->action;
1422 irq = desc - irq_desc;
1423
1424 if ( unlikely(!test_and_clear_bool(pirq->masked)) ||
1425 unlikely(--action->in_flight != 0) )
1426 {
1427 spin_unlock_irq(&desc->lock);
1428 return;
1429 }
1430
1431 stop_timer(&action->eoi_timer);
1432
1433 if ( action->ack_type == ACKTYPE_UNMASK )
1434 {
1435 ASSERT(cpumask_empty(action->cpu_eoi_map));
1436 if ( desc->handler->end )
1437 desc->handler->end(desc, 0);
1438 spin_unlock_irq(&desc->lock);
1439 return;
1440 }
1441
1442 ASSERT(action->ack_type == ACKTYPE_EOI);
1443
1444 cpumask_copy(&cpu_eoi_map, action->cpu_eoi_map);
1445
1446 if ( __cpumask_test_and_clear_cpu(smp_processor_id(), &cpu_eoi_map) )
1447 {
1448 __set_eoi_ready(desc);
1449 spin_unlock(&desc->lock);
1450 flush_ready_eoi();
1451 local_irq_enable();
1452 }
1453 else
1454 {
1455 spin_unlock_irq(&desc->lock);
1456 }
1457
1458 if ( !cpumask_empty(&cpu_eoi_map) )
1459 on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
1460 }
1461
1462 int pirq_guest_unmask(struct domain *d)
1463 {
1464 unsigned int pirq = 0, n, i;
1465 struct pirq *pirqs[16];
1466
1467 do {
1468 n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
1469 ARRAY_SIZE(pirqs));
1470 for ( i = 0; i < n; ++i )
1471 {
1472 pirq = pirqs[i]->pirq;
1473 if ( pirqs[i]->masked &&
1474 !evtchn_port_is_masked(d, pirqs[i]->evtchn) )
1475 pirq_guest_eoi(pirqs[i]);
1476 }
1477 } while ( ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
1478
1479 return 0;
1480 }
1481
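/*
 * Determine how a guest-bound IRQ must be finally acknowledged: not at all
 * (edge-triggered), by unmasking the PIC from any CPU, or by an EOI on the
 * CPU that took the interrupt.
 */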
1482 static int irq_acktype(const struct irq_desc *desc)
1483 {
1484 if ( desc->handler == &no_irq_type )
1485 return ACKTYPE_NONE;
1486
1487 /*
1488 * Edge-triggered IO-APIC and LAPIC interrupts need no final
1489 * acknowledgement: we ACK early during interrupt processing.
1490 */
1491 if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ||
1492 !strcmp(desc->handler->typename, "local-APIC-edge") )
1493 return ACKTYPE_NONE;
1494
1495 /*
1496 * MSIs are treated as edge-triggered interrupts, except
1497 * when there is no proper way to mask them.
1498 */
1499 if ( desc->msi_desc )
1500 return msi_maskable_irq(desc->msi_desc) ? ACKTYPE_NONE : ACKTYPE_EOI;
1501
1502 /*
1503 * Level-triggered IO-APIC interrupts need to be acknowledged on the CPU
1504 * on which they were received. This is because we tickle the LAPIC to EOI.
1505 */
1506 if ( !strcmp(desc->handler->typename, "IO-APIC-level") )
1507 return desc->handler->ack == irq_complete_move ?
1508 ACKTYPE_EOI : ACKTYPE_UNMASK;
1509
1510 /* Legacy PIC interrupts can be acknowledged from any CPU. */
1511 if ( !strcmp(desc->handler->typename, "XT-PIC") )
1512 return ACKTYPE_UNMASK;
1513
1514 printk("Unknown PIC type '%s' for IRQ%d\n",
1515 desc->handler->typename, desc->irq);
1516 BUG();
1517
1518 return 0;
1519 }
1520
1521 int pirq_shared(struct domain *d, int pirq)
1522 {
1523 struct irq_desc *desc;
1524 irq_guest_action_t *action;
1525 unsigned long flags;
1526 int shared;
1527
1528 desc = domain_spin_lock_irq_desc(d, pirq, &flags);
1529 if ( desc == NULL )
1530 return 0;
1531
1532 action = (irq_guest_action_t *)desc->action;
1533 shared = ((desc->status & IRQ_GUEST) && (action->nr_guests > 1));
1534
1535 spin_unlock_irqrestore(&desc->lock, flags);
1536
1537 return shared;
1538 }
1539
1540 int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share)
1541 {
1542 unsigned int irq;
1543 struct irq_desc *desc;
1544 irq_guest_action_t *action, *newaction = NULL;
1545 int rc = 0;
1546
1547 WARN_ON(!spin_is_locked(&v->domain->event_lock));
1548 BUG_ON(!local_irq_is_enabled());
1549
1550 retry:
1551 desc = pirq_spin_lock_irq_desc(pirq, NULL);
1552 if ( desc == NULL )
1553 {
1554 rc = -EINVAL;
1555 goto out;
1556 }
1557
1558 action = (irq_guest_action_t *)desc->action;
1559 irq = desc - irq_desc;
1560
1561 if ( !(desc->status & IRQ_GUEST) )
1562 {
1563 if ( desc->action != NULL )
1564 {
1565 printk(XENLOG_G_INFO
1566 "Cannot bind IRQ%d to dom%d. In use by '%s'.\n",
1567 pirq->pirq, v->domain->domain_id, desc->action->name);
1568 rc = -EBUSY;
1569 goto unlock_out;
1570 }
1571
1572 if ( newaction == NULL )
1573 {
1574 spin_unlock_irq(&desc->lock);
1575 if ( (newaction = xmalloc(irq_guest_action_t)) != NULL &&
1576 zalloc_cpumask_var(&newaction->cpu_eoi_map) )
1577 goto retry;
1578 xfree(newaction);
1579 printk(XENLOG_G_INFO
1580 "Cannot bind IRQ%d to dom%d. Out of memory.\n",
1581 pirq->pirq, v->domain->domain_id);
1582 return -ENOMEM;
1583 }
1584
1585 action = newaction;
1586 desc->action = (struct irqaction *)action;
1587 newaction = NULL;
1588
1589 action->nr_guests = 0;
1590 action->in_flight = 0;
1591 action->shareable = will_share;
1592 action->ack_type = irq_acktype(desc);
1593 init_timer(&action->eoi_timer, irq_guest_eoi_timer_fn, desc, 0);
1594
1595 desc->status |= IRQ_GUEST;
1596
1597 /*
1598 * Attempt to bind the interrupt target to the correct (or at least
1599 * some online) CPU.
1600 */
1601 if ( desc->handler->set_affinity )
1602 {
1603 const cpumask_t *affinity = NULL;
1604
1605 if ( !opt_noirqbalance )
1606 affinity = cpumask_of(v->processor);
1607 else if ( !cpumask_intersects(desc->affinity, &cpu_online_map) )
1608 {
1609 cpumask_setall(desc->affinity);
1610 affinity = &cpumask_all;
1611 }
1612 else if ( !cpumask_intersects(desc->arch.cpu_mask,
1613 &cpu_online_map) )
1614 affinity = desc->affinity;
1615 if ( affinity )
1616 desc->handler->set_affinity(desc, affinity);
1617 }
1618
1619 desc->status &= ~IRQ_DISABLED;
1620 desc->handler->startup(desc);
1621 }
1622 else if ( !will_share || !action->shareable )
1623 {
1624 printk(XENLOG_G_INFO "Cannot bind IRQ%d to dom%d. %s.\n",
1625 pirq->pirq, v->domain->domain_id,
1626 will_share ? "Others do not share"
1627 : "Will not share with others");
1628 rc = -EBUSY;
1629 goto unlock_out;
1630 }
1631 else if ( action->nr_guests == 0 )
1632 {
1633 /*
1634 * Indicates that an ACKTYPE_EOI interrupt is being released.
1635 * Wait for that to happen before continuing.
1636 */
1637 ASSERT(action->ack_type == ACKTYPE_EOI);
1638 ASSERT(desc->status & IRQ_DISABLED);
1639 spin_unlock_irq(&desc->lock);
1640 cpu_relax();
1641 goto retry;
1642 }
1643
1644 if ( action->nr_guests == IRQ_MAX_GUESTS )
1645 {
1646 printk(XENLOG_G_INFO "Cannot bind IRQ%d to dom%d. "
1647 "Already at max share.\n",
1648 pirq->pirq, v->domain->domain_id);
1649 rc = -EBUSY;
1650 goto unlock_out;
1651 }
1652
1653 action->guest[action->nr_guests++] = v->domain;
1654
1655 if ( action->ack_type != ACKTYPE_NONE )
1656 set_pirq_eoi(v->domain, pirq->pirq);
1657 else
1658 clear_pirq_eoi(v->domain, pirq->pirq);
1659
1660 unlock_out:
1661 spin_unlock_irq(&desc->lock);
1662 out:
1663 if ( newaction != NULL )
1664 {
1665 free_cpumask_var(newaction->cpu_eoi_map);
1666 xfree(newaction);
1667 }
1668 return rc;
1669 }
1670
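/*
 * Remove one domain from a guest-bound IRQ. If it was the last guest, the
 * IRQ is disabled, any outstanding EOIs are flushed, and the guest action
 * block is returned for the caller to free.
 */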
1671 static irq_guest_action_t *__pirq_guest_unbind(
1672 struct domain *d, struct pirq *pirq, struct irq_desc *desc)
1673 {
1674 unsigned int irq;
1675 irq_guest_action_t *action;
1676 cpumask_t cpu_eoi_map;
1677 int i;
1678
1679 action = (irq_guest_action_t *)desc->action;
1680 irq = desc - irq_desc;
1681
1682 if ( unlikely(action == NULL) )
1683 {
1684 dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
1685 d->domain_id, pirq->pirq);
1686 return NULL;
1687 }
1688
1689 BUG_ON(!(desc->status & IRQ_GUEST));
1690
1691 for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
1692 continue;
1693 BUG_ON(i == action->nr_guests);
1694 memmove(&action->guest[i], &action->guest[i+1],
1695 (action->nr_guests-i-1) * sizeof(action->guest[0]));
1696 action->nr_guests--;
1697
1698 switch ( action->ack_type )
1699 {
1700 case ACKTYPE_UNMASK:
1701 if ( test_and_clear_bool(pirq->masked) &&
1702 (--action->in_flight == 0) &&
1703 desc->handler->end )
1704 desc->handler->end(desc, 0);
1705 break;
1706 case ACKTYPE_EOI:
1707 /* NB. If #guests == 0 then we clear the eoi_map later on. */
1708 if ( test_and_clear_bool(pirq->masked) &&
1709 (--action->in_flight == 0) &&
1710 (action->nr_guests != 0) )
1711 {
1712 cpumask_copy(&cpu_eoi_map, action->cpu_eoi_map);
1713 spin_unlock_irq(&desc->lock);
1714 on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
1715 spin_lock_irq(&desc->lock);
1716 }
1717 break;
1718 }
1719
1720 /*
1721 * The guest cannot re-bind to this IRQ until this function returns. So,
1722 * when we have flushed this IRQ from ->masked, it should remain flushed.
1723 */
1724 BUG_ON(pirq->masked);
1725
1726 if ( action->nr_guests != 0 )
1727 return NULL;
1728
1729 BUG_ON(action->in_flight != 0);
1730
1731 /* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */
1732 desc->handler->disable(desc);
1733 desc->status |= IRQ_DISABLED;
1734
1735 /*
1736 * Mark any remaining pending EOIs as ready to flush.
1737 * NOTE: We will need to make this a stronger barrier if in future we allow
1738 * interrupt vectors to be re-bound to a different PIC. In that case we
1739 * would need to flush all ready EOIs before returning as otherwise the
1740 * desc->handler could change and we would call the wrong 'end' hook.
1741 */
1742 cpumask_copy(&cpu_eoi_map, action->cpu_eoi_map);
1743 if ( !cpumask_empty(&cpu_eoi_map) )
1744 {
1745 BUG_ON(action->ack_type != ACKTYPE_EOI);
1746 spin_unlock_irq(&desc->lock);
1747 on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 1);
1748 spin_lock_irq(&desc->lock);
1749 }
1750
1751 BUG_ON(!cpumask_empty(action->cpu_eoi_map));
1752
1753 desc->action = NULL;
1754 desc->status &= ~(IRQ_GUEST|IRQ_INPROGRESS);
1755 desc->handler->shutdown(desc);
1756
1757 /* Caller frees the old guest descriptor block. */
1758 return action;
1759 }
1760
1761 void pirq_guest_unbind(struct domain *d, struct pirq *pirq)
1762 {
1763 irq_guest_action_t *oldaction = NULL;
1764 struct irq_desc *desc;
1765 int irq = 0;
1766
1767 WARN_ON(!spin_is_locked(&d->event_lock));
1768
1769 BUG_ON(!local_irq_is_enabled());
1770 desc = pirq_spin_lock_irq_desc(pirq, NULL);
1771
1772 if ( desc == NULL )
1773 {
1774 irq = -pirq->arch.irq;
1775 BUG_ON(irq <= 0);
1776 desc = irq_to_desc(irq);
1777 spin_lock_irq(&desc->lock);
1778 clear_domain_irq_pirq(d, irq, pirq);
1779 }
1780 else
1781 {
1782 oldaction = __pirq_guest_unbind(d, pirq, desc);
1783 }
1784
1785 spin_unlock_irq(&desc->lock);
1786
1787 if ( oldaction != NULL )
1788 {
1789 kill_timer(&oldaction->eoi_timer);
1790 free_cpumask_var(oldaction->cpu_eoi_map);
1791 xfree(oldaction);
1792 }
1793 else if ( irq > 0 )
1794 cleanup_domain_irq_pirq(d, irq, pirq);
1795 }
1796
1797 static bool pirq_guest_force_unbind(struct domain *d, struct pirq *pirq)
1798 {
1799 struct irq_desc *desc;
1800 irq_guest_action_t *action, *oldaction = NULL;
1801 unsigned int i;
1802 bool bound = false;
1803
1804 WARN_ON(!spin_is_locked(&d->event_lock));
1805
1806 BUG_ON(!local_irq_is_enabled());
1807 desc = pirq_spin_lock_irq_desc(pirq, NULL);
1808 BUG_ON(desc == NULL);
1809
1810 if ( !(desc->status & IRQ_GUEST) )
1811 goto out;
1812
1813 action = (irq_guest_action_t *)desc->action;
1814 if ( unlikely(action == NULL) )
1815 {
1816 dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
1817 d->domain_id, pirq->pirq);
1818 goto out;
1819 }
1820
1821 for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
1822 continue;
1823 if ( i == action->nr_guests )
1824 goto out;
1825
1826 bound = true;
1827 oldaction = __pirq_guest_unbind(d, pirq, desc);
1828
1829 out:
1830 spin_unlock_irq(&desc->lock);
1831
1832 if ( oldaction != NULL )
1833 {
1834 kill_timer(&oldaction->eoi_timer);
1835 free_cpumask_var(oldaction->cpu_eoi_map);
1836 xfree(oldaction);
1837 }
1838
1839 return bound;
1840 }
1841
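/*
 * Deliver a guest-bound interrupt: record a pending EOI where needed, mark
 * the pirq masked/in flight for each bound domain, send each domain the
 * event, and arm the forced-EOI timer.
 */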
1842 static void do_IRQ_guest(struct irq_desc *desc, unsigned int vector)
1843 {
1844 irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
1845 unsigned int i;
1846 struct pending_eoi *peoi = this_cpu(pending_eoi);
1847
1848 if ( unlikely(!action->nr_guests) )
1849 {
1850 /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. */
1851 ASSERT(action->ack_type == ACKTYPE_EOI);
1852 ASSERT(desc->status & IRQ_DISABLED);
1853 if ( desc->handler->end )
1854 desc->handler->end(desc, vector);
1855 return;
1856 }
1857
1858 /*
1859 * Stop the timer as soon as we're certain we'll set it again further down,
1860 * to prevent the current timeout (if any) from needlessly expiring.
1861 */
1862 if ( action->ack_type != ACKTYPE_NONE )
1863 stop_timer(&action->eoi_timer);
1864
1865 if ( action->ack_type == ACKTYPE_EOI )
1866 {
1867 unsigned int sp = pending_eoi_sp(peoi);
1868
1869 ASSERT(sp < (NR_DYNAMIC_VECTORS - 1));
1870 ASSERT(!sp || (peoi[sp - 1].vector < vector));
1871 peoi[sp].irq = desc->irq;
1872 peoi[sp].vector = vector;
1873 peoi[sp].ready = 0;
1874 pending_eoi_sp(peoi) = sp + 1;
1875 cpumask_set_cpu(smp_processor_id(), action->cpu_eoi_map);
1876 }
1877
1878 for ( i = 0; i < action->nr_guests; i++ )
1879 {
1880 struct domain *d = action->guest[i];
1881 struct pirq *pirq = pirq_info(d, domain_irq_to_pirq(d, desc->irq));
1882
1883 if ( (action->ack_type != ACKTYPE_NONE) &&
1884 !test_and_set_bool(pirq->masked) )
1885 action->in_flight++;
1886 if ( !is_hvm_domain(d) || !hvm_do_IRQ_dpci(d, pirq) )
1887 send_guest_pirq(d, pirq);
1888 }
1889
1890 if ( action->ack_type != ACKTYPE_NONE )
1891 {
1892 migrate_timer(&action->eoi_timer, smp_processor_id());
1893 set_timer(&action->eoi_timer, NOW() + MILLISECS(1));
1894 }
1895 }
1896
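/*
 * Common interrupt entry point: look up the IRQ for the arriving vector,
 * dispatch it either to guest domains or to the internal handler, and deal
 * with unmapped vectors, rate limiting and deferred EOIs.
 */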
1897 void do_IRQ(struct cpu_user_regs *regs)
1898 {
1899 struct irqaction *action;
1900 uint32_t tsc_in;
1901 struct irq_desc *desc;
1902 unsigned int vector = (uint8_t)regs->entry_vector;
1903 int irq = this_cpu(vector_irq)[vector];
1904 struct cpu_user_regs *old_regs = set_irq_regs(regs);
1905
1906 perfc_incr(irqs);
1907 this_cpu(irq_count)++;
1908 irq_enter();
1909
1910 if ( irq < 0 )
1911 {
1912 if ( direct_apic_vector[vector] )
1913 direct_apic_vector[vector](regs);
1914 else
1915 {
1916 const char *kind = ", LAPIC";
1917
1918 if ( apic_isr_read(vector) )
1919 ack_APIC_irq();
1920 else
1921 kind = "";
1922 if ( !(vector >= FIRST_LEGACY_VECTOR &&
1923 vector <= LAST_LEGACY_VECTOR &&
1924 !smp_processor_id() &&
1925 bogus_8259A_irq(vector - FIRST_LEGACY_VECTOR)) )
1926 {
1927 printk("CPU%u: No irq handler for vector %02x (IRQ %d%s)\n",
1928 smp_processor_id(), vector, irq, kind);
1929 desc = irq_to_desc(~irq);
1930 if ( ~irq < nr_irqs && irq_desc_initialized(desc) )
1931 {
1932 spin_lock(&desc->lock);
1933 printk("IRQ%d a=%04lx[%04lx,%04lx] v=%02x[%02x] t=%s s=%08x\n",
1934 ~irq, *cpumask_bits(desc->affinity),
1935 *cpumask_bits(desc->arch.cpu_mask),
1936 *cpumask_bits(desc->arch.old_cpu_mask),
1937 desc->arch.vector, desc->arch.old_vector,
1938 desc->handler->typename, desc->status);
1939 spin_unlock(&desc->lock);
1940 }
1941 }
1942 TRACE_1D(TRC_HW_IRQ_UNMAPPED_VECTOR, vector);
1943 }
1944 goto out_no_unlock;
1945 }
1946
1947 desc = irq_to_desc(irq);
1948
1949 spin_lock(&desc->lock);
1950 desc->handler->ack(desc);
1951
1952 if ( likely(desc->status & IRQ_GUEST) )
1953 {
1954 if ( irq_ratelimit_timer.function && /* irq rate limiting enabled? */
1955 unlikely(desc->rl_cnt++ >= irq_ratelimit_threshold) )
1956 {
1957 s_time_t now = NOW();
1958
1959 if ( now < (desc->rl_quantum_start + MILLISECS(10)) )
1960 {
1961 desc->handler->disable(desc);
1962 /*
1963 * If handler->disable doesn't actually mask the interrupt, a
1964 * disabled irq can still fire. This check also avoids possible
1965 * deadlocks if ratelimit_timer_fn runs at the same time.
1966 */
1967 if ( likely(list_empty(&desc->rl_link)) )
1968 {
1969 spin_lock(&irq_ratelimit_lock);
1970 if ( list_empty(&irq_ratelimit_list) )
1971 set_timer(&irq_ratelimit_timer, now + MILLISECS(10));
1972 list_add(&desc->rl_link, &irq_ratelimit_list);
1973 spin_unlock(&irq_ratelimit_lock);
1974 }
1975 goto out;
1976 }
1977 desc->rl_cnt = 0;
1978 desc->rl_quantum_start = now;
1979 }
1980
1981 tsc_in = tb_init_done ? get_cycles() : 0;
1982 do_IRQ_guest(desc, vector);
1983 TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
1984 goto out_no_end;
1985 }
1986
1987 desc->status &= ~IRQ_REPLAY;
1988 desc->status |= IRQ_PENDING;
1989
1990 /*
1991 * Since we set PENDING, if another processor is handling a different
1992 * instance of this same irq, the other processor will take care of it.
1993 */
1994 if ( desc->status & (IRQ_DISABLED | IRQ_INPROGRESS) )
1995 goto out;
1996
1997 desc->status |= IRQ_INPROGRESS;
1998
1999 action = desc->action;
2000 while ( desc->status & IRQ_PENDING )
2001 {
2002 desc->status &= ~IRQ_PENDING;
2003 spin_unlock_irq(&desc->lock);
2004
2005 tsc_in = tb_init_done ? get_cycles() : 0;
2006 action->handler(irq, action->dev_id, regs);
2007 TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
2008
2009 spin_lock_irq(&desc->lock);
2010 }
2011
2012 desc->status &= ~IRQ_INPROGRESS;
2013
2014 out:
2015 if ( desc->handler->end )
2016 {
2017 /*
2018 * If higher priority vectors still have their EOIs pending, we may
2019 * not issue an EOI here, as this would EOI the highest priority one.
2020 */
2021 this_cpu(check_eoi_deferral) = true;
2022 desc->handler->end(desc, vector);
2023 this_cpu(check_eoi_deferral) = false;
2024
2025 spin_unlock(&desc->lock);
2026 flush_ready_eoi();
2027 goto out_no_unlock;
2028 }
2029
2030 out_no_end:
2031 spin_unlock(&desc->lock);
2032 out_no_unlock:
2033 irq_exit();
2034 set_irq_regs(old_regs);
2035 }
2036
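/*
 * A pirq slot is free if it has neither a host IRQ nor, for HVM domains,
 * an emulated IRQ bound to it.
 */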
2037 static inline bool is_free_pirq(const struct domain *d,
2038 const struct pirq *pirq)
2039 {
2040 return !pirq || (!pirq->arch.irq && (!is_hvm_domain(d) ||
2041 pirq->arch.hvm.emuirq == IRQ_UNBOUND));
2042 }
2043
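/*
 * Find a free pirq in @d.  MAP_PIRQ_TYPE_GSI requests are first looked for
 * in the GSI range (skipping the 16 legacy ISA slots), falling back to the
 * dynamic range which is always allocated top-down.  Returns the pirq, or
 * -ENOSPC if the space is exhausted.
 */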
2044 int get_free_pirq(struct domain *d, int type)
2045 {
2046 int i;
2047
2048 ASSERT(spin_is_locked(&d->event_lock));
2049
2050 if ( type == MAP_PIRQ_TYPE_GSI )
2051 {
2052 for ( i = 16; i < nr_irqs_gsi; i++ )
2053 if ( is_free_pirq(d, pirq_info(d, i)) )
2054 {
2055 pirq_get_info(d, i);
2056 return i;
2057 }
2058 }
2059 for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; i-- )
2060 if ( is_free_pirq(d, pirq_info(d, i)) )
2061 {
2062 pirq_get_info(d, i);
2063 return i;
2064 }
2065
2066 return -ENOSPC;
2067 }
2068
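/*
 * Like get_free_pirq(), but reserve a block of @nr contiguous free pirqs,
 * searching downwards from the top of the dynamic range.  Returns the
 * lowest pirq of the block, or -ENOSPC.
 */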
2069 int get_free_pirqs(struct domain *d, unsigned int nr)
2070 {
2071 unsigned int i, found = 0;
2072
2073 ASSERT(spin_is_locked(&d->event_lock));
2074
2075 for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
2076 if ( is_free_pirq(d, pirq_info(d, i)) )
2077 {
2078 pirq_get_info(d, i);
2079 if ( ++found == nr )
2080 return i;
2081 }
2082 else
2083 found = 0;
2084
2085 return -ENOSPC;
2086 }
2087
2088 #define MAX_MSI_IRQS 32 /* limited by MSI capability struct properties */
2089
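/*
 * Bind host IRQ @irq to @pirq of domain @d.  The caller must hold
 * d->event_lock, plus the PCI device lock for the MSI types (both asserted
 * below).  For MAP_PIRQ_TYPE_MSI/MULTI_MSI, @data points to a struct
 * msi_info and the MSI capability is enabled here; for multi-vector MSI
 * the remaining vectors get freshly created IRQs bound to pirq+1 onwards.
 * On failure all partially established state (access grants, pirq<->irq
 * mappings, MSI setup) is torn down again before returning the error.
 *
 * Typical GSI use, as in allocate_and_map_gsi_pirq() further down:
 *
 *     spin_lock(&d->event_lock);
 *     ret = map_domain_pirq(d, pirq, irq, MAP_PIRQ_TYPE_GSI, NULL);
 *     spin_unlock(&d->event_lock);
 */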
2090 int map_domain_pirq(
2091 struct domain *d, int pirq, int irq, int type, void *data)
2092 {
2093 int ret = 0;
2094 int old_irq, old_pirq;
2095 struct pirq *info;
2096 struct irq_desc *desc;
2097 unsigned long flags;
2098 DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {};
2099 DECLARE_BITMAP(granted, MAX_MSI_IRQS) = {};
2100
2101 ASSERT(spin_is_locked(&d->event_lock));
2102
2103     if ( !irq_access_permitted(current->domain, irq) )
2104 return -EPERM;
2105
2106 if ( pirq < 0 || pirq >= d->nr_pirqs || irq <= 0 || irq >= nr_irqs )
2107 {
2108 dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or irq %d\n",
2109 d->domain_id, pirq, irq);
2110 return -EINVAL;
2111 }
2112
2113 old_irq = domain_pirq_to_irq(d, pirq);
2114 old_pirq = domain_irq_to_pirq(d, irq);
2115
2116 if ( (old_irq > 0 && (old_irq != irq) ) ||
2117 (old_pirq && (old_pirq != pirq)) )
2118 {
2119 dprintk(XENLOG_G_WARNING,
2120 "dom%d: pirq %d or irq %d already mapped (%d,%d)\n",
2121 d->domain_id, pirq, irq, old_pirq, old_irq);
2122 return 0;
2123 }
2124
2125 ret = xsm_map_domain_irq(XSM_HOOK, d, irq, data);
2126 if ( ret )
2127 {
2128 dprintk(XENLOG_G_ERR, "dom%d: could not permit access to irq %d mapping to pirq %d\n",
2129 d->domain_id, irq, pirq);
2130 return ret;
2131 }
2132
2133 if ( likely(!irq_access_permitted(d, irq)) )
2134 {
2135 ret = irq_permit_access(d, irq);
2136 if ( ret )
2137 {
2138 printk(XENLOG_G_ERR
2139 "dom%d: could not permit access to IRQ%d (pirq %d)\n",
2140 d->domain_id, irq, pirq);
2141 return ret;
2142 }
2143 __set_bit(0, granted);
2144 }
2145
2146 ret = prepare_domain_irq_pirq(d, irq, pirq, &info);
2147 if ( ret < 0 )
2148 goto revoke;
2149 if ( !ret )
2150 __set_bit(0, prepared);
2151
2152 desc = irq_to_desc(irq);
2153
2154 if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
2155 {
2156 struct msi_info *msi = (struct msi_info *)data;
2157 struct msi_desc *msi_desc;
2158 struct pci_dev *pdev;
2159 unsigned int nr = 0;
2160
2161 ASSERT(pcidevs_locked());
2162
2163 ret = -ENODEV;
2164 if ( !cpu_has_apic )
2165 goto done;
2166
2167 pdev = pci_get_pdev_by_domain(d, msi->seg, msi->bus, msi->devfn);
2168 if ( !pdev )
2169 goto done;
2170
2171 ret = pci_enable_msi(msi, &msi_desc);
2172 if ( ret )
2173 {
2174 if ( ret > 0 )
2175 {
2176 msi->entry_nr = ret;
2177 ret = -ENFILE;
2178 }
2179 goto done;
2180 }
2181
2182 spin_lock_irqsave(&desc->lock, flags);
2183
2184 if ( desc->handler != &no_irq_type )
2185 {
2186 spin_unlock_irqrestore(&desc->lock, flags);
2187 dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
2188 d->domain_id, irq);
2189 pci_disable_msi(msi_desc);
2190 msi_desc->irq = -1;
2191 msi_free_irq(msi_desc);
2192 ret = -EBUSY;
2193 goto done;
2194 }
2195
2196 while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
2197 {
2198 if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
2199 !desc->arch.used_vectors )
2200 {
2201 desc->arch.used_vectors = &pdev->arch.used_vectors;
2202 if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
2203 {
2204 int vector = desc->arch.vector;
2205
2206 ASSERT(!test_bit(vector, desc->arch.used_vectors));
2207 set_bit(vector, desc->arch.used_vectors);
2208 }
2209 }
2210 if ( type == MAP_PIRQ_TYPE_MSI ||
2211 msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
2212 ++nr == msi->entry_nr )
2213 break;
2214
2215 set_domain_irq_pirq(d, irq, info);
2216 spin_unlock_irqrestore(&desc->lock, flags);
2217
2218 info = NULL;
2219 irq = create_irq(NUMA_NO_NODE, true);
2220 ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
2221 : irq;
2222 if ( ret < 0 )
2223 break;
2224 if ( !ret )
2225 __set_bit(nr, prepared);
2226 msi_desc[nr].irq = irq;
2227
2228 if ( likely(!irq_access_permitted(d, irq)) )
2229 {
2230 if ( irq_permit_access(d, irq) )
2231 printk(XENLOG_G_WARNING
2232 "dom%d: could not permit access to IRQ%d (pirq %d)\n",
2233 d->domain_id, irq, pirq);
2234 else
2235 __set_bit(nr, granted);
2236 }
2237
2238 desc = irq_to_desc(irq);
2239 spin_lock_irqsave(&desc->lock, flags);
2240
2241 if ( desc->handler != &no_irq_type )
2242 {
2243 dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
2244 d->domain_id, irq, pirq + nr, desc->handler->typename);
2245 ret = -EBUSY;
2246 break;
2247 }
2248 }
2249
2250 if ( ret )
2251 {
2252 spin_unlock_irqrestore(&desc->lock, flags);
2253 pci_disable_msi(msi_desc);
2254 if ( nr )
2255 {
2256 ASSERT(msi_desc->irq >= 0);
2257 desc = irq_to_desc(msi_desc->irq);
2258 spin_lock_irqsave(&desc->lock, flags);
2259 desc->handler = &no_irq_type;
2260 desc->msi_desc = NULL;
2261 spin_unlock_irqrestore(&desc->lock, flags);
2262 }
2263 while ( nr )
2264 {
2265 if ( irq >= 0 && test_bit(nr, granted) &&
2266 irq_deny_access(d, irq) )
2267 printk(XENLOG_G_ERR
2268 "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
2269 d->domain_id, irq, pirq);
2270 if ( info && test_bit(nr, prepared) )
2271 cleanup_domain_irq_pirq(d, irq, info);
2272 info = pirq_info(d, pirq + --nr);
2273 irq = info->arch.irq;
2274 }
2275 msi_desc->irq = -1;
2276 msi_free_irq(msi_desc);
2277 goto done;
2278 }
2279
2280 set_domain_irq_pirq(d, irq, info);
2281 spin_unlock_irqrestore(&desc->lock, flags);
2282 }
2283 else
2284 {
2285 spin_lock_irqsave(&desc->lock, flags);
2286 set_domain_irq_pirq(d, irq, info);
2287 spin_unlock_irqrestore(&desc->lock, flags);
2288 ret = 0;
2289 }
2290
2291 done:
2292 if ( ret )
2293 {
2294 if ( test_bit(0, prepared) )
2295 cleanup_domain_irq_pirq(d, irq, info);
2296 revoke:
2297 if ( test_bit(0, granted) && irq_deny_access(d, irq) )
2298 printk(XENLOG_G_ERR
2299 "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
2300 d->domain_id, irq, pirq);
2301 }
2302 return ret;
2303 }
2304
2305 /* The pirq should have been unbound before this call. */
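/*
 * Tear down the binding established by map_domain_pirq().  For a
 * multi-vector MSI block all nvec entries are unwound, starting from the
 * first one (unmapping a secondary entry directly is refused with -EBUSY).
 * If the guest still had the pirq bound, the unbind is forced and the
 * pirq/irq translation entries are left holding the negated values instead
 * of being freed.
 */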
2306 int unmap_domain_pirq(struct domain *d, int pirq)
2307 {
2308 unsigned long flags;
2309 struct irq_desc *desc;
2310 int irq, ret = 0, rc;
2311 unsigned int i, nr = 1;
2312 bool forced_unbind;
2313 struct pirq *info;
2314 struct msi_desc *msi_desc = NULL;
2315
2316 if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
2317 return -EINVAL;
2318
2319 ASSERT(pcidevs_locked());
2320 ASSERT(spin_is_locked(&d->event_lock));
2321
2322 info = pirq_info(d, pirq);
2323 if ( !info || (irq = info->arch.irq) <= 0 )
2324 {
2325 dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
2326 d->domain_id, pirq);
2327 ret = -EINVAL;
2328 goto done;
2329 }
2330
2331 desc = irq_to_desc(irq);
2332 msi_desc = desc->msi_desc;
2333 if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
2334 {
2335 if ( msi_desc->msi_attrib.entry_nr )
2336 {
2337 printk(XENLOG_G_ERR
2338 "dom%d: trying to unmap secondary MSI pirq %d\n",
2339 d->domain_id, pirq);
2340 ret = -EBUSY;
2341 goto done;
2342 }
2343 nr = msi_desc->msi.nvec;
2344 }
2345
2346 ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq,
2347 msi_desc ? msi_desc->dev : NULL);
2348 if ( ret )
2349 goto done;
2350
2351 forced_unbind = pirq_guest_force_unbind(d, info);
2352 if ( forced_unbind )
2353 dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n",
2354 d->domain_id, pirq);
2355
2356 if ( msi_desc != NULL )
2357 pci_disable_msi(msi_desc);
2358
2359 spin_lock_irqsave(&desc->lock, flags);
2360
2361 for ( i = 0; ; )
2362 {
2363 BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
2364
2365 if ( !forced_unbind )
2366 clear_domain_irq_pirq(d, irq, info);
2367 else
2368 {
2369 info->arch.irq = -irq;
2370 radix_tree_replace_slot(
2371 radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
2372 radix_tree_int_to_ptr(-pirq));
2373 }
2374
2375 if ( msi_desc )
2376 {
2377 desc->handler = &no_irq_type;
2378 desc->msi_desc = NULL;
2379 }
2380
2381 if ( ++i == nr )
2382 break;
2383
2384 spin_unlock_irqrestore(&desc->lock, flags);
2385
2386 if ( !forced_unbind )
2387 cleanup_domain_irq_pirq(d, irq, info);
2388
2389 rc = irq_deny_access(d, irq);
2390 if ( rc )
2391 {
2392 printk(XENLOG_G_ERR
2393 "dom%d: could not deny access to IRQ%d (pirq %d)\n",
2394 d->domain_id, irq, pirq + i);
2395 ret = rc;
2396 }
2397
2398 do {
2399 info = pirq_info(d, pirq + i);
2400 if ( info && (irq = info->arch.irq) > 0 )
2401 break;
2402 printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
2403 d->domain_id, pirq + i);
2404 } while ( ++i < nr );
2405
2406 if ( i == nr )
2407 {
2408 desc = NULL;
2409 break;
2410 }
2411
2412 desc = irq_to_desc(irq);
2413 BUG_ON(desc->msi_desc != msi_desc + i);
2414
2415 spin_lock_irqsave(&desc->lock, flags);
2416 }
2417
2418 if ( desc )
2419 {
2420 spin_unlock_irqrestore(&desc->lock, flags);
2421
2422 if ( !forced_unbind )
2423 cleanup_domain_irq_pirq(d, irq, info);
2424
2425 rc = irq_deny_access(d, irq);
2426 if ( rc )
2427 {
2428 printk(XENLOG_G_ERR
2429 "dom%d: could not deny access to IRQ%d (pirq %d)\n",
2430 d->domain_id, irq, pirq + nr - 1);
2431 ret = rc;
2432 }
2433 }
2434
2435     if ( msi_desc )
2436 msi_free_irq(msi_desc);
2437
2438 done:
2439 return ret;
2440 }
2441
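/* Unmap every still-mapped pirq of @d, e.g. as part of domain teardown. */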
2442 void free_domain_pirqs(struct domain *d)
2443 {
2444 int i;
2445
2446 pcidevs_lock();
2447 spin_lock(&d->event_lock);
2448
2449 for ( i = 0; i < d->nr_pirqs; i++ )
2450 if ( domain_pirq_to_irq(d, i) > 0 )
2451 unmap_domain_pirq(d, i);
2452
2453 spin_unlock(&d->event_lock);
2454 pcidevs_unlock();
2455 }
2456
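/*
 * 'i' debug key handler: print every initialized, bound IRQ with its
 * vector, affinity masks and, for guest-bound IRQs, the per-domain
 * pirq/event-channel state, followed by the directly handled vectors and
 * the IO-APIC information.
 */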
2457 static void dump_irqs(unsigned char key)
2458 {
2459 int i, irq, pirq;
2460 struct irq_desc *desc;
2461 irq_guest_action_t *action;
2462 struct domain *d;
2463 const struct pirq *info;
2464 unsigned long flags;
2465 char *ssid;
2466
2467 printk("IRQ information:\n");
2468
2469 for ( irq = 0; irq < nr_irqs; irq++ )
2470 {
2471 if ( !(irq & 0x1f) )
2472 process_pending_softirqs();
2473
2474 desc = irq_to_desc(irq);
2475
2476 if ( !irq_desc_initialized(desc) || desc->handler == &no_irq_type )
2477 continue;
2478
2479 ssid = in_irq() ? NULL : xsm_show_irq_sid(irq);
2480
2481 spin_lock_irqsave(&desc->lock, flags);
2482
2483 printk(" IRQ:%4d vec:%02x %-15s status=%03x aff:{%*pbl}/{%*pbl} ",
2484 irq, desc->arch.vector, desc->handler->typename, desc->status,
2485 CPUMASK_PR(desc->affinity), CPUMASK_PR(desc->arch.cpu_mask));
2486
2487 if ( ssid )
2488 printk("Z=%-25s ", ssid);
2489
2490 if ( desc->status & IRQ_GUEST )
2491 {
2492 action = (irq_guest_action_t *)desc->action;
2493
2494 printk("in-flight=%d%c",
2495 action->in_flight, action->nr_guests ? ' ' : '\n');
2496
2497 for ( i = 0; i < action->nr_guests; )
2498 {
2499 struct evtchn *evtchn;
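                /* 2 == state unknown, printed as '?' by the "-P?"/"-M?" lookups below. */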
2500 unsigned int pending = 2, masked = 2;
2501
2502 d = action->guest[i++];
2503 pirq = domain_irq_to_pirq(d, irq);
2504 info = pirq_info(d, pirq);
2505 evtchn = evtchn_from_port(d, info->evtchn);
2506 if ( evtchn_read_trylock(evtchn) )
2507 {
2508 pending = evtchn_is_pending(d, evtchn);
2509 masked = evtchn_is_masked(d, evtchn);
2510 evtchn_read_unlock(evtchn);
2511 }
2512 printk("d%d:%3d(%c%c%c)%c",
2513 d->domain_id, pirq, "-P?"[pending],
2514 "-M?"[masked], info->masked ? 'M' : '-',
2515 i < action->nr_guests ? ',' : '\n');
2516 }
2517 }
2518 else if ( desc->action )
2519 printk("%ps()\n", desc->action->handler);
2520 else
2521 printk("mapped, unbound\n");
2522
2523 spin_unlock_irqrestore(&desc->lock, flags);
2524
2525 xfree(ssid);
2526 }
2527
2528 process_pending_softirqs();
2529 printk("Direct vector information:\n");
2530 for ( i = FIRST_DYNAMIC_VECTOR; i < X86_NR_VECTORS; ++i )
2531 if ( direct_apic_vector[i] )
2532 printk(" %#02x -> %ps()\n", i, direct_apic_vector[i]);
2533
2534 dump_ioapic_irq_info();
2535 }
2536
2537 static int __init setup_dump_irqs(void)
2538 {
2539 register_keyhandler('i', dump_irqs, "dump interrupt bindings", 1);
2540 return 0;
2541 }
2542 __initcall(setup_dump_irqs);
2543
2544 /* Reset irq affinities to match the given CPU mask. */
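/*
 * Typically used while parking or offlining CPUs: stale old-vector state
 * is released, and every IRQ whose affinity is not already a subset of
 * @mask is re-targeted through its handler's set_affinity() hook.  If the
 * existing affinity no longer intersects @mask at all it is "broken",
 * i.e. widened to all CPUs before re-targeting; this (and any inability to
 * set an affinity) is reported when @verbose is set.
 */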
2545 void fixup_irqs(const cpumask_t *mask, bool verbose)
2546 {
2547 unsigned int irq;
2548 static int warned;
2549 struct irq_desc *desc;
2550
2551 for ( irq = 0; irq < nr_irqs; irq++ )
2552 {
2553 bool break_affinity = false, set_affinity = true;
2554 unsigned int vector;
2555 cpumask_t *affinity = this_cpu(scratch_cpumask);
2556
2557 if ( irq == 2 )
2558 continue;
2559
2560 desc = irq_to_desc(irq);
2561 if ( !irq_desc_initialized(desc) )
2562 continue;
2563
2564 spin_lock(&desc->lock);
2565
2566 vector = irq_to_vector(irq);
2567 if ( vector >= FIRST_HIPRIORITY_VECTOR &&
2568 vector <= LAST_HIPRIORITY_VECTOR )
2569 {
2570 cpumask_and(desc->arch.cpu_mask, desc->arch.cpu_mask, mask);
2571
2572 /*
2573 * This can in particular happen when parking secondary threads
2574 * during boot and when the serial console wants to use a PCI IRQ.
2575 */
2576 if ( desc->handler == &no_irq_type )
2577 {
2578 spin_unlock(&desc->lock);
2579 continue;
2580 }
2581 }
2582
2583 if ( desc->arch.move_cleanup_count )
2584 {
2585 /* The cleanup IPI may have got sent while we were still online. */
2586 cpumask_andnot(affinity, desc->arch.old_cpu_mask,
2587 &cpu_online_map);
2588 desc->arch.move_cleanup_count -= cpumask_weight(affinity);
2589 if ( !desc->arch.move_cleanup_count )
2590 release_old_vec(desc);
2591 }
2592
2593 if ( !desc->action || cpumask_subset(desc->affinity, mask) )
2594 {
2595 spin_unlock(&desc->lock);
2596 continue;
2597 }
2598
2599 /*
2600 * In order for the affinity adjustment below to be successful, we
2601 * need _assign_irq_vector() to succeed. This in particular means
2602 * clearing desc->arch.move_in_progress if this would otherwise
2603 * prevent the function from succeeding. Since there's no way for the
2604 * flag to get cleared anymore when there's no possible destination
2605 * left (the only possibility then would be the IRQs enabled window
2606 * after this loop), there's then also no race with us doing it here.
2607 *
2608 * Therefore the logic here and there need to remain in sync.
2609 */
2610 if ( desc->arch.move_in_progress &&
2611 !cpumask_intersects(mask, desc->arch.cpu_mask) )
2612 {
2613 unsigned int cpu;
2614
2615 cpumask_and(affinity, desc->arch.old_cpu_mask, &cpu_online_map);
2616
2617 spin_lock(&vector_lock);
2618 for_each_cpu(cpu, affinity)
2619 per_cpu(vector_irq, cpu)[desc->arch.old_vector] = ~irq;
2620 spin_unlock(&vector_lock);
2621
2622 release_old_vec(desc);
2623 desc->arch.move_in_progress = 0;
2624 }
2625
2626 if ( !cpumask_intersects(mask, desc->affinity) )
2627 {
2628 break_affinity = true;
2629 cpumask_setall(affinity);
2630 }
2631 else
2632 cpumask_copy(affinity, desc->affinity);
2633
2634 if ( desc->handler->disable )
2635 desc->handler->disable(desc);
2636
2637 if ( desc->handler->set_affinity )
2638 desc->handler->set_affinity(desc, affinity);
2639 else if ( !(warned++) )
2640 set_affinity = false;
2641
2642 if ( desc->handler->enable )
2643 desc->handler->enable(desc);
2644
2645 cpumask_copy(affinity, desc->affinity);
2646
2647 spin_unlock(&desc->lock);
2648
2649 if ( !verbose )
2650 continue;
2651
2652 if ( !set_affinity )
2653 printk("Cannot set affinity for IRQ%u\n", irq);
2654 else if ( break_affinity )
2655 printk("Broke affinity for IRQ%u, new: %*pb\n",
2656 irq, CPUMASK_PR(affinity));
2657 }
2658
2659 /* That doesn't seem sufficient. Give it 1ms. */
2660 local_irq_enable();
2661 mdelay(1);
2662 local_irq_disable();
2663 }
2664
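/*
 * Purge this CPU from all guest IRQ cpu_eoi_map masks and force out any
 * EOIs still stacked here, e.g. when the CPU is being taken offline.
 */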
2665 void fixup_eoi(void)
2666 {
2667 unsigned int irq, sp;
2668 struct irq_desc *desc;
2669 irq_guest_action_t *action;
2670 struct pending_eoi *peoi;
2671
2672 /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
2673 for ( irq = 0; irq < nr_irqs; irq++ )
2674 {
2675 desc = irq_to_desc(irq);
2676 if ( !(desc->status & IRQ_GUEST) )
2677 continue;
2678 action = (irq_guest_action_t *)desc->action;
2679 cpumask_clear_cpu(smp_processor_id(), action->cpu_eoi_map);
2680 }
2681
2682 /* Flush the interrupt EOI stack. */
2683 peoi = this_cpu(pending_eoi);
2684 for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
2685 peoi[sp].ready = 1;
2686 flush_ready_eoi();
2687 }
2688
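/*
 * Record that @pirq of HVM domain @d is delivered through emulated IRQ
 * @emuirq, or mark it as passed-through with IRQ_PT (in which case no
 * reverse emuirq -> pirq mapping is stored).
 */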
2689 int map_domain_emuirq_pirq(struct domain *d, int pirq, int emuirq)
2690 {
2691 int old_emuirq = IRQ_UNBOUND, old_pirq = IRQ_UNBOUND;
2692 struct pirq *info;
2693
2694 ASSERT(spin_is_locked(&d->event_lock));
2695
2696 if ( !is_hvm_domain(d) )
2697 return -EINVAL;
2698
2699 if ( pirq < 0 || pirq >= d->nr_pirqs ||
2700 emuirq == IRQ_UNBOUND || emuirq >= (int) nr_irqs )
2701 {
2702 dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or emuirq %d\n",
2703 d->domain_id, pirq, emuirq);
2704 return -EINVAL;
2705 }
2706
2707 old_emuirq = domain_pirq_to_emuirq(d, pirq);
2708 if ( emuirq != IRQ_PT )
2709 old_pirq = domain_emuirq_to_pirq(d, emuirq);
2710
2711 if ( (old_emuirq != IRQ_UNBOUND && (old_emuirq != emuirq) ) ||
2712 (old_pirq != IRQ_UNBOUND && (old_pirq != pirq)) )
2713 {
2714 dprintk(XENLOG_G_WARNING, "dom%d: pirq %d or emuirq %d already mapped\n",
2715 d->domain_id, pirq, emuirq);
2716 return 0;
2717 }
2718
2719 info = pirq_get_info(d, pirq);
2720 if ( !info )
2721 return -ENOMEM;
2722
2723 /* do not store emuirq mappings for pt devices */
2724 if ( emuirq != IRQ_PT )
2725 {
2726 int err = radix_tree_insert(&d->arch.hvm.emuirq_pirq, emuirq,
2727 radix_tree_int_to_ptr(pirq));
2728
2729 switch ( err )
2730 {
2731 case 0:
2732 break;
2733 case -EEXIST:
2734 radix_tree_replace_slot(
2735 radix_tree_lookup_slot(
2736 &d->arch.hvm.emuirq_pirq, emuirq),
2737 radix_tree_int_to_ptr(pirq));
2738 break;
2739 default:
2740 pirq_cleanup_check(info, d);
2741 return err;
2742 }
2743 }
2744 info->arch.hvm.emuirq = emuirq;
2745
2746 return 0;
2747 }
2748
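/* Undo map_domain_emuirq_pirq(): drop the emulated IRQ binding of @pirq. */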
2749 int unmap_domain_pirq_emuirq(struct domain *d, int pirq)
2750 {
2751 int emuirq, ret = 0;
2752 struct pirq *info;
2753
2754 if ( !is_hvm_domain(d) )
2755 return -EINVAL;
2756
2757 if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
2758 return -EINVAL;
2759
2760 ASSERT(spin_is_locked(&d->event_lock));
2761
2762 emuirq = domain_pirq_to_emuirq(d, pirq);
2763 if ( emuirq == IRQ_UNBOUND )
2764 {
2765 dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
2766 d->domain_id, pirq);
2767 ret = -EINVAL;
2768 goto done;
2769 }
2770
2771 info = pirq_info(d, pirq);
2772 if ( info )
2773 {
2774 info->arch.hvm.emuirq = IRQ_UNBOUND;
2775 pirq_cleanup_check(info, d);
2776 }
2777 if ( emuirq != IRQ_PT )
2778 radix_tree_delete(&d->arch.hvm.emuirq_pirq, emuirq);
2779
2780 done:
2781 return ret;
2782 }
2783
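/*
 * Hook run when an event channel gets bound to @pirq: HVM pirqs are marked
 * as passed-through (IRQ_PT), and an underlying MSI, if any, is unmasked.
 */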
2784 void arch_evtchn_bind_pirq(struct domain *d, int pirq)
2785 {
2786 int irq = domain_pirq_to_irq(d, pirq);
2787 struct irq_desc *desc;
2788 unsigned long flags;
2789
2790 if ( irq <= 0 )
2791 return;
2792
2793 if ( is_hvm_domain(d) )
2794 map_domain_emuirq_pirq(d, pirq, IRQ_PT);
2795
2796 desc = irq_to_desc(irq);
2797 spin_lock_irqsave(&desc->lock, flags);
2798 if ( desc->msi_desc )
2799 guest_mask_msi_irq(desc, 0);
2800 spin_unlock_irqrestore(&desc->lock, flags);
2801 }
2802
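/*
 * Pick a pirq for @irq when @pirq is negative, or verify the caller's
 * choice otherwise (-EEXIST if @irq is already mapped to a different one).
 * For multi-vector MSI, *nr is first rounded up to a power of two and a
 * contiguous block of that many free pirqs is requested.  Returns the pirq
 * or a negative errno.
 */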
2803 static int allocate_pirq(struct domain *d, int index, int pirq, int irq,
2804 int type, int *nr)
2805 {
2806 int current_pirq;
2807
2808 ASSERT(spin_is_locked(&d->event_lock));
2809 current_pirq = domain_irq_to_pirq(d, irq);
2810 if ( pirq < 0 )
2811 {
2812 if ( current_pirq )
2813 {
2814 dprintk(XENLOG_G_ERR, "dom%d: %d:%d already mapped to %d\n",
2815 d->domain_id, index, pirq, current_pirq);
2816 if ( current_pirq < 0 )
2817 return -EBUSY;
2818 }
2819 else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
2820 {
2821 if ( *nr <= 0 || *nr > MAX_MSI_IRQS )
2822 return -EDOM;
2823 if ( *nr != 1 && !iommu_intremap )
2824 return -EOPNOTSUPP;
2825
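            /* Round *nr up to a power of two, as multi-message MSI requires. */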
2826 while ( *nr & (*nr - 1) )
2827 *nr += *nr & -*nr;
2828 pirq = get_free_pirqs(d, *nr);
2829 if ( pirq < 0 )
2830 {
2831 while ( (*nr >>= 1) > 1 )
2832 if ( get_free_pirqs(d, *nr) > 0 )
2833 break;
2834 dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
2835 d->domain_id, *nr << 1);
2836 }
2837 }
2838 else
2839 {
2840 pirq = get_free_pirq(d, type);
2841 if ( pirq < 0 )
2842 dprintk(XENLOG_G_ERR, "dom%d: no free pirq\n", d->domain_id);
2843 }
2844 }
2845 else if ( current_pirq && pirq != current_pirq )
2846 {
2847 dprintk(XENLOG_G_ERR, "dom%d: irq %d already mapped to pirq %d\n",
2848 d->domain_id, irq, current_pirq);
2849 return -EEXIST;
2850 }
2851
2852 return pirq;
2853 }
2854
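/*
 * Helper for the GSI map paths (e.g. PHYSDEVOP_map_pirq): translate GSI
 * @index to a host IRQ, allocate or verify the target pirq, and bind it
 * with map_domain_pirq().  Callers other than the hardware domain must
 * already have the GSI mapped in their own pirq space; the hardware domain
 * uses the GSI number as the IRQ directly.  On success the chosen pirq is
 * returned through @pirq_p.
 */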
2855 int allocate_and_map_gsi_pirq(struct domain *d, int index, int *pirq_p)
2856 {
2857 int irq, pirq, ret;
2858
2859 if ( index < 0 || index >= nr_irqs_gsi )
2860 {
2861 dprintk(XENLOG_G_ERR, "dom%d: map invalid irq %d\n", d->domain_id,
2862 index);
2863 return -EINVAL;
2864 }
2865
2866 irq = domain_pirq_to_irq(current->domain, index);
2867 if ( irq <= 0 )
2868 {
2869 if ( is_hardware_domain(current->domain) )
2870 irq = index;
2871 else
2872 {
2873 dprintk(XENLOG_G_ERR, "dom%d: map pirq with incorrect irq!\n",
2874 d->domain_id);
2875 return -EINVAL;
2876 }
2877 }
2878
2879 /* Verify or get pirq. */
2880 spin_lock(&d->event_lock);
2881 pirq = allocate_pirq(d, index, *pirq_p, irq, MAP_PIRQ_TYPE_GSI, NULL);
2882 if ( pirq < 0 )
2883 {
2884 ret = pirq;
2885 goto done;
2886 }
2887
2888 ret = map_domain_pirq(d, pirq, irq, MAP_PIRQ_TYPE_GSI, NULL);
2889 if ( !ret )
2890 *pirq_p = pirq;
2891
2892 done:
2893 spin_unlock(&d->event_lock);
2894
2895 return ret;
2896 }
2897
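/*
 * Helper for the MSI map paths (e.g. PHYSDEVOP_map_pirq): pick the host
 * IRQ (@index for plain MSI, or a freshly created one when @index is -1
 * and always for multi-vector MSI; note the case label placed inside the
 * if() body), then allocate or verify the pirq(s) and bind them with
 * map_domain_pirq().  On failure any IRQ created here is destroyed again.
 */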
2898 int allocate_and_map_msi_pirq(struct domain *d, int index, int *pirq_p,
2899 int type, struct msi_info *msi)
2900 {
2901 int irq, pirq, ret;
2902
2903 switch ( type )
2904 {
2905 case MAP_PIRQ_TYPE_MSI:
2906 if ( !msi->table_base )
2907 msi->entry_nr = 1;
2908 irq = index;
2909 if ( irq == -1 )
2910 {
2911 case MAP_PIRQ_TYPE_MULTI_MSI:
2912 irq = create_irq(NUMA_NO_NODE, true);
2913 }
2914
2915 if ( irq < nr_irqs_gsi || irq >= nr_irqs )
2916 {
2917 dprintk(XENLOG_G_ERR, "dom%d: can't create irq for msi!\n",
2918 d->domain_id);
2919 return -EINVAL;
2920 }
2921 break;
2922
2923 default:
2924 dprintk(XENLOG_G_ERR, "dom%d: wrong pirq type %x\n",
2925 d->domain_id, type);
2926 ASSERT_UNREACHABLE();
2927 return -EINVAL;
2928 }
2929
2930 msi->irq = irq;
2931
2932 pcidevs_lock();
2933 /* Verify or get pirq. */
2934 spin_lock(&d->event_lock);
2935 pirq = allocate_pirq(d, index, *pirq_p, irq, type, &msi->entry_nr);
2936 if ( pirq < 0 )
2937 {
2938 ret = pirq;
2939 goto done;
2940 }
2941
2942 ret = map_domain_pirq(d, pirq, irq, type, msi);
2943 if ( !ret )
2944 *pirq_p = pirq;
2945
2946 done:
2947 spin_unlock(&d->event_lock);
2948 pcidevs_unlock();
2949 if ( ret )
2950 {
2951 switch ( type )
2952 {
2953 case MAP_PIRQ_TYPE_MSI:
2954 if ( index == -1 )
2955 case MAP_PIRQ_TYPE_MULTI_MSI:
2956 destroy_irq(irq);
2957 break;
2958 }
2959 }
2960
2961 return ret;
2962 }
2963