xref: /openbmc/linux/drivers/xen/events/events_base.c (revision 249592bf)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Xen event channels
4  *
5  * Xen models interrupts with abstract event channels.  Because each
6  * domain gets 1024 event channels, but NR_IRQ is not that large, we
7  * must dynamically map irqs<->event channels.  The event channels
8  * interface with the rest of the kernel by defining a xen interrupt
9  * chip.  When an event is received, it is mapped to an irq and sent
10  * through the normal interrupt processing path.
11  *
12  * There are four kinds of events which can be mapped to an event
13  * channel:
14  *
15  * 1. Inter-domain notifications.  This includes all the virtual
16  *    device events, since they're driven by front-ends in another domain
17  *    (typically dom0).
18  * 2. VIRQs, typically used for timers.  These are per-cpu events.
19  * 3. IPIs.
20  * 4. PIRQs - Hardware interrupts.
21  *
22  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
23  */
24 
25 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
26 
27 #include <linux/linkage.h>
28 #include <linux/interrupt.h>
29 #include <linux/irq.h>
30 #include <linux/moduleparam.h>
31 #include <linux/string.h>
32 #include <linux/memblock.h>
33 #include <linux/slab.h>
34 #include <linux/irqnr.h>
35 #include <linux/pci.h>
36 #include <linux/spinlock.h>
37 #include <linux/cpuhotplug.h>
38 #include <linux/atomic.h>
39 #include <linux/ktime.h>
40 
41 #ifdef CONFIG_X86
42 #include <asm/desc.h>
43 #include <asm/ptrace.h>
44 #include <asm/idtentry.h>
45 #include <asm/irq.h>
46 #include <asm/io_apic.h>
47 #include <asm/i8259.h>
48 #include <asm/xen/pci.h>
49 #endif
50 #include <asm/sync_bitops.h>
51 #include <asm/xen/hypercall.h>
52 #include <asm/xen/hypervisor.h>
53 #include <xen/page.h>
54 
55 #include <xen/xen.h>
56 #include <xen/hvm.h>
57 #include <xen/xen-ops.h>
58 #include <xen/events.h>
59 #include <xen/interface/xen.h>
60 #include <xen/interface/event_channel.h>
61 #include <xen/interface/hvm/hvm_op.h>
62 #include <xen/interface/hvm/params.h>
63 #include <xen/interface/physdev.h>
64 #include <xen/interface/sched.h>
65 #include <xen/interface/vcpu.h>
66 #include <xen/xenbus.h>
67 #include <asm/hw_irq.h>
68 
69 #include "events_internal.h"
70 
71 #undef MODULE_PARAM_PREFIX
72 #define MODULE_PARAM_PREFIX "xen."
73 
/*
 * Interrupt types: the kind of Xen event source an IRQ is bound to.
 * The value is stored in irq_info.type.
 */
enum xen_irq_type {
	IRQT_UNBOUND = 0,	/* Not bound yet; set by xen_irq_init() */
	IRQT_PIRQ,		/* Hardware interrupt (PIRQ) */
	IRQT_VIRQ,		/* Per-cpu virtual interrupt (VIRQ) */
	IRQT_IPI,		/* Inter-processor interrupt */
	IRQT_EVTCHN		/* Inter-domain notification */
};
82 
83 /*
84  * Packed IRQ information:
85  * type - enum xen_irq_type
86  * event channel - irq->event channel mapping
87  * cpu - cpu this event channel is bound to
88  * index - type-specific information:
 *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
 *           guest, or GSI (real passthrough IRQ) of the device.
 *    VIRQ - virq number
 *    IPI - IPI vector
 *    EVTCHN - xenbus device the event channel belongs to (may be NULL)
94  */
/*
 * Per-IRQ state for an IRQ managed by the Xen event channel code.
 * Allocated by xen_irq_init() and freed by xen_free_irq().
 */
struct irq_info {
	struct list_head list;		/* Entry in xen_irq_list_head */
	struct list_head eoi_list;	/* Entry in a per-cpu lateeoi list */
	short refcnt;		/* Initialised to -1 by xen_irq_init() */
	u8 spurious_cnt;	/* Spurious events seen; reset by a non-spurious EOI */
	u8 is_accounted;	/* Counted in channels_on_cpu[cpu]? */
	short type;		/* type: IRQT_* */
	u8 mask_reason;		/* Why is event channel masked */
#define EVT_MASK_REASON_EXPLICIT	0x01
#define EVT_MASK_REASON_TEMPORARY	0x02
#define EVT_MASK_REASON_EOI_PENDING	0x04
	u8 is_active;		/* Is event just being handled? */
	unsigned irq;		/* Linux IRQ number this info belongs to */
	evtchn_port_t evtchn;   /* event channel */
	unsigned short cpu;     /* cpu bound */
	unsigned short eoi_cpu; /* cpu whose lateeoi list queues a delayed EOI */
	unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
	u64 eoi_time;           /* Time in jiffies when to EOI; 0: none pending */
	raw_spinlock_t lock;	/* Serialises mask_reason updates (do_mask/do_unmask) */

	/* Type-specific data; which member is valid depends on ->type. */
	union {
		unsigned short virq;
		enum ipi_vector ipi;
		struct {
			unsigned short pirq;
			unsigned short gsi;
			unsigned char vector;
			unsigned char flags;	/* PIRQ_* flag bits */
			uint16_t domid;
		} pirq;
		struct xenbus_device *interdomain;
	} u;
};
128 
/* Bits for irq_info.u.pirq.flags. */
#define PIRQ_NEEDS_EOI	(1 << 0)
#define PIRQ_SHAREABLE	(1 << 1)
#define PIRQ_MSI_GROUP	(1 << 2)

/*
 * Module parameter "xen.event_loop_timeout" (mode 0644), default 2.
 * NOTE(review): the unit is not visible in this part of the file.
 */
static uint __read_mostly event_loop_timeout = 2;
module_param(event_loop_timeout, uint, 0644);

/*
 * Module parameter "xen.event_eoi_delay" (mode 0644), default 10.
 * NOTE(review): the unit is not visible in this part of the file.
 */
static uint __read_mostly event_eoi_delay = 10;
module_param(event_eoi_delay, uint, 0644);

/* Event channel operations table; not assigned in this part of the file. */
const struct evtchn_ops *evtchn_ops;
140 
/*
 * This lock protects updates to the following mapping and reference-count
 * arrays. The lock does not need to be acquired to read the mapping tables.
 * It is a mutex, so it may only be taken in sleepable context.
 */
static DEFINE_MUTEX(irq_mapping_update_lock);

/*
 * Lock protecting event handling loop against removing event channels.
 * Adding of event channels is no issue as the associated IRQ becomes active
 * only after everything is setup (before request_[threaded_]irq() the handler
 * can't be entered for an event, as the event channel will be unmasked only
 * then).
 * Taken for writing by xen_free_irq() and for reading by the lateeoi paths.
 */
static DEFINE_RWLOCK(evtchn_rwlock);
155 
156 /*
157  * Lock hierarchy:
158  *
159  * irq_mapping_update_lock
160  *   evtchn_rwlock
161  *     IRQ-desc lock
162  *       percpu eoi_list_lock
163  *         irq_info->lock
164  */
165 
/* All irq_info structures: added by xen_irq_init(), removed by xen_free_irq(). */
static LIST_HEAD(xen_irq_list_head);

/* IRQ <-> VIRQ mapping. -1: no IRQ bound to that VIRQ on that cpu. */
static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};

/* IRQ <-> IPI mapping. -1: no IRQ bound to that IPI on that cpu. */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};

/* Event channel distribution data */
static atomic_t channels_on_cpu[NR_CPUS];

/*
 * Two-level event channel -> IRQ table. Rows are one page each and are
 * allocated on demand by set_evtchn_to_irq(); -1 marks an unmapped entry.
 */
static int **evtchn_to_irq;
#ifdef CONFIG_X86
/* Bitmap indexed by pirq, tested by pirq_check_eoi_map(). */
static unsigned long *pirq_eoi_map;
#endif
/*
 * Tells whether a PIRQ needs an explicit EOI hypercall, see eoi_pirq().
 * Not assigned in this part of the file; presumably one of
 * pirq_check_eoi_map() or pirq_needs_eoi_flag() -- TODO confirm.
 */
static bool (*pirq_needs_eoi)(unsigned irq);
182 
/*
 * Geometry of the two-level evtchn_to_irq table: every row holds one page
 * worth of entries. EVTCHN_ROW()/EVTCHN_COL() split an event channel number
 * into the row index and the index within that row.
 *
 * The argument is parenthesized so that expressions such as
 * EVTCHN_ROW(a + b) are evaluated as intended.
 */
#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
#define EVTCHN_ROW(e)  ((e) / EVTCHN_PER_ROW)
#define EVTCHN_COL(e)  ((e) % EVTCHN_PER_ROW)
186 
/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn)	((chn) != 0)

/* irq_info pointers for legacy IRQs, see info_for_irq()/set_info_for_irq(). */
static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];

/* irq_chip instances and helpers defined further down in this file. */
static struct irq_chip xen_dynamic_chip;
static struct irq_chip xen_lateeoi_chip;
static struct irq_chip xen_percpu_chip;
static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);

/* Per-cpu epoch counter compared against irq_info.irq_epoch on late EOI. */
static DEFINE_PER_CPU(unsigned int, irq_epoch);
200 
201 static void clear_evtchn_to_irq_row(unsigned row)
202 {
203 	unsigned col;
204 
205 	for (col = 0; col < EVTCHN_PER_ROW; col++)
206 		WRITE_ONCE(evtchn_to_irq[row][col], -1);
207 }
208 
209 static void clear_evtchn_to_irq_all(void)
210 {
211 	unsigned row;
212 
213 	for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
214 		if (evtchn_to_irq[row] == NULL)
215 			continue;
216 		clear_evtchn_to_irq_row(row);
217 	}
218 }
219 
220 static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
221 {
222 	unsigned row;
223 	unsigned col;
224 
225 	if (evtchn >= xen_evtchn_max_channels())
226 		return -EINVAL;
227 
228 	row = EVTCHN_ROW(evtchn);
229 	col = EVTCHN_COL(evtchn);
230 
231 	if (evtchn_to_irq[row] == NULL) {
232 		/* Unallocated irq entries return -1 anyway */
233 		if (irq == -1)
234 			return 0;
235 
236 		evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
237 		if (evtchn_to_irq[row] == NULL)
238 			return -ENOMEM;
239 
240 		clear_evtchn_to_irq_row(row);
241 	}
242 
243 	WRITE_ONCE(evtchn_to_irq[row][col], irq);
244 	return 0;
245 }
246 
247 int get_evtchn_to_irq(evtchn_port_t evtchn)
248 {
249 	if (evtchn >= xen_evtchn_max_channels())
250 		return -1;
251 	if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
252 		return -1;
253 	return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
254 }
255 
256 /* Get info for IRQ */
257 static struct irq_info *info_for_irq(unsigned irq)
258 {
259 	if (irq < nr_legacy_irqs())
260 		return legacy_info_ptrs[irq];
261 	else
262 		return irq_get_chip_data(irq);
263 }
264 
265 static void set_info_for_irq(unsigned int irq, struct irq_info *info)
266 {
267 	if (irq < nr_legacy_irqs())
268 		legacy_info_ptrs[irq] = info;
269 	else
270 		irq_set_chip_data(irq, info);
271 }
272 
273 /* Per CPU channel accounting */
274 static void channels_on_cpu_dec(struct irq_info *info)
275 {
276 	if (!info->is_accounted)
277 		return;
278 
279 	info->is_accounted = 0;
280 
281 	if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
282 		return;
283 
284 	WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
285 }
286 
287 static void channels_on_cpu_inc(struct irq_info *info)
288 {
289 	if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
290 		return;
291 
292 	if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
293 					    INT_MAX)))
294 		return;
295 
296 	info->is_accounted = 1;
297 }
298 
299 /* Constructors for packed IRQ information. */
300 static int xen_irq_info_common_setup(struct irq_info *info,
301 				     unsigned irq,
302 				     enum xen_irq_type type,
303 				     evtchn_port_t evtchn,
304 				     unsigned short cpu)
305 {
306 	int ret;
307 
308 	BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
309 
310 	info->type = type;
311 	info->irq = irq;
312 	info->evtchn = evtchn;
313 	info->cpu = cpu;
314 	info->mask_reason = EVT_MASK_REASON_EXPLICIT;
315 	raw_spin_lock_init(&info->lock);
316 
317 	ret = set_evtchn_to_irq(evtchn, irq);
318 	if (ret < 0)
319 		return ret;
320 
321 	irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
322 
323 	return xen_evtchn_port_setup(evtchn);
324 }
325 
326 static int xen_irq_info_evtchn_setup(unsigned irq,
327 				     evtchn_port_t evtchn,
328 				     struct xenbus_device *dev)
329 {
330 	struct irq_info *info = info_for_irq(irq);
331 	int ret;
332 
333 	ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
334 	info->u.interdomain = dev;
335 	if (dev)
336 		atomic_inc(&dev->event_channels);
337 
338 	return ret;
339 }
340 
341 static int xen_irq_info_ipi_setup(unsigned cpu,
342 				  unsigned irq,
343 				  evtchn_port_t evtchn,
344 				  enum ipi_vector ipi)
345 {
346 	struct irq_info *info = info_for_irq(irq);
347 
348 	info->u.ipi = ipi;
349 
350 	per_cpu(ipi_to_irq, cpu)[ipi] = irq;
351 
352 	return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
353 }
354 
355 static int xen_irq_info_virq_setup(unsigned cpu,
356 				   unsigned irq,
357 				   evtchn_port_t evtchn,
358 				   unsigned virq)
359 {
360 	struct irq_info *info = info_for_irq(irq);
361 
362 	info->u.virq = virq;
363 
364 	per_cpu(virq_to_irq, cpu)[virq] = irq;
365 
366 	return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
367 }
368 
369 static int xen_irq_info_pirq_setup(unsigned irq,
370 				   evtchn_port_t evtchn,
371 				   unsigned pirq,
372 				   unsigned gsi,
373 				   uint16_t domid,
374 				   unsigned char flags)
375 {
376 	struct irq_info *info = info_for_irq(irq);
377 
378 	info->u.pirq.pirq = pirq;
379 	info->u.pirq.gsi = gsi;
380 	info->u.pirq.domid = domid;
381 	info->u.pirq.flags = flags;
382 
383 	return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
384 }
385 
/*
 * Detach @info from its event channel: drop the evtchn -> irq mapping,
 * remove the port, forget the port number and undo the per-cpu accounting.
 * Closing the event channel itself is left to the caller.
 */
static void xen_irq_info_cleanup(struct irq_info *info)
{
	set_evtchn_to_irq(info->evtchn, -1);
	xen_evtchn_port_remove(info->evtchn, info->cpu);
	/* Port 0 is never valid, see VALID_EVTCHN(). */
	info->evtchn = 0;
	channels_on_cpu_dec(info);
}
393 
394 /*
395  * Accessors for packed IRQ information.
396  */
397 evtchn_port_t evtchn_from_irq(unsigned irq)
398 {
399 	const struct irq_info *info = NULL;
400 
401 	if (likely(irq < nr_irqs))
402 		info = info_for_irq(irq);
403 	if (!info)
404 		return 0;
405 
406 	return info->evtchn;
407 }
408 
/*
 * Return the IRQ mapped to @evtchn. get_evtchn_to_irq() yields -1 for an
 * unmapped channel, which is converted to unsigned int by this function's
 * return type.
 */
unsigned int irq_from_evtchn(evtchn_port_t evtchn)
{
	return get_evtchn_to_irq(evtchn);
}
EXPORT_SYMBOL_GPL(irq_from_evtchn);
414 
/*
 * Return the IRQ bound to @virq on @cpu, or -1 if none is bound.
 * Neither argument is range checked here.
 */
int irq_from_virq(unsigned int cpu, unsigned int virq)
{
	return per_cpu(virq_to_irq, cpu)[virq];
}
419 
420 static enum ipi_vector ipi_from_irq(unsigned irq)
421 {
422 	struct irq_info *info = info_for_irq(irq);
423 
424 	BUG_ON(info == NULL);
425 	BUG_ON(info->type != IRQT_IPI);
426 
427 	return info->u.ipi;
428 }
429 
430 static unsigned virq_from_irq(unsigned irq)
431 {
432 	struct irq_info *info = info_for_irq(irq);
433 
434 	BUG_ON(info == NULL);
435 	BUG_ON(info->type != IRQT_VIRQ);
436 
437 	return info->u.virq;
438 }
439 
440 static unsigned pirq_from_irq(unsigned irq)
441 {
442 	struct irq_info *info = info_for_irq(irq);
443 
444 	BUG_ON(info == NULL);
445 	BUG_ON(info->type != IRQT_PIRQ);
446 
447 	return info->u.pirq.pirq;
448 }
449 
450 static enum xen_irq_type type_from_irq(unsigned irq)
451 {
452 	return info_for_irq(irq)->type;
453 }
454 
455 static unsigned cpu_from_irq(unsigned irq)
456 {
457 	return info_for_irq(irq)->cpu;
458 }
459 
460 unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
461 {
462 	int irq = get_evtchn_to_irq(evtchn);
463 	unsigned ret = 0;
464 
465 	if (irq != -1)
466 		ret = cpu_from_irq(irq);
467 
468 	return ret;
469 }
470 
/*
 * Add @reason to the set of reasons for which the event channel of @info
 * is masked. The channel is masked in Xen only on the transition from
 * "no reason" to "some reason"; further reasons are merely recorded.
 */
static void do_mask(struct irq_info *info, u8 reason)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&info->lock, flags);

	/* Not masked yet for any reason: do the actual masking. */
	if (!info->mask_reason)
		mask_evtchn(info->evtchn);

	info->mask_reason |= reason;

	raw_spin_unlock_irqrestore(&info->lock, flags);
}
484 
/*
 * Remove @reason from the set of reasons for which the event channel of
 * @info is masked. The channel is unmasked in Xen only once no reason is
 * left.
 */
static void do_unmask(struct irq_info *info, u8 reason)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&info->lock, flags);

	info->mask_reason &= ~reason;

	/* Last reason gone: do the actual unmasking. */
	if (!info->mask_reason)
		unmask_evtchn(info->evtchn);

	raw_spin_unlock_irqrestore(&info->lock, flags);
}
498 
#ifdef CONFIG_X86
/*
 * pirq_needs_eoi() candidate: test the bit of @irq's pirq in pirq_eoi_map.
 * BUGs (via pirq_from_irq()) unless @irq is a PIRQ IRQ.
 */
static bool pirq_check_eoi_map(unsigned irq)
{
	return test_bit(pirq_from_irq(irq), pirq_eoi_map);
}
#endif
505 
506 static bool pirq_needs_eoi_flag(unsigned irq)
507 {
508 	struct irq_info *info = info_for_irq(irq);
509 	BUG_ON(info->type != IRQT_PIRQ);
510 
511 	return info->u.pirq.flags & PIRQ_NEEDS_EOI;
512 }
513 
514 static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
515 			       bool force_affinity)
516 {
517 	int irq = get_evtchn_to_irq(evtchn);
518 	struct irq_info *info = info_for_irq(irq);
519 
520 	BUG_ON(irq == -1);
521 
522 	if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
523 		cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
524 		cpumask_copy(irq_get_effective_affinity_mask(irq),
525 			     cpumask_of(cpu));
526 	}
527 
528 	xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
529 
530 	channels_on_cpu_dec(info);
531 	info->cpu = cpu;
532 	channels_on_cpu_inc(info);
533 }
534 
535 /**
536  * notify_remote_via_irq - send event to remote end of event channel via irq
537  * @irq: irq of event channel to send event to
538  *
539  * Unlike notify_remote_via_evtchn(), this is safe to use across
540  * save/restore. Notifications on a broken connection are silently
541  * dropped.
542  */
543 void notify_remote_via_irq(int irq)
544 {
545 	evtchn_port_t evtchn = evtchn_from_irq(irq);
546 
547 	if (VALID_EVTCHN(evtchn))
548 		notify_remote_via_evtchn(evtchn);
549 }
550 EXPORT_SYMBOL_GPL(notify_remote_via_irq);
551 
/* Per-cpu state for delayed ("late") EOI handling. */
struct lateeoi_work {
	struct delayed_work delayed;	/* Runs xen_irq_lateeoi_worker() */
	spinlock_t eoi_list_lock;	/* Protects eoi_list */
	struct list_head eoi_list;	/* irq_info.eoi_list entries, sorted by eoi_time */
};

/* Initialised per cpu by xen_cpu_init_eoi(). */
static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
559 
/*
 * Remove @info from the lateeoi list of info->eoi_cpu. list_del_init()
 * leaves info->eoi_list empty, so list_empty() on it can be used to tell
 * whether @info is queued.
 */
static void lateeoi_list_del(struct irq_info *info)
{
	struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
	unsigned long flags;

	spin_lock_irqsave(&eoi->eoi_list_lock, flags);
	list_del_init(&info->eoi_list);
	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
}
569 
570 static void lateeoi_list_add(struct irq_info *info)
571 {
572 	struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
573 	struct irq_info *elem;
574 	u64 now = get_jiffies_64();
575 	unsigned long delay;
576 	unsigned long flags;
577 
578 	if (now < info->eoi_time)
579 		delay = info->eoi_time - now;
580 	else
581 		delay = 1;
582 
583 	spin_lock_irqsave(&eoi->eoi_list_lock, flags);
584 
585 	if (list_empty(&eoi->eoi_list)) {
586 		list_add(&info->eoi_list, &eoi->eoi_list);
587 		mod_delayed_work_on(info->eoi_cpu, system_wq,
588 				    &eoi->delayed, delay);
589 	} else {
590 		list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
591 			if (elem->eoi_time <= info->eoi_time)
592 				break;
593 		}
594 		list_add(&info->eoi_list, &elem->eoi_list);
595 	}
596 
597 	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
598 }
599 
/*
 * Signal the end of handling of a lateeoi event for @info.
 *
 * @spurious: the event turned out to be spurious.
 *
 * The callers in this file hold evtchn_rwlock for reading. Depending on the
 * spurious event history the EOI is either done right away (the
 * EOI_PENDING mask reason is dropped) or queued on a per-cpu lateeoi list
 * to be done later by xen_irq_lateeoi_worker().
 */
static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
{
	evtchn_port_t evtchn;
	unsigned int cpu;
	unsigned int delay = 0;

	evtchn = info->evtchn;
	/* Nothing to do without a bound channel or if already queued. */
	if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
		return;

	if (spurious) {
		struct xenbus_device *dev = info->u.interdomain;
		unsigned int threshold = 1;

		/* A device may override the default threshold of 1. */
		if (dev && dev->spurious_threshold)
			threshold = dev->spurious_threshold;

		/*
		 * Count the spurious event. The counter stops growing once
		 * 1 << spurious_cnt reaches HZ << 2, or at 0xFF.
		 */
		if ((1 << info->spurious_cnt) < (HZ << 2)) {
			if (info->spurious_cnt != 0xFF)
				info->spurious_cnt++;
		}
		/*
		 * Above the threshold the EOI is delayed; the delay doubles
		 * with each further spurious event and is capped at HZ
		 * jiffies.
		 */
		if (info->spurious_cnt > threshold) {
			delay = 1 << (info->spurious_cnt - 1 - threshold);
			if (delay > HZ)
				delay = HZ;
			/* Only pick a cpu if no delayed EOI was pending. */
			if (!info->eoi_time)
				info->eoi_cpu = smp_processor_id();
			info->eoi_time = get_jiffies_64() + delay;
			if (dev)
				atomic_add(delay, &dev->jiffies_eoi_delayed);
		}
		if (dev)
			atomic_inc(&dev->spurious_events);
	} else {
		info->spurious_cnt = 0;
	}

	/*
	 * Queue the EOI if a deadline is set and either a delay was computed
	 * above or the event's epoch still matches the epoch of eoi_cpu.
	 */
	cpu = info->eoi_cpu;
	if (info->eoi_time &&
	    (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
		lateeoi_list_add(info);
		return;
	}

	/* Otherwise do the EOI now. */
	info->eoi_time = 0;
	do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
}
647 
/*
 * Delayed work function of a per-cpu struct lateeoi_work: perform the EOI
 * for all queued entries whose eoi_time has been reached and re-arm the
 * work for the first entry that is not yet due.
 *
 * evtchn_rwlock is held for reading during the whole run; xen_free_irq()
 * takes it for writing before unlinking and freeing an irq_info.
 */
static void xen_irq_lateeoi_worker(struct work_struct *work)
{
	struct lateeoi_work *eoi;
	struct irq_info *info;
	u64 now = get_jiffies_64();
	unsigned long flags;

	eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);

	read_lock_irqsave(&evtchn_rwlock, flags);

	while (true) {
		spin_lock(&eoi->eoi_list_lock);

		info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
						eoi_list);

		/* List empty (info == NULL) or head not due yet: stop. */
		if (info == NULL || now < info->eoi_time) {
			spin_unlock(&eoi->eoi_list_lock);
			break;
		}

		list_del_init(&info->eoi_list);

		spin_unlock(&eoi->eoi_list_lock);

		/* Deadline consumed; a zero eoi_time means "EOI now". */
		info->eoi_time = 0;

		xen_irq_lateeoi_locked(info, false);
	}

	/* info is the first entry still waiting, if any: re-arm for it. */
	if (info)
		mod_delayed_work_on(info->eoi_cpu, system_wq,
				    &eoi->delayed, info->eoi_time - now);

	read_unlock_irqrestore(&evtchn_rwlock, flags);
}
685 
686 static void xen_cpu_init_eoi(unsigned int cpu)
687 {
688 	struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
689 
690 	INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
691 	spin_lock_init(&eoi->eoi_list_lock);
692 	INIT_LIST_HEAD(&eoi->eoi_list);
693 }
694 
695 void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
696 {
697 	struct irq_info *info;
698 	unsigned long flags;
699 
700 	read_lock_irqsave(&evtchn_rwlock, flags);
701 
702 	info = info_for_irq(irq);
703 
704 	if (info)
705 		xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
706 
707 	read_unlock_irqrestore(&evtchn_rwlock, flags);
708 }
709 EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
710 
711 static void xen_irq_init(unsigned irq)
712 {
713 	struct irq_info *info;
714 
715 	info = kzalloc(sizeof(*info), GFP_KERNEL);
716 	if (info == NULL)
717 		panic("Unable to allocate metadata for IRQ%d\n", irq);
718 
719 	info->type = IRQT_UNBOUND;
720 	info->refcnt = -1;
721 
722 	set_info_for_irq(irq, info);
723 	/*
724 	 * Interrupt affinity setting can be immediate. No point
725 	 * in delaying it until an interrupt is handled.
726 	 */
727 	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
728 
729 	INIT_LIST_HEAD(&info->eoi_list);
730 	list_add_tail(&info->list, &xen_irq_list_head);
731 }
732 
733 static int __must_check xen_allocate_irqs_dynamic(int nvec)
734 {
735 	int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
736 
737 	if (irq >= 0) {
738 		for (i = 0; i < nvec; i++)
739 			xen_irq_init(irq + i);
740 	}
741 
742 	return irq;
743 }
744 
/*
 * Allocate a single dynamic IRQ; see xen_allocate_irqs_dynamic() for the
 * return value.
 */
static inline int __must_check xen_allocate_irq_dynamic(void)
{

	return xen_allocate_irqs_dynamic(1);
}
750 
751 static int __must_check xen_allocate_irq_gsi(unsigned gsi)
752 {
753 	int irq;
754 
755 	/*
756 	 * A PV guest has no concept of a GSI (since it has no ACPI
757 	 * nor access to/knowledge of the physical APICs). Therefore
758 	 * all IRQs are dynamically allocated from the entire IRQ
759 	 * space.
760 	 */
761 	if (xen_pv_domain() && !xen_initial_domain())
762 		return xen_allocate_irq_dynamic();
763 
764 	/* Legacy IRQ descriptors are already allocated by the arch. */
765 	if (gsi < nr_legacy_irqs())
766 		irq = gsi;
767 	else
768 		irq = irq_alloc_desc_at(gsi, -1);
769 
770 	xen_irq_init(irq);
771 
772 	return irq;
773 }
774 
/*
 * Release the irq_info of @irq and, for non-legacy IRQs, the IRQ
 * descriptor. Warns and does nothing if @irq has no irq_info.
 */
static void xen_free_irq(unsigned irq)
{
	struct irq_info *info = info_for_irq(irq);
	unsigned long flags;

	if (WARN_ON(!info))
		return;

	/* Exclude the lateeoi paths, which hold the lock for reading. */
	write_lock_irqsave(&evtchn_rwlock, flags);

	/* Drop a still queued delayed EOI. */
	if (!list_empty(&info->eoi_list))
		lateeoi_list_del(info);

	list_del(&info->list);

	set_info_for_irq(irq, NULL);

	WARN_ON(info->refcnt > 0);

	write_unlock_irqrestore(&evtchn_rwlock, flags);

	kfree(info);

	/* Legacy IRQ descriptors are managed by the arch. */
	if (irq < nr_legacy_irqs())
		return;

	irq_free_desc(irq);
}
804 
805 static void xen_evtchn_close(evtchn_port_t port)
806 {
807 	struct evtchn_close close;
808 
809 	close.port = port;
810 	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
811 		BUG();
812 }
813 
/*
 * Mark handling of the event of @info as finished: clear is_active with
 * release semantics, then clear the event channel's pending state.
 */
static void event_handler_exit(struct irq_info *info)
{
	smp_store_release(&info->is_active, 0);
	clear_evtchn(info->evtchn);
}
819 
820 static void pirq_query_unmask(int irq)
821 {
822 	struct physdev_irq_status_query irq_status;
823 	struct irq_info *info = info_for_irq(irq);
824 
825 	BUG_ON(info->type != IRQT_PIRQ);
826 
827 	irq_status.irq = pirq_from_irq(irq);
828 	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
829 		irq_status.flags = 0;
830 
831 	info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
832 	if (irq_status.flags & XENIRQSTAT_needs_eoi)
833 		info->u.pirq.flags |= PIRQ_NEEDS_EOI;
834 }
835 
836 static void eoi_pirq(struct irq_data *data)
837 {
838 	struct irq_info *info = info_for_irq(data->irq);
839 	evtchn_port_t evtchn = info ? info->evtchn : 0;
840 	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
841 	int rc = 0;
842 
843 	if (!VALID_EVTCHN(evtchn))
844 		return;
845 
846 	event_handler_exit(info);
847 
848 	if (pirq_needs_eoi(data->irq)) {
849 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
850 		WARN_ON(rc);
851 	}
852 }
853 
/* Mask the PIRQ (disable_dynirq()) and then signal EOI for it. */
static void mask_ack_pirq(struct irq_data *data)
{
	disable_dynirq(data);
	eoi_pirq(data);
}
859 
/*
 * Start up PIRQ @irq: bind the pirq to an event channel unless one is
 * bound already, then unmask it and signal EOI.
 *
 * Always returns 0, failures are only logged. BUGs unless @irq is a PIRQ.
 */
static unsigned int __startup_pirq(unsigned int irq)
{
	struct evtchn_bind_pirq bind_pirq;
	struct irq_info *info = info_for_irq(irq);
	evtchn_port_t evtchn = evtchn_from_irq(irq);
	int rc;

	BUG_ON(info->type != IRQT_PIRQ);

	/* Already bound to an event channel: only unmask and EOI. */
	if (VALID_EVTCHN(evtchn))
		goto out;

	bind_pirq.pirq = pirq_from_irq(irq);
	/* NB. We are happy to share unless we are probing. */
	bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
					BIND_PIRQ__WILL_SHARE : 0;
	rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
	if (rc != 0) {
		pr_warn("Failed to obtain physical IRQ %d\n", irq);
		return 0;
	}
	evtchn = bind_pirq.port;

	/* Refresh the cached PIRQ_NEEDS_EOI flag. */
	pirq_query_unmask(irq);

	rc = set_evtchn_to_irq(evtchn, irq);
	if (rc)
		goto err;

	info->evtchn = evtchn;
	bind_evtchn_to_cpu(evtchn, 0, false);

	rc = xen_evtchn_port_setup(evtchn);
	if (rc)
		goto err;

out:
	do_unmask(info, EVT_MASK_REASON_EXPLICIT);

	eoi_pirq(irq_get_irq_data(irq));

	return 0;

err:
	/*
	 * NOTE(review): when xen_evtchn_port_setup() fails, info->evtchn and
	 * the evtchn -> irq mapping set above are left in place although the
	 * port is closed here -- confirm whether that is intended.
	 */
	pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
	xen_evtchn_close(evtchn);
	return 0;
}
908 
/* irq_data based wrapper around __startup_pirq(). */
static unsigned int startup_pirq(struct irq_data *data)
{
	return __startup_pirq(data->irq);
}
913 
914 static void shutdown_pirq(struct irq_data *data)
915 {
916 	unsigned int irq = data->irq;
917 	struct irq_info *info = info_for_irq(irq);
918 	evtchn_port_t evtchn = evtchn_from_irq(irq);
919 
920 	BUG_ON(info->type != IRQT_PIRQ);
921 
922 	if (!VALID_EVTCHN(evtchn))
923 		return;
924 
925 	do_mask(info, EVT_MASK_REASON_EXPLICIT);
926 	xen_evtchn_close(evtchn);
927 	xen_irq_info_cleanup(info);
928 }
929 
/* Enabling a PIRQ is the same as enabling a dynamic IRQ. */
static void enable_pirq(struct irq_data *data)
{
	enable_dynirq(data);
}
934 
/* Disabling a PIRQ is the same as disabling a dynamic IRQ. */
static void disable_pirq(struct irq_data *data)
{
	disable_dynirq(data);
}
939 
940 int xen_irq_from_gsi(unsigned gsi)
941 {
942 	struct irq_info *info;
943 
944 	list_for_each_entry(info, &xen_irq_list_head, list) {
945 		if (info->type != IRQT_PIRQ)
946 			continue;
947 
948 		if (info->u.pirq.gsi == gsi)
949 			return info->irq;
950 	}
951 
952 	return -1;
953 }
954 EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
955 
/*
 * Drop one reference to @irq and tear it down when it was the last one:
 * close the event channel (if any), undo the type specific bookkeeping and
 * free the IRQ. An irq_info whose refcnt is not positive (e.g. the initial
 * -1) is torn down right away.
 */
static void __unbind_from_irq(unsigned int irq)
{
	evtchn_port_t evtchn = evtchn_from_irq(irq);
	struct irq_info *info = info_for_irq(irq);

	if (info->refcnt > 0) {
		info->refcnt--;
		/* Still referenced by someone else. */
		if (info->refcnt != 0)
			return;
	}

	if (VALID_EVTCHN(evtchn)) {
		unsigned int cpu = cpu_from_irq(irq);
		struct xenbus_device *dev;

		xen_evtchn_close(evtchn);

		switch (type_from_irq(irq)) {
		case IRQT_VIRQ:
			per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
			break;
		case IRQT_IPI:
			per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
			break;
		case IRQT_EVTCHN:
			/* Undo the count taken in xen_irq_info_evtchn_setup(). */
			dev = info->u.interdomain;
			if (dev)
				atomic_dec(&dev->event_channels);
			break;
		default:
			break;
		}

		xen_irq_info_cleanup(info);
	}

	xen_free_irq(irq);
}
994 
/*
 * Do not make any assumptions regarding the relationship between the
 * IRQ number returned here and the Xen pirq argument.
 *
 * Note: We don't assign an event channel until the irq actually started
 * up.  Return an existing irq if we've already got one for the gsi.
 *
 * Shareable implies level triggered, not shareable implies edge
 * triggered here.
 *
 * Returns the IRQ number on success. On failure the return value is the
 * negative result of the IRQ allocation, -ENOSPC if the hypervisor refuses
 * to allocate a vector, or the error of xen_irq_info_pirq_setup().
 */
int xen_bind_pirq_gsi_to_irq(unsigned gsi,
			     unsigned pirq, int shareable, char *name)
{
	int irq = -1;
	struct physdev_irq irq_op;
	int ret;

	mutex_lock(&irq_mapping_update_lock);

	/* Reuse the IRQ if this gsi is bound already. */
	irq = xen_irq_from_gsi(gsi);
	if (irq != -1) {
		pr_info("%s: returning irq %d for gsi %u\n",
			__func__, irq, gsi);
		goto out;
	}

	irq = xen_allocate_irq_gsi(gsi);
	if (irq < 0)
		goto out;

	irq_op.irq = irq;
	irq_op.vector = 0;

	/* Only the privileged domain can do this. For non-priv, the pcifront
	 * driver provides a PCI bus that does the call to do exactly
	 * this in the priv domain. */
	if (xen_initial_domain() &&
	    HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
		xen_free_irq(irq);
		irq = -ENOSPC;
		goto out;
	}

	/* Event channel 0: none is assigned before the irq is started up. */
	ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
			       shareable ? PIRQ_SHAREABLE : 0);
	if (ret < 0) {
		__unbind_from_irq(irq);
		irq = ret;
		goto out;
	}

	pirq_query_unmask(irq);
	/* We try to use the handler with the appropriate semantic for the
	 * type of interrupt: if the interrupt is an edge triggered
	 * interrupt we use handle_edge_irq.
	 *
	 * On the other hand if the interrupt is level triggered we use
	 * handle_fasteoi_irq like the native code does for this kind of
	 * interrupts.
	 *
	 * Depending on the Xen version, pirq_needs_eoi might return true
	 * not only for level triggered interrupts but for edge triggered
	 * interrupts too. In any case Xen always honors the eoi mechanism,
	 * not injecting any more pirqs of the same kind if the first one
	 * hasn't received an eoi yet. Therefore using the fasteoi handler
	 * is the right choice either way.
	 */
	if (shareable)
		irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
				handle_fasteoi_irq, name);
	else
		irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
				handle_edge_irq, name);

out:
	mutex_unlock(&irq_mapping_update_lock);

	return irq;
}
1074 
1075 #ifdef CONFIG_PCI_MSI
1076 int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
1077 {
1078 	int rc;
1079 	struct physdev_get_free_pirq op_get_free_pirq;
1080 
1081 	op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
1082 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
1083 
1084 	WARN_ONCE(rc == -ENOSYS,
1085 		  "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
1086 
1087 	return rc ? -1 : op_get_free_pirq.pirq;
1088 }
1089 
/*
 * Bind @nvec consecutive pirqs starting at @pirq to newly allocated,
 * consecutive IRQs for an MSI of @dev owned by domain @domid.
 *
 * All vectors but the first are flagged PIRQ_MSI_GROUP. @msidesc is
 * attached to the first IRQ.
 *
 * Returns the first IRQ number on success or a negative error; on error
 * all IRQs allocated here are unbound again.
 */
int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
			     int pirq, int nvec, const char *name, domid_t domid)
{
	int i, irq, ret;

	mutex_lock(&irq_mapping_update_lock);

	irq = xen_allocate_irqs_dynamic(nvec);
	if (irq < 0)
		goto out;

	for (i = 0; i < nvec; i++) {
		irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);

		/* Event channel 0: none is assigned yet. */
		ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
					      i == 0 ? 0 : PIRQ_MSI_GROUP);
		if (ret < 0)
			goto error_irq;
	}

	ret = irq_set_msi_desc(irq, msidesc);
	if (ret < 0)
		goto error_irq;
out:
	mutex_unlock(&irq_mapping_update_lock);
	return irq;
error_irq:
	/* Unbind every IRQ of the group, last one first. */
	while (nvec--)
		__unbind_from_irq(irq + nvec);
	mutex_unlock(&irq_mapping_update_lock);
	return ret;
}
1122 #endif
1123 
/*
 * Destroy PIRQ @irq: in the initial domain unmap the pirq in the
 * hypervisor (unless the vector is a non-first member of an MSI group),
 * then free the IRQ.
 *
 * Returns the result of the unmap hypercall when it was issued. The IRQ is
 * not freed when the unmap fails, except for -ESRCH on a foreign domain.
 *
 * NOTE(review): rc starts out as -ENOENT and is only overwritten by the
 * hypercall, so -ENOENT is returned whenever the unmap is skipped even
 * though the IRQ was freed -- confirm callers do not rely on the value.
 */
int xen_destroy_irq(int irq)
{
	struct physdev_unmap_pirq unmap_irq;
	struct irq_info *info = info_for_irq(irq);
	int rc = -ENOENT;

	mutex_lock(&irq_mapping_update_lock);

	/*
	 * If trying to remove a vector in a MSI group different
	 * than the first one skip the PIRQ unmap unless this vector
	 * is the first one in the group.
	 */
	if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
		unmap_irq.pirq = info->u.pirq.pirq;
		unmap_irq.domid = info->u.pirq.domid;
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
		/* If another domain quits without making the pci_disable_msix
		 * call, the Xen hypervisor takes care of freeing the PIRQs
		 * (free_domain_pirqs).
		 */
		if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
			pr_info("domain %d does not have %d anymore\n",
				info->u.pirq.domid, info->u.pirq.pirq);
		else if (rc) {
			pr_warn("unmap irq failed %d\n", rc);
			goto out;
		}
	}

	xen_free_irq(irq);

out:
	mutex_unlock(&irq_mapping_update_lock);
	return rc;
}
1160 
1161 int xen_irq_from_pirq(unsigned pirq)
1162 {
1163 	int irq;
1164 
1165 	struct irq_info *info;
1166 
1167 	mutex_lock(&irq_mapping_update_lock);
1168 
1169 	list_for_each_entry(info, &xen_irq_list_head, list) {
1170 		if (info->type != IRQT_PIRQ)
1171 			continue;
1172 		irq = info->irq;
1173 		if (info->u.pirq.pirq == pirq)
1174 			goto out;
1175 	}
1176 	irq = -1;
1177 out:
1178 	mutex_unlock(&irq_mapping_update_lock);
1179 
1180 	return irq;
1181 }
1182 
1183 
/* Exported wrapper around pirq_from_irq(): map a Linux irq to its pirq. */
int xen_pirq_from_irq(unsigned irq)
{
	int pirq = pirq_from_irq(irq);

	return pirq;
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
1189 
1190 static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
1191 				   struct xenbus_device *dev)
1192 {
1193 	int irq;
1194 	int ret;
1195 
1196 	if (evtchn >= xen_evtchn_max_channels())
1197 		return -ENOMEM;
1198 
1199 	mutex_lock(&irq_mapping_update_lock);
1200 
1201 	irq = get_evtchn_to_irq(evtchn);
1202 
1203 	if (irq == -1) {
1204 		irq = xen_allocate_irq_dynamic();
1205 		if (irq < 0)
1206 			goto out;
1207 
1208 		irq_set_chip_and_handler_name(irq, chip,
1209 					      handle_edge_irq, "event");
1210 
1211 		ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
1212 		if (ret < 0) {
1213 			__unbind_from_irq(irq);
1214 			irq = ret;
1215 			goto out;
1216 		}
1217 		/*
1218 		 * New interdomain events are initially bound to vCPU0 This
1219 		 * is required to setup the event channel in the first
1220 		 * place and also important for UP guests because the
1221 		 * affinity setting is not invoked on them so nothing would
1222 		 * bind the channel.
1223 		 */
1224 		bind_evtchn_to_cpu(evtchn, 0, false);
1225 	} else {
1226 		struct irq_info *info = info_for_irq(irq);
1227 		WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
1228 	}
1229 
1230 out:
1231 	mutex_unlock(&irq_mapping_update_lock);
1232 
1233 	return irq;
1234 }
1235 
1236 int bind_evtchn_to_irq(evtchn_port_t evtchn)
1237 {
1238 	return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
1239 }
1240 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
1241 
1242 static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
1243 {
1244 	struct evtchn_bind_ipi bind_ipi;
1245 	evtchn_port_t evtchn;
1246 	int ret, irq;
1247 
1248 	mutex_lock(&irq_mapping_update_lock);
1249 
1250 	irq = per_cpu(ipi_to_irq, cpu)[ipi];
1251 
1252 	if (irq == -1) {
1253 		irq = xen_allocate_irq_dynamic();
1254 		if (irq < 0)
1255 			goto out;
1256 
1257 		irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1258 					      handle_percpu_irq, "ipi");
1259 
1260 		bind_ipi.vcpu = xen_vcpu_nr(cpu);
1261 		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1262 						&bind_ipi) != 0)
1263 			BUG();
1264 		evtchn = bind_ipi.port;
1265 
1266 		ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1267 		if (ret < 0) {
1268 			__unbind_from_irq(irq);
1269 			irq = ret;
1270 			goto out;
1271 		}
1272 		/*
1273 		 * Force the affinity mask to the target CPU so proc shows
1274 		 * the correct target.
1275 		 */
1276 		bind_evtchn_to_cpu(evtchn, cpu, true);
1277 	} else {
1278 		struct irq_info *info = info_for_irq(irq);
1279 		WARN_ON(info == NULL || info->type != IRQT_IPI);
1280 	}
1281 
1282  out:
1283 	mutex_unlock(&irq_mapping_update_lock);
1284 	return irq;
1285 }
1286 
1287 static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
1288 					       evtchn_port_t remote_port,
1289 					       struct irq_chip *chip)
1290 {
1291 	struct evtchn_bind_interdomain bind_interdomain;
1292 	int err;
1293 
1294 	bind_interdomain.remote_dom  = dev->otherend_id;
1295 	bind_interdomain.remote_port = remote_port;
1296 
1297 	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
1298 					  &bind_interdomain);
1299 
1300 	return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
1301 					       chip, dev);
1302 }
1303 
1304 int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
1305 					   evtchn_port_t remote_port)
1306 {
1307 	return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
1308 						   &xen_lateeoi_chip);
1309 }
1310 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
1311 
1312 static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
1313 {
1314 	struct evtchn_status status;
1315 	evtchn_port_t port;
1316 	int rc = -ENOENT;
1317 
1318 	memset(&status, 0, sizeof(status));
1319 	for (port = 0; port < xen_evtchn_max_channels(); port++) {
1320 		status.dom = DOMID_SELF;
1321 		status.port = port;
1322 		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
1323 		if (rc < 0)
1324 			continue;
1325 		if (status.status != EVTCHNSTAT_virq)
1326 			continue;
1327 		if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
1328 			*evtchn = port;
1329 			break;
1330 		}
1331 	}
1332 	return rc;
1333 }
1334 
1335 /**
1336  * xen_evtchn_nr_channels - number of usable event channel ports
1337  *
1338  * This may be less than the maximum supported by the current
1339  * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
1340  * supported.
1341  */
1342 unsigned xen_evtchn_nr_channels(void)
1343 {
1344         return evtchn_ops->nr_channels();
1345 }
1346 EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
1347 
1348 int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
1349 {
1350 	struct evtchn_bind_virq bind_virq;
1351 	evtchn_port_t evtchn = 0;
1352 	int irq, ret;
1353 
1354 	mutex_lock(&irq_mapping_update_lock);
1355 
1356 	irq = per_cpu(virq_to_irq, cpu)[virq];
1357 
1358 	if (irq == -1) {
1359 		irq = xen_allocate_irq_dynamic();
1360 		if (irq < 0)
1361 			goto out;
1362 
1363 		if (percpu)
1364 			irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1365 						      handle_percpu_irq, "virq");
1366 		else
1367 			irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
1368 						      handle_edge_irq, "virq");
1369 
1370 		bind_virq.virq = virq;
1371 		bind_virq.vcpu = xen_vcpu_nr(cpu);
1372 		ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1373 						&bind_virq);
1374 		if (ret == 0)
1375 			evtchn = bind_virq.port;
1376 		else {
1377 			if (ret == -EEXIST)
1378 				ret = find_virq(virq, cpu, &evtchn);
1379 			BUG_ON(ret < 0);
1380 		}
1381 
1382 		ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1383 		if (ret < 0) {
1384 			__unbind_from_irq(irq);
1385 			irq = ret;
1386 			goto out;
1387 		}
1388 
1389 		/*
1390 		 * Force the affinity mask for percpu interrupts so proc
1391 		 * shows the correct target.
1392 		 */
1393 		bind_evtchn_to_cpu(evtchn, cpu, percpu);
1394 	} else {
1395 		struct irq_info *info = info_for_irq(irq);
1396 		WARN_ON(info == NULL || info->type != IRQT_VIRQ);
1397 	}
1398 
1399 out:
1400 	mutex_unlock(&irq_mapping_update_lock);
1401 
1402 	return irq;
1403 }
1404 
1405 static void unbind_from_irq(unsigned int irq)
1406 {
1407 	mutex_lock(&irq_mapping_update_lock);
1408 	__unbind_from_irq(irq);
1409 	mutex_unlock(&irq_mapping_update_lock);
1410 }
1411 
1412 static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
1413 					  irq_handler_t handler,
1414 					  unsigned long irqflags,
1415 					  const char *devname, void *dev_id,
1416 					  struct irq_chip *chip)
1417 {
1418 	int irq, retval;
1419 
1420 	irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
1421 	if (irq < 0)
1422 		return irq;
1423 	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1424 	if (retval != 0) {
1425 		unbind_from_irq(irq);
1426 		return retval;
1427 	}
1428 
1429 	return irq;
1430 }
1431 
1432 int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
1433 			      irq_handler_t handler,
1434 			      unsigned long irqflags,
1435 			      const char *devname, void *dev_id)
1436 {
1437 	return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1438 					      devname, dev_id,
1439 					      &xen_dynamic_chip);
1440 }
1441 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
1442 
1443 int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
1444 				      irq_handler_t handler,
1445 				      unsigned long irqflags,
1446 				      const char *devname, void *dev_id)
1447 {
1448 	return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1449 					      devname, dev_id,
1450 					      &xen_lateeoi_chip);
1451 }
1452 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
1453 
1454 static int bind_interdomain_evtchn_to_irqhandler_chip(
1455 		struct xenbus_device *dev, evtchn_port_t remote_port,
1456 		irq_handler_t handler, unsigned long irqflags,
1457 		const char *devname, void *dev_id, struct irq_chip *chip)
1458 {
1459 	int irq, retval;
1460 
1461 	irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
1462 	if (irq < 0)
1463 		return irq;
1464 
1465 	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1466 	if (retval != 0) {
1467 		unbind_from_irq(irq);
1468 		return retval;
1469 	}
1470 
1471 	return irq;
1472 }
1473 
1474 int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
1475 						  evtchn_port_t remote_port,
1476 						  irq_handler_t handler,
1477 						  unsigned long irqflags,
1478 						  const char *devname,
1479 						  void *dev_id)
1480 {
1481 	return bind_interdomain_evtchn_to_irqhandler_chip(dev,
1482 				remote_port, handler, irqflags, devname,
1483 				dev_id, &xen_lateeoi_chip);
1484 }
1485 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
1486 
1487 int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
1488 			    irq_handler_t handler,
1489 			    unsigned long irqflags, const char *devname, void *dev_id)
1490 {
1491 	int irq, retval;
1492 
1493 	irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
1494 	if (irq < 0)
1495 		return irq;
1496 	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1497 	if (retval != 0) {
1498 		unbind_from_irq(irq);
1499 		return retval;
1500 	}
1501 
1502 	return irq;
1503 }
1504 EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
1505 
1506 int bind_ipi_to_irqhandler(enum ipi_vector ipi,
1507 			   unsigned int cpu,
1508 			   irq_handler_t handler,
1509 			   unsigned long irqflags,
1510 			   const char *devname,
1511 			   void *dev_id)
1512 {
1513 	int irq, retval;
1514 
1515 	irq = bind_ipi_to_irq(ipi, cpu);
1516 	if (irq < 0)
1517 		return irq;
1518 
1519 	irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
1520 	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1521 	if (retval != 0) {
1522 		unbind_from_irq(irq);
1523 		return retval;
1524 	}
1525 
1526 	return irq;
1527 }
1528 
/*
 * Release the handler registered with @dev_id on @irq and drop the irq's
 * binding. Warns and does nothing when @irq has no irq_info.
 */
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
{
	if (WARN_ON(!info_for_irq(irq)))
		return;

	free_irq(irq, dev_id);
	unbind_from_irq(irq);
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1539 
1540 /**
1541  * xen_set_irq_priority() - set an event channel priority.
1542  * @irq:irq bound to an event channel.
1543  * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1544  */
1545 int xen_set_irq_priority(unsigned irq, unsigned priority)
1546 {
1547 	struct evtchn_set_priority set_priority;
1548 
1549 	set_priority.port = evtchn_from_irq(irq);
1550 	set_priority.priority = priority;
1551 
1552 	return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1553 					   &set_priority);
1554 }
1555 EXPORT_SYMBOL_GPL(xen_set_irq_priority);
1556 
1557 int evtchn_make_refcounted(evtchn_port_t evtchn)
1558 {
1559 	int irq = get_evtchn_to_irq(evtchn);
1560 	struct irq_info *info;
1561 
1562 	if (irq == -1)
1563 		return -ENOENT;
1564 
1565 	info = info_for_irq(irq);
1566 
1567 	if (!info)
1568 		return -ENOENT;
1569 
1570 	WARN_ON(info->refcnt != -1);
1571 
1572 	info->refcnt = 1;
1573 
1574 	return 0;
1575 }
1576 EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
1577 
1578 int evtchn_get(evtchn_port_t evtchn)
1579 {
1580 	int irq;
1581 	struct irq_info *info;
1582 	int err = -ENOENT;
1583 
1584 	if (evtchn >= xen_evtchn_max_channels())
1585 		return -EINVAL;
1586 
1587 	mutex_lock(&irq_mapping_update_lock);
1588 
1589 	irq = get_evtchn_to_irq(evtchn);
1590 	if (irq == -1)
1591 		goto done;
1592 
1593 	info = info_for_irq(irq);
1594 
1595 	if (!info)
1596 		goto done;
1597 
1598 	err = -EINVAL;
1599 	if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
1600 		goto done;
1601 
1602 	info->refcnt++;
1603 	err = 0;
1604  done:
1605 	mutex_unlock(&irq_mapping_update_lock);
1606 
1607 	return err;
1608 }
1609 EXPORT_SYMBOL_GPL(evtchn_get);
1610 
1611 void evtchn_put(evtchn_port_t evtchn)
1612 {
1613 	int irq = get_evtchn_to_irq(evtchn);
1614 	if (WARN_ON(irq == -1))
1615 		return;
1616 	unbind_from_irq(irq);
1617 }
1618 EXPORT_SYMBOL_GPL(evtchn_put);
1619 
1620 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1621 {
1622 	int irq;
1623 
1624 #ifdef CONFIG_X86
1625 	if (unlikely(vector == XEN_NMI_VECTOR)) {
1626 		int rc =  HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
1627 					     NULL);
1628 		if (rc < 0)
1629 			printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
1630 		return;
1631 	}
1632 #endif
1633 	irq = per_cpu(ipi_to_irq, cpu)[vector];
1634 	BUG_ON(irq < 0);
1635 	notify_remote_via_irq(irq);
1636 }
1637 
1638 struct evtchn_loop_ctrl {
1639 	ktime_t timeout;
1640 	unsigned count;
1641 	bool defer_eoi;
1642 };
1643 
1644 void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
1645 {
1646 	int irq;
1647 	struct irq_info *info;
1648 	struct xenbus_device *dev;
1649 
1650 	irq = get_evtchn_to_irq(port);
1651 	if (irq == -1)
1652 		return;
1653 
1654 	/*
1655 	 * Check for timeout every 256 events.
1656 	 * We are setting the timeout value only after the first 256
1657 	 * events in order to not hurt the common case of few loop
1658 	 * iterations. The 256 is basically an arbitrary value.
1659 	 *
1660 	 * In case we are hitting the timeout we need to defer all further
1661 	 * EOIs in order to ensure to leave the event handling loop rather
1662 	 * sooner than later.
1663 	 */
1664 	if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
1665 		ktime_t kt = ktime_get();
1666 
1667 		if (!ctrl->timeout) {
1668 			kt = ktime_add_ms(kt,
1669 					  jiffies_to_msecs(event_loop_timeout));
1670 			ctrl->timeout = kt;
1671 		} else if (kt > ctrl->timeout) {
1672 			ctrl->defer_eoi = true;
1673 		}
1674 	}
1675 
1676 	info = info_for_irq(irq);
1677 	if (xchg_acquire(&info->is_active, 1))
1678 		return;
1679 
1680 	dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
1681 	if (dev)
1682 		atomic_inc(&dev->events);
1683 
1684 	if (ctrl->defer_eoi) {
1685 		info->eoi_cpu = smp_processor_id();
1686 		info->irq_epoch = __this_cpu_read(irq_epoch);
1687 		info->eoi_time = get_jiffies_64() + event_eoi_delay;
1688 	}
1689 
1690 	generic_handle_irq(irq);
1691 }
1692 
1693 static void __xen_evtchn_do_upcall(void)
1694 {
1695 	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1696 	int cpu = smp_processor_id();
1697 	struct evtchn_loop_ctrl ctrl = { 0 };
1698 
1699 	read_lock(&evtchn_rwlock);
1700 
1701 	do {
1702 		vcpu_info->evtchn_upcall_pending = 0;
1703 
1704 		xen_evtchn_handle_events(cpu, &ctrl);
1705 
1706 		BUG_ON(!irqs_disabled());
1707 
1708 		virt_rmb(); /* Hypervisor can set upcall pending. */
1709 
1710 	} while (vcpu_info->evtchn_upcall_pending);
1711 
1712 	read_unlock(&evtchn_rwlock);
1713 
1714 	/*
1715 	 * Increment irq_epoch only now to defer EOIs only for
1716 	 * xen_irq_lateeoi() invocations occurring from inside the loop
1717 	 * above.
1718 	 */
1719 	__this_cpu_inc(irq_epoch);
1720 }
1721 
/*
 * Upcall entry point taking a register frame: installs @regs as the
 * current irq regs and brackets the core handler with irq_enter() and
 * irq_exit(), restoring the previous regs afterwards.
 */
void xen_evtchn_do_upcall(struct pt_regs *regs)
{
	struct pt_regs *saved_regs;

	saved_regs = set_irq_regs(regs);
	irq_enter();

	__xen_evtchn_do_upcall();

	irq_exit();
	set_irq_regs(saved_regs);
}
1733 
/*
 * Exported upcall entry point that runs the core handler directly,
 * without the irq_enter()/irq_exit() bracketing of xen_evtchn_do_upcall().
 */
void xen_hvm_evtchn_do_upcall(void)
{
	__xen_evtchn_do_upcall();
}
EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
1739 
1740 /* Rebind a new event channel to an existing irq. */
1741 void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
1742 {
1743 	struct irq_info *info = info_for_irq(irq);
1744 
1745 	if (WARN_ON(!info))
1746 		return;
1747 
1748 	/* Make sure the irq is masked, since the new event channel
1749 	   will also be masked. */
1750 	disable_irq(irq);
1751 
1752 	mutex_lock(&irq_mapping_update_lock);
1753 
1754 	/* After resume the irq<->evtchn mappings are all cleared out */
1755 	BUG_ON(get_evtchn_to_irq(evtchn) != -1);
1756 	/* Expect irq to have been bound before,
1757 	   so there should be a proper type */
1758 	BUG_ON(info->type == IRQT_UNBOUND);
1759 
1760 	(void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
1761 
1762 	mutex_unlock(&irq_mapping_update_lock);
1763 
1764 	bind_evtchn_to_cpu(evtchn, info->cpu, false);
1765 
1766 	/* Unmask the event channel. */
1767 	enable_irq(irq);
1768 }
1769 
1770 /* Rebind an evtchn so that it gets delivered to a specific cpu */
1771 static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
1772 {
1773 	struct evtchn_bind_vcpu bind_vcpu;
1774 	evtchn_port_t evtchn = info ? info->evtchn : 0;
1775 
1776 	if (!VALID_EVTCHN(evtchn))
1777 		return -1;
1778 
1779 	if (!xen_support_evtchn_rebind())
1780 		return -1;
1781 
1782 	/* Send future instances of this interrupt to other vcpu. */
1783 	bind_vcpu.port = evtchn;
1784 	bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
1785 
1786 	/*
1787 	 * Mask the event while changing the VCPU binding to prevent
1788 	 * it being delivered on an unexpected VCPU.
1789 	 */
1790 	do_mask(info, EVT_MASK_REASON_TEMPORARY);
1791 
1792 	/*
1793 	 * If this fails, it usually just indicates that we're dealing with a
1794 	 * virq or IPI channel, which don't actually need to be rebound. Ignore
1795 	 * it, but don't do the xenlinux-level rebind in that case.
1796 	 */
1797 	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
1798 		bind_evtchn_to_cpu(evtchn, tcpu, false);
1799 
1800 	do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1801 
1802 	return 0;
1803 }
1804 
1805 /*
1806  * Find the CPU within @dest mask which has the least number of channels
1807  * assigned. This is not precise as the per cpu counts can be modified
1808  * concurrently.
1809  */
1810 static unsigned int select_target_cpu(const struct cpumask *dest)
1811 {
1812 	unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
1813 
1814 	for_each_cpu_and(cpu, dest, cpu_online_mask) {
1815 		unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
1816 
1817 		if (curch < minch) {
1818 			minch = curch;
1819 			best_cpu = cpu;
1820 		}
1821 	}
1822 
1823 	/*
1824 	 * Catch the unlikely case that dest contains no online CPUs. Can't
1825 	 * recurse.
1826 	 */
1827 	if (best_cpu == UINT_MAX)
1828 		return select_target_cpu(cpu_online_mask);
1829 
1830 	return best_cpu;
1831 }
1832 
1833 static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1834 			    bool force)
1835 {
1836 	unsigned int tcpu = select_target_cpu(dest);
1837 	int ret;
1838 
1839 	ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
1840 	if (!ret)
1841 		irq_data_update_effective_affinity(data, cpumask_of(tcpu));
1842 
1843 	return ret;
1844 }
1845 
1846 static void enable_dynirq(struct irq_data *data)
1847 {
1848 	struct irq_info *info = info_for_irq(data->irq);
1849 	evtchn_port_t evtchn = info ? info->evtchn : 0;
1850 
1851 	if (VALID_EVTCHN(evtchn))
1852 		do_unmask(info, EVT_MASK_REASON_EXPLICIT);
1853 }
1854 
1855 static void disable_dynirq(struct irq_data *data)
1856 {
1857 	struct irq_info *info = info_for_irq(data->irq);
1858 	evtchn_port_t evtchn = info ? info->evtchn : 0;
1859 
1860 	if (VALID_EVTCHN(evtchn))
1861 		do_mask(info, EVT_MASK_REASON_EXPLICIT);
1862 }
1863 
1864 static void ack_dynirq(struct irq_data *data)
1865 {
1866 	struct irq_info *info = info_for_irq(data->irq);
1867 	evtchn_port_t evtchn = info ? info->evtchn : 0;
1868 
1869 	if (VALID_EVTCHN(evtchn))
1870 		event_handler_exit(info);
1871 }
1872 
/* Combined callback: mask first, then acknowledge. */
static void mask_ack_dynirq(struct irq_data *data)
{
	disable_dynirq(data);

	ack_dynirq(data);
}
1878 
1879 static void lateeoi_ack_dynirq(struct irq_data *data)
1880 {
1881 	struct irq_info *info = info_for_irq(data->irq);
1882 	evtchn_port_t evtchn = info ? info->evtchn : 0;
1883 
1884 	if (VALID_EVTCHN(evtchn)) {
1885 		do_mask(info, EVT_MASK_REASON_EOI_PENDING);
1886 		event_handler_exit(info);
1887 	}
1888 }
1889 
1890 static void lateeoi_mask_ack_dynirq(struct irq_data *data)
1891 {
1892 	struct irq_info *info = info_for_irq(data->irq);
1893 	evtchn_port_t evtchn = info ? info->evtchn : 0;
1894 
1895 	if (VALID_EVTCHN(evtchn)) {
1896 		do_mask(info, EVT_MASK_REASON_EXPLICIT);
1897 		event_handler_exit(info);
1898 	}
1899 }
1900 
1901 static int retrigger_dynirq(struct irq_data *data)
1902 {
1903 	struct irq_info *info = info_for_irq(data->irq);
1904 	evtchn_port_t evtchn = info ? info->evtchn : 0;
1905 
1906 	if (!VALID_EVTCHN(evtchn))
1907 		return 0;
1908 
1909 	do_mask(info, EVT_MASK_REASON_TEMPORARY);
1910 	set_evtchn(evtchn);
1911 	do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1912 
1913 	return 1;
1914 }
1915 
1916 static void restore_pirqs(void)
1917 {
1918 	int pirq, rc, irq, gsi;
1919 	struct physdev_map_pirq map_irq;
1920 	struct irq_info *info;
1921 
1922 	list_for_each_entry(info, &xen_irq_list_head, list) {
1923 		if (info->type != IRQT_PIRQ)
1924 			continue;
1925 
1926 		pirq = info->u.pirq.pirq;
1927 		gsi = info->u.pirq.gsi;
1928 		irq = info->irq;
1929 
1930 		/* save/restore of PT devices doesn't work, so at this point the
1931 		 * only devices present are GSI based emulated devices */
1932 		if (!gsi)
1933 			continue;
1934 
1935 		map_irq.domid = DOMID_SELF;
1936 		map_irq.type = MAP_PIRQ_TYPE_GSI;
1937 		map_irq.index = gsi;
1938 		map_irq.pirq = pirq;
1939 
1940 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1941 		if (rc) {
1942 			pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1943 				gsi, irq, pirq, rc);
1944 			xen_free_irq(irq);
1945 			continue;
1946 		}
1947 
1948 		printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1949 
1950 		__startup_pirq(irq);
1951 	}
1952 }
1953 
1954 static void restore_cpu_virqs(unsigned int cpu)
1955 {
1956 	struct evtchn_bind_virq bind_virq;
1957 	evtchn_port_t evtchn;
1958 	int virq, irq;
1959 
1960 	for (virq = 0; virq < NR_VIRQS; virq++) {
1961 		if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
1962 			continue;
1963 
1964 		BUG_ON(virq_from_irq(irq) != virq);
1965 
1966 		/* Get a new binding from Xen. */
1967 		bind_virq.virq = virq;
1968 		bind_virq.vcpu = xen_vcpu_nr(cpu);
1969 		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1970 						&bind_virq) != 0)
1971 			BUG();
1972 		evtchn = bind_virq.port;
1973 
1974 		/* Record the new mapping. */
1975 		(void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1976 		/* The affinity mask is still valid */
1977 		bind_evtchn_to_cpu(evtchn, cpu, false);
1978 	}
1979 }
1980 
1981 static void restore_cpu_ipis(unsigned int cpu)
1982 {
1983 	struct evtchn_bind_ipi bind_ipi;
1984 	evtchn_port_t evtchn;
1985 	int ipi, irq;
1986 
1987 	for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
1988 		if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
1989 			continue;
1990 
1991 		BUG_ON(ipi_from_irq(irq) != ipi);
1992 
1993 		/* Get a new binding from Xen. */
1994 		bind_ipi.vcpu = xen_vcpu_nr(cpu);
1995 		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1996 						&bind_ipi) != 0)
1997 			BUG();
1998 		evtchn = bind_ipi.port;
1999 
2000 		/* Record the new mapping. */
2001 		(void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
2002 		/* The affinity mask is still valid */
2003 		bind_evtchn_to_cpu(evtchn, cpu, false);
2004 	}
2005 }
2006 
2007 /* Clear an irq's pending state, in preparation for polling on it */
2008 void xen_clear_irq_pending(int irq)
2009 {
2010 	struct irq_info *info = info_for_irq(irq);
2011 	evtchn_port_t evtchn = info ? info->evtchn : 0;
2012 
2013 	if (VALID_EVTCHN(evtchn))
2014 		event_handler_exit(info);
2015 }
2016 EXPORT_SYMBOL(xen_clear_irq_pending);
2017 void xen_set_irq_pending(int irq)
2018 {
2019 	evtchn_port_t evtchn = evtchn_from_irq(irq);
2020 
2021 	if (VALID_EVTCHN(evtchn))
2022 		set_evtchn(evtchn);
2023 }
2024 
2025 bool xen_test_irq_pending(int irq)
2026 {
2027 	evtchn_port_t evtchn = evtchn_from_irq(irq);
2028 	bool ret = false;
2029 
2030 	if (VALID_EVTCHN(evtchn))
2031 		ret = test_evtchn(evtchn);
2032 
2033 	return ret;
2034 }
2035 
2036 /* Poll waiting for an irq to become pending with timeout.  In the usual case,
2037  * the irq will be disabled so it won't deliver an interrupt. */
2038 void xen_poll_irq_timeout(int irq, u64 timeout)
2039 {
2040 	evtchn_port_t evtchn = evtchn_from_irq(irq);
2041 
2042 	if (VALID_EVTCHN(evtchn)) {
2043 		struct sched_poll poll;
2044 
2045 		poll.nr_ports = 1;
2046 		poll.timeout = timeout;
2047 		set_xen_guest_handle(poll.ports, &evtchn);
2048 
2049 		if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
2050 			BUG();
2051 	}
2052 }
2053 EXPORT_SYMBOL(xen_poll_irq_timeout);
2054 /* Poll waiting for an irq to become pending.  In the usual case, the
2055  * irq will be disabled so it won't deliver an interrupt. */
2056 void xen_poll_irq(int irq)
2057 {
2058 	xen_poll_irq_timeout(irq, 0 /* no timeout */);
2059 }
2060 
2061 /* Check whether the IRQ line is shared with other guests. */
2062 int xen_test_irq_shared(int irq)
2063 {
2064 	struct irq_info *info = info_for_irq(irq);
2065 	struct physdev_irq_status_query irq_status;
2066 
2067 	if (WARN_ON(!info))
2068 		return -ENOENT;
2069 
2070 	irq_status.irq = info->u.pirq.pirq;
2071 
2072 	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
2073 		return 0;
2074 	return !(irq_status.flags & XENIRQSTAT_shared);
2075 }
2076 EXPORT_SYMBOL_GPL(xen_test_irq_shared);
2077 
2078 void xen_irq_resume(void)
2079 {
2080 	unsigned int cpu;
2081 	struct irq_info *info;
2082 
2083 	/* New event-channel space is not 'live' yet. */
2084 	xen_evtchn_resume();
2085 
2086 	/* No IRQ <-> event-channel mappings. */
2087 	list_for_each_entry(info, &xen_irq_list_head, list) {
2088 		/* Zap event-channel binding */
2089 		info->evtchn = 0;
2090 		/* Adjust accounting */
2091 		channels_on_cpu_dec(info);
2092 	}
2093 
2094 	clear_evtchn_to_irq_all();
2095 
2096 	for_each_possible_cpu(cpu) {
2097 		restore_cpu_virqs(cpu);
2098 		restore_cpu_ipis(cpu);
2099 	}
2100 
2101 	restore_pirqs();
2102 }
2103 
2104 static struct irq_chip xen_dynamic_chip __read_mostly = {
2105 	.name			= "xen-dyn",
2106 
2107 	.irq_disable		= disable_dynirq,
2108 	.irq_mask		= disable_dynirq,
2109 	.irq_unmask		= enable_dynirq,
2110 
2111 	.irq_ack		= ack_dynirq,
2112 	.irq_mask_ack		= mask_ack_dynirq,
2113 
2114 	.irq_set_affinity	= set_affinity_irq,
2115 	.irq_retrigger		= retrigger_dynirq,
2116 };
2117 
2118 static struct irq_chip xen_lateeoi_chip __read_mostly = {
2119 	/* The chip name needs to contain "xen-dyn" for irqbalance to work. */
2120 	.name			= "xen-dyn-lateeoi",
2121 
2122 	.irq_disable		= disable_dynirq,
2123 	.irq_mask		= disable_dynirq,
2124 	.irq_unmask		= enable_dynirq,
2125 
2126 	.irq_ack		= lateeoi_ack_dynirq,
2127 	.irq_mask_ack		= lateeoi_mask_ack_dynirq,
2128 
2129 	.irq_set_affinity	= set_affinity_irq,
2130 	.irq_retrigger		= retrigger_dynirq,
2131 };
2132 
2133 static struct irq_chip xen_pirq_chip __read_mostly = {
2134 	.name			= "xen-pirq",
2135 
2136 	.irq_startup		= startup_pirq,
2137 	.irq_shutdown		= shutdown_pirq,
2138 	.irq_enable		= enable_pirq,
2139 	.irq_disable		= disable_pirq,
2140 
2141 	.irq_mask		= disable_dynirq,
2142 	.irq_unmask		= enable_dynirq,
2143 
2144 	.irq_ack		= eoi_pirq,
2145 	.irq_eoi		= eoi_pirq,
2146 	.irq_mask_ack		= mask_ack_pirq,
2147 
2148 	.irq_set_affinity	= set_affinity_irq,
2149 
2150 	.irq_retrigger		= retrigger_dynirq,
2151 };
2152 
2153 static struct irq_chip xen_percpu_chip __read_mostly = {
2154 	.name			= "xen-percpu",
2155 
2156 	.irq_disable		= disable_dynirq,
2157 	.irq_mask		= disable_dynirq,
2158 	.irq_unmask		= enable_dynirq,
2159 
2160 	.irq_ack		= ack_dynirq,
2161 };
2162 
#ifdef CONFIG_XEN_PVHVM
/*
 * Vector callbacks are better than PCI interrupts to receive event
 * channel notifications because we can receive vector callbacks on any
 * vcpu and we don't need PCI support or APIC interactions.
 *
 * Request the callback vector from Xen; if the request fails, vector
 * callbacks are marked as unavailable.
 */
void xen_setup_callback_vector(void)
{
	uint64_t callback_via;

	if (!xen_have_vector_callback)
		return;

	callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
	if (!xen_set_callback_via(callback_via))
		return;

	pr_err("Request for Xen HVM callback vector failed\n");
	xen_have_vector_callback = 0;
}

/* Install the interrupt gate for the callback vector when it is in use. */
static __init void xen_alloc_callback_vector(void)
{
	if (xen_have_vector_callback) {
		pr_info("Xen HVM callback vector for event delivery is enabled\n");
		alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
				asm_sysvec_xen_hvm_callback);
	}
}
#else
/* Without CONFIG_XEN_PVHVM both hooks are no-ops. */
void xen_setup_callback_vector(void) {}
static inline void xen_alloc_callback_vector(void) {}
#endif
2192 
/*
 * Whether to try the FIFO-based event channel implementation, exposed as
 * the "fifo_events" module parameter. xen_init_IRQ() clears it when
 * xen_evtchn_fifo_init() fails and xen_evtchn_2l_init() is used instead.
 */
bool xen_fifo_events = true;
module_param_named(fifo_events, xen_fifo_events, bool, 0);
2195 
2196 static int xen_evtchn_cpu_prepare(unsigned int cpu)
2197 {
2198 	int ret = 0;
2199 
2200 	xen_cpu_init_eoi(cpu);
2201 
2202 	if (evtchn_ops->percpu_init)
2203 		ret = evtchn_ops->percpu_init(cpu);
2204 
2205 	return ret;
2206 }
2207 
2208 static int xen_evtchn_cpu_dead(unsigned int cpu)
2209 {
2210 	int ret = 0;
2211 
2212 	if (evtchn_ops->percpu_deinit)
2213 		ret = evtchn_ops->percpu_deinit(cpu);
2214 
2215 	return ret;
2216 }
2217 
2218 void __init xen_init_IRQ(void)
2219 {
2220 	int ret = -EINVAL;
2221 	evtchn_port_t evtchn;
2222 
2223 	if (xen_fifo_events)
2224 		ret = xen_evtchn_fifo_init();
2225 	if (ret < 0) {
2226 		xen_evtchn_2l_init();
2227 		xen_fifo_events = false;
2228 	}
2229 
2230 	xen_cpu_init_eoi(smp_processor_id());
2231 
2232 	cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
2233 				  "xen/evtchn:prepare",
2234 				  xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
2235 
2236 	evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
2237 				sizeof(*evtchn_to_irq), GFP_KERNEL);
2238 	BUG_ON(!evtchn_to_irq);
2239 
2240 	/* No event channels are 'live' right now. */
2241 	for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
2242 		mask_evtchn(evtchn);
2243 
2244 	pirq_needs_eoi = pirq_needs_eoi_flag;
2245 
2246 #ifdef CONFIG_X86
2247 	if (xen_pv_domain()) {
2248 		if (xen_initial_domain())
2249 			pci_xen_initial_domain();
2250 	}
2251 	if (xen_feature(XENFEAT_hvm_callback_vector)) {
2252 		xen_setup_callback_vector();
2253 		xen_alloc_callback_vector();
2254 	}
2255 
2256 	if (xen_hvm_domain()) {
2257 		native_init_IRQ();
2258 		/* pci_xen_hvm_init must be called after native_init_IRQ so that
2259 		 * __acpi_register_gsi can point at the right function */
2260 		pci_xen_hvm_init();
2261 	} else {
2262 		int rc;
2263 		struct physdev_pirq_eoi_gmfn eoi_gmfn;
2264 
2265 		pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
2266 		eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
2267 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
2268 		if (rc != 0) {
2269 			free_page((unsigned long) pirq_eoi_map);
2270 			pirq_eoi_map = NULL;
2271 		} else
2272 			pirq_needs_eoi = pirq_check_eoi_map;
2273 	}
2274 #endif
2275 }
2276