1 /*
2  * VFIO PCI interrupt handling
3  *
4  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
5  *     Author: Alex Williamson <alex.williamson@redhat.com>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * Derived from original vfio:
12  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
13  * Author: Tom Lyon, pugs@cisco.com
14  */
15 
16 #include <linux/device.h>
17 #include <linux/interrupt.h>
18 #include <linux/eventfd.h>
19 #include <linux/msi.h>
20 #include <linux/pci.h>
21 #include <linux/file.h>
22 #include <linux/poll.h>
23 #include <linux/vfio.h>
24 #include <linux/wait.h>
25 #include <linux/workqueue.h>
26 #include <linux/slab.h>
27 
28 #include "vfio_pci_private.h"
29 
30 /*
31  * IRQfd - generic
32  */
33 struct virqfd {
34 	struct vfio_pci_device	*vdev;
35 	struct eventfd_ctx	*eventfd;
36 	int			(*handler)(struct vfio_pci_device *, void *);
37 	void			(*thread)(struct vfio_pci_device *, void *);
38 	void			*data;
39 	struct work_struct	inject;
40 	wait_queue_t		wait;
41 	poll_table		pt;
42 	struct work_struct	shutdown;
43 	struct virqfd		**pvirqfd;
44 };
45 
46 static struct workqueue_struct *vfio_irqfd_cleanup_wq;
47 
48 int __init vfio_pci_virqfd_init(void)
49 {
50 	vfio_irqfd_cleanup_wq =
51 		create_singlethread_workqueue("vfio-irqfd-cleanup");
52 	if (!vfio_irqfd_cleanup_wq)
53 		return -ENOMEM;
54 
55 	return 0;
56 }
57 
58 void vfio_pci_virqfd_exit(void)
59 {
60 	destroy_workqueue(vfio_irqfd_cleanup_wq);
61 }
62 
63 static void virqfd_deactivate(struct virqfd *virqfd)
64 {
65 	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
66 }
67 
68 static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
69 {
70 	struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
71 	unsigned long flags = (unsigned long)key;
72 
73 	if (flags & POLLIN) {
74 		/* An event has been signaled, call function */
75 		if ((!virqfd->handler ||
76 		     virqfd->handler(virqfd->vdev, virqfd->data)) &&
77 		    virqfd->thread)
78 			schedule_work(&virqfd->inject);
79 	}
80 
81 	if (flags & POLLHUP) {
82 		unsigned long flags;
83 		spin_lock_irqsave(&virqfd->vdev->irqlock, flags);
84 
85 		/*
86 		 * The eventfd is closing, if the virqfd has not yet been
87 		 * queued for release, as determined by testing whether the
88 		 * vdev pointer to it is still valid, queue it now.  As
89 		 * with kvm irqfds, we know we won't race against the virqfd
90 		 * going away because we hold wqh->lock to get here.
91 		 */
92 		if (*(virqfd->pvirqfd) == virqfd) {
93 			*(virqfd->pvirqfd) = NULL;
94 			virqfd_deactivate(virqfd);
95 		}
96 
97 		spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags);
98 	}
99 
100 	return 0;
101 }
102 
103 static void virqfd_ptable_queue_proc(struct file *file,
104 				     wait_queue_head_t *wqh, poll_table *pt)
105 {
106 	struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
107 	add_wait_queue(wqh, &virqfd->wait);
108 }
109 
110 static void virqfd_shutdown(struct work_struct *work)
111 {
112 	struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
113 	u64 cnt;
114 
115 	eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
116 	flush_work(&virqfd->inject);
117 	eventfd_ctx_put(virqfd->eventfd);
118 
119 	kfree(virqfd);
120 }
121 
122 static void virqfd_inject(struct work_struct *work)
123 {
124 	struct virqfd *virqfd = container_of(work, struct virqfd, inject);
125 	if (virqfd->thread)
126 		virqfd->thread(virqfd->vdev, virqfd->data);
127 }
128 
129 static int virqfd_enable(struct vfio_pci_device *vdev,
130 			 int (*handler)(struct vfio_pci_device *, void *),
131 			 void (*thread)(struct vfio_pci_device *, void *),
132 			 void *data, struct virqfd **pvirqfd, int fd)
133 {
134 	struct fd irqfd;
135 	struct eventfd_ctx *ctx;
136 	struct virqfd *virqfd;
137 	int ret = 0;
138 	unsigned int events;
139 
140 	virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL);
141 	if (!virqfd)
142 		return -ENOMEM;
143 
144 	virqfd->pvirqfd = pvirqfd;
145 	virqfd->vdev = vdev;
146 	virqfd->handler = handler;
147 	virqfd->thread = thread;
148 	virqfd->data = data;
149 
150 	INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
151 	INIT_WORK(&virqfd->inject, virqfd_inject);
152 
153 	irqfd = fdget(fd);
154 	if (!irqfd.file) {
155 		ret = -EBADF;
156 		goto err_fd;
157 	}
158 
159 	ctx = eventfd_ctx_fileget(irqfd.file);
160 	if (IS_ERR(ctx)) {
161 		ret = PTR_ERR(ctx);
162 		goto err_ctx;
163 	}
164 
165 	virqfd->eventfd = ctx;
166 
167 	/*
168 	 * virqfds can be released by closing the eventfd or directly
169 	 * through ioctl.  These are both done through a workqueue, so
170 	 * we update the pointer to the virqfd under lock to avoid
171 	 * pushing multiple jobs to release the same virqfd.
172 	 */
173 	spin_lock_irq(&vdev->irqlock);
174 
175 	if (*pvirqfd) {
176 		spin_unlock_irq(&vdev->irqlock);
177 		ret = -EBUSY;
178 		goto err_busy;
179 	}
180 	*pvirqfd = virqfd;
181 
182 	spin_unlock_irq(&vdev->irqlock);
183 
184 	/*
185 	 * Install our own custom wake-up handling so we are notified via
186 	 * a callback whenever someone signals the underlying eventfd.
187 	 */
188 	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
189 	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
190 
191 	events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);
192 
193 	/*
194 	 * Check if there was an event already pending on the eventfd
195 	 * before we registered and trigger it as if we didn't miss it.
196 	 */
197 	if (events & POLLIN) {
198 		if ((!handler || handler(vdev, data)) && thread)
199 			schedule_work(&virqfd->inject);
200 	}
201 
202 	/*
203 	 * Do not drop the file until the irqfd is fully initialized,
204 	 * otherwise we might race against the POLLHUP.
205 	 */
206 	fdput(irqfd);
207 
208 	return 0;
209 err_busy:
210 	eventfd_ctx_put(ctx);
211 err_ctx:
212 	fdput(irqfd);
213 err_fd:
214 	kfree(virqfd);
215 
216 	return ret;
217 }
218 
219 static void virqfd_disable(struct vfio_pci_device *vdev,
220 			   struct virqfd **pvirqfd)
221 {
222 	unsigned long flags;
223 
224 	spin_lock_irqsave(&vdev->irqlock, flags);
225 
226 	if (*pvirqfd) {
227 		virqfd_deactivate(*pvirqfd);
228 		*pvirqfd = NULL;
229 	}
230 
231 	spin_unlock_irqrestore(&vdev->irqlock, flags);
232 
233 	/*
234 	 * Block until we know all outstanding shutdown jobs have completed.
235 	 * Even if we don't queue the job, flush the wq to be sure it's
236 	 * been released.
237 	 */
238 	flush_workqueue(vfio_irqfd_cleanup_wq);
239 }
240 
241 /*
242  * INTx
243  */
244 static void vfio_send_intx_eventfd(struct vfio_pci_device *vdev, void *unused)
245 {
246 	if (likely(is_intx(vdev) && !vdev->virq_disabled))
247 		eventfd_signal(vdev->ctx[0].trigger, 1);
248 }
249 
250 void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
251 {
252 	struct pci_dev *pdev = vdev->pdev;
253 	unsigned long flags;
254 
255 	spin_lock_irqsave(&vdev->irqlock, flags);
256 
257 	/*
258 	 * Masking can come from interrupt, ioctl, or config space
259 	 * via INTx disable.  The latter means this can get called
260 	 * even when not using intx delivery.  In this case, just
261 	 * try to have the physical bit follow the virtual bit.
262 	 */
263 	if (unlikely(!is_intx(vdev))) {
264 		if (vdev->pci_2_3)
265 			pci_intx(pdev, 0);
266 	} else if (!vdev->ctx[0].masked) {
267 		/*
268 		 * Can't use check_and_mask here because we always want to
269 		 * mask, not just when something is pending.
270 		 */
271 		if (vdev->pci_2_3)
272 			pci_intx(pdev, 0);
273 		else
274 			disable_irq_nosync(pdev->irq);
275 
276 		vdev->ctx[0].masked = true;
277 	}
278 
279 	spin_unlock_irqrestore(&vdev->irqlock, flags);
280 }
281 
282 /*
283  * If this is triggered by an eventfd, we can't call eventfd_signal
284  * or else we'll deadlock on the eventfd wait queue.  Return >0 when
285  * a signal is necessary, which can then be handled via a work queue
286  * or directly depending on the caller.
287  */
288 static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev,
289 					void *unused)
290 {
291 	struct pci_dev *pdev = vdev->pdev;
292 	unsigned long flags;
293 	int ret = 0;
294 
295 	spin_lock_irqsave(&vdev->irqlock, flags);
296 
297 	/*
298 	 * Unmasking comes from ioctl or config, so again, have the
299 	 * physical bit follow the virtual even when not using INTx.
300 	 */
301 	if (unlikely(!is_intx(vdev))) {
302 		if (vdev->pci_2_3)
303 			pci_intx(pdev, 1);
304 	} else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
305 		/*
306 		 * A pending interrupt here would immediately trigger,
307 		 * but we can avoid that overhead by just re-sending
308 		 * the interrupt to the user.
309 		 */
310 		if (vdev->pci_2_3) {
311 			if (!pci_check_and_unmask_intx(pdev))
312 				ret = 1;
313 		} else
314 			enable_irq(pdev->irq);
315 
316 		vdev->ctx[0].masked = (ret > 0);
317 	}
318 
319 	spin_unlock_irqrestore(&vdev->irqlock, flags);
320 
321 	return ret;
322 }
323 
324 void vfio_pci_intx_unmask(struct vfio_pci_device *vdev)
325 {
326 	if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
327 		vfio_send_intx_eventfd(vdev, NULL);
328 }
329 
330 static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
331 {
332 	struct vfio_pci_device *vdev = dev_id;
333 	unsigned long flags;
334 	int ret = IRQ_NONE;
335 
336 	spin_lock_irqsave(&vdev->irqlock, flags);
337 
338 	if (!vdev->pci_2_3) {
339 		disable_irq_nosync(vdev->pdev->irq);
340 		vdev->ctx[0].masked = true;
341 		ret = IRQ_HANDLED;
342 	} else if (!vdev->ctx[0].masked &&  /* may be shared */
343 		   pci_check_and_mask_intx(vdev->pdev)) {
344 		vdev->ctx[0].masked = true;
345 		ret = IRQ_HANDLED;
346 	}
347 
348 	spin_unlock_irqrestore(&vdev->irqlock, flags);
349 
350 	if (ret == IRQ_HANDLED)
351 		vfio_send_intx_eventfd(vdev, NULL);
352 
353 	return ret;
354 }
355 
356 static int vfio_intx_enable(struct vfio_pci_device *vdev)
357 {
358 	if (!is_irq_none(vdev))
359 		return -EINVAL;
360 
361 	if (!vdev->pdev->irq)
362 		return -ENODEV;
363 
364 	vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
365 	if (!vdev->ctx)
366 		return -ENOMEM;
367 
368 	vdev->num_ctx = 1;
369 
370 	/*
371 	 * If the virtual interrupt is masked, restore it.  Devices
372 	 * supporting DisINTx can be masked at the hardware level
373 	 * here, non-PCI-2.3 devices will have to wait until the
374 	 * interrupt is enabled.
375 	 */
376 	vdev->ctx[0].masked = vdev->virq_disabled;
377 	if (vdev->pci_2_3)
378 		pci_intx(vdev->pdev, !vdev->ctx[0].masked);
379 
380 	vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
381 
382 	return 0;
383 }
384 
385 static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
386 {
387 	struct pci_dev *pdev = vdev->pdev;
388 	unsigned long irqflags = IRQF_SHARED;
389 	struct eventfd_ctx *trigger;
390 	unsigned long flags;
391 	int ret;
392 
393 	if (vdev->ctx[0].trigger) {
394 		free_irq(pdev->irq, vdev);
395 		kfree(vdev->ctx[0].name);
396 		eventfd_ctx_put(vdev->ctx[0].trigger);
397 		vdev->ctx[0].trigger = NULL;
398 	}
399 
400 	if (fd < 0) /* Disable only */
401 		return 0;
402 
403 	vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)",
404 				      pci_name(pdev));
405 	if (!vdev->ctx[0].name)
406 		return -ENOMEM;
407 
408 	trigger = eventfd_ctx_fdget(fd);
409 	if (IS_ERR(trigger)) {
410 		kfree(vdev->ctx[0].name);
411 		return PTR_ERR(trigger);
412 	}
413 
414 	vdev->ctx[0].trigger = trigger;
415 
416 	if (!vdev->pci_2_3)
417 		irqflags = 0;
418 
419 	ret = request_irq(pdev->irq, vfio_intx_handler,
420 			  irqflags, vdev->ctx[0].name, vdev);
421 	if (ret) {
422 		vdev->ctx[0].trigger = NULL;
423 		kfree(vdev->ctx[0].name);
424 		eventfd_ctx_put(trigger);
425 		return ret;
426 	}
427 
428 	/*
429 	 * INTx disable will stick across the new irq setup,
430 	 * disable_irq won't.
431 	 */
432 	spin_lock_irqsave(&vdev->irqlock, flags);
433 	if (!vdev->pci_2_3 && vdev->ctx[0].masked)
434 		disable_irq_nosync(pdev->irq);
435 	spin_unlock_irqrestore(&vdev->irqlock, flags);
436 
437 	return 0;
438 }
439 
440 static void vfio_intx_disable(struct vfio_pci_device *vdev)
441 {
442 	vfio_intx_set_signal(vdev, -1);
443 	virqfd_disable(vdev, &vdev->ctx[0].unmask);
444 	virqfd_disable(vdev, &vdev->ctx[0].mask);
445 	vdev->irq_type = VFIO_PCI_NUM_IRQS;
446 	vdev->num_ctx = 0;
447 	kfree(vdev->ctx);
448 }
449 
450 /*
451  * MSI/MSI-X
452  */
453 static irqreturn_t vfio_msihandler(int irq, void *arg)
454 {
455 	struct eventfd_ctx *trigger = arg;
456 
457 	eventfd_signal(trigger, 1);
458 	return IRQ_HANDLED;
459 }
460 
461 static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
462 {
463 	struct pci_dev *pdev = vdev->pdev;
464 	int ret;
465 
466 	if (!is_irq_none(vdev))
467 		return -EINVAL;
468 
469 	vdev->ctx = kzalloc(nvec * sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
470 	if (!vdev->ctx)
471 		return -ENOMEM;
472 
473 	if (msix) {
474 		int i;
475 
476 		vdev->msix = kzalloc(nvec * sizeof(struct msix_entry),
477 				     GFP_KERNEL);
478 		if (!vdev->msix) {
479 			kfree(vdev->ctx);
480 			return -ENOMEM;
481 		}
482 
483 		for (i = 0; i < nvec; i++)
484 			vdev->msix[i].entry = i;
485 
486 		ret = pci_enable_msix_range(pdev, vdev->msix, 1, nvec);
487 		if (ret < nvec) {
488 			if (ret > 0)
489 				pci_disable_msix(pdev);
490 			kfree(vdev->msix);
491 			kfree(vdev->ctx);
492 			return ret;
493 		}
494 	} else {
495 		ret = pci_enable_msi_range(pdev, 1, nvec);
496 		if (ret < nvec) {
497 			if (ret > 0)
498 				pci_disable_msi(pdev);
499 			kfree(vdev->ctx);
500 			return ret;
501 		}
502 	}
503 
504 	vdev->num_ctx = nvec;
505 	vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
506 				VFIO_PCI_MSI_IRQ_INDEX;
507 
508 	if (!msix) {
509 		/*
510 		 * Compute the virtual hardware field for max msi vectors -
511 		 * it is the log base 2 of the number of vectors.
512 		 */
513 		vdev->msi_qmax = fls(nvec * 2 - 1) - 1;
514 	}
515 
516 	return 0;
517 }
518 
519 static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
520 				      int vector, int fd, bool msix)
521 {
522 	struct pci_dev *pdev = vdev->pdev;
523 	int irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
524 	char *name = msix ? "vfio-msix" : "vfio-msi";
525 	struct eventfd_ctx *trigger;
526 	int ret;
527 
528 	if (vector >= vdev->num_ctx)
529 		return -EINVAL;
530 
531 	if (vdev->ctx[vector].trigger) {
532 		free_irq(irq, vdev->ctx[vector].trigger);
533 		kfree(vdev->ctx[vector].name);
534 		eventfd_ctx_put(vdev->ctx[vector].trigger);
535 		vdev->ctx[vector].trigger = NULL;
536 	}
537 
538 	if (fd < 0)
539 		return 0;
540 
541 	vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "%s[%d](%s)",
542 					   name, vector, pci_name(pdev));
543 	if (!vdev->ctx[vector].name)
544 		return -ENOMEM;
545 
546 	trigger = eventfd_ctx_fdget(fd);
547 	if (IS_ERR(trigger)) {
548 		kfree(vdev->ctx[vector].name);
549 		return PTR_ERR(trigger);
550 	}
551 
552 	/*
553 	 * The MSIx vector table resides in device memory which may be cleared
554 	 * via backdoor resets. We don't allow direct access to the vector
555 	 * table so even if a userspace driver attempts to save/restore around
556 	 * such a reset it would be unsuccessful. To avoid this, restore the
557 	 * cached value of the message prior to enabling.
558 	 */
559 	if (msix) {
560 		struct msi_msg msg;
561 
562 		get_cached_msi_msg(irq, &msg);
563 		pci_write_msi_msg(irq, &msg);
564 	}
565 
566 	ret = request_irq(irq, vfio_msihandler, 0,
567 			  vdev->ctx[vector].name, trigger);
568 	if (ret) {
569 		kfree(vdev->ctx[vector].name);
570 		eventfd_ctx_put(trigger);
571 		return ret;
572 	}
573 
574 	vdev->ctx[vector].trigger = trigger;
575 
576 	return 0;
577 }
578 
579 static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
580 			      unsigned count, int32_t *fds, bool msix)
581 {
582 	int i, j, ret = 0;
583 
584 	if (start + count > vdev->num_ctx)
585 		return -EINVAL;
586 
587 	for (i = 0, j = start; i < count && !ret; i++, j++) {
588 		int fd = fds ? fds[i] : -1;
589 		ret = vfio_msi_set_vector_signal(vdev, j, fd, msix);
590 	}
591 
592 	if (ret) {
593 		for (--j; j >= start; j--)
594 			vfio_msi_set_vector_signal(vdev, j, -1, msix);
595 	}
596 
597 	return ret;
598 }
599 
600 static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
601 {
602 	struct pci_dev *pdev = vdev->pdev;
603 	int i;
604 
605 	vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
606 
607 	for (i = 0; i < vdev->num_ctx; i++) {
608 		virqfd_disable(vdev, &vdev->ctx[i].unmask);
609 		virqfd_disable(vdev, &vdev->ctx[i].mask);
610 	}
611 
612 	if (msix) {
613 		pci_disable_msix(vdev->pdev);
614 		kfree(vdev->msix);
615 	} else
616 		pci_disable_msi(pdev);
617 
618 	vdev->irq_type = VFIO_PCI_NUM_IRQS;
619 	vdev->num_ctx = 0;
620 	kfree(vdev->ctx);
621 }
622 
623 /*
624  * IOCTL support
625  */
626 static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
627 				    unsigned index, unsigned start,
628 				    unsigned count, uint32_t flags, void *data)
629 {
630 	if (!is_intx(vdev) || start != 0 || count != 1)
631 		return -EINVAL;
632 
633 	if (flags & VFIO_IRQ_SET_DATA_NONE) {
634 		vfio_pci_intx_unmask(vdev);
635 	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
636 		uint8_t unmask = *(uint8_t *)data;
637 		if (unmask)
638 			vfio_pci_intx_unmask(vdev);
639 	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
640 		int32_t fd = *(int32_t *)data;
641 		if (fd >= 0)
642 			return virqfd_enable(vdev, vfio_pci_intx_unmask_handler,
643 					     vfio_send_intx_eventfd, NULL,
644 					     &vdev->ctx[0].unmask, fd);
645 
646 		virqfd_disable(vdev, &vdev->ctx[0].unmask);
647 	}
648 
649 	return 0;
650 }
651 
652 static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev,
653 				  unsigned index, unsigned start,
654 				  unsigned count, uint32_t flags, void *data)
655 {
656 	if (!is_intx(vdev) || start != 0 || count != 1)
657 		return -EINVAL;
658 
659 	if (flags & VFIO_IRQ_SET_DATA_NONE) {
660 		vfio_pci_intx_mask(vdev);
661 	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
662 		uint8_t mask = *(uint8_t *)data;
663 		if (mask)
664 			vfio_pci_intx_mask(vdev);
665 	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
666 		return -ENOTTY; /* XXX implement me */
667 	}
668 
669 	return 0;
670 }
671 
672 static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev,
673 				     unsigned index, unsigned start,
674 				     unsigned count, uint32_t flags, void *data)
675 {
676 	if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
677 		vfio_intx_disable(vdev);
678 		return 0;
679 	}
680 
681 	if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1)
682 		return -EINVAL;
683 
684 	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
685 		int32_t fd = *(int32_t *)data;
686 		int ret;
687 
688 		if (is_intx(vdev))
689 			return vfio_intx_set_signal(vdev, fd);
690 
691 		ret = vfio_intx_enable(vdev);
692 		if (ret)
693 			return ret;
694 
695 		ret = vfio_intx_set_signal(vdev, fd);
696 		if (ret)
697 			vfio_intx_disable(vdev);
698 
699 		return ret;
700 	}
701 
702 	if (!is_intx(vdev))
703 		return -EINVAL;
704 
705 	if (flags & VFIO_IRQ_SET_DATA_NONE) {
706 		vfio_send_intx_eventfd(vdev, NULL);
707 	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
708 		uint8_t trigger = *(uint8_t *)data;
709 		if (trigger)
710 			vfio_send_intx_eventfd(vdev, NULL);
711 	}
712 	return 0;
713 }
714 
715 static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
716 				    unsigned index, unsigned start,
717 				    unsigned count, uint32_t flags, void *data)
718 {
719 	int i;
720 	bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
721 
722 	if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
723 		vfio_msi_disable(vdev, msix);
724 		return 0;
725 	}
726 
727 	if (!(irq_is(vdev, index) || is_irq_none(vdev)))
728 		return -EINVAL;
729 
730 	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
731 		int32_t *fds = data;
732 		int ret;
733 
734 		if (vdev->irq_type == index)
735 			return vfio_msi_set_block(vdev, start, count,
736 						  fds, msix);
737 
738 		ret = vfio_msi_enable(vdev, start + count, msix);
739 		if (ret)
740 			return ret;
741 
742 		ret = vfio_msi_set_block(vdev, start, count, fds, msix);
743 		if (ret)
744 			vfio_msi_disable(vdev, msix);
745 
746 		return ret;
747 	}
748 
749 	if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
750 		return -EINVAL;
751 
752 	for (i = start; i < start + count; i++) {
753 		if (!vdev->ctx[i].trigger)
754 			continue;
755 		if (flags & VFIO_IRQ_SET_DATA_NONE) {
756 			eventfd_signal(vdev->ctx[i].trigger, 1);
757 		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
758 			uint8_t *bools = data;
759 			if (bools[i - start])
760 				eventfd_signal(vdev->ctx[i].trigger, 1);
761 		}
762 	}
763 	return 0;
764 }
765 
766 static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
767 				    unsigned index, unsigned start,
768 				    unsigned count, uint32_t flags, void *data)
769 {
770 	int32_t fd = *(int32_t *)data;
771 
772 	if ((index != VFIO_PCI_ERR_IRQ_INDEX) ||
773 	    !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
774 		return -EINVAL;
775 
776 	/* DATA_NONE/DATA_BOOL enables loopback testing */
777 	if (flags & VFIO_IRQ_SET_DATA_NONE) {
778 		if (vdev->err_trigger)
779 			eventfd_signal(vdev->err_trigger, 1);
780 		return 0;
781 	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
782 		uint8_t trigger = *(uint8_t *)data;
783 		if (trigger && vdev->err_trigger)
784 			eventfd_signal(vdev->err_trigger, 1);
785 		return 0;
786 	}
787 
788 	/* Handle SET_DATA_EVENTFD */
789 	if (fd == -1) {
790 		if (vdev->err_trigger)
791 			eventfd_ctx_put(vdev->err_trigger);
792 		vdev->err_trigger = NULL;
793 		return 0;
794 	} else if (fd >= 0) {
795 		struct eventfd_ctx *efdctx;
796 		efdctx = eventfd_ctx_fdget(fd);
797 		if (IS_ERR(efdctx))
798 			return PTR_ERR(efdctx);
799 		if (vdev->err_trigger)
800 			eventfd_ctx_put(vdev->err_trigger);
801 		vdev->err_trigger = efdctx;
802 		return 0;
803 	} else
804 		return -EINVAL;
805 }
806 int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
807 			    unsigned index, unsigned start, unsigned count,
808 			    void *data)
809 {
810 	int (*func)(struct vfio_pci_device *vdev, unsigned index,
811 		    unsigned start, unsigned count, uint32_t flags,
812 		    void *data) = NULL;
813 
814 	switch (index) {
815 	case VFIO_PCI_INTX_IRQ_INDEX:
816 		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
817 		case VFIO_IRQ_SET_ACTION_MASK:
818 			func = vfio_pci_set_intx_mask;
819 			break;
820 		case VFIO_IRQ_SET_ACTION_UNMASK:
821 			func = vfio_pci_set_intx_unmask;
822 			break;
823 		case VFIO_IRQ_SET_ACTION_TRIGGER:
824 			func = vfio_pci_set_intx_trigger;
825 			break;
826 		}
827 		break;
828 	case VFIO_PCI_MSI_IRQ_INDEX:
829 	case VFIO_PCI_MSIX_IRQ_INDEX:
830 		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
831 		case VFIO_IRQ_SET_ACTION_MASK:
832 		case VFIO_IRQ_SET_ACTION_UNMASK:
833 			/* XXX Need masking support exported */
834 			break;
835 		case VFIO_IRQ_SET_ACTION_TRIGGER:
836 			func = vfio_pci_set_msi_trigger;
837 			break;
838 		}
839 		break;
840 	case VFIO_PCI_ERR_IRQ_INDEX:
841 		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
842 		case VFIO_IRQ_SET_ACTION_TRIGGER:
843 			if (pci_is_pcie(vdev->pdev))
844 				func = vfio_pci_set_err_trigger;
845 			break;
846 		}
847 	}
848 
849 	if (!func)
850 		return -ENOTTY;
851 
852 	return func(vdev, index, start, count, flags, data);
853 }
854