xref: /openbmc/linux/drivers/vfio/virqfd.c (revision 2ee432d7)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VFIO generic eventfd code for IRQFD support.
4  * Derived from drivers/vfio/pci/vfio_pci_intrs.c
5  *
6  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
7  *     Author: Alex Williamson <alex.williamson@redhat.com>
8  */
9 
10 #include <linux/vfio.h>
11 #include <linux/eventfd.h>
12 #include <linux/file.h>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include "vfio.h"
16 
/*
 * Workqueue created by vfio_virqfd_init() and destroyed by
 * vfio_virqfd_exit(); the virqfd shutdown and flush_inject work items
 * in this file are queued on it.
 */
17 static struct workqueue_struct *vfio_irqfd_cleanup_wq;
/*
 * Held in this file around every test or update of a caller's *pvirqfd
 * pointer, so that a given virqfd is queued for release only once.
 */
18 static DEFINE_SPINLOCK(virqfd_lock);
19 
vfio_virqfd_init(void)20 int __init vfio_virqfd_init(void)
21 {
22 	vfio_irqfd_cleanup_wq =
23 		create_singlethread_workqueue("vfio-irqfd-cleanup");
24 	if (!vfio_irqfd_cleanup_wq)
25 		return -ENOMEM;
26 
27 	return 0;
28 }
29 
vfio_virqfd_exit(void)30 void vfio_virqfd_exit(void)
31 {
32 	destroy_workqueue(vfio_irqfd_cleanup_wq);
33 }
34 
virqfd_deactivate(struct virqfd * virqfd)35 static void virqfd_deactivate(struct virqfd *virqfd)
36 {
37 	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
38 }
39 
/*
 * Wait-queue callback installed on the eventfd by vfio_virqfd_enable().
 *
 * @wait: wait-queue entry embedded in the owning struct virqfd
 * @mode: unused
 * @sync: unused
 * @key:  poll key, decoded with key_to_poll() into the event mask
 *
 * On EPOLLIN the eventfd counter is read and, if there is no handler or
 * the handler returns non-zero, the inject work is scheduled (only when a
 * thread callback is set).  On EPOLLHUP the virqfd is queued for release
 * unless that has already been done.
 *
 * Always returns 0.
 */
static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
	__poll_t events = key_to_poll(key);

	if (events & EPOLLIN) {
		u64 cnt;

		/* Read the eventfd counter; the value read is not used. */
		eventfd_ctx_do_read(virqfd->eventfd, &cnt);

		/* An event has been signaled, call function */
		if ((!virqfd->handler ||
		     virqfd->handler(virqfd->opaque, virqfd->data)) &&
		    virqfd->thread)
			schedule_work(&virqfd->inject);
	}

	if (events & EPOLLHUP) {
		/*
		 * Saved IRQ state for the spinlock.  Deliberately named
		 * differently from the poll mask above so the two are never
		 * confused (the lock state must not shadow the event mask).
		 */
		unsigned long irqflags;

		spin_lock_irqsave(&virqfd_lock, irqflags);

		/*
		 * The eventfd is closing, if the virqfd has not yet been
		 * queued for release, as determined by testing whether the
		 * virqfd pointer to it is still valid, queue it now.  As
		 * with kvm irqfds, we know we won't race against the virqfd
		 * going away because we hold the lock to get here.
		 */
		if (*(virqfd->pvirqfd) == virqfd) {
			*(virqfd->pvirqfd) = NULL;
			virqfd_deactivate(virqfd);
		}

		spin_unlock_irqrestore(&virqfd_lock, irqflags);
	}

	return 0;
}
77 
/*
 * poll_table queueing callback: add the wait entry of the virqfd that
 * embeds @pt to the wait-queue head @wqh.  @file is unused.
 */
static void virqfd_ptable_queue_proc(struct file *file,
				     wait_queue_head_t *wqh, poll_table *pt)
{
	struct virqfd *owner = container_of(pt, struct virqfd, pt);
	wait_queue_entry_t *entry = &owner->wait;

	add_wait_queue(wqh, entry);
}
84 
virqfd_shutdown(struct work_struct * work)85 static void virqfd_shutdown(struct work_struct *work)
86 {
87 	struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
88 	u64 cnt;
89 
90 	eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
91 	flush_work(&virqfd->inject);
92 	eventfd_ctx_put(virqfd->eventfd);
93 
94 	kfree(virqfd);
95 }
96 
virqfd_inject(struct work_struct * work)97 static void virqfd_inject(struct work_struct *work)
98 {
99 	struct virqfd *virqfd = container_of(work, struct virqfd, inject);
100 	if (virqfd->thread)
101 		virqfd->thread(virqfd->opaque, virqfd->data);
102 }
103 
virqfd_flush_inject(struct work_struct * work)104 static void virqfd_flush_inject(struct work_struct *work)
105 {
106 	struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject);
107 
108 	flush_work(&virqfd->inject);
109 }
110 
vfio_virqfd_enable(void * opaque,int (* handler)(void *,void *),void (* thread)(void *,void *),void * data,struct virqfd ** pvirqfd,int fd)111 int vfio_virqfd_enable(void *opaque,
112 		       int (*handler)(void *, void *),
113 		       void (*thread)(void *, void *),
114 		       void *data, struct virqfd **pvirqfd, int fd)
115 {
116 	struct fd irqfd;
117 	struct eventfd_ctx *ctx;
118 	struct virqfd *virqfd;
119 	int ret = 0;
120 	__poll_t events;
121 
122 	virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT);
123 	if (!virqfd)
124 		return -ENOMEM;
125 
126 	virqfd->pvirqfd = pvirqfd;
127 	virqfd->opaque = opaque;
128 	virqfd->handler = handler;
129 	virqfd->thread = thread;
130 	virqfd->data = data;
131 
132 	INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
133 	INIT_WORK(&virqfd->inject, virqfd_inject);
134 	INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject);
135 
136 	irqfd = fdget(fd);
137 	if (!irqfd.file) {
138 		ret = -EBADF;
139 		goto err_fd;
140 	}
141 
142 	ctx = eventfd_ctx_fileget(irqfd.file);
143 	if (IS_ERR(ctx)) {
144 		ret = PTR_ERR(ctx);
145 		goto err_ctx;
146 	}
147 
148 	virqfd->eventfd = ctx;
149 
150 	/*
151 	 * virqfds can be released by closing the eventfd or directly
152 	 * through ioctl.  These are both done through a workqueue, so
153 	 * we update the pointer to the virqfd under lock to avoid
154 	 * pushing multiple jobs to release the same virqfd.
155 	 */
156 	spin_lock_irq(&virqfd_lock);
157 
158 	if (*pvirqfd) {
159 		spin_unlock_irq(&virqfd_lock);
160 		ret = -EBUSY;
161 		goto err_busy;
162 	}
163 	*pvirqfd = virqfd;
164 
165 	spin_unlock_irq(&virqfd_lock);
166 
167 	/*
168 	 * Install our own custom wake-up handling so we are notified via
169 	 * a callback whenever someone signals the underlying eventfd.
170 	 */
171 	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
172 	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
173 
174 	events = vfs_poll(irqfd.file, &virqfd->pt);
175 
176 	/*
177 	 * Check if there was an event already pending on the eventfd
178 	 * before we registered and trigger it as if we didn't miss it.
179 	 */
180 	if (events & EPOLLIN) {
181 		if ((!handler || handler(opaque, data)) && thread)
182 			schedule_work(&virqfd->inject);
183 	}
184 
185 	/*
186 	 * Do not drop the file until the irqfd is fully initialized,
187 	 * otherwise we might race against the EPOLLHUP.
188 	 */
189 	fdput(irqfd);
190 
191 	return 0;
192 err_busy:
193 	eventfd_ctx_put(ctx);
194 err_ctx:
195 	fdput(irqfd);
196 err_fd:
197 	kfree(virqfd);
198 
199 	return ret;
200 }
201 EXPORT_SYMBOL_GPL(vfio_virqfd_enable);
202 
vfio_virqfd_disable(struct virqfd ** pvirqfd)203 void vfio_virqfd_disable(struct virqfd **pvirqfd)
204 {
205 	unsigned long flags;
206 
207 	spin_lock_irqsave(&virqfd_lock, flags);
208 
209 	if (*pvirqfd) {
210 		virqfd_deactivate(*pvirqfd);
211 		*pvirqfd = NULL;
212 	}
213 
214 	spin_unlock_irqrestore(&virqfd_lock, flags);
215 
216 	/*
217 	 * Block until we know all outstanding shutdown jobs have completed.
218 	 * Even if we don't queue the job, flush the wq to be sure it's
219 	 * been released.
220 	 */
221 	flush_workqueue(vfio_irqfd_cleanup_wq);
222 }
223 EXPORT_SYMBOL_GPL(vfio_virqfd_disable);
224 
vfio_virqfd_flush_thread(struct virqfd ** pvirqfd)225 void vfio_virqfd_flush_thread(struct virqfd **pvirqfd)
226 {
227 	unsigned long flags;
228 
229 	spin_lock_irqsave(&virqfd_lock, flags);
230 	if (*pvirqfd && (*pvirqfd)->thread)
231 		queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject);
232 	spin_unlock_irqrestore(&virqfd_lock, flags);
233 
234 	flush_workqueue(vfio_irqfd_cleanup_wq);
235 }
236 EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread);
237