xref: /openbmc/linux/virt/kvm/eventfd.c (revision 9d56dd3b)
/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>

#include "iodev.h"

/*
 * --------------------------------------------------------------------
 * irqfd: Allows an fd to be used to inject an interrupt to the guest
 *
 * Credit goes to Avi Kivity for the original idea.
 * --------------------------------------------------------------------
 */
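
/*
 * Userspace arms an irqfd with the KVM_IRQFD ioctl on the VM fd.  A minimal
 * sketch (hypothetical descriptors "vmfd" and "efd", error handling omitted)
 * that routes an eventfd created with eventfd(0, 0) to GSI 5:
 *
 *	struct kvm_irqfd data;
 *	uint64_t one = 1;
 *
 *	memset(&data, 0, sizeof(data));
 *	data.fd  = efd;				// eventfd to listen on
 *	data.gsi = 5;				// guest interrupt line
 *	ioctl(vmfd, KVM_IRQFD, &data);		// assign
 *
 *	write(efd, &one, sizeof(one));		// each signal injects an interrupt
 *
 * Passing KVM_IRQFD_FLAG_DEASSIGN in data.flags with the same fd/gsi pair
 * tears the routing down again (see kvm_irqfd_deassign() below).
 */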

struct _irqfd {
	struct kvm               *kvm;
	struct eventfd_ctx       *eventfd;
	int                       gsi;
	struct list_head          list;
	poll_table                pt;
	wait_queue_head_t        *wqh;
	wait_queue_t              wait;
	struct work_struct        inject;
	struct work_struct        shutdown;
};

static struct workqueue_struct *irqfd_cleanup_wq;

static void
irqfd_inject(struct work_struct *work)
{
	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

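	/*
	 * Pulse the GSI: assert the line and then immediately deassert it.
	 * For edge-triggered interrupts this injects a single interrupt
	 * into the guest.
	 */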
	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	remove_wait_queue(irqfd->wqh, &irqfd->wait);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed
	 */
	flush_work(&irqfd->inject);

	/*
	 * It is now safe to release the object's resources
	 */
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}


/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct _irqfd *irqfd)
{
	return !list_empty(&irqfd->list);
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct _irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
	unsigned long flags = (unsigned long)key;

	if (flags & POLLIN)
		/* An event has been signaled, inject an interrupt */
		schedule_work(&irqfd->inject);

	if (flags & POLLHUP) {
		/* The eventfd is closing, detach from KVM */
		struct kvm *kvm = irqfd->kvm;
		unsigned long flags;

		spin_lock_irqsave(&kvm->irqfds.lock, flags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
	}

	return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);

	irqfd->wqh = wqh;
	add_wait_queue(wqh, &irqfd->wait);
}

static int
kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
{
	struct _irqfd *irqfd;
	struct file *file = NULL;
	struct eventfd_ctx *eventfd = NULL;
	int ret;
	unsigned int events;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);

	file = eventfd_fget(fd);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto fail;
	}

	eventfd = eventfd_ctx_fileget(file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

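	/*
	 * Polling the eventfd runs irqfd_ptable_queue_proc() above, which
	 * hooks irqfd->wait into the eventfd's wait-queue, and returns any
	 * events that are already pending.
	 */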
	events = file->f_op->poll(file, &irqfd->pt);

	spin_lock_irq(&kvm->irqfds.lock);
	list_add_tail(&irqfd->list, &kvm->irqfds.items);
	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	if (events & POLLIN)
		schedule_work(&irqfd->inject);

	/*
	 * Do not drop the file until the irqfd is fully initialized;
	 * otherwise we might race against the POLLHUP.
	 */
	fput(file);

	return 0;

fail:
	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	if (!IS_ERR(file))
		fput(file);

	kfree(irqfd);
	return ret;
}

void
kvm_eventfd_init(struct kvm *kvm)
{
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

/*
 * shut down any irqfds that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
{
	struct _irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
			irqfd_deactivate(irqfd);
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
{
	if (flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, fd, gsi);

	return kvm_irqfd_assign(kvm, fd, gsi);
}

/*
 * This function is called as the kvm VM fd is being released. Shut down all
 * irqfds that still remain open
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct _irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated single-thread
 * queue to prevent deadlock against flushing the normal work-queue.
 */
static int __init irqfd_module_init(void)
{
	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

static void __exit irqfd_module_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}

module_init(irqfd_module_init);
module_exit(irqfd_module_exit);

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */
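
/*
 * Userspace registers an ioeventfd with the KVM_IOEVENTFD ioctl on the VM
 * fd.  A minimal sketch (hypothetical descriptors "vmfd" and "efd", port
 * number chosen arbitrarily, error handling omitted) that signals "efd"
 * whenever the guest writes the 2-byte value 0x1 to I/O port 0x510:
 *
 *	struct kvm_ioeventfd data;
 *
 *	memset(&data, 0, sizeof(data));
 *	data.addr      = 0x510;
 *	data.len       = 2;
 *	data.fd        = efd;
 *	data.datamatch = 0x1;
 *	data.flags     = KVM_IOEVENTFD_FLAG_PIO | KVM_IOEVENTFD_FLAG_DATAMATCH;
 *	ioctl(vmfd, KVM_IOEVENTFD, &data);
 *
 * Without KVM_IOEVENTFD_FLAG_DATAMATCH the registration is a wildcard and
 * any value written to the address/length pair signals the eventfd.
 */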

struct _ioeventfd {
	struct list_head     list;
	u64                  addr;
	int                  length;
	struct eventfd_ctx  *eventfd;
	u64                  datamatch;
	struct kvm_io_device dev;
	bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (!(addr == p->addr && len == p->length))
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
		const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

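	/*
	 * A non-matching access is not ours; returning -EOPNOTSUPP lets
	 * the io bus offer the write to other devices.
	 */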
	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->addr == p->addr && _p->length == p->length &&
		    (_p->wildcard || p->wildcard ||
		     _p->datamatch == p->datamatch))
			return true;

	return false;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
	struct _ioeventfd        *p;
	struct eventfd_ctx       *eventfd;
	int                       ret;

	/* must be natural-word sized */
	switch (args->len) {
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
	p->length  = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	down_write(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = __kvm_io_bus_register_dev(bus, &p->dev);
	if (ret < 0)
		goto unlock_fail;

	list_add_tail(&p->list, &kvm->ioeventfds);

	up_write(&kvm->slots_lock);

	return 0;

unlock_fail:
	up_write(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
	struct _ioeventfd        *p, *tmp;
	struct eventfd_ctx       *eventfd;
	int                       ret = -ENOENT;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	down_write(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

		if (p->eventfd != eventfd  ||
		    p->addr != args->addr  ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		__kvm_io_bus_unregister_dev(bus, &p->dev);
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	up_write(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}
577