/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>

#include "iodev.h"

/*
 * --------------------------------------------------------------------
 * irqfd: Allows an fd to be used to inject an interrupt to the guest
 *
 * Credit goes to Avi Kivity for the original idea.
 * --------------------------------------------------------------------
 */

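/*
 * Illustrative userspace usage (informative sketch only, not part of this
 * file): assumes a VM fd obtained with KVM_CREATE_VM and a GSI that is
 * already routed; vmfd and gsi are placeholders and error handling is
 * omitted.  Writing a nonzero value to the eventfd injects an edge
 * (raise + lower) on the GSI, and passing KVM_IRQFD_FLAG_DEASSIGN tears
 * the binding back down.
 *
 *	#include <stdint.h>
 *	#include <unistd.h>
 *	#include <sys/eventfd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int efd = eventfd(0, 0);
 *	struct kvm_irqfd req = { .fd = efd, .gsi = gsi };
 *
 *	ioctl(vmfd, KVM_IRQFD, &req);
 *
 *	uint64_t val = 1;
 *	write(efd, &val, sizeof(val));
 *
 *	req.flags = KVM_IRQFD_FLAG_DEASSIGN;
 *	ioctl(vmfd, KVM_IRQFD, &req);
 */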

struct _irqfd {
	struct kvm *kvm;
	struct eventfd_ctx *eventfd;
	int gsi;
	struct list_head list;
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct inject;
	struct work_struct shutdown;
};

static struct workqueue_struct *irqfd_cleanup_wq;

static void
irqfd_inject(struct work_struct *work)
{
	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	mutex_lock(&kvm->irq_lock);
	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
	mutex_unlock(&kvm->irq_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	remove_wait_queue(irqfd->wqh, &irqfd->wait);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed.
	 */
	flush_work(&irqfd->inject);

	/*
	 * It is now safe to release the object's resources.
	 */
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}

/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct _irqfd *irqfd)
{
	return list_empty(&irqfd->list) ? false : true;
}

/*
 * Mark the irqfd as inactive and schedule it for removal.
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct _irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

/*
 * Called with wqh->lock held and interrupts disabled.
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
	unsigned long flags = (unsigned long)key;

	if (flags & POLLIN)
		/* An event has been signaled, inject an interrupt */
		schedule_work(&irqfd->inject);

	if (flags & POLLHUP) {
		/* The eventfd is closing, detach from KVM */
		struct kvm *kvm = irqfd->kvm;
		unsigned long flags;

		spin_lock_irqsave(&kvm->irqfds.lock, flags);

		/*
		 * We must check whether someone deactivated the irqfd before
		 * we could acquire the irqfds.lock, since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will clean up for us.
		 * We cannot race against the irqfd going away, since the
		 * other side is required to acquire wqh->lock, which we hold.
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
	}

	return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);

	irqfd->wqh = wqh;
	add_wait_queue(wqh, &irqfd->wait);
}

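/*
 * Summary of the wiring established by kvm_irqfd_assign() below: the
 * irqfd's wait-queue entry is hooked onto the eventfd's wait-queue via
 * the poll hook above.  A write to the eventfd then wakes irqfd_wakeup()
 * with POLLIN, which schedules irqfd_inject() to raise and lower the GSI.
 * Closing the eventfd delivers POLLHUP, which deactivates the irqfd and
 * defers the final teardown to irqfd_shutdown() on the cleanup workqueue.
 */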
static int
kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
{
	struct _irqfd *irqfd;
	struct file *file = NULL;
	struct eventfd_ctx *eventfd = NULL;
	int ret;
	unsigned int events;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);

	file = eventfd_fget(fd);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto fail;
	}

	eventfd = eventfd_ctx_fileget(file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd.
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	events = file->f_op->poll(file, &irqfd->pt);

	spin_lock_irq(&kvm->irqfds.lock);
	list_add_tail(&irqfd->list, &kvm->irqfds.items);
	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	if (events & POLLIN)
		schedule_work(&irqfd->inject);

	/*
	 * Do not drop the file until the irqfd is fully initialized;
	 * otherwise we might race against the POLLHUP.
	 */
	fput(file);

	return 0;

fail:
	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	if (!IS_ERR(file))
		fput(file);

	kfree(irqfd);
	return ret;
}

void
kvm_eventfd_init(struct kvm *kvm)
{
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

/*
 * Shut down any irqfds that match fd+gsi.
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
{
	struct _irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
			irqfd_deactivate(irqfd);
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
{
	if (flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, fd, gsi);

	return kvm_irqfd_assign(kvm, fd, gsi);
}

/*
 * This function is called as the kvm VM fd is being released.  Shut down
 * all irqfds that still remain open.
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct _irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);
}

/*
 * Create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances.  We need our own isolated
 * single-thread queue to prevent deadlock against flushing the normal
 * work-queue.
 */
static int __init irqfd_module_init(void)
{
	irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

static void __exit irqfd_module_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}

module_init(irqfd_module_init);
module_exit(irqfd_module_exit);

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * Userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

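/*
 * Illustrative userspace usage (informative sketch only, not part of this
 * file): registers a wildcard ioeventfd on a hypothetical 4-byte PIO port;
 * vmfd and PORT are placeholders and error handling is omitted.  Any
 * 4-byte guest write to the port then signals the eventfd instead of
 * requiring a heavyweight exit to userspace.  Adding
 * KVM_IOEVENTFD_FLAG_DATAMATCH and a datamatch value restricts the signal
 * to writes of that exact value, and KVM_IOEVENTFD_FLAG_DEASSIGN removes
 * the registration again.
 *
 *	#include <sys/eventfd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int efd = eventfd(0, 0);
 *	struct kvm_ioeventfd req = {
 *		.addr  = PORT,
 *		.len   = 4,
 *		.fd    = efd,
 *		.flags = KVM_IOEVENTFD_FLAG_PIO,
 *	};
 *
 *	ioctl(vmfd, KVM_IOEVENTFD, &req);
 */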

struct _ioeventfd {
	struct list_head list;
	u64 addr;
	int length;
	struct eventfd_ctx *eventfd;
	u64 datamatch;
	struct kvm_io_device dev;
	bool wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (!(addr == p->addr && len == p->length))
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch ? true : false;
}

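/*
 * Worked illustration of the matching rules above (informative only): with
 * p->addr == 0x1000, p->length == 2, p->wildcard == false and
 * p->datamatch == 0x1234, a 2-byte guest write of 0x1234 to 0x1000 signals
 * the eventfd, while a 2-byte write of any other value, a 4-byte write of
 * 0x1234, or a write to any other address does not.  A wildcard ioeventfd
 * at the same addr/len signals on every write of that size, regardless of
 * the data written.
 */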

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
		const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->addr == p->addr && _p->length == p->length &&
		    (_p->wildcard || p->wildcard ||
		     _p->datamatch == p->datamatch))
			return true;

	return false;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
	struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
	struct _ioeventfd *p;
	struct eventfd_ctx *eventfd;
	int ret;

	/* must be natural-word sized */
	switch (args->len) {
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr = args->addr;
	p->length = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	down_write(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = __kvm_io_bus_register_dev(bus, &p->dev);
	if (ret < 0)
		goto unlock_fail;

	list_add_tail(&p->list, &kvm->ioeventfds);

	up_write(&kvm->slots_lock);

	return 0;

unlock_fail:
	up_write(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
	struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
	struct _ioeventfd *p, *tmp;
	struct eventfd_ctx *eventfd;
	int ret = -ENOENT;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	down_write(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

		if (p->eventfd != eventfd ||
		    p->addr != args->addr ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		__kvm_io_bus_unregister_dev(bus, &p->dev);
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	up_write(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}
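
/*
 * Illustrative userspace consumer (informative sketch only, not part of
 * this file): once an ioeventfd is registered as in the example above, a
 * host I/O thread typically blocks on the eventfd and performs the device
 * work when a matching guest write arrives.  efd is the same placeholder
 * descriptor used above and handle_guest_notification() is a hypothetical
 * handler; error handling is omitted.
 *
 *	#include <stdint.h>
 *	#include <unistd.h>
 *
 *	uint64_t cnt;
 *
 *	for (;;) {
 *		if (read(efd, &cnt, sizeof(cnt)) == sizeof(cnt))
 *			handle_guest_notification(cnt);
 *	}
 */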