1 /* 2 * kvm eventfd support - use eventfd objects to signal various KVM events 3 * 4 * Copyright 2009 Novell. All Rights Reserved. 5 * Copyright 2010 Red Hat, Inc. and/or its affiliates. 6 * 7 * Author: 8 * Gregory Haskins <ghaskins@novell.com> 9 * 10 * This file is free software; you can redistribute it and/or modify 11 * it under the terms of version 2 of the GNU General Public License 12 * as published by the Free Software Foundation. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software Foundation, 21 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 22 */ 23 24 #include <linux/kvm_host.h> 25 #include <linux/kvm.h> 26 #include <linux/workqueue.h> 27 #include <linux/syscalls.h> 28 #include <linux/wait.h> 29 #include <linux/poll.h> 30 #include <linux/file.h> 31 #include <linux/list.h> 32 #include <linux/eventfd.h> 33 #include <linux/kernel.h> 34 #include <linux/srcu.h> 35 #include <linux/slab.h> 36 37 #include "iodev.h" 38 39 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING 40 /* 41 * -------------------------------------------------------------------- 42 * irqfd: Allows an fd to be used to inject an interrupt to the guest 43 * 44 * Credit goes to Avi Kivity for the original idea. 45 * -------------------------------------------------------------------- 46 */ 47 48 /* 49 * Resampling irqfds are a special variety of irqfds used to emulate 50 * level triggered interrupts. The interrupt is asserted on eventfd 51 * trigger. On acknowledgement through the irq ack notifier, the 52 * interrupt is de-asserted and userspace is notified through the 53 * resamplefd. All resamplers on the same gsi are de-asserted 54 * together, so we don't need to track the state of each individual 55 * user. We can also therefore share the same irq source ID. 56 */ 57 struct _irqfd_resampler { 58 struct kvm *kvm; 59 /* 60 * List of resampling struct _irqfd objects sharing this gsi. 61 * RCU list modified under kvm->irqfds.resampler_lock 62 */ 63 struct list_head list; 64 struct kvm_irq_ack_notifier notifier; 65 /* 66 * Entry in list of kvm->irqfd.resampler_list. Use for sharing 67 * resamplers among irqfds on the same gsi. 68 * Accessed and modified under kvm->irqfds.resampler_lock 69 */ 70 struct list_head link; 71 }; 72 73 struct _irqfd { 74 /* Used for MSI fast-path */ 75 struct kvm *kvm; 76 wait_queue_t wait; 77 /* Update side is protected by irqfds.lock */ 78 struct kvm_kernel_irq_routing_entry __rcu *irq_entry; 79 /* Used for level IRQ fast-path */ 80 int gsi; 81 struct work_struct inject; 82 /* The resampler used by this irqfd (resampler-only) */ 83 struct _irqfd_resampler *resampler; 84 /* Eventfd notified on resample (resampler-only) */ 85 struct eventfd_ctx *resamplefd; 86 /* Entry in list of irqfds for a resampler (resampler-only) */ 87 struct list_head resampler_link; 88 /* Used for setup/shutdown */ 89 struct eventfd_ctx *eventfd; 90 struct list_head list; 91 poll_table pt; 92 struct work_struct shutdown; 93 }; 94 95 static struct workqueue_struct *irqfd_cleanup_wq; 96 97 static void 98 irqfd_inject(struct work_struct *work) 99 { 100 struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); 101 struct kvm *kvm = irqfd->kvm; 102 103 if (!irqfd->resampler) { 104 kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1, 105 false); 106 kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0, 107 false); 108 } else 109 kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, 110 irqfd->gsi, 1, false); 111 } 112 113 /* 114 * Since resampler irqfds share an IRQ source ID, we de-assert once 115 * then notify all of the resampler irqfds using this GSI. We can't 116 * do multiple de-asserts or we risk racing with incoming re-asserts. 117 */ 118 static void 119 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) 120 { 121 struct _irqfd_resampler *resampler; 122 struct kvm *kvm; 123 struct _irqfd *irqfd; 124 int idx; 125 126 resampler = container_of(kian, struct _irqfd_resampler, notifier); 127 kvm = resampler->kvm; 128 129 kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, 130 resampler->notifier.gsi, 0, false); 131 132 idx = srcu_read_lock(&kvm->irq_srcu); 133 134 list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) 135 eventfd_signal(irqfd->resamplefd, 1); 136 137 srcu_read_unlock(&kvm->irq_srcu, idx); 138 } 139 140 static void 141 irqfd_resampler_shutdown(struct _irqfd *irqfd) 142 { 143 struct _irqfd_resampler *resampler = irqfd->resampler; 144 struct kvm *kvm = resampler->kvm; 145 146 mutex_lock(&kvm->irqfds.resampler_lock); 147 148 list_del_rcu(&irqfd->resampler_link); 149 synchronize_srcu(&kvm->irq_srcu); 150 151 if (list_empty(&resampler->list)) { 152 list_del(&resampler->link); 153 kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); 154 kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, 155 resampler->notifier.gsi, 0, false); 156 kfree(resampler); 157 } 158 159 mutex_unlock(&kvm->irqfds.resampler_lock); 160 } 161 162 /* 163 * Race-free decouple logic (ordering is critical) 164 */ 165 static void 166 irqfd_shutdown(struct work_struct *work) 167 { 168 struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); 169 u64 cnt; 170 171 /* 172 * Synchronize with the wait-queue and unhook ourselves to prevent 173 * further events. 174 */ 175 eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt); 176 177 /* 178 * We know no new events will be scheduled at this point, so block 179 * until all previously outstanding events have completed 180 */ 181 flush_work(&irqfd->inject); 182 183 if (irqfd->resampler) { 184 irqfd_resampler_shutdown(irqfd); 185 eventfd_ctx_put(irqfd->resamplefd); 186 } 187 188 /* 189 * It is now safe to release the object's resources 190 */ 191 eventfd_ctx_put(irqfd->eventfd); 192 kfree(irqfd); 193 } 194 195 196 /* assumes kvm->irqfds.lock is held */ 197 static bool 198 irqfd_is_active(struct _irqfd *irqfd) 199 { 200 return list_empty(&irqfd->list) ? false : true; 201 } 202 203 /* 204 * Mark the irqfd as inactive and schedule it for removal 205 * 206 * assumes kvm->irqfds.lock is held 207 */ 208 static void 209 irqfd_deactivate(struct _irqfd *irqfd) 210 { 211 BUG_ON(!irqfd_is_active(irqfd)); 212 213 list_del_init(&irqfd->list); 214 215 queue_work(irqfd_cleanup_wq, &irqfd->shutdown); 216 } 217 218 /* 219 * Called with wqh->lock held and interrupts disabled 220 */ 221 static int 222 irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) 223 { 224 struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); 225 unsigned long flags = (unsigned long)key; 226 struct kvm_kernel_irq_routing_entry *irq; 227 struct kvm *kvm = irqfd->kvm; 228 int idx; 229 230 if (flags & POLLIN) { 231 idx = srcu_read_lock(&kvm->irq_srcu); 232 irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu); 233 /* An event has been signaled, inject an interrupt */ 234 if (irq) 235 kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, 236 false); 237 else 238 schedule_work(&irqfd->inject); 239 srcu_read_unlock(&kvm->irq_srcu, idx); 240 } 241 242 if (flags & POLLHUP) { 243 /* The eventfd is closing, detach from KVM */ 244 unsigned long flags; 245 246 spin_lock_irqsave(&kvm->irqfds.lock, flags); 247 248 /* 249 * We must check if someone deactivated the irqfd before 250 * we could acquire the irqfds.lock since the item is 251 * deactivated from the KVM side before it is unhooked from 252 * the wait-queue. If it is already deactivated, we can 253 * simply return knowing the other side will cleanup for us. 254 * We cannot race against the irqfd going away since the 255 * other side is required to acquire wqh->lock, which we hold 256 */ 257 if (irqfd_is_active(irqfd)) 258 irqfd_deactivate(irqfd); 259 260 spin_unlock_irqrestore(&kvm->irqfds.lock, flags); 261 } 262 263 return 0; 264 } 265 266 static void 267 irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, 268 poll_table *pt) 269 { 270 struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); 271 add_wait_queue(wqh, &irqfd->wait); 272 } 273 274 /* Must be called under irqfds.lock */ 275 static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, 276 struct kvm_irq_routing_table *irq_rt) 277 { 278 struct kvm_kernel_irq_routing_entry *e; 279 280 if (irqfd->gsi >= irq_rt->nr_rt_entries) { 281 rcu_assign_pointer(irqfd->irq_entry, NULL); 282 return; 283 } 284 285 hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { 286 /* Only fast-path MSI. */ 287 if (e->type == KVM_IRQ_ROUTING_MSI) 288 rcu_assign_pointer(irqfd->irq_entry, e); 289 else 290 rcu_assign_pointer(irqfd->irq_entry, NULL); 291 } 292 } 293 294 static int 295 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) 296 { 297 struct kvm_irq_routing_table *irq_rt; 298 struct _irqfd *irqfd, *tmp; 299 struct fd f; 300 struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; 301 int ret; 302 unsigned int events; 303 304 irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); 305 if (!irqfd) 306 return -ENOMEM; 307 308 irqfd->kvm = kvm; 309 irqfd->gsi = args->gsi; 310 INIT_LIST_HEAD(&irqfd->list); 311 INIT_WORK(&irqfd->inject, irqfd_inject); 312 INIT_WORK(&irqfd->shutdown, irqfd_shutdown); 313 314 f = fdget(args->fd); 315 if (!f.file) { 316 ret = -EBADF; 317 goto out; 318 } 319 320 eventfd = eventfd_ctx_fileget(f.file); 321 if (IS_ERR(eventfd)) { 322 ret = PTR_ERR(eventfd); 323 goto fail; 324 } 325 326 irqfd->eventfd = eventfd; 327 328 if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { 329 struct _irqfd_resampler *resampler; 330 331 resamplefd = eventfd_ctx_fdget(args->resamplefd); 332 if (IS_ERR(resamplefd)) { 333 ret = PTR_ERR(resamplefd); 334 goto fail; 335 } 336 337 irqfd->resamplefd = resamplefd; 338 INIT_LIST_HEAD(&irqfd->resampler_link); 339 340 mutex_lock(&kvm->irqfds.resampler_lock); 341 342 list_for_each_entry(resampler, 343 &kvm->irqfds.resampler_list, link) { 344 if (resampler->notifier.gsi == irqfd->gsi) { 345 irqfd->resampler = resampler; 346 break; 347 } 348 } 349 350 if (!irqfd->resampler) { 351 resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); 352 if (!resampler) { 353 ret = -ENOMEM; 354 mutex_unlock(&kvm->irqfds.resampler_lock); 355 goto fail; 356 } 357 358 resampler->kvm = kvm; 359 INIT_LIST_HEAD(&resampler->list); 360 resampler->notifier.gsi = irqfd->gsi; 361 resampler->notifier.irq_acked = irqfd_resampler_ack; 362 INIT_LIST_HEAD(&resampler->link); 363 364 list_add(&resampler->link, &kvm->irqfds.resampler_list); 365 kvm_register_irq_ack_notifier(kvm, 366 &resampler->notifier); 367 irqfd->resampler = resampler; 368 } 369 370 list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); 371 synchronize_srcu(&kvm->irq_srcu); 372 373 mutex_unlock(&kvm->irqfds.resampler_lock); 374 } 375 376 /* 377 * Install our own custom wake-up handling so we are notified via 378 * a callback whenever someone signals the underlying eventfd 379 */ 380 init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); 381 init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc); 382 383 spin_lock_irq(&kvm->irqfds.lock); 384 385 ret = 0; 386 list_for_each_entry(tmp, &kvm->irqfds.items, list) { 387 if (irqfd->eventfd != tmp->eventfd) 388 continue; 389 /* This fd is used for another irq already. */ 390 ret = -EBUSY; 391 spin_unlock_irq(&kvm->irqfds.lock); 392 goto fail; 393 } 394 395 irq_rt = rcu_dereference_protected(kvm->irq_routing, 396 lockdep_is_held(&kvm->irqfds.lock)); 397 irqfd_update(kvm, irqfd, irq_rt); 398 399 list_add_tail(&irqfd->list, &kvm->irqfds.items); 400 401 spin_unlock_irq(&kvm->irqfds.lock); 402 403 /* 404 * Check if there was an event already pending on the eventfd 405 * before we registered, and trigger it as if we didn't miss it. 406 */ 407 events = f.file->f_op->poll(f.file, &irqfd->pt); 408 409 if (events & POLLIN) 410 schedule_work(&irqfd->inject); 411 412 /* 413 * do not drop the file until the irqfd is fully initialized, otherwise 414 * we might race against the POLLHUP 415 */ 416 fdput(f); 417 418 return 0; 419 420 fail: 421 if (irqfd->resampler) 422 irqfd_resampler_shutdown(irqfd); 423 424 if (resamplefd && !IS_ERR(resamplefd)) 425 eventfd_ctx_put(resamplefd); 426 427 if (eventfd && !IS_ERR(eventfd)) 428 eventfd_ctx_put(eventfd); 429 430 fdput(f); 431 432 out: 433 kfree(irqfd); 434 return ret; 435 } 436 #endif 437 438 void 439 kvm_eventfd_init(struct kvm *kvm) 440 { 441 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING 442 spin_lock_init(&kvm->irqfds.lock); 443 INIT_LIST_HEAD(&kvm->irqfds.items); 444 INIT_LIST_HEAD(&kvm->irqfds.resampler_list); 445 mutex_init(&kvm->irqfds.resampler_lock); 446 #endif 447 INIT_LIST_HEAD(&kvm->ioeventfds); 448 } 449 450 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING 451 /* 452 * shutdown any irqfd's that match fd+gsi 453 */ 454 static int 455 kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) 456 { 457 struct _irqfd *irqfd, *tmp; 458 struct eventfd_ctx *eventfd; 459 460 eventfd = eventfd_ctx_fdget(args->fd); 461 if (IS_ERR(eventfd)) 462 return PTR_ERR(eventfd); 463 464 spin_lock_irq(&kvm->irqfds.lock); 465 466 list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { 467 if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { 468 /* 469 * This rcu_assign_pointer is needed for when 470 * another thread calls kvm_irq_routing_update before 471 * we flush workqueue below (we synchronize with 472 * kvm_irq_routing_update using irqfds.lock). 473 * It is paired with synchronize_srcu done by caller 474 * of that function. 475 */ 476 rcu_assign_pointer(irqfd->irq_entry, NULL); 477 irqfd_deactivate(irqfd); 478 } 479 } 480 481 spin_unlock_irq(&kvm->irqfds.lock); 482 eventfd_ctx_put(eventfd); 483 484 /* 485 * Block until we know all outstanding shutdown jobs have completed 486 * so that we guarantee there will not be any more interrupts on this 487 * gsi once this deassign function returns. 488 */ 489 flush_workqueue(irqfd_cleanup_wq); 490 491 return 0; 492 } 493 494 int 495 kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) 496 { 497 if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE)) 498 return -EINVAL; 499 500 if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) 501 return kvm_irqfd_deassign(kvm, args); 502 503 return kvm_irqfd_assign(kvm, args); 504 } 505 506 /* 507 * This function is called as the kvm VM fd is being released. Shutdown all 508 * irqfds that still remain open 509 */ 510 void 511 kvm_irqfd_release(struct kvm *kvm) 512 { 513 struct _irqfd *irqfd, *tmp; 514 515 spin_lock_irq(&kvm->irqfds.lock); 516 517 list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) 518 irqfd_deactivate(irqfd); 519 520 spin_unlock_irq(&kvm->irqfds.lock); 521 522 /* 523 * Block until we know all outstanding shutdown jobs have completed 524 * since we do not take a kvm* reference. 525 */ 526 flush_workqueue(irqfd_cleanup_wq); 527 528 } 529 530 /* 531 * Change irq_routing and irqfd. 532 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. 533 */ 534 void kvm_irq_routing_update(struct kvm *kvm, 535 struct kvm_irq_routing_table *irq_rt) 536 { 537 struct _irqfd *irqfd; 538 539 spin_lock_irq(&kvm->irqfds.lock); 540 541 rcu_assign_pointer(kvm->irq_routing, irq_rt); 542 543 list_for_each_entry(irqfd, &kvm->irqfds.items, list) 544 irqfd_update(kvm, irqfd, irq_rt); 545 546 spin_unlock_irq(&kvm->irqfds.lock); 547 } 548 549 /* 550 * create a host-wide workqueue for issuing deferred shutdown requests 551 * aggregated from all vm* instances. We need our own isolated single-thread 552 * queue to prevent deadlock against flushing the normal work-queue. 553 */ 554 int kvm_irqfd_init(void) 555 { 556 irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); 557 if (!irqfd_cleanup_wq) 558 return -ENOMEM; 559 560 return 0; 561 } 562 563 void kvm_irqfd_exit(void) 564 { 565 destroy_workqueue(irqfd_cleanup_wq); 566 } 567 #endif 568 569 /* 570 * -------------------------------------------------------------------- 571 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal. 572 * 573 * userspace can register a PIO/MMIO address with an eventfd for receiving 574 * notification when the memory has been touched. 575 * -------------------------------------------------------------------- 576 */ 577 578 struct _ioeventfd { 579 struct list_head list; 580 u64 addr; 581 int length; 582 struct eventfd_ctx *eventfd; 583 u64 datamatch; 584 struct kvm_io_device dev; 585 u8 bus_idx; 586 bool wildcard; 587 }; 588 589 static inline struct _ioeventfd * 590 to_ioeventfd(struct kvm_io_device *dev) 591 { 592 return container_of(dev, struct _ioeventfd, dev); 593 } 594 595 static void 596 ioeventfd_release(struct _ioeventfd *p) 597 { 598 eventfd_ctx_put(p->eventfd); 599 list_del(&p->list); 600 kfree(p); 601 } 602 603 static bool 604 ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) 605 { 606 u64 _val; 607 608 if (addr != p->addr) 609 /* address must be precise for a hit */ 610 return false; 611 612 if (!p->length) 613 /* length = 0 means only look at the address, so always a hit */ 614 return true; 615 616 if (len != p->length) 617 /* address-range must be precise for a hit */ 618 return false; 619 620 if (p->wildcard) 621 /* all else equal, wildcard is always a hit */ 622 return true; 623 624 /* otherwise, we have to actually compare the data */ 625 626 BUG_ON(!IS_ALIGNED((unsigned long)val, len)); 627 628 switch (len) { 629 case 1: 630 _val = *(u8 *)val; 631 break; 632 case 2: 633 _val = *(u16 *)val; 634 break; 635 case 4: 636 _val = *(u32 *)val; 637 break; 638 case 8: 639 _val = *(u64 *)val; 640 break; 641 default: 642 return false; 643 } 644 645 return _val == p->datamatch ? true : false; 646 } 647 648 /* MMIO/PIO writes trigger an event if the addr/val match */ 649 static int 650 ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, 651 const void *val) 652 { 653 struct _ioeventfd *p = to_ioeventfd(this); 654 655 if (!ioeventfd_in_range(p, addr, len, val)) 656 return -EOPNOTSUPP; 657 658 eventfd_signal(p->eventfd, 1); 659 return 0; 660 } 661 662 /* 663 * This function is called as KVM is completely shutting down. We do not 664 * need to worry about locking just nuke anything we have as quickly as possible 665 */ 666 static void 667 ioeventfd_destructor(struct kvm_io_device *this) 668 { 669 struct _ioeventfd *p = to_ioeventfd(this); 670 671 ioeventfd_release(p); 672 } 673 674 static const struct kvm_io_device_ops ioeventfd_ops = { 675 .write = ioeventfd_write, 676 .destructor = ioeventfd_destructor, 677 }; 678 679 /* assumes kvm->slots_lock held */ 680 static bool 681 ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) 682 { 683 struct _ioeventfd *_p; 684 685 list_for_each_entry(_p, &kvm->ioeventfds, list) 686 if (_p->bus_idx == p->bus_idx && 687 _p->addr == p->addr && 688 (!_p->length || !p->length || 689 (_p->length == p->length && 690 (_p->wildcard || p->wildcard || 691 _p->datamatch == p->datamatch)))) 692 return true; 693 694 return false; 695 } 696 697 static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) 698 { 699 if (flags & KVM_IOEVENTFD_FLAG_PIO) 700 return KVM_PIO_BUS; 701 if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY) 702 return KVM_VIRTIO_CCW_NOTIFY_BUS; 703 return KVM_MMIO_BUS; 704 } 705 706 static int 707 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) 708 { 709 enum kvm_bus bus_idx; 710 struct _ioeventfd *p; 711 struct eventfd_ctx *eventfd; 712 int ret; 713 714 bus_idx = ioeventfd_bus_from_flags(args->flags); 715 /* must be natural-word sized, or 0 to ignore length */ 716 switch (args->len) { 717 case 0: 718 case 1: 719 case 2: 720 case 4: 721 case 8: 722 break; 723 default: 724 return -EINVAL; 725 } 726 727 /* check for range overflow */ 728 if (args->addr + args->len < args->addr) 729 return -EINVAL; 730 731 /* check for extra flags that we don't understand */ 732 if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) 733 return -EINVAL; 734 735 /* ioeventfd with no length can't be combined with DATAMATCH */ 736 if (!args->len && 737 args->flags & (KVM_IOEVENTFD_FLAG_PIO | 738 KVM_IOEVENTFD_FLAG_DATAMATCH)) 739 return -EINVAL; 740 741 eventfd = eventfd_ctx_fdget(args->fd); 742 if (IS_ERR(eventfd)) 743 return PTR_ERR(eventfd); 744 745 p = kzalloc(sizeof(*p), GFP_KERNEL); 746 if (!p) { 747 ret = -ENOMEM; 748 goto fail; 749 } 750 751 INIT_LIST_HEAD(&p->list); 752 p->addr = args->addr; 753 p->bus_idx = bus_idx; 754 p->length = args->len; 755 p->eventfd = eventfd; 756 757 /* The datamatch feature is optional, otherwise this is a wildcard */ 758 if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH) 759 p->datamatch = args->datamatch; 760 else 761 p->wildcard = true; 762 763 mutex_lock(&kvm->slots_lock); 764 765 /* Verify that there isn't a match already */ 766 if (ioeventfd_check_collision(kvm, p)) { 767 ret = -EEXIST; 768 goto unlock_fail; 769 } 770 771 kvm_iodevice_init(&p->dev, &ioeventfd_ops); 772 773 ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length, 774 &p->dev); 775 if (ret < 0) 776 goto unlock_fail; 777 778 /* When length is ignored, MMIO is also put on a separate bus, for 779 * faster lookups. 780 */ 781 if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) { 782 ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, 783 p->addr, 0, &p->dev); 784 if (ret < 0) 785 goto register_fail; 786 } 787 788 kvm->buses[bus_idx]->ioeventfd_count++; 789 list_add_tail(&p->list, &kvm->ioeventfds); 790 791 mutex_unlock(&kvm->slots_lock); 792 793 return 0; 794 795 register_fail: 796 kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); 797 unlock_fail: 798 mutex_unlock(&kvm->slots_lock); 799 800 fail: 801 kfree(p); 802 eventfd_ctx_put(eventfd); 803 804 return ret; 805 } 806 807 static int 808 kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) 809 { 810 enum kvm_bus bus_idx; 811 struct _ioeventfd *p, *tmp; 812 struct eventfd_ctx *eventfd; 813 int ret = -ENOENT; 814 815 bus_idx = ioeventfd_bus_from_flags(args->flags); 816 eventfd = eventfd_ctx_fdget(args->fd); 817 if (IS_ERR(eventfd)) 818 return PTR_ERR(eventfd); 819 820 mutex_lock(&kvm->slots_lock); 821 822 list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { 823 bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); 824 825 if (p->bus_idx != bus_idx || 826 p->eventfd != eventfd || 827 p->addr != args->addr || 828 p->length != args->len || 829 p->wildcard != wildcard) 830 continue; 831 832 if (!p->wildcard && p->datamatch != args->datamatch) 833 continue; 834 835 kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); 836 if (!p->length) { 837 kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, 838 &p->dev); 839 } 840 kvm->buses[bus_idx]->ioeventfd_count--; 841 ioeventfd_release(p); 842 ret = 0; 843 break; 844 } 845 846 mutex_unlock(&kvm->slots_lock); 847 848 eventfd_ctx_put(eventfd); 849 850 return ret; 851 } 852 853 int 854 kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) 855 { 856 if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN) 857 return kvm_deassign_ioeventfd(kvm, args); 858 859 return kvm_assign_ioeventfd(kvm, args); 860 } 861