/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell. All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <trace/events/kvm.h>

#include "irq.h"
#include "iodev.h"

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * --------------------------------------------------------------------
 * irqfd: Allows an fd to be used to inject an interrupt to the guest
 *
 * Credit goes to Avi Kivity for the original idea.
 * --------------------------------------------------------------------
 */

/*
 * Resampling irqfds are a special variety of irqfds used to emulate
 * level triggered interrupts. The interrupt is asserted on eventfd
 * trigger. On acknowledgement through the irq ack notifier, the
 * interrupt is de-asserted and userspace is notified through the
 * resamplefd. All resamplers on the same gsi are de-asserted
 * together, so we don't need to track the state of each individual
 * user. We can also therefore share the same irq source ID.
 */
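/*
 * Illustrative userspace sketch (not part of this file's API): an irqfd
 * is wired up to a VM file descriptor with the KVM_IRQFD ioctl. The fd
 * names below (vm_fd, irq_efd, resample_efd) are hypothetical and error
 * handling is omitted; see Documentation/virtual/kvm/api.txt for the
 * authoritative description.
 *
 *	int irq_efd = eventfd(0, EFD_CLOEXEC);
 *	struct kvm_irqfd data = {
 *		.fd  = irq_efd,
 *		.gsi = gsi,
 *	};
 *	ioctl(vm_fd, KVM_IRQFD, &data);		// assign
 *
 * For a level-triggered (resampling) irqfd, userspace would additionally
 * pass .flags = KVM_IRQFD_FLAG_RESAMPLE and .resamplefd = resample_efd;
 * KVM_IRQFD_FLAG_DEASSIGN tears the binding down again.
 */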
struct _irqfd_resampler {
        struct kvm *kvm;
        /*
         * List of resampling struct _irqfd objects sharing this gsi.
         * RCU list modified under kvm->irqfds.resampler_lock
         */
        struct list_head list;
        struct kvm_irq_ack_notifier notifier;
        /*
         * Entry in list of kvm->irqfds.resampler_list. Used for sharing
         * resamplers among irqfds on the same gsi.
         * Accessed and modified under kvm->irqfds.resampler_lock
         */
        struct list_head link;
};

struct _irqfd {
        /* Used for MSI fast-path */
        struct kvm *kvm;
        wait_queue_t wait;
        /* Update side is protected by irqfds.lock */
        struct kvm_kernel_irq_routing_entry irq_entry;
        seqcount_t irq_entry_sc;
        /* Used for level IRQ fast-path */
        int gsi;
        struct work_struct inject;
        /* The resampler used by this irqfd (resampler-only) */
        struct _irqfd_resampler *resampler;
        /* Eventfd notified on resample (resampler-only) */
        struct eventfd_ctx *resamplefd;
        /* Entry in list of irqfds for a resampler (resampler-only) */
        struct list_head resampler_link;
        /* Used for setup/shutdown */
        struct eventfd_ctx *eventfd;
        struct list_head list;
        poll_table pt;
        struct work_struct shutdown;
};

static struct workqueue_struct *irqfd_cleanup_wq;

static void
irqfd_inject(struct work_struct *work)
{
        struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
        struct kvm *kvm = irqfd->kvm;

        if (!irqfd->resampler) {
                kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
                            false);
                kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
                            false);
        } else
                kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
                            irqfd->gsi, 1, false);
}

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI. We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
        struct _irqfd_resampler *resampler;
        struct kvm *kvm;
        struct _irqfd *irqfd;
        int idx;

        resampler = container_of(kian, struct _irqfd_resampler, notifier);
        kvm = resampler->kvm;

        kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
                    resampler->notifier.gsi, 0, false);

        idx = srcu_read_lock(&kvm->irq_srcu);

        list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
                eventfd_signal(irqfd->resamplefd, 1);

        srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void
irqfd_resampler_shutdown(struct _irqfd *irqfd)
{
        struct _irqfd_resampler *resampler = irqfd->resampler;
        struct kvm *kvm = resampler->kvm;

        mutex_lock(&kvm->irqfds.resampler_lock);

        list_del_rcu(&irqfd->resampler_link);
        synchronize_srcu(&kvm->irq_srcu);

        if (list_empty(&resampler->list)) {
                list_del(&resampler->link);
                kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
                kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
                            resampler->notifier.gsi, 0, false);
                kfree(resampler);
        }

        mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
        struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
        u64 cnt;

        /*
         * Synchronize with the wait-queue and unhook ourselves to prevent
         * further events.
         */
        eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

        /*
         * We know no new events will be scheduled at this point, so block
         * until all previously outstanding events have completed
         */
        flush_work(&irqfd->inject);

        if (irqfd->resampler) {
                irqfd_resampler_shutdown(irqfd);
                eventfd_ctx_put(irqfd->resamplefd);
        }

        /*
         * It is now safe to release the object's resources
         */
        eventfd_ctx_put(irqfd->eventfd);
        kfree(irqfd);
}


/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct _irqfd *irqfd)
{
        return list_empty(&irqfd->list) ? false : true;
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct _irqfd *irqfd)
{
        BUG_ON(!irqfd_is_active(irqfd));

        list_del_init(&irqfd->list);

        queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
        struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
        unsigned long flags = (unsigned long)key;
        struct kvm_kernel_irq_routing_entry irq;
        struct kvm *kvm = irqfd->kvm;
        unsigned seq;
        int idx;

        if (flags & POLLIN) {
                idx = srcu_read_lock(&kvm->irq_srcu);
                do {
                        seq = read_seqcount_begin(&irqfd->irq_entry_sc);
                        irq = irqfd->irq_entry;
                } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
                /* An event has been signaled, inject an interrupt */
                if (irq.type == KVM_IRQ_ROUTING_MSI)
                        kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
                                    false);
                else
                        schedule_work(&irqfd->inject);
                srcu_read_unlock(&kvm->irq_srcu, idx);
        }

        if (flags & POLLHUP) {
                /* The eventfd is closing, detach from KVM */
                unsigned long flags;

                spin_lock_irqsave(&kvm->irqfds.lock, flags);

                /*
                 * We must check if someone deactivated the irqfd before
                 * we could acquire the irqfds.lock since the item is
                 * deactivated from the KVM side before it is unhooked from
                 * the wait-queue. If it is already deactivated, we can
                 * simply return knowing the other side will cleanup for us.
                 * We cannot race against the irqfd going away since the
                 * other side is required to acquire wqh->lock, which we hold
                 */
                if (irqfd_is_active(irqfd))
                        irqfd_deactivate(irqfd);

                spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
        }

        return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
                        poll_table *pt)
{
        struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
        add_wait_queue(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
{
        struct kvm_kernel_irq_routing_entry *e;
        struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
        int i, n_entries;

        n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

        write_seqcount_begin(&irqfd->irq_entry_sc);

        irqfd->irq_entry.type = 0;

        e = entries;
        for (i = 0; i < n_entries; ++i, ++e) {
                /* Only fast-path MSI. */
                if (e->type == KVM_IRQ_ROUTING_MSI)
                        irqfd->irq_entry = *e;
        }

        write_seqcount_end(&irqfd->irq_entry_sc);
}
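/*
 * Note on the MSI fast path: irqfd_update() caches an MSI routing entry
 * for the GSI (when one exists), which lets irqfd_wakeup() call
 * kvm_set_msi() directly from the eventfd's wait-queue callback instead
 * of bouncing through the inject work item. Illustrative only (irq_efd
 * is the hypothetical eventfd assigned above via KVM_IRQFD, error
 * handling omitted): from userspace the interrupt is ultimately raised
 * by an ordinary eventfd write, while in-kernel producers such as vhost
 * call eventfd_signal() on the same eventfd context.
 *
 *	uint64_t one = 1;
 *	(void)write(irq_efd, &one, sizeof(one));	// raises the interrupt
 */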
static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
        struct _irqfd *irqfd, *tmp;
        struct fd f;
        struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
        int ret;
        unsigned int events;
        int idx;

        irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
        if (!irqfd)
                return -ENOMEM;

        irqfd->kvm = kvm;
        irqfd->gsi = args->gsi;
        INIT_LIST_HEAD(&irqfd->list);
        INIT_WORK(&irqfd->inject, irqfd_inject);
        INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
        seqcount_init(&irqfd->irq_entry_sc);

        f = fdget(args->fd);
        if (!f.file) {
                ret = -EBADF;
                goto out;
        }

        eventfd = eventfd_ctx_fileget(f.file);
        if (IS_ERR(eventfd)) {
                ret = PTR_ERR(eventfd);
                goto fail;
        }

        irqfd->eventfd = eventfd;

        if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
                struct _irqfd_resampler *resampler;

                resamplefd = eventfd_ctx_fdget(args->resamplefd);
                if (IS_ERR(resamplefd)) {
                        ret = PTR_ERR(resamplefd);
                        goto fail;
                }

                irqfd->resamplefd = resamplefd;
                INIT_LIST_HEAD(&irqfd->resampler_link);

                mutex_lock(&kvm->irqfds.resampler_lock);

                list_for_each_entry(resampler,
                                    &kvm->irqfds.resampler_list, link) {
                        if (resampler->notifier.gsi == irqfd->gsi) {
                                irqfd->resampler = resampler;
                                break;
                        }
                }

                if (!irqfd->resampler) {
                        resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
                        if (!resampler) {
                                ret = -ENOMEM;
                                mutex_unlock(&kvm->irqfds.resampler_lock);
                                goto fail;
                        }

                        resampler->kvm = kvm;
                        INIT_LIST_HEAD(&resampler->list);
                        resampler->notifier.gsi = irqfd->gsi;
                        resampler->notifier.irq_acked = irqfd_resampler_ack;
                        INIT_LIST_HEAD(&resampler->link);

                        list_add(&resampler->link, &kvm->irqfds.resampler_list);
                        kvm_register_irq_ack_notifier(kvm,
                                                      &resampler->notifier);
                        irqfd->resampler = resampler;
                }

                list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
                synchronize_srcu(&kvm->irq_srcu);

                mutex_unlock(&kvm->irqfds.resampler_lock);
        }

        /*
         * Install our own custom wake-up handling so we are notified via
         * a callback whenever someone signals the underlying eventfd
         */
        init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
        init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

        spin_lock_irq(&kvm->irqfds.lock);

        ret = 0;
        list_for_each_entry(tmp, &kvm->irqfds.items, list) {
                if (irqfd->eventfd != tmp->eventfd)
                        continue;
                /* This fd is used for another irq already. */
                ret = -EBUSY;
                spin_unlock_irq(&kvm->irqfds.lock);
                goto fail;
        }

        idx = srcu_read_lock(&kvm->irq_srcu);
        irqfd_update(kvm, irqfd);
        srcu_read_unlock(&kvm->irq_srcu, idx);

        list_add_tail(&irqfd->list, &kvm->irqfds.items);

        spin_unlock_irq(&kvm->irqfds.lock);

        /*
         * Check if there was an event already pending on the eventfd
         * before we registered, and trigger it as if we didn't miss it.
         */
        events = f.file->f_op->poll(f.file, &irqfd->pt);

        if (events & POLLIN)
                schedule_work(&irqfd->inject);

        /*
         * do not drop the file until the irqfd is fully initialized, otherwise
         * we might race against the POLLHUP
         */
        fdput(f);

        return 0;

fail:
        if (irqfd->resampler)
                irqfd_resampler_shutdown(irqfd);

        if (resamplefd && !IS_ERR(resamplefd))
                eventfd_ctx_put(resamplefd);

        if (eventfd && !IS_ERR(eventfd))
                eventfd_ctx_put(eventfd);

        fdput(f);

out:
        kfree(irqfd);
        return ret;
}

bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
        struct kvm_irq_ack_notifier *kian;
        int gsi, idx;

        idx = srcu_read_lock(&kvm->irq_srcu);
        gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
        if (gsi != -1)
                hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
                                         link)
                        if (kian->gsi == gsi) {
                                srcu_read_unlock(&kvm->irq_srcu, idx);
                                return true;
                        }

        srcu_read_unlock(&kvm->irq_srcu, idx);

        return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
        struct kvm_irq_ack_notifier *kian;
        int gsi, idx;

        trace_kvm_ack_irq(irqchip, pin);

        idx = srcu_read_lock(&kvm->irq_srcu);
        gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
        if (gsi != -1)
                hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
                                         link)
                        if (kian->gsi == gsi)
                                kian->irq_acked(kian);
        srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
                                   struct kvm_irq_ack_notifier *kian)
{
        mutex_lock(&kvm->irq_lock);
        hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
        mutex_unlock(&kvm->irq_lock);
#ifdef __KVM_HAVE_IOAPIC
        kvm_vcpu_request_scan_ioapic(kvm);
#endif
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
                                     struct kvm_irq_ack_notifier *kian)
{
        mutex_lock(&kvm->irq_lock);
        hlist_del_init_rcu(&kian->link);
        mutex_unlock(&kvm->irq_lock);
        synchronize_srcu(&kvm->irq_srcu);
#ifdef __KVM_HAVE_IOAPIC
        kvm_vcpu_request_scan_ioapic(kvm);
#endif
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
        spin_lock_init(&kvm->irqfds.lock);
        INIT_LIST_HEAD(&kvm->irqfds.items);
        INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
        mutex_init(&kvm->irqfds.resampler_lock);
#endif
        INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * shutdown any irqfd's that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
        struct _irqfd *irqfd, *tmp;
        struct eventfd_ctx *eventfd;

        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        spin_lock_irq(&kvm->irqfds.lock);

        list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
                if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
                        /*
                         * This clearing of irq_entry.type is needed for when
                         * another thread calls kvm_irq_routing_update before
                         * we flush workqueue below (we synchronize with
                         * kvm_irq_routing_update using irqfds.lock).
                         */
                        write_seqcount_begin(&irqfd->irq_entry_sc);
                        irqfd->irq_entry.type = 0;
                        write_seqcount_end(&irqfd->irq_entry_sc);
                        irqfd_deactivate(irqfd);
                }
        }

        spin_unlock_irq(&kvm->irqfds.lock);
        eventfd_ctx_put(eventfd);

        /*
         * Block until we know all outstanding shutdown jobs have completed
         * so that we guarantee there will not be any more interrupts on this
         * gsi once this deassign function returns.
         */
        flush_workqueue(irqfd_cleanup_wq);

        return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
        if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
                return -EINVAL;

        if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
                return kvm_irqfd_deassign(kvm, args);

        return kvm_irqfd_assign(kvm, args);
}

/*
 * This function is called as the kvm VM fd is being released. Shutdown all
 * irqfds that still remain open
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
        struct _irqfd *irqfd, *tmp;

        spin_lock_irq(&kvm->irqfds.lock);

        list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
                irqfd_deactivate(irqfd);

        spin_unlock_irq(&kvm->irqfds.lock);

        /*
         * Block until we know all outstanding shutdown jobs have completed
         * since we do not take a kvm* reference.
         */
        flush_workqueue(irqfd_cleanup_wq);

}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
        struct _irqfd *irqfd;

        spin_lock_irq(&kvm->irqfds.lock);

        list_for_each_entry(irqfd, &kvm->irqfds.items, list)
                irqfd_update(kvm, irqfd);

        spin_unlock_irq(&kvm->irqfds.lock);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated single-thread
 * queue to prevent deadlock against flushing the normal work-queue.
 */
int kvm_irqfd_init(void)
{
        irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
        if (!irqfd_cleanup_wq)
                return -ENOMEM;

        return 0;
}

void kvm_irqfd_exit(void)
{
        destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */
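/*
 * Illustrative userspace sketch (not part of this file's API): an
 * ioeventfd is registered against a VM file descriptor with the
 * KVM_IOEVENTFD ioctl. The names below (vm_fd, notify_efd) and the MMIO
 * address/datamatch values are hypothetical and error handling is
 * omitted.
 *
 *	int notify_efd = eventfd(0, EFD_CLOEXEC);
 *	struct kvm_ioeventfd io = {
 *		.addr      = 0xfe003000,	// guest-physical MMIO address
 *		.len       = 2,
 *		.fd        = notify_efd,
 *		.flags     = KVM_IOEVENTFD_FLAG_DATAMATCH,
 *		.datamatch = 0,			// e.g. a virtio queue index
 *	};
 *	ioctl(vm_fd, KVM_IOEVENTFD, &io);
 *
 * A guest write of the matching value to that address then signals
 * notify_efd instead of exiting to userspace; KVM_IOEVENTFD_FLAG_DEASSIGN
 * with the same parameters removes the registration.
 */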
struct _ioeventfd {
        struct list_head     list;
        u64                  addr;
        int                  length;
        struct eventfd_ctx  *eventfd;
        u64                  datamatch;
        struct kvm_io_device dev;
        u8                   bus_idx;
        bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
        return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
        eventfd_ctx_put(p->eventfd);
        list_del(&p->list);
        kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
        u64 _val;

        if (addr != p->addr)
                /* address must be precise for a hit */
                return false;

        if (!p->length)
                /* length = 0 means only look at the address, so always a hit */
                return true;

        if (len != p->length)
                /* address-range must be precise for a hit */
                return false;

        if (p->wildcard)
                /* all else equal, wildcard is always a hit */
                return true;

        /* otherwise, we have to actually compare the data */

        BUG_ON(!IS_ALIGNED((unsigned long)val, len));

        switch (len) {
        case 1:
                _val = *(u8 *)val;
                break;
        case 2:
                _val = *(u16 *)val;
                break;
        case 4:
                _val = *(u32 *)val;
                break;
        case 8:
                _val = *(u64 *)val;
                break;
        default:
                return false;
        }

        return _val == p->datamatch ? true : false;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
                const void *val)
{
        struct _ioeventfd *p = to_ioeventfd(this);

        if (!ioeventfd_in_range(p, addr, len, val))
                return -EOPNOTSUPP;

        eventfd_signal(p->eventfd, 1);
        return 0;
}

/*
 * This function is called as KVM is completely shutting down.
 * We do not need to worry about locking; just nuke anything we have as
 * quickly as possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
        struct _ioeventfd *p = to_ioeventfd(this);

        ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
        .write      = ioeventfd_write,
        .destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
        struct _ioeventfd *_p;

        list_for_each_entry(_p, &kvm->ioeventfds, list)
                if (_p->bus_idx == p->bus_idx &&
                    _p->addr == p->addr &&
                    (!_p->length || !p->length ||
                     (_p->length == p->length &&
                      (_p->wildcard || p->wildcard ||
                       _p->datamatch == p->datamatch))))
                        return true;

        return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
        if (flags & KVM_IOEVENTFD_FLAG_PIO)
                return KVM_PIO_BUS;
        if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
                return KVM_VIRTIO_CCW_NOTIFY_BUS;
        return KVM_MMIO_BUS;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        enum kvm_bus bus_idx;
        struct _ioeventfd *p;
        struct eventfd_ctx *eventfd;
        int ret;

        bus_idx = ioeventfd_bus_from_flags(args->flags);
        /* must be natural-word sized, or 0 to ignore length */
        switch (args->len) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
                break;
        default:
                return -EINVAL;
        }

        /* check for range overflow */
        if (args->addr + args->len < args->addr)
                return -EINVAL;

        /* check for extra flags that we don't understand */
        if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
                return -EINVAL;

        /* ioeventfd with no length can't be combined with DATAMATCH */
        if (!args->len &&
            args->flags & (KVM_IOEVENTFD_FLAG_PIO |
                           KVM_IOEVENTFD_FLAG_DATAMATCH))
                return -EINVAL;

        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        p = kzalloc(sizeof(*p), GFP_KERNEL);
        if (!p) {
                ret = -ENOMEM;
                goto fail;
        }

        INIT_LIST_HEAD(&p->list);
        p->addr    = args->addr;
        p->bus_idx = bus_idx;
        p->length  = args->len;
        p->eventfd = eventfd;

        /* The datamatch feature is optional, otherwise this is a wildcard */
        if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
                p->datamatch = args->datamatch;
        else
                p->wildcard = true;

        mutex_lock(&kvm->slots_lock);

        /* Verify that there isn't a match already */
        if (ioeventfd_check_collision(kvm, p)) {
                ret = -EEXIST;
                goto unlock_fail;
        }

        kvm_iodevice_init(&p->dev, &ioeventfd_ops);

        ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
                                      &p->dev);
        if (ret < 0)
                goto unlock_fail;

        /* When length is ignored, MMIO is also put on a separate bus, for
         * faster lookups.
         */
        if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
                ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
                                              p->addr, 0, &p->dev);
                if (ret < 0)
                        goto register_fail;
        }

        kvm->buses[bus_idx]->ioeventfd_count++;
        list_add_tail(&p->list, &kvm->ioeventfds);

        mutex_unlock(&kvm->slots_lock);

        return 0;

register_fail:
        kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
unlock_fail:
        mutex_unlock(&kvm->slots_lock);

fail:
        kfree(p);
        eventfd_ctx_put(eventfd);

        return ret;
}

static int
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        enum kvm_bus bus_idx;
        struct _ioeventfd *p, *tmp;
        struct eventfd_ctx *eventfd;
        int ret = -ENOENT;

        bus_idx = ioeventfd_bus_from_flags(args->flags);
        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        mutex_lock(&kvm->slots_lock);

        list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
                bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

                if (p->bus_idx != bus_idx ||
                    p->eventfd != eventfd ||
                    p->addr != args->addr ||
                    p->length != args->len ||
                    p->wildcard != wildcard)
                        continue;

                if (!p->wildcard && p->datamatch != args->datamatch)
                        continue;

                kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
                if (!p->length) {
                        kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
                                                  &p->dev);
                }
                kvm->buses[bus_idx]->ioeventfd_count--;
                ioeventfd_release(p);
                ret = 0;
                break;
        }

        mutex_unlock(&kvm->slots_lock);

        eventfd_ctx_put(eventfd);

        return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
                return kvm_deassign_ioeventfd(kvm, args);

        return kvm_assign_ioeventfd(kvm, args);
}