/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

#include <kvm/iodev.h>

#ifdef CONFIG_HAVE_KVM_IRQFD


static void
irqfd_inject(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	if (!irqfd->resampler) {
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
			    false);
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
			    false);
	} else
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    irqfd->gsi, 1, false);
}
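
/*
 * Usage sketch (illustrative, not compiled here): an irqfd fires whenever
 * its eventfd is signalled from any context -- a userspace write(2), vhost,
 * or a device interrupt forwarded by VFIO.  A minimal userspace-side
 * trigger, assuming "irq_efd" is the eventfd handed to KVM_IRQFD:
 *
 *	uint64_t val = 1;
 *
 *	if (write(irq_efd, &val, sizeof(val)) != sizeof(val))
 *		perror("eventfd write");
 *
 * The write lands in irqfd_wakeup() below; when the interrupt cannot be
 * delivered from atomic context, it is bounced to the irqfd_inject() work
 * above, which pulses the GSI (assert then de-assert) for edge-triggered
 * irqfds or asserts it for the resampler (level-triggered) case.
 */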

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI.  We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_kernel_irqfd_resampler *resampler;
	struct kvm *kvm;
	struct kvm_kernel_irqfd *irqfd;
	int idx;

	resampler = container_of(kian,
			struct kvm_kernel_irqfd_resampler, notifier);
	kvm = resampler->kvm;

	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		    resampler->notifier.gsi, 0, false);

	idx = srcu_read_lock(&kvm->irq_srcu);

	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
		eventfd_signal(irqfd->resamplefd, 1);

	srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
	struct kvm *kvm = resampler->kvm;

	mutex_lock(&kvm->irqfds.resampler_lock);

	list_del_rcu(&irqfd->resampler_link);
	synchronize_srcu(&kvm->irq_srcu);

	if (list_empty(&resampler->list)) {
		list_del(&resampler->link);
		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    resampler->notifier.gsi, 0, false);
		kfree(resampler);
	}

	mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, shutdown);
	u64 cnt;

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed
	 */
	flush_work(&irqfd->inject);

	if (irqfd->resampler) {
		irqfd_resampler_shutdown(irqfd);
		eventfd_ctx_put(irqfd->resamplefd);
	}

	/*
	 * It is now safe to release the object's resources
	 */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}

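/*
 * Usage sketch for the resample path above (the struct and flag names are
 * from the KVM_IRQFD uapi; vm_fd, gsi and the eventfds are assumed): a
 * device model emulating a level-triggered interrupt registers two
 * eventfds, one to assert the line and one to be told when the guest EOIs
 * so it can re-check the device and possibly re-assert:
 *
 *	struct kvm_irqfd irqfd = {
 *		.fd         = trigger_efd,
 *		.gsi        = gsi,
 *		.flags      = KVM_IRQFD_FLAG_RESAMPLE,
 *		.resamplefd = resample_efd,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0)
 *		perror("KVM_IRQFD");
 *
 * On guest EOI, irqfd_resampler_ack() above de-asserts the GSI once and
 * signals every resamplefd registered for it; the device model then writes
 * trigger_efd again if its interrupt condition still holds.
 */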

/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
	return list_empty(&irqfd->list) ? false : true;
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	schedule_work(&irqfd->shutdown);
}

int __attribute__((weak)) kvm_arch_set_irq_inatomic(
				struct kvm_kernel_irq_routing_entry *irq,
				struct kvm *kvm, int irq_source_id,
				int level,
				bool line_status)
{
	return -EWOULDBLOCK;
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(wait, struct kvm_kernel_irqfd, wait);
	unsigned long flags = (unsigned long)key;
	struct kvm_kernel_irq_routing_entry irq;
	struct kvm *kvm = irqfd->kvm;
	unsigned seq;
	int idx;

	if (flags & POLLIN) {
		idx = srcu_read_lock(&kvm->irq_srcu);
		do {
			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
			irq = irqfd->irq_entry;
		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
		/* An event has been signaled, inject an interrupt */
		if (kvm_arch_set_irq_inatomic(&irq, kvm,
					      KVM_USERSPACE_IRQ_SOURCE_ID, 1,
					      false) == -EWOULDBLOCK)
			schedule_work(&irqfd->inject);
		srcu_read_unlock(&kvm->irq_srcu, idx);
	}

	if (flags & POLLHUP) {
		/* The eventfd is closing, detach from KVM */
		unsigned long flags;

		spin_lock_irqsave(&kvm->irqfds.lock, flags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
	}

	return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(pt, struct kvm_kernel_irqfd, pt);
	add_wait_queue(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	int n_entries;

	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

	write_seqcount_begin(&irqfd->irq_entry_sc);

	e = entries;
	if (n_entries == 1)
		irqfd->irq_entry = *e;
	else
		irqfd->irq_entry.type = 0;

	write_seqcount_end(&irqfd->irq_entry_sc);
}

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
				struct irq_bypass_consumer *cons)
{
}

void __attribute__((weak)) kvm_arch_irq_bypass_start(
				struct irq_bypass_consumer *cons)
{
}

int __attribute__((weak)) kvm_arch_update_irqfd_routing(
				struct kvm *kvm, unsigned int host_irq,
				uint32_t guest_irq, bool set)
{
	return 0;
}
#endif

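/*
 * How the bypass hooks above get used: kvm_irqfd_assign() below registers
 * each irqfd as an irq_bypass consumer whose token is its eventfd_ctx.  A
 * producer (VFIO, for example) that wants to skip the eventfd hop registers
 * a producer with the same token, and the irqbypass manager pairs the two,
 * calling the consumer's add_producer()/start() callbacks.  A rough sketch
 * of the producer side, assuming "trigger_ctx" is the same eventfd_ctx the
 * irqfd was created with and "host_irq" is the device interrupt:
 *
 *	static struct irq_bypass_producer prod;
 *
 *	prod.token = trigger_ctx;
 *	prod.irq   = host_irq;
 *	irq_bypass_register_producer(&prod);
 *
 * Once both sides are present, kvm_arch_irq_bypass_add_producer() can wire
 * the host interrupt directly to the guest (e.g. VT-d posted interrupts);
 * the weak stubs above are the defaults an architecture may override.
 */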
static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct fd f;
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	int ret;
	unsigned int events;
	int idx;

	if (!kvm_arch_intc_initialized(kvm))
		return -EAGAIN;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = args->gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
	seqcount_init(&irqfd->irq_entry_sc);

	f = fdget(args->fd);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(f.file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
		struct kvm_kernel_irqfd_resampler *resampler;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->resamplefd = resamplefd;
		INIT_LIST_HEAD(&irqfd->resampler_link);

		mutex_lock(&kvm->irqfds.resampler_lock);

		list_for_each_entry(resampler,
				    &kvm->irqfds.resampler_list, link) {
			if (resampler->notifier.gsi == irqfd->gsi) {
				irqfd->resampler = resampler;
				break;
			}
		}

		if (!irqfd->resampler) {
			resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
			if (!resampler) {
				ret = -ENOMEM;
				mutex_unlock(&kvm->irqfds.resampler_lock);
				goto fail;
			}

			resampler->kvm = kvm;
			INIT_LIST_HEAD(&resampler->list);
			resampler->notifier.gsi = irqfd->gsi;
			resampler->notifier.irq_acked = irqfd_resampler_ack;
			INIT_LIST_HEAD(&resampler->link);

			list_add(&resampler->link, &kvm->irqfds.resampler_list);
			kvm_register_irq_ack_notifier(kvm,
						      &resampler->notifier);
			irqfd->resampler = resampler;
		}

		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
		synchronize_srcu(&kvm->irq_srcu);

		mutex_unlock(&kvm->irqfds.resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	spin_lock_irq(&kvm->irqfds.lock);

	ret = 0;
	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd != tmp->eventfd)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&kvm->irqfds.lock);
		goto fail;
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	irqfd_update(kvm, irqfd);
	srcu_read_unlock(&kvm->irq_srcu, idx);

	list_add_tail(&irqfd->list, &kvm->irqfds.items);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = f.file->f_op->poll(f.file, &irqfd->pt);

	if (events & POLLIN)
		schedule_work(&irqfd->inject);

	/*
	 * do not drop the file until the irqfd is fully initialized, otherwise
	 * we might race against the POLLHUP
	 */
	fdput(f);
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	if (kvm_arch_has_irq_bypass()) {
		irqfd->consumer.token = (void *)irqfd->eventfd;
		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
		irqfd->consumer.start = kvm_arch_irq_bypass_start;
		ret = irq_bypass_register_consumer(&irqfd->consumer);
		if (ret)
			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
				irqfd->consumer.token, ret);
	}
#endif

	return 0;

fail:
	if (irqfd->resampler)
		irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	fdput(f);

out:
	kfree(irqfd);
	return ret;
}

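/*
 * Userspace side of the assign path above, as a minimal sketch (vm_fd and
 * gsi are assumed; error handling elided).  An edge-triggered irqfd needs
 * only one eventfd and the plain KVM_IRQFD ioctl:
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *	struct kvm_irqfd irqfd = {
 *		.fd  = efd,
 *		.gsi = gsi,
 *	};
 *
 *	ioctl(vm_fd, KVM_IRQFD, &irqfd);		(assign)
 *
 *	irqfd.flags = KVM_IRQFD_FLAG_DEASSIGN;
 *	ioctl(vm_fd, KVM_IRQFD, &irqfd);		(deassign)
 *
 * Each eventfd may back at most one irqfd per VM (the -EBUSY check above),
 * and deassign must name the same fd/gsi pair that was assigned.
 */
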
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_ack_notifier *kian;
	int gsi, idx;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
					 link)
			if (kian->gsi == gsi) {
				srcu_read_unlock(&kvm->irq_srcu, idx);
				return true;
			}

	srcu_read_unlock(&kvm->irq_srcu, idx);

	return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
	struct kvm_irq_ack_notifier *kian;

	hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
				 link)
		if (kian->gsi == gsi)
			kian->irq_acked(kian);
}

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	int gsi, idx;

	trace_kvm_ack_irq(irqchip, pin);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
	mutex_unlock(&kvm->irq_lock);
	kvm_vcpu_request_scan_ioapic(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				     struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_del_init_rcu(&kian->link);
	mutex_unlock(&kvm->irq_lock);
	synchronize_srcu(&kvm->irq_srcu);
	kvm_vcpu_request_scan_ioapic(kvm);
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
	mutex_init(&kvm->irqfds.resampler_lock);
#endif
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * shut down any irqfds that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
			/*
			 * This clearing of irq_entry.type is needed for when
			 * another thread calls kvm_irq_routing_update before
			 * we flush workqueue below (we synchronize with
			 * kvm_irq_routing_update using irqfds.lock).
			 */
			write_seqcount_begin(&irqfd->irq_entry_sc);
			irqfd->irq_entry.type = 0;
			write_seqcount_end(&irqfd->irq_entry_sc);
			irqfd_deactivate(irqfd);
		}
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_work(&irqfd->shutdown);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
		return -EINVAL;

	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, args);

	return kvm_irqfd_assign(kvm, args);
}

/*
 * This function is called as the kvm VM fd is being released. Shutdown all
 * irqfds that still remain open
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_work(&irqfd->shutdown);
}

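/*
 * The routing-update path below is driven from userspace: replacing the GSI
 * routing table with KVM_SET_GSI_ROUTING ends up in kvm_set_irq_routing(),
 * which calls kvm_irq_routing_update() to refresh every irqfd's cached
 * routing entry.  A rough sketch of installing a single MSI route, with
 * vm_fd, gsi and the msi_* values assumed:
 *
 *	struct kvm_irq_routing *table;
 *
 *	table = calloc(1, sizeof(*table) +
 *			  sizeof(struct kvm_irq_routing_entry));
 *	table->nr = 1;
 *	table->entries[0].gsi  = gsi;
 *	table->entries[0].type = KVM_IRQ_ROUTING_MSI;
 *	table->entries[0].u.msi.address_lo = msi_addr_lo;
 *	table->entries[0].u.msi.address_hi = msi_addr_hi;
 *	table->entries[0].u.msi.data       = msi_data;
 *	ioctl(vm_fd, KVM_SET_GSI_ROUTING, table);
 */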
/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
		irqfd_update(kvm, irqfd);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
		if (irqfd->producer) {
			int ret = kvm_arch_update_irqfd_routing(
					irqfd->kvm, irqfd->producer->irq,
					irqfd->gsi, 1);
			WARN_ON(ret);
		}
#endif
	}

	spin_unlock_irq(&kvm->irqfds.lock);
}

void kvm_irqfd_exit(void)
{
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

struct _ioeventfd {
	struct list_head     list;
	u64                  addr;
	int                  length;
	struct eventfd_ctx  *eventfd;
	u64                  datamatch;
	struct kvm_io_device dev;
	u8                   bus_idx;
	bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (addr != p->addr)
		/* address must be precise for a hit */
		return false;

	if (!p->length)
		/* length = 0 means only look at the address, so always a hit */
		return true;

	if (len != p->length)
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch ? true : false;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
		int len, const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

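/*
 * Userspace registration that feeds the match logic above, as a minimal
 * sketch (vm_fd, doorbell_gpa and the value 0x10 are assumed; error
 * handling elided).  A 4-byte guest write of 0x10 to doorbell_gpa signals
 * the eventfd; any other value or length is ignored by this entry:
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *	struct kvm_ioeventfd io = {
 *		.addr      = doorbell_gpa,
 *		.len       = 4,
 *		.fd        = efd,
 *		.flags     = KVM_IOEVENTFD_FLAG_DATAMATCH,
 *		.datamatch = 0x10,
 *	};
 *
 *	ioctl(vm_fd, KVM_IOEVENTFD, &io);
 *
 * Dropping KVM_IOEVENTFD_FLAG_DATAMATCH makes the entry a wildcard: any
 * 4-byte write to the address fires the eventfd.
 */
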
/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->bus_idx == p->bus_idx &&
		    _p->addr == p->addr &&
		    (!_p->length || !p->length ||
		     (_p->length == p->length &&
		      (_p->wildcard || p->wildcard ||
		       _p->datamatch == p->datamatch))))
			return true;

	return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
	if (flags & KVM_IOEVENTFD_FLAG_PIO)
		return KVM_PIO_BUS;
	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
		return KVM_VIRTIO_CCW_NOTIFY_BUS;
	return KVM_MMIO_BUS;
}

static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
				enum kvm_bus bus_idx,
				struct kvm_ioeventfd *args)
{

	struct eventfd_ctx *eventfd;
	struct _ioeventfd *p;
	int ret;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
	p->bus_idx = bus_idx;
	p->length  = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	mutex_lock(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
				      &p->dev);
	if (ret < 0)
		goto unlock_fail;

	kvm->buses[bus_idx]->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);

	mutex_unlock(&kvm->slots_lock);

	return 0;

unlock_fail:
	mutex_unlock(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
			   struct kvm_ioeventfd *args)
{
	struct _ioeventfd *p, *tmp;
	struct eventfd_ctx *eventfd;
	int ret = -ENOENT;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

		if (p->bus_idx != bus_idx ||
		    p->eventfd != eventfd ||
		    p->addr != args->addr ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		kvm->buses[bus_idx]->ioeventfd_count--;
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	mutex_unlock(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
	int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

	if (!args->len && bus_idx == KVM_MMIO_BUS)
		kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

	return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx;
	int ret;

	bus_idx = ioeventfd_bus_from_flags(args->flags);
	/* must be natural-word sized, or 0 to ignore length */
	switch (args->len) {
	case 0:
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	/* ioeventfd with no length can't be combined with DATAMATCH */
	if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
		return -EINVAL;

	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
	if (ret)
		goto fail;

	/* When length is ignored, MMIO is also put on a separate bus, for
	 * faster lookups.
	 */
	if (!args->len && bus_idx == KVM_MMIO_BUS) {
		ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
		if (ret < 0)
			goto fast_fail;
	}

	return 0;

fast_fail:
	kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}
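
/*
 * A follow-up to the assign path above, sketching the zero-length variant
 * (names assumed as in the earlier sketch): registering with len == 0
 * matches a write of any size or value to the address, and for MMIO the
 * entry is additionally placed on KVM_FAST_MMIO_BUS so the in-kernel
 * lookup can skip decoding the access.  This is the kind of registration
 * virtio-style doorbells typically use:
 *
 *	struct kvm_ioeventfd io = {
 *		.addr = doorbell_gpa,
 *		.len  = 0,
 *		.fd   = efd,
 *	};
 *
 *	ioctl(vm_fd, KVM_IOEVENTFD, &io);
 *
 * The same struct with KVM_IOEVENTFD_FLAG_DEASSIGN added to .flags removes
 * the registration, including the fast-MMIO entry.
 */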