// SPDX-License-Identifier: GPL-2.0-only
/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell. All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

#include <kvm/iodev.h>

#ifdef CONFIG_HAVE_KVM_IRQFD

static struct workqueue_struct *irqfd_cleanup_wq;

bool __attribute__((weak))
kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
{
	return true;
}

static void
irqfd_inject(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	if (!irqfd->resampler) {
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
				false);
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
				false);
	} else
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    irqfd->gsi, 1, false);
}

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI.  We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_kernel_irqfd_resampler *resampler;
	struct kvm *kvm;
	struct kvm_kernel_irqfd *irqfd;
	int idx;

	resampler = container_of(kian,
			struct kvm_kernel_irqfd_resampler, notifier);
	kvm = resampler->kvm;

	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		    resampler->notifier.gsi, 0, false);

	idx = srcu_read_lock(&kvm->irq_srcu);

	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
		eventfd_signal(irqfd->resamplefd, 1);

	srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
	struct kvm *kvm = resampler->kvm;

	mutex_lock(&kvm->irqfds.resampler_lock);

	list_del_rcu(&irqfd->resampler_link);
	synchronize_srcu(&kvm->irq_srcu);

	if (list_empty(&resampler->list)) {
		list_del(&resampler->link);
		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    resampler->notifier.gsi, 0, false);
		kfree(resampler);
	}

	mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, shutdown);
	struct kvm *kvm = irqfd->kvm;
	u64 cnt;

	/* Make sure irqfd has been initialized in assign path. */
	synchronize_srcu(&kvm->irq_srcu);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed
	 */
	flush_work(&irqfd->inject);

	if (irqfd->resampler) {
		irqfd_resampler_shutdown(irqfd);
		eventfd_ctx_put(irqfd->resamplefd);
	}

	/*
	 * It is now safe to release the object's resources
	 */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}


/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
	return list_empty(&irqfd->list) ? false : true;
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

int __attribute__((weak)) kvm_arch_set_irq_inatomic(
				struct kvm_kernel_irq_routing_entry *irq,
				struct kvm *kvm, int irq_source_id,
				int level,
				bool line_status)
{
	return -EWOULDBLOCK;
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(wait, struct kvm_kernel_irqfd, wait);
	__poll_t flags = key_to_poll(key);
	struct kvm_kernel_irq_routing_entry irq;
	struct kvm *kvm = irqfd->kvm;
	unsigned seq;
	int idx;
	int ret = 0;

	if (flags & EPOLLIN) {
		u64 cnt;
		eventfd_ctx_do_read(irqfd->eventfd, &cnt);

		idx = srcu_read_lock(&kvm->irq_srcu);
		do {
			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
			irq = irqfd->irq_entry;
		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
		/* An event has been signaled, inject an interrupt */
		if (kvm_arch_set_irq_inatomic(&irq, kvm,
					      KVM_USERSPACE_IRQ_SOURCE_ID, 1,
					      false) == -EWOULDBLOCK)
			schedule_work(&irqfd->inject);
		srcu_read_unlock(&kvm->irq_srcu, idx);
		ret = 1;
	}

	if (flags & EPOLLHUP) {
		/* The eventfd is closing, detach from KVM */
		unsigned long iflags;

		spin_lock_irqsave(&kvm->irqfds.lock, iflags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
	}

	return ret;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(pt, struct kvm_kernel_irqfd, pt);
	add_wait_queue_priority(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	int n_entries;

	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

	write_seqcount_begin(&irqfd->irq_entry_sc);

	e = entries;
	if (n_entries == 1)
		irqfd->irq_entry = *e;
	else
		irqfd->irq_entry.type = 0;

	write_seqcount_end(&irqfd->irq_entry_sc);
}

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
				struct irq_bypass_consumer *cons)
{
}

void __attribute__((weak)) kvm_arch_irq_bypass_start(
				struct irq_bypass_consumer *cons)
{
}

int __attribute__((weak)) kvm_arch_update_irqfd_routing(
				struct kvm *kvm, unsigned int host_irq,
				uint32_t guest_irq, bool set)
{
	return 0;
}
#endif

static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct fd f;
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	int ret;
	__poll_t events;
	int idx;

	if (!kvm_arch_intc_initialized(kvm))
		return -EAGAIN;

	if (!kvm_arch_irqfd_allowed(kvm, args))
		return -EINVAL;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = args->gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
	seqcount_spinlock_init(&irqfd->irq_entry_sc, &kvm->irqfds.lock);

	f = fdget(args->fd);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(f.file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
		struct kvm_kernel_irqfd_resampler *resampler;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->resamplefd = resamplefd;
		INIT_LIST_HEAD(&irqfd->resampler_link);

		mutex_lock(&kvm->irqfds.resampler_lock);

		list_for_each_entry(resampler,
				    &kvm->irqfds.resampler_list, link) {
			if (resampler->notifier.gsi == irqfd->gsi) {
				irqfd->resampler = resampler;
				break;
			}
		}

		if (!irqfd->resampler) {
			resampler = kzalloc(sizeof(*resampler),
					    GFP_KERNEL_ACCOUNT);
			if (!resampler) {
				ret = -ENOMEM;
				mutex_unlock(&kvm->irqfds.resampler_lock);
				goto fail;
			}

			resampler->kvm = kvm;
			INIT_LIST_HEAD(&resampler->list);
			resampler->notifier.gsi = irqfd->gsi;
			resampler->notifier.irq_acked = irqfd_resampler_ack;
			INIT_LIST_HEAD(&resampler->link);

			list_add(&resampler->link, &kvm->irqfds.resampler_list);
			kvm_register_irq_ack_notifier(kvm,
						      &resampler->notifier);
			irqfd->resampler = resampler;
		}

		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
		synchronize_srcu(&kvm->irq_srcu);

		mutex_unlock(&kvm->irqfds.resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	spin_lock_irq(&kvm->irqfds.lock);

	ret = 0;
	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd != tmp->eventfd)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&kvm->irqfds.lock);
		goto fail;
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	irqfd_update(kvm, irqfd);

	list_add_tail(&irqfd->list, &kvm->irqfds.items);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = vfs_poll(f.file, &irqfd->pt);

	if (events & EPOLLIN)
		schedule_work(&irqfd->inject);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	if (kvm_arch_has_irq_bypass()) {
		irqfd->consumer.token = (void *)irqfd->eventfd;
		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
		irqfd->consumer.start = kvm_arch_irq_bypass_start;
		ret = irq_bypass_register_consumer(&irqfd->consumer);
		if (ret)
			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
				irqfd->consumer.token, ret);
	}
#endif

	srcu_read_unlock(&kvm->irq_srcu, idx);

	/*
	 * do not drop the file until the irqfd is fully initialized, otherwise
	 * we might race against the EPOLLHUP
	 */
	fdput(f);
	return 0;

fail:
	if (irqfd->resampler)
		irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	fdput(f);

out:
	kfree(irqfd);
	return ret;
}

bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_ack_notifier *kian;
	int gsi, idx;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
					 link)
			if (kian->gsi == gsi) {
				srcu_read_unlock(&kvm->irq_srcu, idx);
				return true;
			}

	srcu_read_unlock(&kvm->irq_srcu, idx);

	return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
	struct kvm_irq_ack_notifier *kian;

	hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
				 link)
		if (kian->gsi == gsi)
			kian->irq_acked(kian);
}

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	int gsi, idx;

	trace_kvm_ack_irq(irqchip, pin);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
	mutex_unlock(&kvm->irq_lock);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				     struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_del_init_rcu(&kian->link);
	mutex_unlock(&kvm->irq_lock);
	synchronize_srcu(&kvm->irq_srcu);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
	mutex_init(&kvm->irqfds.resampler_lock);
#endif
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * shutdown any irqfd's that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
			/*
			 * This clearing of irq_entry.type is needed for when
			 * another thread calls kvm_irq_routing_update before
			 * we flush workqueue below (we synchronize with
			 * kvm_irq_routing_update using irqfds.lock).
			 */
			write_seqcount_begin(&irqfd->irq_entry_sc);
			irqfd->irq_entry.type = 0;
			write_seqcount_end(&irqfd->irq_entry_sc);
			irqfd_deactivate(irqfd);
		}
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
		return -EINVAL;

	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, args);

	return kvm_irqfd_assign(kvm, args);
}

/*
 * This function is called as the kvm VM fd is being released. Shutdown all
 * irqfds that still remain open
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);

}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
		irqfd_update(kvm, irqfd);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
		if (irqfd->producer) {
			int ret = kvm_arch_update_irqfd_routing(
					irqfd->kvm, irqfd->producer->irq,
					irqfd->gsi, 1);
			WARN_ON(ret);
		}
#endif
	}

	spin_unlock_irq(&kvm->irqfds.lock);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated
 * queue to ease flushing work items when a VM exits.
 */
int kvm_irqfd_init(void)
{
	irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void kvm_irqfd_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

struct _ioeventfd {
	struct list_head     list;
	u64                  addr;
	int                  length;
	struct eventfd_ctx  *eventfd;
	u64                  datamatch;
	struct kvm_io_device dev;
	u8                   bus_idx;
	bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (addr != p->addr)
		/* address must be precise for a hit */
		return false;

	if (!p->length)
		/* length = 0 means only look at the address, so always a hit */
		return true;

	if (len != p->length)
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
		int len, const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking just nuke anything we have as quickly as possible
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->bus_idx == p->bus_idx &&
		    _p->addr == p->addr &&
		    (!_p->length || !p->length ||
		     (_p->length == p->length &&
		      (_p->wildcard || p->wildcard ||
		       _p->datamatch == p->datamatch))))
			return true;

	return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
	if (flags & KVM_IOEVENTFD_FLAG_PIO)
		return KVM_PIO_BUS;
	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
		return KVM_VIRTIO_CCW_NOTIFY_BUS;
	return KVM_MMIO_BUS;
}

static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
				enum kvm_bus bus_idx,
				struct kvm_ioeventfd *args)
{

	struct eventfd_ctx *eventfd;
	struct _ioeventfd *p;
	int ret;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
	p->bus_idx = bus_idx;
	p->length  = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	mutex_lock(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
				      &p->dev);
	if (ret < 0)
		goto unlock_fail;

	kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);

	mutex_unlock(&kvm->slots_lock);

	return 0;

unlock_fail:
	mutex_unlock(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
			   struct kvm_ioeventfd *args)
{
	struct _ioeventfd *p, *tmp;
	struct eventfd_ctx *eventfd;
	struct kvm_io_bus *bus;
	int ret = -ENOENT;
	bool wildcard;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {

		if (p->bus_idx != bus_idx ||
		    p->eventfd != eventfd ||
		    p->addr != args->addr ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		bus = kvm_get_bus(kvm, bus_idx);
		if (bus)
			bus->ioeventfd_count--;
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	mutex_unlock(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
	int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

	if (!args->len && bus_idx == KVM_MMIO_BUS)
		kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

	return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx;
	int ret;

	bus_idx = ioeventfd_bus_from_flags(args->flags);
	/* must be natural-word sized, or 0 to ignore length */
	switch (args->len) {
	case 0:
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	/* ioeventfd with no length can't be combined with DATAMATCH */
	if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
		return -EINVAL;

	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
	if (ret)
		goto fail;

	/* When length is ignored, MMIO is also put on a separate bus, for
	 * faster lookups.
	 */
	if (!args->len && bus_idx == KVM_MMIO_BUS) {
		ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
		if (ret < 0)
			goto fast_fail;
	}

	return 0;

fast_fail:
	kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}
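
/*
 * Illustrative userspace sketch (not part of the upstream file): how a VMM
 * might wire an eventfd to a guest GSI via KVM_IRQFD and register an MMIO
 * doorbell via KVM_IOEVENTFD, the paths handled by kvm_irqfd() and
 * kvm_ioeventfd() above.  The GSI number and doorbell address below are
 * placeholders chosen for the example only.
 *
 *	int efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 *	struct kvm_irqfd irqfd = { .fd = efd, .gsi = 5 };
 *	ioctl(vm_fd, KVM_IRQFD, &irqfd);		// assign irqfd
 *
 *	int dfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 *	struct kvm_ioeventfd ioefd = {
 *		.addr = 0xd0000000, .len = 4, .fd = dfd,
 *	};
 *	ioctl(vm_fd, KVM_IOEVENTFD, &ioefd);		// assign ioeventfd
 *
 *	irqfd.flags = KVM_IRQFD_FLAG_DEASSIGN;
 *	ioctl(vm_fd, KVM_IRQFD, &irqfd);		// tear down irqfd
 */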