/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include "iodev.h"

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/vmalloc.h>
#include <linux/reboot.h>
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/anon_inodes.h>
#include <linux/profile.h>
#include <linux/kvm_para.h>
#include <linux/pagemap.h>
#include <linux/mman.h>
#include <linux/swap.h>

#include <asm/processor.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>

#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
#include "coalesced_mmio.h"
#endif

#ifdef KVM_CAP_DEVICE_ASSIGNMENT
#include <linux/pci.h>
#include <linux/interrupt.h>
#include "irq.h"
#endif

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

DEFINE_SPINLOCK(kvm_lock);
LIST_HEAD(vm_list);

static cpumask_t cpus_hardware_enabled;

struct kmem_cache *kvm_vcpu_cache;
EXPORT_SYMBOL_GPL(kvm_vcpu_cache);

static __read_mostly struct preempt_ops kvm_preempt_ops;

struct dentry *kvm_debugfs_dir;

static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
			   unsigned long arg);

bool kvm_rebooting;

#ifdef KVM_CAP_DEVICE_ASSIGNMENT
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct list_head *ptr;
	struct kvm_assigned_dev_kernel *match;

	list_for_each(ptr, head) {
		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
		if (match->assigned_dev_id == assigned_dev_id)
			return match;
	}
	return NULL;
}

static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
{
	struct kvm_assigned_dev_kernel *assigned_dev;

	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
				    interrupt_work);

	/* This is taken to safely inject irq inside the guest. When
	 * the interrupt injection (or the ioapic code) uses a
	 * finer-grained lock, update this
	 */
	mutex_lock(&assigned_dev->kvm->lock);
	kvm_set_irq(assigned_dev->kvm,
		    assigned_dev->irq_source_id,
		    assigned_dev->guest_irq, 1);
	mutex_unlock(&assigned_dev->kvm->lock);
	kvm_put_kvm(assigned_dev->kvm);
}

static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *assigned_dev =
		(struct kvm_assigned_dev_kernel *) dev_id;

	kvm_get_kvm(assigned_dev->kvm);
	schedule_work(&assigned_dev->interrupt_work);
	disable_irq_nosync(irq);
	return IRQ_HANDLED;
}

/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *dev;

	if (kian->gsi == -1)
		return;

	dev = container_of(kian, struct kvm_assigned_dev_kernel,
			   ack_notifier);
	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
	enable_irq(dev->host_irq);
}

static void kvm_free_assigned_device(struct kvm *kvm,
				     struct kvm_assigned_dev_kernel
				     *assigned_dev)
{
	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
		free_irq(assigned_dev->host_irq, (void *)assigned_dev);

	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
	kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);

	if (cancel_work_sync(&assigned_dev->interrupt_work))
		/* We had pending work. That means we will have to take
		 * care of kvm_put_kvm.
		 */
		kvm_put_kvm(kvm);

	pci_release_regions(assigned_dev->dev);
	pci_disable_device(assigned_dev->dev);
	pci_dev_put(assigned_dev->dev);

	list_del(&assigned_dev->list);
	kfree(assigned_dev);
}

void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *ptr, *ptr2;
	struct kvm_assigned_dev_kernel *assigned_dev;

	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
		assigned_dev = list_entry(ptr,
					  struct kvm_assigned_dev_kernel,
					  list);

		kvm_free_assigned_device(kvm, assigned_dev);
	}
}

static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
				   struct kvm_assigned_irq
				   *assigned_irq)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_irq->assigned_dev_id);
	if (!match) {
		mutex_unlock(&kvm->lock);
		return -EINVAL;
	}

	if (match->irq_requested) {
		match->guest_irq = assigned_irq->guest_irq;
		match->ack_notifier.gsi = assigned_irq->guest_irq;
		mutex_unlock(&kvm->lock);
		return 0;
	}

	INIT_WORK(&match->interrupt_work,
		  kvm_assigned_dev_interrupt_work_handler);

	if (irqchip_in_kernel(kvm)) {
		if (!capable(CAP_SYS_RAWIO)) {
			r = -EPERM;
			goto out_release;
		}

		if (assigned_irq->host_irq)
			match->host_irq = assigned_irq->host_irq;
		else
			match->host_irq = match->dev->irq;
		match->guest_irq = assigned_irq->guest_irq;
		match->ack_notifier.gsi = assigned_irq->guest_irq;
		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
		r = kvm_request_irq_source_id(kvm);
		if (r < 0)
			goto out_release;
		else
			match->irq_source_id = r;

		/* Even though this is PCI, we don't want to use shared
		 * interrupts. Sharing host devices with guest-assigned devices
		 * on the same interrupt line is not a happy situation: there
		 * are going to be long delays in accepting, acking, etc.
		 */
		if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
				"kvm_assigned_device", (void *)match)) {
			r = -EIO;
			goto out_release;
		}
	}

	match->irq_requested = true;
	mutex_unlock(&kvm->lock);
	return r;
out_release:
	mutex_unlock(&kvm->lock);
	kvm_free_assigned_device(kvm, match);
	return r;
}

static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
				      struct kvm_assigned_pci_dev *assigned_dev)
{
	int r = 0;
	struct kvm_assigned_dev_kernel *match;
	struct pci_dev *dev;

	mutex_lock(&kvm->lock);

	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
				      assigned_dev->assigned_dev_id);
	if (match) {
		/* device already assigned */
		r = -EINVAL;
		goto out;
	}

	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
	if (match == NULL) {
		printk(KERN_INFO "%s: Couldn't allocate memory\n",
		       __func__);
		r = -ENOMEM;
		goto out;
	}
	dev = pci_get_bus_and_slot(assigned_dev->busnr,
				   assigned_dev->devfn);
	if (!dev) {
		printk(KERN_INFO "%s: host device not found\n", __func__);
		r = -EINVAL;
		goto out_free;
	}
	if (pci_enable_device(dev)) {
		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
		r = -EBUSY;
		goto out_put;
	}
	r = pci_request_regions(dev, "kvm_assigned_device");
	if (r) {
		printk(KERN_INFO "%s: Could not get access to device regions\n",
		       __func__);
		goto out_disable;
	}
	match->assigned_dev_id = assigned_dev->assigned_dev_id;
	match->host_busnr = assigned_dev->busnr;
	match->host_devfn = assigned_dev->devfn;
	match->dev = dev;

	match->kvm = kvm;

	list_add(&match->list, &kvm->arch.assigned_dev_head);

	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
		r = kvm_iommu_map_guest(kvm, match);
		if (r)
			goto out_list_del;
	}

out:
	mutex_unlock(&kvm->lock);
	return r;
out_list_del:
	list_del(&match->list);
	pci_release_regions(dev);
out_disable:
	pci_disable_device(dev);
out_put:
	pci_dev_put(dev);
out_free:
	kfree(match);
	mutex_unlock(&kvm->lock);
	return r;
}
#endif

static inline int valid_vcpu(int n)
{
	return likely(n >= 0 && n < KVM_MAX_VCPUS);
}

inline int kvm_is_mmio_pfn(pfn_t pfn)
{
	if (pfn_valid(pfn))
		return PageReserved(pfn_to_page(pfn));

	return true;
}

/*
 * Switches to specified vcpu, until a matching vcpu_put()
 */
void vcpu_load(struct kvm_vcpu *vcpu)
{
	int cpu;

	mutex_lock(&vcpu->mutex);
	cpu = get_cpu();
	preempt_notifier_register(&vcpu->preempt_notifier);
	kvm_arch_vcpu_load(vcpu, cpu);
	put_cpu();
}

void vcpu_put(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	kvm_arch_vcpu_put(vcpu);
	preempt_notifier_unregister(&vcpu->preempt_notifier);
	preempt_enable();
	mutex_unlock(&vcpu->mutex);
}

static void ack_flush(void *_completed)
{
}

void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	int i, cpu, me;
	cpumask_t cpus;
	struct kvm_vcpu *vcpu;

	me = get_cpu();
	cpus_clear(cpus);
	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		vcpu = kvm->vcpus[i];
		if (!vcpu)
			continue;
		if (test_and_set_bit(KVM_REQ_TLB_FLUSH,
				     &vcpu->requests))
			continue;
		cpu = vcpu->cpu;
		if (cpu != -1 && cpu != me)
			cpu_set(cpu, cpus);
	}
	if (cpus_empty(cpus))
		goto out;
	++kvm->stat.remote_tlb_flush;
	smp_call_function_mask(cpus, ack_flush, NULL, 1);
out:
	put_cpu();
}

void kvm_reload_remote_mmus(struct kvm *kvm)
{
	int i, cpu, me;
	cpumask_t cpus;
	struct kvm_vcpu *vcpu;

	me = get_cpu();
	cpus_clear(cpus);
	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		vcpu = kvm->vcpus[i];
		if (!vcpu)
			continue;
		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
			continue;
		cpu = vcpu->cpu;
		if (cpu != -1 && cpu != me)
			cpu_set(cpu, cpus);
	}
	if (cpus_empty(cpus))
		goto out;
	smp_call_function_mask(cpus, ack_flush, NULL, 1);
out:
	put_cpu();
}


int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
	struct page *page;
	int r;

	mutex_init(&vcpu->mutex);
	vcpu->cpu = -1;
	vcpu->kvm = kvm;
	vcpu->vcpu_id = id;
	init_waitqueue_head(&vcpu->wq);

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto fail;
	}
	vcpu->run = page_address(page);

	r = kvm_arch_vcpu_init(vcpu);
	if (r < 0)
		goto fail_free_run;
	return 0;

fail_free_run:
	free_page((unsigned long)vcpu->run);
fail:
	return r;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_init);

void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_uninit(vcpu);
	free_page((unsigned long)vcpu->run);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);

#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
{
	return container_of(mn, struct kvm, mmu_notifier);
}

static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
					     struct mm_struct *mm,
					     unsigned long address)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int need_tlb_flush;

	/*
	 * When ->invalidate_page runs, the linux pte has been zapped
	 * already but the page is still allocated until
	 * ->invalidate_page returns. So if we increase the sequence
	 * here the kvm page fault will notice if the spte can't be
	 * established because the page is going to be freed. If
	 * instead the kvm page fault establishes the spte before
	 * ->invalidate_page runs, kvm_unmap_hva will release it
	 * before returning.
	 *
	 * The sequence increase only needs to be seen at spin_unlock
	 * time, and not at spin_lock time.
	 *
	 * Increasing the sequence after the spin_unlock would be
	 * unsafe because the kvm page fault could then establish the
	 * pte after kvm_unmap_hva returned, without noticing the page
	 * is going to be freed.
	 */
	spin_lock(&kvm->mmu_lock);
	kvm->mmu_notifier_seq++;
	need_tlb_flush = kvm_unmap_hva(kvm, address);
	spin_unlock(&kvm->mmu_lock);

	/* we have to flush the tlb before the pages can be freed */
	if (need_tlb_flush)
		kvm_flush_remote_tlbs(kvm);

}

static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
						    struct mm_struct *mm,
						    unsigned long start,
						    unsigned long end)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int need_tlb_flush = 0;

	spin_lock(&kvm->mmu_lock);
	/*
	 * The count increase must become visible at unlock time as no
	 * spte can be established without taking the mmu_lock and
	 * count is also read inside the mmu_lock critical section.
	 */
	kvm->mmu_notifier_count++;
	for (; start < end; start += PAGE_SIZE)
		need_tlb_flush |= kvm_unmap_hva(kvm, start);
	spin_unlock(&kvm->mmu_lock);

	/* we have to flush the tlb before the pages can be freed */
	if (need_tlb_flush)
		kvm_flush_remote_tlbs(kvm);
}

static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
						  struct mm_struct *mm,
						  unsigned long start,
						  unsigned long end)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);

	spin_lock(&kvm->mmu_lock);
	/*
	 * This sequence increase will notify the kvm page fault that
	 * the page that is going to be mapped in the spte could have
	 * been freed.
	 */
	kvm->mmu_notifier_seq++;
	/*
	 * The above sequence increase must be visible before the
	 * below count decrease but both values are read by the kvm
	 * page fault under mmu_lock spinlock so we don't need to add
	 * a smp_wmb() here in between the two.
	 */
	kvm->mmu_notifier_count--;
	spin_unlock(&kvm->mmu_lock);

	BUG_ON(kvm->mmu_notifier_count < 0);
}

static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
					      struct mm_struct *mm,
					      unsigned long address)
{
	struct kvm *kvm = mmu_notifier_to_kvm(mn);
	int young;

	spin_lock(&kvm->mmu_lock);
	young = kvm_age_hva(kvm, address);
	spin_unlock(&kvm->mmu_lock);

	if (young)
		kvm_flush_remote_tlbs(kvm);

	return young;
}

static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,
};
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */

static struct kvm *kvm_create_vm(void)
{
	struct kvm *kvm = kvm_arch_create_vm();
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	struct page *page;
#endif

	if (IS_ERR(kvm))
		goto out;

#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		kfree(kvm);
		return ERR_PTR(-ENOMEM);
	}
	kvm->coalesced_mmio_ring =
			(struct kvm_coalesced_mmio_ring *)page_address(page);
#endif

#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
	{
		int err;
		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
		if (err) {
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
			put_page(page);
#endif
			kfree(kvm);
			return ERR_PTR(err);
		}
	}
#endif

	kvm->mm = current->mm;
	atomic_inc(&kvm->mm->mm_count);
	spin_lock_init(&kvm->mmu_lock);
	kvm_io_bus_init(&kvm->pio_bus);
	mutex_init(&kvm->lock);
	kvm_io_bus_init(&kvm->mmio_bus);
	init_rwsem(&kvm->slots_lock);
	atomic_set(&kvm->users_count, 1);
	spin_lock(&kvm_lock);
	list_add(&kvm->vm_list, &vm_list);
	spin_unlock(&kvm_lock);
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	kvm_coalesced_mmio_init(kvm);
#endif
out:
	return kvm;
}

/*
 * Free any memory in @free but not in @dont.
 */
static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
				  struct kvm_memory_slot *dont)
{
	if (!dont || free->rmap != dont->rmap)
		vfree(free->rmap);

	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
		vfree(free->dirty_bitmap);

	if (!dont || free->lpage_info != dont->lpage_info)
		vfree(free->lpage_info);

	free->npages = 0;
	free->dirty_bitmap = NULL;
	free->rmap = NULL;
	free->lpage_info = NULL;
}

void kvm_free_physmem(struct kvm *kvm)
{
	int i;

	for (i = 0; i < kvm->nmemslots; ++i)
		kvm_free_physmem_slot(&kvm->memslots[i], NULL);
}

static void kvm_destroy_vm(struct kvm *kvm)
{
	struct mm_struct *mm = kvm->mm;

	spin_lock(&kvm_lock);
	list_del(&kvm->vm_list);
	spin_unlock(&kvm_lock);
	kvm_io_bus_destroy(&kvm->pio_bus);
	kvm_io_bus_destroy(&kvm->mmio_bus);
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
	if (kvm->coalesced_mmio_ring != NULL)
		free_page((unsigned long)kvm->coalesced_mmio_ring);
#endif
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
#endif
	kvm_arch_destroy_vm(kvm);
	mmdrop(mm);
}

void kvm_get_kvm(struct kvm *kvm)
{
	atomic_inc(&kvm->users_count);
}
EXPORT_SYMBOL_GPL(kvm_get_kvm);

void kvm_put_kvm(struct kvm *kvm)
{
	if (atomic_dec_and_test(&kvm->users_count))
		kvm_destroy_vm(kvm);
}
EXPORT_SYMBOL_GPL(kvm_put_kvm);


static int kvm_vm_release(struct inode *inode, struct file *filp)
{
	struct kvm *kvm = filp->private_data;

	kvm_put_kvm(kvm);
	return 0;
}

/*
 * Allocate some memory and give it an address in the guest physical address
 * space.
 *
 * Discontiguous memory is allowed, mostly for framebuffers.
 *
 * Must be called holding mmap_sem for write.
 */
int __kvm_set_memory_region(struct kvm *kvm,
			    struct kvm_userspace_memory_region *mem,
			    int user_alloc)
{
	int r;
	gfn_t base_gfn;
	unsigned long npages;
	unsigned long i;
	struct kvm_memory_slot *memslot;
	struct kvm_memory_slot old, new;

	r = -EINVAL;
	/* General sanity checks */
	if (mem->memory_size & (PAGE_SIZE - 1))
		goto out;
	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
		goto out;
	if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
		goto out;
	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
		goto out;

	memslot = &kvm->memslots[mem->slot];
	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
	npages = mem->memory_size >> PAGE_SHIFT;

	if (!npages)
		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;

	new = old = *memslot;

	new.base_gfn = base_gfn;
	new.npages = npages;
	new.flags = mem->flags;

	/* Disallow changing a memory slot's size. */
	r = -EINVAL;
	if (npages && old.npages && npages != old.npages)
		goto out_free;

	/* Check for overlaps */
	r = -EEXIST;
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *s = &kvm->memslots[i];

		if (s == memslot)
			continue;
		if (!((base_gfn + npages <= s->base_gfn) ||
		      (base_gfn >= s->base_gfn + s->npages)))
			goto out_free;
	}

	/* Free page dirty bitmap if unneeded */
	if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
		new.dirty_bitmap = NULL;

	r = -ENOMEM;

	/* Allocate if a slot is being created */
#ifndef CONFIG_S390
	if (npages && !new.rmap) {
		new.rmap = vmalloc(npages * sizeof(struct page *));

		if (!new.rmap)
			goto out_free;

		memset(new.rmap, 0, npages * sizeof(*new.rmap));

		new.user_alloc = user_alloc;
		/*
		 * hva_to_rmmap() serializes with the mmu_lock and to be
		 * safe it has to ignore memslots with !user_alloc &&
		 * !userspace_addr.
		 */
		if (user_alloc)
			new.userspace_addr = mem->userspace_addr;
		else
			new.userspace_addr = 0;
	}
	if (npages && !new.lpage_info) {
		int largepages = npages / KVM_PAGES_PER_HPAGE;
		if (npages % KVM_PAGES_PER_HPAGE)
			largepages++;
		if (base_gfn % KVM_PAGES_PER_HPAGE)
			largepages++;

		new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info));

		if (!new.lpage_info)
			goto out_free;

		memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info));

		if (base_gfn % KVM_PAGES_PER_HPAGE)
			new.lpage_info[0].write_count = 1;
		if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE)
			new.lpage_info[largepages-1].write_count = 1;
	}

	/* Allocate page dirty bitmap if needed */
	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;

		new.dirty_bitmap = vmalloc(dirty_bytes);
		if (!new.dirty_bitmap)
			goto out_free;
		memset(new.dirty_bitmap, 0, dirty_bytes);
	}
#endif /* not defined CONFIG_S390 */

	if (!npages)
		kvm_arch_flush_shadow(kvm);

	spin_lock(&kvm->mmu_lock);
	if (mem->slot >= kvm->nmemslots)
		kvm->nmemslots = mem->slot + 1;

	*memslot = new;
	spin_unlock(&kvm->mmu_lock);

	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
	if (r) {
		spin_lock(&kvm->mmu_lock);
		*memslot = old;
		spin_unlock(&kvm->mmu_lock);
		goto out_free;
	}

	kvm_free_physmem_slot(&old, &new);
#ifdef CONFIG_DMAR
	/* map the pages in iommu page table */
	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
	if (r)
		goto out;
#endif
	return 0;

out_free:
	kvm_free_physmem_slot(&new, &old);
out:
	return r;

}
EXPORT_SYMBOL_GPL(__kvm_set_memory_region);

int kvm_set_memory_region(struct kvm *kvm,
			  struct kvm_userspace_memory_region *mem,
			  int user_alloc)
{
	int r;

	down_write(&kvm->slots_lock);
	r = __kvm_set_memory_region(kvm, mem, user_alloc);
	up_write(&kvm->slots_lock);
	return r;
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);

int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
				   struct
				   kvm_userspace_memory_region *mem,
				   int user_alloc)
{
	if (mem->slot >= KVM_MEMORY_SLOTS)
		return -EINVAL;
	return kvm_set_memory_region(kvm, mem, user_alloc);
}

int kvm_get_dirty_log(struct kvm *kvm,
			struct kvm_dirty_log *log, int *is_dirty)
{
	struct kvm_memory_slot *memslot;
	int r, i;
	int n;
	unsigned long any = 0;

	r = -EINVAL;
	if (log->slot >= KVM_MEMORY_SLOTS)
		goto out;

	memslot = &kvm->memslots[log->slot];
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;

	for (i = 0; !any && i < n/sizeof(long); ++i)
		any = memslot->dirty_bitmap[i];

	r = -EFAULT;
	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
		goto out;

	if (any)
		*is_dirty = 1;

	r = 0;
out:
	return r;
}

int is_error_page(struct page *page)
{
	return page == bad_page;
}
EXPORT_SYMBOL_GPL(is_error_page);

int is_error_pfn(pfn_t pfn)
{
	return pfn == bad_pfn;
}
EXPORT_SYMBOL_GPL(is_error_pfn);

static inline unsigned long bad_hva(void)
{
	return PAGE_OFFSET;
}

int kvm_is_error_hva(unsigned long addr)
{
	return addr == bad_hva();
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);

static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	int i;

	for (i = 0; i < kvm->nmemslots; ++i) {
		struct kvm_memory_slot *memslot = &kvm->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return memslot;
	}
	return NULL;
}

struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
	gfn = unalias_gfn(kvm, gfn);
	return __gfn_to_memslot(kvm, gfn);
}

int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
	int i;

	gfn = unalias_gfn(kvm, gfn);
	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
		struct kvm_memory_slot *memslot = &kvm->memslots[i];

		if (gfn >= memslot->base_gfn
		    && gfn < memslot->base_gfn + memslot->npages)
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);

unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;

	gfn = unalias_gfn(kvm, gfn);
	slot = __gfn_to_memslot(kvm, gfn);
	if (!slot)
		return bad_hva();
	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
}
EXPORT_SYMBOL_GPL(gfn_to_hva);

pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
	struct page *page[1];
	unsigned long addr;
	int npages;
	pfn_t pfn;

	might_sleep();

	addr = gfn_to_hva(kvm, gfn);
	if (kvm_is_error_hva(addr)) {
		get_page(bad_page);
		return page_to_pfn(bad_page);
	}

	npages = get_user_pages_fast(addr, 1, 1, page);

	if (unlikely(npages != 1)) {
		struct vm_area_struct *vma;

		down_read(&current->mm->mmap_sem);
		vma = find_vma(current->mm, addr);

		if (vma == NULL || addr < vma->vm_start ||
		    !(vma->vm_flags & VM_PFNMAP)) {
			up_read(&current->mm->mmap_sem);
			get_page(bad_page);
			return page_to_pfn(bad_page);
		}

		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
		up_read(&current->mm->mmap_sem);
		BUG_ON(!kvm_is_mmio_pfn(pfn));
	} else
		pfn = page_to_pfn(page[0]);

	return pfn;
}

EXPORT_SYMBOL_GPL(gfn_to_pfn);

struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
	pfn_t pfn;

	pfn = gfn_to_pfn(kvm, gfn);
	if (!kvm_is_mmio_pfn(pfn))
		return pfn_to_page(pfn);

	WARN_ON(kvm_is_mmio_pfn(pfn));

	get_page(bad_page);
	return bad_page;
}

EXPORT_SYMBOL_GPL(gfn_to_page);

void kvm_release_page_clean(struct page *page)
{
kvm_release_pfn_clean(page_to_pfn(page)); 1029 } 1030 EXPORT_SYMBOL_GPL(kvm_release_page_clean); 1031 1032 void kvm_release_pfn_clean(pfn_t pfn) 1033 { 1034 if (!kvm_is_mmio_pfn(pfn)) 1035 put_page(pfn_to_page(pfn)); 1036 } 1037 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); 1038 1039 void kvm_release_page_dirty(struct page *page) 1040 { 1041 kvm_release_pfn_dirty(page_to_pfn(page)); 1042 } 1043 EXPORT_SYMBOL_GPL(kvm_release_page_dirty); 1044 1045 void kvm_release_pfn_dirty(pfn_t pfn) 1046 { 1047 kvm_set_pfn_dirty(pfn); 1048 kvm_release_pfn_clean(pfn); 1049 } 1050 EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); 1051 1052 void kvm_set_page_dirty(struct page *page) 1053 { 1054 kvm_set_pfn_dirty(page_to_pfn(page)); 1055 } 1056 EXPORT_SYMBOL_GPL(kvm_set_page_dirty); 1057 1058 void kvm_set_pfn_dirty(pfn_t pfn) 1059 { 1060 if (!kvm_is_mmio_pfn(pfn)) { 1061 struct page *page = pfn_to_page(pfn); 1062 if (!PageReserved(page)) 1063 SetPageDirty(page); 1064 } 1065 } 1066 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); 1067 1068 void kvm_set_pfn_accessed(pfn_t pfn) 1069 { 1070 if (!kvm_is_mmio_pfn(pfn)) 1071 mark_page_accessed(pfn_to_page(pfn)); 1072 } 1073 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); 1074 1075 void kvm_get_pfn(pfn_t pfn) 1076 { 1077 if (!kvm_is_mmio_pfn(pfn)) 1078 get_page(pfn_to_page(pfn)); 1079 } 1080 EXPORT_SYMBOL_GPL(kvm_get_pfn); 1081 1082 static int next_segment(unsigned long len, int offset) 1083 { 1084 if (len > PAGE_SIZE - offset) 1085 return PAGE_SIZE - offset; 1086 else 1087 return len; 1088 } 1089 1090 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, 1091 int len) 1092 { 1093 int r; 1094 unsigned long addr; 1095 1096 addr = gfn_to_hva(kvm, gfn); 1097 if (kvm_is_error_hva(addr)) 1098 return -EFAULT; 1099 r = copy_from_user(data, (void __user *)addr + offset, len); 1100 if (r) 1101 return -EFAULT; 1102 return 0; 1103 } 1104 EXPORT_SYMBOL_GPL(kvm_read_guest_page); 1105 1106 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) 1107 { 1108 gfn_t gfn = gpa >> PAGE_SHIFT; 1109 int seg; 1110 int offset = offset_in_page(gpa); 1111 int ret; 1112 1113 while ((seg = next_segment(len, offset)) != 0) { 1114 ret = kvm_read_guest_page(kvm, gfn, data, offset, seg); 1115 if (ret < 0) 1116 return ret; 1117 offset = 0; 1118 len -= seg; 1119 data += seg; 1120 ++gfn; 1121 } 1122 return 0; 1123 } 1124 EXPORT_SYMBOL_GPL(kvm_read_guest); 1125 1126 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, 1127 unsigned long len) 1128 { 1129 int r; 1130 unsigned long addr; 1131 gfn_t gfn = gpa >> PAGE_SHIFT; 1132 int offset = offset_in_page(gpa); 1133 1134 addr = gfn_to_hva(kvm, gfn); 1135 if (kvm_is_error_hva(addr)) 1136 return -EFAULT; 1137 pagefault_disable(); 1138 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); 1139 pagefault_enable(); 1140 if (r) 1141 return -EFAULT; 1142 return 0; 1143 } 1144 EXPORT_SYMBOL(kvm_read_guest_atomic); 1145 1146 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, 1147 int offset, int len) 1148 { 1149 int r; 1150 unsigned long addr; 1151 1152 addr = gfn_to_hva(kvm, gfn); 1153 if (kvm_is_error_hva(addr)) 1154 return -EFAULT; 1155 r = copy_to_user((void __user *)addr + offset, data, len); 1156 if (r) 1157 return -EFAULT; 1158 mark_page_dirty(kvm, gfn); 1159 return 0; 1160 } 1161 EXPORT_SYMBOL_GPL(kvm_write_guest_page); 1162 1163 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, 1164 unsigned long len) 1165 { 1166 gfn_t gfn = gpa >> PAGE_SHIFT; 1167 int seg; 1168 int offset = 
offset_in_page(gpa); 1169 int ret; 1170 1171 while ((seg = next_segment(len, offset)) != 0) { 1172 ret = kvm_write_guest_page(kvm, gfn, data, offset, seg); 1173 if (ret < 0) 1174 return ret; 1175 offset = 0; 1176 len -= seg; 1177 data += seg; 1178 ++gfn; 1179 } 1180 return 0; 1181 } 1182 1183 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) 1184 { 1185 return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len); 1186 } 1187 EXPORT_SYMBOL_GPL(kvm_clear_guest_page); 1188 1189 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) 1190 { 1191 gfn_t gfn = gpa >> PAGE_SHIFT; 1192 int seg; 1193 int offset = offset_in_page(gpa); 1194 int ret; 1195 1196 while ((seg = next_segment(len, offset)) != 0) { 1197 ret = kvm_clear_guest_page(kvm, gfn, offset, seg); 1198 if (ret < 0) 1199 return ret; 1200 offset = 0; 1201 len -= seg; 1202 ++gfn; 1203 } 1204 return 0; 1205 } 1206 EXPORT_SYMBOL_GPL(kvm_clear_guest); 1207 1208 void mark_page_dirty(struct kvm *kvm, gfn_t gfn) 1209 { 1210 struct kvm_memory_slot *memslot; 1211 1212 gfn = unalias_gfn(kvm, gfn); 1213 memslot = __gfn_to_memslot(kvm, gfn); 1214 if (memslot && memslot->dirty_bitmap) { 1215 unsigned long rel_gfn = gfn - memslot->base_gfn; 1216 1217 /* avoid RMW */ 1218 if (!test_bit(rel_gfn, memslot->dirty_bitmap)) 1219 set_bit(rel_gfn, memslot->dirty_bitmap); 1220 } 1221 } 1222 1223 /* 1224 * The vCPU has executed a HLT instruction with in-kernel mode enabled. 1225 */ 1226 void kvm_vcpu_block(struct kvm_vcpu *vcpu) 1227 { 1228 DEFINE_WAIT(wait); 1229 1230 for (;;) { 1231 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1232 1233 if (kvm_cpu_has_interrupt(vcpu) || 1234 kvm_cpu_has_pending_timer(vcpu) || 1235 kvm_arch_vcpu_runnable(vcpu)) { 1236 set_bit(KVM_REQ_UNHALT, &vcpu->requests); 1237 break; 1238 } 1239 if (signal_pending(current)) 1240 break; 1241 1242 vcpu_put(vcpu); 1243 schedule(); 1244 vcpu_load(vcpu); 1245 } 1246 1247 finish_wait(&vcpu->wq, &wait); 1248 } 1249 1250 void kvm_resched(struct kvm_vcpu *vcpu) 1251 { 1252 if (!need_resched()) 1253 return; 1254 cond_resched(); 1255 } 1256 EXPORT_SYMBOL_GPL(kvm_resched); 1257 1258 static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1259 { 1260 struct kvm_vcpu *vcpu = vma->vm_file->private_data; 1261 struct page *page; 1262 1263 if (vmf->pgoff == 0) 1264 page = virt_to_page(vcpu->run); 1265 #ifdef CONFIG_X86 1266 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) 1267 page = virt_to_page(vcpu->arch.pio_data); 1268 #endif 1269 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1270 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) 1271 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); 1272 #endif 1273 else 1274 return VM_FAULT_SIGBUS; 1275 get_page(page); 1276 vmf->page = page; 1277 return 0; 1278 } 1279 1280 static struct vm_operations_struct kvm_vcpu_vm_ops = { 1281 .fault = kvm_vcpu_fault, 1282 }; 1283 1284 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) 1285 { 1286 vma->vm_ops = &kvm_vcpu_vm_ops; 1287 return 0; 1288 } 1289 1290 static int kvm_vcpu_release(struct inode *inode, struct file *filp) 1291 { 1292 struct kvm_vcpu *vcpu = filp->private_data; 1293 1294 kvm_put_kvm(vcpu->kvm); 1295 return 0; 1296 } 1297 1298 static const struct file_operations kvm_vcpu_fops = { 1299 .release = kvm_vcpu_release, 1300 .unlocked_ioctl = kvm_vcpu_ioctl, 1301 .compat_ioctl = kvm_vcpu_ioctl, 1302 .mmap = kvm_vcpu_mmap, 1303 }; 1304 1305 /* 1306 * Allocates an inode for the vcpu. 
1307 */ 1308 static int create_vcpu_fd(struct kvm_vcpu *vcpu) 1309 { 1310 int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0); 1311 if (fd < 0) 1312 kvm_put_kvm(vcpu->kvm); 1313 return fd; 1314 } 1315 1316 /* 1317 * Creates some virtual cpus. Good luck creating more than one. 1318 */ 1319 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) 1320 { 1321 int r; 1322 struct kvm_vcpu *vcpu; 1323 1324 if (!valid_vcpu(n)) 1325 return -EINVAL; 1326 1327 vcpu = kvm_arch_vcpu_create(kvm, n); 1328 if (IS_ERR(vcpu)) 1329 return PTR_ERR(vcpu); 1330 1331 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); 1332 1333 r = kvm_arch_vcpu_setup(vcpu); 1334 if (r) 1335 return r; 1336 1337 mutex_lock(&kvm->lock); 1338 if (kvm->vcpus[n]) { 1339 r = -EEXIST; 1340 goto vcpu_destroy; 1341 } 1342 kvm->vcpus[n] = vcpu; 1343 mutex_unlock(&kvm->lock); 1344 1345 /* Now it's all set up, let userspace reach it */ 1346 kvm_get_kvm(kvm); 1347 r = create_vcpu_fd(vcpu); 1348 if (r < 0) 1349 goto unlink; 1350 return r; 1351 1352 unlink: 1353 mutex_lock(&kvm->lock); 1354 kvm->vcpus[n] = NULL; 1355 vcpu_destroy: 1356 mutex_unlock(&kvm->lock); 1357 kvm_arch_vcpu_destroy(vcpu); 1358 return r; 1359 } 1360 1361 static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) 1362 { 1363 if (sigset) { 1364 sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); 1365 vcpu->sigset_active = 1; 1366 vcpu->sigset = *sigset; 1367 } else 1368 vcpu->sigset_active = 0; 1369 return 0; 1370 } 1371 1372 static long kvm_vcpu_ioctl(struct file *filp, 1373 unsigned int ioctl, unsigned long arg) 1374 { 1375 struct kvm_vcpu *vcpu = filp->private_data; 1376 void __user *argp = (void __user *)arg; 1377 int r; 1378 struct kvm_fpu *fpu = NULL; 1379 struct kvm_sregs *kvm_sregs = NULL; 1380 1381 if (vcpu->kvm->mm != current->mm) 1382 return -EIO; 1383 switch (ioctl) { 1384 case KVM_RUN: 1385 r = -EINVAL; 1386 if (arg) 1387 goto out; 1388 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); 1389 break; 1390 case KVM_GET_REGS: { 1391 struct kvm_regs *kvm_regs; 1392 1393 r = -ENOMEM; 1394 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 1395 if (!kvm_regs) 1396 goto out; 1397 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); 1398 if (r) 1399 goto out_free1; 1400 r = -EFAULT; 1401 if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) 1402 goto out_free1; 1403 r = 0; 1404 out_free1: 1405 kfree(kvm_regs); 1406 break; 1407 } 1408 case KVM_SET_REGS: { 1409 struct kvm_regs *kvm_regs; 1410 1411 r = -ENOMEM; 1412 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 1413 if (!kvm_regs) 1414 goto out; 1415 r = -EFAULT; 1416 if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) 1417 goto out_free2; 1418 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); 1419 if (r) 1420 goto out_free2; 1421 r = 0; 1422 out_free2: 1423 kfree(kvm_regs); 1424 break; 1425 } 1426 case KVM_GET_SREGS: { 1427 kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 1428 r = -ENOMEM; 1429 if (!kvm_sregs) 1430 goto out; 1431 r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); 1432 if (r) 1433 goto out; 1434 r = -EFAULT; 1435 if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) 1436 goto out; 1437 r = 0; 1438 break; 1439 } 1440 case KVM_SET_SREGS: { 1441 kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 1442 r = -ENOMEM; 1443 if (!kvm_sregs) 1444 goto out; 1445 r = -EFAULT; 1446 if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) 1447 goto out; 1448 r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); 
1449 if (r) 1450 goto out; 1451 r = 0; 1452 break; 1453 } 1454 case KVM_GET_MP_STATE: { 1455 struct kvm_mp_state mp_state; 1456 1457 r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); 1458 if (r) 1459 goto out; 1460 r = -EFAULT; 1461 if (copy_to_user(argp, &mp_state, sizeof mp_state)) 1462 goto out; 1463 r = 0; 1464 break; 1465 } 1466 case KVM_SET_MP_STATE: { 1467 struct kvm_mp_state mp_state; 1468 1469 r = -EFAULT; 1470 if (copy_from_user(&mp_state, argp, sizeof mp_state)) 1471 goto out; 1472 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); 1473 if (r) 1474 goto out; 1475 r = 0; 1476 break; 1477 } 1478 case KVM_TRANSLATE: { 1479 struct kvm_translation tr; 1480 1481 r = -EFAULT; 1482 if (copy_from_user(&tr, argp, sizeof tr)) 1483 goto out; 1484 r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); 1485 if (r) 1486 goto out; 1487 r = -EFAULT; 1488 if (copy_to_user(argp, &tr, sizeof tr)) 1489 goto out; 1490 r = 0; 1491 break; 1492 } 1493 case KVM_DEBUG_GUEST: { 1494 struct kvm_debug_guest dbg; 1495 1496 r = -EFAULT; 1497 if (copy_from_user(&dbg, argp, sizeof dbg)) 1498 goto out; 1499 r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg); 1500 if (r) 1501 goto out; 1502 r = 0; 1503 break; 1504 } 1505 case KVM_SET_SIGNAL_MASK: { 1506 struct kvm_signal_mask __user *sigmask_arg = argp; 1507 struct kvm_signal_mask kvm_sigmask; 1508 sigset_t sigset, *p; 1509 1510 p = NULL; 1511 if (argp) { 1512 r = -EFAULT; 1513 if (copy_from_user(&kvm_sigmask, argp, 1514 sizeof kvm_sigmask)) 1515 goto out; 1516 r = -EINVAL; 1517 if (kvm_sigmask.len != sizeof sigset) 1518 goto out; 1519 r = -EFAULT; 1520 if (copy_from_user(&sigset, sigmask_arg->sigset, 1521 sizeof sigset)) 1522 goto out; 1523 p = &sigset; 1524 } 1525 r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); 1526 break; 1527 } 1528 case KVM_GET_FPU: { 1529 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); 1530 r = -ENOMEM; 1531 if (!fpu) 1532 goto out; 1533 r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); 1534 if (r) 1535 goto out; 1536 r = -EFAULT; 1537 if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu))) 1538 goto out; 1539 r = 0; 1540 break; 1541 } 1542 case KVM_SET_FPU: { 1543 fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); 1544 r = -ENOMEM; 1545 if (!fpu) 1546 goto out; 1547 r = -EFAULT; 1548 if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) 1549 goto out; 1550 r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); 1551 if (r) 1552 goto out; 1553 r = 0; 1554 break; 1555 } 1556 default: 1557 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); 1558 } 1559 out: 1560 kfree(fpu); 1561 kfree(kvm_sregs); 1562 return r; 1563 } 1564 1565 static long kvm_vm_ioctl(struct file *filp, 1566 unsigned int ioctl, unsigned long arg) 1567 { 1568 struct kvm *kvm = filp->private_data; 1569 void __user *argp = (void __user *)arg; 1570 int r; 1571 1572 if (kvm->mm != current->mm) 1573 return -EIO; 1574 switch (ioctl) { 1575 case KVM_CREATE_VCPU: 1576 r = kvm_vm_ioctl_create_vcpu(kvm, arg); 1577 if (r < 0) 1578 goto out; 1579 break; 1580 case KVM_SET_USER_MEMORY_REGION: { 1581 struct kvm_userspace_memory_region kvm_userspace_mem; 1582 1583 r = -EFAULT; 1584 if (copy_from_user(&kvm_userspace_mem, argp, 1585 sizeof kvm_userspace_mem)) 1586 goto out; 1587 1588 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); 1589 if (r) 1590 goto out; 1591 break; 1592 } 1593 case KVM_GET_DIRTY_LOG: { 1594 struct kvm_dirty_log log; 1595 1596 r = -EFAULT; 1597 if (copy_from_user(&log, argp, sizeof log)) 1598 goto out; 1599 r = kvm_vm_ioctl_get_dirty_log(kvm, &log); 1600 if (r) 1601 goto out; 1602 break; 
1603 } 1604 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1605 case KVM_REGISTER_COALESCED_MMIO: { 1606 struct kvm_coalesced_mmio_zone zone; 1607 r = -EFAULT; 1608 if (copy_from_user(&zone, argp, sizeof zone)) 1609 goto out; 1610 r = -ENXIO; 1611 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); 1612 if (r) 1613 goto out; 1614 r = 0; 1615 break; 1616 } 1617 case KVM_UNREGISTER_COALESCED_MMIO: { 1618 struct kvm_coalesced_mmio_zone zone; 1619 r = -EFAULT; 1620 if (copy_from_user(&zone, argp, sizeof zone)) 1621 goto out; 1622 r = -ENXIO; 1623 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); 1624 if (r) 1625 goto out; 1626 r = 0; 1627 break; 1628 } 1629 #endif 1630 #ifdef KVM_CAP_DEVICE_ASSIGNMENT 1631 case KVM_ASSIGN_PCI_DEVICE: { 1632 struct kvm_assigned_pci_dev assigned_dev; 1633 1634 r = -EFAULT; 1635 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) 1636 goto out; 1637 r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); 1638 if (r) 1639 goto out; 1640 break; 1641 } 1642 case KVM_ASSIGN_IRQ: { 1643 struct kvm_assigned_irq assigned_irq; 1644 1645 r = -EFAULT; 1646 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) 1647 goto out; 1648 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); 1649 if (r) 1650 goto out; 1651 break; 1652 } 1653 #endif 1654 default: 1655 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 1656 } 1657 out: 1658 return r; 1659 } 1660 1661 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1662 { 1663 struct page *page[1]; 1664 unsigned long addr; 1665 int npages; 1666 gfn_t gfn = vmf->pgoff; 1667 struct kvm *kvm = vma->vm_file->private_data; 1668 1669 addr = gfn_to_hva(kvm, gfn); 1670 if (kvm_is_error_hva(addr)) 1671 return VM_FAULT_SIGBUS; 1672 1673 npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, 1674 NULL); 1675 if (unlikely(npages != 1)) 1676 return VM_FAULT_SIGBUS; 1677 1678 vmf->page = page[0]; 1679 return 0; 1680 } 1681 1682 static struct vm_operations_struct kvm_vm_vm_ops = { 1683 .fault = kvm_vm_fault, 1684 }; 1685 1686 static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) 1687 { 1688 vma->vm_ops = &kvm_vm_vm_ops; 1689 return 0; 1690 } 1691 1692 static const struct file_operations kvm_vm_fops = { 1693 .release = kvm_vm_release, 1694 .unlocked_ioctl = kvm_vm_ioctl, 1695 .compat_ioctl = kvm_vm_ioctl, 1696 .mmap = kvm_vm_mmap, 1697 }; 1698 1699 static int kvm_dev_ioctl_create_vm(void) 1700 { 1701 int fd; 1702 struct kvm *kvm; 1703 1704 kvm = kvm_create_vm(); 1705 if (IS_ERR(kvm)) 1706 return PTR_ERR(kvm); 1707 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0); 1708 if (fd < 0) 1709 kvm_put_kvm(kvm); 1710 1711 return fd; 1712 } 1713 1714 static long kvm_dev_ioctl(struct file *filp, 1715 unsigned int ioctl, unsigned long arg) 1716 { 1717 long r = -EINVAL; 1718 1719 switch (ioctl) { 1720 case KVM_GET_API_VERSION: 1721 r = -EINVAL; 1722 if (arg) 1723 goto out; 1724 r = KVM_API_VERSION; 1725 break; 1726 case KVM_CREATE_VM: 1727 r = -EINVAL; 1728 if (arg) 1729 goto out; 1730 r = kvm_dev_ioctl_create_vm(); 1731 break; 1732 case KVM_CHECK_EXTENSION: 1733 r = kvm_dev_ioctl_check_extension(arg); 1734 break; 1735 case KVM_GET_VCPU_MMAP_SIZE: 1736 r = -EINVAL; 1737 if (arg) 1738 goto out; 1739 r = PAGE_SIZE; /* struct kvm_run */ 1740 #ifdef CONFIG_X86 1741 r += PAGE_SIZE; /* pio data page */ 1742 #endif 1743 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1744 r += PAGE_SIZE; /* coalesced mmio ring page */ 1745 #endif 1746 break; 1747 case KVM_TRACE_ENABLE: 1748 case KVM_TRACE_PAUSE: 1749 case KVM_TRACE_DISABLE: 
1750 r = kvm_trace_ioctl(ioctl, arg); 1751 break; 1752 default: 1753 return kvm_arch_dev_ioctl(filp, ioctl, arg); 1754 } 1755 out: 1756 return r; 1757 } 1758 1759 static struct file_operations kvm_chardev_ops = { 1760 .unlocked_ioctl = kvm_dev_ioctl, 1761 .compat_ioctl = kvm_dev_ioctl, 1762 }; 1763 1764 static struct miscdevice kvm_dev = { 1765 KVM_MINOR, 1766 "kvm", 1767 &kvm_chardev_ops, 1768 }; 1769 1770 static void hardware_enable(void *junk) 1771 { 1772 int cpu = raw_smp_processor_id(); 1773 1774 if (cpu_isset(cpu, cpus_hardware_enabled)) 1775 return; 1776 cpu_set(cpu, cpus_hardware_enabled); 1777 kvm_arch_hardware_enable(NULL); 1778 } 1779 1780 static void hardware_disable(void *junk) 1781 { 1782 int cpu = raw_smp_processor_id(); 1783 1784 if (!cpu_isset(cpu, cpus_hardware_enabled)) 1785 return; 1786 cpu_clear(cpu, cpus_hardware_enabled); 1787 kvm_arch_hardware_disable(NULL); 1788 } 1789 1790 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, 1791 void *v) 1792 { 1793 int cpu = (long)v; 1794 1795 val &= ~CPU_TASKS_FROZEN; 1796 switch (val) { 1797 case CPU_DYING: 1798 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 1799 cpu); 1800 hardware_disable(NULL); 1801 break; 1802 case CPU_UP_CANCELED: 1803 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 1804 cpu); 1805 smp_call_function_single(cpu, hardware_disable, NULL, 1); 1806 break; 1807 case CPU_ONLINE: 1808 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 1809 cpu); 1810 smp_call_function_single(cpu, hardware_enable, NULL, 1); 1811 break; 1812 } 1813 return NOTIFY_OK; 1814 } 1815 1816 1817 asmlinkage void kvm_handle_fault_on_reboot(void) 1818 { 1819 if (kvm_rebooting) 1820 /* spin while reset goes on */ 1821 while (true) 1822 ; 1823 /* Fault while not rebooting. We want the trace. */ 1824 BUG(); 1825 } 1826 EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); 1827 1828 static int kvm_reboot(struct notifier_block *notifier, unsigned long val, 1829 void *v) 1830 { 1831 if (val == SYS_RESTART) { 1832 /* 1833 * Some (well, at least mine) BIOSes hang on reboot if 1834 * in vmx root mode. 
1835 */ 1836 printk(KERN_INFO "kvm: exiting hardware virtualization\n"); 1837 kvm_rebooting = true; 1838 on_each_cpu(hardware_disable, NULL, 1); 1839 } 1840 return NOTIFY_OK; 1841 } 1842 1843 static struct notifier_block kvm_reboot_notifier = { 1844 .notifier_call = kvm_reboot, 1845 .priority = 0, 1846 }; 1847 1848 void kvm_io_bus_init(struct kvm_io_bus *bus) 1849 { 1850 memset(bus, 0, sizeof(*bus)); 1851 } 1852 1853 void kvm_io_bus_destroy(struct kvm_io_bus *bus) 1854 { 1855 int i; 1856 1857 for (i = 0; i < bus->dev_count; i++) { 1858 struct kvm_io_device *pos = bus->devs[i]; 1859 1860 kvm_iodevice_destructor(pos); 1861 } 1862 } 1863 1864 struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, 1865 gpa_t addr, int len, int is_write) 1866 { 1867 int i; 1868 1869 for (i = 0; i < bus->dev_count; i++) { 1870 struct kvm_io_device *pos = bus->devs[i]; 1871 1872 if (pos->in_range(pos, addr, len, is_write)) 1873 return pos; 1874 } 1875 1876 return NULL; 1877 } 1878 1879 void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev) 1880 { 1881 BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1)); 1882 1883 bus->devs[bus->dev_count++] = dev; 1884 } 1885 1886 static struct notifier_block kvm_cpu_notifier = { 1887 .notifier_call = kvm_cpu_hotplug, 1888 .priority = 20, /* must be > scheduler priority */ 1889 }; 1890 1891 static int vm_stat_get(void *_offset, u64 *val) 1892 { 1893 unsigned offset = (long)_offset; 1894 struct kvm *kvm; 1895 1896 *val = 0; 1897 spin_lock(&kvm_lock); 1898 list_for_each_entry(kvm, &vm_list, vm_list) 1899 *val += *(u32 *)((void *)kvm + offset); 1900 spin_unlock(&kvm_lock); 1901 return 0; 1902 } 1903 1904 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n"); 1905 1906 static int vcpu_stat_get(void *_offset, u64 *val) 1907 { 1908 unsigned offset = (long)_offset; 1909 struct kvm *kvm; 1910 struct kvm_vcpu *vcpu; 1911 int i; 1912 1913 *val = 0; 1914 spin_lock(&kvm_lock); 1915 list_for_each_entry(kvm, &vm_list, vm_list) 1916 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 1917 vcpu = kvm->vcpus[i]; 1918 if (vcpu) 1919 *val += *(u32 *)((void *)vcpu + offset); 1920 } 1921 spin_unlock(&kvm_lock); 1922 return 0; 1923 } 1924 1925 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); 1926 1927 static struct file_operations *stat_fops[] = { 1928 [KVM_STAT_VCPU] = &vcpu_stat_fops, 1929 [KVM_STAT_VM] = &vm_stat_fops, 1930 }; 1931 1932 static void kvm_init_debug(void) 1933 { 1934 struct kvm_stats_debugfs_item *p; 1935 1936 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); 1937 for (p = debugfs_entries; p->name; ++p) 1938 p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, 1939 (void *)(long)p->offset, 1940 stat_fops[p->kind]); 1941 } 1942 1943 static void kvm_exit_debug(void) 1944 { 1945 struct kvm_stats_debugfs_item *p; 1946 1947 for (p = debugfs_entries; p->name; ++p) 1948 debugfs_remove(p->dentry); 1949 debugfs_remove(kvm_debugfs_dir); 1950 } 1951 1952 static int kvm_suspend(struct sys_device *dev, pm_message_t state) 1953 { 1954 hardware_disable(NULL); 1955 return 0; 1956 } 1957 1958 static int kvm_resume(struct sys_device *dev) 1959 { 1960 hardware_enable(NULL); 1961 return 0; 1962 } 1963 1964 static struct sysdev_class kvm_sysdev_class = { 1965 .name = "kvm", 1966 .suspend = kvm_suspend, 1967 .resume = kvm_resume, 1968 }; 1969 1970 static struct sys_device kvm_sysdev = { 1971 .id = 0, 1972 .cls = &kvm_sysdev_class, 1973 }; 1974 1975 struct page *bad_page; 1976 pfn_t bad_pfn; 1977 1978 static inline 1979 struct kvm_vcpu 
*preempt_notifier_to_vcpu(struct preempt_notifier *pn) 1980 { 1981 return container_of(pn, struct kvm_vcpu, preempt_notifier); 1982 } 1983 1984 static void kvm_sched_in(struct preempt_notifier *pn, int cpu) 1985 { 1986 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 1987 1988 kvm_arch_vcpu_load(vcpu, cpu); 1989 } 1990 1991 static void kvm_sched_out(struct preempt_notifier *pn, 1992 struct task_struct *next) 1993 { 1994 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 1995 1996 kvm_arch_vcpu_put(vcpu); 1997 } 1998 1999 int kvm_init(void *opaque, unsigned int vcpu_size, 2000 struct module *module) 2001 { 2002 int r; 2003 int cpu; 2004 2005 kvm_init_debug(); 2006 2007 r = kvm_arch_init(opaque); 2008 if (r) 2009 goto out_fail; 2010 2011 bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 2012 2013 if (bad_page == NULL) { 2014 r = -ENOMEM; 2015 goto out; 2016 } 2017 2018 bad_pfn = page_to_pfn(bad_page); 2019 2020 r = kvm_arch_hardware_setup(); 2021 if (r < 0) 2022 goto out_free_0; 2023 2024 for_each_online_cpu(cpu) { 2025 smp_call_function_single(cpu, 2026 kvm_arch_check_processor_compat, 2027 &r, 1); 2028 if (r < 0) 2029 goto out_free_1; 2030 } 2031 2032 on_each_cpu(hardware_enable, NULL, 1); 2033 r = register_cpu_notifier(&kvm_cpu_notifier); 2034 if (r) 2035 goto out_free_2; 2036 register_reboot_notifier(&kvm_reboot_notifier); 2037 2038 r = sysdev_class_register(&kvm_sysdev_class); 2039 if (r) 2040 goto out_free_3; 2041 2042 r = sysdev_register(&kvm_sysdev); 2043 if (r) 2044 goto out_free_4; 2045 2046 /* A kmem cache lets us meet the alignment requirements of fx_save. */ 2047 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, 2048 __alignof__(struct kvm_vcpu), 2049 0, NULL); 2050 if (!kvm_vcpu_cache) { 2051 r = -ENOMEM; 2052 goto out_free_5; 2053 } 2054 2055 kvm_chardev_ops.owner = module; 2056 2057 r = misc_register(&kvm_dev); 2058 if (r) { 2059 printk(KERN_ERR "kvm: misc device register failed\n"); 2060 goto out_free; 2061 } 2062 2063 kvm_preempt_ops.sched_in = kvm_sched_in; 2064 kvm_preempt_ops.sched_out = kvm_sched_out; 2065 2066 return 0; 2067 2068 out_free: 2069 kmem_cache_destroy(kvm_vcpu_cache); 2070 out_free_5: 2071 sysdev_unregister(&kvm_sysdev); 2072 out_free_4: 2073 sysdev_class_unregister(&kvm_sysdev_class); 2074 out_free_3: 2075 unregister_reboot_notifier(&kvm_reboot_notifier); 2076 unregister_cpu_notifier(&kvm_cpu_notifier); 2077 out_free_2: 2078 on_each_cpu(hardware_disable, NULL, 1); 2079 out_free_1: 2080 kvm_arch_hardware_unsetup(); 2081 out_free_0: 2082 __free_page(bad_page); 2083 out: 2084 kvm_arch_exit(); 2085 kvm_exit_debug(); 2086 out_fail: 2087 return r; 2088 } 2089 EXPORT_SYMBOL_GPL(kvm_init); 2090 2091 void kvm_exit(void) 2092 { 2093 kvm_trace_cleanup(); 2094 misc_deregister(&kvm_dev); 2095 kmem_cache_destroy(kvm_vcpu_cache); 2096 sysdev_unregister(&kvm_sysdev); 2097 sysdev_class_unregister(&kvm_sysdev_class); 2098 unregister_reboot_notifier(&kvm_reboot_notifier); 2099 unregister_cpu_notifier(&kvm_cpu_notifier); 2100 on_each_cpu(hardware_disable, NULL, 1); 2101 kvm_arch_hardware_unsetup(); 2102 kvm_arch_exit(); 2103 kvm_exit_debug(); 2104 __free_page(bad_page); 2105 } 2106 EXPORT_SYMBOL_GPL(kvm_exit); 2107
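
/*
 * Illustrative userspace sketch (not part of this module, compiled out):
 * the ioctl sequence served above by kvm_dev_ioctl(), kvm_vm_ioctl() and
 * kvm_vcpu_ioctl().  A VM fd is obtained from /dev/kvm, guest memory is
 * registered with KVM_SET_USER_MEMORY_REGION, and a vcpu fd is created,
 * mmap()ed and run.  The function name, memory size and slot number are
 * illustrative; error handling and guest register/code setup are omitted.
 */
#if 0
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int kvm_usage_sketch(void)
{
	int kvm_fd, vm_fd, vcpu_fd, mmap_size;
	struct kvm_userspace_memory_region mem = { 0 };
	struct kvm_run *run;

	kvm_fd = open("/dev/kvm", O_RDWR);		/* kvm_dev_ioctl() */
	if (ioctl(kvm_fd, KVM_GET_API_VERSION, 0) != KVM_API_VERSION)
		return -1;

	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);	/* kvm_dev_ioctl_create_vm() */

	/* Back one slot of guest physical memory with anonymous memory. */
	mem.slot = 0;
	mem.guest_phys_addr = 0;
	mem.memory_size = 0x100000;
	mem.userspace_addr = (unsigned long)mmap(NULL, mem.memory_size,
						 PROT_READ | PROT_WRITE,
						 MAP_PRIVATE | MAP_ANONYMOUS,
						 -1, 0);
	/* kvm_vm_ioctl_set_memory_region() -> __kvm_set_memory_region() */
	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);

	vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);	/* kvm_vm_ioctl_create_vcpu() */

	/* The kvm_run area size must be queried, not assumed to be one page. */
	mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
		   MAP_SHARED, vcpu_fd, 0);		/* kvm_vcpu_mmap() */

	ioctl(vcpu_fd, KVM_RUN, 0);			/* kvm_arch_vcpu_ioctl_run() */
	return run->exit_reason;
}
#endif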