Lines Matching +full:- +full:kvm
1 // SPDX-License-Identifier: GPL-2.0-only
3 * Kernel-based Virtual Machine driver for Linux
5 * This module enables machines with Intel VT-x extensions to run virtual
16 #include <kvm/iodev.h>
19 #include <linux/kvm.h>
68 #include <trace/events/kvm.h>
84 /* Default doubles per-vcpu halt_poll_ns. */
94 /* Default resets per-vcpu halt_poll_ns. */
102 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock
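A minimal sketch of the lock ordering documented above; the function below is hypothetical (not taken from kvm_main.c) and only shows the order in which the three mutexes would have to be nested to respect the documented hierarchy:

#include <linux/kvm_host.h>

/* Hypothetical example: acquire in the documented order, release in reverse. */
static void example_nested_locking(struct kvm *kvm)
{
	mutex_lock(&kvm->lock);
	mutex_lock(&kvm->slots_lock);
	mutex_lock(&kvm->irq_lock);

	/* ... work that needs all three locks ... */

	mutex_unlock(&kvm->irq_lock);
	mutex_unlock(&kvm->slots_lock);
	mutex_unlock(&kvm->lock);
}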
130 * - Prevent a compat task from opening /dev/kvm
131 * - If the open has been done by a 64bit task, and the KVM fd
135 unsigned long arg) { return -EINVAL; } in kvm_no_compat_ioctl()
139 return is_compat_task() ? -ENODEV : 0; in kvm_no_compat_open()
151 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
157 __weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm) in kvm_arch_guest_memory_reclaimed() argument
215 preempt_notifier_register(&vcpu->preempt_notifier); in vcpu_load()
225 preempt_notifier_unregister(&vcpu->preempt_notifier); in vcpu_put()
279 * after this point is also OK, as the requirement is only that KVM wait in kvm_make_vcpu_request()
284 cpu = READ_ONCE(vcpu->cpu); in kvm_make_vcpu_request()
285 if (cpu != -1 && cpu != current_cpu) in kvm_make_vcpu_request()
290 bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, in kvm_make_vcpus_request_mask() argument
304 vcpu = kvm_get_vcpu(kvm, i); in kvm_make_vcpus_request_mask()
316 bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, in kvm_make_all_cpus_request_except() argument
330 kvm_for_each_vcpu(i, vcpu, kvm) { in kvm_make_all_cpus_request_except()
342 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) in kvm_make_all_cpus_request() argument
344 return kvm_make_all_cpus_request_except(kvm, req, NULL); in kvm_make_all_cpus_request()
348 void kvm_flush_remote_tlbs(struct kvm *kvm) in kvm_flush_remote_tlbs() argument
350 ++kvm->stat.generic.remote_tlb_flush_requests; in kvm_flush_remote_tlbs()
354 * mode. Pairs with a memory barrier in arch-specific code. in kvm_flush_remote_tlbs()
355 * - x86: smp_mb__after_srcu_read_unlock in vcpu_enter_guest in kvm_flush_remote_tlbs()
357 * - powerpc: smp_mb in kvmppc_prepare_to_enter. in kvm_flush_remote_tlbs()
360 * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that in kvm_flush_remote_tlbs()
363 if (!kvm_arch_flush_remote_tlbs(kvm) in kvm_flush_remote_tlbs()
364 || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) in kvm_flush_remote_tlbs()
365 ++kvm->stat.generic.remote_tlb_flush; in kvm_flush_remote_tlbs()
369 void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) in kvm_flush_remote_tlbs_range() argument
371 if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages)) in kvm_flush_remote_tlbs_range()
375 * Fall back to flushing the entire TLB if the architecture's range-based in kvm_flush_remote_tlbs_range()
379 kvm_flush_remote_tlbs(kvm); in kvm_flush_remote_tlbs_range()
382 void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, in kvm_flush_remote_tlbs_memslot() argument
392 lockdep_assert_held(&kvm->slots_lock); in kvm_flush_remote_tlbs_memslot()
393 kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages); in kvm_flush_remote_tlbs_memslot()
396 static void kvm_flush_shadow_all(struct kvm *kvm) in kvm_flush_shadow_all() argument
398 kvm_arch_flush_shadow_all(kvm); in kvm_flush_shadow_all()
399 kvm_arch_guest_memory_reclaimed(kvm); in kvm_flush_shadow_all()
406 gfp_flags |= mc->gfp_zero; in mmu_memory_cache_alloc_obj()
408 if (mc->kmem_cache) in mmu_memory_cache_alloc_obj()
409 return kmem_cache_alloc(mc->kmem_cache, gfp_flags); in mmu_memory_cache_alloc_obj()
416 gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT; in __kvm_mmu_topup_memory_cache()
419 if (mc->nobjs >= min) in __kvm_mmu_topup_memory_cache()
422 if (unlikely(!mc->objects)) { in __kvm_mmu_topup_memory_cache()
424 return -EIO; in __kvm_mmu_topup_memory_cache()
426 mc->objects = kvmalloc_array(sizeof(void *), capacity, gfp); in __kvm_mmu_topup_memory_cache()
427 if (!mc->objects) in __kvm_mmu_topup_memory_cache()
428 return -ENOMEM; in __kvm_mmu_topup_memory_cache()
430 mc->capacity = capacity; in __kvm_mmu_topup_memory_cache()
434 if (WARN_ON_ONCE(mc->capacity != capacity)) in __kvm_mmu_topup_memory_cache()
435 return -EIO; in __kvm_mmu_topup_memory_cache()
437 while (mc->nobjs < mc->capacity) { in __kvm_mmu_topup_memory_cache()
440 return mc->nobjs >= min ? 0 : -ENOMEM; in __kvm_mmu_topup_memory_cache()
441 mc->objects[mc->nobjs++] = obj; in __kvm_mmu_topup_memory_cache()
453 return mc->nobjs; in kvm_mmu_memory_cache_nr_free_objects()
458 while (mc->nobjs) { in kvm_mmu_free_memory_cache()
459 if (mc->kmem_cache) in kvm_mmu_free_memory_cache()
460 kmem_cache_free(mc->kmem_cache, mc->objects[--mc->nobjs]); in kvm_mmu_free_memory_cache()
462 free_page((unsigned long)mc->objects[--mc->nobjs]); in kvm_mmu_free_memory_cache()
465 kvfree(mc->objects); in kvm_mmu_free_memory_cache()
467 mc->objects = NULL; in kvm_mmu_free_memory_cache()
468 mc->capacity = 0; in kvm_mmu_free_memory_cache()
475 if (WARN_ON(!mc->nobjs)) in kvm_mmu_memory_cache_alloc()
478 p = mc->objects[--mc->nobjs]; in kvm_mmu_memory_cache_alloc()
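A hedged usage sketch of the memory-cache helpers listed above; the caller below is illustrative (not from kvm_main.c) and assumes the usual split between a sleepable top-up phase and an atomic allocation phase under the MMU lock:

#include <linux/err.h>
#include <linux/kvm_host.h>

static void *example_topup_then_alloc(struct kvm *kvm,
				      struct kvm_mmu_memory_cache *mc)
{
	void *obj;
	int r;

	/* Sleepable context: pre-fill the cache with at least one object. */
	r = kvm_mmu_topup_memory_cache(mc, 1);
	if (r)
		return ERR_PTR(r);

	/* Under the MMU lock: pop a pre-allocated object; never sleeps. */
	KVM_MMU_LOCK(kvm);		/* file-local macro from kvm_mm.h */
	obj = kvm_mmu_memory_cache_alloc(mc);
	KVM_MMU_UNLOCK(kvm);

	return obj;
}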
484 static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) in kvm_vcpu_init() argument
486 mutex_init(&vcpu->mutex); in kvm_vcpu_init()
487 vcpu->cpu = -1; in kvm_vcpu_init()
488 vcpu->kvm = kvm; in kvm_vcpu_init()
489 vcpu->vcpu_id = id; in kvm_vcpu_init()
490 vcpu->pid = NULL; in kvm_vcpu_init()
492 rcuwait_init(&vcpu->wait); in kvm_vcpu_init()
498 vcpu->preempted = false; in kvm_vcpu_init()
499 vcpu->ready = false; in kvm_vcpu_init()
500 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); in kvm_vcpu_init()
501 vcpu->last_used_slot = NULL; in kvm_vcpu_init()
504 snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d", in kvm_vcpu_init()
511 kvm_dirty_ring_free(&vcpu->dirty_ring); in kvm_vcpu_destroy()
515 * the vcpu->pid pointer, and at destruction time all file descriptors in kvm_vcpu_destroy()
518 put_pid(rcu_dereference_protected(vcpu->pid, 1)); in kvm_vcpu_destroy()
520 free_page((unsigned long)vcpu->run); in kvm_vcpu_destroy()
524 void kvm_destroy_vcpus(struct kvm *kvm) in kvm_destroy_vcpus() argument
529 kvm_for_each_vcpu(i, vcpu, kvm) { in kvm_destroy_vcpus()
531 xa_erase(&kvm->vcpu_array, i); in kvm_destroy_vcpus()
534 atomic_set(&kvm->online_vcpus, 0); in kvm_destroy_vcpus()
539 static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) in mmu_notifier_to_kvm()
541 return container_of(mn, struct kvm, mmu_notifier); in mmu_notifier_to_kvm()
544 typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
546 typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
549 typedef void (*on_unlock_fn_t)(struct kvm *kvm);
565 * function will have a non-zero address, and so it will generate code to
579 for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
583 static __always_inline int __kvm_handle_hva_range(struct kvm *kvm, in __kvm_handle_hva_range() argument
592 if (WARN_ON_ONCE(range->end <= range->start)) in __kvm_handle_hva_range()
596 if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) && in __kvm_handle_hva_range()
597 IS_KVM_NULL_FN(range->handler))) in __kvm_handle_hva_range()
600 idx = srcu_read_lock(&kvm->srcu); in __kvm_handle_hva_range()
605 slots = __kvm_memslots(kvm, i); in __kvm_handle_hva_range()
607 range->start, range->end - 1) { in __kvm_handle_hva_range()
610 slot = container_of(node, struct kvm_memory_slot, hva_node[slots->node_idx]); in __kvm_handle_hva_range()
611 hva_start = max(range->start, slot->userspace_addr); in __kvm_handle_hva_range()
612 hva_end = min(range->end, slot->userspace_addr + in __kvm_handle_hva_range()
613 (slot->npages << PAGE_SHIFT)); in __kvm_handle_hva_range()
621 gfn_range.arg = range->arg; in __kvm_handle_hva_range()
622 gfn_range.may_block = range->may_block; in __kvm_handle_hva_range()
626 * {gfn_start, gfn_start+1, ..., gfn_end-1}. in __kvm_handle_hva_range()
629 gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot); in __kvm_handle_hva_range()
634 KVM_MMU_LOCK(kvm); in __kvm_handle_hva_range()
635 if (!IS_KVM_NULL_FN(range->on_lock)) in __kvm_handle_hva_range()
636 range->on_lock(kvm, range->start, range->end); in __kvm_handle_hva_range()
637 if (IS_KVM_NULL_FN(range->handler)) in __kvm_handle_hva_range()
640 ret |= range->handler(kvm, &gfn_range); in __kvm_handle_hva_range()
644 if (range->flush_on_ret && ret) in __kvm_handle_hva_range()
645 kvm_flush_remote_tlbs(kvm); in __kvm_handle_hva_range()
648 KVM_MMU_UNLOCK(kvm); in __kvm_handle_hva_range()
649 if (!IS_KVM_NULL_FN(range->on_unlock)) in __kvm_handle_hva_range()
650 range->on_unlock(kvm); in __kvm_handle_hva_range()
653 srcu_read_unlock(&kvm->srcu, idx); in __kvm_handle_hva_range()
655 /* The notifiers are averse to booleans. :-( */ in __kvm_handle_hva_range()
665 struct kvm *kvm = mmu_notifier_to_kvm(mn); in kvm_handle_hva_range() local
677 return __kvm_handle_hva_range(kvm, &range); in kvm_handle_hva_range()
685 struct kvm *kvm = mmu_notifier_to_kvm(mn); in kvm_handle_hva_range_no_flush() local
696 return __kvm_handle_hva_range(kvm, &range); in kvm_handle_hva_range_no_flush()
699 static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) in kvm_change_spte_gfn() argument
704 * guaranteed by the primary MMU. If that ever changes, KVM needs to in kvm_change_spte_gfn()
705 * unmap the memslot instead of skipping the memslot to ensure that KVM in kvm_change_spte_gfn()
708 WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); in kvm_change_spte_gfn()
710 if (range->slot->flags & KVM_MEMSLOT_INVALID) in kvm_change_spte_gfn()
713 return kvm_set_spte_gfn(kvm, range); in kvm_change_spte_gfn()
721 struct kvm *kvm = mmu_notifier_to_kvm(mn); in kvm_mmu_notifier_change_pte() local
728 * If mmu_invalidate_in_progress is zero, then no in-progress in kvm_mmu_notifier_change_pte()
731 * positive (count elevated by a different invalidation) is sub-optimal in kvm_mmu_notifier_change_pte()
734 WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); in kvm_mmu_notifier_change_pte()
735 if (!READ_ONCE(kvm->mmu_invalidate_in_progress)) in kvm_mmu_notifier_change_pte()
741 void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, in kvm_mmu_invalidate_begin() argument
749 kvm->mmu_invalidate_in_progress++; in kvm_mmu_invalidate_begin()
750 if (likely(kvm->mmu_invalidate_in_progress == 1)) { in kvm_mmu_invalidate_begin()
751 kvm->mmu_invalidate_range_start = start; in kvm_mmu_invalidate_begin()
752 kvm->mmu_invalidate_range_end = end; in kvm_mmu_invalidate_begin()
763 kvm->mmu_invalidate_range_start = in kvm_mmu_invalidate_begin()
764 min(kvm->mmu_invalidate_range_start, start); in kvm_mmu_invalidate_begin()
765 kvm->mmu_invalidate_range_end = in kvm_mmu_invalidate_begin()
766 max(kvm->mmu_invalidate_range_end, end); in kvm_mmu_invalidate_begin()
773 struct kvm *kvm = mmu_notifier_to_kvm(mn); in kvm_mmu_notifier_invalidate_range_start() local
775 .start = range->start, in kvm_mmu_notifier_invalidate_range_start()
776 .end = range->end, in kvm_mmu_notifier_invalidate_range_start()
784 trace_kvm_unmap_hva_range(range->start, range->end); in kvm_mmu_notifier_invalidate_range_start()
794 spin_lock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_start()
795 kvm->mn_active_invalidate_count++; in kvm_mmu_notifier_invalidate_range_start()
796 spin_unlock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_start()
808 gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end, in kvm_mmu_notifier_invalidate_range_start()
811 __kvm_handle_hva_range(kvm, &hva_range); in kvm_mmu_notifier_invalidate_range_start()
816 void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, in kvm_mmu_invalidate_end() argument
820 * This sequence increase will notify the kvm page fault that in kvm_mmu_invalidate_end()
824 kvm->mmu_invalidate_seq++; in kvm_mmu_invalidate_end()
831 kvm->mmu_invalidate_in_progress--; in kvm_mmu_invalidate_end()
837 struct kvm *kvm = mmu_notifier_to_kvm(mn); in kvm_mmu_notifier_invalidate_range_end() local
839 .start = range->start, in kvm_mmu_notifier_invalidate_range_end()
840 .end = range->end, in kvm_mmu_notifier_invalidate_range_end()
849 __kvm_handle_hva_range(kvm, &hva_range); in kvm_mmu_notifier_invalidate_range_end()
852 spin_lock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_end()
853 wake = (--kvm->mn_active_invalidate_count == 0); in kvm_mmu_notifier_invalidate_range_end()
854 spin_unlock(&kvm->mn_invalidate_lock); in kvm_mmu_notifier_invalidate_range_end()
861 rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait); in kvm_mmu_notifier_invalidate_range_end()
863 BUG_ON(kvm->mmu_invalidate_in_progress < 0); in kvm_mmu_notifier_invalidate_range_end()
886 * affect performance on pre-Haswell Intel EPT, where there is in kvm_mmu_notifier_clear_young()
913 struct kvm *kvm = mmu_notifier_to_kvm(mn); in kvm_mmu_notifier_release() local
916 idx = srcu_read_lock(&kvm->srcu); in kvm_mmu_notifier_release()
917 kvm_flush_shadow_all(kvm); in kvm_mmu_notifier_release()
918 srcu_read_unlock(&kvm->srcu, idx); in kvm_mmu_notifier_release()
931 static int kvm_init_mmu_notifier(struct kvm *kvm) in kvm_init_mmu_notifier() argument
933 kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; in kvm_init_mmu_notifier()
934 return mmu_notifier_register(&kvm->mmu_notifier, current->mm); in kvm_init_mmu_notifier()
939 static int kvm_init_mmu_notifier(struct kvm *kvm) in kvm_init_mmu_notifier() argument
951 struct kvm *kvm = container_of(bl, struct kvm, pm_notifier); in kvm_pm_notifier_call() local
953 return kvm_arch_pm_notifier(kvm, state); in kvm_pm_notifier_call()
956 static void kvm_init_pm_notifier(struct kvm *kvm) in kvm_init_pm_notifier() argument
958 kvm->pm_notifier.notifier_call = kvm_pm_notifier_call; in kvm_init_pm_notifier()
959 /* Suspend KVM before we suspend ftrace, RCU, etc. */ in kvm_init_pm_notifier()
960 kvm->pm_notifier.priority = INT_MAX; in kvm_init_pm_notifier()
961 register_pm_notifier(&kvm->pm_notifier); in kvm_init_pm_notifier()
964 static void kvm_destroy_pm_notifier(struct kvm *kvm) in kvm_destroy_pm_notifier() argument
966 unregister_pm_notifier(&kvm->pm_notifier); in kvm_destroy_pm_notifier()
969 static void kvm_init_pm_notifier(struct kvm *kvm) in kvm_init_pm_notifier() argument
973 static void kvm_destroy_pm_notifier(struct kvm *kvm) in kvm_destroy_pm_notifier() argument
980 if (!memslot->dirty_bitmap) in kvm_destroy_dirty_bitmap()
983 kvfree(memslot->dirty_bitmap); in kvm_destroy_dirty_bitmap()
984 memslot->dirty_bitmap = NULL; in kvm_destroy_dirty_bitmap()
988 static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) in kvm_free_memslot() argument
992 kvm_arch_free_memslot(kvm, slot); in kvm_free_memslot()
997 static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots) in kvm_free_memslots() argument
1009 if (!slots->node_idx) in kvm_free_memslots()
1012 hash_for_each_safe(slots->id_hash, bkt, idnode, memslot, id_node[1]) in kvm_free_memslots()
1013 kvm_free_memslot(kvm, memslot); in kvm_free_memslots()
1018 switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) { in kvm_stats_debugfs_mode()
1029 static void kvm_destroy_vm_debugfs(struct kvm *kvm) in kvm_destroy_vm_debugfs() argument
1035 if (IS_ERR(kvm->debugfs_dentry)) in kvm_destroy_vm_debugfs()
1038 debugfs_remove_recursive(kvm->debugfs_dentry); in kvm_destroy_vm_debugfs()
1040 if (kvm->debugfs_stat_data) { in kvm_destroy_vm_debugfs()
1042 kfree(kvm->debugfs_stat_data[i]); in kvm_destroy_vm_debugfs()
1043 kfree(kvm->debugfs_stat_data); in kvm_destroy_vm_debugfs()
1047 static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) in kvm_create_vm_debugfs() argument
1054 int i, ret = -ENOMEM; in kvm_create_vm_debugfs()
1061 snprintf(dir_name, sizeof(dir_name), "%d-%s", task_pid_nr(current), fdname); in kvm_create_vm_debugfs()
1065 pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name); in kvm_create_vm_debugfs()
1075 kvm->debugfs_dentry = dent; in kvm_create_vm_debugfs()
1076 kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, in kvm_create_vm_debugfs()
1077 sizeof(*kvm->debugfs_stat_data), in kvm_create_vm_debugfs()
1079 if (!kvm->debugfs_stat_data) in kvm_create_vm_debugfs()
1088 stat_data->kvm = kvm; in kvm_create_vm_debugfs()
1089 stat_data->desc = pdesc; in kvm_create_vm_debugfs()
1090 stat_data->kind = KVM_STAT_VM; in kvm_create_vm_debugfs()
1091 kvm->debugfs_stat_data[i] = stat_data; in kvm_create_vm_debugfs()
1092 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_create_vm_debugfs()
1093 kvm->debugfs_dentry, stat_data, in kvm_create_vm_debugfs()
1103 stat_data->kvm = kvm; in kvm_create_vm_debugfs()
1104 stat_data->desc = pdesc; in kvm_create_vm_debugfs()
1105 stat_data->kind = KVM_STAT_VCPU; in kvm_create_vm_debugfs()
1106 kvm->debugfs_stat_data[i + kvm_vm_stats_header.num_desc] = stat_data; in kvm_create_vm_debugfs()
1107 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_create_vm_debugfs()
1108 kvm->debugfs_dentry, stat_data, in kvm_create_vm_debugfs()
1112 ret = kvm_arch_create_vm_debugfs(kvm); in kvm_create_vm_debugfs()
1118 kvm_destroy_vm_debugfs(kvm); in kvm_create_vm_debugfs()
1126 int __weak kvm_arch_post_init_vm(struct kvm *kvm) in kvm_arch_post_init_vm() argument
1135 void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm) in kvm_arch_pre_destroy_vm() argument
1140 * Called after per-vm debugfs is created. When called, kvm->debugfs_dentry should
1141 * be set up already, so we can create arch-specific debugfs entries under it.
1143 * a per-arch destroy interface is not needed.
1145 int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm) in kvm_arch_create_vm_debugfs() argument
1150 static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) in kvm_create_vm()
1152 struct kvm *kvm = kvm_arch_alloc_vm(); in kvm_create_vm() local
1154 int r = -ENOMEM; in kvm_create_vm()
1157 if (!kvm) in kvm_create_vm()
1158 return ERR_PTR(-ENOMEM); in kvm_create_vm()
1160 /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */ in kvm_create_vm()
1163 KVM_MMU_LOCK_INIT(kvm); in kvm_create_vm()
1164 mmgrab(current->mm); in kvm_create_vm()
1165 kvm->mm = current->mm; in kvm_create_vm()
1166 kvm_eventfd_init(kvm); in kvm_create_vm()
1167 mutex_init(&kvm->lock); in kvm_create_vm()
1168 mutex_init(&kvm->irq_lock); in kvm_create_vm()
1169 mutex_init(&kvm->slots_lock); in kvm_create_vm()
1170 mutex_init(&kvm->slots_arch_lock); in kvm_create_vm()
1171 spin_lock_init(&kvm->mn_invalidate_lock); in kvm_create_vm()
1172 rcuwait_init(&kvm->mn_memslots_update_rcuwait); in kvm_create_vm()
1173 xa_init(&kvm->vcpu_array); in kvm_create_vm()
1175 INIT_LIST_HEAD(&kvm->gpc_list); in kvm_create_vm()
1176 spin_lock_init(&kvm->gpc_lock); in kvm_create_vm()
1178 INIT_LIST_HEAD(&kvm->devices); in kvm_create_vm()
1179 kvm->max_vcpus = KVM_MAX_VCPUS; in kvm_create_vm()
1187 kvm->debugfs_dentry = ERR_PTR(-ENOENT); in kvm_create_vm()
1189 snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d", in kvm_create_vm()
1192 if (init_srcu_struct(&kvm->srcu)) in kvm_create_vm()
1194 if (init_srcu_struct(&kvm->irq_srcu)) in kvm_create_vm()
1197 refcount_set(&kvm->users_count, 1); in kvm_create_vm()
1200 slots = &kvm->__memslots[i][j]; in kvm_create_vm()
1202 atomic_long_set(&slots->last_used_slot, (unsigned long)NULL); in kvm_create_vm()
1203 slots->hva_tree = RB_ROOT_CACHED; in kvm_create_vm()
1204 slots->gfn_tree = RB_ROOT; in kvm_create_vm()
1205 hash_init(slots->id_hash); in kvm_create_vm()
1206 slots->node_idx = j; in kvm_create_vm()
1209 slots->generation = i; in kvm_create_vm()
1212 rcu_assign_pointer(kvm->memslots[i], &kvm->__memslots[i][0]); in kvm_create_vm()
1216 rcu_assign_pointer(kvm->buses[i], in kvm_create_vm()
1218 if (!kvm->buses[i]) in kvm_create_vm()
1222 r = kvm_arch_init_vm(kvm, type); in kvm_create_vm()
1231 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); in kvm_create_vm()
1234 r = kvm_init_mmu_notifier(kvm); in kvm_create_vm()
1238 r = kvm_coalesced_mmio_init(kvm); in kvm_create_vm()
1242 r = kvm_create_vm_debugfs(kvm, fdname); in kvm_create_vm()
1246 r = kvm_arch_post_init_vm(kvm); in kvm_create_vm()
1251 list_add(&kvm->vm_list, &vm_list); in kvm_create_vm()
1255 kvm_init_pm_notifier(kvm); in kvm_create_vm()
1257 return kvm; in kvm_create_vm()
1260 kvm_destroy_vm_debugfs(kvm); in kvm_create_vm()
1262 kvm_coalesced_mmio_free(kvm); in kvm_create_vm()
1265 if (kvm->mmu_notifier.ops) in kvm_create_vm()
1266 mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); in kvm_create_vm()
1271 kvm_arch_destroy_vm(kvm); in kvm_create_vm()
1273 WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); in kvm_create_vm()
1275 kfree(kvm_get_bus(kvm, i)); in kvm_create_vm()
1276 cleanup_srcu_struct(&kvm->irq_srcu); in kvm_create_vm()
1278 cleanup_srcu_struct(&kvm->srcu); in kvm_create_vm()
1280 kvm_arch_free_vm(kvm); in kvm_create_vm()
1281 mmdrop(current->mm); in kvm_create_vm()
1286 static void kvm_destroy_devices(struct kvm *kvm) in kvm_destroy_devices() argument
1291 * We do not need to take the kvm->lock here, because nobody else in kvm_destroy_devices()
1292 * has a reference to the struct kvm at this point and therefore in kvm_destroy_devices()
1295 list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) { in kvm_destroy_devices()
1296 list_del(&dev->vm_node); in kvm_destroy_devices()
1297 dev->ops->destroy(dev); in kvm_destroy_devices()
1301 static void kvm_destroy_vm(struct kvm *kvm) in kvm_destroy_vm() argument
1304 struct mm_struct *mm = kvm->mm; in kvm_destroy_vm()
1306 kvm_destroy_pm_notifier(kvm); in kvm_destroy_vm()
1307 kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); in kvm_destroy_vm()
1308 kvm_destroy_vm_debugfs(kvm); in kvm_destroy_vm()
1309 kvm_arch_sync_events(kvm); in kvm_destroy_vm()
1311 list_del(&kvm->vm_list); in kvm_destroy_vm()
1313 kvm_arch_pre_destroy_vm(kvm); in kvm_destroy_vm()
1315 kvm_free_irq_routing(kvm); in kvm_destroy_vm()
1317 struct kvm_io_bus *bus = kvm_get_bus(kvm, i); in kvm_destroy_vm()
1321 kvm->buses[i] = NULL; in kvm_destroy_vm()
1323 kvm_coalesced_mmio_free(kvm); in kvm_destroy_vm()
1325 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); in kvm_destroy_vm()
1331 * last reference on KVM has been dropped, but freeing in kvm_destroy_vm()
1334 WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait)); in kvm_destroy_vm()
1335 kvm->mn_active_invalidate_count = 0; in kvm_destroy_vm()
1337 kvm_flush_shadow_all(kvm); in kvm_destroy_vm()
1339 kvm_arch_destroy_vm(kvm); in kvm_destroy_vm()
1340 kvm_destroy_devices(kvm); in kvm_destroy_vm()
1342 kvm_free_memslots(kvm, &kvm->__memslots[i][0]); in kvm_destroy_vm()
1343 kvm_free_memslots(kvm, &kvm->__memslots[i][1]); in kvm_destroy_vm()
1345 cleanup_srcu_struct(&kvm->irq_srcu); in kvm_destroy_vm()
1346 cleanup_srcu_struct(&kvm->srcu); in kvm_destroy_vm()
1347 kvm_arch_free_vm(kvm); in kvm_destroy_vm()
1354 void kvm_get_kvm(struct kvm *kvm) in kvm_get_kvm() argument
1356 refcount_inc(&kvm->users_count); in kvm_get_kvm()
1362 * kvm_get_kvm(). Return true if kvm was referenced successfully, false otherwise.
1364 bool kvm_get_kvm_safe(struct kvm *kvm) in kvm_get_kvm_safe() argument
1366 return refcount_inc_not_zero(&kvm->users_count); in kvm_get_kvm_safe()
1370 void kvm_put_kvm(struct kvm *kvm) in kvm_put_kvm() argument
1372 if (refcount_dec_and_test(&kvm->users_count)) in kvm_put_kvm()
1373 kvm_destroy_vm(kvm); in kvm_put_kvm()
1379 * with a user-visible file descriptor, e.g. a vcpu or device, if installation
1381 * its final owner. In such cases, the caller is still actively using @kvm and
1384 void kvm_put_kvm_no_destroy(struct kvm *kvm) in kvm_put_kvm_no_destroy() argument
1386 WARN_ON(refcount_dec_and_test(&kvm->users_count)); in kvm_put_kvm_no_destroy()
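A short hypothetical illustration (not from this file) of the users_count reference counting above: kvm_get_kvm_safe() only succeeds while the VM is still alive, and the matching kvm_put_kvm() may end up destroying the VM on the final reference drop:

#include <linux/kvm_host.h>

static void example_use_vm_reference(struct kvm *kvm)
{
	/* Fails once users_count has already dropped to zero. */
	if (!kvm_get_kvm_safe(kvm))
		return;

	/* ... kvm can be dereferenced safely here ... */

	kvm_put_kvm(kvm);	/* the last put triggers kvm_destroy_vm() */
}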
1392 struct kvm *kvm = filp->private_data; in kvm_vm_release() local
1394 kvm_irqfd_release(kvm); in kvm_vm_release()
1396 kvm_put_kvm(kvm); in kvm_vm_release()
1408 memslot->dirty_bitmap = __vcalloc(2, dirty_bytes, GFP_KERNEL_ACCOUNT); in kvm_alloc_dirty_bitmap()
1409 if (!memslot->dirty_bitmap) in kvm_alloc_dirty_bitmap()
1410 return -ENOMEM; in kvm_alloc_dirty_bitmap()
1415 static struct kvm_memslots *kvm_get_inactive_memslots(struct kvm *kvm, int as_id) in kvm_get_inactive_memslots() argument
1417 struct kvm_memslots *active = __kvm_memslots(kvm, as_id); in kvm_get_inactive_memslots()
1418 int node_idx_inactive = active->node_idx ^ 1; in kvm_get_inactive_memslots()
1420 return &kvm->__memslots[as_id][node_idx_inactive]; in kvm_get_inactive_memslots()
1425 * This also serves as a sanity check that at least one of the pointers is non-NULL,
1435 return b->as_id; in kvm_memslots_get_as_id()
1437 return a->as_id; in kvm_memslots_get_as_id()
1439 WARN_ON_ONCE(a->as_id != b->as_id); in kvm_memslots_get_as_id()
1440 return a->as_id; in kvm_memslots_get_as_id()
1446 struct rb_root *gfn_tree = &slots->gfn_tree; in kvm_insert_gfn_node()
1448 int idx = slots->node_idx; in kvm_insert_gfn_node()
1451 for (node = &gfn_tree->rb_node; *node; ) { in kvm_insert_gfn_node()
1456 if (slot->base_gfn < tmp->base_gfn) in kvm_insert_gfn_node()
1457 node = &(*node)->rb_left; in kvm_insert_gfn_node()
1458 else if (slot->base_gfn > tmp->base_gfn) in kvm_insert_gfn_node()
1459 node = &(*node)->rb_right; in kvm_insert_gfn_node()
1464 rb_link_node(&slot->gfn_node[idx], parent, node); in kvm_insert_gfn_node()
1465 rb_insert_color(&slot->gfn_node[idx], gfn_tree); in kvm_insert_gfn_node()
1471 rb_erase(&slot->gfn_node[slots->node_idx], &slots->gfn_tree); in kvm_erase_gfn_node()
1478 int idx = slots->node_idx; in kvm_replace_gfn_node()
1480 WARN_ON_ONCE(old->base_gfn != new->base_gfn); in kvm_replace_gfn_node()
1482 rb_replace_node(&old->gfn_node[idx], &new->gfn_node[idx], in kvm_replace_gfn_node()
1483 &slots->gfn_tree); in kvm_replace_gfn_node()
1492 * If @new is non-NULL its hva_node[slots_idx] range has to be set
1495 static void kvm_replace_memslot(struct kvm *kvm, in kvm_replace_memslot() argument
1500 struct kvm_memslots *slots = kvm_get_inactive_memslots(kvm, as_id); in kvm_replace_memslot()
1501 int idx = slots->node_idx; in kvm_replace_memslot()
1504 hash_del(&old->id_node[idx]); in kvm_replace_memslot()
1505 interval_tree_remove(&old->hva_node[idx], &slots->hva_tree); in kvm_replace_memslot()
1507 if ((long)old == atomic_long_read(&slots->last_used_slot)) in kvm_replace_memslot()
1508 atomic_long_set(&slots->last_used_slot, (long)new); in kvm_replace_memslot()
1520 new->hva_node[idx].start = new->userspace_addr; in kvm_replace_memslot()
1521 new->hva_node[idx].last = new->userspace_addr + in kvm_replace_memslot()
1522 (new->npages << PAGE_SHIFT) - 1; in kvm_replace_memslot()
1529 hash_add(slots->id_hash, &new->id_node[idx], new->id); in kvm_replace_memslot()
1530 interval_tree_insert(&new->hva_node[idx], &slots->hva_tree); in kvm_replace_memslot()
1539 if (old && old->base_gfn == new->base_gfn) { in kvm_replace_memslot()
1556 if (mem->flags & ~valid_flags) in check_memory_region_flags()
1557 return -EINVAL; in check_memory_region_flags()
1562 static void kvm_swap_active_memslots(struct kvm *kvm, int as_id) in kvm_swap_active_memslots() argument
1564 struct kvm_memslots *slots = kvm_get_inactive_memslots(kvm, as_id); in kvm_swap_active_memslots()
1567 u64 gen = __kvm_memslots(kvm, as_id)->generation; in kvm_swap_active_memslots()
1570 slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; in kvm_swap_active_memslots()
1577 spin_lock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1578 prepare_to_rcuwait(&kvm->mn_memslots_update_rcuwait); in kvm_swap_active_memslots()
1579 while (kvm->mn_active_invalidate_count) { in kvm_swap_active_memslots()
1581 spin_unlock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1583 spin_lock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1585 finish_rcuwait(&kvm->mn_memslots_update_rcuwait); in kvm_swap_active_memslots()
1586 rcu_assign_pointer(kvm->memslots[as_id], slots); in kvm_swap_active_memslots()
1587 spin_unlock(&kvm->mn_invalidate_lock); in kvm_swap_active_memslots()
1594 mutex_unlock(&kvm->slots_arch_lock); in kvm_swap_active_memslots()
1596 synchronize_srcu_expedited(&kvm->srcu); in kvm_swap_active_memslots()
1600 * update in-progress flag and incrementing the generation based on in kvm_swap_active_memslots()
1604 gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS; in kvm_swap_active_memslots()
1615 kvm_arch_memslots_updated(kvm, gen); in kvm_swap_active_memslots()
1617 slots->generation = gen; in kvm_swap_active_memslots()
1620 static int kvm_prepare_memory_region(struct kvm *kvm, in kvm_prepare_memory_region() argument
1631 * new and KVM isn't using a ring buffer, allocate and initialize a in kvm_prepare_memory_region()
1635 if (!(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) in kvm_prepare_memory_region()
1636 new->dirty_bitmap = NULL; in kvm_prepare_memory_region()
1637 else if (old && old->dirty_bitmap) in kvm_prepare_memory_region()
1638 new->dirty_bitmap = old->dirty_bitmap; in kvm_prepare_memory_region()
1639 else if (kvm_use_dirty_bitmap(kvm)) { in kvm_prepare_memory_region()
1644 if (kvm_dirty_log_manual_protect_and_init_set(kvm)) in kvm_prepare_memory_region()
1645 bitmap_set(new->dirty_bitmap, 0, new->npages); in kvm_prepare_memory_region()
1649 r = kvm_arch_prepare_memory_region(kvm, old, new, change); in kvm_prepare_memory_region()
1652 if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap)) in kvm_prepare_memory_region()
1658 static void kvm_commit_memory_region(struct kvm *kvm, in kvm_commit_memory_region() argument
1663 int old_flags = old ? old->flags : 0; in kvm_commit_memory_region()
1664 int new_flags = new ? new->flags : 0; in kvm_commit_memory_region()
1670 kvm->nr_memslot_pages -= old->npages; in kvm_commit_memory_region()
1672 kvm->nr_memslot_pages += new->npages; in kvm_commit_memory_region()
1675 int change = (new_flags & KVM_MEM_LOG_DIRTY_PAGES) ? 1 : -1; in kvm_commit_memory_region()
1676 atomic_set(&kvm->nr_memslots_dirty_logging, in kvm_commit_memory_region()
1677 atomic_read(&kvm->nr_memslots_dirty_logging) + change); in kvm_commit_memory_region()
1680 kvm_arch_commit_memory_region(kvm, old, new, change); in kvm_commit_memory_region()
1688 kvm_free_memslot(kvm, old); in kvm_commit_memory_region()
1696 if (old->dirty_bitmap && !new->dirty_bitmap) in kvm_commit_memory_region()
1720 static void kvm_activate_memslot(struct kvm *kvm, in kvm_activate_memslot() argument
1726 kvm_swap_active_memslots(kvm, as_id); in kvm_activate_memslot()
1729 kvm_replace_memslot(kvm, old, new); in kvm_activate_memslot()
1735 dest->base_gfn = src->base_gfn; in kvm_copy_memslot()
1736 dest->npages = src->npages; in kvm_copy_memslot()
1737 dest->dirty_bitmap = src->dirty_bitmap; in kvm_copy_memslot()
1738 dest->arch = src->arch; in kvm_copy_memslot()
1739 dest->userspace_addr = src->userspace_addr; in kvm_copy_memslot()
1740 dest->flags = src->flags; in kvm_copy_memslot()
1741 dest->id = src->id; in kvm_copy_memslot()
1742 dest->as_id = src->as_id; in kvm_copy_memslot()
1745 static void kvm_invalidate_memslot(struct kvm *kvm, in kvm_invalidate_memslot() argument
1755 invalid_slot->flags |= KVM_MEMSLOT_INVALID; in kvm_invalidate_memslot()
1756 kvm_replace_memslot(kvm, old, invalid_slot); in kvm_invalidate_memslot()
1763 kvm_swap_active_memslots(kvm, old->as_id); in kvm_invalidate_memslot()
1767 * memslot will be created. Validation of sp->gfn happens in: in kvm_invalidate_memslot()
1768 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) in kvm_invalidate_memslot()
1769 * - kvm_is_visible_gfn (mmu_check_root) in kvm_invalidate_memslot()
1771 kvm_arch_flush_shadow_memslot(kvm, old); in kvm_invalidate_memslot()
1772 kvm_arch_guest_memory_reclaimed(kvm); in kvm_invalidate_memslot()
1775 mutex_lock(&kvm->slots_arch_lock); in kvm_invalidate_memslot()
1778 * Copy the arch-specific field of the newly-installed slot back to the in kvm_invalidate_memslot()
1780 * slots_arch_lock in kvm_swap_active_memslots() and re-acquiring the lock in kvm_invalidate_memslot()
1784 old->arch = invalid_slot->arch; in kvm_invalidate_memslot()
1787 static void kvm_create_memslot(struct kvm *kvm, in kvm_create_memslot() argument
1791 kvm_replace_memslot(kvm, NULL, new); in kvm_create_memslot()
1792 kvm_activate_memslot(kvm, NULL, new); in kvm_create_memslot()
1795 static void kvm_delete_memslot(struct kvm *kvm, in kvm_delete_memslot() argument
1803 kvm_replace_memslot(kvm, old, NULL); in kvm_delete_memslot()
1804 kvm_activate_memslot(kvm, invalid_slot, NULL); in kvm_delete_memslot()
1807 static void kvm_move_memslot(struct kvm *kvm, in kvm_move_memslot() argument
1816 kvm_replace_memslot(kvm, old, new); in kvm_move_memslot()
1817 kvm_activate_memslot(kvm, invalid_slot, new); in kvm_move_memslot()
1820 static void kvm_update_flags_memslot(struct kvm *kvm, in kvm_update_flags_memslot() argument
1829 kvm_replace_memslot(kvm, old, new); in kvm_update_flags_memslot()
1830 kvm_activate_memslot(kvm, old, new); in kvm_update_flags_memslot()
1833 static int kvm_set_memslot(struct kvm *kvm, in kvm_set_memslot() argument
1855 mutex_lock(&kvm->slots_arch_lock); in kvm_set_memslot()
1861 * for the memslot when it is deleted/moved. Without pre-invalidation in kvm_set_memslot()
1863 * delete/move and committing the changes in arch code where KVM or a in kvm_set_memslot()
1864 * guest could access a non-existent memslot. in kvm_set_memslot()
1873 mutex_unlock(&kvm->slots_arch_lock); in kvm_set_memslot()
1874 return -ENOMEM; in kvm_set_memslot()
1876 kvm_invalidate_memslot(kvm, old, invalid_slot); in kvm_set_memslot()
1879 r = kvm_prepare_memory_region(kvm, old, new, change); in kvm_set_memslot()
1888 kvm_activate_memslot(kvm, invalid_slot, old); in kvm_set_memslot()
1891 mutex_unlock(&kvm->slots_arch_lock); in kvm_set_memslot()
1904 kvm_create_memslot(kvm, new); in kvm_set_memslot()
1906 kvm_delete_memslot(kvm, old, invalid_slot); in kvm_set_memslot()
1908 kvm_move_memslot(kvm, old, new, invalid_slot); in kvm_set_memslot()
1910 kvm_update_flags_memslot(kvm, old, new); in kvm_set_memslot()
1919 * No need to refresh new->arch, changes after dropping slots_arch_lock in kvm_set_memslot()
1921 * responsible for knowing that new->arch may be stale. in kvm_set_memslot()
1923 kvm_commit_memory_region(kvm, old, new, change); in kvm_set_memslot()
1934 if (iter.slot->id != id) in kvm_check_memslot_overlap()
1947 * Must be called holding kvm->slots_lock for write.
1949 int __kvm_set_memory_region(struct kvm *kvm, in __kvm_set_memory_region() argument
1964 as_id = mem->slot >> 16; in __kvm_set_memory_region()
1965 id = (u16)mem->slot; in __kvm_set_memory_region()
1968 if ((mem->memory_size & (PAGE_SIZE - 1)) || in __kvm_set_memory_region()
1969 (mem->memory_size != (unsigned long)mem->memory_size)) in __kvm_set_memory_region()
1970 return -EINVAL; in __kvm_set_memory_region()
1971 if (mem->guest_phys_addr & (PAGE_SIZE - 1)) in __kvm_set_memory_region()
1972 return -EINVAL; in __kvm_set_memory_region()
1974 if ((mem->userspace_addr & (PAGE_SIZE - 1)) || in __kvm_set_memory_region()
1975 (mem->userspace_addr != untagged_addr(mem->userspace_addr)) || in __kvm_set_memory_region()
1976 !access_ok((void __user *)(unsigned long)mem->userspace_addr, in __kvm_set_memory_region()
1977 mem->memory_size)) in __kvm_set_memory_region()
1978 return -EINVAL; in __kvm_set_memory_region()
1980 return -EINVAL; in __kvm_set_memory_region()
1981 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) in __kvm_set_memory_region()
1982 return -EINVAL; in __kvm_set_memory_region()
1983 if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) in __kvm_set_memory_region()
1984 return -EINVAL; in __kvm_set_memory_region()
1986 slots = __kvm_memslots(kvm, as_id); in __kvm_set_memory_region()
1994 if (!mem->memory_size) { in __kvm_set_memory_region()
1995 if (!old || !old->npages) in __kvm_set_memory_region()
1996 return -EINVAL; in __kvm_set_memory_region()
1998 if (WARN_ON_ONCE(kvm->nr_memslot_pages < old->npages)) in __kvm_set_memory_region()
1999 return -EIO; in __kvm_set_memory_region()
2001 return kvm_set_memslot(kvm, old, NULL, KVM_MR_DELETE); in __kvm_set_memory_region()
2004 base_gfn = (mem->guest_phys_addr >> PAGE_SHIFT); in __kvm_set_memory_region()
2005 npages = (mem->memory_size >> PAGE_SHIFT); in __kvm_set_memory_region()
2007 if (!old || !old->npages) { in __kvm_set_memory_region()
2011 * To simplify KVM internals, the total number of pages across in __kvm_set_memory_region()
2014 if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages) in __kvm_set_memory_region()
2015 return -EINVAL; in __kvm_set_memory_region()
2017 if ((mem->userspace_addr != old->userspace_addr) || in __kvm_set_memory_region()
2018 (npages != old->npages) || in __kvm_set_memory_region()
2019 ((mem->flags ^ old->flags) & KVM_MEM_READONLY)) in __kvm_set_memory_region()
2020 return -EINVAL; in __kvm_set_memory_region()
2022 if (base_gfn != old->base_gfn) in __kvm_set_memory_region()
2024 else if (mem->flags != old->flags) in __kvm_set_memory_region()
2032 return -EEXIST; in __kvm_set_memory_region()
2037 return -ENOMEM; in __kvm_set_memory_region()
2039 new->as_id = as_id; in __kvm_set_memory_region()
2040 new->id = id; in __kvm_set_memory_region()
2041 new->base_gfn = base_gfn; in __kvm_set_memory_region()
2042 new->npages = npages; in __kvm_set_memory_region()
2043 new->flags = mem->flags; in __kvm_set_memory_region()
2044 new->userspace_addr = mem->userspace_addr; in __kvm_set_memory_region()
2046 r = kvm_set_memslot(kvm, old, new, change); in __kvm_set_memory_region()
2053 int kvm_set_memory_region(struct kvm *kvm, in kvm_set_memory_region() argument
2058 mutex_lock(&kvm->slots_lock); in kvm_set_memory_region()
2059 r = __kvm_set_memory_region(kvm, mem); in kvm_set_memory_region()
2060 mutex_unlock(&kvm->slots_lock); in kvm_set_memory_region()
2065 static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, in kvm_vm_ioctl_set_memory_region() argument
2068 if ((u16)mem->slot >= KVM_USER_MEM_SLOTS) in kvm_vm_ioctl_set_memory_region()
2069 return -EINVAL; in kvm_vm_ioctl_set_memory_region()
2071 return kvm_set_memory_region(kvm, mem); in kvm_vm_ioctl_set_memory_region()
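From userspace, the handler above is reached via the KVM_SET_USER_MEMORY_REGION ioctl. The sketch below is a hypothetical caller (vm_fd, the sizes, and the host mapping are assumptions) showing how the address space ID is packed into the upper 16 bits of the slot field, mirroring the as_id/id split in __kvm_set_memory_region():

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int example_set_memslot(int vm_fd, __u16 as_id, __u16 id,
			       __u64 guest_phys_addr, __u64 size,
			       void *host_mem)
{
	struct kvm_userspace_memory_region region = {
		.slot = ((__u32)as_id << 16) | id,  /* as_id in bits 31:16 */
		.flags = 0,                         /* e.g. KVM_MEM_LOG_DIRTY_PAGES */
		.guest_phys_addr = guest_phys_addr, /* must be page aligned */
		.memory_size = size,                /* 0 deletes the slot */
		.userspace_addr = (__u64)(unsigned long)host_mem,
	};

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}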
2076 * kvm_get_dirty_log - get a snapshot of dirty pages
2077 * @kvm: pointer to kvm instance
2082 int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, in kvm_get_dirty_log() argument
2091 if (!kvm_use_dirty_bitmap(kvm)) in kvm_get_dirty_log()
2092 return -ENXIO; in kvm_get_dirty_log()
2097 as_id = log->slot >> 16; in kvm_get_dirty_log()
2098 id = (u16)log->slot; in kvm_get_dirty_log()
2100 return -EINVAL; in kvm_get_dirty_log()
2102 slots = __kvm_memslots(kvm, as_id); in kvm_get_dirty_log()
2104 if (!(*memslot) || !(*memslot)->dirty_bitmap) in kvm_get_dirty_log()
2105 return -ENOENT; in kvm_get_dirty_log()
2107 kvm_arch_sync_dirty_log(kvm, *memslot); in kvm_get_dirty_log()
2112 any = (*memslot)->dirty_bitmap[i]; in kvm_get_dirty_log()
2114 if (copy_to_user(log->dirty_bitmap, (*memslot)->dirty_bitmap, n)) in kvm_get_dirty_log()
2115 return -EFAULT; in kvm_get_dirty_log()
2125 * kvm_get_dirty_log_protect - get a snapshot of dirty pages
2127 * @kvm: pointer to kvm instance
2145 static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) in kvm_get_dirty_log_protect() argument
2156 if (!kvm_use_dirty_bitmap(kvm)) in kvm_get_dirty_log_protect()
2157 return -ENXIO; in kvm_get_dirty_log_protect()
2159 as_id = log->slot >> 16; in kvm_get_dirty_log_protect()
2160 id = (u16)log->slot; in kvm_get_dirty_log_protect()
2162 return -EINVAL; in kvm_get_dirty_log_protect()
2164 slots = __kvm_memslots(kvm, as_id); in kvm_get_dirty_log_protect()
2166 if (!memslot || !memslot->dirty_bitmap) in kvm_get_dirty_log_protect()
2167 return -ENOENT; in kvm_get_dirty_log_protect()
2169 dirty_bitmap = memslot->dirty_bitmap; in kvm_get_dirty_log_protect()
2171 kvm_arch_sync_dirty_log(kvm, memslot); in kvm_get_dirty_log_protect()
2175 if (kvm->manual_dirty_log_protect) { in kvm_get_dirty_log_protect()
2189 KVM_MMU_LOCK(kvm); in kvm_get_dirty_log_protect()
2202 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, in kvm_get_dirty_log_protect()
2205 KVM_MMU_UNLOCK(kvm); in kvm_get_dirty_log_protect()
2209 kvm_flush_remote_tlbs_memslot(kvm, memslot); in kvm_get_dirty_log_protect()
2211 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) in kvm_get_dirty_log_protect()
2212 return -EFAULT; in kvm_get_dirty_log_protect()
2218 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
2219 * @kvm: kvm instance
2222 * Steps 1-4 below provide a general overview of dirty page logging. See
2225 * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
2227 * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
2236 static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, in kvm_vm_ioctl_get_dirty_log() argument
2241 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_get_dirty_log()
2243 r = kvm_get_dirty_log_protect(kvm, log); in kvm_vm_ioctl_get_dirty_log()
2245 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_get_dirty_log()
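A hedged userspace-side sketch of the logging flow described above (vm_fd, the slot number, and the bitmap size are illustrative assumptions); KVM fills the caller-provided bitmap with one bit per dirty page and re-enables dirty tracking for the slot:

#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static int example_get_dirty_log(int vm_fd, __u32 slot, size_t bitmap_bytes)
{
	struct kvm_dirty_log log = { .slot = slot };
	int r;

	log.dirty_bitmap = calloc(1, bitmap_bytes);	/* one bit per page */
	if (!log.dirty_bitmap)
		return -1;

	r = ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);

	/* On success, walk log.dirty_bitmap and process the dirty pages. */

	free(log.dirty_bitmap);
	return r;
}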
2250 * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap
2252 * @kvm: pointer to kvm instance
2255 static int kvm_clear_dirty_log_protect(struct kvm *kvm, in kvm_clear_dirty_log_protect() argument
2268 if (!kvm_use_dirty_bitmap(kvm)) in kvm_clear_dirty_log_protect()
2269 return -ENXIO; in kvm_clear_dirty_log_protect()
2271 as_id = log->slot >> 16; in kvm_clear_dirty_log_protect()
2272 id = (u16)log->slot; in kvm_clear_dirty_log_protect()
2274 return -EINVAL; in kvm_clear_dirty_log_protect()
2276 if (log->first_page & 63) in kvm_clear_dirty_log_protect()
2277 return -EINVAL; in kvm_clear_dirty_log_protect()
2279 slots = __kvm_memslots(kvm, as_id); in kvm_clear_dirty_log_protect()
2281 if (!memslot || !memslot->dirty_bitmap) in kvm_clear_dirty_log_protect()
2282 return -ENOENT; in kvm_clear_dirty_log_protect()
2284 dirty_bitmap = memslot->dirty_bitmap; in kvm_clear_dirty_log_protect()
2286 n = ALIGN(log->num_pages, BITS_PER_LONG) / 8; in kvm_clear_dirty_log_protect()
2288 if (log->first_page > memslot->npages || in kvm_clear_dirty_log_protect()
2289 log->num_pages > memslot->npages - log->first_page || in kvm_clear_dirty_log_protect()
2290 (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) in kvm_clear_dirty_log_protect()
2291 return -EINVAL; in kvm_clear_dirty_log_protect()
2293 kvm_arch_sync_dirty_log(kvm, memslot); in kvm_clear_dirty_log_protect()
2297 if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n)) in kvm_clear_dirty_log_protect()
2298 return -EFAULT; in kvm_clear_dirty_log_protect()
2300 KVM_MMU_LOCK(kvm); in kvm_clear_dirty_log_protect()
2301 for (offset = log->first_page, i = offset / BITS_PER_LONG, in kvm_clear_dirty_log_protect()
2302 n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--; in kvm_clear_dirty_log_protect()
2315 * a problem if userspace sets them in log->dirty_bitmap. in kvm_clear_dirty_log_protect()
2319 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, in kvm_clear_dirty_log_protect()
2323 KVM_MMU_UNLOCK(kvm); in kvm_clear_dirty_log_protect()
2326 kvm_flush_remote_tlbs_memslot(kvm, memslot); in kvm_clear_dirty_log_protect()
2331 static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, in kvm_vm_ioctl_clear_dirty_log() argument
2336 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_clear_dirty_log()
2338 r = kvm_clear_dirty_log_protect(kvm, log); in kvm_vm_ioctl_clear_dirty_log()
2340 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_clear_dirty_log()
2345 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) in gfn_to_memslot() argument
2347 return __gfn_to_memslot(kvm_memslots(kvm), gfn); in gfn_to_memslot()
2354 u64 gen = slots->generation; in kvm_vcpu_gfn_to_memslot()
2361 if (unlikely(gen != vcpu->last_used_slot_gen)) { in kvm_vcpu_gfn_to_memslot()
2362 vcpu->last_used_slot = NULL; in kvm_vcpu_gfn_to_memslot()
2363 vcpu->last_used_slot_gen = gen; in kvm_vcpu_gfn_to_memslot()
2366 slot = try_get_memslot(vcpu->last_used_slot, gfn); in kvm_vcpu_gfn_to_memslot()
2373 * thrashing the VM-wide last_used_slot in kvm_memslots. in kvm_vcpu_gfn_to_memslot()
2377 vcpu->last_used_slot = slot; in kvm_vcpu_gfn_to_memslot()
2384 bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) in kvm_is_visible_gfn() argument
2386 struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); in kvm_is_visible_gfn()
2411 mmap_read_lock(current->mm); in kvm_host_page_size()
2412 vma = find_vma(current->mm, addr); in kvm_host_page_size()
2419 mmap_read_unlock(current->mm); in kvm_host_page_size()
2426 return slot->flags & KVM_MEM_READONLY; in memslot_is_readonly()
2432 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) in __gfn_to_hva_many()
2439 *nr_pages = slot->npages - (gfn - slot->base_gfn); in __gfn_to_hva_many()
2457 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) in gfn_to_hva() argument
2459 return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); in gfn_to_hva()
2488 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) in gfn_to_hva_prot() argument
2490 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); in gfn_to_hva_prot()
2507 return rc == -EHWPOISON; in check_user_page_hwpoison()
2541 * 1 indicates success, -errno is returned if error is detected.
2593 if (unlikely(!(vma->vm_flags & VM_READ))) in vma_is_valid()
2596 if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE)))) in vma_is_valid()
2622 r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); in hva_to_pfn_remapped()
2629 r = fixup_user_fault(current->mm, addr, in hva_to_pfn_remapped()
2633 return -EAGAIN; in hva_to_pfn_remapped()
2637 r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); in hva_to_pfn_remapped()
2666 * tail pages of non-compound higher order allocations, which in hva_to_pfn_remapped()
2671 r = -EFAULT; in hva_to_pfn_remapped()
2684 * @interruptible: whether the process can be interrupted by non-fatal signals
2715 if (npages == -EINTR) in hva_to_pfn()
2718 mmap_read_lock(current->mm); in hva_to_pfn()
2719 if (npages == -EHWPOISON || in hva_to_pfn()
2726 vma = vma_lookup(current->mm, addr); in hva_to_pfn()
2730 else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { in hva_to_pfn()
2732 if (r == -EAGAIN) in hva_to_pfn()
2742 mmap_read_unlock(current->mm); in hva_to_pfn()
2778 kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, in gfn_to_pfn_prot() argument
2781 return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, false, in gfn_to_pfn_prot()
2806 kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) in gfn_to_pfn() argument
2808 return gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn); in gfn_to_pfn()
2826 return -1; in gfn_to_page_many_atomic()
2838 * controlled by KVM. Note, if the returned page is valid, its refcount has
2841 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) in gfn_to_page() argument
2846 pfn = gfn_to_pfn(kvm, gfn); in gfn_to_page()
2874 return -EINVAL; in kvm_vcpu_map()
2876 pfn = gfn_to_pfn(vcpu->kvm, gfn); in kvm_vcpu_map()
2878 return -EINVAL; in kvm_vcpu_map()
2890 return -EFAULT; in kvm_vcpu_map()
2892 map->page = page; in kvm_vcpu_map()
2893 map->hva = hva; in kvm_vcpu_map()
2894 map->pfn = pfn; in kvm_vcpu_map()
2895 map->gfn = gfn; in kvm_vcpu_map()
2906 if (!map->hva) in kvm_vcpu_unmap()
2909 if (map->page != KVM_UNMAPPED_PAGE) in kvm_vcpu_unmap()
2910 kunmap(map->page); in kvm_vcpu_unmap()
2913 memunmap(map->hva); in kvm_vcpu_unmap()
2917 kvm_vcpu_mark_page_dirty(vcpu, map->gfn); in kvm_vcpu_unmap()
2919 kvm_release_pfn(map->pfn, dirty); in kvm_vcpu_unmap()
2921 map->hva = NULL; in kvm_vcpu_unmap()
2922 map->page = NULL; in kvm_vcpu_unmap()
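A minimal sketch (hypothetical caller, not from this file) of the map/unmap pair above: map a guest frame into the kernel, write through the returned HVA, then unmap and flag the page dirty so the write is reflected in dirty logging:

#include <linux/kvm_host.h>
#include <linux/string.h>

static int example_touch_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	struct kvm_host_map map;

	if (kvm_vcpu_map(vcpu, gfn, &map))
		return -EFAULT;

	memset(map.hva, 0, 64);			/* illustrative write to the page */

	kvm_vcpu_unmap(vcpu, &map, true);	/* dirty = true marks the gfn dirty */
	return 0;
}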
2929 * Per page-flags.h, pages tagged PG_reserved "should in general not be in kvm_is_ad_tracked_page()
3022 if (len > PAGE_SIZE - offset) in next_segment()
3023 return PAGE_SIZE - offset; in next_segment()
3036 return -EFAULT; in __kvm_read_guest_page()
3039 return -EFAULT; in __kvm_read_guest_page()
3043 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, in kvm_read_guest_page() argument
3046 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); in kvm_read_guest_page()
3061 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) in kvm_read_guest() argument
3069 ret = kvm_read_guest_page(kvm, gfn, data, offset, seg); in kvm_read_guest()
3073 len -= seg; in kvm_read_guest()
3093 len -= seg; in kvm_vcpu_read_guest()
3109 return -EFAULT; in __kvm_read_guest_atomic()
3114 return -EFAULT; in __kvm_read_guest_atomic()
3129 static int __kvm_write_guest_page(struct kvm *kvm, in __kvm_write_guest_page() argument
3138 return -EFAULT; in __kvm_write_guest_page()
3141 return -EFAULT; in __kvm_write_guest_page()
3142 mark_page_dirty_in_slot(kvm, memslot, gfn); in __kvm_write_guest_page()
3146 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, in kvm_write_guest_page() argument
3149 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); in kvm_write_guest_page()
3151 return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len); in kvm_write_guest_page()
3160 return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len); in kvm_vcpu_write_guest_page()
3164 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, in kvm_write_guest() argument
3173 ret = kvm_write_guest_page(kvm, gfn, data, offset, seg); in kvm_write_guest()
3177 len -= seg; in kvm_write_guest()
3198 len -= seg; in kvm_vcpu_write_guest()
3212 gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; in __kvm_gfn_to_hva_cache_init()
3213 gfn_t nr_pages_needed = end_gfn - start_gfn + 1; in __kvm_gfn_to_hva_cache_init()
3216 /* Update ghc->generation before performing any error checks. */ in __kvm_gfn_to_hva_cache_init()
3217 ghc->generation = slots->generation; in __kvm_gfn_to_hva_cache_init()
3220 ghc->hva = KVM_HVA_ERR_BAD; in __kvm_gfn_to_hva_cache_init()
3221 return -EINVAL; in __kvm_gfn_to_hva_cache_init()
3229 ghc->memslot = __gfn_to_memslot(slots, start_gfn); in __kvm_gfn_to_hva_cache_init()
3230 ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, in __kvm_gfn_to_hva_cache_init()
3232 if (kvm_is_error_hva(ghc->hva)) in __kvm_gfn_to_hva_cache_init()
3233 return -EFAULT; in __kvm_gfn_to_hva_cache_init()
3238 ghc->hva += offset; in __kvm_gfn_to_hva_cache_init()
3240 ghc->memslot = NULL; in __kvm_gfn_to_hva_cache_init()
3242 ghc->gpa = gpa; in __kvm_gfn_to_hva_cache_init()
3243 ghc->len = len; in __kvm_gfn_to_hva_cache_init()
3247 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, in kvm_gfn_to_hva_cache_init() argument
3250 struct kvm_memslots *slots = kvm_memslots(kvm); in kvm_gfn_to_hva_cache_init()
3255 int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, in kvm_write_guest_offset_cached() argument
3259 struct kvm_memslots *slots = kvm_memslots(kvm); in kvm_write_guest_offset_cached()
3261 gpa_t gpa = ghc->gpa + offset; in kvm_write_guest_offset_cached()
3263 if (WARN_ON_ONCE(len + offset > ghc->len)) in kvm_write_guest_offset_cached()
3264 return -EINVAL; in kvm_write_guest_offset_cached()
3266 if (slots->generation != ghc->generation) { in kvm_write_guest_offset_cached()
3267 if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) in kvm_write_guest_offset_cached()
3268 return -EFAULT; in kvm_write_guest_offset_cached()
3271 if (kvm_is_error_hva(ghc->hva)) in kvm_write_guest_offset_cached()
3272 return -EFAULT; in kvm_write_guest_offset_cached()
3274 if (unlikely(!ghc->memslot)) in kvm_write_guest_offset_cached()
3275 return kvm_write_guest(kvm, gpa, data, len); in kvm_write_guest_offset_cached()
3277 r = __copy_to_user((void __user *)ghc->hva + offset, data, len); in kvm_write_guest_offset_cached()
3279 return -EFAULT; in kvm_write_guest_offset_cached()
3280 mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT); in kvm_write_guest_offset_cached()
3286 int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, in kvm_write_guest_cached() argument
3289 return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); in kvm_write_guest_cached()
3293 int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, in kvm_read_guest_offset_cached() argument
3297 struct kvm_memslots *slots = kvm_memslots(kvm); in kvm_read_guest_offset_cached()
3299 gpa_t gpa = ghc->gpa + offset; in kvm_read_guest_offset_cached()
3301 if (WARN_ON_ONCE(len + offset > ghc->len)) in kvm_read_guest_offset_cached()
3302 return -EINVAL; in kvm_read_guest_offset_cached()
3304 if (slots->generation != ghc->generation) { in kvm_read_guest_offset_cached()
3305 if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) in kvm_read_guest_offset_cached()
3306 return -EFAULT; in kvm_read_guest_offset_cached()
3309 if (kvm_is_error_hva(ghc->hva)) in kvm_read_guest_offset_cached()
3310 return -EFAULT; in kvm_read_guest_offset_cached()
3312 if (unlikely(!ghc->memslot)) in kvm_read_guest_offset_cached()
3313 return kvm_read_guest(kvm, gpa, data, len); in kvm_read_guest_offset_cached()
3315 r = __copy_from_user(data, (void __user *)ghc->hva + offset, len); in kvm_read_guest_offset_cached()
3317 return -EFAULT; in kvm_read_guest_offset_cached()
3323 int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, in kvm_read_guest_cached() argument
3326 return kvm_read_guest_offset_cached(kvm, ghc, data, 0, len); in kvm_read_guest_cached()
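A hedged sketch of the gfn_to_hva_cache pattern implemented above (the caller and values are assumptions): initialize the cache once for a fixed guest physical range, after which reads and writes go through the cached HVA and avoid repeated memslot lookups unless the memslot generation has changed:

#include <linux/kvm_host.h>

static int example_cached_guest_write(struct kvm *kvm,
				      struct gfn_to_hva_cache *ghc,
				      gpa_t gpa)
{
	u64 val = 0x1234;
	int r;

	/* One-time (or per-generation) setup for [gpa, gpa + sizeof(val)). */
	r = kvm_gfn_to_hva_cache_init(kvm, ghc, gpa, sizeof(val));
	if (r)
		return r;

	/* Fast path: __copy_to_user() through the cached HVA. */
	return kvm_write_guest_cached(kvm, ghc, &val, sizeof(val));
}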
3330 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) in kvm_clear_guest() argument
3339 ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, len); in kvm_clear_guest()
3343 len -= seg; in kvm_clear_guest()
3350 void mark_page_dirty_in_slot(struct kvm *kvm, in mark_page_dirty_in_slot() argument
3357 if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm)) in mark_page_dirty_in_slot()
3360 WARN_ON_ONCE(!vcpu && !kvm_arch_allow_write_without_running_vcpu(kvm)); in mark_page_dirty_in_slot()
3364 unsigned long rel_gfn = gfn - memslot->base_gfn; in mark_page_dirty_in_slot()
3365 u32 slot = (memslot->as_id << 16) | memslot->id; in mark_page_dirty_in_slot()
3367 if (kvm->dirty_ring_size && vcpu) in mark_page_dirty_in_slot()
3369 else if (memslot->dirty_bitmap) in mark_page_dirty_in_slot()
3370 set_bit_le(rel_gfn, memslot->dirty_bitmap); in mark_page_dirty_in_slot()
3375 void mark_page_dirty(struct kvm *kvm, gfn_t gfn) in mark_page_dirty() argument
3379 memslot = gfn_to_memslot(kvm, gfn); in mark_page_dirty()
3380 mark_page_dirty_in_slot(kvm, memslot, gfn); in mark_page_dirty()
3389 mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn); in kvm_vcpu_mark_page_dirty()
3395 if (!vcpu->sigset_active) in kvm_sigset_activate()
3399 * This does a lockless modification of ->real_blocked, which is fine in kvm_sigset_activate()
3400 * because only current can change ->real_blocked and all readers of in kvm_sigset_activate()
3401 * ->real_blocked don't care as long as ->real_blocked is always a subset in kvm_sigset_activate()
3402 * of ->blocked. in kvm_sigset_activate()
3404 sigprocmask(SIG_SETMASK, &vcpu->sigset, &current->real_blocked); in kvm_sigset_activate()
3409 if (!vcpu->sigset_active) in kvm_sigset_deactivate()
3412 sigprocmask(SIG_SETMASK, &current->real_blocked, NULL); in kvm_sigset_deactivate()
3413 sigemptyset(&current->real_blocked); in kvm_sigset_deactivate()
3420 old = val = vcpu->halt_poll_ns; in grow_halt_poll_ns()
3430 vcpu->halt_poll_ns = val; in grow_halt_poll_ns()
3432 trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); in grow_halt_poll_ns()
3439 old = val = vcpu->halt_poll_ns; in shrink_halt_poll_ns()
3450 vcpu->halt_poll_ns = val; in shrink_halt_poll_ns()
3451 trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); in shrink_halt_poll_ns()
3456 int ret = -EINTR; in kvm_vcpu_check_block()
3457 int idx = srcu_read_lock(&vcpu->kvm->srcu); in kvm_vcpu_check_block()
3470 srcu_read_unlock(&vcpu->kvm->srcu, idx); in kvm_vcpu_check_block()
3477 * directly for other vCPU non-runnable states, e.g. x86's Wait-For-SIPI.
3484 vcpu->stat.generic.blocking = 1; in kvm_vcpu_block()
3506 vcpu->stat.generic.blocking = 0; in kvm_vcpu_block()
3514 struct kvm_vcpu_stat_generic *stats = &vcpu->stat.generic; in update_halt_poll_stats()
3517 ++vcpu->stat.generic.halt_attempted_poll; in update_halt_poll_stats()
3520 ++vcpu->stat.generic.halt_successful_poll; in update_halt_poll_stats()
3523 ++vcpu->stat.generic.halt_poll_invalid; in update_halt_poll_stats()
3525 stats->halt_poll_success_ns += poll_ns; in update_halt_poll_stats()
3526 KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_success_hist, poll_ns); in update_halt_poll_stats()
3528 stats->halt_poll_fail_ns += poll_ns; in update_halt_poll_stats()
3529 KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_fail_hist, poll_ns); in update_halt_poll_stats()
3535 struct kvm *kvm = vcpu->kvm; in kvm_vcpu_max_halt_poll_ns() local
3537 if (kvm->override_halt_poll_ns) { in kvm_vcpu_max_halt_poll_ns()
3539 * Ensure kvm->max_halt_poll_ns is not read before in kvm_vcpu_max_halt_poll_ns()
3540 * kvm->override_halt_poll_ns. in kvm_vcpu_max_halt_poll_ns()
3545 return READ_ONCE(kvm->max_halt_poll_ns); in kvm_vcpu_max_halt_poll_ns()
3566 if (vcpu->halt_poll_ns > max_halt_poll_ns) in kvm_vcpu_halt()
3567 vcpu->halt_poll_ns = max_halt_poll_ns; in kvm_vcpu_halt()
3569 do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; in kvm_vcpu_halt()
3573 ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns); in kvm_vcpu_halt()
3587 vcpu->stat.generic.halt_wait_ns += in kvm_vcpu_halt()
3588 ktime_to_ns(cur) - ktime_to_ns(poll_end); in kvm_vcpu_halt()
3589 KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.generic.halt_wait_hist, in kvm_vcpu_halt()
3590 ktime_to_ns(cur) - ktime_to_ns(poll_end)); in kvm_vcpu_halt()
3594 halt_ns = ktime_to_ns(cur) - ktime_to_ns(start); in kvm_vcpu_halt()
3597 * Note, halt-polling is considered successful so long as the vCPU was in kvm_vcpu_halt()
3599 * after the halt-polling loop itself, but before the full wait. in kvm_vcpu_halt()
3611 if (halt_ns <= vcpu->halt_poll_ns) in kvm_vcpu_halt()
3614 else if (vcpu->halt_poll_ns && in kvm_vcpu_halt()
3618 else if (vcpu->halt_poll_ns < max_halt_poll_ns && in kvm_vcpu_halt()
3622 vcpu->halt_poll_ns = 0; in kvm_vcpu_halt()
3633 WRITE_ONCE(vcpu->ready, true); in kvm_vcpu_wake_up()
3634 ++vcpu->stat.generic.halt_wakeup; in kvm_vcpu_wake_up()
3661 if (vcpu->mode == IN_GUEST_MODE) in kvm_vcpu_kick()
3662 WRITE_ONCE(vcpu->mode, EXITING_GUEST_MODE); in kvm_vcpu_kick()
3674 cpu = READ_ONCE(vcpu->cpu); in kvm_vcpu_kick()
3691 pid = rcu_dereference(target->pid); in kvm_vcpu_yield_to()
3708 * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
3712 * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
3717 * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
3718 * to preempted lock-holder could result in wrong VCPU selection and CPU
3719 * burning. Giving priority for a potential lock-holder increases lock
3731 eligible = !vcpu->spin_loop.in_spin_loop || in kvm_vcpu_eligible_for_directed_yield()
3732 vcpu->spin_loop.dy_eligible; in kvm_vcpu_eligible_for_directed_yield()
3734 if (vcpu->spin_loop.in_spin_loop) in kvm_vcpu_eligible_for_directed_yield()
3735 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); in kvm_vcpu_eligible_for_directed_yield()
3759 if (!list_empty_careful(&vcpu->async_pf.done)) in vcpu_dy_runnable()
3773 struct kvm *kvm = me->kvm; in kvm_vcpu_on_spin() local
3781 last_boosted_vcpu = READ_ONCE(kvm->last_boosted_vcpu); in kvm_vcpu_on_spin()
3788 * We approximate round-robin by starting at the last boosted VCPU. in kvm_vcpu_on_spin()
3791 kvm_for_each_vcpu(i, vcpu, kvm) { in kvm_vcpu_on_spin()
3797 if (!READ_ONCE(vcpu->ready)) in kvm_vcpu_on_spin()
3803 if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && in kvm_vcpu_on_spin()
3812 WRITE_ONCE(kvm->last_boosted_vcpu, i); in kvm_vcpu_on_spin()
3815 try--; in kvm_vcpu_on_spin()
3828 static bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff) in kvm_page_in_dirty_ring() argument
3833 kvm->dirty_ring_size / PAGE_SIZE); in kvm_page_in_dirty_ring()
3841 struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data; in kvm_vcpu_fault()
3844 if (vmf->pgoff == 0) in kvm_vcpu_fault()
3845 page = virt_to_page(vcpu->run); in kvm_vcpu_fault()
3847 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) in kvm_vcpu_fault()
3848 page = virt_to_page(vcpu->arch.pio_data); in kvm_vcpu_fault()
3851 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) in kvm_vcpu_fault()
3852 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); in kvm_vcpu_fault()
3854 else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff)) in kvm_vcpu_fault()
3856 &vcpu->dirty_ring, in kvm_vcpu_fault()
3857 vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET); in kvm_vcpu_fault()
3861 vmf->page = page; in kvm_vcpu_fault()
3871 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_mmap()
3874 if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) || in kvm_vcpu_mmap()
3875 kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) && in kvm_vcpu_mmap()
3876 ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED))) in kvm_vcpu_mmap()
3877 return -EINVAL; in kvm_vcpu_mmap()
3879 vma->vm_ops = &kvm_vcpu_vm_ops; in kvm_vcpu_mmap()
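/*
 * Illustrative userspace counterpart, not code from kvm_main.c, of
 * kvm_vcpu_mmap() above: page 0 of the vCPU fd is the shared kvm_run
 * structure, so the whole region is mapped once and reused for every
 * KVM_RUN.  kvm_fd (/dev/kvm) and vcpu_fd are assumed to be open already.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static struct kvm_run *map_vcpu_run(int kvm_fd, int vcpu_fd)
{
	/* Covers kvm_run plus arch-specific pages such as the PIO page. */
	int size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	void *run;

	if (size < 0)
		return NULL;

	run = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   vcpu_fd, 0);
	return run == MAP_FAILED ? NULL : run;
}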
3885 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_release()
3887 kvm_put_kvm(vcpu->kvm); in kvm_vcpu_release()
3906 snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id); in create_vcpu_fd()
3916 *val = pid_nr(rcu_dereference(vcpu->pid)); in vcpu_get_pid()
3931 snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); in kvm_create_vcpu_debugfs()
3933 vcpu->kvm->debugfs_dentry); in kvm_create_vcpu_debugfs()
3944 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) in kvm_vm_ioctl_create_vcpu() argument
3951 return -EINVAL; in kvm_vm_ioctl_create_vcpu()
3953 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
3954 if (kvm->created_vcpus >= kvm->max_vcpus) { in kvm_vm_ioctl_create_vcpu()
3955 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
3956 return -EINVAL; in kvm_vm_ioctl_create_vcpu()
3959 r = kvm_arch_vcpu_precreate(kvm, id); in kvm_vm_ioctl_create_vcpu()
3961 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
3965 kvm->created_vcpus++; in kvm_vm_ioctl_create_vcpu()
3966 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
3970 r = -ENOMEM; in kvm_vm_ioctl_create_vcpu()
3977 r = -ENOMEM; in kvm_vm_ioctl_create_vcpu()
3980 vcpu->run = page_address(page); in kvm_vm_ioctl_create_vcpu()
3982 kvm_vcpu_init(vcpu, kvm, id); in kvm_vm_ioctl_create_vcpu()
3988 if (kvm->dirty_ring_size) { in kvm_vm_ioctl_create_vcpu()
3989 r = kvm_dirty_ring_alloc(&vcpu->dirty_ring, in kvm_vm_ioctl_create_vcpu()
3990 id, kvm->dirty_ring_size); in kvm_vm_ioctl_create_vcpu()
3995 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
3998 /* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */ in kvm_vm_ioctl_create_vcpu()
3999 mutex_lock(&vcpu->mutex); in kvm_vm_ioctl_create_vcpu()
4000 mutex_unlock(&vcpu->mutex); in kvm_vm_ioctl_create_vcpu()
4003 if (kvm_get_vcpu_by_id(kvm, id)) { in kvm_vm_ioctl_create_vcpu()
4004 r = -EEXIST; in kvm_vm_ioctl_create_vcpu()
4008 vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); in kvm_vm_ioctl_create_vcpu()
4009 r = xa_reserve(&kvm->vcpu_array, vcpu->vcpu_idx, GFP_KERNEL_ACCOUNT); in kvm_vm_ioctl_create_vcpu()
4014 kvm_get_kvm(kvm); in kvm_vm_ioctl_create_vcpu()
4019 if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) { in kvm_vm_ioctl_create_vcpu()
4020 r = -EINVAL; in kvm_vm_ioctl_create_vcpu()
4026 * pointer before the incremented value of kvm->online_vcpus. in kvm_vm_ioctl_create_vcpu()
4029 atomic_inc(&kvm->online_vcpus); in kvm_vm_ioctl_create_vcpu()
4031 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4037 kvm_put_kvm_no_destroy(kvm); in kvm_vm_ioctl_create_vcpu()
4038 xa_release(&kvm->vcpu_array, vcpu->vcpu_idx); in kvm_vm_ioctl_create_vcpu()
4040 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4041 kvm_dirty_ring_free(&vcpu->dirty_ring); in kvm_vm_ioctl_create_vcpu()
4045 free_page((unsigned long)vcpu->run); in kvm_vm_ioctl_create_vcpu()
4049 mutex_lock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
4050 kvm->created_vcpus--; in kvm_vm_ioctl_create_vcpu()
4051 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_create_vcpu()
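/*
 * Illustrative userspace trigger, not code from kvm_main.c, for the
 * kvm_vm_ioctl_create_vcpu() path above.  vm_fd is assumed open; the
 * KVM_CAP_MAX_VCPU_ID probe mirrors the id check at the top of that
 * function.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int create_vcpu(int vm_fd, unsigned long vcpu_id)
{
	int max_id = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPU_ID);

	if (max_id > 0 && vcpu_id >= (unsigned long)max_id)
		return -1;

	/* On success this returns the new vCPU file descriptor. */
	return ioctl(vm_fd, KVM_CREATE_VCPU, vcpu_id);
}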
4059 vcpu->sigset_active = 1; in kvm_vcpu_ioctl_set_sigmask()
4060 vcpu->sigset = *sigset; in kvm_vcpu_ioctl_set_sigmask()
4062 vcpu->sigset_active = 0; in kvm_vcpu_ioctl_set_sigmask()
4069 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_stats_read()
4071 return kvm_stats_read(vcpu->stats_id, &kvm_vcpu_stats_header, in kvm_vcpu_stats_read()
4072 &kvm_vcpu_stats_desc[0], &vcpu->stat, in kvm_vcpu_stats_read()
4073 sizeof(vcpu->stat), user_buffer, size, offset); in kvm_vcpu_stats_read()
4078 struct kvm_vcpu *vcpu = file->private_data; in kvm_vcpu_stats_release()
4080 kvm_put_kvm(vcpu->kvm); in kvm_vcpu_stats_release()
4096 snprintf(name, sizeof(name), "kvm-vcpu-stats:%d", vcpu->vcpu_id); in kvm_vcpu_ioctl_get_stats_fd()
4108 kvm_get_kvm(vcpu->kvm); in kvm_vcpu_ioctl_get_stats_fd()
4110 file->f_mode |= FMODE_PREAD; in kvm_vcpu_ioctl_get_stats_fd()
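/*
 * Illustrative userspace sketch, not code from kvm_main.c: open the binary
 * stats file created by kvm_vcpu_ioctl_get_stats_fd() above and read its
 * header.  The header layout is the one declared in <linux/kvm.h>; vcpu_fd
 * is an assumption of the example.
 */
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int dump_vcpu_stats_header(int vcpu_fd)
{
	struct kvm_stats_header hdr;
	int stats_fd = ioctl(vcpu_fd, KVM_GET_STATS_FD, NULL);

	if (stats_fd < 0)
		return -1;

	/* The stats fd is pread()-only; all offsets come from the header. */
	if (pread(stats_fd, &hdr, sizeof(hdr), 0) != sizeof(hdr)) {
		close(stats_fd);
		return -1;
	}

	printf("%u descriptors at offset %u, data at offset %u\n",
	       hdr.num_desc, hdr.desc_offset, hdr.data_offset);
	close(stats_fd);
	return 0;
}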
4119 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_ioctl()
4125 if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) in kvm_vcpu_ioctl()
4126 return -EIO; in kvm_vcpu_ioctl()
4129 return -EINVAL; in kvm_vcpu_ioctl()
4136 if (r != -ENOIOCTLCMD) in kvm_vcpu_ioctl()
4139 if (mutex_lock_killable(&vcpu->mutex)) in kvm_vcpu_ioctl()
4140 return -EINTR; in kvm_vcpu_ioctl()
4144 r = -EINVAL; in kvm_vcpu_ioctl()
4147 oldpid = rcu_access_pointer(vcpu->pid); in kvm_vcpu_ioctl()
4157 rcu_assign_pointer(vcpu->pid, newpid); in kvm_vcpu_ioctl()
4163 trace_kvm_userspace_exit(vcpu->run->exit_reason, r); in kvm_vcpu_ioctl()
4169 r = -ENOMEM; in kvm_vcpu_ioctl()
4176 r = -EFAULT; in kvm_vcpu_ioctl()
4199 r = -ENOMEM; in kvm_vcpu_ioctl()
4205 r = -EFAULT; in kvm_vcpu_ioctl()
4227 r = -EFAULT; in kvm_vcpu_ioctl()
4236 r = -EFAULT; in kvm_vcpu_ioctl()
4245 r = -EFAULT; in kvm_vcpu_ioctl()
4251 r = -EFAULT; in kvm_vcpu_ioctl()
4260 r = -EFAULT; in kvm_vcpu_ioctl()
4273 r = -EFAULT; in kvm_vcpu_ioctl()
4277 r = -EINVAL; in kvm_vcpu_ioctl()
4280 r = -EFAULT; in kvm_vcpu_ioctl()
4281 if (copy_from_user(&sigset, sigmask_arg->sigset, in kvm_vcpu_ioctl()
4291 r = -ENOMEM; in kvm_vcpu_ioctl()
4297 r = -EFAULT; in kvm_vcpu_ioctl()
4321 mutex_unlock(&vcpu->mutex); in kvm_vcpu_ioctl()
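/*
 * Illustrative userspace sketch, not code from kvm_main.c, of the KVM_RUN
 * case dispatched by kvm_vcpu_ioctl() above: re-enter the guest until an
 * exit that userspace must handle.  `run` is the mapping of the vCPU fd
 * shown earlier; which exit reasons matter is workload-specific.
 */
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int run_vcpu(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			return -1;

		switch (run->exit_reason) {
		case KVM_EXIT_HLT:
			return 0;
		case KVM_EXIT_IO:
			/* The I/O payload lives at run + run->io.data_offset. */
			printf("port 0x%x, %u byte(s)\n",
			       run->io.port, run->io.size);
			break;
		default:
			fprintf(stderr, "unhandled exit %u\n",
				run->exit_reason);
			return -1;
		}
	}
}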
4331 struct kvm_vcpu *vcpu = filp->private_data; in kvm_vcpu_compat_ioctl()
4335 if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) in kvm_vcpu_compat_ioctl()
4336 return -EIO; in kvm_vcpu_compat_ioctl()
4345 r = -EFAULT; in kvm_vcpu_compat_ioctl()
4349 r = -EINVAL; in kvm_vcpu_compat_ioctl()
4352 r = -EFAULT; in kvm_vcpu_compat_ioctl()
4354 (compat_sigset_t __user *)sigmask_arg->sigset)) in kvm_vcpu_compat_ioctl()
4372 struct kvm_device *dev = filp->private_data; in kvm_device_mmap()
4374 if (dev->ops->mmap) in kvm_device_mmap()
4375 return dev->ops->mmap(dev, vma); in kvm_device_mmap()
4377 return -ENODEV; in kvm_device_mmap()
4388 return -EPERM; in kvm_device_ioctl_attr()
4391 return -EFAULT; in kvm_device_ioctl_attr()
4399 struct kvm_device *dev = filp->private_data; in kvm_device_ioctl()
4401 if (dev->kvm->mm != current->mm || dev->kvm->vm_dead) in kvm_device_ioctl()
4402 return -EIO; in kvm_device_ioctl()
4406 return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); in kvm_device_ioctl()
4408 return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); in kvm_device_ioctl()
4410 return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); in kvm_device_ioctl()
4412 if (dev->ops->ioctl) in kvm_device_ioctl()
4413 return dev->ops->ioctl(dev, ioctl, arg); in kvm_device_ioctl()
4415 return -ENOTTY; in kvm_device_ioctl()
4421 struct kvm_device *dev = filp->private_data; in kvm_device_release()
4422 struct kvm *kvm = dev->kvm; in kvm_device_release() local
4424 if (dev->ops->release) { in kvm_device_release()
4425 mutex_lock(&kvm->lock); in kvm_device_release()
4426 list_del(&dev->vm_node); in kvm_device_release()
4427 dev->ops->release(dev); in kvm_device_release()
4428 mutex_unlock(&kvm->lock); in kvm_device_release()
4431 kvm_put_kvm(kvm); in kvm_device_release()
4444 if (filp->f_op != &kvm_device_fops) in kvm_device_from_filp()
4447 return filp->private_data; in kvm_device_from_filp()
4460 return -ENOSPC; in kvm_register_device_ops()
4463 return -EEXIST; in kvm_register_device_ops()
4475 static int kvm_ioctl_create_device(struct kvm *kvm, in kvm_ioctl_create_device() argument
4480 bool test = cd->flags & KVM_CREATE_DEVICE_TEST; in kvm_ioctl_create_device()
4484 if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) in kvm_ioctl_create_device()
4485 return -ENODEV; in kvm_ioctl_create_device()
4487 type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table)); in kvm_ioctl_create_device()
4490 return -ENODEV; in kvm_ioctl_create_device()
4497 return -ENOMEM; in kvm_ioctl_create_device()
4499 dev->ops = ops; in kvm_ioctl_create_device()
4500 dev->kvm = kvm; in kvm_ioctl_create_device()
4502 mutex_lock(&kvm->lock); in kvm_ioctl_create_device()
4503 ret = ops->create(dev, type); in kvm_ioctl_create_device()
4505 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4509 list_add(&dev->vm_node, &kvm->devices); in kvm_ioctl_create_device()
4510 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4512 if (ops->init) in kvm_ioctl_create_device()
4513 ops->init(dev); in kvm_ioctl_create_device()
4515 kvm_get_kvm(kvm); in kvm_ioctl_create_device()
4516 ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC); in kvm_ioctl_create_device()
4518 kvm_put_kvm_no_destroy(kvm); in kvm_ioctl_create_device()
4519 mutex_lock(&kvm->lock); in kvm_ioctl_create_device()
4520 list_del(&dev->vm_node); in kvm_ioctl_create_device()
4521 if (ops->release) in kvm_ioctl_create_device()
4522 ops->release(dev); in kvm_ioctl_create_device()
4523 mutex_unlock(&kvm->lock); in kvm_ioctl_create_device()
4524 if (ops->destroy) in kvm_ioctl_create_device()
4525 ops->destroy(dev); in kvm_ioctl_create_device()
4529 cd->fd = ret; in kvm_ioctl_create_device()
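/*
 * Illustrative userspace sketch, not code from kvm_main.c, of the
 * kvm_ioctl_create_device() path above, using the VFIO pseudo-device as
 * the example type.  The KVM_CREATE_DEVICE_TEST probe is handled by the
 * same function without instantiating anything; vm_fd is assumed open.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int create_vfio_device(int vm_fd)
{
	struct kvm_create_device cd = {
		.type = KVM_DEV_TYPE_VFIO,
		.flags = KVM_CREATE_DEVICE_TEST,
	};

	/* Probe whether this device type is supported at all. */
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	/* Really create it; on success cd.fd holds the new device fd. */
	cd.flags = 0;
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;
	return cd.fd;
}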
4533 static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) in kvm_vm_ioctl_check_extension_generic() argument
4592 return kvm_vm_ioctl_check_extension(kvm, arg); in kvm_vm_ioctl_check_extension_generic()
4595 static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size) in kvm_vm_ioctl_enable_dirty_log_ring() argument
4600 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4603 if (!size || (size & (size - 1))) in kvm_vm_ioctl_enable_dirty_log_ring()
4604 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4609 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4613 return -E2BIG; in kvm_vm_ioctl_enable_dirty_log_ring()
4616 if (kvm->dirty_ring_size) in kvm_vm_ioctl_enable_dirty_log_ring()
4617 return -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4619 mutex_lock(&kvm->lock); in kvm_vm_ioctl_enable_dirty_log_ring()
4621 if (kvm->created_vcpus) { in kvm_vm_ioctl_enable_dirty_log_ring()
4623 r = -EINVAL; in kvm_vm_ioctl_enable_dirty_log_ring()
4625 kvm->dirty_ring_size = size; in kvm_vm_ioctl_enable_dirty_log_ring()
4629 mutex_unlock(&kvm->lock); in kvm_vm_ioctl_enable_dirty_log_ring()
4633 static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm) in kvm_vm_ioctl_reset_dirty_pages() argument
4639 if (!kvm->dirty_ring_size) in kvm_vm_ioctl_reset_dirty_pages()
4640 return -EINVAL; in kvm_vm_ioctl_reset_dirty_pages()
4642 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_reset_dirty_pages()
4644 kvm_for_each_vcpu(i, vcpu, kvm) in kvm_vm_ioctl_reset_dirty_pages()
4645 cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring); in kvm_vm_ioctl_reset_dirty_pages()
4647 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_reset_dirty_pages()
4650 kvm_flush_remote_tlbs(kvm); in kvm_vm_ioctl_reset_dirty_pages()
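/*
 * Illustrative userspace sketch, not code from kvm_main.c: harvesting one
 * vCPU's dirty ring and handing the entries back via KVM_RESET_DIRTY_RINGS,
 * which lands in kvm_vm_ioctl_reset_dirty_pages() above.  ring, nr_entries
 * and fetch_index are caller-maintained assumptions, and a production
 * harvester would additionally use acquire/release ordering on gfn->flags.
 */
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static uint32_t harvest_dirty_ring(int vm_fd, struct kvm_dirty_gfn *ring,
				   uint32_t nr_entries, uint32_t *fetch_index)
{
	uint32_t collected = 0;

	for (;;) {
		struct kvm_dirty_gfn *gfn = &ring[*fetch_index % nr_entries];

		if (!(gfn->flags & KVM_DIRTY_GFN_F_DIRTY))
			break;

		/* gfn->slot is (as_id << 16) | slot id, gfn->offset the gfn. */
		gfn->flags |= KVM_DIRTY_GFN_F_RESET;
		(*fetch_index)++;
		collected++;
	}

	/* Ask KVM to recycle every entry flagged for reset, on all rings. */
	if (collected)
		ioctl(vm_fd, KVM_RESET_DIRTY_RINGS, 0);

	return collected;
}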
4655 int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm, in kvm_vm_ioctl_enable_cap() argument
4658 return -EINVAL; in kvm_vm_ioctl_enable_cap()
4661 bool kvm_are_all_memslots_empty(struct kvm *kvm) in kvm_are_all_memslots_empty() argument
4665 lockdep_assert_held(&kvm->slots_lock); in kvm_are_all_memslots_empty()
4668 if (!kvm_memslots_empty(__kvm_memslots(kvm, i))) in kvm_are_all_memslots_empty()
4676 static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, in kvm_vm_ioctl_enable_cap_generic() argument
4679 switch (cap->cap) { in kvm_vm_ioctl_enable_cap_generic()
4684 if (cap->args[0] & KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE) in kvm_vm_ioctl_enable_cap_generic()
4687 if (cap->flags || (cap->args[0] & ~allowed_options)) in kvm_vm_ioctl_enable_cap_generic()
4688 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
4689 kvm->manual_dirty_log_protect = cap->args[0]; in kvm_vm_ioctl_enable_cap_generic()
4694 if (cap->flags || cap->args[0] != (unsigned int)cap->args[0]) in kvm_vm_ioctl_enable_cap_generic()
4695 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
4697 kvm->max_halt_poll_ns = cap->args[0]; in kvm_vm_ioctl_enable_cap_generic()
4700 * Ensure kvm->override_halt_poll_ns does not become visible in kvm_vm_ioctl_enable_cap_generic()
4701 * before kvm->max_halt_poll_ns. in kvm_vm_ioctl_enable_cap_generic()
4706 kvm->override_halt_poll_ns = true; in kvm_vm_ioctl_enable_cap_generic()
4712 if (!kvm_vm_ioctl_check_extension_generic(kvm, cap->cap)) in kvm_vm_ioctl_enable_cap_generic()
4713 return -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
4715 return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); in kvm_vm_ioctl_enable_cap_generic()
4717 int r = -EINVAL; in kvm_vm_ioctl_enable_cap_generic()
4720 !kvm->dirty_ring_size || cap->flags) in kvm_vm_ioctl_enable_cap_generic()
4723 mutex_lock(&kvm->slots_lock); in kvm_vm_ioctl_enable_cap_generic()
4730 if (kvm_are_all_memslots_empty(kvm)) { in kvm_vm_ioctl_enable_cap_generic()
4731 kvm->dirty_ring_with_bitmap = true; in kvm_vm_ioctl_enable_cap_generic()
4735 mutex_unlock(&kvm->slots_lock); in kvm_vm_ioctl_enable_cap_generic()
4740 return kvm_vm_ioctl_enable_cap(kvm, cap); in kvm_vm_ioctl_enable_cap_generic()
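/*
 * Illustrative userspace sketch, not code from kvm_main.c: the
 * KVM_CAP_HALT_POLL branch of kvm_vm_ioctl_enable_cap_generic() above is
 * reached with a plain KVM_ENABLE_CAP on the VM fd.  vm_fd and max_ns are
 * assumptions of the example.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int set_vm_halt_poll_ns(int vm_fd, unsigned int max_ns)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_HALT_POLL,
		.args = { max_ns },
	};

	/* Overrides the module-wide halt_poll_ns limit for this VM only. */
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}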
4747 struct kvm *kvm = file->private_data; in kvm_vm_stats_read() local
4749 return kvm_stats_read(kvm->stats_id, &kvm_vm_stats_header, in kvm_vm_stats_read()
4750 &kvm_vm_stats_desc[0], &kvm->stat, in kvm_vm_stats_read()
4751 sizeof(kvm->stat), user_buffer, size, offset); in kvm_vm_stats_read()
4756 struct kvm *kvm = file->private_data; in kvm_vm_stats_release() local
4758 kvm_put_kvm(kvm); in kvm_vm_stats_release()
4768 static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm) in kvm_vm_ioctl_get_stats_fd() argument
4777 file = anon_inode_getfile("kvm-vm-stats", in kvm_vm_ioctl_get_stats_fd()
4778 &kvm_vm_stats_fops, kvm, O_RDONLY); in kvm_vm_ioctl_get_stats_fd()
4784 kvm_get_kvm(kvm); in kvm_vm_ioctl_get_stats_fd()
4786 file->f_mode |= FMODE_PREAD; in kvm_vm_ioctl_get_stats_fd()
4795 struct kvm *kvm = filp->private_data; in kvm_vm_ioctl() local
4799 if (kvm->mm != current->mm || kvm->vm_dead) in kvm_vm_ioctl()
4800 return -EIO; in kvm_vm_ioctl()
4803 r = kvm_vm_ioctl_create_vcpu(kvm, arg); in kvm_vm_ioctl()
4808 r = -EFAULT; in kvm_vm_ioctl()
4811 r = kvm_vm_ioctl_enable_cap_generic(kvm, &cap); in kvm_vm_ioctl()
4817 r = -EFAULT; in kvm_vm_ioctl()
4822 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); in kvm_vm_ioctl()
4828 r = -EFAULT; in kvm_vm_ioctl()
4831 r = kvm_vm_ioctl_get_dirty_log(kvm, &log); in kvm_vm_ioctl()
4838 r = -EFAULT; in kvm_vm_ioctl()
4841 r = kvm_vm_ioctl_clear_dirty_log(kvm, &log); in kvm_vm_ioctl()
4849 r = -EFAULT; in kvm_vm_ioctl()
4852 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); in kvm_vm_ioctl()
4858 r = -EFAULT; in kvm_vm_ioctl()
4861 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); in kvm_vm_ioctl()
4868 r = -EFAULT; in kvm_vm_ioctl()
4871 r = kvm_irqfd(kvm, &data); in kvm_vm_ioctl()
4877 r = -EFAULT; in kvm_vm_ioctl()
4880 r = kvm_ioeventfd(kvm, &data); in kvm_vm_ioctl()
4887 r = -EFAULT; in kvm_vm_ioctl()
4890 r = kvm_send_userspace_msi(kvm, &msi); in kvm_vm_ioctl()
4899 r = -EFAULT; in kvm_vm_ioctl()
4903 r = kvm_vm_ioctl_irq_line(kvm, &irq_event, in kvm_vm_ioctl()
4908 r = -EFAULT; in kvm_vm_ioctl()
4924 r = -EFAULT; in kvm_vm_ioctl()
4927 r = -EINVAL; in kvm_vm_ioctl()
4928 if (!kvm_arch_can_set_irq_routing(kvm)) in kvm_vm_ioctl()
4936 entries = vmemdup_user(urouting->entries, in kvm_vm_ioctl()
4944 r = kvm_set_irq_routing(kvm, entries, routing.nr, in kvm_vm_ioctl()
4953 r = -EFAULT; in kvm_vm_ioctl()
4957 r = kvm_ioctl_create_device(kvm, &cd); in kvm_vm_ioctl()
4961 r = -EFAULT; in kvm_vm_ioctl()
4969 r = kvm_vm_ioctl_check_extension_generic(kvm, arg); in kvm_vm_ioctl()
4972 r = kvm_vm_ioctl_reset_dirty_pages(kvm); in kvm_vm_ioctl()
4975 r = kvm_vm_ioctl_get_stats_fd(kvm); in kvm_vm_ioctl()
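/*
 * Illustrative userspace sketch, not code from kvm_main.c, for the
 * KVM_IRQFD and KVM_IOEVENTFD cases dispatched above: wire one eventfd to
 * GSI injection and another to an MMIO doorbell.  It assumes an in-kernel
 * irqchip already exists; the GSI number and doorbell address are
 * assumptions of the example.
 */
#include <linux/kvm.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

static int wire_eventfds(int vm_fd, unsigned int gsi, __u64 doorbell_gpa)
{
	int irq_efd = eventfd(0, EFD_CLOEXEC);
	int mmio_efd = eventfd(0, EFD_CLOEXEC);
	struct kvm_irqfd irqfd = { .fd = irq_efd, .gsi = gsi };
	struct kvm_ioeventfd ioeventfd = {
		.addr = doorbell_gpa,
		.len = 4,
		.fd = mmio_efd,
	};

	if (irq_efd < 0 || mmio_efd < 0)
		return -1;

	/*
	 * Signalling irq_efd now injects `gsi`; 4-byte guest writes to
	 * doorbell_gpa signal mmio_efd without a return to userspace.
	 */
	if (ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0 ||
	    ioctl(vm_fd, KVM_IOEVENTFD, &ioeventfd) < 0)
		return -1;
	return 0;
}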
5007 return -ENOTTY; in kvm_arch_vm_compat_ioctl()
5013 struct kvm *kvm = filp->private_data; in kvm_vm_compat_ioctl() local
5016 if (kvm->mm != current->mm || kvm->vm_dead) in kvm_vm_compat_ioctl()
5017 return -EIO; in kvm_vm_compat_ioctl()
5020 if (r != -ENOTTY) in kvm_vm_compat_ioctl()
5031 return -EFAULT; in kvm_vm_compat_ioctl()
5038 r = kvm_vm_ioctl_clear_dirty_log(kvm, &log); in kvm_vm_compat_ioctl()
5048 return -EFAULT; in kvm_vm_compat_ioctl()
5054 r = kvm_vm_ioctl_get_dirty_log(kvm, &log); in kvm_vm_compat_ioctl()
5073 return file && file->f_op == &kvm_vm_fops; in file_is_kvm()
5081 struct kvm *kvm; in kvm_dev_ioctl_create_vm() local
5090 kvm = kvm_create_vm(type, fdname); in kvm_dev_ioctl_create_vm()
5091 if (IS_ERR(kvm)) { in kvm_dev_ioctl_create_vm()
5092 r = PTR_ERR(kvm); in kvm_dev_ioctl_create_vm()
5096 file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); in kvm_dev_ioctl_create_vm()
5103 * Don't call kvm_put_kvm anymore at this point; file->f_op is in kvm_dev_ioctl_create_vm()
5104 * already set, with ->release() being kvm_vm_release(). In error in kvm_dev_ioctl_create_vm()
5106 * care of doing kvm_put_kvm(kvm). in kvm_dev_ioctl_create_vm()
5108 kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm); in kvm_dev_ioctl_create_vm()
5114 kvm_put_kvm(kvm); in kvm_dev_ioctl_create_vm()
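/*
 * Illustrative userspace sketch, not code from kvm_main.c, of the
 * KVM_CREATE_VM path served by kvm_dev_ioctl_create_vm() above: open
 * /dev/kvm, sanity-check the API version, then create a VM of the
 * architecture's default type (machine type 0).
 */
#include <fcntl.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int open_vm(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	int vm_fd;

	if (kvm_fd < 0)
		return -1;

	if (ioctl(kvm_fd, KVM_GET_API_VERSION, 0) != KVM_API_VERSION) {
		close(kvm_fd);
		return -1;
	}

	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
	close(kvm_fd);
	return vm_fd;
}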
5123 int r = -EINVAL; in kvm_dev_ioctl()
5151 r = -EOPNOTSUPP; in kvm_dev_ioctl()
5168 "kvm",
5186 pr_info("kvm: enabling virtualization on CPU%d failed\n", in __hardware_enable_nolock()
5188 return -EIO; in __hardware_enable_nolock()
5244 kvm_usage_count--; in hardware_disable_all_nolock()
5265 * If userspace initiated a forced reboot, e.g. reboot -f, then it's in hardware_enable_all()
5266 * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling in hardware_enable_all()
5268 * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops in hardware_enable_all()
5274 return -EBUSY; in hardware_enable_all()
5281 * usage count is non-zero. Disable CPU hotplug to avoid attempting to in hardware_enable_all()
5295 r = -EBUSY; in hardware_enable_all()
5309 * that KVM has asynchronously disabled hardware virtualization, i.e. in kvm_shutdown()
5318 pr_info("kvm: exiting hardware virtualization\n"); in kvm_shutdown()
5329 * the system isn't suspended while KVM is enabling hardware. Hardware in kvm_suspend()
5369 if (dev->ops->destructor) in kvm_iodevice_destructor()
5370 dev->ops->destructor(dev); in kvm_iodevice_destructor()
5377 for (i = 0; i < bus->dev_count; i++) { in kvm_io_bus_destroy()
5378 struct kvm_io_device *pos = bus->range[i].dev; in kvm_io_bus_destroy()
5388 gpa_t addr1 = r1->addr; in kvm_io_bus_cmp()
5389 gpa_t addr2 = r2->addr; in kvm_io_bus_cmp()
5392 return -1; in kvm_io_bus_cmp()
5394 /* If r2->len == 0, match the exact address. If r2->len != 0, in kvm_io_bus_cmp()
5399 if (r2->len) { in kvm_io_bus_cmp()
5400 addr1 += r1->len; in kvm_io_bus_cmp()
5401 addr2 += r2->len; in kvm_io_bus_cmp()
5426 range = bsearch(&key, bus->range, bus->dev_count, in kvm_io_bus_get_first_dev()
5429 return -ENOENT; in kvm_io_bus_get_first_dev()
5431 off = range - bus->range; in kvm_io_bus_get_first_dev()
5433 while (off > 0 && kvm_io_bus_cmp(&key, &bus->range[off-1]) == 0) in kvm_io_bus_get_first_dev()
5434 off--; in kvm_io_bus_get_first_dev()
5444 idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); in __kvm_io_bus_write()
5446 return -EOPNOTSUPP; in __kvm_io_bus_write()
5448 while (idx < bus->dev_count && in __kvm_io_bus_write()
5449 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { in __kvm_io_bus_write()
5450 if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr, in __kvm_io_bus_write()
5451 range->len, val)) in __kvm_io_bus_write()
5456 return -EOPNOTSUPP; in __kvm_io_bus_write()
5459 /* kvm_io_bus_write - called under kvm->slots_lock */
5472 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); in kvm_io_bus_write()
5474 return -ENOMEM; in kvm_io_bus_write()
5480 /* kvm_io_bus_write_cookie - called under kvm->slots_lock */
5492 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); in kvm_io_bus_write_cookie()
5494 return -ENOMEM; in kvm_io_bus_write_cookie()
5497 if ((cookie >= 0) && (cookie < bus->dev_count) && in kvm_io_bus_write_cookie()
5498 (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) in kvm_io_bus_write_cookie()
5499 if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len, in kvm_io_bus_write_cookie()
5515 idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); in __kvm_io_bus_read()
5517 return -EOPNOTSUPP; in __kvm_io_bus_read()
5519 while (idx < bus->dev_count && in __kvm_io_bus_read()
5520 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { in __kvm_io_bus_read()
5521 if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr, in __kvm_io_bus_read()
5522 range->len, val)) in __kvm_io_bus_read()
5527 return -EOPNOTSUPP; in __kvm_io_bus_read()
5530 /* kvm_io_bus_read - called under kvm->slots_lock */
5543 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); in kvm_io_bus_read()
5545 return -ENOMEM; in kvm_io_bus_read()
5551 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, in kvm_io_bus_register_dev() argument
5558 bus = kvm_get_bus(kvm, bus_idx); in kvm_io_bus_register_dev()
5560 return -ENOMEM; in kvm_io_bus_register_dev()
5563 if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) in kvm_io_bus_register_dev()
5564 return -ENOSPC; in kvm_io_bus_register_dev()
5566 new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1), in kvm_io_bus_register_dev()
5569 return -ENOMEM; in kvm_io_bus_register_dev()
5577 for (i = 0; i < bus->dev_count; i++) in kvm_io_bus_register_dev()
5578 if (kvm_io_bus_cmp(&bus->range[i], &range) > 0) in kvm_io_bus_register_dev()
5582 new_bus->dev_count++; in kvm_io_bus_register_dev()
5583 new_bus->range[i] = range; in kvm_io_bus_register_dev()
5584 memcpy(new_bus->range + i + 1, bus->range + i, in kvm_io_bus_register_dev()
5585 (bus->dev_count - i) * sizeof(struct kvm_io_range)); in kvm_io_bus_register_dev()
5586 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); in kvm_io_bus_register_dev()
5587 synchronize_srcu_expedited(&kvm->srcu); in kvm_io_bus_register_dev()
5593 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, in kvm_io_bus_unregister_dev() argument
5599 lockdep_assert_held(&kvm->slots_lock); in kvm_io_bus_unregister_dev()
5601 bus = kvm_get_bus(kvm, bus_idx); in kvm_io_bus_unregister_dev()
5605 for (i = 0; i < bus->dev_count; i++) { in kvm_io_bus_unregister_dev()
5606 if (bus->range[i].dev == dev) { in kvm_io_bus_unregister_dev()
5611 if (i == bus->dev_count) in kvm_io_bus_unregister_dev()
5614 new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1), in kvm_io_bus_unregister_dev()
5618 new_bus->dev_count--; in kvm_io_bus_unregister_dev()
5619 memcpy(new_bus->range + i, bus->range + i + 1, in kvm_io_bus_unregister_dev()
5620 flex_array_size(new_bus, range, new_bus->dev_count - i)); in kvm_io_bus_unregister_dev()
5623 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); in kvm_io_bus_unregister_dev()
5624 synchronize_srcu_expedited(&kvm->srcu); in kvm_io_bus_unregister_dev()
5631 pr_err("kvm: failed to shrink bus, removing it completely\n"); in kvm_io_bus_unregister_dev()
5633 return -ENOMEM; in kvm_io_bus_unregister_dev()
5641 struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, in kvm_io_bus_get_dev() argument
5648 srcu_idx = srcu_read_lock(&kvm->srcu); in kvm_io_bus_get_dev()
5650 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); in kvm_io_bus_get_dev()
5658 iodev = bus->range[dev_idx].dev; in kvm_io_bus_get_dev()
5661 srcu_read_unlock(&kvm->srcu, srcu_idx); in kvm_io_bus_get_dev()
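/*
 * Minimal kernel-side sketch, not code from this file, of how a device
 * plugs into the kvm_io_bus machinery above: initialize a kvm_io_device
 * with its ops and register it on the MMIO bus under kvm->slots_lock.
 * The foo_* names are hypothetical.
 */
#include <kvm/iodev.h>
#include <linux/kvm_host.h>

struct foo_device {
	struct kvm_io_device dev;
	u32 reg;
};

static int foo_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
			 gpa_t addr, int len, void *val)
{
	struct foo_device *foo = container_of(this, struct foo_device, dev);

	/* A non-zero return lets __kvm_io_bus_read() try the next device. */
	if (len != sizeof(foo->reg))
		return -EOPNOTSUPP;
	memcpy(val, &foo->reg, len);
	return 0;
}

static const struct kvm_io_device_ops foo_ops = {
	.read = foo_mmio_read,
};

static int foo_register(struct kvm *kvm, struct foo_device *foo, gpa_t addr)
{
	int ret;

	kvm_iodevice_init(&foo->dev, &foo_ops);

	mutex_lock(&kvm->slots_lock);
	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, addr,
				      sizeof(foo->reg), &foo->dev);
	mutex_unlock(&kvm->slots_lock);

	return ret;
}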
5672 struct kvm_stat_data *stat_data = inode->i_private; in kvm_debugfs_open()
5675 * The debugfs files are a reference to the kvm struct which in kvm_debugfs_open()
5679 if (!kvm_get_kvm_safe(stat_data->kvm)) in kvm_debugfs_open()
5680 return -ENOENT; in kvm_debugfs_open()
5683 kvm_stats_debugfs_mode(stat_data->desc) & 0222 in kvm_debugfs_open()
5686 kvm_put_kvm(stat_data->kvm); in kvm_debugfs_open()
5693 struct kvm_stat_data *stat_data = inode->i_private; in kvm_debugfs_release()
5696 kvm_put_kvm(stat_data->kvm); in kvm_debugfs_release()
5701 static int kvm_get_stat_per_vm(struct kvm *kvm, size_t offset, u64 *val) in kvm_get_stat_per_vm() argument
5703 *val = *(u64 *)((void *)(&kvm->stat) + offset); in kvm_get_stat_per_vm()
5708 static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset) in kvm_clear_stat_per_vm() argument
5710 *(u64 *)((void *)(&kvm->stat) + offset) = 0; in kvm_clear_stat_per_vm()
5715 static int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val) in kvm_get_stat_per_vcpu() argument
5722 kvm_for_each_vcpu(i, vcpu, kvm) in kvm_get_stat_per_vcpu()
5723 *val += *(u64 *)((void *)(&vcpu->stat) + offset); in kvm_get_stat_per_vcpu()
5728 static int kvm_clear_stat_per_vcpu(struct kvm *kvm, size_t offset) in kvm_clear_stat_per_vcpu() argument
5733 kvm_for_each_vcpu(i, vcpu, kvm) in kvm_clear_stat_per_vcpu()
5734 *(u64 *)((void *)(&vcpu->stat) + offset) = 0; in kvm_clear_stat_per_vcpu()
5741 int r = -EFAULT; in kvm_stat_data_get()
5744 switch (stat_data->kind) { in kvm_stat_data_get()
5746 r = kvm_get_stat_per_vm(stat_data->kvm, in kvm_stat_data_get()
5747 stat_data->desc->desc.offset, val); in kvm_stat_data_get()
5750 r = kvm_get_stat_per_vcpu(stat_data->kvm, in kvm_stat_data_get()
5751 stat_data->desc->desc.offset, val); in kvm_stat_data_get()
5760 int r = -EFAULT; in kvm_stat_data_clear()
5764 return -EINVAL; in kvm_stat_data_clear()
5766 switch (stat_data->kind) { in kvm_stat_data_clear()
5768 r = kvm_clear_stat_per_vm(stat_data->kvm, in kvm_stat_data_clear()
5769 stat_data->desc->desc.offset); in kvm_stat_data_clear()
5772 r = kvm_clear_stat_per_vcpu(stat_data->kvm, in kvm_stat_data_clear()
5773 stat_data->desc->desc.offset); in kvm_stat_data_clear()
5799 struct kvm *kvm; in vm_stat_get() local
5804 list_for_each_entry(kvm, &vm_list, vm_list) { in vm_stat_get()
5805 kvm_get_stat_per_vm(kvm, offset, &tmp_val); in vm_stat_get()
5815 struct kvm *kvm; in vm_stat_clear() local
5818 return -EINVAL; in vm_stat_clear()
5821 list_for_each_entry(kvm, &vm_list, vm_list) { in vm_stat_clear()
5822 kvm_clear_stat_per_vm(kvm, offset); in vm_stat_clear()
5835 struct kvm *kvm; in vcpu_stat_get() local
5840 list_for_each_entry(kvm, &vm_list, vm_list) { in vcpu_stat_get()
5841 kvm_get_stat_per_vcpu(kvm, offset, &tmp_val); in vcpu_stat_get()
5851 struct kvm *kvm; in vcpu_stat_clear() local
5854 return -EINVAL; in vcpu_stat_clear()
5857 list_for_each_entry(kvm, &vm_list, vm_list) { in vcpu_stat_clear()
5858 kvm_clear_stat_per_vcpu(kvm, offset); in vcpu_stat_clear()
5869 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) in kvm_uevent_notify_change() argument
5874 if (!kvm_dev.this_device || !kvm) in kvm_uevent_notify_change()
5882 kvm_active_vms--; in kvm_uevent_notify_change()
5897 kvm->userspace_pid = task_pid_nr(current); in kvm_uevent_notify_change()
5901 add_uevent_var(env, "PID=%d", kvm->userspace_pid); in kvm_uevent_notify_change()
5903 if (!IS_ERR(kvm->debugfs_dentry)) { in kvm_uevent_notify_change()
5907 tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); in kvm_uevent_notify_change()
5914 env->envp[env->envp_idx++] = NULL; in kvm_uevent_notify_change()
5915 kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp); in kvm_uevent_notify_change()
5925 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); in kvm_init_debug()
5933 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_init_debug()
5935 (void *)(long)pdesc->desc.offset, fops); in kvm_init_debug()
5944 debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc), in kvm_init_debug()
5946 (void *)(long)pdesc->desc.offset, fops); in kvm_init_debug()
5960 WRITE_ONCE(vcpu->preempted, false); in kvm_sched_in()
5961 WRITE_ONCE(vcpu->ready, false); in kvm_sched_in()
5973 if (current->on_rq) { in kvm_sched_out()
5974 WRITE_ONCE(vcpu->preempted, true); in kvm_sched_out()
5975 WRITE_ONCE(vcpu->ready, true); in kvm_sched_out()
5982 * kvm_get_running_vcpu - get the vcpu running on the current CPU.
5984 * We can disable preemption locally around accessing the per-CPU variable,
5987 * the per-CPU value later will give us the same value as we update the
5988 * per-CPU variable in the preempt notifier handlers.
6003 * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.
6060 r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online", in kvm_init()
6076 - offsetof(struct kvm_vcpu, arch), in kvm_init()
6079 r = -ENOMEM; in kvm_init()
6086 r = -ENOMEM; in kvm_init()
6112 * /dev/kvm to userspace, i.e. all infrastructure must be setup! in kvm_init()
6116 pr_err("kvm: misc device register failed\n"); in kvm_init()
6147 * Note, unregistering /dev/kvm doesn't strictly need to come first, in kvm_exit()
6149 * to KVM while the module is being stopped. in kvm_exit()
6168 struct kvm *kvm; member
6184 struct kvm *kvm = init_context->kvm; in kvm_vm_worker_thread() local
6185 kvm_vm_thread_fn_t thread_fn = init_context->thread_fn; in kvm_vm_worker_thread()
6186 uintptr_t data = init_context->data; in kvm_vm_worker_thread()
6195 err = cgroup_attach_task_all(init_context->parent, current); in kvm_vm_worker_thread()
6202 set_user_nice(current, task_nice(init_context->parent)); in kvm_vm_worker_thread()
6205 init_context->err = err; in kvm_vm_worker_thread()
6206 complete(&init_context->init_done); in kvm_vm_worker_thread()
6216 err = thread_fn(kvm, data); in kvm_vm_worker_thread()
6231 parent = rcu_dereference(current->real_parent); in kvm_vm_worker_thread()
6240 int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, in kvm_vm_create_worker_thread() argument
6248 init_context.kvm = kvm; in kvm_vm_create_worker_thread()
6255 "%s-%d", name, task_pid_nr(current)); in kvm_vm_create_worker_thread()