x86.c: diff between commits 402efdb48d6a6aef9e91e3112a94b679d787fef7 (old) and 2df72e9bc4c505d8357012f2924589f3d16f9d44 (new)
1/*
2 * Kernel-based Virtual Machine driver for Linux
3 *
4 * derived from drivers/kvm/kvm_main.c
5 *
6 * Copyright (C) 2006 Qumranet, Inc.
7 * Copyright (C) 2008 Qumranet, Inc.
8 * Copyright IBM Corporation, 2008

--- 232 unchanged lines hidden (view full) ---

241 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
242
243 if (smsr->registered)
244 kvm_on_user_return(&smsr->urn);
245}
246
247u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
248{
1/*
2 * Kernel-based Virtual Machine driver for Linux
3 *
4 * derived from drivers/kvm/kvm_main.c
5 *
6 * Copyright (C) 2006 Qumranet, Inc.
7 * Copyright (C) 2008 Qumranet, Inc.
8 * Copyright IBM Corporation, 2008

--- 232 unchanged lines hidden (view full) ---

241 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
242
243 if (smsr->registered)
244 kvm_on_user_return(&smsr->urn);
245}
246
247u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
248{
249 if (irqchip_in_kernel(vcpu->kvm))
250 return vcpu->arch.apic_base;
251 else
252 return vcpu->arch.apic_base;
249 return vcpu->arch.apic_base;
253}
254EXPORT_SYMBOL_GPL(kvm_get_apic_base);
255
256void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
257{
258 /* TODO: reserve bits check */
250}
251EXPORT_SYMBOL_GPL(kvm_get_apic_base);
252
253void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
254{
255 /* TODO: reserve bits check */
259 if (irqchip_in_kernel(vcpu->kvm))
260 kvm_lapic_set_base(vcpu, data);
261 else
262 vcpu->arch.apic_base = data;
256 kvm_lapic_set_base(vcpu, data);
263}
264EXPORT_SYMBOL_GPL(kvm_set_apic_base);
265
266#define EXCPT_BENIGN 0
267#define EXCPT_CONTRIBUTORY 1
268#define EXCPT_PF 2
269
270static int exception_class(int vector)

--- 547 unchanged lines hidden (view full) ---

818#ifdef CONFIG_X86_64
819 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
820#endif
821 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
822};
823
824static unsigned num_msrs_to_save;
825
257}
258EXPORT_SYMBOL_GPL(kvm_set_apic_base);
259
260#define EXCPT_BENIGN 0
261#define EXCPT_CONTRIBUTORY 1
262#define EXCPT_PF 2
263
264static int exception_class(int vector)

--- 547 unchanged lines hidden (view full) ---
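The EXCPT_* classes above feed the double-fault promotion logic in exception_class(), whose body falls in the elided region. As a rough sketch only, based on the architectural double-fault classification rather than a reproduction of the hidden code: #PF forms its own class, the contributory exceptions are #DE, #TS, #NP, #SS and #GP, and everything else is benign.

static int exception_class_sketch(int vector)
{
	switch (vector) {
	case 14:		/* #PF */
		return EXCPT_PF;
	case 0:			/* #DE */
	case 10:		/* #TS */
	case 11:		/* #NP */
	case 12:		/* #SS */
	case 13:		/* #GP */
		return EXCPT_CONTRIBUTORY;
	default:
		return EXCPT_BENIGN;
	}
}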

812#ifdef CONFIG_X86_64
813 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
814#endif
815 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
816};
817
818static unsigned num_msrs_to_save;
819
826static u32 emulated_msrs[] = {
820static const u32 emulated_msrs[] = {
827 MSR_IA32_TSCDEADLINE,
828 MSR_IA32_MISC_ENABLE,
829 MSR_IA32_MCG_STATUS,
830 MSR_IA32_MCG_CTL,
831};
832
833static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
834{

--- 257 unchanged lines hidden (view full) ---

1092 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1093 }
1094 } else {
1095 /*
1096 * We split periods of matched TSC writes into generations.
1097 * For each generation, we track the original measured
1098 * nanosecond time, offset, and write, so if TSCs are in
1099 * sync, we can match exact offset, and if not, we can match
821 MSR_IA32_TSCDEADLINE,
822 MSR_IA32_MISC_ENABLE,
823 MSR_IA32_MCG_STATUS,
824 MSR_IA32_MCG_CTL,
825};
826
827static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
828{

--- 257 unchanged lines hidden (view full) ---

1086 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1087 }
1088 } else {
1089 /*
1090 * We split periods of matched TSC writes into generations.
1091 * For each generation, we track the original measured
1092 * nanosecond time, offset, and write, so if TSCs are in
1093 * sync, we can match exact offset, and if not, we can match
1100 * exact software computaion in compute_guest_tsc()
1094 * exact software computation in compute_guest_tsc()
1101 *
1102 * These values are tracked in kvm->arch.cur_xxx variables.
1103 */
1104 kvm->arch.cur_tsc_generation++;
1105 kvm->arch.cur_tsc_nsec = ns;
1106 kvm->arch.cur_tsc_write = data;
1107 kvm->arch.cur_tsc_offset = offset;
1108 pr_debug("kvm: new tsc generation %u, clock %llu\n",

--- 26 unchanged lines hidden (view full) ---

1135static int kvm_guest_time_update(struct kvm_vcpu *v)
1136{
1137 unsigned long flags;
1138 struct kvm_vcpu_arch *vcpu = &v->arch;
1139 void *shared_kaddr;
1140 unsigned long this_tsc_khz;
1141 s64 kernel_ns, max_kernel_ns;
1142 u64 tsc_timestamp;
1095 *
1096 * These values are tracked in kvm->arch.cur_xxx variables.
1097 */
1098 kvm->arch.cur_tsc_generation++;
1099 kvm->arch.cur_tsc_nsec = ns;
1100 kvm->arch.cur_tsc_write = data;
1101 kvm->arch.cur_tsc_offset = offset;
1102 pr_debug("kvm: new tsc generation %u, clock %llu\n",

--- 26 unchanged lines hidden (view full) ---
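To make the "matched write" idea above concrete: within one generation, the guest TSC can be recomputed from the generation's base write plus the elapsed nanoseconds scaled by the virtual TSC frequency. A hypothetical helper, not the kernel's code (compute_guest_tsc() mentioned in the comment uses a more precise fixed-point scaling):

#include <stdint.h>

static uint64_t guest_tsc_at(uint64_t cur_tsc_write, uint64_t cur_tsc_nsec,
			     uint64_t now_nsec, uint32_t virtual_tsc_khz)
{
	uint64_t elapsed_ns = now_nsec - cur_tsc_nsec;

	/* kHz * ns / 10^6 = cycles; overflow handling omitted in this sketch */
	return cur_tsc_write + elapsed_ns * virtual_tsc_khz / 1000000u;
}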

1129static int kvm_guest_time_update(struct kvm_vcpu *v)
1130{
1131 unsigned long flags;
1132 struct kvm_vcpu_arch *vcpu = &v->arch;
1133 void *shared_kaddr;
1134 unsigned long this_tsc_khz;
1135 s64 kernel_ns, max_kernel_ns;
1136 u64 tsc_timestamp;
1137 u8 pvclock_flags;
1143
1144 /* Keep irq disabled to prevent changes to the clock */
1145 local_irq_save(flags);
1146 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
1147 kernel_ns = get_kernel_ns();
1148 this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
1149 if (unlikely(this_tsc_khz == 0)) {
1150 local_irq_restore(flags);

--- 65 unchanged lines hidden (view full) ---

1216 if (max_kernel_ns > kernel_ns)
1217 kernel_ns = max_kernel_ns;
1218
1219 /* With all the info we got, fill in the values */
1220 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1221 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1222 vcpu->last_kernel_ns = kernel_ns;
1223 vcpu->last_guest_tsc = tsc_timestamp;
1138
1139 /* Keep irq disabled to prevent changes to the clock */
1140 local_irq_save(flags);
1141 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
1142 kernel_ns = get_kernel_ns();
1143 this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
1144 if (unlikely(this_tsc_khz == 0)) {
1145 local_irq_restore(flags);

--- 65 unchanged lines hidden (view full) ---

1211 if (max_kernel_ns > kernel_ns)
1212 kernel_ns = max_kernel_ns;
1213
1214 /* With all the info we got, fill in the values */
1215 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1216 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1217 vcpu->last_kernel_ns = kernel_ns;
1218 vcpu->last_guest_tsc = tsc_timestamp;
1224 vcpu->hv_clock.flags = 0;
1225
1219
1220 pvclock_flags = 0;
1221 if (vcpu->pvclock_set_guest_stopped_request) {
1222 pvclock_flags |= PVCLOCK_GUEST_STOPPED;
1223 vcpu->pvclock_set_guest_stopped_request = false;
1224 }
1225
1226 vcpu->hv_clock.flags = pvclock_flags;
1227
1226 /*
1227 * The interface expects us to write an even number signaling that the
1228 * update is finished. Since the guest won't see the intermediate
1229 * state, we just increase by 2 at the end.
1230 */
1231 vcpu->hv_clock.version += 2;
1232
1233 shared_kaddr = kmap_atomic(vcpu->time_page);

--- 265 unchanged lines hidden (view full) ---

1499
1500 return 0;
1501}
1502
1503static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
1504{
1505 gpa_t gpa = data & ~0x3f;
1506
1228 /*
1229 * The interface expects us to write an even number signaling that the
1230 * update is finished. Since the guest won't see the intermediate
1231 * state, we just increase by 2 at the end.
1232 */
1233 vcpu->hv_clock.version += 2;
1234
1235 shared_kaddr = kmap_atomic(vcpu->time_page);

--- 265 unchanged lines hidden (view full) ---
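The even/odd version handshake above is the usual pvclock sequence-count protocol: the hypervisor leaves the version odd while it updates the shared page and even once it is done, so a guest must retry whenever it sees an odd version or the version changes across the read. A minimal guest-side reader sketch with standalone types and memory barriers omitted (not the actual guest code):

#include <stdint.h>

struct pvclock_sample {
	uint32_t version;
	uint64_t tsc_timestamp;
	uint64_t system_time;
	uint8_t  flags;
};

static void pvclock_read_stable(const volatile struct pvclock_sample *src,
				struct pvclock_sample *out)
{
	uint32_t v;

	do {
		/* wait for an even version: no update in progress */
		while ((v = src->version) & 1)
			;
		out->tsc_timestamp = src->tsc_timestamp;
		out->system_time   = src->system_time;
		out->flags         = src->flags;
		/* a real reader needs read barriers around the copies */
	} while (src->version != v);
	out->version = v;
}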

1501
1502 return 0;
1503}
1504
1505static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
1506{
1507 gpa_t gpa = data & ~0x3f;
1508
1507 /* Bits 2:5 are resrved, Should be zero */
1509 /* Bits 2:5 are reserved, Should be zero */
1508 if (data & 0x3c)
1509 return 1;
1510
1511 vcpu->arch.apf.msr_val = data;
1512
1513 if (!(data & KVM_ASYNC_PF_ENABLED)) {
1514 kvm_clear_async_pf_completion_queue(vcpu);
1515 kvm_async_pf_hash_reset(vcpu);

--- 118 unchanged lines hidden (view full) ---

1634 break;
1635
1636 /* ...but clean it before doing the actual write */
1637 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
1638
1639 vcpu->arch.time_page =
1640 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
1641
1510 if (data & 0x3c)
1511 return 1;
1512
1513 vcpu->arch.apf.msr_val = data;
1514
1515 if (!(data & KVM_ASYNC_PF_ENABLED)) {
1516 kvm_clear_async_pf_completion_queue(vcpu);
1517 kvm_async_pf_hash_reset(vcpu);

--- 118 unchanged lines hidden (view full) ---
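For context on the check above: the guest enables async page faults by writing the 64-byte-aligned guest physical address of its notification area, with the enable flag in the low bits (KVM_ASYNC_PF_ENABLED, bit 0 in the kvm_para ABI) and bits 2:5 required to be zero. A hypothetical guest-side helper that only composes that MSR value:

#include <stdint.h>

static uint64_t async_pf_msr_value(uint64_t reason_area_gpa)
{
	/* caller guarantees 64-byte alignment, so the low 6 bits are free */
	return (reason_area_gpa & ~0x3fULL) | 0x1 /* KVM_ASYNC_PF_ENABLED (bit 0) */;
}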

1636 break;
1637
1638 /* ...but clean it before doing the actual write */
1639 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
1640
1641 vcpu->arch.time_page =
1642 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
1643
1642 if (is_error_page(vcpu->arch.time_page)) {
1643 kvm_release_page_clean(vcpu->arch.time_page);
1644 vcpu->arch.time_page = NULL;
1645 }
1644 if (is_error_page(vcpu->arch.time_page))
1645 vcpu->arch.time_page = NULL;
1646
1646 break;
1647 }
1648 case MSR_KVM_ASYNC_PF_EN:
1649 if (kvm_pv_enable_async_pf(vcpu, data))
1650 return 1;
1651 break;
1652 case MSR_KVM_STEAL_TIME:
1653

--- 68 unchanged lines hidden (view full) ---

1722 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
1723 "0x%x data 0x%llx\n", msr, data);
1724 break;
1725 case MSR_K7_CLK_CTL:
1726 /*
1727 * Ignore all writes to this no longer documented MSR.
1728 * Writes are only relevant for old K7 processors,
1729 * all pre-dating SVM, but a recommended workaround from
1647 break;
1648 }
1649 case MSR_KVM_ASYNC_PF_EN:
1650 if (kvm_pv_enable_async_pf(vcpu, data))
1651 return 1;
1652 break;
1653 case MSR_KVM_STEAL_TIME:
1654

--- 68 unchanged lines hidden (view full) ---

1723 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
1724 "0x%x data 0x%llx\n", msr, data);
1725 break;
1726 case MSR_K7_CLK_CTL:
1727 /*
1728 * Ignore all writes to this no longer documented MSR.
1729 * Writes are only relevant for old K7 processors,
1730 * all pre-dating SVM, but a recommended workaround from
1730 * AMD for these chips. It is possible to speicify the
1731 * AMD for these chips. It is possible to specify the
1731 * affected processor models on the command line, hence
1732 * the need to ignore the workaround.
1733 */
1734 break;
1735 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1736 if (kvm_hv_msr_partition_wide(msr)) {
1737 int r;
1738 mutex_lock(&vcpu->kvm->lock);

--- 256 unchanged lines hidden (view full) ---

1995 data = vcpu->arch.time;
1996 break;
1997 case MSR_KVM_ASYNC_PF_EN:
1998 data = vcpu->arch.apf.msr_val;
1999 break;
2000 case MSR_KVM_STEAL_TIME:
2001 data = vcpu->arch.st.msr_val;
2002 break;
1732 * affected processor models on the command line, hence
1733 * the need to ignore the workaround.
1734 */
1735 break;
1736 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1737 if (kvm_hv_msr_partition_wide(msr)) {
1738 int r;
1739 mutex_lock(&vcpu->kvm->lock);

--- 256 unchanged lines hidden (view full) ---

1996 data = vcpu->arch.time;
1997 break;
1998 case MSR_KVM_ASYNC_PF_EN:
1999 data = vcpu->arch.apf.msr_val;
2000 break;
2001 case MSR_KVM_STEAL_TIME:
2002 data = vcpu->arch.st.msr_val;
2003 break;
2003 case MSR_KVM_PV_EOI_EN:
2004 data = vcpu->arch.pv_eoi.msr_val;
2005 break;
2006 case MSR_IA32_P5_MC_ADDR:
2007 case MSR_IA32_P5_MC_TYPE:
2008 case MSR_IA32_MCG_CAP:
2009 case MSR_IA32_MCG_CTL:
2010 case MSR_IA32_MCG_STATUS:
2011 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
2012 return get_msr_mce(vcpu, msr, pdata);
2013 case MSR_K7_CLK_CTL:

--- 158 unchanged lines hidden (view full) ---

2172 case KVM_CAP_PCI_SEGMENT:
2173 case KVM_CAP_DEBUGREGS:
2174 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2175 case KVM_CAP_XSAVE:
2176 case KVM_CAP_ASYNC_PF:
2177 case KVM_CAP_GET_TSC_KHZ:
2178 case KVM_CAP_PCI_2_3:
2179 case KVM_CAP_KVMCLOCK_CTRL:
2004 case MSR_IA32_P5_MC_ADDR:
2005 case MSR_IA32_P5_MC_TYPE:
2006 case MSR_IA32_MCG_CAP:
2007 case MSR_IA32_MCG_CTL:
2008 case MSR_IA32_MCG_STATUS:
2009 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
2010 return get_msr_mce(vcpu, msr, pdata);
2011 case MSR_K7_CLK_CTL:

--- 158 unchanged lines hidden (view full) ---

2170 case KVM_CAP_PCI_SEGMENT:
2171 case KVM_CAP_DEBUGREGS:
2172 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2173 case KVM_CAP_XSAVE:
2174 case KVM_CAP_ASYNC_PF:
2175 case KVM_CAP_GET_TSC_KHZ:
2176 case KVM_CAP_PCI_2_3:
2177 case KVM_CAP_KVMCLOCK_CTRL:
2178 case KVM_CAP_READONLY_MEM:
2180 r = 1;
2181 break;
2182 case KVM_CAP_COALESCED_MMIO:
2183 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
2184 break;
2185 case KVM_CAP_VAPIC:
2186 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2187 break;

--- 165 unchanged lines hidden (view full) ---

2353 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2354
2355 return 0;
2356}
2357
2358static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2359 struct kvm_lapic_state *s)
2360{
2179 r = 1;
2180 break;
2181 case KVM_CAP_COALESCED_MMIO:
2182 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
2183 break;
2184 case KVM_CAP_VAPIC:
2185 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2186 break;

--- 165 unchanged lines hidden (view full) ---

2352 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2353
2354 return 0;
2355}
2356
2357static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2358 struct kvm_lapic_state *s)
2359{
2361 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
2362 kvm_apic_post_state_restore(vcpu);
2360 kvm_apic_post_state_restore(vcpu, s);
2363 update_cr8_intercept(vcpu);
2364
2365 return 0;
2366}
2367
2368static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2369 struct kvm_interrupt *irq)
2370{

--- 259 unchanged lines hidden (view full) ---

2630/*
2631 * kvm_set_guest_paused() indicates to the guest kernel that it has been
2632 * stopped by the hypervisor. This function will be called from the host only.
2633 * EINVAL is returned when the host attempts to set the flag for a guest that
2634 * does not support pv clocks.
2635 */
2636static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
2637{
2361 update_cr8_intercept(vcpu);
2362
2363 return 0;
2364}
2365
2366static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2367 struct kvm_interrupt *irq)
2368{

--- 259 unchanged lines hidden (view full) ---

2628/*
2629 * kvm_set_guest_paused() indicates to the guest kernel that it has been
2630 * stopped by the hypervisor. This function will be called from the host only.
2631 * EINVAL is returned when the host attempts to set the flag for a guest that
2632 * does not support pv clocks.
2633 */
2634static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
2635{
2638 struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
2639 if (!vcpu->arch.time_page)
2640 return -EINVAL;
2636 if (!vcpu->arch.time_page)
2637 return -EINVAL;
2641 src->flags |= PVCLOCK_GUEST_STOPPED;
2642 mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
2638 vcpu->arch.pvclock_set_guest_stopped_request = true;
2643 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2644 return 0;
2645}
2646
2647long kvm_arch_vcpu_ioctl(struct file *filp,
2648 unsigned int ioctl, unsigned long arg)
2649{
2650 struct kvm_vcpu *vcpu = filp->private_data;

--- 434 unchanged lines hidden (view full) ---

3085}
3086
3087static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3088 struct kvm_reinject_control *control)
3089{
3090 if (!kvm->arch.vpit)
3091 return -ENXIO;
3092 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2639 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2640 return 0;
2641}
2642
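kvm_set_guest_paused() above is reached from userspace through the KVM_KVMCLOCK_CTRL vCPU ioctl, advertised by the KVM_CAP_KVMCLOCK_CTRL capability listed earlier. A minimal sketch of how a VMM might use it right after resuming a vCPU it had stopped (vcpu_fd is assumed to be an already-created vCPU descriptor):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int notify_guest_paused(int vcpu_fd)
{
	/* lets the guest suppress pvclock-based soft-lockup warnings */
	if (ioctl(vcpu_fd, KVM_KVMCLOCK_CTRL, 0) < 0) {
		perror("KVM_KVMCLOCK_CTRL");	/* EINVAL: guest has no pvclock */
		return -1;
	}
	return 0;
}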
2643long kvm_arch_vcpu_ioctl(struct file *filp,
2644 unsigned int ioctl, unsigned long arg)
2645{
2646 struct kvm_vcpu *vcpu = filp->private_data;

--- 434 unchanged lines hidden (view full) ---

3081}
3082
3083static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3084 struct kvm_reinject_control *control)
3085{
3086 if (!kvm->arch.vpit)
3087 return -ENXIO;
3088 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3093 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
3089 kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
3094 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3095 return 0;
3096}
3097
3098/**
3099 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
3100 * @kvm: kvm instance
3101 * @log: slot id and address to which we copy the log

--- 66 unchanged lines hidden (view full) ---

3168 goto out;
3169
3170 r = 0;
3171out:
3172 mutex_unlock(&kvm->slots_lock);
3173 return r;
3174}
3175
3090 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3091 return 0;
3092}
3093
3094/**
3095 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
3096 * @kvm: kvm instance
3097 * @log: slot id and address to which we copy the log

--- 66 unchanged lines hidden (view full) ---

3164 goto out;
3165
3166 r = 0;
3167out:
3168 mutex_unlock(&kvm->slots_lock);
3169 return r;
3170}
3171
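Userspace reaches kvm_vm_ioctl_get_dirty_log() documented above through the KVM_GET_DIRTY_LOG ioctl on the VM fd. A rough usage sketch; the caller must size the bitmap itself, one bit per page in the slot (vm_fd and the memslot layout are assumed):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_dirty_bitmap(int vm_fd, unsigned int slot, void *bitmap)
{
	struct kvm_dirty_log log = {
		.slot = slot,
		.dirty_bitmap = bitmap,	/* filled in (and log cleared) by the kernel */
	};

	return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
}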
3172int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
3173{
3174 if (!irqchip_in_kernel(kvm))
3175 return -ENXIO;
3176
3177 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3178 irq_event->irq, irq_event->level);
3179 return 0;
3180}
3181
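kvm_vm_ioctl_irq_line() above is what the KVM_IRQ_LINE / KVM_IRQ_LINE_STATUS ioctls now call into, replacing the open-coded handling that kvm_arch_vm_ioctl() used to carry (shown further down in the old column). A minimal userspace sketch that pulses a GSI on the in-kernel irqchip, assuming KVM_CREATE_IRQCHIP was already done on vm_fd (otherwise the ioctl fails with ENXIO):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int kvm_pulse_irq(int vm_fd, unsigned int gsi)
{
	struct kvm_irq_level irq = { .irq = gsi, .level = 1 };

	if (ioctl(vm_fd, KVM_IRQ_LINE, &irq) < 0)	/* assert */
		return -1;
	irq.level = 0;
	return ioctl(vm_fd, KVM_IRQ_LINE, &irq);	/* deassert */
}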
3176long kvm_arch_vm_ioctl(struct file *filp,
3177 unsigned int ioctl, unsigned long arg)
3178{
3179 struct kvm *kvm = filp->private_data;
3180 void __user *argp = (void __user *)arg;
3181 int r = -ENOTTY;
3182 /*
3183 * This union makes it completely explicit to gcc-3.x

--- 90 unchanged lines hidden (view full) ---

3274 goto create_pit_unlock;
3275 r = -ENOMEM;
3276 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3277 if (kvm->arch.vpit)
3278 r = 0;
3279 create_pit_unlock:
3280 mutex_unlock(&kvm->slots_lock);
3281 break;
3182long kvm_arch_vm_ioctl(struct file *filp,
3183 unsigned int ioctl, unsigned long arg)
3184{
3185 struct kvm *kvm = filp->private_data;
3186 void __user *argp = (void __user *)arg;
3187 int r = -ENOTTY;
3188 /*
3189 * This union makes it completely explicit to gcc-3.x

--- 90 unchanged lines hidden (view full) ---

3280 goto create_pit_unlock;
3281 r = -ENOMEM;
3282 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3283 if (kvm->arch.vpit)
3284 r = 0;
3285 create_pit_unlock:
3286 mutex_unlock(&kvm->slots_lock);
3287 break;
3282 case KVM_IRQ_LINE_STATUS:
3283 case KVM_IRQ_LINE: {
3284 struct kvm_irq_level irq_event;
3285
3286 r = -EFAULT;
3287 if (copy_from_user(&irq_event, argp, sizeof irq_event))
3288 goto out;
3289 r = -ENXIO;
3290 if (irqchip_in_kernel(kvm)) {
3291 __s32 status;
3292 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3293 irq_event.irq, irq_event.level);
3294 if (ioctl == KVM_IRQ_LINE_STATUS) {
3295 r = -EFAULT;
3296 irq_event.status = status;
3297 if (copy_to_user(argp, &irq_event,
3298 sizeof irq_event))
3299 goto out;
3300 }
3301 r = 0;
3302 }
3303 break;
3304 }
3305 case KVM_GET_IRQCHIP: {
3306 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3307 struct kvm_irqchip *chip;
3308
3309 chip = memdup_user(argp, sizeof(*chip));
3310 if (IS_ERR(chip)) {
3311 r = PTR_ERR(chip);
3312 goto out;

--- 472 unchanged lines hidden (view full) ---

3785 void *val, int bytes)
3786{
3787 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
3788
3789 memcpy(vcpu->run->mmio.data, frag->data, frag->len);
3790 return X86EMUL_CONTINUE;
3791}
3792
3288 case KVM_GET_IRQCHIP: {
3289 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3290 struct kvm_irqchip *chip;
3291
3292 chip = memdup_user(argp, sizeof(*chip));
3293 if (IS_ERR(chip)) {
3294 r = PTR_ERR(chip);
3295 goto out;

--- 472 unchanged lines hidden (view full) ---

3768 void *val, int bytes)
3769{
3770 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
3771
3772 memcpy(vcpu->run->mmio.data, frag->data, frag->len);
3773 return X86EMUL_CONTINUE;
3774}
3775
3793static struct read_write_emulator_ops read_emultor = {
3776static const struct read_write_emulator_ops read_emultor = {
3794 .read_write_prepare = read_prepare,
3795 .read_write_emulate = read_emulate,
3796 .read_write_mmio = vcpu_mmio_read,
3797 .read_write_exit_mmio = read_exit_mmio,
3798};
3799
3777 .read_write_prepare = read_prepare,
3778 .read_write_emulate = read_emulate,
3779 .read_write_mmio = vcpu_mmio_read,
3780 .read_write_exit_mmio = read_exit_mmio,
3781};
3782
3800static struct read_write_emulator_ops write_emultor = {
3783static const struct read_write_emulator_ops write_emultor = {
3801 .read_write_emulate = write_emulate,
3802 .read_write_mmio = write_mmio,
3803 .read_write_exit_mmio = write_exit_mmio,
3804 .write = true,
3805};
3806
3807static int emulator_read_write_onepage(unsigned long addr, void *val,
3808 unsigned int bytes,
3809 struct x86_exception *exception,
3810 struct kvm_vcpu *vcpu,
3784 .read_write_emulate = write_emulate,
3785 .read_write_mmio = write_mmio,
3786 .read_write_exit_mmio = write_exit_mmio,
3787 .write = true,
3788};
3789
3790static int emulator_read_write_onepage(unsigned long addr, void *val,
3791 unsigned int bytes,
3792 struct x86_exception *exception,
3793 struct kvm_vcpu *vcpu,
3811 struct read_write_emulator_ops *ops)
3794 const struct read_write_emulator_ops *ops)
3812{
3813 gpa_t gpa;
3814 int handled, ret;
3815 bool write = ops->write;
3816 struct kvm_mmio_fragment *frag;
3817
3818 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
3819

--- 32 unchanged lines hidden (view full) ---

3852 bytes -= now;
3853 }
3854 return X86EMUL_CONTINUE;
3855}
3856
3857int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
3858 void *val, unsigned int bytes,
3859 struct x86_exception *exception,
3795{
3796 gpa_t gpa;
3797 int handled, ret;
3798 bool write = ops->write;
3799 struct kvm_mmio_fragment *frag;
3800
3801 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
3802

--- 32 unchanged lines hidden (view full) ---

3835 bytes -= now;
3836 }
3837 return X86EMUL_CONTINUE;
3838}
3839
3840int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
3841 void *val, unsigned int bytes,
3842 struct x86_exception *exception,
3860 struct read_write_emulator_ops *ops)
3843 const struct read_write_emulator_ops *ops)
3861{
3862 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3863 gpa_t gpa;
3864 int rc;
3865
3866 if (ops->read_write_prepare &&
3867 ops->read_write_prepare(vcpu, val, bytes))
3868 return X86EMUL_CONTINUE;

--- 88 unchanged lines hidden (view full) ---

3957 if (gpa == UNMAPPED_GVA ||
3958 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3959 goto emul_write;
3960
3961 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
3962 goto emul_write;
3963
3964 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
3844{
3845 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3846 gpa_t gpa;
3847 int rc;
3848
3849 if (ops->read_write_prepare &&
3850 ops->read_write_prepare(vcpu, val, bytes))
3851 return X86EMUL_CONTINUE;

--- 88 unchanged lines hidden (view full) ---

3940 if (gpa == UNMAPPED_GVA ||
3941 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3942 goto emul_write;
3943
3944 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
3945 goto emul_write;
3946
3947 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
3965 if (is_error_page(page)) {
3966 kvm_release_page_clean(page);
3967 goto emul_write;
3968 }
3948 if (is_error_page(page))
3949 goto emul_write;
3969
3970 kaddr = kmap_atomic(page);
3971 kaddr += offset_in_page(gpa);
3972 switch (bytes) {
3973 case 1:
3974 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
3975 break;
3976 case 2:

--- 350 unchanged lines hidden (view full) ---

4327}
4328
4329static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
4330 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
4331{
4332 kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
4333}
4334
3950
3951 kaddr = kmap_atomic(page);
3952 kaddr += offset_in_page(gpa);
3953 switch (bytes) {
3954 case 1:
3955 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
3956 break;
3957 case 2:

--- 350 unchanged lines hidden (view full) ---

4308}
4309
4310static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
4311 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
4312{
4313 kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
4314}
4315
4335static struct x86_emulate_ops emulate_ops = {
4316static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
4317{
4318 return kvm_register_read(emul_to_vcpu(ctxt), reg);
4319}
4320
4321static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
4322{
4323 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
4324}
4325
4326static const struct x86_emulate_ops emulate_ops = {
4327 .read_gpr = emulator_read_gpr,
4328 .write_gpr = emulator_write_gpr,
4336 .read_std = kvm_read_guest_virt_system,
4337 .write_std = kvm_write_guest_virt_system,
4338 .fetch = kvm_fetch_guest_virt,
4339 .read_emulated = emulator_read_emulated,
4340 .write_emulated = emulator_write_emulated,
4341 .cmpxchg_emulated = emulator_cmpxchg_emulated,
4342 .invlpg = emulator_invlpg,
4343 .pio_in_emulated = emulator_pio_in_emulated,

--- 18 unchanged lines hidden (view full) ---

4362 .wbinvd = emulator_wbinvd,
4363 .fix_hypercall = emulator_fix_hypercall,
4364 .get_fpu = emulator_get_fpu,
4365 .put_fpu = emulator_put_fpu,
4366 .intercept = emulator_intercept,
4367 .get_cpuid = emulator_get_cpuid,
4368};
4369
4329 .read_std = kvm_read_guest_virt_system,
4330 .write_std = kvm_write_guest_virt_system,
4331 .fetch = kvm_fetch_guest_virt,
4332 .read_emulated = emulator_read_emulated,
4333 .write_emulated = emulator_write_emulated,
4334 .cmpxchg_emulated = emulator_cmpxchg_emulated,
4335 .invlpg = emulator_invlpg,
4336 .pio_in_emulated = emulator_pio_in_emulated,

--- 18 unchanged lines hidden (view full) ---

4355 .wbinvd = emulator_wbinvd,
4356 .fix_hypercall = emulator_fix_hypercall,
4357 .get_fpu = emulator_get_fpu,
4358 .put_fpu = emulator_put_fpu,
4359 .intercept = emulator_intercept,
4360 .get_cpuid = emulator_get_cpuid,
4361};
4362
4370static void cache_all_regs(struct kvm_vcpu *vcpu)
4371{
4372 kvm_register_read(vcpu, VCPU_REGS_RAX);
4373 kvm_register_read(vcpu, VCPU_REGS_RSP);
4374 kvm_register_read(vcpu, VCPU_REGS_RIP);
4375 vcpu->arch.regs_dirty = ~0;
4376}
4377
4378static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4379{
4380 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
4381 /*
4382 * an sti; sti; sequence only disable interrupts for the first
4383 * instruction. So, if the last instruction, be it emulated or
4384 * not, left the system with the INT_STI flag enabled, it
4385 * means that the last instruction is an sti. We should not

--- 10 unchanged lines hidden (view full) ---

4396 kvm_propagate_fault(vcpu, &ctxt->exception);
4397 else if (ctxt->exception.error_code_valid)
4398 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4399 ctxt->exception.error_code);
4400 else
4401 kvm_queue_exception(vcpu, ctxt->exception.vector);
4402}
4403
4363static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4364{
4365 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
4366 /*
4367 * an sti; sti; sequence only disable interrupts for the first
4368 * instruction. So, if the last instruction, be it emulated or
4369 * not, left the system with the INT_STI flag enabled, it
4370 * means that the last instruction is an sti. We should not

--- 10 unchanged lines hidden (view full) ---

4381 kvm_propagate_fault(vcpu, &ctxt->exception);
4382 else if (ctxt->exception.error_code_valid)
4383 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4384 ctxt->exception.error_code);
4385 else
4386 kvm_queue_exception(vcpu, ctxt->exception.vector);
4387}
4388
4404static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
4405 const unsigned long *regs)
4406{
4407 memset(&ctxt->twobyte, 0,
4408 (void *)&ctxt->regs - (void *)&ctxt->twobyte);
4409 memcpy(ctxt->regs, regs, sizeof(ctxt->regs));
4389static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4390{
4391 memset(&ctxt->twobyte, 0,
4392 (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
4410
4411 ctxt->fetch.start = 0;
4412 ctxt->fetch.end = 0;
4413 ctxt->io_read.pos = 0;
4414 ctxt->io_read.end = 0;
4415 ctxt->mem_read.pos = 0;
4416 ctxt->mem_read.end = 0;
4417}
4418
4419static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4420{
4421 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4422 int cs_db, cs_l;
4423
4393
4394 ctxt->fetch.start = 0;
4395 ctxt->fetch.end = 0;
4396 ctxt->io_read.pos = 0;
4397 ctxt->io_read.end = 0;
4398 ctxt->mem_read.pos = 0;
4399 ctxt->mem_read.end = 0;
4400}
4401
4402static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4403{
4404 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4405 int cs_db, cs_l;
4406
4424 /*
4425 * TODO: fix emulate.c to use guest_read/write_register
4426 * instead of direct ->regs accesses, can save hundred cycles
4427 * on Intel for instructions that don't read/change RSP, for
4428 * for example.
4429 */
4430 cache_all_regs(vcpu);
4431
4432 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4433
4434 ctxt->eflags = kvm_get_rflags(vcpu);
4435 ctxt->eip = kvm_rip_read(vcpu);
4436 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4437 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
4438 cs_l ? X86EMUL_MODE_PROT64 :
4439 cs_db ? X86EMUL_MODE_PROT32 :
4440 X86EMUL_MODE_PROT16;
4441 ctxt->guest_mode = is_guest_mode(vcpu);
4442
4407 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4408
4409 ctxt->eflags = kvm_get_rflags(vcpu);
4410 ctxt->eip = kvm_rip_read(vcpu);
4411 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4412 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
4413 cs_l ? X86EMUL_MODE_PROT64 :
4414 cs_db ? X86EMUL_MODE_PROT32 :
4415 X86EMUL_MODE_PROT16;
4416 ctxt->guest_mode = is_guest_mode(vcpu);
4417
4443 init_decode_cache(ctxt, vcpu->arch.regs);
4418 init_decode_cache(ctxt);
4444 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4445}
4446
4447int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
4448{
4449 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4450 int ret;
4451
4452 init_emulate_ctxt(vcpu);
4453
4454 ctxt->op_bytes = 2;
4455 ctxt->ad_bytes = 2;
4456 ctxt->_eip = ctxt->eip + inc_eip;
4457 ret = emulate_int_real(ctxt, irq);
4458
4459 if (ret != X86EMUL_CONTINUE)
4460 return EMULATE_FAIL;
4461
4462 ctxt->eip = ctxt->_eip;
4419 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4420}
4421
4422int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
4423{
4424 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4425 int ret;
4426
4427 init_emulate_ctxt(vcpu);
4428
4429 ctxt->op_bytes = 2;
4430 ctxt->ad_bytes = 2;
4431 ctxt->_eip = ctxt->eip + inc_eip;
4432 ret = emulate_int_real(ctxt, irq);
4433
4434 if (ret != X86EMUL_CONTINUE)
4435 return EMULATE_FAIL;
4436
4437 ctxt->eip = ctxt->_eip;
4463 memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
4464 kvm_rip_write(vcpu, ctxt->eip);
4465 kvm_set_rflags(vcpu, ctxt->eflags);
4466
4467 if (irq == NMI_VECTOR)
4468 vcpu->arch.nmi_pending = 0;
4469 else
4470 vcpu->arch.interrupt.pending = false;
4471

--- 16 unchanged lines hidden (view full) ---

4488 kvm_queue_exception(vcpu, UD_VECTOR);
4489
4490 return r;
4491}
4492
4493static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4494{
4495 gpa_t gpa;
4438 kvm_rip_write(vcpu, ctxt->eip);
4439 kvm_set_rflags(vcpu, ctxt->eflags);
4440
4441 if (irq == NMI_VECTOR)
4442 vcpu->arch.nmi_pending = 0;
4443 else
4444 vcpu->arch.interrupt.pending = false;
4445

--- 16 unchanged lines hidden (view full) ---

4462 kvm_queue_exception(vcpu, UD_VECTOR);
4463
4464 return r;
4465}
4466
4467static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4468{
4469 gpa_t gpa;
4470 pfn_t pfn;
4496
4497 if (tdp_enabled)
4498 return false;
4499
4500 /*
4501 * if emulation was due to access to shadowed page table
4471
4472 if (tdp_enabled)
4473 return false;
4474
4475 /*
4476 * if emulation was due to access to shadowed page table
4502 * and it failed try to unshadow page and re-entetr the
4477 * and it failed try to unshadow page and re-enter the
4503 * guest to let CPU execute the instruction.
4504 */
4505 if (kvm_mmu_unprotect_page_virt(vcpu, gva))
4506 return true;
4507
4508 gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
4509
4510 if (gpa == UNMAPPED_GVA)
4511 return true; /* let cpu generate fault */
4512
4478 * guest to let CPU execute the instruction.
4479 */
4480 if (kvm_mmu_unprotect_page_virt(vcpu, gva))
4481 return true;
4482
4483 gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
4484
4485 if (gpa == UNMAPPED_GVA)
4486 return true; /* let cpu generate fault */
4487
4513 if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
4514 return true;
4488 /*
4489 * Do not retry the unhandleable instruction if it faults on the
4490 * readonly host memory, otherwise it will go into an infinite loop:
4491 * retry instruction -> write #PF -> emulation fail -> retry
4492 * instruction -> ...
4493 */
4494 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
4495 if (!is_error_pfn(pfn)) {
4496 kvm_release_pfn_clean(pfn);
4497 return true;
4498 }
4515
4516 return false;
4517}
4518
4519static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
4520 unsigned long cr2, int emulation_type)
4521{
4522 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);

--- 80 unchanged lines hidden (view full) ---

4603
4604 if (retry_instruction(ctxt, cr2, emulation_type))
4605 return EMULATE_DONE;
4606
4607 /* this is needed for vmware backdoor interface to work since it
4608 changes registers values during IO operation */
4609 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
4610 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4499
4500 return false;
4501}
4502
4503static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
4504 unsigned long cr2, int emulation_type)
4505{
4506 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);

--- 80 unchanged lines hidden (view full) ---

4587
4588 if (retry_instruction(ctxt, cr2, emulation_type))
4589 return EMULATE_DONE;
4590
4591 /* this is needed for vmware backdoor interface to work since it
4592 changes registers values during IO operation */
4593 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
4594 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4611 memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs);
4595 emulator_invalidate_register_cache(ctxt);
4612 }
4613
4614restart:
4615 r = x86_emulate_insn(ctxt);
4616
4617 if (r == EMULATION_INTERCEPTED)
4618 return EMULATE_DONE;
4619

--- 21 unchanged lines hidden (view full) ---

4641 goto restart;
4642 else
4643 r = EMULATE_DONE;
4644
4645 if (writeback) {
4646 toggle_interruptibility(vcpu, ctxt->interruptibility);
4647 kvm_set_rflags(vcpu, ctxt->eflags);
4648 kvm_make_request(KVM_REQ_EVENT, vcpu);
4596 }
4597
4598restart:
4599 r = x86_emulate_insn(ctxt);
4600
4601 if (r == EMULATION_INTERCEPTED)
4602 return EMULATE_DONE;
4603

--- 21 unchanged lines hidden (view full) ---

4625 goto restart;
4626 else
4627 r = EMULATE_DONE;
4628
4629 if (writeback) {
4630 toggle_interruptibility(vcpu, ctxt->interruptibility);
4631 kvm_set_rflags(vcpu, ctxt->eflags);
4632 kvm_make_request(KVM_REQ_EVENT, vcpu);
4649 memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
4650 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
4651 kvm_rip_write(vcpu, ctxt->eip);
4652 } else
4653 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
4654
4655 return r;
4656}
4657EXPORT_SYMBOL_GPL(x86_emulate_instruction);

--- 266 unchanged lines hidden (view full) ---

4924
4925 kvm_timer_init();
4926
4927 perf_register_guest_info_callbacks(&kvm_guest_cbs);
4928
4929 if (cpu_has_xsave)
4930 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
4931
4633 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
4634 kvm_rip_write(vcpu, ctxt->eip);
4635 } else
4636 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
4637
4638 return r;
4639}
4640EXPORT_SYMBOL_GPL(x86_emulate_instruction);

--- 266 unchanged lines hidden (view full) ---

4907
4908 kvm_timer_init();
4909
4910 perf_register_guest_info_callbacks(&kvm_guest_cbs);
4911
4912 if (cpu_has_xsave)
4913 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
4914
4915 kvm_lapic_init();
4932 return 0;
4933
4934out:
4935 return r;
4936}
4937
4938void kvm_arch_exit(void)
4939{

--- 650 unchanged lines hidden (view full) ---

5590}
5591
5592int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5593{
5594 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
5595 /*
5596 * We are here if userspace calls get_regs() in the middle of
5597 * instruction emulation. Registers state needs to be copied
4916 return 0;
4917
4918out:
4919 return r;
4920}
4921
4922void kvm_arch_exit(void)
4923{

--- 650 unchanged lines hidden (view full) ---

5574}
5575
5576int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5577{
5578 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
5579 /*
5580 * We are here if userspace calls get_regs() in the middle of
5581 * instruction emulation. Registers state needs to be copied
5598 * back from emulation context to vcpu. Usrapace shouldn't do
5582 * back from emulation context to vcpu. Userspace shouldn't do
5599 * that usually, but some bad designed PV devices (vmware
5600 * backdoor interface) need this to work
5601 */
5583 * that usually, but some bad designed PV devices (vmware
5584 * backdoor interface) need this to work
5585 */
5602 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5603 memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
5586 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
5604 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5605 }
5606 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
5607 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
5608 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
5609 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
5610 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
5611 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);

--- 123 unchanged lines hidden (view full) ---

5735 init_emulate_ctxt(vcpu);
5736
5737 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
5738 has_error_code, error_code);
5739
5740 if (ret)
5741 return EMULATE_FAIL;
5742
5587 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5588 }
5589 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
5590 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
5591 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
5592 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
5593 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
5594 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);

--- 123 unchanged lines hidden (view full) ---

5718 init_emulate_ctxt(vcpu);
5719
5720 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
5721 has_error_code, error_code);
5722
5723 if (ret)
5724 return EMULATE_FAIL;
5725
5743 memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
5744 kvm_rip_write(vcpu, ctxt->eip);
5745 kvm_set_rflags(vcpu, ctxt->eflags);
5746 kvm_make_request(KVM_REQ_EVENT, vcpu);
5747 return EMULATE_DONE;
5748}
5749EXPORT_SYMBOL_GPL(kvm_task_switch);
5750
5751int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,

--- 367 unchanged lines hidden (view full) ---

6119 * N.B. - this code below runs only on platforms with reliable TSC,
6120 * as that is the only way backwards_tsc is set above. Also note
6121 * that this runs for ALL vcpus, which is not a bug; all VCPUs should
6122 * have the same delta_cyc adjustment applied if backwards_tsc
6123 * is detected. Note further, this adjustment is only done once,
6124 * as we reset last_host_tsc on all VCPUs to stop this from being
6125 * called multiple times (one for each physical CPU bringup).
6126 *
5726 kvm_rip_write(vcpu, ctxt->eip);
5727 kvm_set_rflags(vcpu, ctxt->eflags);
5728 kvm_make_request(KVM_REQ_EVENT, vcpu);
5729 return EMULATE_DONE;
5730}
5731EXPORT_SYMBOL_GPL(kvm_task_switch);
5732
5733int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,

--- 367 unchanged lines hidden (view full) ---

6101 * N.B. - this code below runs only on platforms with reliable TSC,
6102 * as that is the only way backwards_tsc is set above. Also note
6103 * that this runs for ALL vcpus, which is not a bug; all VCPUs should
6104 * have the same delta_cyc adjustment applied if backwards_tsc
6105 * is detected. Note further, this adjustment is only done once,
6106 * as we reset last_host_tsc on all VCPUs to stop this from being
6107 * called multiple times (one for each physical CPU bringup).
6108 *
6127 * Platforms with unnreliable TSCs don't have to deal with this, they
6109 * Platforms with unreliable TSCs don't have to deal with this, they
6128 * will be compensated by the logic in vcpu_load, which sets the TSC to
6129 * catchup mode. This will catchup all VCPUs to real time, but cannot
6130 * guarantee that they stay in perfect synchronization.
6131 */
6132 if (backwards_tsc) {
6133 u64 delta_cyc = max_tsc - local_tsc;
6134 list_for_each_entry(kvm, &vm_list, vm_list) {
6135 kvm_for_each_vcpu(i, vcpu, kvm) {

--- 36 unchanged lines hidden (view full) ---

6172 kvm_x86_ops->check_processor_compatibility(rtn);
6173}
6174
6175bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
6176{
6177 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
6178}
6179
6110 * will be compensated by the logic in vcpu_load, which sets the TSC to
6111 * catchup mode. This will catchup all VCPUs to real time, but cannot
6112 * guarantee that they stay in perfect synchronization.
6113 */
6114 if (backwards_tsc) {
6115 u64 delta_cyc = max_tsc - local_tsc;
6116 list_for_each_entry(kvm, &vm_list, vm_list) {
6117 kvm_for_each_vcpu(i, vcpu, kvm) {

--- 36 unchanged lines hidden (view full) ---

6154 kvm_x86_ops->check_processor_compatibility(rtn);
6155}
6156
6157bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
6158{
6159 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
6160}
6161
6162struct static_key kvm_no_apic_vcpu __read_mostly;
6163
6180int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6181{
6182 struct page *page;
6183 struct kvm *kvm;
6184 int r;
6185
6186 BUG_ON(vcpu->kvm == NULL);
6187 kvm = vcpu->kvm;

--- 16 unchanged lines hidden (view full) ---

6204 r = kvm_mmu_create(vcpu);
6205 if (r < 0)
6206 goto fail_free_pio_data;
6207
6208 if (irqchip_in_kernel(kvm)) {
6209 r = kvm_create_lapic(vcpu);
6210 if (r < 0)
6211 goto fail_mmu_destroy;
6164int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6165{
6166 struct page *page;
6167 struct kvm *kvm;
6168 int r;
6169
6170 BUG_ON(vcpu->kvm == NULL);
6171 kvm = vcpu->kvm;

--- 16 unchanged lines hidden (view full) ---

6188 r = kvm_mmu_create(vcpu);
6189 if (r < 0)
6190 goto fail_free_pio_data;
6191
6192 if (irqchip_in_kernel(kvm)) {
6193 r = kvm_create_lapic(vcpu);
6194 if (r < 0)
6195 goto fail_mmu_destroy;
6212 }
6196 } else
6197 static_key_slow_inc(&kvm_no_apic_vcpu);
6213
6214 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
6215 GFP_KERNEL);
6216 if (!vcpu->arch.mce_banks) {
6217 r = -ENOMEM;
6218 goto fail_free_lapic;
6219 }
6220 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;

--- 23 unchanged lines hidden (view full) ---

6244
6245 kvm_pmu_destroy(vcpu);
6246 kfree(vcpu->arch.mce_banks);
6247 kvm_free_lapic(vcpu);
6248 idx = srcu_read_lock(&vcpu->kvm->srcu);
6249 kvm_mmu_destroy(vcpu);
6250 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6251 free_page((unsigned long)vcpu->arch.pio_data);
6198
6199 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
6200 GFP_KERNEL);
6201 if (!vcpu->arch.mce_banks) {
6202 r = -ENOMEM;
6203 goto fail_free_lapic;
6204 }
6205 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;

--- 23 unchanged lines hidden (view full) ---

6229
6230 kvm_pmu_destroy(vcpu);
6231 kfree(vcpu->arch.mce_banks);
6232 kvm_free_lapic(vcpu);
6233 idx = srcu_read_lock(&vcpu->kvm->srcu);
6234 kvm_mmu_destroy(vcpu);
6235 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6236 free_page((unsigned long)vcpu->arch.pio_data);
6237 if (!irqchip_in_kernel(vcpu->kvm))
6238 static_key_slow_dec(&kvm_no_apic_vcpu);
6252}
6253
6254int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6255{
6256 if (type)
6257 return -EINVAL;
6258
6259 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);

--- 55 unchanged lines hidden (view full) ---

6315 put_page(kvm->arch.ept_identity_pagetable);
6316}
6317
6318void kvm_arch_free_memslot(struct kvm_memory_slot *free,
6319 struct kvm_memory_slot *dont)
6320{
6321 int i;
6322
6239}
6240
6241int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6242{
6243 if (type)
6244 return -EINVAL;
6245
6246 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);

--- 55 unchanged lines hidden (view full) ---

6302 put_page(kvm->arch.ept_identity_pagetable);
6303}
6304
6305void kvm_arch_free_memslot(struct kvm_memory_slot *free,
6306 struct kvm_memory_slot *dont)
6307{
6308 int i;
6309
6323 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
6324 if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
6325 kvm_kvfree(free->arch.lpage_info[i]);
6326 free->arch.lpage_info[i] = NULL;
6310 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
6311 if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
6312 kvm_kvfree(free->arch.rmap[i]);
6313 free->arch.rmap[i] = NULL;
6327 }
6314 }
6315 if (i == 0)
6316 continue;
6317
6318 if (!dont || free->arch.lpage_info[i - 1] !=
6319 dont->arch.lpage_info[i - 1]) {
6320 kvm_kvfree(free->arch.lpage_info[i - 1]);
6321 free->arch.lpage_info[i - 1] = NULL;
6322 }
6328 }
6329}
6330
6331int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
6332{
6333 int i;
6334
6323 }
6324}
6325
6326int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
6327{
6328 int i;
6329
6335 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
6330 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
6336 unsigned long ugfn;
6337 int lpages;
6331 unsigned long ugfn;
6332 int lpages;
6338 int level = i + 2;
6333 int level = i + 1;
6339
6340 lpages = gfn_to_index(slot->base_gfn + npages - 1,
6341 slot->base_gfn, level) + 1;
6342
6334
6335 lpages = gfn_to_index(slot->base_gfn + npages - 1,
6336 slot->base_gfn, level) + 1;
6337
6343 slot->arch.lpage_info[i] =
6344 kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
6345 if (!slot->arch.lpage_info[i])
6338 slot->arch.rmap[i] =
6339 kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
6340 if (!slot->arch.rmap[i])
6346 goto out_free;
6341 goto out_free;
6342 if (i == 0)
6343 continue;
6347
6344
6345 slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
6346 sizeof(*slot->arch.lpage_info[i - 1]));
6347 if (!slot->arch.lpage_info[i - 1])
6348 goto out_free;
6349
6348 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
6350 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
6349 slot->arch.lpage_info[i][0].write_count = 1;
6351 slot->arch.lpage_info[i - 1][0].write_count = 1;
6350 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
6352 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
6351 slot->arch.lpage_info[i][lpages - 1].write_count = 1;
6353 slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
6352 ugfn = slot->userspace_addr >> PAGE_SHIFT;
6353 /*
6354 * If the gfn and userspace address are not aligned wrt each
6355 * other, or if explicitly asked to, disable large page
6356 * support for this slot
6357 */
6358 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
6359 !kvm_largepages_enabled()) {
6360 unsigned long j;
6361
6362 for (j = 0; j < lpages; ++j)
6354 ugfn = slot->userspace_addr >> PAGE_SHIFT;
6355 /*
6356 * If the gfn and userspace address are not aligned wrt each
6357 * other, or if explicitly asked to, disable large page
6358 * support for this slot
6359 */
6360 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
6361 !kvm_largepages_enabled()) {
6362 unsigned long j;
6363
6364 for (j = 0; j < lpages; ++j)
6363 slot->arch.lpage_info[i][j].write_count = 1;
6365 slot->arch.lpage_info[i - 1][j].write_count = 1;
6364 }
6365 }
6366
6367 return 0;
6368
6369out_free:
6366 }
6367 }
6368
6369 return 0;
6370
6371out_free:
6370 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
6371 kvm_kvfree(slot->arch.lpage_info[i]);
6372 slot->arch.lpage_info[i] = NULL;
6372 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
6373 kvm_kvfree(slot->arch.rmap[i]);
6374 slot->arch.rmap[i] = NULL;
6375 if (i == 0)
6376 continue;
6377
6378 kvm_kvfree(slot->arch.lpage_info[i - 1]);
6379 slot->arch.lpage_info[i - 1] = NULL;
6373 }
6374 return -ENOMEM;
6375}
6376
6377int kvm_arch_prepare_memory_region(struct kvm *kvm,
6378 struct kvm_memory_slot *memslot,
6379 struct kvm_memory_slot old,
6380 struct kvm_userspace_memory_region *mem,
6381 int user_alloc)
6382{
6383 int npages = memslot->npages;
6384 int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
6385
6386 /* Prevent internal slot pages from being moved by fork()/COW. */
6387 if (memslot->id >= KVM_MEMORY_SLOTS)
6388 map_flags = MAP_SHARED | MAP_ANONYMOUS;
6389
6390 /*To keep backward compatibility with older userspace,
6380 }
6381 return -ENOMEM;
6382}
6383
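The allocation loop above sizes each per-level array with gfn_to_index(last_gfn, base_gfn, level) + 1, i.e. the number of level-sized large-page regions the memslot touches, and then sets write_count = 1 on the first and last entries when the slot's edges are not large-page aligned, disabling large pages there. A hypothetical standalone helper showing the same count, assuming gfn_to_index() compares region indices relative to the slot base:

#include <stdint.h>

static uint64_t lpage_slots(uint64_t base_gfn, uint64_t npages,
			    uint64_t pages_per_hpage)
{
	uint64_t last_gfn = base_gfn + npages - 1;

	/* index of the last region minus index of the first, inclusive */
	return last_gfn / pages_per_hpage - base_gfn / pages_per_hpage + 1;
}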
6384int kvm_arch_prepare_memory_region(struct kvm *kvm,
6385 struct kvm_memory_slot *memslot,
6386 struct kvm_memory_slot old,
6387 struct kvm_userspace_memory_region *mem,
6388 int user_alloc)
6389{
6390 int npages = memslot->npages;
6391 int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
6392
6393 /* Prevent internal slot pages from being moved by fork()/COW. */
6394 if (memslot->id >= KVM_MEMORY_SLOTS)
6395 map_flags = MAP_SHARED | MAP_ANONYMOUS;
6396
6397 /*To keep backward compatibility with older userspace,
6391 *x86 needs to hanlde !user_alloc case.
6398 *x86 needs to handle !user_alloc case.
6392 */
6393 if (!user_alloc) {
6399 */
6400 if (!user_alloc) {
6394 if (npages && !old.rmap) {
6401 if (npages && !old.npages) {
6395 unsigned long userspace_addr;
6396
6397 userspace_addr = vm_mmap(NULL, 0,
6398 npages * PAGE_SIZE,
6399 PROT_READ | PROT_WRITE,
6400 map_flags,
6401 0);
6402

--- 11 unchanged lines hidden (view full) ---

6414void kvm_arch_commit_memory_region(struct kvm *kvm,
6415 struct kvm_userspace_memory_region *mem,
6416 struct kvm_memory_slot old,
6417 int user_alloc)
6418{
6419
6420 int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
6421
6402 unsigned long userspace_addr;
6403
6404 userspace_addr = vm_mmap(NULL, 0,
6405 npages * PAGE_SIZE,
6406 PROT_READ | PROT_WRITE,
6407 map_flags,
6408 0);
6409

--- 11 unchanged lines hidden (view full) ---

6421void kvm_arch_commit_memory_region(struct kvm *kvm,
6422 struct kvm_userspace_memory_region *mem,
6423 struct kvm_memory_slot old,
6424 int user_alloc)
6425{
6426
6427 int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
6428
6422 if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
6429 if (!user_alloc && !old.user_alloc && old.npages && !npages) {
6423 int ret;
6424
6425 ret = vm_munmap(old.userspace_addr,
6426 old.npages * PAGE_SIZE);
6427 if (ret < 0)
6428 printk(KERN_WARNING
6429 "kvm_vm_ioctl_set_memory_region: "
6430 "failed to munmap memory\n");

--- 4 unchanged lines hidden (view full) ---

6435
6436 spin_lock(&kvm->mmu_lock);
6437 if (nr_mmu_pages)
6438 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
6439 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
6440 spin_unlock(&kvm->mmu_lock);
6441}
6442
6430 int ret;
6431
6432 ret = vm_munmap(old.userspace_addr,
6433 old.npages * PAGE_SIZE);
6434 if (ret < 0)
6435 printk(KERN_WARNING
6436 "kvm_vm_ioctl_set_memory_region: "
6437 "failed to munmap memory\n");

--- 4 unchanged lines hidden (view full) ---

6442
6443 spin_lock(&kvm->mmu_lock);
6444 if (nr_mmu_pages)
6445 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
6446 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
6447 spin_unlock(&kvm->mmu_lock);
6448}
6449
6443void kvm_arch_flush_shadow(struct kvm *kvm)
6450void kvm_arch_flush_shadow_all(struct kvm *kvm)
6444{
6445 kvm_mmu_zap_all(kvm);
6446 kvm_reload_remote_mmus(kvm);
6447}
6448
6451{
6452 kvm_mmu_zap_all(kvm);
6453 kvm_reload_remote_mmus(kvm);
6454}
6455
6456void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
6457 struct kvm_memory_slot *slot)
6458{
6459 kvm_arch_flush_shadow_all(kvm);
6460}
6461
6449int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
6450{
6451 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6452 !vcpu->arch.apf.halted)
6453 || !list_empty_careful(&vcpu->async_pf.done)
6454 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
6455 || atomic_read(&vcpu->arch.nmi_queued) ||
6456 (kvm_arch_interrupt_allowed(vcpu) &&

--- 197 unchanged lines hidden ---
6462int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
6463{
6464 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6465 !vcpu->arch.apf.halted)
6466 || !list_empty_careful(&vcpu->async_pf.done)
6467 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
6468 || atomic_read(&vcpu->arch.nmi_queued) ||
6469 (kvm_arch_interrupt_allowed(vcpu) &&

--- 197 unchanged lines hidden ---