1 /* 2 * QEMU KVM support, paravirtual clock device 3 * 4 * Copyright (C) 2011 Siemens AG 5 * 6 * Authors: 7 * Jan Kiszka <jan.kiszka@siemens.com> 8 * 9 * This work is licensed under the terms of the GNU GPL version 2. 10 * See the COPYING file in the top-level directory. 11 * 12 * Contributions after 2012-01-13 are licensed under the terms of the 13 * GNU GPL, version 2 or (at your option) any later version. 14 */ 15 16 #include "qemu-common.h" 17 #include "qemu/host-utils.h" 18 #include "sysemu/sysemu.h" 19 #include "sysemu/kvm.h" 20 #include "sysemu/cpus.h" 21 #include "hw/sysbus.h" 22 #include "hw/kvm/clock.h" 23 24 #include <linux/kvm.h> 25 #include <linux/kvm_para.h> 26 27 #define TYPE_KVM_CLOCK "kvmclock" 28 #define KVM_CLOCK(obj) OBJECT_CHECK(KVMClockState, (obj), TYPE_KVM_CLOCK) 29 30 typedef struct KVMClockState { 31 /*< private >*/ 32 SysBusDevice busdev; 33 /*< public >*/ 34 35 uint64_t clock; 36 bool clock_valid; 37 } KVMClockState; 38 39 struct pvclock_vcpu_time_info { 40 uint32_t version; 41 uint32_t pad0; 42 uint64_t tsc_timestamp; 43 uint64_t system_time; 44 uint32_t tsc_to_system_mul; 45 int8_t tsc_shift; 46 uint8_t flags; 47 uint8_t pad[2]; 48 } __attribute__((__packed__)); /* 32 bytes */ 49 50 static uint64_t kvmclock_current_nsec(KVMClockState *s) 51 { 52 CPUState *cpu = first_cpu; 53 CPUX86State *env = cpu->env_ptr; 54 hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL; 55 uint64_t migration_tsc = env->tsc; 56 struct pvclock_vcpu_time_info time; 57 uint64_t delta; 58 uint64_t nsec_lo; 59 uint64_t nsec_hi; 60 uint64_t nsec; 61 62 if (!(env->system_time_msr & 1ULL)) { 63 /* KVM clock not active */ 64 return 0; 65 } 66 67 cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time)); 68 69 assert(time.tsc_timestamp <= migration_tsc); 70 delta = migration_tsc - time.tsc_timestamp; 71 if (time.tsc_shift < 0) { 72 delta >>= -time.tsc_shift; 73 } else { 74 delta <<= time.tsc_shift; 75 } 76 77 mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul); 78 nsec = (nsec_lo >> 32) | (nsec_hi << 32); 79 return nsec + time.system_time; 80 } 81 82 static void kvmclock_vm_state_change(void *opaque, int running, 83 RunState state) 84 { 85 KVMClockState *s = opaque; 86 CPUState *cpu; 87 int cap_clock_ctrl = kvm_check_extension(kvm_state, KVM_CAP_KVMCLOCK_CTRL); 88 int ret; 89 90 if (running) { 91 struct kvm_clock_data data = {}; 92 uint64_t time_at_migration = kvmclock_current_nsec(s); 93 94 s->clock_valid = false; 95 96 /* We can't rely on the migrated clock value, just discard it */ 97 if (time_at_migration) { 98 s->clock = time_at_migration; 99 } 100 101 data.clock = s->clock; 102 ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); 103 if (ret < 0) { 104 fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(ret)); 105 abort(); 106 } 107 108 if (!cap_clock_ctrl) { 109 return; 110 } 111 CPU_FOREACH(cpu) { 112 ret = kvm_vcpu_ioctl(cpu, KVM_KVMCLOCK_CTRL, 0); 113 if (ret) { 114 if (ret != -EINVAL) { 115 fprintf(stderr, "%s: %s\n", __func__, strerror(-ret)); 116 } 117 return; 118 } 119 } 120 } else { 121 struct kvm_clock_data data; 122 int ret; 123 124 if (s->clock_valid) { 125 return; 126 } 127 128 cpu_synchronize_all_states(); 129 /* In theory, the cpu_synchronize_all_states() call above wouldn't 130 * affect the rest of the code, as the VCPU state inside CPUState 131 * is supposed to always match the VCPU state on the kernel side. 132 * 133 * In practice, calling cpu_synchronize_state() too soon will load the 134 * kernel-side APIC state into X86CPU.apic_state too early, APIC state 135 * won't be reloaded later because CPUState.vcpu_dirty==true, and 136 * outdated APIC state may be migrated to another host. 137 * 138 * The real fix would be to make sure outdated APIC state is read 139 * from the kernel again when necessary. While this is not fixed, we 140 * need the cpu_clean_all_dirty() call below. 141 */ 142 cpu_clean_all_dirty(); 143 144 ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); 145 if (ret < 0) { 146 fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); 147 abort(); 148 } 149 s->clock = data.clock; 150 151 /* 152 * If the VM is stopped, declare the clock state valid to 153 * avoid re-reading it on next vmsave (which would return 154 * a different value). Will be reset when the VM is continued. 155 */ 156 s->clock_valid = true; 157 } 158 } 159 160 static void kvmclock_realize(DeviceState *dev, Error **errp) 161 { 162 KVMClockState *s = KVM_CLOCK(dev); 163 164 qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); 165 } 166 167 static const VMStateDescription kvmclock_vmsd = { 168 .name = "kvmclock", 169 .version_id = 1, 170 .minimum_version_id = 1, 171 .fields = (VMStateField[]) { 172 VMSTATE_UINT64(clock, KVMClockState), 173 VMSTATE_END_OF_LIST() 174 } 175 }; 176 177 static void kvmclock_class_init(ObjectClass *klass, void *data) 178 { 179 DeviceClass *dc = DEVICE_CLASS(klass); 180 181 dc->realize = kvmclock_realize; 182 dc->vmsd = &kvmclock_vmsd; 183 } 184 185 static const TypeInfo kvmclock_info = { 186 .name = TYPE_KVM_CLOCK, 187 .parent = TYPE_SYS_BUS_DEVICE, 188 .instance_size = sizeof(KVMClockState), 189 .class_init = kvmclock_class_init, 190 }; 191 192 /* Note: Must be called after VCPU initialization. */ 193 void kvmclock_create(void) 194 { 195 X86CPU *cpu = X86_CPU(first_cpu); 196 197 if (kvm_enabled() && 198 cpu->env.features[FEAT_KVM] & ((1ULL << KVM_FEATURE_CLOCKSOURCE) | 199 (1ULL << KVM_FEATURE_CLOCKSOURCE2))) { 200 sysbus_create_simple(TYPE_KVM_CLOCK, -1, NULL); 201 } 202 } 203 204 static void kvmclock_register_types(void) 205 { 206 type_register_static(&kvmclock_info); 207 } 208 209 type_init(kvmclock_register_types) 210