1 /* 2 * QEMU KVM support 3 * 4 * Copyright IBM, Corp. 2008 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 * 12 */ 13 14 #ifndef QEMU_KVM_H 15 #define QEMU_KVM_H 16 17 #include "qemu/accel.h" 18 #include "qom/object.h" 19 20 #ifdef NEED_CPU_H 21 # ifdef CONFIG_KVM 22 # include <linux/kvm.h> 23 # define CONFIG_KVM_IS_POSSIBLE 24 # endif 25 #else 26 # define CONFIG_KVM_IS_POSSIBLE 27 #endif 28 29 #ifdef CONFIG_KVM_IS_POSSIBLE 30 31 extern bool kvm_allowed; 32 extern bool kvm_kernel_irqchip; 33 extern bool kvm_split_irqchip; 34 extern bool kvm_async_interrupts_allowed; 35 extern bool kvm_halt_in_kernel_allowed; 36 extern bool kvm_eventfds_allowed; 37 extern bool kvm_irqfds_allowed; 38 extern bool kvm_resamplefds_allowed; 39 extern bool kvm_msi_via_irqfd_allowed; 40 extern bool kvm_gsi_routing_allowed; 41 extern bool kvm_gsi_direct_mapping; 42 extern bool kvm_readonly_mem_allowed; 43 extern bool kvm_direct_msi_allowed; 44 extern bool kvm_ioeventfd_any_length_allowed; 45 extern bool kvm_msi_use_devid; 46 47 #define kvm_enabled() (kvm_allowed) 48 /** 49 * kvm_irqchip_in_kernel: 50 * 51 * Returns: true if an in-kernel irqchip was created. 52 * What this actually means is architecture and machine model 53 * specific: on PC, for instance, it means that the LAPIC 54 * is in kernel. This function should never be used from generic 55 * target-independent code: use one of the following functions or 56 * some other specific check instead. 57 */ 58 #define kvm_irqchip_in_kernel() (kvm_kernel_irqchip) 59 60 /** 61 * kvm_irqchip_is_split: 62 * 63 * Returns: true if the irqchip implementation is split between 64 * user and kernel space. The details are architecture and 65 * machine specific. On PC, it means that the PIC, IOAPIC, and 66 * PIT are in user space while the LAPIC is in the kernel. 67 */ 68 #define kvm_irqchip_is_split() (kvm_split_irqchip) 69 70 /** 71 * kvm_async_interrupts_enabled: 72 * 73 * Returns: true if we can deliver interrupts to KVM 74 * asynchronously (ie by ioctl from any thread at any time) 75 * rather than having to do interrupt delivery synchronously 76 * (where the vcpu must be stopped at a suitable point first). 77 */ 78 #define kvm_async_interrupts_enabled() (kvm_async_interrupts_allowed) 79 80 /** 81 * kvm_halt_in_kernel 82 * 83 * Returns: true if halted cpus should still get a KVM_RUN ioctl to run 84 * inside of kernel space. This only works if MP state is implemented. 85 */ 86 #define kvm_halt_in_kernel() (kvm_halt_in_kernel_allowed) 87 88 /** 89 * kvm_eventfds_enabled: 90 * 91 * Returns: true if we can use eventfds to receive notifications 92 * from a KVM CPU (ie the kernel supports eventds and we are running 93 * with a configuration where it is meaningful to use them). 94 */ 95 #define kvm_eventfds_enabled() (kvm_eventfds_allowed) 96 97 /** 98 * kvm_irqfds_enabled: 99 * 100 * Returns: true if we can use irqfds to inject interrupts into 101 * a KVM CPU (ie the kernel supports irqfds and we are running 102 * with a configuration where it is meaningful to use them). 103 */ 104 #define kvm_irqfds_enabled() (kvm_irqfds_allowed) 105 106 /** 107 * kvm_resamplefds_enabled: 108 * 109 * Returns: true if we can use resamplefds to inject interrupts into 110 * a KVM CPU (ie the kernel supports resamplefds and we are running 111 * with a configuration where it is meaningful to use them). 112 */ 113 #define kvm_resamplefds_enabled() (kvm_resamplefds_allowed) 114 115 /** 116 * kvm_msi_via_irqfd_enabled: 117 * 118 * Returns: true if we can route a PCI MSI (Message Signaled Interrupt) 119 * to a KVM CPU via an irqfd. This requires that the kernel supports 120 * this and that we're running in a configuration that permits it. 121 */ 122 #define kvm_msi_via_irqfd_enabled() (kvm_msi_via_irqfd_allowed) 123 124 /** 125 * kvm_gsi_routing_enabled: 126 * 127 * Returns: true if GSI routing is enabled (ie the kernel supports 128 * it and we're running in a configuration that permits it). 129 */ 130 #define kvm_gsi_routing_enabled() (kvm_gsi_routing_allowed) 131 132 /** 133 * kvm_gsi_direct_mapping: 134 * 135 * Returns: true if GSI direct mapping is enabled. 136 */ 137 #define kvm_gsi_direct_mapping() (kvm_gsi_direct_mapping) 138 139 /** 140 * kvm_readonly_mem_enabled: 141 * 142 * Returns: true if KVM readonly memory is enabled (ie the kernel 143 * supports it and we're running in a configuration that permits it). 144 */ 145 #define kvm_readonly_mem_enabled() (kvm_readonly_mem_allowed) 146 147 /** 148 * kvm_direct_msi_enabled: 149 * 150 * Returns: true if KVM allows direct MSI injection. 151 */ 152 #define kvm_direct_msi_enabled() (kvm_direct_msi_allowed) 153 154 /** 155 * kvm_ioeventfd_any_length_enabled: 156 * Returns: true if KVM allows any length io eventfd. 157 */ 158 #define kvm_ioeventfd_any_length_enabled() (kvm_ioeventfd_any_length_allowed) 159 160 /** 161 * kvm_msi_devid_required: 162 * Returns: true if KVM requires a device id to be provided while 163 * defining an MSI routing entry. 164 */ 165 #define kvm_msi_devid_required() (kvm_msi_use_devid) 166 167 #else 168 169 #define kvm_enabled() (0) 170 #define kvm_irqchip_in_kernel() (false) 171 #define kvm_irqchip_is_split() (false) 172 #define kvm_async_interrupts_enabled() (false) 173 #define kvm_halt_in_kernel() (false) 174 #define kvm_eventfds_enabled() (false) 175 #define kvm_irqfds_enabled() (false) 176 #define kvm_resamplefds_enabled() (false) 177 #define kvm_msi_via_irqfd_enabled() (false) 178 #define kvm_gsi_routing_allowed() (false) 179 #define kvm_gsi_direct_mapping() (false) 180 #define kvm_readonly_mem_enabled() (false) 181 #define kvm_direct_msi_enabled() (false) 182 #define kvm_ioeventfd_any_length_enabled() (false) 183 #define kvm_msi_devid_required() (false) 184 185 #endif /* CONFIG_KVM_IS_POSSIBLE */ 186 187 struct kvm_run; 188 struct kvm_lapic_state; 189 struct kvm_irq_routing_entry; 190 191 typedef struct KVMCapabilityInfo { 192 const char *name; 193 int value; 194 } KVMCapabilityInfo; 195 196 #define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP } 197 #define KVM_CAP_LAST_INFO { NULL, 0 } 198 199 struct KVMState; 200 201 #define TYPE_KVM_ACCEL ACCEL_CLASS_NAME("kvm") 202 typedef struct KVMState KVMState; 203 DECLARE_INSTANCE_CHECKER(KVMState, KVM_STATE, 204 TYPE_KVM_ACCEL) 205 206 extern KVMState *kvm_state; 207 typedef struct Notifier Notifier; 208 209 typedef struct KVMRouteChange { 210 KVMState *s; 211 int changes; 212 } KVMRouteChange; 213 214 /* external API */ 215 216 bool kvm_has_free_slot(MachineState *ms); 217 bool kvm_has_sync_mmu(void); 218 int kvm_has_vcpu_events(void); 219 int kvm_has_robust_singlestep(void); 220 int kvm_has_debugregs(void); 221 int kvm_max_nested_state_length(void); 222 int kvm_has_pit_state2(void); 223 int kvm_has_many_ioeventfds(void); 224 int kvm_has_gsi_routing(void); 225 int kvm_has_intx_set_mask(void); 226 227 /** 228 * kvm_arm_supports_user_irq 229 * 230 * Not all KVM implementations support notifications for kernel generated 231 * interrupt events to user space. This function indicates whether the current 232 * KVM implementation does support them. 233 * 234 * Returns: true if KVM supports using kernel generated IRQs from user space 235 */ 236 bool kvm_arm_supports_user_irq(void); 237 238 239 int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); 240 int kvm_on_sigbus(int code, void *addr); 241 242 #ifdef NEED_CPU_H 243 #include "cpu.h" 244 245 void kvm_flush_coalesced_mmio_buffer(void); 246 247 /** 248 * kvm_update_guest_debug(): ensure KVM debug structures updated 249 * @cs: the CPUState for this cpu 250 * @reinject_trap: KVM trap injection control 251 * 252 * There are usually per-arch specifics which will be handled by 253 * calling down to kvm_arch_update_guest_debug after the generic 254 * fields have been set. 255 */ 256 #ifdef KVM_CAP_SET_GUEST_DEBUG 257 int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap); 258 #else 259 static inline int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap) 260 { 261 return -EINVAL; 262 } 263 #endif 264 265 /* internal API */ 266 267 int kvm_ioctl(KVMState *s, int type, ...); 268 269 int kvm_vm_ioctl(KVMState *s, int type, ...); 270 271 int kvm_vcpu_ioctl(CPUState *cpu, int type, ...); 272 273 /** 274 * kvm_device_ioctl - call an ioctl on a kvm device 275 * @fd: The KVM device file descriptor as returned from KVM_CREATE_DEVICE 276 * @type: The device-ctrl ioctl number 277 * 278 * Returns: -errno on error, nonnegative on success 279 */ 280 int kvm_device_ioctl(int fd, int type, ...); 281 282 /** 283 * kvm_vm_check_attr - check for existence of a specific vm attribute 284 * @s: The KVMState pointer 285 * @group: the group 286 * @attr: the attribute of that group to query for 287 * 288 * Returns: 1 if the attribute exists 289 * 0 if the attribute either does not exist or if the vm device 290 * interface is unavailable 291 */ 292 int kvm_vm_check_attr(KVMState *s, uint32_t group, uint64_t attr); 293 294 /** 295 * kvm_device_check_attr - check for existence of a specific device attribute 296 * @fd: The device file descriptor 297 * @group: the group 298 * @attr: the attribute of that group to query for 299 * 300 * Returns: 1 if the attribute exists 301 * 0 if the attribute either does not exist or if the vm device 302 * interface is unavailable 303 */ 304 int kvm_device_check_attr(int fd, uint32_t group, uint64_t attr); 305 306 /** 307 * kvm_device_access - set or get value of a specific device attribute 308 * @fd: The device file descriptor 309 * @group: the group 310 * @attr: the attribute of that group to set or get 311 * @val: pointer to a storage area for the value 312 * @write: true for set and false for get operation 313 * @errp: error object handle 314 * 315 * Returns: 0 on success 316 * < 0 on error 317 * Use kvm_device_check_attr() in order to check for the availability 318 * of optional attributes. 319 */ 320 int kvm_device_access(int fd, int group, uint64_t attr, 321 void *val, bool write, Error **errp); 322 323 /** 324 * kvm_create_device - create a KVM device for the device control API 325 * @KVMState: The KVMState pointer 326 * @type: The KVM device type (see Documentation/virtual/kvm/devices in the 327 * kernel source) 328 * @test: If true, only test if device can be created, but don't actually 329 * create the device. 330 * 331 * Returns: -errno on error, nonnegative on success: @test ? 0 : device fd; 332 */ 333 int kvm_create_device(KVMState *s, uint64_t type, bool test); 334 335 /** 336 * kvm_device_supported - probe whether KVM supports specific device 337 * 338 * @vmfd: The fd handler for VM 339 * @type: type of device 340 * 341 * @return: true if supported, otherwise false. 342 */ 343 bool kvm_device_supported(int vmfd, uint64_t type); 344 345 /* Arch specific hooks */ 346 347 extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; 348 349 void kvm_arch_accel_class_init(ObjectClass *oc); 350 351 void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run); 352 MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); 353 354 int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run); 355 356 int kvm_arch_process_async_events(CPUState *cpu); 357 358 int kvm_arch_get_registers(CPUState *cpu); 359 360 /* state subset only touched by the VCPU itself during runtime */ 361 #define KVM_PUT_RUNTIME_STATE 1 362 /* state subset modified during VCPU reset */ 363 #define KVM_PUT_RESET_STATE 2 364 /* full state set, modified during initialization or on vmload */ 365 #define KVM_PUT_FULL_STATE 3 366 367 int kvm_arch_put_registers(CPUState *cpu, int level); 368 369 int kvm_arch_init(MachineState *ms, KVMState *s); 370 371 int kvm_arch_init_vcpu(CPUState *cpu); 372 int kvm_arch_destroy_vcpu(CPUState *cpu); 373 374 bool kvm_vcpu_id_is_valid(int vcpu_id); 375 376 /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ 377 unsigned long kvm_arch_vcpu_id(CPUState *cpu); 378 379 #ifdef KVM_HAVE_MCE_INJECTION 380 void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); 381 #endif 382 383 void kvm_arch_init_irq_routing(KVMState *s); 384 385 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 386 uint64_t address, uint32_t data, PCIDevice *dev); 387 388 /* Notify arch about newly added MSI routes */ 389 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 390 int vector, PCIDevice *dev); 391 /* Notify arch about released MSI routes */ 392 int kvm_arch_release_virq_post(int virq); 393 394 int kvm_arch_msi_data_to_gsi(uint32_t data); 395 396 int kvm_set_irq(KVMState *s, int irq, int level); 397 int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); 398 399 void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin); 400 401 void kvm_irqchip_add_change_notifier(Notifier *n); 402 void kvm_irqchip_remove_change_notifier(Notifier *n); 403 void kvm_irqchip_change_notify(void); 404 405 void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); 406 407 struct kvm_guest_debug; 408 struct kvm_debug_exit_arch; 409 410 struct kvm_sw_breakpoint { 411 target_ulong pc; 412 target_ulong saved_insn; 413 int use_count; 414 QTAILQ_ENTRY(kvm_sw_breakpoint) entry; 415 }; 416 417 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu, 418 target_ulong pc); 419 420 int kvm_sw_breakpoints_active(CPUState *cpu); 421 422 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, 423 struct kvm_sw_breakpoint *bp); 424 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, 425 struct kvm_sw_breakpoint *bp); 426 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 427 target_ulong len, int type); 428 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 429 target_ulong len, int type); 430 void kvm_arch_remove_all_hw_breakpoints(void); 431 432 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg); 433 434 bool kvm_arch_stop_on_emulation_error(CPUState *cpu); 435 436 int kvm_check_extension(KVMState *s, unsigned int extension); 437 438 int kvm_vm_check_extension(KVMState *s, unsigned int extension); 439 440 #define kvm_vm_enable_cap(s, capability, cap_flags, ...) \ 441 ({ \ 442 struct kvm_enable_cap cap = { \ 443 .cap = capability, \ 444 .flags = cap_flags, \ 445 }; \ 446 uint64_t args_tmp[] = { __VA_ARGS__ }; \ 447 size_t n = MIN(ARRAY_SIZE(args_tmp), ARRAY_SIZE(cap.args)); \ 448 memcpy(cap.args, args_tmp, n * sizeof(cap.args[0])); \ 449 kvm_vm_ioctl(s, KVM_ENABLE_CAP, &cap); \ 450 }) 451 452 #define kvm_vcpu_enable_cap(cpu, capability, cap_flags, ...) \ 453 ({ \ 454 struct kvm_enable_cap cap = { \ 455 .cap = capability, \ 456 .flags = cap_flags, \ 457 }; \ 458 uint64_t args_tmp[] = { __VA_ARGS__ }; \ 459 size_t n = MIN(ARRAY_SIZE(args_tmp), ARRAY_SIZE(cap.args)); \ 460 memcpy(cap.args, args_tmp, n * sizeof(cap.args[0])); \ 461 kvm_vcpu_ioctl(cpu, KVM_ENABLE_CAP, &cap); \ 462 }) 463 464 uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function, 465 uint32_t index, int reg); 466 uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); 467 468 469 void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); 470 471 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, 472 hwaddr *phys_addr); 473 474 #endif /* NEED_CPU_H */ 475 476 void kvm_cpu_synchronize_state(CPUState *cpu); 477 478 void kvm_init_cpu_signals(CPUState *cpu); 479 480 /** 481 * kvm_irqchip_add_msi_route - Add MSI route for specific vector 482 * @c: KVMRouteChange instance. 483 * @vector: which vector to add. This can be either MSI/MSIX 484 * vector. The function will automatically detect whether 485 * MSI/MSIX is enabled, and fetch corresponding MSI 486 * message. 487 * @dev: Owner PCI device to add the route. If @dev is specified 488 * as @NULL, an empty MSI message will be inited. 489 * @return: virq (>=0) when success, errno (<0) when failed. 490 */ 491 int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev); 492 int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, 493 PCIDevice *dev); 494 void kvm_irqchip_commit_routes(KVMState *s); 495 496 static inline KVMRouteChange kvm_irqchip_begin_route_changes(KVMState *s) 497 { 498 return (KVMRouteChange) { .s = s, .changes = 0 }; 499 } 500 501 static inline void kvm_irqchip_commit_route_changes(KVMRouteChange *c) 502 { 503 if (c->changes) { 504 kvm_irqchip_commit_routes(c->s); 505 c->changes = 0; 506 } 507 } 508 509 void kvm_irqchip_release_virq(KVMState *s, int virq); 510 511 int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter); 512 int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint); 513 514 int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, 515 EventNotifier *rn, int virq); 516 int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, 517 int virq); 518 int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, 519 EventNotifier *rn, qemu_irq irq); 520 int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, 521 qemu_irq irq); 522 void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi); 523 void kvm_pc_setup_irq_routing(bool pci_enabled); 524 void kvm_init_irq_routing(KVMState *s); 525 526 bool kvm_kernel_irqchip_allowed(void); 527 bool kvm_kernel_irqchip_required(void); 528 bool kvm_kernel_irqchip_split(void); 529 530 /** 531 * kvm_arch_irqchip_create: 532 * @KVMState: The KVMState pointer 533 * 534 * Allow architectures to create an in-kernel irq chip themselves. 535 * 536 * Returns: < 0: error 537 * 0: irq chip was not created 538 * > 0: irq chip was created 539 */ 540 int kvm_arch_irqchip_create(KVMState *s); 541 542 /** 543 * kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl 544 * @id: The register ID 545 * @source: The pointer to the value to be set. It must point to a variable 546 * of the correct type/size for the register being accessed. 547 * 548 * Returns: 0 on success, or a negative errno on failure. 549 */ 550 int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source); 551 552 /** 553 * kvm_get_one_reg - get a register value from KVM via KVM_GET_ONE_REG ioctl 554 * @id: The register ID 555 * @target: The pointer where the value is to be stored. It must point to a 556 * variable of the correct type/size for the register being accessed. 557 * 558 * Returns: 0 on success, or a negative errno on failure. 559 */ 560 int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); 561 struct ppc_radix_page_info *kvm_get_radix_page_info(void); 562 int kvm_get_max_memslots(void); 563 564 /* Notify resamplefd for EOI of specific interrupts. */ 565 void kvm_resample_fd_notify(int gsi); 566 567 /** 568 * kvm_cpu_check_are_resettable - return whether CPUs can be reset 569 * 570 * Returns: true: CPUs are resettable 571 * false: CPUs are not resettable 572 */ 573 bool kvm_cpu_check_are_resettable(void); 574 575 bool kvm_arch_cpu_check_are_resettable(void); 576 577 bool kvm_dirty_ring_enabled(void); 578 579 uint32_t kvm_dirty_ring_size(void); 580 #endif 581