/*
 * ARM implementation of KVM hooks
 *
 * Copyright Christoffer Dall 2009-2010
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_arm.h"
#include "cpu.h"
#include "internals.h"
#include "hw/arm/arm.h"
#include "exec/memattrs.h"
#include "exec/address-spaces.h"
#include "hw/boards.h"
#include "qemu/log.h"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static bool cap_has_mp_state;

int kvm_arm_vcpu_init(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    struct kvm_vcpu_init init;

    init.target = cpu->kvm_target;
    memcpy(init.features, cpu->kvm_init_features, sizeof(init.features));

    return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
}

bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
                                      int *fdarray,
                                      struct kvm_vcpu_init *init)
{
    int ret, kvmfd = -1, vmfd = -1, cpufd = -1;

    kvmfd = qemu_open("/dev/kvm", O_RDWR);
    if (kvmfd < 0) {
        goto err;
    }
    vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
    if (vmfd < 0) {
        goto err;
    }
    cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
    if (cpufd < 0) {
        goto err;
    }

    if (!init) {
        /* Caller doesn't want the VCPU to be initialized, so skip it */
        goto finish;
    }

    ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, init);
    if (ret >= 0) {
        ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
        if (ret < 0) {
            goto err;
        }
    } else if (cpus_to_try) {
        /* Old kernel which doesn't know about the
         * PREFERRED_TARGET ioctl: we know it will only support
         * creating one kind of guest CPU which is its preferred
         * CPU type.
         */
        while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
            init->target = *cpus_to_try++;
            memset(init->features, 0, sizeof(init->features));
            ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
            if (ret >= 0) {
                break;
            }
        }
        if (ret < 0) {
            goto err;
        }
    } else {
        /* Treat a NULL cpus_to_try argument the same as an empty
         * list, which means we will fail the call since this must
         * be an old kernel which doesn't support PREFERRED_TARGET.
         */
        goto err;
    }

finish:
    fdarray[0] = kvmfd;
    fdarray[1] = vmfd;
    fdarray[2] = cpufd;

    return true;

err:
    if (cpufd >= 0) {
        close(cpufd);
    }
    if (vmfd >= 0) {
        close(vmfd);
    }
    if (kvmfd >= 0) {
        close(kvmfd);
    }

    return false;
}

void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
{
    int i;

    for (i = 2; i >= 0; i--) {
        close(fdarray[i]);
    }
}
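/*
 * A sketch of how the scratch-VCPU helpers above are typically used
 * (the real caller is kvm_arm_get_host_cpu_features() in kvm32.c and
 * kvm64.c; the details here are illustrative, not a verbatim quote):
 *
 *     int fdarray[3];
 *     struct kvm_vcpu_init init;
 *
 *     if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
 *         return false;
 *     }
 *     ... read ID registers with KVM_GET_ONE_REG on fdarray[2],
 *         the scratch VCPU fd ...
 *     kvm_arm_destroy_scratch_host_vcpu(fdarray);
 */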
static void kvm_arm_host_cpu_class_init(ObjectClass *oc, void *data)
{
    ARMHostCPUClass *ahcc = ARM_HOST_CPU_CLASS(oc);

    /* All we really need to set up for the 'host' CPU
     * is the feature bits -- we rely on the fact that the
     * various ID register values in ARMCPU are only used for
     * TCG CPUs.
     */
    if (!kvm_arm_get_host_cpu_features(ahcc)) {
        fprintf(stderr, "Failed to retrieve host CPU features!\n");
        abort();
    }
}

static void kvm_arm_host_cpu_initfn(Object *obj)
{
    ARMHostCPUClass *ahcc = ARM_HOST_CPU_GET_CLASS(obj);
    ARMCPU *cpu = ARM_CPU(obj);
    CPUARMState *env = &cpu->env;

    cpu->kvm_target = ahcc->target;
    cpu->dtb_compatible = ahcc->dtb_compatible;
    env->features = ahcc->features;
}

static const TypeInfo host_arm_cpu_type_info = {
    .name = TYPE_ARM_HOST_CPU,
#ifdef TARGET_AARCH64
    .parent = TYPE_AARCH64_CPU,
#else
    .parent = TYPE_ARM_CPU,
#endif
    .instance_init = kvm_arm_host_cpu_initfn,
    .class_init = kvm_arm_host_cpu_class_init,
    .class_size = sizeof(ARMHostCPUClass),
};
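/*
 * The type above backs the "-cpu host" command line option, e.g.
 *
 *     qemu-system-aarch64 -machine virt -enable-kvm -cpu host ...
 *
 * It is registered from kvm_arch_init() below, so it only exists when
 * KVM is actually in use.
 */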
int kvm_arch_init(MachineState *ms, KVMState *s)
{
    /* For ARM, interrupt delivery is always asynchronous,
     * whether we are using an in-kernel VGIC or not.
     */
    kvm_async_interrupts_allowed = true;

    /*
     * PSCI wakes up secondary cores, so we always need to
     * have vCPUs waiting in kernel space
     */
    kvm_halt_in_kernel_allowed = true;

    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);

    type_register_static(&host_arm_cpu_type_info);

    return 0;
}

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}

/* We track all the KVM devices which need their memory addresses
 * passed to the kernel in a list of these structures.
 * When board init is complete we run through the list and
 * tell the kernel the base addresses of the memory regions.
 * We use a MemoryListener to track mapping and unmapping of
 * the regions during board creation, so the board models don't
 * need to do anything special for the KVM case.
 */
typedef struct KVMDevice {
    struct kvm_arm_device_addr kda;
    struct kvm_device_attr kdattr;
    MemoryRegion *mr;
    QSLIST_ENTRY(KVMDevice) entries;
    int dev_fd;
} KVMDevice;

static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head;

static void kvm_arm_devlistener_add(MemoryListener *listener,
                                    MemoryRegionSection *section)
{
    KVMDevice *kd;

    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
        if (section->mr == kd->mr) {
            kd->kda.addr = section->offset_within_address_space;
        }
    }
}

static void kvm_arm_devlistener_del(MemoryListener *listener,
                                    MemoryRegionSection *section)
{
    KVMDevice *kd;

    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
        if (section->mr == kd->mr) {
            kd->kda.addr = -1;
        }
    }
}

static MemoryListener devlistener = {
    .region_add = kvm_arm_devlistener_add,
    .region_del = kvm_arm_devlistener_del,
};

static void kvm_arm_set_device_addr(KVMDevice *kd)
{
    struct kvm_device_attr *attr = &kd->kdattr;
    int ret;

    /* If the device control API is available and we have a device fd on the
     * KVMDevice struct, let's use the newer API
     */
    if (kd->dev_fd >= 0) {
        uint64_t addr = kd->kda.addr;
        attr->addr = (uintptr_t)&addr;
        ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
    } else {
        ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
    }

    if (ret < 0) {
        fprintf(stderr, "Failed to set device address: %s\n",
                strerror(-ret));
        abort();
    }
}

static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
{
    KVMDevice *kd, *tkd;

    memory_listener_unregister(&devlistener);
    QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
        if (kd->kda.addr != -1) {
            kvm_arm_set_device_addr(kd);
        }
        memory_region_unref(kd->mr);
        g_free(kd);
    }
}

static Notifier notify = {
    .notify = kvm_arm_machine_init_done,
};

void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
                             uint64_t attr, int dev_fd)
{
    KVMDevice *kd;

    if (!kvm_irqchip_in_kernel()) {
        return;
    }

    if (QSLIST_EMPTY(&kvm_devices_head)) {
        memory_listener_register(&devlistener, &address_space_memory);
        qemu_add_machine_init_done_notifier(&notify);
    }
    kd = g_new0(KVMDevice, 1);
    kd->mr = mr;
    kd->kda.id = devid;
    kd->kda.addr = -1;
    kd->kdattr.flags = 0;
    kd->kdattr.group = group;
    kd->kdattr.attr = attr;
    kd->dev_fd = dev_fd;
    QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
    memory_region_ref(kd->mr);
}
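/*
 * For illustration, an in-kernel GIC model registers its distributor
 * region along these lines (a sketch in the spirit of
 * hw/intc/arm_gic_kvm.c; the exact arguments belong to that device):
 *
 *     kvm_arm_register_device(&s->iomem_dist,
 *                             (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT)
 *                             | KVM_VGIC_V2_ADDR_TYPE_DIST,
 *                             KVM_DEV_ARM_VGIC_GRP_ADDR,
 *                             KVM_VGIC_V2_ADDR_TYPE_DIST,
 *                             s->dev_fd);
 *
 * The address itself is captured later by the MemoryListener above and
 * pushed to the kernel from the machine-init-done notifier.
 */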
static int compare_u64(const void *a, const void *b)
{
    if (*(uint64_t *)a > *(uint64_t *)b) {
        return 1;
    }
    if (*(uint64_t *)a < *(uint64_t *)b) {
        return -1;
    }
    return 0;
}

/* Initialize the CPUState's cpreg list according to the kernel's
 * definition of what CPU registers it knows about (and throw away
 * the previous TCG-created cpreg list).
 */
int kvm_arm_init_cpreg_list(ARMCPU *cpu)
{
    struct kvm_reg_list rl;
    struct kvm_reg_list *rlp;
    int i, ret, arraylen;
    CPUState *cs = CPU(cpu);

    /* Ask for zero registers first: the kernel fails the request with
     * E2BIG but fills in rl.n with the real register count, which we
     * use to size the full request below.
     */
    rl.n = 0;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl);
    if (ret != -E2BIG) {
        return ret;
    }
    rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t));
    rlp->n = rl.n;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp);
    if (ret) {
        goto out;
    }
    /* Sort the list we get back from the kernel, since cpreg_tuples
     * must be in strictly ascending order.
     */
    qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64);

    for (i = 0, arraylen = 0; i < rlp->n; i++) {
        if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) {
            continue;
        }
        switch (rlp->reg[i] & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
        case KVM_REG_SIZE_U64:
            break;
        default:
            fprintf(stderr, "Can't handle size of register in kernel list\n");
            ret = -EINVAL;
            goto out;
        }

        arraylen++;
    }

    cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen);
    cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen);
    cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes,
                                         arraylen);
    cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
                                        arraylen);
    cpu->cpreg_array_len = arraylen;
    cpu->cpreg_vmstate_array_len = arraylen;

    for (i = 0, arraylen = 0; i < rlp->n; i++) {
        uint64_t regidx = rlp->reg[i];
        if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) {
            continue;
        }
        cpu->cpreg_indexes[arraylen] = regidx;
        arraylen++;
    }
    assert(cpu->cpreg_array_len == arraylen);

    if (!write_kvmstate_to_list(cpu)) {
        /* Shouldn't happen unless kernel is inconsistent about
         * what registers exist.
         */
        fprintf(stderr, "Initial read of kernel register state failed\n");
        ret = -EINVAL;
        goto out;
    }

out:
    g_free(rlp);
    return ret;
}
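/*
 * For reference, each index kept in cpreg_indexes[] is a 64-bit KVM
 * register ID as defined by the kernel ABI. For example, an AArch64
 * system register ID is built roughly as (a sketch; see linux/kvm.h
 * for the authoritative layout):
 *
 *     KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG |
 *         (op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) |
 *         (op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) | ...
 *
 * kvm_arm_reg_syncs_via_cpreg_list() (defined in kvm32.c and kvm64.c)
 * decides which of the kernel's registers QEMU mirrors via this list.
 */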
bool write_kvmstate_to_list(ARMCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int i;
    bool ok = true;

    for (i = 0; i < cpu->cpreg_array_len; i++) {
        struct kvm_one_reg r;
        uint64_t regidx = cpu->cpreg_indexes[i];
        uint32_t v32;
        int ret;

        r.id = regidx;

        switch (regidx & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            r.addr = (uintptr_t)&v32;
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
            if (!ret) {
                cpu->cpreg_values[i] = v32;
            }
            break;
        case KVM_REG_SIZE_U64:
            r.addr = (uintptr_t)(cpu->cpreg_values + i);
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
            break;
        default:
            abort();
        }
        if (ret) {
            ok = false;
        }
    }
    return ok;
}

bool write_list_to_kvmstate(ARMCPU *cpu, int level)
{
    CPUState *cs = CPU(cpu);
    int i;
    bool ok = true;

    for (i = 0; i < cpu->cpreg_array_len; i++) {
        struct kvm_one_reg r;
        uint64_t regidx = cpu->cpreg_indexes[i];
        uint32_t v32;
        int ret;

        if (kvm_arm_cpreg_level(regidx) > level) {
            continue;
        }

        r.id = regidx;
        switch (regidx & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            v32 = cpu->cpreg_values[i];
            r.addr = (uintptr_t)&v32;
            break;
        case KVM_REG_SIZE_U64:
            r.addr = (uintptr_t)(cpu->cpreg_values + i);
            break;
        default:
            abort();
        }
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
        if (ret) {
            /* We might fail for "unknown register" and also for
             * "you tried to set a register which is constant with
             * a different value from what it actually contains".
             */
            ok = false;
        }
    }
    return ok;
}
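/*
 * A sketch of how the two helpers above are used (the real callers are
 * kvm_arch_get_registers() and kvm_arch_put_registers() in kvm32.c and
 * kvm64.c; KVM_PUT_FULL_STATE is one of the generic KVM 'level' values):
 *
 *     write_kvmstate_to_list(cpu);       // kernel state -> cpreg_values[]
 *     ... migrate or inspect cpu->cpreg_values[] ...
 *     write_list_to_kvmstate(cpu, KVM_PUT_FULL_STATE);  // and back again
 */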
void kvm_arm_reset_vcpu(ARMCPU *cpu)
{
    int ret;

    /* Re-init VCPU so that all registers are set to
     * their respective reset values.
     */
    ret = kvm_arm_vcpu_init(CPU(cpu));
    if (ret < 0) {
        fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
        abort();
    }
    if (!write_kvmstate_to_list(cpu)) {
        fprintf(stderr, "write_kvmstate_to_list failed\n");
        abort();
    }
}

/*
 * Update KVM's MP_STATE based on what QEMU thinks it is
 */
int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
{
    if (cap_has_mp_state) {
        struct kvm_mp_state mp_state = {
            .mp_state = (cpu->power_state == PSCI_OFF) ?
            KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
        };
        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
        if (ret) {
            fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
                    __func__, ret, strerror(-ret));
            return -1;
        }
    }

    return 0;
}

/*
 * Sync the KVM MP_STATE into QEMU
 */
int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
{
    if (cap_has_mp_state) {
        struct kvm_mp_state mp_state;
        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state);
        if (ret) {
            fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n",
                    __func__, ret, strerror(-ret));
            abort();
        }
        cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ?
            PSCI_OFF : PSCI_ON;
    }

    return 0;
}

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
}

MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    ARMCPU *cpu;
    uint32_t switched_level;

    if (kvm_irqchip_in_kernel()) {
        /*
         * We only need to sync timer states with user-space interrupt
         * controllers, so return early and save cycles when the irqchip
         * lives in the kernel.
         */
        return MEMTXATTRS_UNSPECIFIED;
    }

    cpu = ARM_CPU(cs);

    /* Synchronize our shadowed in-kernel device irq lines with the kvm ones */
    if (run->s.regs.device_irq_level != cpu->device_irq_level) {
        switched_level = cpu->device_irq_level ^ run->s.regs.device_irq_level;

        qemu_mutex_lock_iothread();

        if (switched_level & KVM_ARM_DEV_EL1_VTIMER) {
            qemu_set_irq(cpu->gt_timer_outputs[GTIMER_VIRT],
                         !!(run->s.regs.device_irq_level &
                            KVM_ARM_DEV_EL1_VTIMER));
            switched_level &= ~KVM_ARM_DEV_EL1_VTIMER;
        }

        if (switched_level & KVM_ARM_DEV_EL1_PTIMER) {
            qemu_set_irq(cpu->gt_timer_outputs[GTIMER_PHYS],
                         !!(run->s.regs.device_irq_level &
                            KVM_ARM_DEV_EL1_PTIMER));
            switched_level &= ~KVM_ARM_DEV_EL1_PTIMER;
        }

        if (switched_level & KVM_ARM_DEV_PMU) {
            qemu_set_irq(cpu->pmu_interrupt,
                         !!(run->s.regs.device_irq_level & KVM_ARM_DEV_PMU));
            switched_level &= ~KVM_ARM_DEV_PMU;
        }

        if (switched_level) {
            qemu_log_mask(LOG_UNIMP, "%s: unhandled in-kernel device IRQ %x\n",
                          __func__, switched_level);
        }

        /* We also mark unknown levels as processed to not waste cycles */
        cpu->device_irq_level = run->s.regs.device_irq_level;
        qemu_mutex_unlock_iothread();
    }

    return MEMTXATTRS_UNSPECIFIED;
}
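/*
 * Note: run->s.regs.device_irq_level above is only provided by kernels
 * that advertise KVM_CAP_ARM_USER_IRQ (an assumption worth checking
 * against your kernel headers); with an in-kernel GIC the timer and PMU
 * output lines are wired up entirely inside the kernel, and the sync is
 * skipped via the early return.
 */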
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    int ret = 0;

    switch (run->exit_reason) {
    case KVM_EXIT_DEBUG:
        if (kvm_arm_handle_debug(cs, &run->debug.arch)) {
            ret = EXCP_DEBUG;
        } /* otherwise return to guest */
        break;
    default:
        qemu_log_mask(LOG_UNIMP, "%s: unhandled exit reason %d\n",
                      __func__, run->exit_reason);
        break;
    }
    return ret;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cs)
{
    return true;
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return 0;
}

/* The #ifdef protections are until 32bit headers are imported and can
 * be removed once both 32 and 64 bit reach feature parity.
 */
void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
#ifdef KVM_GUESTDBG_USE_SW_BP
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }
#endif
#ifdef KVM_GUESTDBG_USE_HW
    if (kvm_arm_hw_debug_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
        kvm_arm_copy_hw_debug_data(&dbg->arch);
    }
#endif
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    if (machine_kernel_irqchip_split(ms)) {
        error_report("-machine kernel_irqchip=split is not supported on ARM.");
        exit(1);
    }

    /* If we can create the VGIC using the newer device control API, we
     * let the device do this when it initializes itself, otherwise we
     * fall back to the old API
     */
    return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
}

int kvm_arm_vgic_probe(void)
{
    /* Create the devices in test mode: this checks whether the kernel
     * could create a VGIC of each flavour without instantiating one.
     */
    if (kvm_create_device(kvm_state,
                          KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) {
        return 3;
    } else if (kvm_create_device(kvm_state,
                                 KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
        return 2;
    } else {
        return 0;
    }
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    /* The MSI data word carries the GIC interrupt ID of the target SPI;
     * QEMU's GSI numbering for SPIs is zero-based, and SPI IDs start at 32.
     */
    return (data - 32) & 0xffff;
}