1 /* 2 * ARM implementation of KVM hooks 3 * 4 * Copyright Christoffer Dall 2009-2010 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11 #include "qemu/osdep.h" 12 #include <sys/ioctl.h> 13 14 #include <linux/kvm.h> 15 16 #include "qemu-common.h" 17 #include "qemu/timer.h" 18 #include "qemu/error-report.h" 19 #include "sysemu/sysemu.h" 20 #include "sysemu/kvm.h" 21 #include "kvm_arm.h" 22 #include "cpu.h" 23 #include "internals.h" 24 #include "hw/arm/arm.h" 25 #include "exec/memattrs.h" 26 #include "exec/address-spaces.h" 27 #include "hw/boards.h" 28 #include "qemu/log.h" 29 30 const KVMCapabilityInfo kvm_arch_required_capabilities[] = { 31 KVM_CAP_LAST_INFO 32 }; 33 34 static bool cap_has_mp_state; 35 36 int kvm_arm_vcpu_init(CPUState *cs) 37 { 38 ARMCPU *cpu = ARM_CPU(cs); 39 struct kvm_vcpu_init init; 40 41 init.target = cpu->kvm_target; 42 memcpy(init.features, cpu->kvm_init_features, sizeof(init.features)); 43 44 return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init); 45 } 46 47 bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, 48 int *fdarray, 49 struct kvm_vcpu_init *init) 50 { 51 int ret, kvmfd = -1, vmfd = -1, cpufd = -1; 52 53 kvmfd = qemu_open("/dev/kvm", O_RDWR); 54 if (kvmfd < 0) { 55 goto err; 56 } 57 vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0); 58 if (vmfd < 0) { 59 goto err; 60 } 61 cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0); 62 if (cpufd < 0) { 63 goto err; 64 } 65 66 if (!init) { 67 /* Caller doesn't want the VCPU to be initialized, so skip it */ 68 goto finish; 69 } 70 71 ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, init); 72 if (ret >= 0) { 73 ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init); 74 if (ret < 0) { 75 goto err; 76 } 77 } else if (cpus_to_try) { 78 /* Old kernel which doesn't know about the 79 * PREFERRED_TARGET ioctl: we know it will only support 80 * creating one kind of guest CPU which is its preferred 81 * CPU type. 82 */ 83 while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) { 84 init->target = *cpus_to_try++; 85 memset(init->features, 0, sizeof(init->features)); 86 ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init); 87 if (ret >= 0) { 88 break; 89 } 90 } 91 if (ret < 0) { 92 goto err; 93 } 94 } else { 95 /* Treat a NULL cpus_to_try argument the same as an empty 96 * list, which means we will fail the call since this must 97 * be an old kernel which doesn't support PREFERRED_TARGET. 98 */ 99 goto err; 100 } 101 102 finish: 103 fdarray[0] = kvmfd; 104 fdarray[1] = vmfd; 105 fdarray[2] = cpufd; 106 107 return true; 108 109 err: 110 if (cpufd >= 0) { 111 close(cpufd); 112 } 113 if (vmfd >= 0) { 114 close(vmfd); 115 } 116 if (kvmfd >= 0) { 117 close(kvmfd); 118 } 119 120 return false; 121 } 122 123 void kvm_arm_destroy_scratch_host_vcpu(int *fdarray) 124 { 125 int i; 126 127 for (i = 2; i >= 0; i--) { 128 close(fdarray[i]); 129 } 130 } 131 132 static void kvm_arm_host_cpu_class_init(ObjectClass *oc, void *data) 133 { 134 ARMHostCPUClass *ahcc = ARM_HOST_CPU_CLASS(oc); 135 136 /* All we really need to set up for the 'host' CPU 137 * is the feature bits -- we rely on the fact that the 138 * various ID register values in ARMCPU are only used for 139 * TCG CPUs. 140 */ 141 if (!kvm_arm_get_host_cpu_features(ahcc)) { 142 fprintf(stderr, "Failed to retrieve host CPU features!\n"); 143 abort(); 144 } 145 } 146 147 static void kvm_arm_host_cpu_initfn(Object *obj) 148 { 149 ARMHostCPUClass *ahcc = ARM_HOST_CPU_GET_CLASS(obj); 150 ARMCPU *cpu = ARM_CPU(obj); 151 CPUARMState *env = &cpu->env; 152 153 cpu->kvm_target = ahcc->target; 154 cpu->dtb_compatible = ahcc->dtb_compatible; 155 env->features = ahcc->features; 156 } 157 158 static const TypeInfo host_arm_cpu_type_info = { 159 .name = TYPE_ARM_HOST_CPU, 160 #ifdef TARGET_AARCH64 161 .parent = TYPE_AARCH64_CPU, 162 #else 163 .parent = TYPE_ARM_CPU, 164 #endif 165 .instance_init = kvm_arm_host_cpu_initfn, 166 .class_init = kvm_arm_host_cpu_class_init, 167 .class_size = sizeof(ARMHostCPUClass), 168 }; 169 170 int kvm_arch_init(MachineState *ms, KVMState *s) 171 { 172 /* For ARM interrupt delivery is always asynchronous, 173 * whether we are using an in-kernel VGIC or not. 174 */ 175 kvm_async_interrupts_allowed = true; 176 177 cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); 178 179 type_register_static(&host_arm_cpu_type_info); 180 181 return 0; 182 } 183 184 unsigned long kvm_arch_vcpu_id(CPUState *cpu) 185 { 186 return cpu->cpu_index; 187 } 188 189 /* We track all the KVM devices which need their memory addresses 190 * passing to the kernel in a list of these structures. 191 * When board init is complete we run through the list and 192 * tell the kernel the base addresses of the memory regions. 193 * We use a MemoryListener to track mapping and unmapping of 194 * the regions during board creation, so the board models don't 195 * need to do anything special for the KVM case. 196 */ 197 typedef struct KVMDevice { 198 struct kvm_arm_device_addr kda; 199 struct kvm_device_attr kdattr; 200 MemoryRegion *mr; 201 QSLIST_ENTRY(KVMDevice) entries; 202 int dev_fd; 203 } KVMDevice; 204 205 static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head; 206 207 static void kvm_arm_devlistener_add(MemoryListener *listener, 208 MemoryRegionSection *section) 209 { 210 KVMDevice *kd; 211 212 QSLIST_FOREACH(kd, &kvm_devices_head, entries) { 213 if (section->mr == kd->mr) { 214 kd->kda.addr = section->offset_within_address_space; 215 } 216 } 217 } 218 219 static void kvm_arm_devlistener_del(MemoryListener *listener, 220 MemoryRegionSection *section) 221 { 222 KVMDevice *kd; 223 224 QSLIST_FOREACH(kd, &kvm_devices_head, entries) { 225 if (section->mr == kd->mr) { 226 kd->kda.addr = -1; 227 } 228 } 229 } 230 231 static MemoryListener devlistener = { 232 .region_add = kvm_arm_devlistener_add, 233 .region_del = kvm_arm_devlistener_del, 234 }; 235 236 static void kvm_arm_set_device_addr(KVMDevice *kd) 237 { 238 struct kvm_device_attr *attr = &kd->kdattr; 239 int ret; 240 241 /* If the device control API is available and we have a device fd on the 242 * KVMDevice struct, let's use the newer API 243 */ 244 if (kd->dev_fd >= 0) { 245 uint64_t addr = kd->kda.addr; 246 attr->addr = (uintptr_t)&addr; 247 ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr); 248 } else { 249 ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda); 250 } 251 252 if (ret < 0) { 253 fprintf(stderr, "Failed to set device address: %s\n", 254 strerror(-ret)); 255 abort(); 256 } 257 } 258 259 static void kvm_arm_machine_init_done(Notifier *notifier, void *data) 260 { 261 KVMDevice *kd, *tkd; 262 263 memory_listener_unregister(&devlistener); 264 QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) { 265 if (kd->kda.addr != -1) { 266 kvm_arm_set_device_addr(kd); 267 } 268 memory_region_unref(kd->mr); 269 g_free(kd); 270 } 271 } 272 273 static Notifier notify = { 274 .notify = kvm_arm_machine_init_done, 275 }; 276 277 void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, 278 uint64_t attr, int dev_fd) 279 { 280 KVMDevice *kd; 281 282 if (!kvm_irqchip_in_kernel()) { 283 return; 284 } 285 286 if (QSLIST_EMPTY(&kvm_devices_head)) { 287 memory_listener_register(&devlistener, &address_space_memory); 288 qemu_add_machine_init_done_notifier(¬ify); 289 } 290 kd = g_new0(KVMDevice, 1); 291 kd->mr = mr; 292 kd->kda.id = devid; 293 kd->kda.addr = -1; 294 kd->kdattr.flags = 0; 295 kd->kdattr.group = group; 296 kd->kdattr.attr = attr; 297 kd->dev_fd = dev_fd; 298 QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries); 299 memory_region_ref(kd->mr); 300 } 301 302 static int compare_u64(const void *a, const void *b) 303 { 304 if (*(uint64_t *)a > *(uint64_t *)b) { 305 return 1; 306 } 307 if (*(uint64_t *)a < *(uint64_t *)b) { 308 return -1; 309 } 310 return 0; 311 } 312 313 /* Initialize the CPUState's cpreg list according to the kernel's 314 * definition of what CPU registers it knows about (and throw away 315 * the previous TCG-created cpreg list). 316 */ 317 int kvm_arm_init_cpreg_list(ARMCPU *cpu) 318 { 319 struct kvm_reg_list rl; 320 struct kvm_reg_list *rlp; 321 int i, ret, arraylen; 322 CPUState *cs = CPU(cpu); 323 324 rl.n = 0; 325 ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl); 326 if (ret != -E2BIG) { 327 return ret; 328 } 329 rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t)); 330 rlp->n = rl.n; 331 ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp); 332 if (ret) { 333 goto out; 334 } 335 /* Sort the list we get back from the kernel, since cpreg_tuples 336 * must be in strictly ascending order. 337 */ 338 qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64); 339 340 for (i = 0, arraylen = 0; i < rlp->n; i++) { 341 if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) { 342 continue; 343 } 344 switch (rlp->reg[i] & KVM_REG_SIZE_MASK) { 345 case KVM_REG_SIZE_U32: 346 case KVM_REG_SIZE_U64: 347 break; 348 default: 349 fprintf(stderr, "Can't handle size of register in kernel list\n"); 350 ret = -EINVAL; 351 goto out; 352 } 353 354 arraylen++; 355 } 356 357 cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen); 358 cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen); 359 cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes, 360 arraylen); 361 cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values, 362 arraylen); 363 cpu->cpreg_array_len = arraylen; 364 cpu->cpreg_vmstate_array_len = arraylen; 365 366 for (i = 0, arraylen = 0; i < rlp->n; i++) { 367 uint64_t regidx = rlp->reg[i]; 368 if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) { 369 continue; 370 } 371 cpu->cpreg_indexes[arraylen] = regidx; 372 arraylen++; 373 } 374 assert(cpu->cpreg_array_len == arraylen); 375 376 if (!write_kvmstate_to_list(cpu)) { 377 /* Shouldn't happen unless kernel is inconsistent about 378 * what registers exist. 379 */ 380 fprintf(stderr, "Initial read of kernel register state failed\n"); 381 ret = -EINVAL; 382 goto out; 383 } 384 385 out: 386 g_free(rlp); 387 return ret; 388 } 389 390 bool write_kvmstate_to_list(ARMCPU *cpu) 391 { 392 CPUState *cs = CPU(cpu); 393 int i; 394 bool ok = true; 395 396 for (i = 0; i < cpu->cpreg_array_len; i++) { 397 struct kvm_one_reg r; 398 uint64_t regidx = cpu->cpreg_indexes[i]; 399 uint32_t v32; 400 int ret; 401 402 r.id = regidx; 403 404 switch (regidx & KVM_REG_SIZE_MASK) { 405 case KVM_REG_SIZE_U32: 406 r.addr = (uintptr_t)&v32; 407 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); 408 if (!ret) { 409 cpu->cpreg_values[i] = v32; 410 } 411 break; 412 case KVM_REG_SIZE_U64: 413 r.addr = (uintptr_t)(cpu->cpreg_values + i); 414 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); 415 break; 416 default: 417 abort(); 418 } 419 if (ret) { 420 ok = false; 421 } 422 } 423 return ok; 424 } 425 426 bool write_list_to_kvmstate(ARMCPU *cpu, int level) 427 { 428 CPUState *cs = CPU(cpu); 429 int i; 430 bool ok = true; 431 432 for (i = 0; i < cpu->cpreg_array_len; i++) { 433 struct kvm_one_reg r; 434 uint64_t regidx = cpu->cpreg_indexes[i]; 435 uint32_t v32; 436 int ret; 437 438 if (kvm_arm_cpreg_level(regidx) > level) { 439 continue; 440 } 441 442 r.id = regidx; 443 switch (regidx & KVM_REG_SIZE_MASK) { 444 case KVM_REG_SIZE_U32: 445 v32 = cpu->cpreg_values[i]; 446 r.addr = (uintptr_t)&v32; 447 break; 448 case KVM_REG_SIZE_U64: 449 r.addr = (uintptr_t)(cpu->cpreg_values + i); 450 break; 451 default: 452 abort(); 453 } 454 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r); 455 if (ret) { 456 /* We might fail for "unknown register" and also for 457 * "you tried to set a register which is constant with 458 * a different value from what it actually contains". 459 */ 460 ok = false; 461 } 462 } 463 return ok; 464 } 465 466 void kvm_arm_reset_vcpu(ARMCPU *cpu) 467 { 468 int ret; 469 470 /* Re-init VCPU so that all registers are set to 471 * their respective reset values. 472 */ 473 ret = kvm_arm_vcpu_init(CPU(cpu)); 474 if (ret < 0) { 475 fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); 476 abort(); 477 } 478 if (!write_kvmstate_to_list(cpu)) { 479 fprintf(stderr, "write_kvmstate_to_list failed\n"); 480 abort(); 481 } 482 } 483 484 /* 485 * Update KVM's MP_STATE based on what QEMU thinks it is 486 */ 487 int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu) 488 { 489 if (cap_has_mp_state) { 490 struct kvm_mp_state mp_state = { 491 .mp_state = (cpu->power_state == PSCI_OFF) ? 492 KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE 493 }; 494 int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state); 495 if (ret) { 496 fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n", 497 __func__, ret, strerror(-ret)); 498 return -1; 499 } 500 } 501 502 return 0; 503 } 504 505 /* 506 * Sync the KVM MP_STATE into QEMU 507 */ 508 int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) 509 { 510 if (cap_has_mp_state) { 511 struct kvm_mp_state mp_state; 512 int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state); 513 if (ret) { 514 fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n", 515 __func__, ret, strerror(-ret)); 516 abort(); 517 } 518 cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ? 519 PSCI_OFF : PSCI_ON; 520 } 521 522 return 0; 523 } 524 525 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) 526 { 527 } 528 529 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) 530 { 531 return MEMTXATTRS_UNSPECIFIED; 532 } 533 534 535 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) 536 { 537 int ret = 0; 538 539 switch (run->exit_reason) { 540 case KVM_EXIT_DEBUG: 541 if (kvm_arm_handle_debug(cs, &run->debug.arch)) { 542 ret = EXCP_DEBUG; 543 } /* otherwise return to guest */ 544 break; 545 default: 546 qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n", 547 __func__, run->exit_reason); 548 break; 549 } 550 return ret; 551 } 552 553 bool kvm_arch_stop_on_emulation_error(CPUState *cs) 554 { 555 return true; 556 } 557 558 int kvm_arch_process_async_events(CPUState *cs) 559 { 560 return 0; 561 } 562 563 int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr) 564 { 565 return 1; 566 } 567 568 int kvm_arch_on_sigbus(int code, void *addr) 569 { 570 return 1; 571 } 572 573 /* The #ifdef protections are until 32bit headers are imported and can 574 * be removed once both 32 and 64 bit reach feature parity. 575 */ 576 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) 577 { 578 #ifdef KVM_GUESTDBG_USE_SW_BP 579 if (kvm_sw_breakpoints_active(cs)) { 580 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 581 } 582 #endif 583 #ifdef KVM_GUESTDBG_USE_HW 584 if (kvm_arm_hw_debug_active(cs)) { 585 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW; 586 kvm_arm_copy_hw_debug_data(&dbg->arch); 587 } 588 #endif 589 } 590 591 void kvm_arch_init_irq_routing(KVMState *s) 592 { 593 } 594 595 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) 596 { 597 if (machine_kernel_irqchip_split(ms)) { 598 perror("-machine kernel_irqchip=split is not supported on ARM."); 599 exit(1); 600 } 601 602 /* If we can create the VGIC using the newer device control API, we 603 * let the device do this when it initializes itself, otherwise we 604 * fall back to the old API */ 605 return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL); 606 } 607 608 int kvm_arm_vgic_probe(void) 609 { 610 if (kvm_create_device(kvm_state, 611 KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) { 612 return 3; 613 } else if (kvm_create_device(kvm_state, 614 KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) { 615 return 2; 616 } else { 617 return 0; 618 } 619 } 620 621 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, 622 uint64_t address, uint32_t data, PCIDevice *dev) 623 { 624 return 0; 625 } 626 627 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, 628 int vector, PCIDevice *dev) 629 { 630 return 0; 631 } 632 633 int kvm_arch_release_virq_post(int virq) 634 { 635 return 0; 636 } 637 638 int kvm_arch_msi_data_to_gsi(uint32_t data) 639 { 640 return (data - 32) & 0xffff; 641 } 642