/*
 * ARM implementation of KVM hooks, 64 bit specific code
 *
 * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
 * Copyright Alex Bennée 2014, Linaro
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <sys/ptrace.h>

#include <linux/elf.h>
#include <linux/kvm.h>

#include "qemu-common.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "qemu/error-report.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "exec/gdbstub.h"
#include "sysemu/kvm.h"
#include "sysemu/kvm_int.h"
#include "kvm_arm.h"
#include "hw/boards.h"
#include "internals.h"

static bool have_guest_debug;

/*
 * Although the ARM implementation of hardware assisted debugging
 * allows for different breakpoints per-core, the current GDB
 * interface treats them as a global pool of registers (which seems to
 * be the case for x86, ppc and s390). As a result we store one copy
 * of registers which is used for all active cores.
 *
 * Write access is serialised by virtue of the GDB protocol which
 * updates things. Read access (i.e. when the values are copied to the
 * vCPU) is also gated by GDB's run control.
 *
 * This is not unreasonable as most of the time debugging kernels you
 * never know which core will eventually execute your function.
 */

typedef struct {
    uint64_t bcr;
    uint64_t bvr;
} HWBreakpoint;

/* The watchpoint registers can cover more area than the requested
 * watchpoint so we need to store the additional information
 * somewhere. We also need to supply a CPUWatchpoint to the GDB stub
 * when the watchpoint is hit.
 */
typedef struct {
    uint64_t wcr;
    uint64_t wvr;
    CPUWatchpoint details;
} HWWatchpoint;

/* Maximum and current break/watch point counts */
int max_hw_bps, max_hw_wps;
GArray *hw_breakpoints, *hw_watchpoints;

#define cur_hw_wps      (hw_watchpoints->len)
#define cur_hw_bps      (hw_breakpoints->len)
#define get_hw_bp(i)    (&g_array_index(hw_breakpoints, HWBreakpoint, i))
#define get_hw_wp(i)    (&g_array_index(hw_watchpoints, HWWatchpoint, i))

/**
 * kvm_arm_init_debug() - check for guest debug capabilities
 * @cs: CPUState
 *
 * kvm_check_extension returns the number of debug registers we have
 * or 0 if we have none.
 */
static void kvm_arm_init_debug(CPUState *cs)
{
    have_guest_debug = kvm_check_extension(cs->kvm_state,
                                           KVM_CAP_SET_GUEST_DEBUG);

    max_hw_wps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_WPS);
    hw_watchpoints = g_array_sized_new(true, true,
                                       sizeof(HWWatchpoint), max_hw_wps);

    max_hw_bps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_BPS);
    hw_breakpoints = g_array_sized_new(true, true,
                                       sizeof(HWBreakpoint), max_hw_bps);
}

/**
 * insert_hw_breakpoint()
 * @addr: address of breakpoint
 *
 * See ARM ARM D2.9.1 for details but here we are only going to create
 * simple un-linked breakpoints (i.e. we don't chain breakpoints
 * together to match address and context or vmid). The hardware is
 * capable of fancier matching but that will require exposing that
 * fanciness to GDB's interface.
 *
 * DBGBCR<n>_EL1, Debug Breakpoint Control Registers
 *
 *  31  24 23  20 19   16 15 14  13  12   9 8   5 4    3 2   1  0
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 * | RES0 |  BT  |  LBN  | SSC | HMC| RES0 | BAS | RES0 | PMC | E |
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 *
 * BT: Breakpoint type (0 = unlinked address match)
 * LBN: Linked BP number (0 = unused)
 * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12)
 * BAS: Byte Address Select (RES1 for AArch64)
 * E: Enable bit
 *
 * DBGBVR<n>_EL1, Debug Breakpoint Value Registers
 *
 *  63  53 52       49 48       2  1 0
 * +------+-----------+----------+-----+
 * | RESS | VA[52:49] | VA[48:2] | 0 0 |
 * +------+-----------+----------+-----+
 *
 * Depending on the addressing mode bits the top bits of the register
 * are a sign extension of the highest applicable VA bit. Some
 * versions of GDB don't do it correctly so we ensure they are correct
 * here so future PC comparisons will work properly.
 */

static int insert_hw_breakpoint(target_ulong addr)
{
    HWBreakpoint brk = {
        .bcr = 0x1,                             /* BCR E=1, enable */
        .bvr = sextract64(addr, 0, 53)
    };

    if (cur_hw_bps >= max_hw_bps) {
        return -ENOBUFS;
    }

    brk.bcr = deposit32(brk.bcr, 1, 2, 0x3);   /* PMC = 11 */
    brk.bcr = deposit32(brk.bcr, 5, 4, 0xf);   /* BAS = RES1 */

    g_array_append_val(hw_breakpoints, brk);

    return 0;
}
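
/*
 * Worked example (illustrative values, not from the ARM ARM): for a
 * breakpoint on the canonical kernel address 0xffff000010080000 the
 * bvr is stored unchanged, because bits [63:53] are already the sign
 * extension of VA bit 52, while bcr builds up as
 *
 *   0x001 (E=1) -> 0x007 (PMC=0b11) -> 0x1e7 (BAS=0b1111)
 */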

/**
 * delete_hw_breakpoint()
 * @pc: address of breakpoint
 *
 * Delete a breakpoint and shuffle any above down
 */

static int delete_hw_breakpoint(target_ulong pc)
{
    int i;
    for (i = 0; i < hw_breakpoints->len; i++) {
        HWBreakpoint *brk = get_hw_bp(i);
        if (brk->bvr == pc) {
            g_array_remove_index(hw_breakpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}

/**
 * insert_hw_watchpoint()
 * @addr: address of watch point
 * @len: size of area
 * @type: type of watch point
 *
 * See ARM ARM D2.10. As with the breakpoints we can do some advanced
 * stuff if we want to. The watch points can be linked with the break
 * points above to make them context aware. However for simplicity
 * currently we only deal with simple read/write watch points.
 *
 * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers
 *
 *  31  29 28   24 23  21  20  19 16 15 14  13   12  5 4    3 2   1  0
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 * | RES0 |  MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E |
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 *
 * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes))
 * WT: 0 - unlinked, 1 - linked (not currently used)
 * LBN: Linked BP number (not currently used)
 * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11)
 * BAS: Byte Address Select
 * LSC: Load/Store control (01: load, 10: store, 11: both)
 * E: Enable
 *
 * The bottom 2 bits of the value register are masked. Therefore to
 * break on any sizes smaller than an unaligned word you need to set
 * MASK=0, BAS=bit per byte in question. For larger regions (powers
 * of 2) you need to ensure you mask the address as required and set
 * BAS=0xff.
 */

static int insert_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    HWWatchpoint wp = {
        .wcr = 1, /* E=1, enable */
        .wvr = addr & (~0x7ULL),
        .details = { .vaddr = addr, .len = len }
    };

    if (cur_hw_wps >= max_hw_wps) {
        return -ENOBUFS;
    }

    /*
     * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state,
     * valid whether EL3 is implemented or not
     */
    wp.wcr = deposit32(wp.wcr, 1, 2, 3);

    switch (type) {
    case GDB_WATCHPOINT_READ:
        wp.wcr = deposit32(wp.wcr, 3, 2, 1);
        wp.details.flags = BP_MEM_READ;
        break;
    case GDB_WATCHPOINT_WRITE:
        wp.wcr = deposit32(wp.wcr, 3, 2, 2);
        wp.details.flags = BP_MEM_WRITE;
        break;
    case GDB_WATCHPOINT_ACCESS:
        wp.wcr = deposit32(wp.wcr, 3, 2, 3);
        wp.details.flags = BP_MEM_ACCESS;
        break;
    default:
        g_assert_not_reached();
    }
    if (len <= 8) {
        /* we align the address and set the bits in BAS */
        int off = addr & 0x7;
        int bas = (1 << len) - 1;

        wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas);
    } else {
        /* For ranges above 8 bytes we need to be a power of 2 */
        if (is_power_of_2(len)) {
            int bits = ctz64(len);

            wp.wvr &= ~((1 << bits) - 1);
            wp.wcr = deposit32(wp.wcr, 24, 4, bits);
            wp.wcr = deposit32(wp.wcr, 5, 8, 0xff);
        } else {
            return -ENOBUFS;
        }
    }

    g_array_append_val(hw_watchpoints, wp);
    return 0;
}
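
/*
 * Worked example (illustrative values): a 2-byte write watchpoint at
 * address 0x1002 stores wvr = 0x1000 and composes wcr as
 *
 *   0x001 (E=1) | 0x006 (PAC=0b11) | 0x010 (LSC=0b10, store)
 *       | 0x180 (BAS=0b11 shifted to byte offset 2) = 0x197
 */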

static bool check_watchpoint_in_range(int i, target_ulong addr)
{
    HWWatchpoint *wp = get_hw_wp(i);
    uint64_t addr_top, addr_bottom = wp->wvr;
    int bas = extract32(wp->wcr, 5, 8);
    int mask = extract32(wp->wcr, 24, 4);

    if (mask) {
        addr_top = addr_bottom + (1 << mask);
    } else {
        /* BAS must be contiguous but can offset against the base
         * address in DBGWVR */
        addr_bottom = addr_bottom + ctz32(bas);
        addr_top = addr_bottom + clo32(bas);
    }

    if (addr >= addr_bottom && addr <= addr_top) {
        return true;
    }

    return false;
}
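
/*
 * Worked example: a 16-byte watchpoint inserted above stores
 * MASK = ctz64(16) = 4, so here addr_top works out as
 * addr_bottom + (1 << 4), i.e. the 16 bytes above the masked base
 * address in DBGWVR.
 */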

/**
 * delete_hw_watchpoint()
 * @addr: address of watch point
 *
 * Delete a watchpoint and shuffle any above down
 */

static int delete_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    int i;
    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            g_array_remove_index(hw_watchpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return insert_hw_breakpoint(addr);
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return insert_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return delete_hw_breakpoint(addr);
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return delete_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    if (cur_hw_wps > 0) {
        g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
    }
    if (cur_hw_bps > 0) {
        g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
    }
}

void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
{
    int i;
    memset(ptr, 0, sizeof(struct kvm_guest_debug_arch));

    for (i = 0; i < max_hw_wps; i++) {
        HWWatchpoint *wp = get_hw_wp(i);
        ptr->dbg_wcr[i] = wp->wcr;
        ptr->dbg_wvr[i] = wp->wvr;
    }
    for (i = 0; i < max_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        ptr->dbg_bcr[i] = bp->bcr;
        ptr->dbg_bvr[i] = bp->bvr;
    }
}

bool kvm_arm_hw_debug_active(CPUState *cs)
{
    return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
}

static bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
{
    int i;

    for (i = 0; i < cur_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        if (bp->bvr == pc) {
            return true;
        }
    }
    return false;
}

static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
{
    int i;

    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            return &get_hw_wp(i)->details;
        }
    }
    return NULL;
}

static bool kvm_arm_pmu_set_attr(CPUState *cs, struct kvm_device_attr *attr)
{
    int err;

    err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("PMU: KVM_HAS_DEVICE_ATTR: %s", strerror(-err));
        return false;
    }

    err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("PMU: KVM_SET_DEVICE_ATTR: %s", strerror(-err));
        return false;
    }

    return true;
}

void kvm_arm_pmu_init(CPUState *cs)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .attr = KVM_ARM_VCPU_PMU_V3_INIT,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_pmu_set_attr(cs, &attr)) {
        error_report("failed to init PMU");
        abort();
    }
}

void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .addr = (intptr_t)&irq,
        .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_pmu_set_attr(cs, &attr)) {
        error_report("failed to set irq for PMU");
        abort();
    }
}

static inline void set_feature(uint64_t *features, int feature)
{
    *features |= 1ULL << feature;
}

static inline void unset_feature(uint64_t *features, int feature)
{
    *features &= ~(1ULL << feature);
}

static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
{
    uint64_t ret;
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret };
    int err;

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    err = ioctl(fd, KVM_GET_ONE_REG, &idreg);
    if (err < 0) {
        return -1;
    }
    *pret = ret;
    return 0;
}

static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id)
{
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret };

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    return ioctl(fd, KVM_GET_ONE_REG, &idreg);
}
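
/*
 * Added note: ARM64_SYS_REG() packs the (op0, op1, CRn, CRm, op2)
 * tuple of an AArch64 system register into a KVM register id. For
 * example ID_AA64PFR0_EL1 is op0=3, op1=0, CRn=0, CRm=4, op2=0, which
 * is the first register fetched below:
 *
 *   uint64_t pfr0;   (hypothetical local, for illustration only)
 *   read_sys_reg64(fdarray[2], &pfr0, ARM64_SYS_REG(3, 0, 0, 4, 0));
 */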

bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
{
    /* Identify the feature bits corresponding to the host CPU, and
     * fill out the ARMHostCPUClass fields accordingly. To do this
     * we have to create a scratch VM, create a single CPU inside it,
     * and then query that CPU for the relevant ID registers.
     */
    int fdarray[3];
    uint64_t features = 0;
    int err;

    /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
     * we know these will only support creating one kind of guest CPU,
     * which is its preferred CPU type. Fortunately these old kernels
     * support only a very limited number of CPUs.
     */
    static const uint32_t cpus_to_try[] = {
        KVM_ARM_TARGET_AEM_V8,
        KVM_ARM_TARGET_FOUNDATION_V8,
        KVM_ARM_TARGET_CORTEX_A57,
        QEMU_KVM_ARM_TARGET_NONE
    };
    struct kvm_vcpu_init init;

    if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
        return false;
    }

    ahcf->target = init.target;
    ahcf->dtb_compatible = "arm,arm-v8";

    err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0,
                         ARM64_SYS_REG(3, 0, 0, 4, 0));
    if (unlikely(err < 0)) {
        /*
         * Before v4.15, the kernel only exposed a limited number of system
         * registers, not including any of the interesting AArch64 ID regs.
         * For the most part we could leave these fields as zero with minimal
         * effect, since this does not affect the values seen by the guest.
         *
         * However, it could cause problems down the line for QEMU,
         * so provide a minimal v8.0 default.
         *
         * ??? Could read MIDR and use knowledge from cpu64.c.
         * ??? Could map a page of memory into our temp guest and
         *     run the tiniest of hand-crafted kernels to extract
         *     the values seen by the guest.
         * ??? Either of these sounds like too much effort just
         *     to work around running a modern host kernel.
         */
        ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */
        err = 0;
    } else {
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
                              ARM64_SYS_REG(3, 0, 0, 4, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
                              ARM64_SYS_REG(3, 0, 0, 6, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
                              ARM64_SYS_REG(3, 0, 0, 6, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
                              ARM64_SYS_REG(3, 0, 0, 7, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
                              ARM64_SYS_REG(3, 0, 0, 7, 1));

        /*
         * Note that if AArch32 support is not present in the host,
         * the AArch32 sysregs are present to be read, but will
         * return UNKNOWN values. This is neither better nor worse
         * than skipping the reads and leaving 0, as we must avoid
         * considering the values in every case.
         */
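        /*
         * Added note: the encodings below are the AArch32 ID registers
         * in the AArch64 sysreg numbering: ID_ISAR0..ID_ISAR5 are
         * op0=3, op1=0, CRn=0, CRm=2, op2=0..5, ID_ISAR6 sits at
         * op2=7, and MVFR0..MVFR2 are at CRm=3, op2=0..2.
         */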
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
                              ARM64_SYS_REG(3, 0, 0, 2, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
                              ARM64_SYS_REG(3, 0, 0, 2, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
                              ARM64_SYS_REG(3, 0, 0, 2, 2));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
                              ARM64_SYS_REG(3, 0, 0, 2, 3));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
                              ARM64_SYS_REG(3, 0, 0, 2, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
                              ARM64_SYS_REG(3, 0, 0, 2, 5));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
                              ARM64_SYS_REG(3, 0, 0, 2, 7));

        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
                              ARM64_SYS_REG(3, 0, 0, 3, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
                              ARM64_SYS_REG(3, 0, 0, 3, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
                              ARM64_SYS_REG(3, 0, 0, 3, 2));
    }

    kvm_arm_destroy_scratch_host_vcpu(fdarray);

    if (err < 0) {
        return false;
    }

    /* We can assume any KVM supporting CPU is at least a v8
     * with VFPv4+Neon; this in turn implies most of the other
     * feature bits.
     */
    set_feature(&features, ARM_FEATURE_V8);
    set_feature(&features, ARM_FEATURE_VFP4);
    set_feature(&features, ARM_FEATURE_NEON);
    set_feature(&features, ARM_FEATURE_AARCH64);
    set_feature(&features, ARM_FEATURE_PMU);

    ahcf->features = features;

    return true;
}

bool kvm_arm_aarch32_supported(CPUState *cpu)
{
    KVMState *s = KVM_STATE(current_machine->accelerator);

    return kvm_check_extension(s, KVM_CAP_ARM_EL1_32BIT);
}
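
/* Added note: this is the MPIDR_EL1 encoding, op0=3, op1=0, CRn=0,
 * CRm=0, op2=5, spelled out for use with ARM64_SYS_REG() below.
 */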
#define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5

int kvm_arch_init_vcpu(CPUState *cs)
{
    int ret;
    uint64_t mpidr;
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
        !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
        fprintf(stderr, "KVM is not supported for this guest CPU type\n");
        return -EINVAL;
    }

    /* Determine init features for this CPU */
    memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
    if (cpu->start_powered_off) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
    }
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
        cpu->psci_version = 2;
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
    }
    if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
    }
    if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
        cpu->has_pmu = false;
    }
    if (cpu->has_pmu) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
    } else {
        unset_feature(&env->features, ARM_FEATURE_PMU);
    }

    /* Do KVM_ARM_VCPU_INIT ioctl */
    ret = kvm_arm_vcpu_init(cs);
    if (ret) {
        return ret;
    }

    /*
     * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
     * Currently KVM has its own idea about MPIDR assignment, so we
     * override our defaults with what we get from KVM.
     */
    ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
    if (ret) {
        return ret;
    }
    cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;

    kvm_arm_init_debug(cs);

    /* Check whether user space can specify guest syndrome value */
    kvm_arm_init_serror_injection(cs);

    return kvm_arm_init_cpreg_list(cpu);
}

int kvm_arch_destroy_vcpu(CPUState *cs)
{
    return 0;
}

bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
{
    /* Return true if the regidx is a register we should synchronize
     * via the cpreg_tuples array (ie is not a core reg we sync by
     * hand in kvm_arch_get/put_registers())
     */
    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
    case KVM_REG_ARM_CORE:
        return false;
    default:
        return true;
    }
}

typedef struct CPRegStateLevel {
    uint64_t regidx;
    int level;
} CPRegStateLevel;

/* All system registers not listed in the following table are assumed to be
 * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
 * often, you must add it to this table with a state of either
 * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
 */
static const CPRegStateLevel non_runtime_cpregs[] = {
    { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
};

int kvm_arm_cpreg_level(uint64_t regidx)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
        const CPRegStateLevel *l = &non_runtime_cpregs[i];
        if (l->regidx == regidx) {
            return l->level;
        }
    }

    return KVM_PUT_RUNTIME_STATE;
}

#define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
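
/*
 * Added note: KVM_REG_ARM_CORE_REG(x) in the kernel headers is
 * offsetof(struct kvm_regs, x) / sizeof(__u32), so for example
 * AARCH64_CORE_REG(regs.pc) names the 64-bit core register backed by
 * the pc field of struct kvm_regs.
 */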

static int kvm_arch_put_fpsimd(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    struct kvm_one_reg reg;
    uint32_t fpr;
    int i, ret;

    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
#ifdef HOST_WORDS_BIGENDIAN
        uint64_t fp_val[2] = { q[1], q[0] };
        reg.addr = (uintptr_t)fp_val;
#else
        reg.addr = (uintptr_t)q;
#endif
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    reg.addr = (uintptr_t)(&fpr);
    fpr = vfp_get_fpsr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.addr = (uintptr_t)(&fpr);
    fpr = vfp_get_fpcr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    return 0;
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    struct kvm_one_reg reg;
    uint64_t val;
    int i, ret;
    unsigned int el;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
     * AArch64 registers before pushing them out to 64-bit KVM.
     */
    if (!is_a64(env)) {
        aarch64_sync_32_to_64(env);
    }

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_save_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
    if (is_a64(env)) {
        val = pstate_read(env);
    } else {
        val = cpsr_read(env);
    }
    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Saved Program State Registers
     *
     * Before we restore from the banked_spsr[] array we need to
     * ensure that any modifications to env->spsr are correctly
     * reflected in the banks.
     */
    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->banked_spsr[i] = env->spsr;
    }

    /* KVM 0-4 map to QEMU banks 1-5 */
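    /*
     * Added detail (based on the kernel's asm/kvm.h constants and
     * QEMU's bank_number()): KVM_SPSR_EL1/SVC (0) pairs with
     * banked_spsr[1], ABT (1) with [2], UND (2) with [3], IRQ (3)
     * with [4] and FIQ (4) with [5], hence the i + 1 below.
     */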
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    ret = kvm_arch_put_fpsimd(cs);
    if (ret) {
        return ret;
    }

    ret = kvm_put_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    write_cpustate_to_list(cpu, true);

    if (!write_list_to_kvmstate(cpu, level)) {
        return -EINVAL;
    }

    kvm_arm_sync_mpstate_to_kvm(cpu);

    return ret;
}

static int kvm_arch_get_fpsimd(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    struct kvm_one_reg reg;
    uint32_t fpr;
    int i, ret;

    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        reg.addr = (uintptr_t)q;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        } else {
#ifdef HOST_WORDS_BIGENDIAN
            uint64_t t;
            t = q[0], q[0] = q[1], q[1] = t;
#endif
        }
    }

    reg.addr = (uintptr_t)(&fpr);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpsr(env, fpr);

    reg.addr = (uintptr_t)(&fpr);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpcr(env, fpr);

    return 0;
}

int kvm_arch_get_registers(CPUState *cs)
{
    struct kvm_one_reg reg;
    uint64_t val;
    unsigned int el;
    int i, ret;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    env->aarch64 = ((val & PSTATE_nRW) == 0);
    if (is_a64(env)) {
        pstate_write(env, val);
    } else {
        cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
    }

    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_restore_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* If we are in AArch32 mode then we need to sync the AArch32 regs with the
     * incoming AArch64 regs received from 64-bit KVM.
     * We must perform this after all of the registers have been acquired from
     * the kernel.
     */
    if (!is_a64(env)) {
        aarch64_sync_64_to_32(env);
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Fetch the SPSR registers
     *
     * KVM SPSRs 0-4 map to QEMU banks 1-5
     */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->spsr = env->banked_spsr[i];
    }

    ret = kvm_arch_get_fpsimd(cs);
    if (ret) {
        return ret;
    }

    ret = kvm_get_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    if (!write_kvmstate_to_list(cpu)) {
        return -EINVAL;
    }
    /* Note that it's OK to have registers which aren't in CPUState,
     * so we can ignore a failure return here.
     */
    write_list_to_cpustate(cpu);

    kvm_arm_sync_mpstate_to_qemu(cpu);

    /* TODO: other registers */
    return ret;
}

/* C6.6.29 BRK instruction */
static const uint32_t brk_insn = 0xd4200000;
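
/*
 * Added note: 0xd4200000 is BRK #0; the 16-bit immediate "comment"
 * field occupies bits [20:5] of the encoding and is zero here.
 */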

int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    static uint32_t brk;

    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) ||
            brk != brk_insn ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

/* See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
 *
 * To minimise translating between kernel and user-space the kernel
 * ABI just provides user-space with the full exception syndrome
 * register value to be decoded in QEMU.
 */

bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
{
    int hsr_ec = syn_get_ec(debug_exit->hsr);
    ARMCPU *cpu = ARM_CPU(cs);
    CPUClass *cc = CPU_GET_CLASS(cs);
    CPUARMState *env = &cpu->env;

    /* Ensure PC is synchronised */
    kvm_cpu_synchronize_state(cs);

    switch (hsr_ec) {
    case EC_SOFTWARESTEP:
        if (cs->singlestep_enabled) {
            return true;
        } else {
            /*
             * The kernel should have suppressed the guest's ability to
             * single step at this point so something has gone wrong.
             */
            error_report("%s: guest single-step while debugging unsupported"
                         " (%"PRIx64", %"PRIx32")",
                         __func__, env->pc, debug_exit->hsr);
            return false;
        }
    case EC_AA64_BKPT:
        if (kvm_find_sw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_BREAKPOINT:
        if (find_hw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_WATCHPOINT:
    {
        CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far);
        if (wp) {
            cs->watchpoint_hit = wp;
            return true;
        }
        break;
    }
    default:
        error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
                     __func__, debug_exit->hsr, env->pc);
    }

    /* If we are not handling the debug exception it must belong to
     * the guest. Let's re-use the existing TCG interrupt code to set
     * everything up properly.
     */
    cs->exception_index = EXCP_BKPT;
    env->exception.syndrome = debug_exit->hsr;
    env->exception.vaddress = debug_exit->far;
    env->exception.target_el = 1;
    qemu_mutex_lock_iothread();
    cc->do_interrupt(cs);
    qemu_mutex_unlock_iothread();

    return false;
}