/*
 * ARM implementation of KVM hooks, 64 bit specific code
 *
 * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
 * Copyright Alex Bennée 2014, Linaro
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <sys/ptrace.h>

#include <linux/elf.h>
#include <linux/kvm.h>

#include "qemu-common.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "qemu/error-report.h"
#include "qemu/host-utils.h"
#include "exec/gdbstub.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_arm.h"
#include "internals.h"
#include "hw/arm/arm.h"

static bool have_guest_debug;

/*
 * Although the ARM implementation of hardware assisted debugging
 * allows for different breakpoints per-core, the current GDB
 * interface treats them as a global pool of registers (which seems to
 * be the case for x86, ppc and s390). As a result we store one copy
 * of registers which is used for all active cores.
 *
 * Write access is serialised by the GDB protocol, which drives all
 * updates. Read access (i.e. when the values are copied to the
 * vCPU) is also gated by GDB's run control.
 *
 * This is not unreasonable: when debugging a kernel you rarely know
 * in advance which core will eventually execute the code of interest.
 */

typedef struct {
    uint64_t bcr;
    uint64_t bvr;
} HWBreakpoint;

/* The watchpoint registers can cover more area than the requested
 * watchpoint so we need to store the additional information
 * somewhere. We also need to supply a CPUWatchpoint to the GDB stub
 * when the watchpoint is hit.
 */
typedef struct {
    uint64_t wcr;
    uint64_t wvr;
    CPUWatchpoint details;
} HWWatchpoint;

/* Maximum and current break/watch point counts */
int max_hw_bps, max_hw_wps;
GArray *hw_breakpoints, *hw_watchpoints;

#define cur_hw_wps      (hw_watchpoints->len)
#define cur_hw_bps      (hw_breakpoints->len)
#define get_hw_bp(i)    (&g_array_index(hw_breakpoints, HWBreakpoint, i))
#define get_hw_wp(i)    (&g_array_index(hw_watchpoints, HWWatchpoint, i))

/**
 * kvm_arm_init_debug() - check for guest debug capabilities
 * @cs: CPUState
 *
 * kvm_check_extension on the HW_BPS/HW_WPS capabilities returns the
 * number of hardware break/watchpoint registers we have, or 0 if we
 * have none.
 */
static void kvm_arm_init_debug(CPUState *cs)
{
    have_guest_debug = kvm_check_extension(cs->kvm_state,
                                           KVM_CAP_SET_GUEST_DEBUG);

    max_hw_wps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_WPS);
    hw_watchpoints = g_array_sized_new(true, true,
                                       sizeof(HWWatchpoint), max_hw_wps);

    max_hw_bps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_BPS);
    hw_breakpoints = g_array_sized_new(true, true,
                                       sizeof(HWBreakpoint), max_hw_bps);
}
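/*
 * Illustrative only: the v8 architecture provides an IMPLEMENTATION
 * DEFINED number of breakpoint and watchpoint register pairs (2-16 of
 * each), so after kvm_arm_init_debug() we might for example see
 * max_hw_bps == 6 and max_hw_wps == 4 (the Cortex-A57 figures), with
 * both GArrays empty until GDB starts inserting points.
 */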
/**
 * insert_hw_breakpoint()
 * @addr: address of breakpoint
 *
 * See ARM ARM D2.9.1 for details but here we are only going to create
 * simple un-linked breakpoints (i.e. we don't chain breakpoints
 * together to match address and context or vmid). The hardware is
 * capable of fancier matching but that will require exposing that
 * fanciness to GDB's interface.
 *
 * DBGBCR<n>_EL1, Debug Breakpoint Control Registers
 *
 *  31  24 23  20 19   16 15 14  13  12   9 8   5 4    3 2   1  0
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 * | RES0 |  BT  |  LBN  | SSC | HMC| RES0 | BAS | RES0 | PMC | E |
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 *
 * BT: Breakpoint type (0 = unlinked address match)
 * LBN: Linked BP number (0 = unused)
 * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12)
 * BAS: Byte Address Select (RES1 for AArch64)
 * E: Enable bit
 *
 * DBGBVR<n>_EL1, Debug Breakpoint Value Registers
 *
 *  63  53 52       49 48       2  1 0
 * +------+-----------+----------+-----+
 * | RESS | VA[52:49] | VA[48:2] | 0 0 |
 * +------+-----------+----------+-----+
 *
 * Depending on the addressing mode bits the top bits of the register
 * are a sign extension of the highest applicable VA bit. Some
 * versions of GDB don't compute the sign extension correctly, so we
 * fix the value up here to ensure future PC comparisons will work
 * properly.
 */

static int insert_hw_breakpoint(target_ulong addr)
{
    HWBreakpoint brk = {
        .bcr = 0x1,                             /* BCR E=1, enable */
        .bvr = sextract64(addr, 0, 53)
    };

    if (cur_hw_bps >= max_hw_bps) {
        return -ENOBUFS;
    }

    brk.bcr = deposit32(brk.bcr, 1, 2, 0x3);    /* PMC = 11 */
    brk.bcr = deposit32(brk.bcr, 5, 4, 0xf);    /* BAS = RES1 */

    g_array_append_val(hw_breakpoints, brk);

    return 0;
}
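/*
 * Worked example (informative): insert_hw_breakpoint(0xffff000010081000)
 * stores bvr == 0xffff000010081000 (bits [63:53] already match the sign
 * extension of bit 52 here) and bcr == 0x1e7 (E=1, PMC=0b11, BAS=0b1111).
 */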
/**
 * delete_hw_breakpoint()
 * @pc: address of breakpoint
 *
 * Delete a breakpoint and shuffle any above down
 */

static int delete_hw_breakpoint(target_ulong pc)
{
    int i;
    for (i = 0; i < hw_breakpoints->len; i++) {
        HWBreakpoint *brk = get_hw_bp(i);
        if (brk->bvr == pc) {
            g_array_remove_index(hw_breakpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}

/**
 * insert_hw_watchpoint()
 * @addr: address of watch point
 * @len: size of area
 * @type: type of watch point
 *
 * See ARM ARM D2.10. As with the breakpoints we can do some advanced
 * stuff if we want to. The watch points can be linked with the break
 * points above to make them context aware. However for simplicity
 * currently we only deal with simple read/write watch points.
 *
 * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers
 *
 *  31 29 28   24 23  21  20  19 16 15 14 13   12  5 4   3 2   1  0
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 * | RES0 |  MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E |
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 *
 * MASK: num bits addr mask (0 = none, 01/10 = reserved, 11 = 3 bits (8 bytes))
 * WT: 0 - unlinked, 1 - linked (not currently used)
 * LBN: Linked BP number (not currently used)
 * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11)
 * BAS: Byte Address Select
 * LSC: Load/Store control (01: load, 10: store, 11: both)
 * E: Enable
 *
 * The bottom 2 bits of the value register are masked. Therefore to
 * break on any sizes smaller than an unaligned word you need to set
 * MASK=0, BAS=bit per byte in question. For larger regions (powers of
 * 2 above 8 bytes) you need to ensure you mask the address as
 * required and set BAS=0xff.
 */

static int insert_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    HWWatchpoint wp = {
        .wcr = 1, /* E=1, enable */
        .wvr = addr & (~0x7ULL),
        .details = { .vaddr = addr, .len = len }
    };

    if (cur_hw_wps >= max_hw_wps) {
        return -ENOBUFS;
    }

    /*
     * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state,
     * valid whether EL3 is implemented or not
     */
    wp.wcr = deposit32(wp.wcr, 1, 2, 3);

    switch (type) {
    case GDB_WATCHPOINT_READ:
        wp.wcr = deposit32(wp.wcr, 3, 2, 1);
        wp.details.flags = BP_MEM_READ;
        break;
    case GDB_WATCHPOINT_WRITE:
        wp.wcr = deposit32(wp.wcr, 3, 2, 2);
        wp.details.flags = BP_MEM_WRITE;
        break;
    case GDB_WATCHPOINT_ACCESS:
        wp.wcr = deposit32(wp.wcr, 3, 2, 3);
        wp.details.flags = BP_MEM_ACCESS;
        break;
    default:
        g_assert_not_reached();
    }
    if (len <= 8) {
        /* we align the address and set the bits in BAS */
        int off = addr & 0x7;
        int bas = (1 << len) - 1;

        wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas);
    } else {
        /* For ranges above 8 bytes the length must be a power of 2 */
        if (is_power_of_2(len)) {
            int bits = ctz64(len);

            wp.wvr &= ~((1 << bits) - 1);
            wp.wcr = deposit32(wp.wcr, 24, 4, bits);
            wp.wcr = deposit32(wp.wcr, 5, 8, 0xff);
        } else {
            return -ENOBUFS;
        }
    }

    g_array_append_val(hw_watchpoints, wp);
    return 0;
}
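/*
 * Worked example (informative): a 2-byte write watchpoint at 0x1006,
 * i.e. insert_hw_watchpoint(0x1006, 2, GDB_WATCHPOINT_WRITE), stores
 * wvr == 0x1000 (doubleword aligned) and wcr == 0x1817 (E=1, PAC=0b11,
 * LSC=0b10 for stores, BAS=0b11 shifted to byte offset 6).
 */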
static bool check_watchpoint_in_range(int i, target_ulong addr)
{
    HWWatchpoint *wp = get_hw_wp(i);
    uint64_t addr_top, addr_bottom = wp->wvr;
    int bas = extract32(wp->wcr, 5, 8);
    int mask = extract32(wp->wcr, 24, 4);

    if (mask) {
        addr_top = addr_bottom + (1 << mask);
    } else {
        /* BAS must be contiguous but can offset against the base
         * address in DBGWVR */
        addr_bottom = addr_bottom + ctz32(bas);
        addr_top = addr_bottom + clo32(bas);
    }

    if (addr >= addr_bottom && addr <= addr_top) {
        return true;
    }

    return false;
}

/**
 * delete_hw_watchpoint()
 * @addr: address of watchpoint
 *
 * Delete a watchpoint and shuffle any above down
 */

static int delete_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    int i;
    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            g_array_remove_index(hw_watchpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}


int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return insert_hw_breakpoint(addr);
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return insert_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return delete_hw_breakpoint(addr);
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return delete_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}


void kvm_arch_remove_all_hw_breakpoints(void)
{
    if (cur_hw_wps > 0) {
        g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
    }
    if (cur_hw_bps > 0) {
        g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
    }
}

void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
{
    int i;
    memset(ptr, 0, sizeof(struct kvm_guest_debug_arch));

    for (i = 0; i < max_hw_wps; i++) {
        HWWatchpoint *wp = get_hw_wp(i);
        ptr->dbg_wcr[i] = wp->wcr;
        ptr->dbg_wvr[i] = wp->wvr;
    }
    for (i = 0; i < max_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        ptr->dbg_bcr[i] = bp->bcr;
        ptr->dbg_bvr[i] = bp->bvr;
    }
}

bool kvm_arm_hw_debug_active(CPUState *cs)
{
    return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
}

static bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
{
    int i;

    for (i = 0; i < cur_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        if (bp->bvr == pc) {
            return true;
        }
    }
    return false;
}

static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
{
    int i;

    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            return &get_hw_wp(i)->details;
        }
    }
    return NULL;
}

static bool kvm_arm_pmu_set_attr(CPUState *cs, struct kvm_device_attr *attr)
{
    int err;

    err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("PMU: KVM_HAS_DEVICE_ATTR: %s", strerror(-err));
        return false;
    }

    err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("PMU: KVM_SET_DEVICE_ATTR: %s", strerror(-err));
        return false;
    }

    return true;
}

void kvm_arm_pmu_init(CPUState *cs)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .attr = KVM_ARM_VCPU_PMU_V3_INIT,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_pmu_set_attr(cs, &attr)) {
        error_report("failed to init PMU");
        abort();
    }
}

void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .addr = (intptr_t)&irq,
        .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_pmu_set_attr(cs, &attr)) {
        error_report("failed to set irq for PMU");
        abort();
    }
}

static inline void set_feature(uint64_t *features, int feature)
{
    *features |= 1ULL << feature;
}

static inline void unset_feature(uint64_t *features, int feature)
{
    *features &= ~(1ULL << feature);
}

static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
{
    uint64_t ret;
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret };
    int err;

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    err = ioctl(fd, KVM_GET_ONE_REG, &idreg);
    if (err < 0) {
        return -1;
    }
    *pret = ret;
    return 0;
}

static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id)
{
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret };

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    return ioctl(fd, KVM_GET_ONE_REG, &idreg);
}
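/*
 * Illustrative use of the helpers above (not part of the build):
 * MIDR_EL1 is op0=3, op1=0, CRn=0, CRm=0, op2=0, so given a vcpu fd:
 *
 *   uint64_t midr;
 *   err = read_sys_reg64(vcpu_fd, &midr, ARM64_SYS_REG(3, 0, 0, 0, 0));
 */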
bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
{
    /* Identify the feature bits corresponding to the host CPU, and
     * fill out the ARMHostCPUFeatures fields accordingly. To do this
     * we have to create a scratch VM, create a single CPU inside it,
     * and then query that CPU for the relevant ID registers.
     */
    int fdarray[3];
    uint64_t features = 0;
    int err;

    /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
     * we know these will only support creating one kind of guest CPU,
     * which is its preferred CPU type. Fortunately these old kernels
     * support only a very limited number of CPUs.
     */
    static const uint32_t cpus_to_try[] = {
        KVM_ARM_TARGET_AEM_V8,
        KVM_ARM_TARGET_FOUNDATION_V8,
        KVM_ARM_TARGET_CORTEX_A57,
        QEMU_KVM_ARM_TARGET_NONE
    };
    struct kvm_vcpu_init init;

    if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
        return false;
    }

    ahcf->target = init.target;
    ahcf->dtb_compatible = "arm,arm-v8";

    err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0,
                         ARM64_SYS_REG(3, 0, 0, 4, 0));
    if (unlikely(err < 0)) {
        /*
         * Before v4.15, the kernel only exposed a limited number of system
         * registers, not including any of the interesting AArch64 ID regs.
         * For the most part we could leave these fields as zero with minimal
         * effect, since this does not affect the values seen by the guest.
         *
         * However, it could cause problems down the line for QEMU,
         * so provide a minimal v8.0 default.
         *
         * ??? Could read MIDR and use knowledge from cpu64.c.
         * ??? Could map a page of memory into our temp guest and
         *     run the tiniest of hand-crafted kernels to extract
         *     the values seen by the guest.
         * ??? Either of these sounds like too much effort just
         *     to work around running a modern host kernel.
         */
        ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */
        err = 0;
    } else {
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
                              ARM64_SYS_REG(3, 0, 0, 4, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
                              ARM64_SYS_REG(3, 0, 0, 6, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
                              ARM64_SYS_REG(3, 0, 0, 6, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
                              ARM64_SYS_REG(3, 0, 0, 7, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
                              ARM64_SYS_REG(3, 0, 0, 7, 1));
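        /*
         * For orientation (informative): the ARM64_SYS_REG(3, 0, 0, CRm,
         * op2) encodings above select the AArch64 ID block, where CRm=4
         * holds ID_AA64PFR*, CRm=6 ID_AA64ISAR* and CRm=7 ID_AA64MMFR*;
         * the AArch32 ID_ISAR* and MVFR* copies read below live at CRm=2
         * and CRm=3.
         */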
        /*
         * Note that if AArch32 support is not present in the host,
         * the AArch32 sysregs are present to be read, but will
         * return UNKNOWN values. This is neither better nor worse
         * than skipping the reads and leaving 0, as we must avoid
         * considering the values in every case.
         */
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
                              ARM64_SYS_REG(3, 0, 0, 2, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
                              ARM64_SYS_REG(3, 0, 0, 2, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
                              ARM64_SYS_REG(3, 0, 0, 2, 2));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
                              ARM64_SYS_REG(3, 0, 0, 2, 3));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
                              ARM64_SYS_REG(3, 0, 0, 2, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
                              ARM64_SYS_REG(3, 0, 0, 2, 5));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
                              ARM64_SYS_REG(3, 0, 0, 2, 7));

        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
                              ARM64_SYS_REG(3, 0, 0, 3, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
                              ARM64_SYS_REG(3, 0, 0, 3, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
                              ARM64_SYS_REG(3, 0, 0, 3, 2));
    }

    kvm_arm_destroy_scratch_host_vcpu(fdarray);

    if (err < 0) {
        return false;
    }

    /* We can assume any KVM supporting CPU is at least a v8
     * with VFPv4+Neon; this in turn implies most of the other
     * feature bits.
     */
    set_feature(&features, ARM_FEATURE_V8);
    set_feature(&features, ARM_FEATURE_VFP4);
    set_feature(&features, ARM_FEATURE_NEON);
    set_feature(&features, ARM_FEATURE_AARCH64);
    set_feature(&features, ARM_FEATURE_PMU);

    ahcf->features = features;

    return true;
}

#define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5

int kvm_arch_init_vcpu(CPUState *cs)
{
    int ret;
    uint64_t mpidr;
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
        !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
        fprintf(stderr, "KVM is not supported for this guest CPU type\n");
        return -EINVAL;
    }

    /* Determine init features for this CPU */
    memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
    if (cpu->start_powered_off) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
    }
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
        cpu->psci_version = 2;
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
    }
    if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
    }
    if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
        cpu->has_pmu = false;
    }
    if (cpu->has_pmu) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
    } else {
        unset_feature(&env->features, ARM_FEATURE_PMU);
    }

    /* Do KVM_ARM_VCPU_INIT ioctl */
    ret = kvm_arm_vcpu_init(cs);
    if (ret) {
        return ret;
    }
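    /*
     * Illustrative only: for a 64-bit guest that starts powered off on
     * a PSCI 0.2 capable host, the ioctl above will have been issued
     * with kvm_init_features[0] ==
     *     (1 << KVM_ARM_VCPU_POWER_OFF) | (1 << KVM_ARM_VCPU_PSCI_0_2)
     * plus (1 << KVM_ARM_VCPU_PMU_V3) when the PMU is enabled.
     */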
    /*
     * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
     * Currently KVM has its own idea about MPIDR assignment, so we
     * override our defaults with what we get from KVM.
     */
    ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
    if (ret) {
        return ret;
    }
    cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;

    kvm_arm_init_debug(cs);

    /* Check whether user space can specify guest syndrome value */
    kvm_arm_init_serror_injection(cs);

    return kvm_arm_init_cpreg_list(cpu);
}

bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
{
    /* Return true if the regidx is a register we should synchronize
     * via the cpreg_tuples array (i.e. is not a core reg we sync by
     * hand in kvm_arch_get/put_registers())
     */
    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
    case KVM_REG_ARM_CORE:
        return false;
    default:
        return true;
    }
}

typedef struct CPRegStateLevel {
    uint64_t regidx;
    int level;
} CPRegStateLevel;

/* All system registers not listed in the following table are assumed to be
 * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
 * often, you must add it to this table with a state of either
 * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
 */
static const CPRegStateLevel non_runtime_cpregs[] = {
    { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
};

int kvm_arm_cpreg_level(uint64_t regidx)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
        const CPRegStateLevel *l = &non_runtime_cpregs[i];
        if (l->regidx == regidx) {
            return l->level;
        }
    }

    return KVM_PUT_RUNTIME_STATE;
}

#define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

int kvm_arch_put_registers(CPUState *cs, int level)
{
    struct kvm_one_reg reg;
    uint32_t fpr;
    uint64_t val;
    int i;
    int ret;
    unsigned int el;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
     * AArch64 registers before pushing them out to 64-bit KVM.
     */
    if (!is_a64(env)) {
        aarch64_sync_32_to_64(env);
    }

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }
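    /*
     * Informative aside: the AARCH64_CORE_REG() ids used above and below
     * encode the offset of the field within the kernel's struct kvm_regs
     * divided by sizeof(__u32), per KVM_REG_ARM_CORE_REG() in the kernel
     * uapi headers, so regs.regs[0] is core reg offset 0.
     */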
    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_save_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
    if (is_a64(env)) {
        val = pstate_read(env);
    } else {
        val = cpsr_read(env);
    }
    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Saved Program State Registers
     *
     * Before we restore from the banked_spsr[] array we need to
     * ensure that any modifications to env->spsr are correctly
     * reflected in the banks.
     */
    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->banked_spsr[i] = env->spsr;
    }

    /* KVM 0-4 map to QEMU banks 1-5 */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    /* Advanced SIMD and FP registers. */
    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
#ifdef HOST_WORDS_BIGENDIAN
        uint64_t fp_val[2] = { q[1], q[0] };
        reg.addr = (uintptr_t)fp_val;
#else
        reg.addr = (uintptr_t)q;
#endif
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    reg.addr = (uintptr_t)(&fpr);
    fpr = vfp_get_fpsr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    fpr = vfp_get_fpcr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    ret = kvm_put_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    if (!write_list_to_kvmstate(cpu, level)) {
        return -EINVAL;
    }

    kvm_arm_sync_mpstate_to_kvm(cpu);

    return ret;
}

int kvm_arch_get_registers(CPUState *cs)
{
    struct kvm_one_reg reg;
    uint64_t val;
    uint32_t fpr;
    unsigned int el;
    int i;
    int ret;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    env->aarch64 = ((val & PSTATE_nRW) == 0);
    if (is_a64(env)) {
        pstate_write(env, val);
    } else {
        cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
    }

    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_restore_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* If we are in AArch32 mode then we need to sync the AArch32 regs with the
     * incoming AArch64 regs received from 64-bit KVM.
     * We must perform this after all of the registers have been acquired from
     * the kernel.
     */
    if (!is_a64(env)) {
        aarch64_sync_64_to_32(env);
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
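    /*
     * For reference, assuming the kernel's uapi numbering, the loop
     * below reads:
     *   spsr[KVM_SPSR_EL1 = 0] -> banked_spsr[1] (SVC)
     *   spsr[KVM_SPSR_ABT = 1] -> banked_spsr[2]
     *   spsr[KVM_SPSR_UND = 2] -> banked_spsr[3]
     *   spsr[KVM_SPSR_IRQ = 3] -> banked_spsr[4]
     *   spsr[KVM_SPSR_FIQ = 4] -> banked_spsr[5]
     */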
    /* Fetch the SPSR registers
     *
     * KVM SPSRs 0-4 map to QEMU banks 1-5
     */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->spsr = env->banked_spsr[i];
    }

    /* Advanced SIMD and FP registers */
    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        reg.addr = (uintptr_t)q;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        } else {
#ifdef HOST_WORDS_BIGENDIAN
            uint64_t t;
            t = q[0], q[0] = q[1], q[1] = t;
#endif
        }
    }

    reg.addr = (uintptr_t)(&fpr);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpsr(env, fpr);

    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpcr(env, fpr);

    ret = kvm_get_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    if (!write_kvmstate_to_list(cpu)) {
        return -EINVAL;
    }
    /* Note that it's OK to have registers which aren't in CPUState,
     * so we can ignore a failure return here.
     */
    write_list_to_cpustate(cpu);

    kvm_arm_sync_mpstate_to_qemu(cpu);

    /* TODO: other registers */
    return ret;
}

/* C6.6.29 BRK instruction */
static const uint32_t brk_insn = 0xd4200000;
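/*
 * Informative: the A64 BRK encoding is 0xd4200000 | (imm16 << 5), so
 * the value above is "BRK #0".
 */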
int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    static uint32_t brk;

    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) ||
            brk != brk_insn ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

/* See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
 *
 * To minimise translating between kernel and user-space the kernel
 * ABI just provides user-space with the full exception syndrome
 * register value to be decoded in QEMU.
 */

bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
{
    int hsr_ec = syn_get_ec(debug_exit->hsr);
    ARMCPU *cpu = ARM_CPU(cs);
    CPUClass *cc = CPU_GET_CLASS(cs);
    CPUARMState *env = &cpu->env;

    /* Ensure PC is synchronised */
    kvm_cpu_synchronize_state(cs);

    switch (hsr_ec) {
    case EC_SOFTWARESTEP:
        if (cs->singlestep_enabled) {
            return true;
        } else {
            /*
             * The kernel should have suppressed the guest's ability to
             * single step at this point so something has gone wrong.
             */
            error_report("%s: guest single-step while debugging unsupported"
                         " (%"PRIx64", %"PRIx32")",
                         __func__, env->pc, debug_exit->hsr);
            return false;
        }
        break;
    case EC_AA64_BKPT:
        if (kvm_find_sw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_BREAKPOINT:
        if (find_hw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_WATCHPOINT:
    {
        CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far);
        if (wp) {
            cs->watchpoint_hit = wp;
            return true;
        }
        break;
    }
    default:
        error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
                     __func__, debug_exit->hsr, env->pc);
    }

    /* If we are not handling the debug exception it must belong to
     * the guest. Let's re-use the existing TCG interrupt code to set
     * everything up properly.
     */
    cs->exception_index = EXCP_BKPT;
    env->exception.syndrome = debug_exit->hsr;
    env->exception.vaddress = debug_exit->far;
    env->exception.target_el = 1;
    qemu_mutex_lock_iothread();
    cc->do_interrupt(cs);
    qemu_mutex_unlock_iothread();

    return false;
}