/*
 * ARM implementation of KVM hooks, 64 bit specific code
 *
 * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
 * Copyright Alex Bennée 2014, Linaro
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <sys/ptrace.h>

#include <linux/elf.h>
#include <linux/kvm.h>

#include "qemu-common.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "qemu/error-report.h"
#include "qemu/host-utils.h"
#include "exec/gdbstub.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_arm.h"
#include "internals.h"

static bool have_guest_debug;

/*
 * Although the ARM implementation of hardware assisted debugging
 * allows for different breakpoints per-core, the current GDB
 * interface treats them as a global pool of registers (which seems to
 * be the case for x86, ppc and s390). As a result we store one copy
 * of registers which is used for all active cores.
 *
 * Write access is serialised by virtue of the GDB protocol which
 * updates things. Read access (i.e. when the values are copied to the
 * vCPU) is also gated by GDB's run control.
 *
 * This is not unreasonable as most of the time debugging kernels you
 * never know which core will eventually execute your function.
 */

typedef struct {
    uint64_t bcr;
    uint64_t bvr;
} HWBreakpoint;

/* The watchpoint registers can cover more area than the requested
 * watchpoint so we need to store the additional information
 * somewhere. We also need to supply a CPUWatchpoint to the GDB stub
 * when the watchpoint is hit.
 */
typedef struct {
    uint64_t wcr;
    uint64_t wvr;
    CPUWatchpoint details;
} HWWatchpoint;

/* Maximum and current break/watch point counts */
static int max_hw_bps, max_hw_wps;
static GArray *hw_breakpoints, *hw_watchpoints;

#define cur_hw_wps   (hw_watchpoints->len)
#define cur_hw_bps   (hw_breakpoints->len)
#define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i))
#define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i))

/**
 * kvm_arm_init_debug() - check for guest debug capabilities
 * @cs: CPUState
 *
 * kvm_check_extension returns the number of debug registers we have
 * or 0 if we have none.
 */
static void kvm_arm_init_debug(CPUState *cs)
{
    have_guest_debug = kvm_check_extension(cs->kvm_state,
                                           KVM_CAP_SET_GUEST_DEBUG);

    max_hw_wps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_WPS);
    hw_watchpoints = g_array_sized_new(true, true,
                                       sizeof(HWWatchpoint), max_hw_wps);

    max_hw_bps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_BPS);
    hw_breakpoints = g_array_sized_new(true, true,
                                       sizeof(HWBreakpoint), max_hw_bps);
}

/**
 * insert_hw_breakpoint()
 * @addr: address of breakpoint
 *
 * See ARM ARM D2.9.1 for details but here we are only going to create
 * simple un-linked breakpoints (i.e. we don't chain breakpoints
 * together to match address and context or vmid). The hardware is
 * capable of fancier matching but that will require exposing that
 * fanciness to GDB's interface.
 *
 * DBGBCR<n>_EL1, Debug Breakpoint Control Registers
 *
 *  31  24 23  20 19   16 15 14  13  12   9 8   5 4    3 2   1  0
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 * | RES0 |  BT  |  LBN  | SSC | HMC| RES0 | BAS | RES0 | PMC | E |
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 *
 * BT: Breakpoint type (0 = unlinked address match)
 * LBN: Linked BP number (0 = unused)
 * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12)
 * BAS: Byte Address Select (RES1 for AArch64)
 * E: Enable bit
 *
 * DBGBVR<n>_EL1, Debug Breakpoint Value Registers
 *
 *  63  53 52       49 48       2  1 0
 * +------+-----------+----------+-----+
 * | RESS | VA[52:49] | VA[48:2] | 0 0 |
 * +------+-----------+----------+-----+
 *
 * Depending on the addressing mode bits the top bits of the register
 * are a sign extension of the highest applicable VA bit. Some
 * versions of GDB don't do it correctly so we ensure they are correct
 * here so future PC comparisons will work properly.
 */
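
/*
 * Illustrative example of that sign extension (sextract64(addr, 0, 53)
 * below): the low 53 bits are kept and bit 52 is copied up into bits
 * [63:53], so sextract64(0x0010000000000000, 0, 53) yields
 * 0xfff0000000000000, while a kernel address such as 0xffff000000001000,
 * whose top bits are already a correct sign extension of bit 52,
 * round-trips unchanged.
 */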

static int insert_hw_breakpoint(target_ulong addr)
{
    HWBreakpoint brk = {
        .bcr = 0x1,                          /* BCR E=1, enable */
        .bvr = sextract64(addr, 0, 53)
    };

    if (cur_hw_bps >= max_hw_bps) {
        return -ENOBUFS;
    }

    brk.bcr = deposit32(brk.bcr, 1, 2, 0x3); /* PMC = 11 */
    brk.bcr = deposit32(brk.bcr, 5, 4, 0xf); /* BAS = RES1 */

    g_array_append_val(hw_breakpoints, brk);

    return 0;
}
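
/*
 * Worked example of the encoding above: starting from BCR = 0x1 (E=1),
 * depositing PMC = 0b11 at bits [2:1] and BAS = 0xf at bits [8:5]
 * gives BCR = 0x1e7, i.e. an enabled, unlinked address-match
 * breakpoint that fires at EL1 and EL0, with BVR holding the
 * sign-extended PC to match.
 */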

/**
 * delete_hw_breakpoint()
 * @pc: address of breakpoint
 *
 * Delete a breakpoint and shuffle any above down
 */
static int delete_hw_breakpoint(target_ulong pc)
{
    int i;
    for (i = 0; i < hw_breakpoints->len; i++) {
        HWBreakpoint *brk = get_hw_bp(i);
        if (brk->bvr == pc) {
            g_array_remove_index(hw_breakpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}

/**
 * insert_hw_watchpoint()
 * @addr: address of watch point
 * @len: size of area
 * @type: type of watch point
 *
 * See ARM ARM D2.10. As with the breakpoints we can do some advanced
 * stuff if we want to. The watch points can be linked with the break
 * points above to make them context aware. However for simplicity
 * currently we only deal with simple read/write watch points.
 *
 * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers
 *
 *  31  29 28   24 23  21  20  19 16 15 14  13   12  5 4   3 2   1  0
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 * | RES0 |  MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E |
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 *
 * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes))
 * WT: 0 - unlinked, 1 - linked (not currently used)
 * LBN: Linked BP number (not currently used)
 * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11)
 * BAS: Byte Address Select
 * LSC: Load/Store control (01: load, 10: store, 11: both)
 * E: Enable
 *
 * The bottom 2 bits of the value register are masked. Therefore to
 * break on any sizes smaller than an unaligned word you need to set
 * MASK=0, BAS=bit per byte in question. For larger regions (powers of
 * 2) you need to ensure you mask the address as required and set
 * BAS=0xff.
 */
static int insert_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    HWWatchpoint wp = {
        .wcr = 1, /* E=1, enable */
        .wvr = addr & (~0x7ULL),
        .details = { .vaddr = addr, .len = len }
    };

    if (cur_hw_wps >= max_hw_wps) {
        return -ENOBUFS;
    }

    /*
     * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state,
     * valid whether EL3 is implemented or not
     */
    wp.wcr = deposit32(wp.wcr, 1, 2, 3);

    switch (type) {
    case GDB_WATCHPOINT_READ:
        wp.wcr = deposit32(wp.wcr, 3, 2, 1);
        wp.details.flags = BP_MEM_READ;
        break;
    case GDB_WATCHPOINT_WRITE:
        wp.wcr = deposit32(wp.wcr, 3, 2, 2);
        wp.details.flags = BP_MEM_WRITE;
        break;
    case GDB_WATCHPOINT_ACCESS:
        wp.wcr = deposit32(wp.wcr, 3, 2, 3);
        wp.details.flags = BP_MEM_ACCESS;
        break;
    default:
        g_assert_not_reached();
    }
    if (len <= 8) {
        /* we align the address and set the bits in BAS */
        int off = addr & 0x7;
        int bas = (1 << len) - 1;

        wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas);
    } else {
        /* For ranges above 8 bytes we need to be a power of 2 */
        if (is_power_of_2(len)) {
            int bits = ctz64(len);

            wp.wvr &= ~((1 << bits) - 1);
            wp.wcr = deposit32(wp.wcr, 24, 4, bits);
            wp.wcr = deposit32(wp.wcr, 5, 8, 0xff);
        } else {
            return -ENOBUFS;
        }
    }

    g_array_append_val(hw_watchpoints, wp);
    return 0;
}
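
/*
 * Two illustrative encodings produced by the code above:
 *  - a 2 byte watch on an address ending in 0x6: off = 6, so
 *    BAS = 0b11 is deposited at bits [12:11], selecting bytes 6-7 of
 *    the doubleword-aligned address held in WVR;
 *  - a 16 byte watch: WVR is masked down to a 16 byte boundary and
 *    MASK = 4 (2^4 bytes covered) with BAS = 0xff.
 */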

static bool check_watchpoint_in_range(int i, target_ulong addr)
{
    HWWatchpoint *wp = get_hw_wp(i);
    uint64_t addr_top, addr_bottom = wp->wvr;
    int bas = extract32(wp->wcr, 5, 8);
    int mask = extract32(wp->wcr, 24, 4);

    if (mask) {
        addr_top = addr_bottom + (1 << mask);
    } else {
        /* BAS must be contiguous but can offset against the base
         * address in DBGWVR */
        addr_bottom = addr_bottom + ctz32(bas);
        addr_top = addr_bottom + clo32(bas);
    }

    if (addr >= addr_bottom && addr <= addr_top) {
        return true;
    }

    return false;
}

/**
 * delete_hw_watchpoint()
 * @addr: address of watchpoint
 *
 * Delete a watchpoint and shuffle any above down
 */
static int delete_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    int i;
    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            g_array_remove_index(hw_watchpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return insert_hw_breakpoint(addr);
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return insert_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return delete_hw_breakpoint(addr);
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return delete_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    if (cur_hw_wps > 0) {
        g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
    }
    if (cur_hw_bps > 0) {
        g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
    }
}

void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
{
    int i;
    memset(ptr, 0, sizeof(struct kvm_guest_debug_arch));

    for (i = 0; i < max_hw_wps; i++) {
        HWWatchpoint *wp = get_hw_wp(i);
        ptr->dbg_wcr[i] = wp->wcr;
        ptr->dbg_wvr[i] = wp->wvr;
    }
    for (i = 0; i < max_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        ptr->dbg_bcr[i] = bp->bcr;
        ptr->dbg_bvr[i] = bp->bvr;
    }
}

bool kvm_arm_hw_debug_active(CPUState *cs)
{
    return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
}

static bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
{
    int i;

    for (i = 0; i < cur_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        if (bp->bvr == pc) {
            return true;
        }
    }
    return false;
}

static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
{
    int i;

    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            return &get_hw_wp(i)->details;
        }
    }
    return NULL;
}

static bool kvm_arm_pmu_set_attr(CPUState *cs, struct kvm_device_attr *attr)
{
    int err;

    err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("PMU: KVM_HAS_DEVICE_ATTR: %s", strerror(-err));
        return false;
    }

    err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("PMU: KVM_SET_DEVICE_ATTR: %s", strerror(-err));
        return false;
    }

    return true;
}

void kvm_arm_pmu_init(CPUState *cs)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .attr = KVM_ARM_VCPU_PMU_V3_INIT,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_pmu_set_attr(cs, &attr)) {
        error_report("failed to init PMU");
        abort();
    }
}

void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .addr = (intptr_t)&irq,
        .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_pmu_set_attr(cs, &attr)) {
        error_report("failed to set irq for PMU");
        abort();
    }
}

static inline void set_feature(uint64_t *features, int feature)
{
    *features |= 1ULL << feature;
}

static inline void unset_feature(uint64_t *features, int feature)
{
    *features &= ~(1ULL << feature);
}

static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
{
    uint64_t ret;
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret };
    int err;

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    err = ioctl(fd, KVM_GET_ONE_REG, &idreg);
    if (err < 0) {
        return -1;
    }
    *pret = ret;
    return 0;
}

static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id)
{
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret };

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    return ioctl(fd, KVM_GET_ONE_REG, &idreg);
}
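
/*
 * The ARM64_SYS_REG() arguments used below are the (op0, op1, crn, crm,
 * op2) system register encodings from the ARM ARM; for example
 * ARM64_SYS_REG(3, 0, 0, 4, 0) is ID_AA64PFR0_EL1 and
 * ARM64_SYS_REG(3, 0, 0, 6, 0) is ID_AA64ISAR0_EL1.
 */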

bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
{
    /* Identify the feature bits corresponding to the host CPU, and
     * fill out the ARMHostCPUClass fields accordingly. To do this
     * we have to create a scratch VM, create a single CPU inside it,
     * and then query that CPU for the relevant ID registers.
     */
    int fdarray[3];
    uint64_t features = 0;
    int err;

    /* Old kernels may not know about the PREFERRED_TARGET ioctl; however,
     * we know they will only support creating one kind of guest CPU,
     * which is their preferred CPU type. Fortunately these old kernels
     * support only a very limited number of CPUs.
     */
    static const uint32_t cpus_to_try[] = {
        KVM_ARM_TARGET_AEM_V8,
        KVM_ARM_TARGET_FOUNDATION_V8,
        KVM_ARM_TARGET_CORTEX_A57,
        QEMU_KVM_ARM_TARGET_NONE
    };
    struct kvm_vcpu_init init;

    if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
        return false;
    }

    ahcf->target = init.target;
    ahcf->dtb_compatible = "arm,arm-v8";
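
    /*
     * kvm_arm_create_scratch_host_vcpu() fills fdarray[] with the KVM
     * device fd, the scratch VM fd and the scratch vCPU fd, in that
     * order, so the ID register reads below all go via fdarray[2].
     */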

    err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0,
                         ARM64_SYS_REG(3, 0, 0, 4, 0));
    if (unlikely(err < 0)) {
        /*
         * Before v4.15, the kernel only exposed a limited number of system
         * registers, not including any of the interesting AArch64 ID regs.
         * For the most part we could leave these fields as zero with minimal
         * effect, since this does not affect the values seen by the guest.
         *
         * However, it could cause problems down the line for QEMU,
         * so provide a minimal v8.0 default.
         *
         * ??? Could read MIDR and use knowledge from cpu64.c.
         * ??? Could map a page of memory into our temp guest and
         *     run the tiniest of hand-crafted kernels to extract
         *     the values seen by the guest.
         * ??? Either of these sounds like too much effort just
         *     to work around running a modern host kernel.
         */
        ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */
        err = 0;
    } else {
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
                              ARM64_SYS_REG(3, 0, 0, 4, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
                              ARM64_SYS_REG(3, 0, 0, 6, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
                              ARM64_SYS_REG(3, 0, 0, 6, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
                              ARM64_SYS_REG(3, 0, 0, 7, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
                              ARM64_SYS_REG(3, 0, 0, 7, 1));

        /*
         * Note that if AArch32 support is not present in the host,
         * the AArch32 sysregs are present to be read, but will
         * return UNKNOWN values. This is neither better nor worse
         * than skipping the reads and leaving 0, as we must avoid
         * considering the values in every case.
         */
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
                              ARM64_SYS_REG(3, 0, 0, 2, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
                              ARM64_SYS_REG(3, 0, 0, 2, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
                              ARM64_SYS_REG(3, 0, 0, 2, 2));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
                              ARM64_SYS_REG(3, 0, 0, 2, 3));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
                              ARM64_SYS_REG(3, 0, 0, 2, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
                              ARM64_SYS_REG(3, 0, 0, 2, 5));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
                              ARM64_SYS_REG(3, 0, 0, 2, 7));

        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
                              ARM64_SYS_REG(3, 0, 0, 3, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
                              ARM64_SYS_REG(3, 0, 0, 3, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
                              ARM64_SYS_REG(3, 0, 0, 3, 2));
    }

    kvm_arm_destroy_scratch_host_vcpu(fdarray);

    if (err < 0) {
        return false;
    }

    /* We can assume any KVM supporting CPU is at least a v8
     * with VFPv4+Neon; this in turn implies most of the other
     * feature bits.
     */
    set_feature(&features, ARM_FEATURE_V8);
    set_feature(&features, ARM_FEATURE_VFP4);
    set_feature(&features, ARM_FEATURE_NEON);
    set_feature(&features, ARM_FEATURE_AARCH64);
    set_feature(&features, ARM_FEATURE_PMU);

    ahcf->features = features;

    return true;
}

#define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5
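
/*
 * ARM_CPU_ID_MPIDR expands to the (op0, op1, crn, crm, op2) encoding of
 * MPIDR_EL1, so ARM64_SYS_REG(ARM_CPU_ID_MPIDR) below names that
 * register; only the affinity fields of the value read back are kept,
 * via ARM64_AFFINITY_MASK.
 */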

int kvm_arch_init_vcpu(CPUState *cs)
{
    int ret;
    uint64_t mpidr;
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
        !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
        fprintf(stderr, "KVM is not supported for this guest CPU type\n");
        return -EINVAL;
    }

    /* Determine init features for this CPU */
    memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
    if (cpu->start_powered_off) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
    }
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
        cpu->psci_version = 2;
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
    }
    if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
    }
    if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
        cpu->has_pmu = false;
    }
    if (cpu->has_pmu) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
    } else {
        unset_feature(&env->features, ARM_FEATURE_PMU);
    }

    /* Do KVM_ARM_VCPU_INIT ioctl */
    ret = kvm_arm_vcpu_init(cs);
    if (ret) {
        return ret;
    }

    /*
     * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
     * Currently KVM has its own idea about MPIDR assignment, so we
     * override our defaults with what we get from KVM.
     */
    ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
    if (ret) {
        return ret;
    }
    cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;

    kvm_arm_init_debug(cs);

    /* Check whether user space can specify guest syndrome value */
    kvm_arm_init_serror_injection(cs);

    return kvm_arm_init_cpreg_list(cpu);
}

bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
{
    /* Return true if the regidx is a register we should synchronize
     * via the cpreg_tuples array (ie is not a core reg we sync by
     * hand in kvm_arch_get/put_registers())
     */
    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
    case KVM_REG_ARM_CORE:
        return false;
    default:
        return true;
    }
}

typedef struct CPRegStateLevel {
    uint64_t regidx;
    int level;
} CPRegStateLevel;

/* All system registers not listed in the following table are assumed to be
 * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
 * often, you must add it to this table with a state of either
 * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
 */
static const CPRegStateLevel non_runtime_cpregs[] = {
    { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
};

int kvm_arm_cpreg_level(uint64_t regidx)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
        const CPRegStateLevel *l = &non_runtime_cpregs[i];
        if (l->regidx == regidx) {
            return l->level;
        }
    }

    return KVM_PUT_RUNTIME_STATE;
}

#define AARCH64_CORE_REG(x)      (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
                                  KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
                                  KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CTRL_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
                                  KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
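
/*
 * KVM_REG_ARM_CORE_REG(x) is offsetof(struct kvm_regs, x) / sizeof(__u32),
 * so, for example, AARCH64_CORE_REG(regs.pc) names the guest PC slot of
 * the kernel's struct kvm_regs for use with KVM_GET/SET_ONE_REG.
 */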

int kvm_arch_put_registers(CPUState *cs, int level)
{
    struct kvm_one_reg reg;
    uint32_t fpr;
    uint64_t val;
    int i;
    int ret;
    unsigned int el;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
     * AArch64 registers before pushing them out to 64-bit KVM.
     */
    if (!is_a64(env)) {
        aarch64_sync_32_to_64(env);
    }

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_save_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
    if (is_a64(env)) {
        val = pstate_read(env);
    } else {
        val = cpsr_read(env);
    }
    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Saved Program State Registers
     *
     * Before we restore from the banked_spsr[] array we need to
     * ensure that any modifications to env->spsr are correctly
     * reflected in the banks.
     */
    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->banked_spsr[i] = env->spsr;
    }

    /* KVM 0-4 map to QEMU banks 1-5 */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    /* Advanced SIMD and FP registers. */
    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
#ifdef HOST_WORDS_BIGENDIAN
        uint64_t fp_val[2] = { q[1], q[0] };
        reg.addr = (uintptr_t)fp_val;
#else
        reg.addr = (uintptr_t)q;
#endif
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    reg.addr = (uintptr_t)(&fpr);
    fpr = vfp_get_fpsr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    fpr = vfp_get_fpcr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    ret = kvm_put_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    write_cpustate_to_list(cpu, true);

    if (!write_list_to_kvmstate(cpu, level)) {
        return -EINVAL;
    }

    kvm_arm_sync_mpstate_to_kvm(cpu);

    return ret;
}

int kvm_arch_get_registers(CPUState *cs)
{
    struct kvm_one_reg reg;
    uint64_t val;
    uint32_t fpr;
    unsigned int el;
    int i;
    int ret;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
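
    /*
     * The pstate value read back next has PSTATE.nRW (bit 4) set for
     * AArch32 and clear for AArch64; env->aarch64 below is derived from
     * it before we decide how to write the value into the CPU state.
     */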

    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    env->aarch64 = ((val & PSTATE_nRW) == 0);
    if (is_a64(env)) {
        pstate_write(env, val);
    } else {
        cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
    }

    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_restore_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* If we are in AArch32 mode then we need to sync the AArch32 regs with the
     * incoming AArch64 regs received from 64-bit KVM.
     * We must perform this after all of the registers have been acquired from
     * the kernel.
     */
    if (!is_a64(env)) {
        aarch64_sync_64_to_32(env);
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Fetch the SPSR registers
     *
     * KVM SPSRs 0-4 map to QEMU banks 1-5
     */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->spsr = env->banked_spsr[i];
    }

    /* Advanced SIMD and FP registers */
    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        reg.addr = (uintptr_t)q;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        } else {
#ifdef HOST_WORDS_BIGENDIAN
            uint64_t t;
            t = q[0], q[0] = q[1], q[1] = t;
#endif
        }
    }
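
    /*
     * KVM transfers each vreg as a single 128-bit value, while QEMU
     * keeps the less significant doubleword in q[0]; on a big-endian
     * host the two halves therefore arrive swapped relative to QEMU's
     * layout, which is why the loop above (and the mirror-image
     * fp_val[] handling in kvm_arch_put_registers()) swaps them.
     */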

    reg.addr = (uintptr_t)(&fpr);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpsr(env, fpr);

    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpcr(env, fpr);

    ret = kvm_get_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    if (!write_kvmstate_to_list(cpu)) {
        return -EINVAL;
    }
    /* Note that it's OK to have registers which aren't in CPUState,
     * so we can ignore a failure return here.
     */
    write_list_to_cpustate(cpu);

    kvm_arm_sync_mpstate_to_qemu(cpu);

    /* TODO: other registers */
    return ret;
}

/* C6.6.29 BRK instruction */
static const uint32_t brk_insn = 0xd4200000;

int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    static uint32_t brk;

    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) ||
            brk != brk_insn ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

/* See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
 *
 * To minimise translating between kernel and user-space the kernel
 * ABI just provides user-space with the full exception syndrome
 * register value to be decoded in QEMU.
 */

bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
{
    int hsr_ec = syn_get_ec(debug_exit->hsr);
    ARMCPU *cpu = ARM_CPU(cs);
    CPUClass *cc = CPU_GET_CLASS(cs);
    CPUARMState *env = &cpu->env;

    /* Ensure PC is synchronised */
    kvm_cpu_synchronize_state(cs);

    switch (hsr_ec) {
    case EC_SOFTWARESTEP:
        if (cs->singlestep_enabled) {
            return true;
        } else {
            /*
             * The kernel should have suppressed the guest's ability to
             * single step at this point so something has gone wrong.
             */
            error_report("%s: guest single-step while debugging unsupported"
                         " (%"PRIx64", %"PRIx32")",
                         __func__, env->pc, debug_exit->hsr);
            return false;
        }
    case EC_AA64_BKPT:
        if (kvm_find_sw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_BREAKPOINT:
        if (find_hw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_WATCHPOINT:
    {
        CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far);
        if (wp) {
            cs->watchpoint_hit = wp;
            return true;
        }
        break;
    }
    default:
        error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
                     __func__, debug_exit->hsr, env->pc);
    }

    /* If we are not handling the debug exception it must belong to
     * the guest. Let's re-use the existing TCG interrupt code to set
     * everything up properly.
     */
    cs->exception_index = EXCP_BKPT;
    env->exception.syndrome = debug_exit->hsr;
    env->exception.vaddress = debug_exit->far;
    env->exception.target_el = 1;
    qemu_mutex_lock_iothread();
    cc->do_interrupt(cs);
    qemu_mutex_unlock_iothread();

    return false;
}