/*
 * ARM implementation of KVM hooks, 64 bit specific code
 *
 * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
 * Copyright Alex Bennée 2014, Linaro
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <sys/ptrace.h>

#include <linux/elf.h>
#include <linux/kvm.h>

#include "qapi/error.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "qemu/error-report.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "exec/gdbstub.h"
#include "sysemu/runstate.h"
#include "sysemu/kvm.h"
#include "sysemu/kvm_int.h"
#include "kvm_arm.h"
#include "internals.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/ghes.h"
#include "hw/arm/virt.h"

static bool have_guest_debug;

/*
 * Although the ARM implementation of hardware assisted debugging
 * allows for different breakpoints per-core, the current GDB
 * interface treats them as a global pool of registers (which seems to
 * be the case for x86, ppc and s390). As a result we store one copy
 * of registers which is used for all active cores.
 *
 * Write access is serialised by virtue of the GDB protocol which
 * updates things. Read access (i.e. when the values are copied to the
 * vCPU) is also gated by GDB's run control.
 *
 * This is not unreasonable as most of the time debugging kernels you
 * never know which core will eventually execute your function.
 */

typedef struct {
    uint64_t bcr;
    uint64_t bvr;
} HWBreakpoint;

/*
 * The watchpoint registers can cover more area than the requested
 * watchpoint so we need to store the additional information
 * somewhere. We also need to supply a CPUWatchpoint to the GDB stub
 * when the watchpoint is hit.
 */
typedef struct {
    uint64_t wcr;
    uint64_t wvr;
    CPUWatchpoint details;
} HWWatchpoint;

/* Maximum and current break/watch point counts */
int max_hw_bps, max_hw_wps;
GArray *hw_breakpoints, *hw_watchpoints;

#define cur_hw_wps   (hw_watchpoints->len)
#define cur_hw_bps   (hw_breakpoints->len)
#define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i))
#define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i))

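/*
 * Note: the counts reported by the capabilities queried below are
 * implementation defined (they mirror ID_AA64DFR0_EL1.BRPs/WRPs); a
 * Cortex-A57 host, for example, reports 6 breakpoints and 4
 * watchpoints.
 */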
void kvm_arm_init_debug(KVMState *s)
{
    have_guest_debug = kvm_check_extension(s,
                                           KVM_CAP_SET_GUEST_DEBUG);

    max_hw_wps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS);
    hw_watchpoints = g_array_sized_new(true, true,
                                       sizeof(HWWatchpoint), max_hw_wps);

    max_hw_bps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS);
    hw_breakpoints = g_array_sized_new(true, true,
                                       sizeof(HWBreakpoint), max_hw_bps);
}

/**
 * insert_hw_breakpoint()
 * @addr: address of breakpoint
 *
 * See ARM ARM D2.9.1 for details but here we are only going to create
 * simple un-linked breakpoints (i.e. we don't chain breakpoints
 * together to match address and context or vmid). The hardware is
 * capable of fancier matching but that would require exposing that
 * fanciness to GDB's interface.
 *
 * DBGBCR<n>_EL1, Debug Breakpoint Control Registers
 *
 *  31  24 23  20 19   16 15 14  13  12   9 8   5 4    3 2   1  0
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 * | RES0 |  BT  |  LBN  | SSC | HMC| RES0 | BAS | RES0 | PMC | E |
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 *
 * BT: Breakpoint type (0 = unlinked address match)
 * LBN: Linked BP number (0 = unused)
 * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12)
 * BAS: Byte Address Select (RES1 for AArch64)
 * E: Enable bit
 *
 * DBGBVR<n>_EL1, Debug Breakpoint Value Registers
 *
 *  63  53 52       49 48       2  1 0
 * +------+-----------+----------+-----+
 * | RESS | VA[52:49] | VA[48:2] | 0 0 |
 * +------+-----------+----------+-----+
 *
 * Depending on the addressing mode bits the top bits of the register
 * are a sign extension of the highest applicable VA bit. Some
 * versions of GDB don't do it correctly so we ensure they are correct
 * here so future PC comparisons will work properly.
 */

static int insert_hw_breakpoint(target_ulong addr)
{
    HWBreakpoint brk = {
        .bcr = 0x1,                     /* BCR E=1, enable */
        .bvr = sextract64(addr, 0, 53)
    };

    if (cur_hw_bps >= max_hw_bps) {
        return -ENOBUFS;
    }

    brk.bcr = deposit32(brk.bcr, 1, 2, 0x3);   /* PMC = 11 */
    brk.bcr = deposit32(brk.bcr, 5, 4, 0xf);   /* BAS = RES1 */

    g_array_append_val(hw_breakpoints, brk);

    return 0;
}

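/*
 * Illustrative example of the sign extension above (hypothetical
 * values): a GDB that hands us 0x00ff_ffff_ffff_fffc with the top
 * bits wrong would never match a guest PC of 0xffff_ffff_ffff_fffc;
 * sextract64(addr, 0, 53) canonicalises both to the same
 * 0xfffffffffffffffc value, so later comparisons work.
 */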
/**
 * delete_hw_breakpoint()
 * @pc: address of breakpoint
 *
 * Delete a breakpoint and shuffle any above down
 */

static int delete_hw_breakpoint(target_ulong pc)
{
    int i;
    for (i = 0; i < hw_breakpoints->len; i++) {
        HWBreakpoint *brk = get_hw_bp(i);
        if (brk->bvr == pc) {
            g_array_remove_index(hw_breakpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}

/**
 * insert_hw_watchpoint()
 * @addr: address of watch point
 * @len: size of area
 * @type: type of watch point
 *
 * See ARM ARM D2.10. As with the breakpoints we can do some advanced
 * stuff if we want to. The watch points can be linked with the break
 * points above to make them context aware. However for simplicity
 * currently we only deal with simple read/write watch points.
 *
 * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers
 *
 *  31  29 28   24 23  21 20 19 16 15 14 13   12  5 4   3 2   1  0
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 * | RES0 |  MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E |
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 *
 * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes))
 * WT: 0 - unlinked, 1 - linked (not currently used)
 * LBN: Linked BP number (not currently used)
 * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11)
 * BAS: Byte Address Select
 * LSC: Load/Store control (01: load, 10: store, 11: both)
 * E: Enable
 *
 * The bottom 2 bits of the value register are masked. Therefore to
 * break on any sizes smaller than an unaligned word you need to set
 * MASK=0, BAS=bit per byte in question. For larger regions (powers of
 * 2) you need to mask the address as required and set BAS=0xff.
 */

static int insert_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    HWWatchpoint wp = {
        .wcr = R_DBGWCR_E_MASK, /* E=1, enable */
        .wvr = addr & (~0x7ULL),
        .details = { .vaddr = addr, .len = len }
    };

    if (cur_hw_wps >= max_hw_wps) {
        return -ENOBUFS;
    }

    /*
     * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state,
     * valid whether EL3 is implemented or not
     */
    wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, PAC, 3);

    switch (type) {
    case GDB_WATCHPOINT_READ:
        wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 1);
        wp.details.flags = BP_MEM_READ;
        break;
    case GDB_WATCHPOINT_WRITE:
        wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 2);
        wp.details.flags = BP_MEM_WRITE;
        break;
    case GDB_WATCHPOINT_ACCESS:
        wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 3);
        wp.details.flags = BP_MEM_ACCESS;
        break;
    default:
        g_assert_not_reached();
    }
    if (len <= 8) {
        /* we align the address and set the bits in BAS */
        int off = addr & 0x7;
        int bas = (1 << len) - 1;

        wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas);
    } else {
        /* For ranges above 8 bytes we need to be a power of 2 */
        if (is_power_of_2(len)) {
            int bits = ctz64(len);

            wp.wvr &= ~((1 << bits) - 1);
            wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, MASK, bits);
            wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, BAS, 0xff);
        } else {
            return -ENOBUFS;
        }
    }

    g_array_append_val(hw_watchpoints, wp);
    return 0;
}

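/*
 * Worked example for the BAS arithmetic above (illustrative values):
 * a 2-byte watchpoint at 0x1003 gives off = 3, bas = 0b11 and
 * wvr = 0x1000; deposit32() then sets the BAS field (bits 12:5) to
 * 0b00011000, i.e. bytes 3 and 4 of the doubleword at 0x1000 are
 * watched, covering exactly 0x1003-0x1004.
 */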
static bool check_watchpoint_in_range(int i, target_ulong addr)
{
    HWWatchpoint *wp = get_hw_wp(i);
    uint64_t addr_top, addr_bottom = wp->wvr;
    int bas = extract32(wp->wcr, 5, 8);
    int mask = extract32(wp->wcr, 24, 4);

    if (mask) {
        addr_top = addr_bottom + (1 << mask);
    } else {
        /*
         * BAS must be contiguous but can offset against the base
         * address in DBGWVR
         */
        addr_bottom = addr_bottom + ctz32(bas);
        addr_top = addr_bottom + clo32(bas);
    }

    if (addr >= addr_bottom && addr <= addr_top) {
        return true;
    }

    return false;
}

/**
 * delete_hw_watchpoint()
 * @addr: address of watchpoint
 *
 * Delete a watchpoint and shuffle any above down
 */

static int delete_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    int i;
    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            g_array_remove_index(hw_watchpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return insert_hw_breakpoint(addr);
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return insert_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return delete_hw_breakpoint(addr);
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return delete_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    if (cur_hw_wps > 0) {
        g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
    }
    if (cur_hw_bps > 0) {
        g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
    }
}

void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
{
    int i;
    memset(ptr, 0, sizeof(struct kvm_guest_debug_arch));

    for (i = 0; i < max_hw_wps; i++) {
        HWWatchpoint *wp = get_hw_wp(i);
        ptr->dbg_wcr[i] = wp->wcr;
        ptr->dbg_wvr[i] = wp->wvr;
    }
    for (i = 0; i < max_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        ptr->dbg_bcr[i] = bp->bcr;
        ptr->dbg_bvr[i] = bp->bvr;
    }
}

bool kvm_arm_hw_debug_active(CPUState *cs)
{
    return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
}

static bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
{
    int i;

    for (i = 0; i < cur_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        if (bp->bvr == pc) {
            return true;
        }
    }
    return false;
}

static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
{
    int i;

    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            return &get_hw_wp(i)->details;
        }
    }
    return NULL;
}

static bool kvm_arm_set_device_attr(CPUState *cs, struct kvm_device_attr *attr,
                                    const char *name)
{
    int err;

    err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err));
        return false;
    }

    err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err));
        return false;
    }

    return true;
}

void kvm_arm_pmu_init(CPUState *cs)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .attr = KVM_ARM_VCPU_PMU_V3_INIT,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) {
        error_report("failed to init PMU");
        abort();
    }
}

void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .addr = (intptr_t)&irq,
        .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) {
        error_report("failed to set irq for PMU");
        abort();
    }
}

void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PVTIME_CTRL,
        .attr = KVM_ARM_VCPU_PVTIME_IPA,
        .addr = (uint64_t)&ipa,
    };

    if (ARM_CPU(cs)->kvm_steal_time == ON_OFF_AUTO_OFF) {
        return;
    }
    if (!kvm_arm_set_device_attr(cs, &attr, "PVTIME IPA")) {
        error_report("failed to init PVTIME IPA");
        abort();
    }
}

static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
{
    uint64_t ret;
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret };
    int err;

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    err = ioctl(fd, KVM_GET_ONE_REG, &idreg);
    if (err < 0) {
        return -1;
    }
    *pret = ret;
    return 0;
}

static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id)
{
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret };

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    return ioctl(fd, KVM_GET_ONE_REG, &idreg);
}

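/*
 * For reference (based on the ARM64_SYS_REG() definition in the KVM
 * headers): its five arguments are op0, op1, crn, crm, op2, so e.g.
 * ARM64_SYS_REG(3, 0, 0, 4, 0) used below names ID_AA64PFR0_EL1.
 */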
static bool kvm_arm_pauth_supported(void)
{
    return (kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_ADDRESS) &&
            kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_GENERIC));
}

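/*
 * Both capabilities are required above because KVM expects the two
 * KVM_ARM_VCPU_PTRAUTH_* vCPU init features to be enabled together
 * (or not at all).
 */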
bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
{
    /*
     * Identify the feature bits corresponding to the host CPU, and
     * fill out the ARMHostCPUClass fields accordingly. To do this
     * we have to create a scratch VM, create a single CPU inside it,
     * and then query that CPU for the relevant ID registers.
     */
    int fdarray[3];
    bool sve_supported;
    bool pmu_supported = false;
    uint64_t features = 0;
    int err;

    /*
     * Old kernels may not know about the PREFERRED_TARGET ioctl; however,
     * we know each of them supports creating only one kind of guest CPU,
     * which is its preferred CPU type. Fortunately these old kernels
     * support only a very limited number of CPUs.
     */
    static const uint32_t cpus_to_try[] = {
        KVM_ARM_TARGET_AEM_V8,
        KVM_ARM_TARGET_FOUNDATION_V8,
        KVM_ARM_TARGET_CORTEX_A57,
        QEMU_KVM_ARM_TARGET_NONE
    };
    /*
     * target = -1 informs kvm_arm_create_scratch_host_vcpu()
     * to use the preferred target
     */
    struct kvm_vcpu_init init = { .target = -1, };

    /*
     * Ask for SVE if supported, so that we can query ID_AA64ZFR0,
     * which is otherwise RAZ.
     */
    sve_supported = kvm_arm_sve_supported();
    if (sve_supported) {
        init.features[0] |= 1 << KVM_ARM_VCPU_SVE;
    }

    /*
     * Ask for Pointer Authentication if supported, so that we get
     * the unsanitized field values for AA64ISAR1_EL1.
     */
    if (kvm_arm_pauth_supported()) {
        init.features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS |
                             1 << KVM_ARM_VCPU_PTRAUTH_GENERIC);
    }

    if (kvm_arm_pmu_supported()) {
        init.features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
        pmu_supported = true;
    }

    if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
        return false;
    }

    ahcf->target = init.target;
    ahcf->dtb_compatible = "arm,arm-v8";

    err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0,
                         ARM64_SYS_REG(3, 0, 0, 4, 0));
    if (unlikely(err < 0)) {
        /*
         * Before v4.15, the kernel only exposed a limited number of system
         * registers, not including any of the interesting AArch64 ID regs.
         * For the most part we could leave these fields as zero with minimal
         * effect, since this does not affect the values seen by the guest.
         *
         * However, it could cause problems down the line for QEMU,
         * so provide a minimal v8.0 default.
         *
         * ??? Could read MIDR and use knowledge from cpu64.c.
         * ??? Could map a page of memory into our temp guest and
         *     run the tiniest of hand-crafted kernels to extract
         *     the values seen by the guest.
         * ??? Either of these sounds like too much effort just
         *     to work around running an old kernel.
         */
        ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */
        err = 0;
    } else {
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
                              ARM64_SYS_REG(3, 0, 0, 4, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0,
                              ARM64_SYS_REG(3, 0, 0, 4, 5));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0,
                              ARM64_SYS_REG(3, 0, 0, 5, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1,
                              ARM64_SYS_REG(3, 0, 0, 5, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
                              ARM64_SYS_REG(3, 0, 0, 6, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
                              ARM64_SYS_REG(3, 0, 0, 6, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
                              ARM64_SYS_REG(3, 0, 0, 7, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
                              ARM64_SYS_REG(3, 0, 0, 7, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2,
                              ARM64_SYS_REG(3, 0, 0, 7, 2));

        /*
         * Note that if AArch32 support is not present in the host,
         * the AArch32 sysregs are present to be read, but will
         * return UNKNOWN values. This is neither better nor worse
         * than skipping the reads and leaving 0, as we must avoid
         * considering the values in every case.
         */
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0,
                              ARM64_SYS_REG(3, 0, 0, 1, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1,
                              ARM64_SYS_REG(3, 0, 0, 1, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0,
                              ARM64_SYS_REG(3, 0, 0, 1, 2));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0,
                              ARM64_SYS_REG(3, 0, 0, 1, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1,
                              ARM64_SYS_REG(3, 0, 0, 1, 5));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2,
                              ARM64_SYS_REG(3, 0, 0, 1, 6));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3,
                              ARM64_SYS_REG(3, 0, 0, 1, 7));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
                              ARM64_SYS_REG(3, 0, 0, 2, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
                              ARM64_SYS_REG(3, 0, 0, 2, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
                              ARM64_SYS_REG(3, 0, 0, 2, 2));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
                              ARM64_SYS_REG(3, 0, 0, 2, 3));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
                              ARM64_SYS_REG(3, 0, 0, 2, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
                              ARM64_SYS_REG(3, 0, 0, 2, 5));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4,
                              ARM64_SYS_REG(3, 0, 0, 2, 6));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
                              ARM64_SYS_REG(3, 0, 0, 2, 7));

        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
                              ARM64_SYS_REG(3, 0, 0, 3, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
                              ARM64_SYS_REG(3, 0, 0, 3, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
                              ARM64_SYS_REG(3, 0, 0, 3, 2));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2,
                              ARM64_SYS_REG(3, 0, 0, 3, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr1,
                              ARM64_SYS_REG(3, 0, 0, 3, 5));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr5,
                              ARM64_SYS_REG(3, 0, 0, 3, 6));

        /*
         * DBGDIDR is a bit complicated because the kernel doesn't
         * provide an accessor for it in 64-bit mode, which is what this
         * scratch VM is in, and there's no architected "64-bit sysreg
         * which reads the same as the 32-bit register" the way there is
         * for other ID registers. Instead we synthesize a value from the
         * AArch64 ID_AA64DFR0, the same way the kernel code in
         * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does.
         * We only do this if the CPU supports AArch32 at EL1.
         */
        if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) {
            int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS);
            int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS);
            int ctx_cmps =
                FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS);
            int version = 6; /* ARMv8 debug architecture */
            bool has_el3 =
                !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3);
            uint32_t dbgdidr = 0;

            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3);
            dbgdidr |= (1 << 15); /* RES1 bit */
            ahcf->isar.dbgdidr = dbgdidr;
        }

        if (pmu_supported) {
            /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */
            err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0,
                                  ARM64_SYS_REG(3, 3, 9, 12, 0));
        }

        if (sve_supported) {
            /*
             * There is a range of kernels between kernel commit 73433762fcae
             * and f81cb2c3ad41 which have a bug where the kernel doesn't
             * expose SYS_ID_AA64ZFR0_EL1 via the ONE_REG API unless the VM
             * has enabled SVE support, in which case reading it produced an
             * error rather than RAZ. So only read the register if we set
             * KVM_ARM_VCPU_SVE above.
             */
            err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0,
                                  ARM64_SYS_REG(3, 0, 0, 4, 4));
        }
    }

    kvm_arm_destroy_scratch_host_vcpu(fdarray);

    if (err < 0) {
        return false;
    }

    /*
     * We can assume any KVM supporting CPU is at least a v8
     * with VFPv4+Neon; this in turn implies most of the other
     * feature bits.
     */
    features |= 1ULL << ARM_FEATURE_V8;
    features |= 1ULL << ARM_FEATURE_NEON;
    features |= 1ULL << ARM_FEATURE_AARCH64;
    features |= 1ULL << ARM_FEATURE_PMU;
    features |= 1ULL << ARM_FEATURE_GENERIC_TIMER;

    ahcf->features = features;

    return true;
}

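/*
 * The ARMHostCPUFeatures filled in above back the '-cpu host' model:
 * kvm_arm_set_cpu_features_from_host() copies these fields into the
 * ARMCPU when that CPU type is selected.
 */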
void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
{
    bool has_steal_time = kvm_arm_steal_time_supported();

    if (cpu->kvm_steal_time == ON_OFF_AUTO_AUTO) {
        if (!has_steal_time || !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
            cpu->kvm_steal_time = ON_OFF_AUTO_OFF;
        } else {
            cpu->kvm_steal_time = ON_OFF_AUTO_ON;
        }
    } else if (cpu->kvm_steal_time == ON_OFF_AUTO_ON) {
        if (!has_steal_time) {
            error_setg(errp, "'kvm-steal-time' cannot be enabled "
                             "on this host");
            return;
        } else if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
            /*
             * DEN0057A chapter 2 says "This specification only covers
             * systems in which the Execution state of the hypervisor
             * as well as EL1 of virtual machines is AArch64.". And,
             * to ensure that, the smc/hvc calls are only specified as
             * smc64/hvc64.
             */
            error_setg(errp, "'kvm-steal-time' cannot be enabled "
                             "for AArch32 guests");
            return;
        }
    }
}

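/*
 * kvm-steal-time is exposed as a CPU property, e.g.
 *     -cpu host,kvm-steal-time=off
 * ON_OFF_AUTO_AUTO above is what a plain '-cpu host' resolves through.
 */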
bool kvm_arm_aarch32_supported(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT);
}

bool kvm_arm_sve_supported(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
}

bool kvm_arm_steal_time_supported(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME);
}

QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);

uint32_t kvm_arm_sve_get_vls(CPUState *cs)
{
    /* Only call this function if kvm_arm_sve_supported() returns true. */
    static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
    static bool probed;
    uint32_t vq = 0;
    int i;

    /*
     * KVM ensures all host CPUs support the same set of vector lengths.
     * So we only need to create the scratch VCPUs once and then cache
     * the results.
     */
    if (!probed) {
        struct kvm_vcpu_init init = {
            .target = -1,
            .features[0] = (1 << KVM_ARM_VCPU_SVE),
        };
        struct kvm_one_reg reg = {
            .id = KVM_REG_ARM64_SVE_VLS,
            .addr = (uint64_t)&vls[0],
        };
        int fdarray[3], ret;

        probed = true;

        if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) {
            error_report("failed to create scratch VCPU with SVE enabled");
            abort();
        }
        ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &reg);
        kvm_arm_destroy_scratch_host_vcpu(fdarray);
        if (ret) {
            error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s",
                         strerror(errno));
            abort();
        }

        for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) {
            if (vls[i]) {
                vq = 64 - clz64(vls[i]) + i * 64;
                break;
            }
        }
        if (vq > ARM_MAX_VQ) {
            warn_report("KVM supports vector lengths larger than "
                        "QEMU can enable");
            vls[0] &= MAKE_64BIT_MASK(0, ARM_MAX_VQ);
        }
    }

    return vls[0];
}

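/*
 * VLS bitmap encoding (per the KVM SVE API): bit i of vls[w] set means
 * vector quantum vq = w * 64 + i + 1, i.e. a vector length of vq * 128
 * bits, is supported. A host supporting 128-, 256- and 512-bit vectors
 * would therefore report vls[0] == 0b1011.
 */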
static int kvm_arm_sve_set_vls(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq.map };
    struct kvm_one_reg reg = {
        .id = KVM_REG_ARM64_SVE_VLS,
        .addr = (uint64_t)&vls[0],
    };

    assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

#define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5

int kvm_arch_init_vcpu(CPUState *cs)
{
    int ret;
    uint64_t mpidr;
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint64_t psciver;

    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
        !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
        error_report("KVM is not supported for this guest CPU type");
        return -EINVAL;
    }

    qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs);

    /* Determine init features for this CPU */
    memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
    if (cs->start_powered_off) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
    }
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
        cpu->psci_version = QEMU_PSCI_VERSION_0_2;
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
    }
    if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
    }
    if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
        cpu->has_pmu = false;
    }
    if (cpu->has_pmu) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
    } else {
        env->features &= ~(1ULL << ARM_FEATURE_PMU);
    }
    if (cpu_isar_feature(aa64_sve, cpu)) {
        assert(kvm_arm_sve_supported());
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
    }
    if (cpu_isar_feature(aa64_pauth, cpu)) {
        cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS |
                                      1 << KVM_ARM_VCPU_PTRAUTH_GENERIC);
    }

    /* Do KVM_ARM_VCPU_INIT ioctl */
    ret = kvm_arm_vcpu_init(cs);
    if (ret) {
        return ret;
    }

    if (cpu_isar_feature(aa64_sve, cpu)) {
        ret = kvm_arm_sve_set_vls(cs);
        if (ret) {
            return ret;
        }
        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_SVE);
        if (ret) {
            return ret;
        }
    }

    /*
     * KVM reports the exact PSCI version it is implementing via a
     * special sysreg. If it is present, use its contents to determine
     * what to report to the guest in the dtb (it is the PSCI version,
     * in the same 15-bits major 16-bits minor format that PSCI_VERSION
     * returns).
     */
    if (!kvm_get_one_reg(cs, KVM_REG_ARM_PSCI_VERSION, &psciver)) {
        cpu->psci_version = psciver;
    }

    /*
     * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
     * Currently KVM has its own idea about MPIDR assignment, so we
     * override our defaults with what we get from KVM.
     */
    ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
    if (ret) {
        return ret;
    }
    cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;

    /* Check whether user space can specify guest syndrome value */
    kvm_arm_init_serror_injection(cs);

    return kvm_arm_init_cpreg_list(cpu);
}

int kvm_arch_destroy_vcpu(CPUState *cs)
{
    return 0;
}

bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
{
    /*
     * Return true if the regidx is a register we should synchronize
     * via the cpreg_tuples array (i.e. is not a core or SVE reg that
     * we sync by hand in kvm_arch_get/put_registers())
     */
    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
    case KVM_REG_ARM_CORE:
    case KVM_REG_ARM64_SVE:
        return false;
    default:
        return true;
    }
}

typedef struct CPRegStateLevel {
    uint64_t regidx;
    int level;
} CPRegStateLevel;

/*
 * All system registers not listed in the following table are assumed to be
 * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
 * often, you must add it to this table with a state of either
 * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
 */
static const CPRegStateLevel non_runtime_cpregs[] = {
    { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
};

int kvm_arm_cpreg_level(uint64_t regidx)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
        const CPRegStateLevel *l = &non_runtime_cpregs[i];
        if (l->regidx == regidx) {
            return l->level;
        }
    }

    return KVM_PUT_RUNTIME_STATE;
}

/* Callers must hold the iothread mutex lock */
static void kvm_inject_arm_sea(CPUState *c)
{
    ARMCPU *cpu = ARM_CPU(c);
    CPUARMState *env = &cpu->env;
    uint32_t esr;
    bool same_el;

    c->exception_index = EXCP_DATA_ABORT;
    env->exception.target_el = 1;

    /*
     * Set the DFSC to synchronous external abort and set FnV to not valid,
     * this will tell the guest that FAR_ELx is UNKNOWN for this abort.
     */
    same_el = arm_current_el(env) == env->exception.target_el;
    esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10);

    env->exception.syndrome = esr;

    arm_cpu_do_interrupt(c);
}

#define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

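/*
 * Illustrative note: KVM_REG_ARM_CORE_REG(x) is the offsetof() of x
 * within struct kvm_regs, in 32-bit words, so e.g.
 * AARCH64_CORE_REG(regs.pc) builds the ONE_REG id for the guest PC.
 */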
static int kvm_arch_put_fpsimd(CPUState *cs)
{
    CPUARMState *env = &ARM_CPU(cs)->env;
    struct kvm_one_reg reg;
    int i, ret;

    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
#if HOST_BIG_ENDIAN
        uint64_t fp_val[2] = { q[1], q[0] };
        reg.addr = (uintptr_t)fp_val;
#else
        reg.addr = (uintptr_t)q;
#endif
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    return 0;
}

/*
 * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
 * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
 * code the slice index to zero for now as it's unlikely we'll need more than
 * one slice for quite some time.
 */
static int kvm_arch_put_sve(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint64_t tmp[ARM_MAX_VQ * 2];
    uint64_t *r;
    struct kvm_one_reg reg;
    int n, ret;

    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
        r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2);
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
        r = sve_bswap64(tmp, &env->vfp.pregs[n].p[0],
                        DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0],
                    DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
    reg.addr = (uintptr_t)r;
    reg.id = KVM_REG_ARM64_SVE_FFR(0);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    return 0;
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    struct kvm_one_reg reg;
    uint64_t val;
    uint32_t fpr;
    int i, ret;
    unsigned int el;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    /*
     * If we are in AArch32 mode then we need to copy the AArch32 regs to the
     * AArch64 registers before pushing them out to 64-bit KVM.
     */
    if (!is_a64(env)) {
        aarch64_sync_32_to_64(env);
    }

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    /*
     * KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_save_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
    if (is_a64(env)) {
        val = pstate_read(env);
    } else {
        val = cpsr_read(env);
    }
    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /*
     * Saved Program State Registers
     *
     * Before we restore from the banked_spsr[] array we need to
     * ensure that any modifications to env->spsr are correctly
     * reflected in the banks.
     */
    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->banked_spsr[i] = env->spsr;
    }

    /* KVM 0-4 map to QEMU banks 1-5 */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    if (cpu_isar_feature(aa64_sve, cpu)) {
        ret = kvm_arch_put_sve(cs);
    } else {
        ret = kvm_arch_put_fpsimd(cs);
    }
    if (ret) {
        return ret;
    }

    reg.addr = (uintptr_t)(&fpr);
    fpr = vfp_get_fpsr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.addr = (uintptr_t)(&fpr);
    fpr = vfp_get_fpcr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    write_cpustate_to_list(cpu, true);

    if (!write_list_to_kvmstate(cpu, level)) {
        return -EINVAL;
    }

    /*
     * Setting VCPU events should be triggered after syncing the registers
     * to avoid overwriting potential changes made by KVM upon calling
     * KVM_SET_VCPU_EVENTS ioctl
     */
    ret = kvm_put_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    kvm_arm_sync_mpstate_to_kvm(cpu);

    return ret;
}

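/*
 * On the SPSR mapping above: KVM's spsr[0..4] are SPSR_EL1 (SVC),
 * SPSR_abt, SPSR_und, SPSR_irq and SPSR_fiq, which line up with
 * QEMU's banked_spsr[] indexes 1-5 as computed by bank_number().
 */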
static int kvm_arch_get_fpsimd(CPUState *cs)
{
    CPUARMState *env = &ARM_CPU(cs)->env;
    struct kvm_one_reg reg;
    int i, ret;

    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        reg.addr = (uintptr_t)q;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        } else {
#if HOST_BIG_ENDIAN
            uint64_t t;
            t = q[0], q[0] = q[1], q[1] = t;
#endif
        }
    }

    return 0;
}

/*
 * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
 * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
 * code the slice index to zero for now as it's unlikely we'll need more than
 * one slice for quite some time.
 */
static int kvm_arch_get_sve(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    struct kvm_one_reg reg;
    uint64_t *r;
    int n, ret;

    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
        r = &env->vfp.zregs[n].d[0];
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
        sve_bswap64(r, r, cpu->sve_max_vq * 2);
    }

    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
        r = &env->vfp.pregs[n].p[0];
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
        sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
    }

    r = &env->vfp.pregs[FFR_PRED_NUM].p[0];
    reg.addr = (uintptr_t)r;
    reg.id = KVM_REG_ARM64_SVE_FFR(0);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));

    return 0;
}

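/*
 * Note that on little-endian hosts sve_bswap64() is a pass-through
 * no-op; the swaps above only do real work on big-endian hosts,
 * where KVM's little-endian register layout must be converted.
 */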
int kvm_arch_get_registers(CPUState *cs)
{
    struct kvm_one_reg reg;
    uint64_t val;
    unsigned int el;
    uint32_t fpr;
    int i, ret;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    env->aarch64 = ((val & PSTATE_nRW) == 0);
    if (is_a64(env)) {
        pstate_write(env, val);
    } else {
        cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
    }

    /*
     * KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_restore_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /*
     * If we are in AArch32 mode then we need to sync the AArch32 regs with
     * the incoming AArch64 regs received from 64-bit KVM.
     * We must perform this after all of the registers have been acquired
     * from the kernel.
     */
    if (!is_a64(env)) {
        aarch64_sync_64_to_32(env);
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /*
     * Fetch the SPSR registers
     *
     * KVM SPSRs 0-4 map to QEMU banks 1-5
     */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->spsr = env->banked_spsr[i];
    }

    if (cpu_isar_feature(aa64_sve, cpu)) {
        ret = kvm_arch_get_sve(cs);
    } else {
        ret = kvm_arch_get_fpsimd(cs);
    }
    if (ret) {
        return ret;
    }

    reg.addr = (uintptr_t)(&fpr);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpsr(env, fpr);

    reg.addr = (uintptr_t)(&fpr);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpcr(env, fpr);

    ret = kvm_get_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    if (!write_kvmstate_to_list(cpu)) {
        return -EINVAL;
    }
    /*
     * Note that it's OK to have registers which aren't in CPUState,
     * so we can ignore a failure return here.
     */
    write_list_to_cpustate(cpu);

    kvm_arm_sync_mpstate_to_qemu(cpu);

    /* TODO: other registers */
    return ret;
}

void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
{
    ram_addr_t ram_addr;
    hwaddr paddr;

    assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);

    if (acpi_ghes_present() && addr) {
        ram_addr = qemu_ram_addr_from_host(addr);
        if (ram_addr != RAM_ADDR_INVALID &&
            kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
            kvm_hwpoison_page_add(ram_addr);
            /*
             * If this is a BUS_MCEERR_AR, we know we have been called
             * synchronously from the vCPU thread, so we can easily
             * synchronize the state and inject an error.
             *
             * TODO: we currently don't tell the guest at all about
             * BUS_MCEERR_AO. In that case we might either be being
             * called synchronously from the vCPU thread, or a bit
             * later from the main thread, so doing the injection of
             * the error would be more complicated.
             */
            if (code == BUS_MCEERR_AR) {
                kvm_cpu_synchronize_state(c);
                if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
                    kvm_inject_arm_sea(c);
                } else {
                    error_report("failed to record the error");
                    abort();
                }
            }
            return;
        }
        if (code == BUS_MCEERR_AO) {
            error_report("Hardware memory error at addr %p for memory used by "
                "QEMU itself instead of guest system!", addr);
        }
    }

    if (code == BUS_MCEERR_AR) {
        error_report("Hardware memory error!");
        exit(1);
    }
}

/* C6.6.29 BRK instruction */
static const uint32_t brk_insn = 0xd4200000;

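/*
 * For reference: 0xd4200000 is BRK #0; bits [20:5] of the instruction
 * encode a 16-bit immediate, which we leave as zero.
 */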
int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    static uint32_t brk;

    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) ||
            brk != brk_insn ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

/*
 * See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
 *
 * To minimise translating between kernel and user-space the kernel
 * ABI just provides user-space with the full exception syndrome
 * register value to be decoded in QEMU.
 */

bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
{
    int hsr_ec = syn_get_ec(debug_exit->hsr);
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    /* Ensure PC is synchronised */
    kvm_cpu_synchronize_state(cs);

    switch (hsr_ec) {
    case EC_SOFTWARESTEP:
        if (cs->singlestep_enabled) {
            return true;
        } else {
            /*
             * The kernel should have suppressed the guest's ability to
             * single step at this point so something has gone wrong.
             */
            error_report("%s: guest single-step while debugging unsupported"
                         " (%"PRIx64", %"PRIx32")",
                         __func__, env->pc, debug_exit->hsr);
            return false;
        }
    case EC_AA64_BKPT:
        if (kvm_find_sw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_BREAKPOINT:
        if (find_hw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_WATCHPOINT:
    {
        CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far);
        if (wp) {
            cs->watchpoint_hit = wp;
            return true;
        }
        break;
    }
    default:
        error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
                     __func__, debug_exit->hsr, env->pc);
    }

    /*
     * If we are not handling the debug exception it must belong to
     * the guest. Let's re-use the existing TCG interrupt code to set
     * everything up properly.
     */
    cs->exception_index = EXCP_BKPT;
    env->exception.syndrome = debug_exit->hsr;
    env->exception.vaddress = debug_exit->far;
    env->exception.target_el = 1;
    qemu_mutex_lock_iothread();
    arm_cpu_do_interrupt(cs);
    qemu_mutex_unlock_iothread();

    return false;
}

#define ARM64_REG_ESR_EL1 ARM64_SYS_REG(3, 0, 5, 2, 0)
#define ARM64_REG_TCR_EL1 ARM64_SYS_REG(3, 0, 2, 0, 2)

/*
 * ESR_EL1
 * ISS encoding
 * AARCH64: DFSC,   bits [5:0]
 * AARCH32:
 *      TTBCR.EAE == 0
 *          FS[4]   - DFSR[10]
 *          FS[3:0] - DFSR[3:0]
 *      TTBCR.EAE == 1
 *          FS, bits [5:0]
 */
#define ESR_DFSC(aarch64, lpae, v)        \
    ((aarch64 || (lpae)) ? ((v) & 0x3F)   \
     : (((v) >> 6) | ((v) & 0x1F)))

#define ESR_DFSC_EXTABT(aarch64, lpae) \
    ((aarch64) ? 0x10 : (lpae) ? 0x10 : 0x8)

bool kvm_arm_verify_ext_dabt_pending(CPUState *cs)
{
    uint64_t dfsr_val;

    if (!kvm_get_one_reg(cs, ARM64_REG_ESR_EL1, &dfsr_val)) {
        ARMCPU *cpu = ARM_CPU(cs);
        CPUARMState *env = &cpu->env;
        int aarch64_mode = arm_feature(env, ARM_FEATURE_AARCH64);
        int lpae = 0;

        if (!aarch64_mode) {
            uint64_t ttbcr;

            if (!kvm_get_one_reg(cs, ARM64_REG_TCR_EL1, &ttbcr)) {
                lpae = arm_feature(env, ARM_FEATURE_LPAE)
                       && (ttbcr & TTBCR_EAE);
            }
        }
        /*
         * The verification here is based on the DFSC bits
         * of the ESR_EL1 reg only.
         */
        return (ESR_DFSC(aarch64_mode, lpae, dfsr_val) ==
                ESR_DFSC_EXTABT(aarch64_mode, lpae));
    }
    return false;
}
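
/*
 * Worked example for the DFSC macros above (illustrative): an AArch32
 * short-descriptor DFSR of 0x008 has FS[4] (DFSR[10]) clear and
 * FS[3:0] == 0x8, so ESR_DFSC(false, false, 0x008) == 0x08, which is
 * the synchronous external abort code ESR_DFSC_EXTABT(false, false).
 */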