/*
 * ARM implementation of KVM hooks, 64 bit specific code
 *
 * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
 * Copyright Alex Bennée 2014, Linaro
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <sys/ptrace.h>

#include <linux/elf.h>
#include <linux/kvm.h>

#include "qemu-common.h"
#include "qapi/error.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "qemu/error-report.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "exec/gdbstub.h"
#include "sysemu/runstate.h"
#include "sysemu/kvm.h"
#include "sysemu/kvm_int.h"
#include "kvm_arm.h"
#include "internals.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/ghes.h"
#include "hw/arm/virt.h"

static bool have_guest_debug;

/*
 * Although the ARM implementation of hardware assisted debugging
 * allows for different breakpoints per-core, the current GDB
 * interface treats them as a global pool of registers (which seems to
 * be the case for x86, ppc and s390). As a result we store one copy
 * of registers which is used for all active cores.
 *
 * Write access is serialised by virtue of the GDB protocol which
 * updates things. Read access (i.e. when the values are copied to the
 * vCPU) is also gated by GDB's run control.
 *
 * This is not unreasonable as most of the time when debugging kernels
 * you never know which core will eventually execute your function.
 */

typedef struct {
    uint64_t bcr;
    uint64_t bvr;
} HWBreakpoint;

/* The watchpoint registers can cover more area than the requested
 * watchpoint so we need to store the additional information
 * somewhere. We also need to supply a CPUWatchpoint to the GDB stub
 * when the watchpoint is hit.
 */
typedef struct {
    uint64_t wcr;
    uint64_t wvr;
    CPUWatchpoint details;
} HWWatchpoint;

/* Maximum and current break/watch point counts */
int max_hw_bps, max_hw_wps;
GArray *hw_breakpoints, *hw_watchpoints;

#define cur_hw_wps      (hw_watchpoints->len)
#define cur_hw_bps      (hw_breakpoints->len)
#define get_hw_bp(i)    (&g_array_index(hw_breakpoints, HWBreakpoint, i))
#define get_hw_wp(i)    (&g_array_index(hw_watchpoints, HWWatchpoint, i))

/**
 * kvm_arm_init_debug() - check for guest debug capabilities
 * @cs: CPUState
 *
 * kvm_check_extension returns the number of debug registers we have
 * or 0 if we have none.
 */
static void kvm_arm_init_debug(CPUState *cs)
{
    have_guest_debug = kvm_check_extension(cs->kvm_state,
                                           KVM_CAP_SET_GUEST_DEBUG);

    max_hw_wps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_WPS);
    hw_watchpoints = g_array_sized_new(true, true,
                                       sizeof(HWWatchpoint), max_hw_wps);

    max_hw_bps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_BPS);
    hw_breakpoints = g_array_sized_new(true, true,
                                       sizeof(HWBreakpoint), max_hw_bps);
}

/**
 * insert_hw_breakpoint()
 * @addr: address of breakpoint
 *
 * See ARM ARM D2.9.1 for details but here we are only going to create
 * simple un-linked breakpoints (i.e. we don't chain breakpoints
 * together to match address and context or vmid). The hardware is
 * capable of fancier matching but that will require exposing that
 * fanciness to GDB's interface.
 *
 * DBGBCR<n>_EL1, Debug Breakpoint Control Registers
 *
 *  31  24 23  20 19   16 15 14  13  12   9 8   5 4    3 2   1  0
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 * | RES0 |  BT  |  LBN  | SSC | HMC| RES0 | BAS | RES0 | PMC | E |
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 *
 * BT: Breakpoint type (0 = unlinked address match)
 * LBN: Linked BP number (0 = unused)
 * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12)
 * BAS: Byte Address Select (RES1 for AArch64)
 * E: Enable bit
 *
 * DBGBVR<n>_EL1, Debug Breakpoint Value Registers
 *
 *  63  53 52       49 48       2  1 0
 * +------+-----------+----------+-----+
 * | RESS | VA[52:49] | VA[48:2] | 0 0 |
 * +------+-----------+----------+-----+
 *
 * Depending on the addressing mode bits the top bits of the register
 * are a sign extension of the highest applicable VA bit. Some
 * versions of GDB don't do it correctly so we ensure they are correct
 * here so future PC comparisons will work properly.
 */
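/*
 * For example (illustrative): a kernel-space breakpoint at
 * 0xffff800012345678 already has bits [63:53] equal to VA bit 52, so
 * sextract64(addr, 0, 53) stores it unchanged; the explicit sign
 * extension only changes addresses whose upper bits GDB failed to
 * canonicalise.
 */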
static int insert_hw_breakpoint(target_ulong addr)
{
    HWBreakpoint brk = {
        .bcr = 0x1,                             /* BCR E=1, enable */
        .bvr = sextract64(addr, 0, 53)
    };

    if (cur_hw_bps >= max_hw_bps) {
        return -ENOBUFS;
    }

    brk.bcr = deposit32(brk.bcr, 1, 2, 0x3);   /* PMC = 11 */
    brk.bcr = deposit32(brk.bcr, 5, 4, 0xf);   /* BAS = RES1 */

    g_array_append_val(hw_breakpoints, brk);

    return 0;
}

/**
 * delete_hw_breakpoint()
 * @pc: address of breakpoint
 *
 * Delete a breakpoint and shuffle any above down
 */
static int delete_hw_breakpoint(target_ulong pc)
{
    int i;
    for (i = 0; i < hw_breakpoints->len; i++) {
        HWBreakpoint *brk = get_hw_bp(i);
        if (brk->bvr == pc) {
            g_array_remove_index(hw_breakpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}

/**
 * insert_hw_watchpoint()
 * @addr: address of watch point
 * @len: size of area
 * @type: type of watch point
 *
 * See ARM ARM D2.10. As with the breakpoints we can do some advanced
 * stuff if we want to. The watch points can be linked with the break
 * points above to make them context aware. However for simplicity
 * currently we only deal with simple read/write watch points.
 *
 * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers
 *
 *  31  29 28   24 23  21  20  19 16 15 14  13   12  5 4   3 2   1  0
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 * | RES0 |  MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E |
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 *
 * MASK: num bits addr mask (0 = none, 01/10 = reserved, 11 = 3 bits (8 bytes))
 * WT: 0 - unlinked, 1 - linked (not currently used)
 * LBN: Linked BP number (not currently used)
 * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11)
 * BAS: Byte Address Select
 * LSC: Load/Store control (01: load, 10: store, 11: both)
 * E: Enable
 *
 * The bottom 2 bits of the value register are masked. Therefore to
 * break on any sizes smaller than an unaligned word you need to set
 * MASK=0, BAS=bit per byte in question. For larger regions (powers of
 * 2) you need to ensure you mask the address as required and set
 * BAS=0xff.
 */
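/*
 * For example (illustrative): a 2-byte watchpoint at address 0x1002
 * is encoded below as WVR = 0x1000 (the address with the low three
 * bits cleared) and BAS = 0b00001100, selecting bytes 2 and 3 of the
 * doubleword at WVR.
 */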
static int insert_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    HWWatchpoint wp = {
        .wcr = 1, /* E=1, enable */
        .wvr = addr & (~0x7ULL),
        .details = { .vaddr = addr, .len = len }
    };

    if (cur_hw_wps >= max_hw_wps) {
        return -ENOBUFS;
    }

    /*
     * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state,
     * valid whether EL3 is implemented or not
     */
    wp.wcr = deposit32(wp.wcr, 1, 2, 3);

    switch (type) {
    case GDB_WATCHPOINT_READ:
        wp.wcr = deposit32(wp.wcr, 3, 2, 1);
        wp.details.flags = BP_MEM_READ;
        break;
    case GDB_WATCHPOINT_WRITE:
        wp.wcr = deposit32(wp.wcr, 3, 2, 2);
        wp.details.flags = BP_MEM_WRITE;
        break;
    case GDB_WATCHPOINT_ACCESS:
        wp.wcr = deposit32(wp.wcr, 3, 2, 3);
        wp.details.flags = BP_MEM_ACCESS;
        break;
    default:
        g_assert_not_reached();
    }
    if (len <= 8) {
        /* we align the address and set the bits in BAS */
        int off = addr & 0x7;
        int bas = (1 << len) - 1;

        wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas);
    } else {
        /* For ranges above 8 bytes the length must be a power of 2 */
        if (is_power_of_2(len)) {
            int bits = ctz64(len);

            wp.wvr &= ~((1 << bits) - 1);
            wp.wcr = deposit32(wp.wcr, 24, 4, bits);
            wp.wcr = deposit32(wp.wcr, 5, 8, 0xff);
        } else {
            return -ENOBUFS;
        }
    }

    g_array_append_val(hw_watchpoints, wp);
    return 0;
}

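/**
 * check_watchpoint_in_range()
 * @i: index of the watchpoint to check
 * @addr: address to test
 *
 * Decode the BAS/MASK fields of the stored DBGWCR to recover the byte
 * range covered by watchpoint @i and report whether @addr falls
 * within it.
 */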
static bool check_watchpoint_in_range(int i, target_ulong addr)
{
    HWWatchpoint *wp = get_hw_wp(i);
    uint64_t addr_top, addr_bottom = wp->wvr;
    int bas = extract32(wp->wcr, 5, 8);
    int mask = extract32(wp->wcr, 24, 4);

    if (mask) {
        addr_top = addr_bottom + (1 << mask);
    } else {
        /* BAS must be contiguous but can offset against the base
         * address in DBGWVR */
        addr_bottom = addr_bottom + ctz32(bas);
        addr_top = addr_bottom + clo32(bas);
    }

    if (addr >= addr_bottom && addr <= addr_top) {
        return true;
    }

    return false;
}

/**
 * delete_hw_watchpoint()
 * @addr: address of watchpoint
 *
 * Delete a watchpoint and shuffle any above down
 */
static int delete_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    int i;
    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            g_array_remove_index(hw_watchpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}


int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return insert_hw_breakpoint(addr);
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return insert_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return delete_hw_breakpoint(addr);
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return delete_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}


void kvm_arch_remove_all_hw_breakpoints(void)
{
    if (cur_hw_wps > 0) {
        g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
    }
    if (cur_hw_bps > 0) {
        g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
    }
}

void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
{
    int i;
    memset(ptr, 0, sizeof(struct kvm_guest_debug_arch));

    for (i = 0; i < max_hw_wps; i++) {
        HWWatchpoint *wp = get_hw_wp(i);
        ptr->dbg_wcr[i] = wp->wcr;
        ptr->dbg_wvr[i] = wp->wvr;
    }
    for (i = 0; i < max_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        ptr->dbg_bcr[i] = bp->bcr;
        ptr->dbg_bvr[i] = bp->bvr;
    }
}

bool kvm_arm_hw_debug_active(CPUState *cs)
{
    return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
}

static bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
{
    int i;

    for (i = 0; i < cur_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        if (bp->bvr == pc) {
            return true;
        }
    }
    return false;
}

static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
{
    int i;

    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            return &get_hw_wp(i)->details;
        }
    }
    return NULL;
}

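/*
 * Probe for a vCPU device attribute with KVM_HAS_DEVICE_ATTR and, if
 * the host supports it, set it with KVM_SET_DEVICE_ATTR. Returns true
 * on success; on failure an error is reported with @name as context.
 */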
static bool kvm_arm_set_device_attr(CPUState *cs, struct kvm_device_attr *attr,
                                    const char *name)
{
    int err;

    err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err));
        return false;
    }

    err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err));
        return false;
    }

    return true;
}

void kvm_arm_pmu_init(CPUState *cs)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .attr = KVM_ARM_VCPU_PMU_V3_INIT,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) {
        error_report("failed to init PMU");
        abort();
    }
}

void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .addr = (intptr_t)&irq,
        .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) {
        error_report("failed to set irq for PMU");
        abort();
    }
}

void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PVTIME_CTRL,
        .attr = KVM_ARM_VCPU_PVTIME_IPA,
        .addr = (uint64_t)&ipa,
    };

    if (ARM_CPU(cs)->kvm_steal_time == ON_OFF_AUTO_OFF) {
        return;
    }
    if (!kvm_arm_set_device_attr(cs, &attr, "PVTIME IPA")) {
        error_report("failed to init PVTIME IPA");
        abort();
    }
}

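/*
 * Helpers to read ID registers from a (scratch) vCPU fd with
 * KVM_GET_ONE_REG. The AArch32 ID registers are also accessed via
 * 64-bit sysreg encodings here, so read_sys_reg32() fetches a 64-bit
 * value and narrows the result to 32 bits.
 */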
static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
{
    uint64_t ret;
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret };
    int err;

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    err = ioctl(fd, KVM_GET_ONE_REG, &idreg);
    if (err < 0) {
        return -1;
    }
    *pret = ret;
    return 0;
}

static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id)
{
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret };

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    return ioctl(fd, KVM_GET_ONE_REG, &idreg);
}

bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
{
    /* Identify the feature bits corresponding to the host CPU, and
     * fill out the ARMHostCPUFeatures fields accordingly. To do this
     * we have to create a scratch VM, create a single CPU inside it,
     * and then query that CPU for the relevant ID registers.
     */
    int fdarray[3];
    bool sve_supported;
    uint64_t features = 0;
    uint64_t t;
    int err;

    /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
     * we know these will only support creating one kind of guest CPU,
     * which is its preferred CPU type. Fortunately these old kernels
     * support only a very limited number of CPUs.
     */
    static const uint32_t cpus_to_try[] = {
        KVM_ARM_TARGET_AEM_V8,
        KVM_ARM_TARGET_FOUNDATION_V8,
        KVM_ARM_TARGET_CORTEX_A57,
        QEMU_KVM_ARM_TARGET_NONE
    };
    /*
     * target = -1 informs kvm_arm_create_scratch_host_vcpu()
     * to use the preferred target
     */
    struct kvm_vcpu_init init = { .target = -1, };

    if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
        return false;
    }

    ahcf->target = init.target;
    ahcf->dtb_compatible = "arm,arm-v8";

    err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0,
                         ARM64_SYS_REG(3, 0, 0, 4, 0));
    if (unlikely(err < 0)) {
        /*
         * Before v4.15, the kernel only exposed a limited number of system
         * registers, not including any of the interesting AArch64 ID regs.
         * For the most part we could leave these fields as zero with minimal
         * effect, since this does not affect the values seen by the guest.
         *
         * However, it could cause problems down the line for QEMU,
         * so provide a minimal v8.0 default.
         *
         * ??? Could read MIDR and use knowledge from cpu64.c.
         * ??? Could map a page of memory into our temp guest and
         *     run the tiniest of hand-crafted kernels to extract
         *     the values seen by the guest.
         * ??? Either of these sounds like too much effort just
         *     to work around running a modern host kernel.
         */
        ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */
        err = 0;
    } else {
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
                              ARM64_SYS_REG(3, 0, 0, 4, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0,
                              ARM64_SYS_REG(3, 0, 0, 5, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1,
                              ARM64_SYS_REG(3, 0, 0, 5, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
                              ARM64_SYS_REG(3, 0, 0, 6, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
                              ARM64_SYS_REG(3, 0, 0, 6, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
                              ARM64_SYS_REG(3, 0, 0, 7, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
                              ARM64_SYS_REG(3, 0, 0, 7, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2,
                              ARM64_SYS_REG(3, 0, 0, 7, 2));

        /*
         * Note that if AArch32 support is not present in the host,
         * the AArch32 sysregs are present to be read, but will
         * return UNKNOWN values. This is neither better nor worse
         * than skipping the reads and leaving 0, as we must avoid
         * considering the values in every case.
         */
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0,
                              ARM64_SYS_REG(3, 0, 0, 1, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1,
                              ARM64_SYS_REG(3, 0, 0, 1, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0,
                              ARM64_SYS_REG(3, 0, 0, 1, 2));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0,
                              ARM64_SYS_REG(3, 0, 0, 1, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1,
                              ARM64_SYS_REG(3, 0, 0, 1, 5));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2,
                              ARM64_SYS_REG(3, 0, 0, 1, 6));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3,
                              ARM64_SYS_REG(3, 0, 0, 1, 7));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
                              ARM64_SYS_REG(3, 0, 0, 2, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
                              ARM64_SYS_REG(3, 0, 0, 2, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
                              ARM64_SYS_REG(3, 0, 0, 2, 2));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
                              ARM64_SYS_REG(3, 0, 0, 2, 3));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
                              ARM64_SYS_REG(3, 0, 0, 2, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
                              ARM64_SYS_REG(3, 0, 0, 2, 5));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4,
                              ARM64_SYS_REG(3, 0, 0, 2, 6));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
                              ARM64_SYS_REG(3, 0, 0, 2, 7));

        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
                              ARM64_SYS_REG(3, 0, 0, 3, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
                              ARM64_SYS_REG(3, 0, 0, 3, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
                              ARM64_SYS_REG(3, 0, 0, 3, 2));

        /*
         * DBGDIDR is a bit complicated because the kernel doesn't
         * provide an accessor for it in 64-bit mode, which is what this
         * scratch VM is in, and there's no architected "64-bit sysreg
         * which reads the same as the 32-bit register" the way there is
         * for other ID registers. Instead we synthesize a value from the
         * AArch64 ID_AA64DFR0, the same way the kernel code in
         * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does.
         * We only do this if the CPU supports AArch32 at EL1.
         */
        if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) {
            int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS);
            int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS);
            int ctx_cmps =
                FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS);
            int version = 6; /* ARMv8 debug architecture */
            bool has_el3 =
                !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3);
            uint32_t dbgdidr = 0;

            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3);
            dbgdidr |= (1 << 15); /* RES1 bit */
            ahcf->isar.dbgdidr = dbgdidr;
        }
    }

    sve_supported = ioctl(fdarray[0], KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE) > 0;

    kvm_arm_destroy_scratch_host_vcpu(fdarray);

    if (err < 0) {
        return false;
    }

    /* Add feature bits that can't appear until after VCPU init. */
    if (sve_supported) {
        t = ahcf->isar.id_aa64pfr0;
        t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
        ahcf->isar.id_aa64pfr0 = t;
    }

    /*
     * We can assume any KVM supporting CPU is at least a v8
     * with VFPv4+Neon; this in turn implies most of the other
     * feature bits.
     */
    features |= 1ULL << ARM_FEATURE_V8;
    features |= 1ULL << ARM_FEATURE_NEON;
    features |= 1ULL << ARM_FEATURE_AARCH64;
    features |= 1ULL << ARM_FEATURE_PMU;
    features |= 1ULL << ARM_FEATURE_GENERIC_TIMER;

    ahcf->features = features;

    return true;
}

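/*
 * Resolve the kvm-steal-time property. AUTO only becomes ON when the
 * host supports KVM_CAP_STEAL_TIME and the guest is AArch64; an
 * explicit ON is rejected otherwise, for the reasons noted below.
 */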
void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
{
    bool has_steal_time = kvm_arm_steal_time_supported();

    if (cpu->kvm_steal_time == ON_OFF_AUTO_AUTO) {
        if (!has_steal_time || !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
            cpu->kvm_steal_time = ON_OFF_AUTO_OFF;
        } else {
            cpu->kvm_steal_time = ON_OFF_AUTO_ON;
        }
    } else if (cpu->kvm_steal_time == ON_OFF_AUTO_ON) {
        if (!has_steal_time) {
            error_setg(errp, "'kvm-steal-time' cannot be enabled "
                             "on this host");
            return;
        } else if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
            /*
             * DEN0057A chapter 2 says "This specification only covers
             * systems in which the Execution state of the hypervisor
             * as well as EL1 of virtual machines is AArch64.". And,
             * to ensure that, the smc/hvc calls are only specified as
             * smc64/hvc64.
             */
            error_setg(errp, "'kvm-steal-time' cannot be enabled "
                             "for AArch32 guests");
            return;
        }
    }
}

bool kvm_arm_aarch32_supported(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT);
}

bool kvm_arm_sve_supported(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
}

bool kvm_arm_steal_time_supported(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME);
}

QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);

void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
{
    /* Only call this function if kvm_arm_sve_supported() returns true. */
    static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
    static bool probed;
    uint32_t vq = 0;
    int i, j;

    bitmap_clear(map, 0, ARM_MAX_VQ);

    /*
     * KVM ensures all host CPUs support the same set of vector lengths.
     * So we only need to create the scratch VCPUs once and then cache
     * the results.
     */
    if (!probed) {
        struct kvm_vcpu_init init = {
            .target = -1,
            .features[0] = (1 << KVM_ARM_VCPU_SVE),
        };
        struct kvm_one_reg reg = {
            .id = KVM_REG_ARM64_SVE_VLS,
            .addr = (uint64_t)&vls[0],
        };
        int fdarray[3], ret;

        probed = true;

        if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) {
            error_report("failed to create scratch VCPU with SVE enabled");
            abort();
        }
        ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &reg);
        kvm_arm_destroy_scratch_host_vcpu(fdarray);
        if (ret) {
            error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s",
                         strerror(errno));
            abort();
        }

        for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) {
            if (vls[i]) {
                vq = 64 - clz64(vls[i]) + i * 64;
                break;
            }
        }
        if (vq > ARM_MAX_VQ) {
            warn_report("KVM supports vector lengths larger than "
                        "QEMU can enable");
        }
    }

    for (i = 0; i < KVM_ARM64_SVE_VLS_WORDS; ++i) {
        if (!vls[i]) {
            continue;
        }
        for (j = 1; j <= 64; ++j) {
            vq = j + i * 64;
            if (vq > ARM_MAX_VQ) {
                return;
            }
            if (vls[i] & (1UL << (j - 1))) {
                set_bit(vq - 1, map);
            }
        }
    }
}

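/*
 * Encode the permitted vector lengths into the KVM_REG_ARM64_SVE_VLS
 * bitmap: bit (vq - 1) % 64 of word (vq - 1) / 64 is set when vector
 * length vq (in units of 128 bits) is enabled. For example
 * (illustrative), vq = 1, 2 and 4 (128/256/512-bit vectors) encode as
 * vls[0] = 0xb.
 */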
static int kvm_arm_sve_set_vls(CPUState *cs)
{
    uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = {0};
    struct kvm_one_reg reg = {
        .id = KVM_REG_ARM64_SVE_VLS,
        .addr = (uint64_t)&vls[0],
    };
    ARMCPU *cpu = ARM_CPU(cs);
    uint32_t vq;
    int i, j;

    assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);

    for (vq = 1; vq <= cpu->sve_max_vq; ++vq) {
        if (test_bit(vq - 1, cpu->sve_vq_map)) {
            i = (vq - 1) / 64;
            j = (vq - 1) % 64;
            vls[i] |= 1UL << j;
        }
    }

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

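/* MPIDR_EL1 sysreg encoding: op0 = 3, op1 = 0, CRn = 0, CRm = 0, op2 = 5 */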
#define ARM_CPU_ID_MPIDR        3, 0, 0, 0, 5

int kvm_arch_init_vcpu(CPUState *cs)
{
    int ret;
    uint64_t mpidr;
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
        !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
        error_report("KVM is not supported for this guest CPU type");
        return -EINVAL;
    }

    qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs);

    /* Determine init features for this CPU */
    memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
    if (cs->start_powered_off) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
    }
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
        cpu->psci_version = 2;
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
    }
    if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
    }
    if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
        cpu->has_pmu = false;
    }
    if (cpu->has_pmu) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
    } else {
        env->features &= ~(1ULL << ARM_FEATURE_PMU);
    }
    if (cpu_isar_feature(aa64_sve, cpu)) {
        assert(kvm_arm_sve_supported());
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
    }

    /* Do KVM_ARM_VCPU_INIT ioctl */
    ret = kvm_arm_vcpu_init(cs);
    if (ret) {
        return ret;
    }

    if (cpu_isar_feature(aa64_sve, cpu)) {
        ret = kvm_arm_sve_set_vls(cs);
        if (ret) {
            return ret;
        }
        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_SVE);
        if (ret) {
            return ret;
        }
    }

    /*
     * When KVM is in use, PSCI is emulated in-kernel and not by QEMU.
     * Currently KVM has its own idea about MPIDR assignment, so we
     * override our defaults with what we get from KVM.
     */
    ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
    if (ret) {
        return ret;
    }
    cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;

    kvm_arm_init_debug(cs);

    /* Check whether user space can specify guest syndrome value */
    kvm_arm_init_serror_injection(cs);

    return kvm_arm_init_cpreg_list(cpu);
}

int kvm_arch_destroy_vcpu(CPUState *cs)
{
    return 0;
}

bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
{
    /* Return true if the regidx is a register we should synchronize
     * via the cpreg_tuples array (i.e. is not a core or sve reg that
     * we sync by hand in kvm_arch_get/put_registers())
     */
    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
    case KVM_REG_ARM_CORE:
    case KVM_REG_ARM64_SVE:
        return false;
    default:
        return true;
    }
}

typedef struct CPRegStateLevel {
    uint64_t regidx;
    int level;
} CPRegStateLevel;

/* All system registers not listed in the following table are assumed to be
 * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
 * often, you must add it to this table with a state of either
 * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
 */
static const CPRegStateLevel non_runtime_cpregs[] = {
    { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
};

int kvm_arm_cpreg_level(uint64_t regidx)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
        const CPRegStateLevel *l = &non_runtime_cpregs[i];
        if (l->regidx == regidx) {
            return l->level;
        }
    }

    return KVM_PUT_RUNTIME_STATE;
}

/* Callers must hold the iothread mutex lock */
static void kvm_inject_arm_sea(CPUState *c)
{
    ARMCPU *cpu = ARM_CPU(c);
    CPUARMState *env = &cpu->env;
    CPUClass *cc = CPU_GET_CLASS(c);
    uint32_t esr;
    bool same_el;

    c->exception_index = EXCP_DATA_ABORT;
    env->exception.target_el = 1;

    /*
     * Set the DFSC to synchronous external abort and set FnV to not valid,
     * this will tell guest the FAR_ELx is UNKNOWN for this abort.
     */
    same_el = arm_current_el(env) == env->exception.target_el;
    esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10);

    env->exception.syndrome = esr;

    cc->do_interrupt(c);
}

#define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

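/*
 * Write the 32 AArch64 vector registers out to KVM. On big-endian
 * hosts the two 64-bit halves of each 128-bit vreg are swapped before
 * the write (and swapped back on read in kvm_arch_get_fpsimd() below)
 * to match the layout the KVM ABI expects.
 */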
static int kvm_arch_put_fpsimd(CPUState *cs)
{
    CPUARMState *env = &ARM_CPU(cs)->env;
    struct kvm_one_reg reg;
    int i, ret;

    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
#ifdef HOST_WORDS_BIGENDIAN
        uint64_t fp_val[2] = { q[1], q[0] };
        reg.addr = (uintptr_t)fp_val;
#else
        reg.addr = (uintptr_t)q;
#endif
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    return 0;
}

/*
 * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
 * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
 * code the slice index to zero for now as it's unlikely we'll need more than
 * one slice for quite some time.
 */
static int kvm_arch_put_sve(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint64_t tmp[ARM_MAX_VQ * 2];
    uint64_t *r;
    struct kvm_one_reg reg;
    int n, ret;

    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
        r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2);
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
        r = sve_bswap64(tmp, &env->vfp.pregs[n].p[0],
                        DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0],
                    DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
    reg.addr = (uintptr_t)r;
    reg.id = KVM_REG_ARM64_SVE_FFR(0);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    return 0;
}

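/*
 * Push the CPU state out to KVM: core registers, SP/PC/PSTATE, SPSRs,
 * FP or SVE state, then the cpreg list, vcpu events and the
 * multiprocessing state. @level controls how much of the cpreg list
 * is written back (see kvm_arm_cpreg_level() above).
 */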
int kvm_arch_put_registers(CPUState *cs, int level)
{
    struct kvm_one_reg reg;
    uint64_t val;
    uint32_t fpr;
    int i, ret;
    unsigned int el;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
     * AArch64 registers before pushing them out to 64-bit KVM.
     */
    if (!is_a64(env)) {
        aarch64_sync_32_to_64(env);
    }

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_save_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
    if (is_a64(env)) {
        val = pstate_read(env);
    } else {
        val = cpsr_read(env);
    }
    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Saved Program State Registers
     *
     * Before we restore from the banked_spsr[] array we need to
     * ensure that any modifications to env->spsr are correctly
     * reflected in the banks.
     */
    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->banked_spsr[i] = env->spsr;
    }

    /* KVM 0-4 map to QEMU banks 1-5 */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    if (cpu_isar_feature(aa64_sve, cpu)) {
        ret = kvm_arch_put_sve(cs);
    } else {
        ret = kvm_arch_put_fpsimd(cs);
    }
    if (ret) {
        return ret;
    }

    reg.addr = (uintptr_t)(&fpr);
    fpr = vfp_get_fpsr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.addr = (uintptr_t)(&fpr);
    fpr = vfp_get_fpcr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    write_cpustate_to_list(cpu, true);

    if (!write_list_to_kvmstate(cpu, level)) {
        return -EINVAL;
    }

    /*
     * Setting VCPU events should be triggered after syncing the registers
     * to avoid overwriting potential changes made by KVM upon calling
     * KVM_SET_VCPU_EVENTS ioctl
     */
    ret = kvm_put_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    kvm_arm_sync_mpstate_to_kvm(cpu);

    return ret;
}

static int kvm_arch_get_fpsimd(CPUState *cs)
{
    CPUARMState *env = &ARM_CPU(cs)->env;
    struct kvm_one_reg reg;
    int i, ret;

    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        reg.addr = (uintptr_t)q;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        } else {
#ifdef HOST_WORDS_BIGENDIAN
            uint64_t t;
            t = q[0], q[0] = q[1], q[1] = t;
#endif
        }
    }

    return 0;
}

/*
 * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
 * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
 * code the slice index to zero for now as it's unlikely we'll need more than
 * one slice for quite some time.
 */
static int kvm_arch_get_sve(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    struct kvm_one_reg reg;
    uint64_t *r;
    int n, ret;

    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
        r = &env->vfp.zregs[n].d[0];
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
        sve_bswap64(r, r, cpu->sve_max_vq * 2);
    }

    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
        r = &env->vfp.pregs[n].p[0];
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
        sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
    }

    r = &env->vfp.pregs[FFR_PRED_NUM].p[0];
    reg.addr = (uintptr_t)r;
    reg.id = KVM_REG_ARM64_SVE_FFR(0);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));

    return 0;
}

int kvm_arch_get_registers(CPUState *cs)
{
    struct kvm_one_reg reg;
    uint64_t val;
    unsigned int el;
    uint32_t fpr;
    int i, ret;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    env->aarch64 = ((val & PSTATE_nRW) == 0);
    if (is_a64(env)) {
        pstate_write(env, val);
    } else {
        cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
    }

    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_restore_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* If we are in AArch32 mode then we need to sync the AArch32 regs with the
     * incoming AArch64 regs received from 64-bit KVM.
     * We must perform this after all of the registers have been acquired from
     * the kernel.
     */
    if (!is_a64(env)) {
        aarch64_sync_64_to_32(env);
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Fetch the SPSR registers
     *
     * KVM SPSRs 0-4 map to QEMU banks 1-5
     */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->spsr = env->banked_spsr[i];
    }

    if (cpu_isar_feature(aa64_sve, cpu)) {
        ret = kvm_arch_get_sve(cs);
    } else {
        ret = kvm_arch_get_fpsimd(cs);
    }
    if (ret) {
        return ret;
    }

    reg.addr = (uintptr_t)(&fpr);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpsr(env, fpr);

    reg.addr = (uintptr_t)(&fpr);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpcr(env, fpr);

    ret = kvm_get_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    if (!write_kvmstate_to_list(cpu)) {
        return -EINVAL;
    }
    /* Note that it's OK to have registers which aren't in CPUState,
     * so we can ignore a failure return here.
     */
    write_list_to_cpustate(cpu);

    kvm_arm_sync_mpstate_to_qemu(cpu);

    /* TODO: other registers */
    return ret;
}

void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
{
    ram_addr_t ram_addr;
    hwaddr paddr;
    Object *obj = qdev_get_machine();
    VirtMachineState *vms = VIRT_MACHINE(obj);
    bool acpi_enabled = virt_is_acpi_enabled(vms);

    assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);

    if (acpi_enabled && addr &&
            object_property_get_bool(obj, "ras", NULL)) {
        ram_addr = qemu_ram_addr_from_host(addr);
        if (ram_addr != RAM_ADDR_INVALID &&
            kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
            kvm_hwpoison_page_add(ram_addr);
            /*
             * If this is a BUS_MCEERR_AR, we know we have been called
             * synchronously from the vCPU thread, so we can easily
             * synchronize the state and inject an error.
             *
             * TODO: we currently don't tell the guest at all about
             * BUS_MCEERR_AO. In that case we might either be being
             * called synchronously from the vCPU thread, or a bit
             * later from the main thread, so doing the injection of
             * the error would be more complicated.
             */
            if (code == BUS_MCEERR_AR) {
                kvm_cpu_synchronize_state(c);
                if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
                    kvm_inject_arm_sea(c);
                } else {
                    error_report("failed to record the error");
                    abort();
                }
            }
            return;
        }
        if (code == BUS_MCEERR_AO) {
            error_report("Hardware memory error at addr %p for memory used by "
                "QEMU itself instead of guest system!", addr);
        }
    }

    if (code == BUS_MCEERR_AR) {
        error_report("Hardware memory error!");
        exit(1);
    }
}

/* C6.6.29 BRK instruction */
static const uint32_t brk_insn = 0xd4200000;

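/*
 * Software breakpoints are implemented by saving the original
 * instruction and patching a BRK in its place; removal checks the BRK
 * is still present before restoring the saved instruction.
 */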
int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    static uint32_t brk;

    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) ||
            brk != brk_insn ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

/* See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
 *
 * To minimise translating between kernel and user-space the kernel
 * ABI just provides user-space with the full exception syndrome
 * register value to be decoded in QEMU.
 */
bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
{
    int hsr_ec = syn_get_ec(debug_exit->hsr);
    ARMCPU *cpu = ARM_CPU(cs);
    CPUClass *cc = CPU_GET_CLASS(cs);
    CPUARMState *env = &cpu->env;

    /* Ensure PC is synchronised */
    kvm_cpu_synchronize_state(cs);

    switch (hsr_ec) {
    case EC_SOFTWARESTEP:
        if (cs->singlestep_enabled) {
            return true;
        } else {
            /*
             * The kernel should have suppressed the guest's ability to
             * single step at this point so something has gone wrong.
             */
            error_report("%s: guest single-step while debugging unsupported"
                         " (%"PRIx64", %"PRIx32")",
                         __func__, env->pc, debug_exit->hsr);
            return false;
        }
    case EC_AA64_BKPT:
        if (kvm_find_sw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_BREAKPOINT:
        if (find_hw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_WATCHPOINT:
    {
        CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far);
        if (wp) {
            cs->watchpoint_hit = wp;
            return true;
        }
        break;
    }
    default:
        error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
                     __func__, debug_exit->hsr, env->pc);
    }

    /* If we are not handling the debug exception it must belong to
     * the guest. Let's re-use the existing TCG interrupt code to set
     * everything up properly.
     */
    cs->exception_index = EXCP_BKPT;
    env->exception.syndrome = debug_exit->hsr;
    env->exception.vaddress = debug_exit->far;
    env->exception.target_el = 1;
    qemu_mutex_lock_iothread();
    cc->do_interrupt(cs);
    qemu_mutex_unlock_iothread();

    return false;
}

#define ARM64_REG_ESR_EL1 ARM64_SYS_REG(3, 0, 5, 2, 0)
#define ARM64_REG_TCR_EL1 ARM64_SYS_REG(3, 0, 2, 0, 2)

/*
 * ESR_EL1
 * ISS encoding
 * AARCH64: DFSC,   bits [5:0]
 * AARCH32:
 *      TTBCR.EAE == 0
 *          FS[4]   - DFSR[10]
 *          FS[3:0] - DFSR[3:0]
 *      TTBCR.EAE == 1
 *          FS, bits [5:0]
 */
#define ESR_DFSC(aarch64, lpae, v)        \
    ((aarch64 || (lpae)) ? ((v) & 0x3F)   \
               : (((v) >> 6) | ((v) & 0x1F)))

#define ESR_DFSC_EXTABT(aarch64, lpae) \
    ((aarch64) ? 0x10 : (lpae) ? 0x10 : 0x8)

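/*
 * Check, via the guest's ESR_EL1/DFSR, that the external data abort
 * we injected is still the pending exception. For example, an AArch64
 * guest reports DFSC 0x10 (synchronous external abort) while an
 * AArch32 guest using the short-descriptor format reports FS 0x8, as
 * encoded by ESR_DFSC_EXTABT() above.
 */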
bool kvm_arm_verify_ext_dabt_pending(CPUState *cs)
{
    uint64_t dfsr_val;

    if (!kvm_get_one_reg(cs, ARM64_REG_ESR_EL1, &dfsr_val)) {
        ARMCPU *cpu = ARM_CPU(cs);
        CPUARMState *env = &cpu->env;
        int aarch64_mode = arm_feature(env, ARM_FEATURE_AARCH64);
        int lpae = 0;

        if (!aarch64_mode) {
            uint64_t ttbcr;

            if (!kvm_get_one_reg(cs, ARM64_REG_TCR_EL1, &ttbcr)) {
                lpae = arm_feature(env, ARM_FEATURE_LPAE)
                       && (ttbcr & TTBCR_EAE);
            }
        }
        /*
         * The verification here is based on the DFSC bits
         * of the ESR_EL1 reg only
         */
        return (ESR_DFSC(aarch64_mode, lpae, dfsr_val) ==
                ESR_DFSC_EXTABT(aarch64_mode, lpae));
    }
    return false;
}