1 /* 2 * Copyright 2008 IBM Corporation 3 * 2008 Red Hat, Inc. 4 * Copyright 2011 Intel Corporation 5 * Copyright 2016 Veertu, Inc. 6 * Copyright 2017 The Android Open Source Project 7 * 8 * QEMU Hypervisor.framework support 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of version 2 of the GNU General Public 12 * License as published by the Free Software Foundation. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, see <http://www.gnu.org/licenses/>. 21 * 22 * This file contain code under public domain from the hvdos project: 23 * https://github.com/mist64/hvdos 24 * 25 * Parts Copyright (c) 2011 NetApp, Inc. 26 * All rights reserved. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 37 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 40 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 47 * SUCH DAMAGE. 48 */ 49 50 #include "qemu/osdep.h" 51 #include "qemu/error-report.h" 52 #include "qemu/main-loop.h" 53 #include "exec/address-spaces.h" 54 #include "exec/exec-all.h" 55 #include "exec/gdbstub.h" 56 #include "sysemu/cpus.h" 57 #include "sysemu/hvf.h" 58 #include "sysemu/hvf_int.h" 59 #include "sysemu/runstate.h" 60 #include "qemu/guest-random.h" 61 62 HVFState *hvf_state; 63 64 #ifdef __aarch64__ 65 #define HV_VM_DEFAULT NULL 66 #endif 67 68 /* Memory slots */ 69 70 hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) 71 { 72 hvf_slot *slot; 73 int x; 74 for (x = 0; x < hvf_state->num_slots; ++x) { 75 slot = &hvf_state->slots[x]; 76 if (slot->size && start < (slot->start + slot->size) && 77 (start + size) > slot->start) { 78 return slot; 79 } 80 } 81 return NULL; 82 } 83 84 struct mac_slot { 85 int present; 86 uint64_t size; 87 uint64_t gpa_start; 88 uint64_t gva; 89 }; 90 91 struct mac_slot mac_slots[32]; 92 93 static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) 94 { 95 struct mac_slot *macslot; 96 hv_return_t ret; 97 98 macslot = &mac_slots[slot->slot_id]; 99 100 if (macslot->present) { 101 if (macslot->size != slot->size) { 102 macslot->present = 0; 103 ret = hv_vm_unmap(macslot->gpa_start, macslot->size); 104 assert_hvf_ok(ret); 105 } 106 } 107 108 if (!slot->size) { 109 return 0; 110 } 111 112 macslot->present = 1; 113 macslot->gpa_start = slot->start; 114 macslot->size = slot->size; 115 ret = hv_vm_map(slot->mem, slot->start, slot->size, flags); 116 assert_hvf_ok(ret); 117 return 0; 118 } 119 120 static void hvf_set_phys_mem(MemoryRegionSection *section, bool add) 121 { 122 hvf_slot *mem; 123 MemoryRegion *area = section->mr; 124 bool writable = !area->readonly && !area->rom_device; 125 hv_memory_flags_t flags; 126 uint64_t page_size = qemu_real_host_page_size(); 127 128 if (!memory_region_is_ram(area)) { 129 if (writable) { 130 return; 131 } else if (!memory_region_is_romd(area)) { 132 /* 133 * If the memory device is not in romd_mode, then we actually want 134 * to remove the hvf memory slot so all accesses will trap. 135 */ 136 add = false; 137 } 138 } 139 140 if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) || 141 !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) { 142 /* Not page aligned, so we can not map as RAM */ 143 add = false; 144 } 145 146 mem = hvf_find_overlap_slot( 147 section->offset_within_address_space, 148 int128_get64(section->size)); 149 150 if (mem && add) { 151 if (mem->size == int128_get64(section->size) && 152 mem->start == section->offset_within_address_space && 153 mem->mem == (memory_region_get_ram_ptr(area) + 154 section->offset_within_region)) { 155 return; /* Same region was attempted to register, go away. */ 156 } 157 } 158 159 /* Region needs to be reset. set the size to 0 and remap it. */ 160 if (mem) { 161 mem->size = 0; 162 if (do_hvf_set_memory(mem, 0)) { 163 error_report("Failed to reset overlapping slot"); 164 abort(); 165 } 166 } 167 168 if (!add) { 169 return; 170 } 171 172 if (area->readonly || 173 (!memory_region_is_ram(area) && memory_region_is_romd(area))) { 174 flags = HV_MEMORY_READ | HV_MEMORY_EXEC; 175 } else { 176 flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; 177 } 178 179 /* Now make a new slot. */ 180 int x; 181 182 for (x = 0; x < hvf_state->num_slots; ++x) { 183 mem = &hvf_state->slots[x]; 184 if (!mem->size) { 185 break; 186 } 187 } 188 189 if (x == hvf_state->num_slots) { 190 error_report("No free slots"); 191 abort(); 192 } 193 194 mem->size = int128_get64(section->size); 195 mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region; 196 mem->start = section->offset_within_address_space; 197 mem->region = area; 198 199 if (do_hvf_set_memory(mem, flags)) { 200 error_report("Error registering new memory slot"); 201 abort(); 202 } 203 } 204 205 static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) 206 { 207 if (!cpu->accel->dirty) { 208 hvf_get_registers(cpu); 209 cpu->accel->dirty = true; 210 } 211 } 212 213 static void hvf_cpu_synchronize_state(CPUState *cpu) 214 { 215 if (!cpu->accel->dirty) { 216 run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL); 217 } 218 } 219 220 static void do_hvf_cpu_synchronize_set_dirty(CPUState *cpu, 221 run_on_cpu_data arg) 222 { 223 /* QEMU state is the reference, push it to HVF now and on next entry */ 224 cpu->accel->dirty = true; 225 } 226 227 static void hvf_cpu_synchronize_post_reset(CPUState *cpu) 228 { 229 run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL); 230 } 231 232 static void hvf_cpu_synchronize_post_init(CPUState *cpu) 233 { 234 run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL); 235 } 236 237 static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu) 238 { 239 run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL); 240 } 241 242 static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) 243 { 244 hvf_slot *slot; 245 246 slot = hvf_find_overlap_slot( 247 section->offset_within_address_space, 248 int128_get64(section->size)); 249 250 /* protect region against writes; begin tracking it */ 251 if (on) { 252 slot->flags |= HVF_SLOT_LOG; 253 hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, 254 HV_MEMORY_READ | HV_MEMORY_EXEC); 255 /* stop tracking region*/ 256 } else { 257 slot->flags &= ~HVF_SLOT_LOG; 258 hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, 259 HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC); 260 } 261 } 262 263 static void hvf_log_start(MemoryListener *listener, 264 MemoryRegionSection *section, int old, int new) 265 { 266 if (old != 0) { 267 return; 268 } 269 270 hvf_set_dirty_tracking(section, 1); 271 } 272 273 static void hvf_log_stop(MemoryListener *listener, 274 MemoryRegionSection *section, int old, int new) 275 { 276 if (new != 0) { 277 return; 278 } 279 280 hvf_set_dirty_tracking(section, 0); 281 } 282 283 static void hvf_log_sync(MemoryListener *listener, 284 MemoryRegionSection *section) 285 { 286 /* 287 * sync of dirty pages is handled elsewhere; just make sure we keep 288 * tracking the region. 289 */ 290 hvf_set_dirty_tracking(section, 1); 291 } 292 293 static void hvf_region_add(MemoryListener *listener, 294 MemoryRegionSection *section) 295 { 296 hvf_set_phys_mem(section, true); 297 } 298 299 static void hvf_region_del(MemoryListener *listener, 300 MemoryRegionSection *section) 301 { 302 hvf_set_phys_mem(section, false); 303 } 304 305 static MemoryListener hvf_memory_listener = { 306 .name = "hvf", 307 .priority = MEMORY_LISTENER_PRIORITY_ACCEL, 308 .region_add = hvf_region_add, 309 .region_del = hvf_region_del, 310 .log_start = hvf_log_start, 311 .log_stop = hvf_log_stop, 312 .log_sync = hvf_log_sync, 313 }; 314 315 static void dummy_signal(int sig) 316 { 317 } 318 319 bool hvf_allowed; 320 321 static int hvf_accel_init(MachineState *ms) 322 { 323 int x; 324 hv_return_t ret; 325 HVFState *s; 326 327 ret = hv_vm_create(HV_VM_DEFAULT); 328 assert_hvf_ok(ret); 329 330 s = g_new0(HVFState, 1); 331 332 s->num_slots = ARRAY_SIZE(s->slots); 333 for (x = 0; x < s->num_slots; ++x) { 334 s->slots[x].size = 0; 335 s->slots[x].slot_id = x; 336 } 337 338 QTAILQ_INIT(&s->hvf_sw_breakpoints); 339 340 hvf_state = s; 341 memory_listener_register(&hvf_memory_listener, &address_space_memory); 342 343 return hvf_arch_init(); 344 } 345 346 static inline int hvf_gdbstub_sstep_flags(void) 347 { 348 return SSTEP_ENABLE | SSTEP_NOIRQ; 349 } 350 351 static void hvf_accel_class_init(ObjectClass *oc, void *data) 352 { 353 AccelClass *ac = ACCEL_CLASS(oc); 354 ac->name = "HVF"; 355 ac->init_machine = hvf_accel_init; 356 ac->allowed = &hvf_allowed; 357 ac->gdbstub_supported_sstep_flags = hvf_gdbstub_sstep_flags; 358 } 359 360 static const TypeInfo hvf_accel_type = { 361 .name = TYPE_HVF_ACCEL, 362 .parent = TYPE_ACCEL, 363 .class_init = hvf_accel_class_init, 364 }; 365 366 static void hvf_type_init(void) 367 { 368 type_register_static(&hvf_accel_type); 369 } 370 371 type_init(hvf_type_init); 372 373 static void hvf_vcpu_destroy(CPUState *cpu) 374 { 375 hv_return_t ret = hv_vcpu_destroy(cpu->accel->fd); 376 assert_hvf_ok(ret); 377 378 hvf_arch_vcpu_destroy(cpu); 379 g_free(cpu->accel); 380 cpu->accel = NULL; 381 } 382 383 static int hvf_init_vcpu(CPUState *cpu) 384 { 385 int r; 386 387 cpu->accel = g_new0(AccelCPUState, 1); 388 389 /* init cpu signals */ 390 struct sigaction sigact; 391 392 memset(&sigact, 0, sizeof(sigact)); 393 sigact.sa_handler = dummy_signal; 394 sigaction(SIG_IPI, &sigact, NULL); 395 396 pthread_sigmask(SIG_BLOCK, NULL, &cpu->accel->unblock_ipi_mask); 397 sigdelset(&cpu->accel->unblock_ipi_mask, SIG_IPI); 398 399 #ifdef __aarch64__ 400 r = hv_vcpu_create(&cpu->accel->fd, 401 (hv_vcpu_exit_t **)&cpu->accel->exit, NULL); 402 #else 403 r = hv_vcpu_create((hv_vcpuid_t *)&cpu->accel->fd, HV_VCPU_DEFAULT); 404 #endif 405 cpu->accel->dirty = true; 406 assert_hvf_ok(r); 407 408 cpu->accel->guest_debug_enabled = false; 409 410 return hvf_arch_init_vcpu(cpu); 411 } 412 413 /* 414 * The HVF-specific vCPU thread function. This one should only run when the host 415 * CPU supports the VMX "unrestricted guest" feature. 416 */ 417 static void *hvf_cpu_thread_fn(void *arg) 418 { 419 CPUState *cpu = arg; 420 421 int r; 422 423 assert(hvf_enabled()); 424 425 rcu_register_thread(); 426 427 bql_lock(); 428 qemu_thread_get_self(cpu->thread); 429 430 cpu->thread_id = qemu_get_thread_id(); 431 current_cpu = cpu; 432 433 hvf_init_vcpu(cpu); 434 435 /* signal CPU creation */ 436 cpu_thread_signal_created(cpu); 437 qemu_guest_random_seed_thread_part2(cpu->random_seed); 438 439 do { 440 if (cpu_can_run(cpu)) { 441 r = hvf_vcpu_exec(cpu); 442 if (r == EXCP_DEBUG) { 443 cpu_handle_guest_debug(cpu); 444 } 445 } 446 qemu_wait_io_event(cpu); 447 } while (!cpu->unplug || cpu_can_run(cpu)); 448 449 hvf_vcpu_destroy(cpu); 450 cpu_thread_signal_destroyed(cpu); 451 bql_unlock(); 452 rcu_unregister_thread(); 453 return NULL; 454 } 455 456 static void hvf_start_vcpu_thread(CPUState *cpu) 457 { 458 char thread_name[VCPU_THREAD_NAME_SIZE]; 459 460 /* 461 * HVF currently does not support TCG, and only runs in 462 * unrestricted-guest mode. 463 */ 464 assert(hvf_enabled()); 465 466 cpu->thread = g_malloc0(sizeof(QemuThread)); 467 cpu->halt_cond = g_malloc0(sizeof(QemuCond)); 468 qemu_cond_init(cpu->halt_cond); 469 470 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF", 471 cpu->cpu_index); 472 qemu_thread_create(cpu->thread, thread_name, hvf_cpu_thread_fn, 473 cpu, QEMU_THREAD_JOINABLE); 474 } 475 476 static int hvf_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len) 477 { 478 struct hvf_sw_breakpoint *bp; 479 int err; 480 481 if (type == GDB_BREAKPOINT_SW) { 482 bp = hvf_find_sw_breakpoint(cpu, addr); 483 if (bp) { 484 bp->use_count++; 485 return 0; 486 } 487 488 bp = g_new(struct hvf_sw_breakpoint, 1); 489 bp->pc = addr; 490 bp->use_count = 1; 491 err = hvf_arch_insert_sw_breakpoint(cpu, bp); 492 if (err) { 493 g_free(bp); 494 return err; 495 } 496 497 QTAILQ_INSERT_HEAD(&hvf_state->hvf_sw_breakpoints, bp, entry); 498 } else { 499 err = hvf_arch_insert_hw_breakpoint(addr, len, type); 500 if (err) { 501 return err; 502 } 503 } 504 505 CPU_FOREACH(cpu) { 506 err = hvf_update_guest_debug(cpu); 507 if (err) { 508 return err; 509 } 510 } 511 return 0; 512 } 513 514 static int hvf_remove_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len) 515 { 516 struct hvf_sw_breakpoint *bp; 517 int err; 518 519 if (type == GDB_BREAKPOINT_SW) { 520 bp = hvf_find_sw_breakpoint(cpu, addr); 521 if (!bp) { 522 return -ENOENT; 523 } 524 525 if (bp->use_count > 1) { 526 bp->use_count--; 527 return 0; 528 } 529 530 err = hvf_arch_remove_sw_breakpoint(cpu, bp); 531 if (err) { 532 return err; 533 } 534 535 QTAILQ_REMOVE(&hvf_state->hvf_sw_breakpoints, bp, entry); 536 g_free(bp); 537 } else { 538 err = hvf_arch_remove_hw_breakpoint(addr, len, type); 539 if (err) { 540 return err; 541 } 542 } 543 544 CPU_FOREACH(cpu) { 545 err = hvf_update_guest_debug(cpu); 546 if (err) { 547 return err; 548 } 549 } 550 return 0; 551 } 552 553 static void hvf_remove_all_breakpoints(CPUState *cpu) 554 { 555 struct hvf_sw_breakpoint *bp, *next; 556 CPUState *tmpcpu; 557 558 QTAILQ_FOREACH_SAFE(bp, &hvf_state->hvf_sw_breakpoints, entry, next) { 559 if (hvf_arch_remove_sw_breakpoint(cpu, bp) != 0) { 560 /* Try harder to find a CPU that currently sees the breakpoint. */ 561 CPU_FOREACH(tmpcpu) 562 { 563 if (hvf_arch_remove_sw_breakpoint(tmpcpu, bp) == 0) { 564 break; 565 } 566 } 567 } 568 QTAILQ_REMOVE(&hvf_state->hvf_sw_breakpoints, bp, entry); 569 g_free(bp); 570 } 571 hvf_arch_remove_all_hw_breakpoints(); 572 573 CPU_FOREACH(cpu) { 574 hvf_update_guest_debug(cpu); 575 } 576 } 577 578 static void hvf_accel_ops_class_init(ObjectClass *oc, void *data) 579 { 580 AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); 581 582 ops->create_vcpu_thread = hvf_start_vcpu_thread; 583 ops->kick_vcpu_thread = hvf_kick_vcpu_thread; 584 585 ops->synchronize_post_reset = hvf_cpu_synchronize_post_reset; 586 ops->synchronize_post_init = hvf_cpu_synchronize_post_init; 587 ops->synchronize_state = hvf_cpu_synchronize_state; 588 ops->synchronize_pre_loadvm = hvf_cpu_synchronize_pre_loadvm; 589 590 ops->insert_breakpoint = hvf_insert_breakpoint; 591 ops->remove_breakpoint = hvf_remove_breakpoint; 592 ops->remove_all_breakpoints = hvf_remove_all_breakpoints; 593 ops->update_guest_debug = hvf_update_guest_debug; 594 ops->supports_guest_debug = hvf_arch_supports_guest_debug; 595 }; 596 static const TypeInfo hvf_accel_ops_type = { 597 .name = ACCEL_OPS_NAME("hvf"), 598 599 .parent = TYPE_ACCEL_OPS, 600 .class_init = hvf_accel_ops_class_init, 601 .abstract = true, 602 }; 603 static void hvf_accel_ops_register_types(void) 604 { 605 type_register_static(&hvf_accel_ops_type); 606 } 607 type_init(hvf_accel_ops_register_types); 608