/*
 * Copyright (c) 2003-2004 Fabrice Bellard
 * Copyright (c) 2019 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/cutils.h"
#include "qemu/units.h"
#include "qemu/datadir.h"
#include "qemu/guest-random.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qapi-visit-common.h"
#include "qapi/clone-visitor.h"
#include "qapi/qapi-visit-machine.h"
#include "qapi/visitor.h"
#include "sysemu/qtest.h"
#include "sysemu/whpx.h"
#include "sysemu/numa.h"
#include "sysemu/replay.h"
#include "sysemu/reset.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/xen.h"
#include "trace.h"

#include "hw/i386/x86.h"
#include "target/i386/cpu.h"
#include "hw/i386/topology.h"
#include "hw/i386/fw_cfg.h"
#include "hw/intc/i8259.h"
#include "hw/rtc/mc146818rtc.h"
#include "target/i386/sev.h"
#include "hw/i386/microvm.h"

#include "hw/acpi/cpu_hotplug.h"
#include "hw/irq.h"
#include "hw/nmi.h"
#include "hw/loader.h"
#include "multiboot.h"
#include "elf.h"
#include "standard-headers/asm-x86/bootparam.h"
#include CONFIG_DEVICES
#include "kvm/kvm_i386.h"

/* Physical Address of PVH entry point read from kernel ELF NOTE */
static size_t pvh_start_addr;

inline void init_topo_info(X86CPUTopoInfo *topo_info,
                           const X86MachineState *x86ms)
{
    MachineState *ms = MACHINE(x86ms);

    topo_info->dies_per_pkg = ms->smp.dies;
    topo_info->cores_per_die = ms->smp.cores;
    topo_info->threads_per_core = ms->smp.threads;
}

/*
 * Calculates initial APIC ID for a specific CPU index
 *
 * Currently we need to be able to calculate the APIC ID from the CPU index
 * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have
 * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of
 * all CPUs up to max_cpus.
 */
uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms,
                                    unsigned int cpu_index)
{
    X86CPUTopoInfo topo_info;

    init_topo_info(&topo_info, x86ms);

    return x86_apicid_from_cpu_idx(&topo_info, cpu_index);
}
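
/*
 * Illustration (not part of the original code): x86_apicid_from_cpu_idx()
 * decomposes the contiguous index into thread/core/die/package IDs and packs
 * them into bit fields whose widths are rounded up to the next power of two.
 * For example, with -smp 12,sockets=2,dies=1,cores=3,threads=2 the core field
 * needs 2 bits and the thread field 1 bit, so:
 *
 *     cpu_index 6  ->  pkg 1, core 0, thread 0  ->  APIC ID (1 << 3) = 8
 *
 * i.e. APIC IDs are not necessarily contiguous even when CPU indexes are.
 */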

void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp)
{
    Object *cpu = object_new(MACHINE(x86ms)->cpu_type);

    if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) {
        goto out;
    }
    qdev_realize(DEVICE(cpu), NULL, errp);

out:
    object_unref(cpu);
}

void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
{
    int i;
    const CPUArchIdList *possible_cpus;
    MachineState *ms = MACHINE(x86ms);
    MachineClass *mc = MACHINE_GET_CLASS(x86ms);

    x86_cpu_set_default_version(default_cpu_version);

    /*
     * Calculates the limit to CPU APIC ID values
     *
     * Limit for the APIC ID value, so that all
     * CPU APIC IDs are < x86ms->apic_id_limit.
     *
     * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create().
     */
    x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
                                                      ms->smp.max_cpus - 1) + 1;

    /*
     * Can we support APIC ID 255 or higher?
     *
     * Under Xen: yes.
     * With userspace emulated lapic: no
     * With KVM's in-kernel lapic: only if X2APIC API is enabled.
     */
    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
        (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
        error_report("current -smp configuration requires kernel "
                     "irqchip and X2APIC API support.");
        exit(EXIT_FAILURE);
    }

    if (kvm_enabled()) {
        kvm_set_max_apic_id(x86ms->apic_id_limit);
    }

    possible_cpus = mc->possible_cpu_arch_ids(ms);
    for (i = 0; i < ms->smp.cpus; i++) {
        x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
    }
}

void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count)
{
    if (cpus_count > 0xff) {
        /*
         * If the number of CPUs can't be represented in 8 bits, the
         * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just
         * to make old BIOSes fail more predictably.
         */
        rtc_set_memory(rtc, 0x5f, 0);
    } else {
        rtc_set_memory(rtc, 0x5f, cpus_count - 1);
    }
}

static int x86_apic_cmp(const void *a, const void *b)
{
    CPUArchId *apic_a = (CPUArchId *)a;
    CPUArchId *apic_b = (CPUArchId *)b;

    return apic_a->arch_id - apic_b->arch_id;
}
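
/*
 * Illustration (not part of the original code): the bsearch() in
 * x86_find_cpu_slot() below relies on ms->possible_cpus being sorted by
 * arch_id.  That holds because x86_possible_cpu_arch_ids() fills the array
 * with x86_cpu_apic_id_from_index(0 .. max_cpus - 1), which is monotonically
 * increasing.  A hypothetical caller looks roughly like:
 *
 *     int idx;
 *     CPUArchId *slot = x86_find_cpu_slot(MACHINE(x86ms), apic_id, &idx);
 *     if (slot && !slot->cpu) {
 *         ... the slot exists and is still free ...
 *     }
 */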

/*
 * Returns a pointer to the CPUArchId descriptor that matches the CPU's
 * apic_id in ms->possible_cpus->cpus. If ms->possible_cpus->cpus has no
 * entry corresponding to the CPU's apic_id, returns NULL.
 */
CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
{
    CPUArchId apic_id, *found_cpu;

    apic_id.arch_id = id;
    found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus,
        ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus),
        x86_apic_cmp);
    if (found_cpu && idx) {
        *idx = found_cpu - ms->possible_cpus->cpus;
    }
    return found_cpu;
}

void x86_cpu_plug(HotplugHandler *hotplug_dev,
                  DeviceState *dev, Error **errp)
{
    CPUArchId *found_cpu;
    Error *local_err = NULL;
    X86CPU *cpu = X86_CPU(dev);
    X86MachineState *x86ms = X86_MACHINE(hotplug_dev);

    if (x86ms->acpi_dev) {
        hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err);
        if (local_err) {
            goto out;
        }
    }

    /* increment the number of CPUs */
    x86ms->boot_cpus++;
    if (x86ms->rtc) {
        x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus);
    }
    if (x86ms->fw_cfg) {
        fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
    }

    found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL);
    found_cpu->cpu = OBJECT(dev);
out:
    error_propagate(errp, local_err);
}

void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
                               DeviceState *dev, Error **errp)
{
    int idx = -1;
    X86CPU *cpu = X86_CPU(dev);
    X86MachineState *x86ms = X86_MACHINE(hotplug_dev);

    if (!x86ms->acpi_dev) {
        error_setg(errp, "CPU hot unplug not supported without ACPI");
        return;
    }

    x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx);
    assert(idx != -1);
    if (idx == 0) {
        error_setg(errp, "Boot CPU is unpluggable");
        return;
    }

    hotplug_handler_unplug_request(x86ms->acpi_dev, dev,
                                   errp);
}

void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev,
                       DeviceState *dev, Error **errp)
{
    CPUArchId *found_cpu;
    Error *local_err = NULL;
    X86CPU *cpu = X86_CPU(dev);
    X86MachineState *x86ms = X86_MACHINE(hotplug_dev);

    hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err);
    if (local_err) {
        goto out;
    }

    found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL);
    found_cpu->cpu = NULL;
    qdev_unrealize(dev);

    /* decrement the number of CPUs */
    x86ms->boot_cpus--;
    /* Update the number of CPUs in CMOS */
    x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus);
    fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
out:
    error_propagate(errp, local_err);
}

void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
                      DeviceState *dev, Error **errp)
{
    int idx;
    CPUState *cs;
    CPUArchId *cpu_slot;
    X86CPUTopoIDs topo_ids;
    X86CPU *cpu = X86_CPU(dev);
    CPUX86State *env = &cpu->env;
    MachineState *ms = MACHINE(hotplug_dev);
    X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
    unsigned int smp_cores = ms->smp.cores;
    unsigned int smp_threads = ms->smp.threads;
    X86CPUTopoInfo topo_info;

    if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
        error_setg(errp, "Invalid CPU type, expected cpu type: '%s'",
                   ms->cpu_type);
        return;
    }

    if (x86ms->acpi_dev) {
        Error *local_err = NULL;

        hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev,
                                 &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    init_topo_info(&topo_info, x86ms);

    env->nr_dies = ms->smp.dies;

    /*
     * If APIC ID is not set,
     * set it based on socket/die/core/thread properties.
     */
    if (cpu->apic_id == UNASSIGNED_APIC_ID) {
        int max_socket = (ms->smp.max_cpus - 1) /
                                smp_threads / smp_cores / ms->smp.dies;

        /*
         * die-id was optional in QEMU 4.0 and older, so keep it optional
         * if there's only one die per socket.
         */
        if (cpu->die_id < 0 && ms->smp.dies == 1) {
            cpu->die_id = 0;
        }

        if (cpu->socket_id < 0) {
            error_setg(errp, "CPU socket-id is not set");
            return;
        } else if (cpu->socket_id > max_socket) {
            error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u",
                       cpu->socket_id, max_socket);
            return;
        }
        if (cpu->die_id < 0) {
            error_setg(errp, "CPU die-id is not set");
            return;
        } else if (cpu->die_id > ms->smp.dies - 1) {
            error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u",
                       cpu->die_id, ms->smp.dies - 1);
            return;
        }
        if (cpu->core_id < 0) {
            error_setg(errp, "CPU core-id is not set");
            return;
        } else if (cpu->core_id > (smp_cores - 1)) {
            error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u",
                       cpu->core_id, smp_cores - 1);
            return;
        }
        if (cpu->thread_id < 0) {
            error_setg(errp, "CPU thread-id is not set");
            return;
        } else if (cpu->thread_id > (smp_threads - 1)) {
            error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u",
                       cpu->thread_id, smp_threads - 1);
            return;
        }

        topo_ids.pkg_id = cpu->socket_id;
        topo_ids.die_id = cpu->die_id;
        topo_ids.core_id = cpu->core_id;
        topo_ids.smt_id = cpu->thread_id;
        cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids);
    }

    cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx);
    if (!cpu_slot) {
        MachineState *ms = MACHINE(x86ms);

        x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids);
        error_setg(errp,
            "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with"
            " APIC ID %" PRIu32 ", valid index range 0:%d",
            topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id,
            cpu->apic_id, ms->possible_cpus->len - 1);
        return;
    }

    if (cpu_slot->cpu) {
        error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists",
                   idx, cpu->apic_id);
        return;
    }

    /* if 'address' properties socket-id/core-id/thread-id are not set, set them
     * so that machine_query_hotpluggable_cpus would show correct values
     */
    /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn()
     * once -smp refactoring is complete and there will be CPU private
     * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */
    x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids);
    if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) {
        error_setg(errp, "property socket-id: %u doesn't match set apic-id:"
            " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id,
            topo_ids.pkg_id);
        return;
    }
    cpu->socket_id = topo_ids.pkg_id;

    if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) {
        error_setg(errp, "property die-id: %u doesn't match set apic-id:"
            " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id);
        return;
    }
    cpu->die_id = topo_ids.die_id;

    if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) {
        error_setg(errp, "property core-id: %u doesn't match set apic-id:"
            " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id,
            topo_ids.core_id);
        return;
    }
    cpu->core_id = topo_ids.core_id;

    if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) {
        error_setg(errp, "property thread-id: %u doesn't match set apic-id:"
            " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id,
            topo_ids.smt_id);
        return;
    }
    cpu->thread_id = topo_ids.smt_id;

    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) &&
        !kvm_hv_vpindex_settable()) {
        error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX");
        return;
    }

    cs = CPU(cpu);
    cs->cpu_index = idx;

    numa_cpu_pre_plug(cpu_slot, dev, errp);
}

CpuInstanceProperties
x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
{
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);

    assert(cpu_index < possible_cpus->len);
    return possible_cpus->cpus[cpu_index].props;
}

int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
{
    X86CPUTopoIDs topo_ids;
    X86MachineState *x86ms = X86_MACHINE(ms);
    X86CPUTopoInfo topo_info;

    init_topo_info(&topo_info, x86ms);

    assert(idx < ms->possible_cpus->len);
    x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id,
                             &topo_info, &topo_ids);
    return topo_ids.pkg_id % ms->numa_state->num_nodes;
}

const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms)
{
    X86MachineState *x86ms = X86_MACHINE(ms);
    unsigned int max_cpus = ms->smp.max_cpus;
    X86CPUTopoInfo topo_info;
    int i;

    if (ms->possible_cpus) {
        /*
         * make sure that max_cpus hasn't changed since the first use, i.e.
         * -smp hasn't been parsed after it
         */
        assert(ms->possible_cpus->len == max_cpus);
        return ms->possible_cpus;
    }

    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
                                  sizeof(CPUArchId) * max_cpus);
    ms->possible_cpus->len = max_cpus;

    init_topo_info(&topo_info, x86ms);

    for (i = 0; i < ms->possible_cpus->len; i++) {
        X86CPUTopoIDs topo_ids;

        ms->possible_cpus->cpus[i].type = ms->cpu_type;
        ms->possible_cpus->cpus[i].vcpus_count = 1;
        ms->possible_cpus->cpus[i].arch_id =
            x86_cpu_apic_id_from_index(x86ms, i);
        x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id,
                                 &topo_info, &topo_ids);
        ms->possible_cpus->cpus[i].props.has_socket_id = true;
        ms->possible_cpus->cpus[i].props.socket_id = topo_ids.pkg_id;
        if (ms->smp.dies > 1) {
            ms->possible_cpus->cpus[i].props.has_die_id = true;
            ms->possible_cpus->cpus[i].props.die_id = topo_ids.die_id;
        }
        ms->possible_cpus->cpus[i].props.has_core_id = true;
        ms->possible_cpus->cpus[i].props.core_id = topo_ids.core_id;
        ms->possible_cpus->cpus[i].props.has_thread_id = true;
        ms->possible_cpus->cpus[i].props.thread_id = topo_ids.smt_id;
    }
    return ms->possible_cpus;
}

static void x86_nmi(NMIState *n, int cpu_index, Error **errp)
{
    /* cpu index isn't used */
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
}

static long get_file_size(FILE *f)
{
    long where, size;

    /* XXX: on Unix systems, using fstat() probably makes more sense */

    where = ftell(f);
    fseek(f, 0, SEEK_END);
    size = ftell(f);
    fseek(f, where, SEEK_SET);

    return size;
}
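
/*
 * Sketch (not part of the original code) of the fstat() variant the XXX
 * comment above alludes to; POSIX only, shown for illustration:
 *
 *     #include <sys/stat.h>
 *
 *     static long get_file_size_fstat(FILE *f)
 *     {
 *         struct stat st;
 *
 *         if (fstat(fileno(f), &st) < 0) {
 *             return -1;
 *         }
 *         return st.st_size;
 *     }
 */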

/* TSC handling */
uint64_t cpu_get_tsc(CPUX86State *env)
{
    return cpus_get_elapsed_ticks();
}

/* IRQ handling */
static void pic_irq_request(void *opaque, int irq, int level)
{
    CPUState *cs = first_cpu;
    X86CPU *cpu = X86_CPU(cs);

    trace_x86_pic_interrupt(irq, level);
    if (cpu->apic_state && !kvm_irqchip_in_kernel() &&
        !whpx_apic_in_platform()) {
        CPU_FOREACH(cs) {
            cpu = X86_CPU(cs);
            if (apic_accept_pic_intr(cpu->apic_state)) {
                apic_deliver_pic_intr(cpu->apic_state, level);
            }
        }
    } else {
        if (level) {
            cpu_interrupt(cs, CPU_INTERRUPT_HARD);
        } else {
            cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
        }
    }
}

qemu_irq x86_allocate_cpu_irq(void)
{
    return qemu_allocate_irq(pic_irq_request, NULL, 0);
}

int cpu_get_pic_interrupt(CPUX86State *env)
{
    X86CPU *cpu = env_archcpu(env);
    int intno;

    if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) {
        intno = apic_get_interrupt(cpu->apic_state);
        if (intno >= 0) {
            return intno;
        }
        /* read the irq from the PIC */
        if (!apic_accept_pic_intr(cpu->apic_state)) {
            return -1;
        }
    }

    intno = pic_read_irq(isa_pic);
    return intno;
}

DeviceState *cpu_get_current_apic(void)
{
    if (current_cpu) {
        X86CPU *cpu = X86_CPU(current_cpu);
        return cpu->apic_state;
    } else {
        return NULL;
    }
}

void gsi_handler(void *opaque, int n, int level)
{
    GSIState *s = opaque;

    trace_x86_gsi_interrupt(n, level);
    switch (n) {
    case 0 ... ISA_NUM_IRQS - 1:
        if (s->i8259_irq[n]) {
            /* Under KVM, Kernel will forward to both PIC and IOAPIC */
            qemu_set_irq(s->i8259_irq[n], level);
        }
        /* fall through */
    case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1:
        qemu_set_irq(s->ioapic_irq[n], level);
        break;
    case IO_APIC_SECONDARY_IRQBASE
        ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1:
        qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level);
        break;
    }
}

void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
{
    DeviceState *dev;
    SysBusDevice *d;
    unsigned int i;

    assert(parent_name);
    if (kvm_ioapic_in_kernel()) {
        dev = qdev_new(TYPE_KVM_IOAPIC);
    } else {
        dev = qdev_new(TYPE_IOAPIC);
    }
    object_property_add_child(object_resolve_path(parent_name, NULL),
                              "ioapic", OBJECT(dev));
    d = SYS_BUS_DEVICE(dev);
    sysbus_realize_and_unref(d, &error_fatal);
    sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS);

    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
        gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i);
    }
}

DeviceState *ioapic_init_secondary(GSIState *gsi_state)
{
    DeviceState *dev;
    SysBusDevice *d;
    unsigned int i;

    dev = qdev_new(TYPE_IOAPIC);
    d = SYS_BUS_DEVICE(dev);
    sysbus_realize_and_unref(d, &error_fatal);
    sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS);

    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
        gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i);
    }
    return dev;
}

typedef struct SetupData {
    uint64_t next;
    uint32_t type;
    uint32_t len;
    uint8_t data[];
} __attribute__((packed)) SetupData;
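
/*
 * Illustration (not part of the original code): SetupData mirrors the Linux
 * boot protocol's "struct setup_data".  Nodes form a singly linked list in
 * guest memory, headed by the 64-bit pointer at offset 0x250 of the setup
 * header, e.g.:
 *
 *     setup header +0x250 --> node A { next --> node B, type, len, data[] }
 *                             node B { next = 0,        type, len, data[] }
 *
 * x86_load_linux() below appends such nodes (SETUP_DTB, SETUP_RNG_SEED) to
 * the command-line blob and patches the 0x250 pointer accordingly.
 */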
673 * 674 * This function is passed to load_elf() when it is called from 675 * load_elfboot() which then additionally checks for an ELF Note of 676 * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to 677 * parse the PVH entry address from the ELF Note. 678 * 679 * Due to trickery in elf_opts.h, load_elf() is actually available as 680 * load_elf32() or load_elf64() and this routine needs to be able 681 * to deal with being called as 32 or 64 bit. 682 * 683 * The address of the PVH entry point is saved to the 'pvh_start_addr' 684 * global variable. (although the entry point is 32-bit, the kernel 685 * binary can be either 32-bit or 64-bit). 686 */ 687 static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) 688 { 689 size_t *elf_note_data_addr; 690 691 /* Check if ELF Note header passed in is valid */ 692 if (arg1 == NULL) { 693 return 0; 694 } 695 696 if (is64) { 697 struct elf64_note *nhdr64 = (struct elf64_note *)arg1; 698 uint64_t nhdr_size64 = sizeof(struct elf64_note); 699 uint64_t phdr_align = *(uint64_t *)arg2; 700 uint64_t nhdr_namesz = nhdr64->n_namesz; 701 702 elf_note_data_addr = 703 ((void *)nhdr64) + nhdr_size64 + 704 QEMU_ALIGN_UP(nhdr_namesz, phdr_align); 705 706 pvh_start_addr = *elf_note_data_addr; 707 } else { 708 struct elf32_note *nhdr32 = (struct elf32_note *)arg1; 709 uint32_t nhdr_size32 = sizeof(struct elf32_note); 710 uint32_t phdr_align = *(uint32_t *)arg2; 711 uint32_t nhdr_namesz = nhdr32->n_namesz; 712 713 elf_note_data_addr = 714 ((void *)nhdr32) + nhdr_size32 + 715 QEMU_ALIGN_UP(nhdr_namesz, phdr_align); 716 717 pvh_start_addr = *(uint32_t *)elf_note_data_addr; 718 } 719 720 return pvh_start_addr; 721 } 722 723 static bool load_elfboot(const char *kernel_filename, 724 int kernel_file_size, 725 uint8_t *header, 726 size_t pvh_xen_start_addr, 727 FWCfgState *fw_cfg) 728 { 729 uint32_t flags = 0; 730 uint32_t mh_load_addr = 0; 731 uint32_t elf_kernel_size = 0; 732 uint64_t elf_entry; 733 uint64_t elf_low, elf_high; 734 int kernel_size; 735 736 if (ldl_p(header) != 0x464c457f) { 737 return false; /* no elfboot */ 738 } 739 740 bool elf_is64 = header[EI_CLASS] == ELFCLASS64; 741 flags = elf_is64 ? 

static bool load_elfboot(const char *kernel_filename,
                         int kernel_file_size,
                         uint8_t *header,
                         size_t pvh_xen_start_addr,
                         FWCfgState *fw_cfg)
{
    uint32_t flags = 0;
    uint32_t mh_load_addr = 0;
    uint32_t elf_kernel_size = 0;
    uint64_t elf_entry;
    uint64_t elf_low, elf_high;
    int kernel_size;

    if (ldl_p(header) != 0x464c457f) {
        return false; /* no elfboot */
    }

    bool elf_is64 = header[EI_CLASS] == ELFCLASS64;
    flags = elf_is64 ?
        ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags;

    if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */
        error_report("elfboot unsupported flags = %x", flags);
        exit(1);
    }

    uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY;
    kernel_size = load_elf(kernel_filename, read_pvh_start_addr,
                           NULL, &elf_note_type, &elf_entry,
                           &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE,
                           0, 0);

    if (kernel_size < 0) {
        error_report("Error while loading elf kernel");
        exit(1);
    }
    mh_load_addr = elf_low;
    elf_kernel_size = elf_high - elf_low;

    if (pvh_start_addr == 0) {
        error_report("Error loading uncompressed kernel without PVH ELF Note");
        exit(1);
    }
    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size);

    return true;
}

typedef struct SetupDataFixup {
    void *pos;
    hwaddr orig_val, new_val;
    uint32_t addr;
} SetupDataFixup;

static void fixup_setup_data(void *opaque)
{
    SetupDataFixup *fixup = opaque;
    stq_p(fixup->pos, fixup->new_val);
}

static void reset_setup_data(void *opaque)
{
    SetupDataFixup *fixup = opaque;
    stq_p(fixup->pos, fixup->orig_val);
}

static void reset_rng_seed(void *opaque)
{
    SetupData *setup_data = opaque;
    qemu_guest_getrandom_nofail(setup_data->data, le32_to_cpu(setup_data->len));
}

void x86_load_linux(X86MachineState *x86ms,
                    FWCfgState *fw_cfg,
                    int acpi_data_size,
                    bool pvh_enabled,
                    bool legacy_no_rng_seed)
{
    bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
    uint16_t protocol;
    int setup_size, kernel_size, cmdline_size;
    int dtb_size, setup_data_offset;
    uint32_t initrd_max;
    uint8_t header[8192], *setup, *kernel;
    hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0;
    FILE *f;
    char *vmode;
    MachineState *machine = MACHINE(x86ms);
    SetupData *setup_data;
    const char *kernel_filename = machine->kernel_filename;
    const char *initrd_filename = machine->initrd_filename;
    const char *dtb_filename = machine->dtb;
    char *kernel_cmdline;
    SevKernelLoaderContext sev_load_ctx = {};
    enum { RNG_SEED_LENGTH = 32 };

    /*
     * Add the NUL terminator, some padding for the microvm cmdline fiddling
     * hack, and then align to 16 bytes as a paranoia measure
     */
    cmdline_size = (strlen(machine->kernel_cmdline) + 1 +
                    VIRTIO_CMDLINE_TOTAL_MAX_LEN + 16) & ~15;
    /* Make a copy, since we might append arbitrary bytes to it later. */
    kernel_cmdline = g_strndup(machine->kernel_cmdline, cmdline_size);

    /* load the kernel header */
    f = fopen(kernel_filename, "rb");
    if (!f) {
        fprintf(stderr, "qemu: could not open kernel file '%s': %s\n",
                kernel_filename, strerror(errno));
        exit(1);
    }

    kernel_size = get_file_size(f);
    if (!kernel_size ||
        fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) !=
        MIN(ARRAY_SIZE(header), kernel_size)) {
        fprintf(stderr, "qemu: could not load kernel '%s': %s\n",
                kernel_filename, strerror(errno));
        exit(1);
    }
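
    /*
     * Note (not part of the original code): a bzImage carries the ASCII
     * signature "HdrS" at offset 0x202 of its setup header and the boot
     * protocol version at 0x206, for example:
     *
     *     header[0x202..0x205] = 'H' 'd' 'r' 'S'  ->  ldl_p() == 0x53726448
     *     header[0x206..0x207] = 0x0c 0x02        ->  protocol 0x020c
     */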

    /* kernel protocol version */
    if (ldl_p(header + 0x202) == 0x53726448) {
        protocol = lduw_p(header + 0x206);
    } else {
        /*
         * This could be a multiboot kernel. If it is, let's stop treating it
         * like a Linux kernel.
         * Note: some multiboot images could be in ELF format (the same format
         * PVH uses), so we try multiboot first, since we check the multiboot
         * magic header before loading it.
         */
        if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename,
                           kernel_cmdline, kernel_size, header)) {
            return;
        }
        /*
         * Check if the file is an uncompressed kernel file (ELF) and load it,
         * saving the PVH entry point used by the x86/HVM direct boot ABI.
         * If load_elfboot() is successful, populate the fw_cfg info.
         */
        if (pvh_enabled &&
            load_elfboot(kernel_filename, kernel_size,
                         header, pvh_start_addr, fw_cfg)) {
            fclose(f);

            fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
                           strlen(kernel_cmdline) + 1);
            fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);

            fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header));
            fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA,
                             header, sizeof(header));

            /* load initrd */
            if (initrd_filename) {
                GMappedFile *mapped_file;
                gsize initrd_size;
                gchar *initrd_data;
                GError *gerr = NULL;

                mapped_file = g_mapped_file_new(initrd_filename, false, &gerr);
                if (!mapped_file) {
                    fprintf(stderr, "qemu: error reading initrd %s: %s\n",
                            initrd_filename, gerr->message);
                    exit(1);
                }
                x86ms->initrd_mapped_file = mapped_file;

                initrd_data = g_mapped_file_get_contents(mapped_file);
                initrd_size = g_mapped_file_get_length(mapped_file);
                initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
                if (initrd_size >= initrd_max) {
                    fprintf(stderr, "qemu: initrd is too large, cannot support."
                            "(max: %"PRIu32", need %"PRId64")\n",
                            initrd_max, (uint64_t)initrd_size);
                    exit(1);
                }

                initrd_addr = (initrd_max - initrd_size) & ~4095;

                fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
                fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
                fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data,
                                 initrd_size);
            }

            option_rom[nb_option_roms].bootindex = 0;
            option_rom[nb_option_roms].name = "pvh.bin";
            nb_option_roms++;

            return;
        }
        protocol = 0;
    }

    if (protocol < 0x200 || !(header[0x211] & 0x01)) {
        /* Low kernel */
        real_addr    = 0x90000;
        cmdline_addr = 0x9a000 - cmdline_size;
        prot_addr    = 0x10000;
    } else if (protocol < 0x202) {
        /* High but ancient kernel */
        real_addr    = 0x90000;
        cmdline_addr = 0x9a000 - cmdline_size;
        prot_addr    = 0x100000;
    } else {
        /* High and recent kernel */
        real_addr    = 0x10000;
        cmdline_addr = 0x20000;
        prot_addr    = 0x100000;
    }

    /* highest address for loading the initrd */
    if (protocol >= 0x20c &&
        lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) {
        /*
         * Linux has supported initrd up to 4 GB for a very long time (2007,
         * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013),
         * though it only sets initrd_max to 2 GB to "work around bootloader
         * bugs". Luckily, QEMU firmware (which plays the bootloader role
         * here) has supported this.
         *
         * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can
         * be loaded at any address.
         *
         * In addition, initrd_max is uint32_t simply because QEMU doesn't
         * support the 64-bit boot protocol (specifically the ext_ramdisk_image
         * field).
         *
         * Therefore, simply limit initrd_max to UINT32_MAX here as well.
         */
        initrd_max = UINT32_MAX;
    } else if (protocol >= 0x203) {
        initrd_max = ldl_p(header + 0x22c);
    } else {
        initrd_max = 0x37ffffff;
    }

    if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) {
        initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1;
    }

    if (protocol >= 0x202) {
        stl_p(header + 0x228, cmdline_addr);
    } else {
        stw_p(header + 0x20, 0xA33F);
        stw_p(header + 0x22, cmdline_addr - real_addr);
    }

    /* handle vga= parameter */
    vmode = strstr(kernel_cmdline, "vga=");
    if (vmode) {
        unsigned int video_mode;
        const char *end;
        int ret;
        /* skip "vga=" */
        vmode += 4;
        if (!strncmp(vmode, "normal", 6)) {
            video_mode = 0xffff;
        } else if (!strncmp(vmode, "ext", 3)) {
            video_mode = 0xfffe;
        } else if (!strncmp(vmode, "ask", 3)) {
            video_mode = 0xfffd;
        } else {
            ret = qemu_strtoui(vmode, &end, 0, &video_mode);
            if (ret != 0 || (*end && *end != ' ')) {
                fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n");
                exit(1);
            }
        }
        stw_p(header + 0x1fa, video_mode);
    }

    /* loader type */
    /*
     * High nybble = B reserved for QEMU; low nybble is revision number.
     * If this code is substantially changed, you may want to consider
     * incrementing the revision.
     */
    if (protocol >= 0x200) {
        header[0x210] = 0xB0;
    }
    /* heap */
    if (protocol >= 0x201) {
        header[0x211] |= 0x80; /* CAN_USE_HEAP */
        stw_p(header + 0x224, cmdline_addr - real_addr - 0x200);
    }

    /* load initrd */
    if (initrd_filename) {
        GMappedFile *mapped_file;
        gsize initrd_size;
        gchar *initrd_data;
        GError *gerr = NULL;

        if (protocol < 0x200) {
            fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n");
            exit(1);
        }

        mapped_file = g_mapped_file_new(initrd_filename, false, &gerr);
        if (!mapped_file) {
            fprintf(stderr, "qemu: error reading initrd %s: %s\n",
                    initrd_filename, gerr->message);
            exit(1);
        }
        x86ms->initrd_mapped_file = mapped_file;

        initrd_data = g_mapped_file_get_contents(mapped_file);
        initrd_size = g_mapped_file_get_length(mapped_file);
        if (initrd_size >= initrd_max) {
            fprintf(stderr, "qemu: initrd is too large, cannot support."
                    "(max: %"PRIu32", need %"PRId64")\n",
                    initrd_max, (uint64_t)initrd_size);
            exit(1);
        }

        initrd_addr = (initrd_max - initrd_size) & ~4095;

        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
        fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
        fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size);
        sev_load_ctx.initrd_data = initrd_data;
        sev_load_ctx.initrd_size = initrd_size;

        stl_p(header + 0x218, initrd_addr);
        stl_p(header + 0x21c, initrd_size);
    }

    /* load kernel and setup */
    setup_size = header[0x1f1];
    if (setup_size == 0) {
        setup_size = 4;
    }
    setup_size = (setup_size + 1) * 512;
    if (setup_size > kernel_size) {
        fprintf(stderr, "qemu: invalid kernel header\n");
        exit(1);
    }
    kernel_size -= setup_size;

    setup = g_malloc(setup_size);
    kernel = g_malloc(kernel_size);
    fseek(f, 0, SEEK_SET);
    if (fread(setup, 1, setup_size, f) != setup_size) {
        fprintf(stderr, "fread() failed\n");
        exit(1);
    }
    if (fread(kernel, 1, kernel_size, f) != kernel_size) {
        fprintf(stderr, "fread() failed\n");
        exit(1);
    }
    fclose(f);

    /* append dtb to kernel */
    if (dtb_filename) {
        if (protocol < 0x209) {
            fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n");
            exit(1);
        }

        dtb_size = get_image_size(dtb_filename);
        if (dtb_size <= 0) {
            fprintf(stderr, "qemu: error reading dtb %s: %s\n",
                    dtb_filename, strerror(errno));
            exit(1);
        }

        setup_data_offset = cmdline_size;
        cmdline_size += sizeof(SetupData) + dtb_size;
        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
        setup_data = (void *)kernel_cmdline + setup_data_offset;
        setup_data->next = cpu_to_le64(first_setup_data);
        first_setup_data = cmdline_addr + setup_data_offset;
        setup_data->type = cpu_to_le32(SETUP_DTB);
        setup_data->len = cpu_to_le32(dtb_size);
        load_image_size(dtb_filename, setup_data->data, dtb_size);
    }

    if (!legacy_no_rng_seed && protocol >= 0x209) {
        setup_data_offset = cmdline_size;
        cmdline_size += sizeof(SetupData) + RNG_SEED_LENGTH;
        kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size);
        setup_data = (void *)kernel_cmdline + setup_data_offset;
        setup_data->next = cpu_to_le64(first_setup_data);
        first_setup_data = cmdline_addr + setup_data_offset;
        setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
        setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
        qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
        qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data);
        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL,
                                  setup_data, kernel, kernel_size, true);
    } else {
        fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
    }

    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, cmdline_size);
    fw_cfg_add_bytes(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline, cmdline_size);
    sev_load_ctx.cmdline_data = (char *)kernel_cmdline;
    sev_load_ctx.cmdline_size = cmdline_size;

    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
    sev_load_ctx.kernel_data = (char *)kernel;
    sev_load_ctx.kernel_size = kernel_size;
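
    /*
     * Illustration (not part of the original code): with both a dtb and an
     * RNG seed appended, the command-line blob exposed via FW_CFG_CMDLINE_DATA
     * above ends up laid out roughly as
     *
     *     cmdline_addr + 0x0   "console=ttyS0 ...\0"  (plus padding)
     *     cmdline_addr + off1  SetupData { next = 0,            SETUP_DTB }
     *     cmdline_addr + off2  SetupData { next = addr of DTB node,
     *                                      SETUP_RNG_SEED, 32-byte seed }
     *
     * first_setup_data points at the most recently appended node, and the
     * chain is linked into the kernel header at offset 0x250 below (unless
     * SEV is enabled).
     */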

    /*
     * If we're starting an encrypted VM, it will be OVMF based, which uses the
     * efi stub for booting and doesn't require any values to be placed in the
     * kernel header. We therefore don't update the header so the hash of the
     * kernel on the other side of the fw_cfg interface matches the hash of the
     * file the user passed in.
     */
    if (!sev_enabled() && first_setup_data) {
        SetupDataFixup *fixup = g_malloc(sizeof(*fixup));

        memcpy(setup, header, MIN(sizeof(header), setup_size));
        /* Offset 0x250 is a pointer to the first setup_data link. */
        fixup->pos = setup + 0x250;
        fixup->orig_val = ldq_p(fixup->pos);
        fixup->new_val = first_setup_data;
        fixup->addr = cpu_to_le32(real_addr);
        fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
                                  fixup, &fixup->addr, sizeof(fixup->addr), true);
        qemu_register_reset(reset_setup_data, fixup);
    } else {
        fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
    }
    fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
    fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
    sev_load_ctx.setup_data = (char *)setup;
    sev_load_ctx.setup_size = setup_size;

    if (sev_enabled()) {
        sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal);
    }

    option_rom[nb_option_roms].bootindex = 0;
    option_rom[nb_option_roms].name = "linuxboot.bin";
    if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) {
        option_rom[nb_option_roms].name = "linuxboot_dma.bin";
    }
    nb_option_roms++;
}

void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
                       MemoryRegion *rom_memory, bool isapc_ram_fw)
{
    const char *bios_name;
    char *filename;
    MemoryRegion *bios, *isa_bios;
    int bios_size, isa_bios_size;
    ssize_t ret;

    /* BIOS load */
    bios_name = ms->firmware ?: default_firmware;
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
    if (filename) {
        bios_size = get_image_size(filename);
    } else {
        bios_size = -1;
    }
    if (bios_size <= 0 ||
        (bios_size % 65536) != 0) {
        goto bios_error;
    }
    bios = g_malloc(sizeof(*bios));
    memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
    if (sev_enabled()) {
        /*
         * The concept of a "reset" simply doesn't exist for
         * confidential computing guests, we have to destroy and
         * re-launch them instead. So there is no need to register
         * the firmware as rom to properly re-initialize on reset.
         * Just go for a straight file load instead.
         */
        void *ptr = memory_region_get_ram_ptr(bios);
        load_image_size(filename, ptr, bios_size);
        x86_firmware_configure(ptr, bios_size);
    } else {
        if (!isapc_ram_fw) {
            memory_region_set_readonly(bios, true);
        }
        ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
        if (ret != 0) {
            goto bios_error;
        }
    }
    g_free(filename);

    /* map the last 128KB of the BIOS in ISA space */
    isa_bios_size = MIN(bios_size, 128 * KiB);
    isa_bios = g_malloc(sizeof(*isa_bios));
    memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
                             bios_size - isa_bios_size, isa_bios_size);
    memory_region_add_subregion_overlap(rom_memory,
                                        0x100000 - isa_bios_size,
                                        isa_bios,
                                        1);
    if (!isapc_ram_fw) {
        memory_region_set_readonly(isa_bios, true);
    }

    /* map all the bios at the top of memory */
    memory_region_add_subregion(rom_memory,
                                (uint32_t)(-bios_size),
                                bios);
    return;

bios_error:
    fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name);
    exit(1);
}

bool x86_machine_is_smm_enabled(const X86MachineState *x86ms)
{
    bool smm_available = false;

    if (x86ms->smm == ON_OFF_AUTO_OFF) {
        return false;
    }

    if (tcg_enabled() || qtest_enabled()) {
        smm_available = true;
    } else if (kvm_enabled()) {
        smm_available = kvm_has_smm();
    }

    if (smm_available) {
        return true;
    }

    if (x86ms->smm == ON_OFF_AUTO_ON) {
        error_report("System Management Mode not supported by this hypervisor.");
        exit(1);
    }
    return false;
}

static void x86_machine_get_smm(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);
    OnOffAuto smm = x86ms->smm;

    visit_type_OnOffAuto(v, name, &smm, errp);
}

static void x86_machine_set_smm(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);

    visit_type_OnOffAuto(v, name, &x86ms->smm, errp);
}

bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms)
{
    if (x86ms->acpi == ON_OFF_AUTO_OFF) {
        return false;
    }
    return true;
}

static void x86_machine_get_acpi(Object *obj, Visitor *v, const char *name,
                                 void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);
    OnOffAuto acpi = x86ms->acpi;

    visit_type_OnOffAuto(v, name, &acpi, errp);
}

static void x86_machine_set_acpi(Object *obj, Visitor *v, const char *name,
                                 void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);

    visit_type_OnOffAuto(v, name, &x86ms->acpi, errp);
}

static void x86_machine_get_pit(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);
    OnOffAuto pit = x86ms->pit;

    visit_type_OnOffAuto(v, name, &pit, errp);
}

static void x86_machine_set_pit(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);

    visit_type_OnOffAuto(v, name, &x86ms->pit, errp);
}
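
/*
 * Usage note (not part of the original code): the OnOffAuto properties above
 * and below are ordinary machine options, e.g.:
 *
 *     qemu-system-x86_64 -machine q35,smm=off,pit=off,pic=off,acpi=on ...
 *
 * "auto" (the default) lets the machine or accelerator pick a suitable value.
 */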

static void x86_machine_get_pic(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);
    OnOffAuto pic = x86ms->pic;

    visit_type_OnOffAuto(v, name, &pic, errp);
}

static void x86_machine_set_pic(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);

    visit_type_OnOffAuto(v, name, &x86ms->pic, errp);
}

static char *x86_machine_get_oem_id(Object *obj, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);

    return g_strdup(x86ms->oem_id);
}

static void x86_machine_set_oem_id(Object *obj, const char *value, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);
    size_t len = strlen(value);

    if (len > 6) {
        error_setg(errp,
                   "User specified "X86_MACHINE_OEM_ID" value is bigger than "
                   "6 bytes in size");
        return;
    }

    strncpy(x86ms->oem_id, value, 6);
}

static char *x86_machine_get_oem_table_id(Object *obj, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);

    return g_strdup(x86ms->oem_table_id);
}

static void x86_machine_set_oem_table_id(Object *obj, const char *value,
                                         Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);
    size_t len = strlen(value);

    if (len > 8) {
        error_setg(errp,
                   "User specified "X86_MACHINE_OEM_TABLE_ID
                   " value is bigger than "
                   "8 bytes in size");
        return;
    }
    strncpy(x86ms->oem_table_id, value, 8);
}

static void x86_machine_get_bus_lock_ratelimit(Object *obj, Visitor *v,
                                const char *name, void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);
    uint64_t bus_lock_ratelimit = x86ms->bus_lock_ratelimit;

    visit_type_uint64(v, name, &bus_lock_ratelimit, errp);
}

static void x86_machine_set_bus_lock_ratelimit(Object *obj, Visitor *v,
                               const char *name, void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);

    visit_type_uint64(v, name, &x86ms->bus_lock_ratelimit, errp);
}

static void machine_get_sgx_epc(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);
    SgxEPCList *list = x86ms->sgx_epc_list;

    visit_type_SgxEPCList(v, name, &list, errp);
}

static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name,
                                void *opaque, Error **errp)
{
    X86MachineState *x86ms = X86_MACHINE(obj);
    SgxEPCList *list;

    list = x86ms->sgx_epc_list;
    visit_type_SgxEPCList(v, name, &x86ms->sgx_epc_list, errp);

    qapi_free_SgxEPCList(list);
}

static void x86_machine_initfn(Object *obj)
{
    X86MachineState *x86ms = X86_MACHINE(obj);

    x86ms->smm = ON_OFF_AUTO_AUTO;
    x86ms->acpi = ON_OFF_AUTO_AUTO;
    x86ms->pit = ON_OFF_AUTO_AUTO;
    x86ms->pic = ON_OFF_AUTO_AUTO;
    x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS;
    x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
    x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
    x86ms->bus_lock_ratelimit = 0;
    x86ms->above_4g_mem_start = 4 * GiB;
}
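
/*
 * Usage note (not part of the original code): the string and integer
 * properties registered in x86_machine_class_init() below are also plain
 * machine options.  Assuming the usual names behind the X86_MACHINE_* macros
 * ("x-oem-id", "x-oem-table-id", "bus-lock-ratelimit"), an invocation might
 * look like:
 *
 *     -machine q35,x-oem-id=ACME,x-oem-table-id=ACMETBL,bus-lock-ratelimit=1000
 *
 * The OEM ID is capped at 6 bytes and the OEM table ID at 8, matching the
 * ACPI table header fields they override.
 */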

static void x86_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    X86MachineClass *x86mc = X86_MACHINE_CLASS(oc);
    NMIClass *nc = NMI_CLASS(oc);

    mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
    mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
    mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
    x86mc->save_tsc_khz = true;
    x86mc->fwcfg_dma_enabled = true;
    nc->nmi_monitor_handler = x86_nmi;

    object_class_property_add(oc, X86_MACHINE_SMM, "OnOffAuto",
                              x86_machine_get_smm, x86_machine_set_smm,
                              NULL, NULL);
    object_class_property_set_description(oc, X86_MACHINE_SMM,
        "Enable SMM");

    object_class_property_add(oc, X86_MACHINE_ACPI, "OnOffAuto",
                              x86_machine_get_acpi, x86_machine_set_acpi,
                              NULL, NULL);
    object_class_property_set_description(oc, X86_MACHINE_ACPI,
        "Enable ACPI");

    object_class_property_add(oc, X86_MACHINE_PIT, "OnOffAuto",
                              x86_machine_get_pit,
                              x86_machine_set_pit,
                              NULL, NULL);
    object_class_property_set_description(oc, X86_MACHINE_PIT,
        "Enable i8254 PIT");

    object_class_property_add(oc, X86_MACHINE_PIC, "OnOffAuto",
                              x86_machine_get_pic,
                              x86_machine_set_pic,
                              NULL, NULL);
    object_class_property_set_description(oc, X86_MACHINE_PIC,
        "Enable i8259 PIC");

    object_class_property_add_str(oc, X86_MACHINE_OEM_ID,
                                  x86_machine_get_oem_id,
                                  x86_machine_set_oem_id);
    object_class_property_set_description(oc, X86_MACHINE_OEM_ID,
        "Override the default value of field OEMID "
        "in ACPI table header."
        "The string may be up to 6 bytes in size");

    object_class_property_add_str(oc, X86_MACHINE_OEM_TABLE_ID,
                                  x86_machine_get_oem_table_id,
                                  x86_machine_set_oem_table_id);
    object_class_property_set_description(oc, X86_MACHINE_OEM_TABLE_ID,
        "Override the default value of field OEM Table ID "
        "in ACPI table header."
        "The string may be up to 8 bytes in size");

    object_class_property_add(oc, X86_MACHINE_BUS_LOCK_RATELIMIT, "uint64_t",
                              x86_machine_get_bus_lock_ratelimit,
                              x86_machine_set_bus_lock_ratelimit, NULL, NULL);
    object_class_property_set_description(oc, X86_MACHINE_BUS_LOCK_RATELIMIT,
        "Set the ratelimit for the bus locks acquired in VMs");

    object_class_property_add(oc, "sgx-epc", "SgxEPC",
                              machine_get_sgx_epc, machine_set_sgx_epc,
                              NULL, NULL);
    object_class_property_set_description(oc, "sgx-epc",
        "SGX EPC device");
}

static const TypeInfo x86_machine_info = {
    .name = TYPE_X86_MACHINE,
    .parent = TYPE_MACHINE,
    .abstract = true,
    .instance_size = sizeof(X86MachineState),
    .instance_init = x86_machine_initfn,
    .class_size = sizeof(X86MachineClass),
    .class_init = x86_machine_class_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_NMI },
        { }
    },
};

static void x86_machine_register_types(void)
{
    type_register_static(&x86_machine_info);
}

type_init(x86_machine_register_types)