1 /* 2 * TPR optimization for 32-bit Windows guests (XP and Server 2003) 3 * 4 * Copyright (C) 2007-2008 Qumranet Technologies 5 * Copyright (C) 2012 Jan Kiszka, Siemens AG 6 * 7 * This work is licensed under the terms of the GNU GPL version 2, or 8 * (at your option) any later version. See the COPYING file in the 9 * top-level directory. 10 */ 11 12 #include "qemu/osdep.h" 13 #include "qemu/module.h" 14 #include "sysemu/sysemu.h" 15 #include "sysemu/cpus.h" 16 #include "sysemu/hw_accel.h" 17 #include "sysemu/kvm.h" 18 #include "sysemu/runstate.h" 19 #include "exec/address-spaces.h" 20 #include "hw/i386/apic_internal.h" 21 #include "hw/sysbus.h" 22 #include "hw/boards.h" 23 #include "migration/vmstate.h" 24 #include "qom/object.h" 25 26 #define VAPIC_IO_PORT 0x7e 27 28 #define VAPIC_CPU_SHIFT 7 29 30 #define ROM_BLOCK_SIZE 512 31 #define ROM_BLOCK_MASK (~(ROM_BLOCK_SIZE - 1)) 32 33 typedef enum VAPICMode { 34 VAPIC_INACTIVE = 0, 35 VAPIC_ACTIVE = 1, 36 VAPIC_STANDBY = 2, 37 } VAPICMode; 38 39 typedef struct VAPICHandlers { 40 uint32_t set_tpr; 41 uint32_t set_tpr_eax; 42 uint32_t get_tpr[8]; 43 uint32_t get_tpr_stack; 44 } QEMU_PACKED VAPICHandlers; 45 46 typedef struct GuestROMState { 47 char signature[8]; 48 uint32_t vaddr; 49 uint32_t fixup_start; 50 uint32_t fixup_end; 51 uint32_t vapic_vaddr; 52 uint32_t vapic_size; 53 uint32_t vcpu_shift; 54 uint32_t real_tpr_addr; 55 VAPICHandlers up; 56 VAPICHandlers mp; 57 } QEMU_PACKED GuestROMState; 58 59 struct VAPICROMState { 60 SysBusDevice busdev; 61 62 MemoryRegion io; 63 MemoryRegion rom; 64 uint32_t state; 65 uint32_t rom_state_paddr; 66 uint32_t rom_state_vaddr; 67 uint32_t vapic_paddr; 68 uint32_t real_tpr_addr; 69 GuestROMState rom_state; 70 size_t rom_size; 71 bool rom_mapped_writable; 72 VMChangeStateEntry *vmsentry; 73 }; 74 75 #define TYPE_VAPIC "kvmvapic" 76 OBJECT_DECLARE_SIMPLE_TYPE(VAPICROMState, VAPIC) 77 78 #define TPR_INSTR_ABS_MODRM 0x1 79 #define TPR_INSTR_MATCH_MODRM_REG 0x2 80 81 typedef struct TPRInstruction { 82 uint8_t opcode; 83 uint8_t modrm_reg; 84 unsigned int flags; 85 TPRAccess access; 86 size_t length; 87 off_t addr_offset; 88 } TPRInstruction; 89 90 /* must be sorted by length, shortest first */ 91 static const TPRInstruction tpr_instr[] = { 92 { /* mov abs to eax */ 93 .opcode = 0xa1, 94 .access = TPR_ACCESS_READ, 95 .length = 5, 96 .addr_offset = 1, 97 }, 98 { /* mov eax to abs */ 99 .opcode = 0xa3, 100 .access = TPR_ACCESS_WRITE, 101 .length = 5, 102 .addr_offset = 1, 103 }, 104 { /* mov r32 to r/m32 */ 105 .opcode = 0x89, 106 .flags = TPR_INSTR_ABS_MODRM, 107 .access = TPR_ACCESS_WRITE, 108 .length = 6, 109 .addr_offset = 2, 110 }, 111 { /* mov r/m32 to r32 */ 112 .opcode = 0x8b, 113 .flags = TPR_INSTR_ABS_MODRM, 114 .access = TPR_ACCESS_READ, 115 .length = 6, 116 .addr_offset = 2, 117 }, 118 { /* push r/m32 */ 119 .opcode = 0xff, 120 .modrm_reg = 6, 121 .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG, 122 .access = TPR_ACCESS_READ, 123 .length = 6, 124 .addr_offset = 2, 125 }, 126 { /* mov imm32, r/m32 (c7/0) */ 127 .opcode = 0xc7, 128 .modrm_reg = 0, 129 .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG, 130 .access = TPR_ACCESS_WRITE, 131 .length = 10, 132 .addr_offset = 2, 133 }, 134 }; 135 136 static void read_guest_rom_state(VAPICROMState *s) 137 { 138 cpu_physical_memory_read(s->rom_state_paddr, &s->rom_state, 139 sizeof(GuestROMState)); 140 } 141 142 static void write_guest_rom_state(VAPICROMState *s) 143 { 144 cpu_physical_memory_write(s->rom_state_paddr, &s->rom_state, 145 sizeof(GuestROMState)); 146 } 147 148 static void update_guest_rom_state(VAPICROMState *s) 149 { 150 read_guest_rom_state(s); 151 152 s->rom_state.real_tpr_addr = cpu_to_le32(s->real_tpr_addr); 153 s->rom_state.vcpu_shift = cpu_to_le32(VAPIC_CPU_SHIFT); 154 155 write_guest_rom_state(s); 156 } 157 158 static int find_real_tpr_addr(VAPICROMState *s, CPUX86State *env) 159 { 160 CPUState *cs = env_cpu(env); 161 hwaddr paddr; 162 target_ulong addr; 163 164 if (s->state == VAPIC_ACTIVE) { 165 return 0; 166 } 167 /* 168 * If there is no prior TPR access instruction we could analyze (which is 169 * the case after resume from hibernation), we need to scan the possible 170 * virtual address space for the APIC mapping. 171 */ 172 for (addr = 0xfffff000; addr >= 0x80000000; addr -= TARGET_PAGE_SIZE) { 173 paddr = cpu_get_phys_page_debug(cs, addr); 174 if (paddr != APIC_DEFAULT_ADDRESS) { 175 continue; 176 } 177 s->real_tpr_addr = addr + 0x80; 178 update_guest_rom_state(s); 179 return 0; 180 } 181 return -1; 182 } 183 184 static uint8_t modrm_reg(uint8_t modrm) 185 { 186 return (modrm >> 3) & 7; 187 } 188 189 static bool is_abs_modrm(uint8_t modrm) 190 { 191 return (modrm & 0xc7) == 0x05; 192 } 193 194 static bool opcode_matches(uint8_t *opcode, const TPRInstruction *instr) 195 { 196 return opcode[0] == instr->opcode && 197 (!(instr->flags & TPR_INSTR_ABS_MODRM) || is_abs_modrm(opcode[1])) && 198 (!(instr->flags & TPR_INSTR_MATCH_MODRM_REG) || 199 modrm_reg(opcode[1]) == instr->modrm_reg); 200 } 201 202 static int evaluate_tpr_instruction(VAPICROMState *s, X86CPU *cpu, 203 target_ulong *pip, TPRAccess access) 204 { 205 CPUState *cs = CPU(cpu); 206 const TPRInstruction *instr; 207 target_ulong ip = *pip; 208 uint8_t opcode[2]; 209 uint32_t real_tpr_addr; 210 int i; 211 212 if ((ip & 0xf0000000ULL) != 0x80000000ULL && 213 (ip & 0xf0000000ULL) != 0xe0000000ULL) { 214 return -1; 215 } 216 217 /* 218 * Early Windows 2003 SMP initialization contains a 219 * 220 * mov imm32, r/m32 221 * 222 * instruction that is patched by TPR optimization. The problem is that 223 * RSP, used by the patched instruction, is zero, so the guest gets a 224 * double fault and dies. 225 */ 226 if (cpu->env.regs[R_ESP] == 0) { 227 return -1; 228 } 229 230 if (kvm_enabled() && !kvm_irqchip_in_kernel()) { 231 /* 232 * KVM without kernel-based TPR access reporting will pass an IP that 233 * points after the accessing instruction. So we need to look backward 234 * to find the reason. 235 */ 236 for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) { 237 instr = &tpr_instr[i]; 238 if (instr->access != access) { 239 continue; 240 } 241 if (cpu_memory_rw_debug(cs, ip - instr->length, opcode, 242 sizeof(opcode), 0) < 0) { 243 return -1; 244 } 245 if (opcode_matches(opcode, instr)) { 246 ip -= instr->length; 247 goto instruction_ok; 248 } 249 } 250 return -1; 251 } else { 252 if (cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0) < 0) { 253 return -1; 254 } 255 for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) { 256 instr = &tpr_instr[i]; 257 if (opcode_matches(opcode, instr)) { 258 goto instruction_ok; 259 } 260 } 261 return -1; 262 } 263 264 instruction_ok: 265 /* 266 * Grab the virtual TPR address from the instruction 267 * and update the cached values. 268 */ 269 if (cpu_memory_rw_debug(cs, ip + instr->addr_offset, 270 (void *)&real_tpr_addr, 271 sizeof(real_tpr_addr), 0) < 0) { 272 return -1; 273 } 274 real_tpr_addr = le32_to_cpu(real_tpr_addr); 275 if ((real_tpr_addr & 0xfff) != 0x80) { 276 return -1; 277 } 278 s->real_tpr_addr = real_tpr_addr; 279 update_guest_rom_state(s); 280 281 *pip = ip; 282 return 0; 283 } 284 285 static int update_rom_mapping(VAPICROMState *s, CPUX86State *env, target_ulong ip) 286 { 287 CPUState *cs = env_cpu(env); 288 hwaddr paddr; 289 uint32_t rom_state_vaddr; 290 uint32_t pos, patch, offset; 291 292 /* nothing to do if already activated */ 293 if (s->state == VAPIC_ACTIVE) { 294 return 0; 295 } 296 297 /* bail out if ROM init code was not executed (missing ROM?) */ 298 if (s->state == VAPIC_INACTIVE) { 299 return -1; 300 } 301 302 /* find out virtual address of the ROM */ 303 rom_state_vaddr = s->rom_state_paddr + (ip & 0xf0000000); 304 paddr = cpu_get_phys_page_debug(cs, rom_state_vaddr); 305 if (paddr == -1) { 306 return -1; 307 } 308 paddr += rom_state_vaddr & ~TARGET_PAGE_MASK; 309 if (paddr != s->rom_state_paddr) { 310 return -1; 311 } 312 read_guest_rom_state(s); 313 if (memcmp(s->rom_state.signature, "kvm aPiC", 8) != 0) { 314 return -1; 315 } 316 s->rom_state_vaddr = rom_state_vaddr; 317 318 /* fixup addresses in ROM if needed */ 319 if (rom_state_vaddr == le32_to_cpu(s->rom_state.vaddr)) { 320 return 0; 321 } 322 for (pos = le32_to_cpu(s->rom_state.fixup_start); 323 pos < le32_to_cpu(s->rom_state.fixup_end); 324 pos += 4) { 325 cpu_physical_memory_read(paddr + pos - s->rom_state.vaddr, 326 &offset, sizeof(offset)); 327 offset = le32_to_cpu(offset); 328 cpu_physical_memory_read(paddr + offset, &patch, sizeof(patch)); 329 patch = le32_to_cpu(patch); 330 patch += rom_state_vaddr - le32_to_cpu(s->rom_state.vaddr); 331 patch = cpu_to_le32(patch); 332 cpu_physical_memory_write(paddr + offset, &patch, sizeof(patch)); 333 } 334 read_guest_rom_state(s); 335 s->vapic_paddr = paddr + le32_to_cpu(s->rom_state.vapic_vaddr) - 336 le32_to_cpu(s->rom_state.vaddr); 337 338 return 0; 339 } 340 341 /* 342 * Tries to read the unique processor number from the Kernel Processor Control 343 * Region (KPCR) of 32-bit Windows XP and Server 2003. Returns -1 if the KPCR 344 * cannot be accessed or is considered invalid. This also ensures that we are 345 * not patching the wrong guest. 346 */ 347 static int get_kpcr_number(X86CPU *cpu) 348 { 349 CPUX86State *env = &cpu->env; 350 struct kpcr { 351 uint8_t fill1[0x1c]; 352 uint32_t self; 353 uint8_t fill2[0x31]; 354 uint8_t number; 355 } QEMU_PACKED kpcr; 356 357 if (cpu_memory_rw_debug(CPU(cpu), env->segs[R_FS].base, 358 (void *)&kpcr, sizeof(kpcr), 0) < 0 || 359 kpcr.self != env->segs[R_FS].base) { 360 return -1; 361 } 362 return kpcr.number; 363 } 364 365 static int vapic_enable(VAPICROMState *s, X86CPU *cpu) 366 { 367 int cpu_number = get_kpcr_number(cpu); 368 hwaddr vapic_paddr; 369 static const uint8_t enabled = 1; 370 371 if (cpu_number < 0) { 372 return -1; 373 } 374 vapic_paddr = s->vapic_paddr + 375 (((hwaddr)cpu_number) << VAPIC_CPU_SHIFT); 376 cpu_physical_memory_write(vapic_paddr + offsetof(VAPICState, enabled), 377 &enabled, sizeof(enabled)); 378 apic_enable_vapic(cpu->apic_state, vapic_paddr); 379 380 s->state = VAPIC_ACTIVE; 381 382 return 0; 383 } 384 385 static void patch_byte(X86CPU *cpu, target_ulong addr, uint8_t byte) 386 { 387 cpu_memory_rw_debug(CPU(cpu), addr, &byte, 1, 1); 388 } 389 390 static void patch_call(X86CPU *cpu, target_ulong ip, uint32_t target) 391 { 392 uint32_t offset; 393 394 offset = cpu_to_le32(target - ip - 5); 395 patch_byte(cpu, ip, 0xe8); /* call near */ 396 cpu_memory_rw_debug(CPU(cpu), ip + 1, (void *)&offset, sizeof(offset), 1); 397 } 398 399 typedef struct PatchInfo { 400 VAPICHandlers *handler; 401 target_ulong ip; 402 } PatchInfo; 403 404 static void do_patch_instruction(CPUState *cs, run_on_cpu_data data) 405 { 406 X86CPU *x86_cpu = X86_CPU(cs); 407 PatchInfo *info = (PatchInfo *) data.host_ptr; 408 VAPICHandlers *handlers = info->handler; 409 target_ulong ip = info->ip; 410 uint8_t opcode[2]; 411 uint32_t imm32 = 0; 412 413 cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0); 414 415 switch (opcode[0]) { 416 case 0x89: /* mov r32 to r/m32 */ 417 patch_byte(x86_cpu, ip, 0x50 + modrm_reg(opcode[1])); /* push reg */ 418 patch_call(x86_cpu, ip + 1, handlers->set_tpr); 419 break; 420 case 0x8b: /* mov r/m32 to r32 */ 421 patch_byte(x86_cpu, ip, 0x90); 422 patch_call(x86_cpu, ip + 1, handlers->get_tpr[modrm_reg(opcode[1])]); 423 break; 424 case 0xa1: /* mov abs to eax */ 425 patch_call(x86_cpu, ip, handlers->get_tpr[0]); 426 break; 427 case 0xa3: /* mov eax to abs */ 428 patch_call(x86_cpu, ip, handlers->set_tpr_eax); 429 break; 430 case 0xc7: /* mov imm32, r/m32 (c7/0) */ 431 patch_byte(x86_cpu, ip, 0x68); /* push imm32 */ 432 cpu_memory_rw_debug(cs, ip + 6, (void *)&imm32, sizeof(imm32), 0); 433 cpu_memory_rw_debug(cs, ip + 1, (void *)&imm32, sizeof(imm32), 1); 434 patch_call(x86_cpu, ip + 5, handlers->set_tpr); 435 break; 436 case 0xff: /* push r/m32 */ 437 patch_byte(x86_cpu, ip, 0x50); /* push eax */ 438 patch_call(x86_cpu, ip + 1, handlers->get_tpr_stack); 439 break; 440 default: 441 abort(); 442 } 443 444 g_free(info); 445 } 446 447 static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) 448 { 449 MachineState *ms = MACHINE(qdev_get_machine()); 450 CPUState *cs = CPU(cpu); 451 VAPICHandlers *handlers; 452 PatchInfo *info; 453 454 if (ms->smp.cpus == 1) { 455 handlers = &s->rom_state.up; 456 } else { 457 handlers = &s->rom_state.mp; 458 } 459 460 info = g_new(PatchInfo, 1); 461 info->handler = handlers; 462 info->ip = ip; 463 464 async_safe_run_on_cpu(cs, do_patch_instruction, RUN_ON_CPU_HOST_PTR(info)); 465 } 466 467 void vapic_report_tpr_access(DeviceState *dev, CPUState *cs, target_ulong ip, 468 TPRAccess access) 469 { 470 VAPICROMState *s = VAPIC(dev); 471 X86CPU *cpu = X86_CPU(cs); 472 CPUX86State *env = &cpu->env; 473 474 cpu_synchronize_state(cs); 475 476 if (evaluate_tpr_instruction(s, cpu, &ip, access) < 0) { 477 if (s->state == VAPIC_ACTIVE) { 478 vapic_enable(s, cpu); 479 } 480 return; 481 } 482 if (update_rom_mapping(s, env, ip) < 0) { 483 return; 484 } 485 if (vapic_enable(s, cpu) < 0) { 486 return; 487 } 488 patch_instruction(s, cpu, ip); 489 } 490 491 typedef struct VAPICEnableTPRReporting { 492 DeviceState *apic; 493 bool enable; 494 } VAPICEnableTPRReporting; 495 496 static void vapic_do_enable_tpr_reporting(CPUState *cpu, run_on_cpu_data data) 497 { 498 VAPICEnableTPRReporting *info = data.host_ptr; 499 apic_enable_tpr_access_reporting(info->apic, info->enable); 500 } 501 502 static void vapic_enable_tpr_reporting(bool enable) 503 { 504 VAPICEnableTPRReporting info = { 505 .enable = enable, 506 }; 507 CPUState *cs; 508 X86CPU *cpu; 509 510 CPU_FOREACH(cs) { 511 cpu = X86_CPU(cs); 512 info.apic = cpu->apic_state; 513 run_on_cpu(cs, vapic_do_enable_tpr_reporting, RUN_ON_CPU_HOST_PTR(&info)); 514 } 515 } 516 517 static void vapic_reset(DeviceState *dev) 518 { 519 VAPICROMState *s = VAPIC(dev); 520 521 s->state = VAPIC_INACTIVE; 522 s->rom_state_paddr = 0; 523 vapic_enable_tpr_reporting(false); 524 } 525 526 /* 527 * Set the IRQ polling hypercalls to the supported variant: 528 * - vmcall if using KVM in-kernel irqchip 529 * - 32-bit VAPIC port write otherwise 530 */ 531 static int patch_hypercalls(VAPICROMState *s) 532 { 533 hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK; 534 static const uint8_t vmcall_pattern[] = { /* vmcall */ 535 0xb8, 0x1, 0, 0, 0, 0xf, 0x1, 0xc1 536 }; 537 static const uint8_t outl_pattern[] = { /* nop; outl %eax,0x7e */ 538 0xb8, 0x1, 0, 0, 0, 0x90, 0xe7, 0x7e 539 }; 540 uint8_t alternates[2]; 541 const uint8_t *pattern; 542 const uint8_t *patch; 543 off_t pos; 544 uint8_t *rom; 545 546 rom = g_malloc(s->rom_size); 547 cpu_physical_memory_read(rom_paddr, rom, s->rom_size); 548 549 for (pos = 0; pos < s->rom_size - sizeof(vmcall_pattern); pos++) { 550 if (kvm_irqchip_in_kernel()) { 551 pattern = outl_pattern; 552 alternates[0] = outl_pattern[7]; 553 alternates[1] = outl_pattern[7]; 554 patch = &vmcall_pattern[5]; 555 } else { 556 pattern = vmcall_pattern; 557 alternates[0] = vmcall_pattern[7]; 558 alternates[1] = 0xd9; /* AMD's VMMCALL */ 559 patch = &outl_pattern[5]; 560 } 561 if (memcmp(rom + pos, pattern, 7) == 0 && 562 (rom[pos + 7] == alternates[0] || rom[pos + 7] == alternates[1])) { 563 cpu_physical_memory_write(rom_paddr + pos + 5, patch, 3); 564 /* 565 * Don't flush the tb here. Under ordinary conditions, the patched 566 * calls are miles away from the current IP. Under malicious 567 * conditions, the guest could trick us to crash. 568 */ 569 } 570 } 571 572 g_free(rom); 573 return 0; 574 } 575 576 /* 577 * For TCG mode or the time KVM honors read-only memory regions, we need to 578 * enable write access to the option ROM so that variables can be updated by 579 * the guest. 580 */ 581 static int vapic_map_rom_writable(VAPICROMState *s) 582 { 583 hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK; 584 MemoryRegionSection section; 585 MemoryRegion *mr = get_system_memory(); 586 size_t rom_size; 587 uint8_t *ram; 588 589 if (s->rom_mapped_writable) { 590 memory_region_del_subregion(mr, &s->rom); 591 object_unparent(OBJECT(&s->rom)); 592 } 593 594 /* grab RAM memory region (region @rom_paddr may still be pc.rom) */ 595 section = memory_region_find(mr, 0, 1); 596 597 /* read ROM size from RAM region */ 598 if (rom_paddr + 2 >= memory_region_size(section.mr)) { 599 return -1; 600 } 601 ram = memory_region_get_ram_ptr(section.mr); 602 rom_size = ram[rom_paddr + 2] * ROM_BLOCK_SIZE; 603 if (rom_size == 0) { 604 return -1; 605 } 606 s->rom_size = rom_size; 607 608 /* We need to round to avoid creating subpages 609 * from which we cannot run code. */ 610 rom_size += rom_paddr & ~TARGET_PAGE_MASK; 611 rom_paddr &= TARGET_PAGE_MASK; 612 rom_size = TARGET_PAGE_ALIGN(rom_size); 613 614 memory_region_init_alias(&s->rom, OBJECT(s), "kvmvapic-rom", section.mr, 615 rom_paddr, rom_size); 616 memory_region_add_subregion_overlap(mr, rom_paddr, &s->rom, 1000); 617 s->rom_mapped_writable = true; 618 memory_region_unref(section.mr); 619 620 return 0; 621 } 622 623 static int vapic_prepare(VAPICROMState *s) 624 { 625 if (vapic_map_rom_writable(s) < 0) { 626 return -1; 627 } 628 629 if (patch_hypercalls(s) < 0) { 630 return -1; 631 } 632 633 vapic_enable_tpr_reporting(true); 634 635 return 0; 636 } 637 638 static void vapic_write(void *opaque, hwaddr addr, uint64_t data, 639 unsigned int size) 640 { 641 VAPICROMState *s = opaque; 642 X86CPU *cpu; 643 CPUX86State *env; 644 hwaddr rom_paddr; 645 646 if (!current_cpu) { 647 return; 648 } 649 650 cpu_synchronize_state(current_cpu); 651 cpu = X86_CPU(current_cpu); 652 env = &cpu->env; 653 654 /* 655 * The VAPIC supports two PIO-based hypercalls, both via port 0x7E. 656 * o 16-bit write access: 657 * Reports the option ROM initialization to the hypervisor. Written 658 * value is the offset of the state structure in the ROM. 659 * o 8-bit write access: 660 * Reactivates the VAPIC after a guest hibernation, i.e. after the 661 * option ROM content has been re-initialized by a guest power cycle. 662 * o 32-bit write access: 663 * Poll for pending IRQs, considering the current VAPIC state. 664 */ 665 switch (size) { 666 case 2: 667 if (s->state == VAPIC_INACTIVE) { 668 rom_paddr = (env->segs[R_CS].base + env->eip) & ROM_BLOCK_MASK; 669 s->rom_state_paddr = rom_paddr + data; 670 671 s->state = VAPIC_STANDBY; 672 } 673 if (vapic_prepare(s) < 0) { 674 s->state = VAPIC_INACTIVE; 675 s->rom_state_paddr = 0; 676 break; 677 } 678 break; 679 case 1: 680 if (kvm_enabled()) { 681 /* 682 * Disable triggering instruction in ROM by writing a NOP. 683 * 684 * We cannot do this in TCG mode as the reported IP is not 685 * accurate. 686 */ 687 pause_all_vcpus(); 688 patch_byte(cpu, env->eip - 2, 0x66); 689 patch_byte(cpu, env->eip - 1, 0x90); 690 resume_all_vcpus(); 691 } 692 693 if (s->state == VAPIC_ACTIVE) { 694 break; 695 } 696 if (update_rom_mapping(s, env, env->eip) < 0) { 697 break; 698 } 699 if (find_real_tpr_addr(s, env) < 0) { 700 break; 701 } 702 vapic_enable(s, cpu); 703 break; 704 default: 705 case 4: 706 if (!kvm_irqchip_in_kernel()) { 707 apic_poll_irq(cpu->apic_state); 708 } 709 break; 710 } 711 } 712 713 static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size) 714 { 715 return 0xffffffff; 716 } 717 718 static const MemoryRegionOps vapic_ops = { 719 .write = vapic_write, 720 .read = vapic_read, 721 .endianness = DEVICE_NATIVE_ENDIAN, 722 }; 723 724 static void vapic_realize(DeviceState *dev, Error **errp) 725 { 726 SysBusDevice *sbd = SYS_BUS_DEVICE(dev); 727 VAPICROMState *s = VAPIC(dev); 728 729 memory_region_init_io(&s->io, OBJECT(s), &vapic_ops, s, "kvmvapic", 2); 730 memory_region_add_subregion(get_system_io(), VAPIC_IO_PORT, &s->io); 731 sysbus_init_ioports(sbd, VAPIC_IO_PORT, 2); 732 733 option_rom[nb_option_roms].name = "kvmvapic.bin"; 734 option_rom[nb_option_roms].bootindex = -1; 735 nb_option_roms++; 736 } 737 738 static void do_vapic_enable(CPUState *cs, run_on_cpu_data data) 739 { 740 VAPICROMState *s = data.host_ptr; 741 X86CPU *cpu = X86_CPU(cs); 742 743 static const uint8_t enabled = 1; 744 cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled), 745 &enabled, sizeof(enabled)); 746 apic_enable_vapic(cpu->apic_state, s->vapic_paddr); 747 s->state = VAPIC_ACTIVE; 748 } 749 750 static void vapic_vm_state_change(void *opaque, bool running, RunState state) 751 { 752 MachineState *ms = MACHINE(qdev_get_machine()); 753 VAPICROMState *s = opaque; 754 uint8_t *zero; 755 756 if (!running) { 757 return; 758 } 759 760 if (s->state == VAPIC_ACTIVE) { 761 if (ms->smp.cpus == 1) { 762 run_on_cpu(first_cpu, do_vapic_enable, RUN_ON_CPU_HOST_PTR(s)); 763 } else { 764 zero = g_malloc0(s->rom_state.vapic_size); 765 cpu_physical_memory_write(s->vapic_paddr, zero, 766 s->rom_state.vapic_size); 767 g_free(zero); 768 } 769 } 770 771 qemu_del_vm_change_state_handler(s->vmsentry); 772 s->vmsentry = NULL; 773 } 774 775 static int vapic_post_load(void *opaque, int version_id) 776 { 777 VAPICROMState *s = opaque; 778 779 /* 780 * The old implementation of qemu-kvm did not provide the state 781 * VAPIC_STANDBY. Reconstruct it. 782 */ 783 if (s->state == VAPIC_INACTIVE && s->rom_state_paddr != 0) { 784 s->state = VAPIC_STANDBY; 785 } 786 787 if (s->state != VAPIC_INACTIVE) { 788 if (vapic_prepare(s) < 0) { 789 return -1; 790 } 791 } 792 793 if (!s->vmsentry) { 794 s->vmsentry = 795 qemu_add_vm_change_state_handler(vapic_vm_state_change, s); 796 } 797 return 0; 798 } 799 800 static const VMStateDescription vmstate_handlers = { 801 .name = "kvmvapic-handlers", 802 .version_id = 1, 803 .minimum_version_id = 1, 804 .fields = (const VMStateField[]) { 805 VMSTATE_UINT32(set_tpr, VAPICHandlers), 806 VMSTATE_UINT32(set_tpr_eax, VAPICHandlers), 807 VMSTATE_UINT32_ARRAY(get_tpr, VAPICHandlers, 8), 808 VMSTATE_UINT32(get_tpr_stack, VAPICHandlers), 809 VMSTATE_END_OF_LIST() 810 } 811 }; 812 813 static const VMStateDescription vmstate_guest_rom = { 814 .name = "kvmvapic-guest-rom", 815 .version_id = 1, 816 .minimum_version_id = 1, 817 .fields = (const VMStateField[]) { 818 VMSTATE_UNUSED(8), /* signature */ 819 VMSTATE_UINT32(vaddr, GuestROMState), 820 VMSTATE_UINT32(fixup_start, GuestROMState), 821 VMSTATE_UINT32(fixup_end, GuestROMState), 822 VMSTATE_UINT32(vapic_vaddr, GuestROMState), 823 VMSTATE_UINT32(vapic_size, GuestROMState), 824 VMSTATE_UINT32(vcpu_shift, GuestROMState), 825 VMSTATE_UINT32(real_tpr_addr, GuestROMState), 826 VMSTATE_STRUCT(up, GuestROMState, 0, vmstate_handlers, VAPICHandlers), 827 VMSTATE_STRUCT(mp, GuestROMState, 0, vmstate_handlers, VAPICHandlers), 828 VMSTATE_END_OF_LIST() 829 } 830 }; 831 832 static const VMStateDescription vmstate_vapic = { 833 .name = "kvm-tpr-opt", /* compatible with qemu-kvm VAPIC */ 834 .version_id = 1, 835 .minimum_version_id = 1, 836 .post_load = vapic_post_load, 837 .fields = (const VMStateField[]) { 838 VMSTATE_STRUCT(rom_state, VAPICROMState, 0, vmstate_guest_rom, 839 GuestROMState), 840 VMSTATE_UINT32(state, VAPICROMState), 841 VMSTATE_UINT32(real_tpr_addr, VAPICROMState), 842 VMSTATE_UINT32(rom_state_vaddr, VAPICROMState), 843 VMSTATE_UINT32(vapic_paddr, VAPICROMState), 844 VMSTATE_UINT32(rom_state_paddr, VAPICROMState), 845 VMSTATE_END_OF_LIST() 846 } 847 }; 848 849 static void vapic_class_init(ObjectClass *klass, void *data) 850 { 851 DeviceClass *dc = DEVICE_CLASS(klass); 852 853 device_class_set_legacy_reset(dc, vapic_reset); 854 dc->vmsd = &vmstate_vapic; 855 dc->realize = vapic_realize; 856 } 857 858 static const TypeInfo vapic_type = { 859 .name = TYPE_VAPIC, 860 .parent = TYPE_SYS_BUS_DEVICE, 861 .instance_size = sizeof(VAPICROMState), 862 .class_init = vapic_class_init, 863 }; 864 865 static void vapic_register(void) 866 { 867 type_register_static(&vapic_type); 868 } 869 870 type_init(vapic_register); 871