// SPDX-License-Identifier: GPL-2.0-only

/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/export.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/delay.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "ioapic.h"
#include "trace.h"
#include "x86.h"
#include "xen.h"
#include "cpuid.h"
#include "hyperv.h"
#include "smm.h"

/*
 * mod_64(x, y): remainder of x divided by y.  Builds without CONFIG_X86_64
 * compute it from div64_u64() instead of using the % operator directly.
 */
#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

/* 0x14 is the version for Xeon and Pentium 8.4.8 */
#define APIC_VERSION			0x14UL
#define LAPIC_MMIO_LENGTH		(1 << 12)
/* The following defines are not in apicdef.h */
#define MAX_APIC_VECTOR			256
#define APIC_VECTORS_PER_REG		32

static bool lapic_timer_advance_dynamic __read_mostly;
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN	100	/* clock cycles */
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX	10000	/* clock cycles */
#define LAPIC_TIMER_ADVANCE_NS_INIT	1000
#define LAPIC_TIMER_ADVANCE_NS_MAX     5000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8

/* Forward declarations. */
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data);
static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data);

/* Store the 32-bit value @val at byte offset @reg_off within @regs. */
static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val)
{
	*((u32 *) (regs + reg_off)) = val;
}

/* Store @val at offset @reg_off in @apic's register area (apic->regs). */
static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
	__kvm_lapic_set_reg(apic->regs, reg_off, val);
}

/*
 * Load a 64-bit value from offset @reg within @regs.  Only APIC_ICR is
 * accepted as @reg; any other offset breaks the build.
 */
static __always_inline u64 __kvm_lapic_get_reg64(char *regs, int reg)
{
	BUILD_BUG_ON(reg != APIC_ICR);
	return *((u64 *) (regs + reg));
}

/* 64-bit register read from @apic's register area; see __kvm_lapic_get_reg64(). */
static __always_inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg)
{
	return __kvm_lapic_get_reg64(apic->regs, reg);
}

/*
 * Store the 64-bit value @val at offset @reg within @regs.  Only APIC_ICR
 * is accepted as @reg; any other offset breaks the build.
 */
static __always_inline void __kvm_lapic_set_reg64(char *regs, int reg, u64 val)
{
	BUILD_BUG_ON(reg != APIC_ICR);
	*((u64 *) (regs + reg)) = val;
}

/* 64-bit register write to @apic's register area; see __kvm_lapic_set_reg64(). */
static __always_inline void kvm_lapic_set_reg64(struct kvm_lapic *apic,
						int reg, u64 val)
{
	__kvm_lapic_set_reg64(apic->regs, reg, val);
}

/*
 * Test the bit for vector @vec in the register bank starting at @bitmap.
 * REG_POS(vec) selects the register holding the vector and VEC_POS(vec) the
 * bit inside that register.
 */
static inline int apic_test_vector(int vec, void *bitmap)
{
	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

/* Return true if @vector is set in either the ISR or the IRR of @vcpu's APIC. */
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
		apic_test_vector(vector, apic->regs + APIC_IRR);
}

/* Set the bit for @vec via __test_and_set_bit() and return its old value. */
static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

/* Clear the bit for @vec via __test_and_clear_bit() and return its old value. */
static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

/*
 * Deferred static keys for hardware-/software-disabled APICs.
 * apic_sw_disabled is adjusted in apic_set_spiv() whenever an APIC's
 * software-enable state changes.
 */
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);

/* Nonzero only if the APIC is both software-enabled and hardware-enabled. */
static inline int apic_enabled(struct kvm_lapic *apic)
{
	return
kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

/* Bits common to every entry of apic_lvt_mask[]. */
#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

/* Mask used for the LVT_LINT0 and LVT_LINT1 entries of apic_lvt_mask[]. */
#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

/* The x2APIC ID is always the ID of the vCPU that owns the APIC. */
static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
{
	return apic->vcpu->vcpu_id;
}

/*
 * Timer interrupts can be posted only if pi_inject_timer is set, APICv is
 * active for the vCPU, and kvm_mwait_in_guest() or kvm_hlt_in_guest() is
 * true for the VM.
 */
static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
{
	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
		(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
}

/*
 * The hv timer is usable only if kvm_x86_ops.set_hv_timer is provided and
 * neither kvm_mwait_in_guest() nor kvm_can_post_timer_interrupt() holds.
 */
bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
{
	return kvm_x86_ops.set_hv_timer
	       && !(kvm_mwait_in_guest(vcpu->kvm) ||
		    kvm_can_post_timer_interrupt(vcpu));
}

/* Posting is used only while the vCPU's mode is IN_GUEST_MODE. */
static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
{
	return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
}

/*
 * Compute the x2APIC LDR for @id: (id >> 4) goes into bits 31:16 and a single
 * bit, selected by the low four bits of @id, is set in bits 15:0.
 */
static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
{
	return ((id >> 4) << 16) | (1 << (id & 0xf));
}

/*
 * Translate the logical destination @dest_id into a cluster array (*cluster)
 * and a bitmask of entries within it (*mask), according to map->logical_mode.
 *
 * Returns false if the map cannot be used (KVM_APIC_MODE_MAP_DISABLED or an
 * unexpected mode, which also warns); returns true otherwise.  Note that in
 * the x2APIC case *cluster is left untouched and *mask is 0 when the cluster
 * lies beyond map->max_apic_id.
 */
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
	switch (map->logical_mode) {
	case KVM_APIC_MODE_SW_DISABLED:
		/* Arbitrarily use the flat map so that @cluster isn't NULL.
*/
		*cluster = map->xapic_flat_map;
		*mask = 0;
		return true;
	case KVM_APIC_MODE_X2APIC: {
		/* Each cluster covers 16 consecutive entries of phys_map. */
		u32 offset = (dest_id >> 16) * 16;
		u32 max_apic_id = map->max_apic_id;

		if (offset <= max_apic_id) {
			/* The last cluster may be only partially populated. */
			u8 cluster_size = min(max_apic_id - offset + 1, 16U);

			offset = array_index_nospec(offset, map->max_apic_id + 1);
			*cluster = &map->phys_map[offset];
			/* Drop destination bits beyond the end of the map. */
			*mask = dest_id & (0xffff >> (16 - cluster_size));
		} else {
			*mask = 0;
		}

		return true;
		}
	case KVM_APIC_MODE_XAPIC_FLAT:
		*cluster = map->xapic_flat_map;
		*mask = dest_id & 0xff;
		return true;
	case KVM_APIC_MODE_XAPIC_CLUSTER:
		/* Bits 7:4 select the cluster, bits 3:0 the members. */
		*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
		*mask = dest_id & 0xf;
		return true;
	case KVM_APIC_MODE_MAP_DISABLED:
		return false;
	default:
		WARN_ON_ONCE(1);
		return false;
	}
}

/* RCU callback: free the kvm_apic_map that embeds @rcu. */
static void kvm_apic_map_free(struct rcu_head *rcu)
{
	struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);

	kvfree(map);
}

/*
 * Record @vcpu's APIC in new->phys_map.
 *
 * Returns 0 on success, -E2BIG if the vCPU's x2APIC ID does not fit in the
 * map, and -EINVAL if the xAPIC ID does not fit (which also warns) or, when
 * x2apic_format is not enabled, if the physical ID is already mapped.
 * *xapic_id_mismatch is set to true if an APIC in xAPIC mode has an ID that
 * differs from the vCPU ID truncated to 8 bits; it is never cleared here.
 */
static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
				    struct kvm_vcpu *vcpu,
				    bool *xapic_id_mismatch)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 x2apic_id = kvm_x2apic_id(apic);
	u32 xapic_id = kvm_xapic_id(apic);
	u32 physical_id;

	/*
	 * For simplicity, KVM always allocates enough space for all possible
	 * xAPIC IDs.  Yell, but don't kill the VM, as KVM can continue on
	 * without the optimized map.
	 */
	if (WARN_ON_ONCE(xapic_id > new->max_apic_id))
		return -EINVAL;

	/*
	 * Bail if a vCPU was added and/or enabled its APIC between allocating
	 * the map and doing the actual calculations for the map.  Note, KVM
	 * hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if
	 * the compiler decides to reload x2apic_id after this check.
*/
	if (x2apic_id > new->max_apic_id)
		return -E2BIG;

	/*
	 * Deliberately truncate the vCPU ID when detecting a mismatched APIC
	 * ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a
	 * 32-bit value.  Any unwanted aliasing that results from the
	 * truncation will be detected below.
	 */
	if (!apic_x2apic_mode(apic) && xapic_id != (u8)vcpu->vcpu_id)
		*xapic_id_mismatch = true;

	/*
	 * Apply KVM's hotplug hack if userspace has enabled 32-bit x2APIC IDs.
	 * Allow sending events to vCPUs by their x2APIC ID even if the target
	 * vCPU is in legacy xAPIC mode, and silently ignore aliased xAPIC IDs
	 * (the x2APIC ID is truncated to 8 bits, causing IDs > 0xff to wrap
	 * and collide).
	 *
	 * Honor the architectural (and KVM's non-optimized) behavior if
	 * userspace has not enabled 32-bit x2APIC IDs.  Each APIC is supposed
	 * to process messages independently.  If multiple vCPUs have the same
	 * effective APIC ID, e.g. due to the x2APIC wrap or because the guest
	 * manually modified its xAPIC IDs, events targeting that ID are
	 * supposed to be recognized by all vCPUs with said ID.
	 */
	if (vcpu->kvm->arch.x2apic_format) {
		/* See also kvm_apic_match_physical_addr(). */
		if (apic_x2apic_mode(apic) || x2apic_id > 0xff)
			new->phys_map[x2apic_id] = apic;

		/* First come, first served: an occupied xAPIC slot is kept. */
		if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
			new->phys_map[xapic_id] = apic;
	} else {
		/*
		 * Disable the optimized map if the physical APIC ID is already
		 * mapped, i.e. is aliased to multiple vCPUs.  The optimized
		 * map requires a strict 1:1 mapping between IDs and vCPUs.
*/
		if (apic_x2apic_mode(apic))
			physical_id = x2apic_id;
		else
			physical_id = xapic_id;

		if (new->phys_map[physical_id])
			return -EINVAL;

		new->phys_map[physical_id] = apic;
	}

	return 0;
}

/*
 * Fold @vcpu's APIC into the logical-destination part of @new.
 *
 * Nothing is done if the logical map is already disabled, if the APIC is
 * software-disabled, or if its LDR is zero.  The logical map is disabled
 * (new->logical_mode = KVM_APIC_MODE_MAP_DISABLED) if APICs disagree on the
 * logical mode, or if an xAPIC LDR does not select exactly one unused slot.
 */
static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
					struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	enum kvm_apic_logical_mode logical_mode;
	struct kvm_lapic **cluster;
	u16 mask;
	u32 ldr;

	if (new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
		return;

	if (!kvm_apic_sw_enabled(apic))
		return;

	ldr = kvm_lapic_get_reg(apic, APIC_LDR);
	if (!ldr)
		return;

	/* Determine which logical mode this APIC is configured for. */
	if (apic_x2apic_mode(apic)) {
		logical_mode = KVM_APIC_MODE_X2APIC;
	} else {
		ldr = GET_APIC_LOGICAL_ID(ldr);
		if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
			logical_mode = KVM_APIC_MODE_XAPIC_FLAT;
		else
			logical_mode = KVM_APIC_MODE_XAPIC_CLUSTER;
	}

	/*
	 * To optimize logical mode delivery, all software-enabled APICs must
	 * be configured for the same mode.
	 */
	if (new->logical_mode == KVM_APIC_MODE_SW_DISABLED) {
		new->logical_mode = logical_mode;
	} else if (new->logical_mode != logical_mode) {
		new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
		return;
	}

	/*
	 * In x2APIC mode, the LDR is read-only and derived directly from the
	 * x2APIC ID, thus is guaranteed to be addressable.  KVM reuses
	 * kvm_apic_map.phys_map to optimize logical mode x2APIC interrupts by
	 * reversing the LDR calculation to get cluster of APICs, i.e. no
	 * additional work is required.
*/
	if (apic_x2apic_mode(apic)) {
		WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic)));
		return;
	}

	if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
							&cluster, &mask))) {
		new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
		return;
	}

	if (!mask)
		return;

	/*
	 * The LDR must select exactly one slot, and that slot must still be
	 * free; otherwise the logical map cannot be used.  @ldr is reused
	 * here as the index of the selected slot.
	 */
	ldr = ffs(mask) - 1;
	if (!is_power_of_2(mask) || cluster[ldr])
		new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
	else
		cluster[ldr] = apic;
}

/*
 * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
 *
 * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
 * apic_map_lock held.
 */
enum {
	CLEAN,
	UPDATE_IN_PROGRESS,
	DIRTY
};

/*
 * Rebuild kvm->arch.apic_map if it has been marked dirty.
 *
 * A new map is allocated and filled from every vCPU with a present APIC, the
 * APICv inhibits for physical/logical ID aliasing and for modified APIC IDs
 * are updated, the new map (possibly NULL) is published with
 * rcu_assign_pointer(), the old one is freed via call_rcu(), and finally an
 * IOAPIC scan is requested.
 */
void kvm_recalculate_apic_map(struct kvm *kvm)
{
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	unsigned long i;
	u32 max_id = 255; /* enough space for any xAPIC ID */
	bool xapic_id_mismatch;
	int r;

	/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
	if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
		return;

	WARN_ONCE(!irqchip_in_kernel(kvm),
		  "Dirty APIC map without an in-kernel local APIC");

	mutex_lock(&kvm->arch.apic_map_lock);

retry:
	/*
	 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean)
	 * or the APIC registers (if dirty).  Note, on retry the map may have
	 * not yet been marked dirty by whatever task changed a vCPU's x2APIC
	 * ID, i.e. the map may still show up as in-progress.  In that case
	 * this task still needs to retry and complete its calculation.
	 */
	if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
				   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
		/* Someone else has updated the map.
*/
		mutex_unlock(&kvm->arch.apic_map_lock);
		return;
	}

	/*
	 * Reset the mismatch flag between attempts so that KVM does the right
	 * thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e.
	 * keep max_id strictly increasing.  Disallowing max_id from shrinking
	 * ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU
	 * with the highest x2APIC ID is toggling its APIC on and off.
	 */
	xapic_id_mismatch = false;

	/* Size the map for the largest x2APIC ID among the present APICs. */
	kvm_for_each_vcpu(i, vcpu, kvm)
		if (kvm_apic_present(vcpu))
			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));

	new = kvzalloc(sizeof(struct kvm_apic_map) +
	                   sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
			   GFP_KERNEL_ACCOUNT);

	/* On allocation failure a NULL map is published below. */
	if (!new)
		goto out;

	new->max_apic_id = max_id;
	new->logical_mode = KVM_APIC_MODE_SW_DISABLED;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_apic_present(vcpu))
			continue;

		r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch);
		if (r) {
			kvfree(new);
			new = NULL;
			/*
			 * -E2BIG means an x2APIC ID did not fit in the map
			 * that was just sized; start over so that max_id is
			 * recomputed.  Any other error publishes a NULL map.
			 */
			if (r == -E2BIG) {
				cond_resched();
				goto retry;
			}

			goto out;
		}

		kvm_recalculate_logical_map(new, vcpu);
	}
out:
	/*
	 * The optimized map is effectively KVM's internal version of APICv,
	 * and all unwanted aliasing that results in disabling the optimized
	 * map also applies to APICv.
*/
	if (!new)
		kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
	else
		kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);

	if (!new || new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
		kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);
	else
		kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);

	if (xapic_id_mismatch)
		kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
	else
		kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);

	/* Publish the new map (which may be NULL) and remember the old one. */
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));
	rcu_assign_pointer(kvm->arch.apic_map, new);
	/*
	 * Write kvm->arch.apic_map before clearing kvm->arch.apic_map_dirty.
	 * If another update has come in, leave it DIRTY.
	 */
	atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
			       UPDATE_IN_PROGRESS, CLEAN);
	mutex_unlock(&kvm->arch.apic_map_lock);

	/* The old map is freed through the RCU callback kvm_apic_map_free(). */
	if (old)
		call_rcu(&old->rcu, kvm_apic_map_free);

	kvm_make_scan_ioapic_request(kvm);
}

/*
 * Write the spurious-interrupt vector register.  When the software-enable
 * bit changes, update apic->sw_enabled and the apic_sw_disabled static key,
 * and mark the APIC map dirty.  Whenever the APIC ends up software-enabled,
 * also raise KVM_REQ_APF_READY and notify the Xen code.
 */
static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
	bool enabled = val & APIC_SPIV_APIC_ENABLED;

	kvm_lapic_set_reg(apic, APIC_SPIV, val);

	if (enabled != apic->sw_enabled) {
		apic->sw_enabled = enabled;
		/* The key counts software-disabled APICs. */
		if (enabled)
			static_branch_slow_dec_deferred(&apic_sw_disabled);
		else
			static_branch_inc(&apic_sw_disabled.key);

		atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
	}

	/* Check if there are APF page ready requests pending */
	if (enabled) {
		kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
		kvm_xen_sw_enable_lapic(apic->vcpu);
	}
}

/*
 * Write the 8-bit xAPIC ID, stored in bits 31:24 of APIC_ID, and mark the
 * APIC map dirty.
 */
static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
{
	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}

/* Write the logical destination register and mark the APIC map dirty. */
static
inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
	kvm_lapic_set_reg(apic, APIC_LDR, id);
	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}

/* Write the destination format register and mark the APIC map dirty. */
static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
{
	kvm_lapic_set_reg(apic, APIC_DFR, val);
	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}

/*
 * Write the x2APIC ID together with the LDR derived from it, and mark the
 * APIC map dirty.  Warns (once) if @id is not the vCPU ID.
 */
static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
	u32 ldr = kvm_apic_calc_x2apic_ldr(id);

	WARN_ON_ONCE(id != apic->vcpu->vcpu_id);

	kvm_lapic_set_reg(apic, APIC_ID, id);
	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}

/* Nonzero if the LVT register at offset @lvt_type is not masked. */
static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
	return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

/* Nonzero if the cached timer mode is one-shot. */
static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
}

/* Nonzero if the cached timer mode is periodic. */
static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
}

/* Nonzero if the cached timer mode is TSC-deadline. */
static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
}

/* Nonzero if @lvt_val is unmasked and its delivery mode is NMI. */
static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

/* True if @lvt_index is below the number of LVT entries of this APIC. */
static inline bool kvm_lapic_lvt_supported(struct kvm_lapic *apic, int lvt_index)
{
	return apic->nr_lvt_entries > lvt_index;
}

/*
 * Number of LVT entries for @vcpu: the maximum, minus one if MCG_CMCI_P is
 * not set in the vCPU's mcg_cap.
 */
static inline int kvm_apic_calc_nr_lvt_entries(struct kvm_vcpu *vcpu)
{
	return KVM_APIC_MAX_NR_LVT_ENTRIES - !(vcpu->arch.mcg_cap & MCG_CMCI_P);
}

/*
 * Compute and store the APIC version register (APIC_LVR) for @vcpu.  Does
 * nothing if the local APIC is not in-kernel.
 */
void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 v = 0;

	if (!lapic_in_kernel(vcpu))
		return;

	/* Version in the low bits, (number of LVT entries - 1) from bit 16. */
	v = APIC_VERSION
| ((apic->nr_lvt_entries - 1) << 16);

	/*
	 * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
	 * which doesn't have EOI register; Some buggy OSes (e.g. Windows with
	 * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC
	 * version first and level-triggered interrupts never get EOIed in
	 * IOAPIC.
	 */
	if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
	    !ioapic_in_kernel(vcpu->kvm))
		v |= APIC_LVR_DIRECTED_EOI;
	kvm_lapic_set_reg(apic, APIC_LVR, v);
}

/*
 * Recompute the number of LVT entries after the vCPU's mcg_cap has been set.
 * If the number changed, mask any newly available entries, store the new
 * count and refresh the version register.  Does nothing if the local APIC is
 * not in-kernel or the count is unchanged.
 */
void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu)
{
	int nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
	struct kvm_lapic *apic = vcpu->arch.apic;
	int i;

	if (!lapic_in_kernel(vcpu) || nr_lvt_entries == apic->nr_lvt_entries)
		return;

	/* Initialize/mask any "new" LVT entries. */
	for (i = apic->nr_lvt_entries; i < nr_lvt_entries; i++)
		kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);

	apic->nr_lvt_entries = nr_lvt_entries;

	/* The number of LVT entries is reflected in the version register.
*/
	kvm_apic_set_version(vcpu);
}

/* Per-LVT-entry bit masks, indexed by LVT entry. */
static const unsigned int apic_lvt_mask[KVM_APIC_MAX_NR_LVT_ENTRIES] = {
	[LVT_TIMER] = LVT_MASK,      /* timer mode mask added at runtime */
	[LVT_THERMAL_MONITOR] = LVT_MASK | APIC_MODE_MASK,
	[LVT_PERFORMANCE_COUNTER] = LVT_MASK | APIC_MODE_MASK,
	[LVT_LINT0] = LINT_MASK,
	[LVT_LINT1] = LINT_MASK,
	[LVT_ERROR] = LVT_MASK,
	[LVT_CMCI] = LVT_MASK | APIC_MODE_MASK
};

/*
 * Return the highest vector whose bit is set in the register bank at
 * @bitmap, or -1 if no bit is set.  The 32-bit registers are scanned from
 * the one holding the highest vectors downwards.
 */
static int find_highest_vector(void *bitmap)
{
	int vec;
	u32 *reg;

	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		if (*reg)
			return __fls(*reg) + vec;
	}

	return -1;
}

/*
 * Count the vectors set in the register bank at @bitmap.
 *
 * NOTE(review): the accumulator is a u8, so the result would wrap to 0 if
 * all MAX_APIC_VECTOR (256) bits were set -- confirm that callers can never
 * see a fully populated bank.
 */
static u8 count_vectors(void *bitmap)
{
	int vec;
	u32 *reg;
	u8 count = 0;

	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		count += hweight32(*reg);
	}

	return count;
}

/*
 * Merge the eight 32-bit words of @pir (presumably the posted-interrupt
 * request bitmap -- confirm against callers) into the IRR stored in @regs.
 *
 * *max_irr receives the highest vector set in the IRR afterwards, or -1 if
 * the IRR is empty.  Returns true if at least one IRR bit was newly set and
 * the highest newly set vector is also the highest vector in the IRR.
 */
bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
{
	u32 i, vec;
	u32 pir_val, irr_val, prev_irr_val;
	int max_updated_irr;

	max_updated_irr = -1;
	*max_irr = -1;

	for (i = vec = 0; i <= 7; i++, vec += 32) {
		/* Consecutive IRR words are 0x10 bytes apart. */
		u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);

		irr_val = *p_irr;
		pir_val = READ_ONCE(pir[i]);

		if (pir_val) {
			/* Fetch and clear the word in a single exchange. */
			pir_val = xchg(&pir[i], 0);

			/*
			 * OR the fetched bits into the IRR word.  Stop early
			 * if that adds nothing new; otherwise retry until
			 * the compare-and-exchange succeeds.
			 */
			prev_irr_val = irr_val;
			do {
				irr_val = prev_irr_val | pir_val;
			} while (prev_irr_val != irr_val &&
				 !try_cmpxchg(p_irr, &prev_irr_val, irr_val));

			/* Words ascend, so the last update is the highest. */
			if (prev_irr_val != irr_val)
				max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec;
		}
		if (irr_val)
			*max_irr = __fls(irr_val) + vec;
	}

	return ((max_updated_irr != -1) &&
		(max_updated_irr == *max_irr));
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);

/*
 * Merge @pir into @vcpu's IRR via __kvm_apic_update_irr() and, if APICv is
 * not active and that call returned true, flag the IRR as pending.  Returns
 * the result of __kvm_apic_update_irr().
 */
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
{
	struct
kvm_lapic *apic = vcpu->arch.apic;
	bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr);

	if (unlikely(!apic->apicv_active && irr_updated))
		apic->irr_pending = true;
	return irr_updated;
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);

/* Unconditionally scan the IRR for the highest pending vector (-1 if none). */
static inline int apic_search_irr(struct kvm_lapic *apic)
{
	return find_highest_vector(apic->regs + APIC_IRR);
}

/*
 * Return the highest pending vector in the IRR, or -1 if there is none.
 * The scan is skipped entirely when irr_pending is clear.
 */
static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
	int result;

	/*
	 * Note that irr_pending is just a hint. It will be always
	 * true with virtual interrupt delivery enabled.
	 */
	if (!apic->irr_pending)
		return -1;

	result = apic_search_irr(apic);
	ASSERT(result == -1 || result >= 16);

	return result;
}

/*
 * Clear @vec in the IRR.  With APICv active the hardware is told about the
 * new highest IRR; otherwise the irr_pending hint is recomputed.
 */
static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
	if (unlikely(apic->apicv_active)) {
		/* need to update RVI */
		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
		static_call_cond(kvm_x86_hwapic_irr_update)(apic->vcpu,
							    apic_find_highest_irr(apic));
	} else {
		/*
		 * Clear the hint before the bit, and set it again only if
		 * another vector is still pending.
		 */
		apic->irr_pending = false;
		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
		if (apic_search_irr(apic) != -1)
			apic->irr_pending = true;
	}
}

/* Exported wrapper: clear @vec in the IRR of @vcpu's local APIC. */
void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
{
	apic_clear_irr(vec, vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);

/*
 * Mark @vec as in service.  Nothing is done if the ISR bit was already set.
 * Without APICv, isr_count and highest_isr_cache are updated as well.
 */
static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
	if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
		return;

	/*
	 * With APIC virtualization enabled, all caching is disabled
	 * because the processor can modify ISR under the hood.  Instead
	 * just set SVI.
	 */
	if (unlikely(apic->apicv_active))
		static_call_cond(kvm_x86_hwapic_isr_update)(vec);
	else {
		++apic->isr_count;
		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
		/*
		 * ISR (in service register) bit is set when injecting an interrupt.
* The highest vector is injected. Thus the latest bit set matches
		 * the highest bit in ISR.
		 */
		apic->highest_isr_cache = vec;
	}
}

/*
 * Return the highest in-service vector, or -1 if none.  The cached value is
 * used when valid; otherwise the ISR is scanned.
 */
static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
	int result;

	/*
	 * Note that isr_count is always 1, and highest_isr_cache
	 * is always -1, with APIC virtualization enabled.
	 */
	if (!apic->isr_count)
		return -1;
	if (likely(apic->highest_isr_cache != -1))
		return apic->highest_isr_cache;

	result = find_highest_vector(apic->regs + APIC_ISR);
	ASSERT(result == -1 || result >= 16);

	return result;
}

/*
 * Clear @vec from the ISR.  Nothing is done if the bit was not set.  Without
 * APICv, isr_count is decremented and the highest-ISR cache is invalidated.
 */
static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
	if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
		return;

	/*
	 * We do get here for APIC virtualization enabled if the guest
	 * uses the Hyper-V APIC enlightenment.  In this case we may need
	 * to trigger a new interrupt delivery by writing the SVI field;
	 * on the other hand isr_count and highest_isr_cache are unused
	 * and must be left alone.
	 */
	if (unlikely(apic->apicv_active))
		static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
	else {
		--apic->isr_count;
		BUG_ON(apic->isr_count < 0);
		apic->highest_isr_cache = -1;
	}
}

/* Exported wrapper around apic_find_highest_irr() for @vcpu's local APIC. */
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
	/* This may race with setting of irr in __apic_accept_irq() and
	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
	 * will cause vmexit immediately and the value will be recalculated
	 * on the next vmentry.
*/
	return apic_find_highest_irr(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);

/* Forward declaration. */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     struct dest_map *dest_map);

/*
 * Hand @irq to @vcpu's local APIC.  Returns whatever __apic_accept_irq()
 * returns.
 */
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
		     struct dest_map *dest_map)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
			irq->level, irq->trig_mode, dest_map);
}

/*
 * Deliver @irq to every APIC selected by @ipi_bitmap, where bit i stands for
 * physical APIC ID @min + i.  Bits whose ID lies beyond map->max_apic_id, or
 * that have no APIC in the map, are ignored.  Returns the sum of the
 * kvm_apic_set_irq() return values.
 */
static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
			 struct kvm_lapic_irq *irq, u32 min)
{
	int i, count = 0;
	struct kvm_vcpu *vcpu;

	if (min > map->max_apic_id)
		return 0;

	/* Limit the scan to the bits that fall inside the map. */
	for_each_set_bit(i, ipi_bitmap,
		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
		if (map->phys_map[min + i]) {
			vcpu = map->phys_map[min + i]->vcpu;
			count += kvm_apic_set_irq(vcpu, irq, NULL);
		}
	}

	return count;
}

/*
 * Send the interrupt described by @icr to the APICs selected by two bitmaps:
 * @ipi_bitmap_low covers APIC IDs starting at @min, and @ipi_bitmap_high
 * covers IDs starting at @min plus the cluster size (64 if @op_64_bit is
 * nonzero, otherwise 32).
 *
 * Returns -KVM_EINVAL if @icr has any APIC_DEST_MASK or APIC_SHORT_MASK bit
 * set, -EOPNOTSUPP if there is no APIC map, and otherwise the sum of the two
 * __pv_send_ipi() results.
 */
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
		    unsigned long ipi_bitmap_high, u32 min,
		    unsigned long icr, int op_64_bit)
{
	struct kvm_apic_map *map;
	struct kvm_lapic_irq irq = {0};
	int cluster_size = op_64_bit ?
64 : 32;
	int count;

	if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
		return -KVM_EINVAL;

	irq.vector = icr & APIC_VECTOR_MASK;
	irq.delivery_mode = icr & APIC_MODE_MASK;
	irq.level = (icr & APIC_INT_ASSERT) != 0;
	irq.trig_mode = icr & APIC_INT_LEVELTRIG;

	/* The map is only dereferenced inside the RCU read-side section. */
	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	count = -EOPNOTSUPP;
	if (likely(map)) {
		count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
		min += cluster_size;
		count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
	}

	rcu_read_unlock();
	return count;
}

/*
 * Write the one-byte value @val through the cached guest mapping
 * vcpu->arch.pv_eoi.data.  Returns the kvm_write_guest_cached() result.
 */
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{

	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
				      sizeof(val));
}

/*
 * Read one byte into *@val through the cached guest mapping
 * vcpu->arch.pv_eoi.data.  Returns the kvm_read_guest_cached() result.
 */
static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{

	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
				      sizeof(*val));
}

/* True if KVM_MSR_ENABLED is set in the vCPU's PV EOI MSR value. */
static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

/*
 * Write KVM_PV_EOI_ENABLED to the guest and, only if that write succeeds,
 * set KVM_APIC_PV_EOI_PENDING in apic_attention.
 */
static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
		return;

	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

/*
 * Read the guest's PV EOI byte and, if KVM_PV_EOI_ENABLED is set in it,
 * write KVM_PV_EOI_DISABLED back.  Returns true if the enabled bit was set
 * and both guest accesses succeeded.  If either guest access fails, false is
 * returned and the pending bit in apic_attention is left untouched.
 */
static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
{
	u8 val;

	if (pv_eoi_get_user(vcpu, &val) < 0)
		return false;

	val &= KVM_PV_EOI_ENABLED;

	if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
		return false;

	/*
	 * Clear pending bit in any case: it will be set again on vmentry.
	 * While this might not be ideal from performance point of view,
	 * this makes sure pv eoi is only enabled when we know it's safe.
*/
	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);

	return val;
}

/*
 * Return the highest pending IRR vector if its priority class (bits 7:4) is
 * strictly above @ppr, otherwise -1.  The sync_pir_to_irr hook, when
 * provided, is used to obtain the highest IRR.
 */
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
	int highest_irr;
	if (kvm_x86_ops.sync_pir_to_irr)
		highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
	else
		highest_irr = apic_find_highest_irr(apic);
	if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
		return -1;
	return highest_irr;
}

/*
 * Recompute the processor priority from the task priority and the highest
 * in-service vector, store it in APIC_PROCPRI if it changed, and report it
 * through *@new_ppr.  Returns true if the priority decreased.
 */
static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
{
	u32 tpr, isrv, ppr, old_ppr;
	int isr;

	old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
	isr = apic_find_highest_isr(apic);
	isrv = (isr != -1) ? isr : 0;

	/*
	 * The higher priority class wins: the full TPR byte if its class is
	 * at least that of the in-service vector, else the ISR class alone.
	 */
	if ((tpr & 0xf0) >= (isrv & 0xf0))
		ppr = tpr & 0xff;
	else
		ppr = isrv & 0xf0;

	*new_ppr = ppr;
	if (old_ppr != ppr)
		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);

	return ppr < old_ppr;
}

/*
 * Update the PPR and, if it dropped and an interrupt is now deliverable,
 * raise KVM_REQ_EVENT on the vCPU.
 */
static void apic_update_ppr(struct kvm_lapic *apic)
{
	u32 ppr;

	if (__apic_update_ppr(apic, &ppr) &&
	    apic_has_interrupt_for_ppr(apic, ppr) != -1)
		kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}

/* Exported wrapper around apic_update_ppr() for @vcpu's local APIC. */
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
{
	apic_update_ppr(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);

/* Write the task priority register and recompute the PPR. */
static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
	kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
	apic_update_ppr(apic);
}

/*
 * True if @mda is the broadcast destination for the mode @apic is in:
 * X2APIC_BROADCAST in x2APIC mode, APIC_BROADCAST otherwise.
 */
static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
{
	return mda == (apic_x2apic_mode(apic) ?
X2APIC_BROADCAST : APIC_BROADCAST);
}

/*
 * True if the physical destination @mda addresses @apic, i.e. it is the
 * broadcast address or matches the APIC's ID.
 */
static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
{
	if (kvm_apic_broadcast(apic, mda))
		return true;

	/*
	 * Hotplug hack: Accept interrupts for vCPUs in xAPIC mode as if they
	 * were in x2APIC mode if the target APIC ID can't be encoded as an
	 * xAPIC ID.  This allows unique addressing of hotplugged vCPUs (which
	 * start in xAPIC mode) with an APIC ID that is unaddressable in xAPIC
	 * mode.  Match the x2APIC ID if and only if the target APIC ID can't
	 * be encoded in xAPIC to avoid spurious matches against a vCPU that
	 * changed its (addressable) xAPIC ID (which is writable).
	 */
	if (apic_x2apic_mode(apic) || mda > 0xff)
		return mda == kvm_x2apic_id(apic);

	return mda == kvm_xapic_id(apic);
}

/*
 * True if the logical destination @mda addresses @apic, i.e. it is the
 * broadcast address or matches the APIC's LDR under the active logical
 * addressing scheme.
 */
static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
{
	u32 logical_id;

	if (kvm_apic_broadcast(apic, mda))
		return true;

	logical_id = kvm_lapic_get_reg(apic, APIC_LDR);

	/* x2APIC: upper 16 bits must be equal, lower 16 bits must overlap. */
	if (apic_x2apic_mode(apic))
		return ((logical_id >> 16) == (mda >> 16))
		       && (logical_id & mda & 0xffff) != 0;

	logical_id = GET_APIC_LOGICAL_ID(logical_id);

	switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
	case APIC_DFR_FLAT:
		/* Flat: any common bit matches. */
		return (logical_id & mda) != 0;
	case APIC_DFR_CLUSTER:
		/* Cluster: same upper nibble, and a common bit in the lower. */
		return ((logical_id >> 4) == (mda >> 4))
		       && (logical_id & mda & 0xf) != 0;
	default:
		return false;
	}
}

/* The KVM local APIC implementation has two quirks:
 *
 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 *    KVM doesn't do that aliasing.
 *
 *  - in-kernel IOAPIC messages have to be delivered directly to
 *    x2APIC, because the kernel does not support interrupt remapping.
*    In order to support broadcast without interrupt remapping, x2APIC
 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 *    to X2APIC_BROADCAST.
 *
 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 * important when userspace wants to use x2APIC-format MSIs, because
 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 */
static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
		struct kvm_lapic *source, struct kvm_lapic *target)
{
	/* A message with a source APIC is treated as an IPI. */
	bool ipi = source != NULL;

	if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
	    !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
		return X2APIC_BROADCAST;

	return dest_id;
}

/*
 * True if @vcpu's local APIC is a destination of a message sent by @source
 * (NULL for non-IPI messages) with the given @shorthand, @dest and
 * @dest_mode.  An unknown shorthand matches nothing.
 */
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
			   int shorthand, unsigned int dest, int dest_mode)
{
	struct kvm_lapic *target = vcpu->arch.apic;
	u32 mda = kvm_apic_mda(vcpu, dest, source, target);

	ASSERT(target);
	switch (shorthand) {
	case APIC_DEST_NOSHORT:
		if (dest_mode == APIC_DEST_PHYSICAL)
			return kvm_apic_match_physical_addr(target, mda);
		else
			return kvm_apic_match_logical_addr(target, mda);
	case APIC_DEST_SELF:
		return target == source;
	case APIC_DEST_ALLINC:
		return true;
	case APIC_DEST_ALLBUT:
		return target != source;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(kvm_apic_match_dest);

/*
 * Return the position of the (@vector % @dest_vcpus)-th set bit of @bitmap,
 * counting set bits from zero.  @dest_vcpus is used as a divisor and must be
 * nonzero; running out of set bits before reaching the target is a BUG.
 */
int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
		       const unsigned long *bitmap, u32 bitmap_size)
{
	u32 mod;
	int i, idx = -1;

	mod = vector % dest_vcpus;

	for (i = 0; i <= mod; i++) {
		idx = find_next_bit(bitmap, bitmap_size, idx + 1);
		BUG_ON(idx == bitmap_size);
	}

	return idx;
}

/* Log, once per VM, that a disabled LAPIC was hit during irq injection. */
static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
{
	if (!kvm->arch.disabled_lapic_found) {
1111 kvm->arch.disabled_lapic_found = true; 1112 pr_info("Disabled LAPIC found during irq injection\n"); 1113 } 1114 } 1115 1116 static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src, 1117 struct kvm_lapic_irq *irq, struct kvm_apic_map *map) 1118 { 1119 if (kvm->arch.x2apic_broadcast_quirk_disabled) { 1120 if ((irq->dest_id == APIC_BROADCAST && 1121 map->logical_mode != KVM_APIC_MODE_X2APIC)) 1122 return true; 1123 if (irq->dest_id == X2APIC_BROADCAST) 1124 return true; 1125 } else { 1126 bool x2apic_ipi = src && *src && apic_x2apic_mode(*src); 1127 if (irq->dest_id == (x2apic_ipi ? 1128 X2APIC_BROADCAST : APIC_BROADCAST)) 1129 return true; 1130 } 1131 1132 return false; 1133 } 1134 1135 /* Return true if the interrupt can be handled by using *bitmap as index mask 1136 * for valid destinations in *dst array. 1137 * Return false if kvm_apic_map_get_dest_lapic did nothing useful. 1138 * Note: we may have zero kvm_lapic destinations when we return true, which 1139 * means that the interrupt should be dropped. In this case, *bitmap would be 1140 * zero and *dst undefined. 
1141 */ 1142 static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, 1143 struct kvm_lapic **src, struct kvm_lapic_irq *irq, 1144 struct kvm_apic_map *map, struct kvm_lapic ***dst, 1145 unsigned long *bitmap) 1146 { 1147 int i, lowest; 1148 1149 if (irq->shorthand == APIC_DEST_SELF && src) { 1150 *dst = src; 1151 *bitmap = 1; 1152 return true; 1153 } else if (irq->shorthand) 1154 return false; 1155 1156 if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map)) 1157 return false; 1158 1159 if (irq->dest_mode == APIC_DEST_PHYSICAL) { 1160 if (irq->dest_id > map->max_apic_id) { 1161 *bitmap = 0; 1162 } else { 1163 u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1); 1164 *dst = &map->phys_map[dest_id]; 1165 *bitmap = 1; 1166 } 1167 return true; 1168 } 1169 1170 *bitmap = 0; 1171 if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst, 1172 (u16 *)bitmap)) 1173 return false; 1174 1175 if (!kvm_lowest_prio_delivery(irq)) 1176 return true; 1177 1178 if (!kvm_vector_hashing_enabled()) { 1179 lowest = -1; 1180 for_each_set_bit(i, bitmap, 16) { 1181 if (!(*dst)[i]) 1182 continue; 1183 if (lowest < 0) 1184 lowest = i; 1185 else if (kvm_apic_compare_prio((*dst)[i]->vcpu, 1186 (*dst)[lowest]->vcpu) < 0) 1187 lowest = i; 1188 } 1189 } else { 1190 if (!*bitmap) 1191 return true; 1192 1193 lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap), 1194 bitmap, 16); 1195 1196 if (!(*dst)[lowest]) { 1197 kvm_apic_disabled_lapic_found(kvm); 1198 *bitmap = 0; 1199 return true; 1200 } 1201 } 1202 1203 *bitmap = (lowest >= 0) ? 
1 << lowest : 0; 1204 1205 return true; 1206 } 1207 1208 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, 1209 struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) 1210 { 1211 struct kvm_apic_map *map; 1212 unsigned long bitmap; 1213 struct kvm_lapic **dst = NULL; 1214 int i; 1215 bool ret; 1216 1217 *r = -1; 1218 1219 if (irq->shorthand == APIC_DEST_SELF) { 1220 if (KVM_BUG_ON(!src, kvm)) { 1221 *r = 0; 1222 return true; 1223 } 1224 *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); 1225 return true; 1226 } 1227 1228 rcu_read_lock(); 1229 map = rcu_dereference(kvm->arch.apic_map); 1230 1231 ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap); 1232 if (ret) { 1233 *r = 0; 1234 for_each_set_bit(i, &bitmap, 16) { 1235 if (!dst[i]) 1236 continue; 1237 *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); 1238 } 1239 } 1240 1241 rcu_read_unlock(); 1242 return ret; 1243 } 1244 1245 /* 1246 * This routine tries to handle interrupts in posted mode, here is how 1247 * it deals with different cases: 1248 * - For single-destination interrupts, handle it in posted mode 1249 * - Else if vector hashing is enabled and it is a lowest-priority 1250 * interrupt, handle it in posted mode and use the following mechanism 1251 * to find the destination vCPU. 1252 * 1. For lowest-priority interrupts, store all the possible 1253 * destination vCPUs in an array. 1254 * 2. Use "guest vector % max number of destination vCPUs" to find 1255 * the right destination vCPU in the array for the lowest-priority 1256 * interrupt. 1257 * - Otherwise, use remapped mode to inject the interrupt. 
1258 */ 1259 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, 1260 struct kvm_vcpu **dest_vcpu) 1261 { 1262 struct kvm_apic_map *map; 1263 unsigned long bitmap; 1264 struct kvm_lapic **dst = NULL; 1265 bool ret = false; 1266 1267 if (irq->shorthand) 1268 return false; 1269 1270 rcu_read_lock(); 1271 map = rcu_dereference(kvm->arch.apic_map); 1272 1273 if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) && 1274 hweight16(bitmap) == 1) { 1275 unsigned long i = find_first_bit(&bitmap, 16); 1276 1277 if (dst[i]) { 1278 *dest_vcpu = dst[i]->vcpu; 1279 ret = true; 1280 } 1281 } 1282 1283 rcu_read_unlock(); 1284 return ret; 1285 } 1286 1287 /* 1288 * Add a pending IRQ into lapic. 1289 * Return 1 if successfully added and 0 if discarded. 1290 */ 1291 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 1292 int vector, int level, int trig_mode, 1293 struct dest_map *dest_map) 1294 { 1295 int result = 0; 1296 struct kvm_vcpu *vcpu = apic->vcpu; 1297 1298 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, 1299 trig_mode, vector); 1300 switch (delivery_mode) { 1301 case APIC_DM_LOWEST: 1302 vcpu->arch.apic_arb_prio++; 1303 fallthrough; 1304 case APIC_DM_FIXED: 1305 if (unlikely(trig_mode && !level)) 1306 break; 1307 1308 /* FIXME add logic for vcpu on reset */ 1309 if (unlikely(!apic_enabled(apic))) 1310 break; 1311 1312 result = 1; 1313 1314 if (dest_map) { 1315 __set_bit(vcpu->vcpu_id, dest_map->map); 1316 dest_map->vectors[vcpu->vcpu_id] = vector; 1317 } 1318 1319 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { 1320 if (trig_mode) 1321 kvm_lapic_set_vector(vector, 1322 apic->regs + APIC_TMR); 1323 else 1324 kvm_lapic_clear_vector(vector, 1325 apic->regs + APIC_TMR); 1326 } 1327 1328 static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode, 1329 trig_mode, vector); 1330 break; 1331 1332 case APIC_DM_REMRD: 1333 result = 1; 1334 vcpu->arch.pv.pv_unhalted = 1; 1335 
kvm_make_request(KVM_REQ_EVENT, vcpu); 1336 kvm_vcpu_kick(vcpu); 1337 break; 1338 1339 case APIC_DM_SMI: 1340 if (!kvm_inject_smi(vcpu)) { 1341 kvm_vcpu_kick(vcpu); 1342 result = 1; 1343 } 1344 break; 1345 1346 case APIC_DM_NMI: 1347 result = 1; 1348 kvm_inject_nmi(vcpu); 1349 kvm_vcpu_kick(vcpu); 1350 break; 1351 1352 case APIC_DM_INIT: 1353 if (!trig_mode || level) { 1354 result = 1; 1355 /* assumes that there are only KVM_APIC_INIT/SIPI */ 1356 apic->pending_events = (1UL << KVM_APIC_INIT); 1357 kvm_make_request(KVM_REQ_EVENT, vcpu); 1358 kvm_vcpu_kick(vcpu); 1359 } 1360 break; 1361 1362 case APIC_DM_STARTUP: 1363 result = 1; 1364 apic->sipi_vector = vector; 1365 /* make sure sipi_vector is visible for the receiver */ 1366 smp_wmb(); 1367 set_bit(KVM_APIC_SIPI, &apic->pending_events); 1368 kvm_make_request(KVM_REQ_EVENT, vcpu); 1369 kvm_vcpu_kick(vcpu); 1370 break; 1371 1372 case APIC_DM_EXTINT: 1373 /* 1374 * Should only be called by kvm_apic_local_deliver() with LVT0, 1375 * before NMI watchdog was enabled. Already handled by 1376 * kvm_apic_accept_pic_intr(). 1377 */ 1378 break; 1379 1380 default: 1381 printk(KERN_ERR "TODO: unsupported delivery mode %x\n", 1382 delivery_mode); 1383 break; 1384 } 1385 return result; 1386 } 1387 1388 /* 1389 * This routine identifies the destination vcpus mask meant to receive the 1390 * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find 1391 * out the destination vcpus array and set the bitmap or it traverses to 1392 * each available vcpu to identify the same. 
1393 */ 1394 void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, 1395 unsigned long *vcpu_bitmap) 1396 { 1397 struct kvm_lapic **dest_vcpu = NULL; 1398 struct kvm_lapic *src = NULL; 1399 struct kvm_apic_map *map; 1400 struct kvm_vcpu *vcpu; 1401 unsigned long bitmap, i; 1402 int vcpu_idx; 1403 bool ret; 1404 1405 rcu_read_lock(); 1406 map = rcu_dereference(kvm->arch.apic_map); 1407 1408 ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu, 1409 &bitmap); 1410 if (ret) { 1411 for_each_set_bit(i, &bitmap, 16) { 1412 if (!dest_vcpu[i]) 1413 continue; 1414 vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx; 1415 __set_bit(vcpu_idx, vcpu_bitmap); 1416 } 1417 } else { 1418 kvm_for_each_vcpu(i, vcpu, kvm) { 1419 if (!kvm_apic_present(vcpu)) 1420 continue; 1421 if (!kvm_apic_match_dest(vcpu, NULL, 1422 irq->shorthand, 1423 irq->dest_id, 1424 irq->dest_mode)) 1425 continue; 1426 __set_bit(i, vcpu_bitmap); 1427 } 1428 } 1429 rcu_read_unlock(); 1430 } 1431 1432 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) 1433 { 1434 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 1435 } 1436 1437 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) 1438 { 1439 return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors); 1440 } 1441 1442 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) 1443 { 1444 int trigger_mode; 1445 1446 /* Eoi the ioapic only if the ioapic doesn't own the vector. */ 1447 if (!kvm_ioapic_handles_vector(apic, vector)) 1448 return; 1449 1450 /* Request a KVM exit to inform the userspace IOAPIC. 
*/ 1451 if (irqchip_split(apic->vcpu->kvm)) { 1452 apic->vcpu->arch.pending_ioapic_eoi = vector; 1453 kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu); 1454 return; 1455 } 1456 1457 if (apic_test_vector(vector, apic->regs + APIC_TMR)) 1458 trigger_mode = IOAPIC_LEVEL_TRIG; 1459 else 1460 trigger_mode = IOAPIC_EDGE_TRIG; 1461 1462 kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); 1463 } 1464 1465 static int apic_set_eoi(struct kvm_lapic *apic) 1466 { 1467 int vector = apic_find_highest_isr(apic); 1468 1469 trace_kvm_eoi(apic, vector); 1470 1471 /* 1472 * Not every write EOI will has corresponding ISR, 1473 * one example is when Kernel check timer on setup_IO_APIC 1474 */ 1475 if (vector == -1) 1476 return vector; 1477 1478 apic_clear_isr(vector, apic); 1479 apic_update_ppr(apic); 1480 1481 if (to_hv_vcpu(apic->vcpu) && 1482 test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap)) 1483 kvm_hv_synic_send_eoi(apic->vcpu, vector); 1484 1485 kvm_ioapic_send_eoi(apic, vector); 1486 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 1487 return vector; 1488 } 1489 1490 /* 1491 * this interface assumes a trap-like exit, which has already finished 1492 * desired side effect including vISR and vPPR update. 1493 */ 1494 void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) 1495 { 1496 struct kvm_lapic *apic = vcpu->arch.apic; 1497 1498 trace_kvm_eoi(apic, vector); 1499 1500 kvm_ioapic_send_eoi(apic, vector); 1501 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 1502 } 1503 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); 1504 1505 void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) 1506 { 1507 struct kvm_lapic_irq irq; 1508 1509 /* KVM has no delay and should always clear the BUSY/PENDING flag. 
*/ 1510 WARN_ON_ONCE(icr_low & APIC_ICR_BUSY); 1511 1512 irq.vector = icr_low & APIC_VECTOR_MASK; 1513 irq.delivery_mode = icr_low & APIC_MODE_MASK; 1514 irq.dest_mode = icr_low & APIC_DEST_MASK; 1515 irq.level = (icr_low & APIC_INT_ASSERT) != 0; 1516 irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; 1517 irq.shorthand = icr_low & APIC_SHORT_MASK; 1518 irq.msi_redir_hint = false; 1519 if (apic_x2apic_mode(apic)) 1520 irq.dest_id = icr_high; 1521 else 1522 irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high); 1523 1524 trace_kvm_apic_ipi(icr_low, irq.dest_id); 1525 1526 kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); 1527 } 1528 EXPORT_SYMBOL_GPL(kvm_apic_send_ipi); 1529 1530 static u32 apic_get_tmcct(struct kvm_lapic *apic) 1531 { 1532 ktime_t remaining, now; 1533 s64 ns; 1534 1535 ASSERT(apic != NULL); 1536 1537 /* if initial count is 0, current count should also be 0 */ 1538 if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 || 1539 apic->lapic_timer.period == 0) 1540 return 0; 1541 1542 now = ktime_get(); 1543 remaining = ktime_sub(apic->lapic_timer.target_expiration, now); 1544 if (ktime_to_ns(remaining) < 0) 1545 remaining = 0; 1546 1547 ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period); 1548 return div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->divide_count)); 1549 } 1550 1551 static void __report_tpr_access(struct kvm_lapic *apic, bool write) 1552 { 1553 struct kvm_vcpu *vcpu = apic->vcpu; 1554 struct kvm_run *run = vcpu->run; 1555 1556 kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu); 1557 run->tpr_access.rip = kvm_rip_read(vcpu); 1558 run->tpr_access.is_write = write; 1559 } 1560 1561 static inline void report_tpr_access(struct kvm_lapic *apic, bool write) 1562 { 1563 if (apic->vcpu->arch.tpr_access_reporting) 1564 __report_tpr_access(apic, write); 1565 } 1566 1567 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) 1568 { 1569 u32 val = 0; 1570 1571 if (offset >= LAPIC_MMIO_LENGTH) 1572 return 0; 1573 1574 switch (offset) { 1575 case 
APIC_ARBPRI: 1576 break; 1577 1578 case APIC_TMCCT: /* Timer CCR */ 1579 if (apic_lvtt_tscdeadline(apic)) 1580 return 0; 1581 1582 val = apic_get_tmcct(apic); 1583 break; 1584 case APIC_PROCPRI: 1585 apic_update_ppr(apic); 1586 val = kvm_lapic_get_reg(apic, offset); 1587 break; 1588 case APIC_TASKPRI: 1589 report_tpr_access(apic, false); 1590 fallthrough; 1591 default: 1592 val = kvm_lapic_get_reg(apic, offset); 1593 break; 1594 } 1595 1596 return val; 1597 } 1598 1599 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev) 1600 { 1601 return container_of(dev, struct kvm_lapic, dev); 1602 } 1603 1604 #define APIC_REG_MASK(reg) (1ull << ((reg) >> 4)) 1605 #define APIC_REGS_MASK(first, count) \ 1606 (APIC_REG_MASK(first) * ((1ull << (count)) - 1)) 1607 1608 u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic) 1609 { 1610 /* Leave bits '0' for reserved and write-only registers. */ 1611 u64 valid_reg_mask = 1612 APIC_REG_MASK(APIC_ID) | 1613 APIC_REG_MASK(APIC_LVR) | 1614 APIC_REG_MASK(APIC_TASKPRI) | 1615 APIC_REG_MASK(APIC_PROCPRI) | 1616 APIC_REG_MASK(APIC_LDR) | 1617 APIC_REG_MASK(APIC_SPIV) | 1618 APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) | 1619 APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) | 1620 APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) | 1621 APIC_REG_MASK(APIC_ESR) | 1622 APIC_REG_MASK(APIC_ICR) | 1623 APIC_REG_MASK(APIC_LVTT) | 1624 APIC_REG_MASK(APIC_LVTTHMR) | 1625 APIC_REG_MASK(APIC_LVTPC) | 1626 APIC_REG_MASK(APIC_LVT0) | 1627 APIC_REG_MASK(APIC_LVT1) | 1628 APIC_REG_MASK(APIC_LVTERR) | 1629 APIC_REG_MASK(APIC_TMICT) | 1630 APIC_REG_MASK(APIC_TMCCT) | 1631 APIC_REG_MASK(APIC_TDCR); 1632 1633 if (kvm_lapic_lvt_supported(apic, LVT_CMCI)) 1634 valid_reg_mask |= APIC_REG_MASK(APIC_LVTCMCI); 1635 1636 /* ARBPRI, DFR, and ICR2 are not valid in x2APIC mode. 
*/ 1637 if (!apic_x2apic_mode(apic)) 1638 valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) | 1639 APIC_REG_MASK(APIC_DFR) | 1640 APIC_REG_MASK(APIC_ICR2); 1641 1642 return valid_reg_mask; 1643 } 1644 EXPORT_SYMBOL_GPL(kvm_lapic_readable_reg_mask); 1645 1646 static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, 1647 void *data) 1648 { 1649 unsigned char alignment = offset & 0xf; 1650 u32 result; 1651 1652 /* 1653 * WARN if KVM reads ICR in x2APIC mode, as it's an 8-byte register in 1654 * x2APIC and needs to be manually handled by the caller. 1655 */ 1656 WARN_ON_ONCE(apic_x2apic_mode(apic) && offset == APIC_ICR); 1657 1658 if (alignment + len > 4) 1659 return 1; 1660 1661 if (offset > 0x3f0 || 1662 !(kvm_lapic_readable_reg_mask(apic) & APIC_REG_MASK(offset))) 1663 return 1; 1664 1665 result = __apic_read(apic, offset & ~0xf); 1666 1667 trace_kvm_apic_read(offset, result); 1668 1669 switch (len) { 1670 case 1: 1671 case 2: 1672 case 4: 1673 memcpy(data, (char *)&result + alignment, len); 1674 break; 1675 default: 1676 printk(KERN_ERR "Local APIC read with len = %x, " 1677 "should be 1,2, or 4 instead\n", len); 1678 break; 1679 } 1680 return 0; 1681 } 1682 1683 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) 1684 { 1685 return addr >= apic->base_address && 1686 addr < apic->base_address + LAPIC_MMIO_LENGTH; 1687 } 1688 1689 static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, 1690 gpa_t address, int len, void *data) 1691 { 1692 struct kvm_lapic *apic = to_lapic(this); 1693 u32 offset = address - apic->base_address; 1694 1695 if (!apic_mmio_in_range(apic, address)) 1696 return -EOPNOTSUPP; 1697 1698 if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { 1699 if (!kvm_check_has_quirk(vcpu->kvm, 1700 KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) 1701 return -EOPNOTSUPP; 1702 1703 memset(data, 0xff, len); 1704 return 0; 1705 } 1706 1707 kvm_lapic_reg_read(apic, offset, len, data); 1708 1709 return 0; 1710 } 1711 1712 
static void update_divide_count(struct kvm_lapic *apic) 1713 { 1714 u32 tmp1, tmp2, tdcr; 1715 1716 tdcr = kvm_lapic_get_reg(apic, APIC_TDCR); 1717 tmp1 = tdcr & 0xf; 1718 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; 1719 apic->divide_count = 0x1 << (tmp2 & 0x7); 1720 } 1721 1722 static void limit_periodic_timer_frequency(struct kvm_lapic *apic) 1723 { 1724 /* 1725 * Do not allow the guest to program periodic timers with small 1726 * interval, since the hrtimers are not throttled by the host 1727 * scheduler. 1728 */ 1729 if (apic_lvtt_period(apic) && apic->lapic_timer.period) { 1730 s64 min_period = min_timer_period_us * 1000LL; 1731 1732 if (apic->lapic_timer.period < min_period) { 1733 pr_info_ratelimited( 1734 "vcpu %i: requested %lld ns " 1735 "lapic timer period limited to %lld ns\n", 1736 apic->vcpu->vcpu_id, 1737 apic->lapic_timer.period, min_period); 1738 apic->lapic_timer.period = min_period; 1739 } 1740 } 1741 } 1742 1743 static void cancel_hv_timer(struct kvm_lapic *apic); 1744 1745 static void cancel_apic_timer(struct kvm_lapic *apic) 1746 { 1747 hrtimer_cancel(&apic->lapic_timer.timer); 1748 preempt_disable(); 1749 if (apic->lapic_timer.hv_timer_in_use) 1750 cancel_hv_timer(apic); 1751 preempt_enable(); 1752 atomic_set(&apic->lapic_timer.pending, 0); 1753 } 1754 1755 static void apic_update_lvtt(struct kvm_lapic *apic) 1756 { 1757 u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) & 1758 apic->lapic_timer.timer_mode_mask; 1759 1760 if (apic->lapic_timer.timer_mode != timer_mode) { 1761 if (apic_lvtt_tscdeadline(apic) != (timer_mode == 1762 APIC_LVT_TIMER_TSCDEADLINE)) { 1763 cancel_apic_timer(apic); 1764 kvm_lapic_set_reg(apic, APIC_TMICT, 0); 1765 apic->lapic_timer.period = 0; 1766 apic->lapic_timer.tscdeadline = 0; 1767 } 1768 apic->lapic_timer.timer_mode = timer_mode; 1769 limit_periodic_timer_frequency(apic); 1770 } 1771 } 1772 1773 /* 1774 * On APICv, this test will cause a busy wait 1775 * during a higher-priority task. 
1776 */ 1777 1778 static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) 1779 { 1780 struct kvm_lapic *apic = vcpu->arch.apic; 1781 u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT); 1782 1783 if (kvm_apic_hw_enabled(apic)) { 1784 int vec = reg & APIC_VECTOR_MASK; 1785 void *bitmap = apic->regs + APIC_ISR; 1786 1787 if (apic->apicv_active) 1788 bitmap = apic->regs + APIC_IRR; 1789 1790 if (apic_test_vector(vec, bitmap)) 1791 return true; 1792 } 1793 return false; 1794 } 1795 1796 static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) 1797 { 1798 u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns; 1799 1800 /* 1801 * If the guest TSC is running at a different ratio than the host, then 1802 * convert the delay to nanoseconds to achieve an accurate delay. Note 1803 * that __delay() uses delay_tsc whenever the hardware has TSC, thus 1804 * always for VMX enabled hardware. 1805 */ 1806 if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) { 1807 __delay(min(guest_cycles, 1808 nsec_to_cycles(vcpu, timer_advance_ns))); 1809 } else { 1810 u64 delay_ns = guest_cycles * 1000000ULL; 1811 do_div(delay_ns, vcpu->arch.virtual_tsc_khz); 1812 ndelay(min_t(u32, delay_ns, timer_advance_ns)); 1813 } 1814 } 1815 1816 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, 1817 s64 advance_expire_delta) 1818 { 1819 struct kvm_lapic *apic = vcpu->arch.apic; 1820 u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; 1821 u64 ns; 1822 1823 /* Do not adjust for tiny fluctuations or large random spikes. 
*/ 1824 if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX || 1825 abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN) 1826 return; 1827 1828 /* too early */ 1829 if (advance_expire_delta < 0) { 1830 ns = -advance_expire_delta * 1000000ULL; 1831 do_div(ns, vcpu->arch.virtual_tsc_khz); 1832 timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; 1833 } else { 1834 /* too late */ 1835 ns = advance_expire_delta * 1000000ULL; 1836 do_div(ns, vcpu->arch.virtual_tsc_khz); 1837 timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; 1838 } 1839 1840 if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX)) 1841 timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; 1842 apic->lapic_timer.timer_advance_ns = timer_advance_ns; 1843 } 1844 1845 static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1846 { 1847 struct kvm_lapic *apic = vcpu->arch.apic; 1848 u64 guest_tsc, tsc_deadline; 1849 1850 tsc_deadline = apic->lapic_timer.expired_tscdeadline; 1851 apic->lapic_timer.expired_tscdeadline = 0; 1852 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1853 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); 1854 1855 if (lapic_timer_advance_dynamic) { 1856 adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline); 1857 /* 1858 * If the timer fired early, reread the TSC to account for the 1859 * overhead of the above adjustment to avoid waiting longer 1860 * than is necessary. 
1861 */ 1862 if (guest_tsc < tsc_deadline) 1863 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1864 } 1865 1866 if (guest_tsc < tsc_deadline) 1867 __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); 1868 } 1869 1870 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1871 { 1872 if (lapic_in_kernel(vcpu) && 1873 vcpu->arch.apic->lapic_timer.expired_tscdeadline && 1874 vcpu->arch.apic->lapic_timer.timer_advance_ns && 1875 lapic_timer_int_injected(vcpu)) 1876 __kvm_wait_lapic_expire(vcpu); 1877 } 1878 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire); 1879 1880 static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) 1881 { 1882 struct kvm_timer *ktimer = &apic->lapic_timer; 1883 1884 kvm_apic_local_deliver(apic, APIC_LVTT); 1885 if (apic_lvtt_tscdeadline(apic)) { 1886 ktimer->tscdeadline = 0; 1887 } else if (apic_lvtt_oneshot(apic)) { 1888 ktimer->tscdeadline = 0; 1889 ktimer->target_expiration = 0; 1890 } 1891 } 1892 1893 static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) 1894 { 1895 struct kvm_vcpu *vcpu = apic->vcpu; 1896 struct kvm_timer *ktimer = &apic->lapic_timer; 1897 1898 if (atomic_read(&apic->lapic_timer.pending)) 1899 return; 1900 1901 if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) 1902 ktimer->expired_tscdeadline = ktimer->tscdeadline; 1903 1904 if (!from_timer_fn && apic->apicv_active) { 1905 WARN_ON(kvm_get_running_vcpu() != vcpu); 1906 kvm_apic_inject_pending_timer_irqs(apic); 1907 return; 1908 } 1909 1910 if (kvm_use_posted_timer_interrupt(apic->vcpu)) { 1911 /* 1912 * Ensure the guest's timer has truly expired before posting an 1913 * interrupt. Open code the relevant checks to avoid querying 1914 * lapic_timer_int_injected(), which will be false since the 1915 * interrupt isn't yet injected. Waiting until after injecting 1916 * is not an option since that won't help a posted interrupt. 
1917 */ 1918 if (vcpu->arch.apic->lapic_timer.expired_tscdeadline && 1919 vcpu->arch.apic->lapic_timer.timer_advance_ns) 1920 __kvm_wait_lapic_expire(vcpu); 1921 kvm_apic_inject_pending_timer_irqs(apic); 1922 return; 1923 } 1924 1925 atomic_inc(&apic->lapic_timer.pending); 1926 kvm_make_request(KVM_REQ_UNBLOCK, vcpu); 1927 if (from_timer_fn) 1928 kvm_vcpu_kick(vcpu); 1929 } 1930 1931 static void start_sw_tscdeadline(struct kvm_lapic *apic) 1932 { 1933 struct kvm_timer *ktimer = &apic->lapic_timer; 1934 u64 guest_tsc, tscdeadline = ktimer->tscdeadline; 1935 u64 ns = 0; 1936 ktime_t expire; 1937 struct kvm_vcpu *vcpu = apic->vcpu; 1938 unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; 1939 unsigned long flags; 1940 ktime_t now; 1941 1942 if (unlikely(!tscdeadline || !this_tsc_khz)) 1943 return; 1944 1945 local_irq_save(flags); 1946 1947 now = ktime_get(); 1948 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1949 1950 ns = (tscdeadline - guest_tsc) * 1000000ULL; 1951 do_div(ns, this_tsc_khz); 1952 1953 if (likely(tscdeadline > guest_tsc) && 1954 likely(ns > apic->lapic_timer.timer_advance_ns)) { 1955 expire = ktime_add_ns(now, ns); 1956 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); 1957 hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD); 1958 } else 1959 apic_timer_expired(apic, false); 1960 1961 local_irq_restore(flags); 1962 } 1963 1964 static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict) 1965 { 1966 return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count; 1967 } 1968 1969 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor) 1970 { 1971 ktime_t now, remaining; 1972 u64 ns_remaining_old, ns_remaining_new; 1973 1974 apic->lapic_timer.period = 1975 tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); 1976 limit_periodic_timer_frequency(apic); 1977 1978 now = ktime_get(); 1979 remaining = ktime_sub(apic->lapic_timer.target_expiration, now); 1980 if (ktime_to_ns(remaining) < 0) 1981 remaining = 
0; 1982 1983 ns_remaining_old = ktime_to_ns(remaining); 1984 ns_remaining_new = mul_u64_u32_div(ns_remaining_old, 1985 apic->divide_count, old_divisor); 1986 1987 apic->lapic_timer.tscdeadline += 1988 nsec_to_cycles(apic->vcpu, ns_remaining_new) - 1989 nsec_to_cycles(apic->vcpu, ns_remaining_old); 1990 apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new); 1991 } 1992 1993 static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg) 1994 { 1995 ktime_t now; 1996 u64 tscl = rdtsc(); 1997 s64 deadline; 1998 1999 now = ktime_get(); 2000 apic->lapic_timer.period = 2001 tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); 2002 2003 if (!apic->lapic_timer.period) { 2004 apic->lapic_timer.tscdeadline = 0; 2005 return false; 2006 } 2007 2008 limit_periodic_timer_frequency(apic); 2009 deadline = apic->lapic_timer.period; 2010 2011 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { 2012 if (unlikely(count_reg != APIC_TMICT)) { 2013 deadline = tmict_to_ns(apic, 2014 kvm_lapic_get_reg(apic, count_reg)); 2015 if (unlikely(deadline <= 0)) { 2016 if (apic_lvtt_period(apic)) 2017 deadline = apic->lapic_timer.period; 2018 else 2019 deadline = 0; 2020 } 2021 else if (unlikely(deadline > apic->lapic_timer.period)) { 2022 pr_info_ratelimited( 2023 "vcpu %i: requested lapic timer restore with " 2024 "starting count register %#x=%u (%lld ns) > initial count (%lld ns). 
" 2025 "Using initial count to start timer.\n", 2026 apic->vcpu->vcpu_id, 2027 count_reg, 2028 kvm_lapic_get_reg(apic, count_reg), 2029 deadline, apic->lapic_timer.period); 2030 kvm_lapic_set_reg(apic, count_reg, 0); 2031 deadline = apic->lapic_timer.period; 2032 } 2033 } 2034 } 2035 2036 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + 2037 nsec_to_cycles(apic->vcpu, deadline); 2038 apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline); 2039 2040 return true; 2041 } 2042 2043 static void advance_periodic_target_expiration(struct kvm_lapic *apic) 2044 { 2045 ktime_t now = ktime_get(); 2046 u64 tscl = rdtsc(); 2047 ktime_t delta; 2048 2049 /* 2050 * Synchronize both deadlines to the same time source or 2051 * differences in the periods (caused by differences in the 2052 * underlying clocks or numerical approximation errors) will 2053 * cause the two to drift apart over time as the errors 2054 * accumulate. 2055 */ 2056 apic->lapic_timer.target_expiration = 2057 ktime_add_ns(apic->lapic_timer.target_expiration, 2058 apic->lapic_timer.period); 2059 delta = ktime_sub(apic->lapic_timer.target_expiration, now); 2060 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + 2061 nsec_to_cycles(apic->vcpu, delta); 2062 } 2063 2064 static void start_sw_period(struct kvm_lapic *apic) 2065 { 2066 if (!apic->lapic_timer.period) 2067 return; 2068 2069 if (ktime_after(ktime_get(), 2070 apic->lapic_timer.target_expiration)) { 2071 apic_timer_expired(apic, false); 2072 2073 if (apic_lvtt_oneshot(apic)) 2074 return; 2075 2076 advance_periodic_target_expiration(apic); 2077 } 2078 2079 hrtimer_start(&apic->lapic_timer.timer, 2080 apic->lapic_timer.target_expiration, 2081 HRTIMER_MODE_ABS_HARD); 2082 } 2083 2084 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) 2085 { 2086 if (!lapic_in_kernel(vcpu)) 2087 return false; 2088 2089 return vcpu->arch.apic->lapic_timer.hv_timer_in_use; 2090 } 2091 2092 static void cancel_hv_timer(struct 
kvm_lapic *apic) 2093 { 2094 WARN_ON(preemptible()); 2095 WARN_ON(!apic->lapic_timer.hv_timer_in_use); 2096 static_call(kvm_x86_cancel_hv_timer)(apic->vcpu); 2097 apic->lapic_timer.hv_timer_in_use = false; 2098 } 2099 2100 static bool start_hv_timer(struct kvm_lapic *apic) 2101 { 2102 struct kvm_timer *ktimer = &apic->lapic_timer; 2103 struct kvm_vcpu *vcpu = apic->vcpu; 2104 bool expired; 2105 2106 WARN_ON(preemptible()); 2107 if (!kvm_can_use_hv_timer(vcpu)) 2108 return false; 2109 2110 if (!ktimer->tscdeadline) 2111 return false; 2112 2113 if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired)) 2114 return false; 2115 2116 ktimer->hv_timer_in_use = true; 2117 hrtimer_cancel(&ktimer->timer); 2118 2119 /* 2120 * To simplify handling the periodic timer, leave the hv timer running 2121 * even if the deadline timer has expired, i.e. rely on the resulting 2122 * VM-Exit to recompute the periodic timer's target expiration. 2123 */ 2124 if (!apic_lvtt_period(apic)) { 2125 /* 2126 * Cancel the hv timer if the sw timer fired while the hv timer 2127 * was being programmed, or if the hv timer itself expired. 
2128 */ 2129 if (atomic_read(&ktimer->pending)) { 2130 cancel_hv_timer(apic); 2131 } else if (expired) { 2132 apic_timer_expired(apic, false); 2133 cancel_hv_timer(apic); 2134 } 2135 } 2136 2137 trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use); 2138 2139 return true; 2140 } 2141 2142 static void start_sw_timer(struct kvm_lapic *apic) 2143 { 2144 struct kvm_timer *ktimer = &apic->lapic_timer; 2145 2146 WARN_ON(preemptible()); 2147 if (apic->lapic_timer.hv_timer_in_use) 2148 cancel_hv_timer(apic); 2149 if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) 2150 return; 2151 2152 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) 2153 start_sw_period(apic); 2154 else if (apic_lvtt_tscdeadline(apic)) 2155 start_sw_tscdeadline(apic); 2156 trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false); 2157 } 2158 2159 static void restart_apic_timer(struct kvm_lapic *apic) 2160 { 2161 preempt_disable(); 2162 2163 if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending)) 2164 goto out; 2165 2166 if (!start_hv_timer(apic)) 2167 start_sw_timer(apic); 2168 out: 2169 preempt_enable(); 2170 } 2171 2172 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) 2173 { 2174 struct kvm_lapic *apic = vcpu->arch.apic; 2175 2176 preempt_disable(); 2177 /* If the preempt notifier has already run, it also called apic_timer_expired */ 2178 if (!apic->lapic_timer.hv_timer_in_use) 2179 goto out; 2180 WARN_ON(kvm_vcpu_is_blocking(vcpu)); 2181 apic_timer_expired(apic, false); 2182 cancel_hv_timer(apic); 2183 2184 if (apic_lvtt_period(apic) && apic->lapic_timer.period) { 2185 advance_periodic_target_expiration(apic); 2186 restart_apic_timer(apic); 2187 } 2188 out: 2189 preempt_enable(); 2190 } 2191 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); 2192 2193 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) 2194 { 2195 restart_apic_timer(vcpu->arch.apic); 2196 } 2197 2198 void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) 2199 { 2200 struct kvm_lapic 
*apic = vcpu->arch.apic; 2201 2202 preempt_disable(); 2203 /* Possibly the TSC deadline timer is not enabled yet */ 2204 if (apic->lapic_timer.hv_timer_in_use) 2205 start_sw_timer(apic); 2206 preempt_enable(); 2207 } 2208 2209 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) 2210 { 2211 struct kvm_lapic *apic = vcpu->arch.apic; 2212 2213 WARN_ON(!apic->lapic_timer.hv_timer_in_use); 2214 restart_apic_timer(apic); 2215 } 2216 2217 static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg) 2218 { 2219 atomic_set(&apic->lapic_timer.pending, 0); 2220 2221 if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) 2222 && !set_target_expiration(apic, count_reg)) 2223 return; 2224 2225 restart_apic_timer(apic); 2226 } 2227 2228 static void start_apic_timer(struct kvm_lapic *apic) 2229 { 2230 __start_apic_timer(apic, APIC_TMICT); 2231 } 2232 2233 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) 2234 { 2235 bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val); 2236 2237 if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) { 2238 apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode; 2239 if (lvt0_in_nmi_mode) { 2240 atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); 2241 } else 2242 atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); 2243 } 2244 } 2245 2246 static int get_lvt_index(u32 reg) 2247 { 2248 if (reg == APIC_LVTCMCI) 2249 return LVT_CMCI; 2250 if (reg < APIC_LVTT || reg > APIC_LVTERR) 2251 return -1; 2252 return array_index_nospec( 2253 (reg - APIC_LVTT) >> 4, KVM_APIC_MAX_NR_LVT_ENTRIES); 2254 } 2255 2256 static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) 2257 { 2258 int ret = 0; 2259 2260 trace_kvm_apic_write(reg, val); 2261 2262 switch (reg) { 2263 case APIC_ID: /* Local APIC ID */ 2264 if (!apic_x2apic_mode(apic)) { 2265 kvm_apic_set_xapic_id(apic, val >> 24); 2266 } else { 2267 ret = 1; 2268 } 2269 break; 2270 2271 case APIC_TASKPRI: 2272 report_tpr_access(apic, true); 2273 apic_set_tpr(apic, val & 0xff); 
2274 break; 2275 2276 case APIC_EOI: 2277 apic_set_eoi(apic); 2278 break; 2279 2280 case APIC_LDR: 2281 if (!apic_x2apic_mode(apic)) 2282 kvm_apic_set_ldr(apic, val & APIC_LDR_MASK); 2283 else 2284 ret = 1; 2285 break; 2286 2287 case APIC_DFR: 2288 if (!apic_x2apic_mode(apic)) 2289 kvm_apic_set_dfr(apic, val | 0x0FFFFFFF); 2290 else 2291 ret = 1; 2292 break; 2293 2294 case APIC_SPIV: { 2295 u32 mask = 0x3ff; 2296 if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) 2297 mask |= APIC_SPIV_DIRECTED_EOI; 2298 apic_set_spiv(apic, val & mask); 2299 if (!(val & APIC_SPIV_APIC_ENABLED)) { 2300 int i; 2301 2302 for (i = 0; i < apic->nr_lvt_entries; i++) { 2303 kvm_lapic_set_reg(apic, APIC_LVTx(i), 2304 kvm_lapic_get_reg(apic, APIC_LVTx(i)) | APIC_LVT_MASKED); 2305 } 2306 apic_update_lvtt(apic); 2307 atomic_set(&apic->lapic_timer.pending, 0); 2308 2309 } 2310 break; 2311 } 2312 case APIC_ICR: 2313 WARN_ON_ONCE(apic_x2apic_mode(apic)); 2314 2315 /* No delay here, so we always clear the pending bit */ 2316 val &= ~APIC_ICR_BUSY; 2317 kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2)); 2318 kvm_lapic_set_reg(apic, APIC_ICR, val); 2319 break; 2320 case APIC_ICR2: 2321 if (apic_x2apic_mode(apic)) 2322 ret = 1; 2323 else 2324 kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000); 2325 break; 2326 2327 case APIC_LVT0: 2328 apic_manage_nmi_watchdog(apic, val); 2329 fallthrough; 2330 case APIC_LVTTHMR: 2331 case APIC_LVTPC: 2332 case APIC_LVT1: 2333 case APIC_LVTERR: 2334 case APIC_LVTCMCI: { 2335 u32 index = get_lvt_index(reg); 2336 if (!kvm_lapic_lvt_supported(apic, index)) { 2337 ret = 1; 2338 break; 2339 } 2340 if (!kvm_apic_sw_enabled(apic)) 2341 val |= APIC_LVT_MASKED; 2342 val &= apic_lvt_mask[index]; 2343 kvm_lapic_set_reg(apic, reg, val); 2344 break; 2345 } 2346 2347 case APIC_LVTT: 2348 if (!kvm_apic_sw_enabled(apic)) 2349 val |= APIC_LVT_MASKED; 2350 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); 2351 kvm_lapic_set_reg(apic, 
APIC_LVTT, val); 2352 apic_update_lvtt(apic); 2353 break; 2354 2355 case APIC_TMICT: 2356 if (apic_lvtt_tscdeadline(apic)) 2357 break; 2358 2359 cancel_apic_timer(apic); 2360 kvm_lapic_set_reg(apic, APIC_TMICT, val); 2361 start_apic_timer(apic); 2362 break; 2363 2364 case APIC_TDCR: { 2365 uint32_t old_divisor = apic->divide_count; 2366 2367 kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb); 2368 update_divide_count(apic); 2369 if (apic->divide_count != old_divisor && 2370 apic->lapic_timer.period) { 2371 hrtimer_cancel(&apic->lapic_timer.timer); 2372 update_target_expiration(apic, old_divisor); 2373 restart_apic_timer(apic); 2374 } 2375 break; 2376 } 2377 case APIC_ESR: 2378 if (apic_x2apic_mode(apic) && val != 0) 2379 ret = 1; 2380 break; 2381 2382 case APIC_SELF_IPI: 2383 /* 2384 * Self-IPI exists only when x2APIC is enabled. Bits 7:0 hold 2385 * the vector, everything else is reserved. 2386 */ 2387 if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK)) 2388 ret = 1; 2389 else 2390 kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0); 2391 break; 2392 default: 2393 ret = 1; 2394 break; 2395 } 2396 2397 /* 2398 * Recalculate APIC maps if necessary, e.g. if the software enable bit 2399 * was toggled, the APIC ID changed, etc... The maps are marked dirty 2400 * on relevant changes, i.e. this is a nop for most writes. 
2401 */ 2402 kvm_recalculate_apic_map(apic->vcpu->kvm); 2403 2404 return ret; 2405 } 2406 2407 static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, 2408 gpa_t address, int len, const void *data) 2409 { 2410 struct kvm_lapic *apic = to_lapic(this); 2411 unsigned int offset = address - apic->base_address; 2412 u32 val; 2413 2414 if (!apic_mmio_in_range(apic, address)) 2415 return -EOPNOTSUPP; 2416 2417 if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { 2418 if (!kvm_check_has_quirk(vcpu->kvm, 2419 KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) 2420 return -EOPNOTSUPP; 2421 2422 return 0; 2423 } 2424 2425 /* 2426 * APIC register must be aligned on 128-bits boundary. 2427 * 32/64/128 bits registers must be accessed thru 32 bits. 2428 * Refer SDM 8.4.1 2429 */ 2430 if (len != 4 || (offset & 0xf)) 2431 return 0; 2432 2433 val = *(u32*)data; 2434 2435 kvm_lapic_reg_write(apic, offset & 0xff0, val); 2436 2437 return 0; 2438 } 2439 2440 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) 2441 { 2442 kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0); 2443 } 2444 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); 2445 2446 #define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13)) 2447 2448 int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data) 2449 { 2450 if (data & X2APIC_ICR_RESERVED_BITS) 2451 return 1; 2452 2453 /* 2454 * The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but 2455 * only AMD requires it to be zero, Intel essentially just ignores the 2456 * bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled, 2457 * the CPU performs the reserved bits checks, i.e. the underlying CPU 2458 * behavior will "win". Arbitrarily clear the BUSY bit, as there is no 2459 * sane way to provide consistent behavior with respect to hardware. 
2460 */ 2461 data &= ~APIC_ICR_BUSY; 2462 2463 kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32)); 2464 kvm_lapic_set_reg64(apic, APIC_ICR, data); 2465 trace_kvm_apic_write(APIC_ICR, data); 2466 return 0; 2467 } 2468 2469 /* emulate APIC access in a trap manner */ 2470 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) 2471 { 2472 struct kvm_lapic *apic = vcpu->arch.apic; 2473 2474 /* 2475 * ICR is a single 64-bit register when x2APIC is enabled, all others 2476 * registers hold 32-bit values. For legacy xAPIC, ICR writes need to 2477 * go down the common path to get the upper half from ICR2. 2478 * 2479 * Note, using the write helpers may incur an unnecessary write to the 2480 * virtual APIC state, but KVM needs to conditionally modify the value 2481 * in certain cases, e.g. to clear the ICR busy bit. The cost of extra 2482 * conditional branches is likely a wash relative to the cost of the 2483 * maybe-unecessary write, and both are in the noise anyways. 2484 */ 2485 if (apic_x2apic_mode(apic) && offset == APIC_ICR) 2486 WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR))); 2487 else 2488 kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); 2489 } 2490 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); 2491 2492 void kvm_free_lapic(struct kvm_vcpu *vcpu) 2493 { 2494 struct kvm_lapic *apic = vcpu->arch.apic; 2495 2496 if (!vcpu->arch.apic) 2497 return; 2498 2499 hrtimer_cancel(&apic->lapic_timer.timer); 2500 2501 if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) 2502 static_branch_slow_dec_deferred(&apic_hw_disabled); 2503 2504 if (!apic->sw_enabled) 2505 static_branch_slow_dec_deferred(&apic_sw_disabled); 2506 2507 if (apic->regs) 2508 free_page((unsigned long)apic->regs); 2509 2510 kfree(apic); 2511 } 2512 2513 /* 2514 *---------------------------------------------------------------------- 2515 * LAPIC interface 2516 *---------------------------------------------------------------------- 2517 */ 2518 u64 
kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) 2519 { 2520 struct kvm_lapic *apic = vcpu->arch.apic; 2521 2522 if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic)) 2523 return 0; 2524 2525 return apic->lapic_timer.tscdeadline; 2526 } 2527 2528 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) 2529 { 2530 struct kvm_lapic *apic = vcpu->arch.apic; 2531 2532 if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic)) 2533 return; 2534 2535 hrtimer_cancel(&apic->lapic_timer.timer); 2536 apic->lapic_timer.tscdeadline = data; 2537 start_apic_timer(apic); 2538 } 2539 2540 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) 2541 { 2542 apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4); 2543 } 2544 2545 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) 2546 { 2547 u64 tpr; 2548 2549 tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI); 2550 2551 return (tpr & 0xf0) >> 4; 2552 } 2553 2554 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) 2555 { 2556 u64 old_value = vcpu->arch.apic_base; 2557 struct kvm_lapic *apic = vcpu->arch.apic; 2558 2559 vcpu->arch.apic_base = value; 2560 2561 if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) 2562 kvm_update_cpuid_runtime(vcpu); 2563 2564 if (!apic) 2565 return; 2566 2567 /* update jump label if enable bit changes */ 2568 if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) { 2569 if (value & MSR_IA32_APICBASE_ENABLE) { 2570 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); 2571 static_branch_slow_dec_deferred(&apic_hw_disabled); 2572 /* Check if there are APF page ready requests pending */ 2573 kvm_make_request(KVM_REQ_APF_READY, vcpu); 2574 } else { 2575 static_branch_inc(&apic_hw_disabled.key); 2576 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 2577 } 2578 } 2579 2580 if ((old_value ^ value) & X2APIC_ENABLE) { 2581 if (value & X2APIC_ENABLE) 2582 kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); 2583 else if (value & MSR_IA32_APICBASE_ENABLE) 2584 
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); 2585 } 2586 2587 if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) { 2588 kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); 2589 static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu); 2590 } 2591 2592 apic->base_address = apic->vcpu->arch.apic_base & 2593 MSR_IA32_APICBASE_BASE; 2594 2595 if ((value & MSR_IA32_APICBASE_ENABLE) && 2596 apic->base_address != APIC_DEFAULT_PHYS_BASE) { 2597 kvm_set_apicv_inhibit(apic->vcpu->kvm, 2598 APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); 2599 } 2600 } 2601 2602 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) 2603 { 2604 struct kvm_lapic *apic = vcpu->arch.apic; 2605 2606 /* 2607 * When APICv is enabled, KVM must always search the IRR for a pending 2608 * IRQ, as other vCPUs and devices can set IRR bits even if the vCPU 2609 * isn't running. If APICv is disabled, KVM _should_ search the IRR 2610 * for a pending IRQ. But KVM currently doesn't ensure *all* hardware, 2611 * e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching 2612 * the IRR at this time could race with IRQ delivery from hardware that 2613 * still sees APICv as being enabled. 2614 * 2615 * FIXME: Ensure other vCPUs and devices observe the change in APICv 2616 * state prior to updating KVM's metadata caches, so that KVM 2617 * can safely search the IRR and set irr_pending accordingly. 
2618 */ 2619 apic->irr_pending = true; 2620 2621 if (apic->apicv_active) 2622 apic->isr_count = 1; 2623 else 2624 apic->isr_count = count_vectors(apic->regs + APIC_ISR); 2625 2626 apic->highest_isr_cache = -1; 2627 } 2628 2629 int kvm_alloc_apic_access_page(struct kvm *kvm) 2630 { 2631 struct page *page; 2632 void __user *hva; 2633 int ret = 0; 2634 2635 mutex_lock(&kvm->slots_lock); 2636 if (kvm->arch.apic_access_memslot_enabled || 2637 kvm->arch.apic_access_memslot_inhibited) 2638 goto out; 2639 2640 hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 2641 APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); 2642 if (IS_ERR(hva)) { 2643 ret = PTR_ERR(hva); 2644 goto out; 2645 } 2646 2647 page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); 2648 if (is_error_page(page)) { 2649 ret = -EFAULT; 2650 goto out; 2651 } 2652 2653 /* 2654 * Do not pin the page in memory, so that memory hot-unplug 2655 * is able to migrate it. 2656 */ 2657 put_page(page); 2658 kvm->arch.apic_access_memslot_enabled = true; 2659 out: 2660 mutex_unlock(&kvm->slots_lock); 2661 return ret; 2662 } 2663 EXPORT_SYMBOL_GPL(kvm_alloc_apic_access_page); 2664 2665 void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu) 2666 { 2667 struct kvm *kvm = vcpu->kvm; 2668 2669 if (!kvm->arch.apic_access_memslot_enabled) 2670 return; 2671 2672 kvm_vcpu_srcu_read_unlock(vcpu); 2673 2674 mutex_lock(&kvm->slots_lock); 2675 2676 if (kvm->arch.apic_access_memslot_enabled) { 2677 __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0); 2678 /* 2679 * Clear "enabled" after the memslot is deleted so that a 2680 * different vCPU doesn't get a false negative when checking 2681 * the flag out of slots_lock. No additional memory barrier is 2682 * needed as modifying memslots requires waiting other vCPUs to 2683 * drop SRCU (see above), and false positives are ok as the 2684 * flag is rechecked after acquiring slots_lock. 
2685 */ 2686 kvm->arch.apic_access_memslot_enabled = false; 2687 2688 /* 2689 * Mark the memslot as inhibited to prevent reallocating the 2690 * memslot during vCPU creation, e.g. if a vCPU is hotplugged. 2691 */ 2692 kvm->arch.apic_access_memslot_inhibited = true; 2693 } 2694 2695 mutex_unlock(&kvm->slots_lock); 2696 2697 kvm_vcpu_srcu_read_lock(vcpu); 2698 } 2699 2700 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) 2701 { 2702 struct kvm_lapic *apic = vcpu->arch.apic; 2703 u64 msr_val; 2704 int i; 2705 2706 static_call_cond(kvm_x86_apicv_pre_state_restore)(vcpu); 2707 2708 if (!init_event) { 2709 msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; 2710 if (kvm_vcpu_is_reset_bsp(vcpu)) 2711 msr_val |= MSR_IA32_APICBASE_BSP; 2712 kvm_lapic_set_base(vcpu, msr_val); 2713 } 2714 2715 if (!apic) 2716 return; 2717 2718 /* Stop the timer in case it's a reset to an active apic */ 2719 hrtimer_cancel(&apic->lapic_timer.timer); 2720 2721 /* The xAPIC ID is set at RESET even if the APIC was already enabled. 
*/ 2722 if (!init_event) 2723 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); 2724 kvm_apic_set_version(apic->vcpu); 2725 2726 for (i = 0; i < apic->nr_lvt_entries; i++) 2727 kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED); 2728 apic_update_lvtt(apic); 2729 if (kvm_vcpu_is_reset_bsp(vcpu) && 2730 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) 2731 kvm_lapic_set_reg(apic, APIC_LVT0, 2732 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); 2733 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); 2734 2735 kvm_apic_set_dfr(apic, 0xffffffffU); 2736 apic_set_spiv(apic, 0xff); 2737 kvm_lapic_set_reg(apic, APIC_TASKPRI, 0); 2738 if (!apic_x2apic_mode(apic)) 2739 kvm_apic_set_ldr(apic, 0); 2740 kvm_lapic_set_reg(apic, APIC_ESR, 0); 2741 if (!apic_x2apic_mode(apic)) { 2742 kvm_lapic_set_reg(apic, APIC_ICR, 0); 2743 kvm_lapic_set_reg(apic, APIC_ICR2, 0); 2744 } else { 2745 kvm_lapic_set_reg64(apic, APIC_ICR, 0); 2746 } 2747 kvm_lapic_set_reg(apic, APIC_TDCR, 0); 2748 kvm_lapic_set_reg(apic, APIC_TMICT, 0); 2749 for (i = 0; i < 8; i++) { 2750 kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0); 2751 kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 2752 kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 2753 } 2754 kvm_apic_update_apicv(vcpu); 2755 update_divide_count(apic); 2756 atomic_set(&apic->lapic_timer.pending, 0); 2757 2758 vcpu->arch.pv_eoi.msr_val = 0; 2759 apic_update_ppr(apic); 2760 if (apic->apicv_active) { 2761 static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu); 2762 static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, -1); 2763 static_call_cond(kvm_x86_hwapic_isr_update)(-1); 2764 } 2765 2766 vcpu->arch.apic_arb_prio = 0; 2767 vcpu->arch.apic_attention = 0; 2768 2769 kvm_recalculate_apic_map(vcpu->kvm); 2770 } 2771 2772 /* 2773 *---------------------------------------------------------------------- 2774 * timer interface 2775 *---------------------------------------------------------------------- 2776 */ 2777 2778 static bool 
lapic_is_periodic(struct kvm_lapic *apic) 2779 { 2780 return apic_lvtt_period(apic); 2781 } 2782 2783 int apic_has_pending_timer(struct kvm_vcpu *vcpu) 2784 { 2785 struct kvm_lapic *apic = vcpu->arch.apic; 2786 2787 if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT)) 2788 return atomic_read(&apic->lapic_timer.pending); 2789 2790 return 0; 2791 } 2792 2793 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) 2794 { 2795 u32 reg = kvm_lapic_get_reg(apic, lvt_type); 2796 int vector, mode, trig_mode; 2797 int r; 2798 2799 if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { 2800 vector = reg & APIC_VECTOR_MASK; 2801 mode = reg & APIC_MODE_MASK; 2802 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; 2803 2804 r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); 2805 if (r && lvt_type == APIC_LVTPC && 2806 guest_cpuid_is_intel_compatible(apic->vcpu)) 2807 kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); 2808 return r; 2809 } 2810 return 0; 2811 } 2812 2813 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) 2814 { 2815 struct kvm_lapic *apic = vcpu->arch.apic; 2816 2817 if (apic) 2818 kvm_apic_local_deliver(apic, APIC_LVT0); 2819 } 2820 2821 static const struct kvm_io_device_ops apic_mmio_ops = { 2822 .read = apic_mmio_read, 2823 .write = apic_mmio_write, 2824 }; 2825 2826 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) 2827 { 2828 struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); 2829 struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); 2830 2831 apic_timer_expired(apic, true); 2832 2833 if (lapic_is_periodic(apic)) { 2834 advance_periodic_target_expiration(apic); 2835 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); 2836 return HRTIMER_RESTART; 2837 } else 2838 return HRTIMER_NORESTART; 2839 } 2840 2841 int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) 2842 { 2843 struct kvm_lapic *apic; 2844 2845 ASSERT(vcpu != NULL); 2846 2847 apic = 
kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT); 2848 if (!apic) 2849 goto nomem; 2850 2851 vcpu->arch.apic = apic; 2852 2853 apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); 2854 if (!apic->regs) { 2855 printk(KERN_ERR "malloc apic regs error for vcpu %x\n", 2856 vcpu->vcpu_id); 2857 goto nomem_free_apic; 2858 } 2859 apic->vcpu = vcpu; 2860 2861 apic->nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu); 2862 2863 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, 2864 HRTIMER_MODE_ABS_HARD); 2865 apic->lapic_timer.timer.function = apic_timer_fn; 2866 if (timer_advance_ns == -1) { 2867 apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; 2868 lapic_timer_advance_dynamic = true; 2869 } else { 2870 apic->lapic_timer.timer_advance_ns = timer_advance_ns; 2871 lapic_timer_advance_dynamic = false; 2872 } 2873 2874 /* 2875 * Stuff the APIC ENABLE bit in lieu of temporarily incrementing 2876 * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset(). 2877 */ 2878 vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; 2879 static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */ 2880 kvm_iodevice_init(&apic->dev, &apic_mmio_ops); 2881 2882 return 0; 2883 nomem_free_apic: 2884 kfree(apic); 2885 vcpu->arch.apic = NULL; 2886 nomem: 2887 return -ENOMEM; 2888 } 2889 2890 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 2891 { 2892 struct kvm_lapic *apic = vcpu->arch.apic; 2893 u32 ppr; 2894 2895 if (!kvm_apic_present(vcpu)) 2896 return -1; 2897 2898 __apic_update_ppr(apic, &ppr); 2899 return apic_has_interrupt_for_ppr(apic, ppr); 2900 } 2901 EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt); 2902 2903 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) 2904 { 2905 u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0); 2906 2907 if (!kvm_apic_hw_enabled(vcpu->arch.apic)) 2908 return 1; 2909 if ((lvt0 & APIC_LVT_MASKED) == 0 && 2910 GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) 2911 return 1; 2912 return 0; 2913 } 2914 2915 void 
kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) 2916 { 2917 struct kvm_lapic *apic = vcpu->arch.apic; 2918 2919 if (atomic_read(&apic->lapic_timer.pending) > 0) { 2920 kvm_apic_inject_pending_timer_irqs(apic); 2921 atomic_set(&apic->lapic_timer.pending, 0); 2922 } 2923 } 2924 2925 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) 2926 { 2927 int vector = kvm_apic_has_interrupt(vcpu); 2928 struct kvm_lapic *apic = vcpu->arch.apic; 2929 u32 ppr; 2930 2931 if (vector == -1) 2932 return -1; 2933 2934 /* 2935 * We get here even with APIC virtualization enabled, if doing 2936 * nested virtualization and L1 runs with the "acknowledge interrupt 2937 * on exit" mode. Then we cannot inject the interrupt via RVI, 2938 * because the process would deliver it through the IDT. 2939 */ 2940 2941 apic_clear_irr(vector, apic); 2942 if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) { 2943 /* 2944 * For auto-EOI interrupts, there might be another pending 2945 * interrupt above PPR, so check whether to raise another 2946 * KVM_REQ_EVENT. 2947 */ 2948 apic_update_ppr(apic); 2949 } else { 2950 /* 2951 * For normal interrupts, PPR has been raised and there cannot 2952 * be a higher-priority pending interrupt---except if there was 2953 * a concurrent interrupt injection, but that would have 2954 * triggered KVM_REQ_EVENT already. 2955 */ 2956 apic_set_isr(vector, apic); 2957 __apic_update_ppr(apic, &ppr); 2958 } 2959 2960 return vector; 2961 } 2962 2963 static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, 2964 struct kvm_lapic_state *s, bool set) 2965 { 2966 if (apic_x2apic_mode(vcpu->arch.apic)) { 2967 u32 *id = (u32 *)(s->regs + APIC_ID); 2968 u32 *ldr = (u32 *)(s->regs + APIC_LDR); 2969 u64 icr; 2970 2971 if (vcpu->kvm->arch.x2apic_format) { 2972 if (*id != vcpu->vcpu_id) 2973 return -EINVAL; 2974 } else { 2975 if (set) 2976 *id >>= 24; 2977 else 2978 *id <<= 24; 2979 } 2980 2981 /* 2982 * In x2APIC mode, the LDR is fixed and based on the id. 
And 2983 * ICR is internally a single 64-bit register, but needs to be 2984 * split to ICR+ICR2 in userspace for backwards compatibility. 2985 */ 2986 if (set) { 2987 *ldr = kvm_apic_calc_x2apic_ldr(*id); 2988 2989 icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | 2990 (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; 2991 __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); 2992 } else { 2993 icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); 2994 __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); 2995 } 2996 } 2997 2998 return 0; 2999 } 3000 3001 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) 3002 { 3003 memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s)); 3004 3005 /* 3006 * Get calculated timer current count for remaining timer period (if 3007 * any) and store it in the returned register set. 3008 */ 3009 __kvm_lapic_set_reg(s->regs, APIC_TMCCT, 3010 __apic_read(vcpu->arch.apic, APIC_TMCCT)); 3011 3012 return kvm_apic_state_fixup(vcpu, s, false); 3013 } 3014 3015 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) 3016 { 3017 struct kvm_lapic *apic = vcpu->arch.apic; 3018 int r; 3019 3020 static_call_cond(kvm_x86_apicv_pre_state_restore)(vcpu); 3021 3022 kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); 3023 /* set SPIV separately to get count of SW disabled APICs right */ 3024 apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); 3025 3026 r = kvm_apic_state_fixup(vcpu, s, true); 3027 if (r) { 3028 kvm_recalculate_apic_map(vcpu->kvm); 3029 return r; 3030 } 3031 memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); 3032 3033 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 3034 kvm_recalculate_apic_map(vcpu->kvm); 3035 kvm_apic_set_version(vcpu); 3036 3037 apic_update_ppr(apic); 3038 cancel_apic_timer(apic); 3039 apic->lapic_timer.expired_tscdeadline = 0; 3040 apic_update_lvtt(apic); 3041 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); 3042 update_divide_count(apic); 3043 
__start_apic_timer(apic, APIC_TMCCT); 3044 kvm_lapic_set_reg(apic, APIC_TMCCT, 0); 3045 kvm_apic_update_apicv(vcpu); 3046 if (apic->apicv_active) { 3047 static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu); 3048 static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic)); 3049 static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic)); 3050 } 3051 kvm_make_request(KVM_REQ_EVENT, vcpu); 3052 if (ioapic_in_kernel(vcpu->kvm)) 3053 kvm_rtc_eoi_tracking_restore_one(vcpu); 3054 3055 vcpu->arch.apic_arb_prio = 0; 3056 3057 return 0; 3058 } 3059 3060 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) 3061 { 3062 struct hrtimer *timer; 3063 3064 if (!lapic_in_kernel(vcpu) || 3065 kvm_can_post_timer_interrupt(vcpu)) 3066 return; 3067 3068 timer = &vcpu->arch.apic->lapic_timer.timer; 3069 if (hrtimer_cancel(timer)) 3070 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD); 3071 } 3072 3073 /* 3074 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt 3075 * 3076 * Detect whether guest triggered PV EOI since the 3077 * last entry. If yes, set EOI on guests's behalf. 3078 * Clear PV EOI in guest memory in any case. 3079 */ 3080 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, 3081 struct kvm_lapic *apic) 3082 { 3083 int vector; 3084 /* 3085 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host 3086 * and KVM_PV_EOI_ENABLED in guest memory as follows: 3087 * 3088 * KVM_APIC_PV_EOI_PENDING is unset: 3089 * -> host disabled PV EOI. 3090 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set: 3091 * -> host enabled PV EOI, guest did not execute EOI yet. 3092 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset: 3093 * -> host enabled PV EOI, guest executed EOI. 
3094 */ 3095 BUG_ON(!pv_eoi_enabled(vcpu)); 3096 3097 if (pv_eoi_test_and_clr_pending(vcpu)) 3098 return; 3099 vector = apic_set_eoi(apic); 3100 trace_kvm_pv_eoi(apic, vector); 3101 } 3102 3103 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) 3104 { 3105 u32 data; 3106 3107 if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention)) 3108 apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic); 3109 3110 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 3111 return; 3112 3113 if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, 3114 sizeof(u32))) 3115 return; 3116 3117 apic_set_tpr(vcpu->arch.apic, data & 0xff); 3118 } 3119 3120 /* 3121 * apic_sync_pv_eoi_to_guest - called before vmentry 3122 * 3123 * Detect whether it's safe to enable PV EOI and 3124 * if yes do so. 3125 */ 3126 static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, 3127 struct kvm_lapic *apic) 3128 { 3129 if (!pv_eoi_enabled(vcpu) || 3130 /* IRR set or many bits in ISR: could be nested. */ 3131 apic->irr_pending || 3132 /* Cache not set: could be safe but we don't bother. */ 3133 apic->highest_isr_cache == -1 || 3134 /* Need EOI to update ioapic. */ 3135 kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) { 3136 /* 3137 * PV EOI was disabled by apic_sync_pv_eoi_from_guest 3138 * so we need not do anything here. 
3139 */ 3140 return; 3141 } 3142 3143 pv_eoi_set_pending(apic->vcpu); 3144 } 3145 3146 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) 3147 { 3148 u32 data, tpr; 3149 int max_irr, max_isr; 3150 struct kvm_lapic *apic = vcpu->arch.apic; 3151 3152 apic_sync_pv_eoi_to_guest(vcpu, apic); 3153 3154 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 3155 return; 3156 3157 tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff; 3158 max_irr = apic_find_highest_irr(apic); 3159 if (max_irr < 0) 3160 max_irr = 0; 3161 max_isr = apic_find_highest_isr(apic); 3162 if (max_isr < 0) 3163 max_isr = 0; 3164 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); 3165 3166 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, 3167 sizeof(u32)); 3168 } 3169 3170 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) 3171 { 3172 if (vapic_addr) { 3173 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, 3174 &vcpu->arch.apic->vapic_cache, 3175 vapic_addr, sizeof(u32))) 3176 return -EINVAL; 3177 __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); 3178 } else { 3179 __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); 3180 } 3181 3182 vcpu->arch.apic->vapic_addr = vapic_addr; 3183 return 0; 3184 } 3185 3186 static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data) 3187 { 3188 u32 low; 3189 3190 if (reg == APIC_ICR) { 3191 *data = kvm_lapic_get_reg64(apic, APIC_ICR); 3192 return 0; 3193 } 3194 3195 if (kvm_lapic_reg_read(apic, reg, 4, &low)) 3196 return 1; 3197 3198 *data = low; 3199 3200 return 0; 3201 } 3202 3203 static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data) 3204 { 3205 /* 3206 * ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and 3207 * can be written as such, all other registers remain accessible only 3208 * through 32-bit reads/writes. 
3209 */ 3210 if (reg == APIC_ICR) 3211 return kvm_x2apic_icr_write(apic, data); 3212 3213 /* Bits 63:32 are reserved in all other registers. */ 3214 if (data >> 32) 3215 return 1; 3216 3217 return kvm_lapic_reg_write(apic, reg, (u32)data); 3218 } 3219 3220 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) 3221 { 3222 struct kvm_lapic *apic = vcpu->arch.apic; 3223 u32 reg = (msr - APIC_BASE_MSR) << 4; 3224 3225 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) 3226 return 1; 3227 3228 return kvm_lapic_msr_write(apic, reg, data); 3229 } 3230 3231 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) 3232 { 3233 struct kvm_lapic *apic = vcpu->arch.apic; 3234 u32 reg = (msr - APIC_BASE_MSR) << 4; 3235 3236 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) 3237 return 1; 3238 3239 return kvm_lapic_msr_read(apic, reg, data); 3240 } 3241 3242 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) 3243 { 3244 if (!lapic_in_kernel(vcpu)) 3245 return 1; 3246 3247 return kvm_lapic_msr_write(vcpu->arch.apic, reg, data); 3248 } 3249 3250 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) 3251 { 3252 if (!lapic_in_kernel(vcpu)) 3253 return 1; 3254 3255 return kvm_lapic_msr_read(vcpu->arch.apic, reg, data); 3256 } 3257 3258 int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len) 3259 { 3260 u64 addr = data & ~KVM_MSR_ENABLED; 3261 struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data; 3262 unsigned long new_len; 3263 int ret; 3264 3265 if (!IS_ALIGNED(addr, 4)) 3266 return 1; 3267 3268 if (data & KVM_MSR_ENABLED) { 3269 if (addr == ghc->gpa && len <= ghc->len) 3270 new_len = ghc->len; 3271 else 3272 new_len = len; 3273 3274 ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len); 3275 if (ret) 3276 return ret; 3277 } 3278 3279 vcpu->arch.pv_eoi.msr_val = data; 3280 3281 return 0; 3282 } 3283 3284 int kvm_apic_accept_events(struct kvm_vcpu *vcpu) 3285 { 3286 struct kvm_lapic *apic 
= vcpu->arch.apic; 3287 u8 sipi_vector; 3288 int r; 3289 3290 if (!kvm_apic_has_pending_init_or_sipi(vcpu)) 3291 return 0; 3292 3293 if (is_guest_mode(vcpu)) { 3294 r = kvm_check_nested_events(vcpu); 3295 if (r < 0) 3296 return r == -EBUSY ? 0 : r; 3297 /* 3298 * Continue processing INIT/SIPI even if a nested VM-Exit 3299 * occurred, e.g. pending SIPIs should be dropped if INIT+SIPI 3300 * are blocked as a result of transitioning to VMX root mode. 3301 */ 3302 } 3303 3304 /* 3305 * INITs are blocked while CPU is in specific states (SMM, VMX root 3306 * mode, SVM with GIF=0), while SIPIs are dropped if the CPU isn't in 3307 * wait-for-SIPI (WFS). 3308 */ 3309 if (!kvm_apic_init_sipi_allowed(vcpu)) { 3310 WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); 3311 clear_bit(KVM_APIC_SIPI, &apic->pending_events); 3312 return 0; 3313 } 3314 3315 if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { 3316 kvm_vcpu_reset(vcpu, true); 3317 if (kvm_vcpu_is_bsp(apic->vcpu)) 3318 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 3319 else 3320 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; 3321 } 3322 if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) { 3323 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { 3324 /* evaluate pending_events before reading the vector */ 3325 smp_rmb(); 3326 sipi_vector = apic->sipi_vector; 3327 static_call(kvm_x86_vcpu_deliver_sipi_vector)(vcpu, sipi_vector); 3328 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 3329 } 3330 } 3331 return 0; 3332 } 3333 3334 void kvm_lapic_exit(void) 3335 { 3336 static_key_deferred_flush(&apic_hw_disabled); 3337 WARN_ON(static_branch_unlikely(&apic_hw_disabled.key)); 3338 static_key_deferred_flush(&apic_sw_disabled); 3339 WARN_ON(static_branch_unlikely(&apic_sw_disabled.key)); 3340 } 3341