1 // SPDX-License-Identifier: GPL-2.0-only 2 3 /* 4 * Local APIC virtualization 5 * 6 * Copyright (C) 2006 Qumranet, Inc. 7 * Copyright (C) 2007 Novell 8 * Copyright (C) 2007 Intel 9 * Copyright 2009 Red Hat, Inc. and/or its affiliates. 10 * 11 * Authors: 12 * Dor Laor <dor.laor@qumranet.com> 13 * Gregory Haskins <ghaskins@novell.com> 14 * Yaozu (Eddie) Dong <eddie.dong@intel.com> 15 * 16 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation. 17 */ 18 19 #include <linux/kvm_host.h> 20 #include <linux/kvm.h> 21 #include <linux/mm.h> 22 #include <linux/highmem.h> 23 #include <linux/smp.h> 24 #include <linux/hrtimer.h> 25 #include <linux/io.h> 26 #include <linux/export.h> 27 #include <linux/math64.h> 28 #include <linux/slab.h> 29 #include <asm/processor.h> 30 #include <asm/msr.h> 31 #include <asm/page.h> 32 #include <asm/current.h> 33 #include <asm/apicdef.h> 34 #include <asm/delay.h> 35 #include <linux/atomic.h> 36 #include <linux/jump_label.h> 37 #include "kvm_cache_regs.h" 38 #include "irq.h" 39 #include "ioapic.h" 40 #include "trace.h" 41 #include "x86.h" 42 #include "cpuid.h" 43 #include "hyperv.h" 44 45 #ifndef CONFIG_X86_64 46 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) 47 #else 48 #define mod_64(x, y) ((x) % (y)) 49 #endif 50 51 #define PRId64 "d" 52 #define PRIx64 "llx" 53 #define PRIu64 "u" 54 #define PRIo64 "o" 55 56 /* 14 is the version for Xeon and Pentium 8.4.8*/ 57 #define APIC_VERSION 0x14UL 58 #define LAPIC_MMIO_LENGTH (1 << 12) 59 /* followed define is not in apicdef.h */ 60 #define MAX_APIC_VECTOR 256 61 #define APIC_VECTORS_PER_REG 32 62 63 static bool lapic_timer_advance_dynamic __read_mostly; 64 #define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */ 65 #define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */ 66 #define LAPIC_TIMER_ADVANCE_NS_INIT 1000 67 #define LAPIC_TIMER_ADVANCE_NS_MAX 5000 68 /* step-by-step approximation to mitigate fluctuation */ 69 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 70 static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data); 71 72 static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val) 73 { 74 *((u32 *) (regs + reg_off)) = val; 75 } 76 77 static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) 78 { 79 __kvm_lapic_set_reg(apic->regs, reg_off, val); 80 } 81 82 static __always_inline u64 __kvm_lapic_get_reg64(char *regs, int reg) 83 { 84 BUILD_BUG_ON(reg != APIC_ICR); 85 return *((u64 *) (regs + reg)); 86 } 87 88 static __always_inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg) 89 { 90 return __kvm_lapic_get_reg64(apic->regs, reg); 91 } 92 93 static __always_inline void __kvm_lapic_set_reg64(char *regs, int reg, u64 val) 94 { 95 BUILD_BUG_ON(reg != APIC_ICR); 96 *((u64 *) (regs + reg)) = val; 97 } 98 99 static __always_inline void kvm_lapic_set_reg64(struct kvm_lapic *apic, 100 int reg, u64 val) 101 { 102 __kvm_lapic_set_reg64(apic->regs, reg, val); 103 } 104 105 static inline int apic_test_vector(int vec, void *bitmap) 106 { 107 return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 108 } 109 110 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) 111 { 112 struct kvm_lapic *apic = vcpu->arch.apic; 113 114 return apic_test_vector(vector, apic->regs + APIC_ISR) || 115 apic_test_vector(vector, apic->regs + APIC_IRR); 116 } 117 118 static inline int __apic_test_and_set_vector(int vec, void *bitmap) 119 { 120 return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 121 } 122 123 static inline int 
__apic_test_and_clear_vector(int vec, void *bitmap) 124 { 125 return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 126 } 127 128 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ); 129 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ); 130 131 static inline int apic_enabled(struct kvm_lapic *apic) 132 { 133 return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic); 134 } 135 136 #define LVT_MASK \ 137 (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK) 138 139 #define LINT_MASK \ 140 (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ 141 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) 142 143 static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) 144 { 145 return apic->vcpu->vcpu_id; 146 } 147 148 static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) 149 { 150 return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) && 151 (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm)); 152 } 153 154 bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) 155 { 156 return kvm_x86_ops.set_hv_timer 157 && !(kvm_mwait_in_guest(vcpu->kvm) || 158 kvm_can_post_timer_interrupt(vcpu)); 159 } 160 EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer); 161 162 static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu) 163 { 164 return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE; 165 } 166 167 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, 168 u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) { 169 switch (map->mode) { 170 case KVM_APIC_MODE_X2APIC: { 171 u32 offset = (dest_id >> 16) * 16; 172 u32 max_apic_id = map->max_apic_id; 173 174 if (offset <= max_apic_id) { 175 u8 cluster_size = min(max_apic_id - offset + 1, 16U); 176 177 offset = array_index_nospec(offset, map->max_apic_id + 1); 178 *cluster = &map->phys_map[offset]; 179 *mask = dest_id & (0xffff >> (16 - cluster_size)); 180 } else { 181 *mask = 0; 182 } 183 184 return true; 185 } 186 case KVM_APIC_MODE_XAPIC_FLAT: 187 *cluster = map->xapic_flat_map; 188 *mask = dest_id & 0xff; 189 return true; 190 case KVM_APIC_MODE_XAPIC_CLUSTER: 191 *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf]; 192 *mask = dest_id & 0xf; 193 return true; 194 default: 195 /* Not optimized. */ 196 return false; 197 } 198 } 199 200 static void kvm_apic_map_free(struct rcu_head *rcu) 201 { 202 struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu); 203 204 kvfree(map); 205 } 206 207 /* 208 * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock. 209 * 210 * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with 211 * apic_map_lock_held. 212 */ 213 enum { 214 CLEAN, 215 UPDATE_IN_PROGRESS, 216 DIRTY 217 }; 218 219 void kvm_recalculate_apic_map(struct kvm *kvm) 220 { 221 struct kvm_apic_map *new, *old = NULL; 222 struct kvm_vcpu *vcpu; 223 unsigned long i; 224 u32 max_id = 255; /* enough space for any xAPIC ID */ 225 226 /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */ 227 if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN) 228 return; 229 230 WARN_ONCE(!irqchip_in_kernel(kvm), 231 "Dirty APIC map without an in-kernel local APIC"); 232 233 mutex_lock(&kvm->arch.apic_map_lock); 234 /* 235 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map 236 * (if clean) or the APIC registers (if dirty). 237 */ 238 if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty, 239 DIRTY, UPDATE_IN_PROGRESS) == CLEAN) { 240 /* Someone else has updated the map. 
*/ 241 mutex_unlock(&kvm->arch.apic_map_lock); 242 return; 243 } 244 245 kvm_for_each_vcpu(i, vcpu, kvm) 246 if (kvm_apic_present(vcpu)) 247 max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); 248 249 new = kvzalloc(sizeof(struct kvm_apic_map) + 250 sizeof(struct kvm_lapic *) * ((u64)max_id + 1), 251 GFP_KERNEL_ACCOUNT); 252 253 if (!new) 254 goto out; 255 256 new->max_apic_id = max_id; 257 258 kvm_for_each_vcpu(i, vcpu, kvm) { 259 struct kvm_lapic *apic = vcpu->arch.apic; 260 struct kvm_lapic **cluster; 261 u16 mask; 262 u32 ldr; 263 u8 xapic_id; 264 u32 x2apic_id; 265 266 if (!kvm_apic_present(vcpu)) 267 continue; 268 269 xapic_id = kvm_xapic_id(apic); 270 x2apic_id = kvm_x2apic_id(apic); 271 272 /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */ 273 if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) && 274 x2apic_id <= new->max_apic_id) 275 new->phys_map[x2apic_id] = apic; 276 /* 277 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around, 278 * prevent them from masking VCPUs with APIC ID <= 0xff. 279 */ 280 if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id]) 281 new->phys_map[xapic_id] = apic; 282 283 if (!kvm_apic_sw_enabled(apic)) 284 continue; 285 286 ldr = kvm_lapic_get_reg(apic, APIC_LDR); 287 288 if (apic_x2apic_mode(apic)) { 289 new->mode |= KVM_APIC_MODE_X2APIC; 290 } else if (ldr) { 291 ldr = GET_APIC_LOGICAL_ID(ldr); 292 if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT) 293 new->mode |= KVM_APIC_MODE_XAPIC_FLAT; 294 else 295 new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; 296 } 297 298 if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask)) 299 continue; 300 301 if (mask) 302 cluster[ffs(mask) - 1] = apic; 303 } 304 out: 305 old = rcu_dereference_protected(kvm->arch.apic_map, 306 lockdep_is_held(&kvm->arch.apic_map_lock)); 307 rcu_assign_pointer(kvm->arch.apic_map, new); 308 /* 309 * Write kvm->arch.apic_map before clearing apic->apic_map_dirty. 310 * If another update has come in, leave it DIRTY. 
311 */ 312 atomic_cmpxchg_release(&kvm->arch.apic_map_dirty, 313 UPDATE_IN_PROGRESS, CLEAN); 314 mutex_unlock(&kvm->arch.apic_map_lock); 315 316 if (old) 317 call_rcu(&old->rcu, kvm_apic_map_free); 318 319 kvm_make_scan_ioapic_request(kvm); 320 } 321 322 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) 323 { 324 bool enabled = val & APIC_SPIV_APIC_ENABLED; 325 326 kvm_lapic_set_reg(apic, APIC_SPIV, val); 327 328 if (enabled != apic->sw_enabled) { 329 apic->sw_enabled = enabled; 330 if (enabled) 331 static_branch_slow_dec_deferred(&apic_sw_disabled); 332 else 333 static_branch_inc(&apic_sw_disabled.key); 334 335 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 336 } 337 338 /* Check if there are APF page ready requests pending */ 339 if (enabled) 340 kvm_make_request(KVM_REQ_APF_READY, apic->vcpu); 341 } 342 343 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) 344 { 345 kvm_lapic_set_reg(apic, APIC_ID, id << 24); 346 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 347 } 348 349 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) 350 { 351 kvm_lapic_set_reg(apic, APIC_LDR, id); 352 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 353 } 354 355 static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val) 356 { 357 kvm_lapic_set_reg(apic, APIC_DFR, val); 358 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 359 } 360 361 static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) 362 { 363 return ((id >> 4) << 16) | (1 << (id & 0xf)); 364 } 365 366 static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) 367 { 368 u32 ldr = kvm_apic_calc_x2apic_ldr(id); 369 370 WARN_ON_ONCE(id != apic->vcpu->vcpu_id); 371 372 kvm_lapic_set_reg(apic, APIC_ID, id); 373 kvm_lapic_set_reg(apic, APIC_LDR, ldr); 374 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 375 } 376 377 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) 378 { 379 return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); 380 } 381 382 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) 383 { 384 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT; 385 } 386 387 static inline int apic_lvtt_period(struct kvm_lapic *apic) 388 { 389 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC; 390 } 391 392 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) 393 { 394 return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE; 395 } 396 397 static inline int apic_lvt_nmi_mode(u32 lvt_val) 398 { 399 return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; 400 } 401 402 void kvm_apic_set_version(struct kvm_vcpu *vcpu) 403 { 404 struct kvm_lapic *apic = vcpu->arch.apic; 405 u32 v = APIC_VERSION | ((KVM_APIC_MAX_NR_LVT_ENTRIES - 1) << 16); 406 407 if (!lapic_in_kernel(vcpu)) 408 return; 409 410 /* 411 * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) 412 * which doesn't have EOI register; Some buggy OSes (e.g. Windows with 413 * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC 414 * version first and level-triggered interrupts never get EOIed in 415 * IOAPIC. 
416 */ 417 if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) && 418 !ioapic_in_kernel(vcpu->kvm)) 419 v |= APIC_LVR_DIRECTED_EOI; 420 kvm_lapic_set_reg(apic, APIC_LVR, v); 421 } 422 423 static const unsigned int apic_lvt_mask[KVM_APIC_MAX_NR_LVT_ENTRIES] = { 424 [LVT_TIMER] = LVT_MASK, /* timer mode mask added at runtime */ 425 [LVT_THERMAL_MONITOR] = LVT_MASK | APIC_MODE_MASK, 426 [LVT_PERFORMANCE_COUNTER] = LVT_MASK | APIC_MODE_MASK, 427 [LVT_LINT0] = LINT_MASK, 428 [LVT_LINT1] = LINT_MASK, 429 [LVT_ERROR] = LVT_MASK 430 }; 431 432 static int find_highest_vector(void *bitmap) 433 { 434 int vec; 435 u32 *reg; 436 437 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; 438 vec >= 0; vec -= APIC_VECTORS_PER_REG) { 439 reg = bitmap + REG_POS(vec); 440 if (*reg) 441 return __fls(*reg) + vec; 442 } 443 444 return -1; 445 } 446 447 static u8 count_vectors(void *bitmap) 448 { 449 int vec; 450 u32 *reg; 451 u8 count = 0; 452 453 for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) { 454 reg = bitmap + REG_POS(vec); 455 count += hweight32(*reg); 456 } 457 458 return count; 459 } 460 461 bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr) 462 { 463 u32 i, vec; 464 u32 pir_val, irr_val, prev_irr_val; 465 int max_updated_irr; 466 467 max_updated_irr = -1; 468 *max_irr = -1; 469 470 for (i = vec = 0; i <= 7; i++, vec += 32) { 471 pir_val = READ_ONCE(pir[i]); 472 irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10)); 473 if (pir_val) { 474 prev_irr_val = irr_val; 475 irr_val |= xchg(&pir[i], 0); 476 *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val; 477 if (prev_irr_val != irr_val) { 478 max_updated_irr = 479 __fls(irr_val ^ prev_irr_val) + vec; 480 } 481 } 482 if (irr_val) 483 *max_irr = __fls(irr_val) + vec; 484 } 485 486 return ((max_updated_irr != -1) && 487 (max_updated_irr == *max_irr)); 488 } 489 EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); 490 491 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr) 492 { 493 struct kvm_lapic *apic = vcpu->arch.apic; 494 495 return __kvm_apic_update_irr(pir, apic->regs, max_irr); 496 } 497 EXPORT_SYMBOL_GPL(kvm_apic_update_irr); 498 499 static inline int apic_search_irr(struct kvm_lapic *apic) 500 { 501 return find_highest_vector(apic->regs + APIC_IRR); 502 } 503 504 static inline int apic_find_highest_irr(struct kvm_lapic *apic) 505 { 506 int result; 507 508 /* 509 * Note that irr_pending is just a hint. It will be always 510 * true with virtual interrupt delivery enabled. 
511 */ 512 if (!apic->irr_pending) 513 return -1; 514 515 result = apic_search_irr(apic); 516 ASSERT(result == -1 || result >= 16); 517 518 return result; 519 } 520 521 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) 522 { 523 if (unlikely(apic->apicv_active)) { 524 /* need to update RVI */ 525 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); 526 static_call_cond(kvm_x86_hwapic_irr_update)(apic->vcpu, 527 apic_find_highest_irr(apic)); 528 } else { 529 apic->irr_pending = false; 530 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); 531 if (apic_search_irr(apic) != -1) 532 apic->irr_pending = true; 533 } 534 } 535 536 void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec) 537 { 538 apic_clear_irr(vec, vcpu->arch.apic); 539 } 540 EXPORT_SYMBOL_GPL(kvm_apic_clear_irr); 541 542 static inline void apic_set_isr(int vec, struct kvm_lapic *apic) 543 { 544 if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) 545 return; 546 547 /* 548 * With APIC virtualization enabled, all caching is disabled 549 * because the processor can modify ISR under the hood. Instead 550 * just set SVI. 551 */ 552 if (unlikely(apic->apicv_active)) 553 static_call_cond(kvm_x86_hwapic_isr_update)(vec); 554 else { 555 ++apic->isr_count; 556 BUG_ON(apic->isr_count > MAX_APIC_VECTOR); 557 /* 558 * ISR (in service register) bit is set when injecting an interrupt. 559 * The highest vector is injected. Thus the latest bit set matches 560 * the highest bit in ISR. 561 */ 562 apic->highest_isr_cache = vec; 563 } 564 } 565 566 static inline int apic_find_highest_isr(struct kvm_lapic *apic) 567 { 568 int result; 569 570 /* 571 * Note that isr_count is always 1, and highest_isr_cache 572 * is always -1, with APIC virtualization enabled. 573 */ 574 if (!apic->isr_count) 575 return -1; 576 if (likely(apic->highest_isr_cache != -1)) 577 return apic->highest_isr_cache; 578 579 result = find_highest_vector(apic->regs + APIC_ISR); 580 ASSERT(result == -1 || result >= 16); 581 582 return result; 583 } 584 585 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) 586 { 587 if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) 588 return; 589 590 /* 591 * We do get here for APIC virtualization enabled if the guest 592 * uses the Hyper-V APIC enlightenment. In this case we may need 593 * to trigger a new interrupt delivery by writing the SVI field; 594 * on the other hand isr_count and highest_isr_cache are unused 595 * and must be left alone. 596 */ 597 if (unlikely(apic->apicv_active)) 598 static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic)); 599 else { 600 --apic->isr_count; 601 BUG_ON(apic->isr_count < 0); 602 apic->highest_isr_cache = -1; 603 } 604 } 605 606 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) 607 { 608 /* This may race with setting of irr in __apic_accept_irq() and 609 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq 610 * will cause vmexit immediately and the value will be recalculated 611 * on the next vmentry. 
612 */ 613 return apic_find_highest_irr(vcpu->arch.apic); 614 } 615 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); 616 617 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 618 int vector, int level, int trig_mode, 619 struct dest_map *dest_map); 620 621 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, 622 struct dest_map *dest_map) 623 { 624 struct kvm_lapic *apic = vcpu->arch.apic; 625 626 return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, 627 irq->level, irq->trig_mode, dest_map); 628 } 629 630 static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map, 631 struct kvm_lapic_irq *irq, u32 min) 632 { 633 int i, count = 0; 634 struct kvm_vcpu *vcpu; 635 636 if (min > map->max_apic_id) 637 return 0; 638 639 for_each_set_bit(i, ipi_bitmap, 640 min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { 641 if (map->phys_map[min + i]) { 642 vcpu = map->phys_map[min + i]->vcpu; 643 count += kvm_apic_set_irq(vcpu, irq, NULL); 644 } 645 } 646 647 return count; 648 } 649 650 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, 651 unsigned long ipi_bitmap_high, u32 min, 652 unsigned long icr, int op_64_bit) 653 { 654 struct kvm_apic_map *map; 655 struct kvm_lapic_irq irq = {0}; 656 int cluster_size = op_64_bit ? 64 : 32; 657 int count; 658 659 if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK)) 660 return -KVM_EINVAL; 661 662 irq.vector = icr & APIC_VECTOR_MASK; 663 irq.delivery_mode = icr & APIC_MODE_MASK; 664 irq.level = (icr & APIC_INT_ASSERT) != 0; 665 irq.trig_mode = icr & APIC_INT_LEVELTRIG; 666 667 rcu_read_lock(); 668 map = rcu_dereference(kvm->arch.apic_map); 669 670 count = -EOPNOTSUPP; 671 if (likely(map)) { 672 count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min); 673 min += cluster_size; 674 count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min); 675 } 676 677 rcu_read_unlock(); 678 return count; 679 } 680 681 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) 682 { 683 684 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, 685 sizeof(val)); 686 } 687 688 static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) 689 { 690 691 return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, 692 sizeof(*val)); 693 } 694 695 static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) 696 { 697 return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; 698 } 699 700 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) 701 { 702 if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) 703 return; 704 705 __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 706 } 707 708 static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu) 709 { 710 u8 val; 711 712 if (pv_eoi_get_user(vcpu, &val) < 0) 713 return false; 714 715 val &= KVM_PV_EOI_ENABLED; 716 717 if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) 718 return false; 719 720 /* 721 * Clear pending bit in any case: it will be set again on vmentry. 722 * While this might not be ideal from performance point of view, 723 * this makes sure pv eoi is only enabled when we know it's safe. 
724 */ 725 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); 726 727 return val; 728 } 729 730 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) 731 { 732 int highest_irr; 733 if (kvm_x86_ops.sync_pir_to_irr) 734 highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu); 735 else 736 highest_irr = apic_find_highest_irr(apic); 737 if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr) 738 return -1; 739 return highest_irr; 740 } 741 742 static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr) 743 { 744 u32 tpr, isrv, ppr, old_ppr; 745 int isr; 746 747 old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI); 748 tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI); 749 isr = apic_find_highest_isr(apic); 750 isrv = (isr != -1) ? isr : 0; 751 752 if ((tpr & 0xf0) >= (isrv & 0xf0)) 753 ppr = tpr & 0xff; 754 else 755 ppr = isrv & 0xf0; 756 757 *new_ppr = ppr; 758 if (old_ppr != ppr) 759 kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr); 760 761 return ppr < old_ppr; 762 } 763 764 static void apic_update_ppr(struct kvm_lapic *apic) 765 { 766 u32 ppr; 767 768 if (__apic_update_ppr(apic, &ppr) && 769 apic_has_interrupt_for_ppr(apic, ppr) != -1) 770 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 771 } 772 773 void kvm_apic_update_ppr(struct kvm_vcpu *vcpu) 774 { 775 apic_update_ppr(vcpu->arch.apic); 776 } 777 EXPORT_SYMBOL_GPL(kvm_apic_update_ppr); 778 779 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) 780 { 781 kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr); 782 apic_update_ppr(apic); 783 } 784 785 static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda) 786 { 787 return mda == (apic_x2apic_mode(apic) ? 788 X2APIC_BROADCAST : APIC_BROADCAST); 789 } 790 791 static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) 792 { 793 if (kvm_apic_broadcast(apic, mda)) 794 return true; 795 796 if (apic_x2apic_mode(apic)) 797 return mda == kvm_x2apic_id(apic); 798 799 /* 800 * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if 801 * it were in x2APIC mode. Hotplugged VCPUs start in xAPIC mode and 802 * this allows unique addressing of VCPUs with APIC ID over 0xff. 803 * The 0xff condition is needed because writeable xAPIC ID. 804 */ 805 if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic)) 806 return true; 807 808 return mda == kvm_xapic_id(apic); 809 } 810 811 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) 812 { 813 u32 logical_id; 814 815 if (kvm_apic_broadcast(apic, mda)) 816 return true; 817 818 logical_id = kvm_lapic_get_reg(apic, APIC_LDR); 819 820 if (apic_x2apic_mode(apic)) 821 return ((logical_id >> 16) == (mda >> 16)) 822 && (logical_id & mda & 0xffff) != 0; 823 824 logical_id = GET_APIC_LOGICAL_ID(logical_id); 825 826 switch (kvm_lapic_get_reg(apic, APIC_DFR)) { 827 case APIC_DFR_FLAT: 828 return (logical_id & mda) != 0; 829 case APIC_DFR_CLUSTER: 830 return ((logical_id >> 4) == (mda >> 4)) 831 && (logical_id & mda & 0xf) != 0; 832 default: 833 return false; 834 } 835 } 836 837 /* The KVM local APIC implementation has two quirks: 838 * 839 * - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs 840 * in xAPIC mode if the "destination & 0xff" matches its xAPIC ID. 841 * KVM doesn't do that aliasing. 842 * 843 * - in-kernel IOAPIC messages have to be delivered directly to 844 * x2APIC, because the kernel does not support interrupt remapping. 
845 * In order to support broadcast without interrupt remapping, x2APIC 846 * rewrites the destination of non-IPI messages from APIC_BROADCAST 847 * to X2APIC_BROADCAST. 848 * 849 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API. This is 850 * important when userspace wants to use x2APIC-format MSIs, because 851 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7". 852 */ 853 static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id, 854 struct kvm_lapic *source, struct kvm_lapic *target) 855 { 856 bool ipi = source != NULL; 857 858 if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled && 859 !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target)) 860 return X2APIC_BROADCAST; 861 862 return dest_id; 863 } 864 865 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 866 int shorthand, unsigned int dest, int dest_mode) 867 { 868 struct kvm_lapic *target = vcpu->arch.apic; 869 u32 mda = kvm_apic_mda(vcpu, dest, source, target); 870 871 ASSERT(target); 872 switch (shorthand) { 873 case APIC_DEST_NOSHORT: 874 if (dest_mode == APIC_DEST_PHYSICAL) 875 return kvm_apic_match_physical_addr(target, mda); 876 else 877 return kvm_apic_match_logical_addr(target, mda); 878 case APIC_DEST_SELF: 879 return target == source; 880 case APIC_DEST_ALLINC: 881 return true; 882 case APIC_DEST_ALLBUT: 883 return target != source; 884 default: 885 return false; 886 } 887 } 888 EXPORT_SYMBOL_GPL(kvm_apic_match_dest); 889 890 int kvm_vector_to_index(u32 vector, u32 dest_vcpus, 891 const unsigned long *bitmap, u32 bitmap_size) 892 { 893 u32 mod; 894 int i, idx = -1; 895 896 mod = vector % dest_vcpus; 897 898 for (i = 0; i <= mod; i++) { 899 idx = find_next_bit(bitmap, bitmap_size, idx + 1); 900 BUG_ON(idx == bitmap_size); 901 } 902 903 return idx; 904 } 905 906 static void kvm_apic_disabled_lapic_found(struct kvm *kvm) 907 { 908 if (!kvm->arch.disabled_lapic_found) { 909 kvm->arch.disabled_lapic_found = true; 910 printk(KERN_INFO 911 "Disabled LAPIC found during irq injection\n"); 912 } 913 } 914 915 static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src, 916 struct kvm_lapic_irq *irq, struct kvm_apic_map *map) 917 { 918 if (kvm->arch.x2apic_broadcast_quirk_disabled) { 919 if ((irq->dest_id == APIC_BROADCAST && 920 map->mode != KVM_APIC_MODE_X2APIC)) 921 return true; 922 if (irq->dest_id == X2APIC_BROADCAST) 923 return true; 924 } else { 925 bool x2apic_ipi = src && *src && apic_x2apic_mode(*src); 926 if (irq->dest_id == (x2apic_ipi ? 927 X2APIC_BROADCAST : APIC_BROADCAST)) 928 return true; 929 } 930 931 return false; 932 } 933 934 /* Return true if the interrupt can be handled by using *bitmap as index mask 935 * for valid destinations in *dst array. 936 * Return false if kvm_apic_map_get_dest_lapic did nothing useful. 937 * Note: we may have zero kvm_lapic destinations when we return true, which 938 * means that the interrupt should be dropped. In this case, *bitmap would be 939 * zero and *dst undefined. 
940 */ 941 static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm, 942 struct kvm_lapic **src, struct kvm_lapic_irq *irq, 943 struct kvm_apic_map *map, struct kvm_lapic ***dst, 944 unsigned long *bitmap) 945 { 946 int i, lowest; 947 948 if (irq->shorthand == APIC_DEST_SELF && src) { 949 *dst = src; 950 *bitmap = 1; 951 return true; 952 } else if (irq->shorthand) 953 return false; 954 955 if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map)) 956 return false; 957 958 if (irq->dest_mode == APIC_DEST_PHYSICAL) { 959 if (irq->dest_id > map->max_apic_id) { 960 *bitmap = 0; 961 } else { 962 u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1); 963 *dst = &map->phys_map[dest_id]; 964 *bitmap = 1; 965 } 966 return true; 967 } 968 969 *bitmap = 0; 970 if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst, 971 (u16 *)bitmap)) 972 return false; 973 974 if (!kvm_lowest_prio_delivery(irq)) 975 return true; 976 977 if (!kvm_vector_hashing_enabled()) { 978 lowest = -1; 979 for_each_set_bit(i, bitmap, 16) { 980 if (!(*dst)[i]) 981 continue; 982 if (lowest < 0) 983 lowest = i; 984 else if (kvm_apic_compare_prio((*dst)[i]->vcpu, 985 (*dst)[lowest]->vcpu) < 0) 986 lowest = i; 987 } 988 } else { 989 if (!*bitmap) 990 return true; 991 992 lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap), 993 bitmap, 16); 994 995 if (!(*dst)[lowest]) { 996 kvm_apic_disabled_lapic_found(kvm); 997 *bitmap = 0; 998 return true; 999 } 1000 } 1001 1002 *bitmap = (lowest >= 0) ? 1 << lowest : 0; 1003 1004 return true; 1005 } 1006 1007 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, 1008 struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map) 1009 { 1010 struct kvm_apic_map *map; 1011 unsigned long bitmap; 1012 struct kvm_lapic **dst = NULL; 1013 int i; 1014 bool ret; 1015 1016 *r = -1; 1017 1018 if (irq->shorthand == APIC_DEST_SELF) { 1019 if (KVM_BUG_ON(!src, kvm)) { 1020 *r = 0; 1021 return true; 1022 } 1023 *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); 1024 return true; 1025 } 1026 1027 rcu_read_lock(); 1028 map = rcu_dereference(kvm->arch.apic_map); 1029 1030 ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap); 1031 if (ret) { 1032 *r = 0; 1033 for_each_set_bit(i, &bitmap, 16) { 1034 if (!dst[i]) 1035 continue; 1036 *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); 1037 } 1038 } 1039 1040 rcu_read_unlock(); 1041 return ret; 1042 } 1043 1044 /* 1045 * This routine tries to handle interrupts in posted mode, here is how 1046 * it deals with different cases: 1047 * - For single-destination interrupts, handle it in posted mode 1048 * - Else if vector hashing is enabled and it is a lowest-priority 1049 * interrupt, handle it in posted mode and use the following mechanism 1050 * to find the destination vCPU. 1051 * 1. For lowest-priority interrupts, store all the possible 1052 * destination vCPUs in an array. 1053 * 2. Use "guest vector % max number of destination vCPUs" to find 1054 * the right destination vCPU in the array for the lowest-priority 1055 * interrupt. 1056 * - Otherwise, use remapped mode to inject the interrupt. 
1057 */ 1058 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, 1059 struct kvm_vcpu **dest_vcpu) 1060 { 1061 struct kvm_apic_map *map; 1062 unsigned long bitmap; 1063 struct kvm_lapic **dst = NULL; 1064 bool ret = false; 1065 1066 if (irq->shorthand) 1067 return false; 1068 1069 rcu_read_lock(); 1070 map = rcu_dereference(kvm->arch.apic_map); 1071 1072 if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) && 1073 hweight16(bitmap) == 1) { 1074 unsigned long i = find_first_bit(&bitmap, 16); 1075 1076 if (dst[i]) { 1077 *dest_vcpu = dst[i]->vcpu; 1078 ret = true; 1079 } 1080 } 1081 1082 rcu_read_unlock(); 1083 return ret; 1084 } 1085 1086 /* 1087 * Add a pending IRQ into lapic. 1088 * Return 1 if successfully added and 0 if discarded. 1089 */ 1090 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 1091 int vector, int level, int trig_mode, 1092 struct dest_map *dest_map) 1093 { 1094 int result = 0; 1095 struct kvm_vcpu *vcpu = apic->vcpu; 1096 1097 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, 1098 trig_mode, vector); 1099 switch (delivery_mode) { 1100 case APIC_DM_LOWEST: 1101 vcpu->arch.apic_arb_prio++; 1102 fallthrough; 1103 case APIC_DM_FIXED: 1104 if (unlikely(trig_mode && !level)) 1105 break; 1106 1107 /* FIXME add logic for vcpu on reset */ 1108 if (unlikely(!apic_enabled(apic))) 1109 break; 1110 1111 result = 1; 1112 1113 if (dest_map) { 1114 __set_bit(vcpu->vcpu_id, dest_map->map); 1115 dest_map->vectors[vcpu->vcpu_id] = vector; 1116 } 1117 1118 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { 1119 if (trig_mode) 1120 kvm_lapic_set_vector(vector, 1121 apic->regs + APIC_TMR); 1122 else 1123 kvm_lapic_clear_vector(vector, 1124 apic->regs + APIC_TMR); 1125 } 1126 1127 static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode, 1128 trig_mode, vector); 1129 break; 1130 1131 case APIC_DM_REMRD: 1132 result = 1; 1133 vcpu->arch.pv.pv_unhalted = 1; 1134 kvm_make_request(KVM_REQ_EVENT, vcpu); 1135 kvm_vcpu_kick(vcpu); 1136 break; 1137 1138 case APIC_DM_SMI: 1139 result = 1; 1140 kvm_make_request(KVM_REQ_SMI, vcpu); 1141 kvm_vcpu_kick(vcpu); 1142 break; 1143 1144 case APIC_DM_NMI: 1145 result = 1; 1146 kvm_inject_nmi(vcpu); 1147 kvm_vcpu_kick(vcpu); 1148 break; 1149 1150 case APIC_DM_INIT: 1151 if (!trig_mode || level) { 1152 result = 1; 1153 /* assumes that there are only KVM_APIC_INIT/SIPI */ 1154 apic->pending_events = (1UL << KVM_APIC_INIT); 1155 kvm_make_request(KVM_REQ_EVENT, vcpu); 1156 kvm_vcpu_kick(vcpu); 1157 } 1158 break; 1159 1160 case APIC_DM_STARTUP: 1161 result = 1; 1162 apic->sipi_vector = vector; 1163 /* make sure sipi_vector is visible for the receiver */ 1164 smp_wmb(); 1165 set_bit(KVM_APIC_SIPI, &apic->pending_events); 1166 kvm_make_request(KVM_REQ_EVENT, vcpu); 1167 kvm_vcpu_kick(vcpu); 1168 break; 1169 1170 case APIC_DM_EXTINT: 1171 /* 1172 * Should only be called by kvm_apic_local_deliver() with LVT0, 1173 * before NMI watchdog was enabled. Already handled by 1174 * kvm_apic_accept_pic_intr(). 1175 */ 1176 break; 1177 1178 default: 1179 printk(KERN_ERR "TODO: unsupported delivery mode %x\n", 1180 delivery_mode); 1181 break; 1182 } 1183 return result; 1184 } 1185 1186 /* 1187 * This routine identifies the destination vcpus mask meant to receive the 1188 * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find 1189 * out the destination vcpus array and set the bitmap or it traverses to 1190 * each available vcpu to identify the same. 
1191 */ 1192 void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, 1193 unsigned long *vcpu_bitmap) 1194 { 1195 struct kvm_lapic **dest_vcpu = NULL; 1196 struct kvm_lapic *src = NULL; 1197 struct kvm_apic_map *map; 1198 struct kvm_vcpu *vcpu; 1199 unsigned long bitmap, i; 1200 int vcpu_idx; 1201 bool ret; 1202 1203 rcu_read_lock(); 1204 map = rcu_dereference(kvm->arch.apic_map); 1205 1206 ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu, 1207 &bitmap); 1208 if (ret) { 1209 for_each_set_bit(i, &bitmap, 16) { 1210 if (!dest_vcpu[i]) 1211 continue; 1212 vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx; 1213 __set_bit(vcpu_idx, vcpu_bitmap); 1214 } 1215 } else { 1216 kvm_for_each_vcpu(i, vcpu, kvm) { 1217 if (!kvm_apic_present(vcpu)) 1218 continue; 1219 if (!kvm_apic_match_dest(vcpu, NULL, 1220 irq->shorthand, 1221 irq->dest_id, 1222 irq->dest_mode)) 1223 continue; 1224 __set_bit(i, vcpu_bitmap); 1225 } 1226 } 1227 rcu_read_unlock(); 1228 } 1229 1230 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) 1231 { 1232 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 1233 } 1234 1235 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) 1236 { 1237 return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors); 1238 } 1239 1240 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) 1241 { 1242 int trigger_mode; 1243 1244 /* Eoi the ioapic only if the ioapic doesn't own the vector. */ 1245 if (!kvm_ioapic_handles_vector(apic, vector)) 1246 return; 1247 1248 /* Request a KVM exit to inform the userspace IOAPIC. */ 1249 if (irqchip_split(apic->vcpu->kvm)) { 1250 apic->vcpu->arch.pending_ioapic_eoi = vector; 1251 kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu); 1252 return; 1253 } 1254 1255 if (apic_test_vector(vector, apic->regs + APIC_TMR)) 1256 trigger_mode = IOAPIC_LEVEL_TRIG; 1257 else 1258 trigger_mode = IOAPIC_EDGE_TRIG; 1259 1260 kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); 1261 } 1262 1263 static int apic_set_eoi(struct kvm_lapic *apic) 1264 { 1265 int vector = apic_find_highest_isr(apic); 1266 1267 trace_kvm_eoi(apic, vector); 1268 1269 /* 1270 * Not every write EOI will has corresponding ISR, 1271 * one example is when Kernel check timer on setup_IO_APIC 1272 */ 1273 if (vector == -1) 1274 return vector; 1275 1276 apic_clear_isr(vector, apic); 1277 apic_update_ppr(apic); 1278 1279 if (to_hv_vcpu(apic->vcpu) && 1280 test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap)) 1281 kvm_hv_synic_send_eoi(apic->vcpu, vector); 1282 1283 kvm_ioapic_send_eoi(apic, vector); 1284 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 1285 return vector; 1286 } 1287 1288 /* 1289 * this interface assumes a trap-like exit, which has already finished 1290 * desired side effect including vISR and vPPR update. 1291 */ 1292 void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) 1293 { 1294 struct kvm_lapic *apic = vcpu->arch.apic; 1295 1296 trace_kvm_eoi(apic, vector); 1297 1298 kvm_ioapic_send_eoi(apic, vector); 1299 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 1300 } 1301 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); 1302 1303 void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high) 1304 { 1305 struct kvm_lapic_irq irq; 1306 1307 /* KVM has no delay and should always clear the BUSY/PENDING flag. 
*/ 1308 WARN_ON_ONCE(icr_low & APIC_ICR_BUSY); 1309 1310 irq.vector = icr_low & APIC_VECTOR_MASK; 1311 irq.delivery_mode = icr_low & APIC_MODE_MASK; 1312 irq.dest_mode = icr_low & APIC_DEST_MASK; 1313 irq.level = (icr_low & APIC_INT_ASSERT) != 0; 1314 irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; 1315 irq.shorthand = icr_low & APIC_SHORT_MASK; 1316 irq.msi_redir_hint = false; 1317 if (apic_x2apic_mode(apic)) 1318 irq.dest_id = icr_high; 1319 else 1320 irq.dest_id = GET_APIC_DEST_FIELD(icr_high); 1321 1322 trace_kvm_apic_ipi(icr_low, irq.dest_id); 1323 1324 kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); 1325 } 1326 EXPORT_SYMBOL_GPL(kvm_apic_send_ipi); 1327 1328 static u32 apic_get_tmcct(struct kvm_lapic *apic) 1329 { 1330 ktime_t remaining, now; 1331 s64 ns; 1332 u32 tmcct; 1333 1334 ASSERT(apic != NULL); 1335 1336 /* if initial count is 0, current count should also be 0 */ 1337 if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 || 1338 apic->lapic_timer.period == 0) 1339 return 0; 1340 1341 now = ktime_get(); 1342 remaining = ktime_sub(apic->lapic_timer.target_expiration, now); 1343 if (ktime_to_ns(remaining) < 0) 1344 remaining = 0; 1345 1346 ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period); 1347 tmcct = div64_u64(ns, 1348 (APIC_BUS_CYCLE_NS * apic->divide_count)); 1349 1350 return tmcct; 1351 } 1352 1353 static void __report_tpr_access(struct kvm_lapic *apic, bool write) 1354 { 1355 struct kvm_vcpu *vcpu = apic->vcpu; 1356 struct kvm_run *run = vcpu->run; 1357 1358 kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu); 1359 run->tpr_access.rip = kvm_rip_read(vcpu); 1360 run->tpr_access.is_write = write; 1361 } 1362 1363 static inline void report_tpr_access(struct kvm_lapic *apic, bool write) 1364 { 1365 if (apic->vcpu->arch.tpr_access_reporting) 1366 __report_tpr_access(apic, write); 1367 } 1368 1369 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) 1370 { 1371 u32 val = 0; 1372 1373 if (offset >= LAPIC_MMIO_LENGTH) 1374 return 0; 1375 1376 switch (offset) { 1377 case APIC_ARBPRI: 1378 break; 1379 1380 case APIC_TMCCT: /* Timer CCR */ 1381 if (apic_lvtt_tscdeadline(apic)) 1382 return 0; 1383 1384 val = apic_get_tmcct(apic); 1385 break; 1386 case APIC_PROCPRI: 1387 apic_update_ppr(apic); 1388 val = kvm_lapic_get_reg(apic, offset); 1389 break; 1390 case APIC_TASKPRI: 1391 report_tpr_access(apic, false); 1392 fallthrough; 1393 default: 1394 val = kvm_lapic_get_reg(apic, offset); 1395 break; 1396 } 1397 1398 return val; 1399 } 1400 1401 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev) 1402 { 1403 return container_of(dev, struct kvm_lapic, dev); 1404 } 1405 1406 #define APIC_REG_MASK(reg) (1ull << ((reg) >> 4)) 1407 #define APIC_REGS_MASK(first, count) \ 1408 (APIC_REG_MASK(first) * ((1ull << (count)) - 1)) 1409 1410 static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, 1411 void *data) 1412 { 1413 unsigned char alignment = offset & 0xf; 1414 u32 result; 1415 /* this bitmask has a bit cleared for each reserved register */ 1416 u64 valid_reg_mask = 1417 APIC_REG_MASK(APIC_ID) | 1418 APIC_REG_MASK(APIC_LVR) | 1419 APIC_REG_MASK(APIC_TASKPRI) | 1420 APIC_REG_MASK(APIC_PROCPRI) | 1421 APIC_REG_MASK(APIC_LDR) | 1422 APIC_REG_MASK(APIC_DFR) | 1423 APIC_REG_MASK(APIC_SPIV) | 1424 APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) | 1425 APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) | 1426 APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) | 1427 APIC_REG_MASK(APIC_ESR) | 1428 APIC_REG_MASK(APIC_ICR) | 1429 APIC_REG_MASK(APIC_LVTT) | 1430 
APIC_REG_MASK(APIC_LVTTHMR) | 1431 APIC_REG_MASK(APIC_LVTPC) | 1432 APIC_REG_MASK(APIC_LVT0) | 1433 APIC_REG_MASK(APIC_LVT1) | 1434 APIC_REG_MASK(APIC_LVTERR) | 1435 APIC_REG_MASK(APIC_TMICT) | 1436 APIC_REG_MASK(APIC_TMCCT) | 1437 APIC_REG_MASK(APIC_TDCR); 1438 1439 /* 1440 * ARBPRI and ICR2 are not valid in x2APIC mode. WARN if KVM reads ICR 1441 * in x2APIC mode as it's an 8-byte register in x2APIC and needs to be 1442 * manually handled by the caller. 1443 */ 1444 if (!apic_x2apic_mode(apic)) 1445 valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) | 1446 APIC_REG_MASK(APIC_ICR2); 1447 else 1448 WARN_ON_ONCE(offset == APIC_ICR); 1449 1450 if (alignment + len > 4) 1451 return 1; 1452 1453 if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) 1454 return 1; 1455 1456 result = __apic_read(apic, offset & ~0xf); 1457 1458 trace_kvm_apic_read(offset, result); 1459 1460 switch (len) { 1461 case 1: 1462 case 2: 1463 case 4: 1464 memcpy(data, (char *)&result + alignment, len); 1465 break; 1466 default: 1467 printk(KERN_ERR "Local APIC read with len = %x, " 1468 "should be 1,2, or 4 instead\n", len); 1469 break; 1470 } 1471 return 0; 1472 } 1473 1474 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) 1475 { 1476 return addr >= apic->base_address && 1477 addr < apic->base_address + LAPIC_MMIO_LENGTH; 1478 } 1479 1480 static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, 1481 gpa_t address, int len, void *data) 1482 { 1483 struct kvm_lapic *apic = to_lapic(this); 1484 u32 offset = address - apic->base_address; 1485 1486 if (!apic_mmio_in_range(apic, address)) 1487 return -EOPNOTSUPP; 1488 1489 if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { 1490 if (!kvm_check_has_quirk(vcpu->kvm, 1491 KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) 1492 return -EOPNOTSUPP; 1493 1494 memset(data, 0xff, len); 1495 return 0; 1496 } 1497 1498 kvm_lapic_reg_read(apic, offset, len, data); 1499 1500 return 0; 1501 } 1502 1503 static void update_divide_count(struct kvm_lapic *apic) 1504 { 1505 u32 tmp1, tmp2, tdcr; 1506 1507 tdcr = kvm_lapic_get_reg(apic, APIC_TDCR); 1508 tmp1 = tdcr & 0xf; 1509 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; 1510 apic->divide_count = 0x1 << (tmp2 & 0x7); 1511 } 1512 1513 static void limit_periodic_timer_frequency(struct kvm_lapic *apic) 1514 { 1515 /* 1516 * Do not allow the guest to program periodic timers with small 1517 * interval, since the hrtimers are not throttled by the host 1518 * scheduler. 
1519 */ 1520 if (apic_lvtt_period(apic) && apic->lapic_timer.period) { 1521 s64 min_period = min_timer_period_us * 1000LL; 1522 1523 if (apic->lapic_timer.period < min_period) { 1524 pr_info_ratelimited( 1525 "kvm: vcpu %i: requested %lld ns " 1526 "lapic timer period limited to %lld ns\n", 1527 apic->vcpu->vcpu_id, 1528 apic->lapic_timer.period, min_period); 1529 apic->lapic_timer.period = min_period; 1530 } 1531 } 1532 } 1533 1534 static void cancel_hv_timer(struct kvm_lapic *apic); 1535 1536 static void cancel_apic_timer(struct kvm_lapic *apic) 1537 { 1538 hrtimer_cancel(&apic->lapic_timer.timer); 1539 preempt_disable(); 1540 if (apic->lapic_timer.hv_timer_in_use) 1541 cancel_hv_timer(apic); 1542 preempt_enable(); 1543 atomic_set(&apic->lapic_timer.pending, 0); 1544 } 1545 1546 static void apic_update_lvtt(struct kvm_lapic *apic) 1547 { 1548 u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) & 1549 apic->lapic_timer.timer_mode_mask; 1550 1551 if (apic->lapic_timer.timer_mode != timer_mode) { 1552 if (apic_lvtt_tscdeadline(apic) != (timer_mode == 1553 APIC_LVT_TIMER_TSCDEADLINE)) { 1554 cancel_apic_timer(apic); 1555 kvm_lapic_set_reg(apic, APIC_TMICT, 0); 1556 apic->lapic_timer.period = 0; 1557 apic->lapic_timer.tscdeadline = 0; 1558 } 1559 apic->lapic_timer.timer_mode = timer_mode; 1560 limit_periodic_timer_frequency(apic); 1561 } 1562 } 1563 1564 /* 1565 * On APICv, this test will cause a busy wait 1566 * during a higher-priority task. 1567 */ 1568 1569 static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) 1570 { 1571 struct kvm_lapic *apic = vcpu->arch.apic; 1572 u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT); 1573 1574 if (kvm_apic_hw_enabled(apic)) { 1575 int vec = reg & APIC_VECTOR_MASK; 1576 void *bitmap = apic->regs + APIC_ISR; 1577 1578 if (apic->apicv_active) 1579 bitmap = apic->regs + APIC_IRR; 1580 1581 if (apic_test_vector(vec, bitmap)) 1582 return true; 1583 } 1584 return false; 1585 } 1586 1587 static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) 1588 { 1589 u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns; 1590 1591 /* 1592 * If the guest TSC is running at a different ratio than the host, then 1593 * convert the delay to nanoseconds to achieve an accurate delay. Note 1594 * that __delay() uses delay_tsc whenever the hardware has TSC, thus 1595 * always for VMX enabled hardware. 1596 */ 1597 if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) { 1598 __delay(min(guest_cycles, 1599 nsec_to_cycles(vcpu, timer_advance_ns))); 1600 } else { 1601 u64 delay_ns = guest_cycles * 1000000ULL; 1602 do_div(delay_ns, vcpu->arch.virtual_tsc_khz); 1603 ndelay(min_t(u32, delay_ns, timer_advance_ns)); 1604 } 1605 } 1606 1607 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, 1608 s64 advance_expire_delta) 1609 { 1610 struct kvm_lapic *apic = vcpu->arch.apic; 1611 u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; 1612 u64 ns; 1613 1614 /* Do not adjust for tiny fluctuations or large random spikes. 
*/ 1615 if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX || 1616 abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN) 1617 return; 1618 1619 /* too early */ 1620 if (advance_expire_delta < 0) { 1621 ns = -advance_expire_delta * 1000000ULL; 1622 do_div(ns, vcpu->arch.virtual_tsc_khz); 1623 timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; 1624 } else { 1625 /* too late */ 1626 ns = advance_expire_delta * 1000000ULL; 1627 do_div(ns, vcpu->arch.virtual_tsc_khz); 1628 timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP; 1629 } 1630 1631 if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX)) 1632 timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; 1633 apic->lapic_timer.timer_advance_ns = timer_advance_ns; 1634 } 1635 1636 static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1637 { 1638 struct kvm_lapic *apic = vcpu->arch.apic; 1639 u64 guest_tsc, tsc_deadline; 1640 1641 tsc_deadline = apic->lapic_timer.expired_tscdeadline; 1642 apic->lapic_timer.expired_tscdeadline = 0; 1643 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1644 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); 1645 1646 if (lapic_timer_advance_dynamic) { 1647 adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline); 1648 /* 1649 * If the timer fired early, reread the TSC to account for the 1650 * overhead of the above adjustment to avoid waiting longer 1651 * than is necessary. 1652 */ 1653 if (guest_tsc < tsc_deadline) 1654 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1655 } 1656 1657 if (guest_tsc < tsc_deadline) 1658 __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); 1659 } 1660 1661 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) 1662 { 1663 if (lapic_in_kernel(vcpu) && 1664 vcpu->arch.apic->lapic_timer.expired_tscdeadline && 1665 vcpu->arch.apic->lapic_timer.timer_advance_ns && 1666 lapic_timer_int_injected(vcpu)) 1667 __kvm_wait_lapic_expire(vcpu); 1668 } 1669 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire); 1670 1671 static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) 1672 { 1673 struct kvm_timer *ktimer = &apic->lapic_timer; 1674 1675 kvm_apic_local_deliver(apic, APIC_LVTT); 1676 if (apic_lvtt_tscdeadline(apic)) { 1677 ktimer->tscdeadline = 0; 1678 } else if (apic_lvtt_oneshot(apic)) { 1679 ktimer->tscdeadline = 0; 1680 ktimer->target_expiration = 0; 1681 } 1682 } 1683 1684 static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) 1685 { 1686 struct kvm_vcpu *vcpu = apic->vcpu; 1687 struct kvm_timer *ktimer = &apic->lapic_timer; 1688 1689 if (atomic_read(&apic->lapic_timer.pending)) 1690 return; 1691 1692 if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) 1693 ktimer->expired_tscdeadline = ktimer->tscdeadline; 1694 1695 if (!from_timer_fn && apic->apicv_active) { 1696 WARN_ON(kvm_get_running_vcpu() != vcpu); 1697 kvm_apic_inject_pending_timer_irqs(apic); 1698 return; 1699 } 1700 1701 if (kvm_use_posted_timer_interrupt(apic->vcpu)) { 1702 /* 1703 * Ensure the guest's timer has truly expired before posting an 1704 * interrupt. Open code the relevant checks to avoid querying 1705 * lapic_timer_int_injected(), which will be false since the 1706 * interrupt isn't yet injected. Waiting until after injecting 1707 * is not an option since that won't help a posted interrupt. 
1708 */ 1709 if (vcpu->arch.apic->lapic_timer.expired_tscdeadline && 1710 vcpu->arch.apic->lapic_timer.timer_advance_ns) 1711 __kvm_wait_lapic_expire(vcpu); 1712 kvm_apic_inject_pending_timer_irqs(apic); 1713 return; 1714 } 1715 1716 atomic_inc(&apic->lapic_timer.pending); 1717 kvm_make_request(KVM_REQ_UNBLOCK, vcpu); 1718 if (from_timer_fn) 1719 kvm_vcpu_kick(vcpu); 1720 } 1721 1722 static void start_sw_tscdeadline(struct kvm_lapic *apic) 1723 { 1724 struct kvm_timer *ktimer = &apic->lapic_timer; 1725 u64 guest_tsc, tscdeadline = ktimer->tscdeadline; 1726 u64 ns = 0; 1727 ktime_t expire; 1728 struct kvm_vcpu *vcpu = apic->vcpu; 1729 unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; 1730 unsigned long flags; 1731 ktime_t now; 1732 1733 if (unlikely(!tscdeadline || !this_tsc_khz)) 1734 return; 1735 1736 local_irq_save(flags); 1737 1738 now = ktime_get(); 1739 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); 1740 1741 ns = (tscdeadline - guest_tsc) * 1000000ULL; 1742 do_div(ns, this_tsc_khz); 1743 1744 if (likely(tscdeadline > guest_tsc) && 1745 likely(ns > apic->lapic_timer.timer_advance_ns)) { 1746 expire = ktime_add_ns(now, ns); 1747 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); 1748 hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD); 1749 } else 1750 apic_timer_expired(apic, false); 1751 1752 local_irq_restore(flags); 1753 } 1754 1755 static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict) 1756 { 1757 return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count; 1758 } 1759 1760 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor) 1761 { 1762 ktime_t now, remaining; 1763 u64 ns_remaining_old, ns_remaining_new; 1764 1765 apic->lapic_timer.period = 1766 tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); 1767 limit_periodic_timer_frequency(apic); 1768 1769 now = ktime_get(); 1770 remaining = ktime_sub(apic->lapic_timer.target_expiration, now); 1771 if (ktime_to_ns(remaining) < 0) 1772 remaining = 0; 1773 1774 ns_remaining_old = ktime_to_ns(remaining); 1775 ns_remaining_new = mul_u64_u32_div(ns_remaining_old, 1776 apic->divide_count, old_divisor); 1777 1778 apic->lapic_timer.tscdeadline += 1779 nsec_to_cycles(apic->vcpu, ns_remaining_new) - 1780 nsec_to_cycles(apic->vcpu, ns_remaining_old); 1781 apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new); 1782 } 1783 1784 static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg) 1785 { 1786 ktime_t now; 1787 u64 tscl = rdtsc(); 1788 s64 deadline; 1789 1790 now = ktime_get(); 1791 apic->lapic_timer.period = 1792 tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); 1793 1794 if (!apic->lapic_timer.period) { 1795 apic->lapic_timer.tscdeadline = 0; 1796 return false; 1797 } 1798 1799 limit_periodic_timer_frequency(apic); 1800 deadline = apic->lapic_timer.period; 1801 1802 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { 1803 if (unlikely(count_reg != APIC_TMICT)) { 1804 deadline = tmict_to_ns(apic, 1805 kvm_lapic_get_reg(apic, count_reg)); 1806 if (unlikely(deadline <= 0)) 1807 deadline = apic->lapic_timer.period; 1808 else if (unlikely(deadline > apic->lapic_timer.period)) { 1809 pr_info_ratelimited( 1810 "kvm: vcpu %i: requested lapic timer restore with " 1811 "starting count register %#x=%u (%lld ns) > initial count (%lld ns). 
" 1812 "Using initial count to start timer.\n", 1813 apic->vcpu->vcpu_id, 1814 count_reg, 1815 kvm_lapic_get_reg(apic, count_reg), 1816 deadline, apic->lapic_timer.period); 1817 kvm_lapic_set_reg(apic, count_reg, 0); 1818 deadline = apic->lapic_timer.period; 1819 } 1820 } 1821 } 1822 1823 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + 1824 nsec_to_cycles(apic->vcpu, deadline); 1825 apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline); 1826 1827 return true; 1828 } 1829 1830 static void advance_periodic_target_expiration(struct kvm_lapic *apic) 1831 { 1832 ktime_t now = ktime_get(); 1833 u64 tscl = rdtsc(); 1834 ktime_t delta; 1835 1836 /* 1837 * Synchronize both deadlines to the same time source or 1838 * differences in the periods (caused by differences in the 1839 * underlying clocks or numerical approximation errors) will 1840 * cause the two to drift apart over time as the errors 1841 * accumulate. 1842 */ 1843 apic->lapic_timer.target_expiration = 1844 ktime_add_ns(apic->lapic_timer.target_expiration, 1845 apic->lapic_timer.period); 1846 delta = ktime_sub(apic->lapic_timer.target_expiration, now); 1847 apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + 1848 nsec_to_cycles(apic->vcpu, delta); 1849 } 1850 1851 static void start_sw_period(struct kvm_lapic *apic) 1852 { 1853 if (!apic->lapic_timer.period) 1854 return; 1855 1856 if (ktime_after(ktime_get(), 1857 apic->lapic_timer.target_expiration)) { 1858 apic_timer_expired(apic, false); 1859 1860 if (apic_lvtt_oneshot(apic)) 1861 return; 1862 1863 advance_periodic_target_expiration(apic); 1864 } 1865 1866 hrtimer_start(&apic->lapic_timer.timer, 1867 apic->lapic_timer.target_expiration, 1868 HRTIMER_MODE_ABS_HARD); 1869 } 1870 1871 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) 1872 { 1873 if (!lapic_in_kernel(vcpu)) 1874 return false; 1875 1876 return vcpu->arch.apic->lapic_timer.hv_timer_in_use; 1877 } 1878 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); 1879 1880 static void cancel_hv_timer(struct kvm_lapic *apic) 1881 { 1882 WARN_ON(preemptible()); 1883 WARN_ON(!apic->lapic_timer.hv_timer_in_use); 1884 static_call(kvm_x86_cancel_hv_timer)(apic->vcpu); 1885 apic->lapic_timer.hv_timer_in_use = false; 1886 } 1887 1888 static bool start_hv_timer(struct kvm_lapic *apic) 1889 { 1890 struct kvm_timer *ktimer = &apic->lapic_timer; 1891 struct kvm_vcpu *vcpu = apic->vcpu; 1892 bool expired; 1893 1894 WARN_ON(preemptible()); 1895 if (!kvm_can_use_hv_timer(vcpu)) 1896 return false; 1897 1898 if (!ktimer->tscdeadline) 1899 return false; 1900 1901 if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired)) 1902 return false; 1903 1904 ktimer->hv_timer_in_use = true; 1905 hrtimer_cancel(&ktimer->timer); 1906 1907 /* 1908 * To simplify handling the periodic timer, leave the hv timer running 1909 * even if the deadline timer has expired, i.e. rely on the resulting 1910 * VM-Exit to recompute the periodic timer's target expiration. 1911 */ 1912 if (!apic_lvtt_period(apic)) { 1913 /* 1914 * Cancel the hv timer if the sw timer fired while the hv timer 1915 * was being programmed, or if the hv timer itself expired. 
1916 */ 1917 if (atomic_read(&ktimer->pending)) { 1918 cancel_hv_timer(apic); 1919 } else if (expired) { 1920 apic_timer_expired(apic, false); 1921 cancel_hv_timer(apic); 1922 } 1923 } 1924 1925 trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use); 1926 1927 return true; 1928 } 1929 1930 static void start_sw_timer(struct kvm_lapic *apic) 1931 { 1932 struct kvm_timer *ktimer = &apic->lapic_timer; 1933 1934 WARN_ON(preemptible()); 1935 if (apic->lapic_timer.hv_timer_in_use) 1936 cancel_hv_timer(apic); 1937 if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) 1938 return; 1939 1940 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) 1941 start_sw_period(apic); 1942 else if (apic_lvtt_tscdeadline(apic)) 1943 start_sw_tscdeadline(apic); 1944 trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false); 1945 } 1946 1947 static void restart_apic_timer(struct kvm_lapic *apic) 1948 { 1949 preempt_disable(); 1950 1951 if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending)) 1952 goto out; 1953 1954 if (!start_hv_timer(apic)) 1955 start_sw_timer(apic); 1956 out: 1957 preempt_enable(); 1958 } 1959 1960 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) 1961 { 1962 struct kvm_lapic *apic = vcpu->arch.apic; 1963 1964 preempt_disable(); 1965 /* If the preempt notifier has already run, it also called apic_timer_expired */ 1966 if (!apic->lapic_timer.hv_timer_in_use) 1967 goto out; 1968 WARN_ON(kvm_vcpu_is_blocking(vcpu)); 1969 apic_timer_expired(apic, false); 1970 cancel_hv_timer(apic); 1971 1972 if (apic_lvtt_period(apic) && apic->lapic_timer.period) { 1973 advance_periodic_target_expiration(apic); 1974 restart_apic_timer(apic); 1975 } 1976 out: 1977 preempt_enable(); 1978 } 1979 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); 1980 1981 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) 1982 { 1983 restart_apic_timer(vcpu->arch.apic); 1984 } 1985 1986 void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) 1987 { 1988 struct kvm_lapic *apic = vcpu->arch.apic; 1989 1990 preempt_disable(); 1991 /* Possibly the TSC deadline timer is not enabled yet */ 1992 if (apic->lapic_timer.hv_timer_in_use) 1993 start_sw_timer(apic); 1994 preempt_enable(); 1995 } 1996 1997 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) 1998 { 1999 struct kvm_lapic *apic = vcpu->arch.apic; 2000 2001 WARN_ON(!apic->lapic_timer.hv_timer_in_use); 2002 restart_apic_timer(apic); 2003 } 2004 2005 static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg) 2006 { 2007 atomic_set(&apic->lapic_timer.pending, 0); 2008 2009 if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) 2010 && !set_target_expiration(apic, count_reg)) 2011 return; 2012 2013 restart_apic_timer(apic); 2014 } 2015 2016 static void start_apic_timer(struct kvm_lapic *apic) 2017 { 2018 __start_apic_timer(apic, APIC_TMICT); 2019 } 2020 2021 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) 2022 { 2023 bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val); 2024 2025 if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) { 2026 apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode; 2027 if (lvt0_in_nmi_mode) { 2028 atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); 2029 } else 2030 atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); 2031 } 2032 } 2033 2034 static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic) 2035 { 2036 struct kvm *kvm = apic->vcpu->kvm; 2037 2038 if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm)) 2039 return; 2040 2041 if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id) 2042 return; 2043 2044 
kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED); 2045 } 2046 2047 static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) 2048 { 2049 int ret = 0; 2050 2051 trace_kvm_apic_write(reg, val); 2052 2053 switch (reg) { 2054 case APIC_ID: /* Local APIC ID */ 2055 if (!apic_x2apic_mode(apic)) { 2056 kvm_apic_set_xapic_id(apic, val >> 24); 2057 kvm_lapic_xapic_id_updated(apic); 2058 } else { 2059 ret = 1; 2060 } 2061 break; 2062 2063 case APIC_TASKPRI: 2064 report_tpr_access(apic, true); 2065 apic_set_tpr(apic, val & 0xff); 2066 break; 2067 2068 case APIC_EOI: 2069 apic_set_eoi(apic); 2070 break; 2071 2072 case APIC_LDR: 2073 if (!apic_x2apic_mode(apic)) 2074 kvm_apic_set_ldr(apic, val & APIC_LDR_MASK); 2075 else 2076 ret = 1; 2077 break; 2078 2079 case APIC_DFR: 2080 if (!apic_x2apic_mode(apic)) 2081 kvm_apic_set_dfr(apic, val | 0x0FFFFFFF); 2082 else 2083 ret = 1; 2084 break; 2085 2086 case APIC_SPIV: { 2087 u32 mask = 0x3ff; 2088 if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) 2089 mask |= APIC_SPIV_DIRECTED_EOI; 2090 apic_set_spiv(apic, val & mask); 2091 if (!(val & APIC_SPIV_APIC_ENABLED)) { 2092 int i; 2093 u32 lvt_val; 2094 2095 for (i = 0; i < KVM_APIC_MAX_NR_LVT_ENTRIES; i++) { 2096 lvt_val = kvm_lapic_get_reg(apic, 2097 APIC_LVTT + 0x10 * i); 2098 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, 2099 lvt_val | APIC_LVT_MASKED); 2100 } 2101 apic_update_lvtt(apic); 2102 atomic_set(&apic->lapic_timer.pending, 0); 2103 2104 } 2105 break; 2106 } 2107 case APIC_ICR: 2108 WARN_ON_ONCE(apic_x2apic_mode(apic)); 2109 2110 /* No delay here, so we always clear the pending bit */ 2111 val &= ~APIC_ICR_BUSY; 2112 kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2)); 2113 kvm_lapic_set_reg(apic, APIC_ICR, val); 2114 break; 2115 case APIC_ICR2: 2116 if (apic_x2apic_mode(apic)) 2117 ret = 1; 2118 else 2119 kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000); 2120 break; 2121 2122 case APIC_LVT0: 2123 apic_manage_nmi_watchdog(apic, val); 2124 fallthrough; 2125 case APIC_LVTTHMR: 2126 case APIC_LVTPC: 2127 case APIC_LVT1: 2128 case APIC_LVTERR: { 2129 /* TODO: Check vector */ 2130 size_t size; 2131 u32 index; 2132 2133 if (!kvm_apic_sw_enabled(apic)) 2134 val |= APIC_LVT_MASKED; 2135 size = ARRAY_SIZE(apic_lvt_mask); 2136 index = array_index_nospec( 2137 (reg - APIC_LVTT) >> 4, size); 2138 val &= apic_lvt_mask[index]; 2139 kvm_lapic_set_reg(apic, reg, val); 2140 break; 2141 } 2142 2143 case APIC_LVTT: 2144 if (!kvm_apic_sw_enabled(apic)) 2145 val |= APIC_LVT_MASKED; 2146 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); 2147 kvm_lapic_set_reg(apic, APIC_LVTT, val); 2148 apic_update_lvtt(apic); 2149 break; 2150 2151 case APIC_TMICT: 2152 if (apic_lvtt_tscdeadline(apic)) 2153 break; 2154 2155 cancel_apic_timer(apic); 2156 kvm_lapic_set_reg(apic, APIC_TMICT, val); 2157 start_apic_timer(apic); 2158 break; 2159 2160 case APIC_TDCR: { 2161 uint32_t old_divisor = apic->divide_count; 2162 2163 kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb); 2164 update_divide_count(apic); 2165 if (apic->divide_count != old_divisor && 2166 apic->lapic_timer.period) { 2167 hrtimer_cancel(&apic->lapic_timer.timer); 2168 update_target_expiration(apic, old_divisor); 2169 restart_apic_timer(apic); 2170 } 2171 break; 2172 } 2173 case APIC_ESR: 2174 if (apic_x2apic_mode(apic) && val != 0) 2175 ret = 1; 2176 break; 2177 2178 case APIC_SELF_IPI: 2179 if (apic_x2apic_mode(apic)) 2180 kvm_apic_send_ipi(apic, APIC_DEST_SELF | (val & APIC_VECTOR_MASK), 0); 2181 
		else
			ret = 1;
		break;
	default:
		ret = 1;
		break;
	}

	/*
	 * Recalculate APIC maps if necessary, e.g. if the software enable bit
	 * was toggled, the APIC ID changed, etc...  The maps are marked dirty
	 * on relevant changes, i.e. this is a nop for most writes.
	 */
	kvm_recalculate_apic_map(apic->vcpu->kvm);

	return ret;
}

static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
			   gpa_t address, int len, const void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	unsigned int offset = address - apic->base_address;
	u32 val;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
		if (!kvm_check_has_quirk(vcpu->kvm,
					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
			return -EOPNOTSUPP;

		return 0;
	}

	/*
	 * APIC registers must be aligned on a 128-bit boundary, and 32/64/128-bit
	 * registers must be accessed through 32-bit operations.  Refer to SDM 8.4.1.
	 */
	if (len != 4 || (offset & 0xf))
		return 0;

	val = *(u32*)data;

	kvm_lapic_reg_write(apic, offset & 0xff0, val);

	return 0;
}

void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
	kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);

/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u64 val;

	if (apic_x2apic_mode(apic)) {
		/*
		 * When the guest APIC is in x2APIC mode and IPI virtualization
		 * is enabled, accessing APIC_ICR may cause a trap-like VM-exit
		 * on Intel hardware.  Other offsets are not possible.
2249 */ 2250 if (WARN_ON_ONCE(offset != APIC_ICR)) 2251 return; 2252 2253 kvm_lapic_msr_read(apic, offset, &val); 2254 kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32)); 2255 trace_kvm_apic_write(APIC_ICR, val); 2256 } else { 2257 val = kvm_lapic_get_reg(apic, offset); 2258 2259 /* TODO: optimize to just emulate side effect w/o one more write */ 2260 kvm_lapic_reg_write(apic, offset, (u32)val); 2261 } 2262 } 2263 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); 2264 2265 void kvm_free_lapic(struct kvm_vcpu *vcpu) 2266 { 2267 struct kvm_lapic *apic = vcpu->arch.apic; 2268 2269 if (!vcpu->arch.apic) 2270 return; 2271 2272 hrtimer_cancel(&apic->lapic_timer.timer); 2273 2274 if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) 2275 static_branch_slow_dec_deferred(&apic_hw_disabled); 2276 2277 if (!apic->sw_enabled) 2278 static_branch_slow_dec_deferred(&apic_sw_disabled); 2279 2280 if (apic->regs) 2281 free_page((unsigned long)apic->regs); 2282 2283 kfree(apic); 2284 } 2285 2286 /* 2287 *---------------------------------------------------------------------- 2288 * LAPIC interface 2289 *---------------------------------------------------------------------- 2290 */ 2291 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) 2292 { 2293 struct kvm_lapic *apic = vcpu->arch.apic; 2294 2295 if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic)) 2296 return 0; 2297 2298 return apic->lapic_timer.tscdeadline; 2299 } 2300 2301 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) 2302 { 2303 struct kvm_lapic *apic = vcpu->arch.apic; 2304 2305 if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic)) 2306 return; 2307 2308 hrtimer_cancel(&apic->lapic_timer.timer); 2309 apic->lapic_timer.tscdeadline = data; 2310 start_apic_timer(apic); 2311 } 2312 2313 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) 2314 { 2315 apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4); 2316 } 2317 2318 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) 2319 { 2320 u64 tpr; 2321 2322 tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI); 2323 2324 return (tpr & 0xf0) >> 4; 2325 } 2326 2327 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) 2328 { 2329 u64 old_value = vcpu->arch.apic_base; 2330 struct kvm_lapic *apic = vcpu->arch.apic; 2331 2332 vcpu->arch.apic_base = value; 2333 2334 if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) 2335 kvm_update_cpuid_runtime(vcpu); 2336 2337 if (!apic) 2338 return; 2339 2340 /* update jump label if enable bit changes */ 2341 if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) { 2342 if (value & MSR_IA32_APICBASE_ENABLE) { 2343 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); 2344 static_branch_slow_dec_deferred(&apic_hw_disabled); 2345 /* Check if there are APF page ready requests pending */ 2346 kvm_make_request(KVM_REQ_APF_READY, vcpu); 2347 } else { 2348 static_branch_inc(&apic_hw_disabled.key); 2349 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 2350 } 2351 } 2352 2353 if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE)) 2354 kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); 2355 2356 if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) 2357 static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu); 2358 2359 apic->base_address = apic->vcpu->arch.apic_base & 2360 MSR_IA32_APICBASE_BASE; 2361 2362 if ((value & MSR_IA32_APICBASE_ENABLE) && 2363 apic->base_address != APIC_DEFAULT_PHYS_BASE) { 2364 kvm_set_apicv_inhibit(apic->vcpu->kvm, 2365 APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); 2366 } 2367 } 2368 2369 
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) 2370 { 2371 struct kvm_lapic *apic = vcpu->arch.apic; 2372 2373 if (apic->apicv_active) { 2374 /* irr_pending is always true when apicv is activated. */ 2375 apic->irr_pending = true; 2376 apic->isr_count = 1; 2377 } else { 2378 /* 2379 * Don't clear irr_pending, searching the IRR can race with 2380 * updates from the CPU as APICv is still active from hardware's 2381 * perspective. The flag will be cleared as appropriate when 2382 * KVM injects the interrupt. 2383 */ 2384 apic->isr_count = count_vectors(apic->regs + APIC_ISR); 2385 } 2386 } 2387 EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); 2388 2389 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) 2390 { 2391 struct kvm_lapic *apic = vcpu->arch.apic; 2392 u64 msr_val; 2393 int i; 2394 2395 if (!init_event) { 2396 msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; 2397 if (kvm_vcpu_is_reset_bsp(vcpu)) 2398 msr_val |= MSR_IA32_APICBASE_BSP; 2399 kvm_lapic_set_base(vcpu, msr_val); 2400 } 2401 2402 if (!apic) 2403 return; 2404 2405 /* Stop the timer in case it's a reset to an active apic */ 2406 hrtimer_cancel(&apic->lapic_timer.timer); 2407 2408 /* The xAPIC ID is set at RESET even if the APIC was already enabled. */ 2409 if (!init_event) 2410 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); 2411 kvm_apic_set_version(apic->vcpu); 2412 2413 for (i = 0; i < KVM_APIC_MAX_NR_LVT_ENTRIES; i++) 2414 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); 2415 apic_update_lvtt(apic); 2416 if (kvm_vcpu_is_reset_bsp(vcpu) && 2417 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) 2418 kvm_lapic_set_reg(apic, APIC_LVT0, 2419 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); 2420 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); 2421 2422 kvm_apic_set_dfr(apic, 0xffffffffU); 2423 apic_set_spiv(apic, 0xff); 2424 kvm_lapic_set_reg(apic, APIC_TASKPRI, 0); 2425 if (!apic_x2apic_mode(apic)) 2426 kvm_apic_set_ldr(apic, 0); 2427 kvm_lapic_set_reg(apic, APIC_ESR, 0); 2428 if (!apic_x2apic_mode(apic)) { 2429 kvm_lapic_set_reg(apic, APIC_ICR, 0); 2430 kvm_lapic_set_reg(apic, APIC_ICR2, 0); 2431 } else { 2432 kvm_lapic_set_reg64(apic, APIC_ICR, 0); 2433 } 2434 kvm_lapic_set_reg(apic, APIC_TDCR, 0); 2435 kvm_lapic_set_reg(apic, APIC_TMICT, 0); 2436 for (i = 0; i < 8; i++) { 2437 kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0); 2438 kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 2439 kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 2440 } 2441 kvm_apic_update_apicv(vcpu); 2442 apic->highest_isr_cache = -1; 2443 update_divide_count(apic); 2444 atomic_set(&apic->lapic_timer.pending, 0); 2445 2446 vcpu->arch.pv_eoi.msr_val = 0; 2447 apic_update_ppr(apic); 2448 if (apic->apicv_active) { 2449 static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu); 2450 static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, -1); 2451 static_call_cond(kvm_x86_hwapic_isr_update)(-1); 2452 } 2453 2454 vcpu->arch.apic_arb_prio = 0; 2455 vcpu->arch.apic_attention = 0; 2456 2457 kvm_recalculate_apic_map(vcpu->kvm); 2458 } 2459 2460 /* 2461 *---------------------------------------------------------------------- 2462 * timer interface 2463 *---------------------------------------------------------------------- 2464 */ 2465 2466 static bool lapic_is_periodic(struct kvm_lapic *apic) 2467 { 2468 return apic_lvtt_period(apic); 2469 } 2470 2471 int apic_has_pending_timer(struct kvm_vcpu *vcpu) 2472 { 2473 struct kvm_lapic *apic = vcpu->arch.apic; 2474 2475 if (apic_enabled(apic) && apic_lvt_enabled(apic, 
APIC_LVTT)) 2476 return atomic_read(&apic->lapic_timer.pending); 2477 2478 return 0; 2479 } 2480 2481 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) 2482 { 2483 u32 reg = kvm_lapic_get_reg(apic, lvt_type); 2484 int vector, mode, trig_mode; 2485 2486 if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { 2487 vector = reg & APIC_VECTOR_MASK; 2488 mode = reg & APIC_MODE_MASK; 2489 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; 2490 return __apic_accept_irq(apic, mode, vector, 1, trig_mode, 2491 NULL); 2492 } 2493 return 0; 2494 } 2495 2496 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) 2497 { 2498 struct kvm_lapic *apic = vcpu->arch.apic; 2499 2500 if (apic) 2501 kvm_apic_local_deliver(apic, APIC_LVT0); 2502 } 2503 2504 static const struct kvm_io_device_ops apic_mmio_ops = { 2505 .read = apic_mmio_read, 2506 .write = apic_mmio_write, 2507 }; 2508 2509 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) 2510 { 2511 struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); 2512 struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); 2513 2514 apic_timer_expired(apic, true); 2515 2516 if (lapic_is_periodic(apic)) { 2517 advance_periodic_target_expiration(apic); 2518 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); 2519 return HRTIMER_RESTART; 2520 } else 2521 return HRTIMER_NORESTART; 2522 } 2523 2524 int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) 2525 { 2526 struct kvm_lapic *apic; 2527 2528 ASSERT(vcpu != NULL); 2529 2530 apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT); 2531 if (!apic) 2532 goto nomem; 2533 2534 vcpu->arch.apic = apic; 2535 2536 apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); 2537 if (!apic->regs) { 2538 printk(KERN_ERR "malloc apic regs error for vcpu %x\n", 2539 vcpu->vcpu_id); 2540 goto nomem_free_apic; 2541 } 2542 apic->vcpu = vcpu; 2543 2544 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, 2545 HRTIMER_MODE_ABS_HARD); 2546 apic->lapic_timer.timer.function = apic_timer_fn; 2547 if (timer_advance_ns == -1) { 2548 apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; 2549 lapic_timer_advance_dynamic = true; 2550 } else { 2551 apic->lapic_timer.timer_advance_ns = timer_advance_ns; 2552 lapic_timer_advance_dynamic = false; 2553 } 2554 2555 /* 2556 * Stuff the APIC ENABLE bit in lieu of temporarily incrementing 2557 * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset(). 
2558 */ 2559 vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; 2560 static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */ 2561 kvm_iodevice_init(&apic->dev, &apic_mmio_ops); 2562 2563 return 0; 2564 nomem_free_apic: 2565 kfree(apic); 2566 vcpu->arch.apic = NULL; 2567 nomem: 2568 return -ENOMEM; 2569 } 2570 2571 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 2572 { 2573 struct kvm_lapic *apic = vcpu->arch.apic; 2574 u32 ppr; 2575 2576 if (!kvm_apic_present(vcpu)) 2577 return -1; 2578 2579 __apic_update_ppr(apic, &ppr); 2580 return apic_has_interrupt_for_ppr(apic, ppr); 2581 } 2582 EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt); 2583 2584 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) 2585 { 2586 u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0); 2587 2588 if (!kvm_apic_hw_enabled(vcpu->arch.apic)) 2589 return 1; 2590 if ((lvt0 & APIC_LVT_MASKED) == 0 && 2591 GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) 2592 return 1; 2593 return 0; 2594 } 2595 2596 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) 2597 { 2598 struct kvm_lapic *apic = vcpu->arch.apic; 2599 2600 if (atomic_read(&apic->lapic_timer.pending) > 0) { 2601 kvm_apic_inject_pending_timer_irqs(apic); 2602 atomic_set(&apic->lapic_timer.pending, 0); 2603 } 2604 } 2605 2606 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) 2607 { 2608 int vector = kvm_apic_has_interrupt(vcpu); 2609 struct kvm_lapic *apic = vcpu->arch.apic; 2610 u32 ppr; 2611 2612 if (vector == -1) 2613 return -1; 2614 2615 /* 2616 * We get here even with APIC virtualization enabled, if doing 2617 * nested virtualization and L1 runs with the "acknowledge interrupt 2618 * on exit" mode. Then we cannot inject the interrupt via RVI, 2619 * because the process would deliver it through the IDT. 2620 */ 2621 2622 apic_clear_irr(vector, apic); 2623 if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) { 2624 /* 2625 * For auto-EOI interrupts, there might be another pending 2626 * interrupt above PPR, so check whether to raise another 2627 * KVM_REQ_EVENT. 2628 */ 2629 apic_update_ppr(apic); 2630 } else { 2631 /* 2632 * For normal interrupts, PPR has been raised and there cannot 2633 * be a higher-priority pending interrupt---except if there was 2634 * a concurrent interrupt injection, but that would have 2635 * triggered KVM_REQ_EVENT already. 2636 */ 2637 apic_set_isr(vector, apic); 2638 __apic_update_ppr(apic, &ppr); 2639 } 2640 2641 return vector; 2642 } 2643 2644 static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, 2645 struct kvm_lapic_state *s, bool set) 2646 { 2647 if (apic_x2apic_mode(vcpu->arch.apic)) { 2648 u32 *id = (u32 *)(s->regs + APIC_ID); 2649 u32 *ldr = (u32 *)(s->regs + APIC_LDR); 2650 u64 icr; 2651 2652 if (vcpu->kvm->arch.x2apic_format) { 2653 if (*id != vcpu->vcpu_id) 2654 return -EINVAL; 2655 } else { 2656 if (set) 2657 *id >>= 24; 2658 else 2659 *id <<= 24; 2660 } 2661 2662 /* 2663 * In x2APIC mode, the LDR is fixed and based on the id. And 2664 * ICR is internally a single 64-bit register, but needs to be 2665 * split to ICR+ICR2 in userspace for backwards compatibility. 
2666 */ 2667 if (set) { 2668 *ldr = kvm_apic_calc_x2apic_ldr(*id); 2669 2670 icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | 2671 (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; 2672 __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr); 2673 } else { 2674 icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); 2675 __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); 2676 } 2677 } else { 2678 kvm_lapic_xapic_id_updated(vcpu->arch.apic); 2679 } 2680 2681 return 0; 2682 } 2683 2684 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) 2685 { 2686 memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s)); 2687 2688 /* 2689 * Get calculated timer current count for remaining timer period (if 2690 * any) and store it in the returned register set. 2691 */ 2692 __kvm_lapic_set_reg(s->regs, APIC_TMCCT, 2693 __apic_read(vcpu->arch.apic, APIC_TMCCT)); 2694 2695 return kvm_apic_state_fixup(vcpu, s, false); 2696 } 2697 2698 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) 2699 { 2700 struct kvm_lapic *apic = vcpu->arch.apic; 2701 int r; 2702 2703 kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); 2704 /* set SPIV separately to get count of SW disabled APICs right */ 2705 apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); 2706 2707 r = kvm_apic_state_fixup(vcpu, s, true); 2708 if (r) { 2709 kvm_recalculate_apic_map(vcpu->kvm); 2710 return r; 2711 } 2712 memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); 2713 2714 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 2715 kvm_recalculate_apic_map(vcpu->kvm); 2716 kvm_apic_set_version(vcpu); 2717 2718 apic_update_ppr(apic); 2719 cancel_apic_timer(apic); 2720 apic->lapic_timer.expired_tscdeadline = 0; 2721 apic_update_lvtt(apic); 2722 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); 2723 update_divide_count(apic); 2724 __start_apic_timer(apic, APIC_TMCCT); 2725 kvm_lapic_set_reg(apic, APIC_TMCCT, 0); 2726 kvm_apic_update_apicv(vcpu); 2727 apic->highest_isr_cache = -1; 2728 if (apic->apicv_active) { 2729 static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu); 2730 static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic)); 2731 static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic)); 2732 } 2733 kvm_make_request(KVM_REQ_EVENT, vcpu); 2734 if (ioapic_in_kernel(vcpu->kvm)) 2735 kvm_rtc_eoi_tracking_restore_one(vcpu); 2736 2737 vcpu->arch.apic_arb_prio = 0; 2738 2739 return 0; 2740 } 2741 2742 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) 2743 { 2744 struct hrtimer *timer; 2745 2746 if (!lapic_in_kernel(vcpu) || 2747 kvm_can_post_timer_interrupt(vcpu)) 2748 return; 2749 2750 timer = &vcpu->arch.apic->lapic_timer.timer; 2751 if (hrtimer_cancel(timer)) 2752 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD); 2753 } 2754 2755 /* 2756 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt 2757 * 2758 * Detect whether guest triggered PV EOI since the 2759 * last entry. If yes, set EOI on guests's behalf. 2760 * Clear PV EOI in guest memory in any case. 2761 */ 2762 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, 2763 struct kvm_lapic *apic) 2764 { 2765 int vector; 2766 /* 2767 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host 2768 * and KVM_PV_EOI_ENABLED in guest memory as follows: 2769 * 2770 * KVM_APIC_PV_EOI_PENDING is unset: 2771 * -> host disabled PV EOI. 2772 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set: 2773 * -> host enabled PV EOI, guest did not execute EOI yet. 
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
	 *	-> host enabled PV EOI, guest executed EOI.
	 */
	BUG_ON(!pv_eoi_enabled(vcpu));

	if (pv_eoi_test_and_clr_pending(vcpu))
		return;
	vector = apic_set_eoi(apic);
	trace_kvm_pv_eoi(apic, vector);
}

void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
	u32 data;

	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
				  sizeof(u32)))
		return;

	apic_set_tpr(vcpu->arch.apic, data & 0xff);
}

/*
 * apic_sync_pv_eoi_to_guest - called before vmentry
 *
 * Detect whether it's safe to enable PV EOI and
 * if yes do so.
 */
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
				      struct kvm_lapic *apic)
{
	if (!pv_eoi_enabled(vcpu) ||
	    /* IRR set or many bits in ISR: could be nested. */
	    apic->irr_pending ||
	    /* Cache not set: could be safe but we don't bother. */
	    apic->highest_isr_cache == -1 ||
	    /* Need EOI to update ioapic. */
	    kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
		/*
		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
		 * so we need not do anything here.
		 */
		return;
	}

	pv_eoi_set_pending(apic->vcpu);
}

void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
	u32 data, tpr;
	int max_irr, max_isr;
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic_sync_pv_eoi_to_guest(vcpu, apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
	max_irr = apic_find_highest_irr(apic);
	if (max_irr < 0)
		max_irr = 0;
	max_isr = apic_find_highest_isr(apic);
	if (max_isr < 0)
		max_isr = 0;
	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);

	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
			       sizeof(u32));
}

int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
	if (vapic_addr) {
		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
					&vcpu->arch.apic->vapic_cache,
					vapic_addr, sizeof(u32)))
			return -EINVAL;
		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	} else {
		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	}

	vcpu->arch.apic->vapic_addr = vapic_addr;
	return 0;
}

int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
{
	data &= ~APIC_ICR_BUSY;

	kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
	kvm_lapic_set_reg64(apic, APIC_ICR, data);
	trace_kvm_apic_write(APIC_ICR, data);
	return 0;
}

static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
{
	u32 low;

	if (reg == APIC_ICR) {
		*data = kvm_lapic_get_reg64(apic, APIC_ICR);
		return 0;
	}

	if (kvm_lapic_reg_read(apic, reg, 4, &low))
		return 1;

	*data = low;

	return 0;
}

static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
{
	/*
	 * ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and
	 * can be written as such; all
other registers remain accessible only 2900 * through 32-bit reads/writes. 2901 */ 2902 if (reg == APIC_ICR) 2903 return kvm_x2apic_icr_write(apic, data); 2904 2905 return kvm_lapic_reg_write(apic, reg, (u32)data); 2906 } 2907 2908 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) 2909 { 2910 struct kvm_lapic *apic = vcpu->arch.apic; 2911 u32 reg = (msr - APIC_BASE_MSR) << 4; 2912 2913 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) 2914 return 1; 2915 2916 return kvm_lapic_msr_write(apic, reg, data); 2917 } 2918 2919 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) 2920 { 2921 struct kvm_lapic *apic = vcpu->arch.apic; 2922 u32 reg = (msr - APIC_BASE_MSR) << 4; 2923 2924 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) 2925 return 1; 2926 2927 if (reg == APIC_DFR) 2928 return 1; 2929 2930 return kvm_lapic_msr_read(apic, reg, data); 2931 } 2932 2933 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) 2934 { 2935 if (!lapic_in_kernel(vcpu)) 2936 return 1; 2937 2938 return kvm_lapic_msr_write(vcpu->arch.apic, reg, data); 2939 } 2940 2941 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) 2942 { 2943 if (!lapic_in_kernel(vcpu)) 2944 return 1; 2945 2946 return kvm_lapic_msr_read(vcpu->arch.apic, reg, data); 2947 } 2948 2949 int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len) 2950 { 2951 u64 addr = data & ~KVM_MSR_ENABLED; 2952 struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data; 2953 unsigned long new_len; 2954 int ret; 2955 2956 if (!IS_ALIGNED(addr, 4)) 2957 return 1; 2958 2959 if (data & KVM_MSR_ENABLED) { 2960 if (addr == ghc->gpa && len <= ghc->len) 2961 new_len = ghc->len; 2962 else 2963 new_len = len; 2964 2965 ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len); 2966 if (ret) 2967 return ret; 2968 } 2969 2970 vcpu->arch.pv_eoi.msr_val = data; 2971 2972 return 0; 2973 } 2974 2975 int kvm_apic_accept_events(struct kvm_vcpu *vcpu) 2976 { 2977 struct kvm_lapic *apic = vcpu->arch.apic; 2978 u8 sipi_vector; 2979 int r; 2980 unsigned long pe; 2981 2982 if (!lapic_in_kernel(vcpu)) 2983 return 0; 2984 2985 /* 2986 * Read pending events before calling the check_events 2987 * callback. 2988 */ 2989 pe = smp_load_acquire(&apic->pending_events); 2990 if (!pe) 2991 return 0; 2992 2993 if (is_guest_mode(vcpu)) { 2994 r = kvm_check_nested_events(vcpu); 2995 if (r < 0) 2996 return r == -EBUSY ? 0 : r; 2997 /* 2998 * If an event has happened and caused a vmexit, 2999 * we know INITs are latched and therefore 3000 * we will not incorrectly deliver an APIC 3001 * event instead of a vmexit. 3002 */ 3003 } 3004 3005 /* 3006 * INITs are latched while CPU is in specific states 3007 * (SMM, VMX root mode, SVM with GIF=0). 3008 * Because a CPU cannot be in these states immediately 3009 * after it has processed an INIT signal (and thus in 3010 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs 3011 * and leave the INIT pending. 
3012 */ 3013 if (kvm_vcpu_latch_init(vcpu)) { 3014 WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); 3015 if (test_bit(KVM_APIC_SIPI, &pe)) 3016 clear_bit(KVM_APIC_SIPI, &apic->pending_events); 3017 return 0; 3018 } 3019 3020 if (test_bit(KVM_APIC_INIT, &pe)) { 3021 clear_bit(KVM_APIC_INIT, &apic->pending_events); 3022 kvm_vcpu_reset(vcpu, true); 3023 if (kvm_vcpu_is_bsp(apic->vcpu)) 3024 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 3025 else 3026 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; 3027 } 3028 if (test_bit(KVM_APIC_SIPI, &pe)) { 3029 clear_bit(KVM_APIC_SIPI, &apic->pending_events); 3030 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { 3031 /* evaluate pending_events before reading the vector */ 3032 smp_rmb(); 3033 sipi_vector = apic->sipi_vector; 3034 static_call(kvm_x86_vcpu_deliver_sipi_vector)(vcpu, sipi_vector); 3035 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 3036 } 3037 } 3038 return 0; 3039 } 3040 3041 void kvm_lapic_exit(void) 3042 { 3043 static_key_deferred_flush(&apic_hw_disabled); 3044 WARN_ON(static_branch_unlikely(&apic_hw_disabled.key)); 3045 static_key_deferred_flush(&apic_sw_disabled); 3046 WARN_ON(static_branch_unlikely(&apic_sw_disabled.key)); 3047 } 3048
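
#if 0
/*
 * Illustrative sketch (not built): shows the x2APIC MSR-to-register mapping
 * used by kvm_x2apic_msr_read()/kvm_x2apic_msr_write() above, i.e.
 * reg = (msr - APIC_BASE_MSR) << 4.  For example, MSR 0x808 (TPR) maps to
 * offset 0x80 (APIC_TASKPRI) and MSR 0x830 (ICR) maps to offset 0x300
 * (APIC_ICR).  The helper name below is hypothetical.
 */
static u32 example_x2apic_msr_to_reg(u32 msr)
{
	return (msr - APIC_BASE_MSR) << 4;
}
#endif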