// SPDX-License-Identifier: GPL-2.0-only

/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/export.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/delay.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "ioapic.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

/* 14 is the version for Xeon and Pentium 8.4.8 */
#define APIC_VERSION		(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH	(1 << 12)
/* the following define is not in apicdef.h */
#define MAX_APIC_VECTOR		256
#define APIC_VECTORS_PER_REG	32

static bool lapic_timer_advance_dynamic __read_mostly;
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN	100	/* clock cycles */
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX	10000	/* clock cycles */
#define LAPIC_TIMER_ADVANCE_NS_INIT	1000
#define LAPIC_TIMER_ADVANCE_NS_MAX	5000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP	8

static inline int apic_test_vector(int vec, void *bitmap)
{
	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
		apic_test_vector(vector, apic->regs + APIC_IRR);
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;

static inline int apic_enabled(struct kvm_lapic *apic)
{
	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
{
	return apic->vcpu->vcpu_id;
}

static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
{
	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
}
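/*
 * Whether the hypervisor timer (e.g. the VMX preemption timer) can back the
 * guest's APIC timer: the backend must implement set_hv_timer, and neither
 * MWAIT-in-guest nor posted timer interrupts may be in use.
 */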
bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
{
	return kvm_x86_ops.set_hv_timer
		&& !(kvm_mwait_in_guest(vcpu->kvm) ||
		     kvm_can_post_timer_interrupt(vcpu));
}
EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);

static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
{
	return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
}

static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
	switch (map->mode) {
	case KVM_APIC_MODE_X2APIC: {
		u32 offset = (dest_id >> 16) * 16;
		u32 max_apic_id = map->max_apic_id;

		if (offset <= max_apic_id) {
			u8 cluster_size = min(max_apic_id - offset + 1, 16U);

			offset = array_index_nospec(offset, map->max_apic_id + 1);
			*cluster = &map->phys_map[offset];
			*mask = dest_id & (0xffff >> (16 - cluster_size));
		} else {
			*mask = 0;
		}

		return true;
		}
	case KVM_APIC_MODE_XAPIC_FLAT:
		*cluster = map->xapic_flat_map;
		*mask = dest_id & 0xff;
		return true;
	case KVM_APIC_MODE_XAPIC_CLUSTER:
		*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
		*mask = dest_id & 0xf;
		return true;
	default:
		/* Not optimized. */
		return false;
	}
}

static void kvm_apic_map_free(struct rcu_head *rcu)
{
	struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);

	kvfree(map);
}

/*
 * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
 *
 * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
 * apic_map_lock_held.
 */
enum {
	CLEAN,
	UPDATE_IN_PROGRESS,
	DIRTY
};

void kvm_recalculate_apic_map(struct kvm *kvm)
{
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	int i;
	u32 max_id = 255; /* enough space for any xAPIC ID */

	/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */
	if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
		return;

	mutex_lock(&kvm->arch.apic_map_lock);
	/*
	 * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
	 * (if clean) or the APIC registers (if dirty).
	 */
	if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
				   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
		/* Someone else has updated the map. */
		mutex_unlock(&kvm->arch.apic_map_lock);
		return;
	}

	kvm_for_each_vcpu(i, vcpu, kvm)
		if (kvm_apic_present(vcpu))
			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));

	new = kvzalloc(sizeof(struct kvm_apic_map) +
			   sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
			   GFP_KERNEL_ACCOUNT);

	if (!new)
		goto out;

	new->max_apic_id = max_id;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_lapic *apic = vcpu->arch.apic;
		struct kvm_lapic **cluster;
		u16 mask;
		u32 ldr;
		u8 xapic_id;
		u32 x2apic_id;

		if (!kvm_apic_present(vcpu))
			continue;

		xapic_id = kvm_xapic_id(apic);
		x2apic_id = kvm_x2apic_id(apic);

		/* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
		if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
				x2apic_id <= new->max_apic_id)
			new->phys_map[x2apic_id] = apic;
		/*
		 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
		 * prevent them from masking VCPUs with APIC ID <= 0xff.
		 */
		if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
			new->phys_map[xapic_id] = apic;

		if (!kvm_apic_sw_enabled(apic))
			continue;

		ldr = kvm_lapic_get_reg(apic, APIC_LDR);

		if (apic_x2apic_mode(apic)) {
			new->mode |= KVM_APIC_MODE_X2APIC;
		} else if (ldr) {
			ldr = GET_APIC_LOGICAL_ID(ldr);
			if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
				new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
			else
				new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
		}

		if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
			continue;

		if (mask)
			cluster[ffs(mask) - 1] = apic;
	}
out:
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));
	rcu_assign_pointer(kvm->arch.apic_map, new);
	/*
	 * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
	 * If another update has come in, leave it DIRTY.
	 */
	atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
			       UPDATE_IN_PROGRESS, CLEAN);
	mutex_unlock(&kvm->arch.apic_map_lock);

	if (old)
		call_rcu(&old->rcu, kvm_apic_map_free);

	kvm_make_scan_ioapic_request(kvm);
}

static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
	bool enabled = val & APIC_SPIV_APIC_ENABLED;

	kvm_lapic_set_reg(apic, APIC_SPIV, val);

	if (enabled != apic->sw_enabled) {
		apic->sw_enabled = enabled;
		if (enabled)
			static_key_slow_dec_deferred(&apic_sw_disabled);
		else
			static_key_slow_inc(&apic_sw_disabled.key);

		atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
	}
}

static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
{
	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}

static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
	kvm_lapic_set_reg(apic, APIC_LDR, id);
	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}

static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
{
	return ((id >> 4) << 16) | (1 << (id & 0xf));
}

static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
	u32 ldr = kvm_apic_calc_x2apic_ldr(id);

	WARN_ON_ONCE(id != apic->vcpu->vcpu_id);

	kvm_lapic_set_reg(apic, APIC_ID, id);
	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}

static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
	return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
}

static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
}

static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
}

static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}
void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct kvm_cpuid_entry2 *feat;
	u32 v = APIC_VERSION;

	if (!lapic_in_kernel(vcpu))
		return;

	/*
	 * KVM emulates the 82093AA datasheet (with the in-kernel IOAPIC
	 * implementation), which doesn't have an EOI register.  Some buggy
	 * OSes (e.g. Windows with the Hyper-V role) disable EOI broadcast in
	 * the lapic without checking the IOAPIC version first, and
	 * level-triggered interrupts then never get EOIed in the IOAPIC.
	 */
	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
	    !ioapic_in_kernel(vcpu->kvm))
		v |= APIC_LVR_DIRECTED_EOI;
	kvm_lapic_set_reg(apic, APIC_LVR, v);
}

static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
	LVT_MASK,		/* part LVTT mask, timer mode mask added at runtime */
	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
	LINT_MASK, LINT_MASK,	/* LVT0-1 */
	LVT_MASK		/* LVTERR */
};

static int find_highest_vector(void *bitmap)
{
	int vec;
	u32 *reg;

	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		if (*reg)
			return __fls(*reg) + vec;
	}

	return -1;
}

static u8 count_vectors(void *bitmap)
{
	int vec;
	u32 *reg;
	u8 count = 0;

	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		count += hweight32(*reg);
	}

	return count;
}
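/*
 * Transfer newly posted vectors from the posted-interrupt request bitmap
 * (pir) into the IRR of the virtual-APIC page.  On return, *max_irr holds
 * the highest vector pending in the IRR (or -1), and the function returns
 * true if that highest vector is one that was just moved in from the PIR.
 */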
bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
{
	u32 i, vec;
	u32 pir_val, irr_val, prev_irr_val;
	int max_updated_irr;

	max_updated_irr = -1;
	*max_irr = -1;

	for (i = vec = 0; i <= 7; i++, vec += 32) {
		pir_val = READ_ONCE(pir[i]);
		irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
		if (pir_val) {
			prev_irr_val = irr_val;
			irr_val |= xchg(&pir[i], 0);
			*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
			if (prev_irr_val != irr_val) {
				max_updated_irr =
					__fls(irr_val ^ prev_irr_val) + vec;
			}
		}
		if (irr_val)
			*max_irr = __fls(irr_val) + vec;
	}

	return ((max_updated_irr != -1) &&
		(max_updated_irr == *max_irr));
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);

bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return __kvm_apic_update_irr(pir, apic->regs, max_irr);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);

static inline int apic_search_irr(struct kvm_lapic *apic)
{
	return find_highest_vector(apic->regs + APIC_IRR);
}

static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
	int result;

	/*
	 * Note that irr_pending is just a hint. It will always be true with
	 * virtual interrupt delivery enabled.
	 */
	if (!apic->irr_pending)
		return -1;

	result = apic_search_irr(apic);
	ASSERT(result == -1 || result >= 16);

	return result;
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
	struct kvm_vcpu *vcpu;

	vcpu = apic->vcpu;

	if (unlikely(vcpu->arch.apicv_active)) {
		/* need to update RVI */
		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
		kvm_x86_ops.hwapic_irr_update(vcpu,
				apic_find_highest_irr(apic));
	} else {
		apic->irr_pending = false;
		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
		if (apic_search_irr(apic) != -1)
			apic->irr_pending = true;
	}
}

static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
	struct kvm_vcpu *vcpu;

	if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
		return;

	vcpu = apic->vcpu;

	/*
	 * With APIC virtualization enabled, all caching is disabled
	 * because the processor can modify ISR under the hood.  Instead
	 * just set SVI.
	 */
	if (unlikely(vcpu->arch.apicv_active))
		kvm_x86_ops.hwapic_isr_update(vcpu, vec);
	else {
		++apic->isr_count;
		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
		/*
		 * ISR (in service register) bit is set when injecting an interrupt.
		 * The highest vector is injected. Thus the latest bit set matches
		 * the highest bit in ISR.
		 */
		apic->highest_isr_cache = vec;
	}
}

static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
	int result;

	/*
	 * Note that isr_count is always 1, and highest_isr_cache
	 * is always -1, with APIC virtualization enabled.
	 */
	if (!apic->isr_count)
		return -1;
	if (likely(apic->highest_isr_cache != -1))
		return apic->highest_isr_cache;

	result = find_highest_vector(apic->regs + APIC_ISR);
	ASSERT(result == -1 || result >= 16);

	return result;
}

static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
	struct kvm_vcpu *vcpu;

	if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
		return;

	vcpu = apic->vcpu;

	/*
	 * We do get here for APIC virtualization enabled if the guest
	 * uses the Hyper-V APIC enlightenment. In this case we may need
	 * to trigger a new interrupt delivery by writing the SVI field;
	 * on the other hand isr_count and highest_isr_cache are unused
	 * and must be left alone.
	 */
	if (unlikely(vcpu->arch.apicv_active))
		kvm_x86_ops.hwapic_isr_update(vcpu,
					      apic_find_highest_isr(apic));
	else {
		--apic->isr_count;
		BUG_ON(apic->isr_count < 0);
		apic->highest_isr_cache = -1;
	}
}
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
	/*
	 * This may race with the setting of irr in __apic_accept_irq() and
	 * the value returned may be wrong, but kvm_vcpu_kick() in
	 * __apic_accept_irq will cause an immediate vmexit and the value
	 * will be recalculated on the next vmentry.
	 */
	return apic_find_highest_irr(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     struct dest_map *dest_map);

int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
		     struct dest_map *dest_map)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
			irq->level, irq->trig_mode, dest_map);
}

static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
			 struct kvm_lapic_irq *irq, u32 min)
{
	int i, count = 0;
	struct kvm_vcpu *vcpu;

	if (min > map->max_apic_id)
		return 0;

	for_each_set_bit(i, ipi_bitmap,
		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
		if (map->phys_map[min + i]) {
			vcpu = map->phys_map[min + i]->vcpu;
			count += kvm_apic_set_irq(vcpu, irq, NULL);
		}
	}

	return count;
}

int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
		    unsigned long ipi_bitmap_high, u32 min,
		    unsigned long icr, int op_64_bit)
{
	struct kvm_apic_map *map;
	struct kvm_lapic_irq irq = {0};
	int cluster_size = op_64_bit ? 64 : 32;
	int count;

	if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
		return -KVM_EINVAL;

	irq.vector = icr & APIC_VECTOR_MASK;
	irq.delivery_mode = icr & APIC_MODE_MASK;
	irq.level = (icr & APIC_INT_ASSERT) != 0;
	irq.trig_mode = icr & APIC_INT_LEVELTRIG;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	count = -EOPNOTSUPP;
	if (likely(map)) {
		count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
		min += cluster_size;
		count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
	}

	rcu_read_unlock();
	return count;
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{
	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
				      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{
	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
				     sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
	u8 val;

	if (pv_eoi_get_user(vcpu, &val) < 0) {
		printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return false;
	}
	return val & 0x1;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
		printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
		printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}
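/*
 * Return the highest pending IRR vector if its priority class is above the
 * given PPR, otherwise -1.  With APICv active, the IRR is first synchronized
 * from the posted-interrupt descriptor.
 */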
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
	int highest_irr;

	if (apic->vcpu->arch.apicv_active)
		highest_irr = kvm_x86_ops.sync_pir_to_irr(apic->vcpu);
	else
		highest_irr = apic_find_highest_irr(apic);
	if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
		return -1;
	return highest_irr;
}

static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
{
	u32 tpr, isrv, ppr, old_ppr;
	int isr;

	old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
	isr = apic_find_highest_isr(apic);
	isrv = (isr != -1) ? isr : 0;

	if ((tpr & 0xf0) >= (isrv & 0xf0))
		ppr = tpr & 0xff;
	else
		ppr = isrv & 0xf0;

	*new_ppr = ppr;
	if (old_ppr != ppr)
		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);

	return ppr < old_ppr;
}

static void apic_update_ppr(struct kvm_lapic *apic)
{
	u32 ppr;

	if (__apic_update_ppr(apic, &ppr) &&
	    apic_has_interrupt_for_ppr(apic, ppr) != -1)
		kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}

void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
{
	apic_update_ppr(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);

static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
	kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
	apic_update_ppr(apic);
}

static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
{
	return mda == (apic_x2apic_mode(apic) ?
			X2APIC_BROADCAST : APIC_BROADCAST);
}

static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
{
	if (kvm_apic_broadcast(apic, mda))
		return true;

	if (apic_x2apic_mode(apic))
		return mda == kvm_x2apic_id(apic);

	/*
	 * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
	 * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
	 * this allows unique addressing of VCPUs with APIC ID over 0xff.
	 * The 0xff condition is needed because the xAPIC ID is writeable.
	 */
	if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
		return true;

	return mda == kvm_xapic_id(apic);
}

static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
{
	u32 logical_id;

	if (kvm_apic_broadcast(apic, mda))
		return true;

	logical_id = kvm_lapic_get_reg(apic, APIC_LDR);

	if (apic_x2apic_mode(apic))
		return ((logical_id >> 16) == (mda >> 16))
		       && (logical_id & mda & 0xffff) != 0;

	logical_id = GET_APIC_LOGICAL_ID(logical_id);

	switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
	case APIC_DFR_FLAT:
		return (logical_id & mda) != 0;
	case APIC_DFR_CLUSTER:
		return ((logical_id >> 4) == (mda >> 4))
		    && (logical_id & mda & 0xf) != 0;
	default:
		return false;
	}
}

/* The KVM local APIC implementation has two quirks:
 *
 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 *    KVM doesn't do that aliasing.
 *
 *  - in-kernel IOAPIC messages have to be delivered directly to
 *    x2APIC, because the kernel does not support interrupt remapping.
 *    In order to support broadcast without interrupt remapping, x2APIC
 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 *    to X2APIC_BROADCAST.
 *
 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 * important when userspace wants to use x2APIC-format MSIs, because
 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 */
static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
			struct kvm_lapic *source, struct kvm_lapic *target)
{
	bool ipi = source != NULL;

	if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
	    !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
		return X2APIC_BROADCAST;

	return dest_id;
}

bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
			   int shorthand, unsigned int dest, int dest_mode)
{
	struct kvm_lapic *target = vcpu->arch.apic;
	u32 mda = kvm_apic_mda(vcpu, dest, source, target);

	ASSERT(target);
	switch (shorthand) {
	case APIC_DEST_NOSHORT:
		if (dest_mode == APIC_DEST_PHYSICAL)
			return kvm_apic_match_physical_addr(target, mda);
		else
			return kvm_apic_match_logical_addr(target, mda);
	case APIC_DEST_SELF:
		return target == source;
	case APIC_DEST_ALLINC:
		return true;
	case APIC_DEST_ALLBUT:
		return target != source;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(kvm_apic_match_dest);

int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
		       const unsigned long *bitmap, u32 bitmap_size)
{
	u32 mod;
	int i, idx = -1;

	mod = vector % dest_vcpus;

	for (i = 0; i <= mod; i++) {
		idx = find_next_bit(bitmap, bitmap_size, idx + 1);
		BUG_ON(idx == bitmap_size);
	}

	return idx;
}

static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
{
	if (!kvm->arch.disabled_lapic_found) {
		kvm->arch.disabled_lapic_found = true;
		printk(KERN_INFO
		       "Disabled LAPIC found during irq injection\n");
	}
}

static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
		struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
{
	if (kvm->arch.x2apic_broadcast_quirk_disabled) {
		if ((irq->dest_id == APIC_BROADCAST &&
				map->mode != KVM_APIC_MODE_X2APIC))
			return true;
		if (irq->dest_id == X2APIC_BROADCAST)
			return true;
	} else {
		bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
		if (irq->dest_id == (x2apic_ipi ?
				     X2APIC_BROADCAST : APIC_BROADCAST))
			return true;
	}

	return false;
}

/* Return true if the interrupt can be handled by using *bitmap as index mask
 * for valid destinations in *dst array.
 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
 * Note: we may have zero kvm_lapic destinations when we return true, which
 * means that the interrupt should be dropped.  In this case, *bitmap would be
 * zero and *dst undefined.
 */
static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
			struct kvm_lapic **src, struct kvm_lapic_irq *irq,
			struct kvm_apic_map *map, struct kvm_lapic ***dst,
			unsigned long *bitmap)
{
	int i, lowest;

	if (irq->shorthand == APIC_DEST_SELF && src) {
		*dst = src;
		*bitmap = 1;
		return true;
	} else if (irq->shorthand)
		return false;

	if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
		return false;

	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
		if (irq->dest_id > map->max_apic_id) {
			*bitmap = 0;
		} else {
			u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
			*dst = &map->phys_map[dest_id];
			*bitmap = 1;
		}
		return true;
	}

	*bitmap = 0;
	if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
				(u16 *)bitmap))
		return false;

	if (!kvm_lowest_prio_delivery(irq))
		return true;

	if (!kvm_vector_hashing_enabled()) {
		lowest = -1;
		for_each_set_bit(i, bitmap, 16) {
			if (!(*dst)[i])
				continue;
			if (lowest < 0)
				lowest = i;
			else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
						(*dst)[lowest]->vcpu) < 0)
				lowest = i;
		}
	} else {
		if (!*bitmap)
			return true;

		lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
				bitmap, 16);

		if (!(*dst)[lowest]) {
			kvm_apic_disabled_lapic_found(kvm);
			*bitmap = 0;
			return true;
		}
	}

	*bitmap = (lowest >= 0) ? 1 << lowest : 0;

	return true;
}

bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
{
	struct kvm_apic_map *map;
	unsigned long bitmap;
	struct kvm_lapic **dst = NULL;
	int i;
	bool ret;

	*r = -1;

	if (irq->shorthand == APIC_DEST_SELF) {
		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
		return true;
	}

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
	if (ret) {
		*r = 0;
		for_each_set_bit(i, &bitmap, 16) {
			if (!dst[i])
				continue;
			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
		}
	}

	rcu_read_unlock();
	return ret;
}

/*
 * This routine tries to handle interrupts in posted mode, here is how
 * it deals with different cases:
 * - For single-destination interrupts, handle it in posted mode
 * - Else if vector hashing is enabled and it is a lowest-priority
 *   interrupt, handle it in posted mode and use the following mechanism
 *   to find the destination vCPU.
 *	1. For lowest-priority interrupts, store all the possible
 *	   destination vCPUs in an array.
 *	2. Use "guest vector % max number of destination vCPUs" to find
 *	   the right destination vCPU in the array for the lowest-priority
 *	   interrupt.
 * - Otherwise, use remapped mode to inject the interrupt.
 */
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
			struct kvm_vcpu **dest_vcpu)
{
	struct kvm_apic_map *map;
	unsigned long bitmap;
	struct kvm_lapic **dst = NULL;
	bool ret = false;

	if (irq->shorthand)
		return false;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
			hweight16(bitmap) == 1) {
		unsigned long i = find_first_bit(&bitmap, 16);

		if (dst[i]) {
			*dest_vcpu = dst[i]->vcpu;
			ret = true;
		}
	}

	rcu_read_unlock();
	return ret;
}

/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     struct dest_map *dest_map)
{
	int result = 0;
	struct kvm_vcpu *vcpu = apic->vcpu;

	trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
				  trig_mode, vector);
	switch (delivery_mode) {
	case APIC_DM_LOWEST:
		vcpu->arch.apic_arb_prio++;
		/* fall through */
	case APIC_DM_FIXED:
		if (unlikely(trig_mode && !level))
			break;

		/* FIXME add logic for vcpu on reset */
		if (unlikely(!apic_enabled(apic)))
			break;

		result = 1;

		if (dest_map) {
			__set_bit(vcpu->vcpu_id, dest_map->map);
			dest_map->vectors[vcpu->vcpu_id] = vector;
		}

		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
			if (trig_mode)
				kvm_lapic_set_vector(vector,
						     apic->regs + APIC_TMR);
			else
				kvm_lapic_clear_vector(vector,
						       apic->regs + APIC_TMR);
		}

		if (kvm_x86_ops.deliver_posted_interrupt(vcpu, vector)) {
			kvm_lapic_set_irr(vector, apic);
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		break;

	case APIC_DM_REMRD:
		result = 1;
		vcpu->arch.pv.pv_unhalted = 1;
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_SMI:
		result = 1;
		kvm_make_request(KVM_REQ_SMI, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_NMI:
		result = 1;
		kvm_inject_nmi(vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_INIT:
		if (!trig_mode || level) {
			result = 1;
			/* assumes that there are only KVM_APIC_INIT/SIPI */
			apic->pending_events = (1UL << KVM_APIC_INIT);
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		break;

	case APIC_DM_STARTUP:
		result = 1;
		apic->sipi_vector = vector;
		/* make sure sipi_vector is visible for the receiver */
		smp_wmb();
		set_bit(KVM_APIC_SIPI, &apic->pending_events);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_EXTINT:
		/*
		 * Should only be called by kvm_apic_local_deliver() with LVT0,
		 * before NMI watchdog was enabled. Already handled by
		 * kvm_apic_accept_pic_intr().
		 */
		break;

	default:
		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
		       delivery_mode);
		break;
	}
	return result;
}

/*
 * This routine identifies the destination vcpus mask meant to receive the
 * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
 * out the destination vcpus array and set the bitmap, or it traverses each
 * available vcpu to identify the same.
 */
void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
			      unsigned long *vcpu_bitmap)
{
	struct kvm_lapic **dest_vcpu = NULL;
	struct kvm_lapic *src = NULL;
	struct kvm_apic_map *map;
	struct kvm_vcpu *vcpu;
	unsigned long bitmap;
	int i, vcpu_idx;
	bool ret;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
					  &bitmap);
	if (ret) {
		for_each_set_bit(i, &bitmap, 16) {
			if (!dest_vcpu[i])
				continue;
			vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
			__set_bit(vcpu_idx, vcpu_bitmap);
		}
	} else {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (!kvm_apic_present(vcpu))
				continue;
			if (!kvm_apic_match_dest(vcpu, NULL,
						 irq->shorthand,
						 irq->dest_id,
						 irq->dest_mode))
				continue;
			__set_bit(i, vcpu_bitmap);
		}
	}
	rcu_read_unlock();
}

int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}

static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
{
	return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
}

static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
	int trigger_mode;

	/* Eoi the ioapic only if the ioapic doesn't own the vector. */
	if (!kvm_ioapic_handles_vector(apic, vector))
		return;

	/* Request a KVM exit to inform the userspace IOAPIC. */
	if (irqchip_split(apic->vcpu->kvm)) {
		apic->vcpu->arch.pending_ioapic_eoi = vector;
		kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
		return;
	}

	if (apic_test_vector(vector, apic->regs + APIC_TMR))
		trigger_mode = IOAPIC_LEVEL_TRIG;
	else
		trigger_mode = IOAPIC_EDGE_TRIG;

	kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
}

static int apic_set_eoi(struct kvm_lapic *apic)
{
	int vector = apic_find_highest_isr(apic);

	trace_kvm_eoi(apic, vector);

	/*
	 * Not every EOI write has a corresponding ISR bit set; one example is
	 * when the kernel checks the timer in setup_IO_APIC().
	 */
	if (vector == -1)
		return vector;

	apic_clear_isr(vector, apic);
	apic_update_ppr(apic);

	if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
		kvm_hv_synic_send_eoi(apic->vcpu, vector);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	return vector;
}

/*
 * This interface assumes a trap-like exit, which has already finished the
 * desired side effects, including the vISR and vPPR updates.
 */
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	trace_kvm_eoi(apic, vector);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);

void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
{
	struct kvm_lapic_irq irq;

	irq.vector = icr_low & APIC_VECTOR_MASK;
	irq.delivery_mode = icr_low & APIC_MODE_MASK;
	irq.dest_mode = icr_low & APIC_DEST_MASK;
	irq.level = (icr_low & APIC_INT_ASSERT) != 0;
	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
	irq.shorthand = icr_low & APIC_SHORT_MASK;
	irq.msi_redir_hint = false;
	if (apic_x2apic_mode(apic))
		irq.dest_id = icr_high;
	else
		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

	trace_kvm_apic_ipi(icr_low, irq.dest_id);

	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}

static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
	ktime_t remaining, now;
	s64 ns;
	u32 tmcct;

	ASSERT(apic != NULL);

	/* if initial count is 0, current count should also be 0 */
	if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
		apic->lapic_timer.period == 0)
		return 0;

	now = ktime_get();
	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
	if (ktime_to_ns(remaining) < 0)
		remaining = 0;

	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
	tmcct = div64_u64(ns,
			 (APIC_BUS_CYCLE_NS * apic->divide_count));

	return tmcct;
}

static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct kvm_run *run = vcpu->run;

	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
	run->tpr_access.rip = kvm_rip_read(vcpu);
	run->tpr_access.is_write = write;
}

static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
	if (apic->vcpu->arch.tpr_access_reporting)
		__report_tpr_access(apic, write);
}

static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
	u32 val = 0;

	if (offset >= LAPIC_MMIO_LENGTH)
		return 0;

	switch (offset) {
	case APIC_ARBPRI:
		break;

	case APIC_TMCCT:	/* Timer CCR */
		if (apic_lvtt_tscdeadline(apic))
			return 0;

		val = apic_get_tmcct(apic);
		break;
	case APIC_PROCPRI:
		apic_update_ppr(apic);
		val = kvm_lapic_get_reg(apic, offset);
		break;
	case APIC_TASKPRI:
		report_tpr_access(apic, false);
		/* fall thru */
	default:
		val = kvm_lapic_get_reg(apic, offset);
		break;
	}

	return val;
}

static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_lapic, dev);
}

#define APIC_REG_MASK(reg)	(1ull << ((reg) >> 4))
#define APIC_REGS_MASK(first, count) \
	(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
		void *data)
{
	unsigned char alignment = offset & 0xf;
	u32 result;
	/* this bitmask has a bit cleared for each reserved register */
	u64 valid_reg_mask =
		APIC_REG_MASK(APIC_ID) |
		APIC_REG_MASK(APIC_LVR) |
		APIC_REG_MASK(APIC_TASKPRI) |
		APIC_REG_MASK(APIC_PROCPRI) |
		APIC_REG_MASK(APIC_LDR) |
		APIC_REG_MASK(APIC_DFR) |
		APIC_REG_MASK(APIC_SPIV) |
		APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
		APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
		APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
		APIC_REG_MASK(APIC_ESR) |
		APIC_REG_MASK(APIC_ICR) |
		APIC_REG_MASK(APIC_ICR2) |
		APIC_REG_MASK(APIC_LVTT) |
		APIC_REG_MASK(APIC_LVTTHMR) |
		APIC_REG_MASK(APIC_LVTPC) |
		APIC_REG_MASK(APIC_LVT0) |
		APIC_REG_MASK(APIC_LVT1) |
		APIC_REG_MASK(APIC_LVTERR) |
		APIC_REG_MASK(APIC_TMICT) |
		APIC_REG_MASK(APIC_TMCCT) |
		APIC_REG_MASK(APIC_TDCR);

	/* ARBPRI is not valid on x2APIC */
	if (!apic_x2apic_mode(apic))
		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);

	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
		return 1;

	result = __apic_read(apic, offset & ~0xf);

	trace_kvm_apic_read(offset, result);

	switch (len) {
	case 1:
	case 2:
	case 4:
		memcpy(data, (char *)&result + alignment, len);
		break;
	default:
		printk(KERN_ERR "Local APIC read with len = %x, "
		       "should be 1,2, or 4 instead\n", len);
		break;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
	return addr >= apic->base_address &&
		addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
			   gpa_t address, int len, void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	u32 offset = address - apic->base_address;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
		if (!kvm_check_has_quirk(vcpu->kvm,
					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
			return -EOPNOTSUPP;

		memset(data, 0xff, len);
		return 0;
	}

	kvm_lapic_reg_read(apic, offset, len, data);

	return 0;
}

static void update_divide_count(struct kvm_lapic *apic)
{
	u32 tmp1, tmp2, tdcr;

	tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
	tmp1 = tdcr & 0xf;
	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
	apic->divide_count = 0x1 << (tmp2 & 0x7);
}

static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
{
	/*
	 * Do not allow the guest to program periodic timers with small
	 * interval, since the hrtimers are not throttled by the host
	 * scheduler.
	 */
	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
		s64 min_period = min_timer_period_us * 1000LL;

		if (apic->lapic_timer.period < min_period) {
			pr_info_ratelimited(
			    "kvm: vcpu %i: requested %lld ns "
			    "lapic timer period limited to %lld ns\n",
			    apic->vcpu->vcpu_id,
			    apic->lapic_timer.period, min_period);
			apic->lapic_timer.period = min_period;
		}
	}
}

static void cancel_hv_timer(struct kvm_lapic *apic);

static void apic_update_lvtt(struct kvm_lapic *apic)
{
	u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
			apic->lapic_timer.timer_mode_mask;

	if (apic->lapic_timer.timer_mode != timer_mode) {
		if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
				APIC_LVT_TIMER_TSCDEADLINE)) {
			hrtimer_cancel(&apic->lapic_timer.timer);
			preempt_disable();
			if (apic->lapic_timer.hv_timer_in_use)
				cancel_hv_timer(apic);
			preempt_enable();
			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
			apic->lapic_timer.period = 0;
			apic->lapic_timer.tscdeadline = 0;
		}
		apic->lapic_timer.timer_mode = timer_mode;
		limit_periodic_timer_frequency(apic);
	}
}

/*
 * On APICv, this test will cause a busy wait
 * during a higher-priority task.
 */

static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);

	if (kvm_apic_hw_enabled(apic)) {
		int vec = reg & APIC_VECTOR_MASK;
		void *bitmap = apic->regs + APIC_ISR;

		if (vcpu->arch.apicv_active)
			bitmap = apic->regs + APIC_IRR;

		if (apic_test_vector(vec, bitmap))
			return true;
	}
	return false;
}

static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
{
	u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;

	/*
	 * If the guest TSC is running at a different ratio than the host, then
	 * convert the delay to nanoseconds to achieve an accurate delay.  Note
	 * that __delay() uses delay_tsc whenever the hardware has TSC, thus
	 * always for VMX enabled hardware.
	 */
	if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
		__delay(min(guest_cycles,
			nsec_to_cycles(vcpu, timer_advance_ns)));
	} else {
		u64 delay_ns = guest_cycles * 1000000ULL;
		do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
		ndelay(min_t(u32, delay_ns, timer_advance_ns));
	}
}
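/*
 * Adaptively tune lapic_timer.timer_advance_ns: advance_expire_delta is the
 * number of guest TSC cycles the timer interrupt fired early (< 0) or late
 * (> 0).  Tiny fluctuations and large spikes are ignored; otherwise the
 * advance is nudged by a fraction of the measured error and clamped.
 */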
static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
					      s64 advance_expire_delta)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
	u64 ns;

	/* Do not adjust for tiny fluctuations or large random spikes. */
	if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
	    abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
		return;

	/* too early */
	if (advance_expire_delta < 0) {
		ns = -advance_expire_delta * 1000000ULL;
		do_div(ns, vcpu->arch.virtual_tsc_khz);
		timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
	} else {
		/* too late */
		ns = advance_expire_delta * 1000000ULL;
		do_div(ns, vcpu->arch.virtual_tsc_khz);
		timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
	}

	if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
		timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}

static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u64 guest_tsc, tsc_deadline;

	if (apic->lapic_timer.expired_tscdeadline == 0)
		return;

	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
	apic->lapic_timer.expired_tscdeadline = 0;
	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;

	if (guest_tsc < tsc_deadline)
		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);

	if (lapic_timer_advance_dynamic)
		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
}

void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
{
	if (lapic_timer_int_injected(vcpu))
		__kvm_wait_lapic_expire(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);

static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
{
	struct kvm_timer *ktimer = &apic->lapic_timer;

	kvm_apic_local_deliver(apic, APIC_LVTT);
	if (apic_lvtt_tscdeadline(apic)) {
		ktimer->tscdeadline = 0;
	} else if (apic_lvtt_oneshot(apic)) {
		ktimer->tscdeadline = 0;
		ktimer->target_expiration = 0;
	}
}

static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct kvm_timer *ktimer = &apic->lapic_timer;

	if (atomic_read(&apic->lapic_timer.pending))
		return;

	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
		ktimer->expired_tscdeadline = ktimer->tscdeadline;

	if (!from_timer_fn && vcpu->arch.apicv_active) {
		WARN_ON(kvm_get_running_vcpu() != vcpu);
		kvm_apic_inject_pending_timer_irqs(apic);
		return;
	}

	if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
		if (apic->lapic_timer.timer_advance_ns)
			__kvm_wait_lapic_expire(vcpu);
		kvm_apic_inject_pending_timer_irqs(apic);
		return;
	}

	atomic_inc(&apic->lapic_timer.pending);
	kvm_set_pending_timer(vcpu);
}

static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
	struct kvm_timer *ktimer = &apic->lapic_timer;
	u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
	u64 ns = 0;
	ktime_t expire;
	struct kvm_vcpu *vcpu = apic->vcpu;
	unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
	unsigned long flags;
	ktime_t now;

	if (unlikely(!tscdeadline || !this_tsc_khz))
		return;

	local_irq_save(flags);

	now = ktime_get();
	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());

	ns = (tscdeadline - guest_tsc) * 1000000ULL;
	do_div(ns, this_tsc_khz);
	if (likely(tscdeadline > guest_tsc) &&
	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
		expire = ktime_add_ns(now, ns);
		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
	} else
		apic_timer_expired(apic, false);

	local_irq_restore(flags);
}

static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
{
	return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
}

static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
{
	ktime_t now, remaining;
	u64 ns_remaining_old, ns_remaining_new;

	apic->lapic_timer.period =
			tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
	limit_periodic_timer_frequency(apic);

	now = ktime_get();
	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
	if (ktime_to_ns(remaining) < 0)
		remaining = 0;

	ns_remaining_old = ktime_to_ns(remaining);
	ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
					   apic->divide_count, old_divisor);

	apic->lapic_timer.tscdeadline +=
		nsec_to_cycles(apic->vcpu, ns_remaining_new) -
		nsec_to_cycles(apic->vcpu, ns_remaining_old);
	apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
}

static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
{
	ktime_t now;
	u64 tscl = rdtsc();
	s64 deadline;

	now = ktime_get();
	apic->lapic_timer.period =
			tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));

	if (!apic->lapic_timer.period) {
		apic->lapic_timer.tscdeadline = 0;
		return false;
	}

	limit_periodic_timer_frequency(apic);
	deadline = apic->lapic_timer.period;

	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
		if (unlikely(count_reg != APIC_TMICT)) {
			deadline = tmict_to_ns(apic,
				     kvm_lapic_get_reg(apic, count_reg));
			if (unlikely(deadline <= 0))
				deadline = apic->lapic_timer.period;
			else if (unlikely(deadline > apic->lapic_timer.period)) {
				pr_info_ratelimited(
				    "kvm: vcpu %i: requested lapic timer restore with "
				    "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
				    "Using initial count to start timer.\n",
				    apic->vcpu->vcpu_id,
				    count_reg,
				    kvm_lapic_get_reg(apic, count_reg),
				    deadline, apic->lapic_timer.period);
				kvm_lapic_set_reg(apic, count_reg, 0);
				deadline = apic->lapic_timer.period;
			}
		}
	}

	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
		nsec_to_cycles(apic->vcpu, deadline);
	apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);

	return true;
}

static void advance_periodic_target_expiration(struct kvm_lapic *apic)
{
	ktime_t now = ktime_get();
	u64 tscl = rdtsc();
	ktime_t delta;

	/*
	 * Synchronize both deadlines to the same time source or
	 * differences in the periods (caused by differences in the
	 * underlying clocks or numerical approximation errors) will
	 * cause the two to drift apart over time as the errors
	 * accumulate.
	 */
	apic->lapic_timer.target_expiration =
		ktime_add_ns(apic->lapic_timer.target_expiration,
				apic->lapic_timer.period);
	delta = ktime_sub(apic->lapic_timer.target_expiration, now);
	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
		nsec_to_cycles(apic->vcpu, delta);
}

static void start_sw_period(struct kvm_lapic *apic)
{
	if (!apic->lapic_timer.period)
		return;

	if (ktime_after(ktime_get(),
			apic->lapic_timer.target_expiration)) {
		apic_timer_expired(apic, false);

		if (apic_lvtt_oneshot(apic))
			return;

		advance_periodic_target_expiration(apic);
	}

	hrtimer_start(&apic->lapic_timer.timer,
		apic->lapic_timer.target_expiration,
		HRTIMER_MODE_ABS_HARD);
}

bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
{
	if (!lapic_in_kernel(vcpu))
		return false;

	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
}
EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);

static void cancel_hv_timer(struct kvm_lapic *apic)
{
	WARN_ON(preemptible());
	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
	kvm_x86_ops.cancel_hv_timer(apic->vcpu);
	apic->lapic_timer.hv_timer_in_use = false;
}

static bool start_hv_timer(struct kvm_lapic *apic)
{
	struct kvm_timer *ktimer = &apic->lapic_timer;
	struct kvm_vcpu *vcpu = apic->vcpu;
	bool expired;

	WARN_ON(preemptible());
	if (!kvm_can_use_hv_timer(vcpu))
		return false;

	if (!ktimer->tscdeadline)
		return false;

	if (kvm_x86_ops.set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
		return false;

	ktimer->hv_timer_in_use = true;
	hrtimer_cancel(&ktimer->timer);

	/*
	 * To simplify handling the periodic timer, leave the hv timer running
	 * even if the deadline timer has expired, i.e. rely on the resulting
	 * VM-Exit to recompute the periodic timer's target expiration.
	 */
	if (!apic_lvtt_period(apic)) {
		/*
		 * Cancel the hv timer if the sw timer fired while the hv timer
		 * was being programmed, or if the hv timer itself expired.
		 */
		if (atomic_read(&ktimer->pending)) {
			cancel_hv_timer(apic);
		} else if (expired) {
			apic_timer_expired(apic, false);
			cancel_hv_timer(apic);
		}
	}

	trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);

	return true;
}

static void start_sw_timer(struct kvm_lapic *apic)
{
	struct kvm_timer *ktimer = &apic->lapic_timer;

	WARN_ON(preemptible());
	if (apic->lapic_timer.hv_timer_in_use)
		cancel_hv_timer(apic);
	if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
		return;

	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
		start_sw_period(apic);
	else if (apic_lvtt_tscdeadline(apic))
		start_sw_tscdeadline(apic);
	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
}

static void restart_apic_timer(struct kvm_lapic *apic)
{
	preempt_disable();

	if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
		goto out;

	if (!start_hv_timer(apic))
		start_sw_timer(apic);
out:
	preempt_enable();
}

void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	preempt_disable();
	/* If the preempt notifier has already run, it also called apic_timer_expired */
	if (!apic->lapic_timer.hv_timer_in_use)
		goto out;
	WARN_ON(rcuwait_active(&vcpu->wait));
	cancel_hv_timer(apic);
	apic_timer_expired(apic, false);

	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
		advance_periodic_target_expiration(apic);
		restart_apic_timer(apic);
	}
out:
	preempt_enable();
}
EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);

void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
	restart_apic_timer(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);

void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	preempt_disable();
	/* Possibly the TSC deadline timer is not enabled yet */
	if (apic->lapic_timer.hv_timer_in_use)
		start_sw_timer(apic);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);

void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
	restart_apic_timer(apic);
}

static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
{
	atomic_set(&apic->lapic_timer.pending, 0);

	if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
	    && !set_target_expiration(apic, count_reg))
		return;

	restart_apic_timer(apic);
}

static void start_apic_timer(struct kvm_lapic *apic)
{
	__start_apic_timer(apic, APIC_TMICT);
}

static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
{
	bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);

	if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
		apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
		if (lvt0_in_nmi_mode) {
			atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
		} else
			atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
	}
}
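/*
 * Emulate a write to a local APIC register.  Returns 0 on success and 1 if
 * the register or value is not valid in the APIC's current mode.
 */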
(reg) { 1965 case APIC_ID: /* Local APIC ID */ 1966 if (!apic_x2apic_mode(apic)) 1967 kvm_apic_set_xapic_id(apic, val >> 24); 1968 else 1969 ret = 1; 1970 break; 1971 1972 case APIC_TASKPRI: 1973 report_tpr_access(apic, true); 1974 apic_set_tpr(apic, val & 0xff); 1975 break; 1976 1977 case APIC_EOI: 1978 apic_set_eoi(apic); 1979 break; 1980 1981 case APIC_LDR: 1982 if (!apic_x2apic_mode(apic)) 1983 kvm_apic_set_ldr(apic, val & APIC_LDR_MASK); 1984 else 1985 ret = 1; 1986 break; 1987 1988 case APIC_DFR: 1989 if (!apic_x2apic_mode(apic)) { 1990 kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); 1991 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 1992 } else 1993 ret = 1; 1994 break; 1995 1996 case APIC_SPIV: { 1997 u32 mask = 0x3ff; 1998 if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) 1999 mask |= APIC_SPIV_DIRECTED_EOI; 2000 apic_set_spiv(apic, val & mask); 2001 if (!(val & APIC_SPIV_APIC_ENABLED)) { 2002 int i; 2003 u32 lvt_val; 2004 2005 for (i = 0; i < KVM_APIC_LVT_NUM; i++) { 2006 lvt_val = kvm_lapic_get_reg(apic, 2007 APIC_LVTT + 0x10 * i); 2008 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, 2009 lvt_val | APIC_LVT_MASKED); 2010 } 2011 apic_update_lvtt(apic); 2012 atomic_set(&apic->lapic_timer.pending, 0); 2013 2014 } 2015 break; 2016 } 2017 case APIC_ICR: 2018 /* No delay here, so we always clear the pending bit */ 2019 val &= ~(1 << 12); 2020 kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2)); 2021 kvm_lapic_set_reg(apic, APIC_ICR, val); 2022 break; 2023 2024 case APIC_ICR2: 2025 if (!apic_x2apic_mode(apic)) 2026 val &= 0xff000000; 2027 kvm_lapic_set_reg(apic, APIC_ICR2, val); 2028 break; 2029 2030 case APIC_LVT0: 2031 apic_manage_nmi_watchdog(apic, val); 2032 /* fall through */ 2033 case APIC_LVTTHMR: 2034 case APIC_LVTPC: 2035 case APIC_LVT1: 2036 case APIC_LVTERR: { 2037 /* TODO: Check vector */ 2038 size_t size; 2039 u32 index; 2040 2041 if (!kvm_apic_sw_enabled(apic)) 2042 val |= APIC_LVT_MASKED; 2043 size = ARRAY_SIZE(apic_lvt_mask); 2044 index = array_index_nospec( 2045 (reg - APIC_LVTT) >> 4, size); 2046 val &= apic_lvt_mask[index]; 2047 kvm_lapic_set_reg(apic, reg, val); 2048 break; 2049 } 2050 2051 case APIC_LVTT: 2052 if (!kvm_apic_sw_enabled(apic)) 2053 val |= APIC_LVT_MASKED; 2054 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); 2055 kvm_lapic_set_reg(apic, APIC_LVTT, val); 2056 apic_update_lvtt(apic); 2057 break; 2058 2059 case APIC_TMICT: 2060 if (apic_lvtt_tscdeadline(apic)) 2061 break; 2062 2063 hrtimer_cancel(&apic->lapic_timer.timer); 2064 kvm_lapic_set_reg(apic, APIC_TMICT, val); 2065 start_apic_timer(apic); 2066 break; 2067 2068 case APIC_TDCR: { 2069 uint32_t old_divisor = apic->divide_count; 2070 2071 kvm_lapic_set_reg(apic, APIC_TDCR, val); 2072 update_divide_count(apic); 2073 if (apic->divide_count != old_divisor && 2074 apic->lapic_timer.period) { 2075 hrtimer_cancel(&apic->lapic_timer.timer); 2076 update_target_expiration(apic, old_divisor); 2077 restart_apic_timer(apic); 2078 } 2079 break; 2080 } 2081 case APIC_ESR: 2082 if (apic_x2apic_mode(apic) && val != 0) 2083 ret = 1; 2084 break; 2085 2086 case APIC_SELF_IPI: 2087 if (apic_x2apic_mode(apic)) { 2088 kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff)); 2089 } else 2090 ret = 1; 2091 break; 2092 default: 2093 ret = 1; 2094 break; 2095 } 2096 2097 kvm_recalculate_apic_map(apic->vcpu->kvm); 2098 2099 return ret; 2100 } 2101 EXPORT_SYMBOL_GPL(kvm_lapic_reg_write); 2102 2103 static int apic_mmio_write(struct kvm_vcpu *vcpu, struct 
kvm_io_device *this, 2104 gpa_t address, int len, const void *data) 2105 { 2106 struct kvm_lapic *apic = to_lapic(this); 2107 unsigned int offset = address - apic->base_address; 2108 u32 val; 2109 2110 if (!apic_mmio_in_range(apic, address)) 2111 return -EOPNOTSUPP; 2112 2113 if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { 2114 if (!kvm_check_has_quirk(vcpu->kvm, 2115 KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) 2116 return -EOPNOTSUPP; 2117 2118 return 0; 2119 } 2120 2121 /* 2122 * APIC register must be aligned on 128-bits boundary. 2123 * 32/64/128 bits registers must be accessed thru 32 bits. 2124 * Refer SDM 8.4.1 2125 */ 2126 if (len != 4 || (offset & 0xf)) 2127 return 0; 2128 2129 val = *(u32*)data; 2130 2131 kvm_lapic_reg_write(apic, offset & 0xff0, val); 2132 2133 return 0; 2134 } 2135 2136 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) 2137 { 2138 kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0); 2139 } 2140 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); 2141 2142 /* emulate APIC access in a trap manner */ 2143 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) 2144 { 2145 u32 val = 0; 2146 2147 /* hw has done the conditional check and inst decode */ 2148 offset &= 0xff0; 2149 2150 kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val); 2151 2152 /* TODO: optimize to just emulate side effect w/o one more write */ 2153 kvm_lapic_reg_write(vcpu->arch.apic, offset, val); 2154 } 2155 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); 2156 2157 void kvm_free_lapic(struct kvm_vcpu *vcpu) 2158 { 2159 struct kvm_lapic *apic = vcpu->arch.apic; 2160 2161 if (!vcpu->arch.apic) 2162 return; 2163 2164 hrtimer_cancel(&apic->lapic_timer.timer); 2165 2166 if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) 2167 static_key_slow_dec_deferred(&apic_hw_disabled); 2168 2169 if (!apic->sw_enabled) 2170 static_key_slow_dec_deferred(&apic_sw_disabled); 2171 2172 if (apic->regs) 2173 free_page((unsigned long)apic->regs); 2174 2175 kfree(apic); 2176 } 2177 2178 /* 2179 *---------------------------------------------------------------------- 2180 * LAPIC interface 2181 *---------------------------------------------------------------------- 2182 */ 2183 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) 2184 { 2185 struct kvm_lapic *apic = vcpu->arch.apic; 2186 2187 if (!lapic_in_kernel(vcpu) || 2188 !apic_lvtt_tscdeadline(apic)) 2189 return 0; 2190 2191 return apic->lapic_timer.tscdeadline; 2192 } 2193 2194 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) 2195 { 2196 struct kvm_lapic *apic = vcpu->arch.apic; 2197 2198 if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) || 2199 apic_lvtt_period(apic)) 2200 return; 2201 2202 hrtimer_cancel(&apic->lapic_timer.timer); 2203 apic->lapic_timer.tscdeadline = data; 2204 start_apic_timer(apic); 2205 } 2206 2207 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) 2208 { 2209 struct kvm_lapic *apic = vcpu->arch.apic; 2210 2211 apic_set_tpr(apic, ((cr8 & 0x0f) << 4) 2212 | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4)); 2213 } 2214 2215 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) 2216 { 2217 u64 tpr; 2218 2219 tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI); 2220 2221 return (tpr & 0xf0) >> 4; 2222 } 2223 2224 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) 2225 { 2226 u64 old_value = vcpu->arch.apic_base; 2227 struct kvm_lapic *apic = vcpu->arch.apic; 2228 2229 if (!apic) 2230 value |= MSR_IA32_APICBASE_BSP; 2231 2232 vcpu->arch.apic_base = value; 2233 2234 if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) 
2235 kvm_update_cpuid(vcpu); 2236 2237 if (!apic) 2238 return; 2239 2240 /* update jump label if enable bit changes */ 2241 if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) { 2242 if (value & MSR_IA32_APICBASE_ENABLE) { 2243 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); 2244 static_key_slow_dec_deferred(&apic_hw_disabled); 2245 } else { 2246 static_key_slow_inc(&apic_hw_disabled.key); 2247 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 2248 } 2249 } 2250 2251 if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE)) 2252 kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); 2253 2254 if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) 2255 kvm_x86_ops.set_virtual_apic_mode(vcpu); 2256 2257 apic->base_address = apic->vcpu->arch.apic_base & 2258 MSR_IA32_APICBASE_BASE; 2259 2260 if ((value & MSR_IA32_APICBASE_ENABLE) && 2261 apic->base_address != APIC_DEFAULT_PHYS_BASE) 2262 pr_warn_once("APIC base relocation is unsupported by KVM"); 2263 } 2264 2265 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) 2266 { 2267 struct kvm_lapic *apic = vcpu->arch.apic; 2268 2269 if (vcpu->arch.apicv_active) { 2270 /* irr_pending is always true when apicv is activated. */ 2271 apic->irr_pending = true; 2272 apic->isr_count = 1; 2273 } else { 2274 apic->irr_pending = (apic_search_irr(apic) != -1); 2275 apic->isr_count = count_vectors(apic->regs + APIC_ISR); 2276 } 2277 } 2278 EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); 2279 2280 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) 2281 { 2282 struct kvm_lapic *apic = vcpu->arch.apic; 2283 int i; 2284 2285 if (!apic) 2286 return; 2287 2288 /* Stop the timer in case it's a reset to an active apic */ 2289 hrtimer_cancel(&apic->lapic_timer.timer); 2290 2291 if (!init_event) { 2292 kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE | 2293 MSR_IA32_APICBASE_ENABLE); 2294 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); 2295 } 2296 kvm_apic_set_version(apic->vcpu); 2297 2298 for (i = 0; i < KVM_APIC_LVT_NUM; i++) 2299 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); 2300 apic_update_lvtt(apic); 2301 if (kvm_vcpu_is_reset_bsp(vcpu) && 2302 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) 2303 kvm_lapic_set_reg(apic, APIC_LVT0, 2304 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); 2305 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); 2306 2307 kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU); 2308 apic_set_spiv(apic, 0xff); 2309 kvm_lapic_set_reg(apic, APIC_TASKPRI, 0); 2310 if (!apic_x2apic_mode(apic)) 2311 kvm_apic_set_ldr(apic, 0); 2312 kvm_lapic_set_reg(apic, APIC_ESR, 0); 2313 kvm_lapic_set_reg(apic, APIC_ICR, 0); 2314 kvm_lapic_set_reg(apic, APIC_ICR2, 0); 2315 kvm_lapic_set_reg(apic, APIC_TDCR, 0); 2316 kvm_lapic_set_reg(apic, APIC_TMICT, 0); 2317 for (i = 0; i < 8; i++) { 2318 kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0); 2319 kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 2320 kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 2321 } 2322 kvm_apic_update_apicv(vcpu); 2323 apic->highest_isr_cache = -1; 2324 update_divide_count(apic); 2325 atomic_set(&apic->lapic_timer.pending, 0); 2326 if (kvm_vcpu_is_bsp(vcpu)) 2327 kvm_lapic_set_base(vcpu, 2328 vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP); 2329 vcpu->arch.pv_eoi.msr_val = 0; 2330 apic_update_ppr(apic); 2331 if (vcpu->arch.apicv_active) { 2332 kvm_x86_ops.apicv_post_state_restore(vcpu); 2333 kvm_x86_ops.hwapic_irr_update(vcpu, -1); 2334 kvm_x86_ops.hwapic_isr_update(vcpu, -1); 2335 } 2336 2337 vcpu->arch.apic_arb_prio = 0; 
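	/* Drop any stale attention bits (KVM_APIC_CHECK_VAPIC, KVM_APIC_PV_EOI_PENDING). */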
2338 vcpu->arch.apic_attention = 0; 2339 2340 kvm_recalculate_apic_map(vcpu->kvm); 2341 } 2342 2343 /* 2344 *---------------------------------------------------------------------- 2345 * timer interface 2346 *---------------------------------------------------------------------- 2347 */ 2348 2349 static bool lapic_is_periodic(struct kvm_lapic *apic) 2350 { 2351 return apic_lvtt_period(apic); 2352 } 2353 2354 int apic_has_pending_timer(struct kvm_vcpu *vcpu) 2355 { 2356 struct kvm_lapic *apic = vcpu->arch.apic; 2357 2358 if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT)) 2359 return atomic_read(&apic->lapic_timer.pending); 2360 2361 return 0; 2362 } 2363 2364 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) 2365 { 2366 u32 reg = kvm_lapic_get_reg(apic, lvt_type); 2367 int vector, mode, trig_mode; 2368 2369 if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { 2370 vector = reg & APIC_VECTOR_MASK; 2371 mode = reg & APIC_MODE_MASK; 2372 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; 2373 return __apic_accept_irq(apic, mode, vector, 1, trig_mode, 2374 NULL); 2375 } 2376 return 0; 2377 } 2378 2379 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) 2380 { 2381 struct kvm_lapic *apic = vcpu->arch.apic; 2382 2383 if (apic) 2384 kvm_apic_local_deliver(apic, APIC_LVT0); 2385 } 2386 2387 static const struct kvm_io_device_ops apic_mmio_ops = { 2388 .read = apic_mmio_read, 2389 .write = apic_mmio_write, 2390 }; 2391 2392 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) 2393 { 2394 struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); 2395 struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); 2396 2397 apic_timer_expired(apic, true); 2398 2399 if (lapic_is_periodic(apic)) { 2400 advance_periodic_target_expiration(apic); 2401 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); 2402 return HRTIMER_RESTART; 2403 } else 2404 return HRTIMER_NORESTART; 2405 } 2406 2407 int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) 2408 { 2409 struct kvm_lapic *apic; 2410 2411 ASSERT(vcpu != NULL); 2412 2413 apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT); 2414 if (!apic) 2415 goto nomem; 2416 2417 vcpu->arch.apic = apic; 2418 2419 apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); 2420 if (!apic->regs) { 2421 printk(KERN_ERR "malloc apic regs error for vcpu %x\n", 2422 vcpu->vcpu_id); 2423 goto nomem_free_apic; 2424 } 2425 apic->vcpu = vcpu; 2426 2427 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, 2428 HRTIMER_MODE_ABS_HARD); 2429 apic->lapic_timer.timer.function = apic_timer_fn; 2430 if (timer_advance_ns == -1) { 2431 apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT; 2432 lapic_timer_advance_dynamic = true; 2433 } else { 2434 apic->lapic_timer.timer_advance_ns = timer_advance_ns; 2435 lapic_timer_advance_dynamic = false; 2436 } 2437 2438 /* 2439 * APIC is created enabled. This will prevent kvm_lapic_set_base from 2440 * thinking that APIC state has changed. 
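 * The architectural reset value, APIC_DEFAULT_PHYS_BASE with the enable
 * (and, for the BSP, the BSP) bit set, is established afterwards when
 * kvm_lapic_reset() calls kvm_lapic_set_base().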
2441 */ 2442 vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; 2443 static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ 2444 kvm_iodevice_init(&apic->dev, &apic_mmio_ops); 2445 2446 return 0; 2447 nomem_free_apic: 2448 kfree(apic); 2449 vcpu->arch.apic = NULL; 2450 nomem: 2451 return -ENOMEM; 2452 } 2453 2454 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 2455 { 2456 struct kvm_lapic *apic = vcpu->arch.apic; 2457 u32 ppr; 2458 2459 if (!kvm_apic_hw_enabled(apic)) 2460 return -1; 2461 2462 __apic_update_ppr(apic, &ppr); 2463 return apic_has_interrupt_for_ppr(apic, ppr); 2464 } 2465 2466 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) 2467 { 2468 u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0); 2469 2470 if (!kvm_apic_hw_enabled(vcpu->arch.apic)) 2471 return 1; 2472 if ((lvt0 & APIC_LVT_MASKED) == 0 && 2473 GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) 2474 return 1; 2475 return 0; 2476 } 2477 2478 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) 2479 { 2480 struct kvm_lapic *apic = vcpu->arch.apic; 2481 2482 if (atomic_read(&apic->lapic_timer.pending) > 0) { 2483 kvm_apic_inject_pending_timer_irqs(apic); 2484 atomic_set(&apic->lapic_timer.pending, 0); 2485 } 2486 } 2487 2488 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) 2489 { 2490 int vector = kvm_apic_has_interrupt(vcpu); 2491 struct kvm_lapic *apic = vcpu->arch.apic; 2492 u32 ppr; 2493 2494 if (vector == -1) 2495 return -1; 2496 2497 /* 2498 * We get here even with APIC virtualization enabled, if doing 2499 * nested virtualization and L1 runs with the "acknowledge interrupt 2500 * on exit" mode. Then we cannot inject the interrupt via RVI, 2501 * because the process would deliver it through the IDT. 2502 */ 2503 2504 apic_clear_irr(vector, apic); 2505 if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) { 2506 /* 2507 * For auto-EOI interrupts, there might be another pending 2508 * interrupt above PPR, so check whether to raise another 2509 * KVM_REQ_EVENT. 2510 */ 2511 apic_update_ppr(apic); 2512 } else { 2513 /* 2514 * For normal interrupts, PPR has been raised and there cannot 2515 * be a higher-priority pending interrupt---except if there was 2516 * a concurrent interrupt injection, but that would have 2517 * triggered KVM_REQ_EVENT already. 2518 */ 2519 apic_set_isr(vector, apic); 2520 __apic_update_ppr(apic, &ppr); 2521 } 2522 2523 return vector; 2524 } 2525 2526 static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, 2527 struct kvm_lapic_state *s, bool set) 2528 { 2529 if (apic_x2apic_mode(vcpu->arch.apic)) { 2530 u32 *id = (u32 *)(s->regs + APIC_ID); 2531 u32 *ldr = (u32 *)(s->regs + APIC_LDR); 2532 2533 if (vcpu->kvm->arch.x2apic_format) { 2534 if (*id != vcpu->vcpu_id) 2535 return -EINVAL; 2536 } else { 2537 if (set) 2538 *id >>= 24; 2539 else 2540 *id <<= 24; 2541 } 2542 2543 /* In x2APIC mode, the LDR is fixed and based on the id */ 2544 if (set) 2545 *ldr = kvm_apic_calc_x2apic_ldr(*id); 2546 } 2547 2548 return 0; 2549 } 2550 2551 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) 2552 { 2553 memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s)); 2554 2555 /* 2556 * Get calculated timer current count for remaining timer period (if 2557 * any) and store it in the returned register set. 
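 * TMCCT is not maintained in the backing register page; __apic_read()
 * derives it from the hrtimer state, so it must be filled in here
 * explicitly before the snapshot is handed back to userspace.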
2558 */ 2559 __kvm_lapic_set_reg(s->regs, APIC_TMCCT, 2560 __apic_read(vcpu->arch.apic, APIC_TMCCT)); 2561 2562 return kvm_apic_state_fixup(vcpu, s, false); 2563 } 2564 2565 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) 2566 { 2567 struct kvm_lapic *apic = vcpu->arch.apic; 2568 int r; 2569 2570 kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); 2571 /* set SPIV separately to get count of SW disabled APICs right */ 2572 apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); 2573 2574 r = kvm_apic_state_fixup(vcpu, s, true); 2575 if (r) { 2576 kvm_recalculate_apic_map(vcpu->kvm); 2577 return r; 2578 } 2579 memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); 2580 2581 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); 2582 kvm_recalculate_apic_map(vcpu->kvm); 2583 kvm_apic_set_version(vcpu); 2584 2585 apic_update_ppr(apic); 2586 hrtimer_cancel(&apic->lapic_timer.timer); 2587 apic_update_lvtt(apic); 2588 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); 2589 update_divide_count(apic); 2590 __start_apic_timer(apic, APIC_TMCCT); 2591 kvm_apic_update_apicv(vcpu); 2592 apic->highest_isr_cache = -1; 2593 if (vcpu->arch.apicv_active) { 2594 kvm_x86_ops.apicv_post_state_restore(vcpu); 2595 kvm_x86_ops.hwapic_irr_update(vcpu, 2596 apic_find_highest_irr(apic)); 2597 kvm_x86_ops.hwapic_isr_update(vcpu, 2598 apic_find_highest_isr(apic)); 2599 } 2600 kvm_make_request(KVM_REQ_EVENT, vcpu); 2601 if (ioapic_in_kernel(vcpu->kvm)) 2602 kvm_rtc_eoi_tracking_restore_one(vcpu); 2603 2604 vcpu->arch.apic_arb_prio = 0; 2605 2606 return 0; 2607 } 2608 2609 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) 2610 { 2611 struct hrtimer *timer; 2612 2613 if (!lapic_in_kernel(vcpu) || 2614 kvm_can_post_timer_interrupt(vcpu)) 2615 return; 2616 2617 timer = &vcpu->arch.apic->lapic_timer.timer; 2618 if (hrtimer_cancel(timer)) 2619 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD); 2620 } 2621 2622 /* 2623 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt 2624 * 2625 * Detect whether guest triggered PV EOI since the 2626 * last entry. If yes, set EOI on guests's behalf. 2627 * Clear PV EOI in guest memory in any case. 2628 */ 2629 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, 2630 struct kvm_lapic *apic) 2631 { 2632 bool pending; 2633 int vector; 2634 /* 2635 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host 2636 * and KVM_PV_EOI_ENABLED in guest memory as follows: 2637 * 2638 * KVM_APIC_PV_EOI_PENDING is unset: 2639 * -> host disabled PV EOI. 2640 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set: 2641 * -> host enabled PV EOI, guest did not execute EOI yet. 2642 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset: 2643 * -> host enabled PV EOI, guest executed EOI. 2644 */ 2645 BUG_ON(!pv_eoi_enabled(vcpu)); 2646 pending = pv_eoi_get_pending(vcpu); 2647 /* 2648 * Clear pending bit in any case: it will be set again on vmentry. 2649 * While this might not be ideal from performance point of view, 2650 * this makes sure pv eoi is only enabled when we know it's safe. 
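 * ("Set again on vmentry" refers to apic_sync_pv_eoi_to_guest(), which
 * re-arms the pending bit right before the next entry once it has checked
 * that using PV EOI is safe.)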
2651 */ 2652 pv_eoi_clr_pending(vcpu); 2653 if (pending) 2654 return; 2655 vector = apic_set_eoi(apic); 2656 trace_kvm_pv_eoi(apic, vector); 2657 } 2658 2659 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) 2660 { 2661 u32 data; 2662 2663 if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention)) 2664 apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic); 2665 2666 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 2667 return; 2668 2669 if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, 2670 sizeof(u32))) 2671 return; 2672 2673 apic_set_tpr(vcpu->arch.apic, data & 0xff); 2674 } 2675 2676 /* 2677 * apic_sync_pv_eoi_to_guest - called before vmentry 2678 * 2679 * Detect whether it's safe to enable PV EOI and 2680 * if yes do so. 2681 */ 2682 static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, 2683 struct kvm_lapic *apic) 2684 { 2685 if (!pv_eoi_enabled(vcpu) || 2686 /* IRR set or many bits in ISR: could be nested. */ 2687 apic->irr_pending || 2688 /* Cache not set: could be safe but we don't bother. */ 2689 apic->highest_isr_cache == -1 || 2690 /* Need EOI to update ioapic. */ 2691 kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) { 2692 /* 2693 * PV EOI was disabled by apic_sync_pv_eoi_from_guest 2694 * so we need not do anything here. 2695 */ 2696 return; 2697 } 2698 2699 pv_eoi_set_pending(apic->vcpu); 2700 } 2701 2702 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) 2703 { 2704 u32 data, tpr; 2705 int max_irr, max_isr; 2706 struct kvm_lapic *apic = vcpu->arch.apic; 2707 2708 apic_sync_pv_eoi_to_guest(vcpu, apic); 2709 2710 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 2711 return; 2712 2713 tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff; 2714 max_irr = apic_find_highest_irr(apic); 2715 if (max_irr < 0) 2716 max_irr = 0; 2717 max_isr = apic_find_highest_isr(apic); 2718 if (max_isr < 0) 2719 max_isr = 0; 2720 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); 2721 2722 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, 2723 sizeof(u32)); 2724 } 2725 2726 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) 2727 { 2728 if (vapic_addr) { 2729 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, 2730 &vcpu->arch.apic->vapic_cache, 2731 vapic_addr, sizeof(u32))) 2732 return -EINVAL; 2733 __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); 2734 } else { 2735 __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); 2736 } 2737 2738 vcpu->arch.apic->vapic_addr = vapic_addr; 2739 return 0; 2740 } 2741 2742 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) 2743 { 2744 struct kvm_lapic *apic = vcpu->arch.apic; 2745 u32 reg = (msr - APIC_BASE_MSR) << 4; 2746 2747 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) 2748 return 1; 2749 2750 if (reg == APIC_ICR2) 2751 return 1; 2752 2753 /* if this is ICR write vector before command */ 2754 if (reg == APIC_ICR) 2755 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 2756 return kvm_lapic_reg_write(apic, reg, (u32)data); 2757 } 2758 2759 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) 2760 { 2761 struct kvm_lapic *apic = vcpu->arch.apic; 2762 u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; 2763 2764 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) 2765 return 1; 2766 2767 if (reg == APIC_DFR || reg == APIC_ICR2) 2768 return 1; 2769 2770 if (kvm_lapic_reg_read(apic, reg, 4, &low)) 2771 return 1; 2772 if (reg == APIC_ICR) 2773 kvm_lapic_reg_read(apic, 
APIC_ICR2, 4, &high); 2774 2775 *data = (((u64)high) << 32) | low; 2776 2777 return 0; 2778 } 2779 2780 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) 2781 { 2782 struct kvm_lapic *apic = vcpu->arch.apic; 2783 2784 if (!lapic_in_kernel(vcpu)) 2785 return 1; 2786 2787 /* if this is ICR write vector before command */ 2788 if (reg == APIC_ICR) 2789 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 2790 return kvm_lapic_reg_write(apic, reg, (u32)data); 2791 } 2792 2793 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) 2794 { 2795 struct kvm_lapic *apic = vcpu->arch.apic; 2796 u32 low, high = 0; 2797 2798 if (!lapic_in_kernel(vcpu)) 2799 return 1; 2800 2801 if (kvm_lapic_reg_read(apic, reg, 4, &low)) 2802 return 1; 2803 if (reg == APIC_ICR) 2804 kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high); 2805 2806 *data = (((u64)high) << 32) | low; 2807 2808 return 0; 2809 } 2810 2811 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len) 2812 { 2813 u64 addr = data & ~KVM_MSR_ENABLED; 2814 struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data; 2815 unsigned long new_len; 2816 2817 if (!IS_ALIGNED(addr, 4)) 2818 return 1; 2819 2820 vcpu->arch.pv_eoi.msr_val = data; 2821 if (!pv_eoi_enabled(vcpu)) 2822 return 0; 2823 2824 if (addr == ghc->gpa && len <= ghc->len) 2825 new_len = ghc->len; 2826 else 2827 new_len = len; 2828 2829 return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len); 2830 } 2831 2832 void kvm_apic_accept_events(struct kvm_vcpu *vcpu) 2833 { 2834 struct kvm_lapic *apic = vcpu->arch.apic; 2835 u8 sipi_vector; 2836 unsigned long pe; 2837 2838 if (!lapic_in_kernel(vcpu) || !apic->pending_events) 2839 return; 2840 2841 /* 2842 * INITs are latched while CPU is in specific states 2843 * (SMM, VMX non-root mode, SVM with GIF=0). 2844 * Because a CPU cannot be in these states immediately 2845 * after it has processed an INIT signal (and thus in 2846 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs 2847 * and leave the INIT pending. 2848 */ 2849 if (kvm_vcpu_latch_init(vcpu)) { 2850 WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED); 2851 if (test_bit(KVM_APIC_SIPI, &apic->pending_events)) 2852 clear_bit(KVM_APIC_SIPI, &apic->pending_events); 2853 return; 2854 } 2855 2856 pe = xchg(&apic->pending_events, 0); 2857 if (test_bit(KVM_APIC_INIT, &pe)) { 2858 kvm_vcpu_reset(vcpu, true); 2859 if (kvm_vcpu_is_bsp(apic->vcpu)) 2860 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 2861 else 2862 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; 2863 } 2864 if (test_bit(KVM_APIC_SIPI, &pe) && 2865 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { 2866 /* evaluate pending_events before reading the vector */ 2867 smp_rmb(); 2868 sipi_vector = apic->sipi_vector; 2869 kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); 2870 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 2871 } 2872 } 2873 2874 void kvm_lapic_init(void) 2875 { 2876 /* do not patch jump label more than once per second */ 2877 jump_label_rate_limit(&apic_hw_disabled, HZ); 2878 jump_label_rate_limit(&apic_sw_disabled, HZ); 2879 } 2880 2881 void kvm_lapic_exit(void) 2882 { 2883 static_key_deferred_flush(&apic_hw_disabled); 2884 static_key_deferred_flush(&apic_sw_disabled); 2885 } 2886
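
/*
 * Illustrative sketch (deliberately kept out of the build with #if 0): how
 * the x2APIC MSR interface above maps MSR numbers to the xAPIC register
 * offsets consumed by kvm_lapic_reg_read()/kvm_lapic_reg_write(), and how a
 * 64-bit ICR write is split.  The helper names x2apic_msr_to_offset() and
 * x2apic_icr_write_example() are made up for illustration only; the real
 * entry points are kvm_x2apic_msr_read() and kvm_x2apic_msr_write() above.
 */
#if 0
/*
 * Each x2APIC MSR covers one 16-byte xAPIC register:
 *   offset = (msr - APIC_BASE_MSR) << 4
 * e.g. MSR 0x830 (ICR) -> (0x830 - 0x800) << 4 = 0x300 = APIC_ICR.
 */
static u32 x2apic_msr_to_offset(u32 msr)
{
	return (msr - APIC_BASE_MSR) << 4;
}

static void x2apic_icr_write_example(struct kvm_lapic *apic, u64 data)
{
	/*
	 * As in kvm_x2apic_msr_write(): the high dword (destination) must be
	 * written to APIC_ICR2 before the low dword goes to APIC_ICR, because
	 * the APIC_ICR write is what actually triggers the IPI.
	 */
	kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	kvm_lapic_reg_write(apic, APIC_ICR, (u32)data);
}
#endif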