/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

#define APIC_BUS_CYCLE_NS	1

/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define apic_debug(fmt, arg...)

#define APIC_LVT_NUM			6
/* 14 is the version for Xeon and Pentium 8.4.8 */
#define APIC_VERSION			(0x14UL | ((APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH		(1 << 12)
/* The following defines are not in apicdef.h */
#define APIC_SHORT_MASK			0xc0000
#define APIC_DEST_NOSHORT		0x0
#define APIC_DEST_MASK			0x800
#define MAX_APIC_VECTOR			256
#define APIC_VECTORS_PER_REG		32

#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)

static unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);

static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
	*((u32 *) (apic->regs + reg_off)) = val;
}

static inline int apic_test_and_set_vector(int vec, void *bitmap)
{
	return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int apic_test_and_clear_vector(int vec, void *bitmap)
{
	return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int apic_test_vector(int vec, void *bitmap)
{
	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline void apic_set_vector(int vec, void *bitmap)
{
	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline void apic_clear_vector(int vec, void *bitmap)
{
	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;

static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
	if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
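		/*
		 * The software-enable bit is changing: keep the
		 * apic_sw_disabled static key in sync so that fast paths can
		 * use a static branch when no vCPU has a sw-disabled APIC.
		 */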
		if (val & APIC_SPIV_APIC_ENABLED)
			static_key_slow_dec_deferred(&apic_sw_disabled);
		else
			static_key_slow_inc(&apic_sw_disabled.key);
	}
	apic_set_reg(apic, APIC_SPIV, val);
}

static inline int apic_enabled(struct kvm_lapic *apic)
{
	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline int kvm_apic_id(struct kvm_lapic *apic)
{
	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}

void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
				struct kvm_lapic_irq *irq,
				u64 *eoi_exit_bitmap)
{
	struct kvm_lapic **dst;
	struct kvm_apic_map *map;
	unsigned long bitmap = 1;
	int i;

	rcu_read_lock();
	map = rcu_dereference(vcpu->kvm->arch.apic_map);

	if (unlikely(!map)) {
		__set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
		goto out;
	}

	if (irq->dest_mode == 0) { /* physical mode */
		if (irq->delivery_mode == APIC_DM_LOWEST ||
				irq->dest_id == 0xff) {
			__set_bit(irq->vector,
				  (unsigned long *)eoi_exit_bitmap);
			goto out;
		}
		dst = &map->phys_map[irq->dest_id & 0xff];
	} else {
		u32 mda = irq->dest_id << (32 - map->ldr_bits);

		dst = map->logical_map[apic_cluster_id(map, mda)];

		bitmap = apic_logical_id(map, mda);
	}

	for_each_set_bit(i, &bitmap, 16) {
		if (!dst[i])
			continue;
		if (dst[i]->vcpu == vcpu) {
			__set_bit(irq->vector,
				  (unsigned long *)eoi_exit_bitmap);
			break;
		}
	}

out:
	rcu_read_unlock();
}

static void recalculate_apic_map(struct kvm *kvm)
{
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	int i;

	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);

	mutex_lock(&kvm->arch.apic_map_lock);

	if (!new)
		goto out;

	new->ldr_bits = 8;
	/* flat mode is default */
	new->cid_shift = 8;
	new->cid_mask = 0;
	new->lid_mask = 0xff;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_lapic *apic = vcpu->arch.apic;
		u16 cid, lid;
		u32 ldr;

		if (!kvm_apic_present(vcpu))
			continue;

		/*
		 * All APICs have to be configured in the same mode by an OS.
		 * We take advantage of this while building the logical id
		 * lookup table. After reset, APICs are in xAPIC/flat mode, so
		 * if we find an APIC with a different setting we assume this
		 * is the mode the OS wants all APICs to be in; build the
		 * lookup table accordingly.
		 */
		if (apic_x2apic_mode(apic)) {
			new->ldr_bits = 32;
			new->cid_shift = 16;
			new->cid_mask = new->lid_mask = 0xffff;
		} else if (kvm_apic_sw_enabled(apic) &&
				!new->cid_mask /* flat mode */ &&
				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
			new->cid_shift = 4;
			new->cid_mask = 0xf;
			new->lid_mask = 0xf;
		}

		new->phys_map[kvm_apic_id(apic)] = apic;

		ldr = kvm_apic_get_reg(apic, APIC_LDR);
		cid = apic_cluster_id(new, ldr);
		lid = apic_logical_id(new, ldr);

		if (lid)
			new->logical_map[cid][ffs(lid) - 1] = apic;
	}
out:
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));
	rcu_assign_pointer(kvm->arch.apic_map, new);
	mutex_unlock(&kvm->arch.apic_map_lock);

	if (old)
		kfree_rcu(old, rcu);

	kvm_ioapic_make_eoibitmap_request(kvm);
}

static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
	apic_set_reg(apic, APIC_ID, id << 24);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
	apic_set_reg(apic, APIC_LDR, id);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
{
	return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
}

static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
}

static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
}

static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) ==
			APIC_LVT_TIMER_TSCDEADLINE);
}

static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct kvm_cpuid_entry2 *feat;
	u32 v = APIC_VERSION;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
		v |= APIC_LVR_DIRECTED_EOI;
	apic_set_reg(apic, APIC_LVR, v);
}

static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
	LVT_MASK,		/* part LVTT mask, timer mode mask added at runtime */
	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
	LINT_MASK, LINT_MASK,	/* LVT0-1 */
	LVT_MASK		/* LVTERR */
};

static int find_highest_vector(void *bitmap)
{
	int vec;
	u32 *reg;

	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		if (*reg)
			return fls(*reg) - 1 + vec;
	}

	return -1;
}

static u8 count_vectors(void *bitmap)
{
	int vec;
	u32 *reg;
	u8 count = 0;

	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		count += hweight32(*reg);
	}

	return count;
}

static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = true;
	return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
}

static inline int apic_search_irr(struct kvm_lapic *apic)
{
	return find_highest_vector(apic->regs + APIC_IRR);
}

static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
	int result;

	/*
	 * Note that irr_pending is just a hint. It will always be true with
	 * virtual interrupt delivery enabled.
	 */
	if (!apic->irr_pending)
		return -1;

	result = apic_search_irr(apic);
	ASSERT(result == -1 || result >= 16);

	return result;
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = false;
	apic_clear_vector(vec, apic->regs + APIC_IRR);
	if (apic_search_irr(apic) != -1)
		apic->irr_pending = true;
}

static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
		++apic->isr_count;
	BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
	/*
	 * ISR (in service register) bit is set when injecting an interrupt.
	 * The highest vector is injected. Thus the latest bit set matches
	 * the highest bit in ISR.
	 */
	apic->highest_isr_cache = vec;
}

static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
		--apic->isr_count;
	BUG_ON(apic->isr_count < 0);
	apic->highest_isr_cache = -1;
}

int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
	int highest_irr;

	/* This may race with setting of irr in __apic_accept_irq() and
	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
	 * will cause vmexit immediately and the value will be recalculated
	 * on the next vmentry.
	 */
	if (!kvm_vcpu_has_lapic(vcpu))
		return 0;
	highest_irr = apic_find_highest_irr(vcpu->arch.apic);

	return highest_irr;
}

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode);

int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
			irq->level, irq->trig_mode);
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{

	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
				      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{

	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
				      sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
	u8 val;
	if (pv_eoi_get_user(vcpu, &val) < 0)
		apic_debug("Can't read EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
	return val & 0x1;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
		apic_debug("Can't set EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
	int result;

	/* Note that isr_count is always 1 with vid enabled */
	if (!apic->isr_count)
		return -1;
	if (likely(apic->highest_isr_cache != -1))
		return apic->highest_isr_cache;

	result = find_highest_vector(apic->regs + APIC_ISR);
	ASSERT(result == -1 || result >= 16);

	return result;
}

static void apic_update_ppr(struct kvm_lapic *apic)
{
	u32 tpr, isrv, ppr, old_ppr;
	int isr;

	old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
	isr = apic_find_highest_isr(apic);
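	/*
	 * PPR is the higher of the TPR and the priority class of the highest
	 * in-service vector.
	 */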
	isrv = (isr != -1) ? isr : 0;

	if ((tpr & 0xf0) >= (isrv & 0xf0))
		ppr = tpr & 0xff;
	else
		ppr = isrv & 0xf0;

	apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
		   apic, ppr, isr, isrv);

	if (old_ppr != ppr) {
		apic_set_reg(apic, APIC_PROCPRI, ppr);
		if (ppr < old_ppr)
			kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	}
}

static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
	apic_set_reg(apic, APIC_TASKPRI, tpr);
	apic_update_ppr(apic);
}

int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
{
	return dest == 0xff || kvm_apic_id(apic) == dest;
}

int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
{
	int result = 0;
	u32 logical_id;

	if (apic_x2apic_mode(apic)) {
		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
		return logical_id & mda;
	}

	logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));

	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
	case APIC_DFR_FLAT:
		if (logical_id & mda)
			result = 1;
		break;
	case APIC_DFR_CLUSTER:
		if (((logical_id >> 4) == (mda >> 0x4))
		    && (logical_id & mda & 0xf))
			result = 1;
		break;
	default:
		apic_debug("Bad DFR vcpu %d: %08x\n",
			   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
		break;
	}

	return result;
}

int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
			   int short_hand, int dest, int dest_mode)
{
	int result = 0;
	struct kvm_lapic *target = vcpu->arch.apic;

	apic_debug("target %p, source %p, dest 0x%x, "
		   "dest_mode 0x%x, short_hand 0x%x\n",
		   target, source, dest, dest_mode, short_hand);

	ASSERT(target);
	switch (short_hand) {
	case APIC_DEST_NOSHORT:
		if (dest_mode == 0)
			/* Physical mode. */
			result = kvm_apic_match_physical_addr(target, dest);
		else
			/* Logical mode. */
			result = kvm_apic_match_logical_addr(target, dest);
		break;
	case APIC_DEST_SELF:
		result = (target == source);
		break;
	case APIC_DEST_ALLINC:
		result = 1;
		break;
	case APIC_DEST_ALLBUT:
		result = (target != source);
		break;
	default:
		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
			   short_hand);
		break;
	}

	return result;
}

bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq, int *r)
{
	struct kvm_apic_map *map;
	unsigned long bitmap = 1;
	struct kvm_lapic **dst;
	int i;
	bool ret = false;

	*r = -1;

	if (irq->shorthand == APIC_DEST_SELF) {
		*r = kvm_apic_set_irq(src->vcpu, irq);
		return true;
	}

	if (irq->shorthand)
		return false;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	if (!map)
		goto out;

	if (irq->dest_mode == 0) { /* physical mode */
		if (irq->delivery_mode == APIC_DM_LOWEST ||
				irq->dest_id == 0xff)
			goto out;
		dst = &map->phys_map[irq->dest_id & 0xff];
	} else {
		u32 mda = irq->dest_id << (32 - map->ldr_bits);

		dst = map->logical_map[apic_cluster_id(map, mda)];

		bitmap = apic_logical_id(map, mda);

		if (irq->delivery_mode == APIC_DM_LOWEST) {
			int l = -1;
			for_each_set_bit(i, &bitmap, 16) {
				if (!dst[i])
					continue;
				if (l < 0)
					l = i;
				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
					l = i;
			}

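			/* Deliver only to the arbitration winner, if any. */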
			bitmap = (l >= 0) ? 1 << l : 0;
		}
	}

	for_each_set_bit(i, &bitmap, 16) {
		if (!dst[i])
			continue;
		if (*r < 0)
			*r = 0;
		*r += kvm_apic_set_irq(dst[i]->vcpu, irq);
	}

	ret = true;
out:
	rcu_read_unlock();
	return ret;
}

/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode)
{
	int result = 0;
	struct kvm_vcpu *vcpu = apic->vcpu;

	switch (delivery_mode) {
	case APIC_DM_LOWEST:
		vcpu->arch.apic_arb_prio++;
		/* fall through */
	case APIC_DM_FIXED:
		/* FIXME add logic for vcpu on reset */
		if (unlikely(!apic_enabled(apic)))
			break;

		if (trig_mode) {
			apic_debug("level trig mode for vector %d", vector);
			apic_set_vector(vector, apic->regs + APIC_TMR);
		} else
			apic_clear_vector(vector, apic->regs + APIC_TMR);

		result = !apic_test_and_set_irr(vector, apic);
		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
					  trig_mode, vector, !result);
		if (!result) {
			if (trig_mode)
				apic_debug("level trig mode repeatedly for "
						"vector %d", vector);
			break;
		}

		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_REMRD:
		apic_debug("Ignoring delivery mode 3\n");
		break;

	case APIC_DM_SMI:
		apic_debug("Ignoring guest SMI\n");
		break;

	case APIC_DM_NMI:
		result = 1;
		kvm_inject_nmi(vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_INIT:
		if (!trig_mode || level) {
			result = 1;
			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		} else {
			apic_debug("Ignoring de-assert INIT to vcpu %d\n",
				   vcpu->vcpu_id);
		}
		break;

	case APIC_DM_STARTUP:
		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
			   vcpu->vcpu_id, vector);
		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
			result = 1;
			vcpu->arch.sipi_vector = vector;
			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		break;

	case APIC_DM_EXTINT:
		/*
		 * Should only be called by kvm_apic_local_deliver() with LVT0,
		 * before NMI watchdog was enabled. Already handled by
		 * kvm_apic_accept_pic_intr().
		 */
		break;

	default:
		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
		       delivery_mode);
		break;
	}
	return result;
}

int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}

static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
		int trigger_mode;
		if (apic_test_vector(vector, apic->regs + APIC_TMR))
			trigger_mode = IOAPIC_LEVEL_TRIG;
		else
			trigger_mode = IOAPIC_EDGE_TRIG;
		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
	}
}

static int apic_set_eoi(struct kvm_lapic *apic)
{
	int vector = apic_find_highest_isr(apic);

	trace_kvm_eoi(apic, vector);

	/*
	 * Not every write to EOI has a corresponding ISR bit set; one example
	 * is when the kernel checks the timer in setup_IO_APIC().
	 */
	if (vector == -1)
		return vector;

	apic_clear_isr(vector, apic);
	apic_update_ppr(apic);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	return vector;
}

/*
 * this interface assumes a trap-like exit, which has already finished
 * desired side effect including vISR and vPPR update.
 */
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	trace_kvm_eoi(apic, vector);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);

static void apic_send_ipi(struct kvm_lapic *apic)
{
	u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
	u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
	struct kvm_lapic_irq irq;

	irq.vector = icr_low & APIC_VECTOR_MASK;
	irq.delivery_mode = icr_low & APIC_MODE_MASK;
	irq.dest_mode = icr_low & APIC_DEST_MASK;
	irq.level = icr_low & APIC_INT_ASSERT;
	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
	irq.shorthand = icr_low & APIC_SHORT_MASK;
	if (apic_x2apic_mode(apic))
		irq.dest_id = icr_high;
	else
		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

	trace_kvm_apic_ipi(icr_low, irq.dest_id);

	apic_debug("icr_high 0x%x, icr_low 0x%x, "
		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
		   icr_high, icr_low, irq.shorthand, irq.dest_id,
		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
		   irq.vector);

	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
}

static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
	ktime_t remaining;
	s64 ns;
	u32 tmcct;

	ASSERT(apic != NULL);

	/* if initial count is 0, current count should also be 0 */
	if (kvm_apic_get_reg(apic, APIC_TMICT) == 0)
		return 0;

	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
	if (ktime_to_ns(remaining) < 0)
		remaining = ktime_set(0, 0);

	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
	tmcct = div64_u64(ns,
			 (APIC_BUS_CYCLE_NS * apic->divide_count));

	return tmcct;
}

static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct kvm_run *run = vcpu->run;

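	/* Report the access to userspace via a KVM_EXIT_TPR_ACCESS exit. */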
	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
	run->tpr_access.rip = kvm_rip_read(vcpu);
	run->tpr_access.is_write = write;
}

static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
	if (apic->vcpu->arch.tpr_access_reporting)
		__report_tpr_access(apic, write);
}

static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
	u32 val = 0;

	if (offset >= LAPIC_MMIO_LENGTH)
		return 0;

	switch (offset) {
	case APIC_ID:
		if (apic_x2apic_mode(apic))
			val = kvm_apic_id(apic);
		else
			val = kvm_apic_id(apic) << 24;
		break;
	case APIC_ARBPRI:
		apic_debug("Access APIC ARBPRI register which is for P6\n");
		break;

	case APIC_TMCCT:	/* Timer CCR */
		if (apic_lvtt_tscdeadline(apic))
			return 0;

		val = apic_get_tmcct(apic);
		break;
	case APIC_PROCPRI:
		apic_update_ppr(apic);
		val = kvm_apic_get_reg(apic, offset);
		break;
	case APIC_TASKPRI:
		report_tpr_access(apic, false);
		/* fall thru */
	default:
		val = kvm_apic_get_reg(apic, offset);
		break;
	}

	return val;
}

static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_lapic, dev);
}

static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
		void *data)
{
	unsigned char alignment = offset & 0xf;
	u32 result;
	/* this bitmask has a bit cleared for each reserved register */
	static const u64 rmask = 0x43ff01ffffffe70cULL;

	if ((alignment + len) > 4) {
		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
			   offset, len);
		return 1;
	}

	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
		apic_debug("KVM_APIC_READ: read reserved register %x\n",
			   offset);
		return 1;
	}

	result = __apic_read(apic, offset & ~0xf);

	trace_kvm_apic_read(offset, result);

	switch (len) {
	case 1:
	case 2:
	case 4:
		memcpy(data, (char *)&result + alignment, len);
		break;
	default:
		printk(KERN_ERR "Local APIC read with len = %x, "
		       "should be 1,2, or 4 instead\n", len);
		break;
	}
	return 0;
}

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
	return kvm_apic_hw_enabled(apic) &&
	    addr >= apic->base_address &&
	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_io_device *this,
			   gpa_t address, int len, void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	u32 offset = address - apic->base_address;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	apic_reg_read(apic, offset, len, data);

	return 0;
}

static void update_divide_count(struct kvm_lapic *apic)
{
	u32 tmp1, tmp2, tdcr;

	tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
	tmp1 = tdcr & 0xf;
	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
	apic->divide_count = 0x1 << (tmp2 & 0x7);

	apic_debug("timer divide count is 0x%x\n",
		   apic->divide_count);
}

static void start_apic_timer(struct kvm_lapic *apic)
{
	ktime_t now;
	atomic_set(&apic->lapic_timer.pending, 0);

	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
		/* lapic timer in oneshot or periodic mode */
		now = apic->lapic_timer.timer.base->get_time();
		apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
			    * APIC_BUS_CYCLE_NS * apic->divide_count;

		if (!apic->lapic_timer.period)
			return;
		/*
		 * Do not allow the guest to program periodic timers with small
		 * interval, since the hrtimers are not throttled by the host
		 * scheduler.
		 */
		if (apic_lvtt_period(apic)) {
			s64 min_period = min_timer_period_us * 1000LL;

			if (apic->lapic_timer.period < min_period) {
				pr_info_ratelimited(
				    "kvm: vcpu %i: requested %lld ns "
				    "lapic timer period limited to %lld ns\n",
				    apic->vcpu->vcpu_id,
				    apic->lapic_timer.period, min_period);
				apic->lapic_timer.period = min_period;
			}
		}

		hrtimer_start(&apic->lapic_timer.timer,
			      ktime_add_ns(now, apic->lapic_timer.period),
			      HRTIMER_MODE_ABS);

		apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
			   PRIx64 ", "
			   "timer initial count 0x%x, period %lldns, "
			   "expire @ 0x%016" PRIx64 ".\n", __func__,
			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
			   kvm_apic_get_reg(apic, APIC_TMICT),
			   apic->lapic_timer.period,
			   ktime_to_ns(ktime_add_ns(now,
					apic->lapic_timer.period)));
	} else if (apic_lvtt_tscdeadline(apic)) {
		/* lapic timer in tsc deadline mode */
		u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
		u64 ns = 0;
		struct kvm_vcpu *vcpu = apic->vcpu;
		unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
		unsigned long flags;

		if (unlikely(!tscdeadline || !this_tsc_khz))
			return;

		local_irq_save(flags);

		now = apic->lapic_timer.timer.base->get_time();
		guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
		if (likely(tscdeadline > guest_tsc)) {
			ns = (tscdeadline - guest_tsc) * 1000000ULL;
			do_div(ns, this_tsc_khz);
		}
		hrtimer_start(&apic->lapic_timer.timer,
			ktime_add_ns(now, ns), HRTIMER_MODE_ABS);

		local_irq_restore(flags);
	}
}

static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
{
	int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0));

	if (apic_lvt_nmi_mode(lvt0_val)) {
		if (!nmi_wd_enabled) {
			apic_debug("Receive NMI setting on APIC_LVT0 "
				   "for cpu %d\n", apic->vcpu->vcpu_id);
			apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
		}
	} else if (nmi_wd_enabled)
		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
}

static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
	int ret = 0;

	trace_kvm_apic_write(reg, val);

	switch (reg) {
	case APIC_ID:		/* Local APIC ID */
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_id(apic, val >> 24);
		else
			ret = 1;
		break;

	case APIC_TASKPRI:
		report_tpr_access(apic, true);
		apic_set_tpr(apic, val & 0xff);
		break;

	case APIC_EOI:
		apic_set_eoi(apic);
		break;

	case APIC_LDR:
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
		else
			ret = 1;
		break;

	case APIC_DFR:
		if (!apic_x2apic_mode(apic)) {
			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
			recalculate_apic_map(apic->vcpu->kvm);
		} else
			ret = 1;
		break;

	case APIC_SPIV: {
		u32 mask = 0x3ff;
		if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
			mask |= APIC_SPIV_DIRECTED_EOI;
		apic_set_spiv(apic, val & mask);
		if (!(val & APIC_SPIV_APIC_ENABLED)) {
			int i;
			u32 lvt_val;

			for (i = 0; i < APIC_LVT_NUM; i++) {
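				/* Software-disabling the APIC masks every LVT entry. */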
				lvt_val = kvm_apic_get_reg(apic,
						       APIC_LVTT + 0x10 * i);
				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
					     lvt_val | APIC_LVT_MASKED);
			}
			atomic_set(&apic->lapic_timer.pending, 0);

		}
		break;
	}
	case APIC_ICR:
		/* No delay here, so we always clear the pending bit */
		apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
		apic_send_ipi(apic);
		break;

	case APIC_ICR2:
		if (!apic_x2apic_mode(apic))
			val &= 0xff000000;
		apic_set_reg(apic, APIC_ICR2, val);
		break;

	case APIC_LVT0:
		apic_manage_nmi_watchdog(apic, val);
		/* fall through */
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT1:
	case APIC_LVTERR:
		/* TODO: Check vector */
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;

		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
		apic_set_reg(apic, reg, val);

		break;

	case APIC_LVTT:
		if ((kvm_apic_get_reg(apic, APIC_LVTT) &
		    apic->lapic_timer.timer_mode_mask) !=
		   (val & apic->lapic_timer.timer_mode_mask))
			hrtimer_cancel(&apic->lapic_timer.timer);

		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;
		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
		apic_set_reg(apic, APIC_LVTT, val);
		break;

	case APIC_TMICT:
		if (apic_lvtt_tscdeadline(apic))
			break;

		hrtimer_cancel(&apic->lapic_timer.timer);
		apic_set_reg(apic, APIC_TMICT, val);
		start_apic_timer(apic);
		break;

	case APIC_TDCR:
		if (val & 4)
			apic_debug("KVM_WRITE:TDCR %x\n", val);
		apic_set_reg(apic, APIC_TDCR, val);
		update_divide_count(apic);
		break;

	case APIC_ESR:
		if (apic_x2apic_mode(apic) && val != 0) {
			apic_debug("KVM_WRITE:ESR not zero %x\n", val);
			ret = 1;
		}
		break;

	case APIC_SELF_IPI:
		if (apic_x2apic_mode(apic)) {
			apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
		} else
			ret = 1;
		break;
	default:
		ret = 1;
		break;
	}
	if (ret)
		apic_debug("Local APIC Write to read-only register %x\n", reg);
	return ret;
}

static int apic_mmio_write(struct kvm_io_device *this,
			    gpa_t address, int len, const void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	unsigned int offset = address - apic->base_address;
	u32 val;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	/*
	 * APIC registers must be aligned on a 128-bit boundary.
	 * 32/64/128-bit registers must be accessed through 32-bit accesses.
	 * Refer to SDM 8.4.1.
	 */
	if (len != 4 || (offset & 0xf)) {
		/* Don't shout loud, $infamous_os would cause only noise. */
		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
		return 0;
	}

	val = *(u32*)data;

	/* too common printing */
	if (offset != APIC_EOI)
		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
			   "0x%x\n", __func__, offset, len, val);

	apic_reg_write(apic, offset & 0xff0, val);

	return 0;
}

void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_has_lapic(vcpu))
		apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);

/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
	u32 val = 0;

	/* hw has done the conditional check and inst decode */
	offset &= 0xff0;

	apic_reg_read(vcpu->arch.apic, offset, 4, &val);

	/* TODO: optimize to just emulate side effect w/o one more write */
	apic_reg_write(vcpu->arch.apic, offset, val);
}
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);

void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!vcpu->arch.apic)
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);

	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
		static_key_slow_dec_deferred(&apic_hw_disabled);

	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
		static_key_slow_dec_deferred(&apic_sw_disabled);

	if (apic->regs)
		free_page((unsigned long)apic->regs);

	kfree(apic);
}

/*
 *----------------------------------------------------------------------
 * LAPIC interface
 *----------------------------------------------------------------------
 */

u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
			apic_lvtt_period(apic))
		return 0;

	return apic->lapic_timer.tscdeadline;
}

void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
			apic_lvtt_period(apic))
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);
	apic->lapic_timer.tscdeadline = data;
	start_apic_timer(apic);
}

void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
		     | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
}

u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
	u64 tpr;

	if (!kvm_vcpu_has_lapic(vcpu))
		return 0;

	tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);

	return (tpr & 0xf0) >> 4;
}

void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
	u64 old_value = vcpu->arch.apic_base;
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic) {
		value |= MSR_IA32_APICBASE_BSP;
		vcpu->arch.apic_base = value;
		return;
	}

	/* update jump label if enable bit changes */
	if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) {
		if (value & MSR_IA32_APICBASE_ENABLE)
			static_key_slow_dec_deferred(&apic_hw_disabled);
		else
			static_key_slow_inc(&apic_hw_disabled.key);
		recalculate_apic_map(vcpu->kvm);
	}

	if (!kvm_vcpu_is_bsp(apic->vcpu))
		value &= ~MSR_IA32_APICBASE_BSP;

	vcpu->arch.apic_base = value;
	if ((old_value ^ value) & X2APIC_ENABLE) {
		if (value & X2APIC_ENABLE) {
			u32 id = kvm_apic_id(apic);
			u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
			kvm_apic_set_ldr(apic, ldr);
			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
		} else
			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
	}

	apic->base_address = apic->vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;

	/* with FSB delivery interrupt, we can restart APIC functionality */
	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);

}

void kvm_lapic_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;
	int i;

	apic_debug("%s\n", __func__);

	ASSERT(vcpu);
	apic = vcpu->arch.apic;
	ASSERT(apic != NULL);

	/* Stop the timer in case it's a reset to an active apic */
	hrtimer_cancel(&apic->lapic_timer.timer);

	kvm_apic_set_id(apic, vcpu->vcpu_id);
	kvm_apic_set_version(apic->vcpu);

	for (i = 0; i < APIC_LVT_NUM; i++)
		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
	apic_set_reg(apic, APIC_LVT0,
		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));

	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
	apic_set_spiv(apic, 0xff);
	apic_set_reg(apic, APIC_TASKPRI, 0);
	kvm_apic_set_ldr(apic, 0);
	apic_set_reg(apic, APIC_ESR, 0);
	apic_set_reg(apic, APIC_ICR, 0);
	apic_set_reg(apic, APIC_ICR2, 0);
	apic_set_reg(apic, APIC_TDCR, 0);
	apic_set_reg(apic, APIC_TMICT, 0);
	for (i = 0; i < 8; i++) {
		apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
	}
	apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
	apic->highest_isr_cache = -1;
	update_divide_count(apic);
	atomic_set(&apic->lapic_timer.pending, 0);
	if (kvm_vcpu_is_bsp(vcpu))
		kvm_lapic_set_base(vcpu,
				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
	vcpu->arch.pv_eoi.msr_val = 0;
	apic_update_ppr(apic);

	vcpu->arch.apic_arb_prio = 0;
	vcpu->arch.apic_attention = 0;

	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
		   vcpu, kvm_apic_id(apic),
		   vcpu->arch.apic_base, apic->base_address);
}

/*
 *----------------------------------------------------------------------
 * timer interface
 *----------------------------------------------------------------------
 */

static bool lapic_is_periodic(struct kvm_lapic *apic)
{
	return apic_lvtt_period(apic);
}

int apic_has_pending_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
			apic_lvt_enabled(apic, APIC_LVTT))
		return atomic_read(&apic->lapic_timer.pending);

	return 0;
}

int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
	u32 reg = kvm_apic_get_reg(apic, lvt_type);
	int vector, mode, trig_mode;

	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
		vector = reg & APIC_VECTOR_MASK;
		mode = reg & APIC_MODE_MASK;
		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
		return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
	}
	return 0;
}

void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic)
		kvm_apic_local_deliver(apic, APIC_LVT0);
}

static const struct kvm_io_device_ops apic_mmio_ops = {
	.read     = apic_mmio_read,
	.write    = apic_mmio_write,
};

static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
{
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
	struct kvm_vcpu *vcpu = apic->vcpu;
	wait_queue_head_t *q = &vcpu->wq;

	/*
	 * There is a race window between reading and incrementing, but we do
	 * not care about potentially losing timer events in the !reinject
	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
	 * in vcpu_enter_guest.
	 */
	if (!atomic_read(&ktimer->pending)) {
		atomic_inc(&ktimer->pending);
		/* FIXME: this code should not know anything about vcpus */
		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
	}

	if (waitqueue_active(q))
		wake_up_interruptible(q);

	if (lapic_is_periodic(apic)) {
		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
		return HRTIMER_RESTART;
	} else
		return HRTIMER_NORESTART;
}

int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;

	ASSERT(vcpu != NULL);
	apic_debug("apic_init %d\n", vcpu->vcpu_id);

	apic = kzalloc(sizeof(*apic), GFP_KERNEL);
	if (!apic)
		goto nomem;

	vcpu->arch.apic = apic;

	apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
	if (!apic->regs) {
		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
		       vcpu->vcpu_id);
		goto nomem_free_apic;
	}
	apic->vcpu = vcpu;

	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS);
	apic->lapic_timer.timer.function = apic_timer_fn;

	/*
	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
	 * thinking that APIC state has changed.
	 */
	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
	kvm_lapic_set_base(vcpu,
			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);

	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
	kvm_lapic_reset(vcpu);
	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);

	return 0;
nomem_free_apic:
	kfree(apic);
nomem:
	return -ENOMEM;
}

int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int highest_irr;

	if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic))
		return -1;

	apic_update_ppr(apic);
	highest_irr = apic_find_highest_irr(apic);
	if ((highest_irr == -1) ||
	    ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI)))
		return -1;
	return highest_irr;
}

int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
	int r = 0;

	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
		r = 1;
	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
		r = 1;
	return r;
}

void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	if (atomic_read(&apic->lapic_timer.pending) > 0) {
		if (kvm_apic_local_deliver(apic, APIC_LVTT))
			atomic_dec(&apic->lapic_timer.pending);
	}
}

int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
	int vector = kvm_apic_has_interrupt(vcpu);
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (vector == -1)
		return -1;

	apic_set_isr(vector, apic);
	apic_update_ppr(apic);
	apic_clear_irr(vector, apic);
	return vector;
}

void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
		struct kvm_lapic_state *s)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
	/* set SPIV separately to get count of SW disabled APICs right */
	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
	/* call kvm_apic_set_id() to put apic into apic_map */
	kvm_apic_set_id(apic, kvm_apic_id(apic));
	kvm_apic_set_version(vcpu);

	apic_update_ppr(apic);
	hrtimer_cancel(&apic->lapic_timer.timer);
	update_divide_count(apic);
	start_apic_timer(apic);
	apic->irr_pending = true;
	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
				1 : count_vectors(apic->regs + APIC_ISR);
	apic->highest_isr_cache = -1;
	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}

void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
	struct hrtimer *timer;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	timer = &vcpu->arch.apic->lapic_timer.timer;
	if (hrtimer_cancel(timer))
		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}

/*
 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
 *
 * Detect whether guest triggered PV EOI since the
 * last entry. If yes, set EOI on guest's behalf.
 * Clear PV EOI in guest memory in any case.
 */
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	bool pending;
	int vector;
	/*
	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
	 *
	 * KVM_APIC_PV_EOI_PENDING is unset:
	 *	-> host disabled PV EOI.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
	 *	-> host enabled PV EOI, guest did not execute EOI yet.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
	 *	-> host enabled PV EOI, guest executed EOI.
	 */
	BUG_ON(!pv_eoi_enabled(vcpu));
	pending = pv_eoi_get_pending(vcpu);
	/*
	 * Clear pending bit in any case: it will be set again on vmentry.
	 * While this might not be ideal from performance point of view,
	 * this makes sure pv eoi is only enabled when we know it's safe.
	 */
	pv_eoi_clr_pending(vcpu);
	if (pending)
		return;
	vector = apic_set_eoi(apic);
	trace_kvm_pv_eoi(apic, vector);
}

void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
	u32 data;
	void *vapic;

	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
	data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
	kunmap_atomic(vapic);

	apic_set_tpr(vcpu->arch.apic, data & 0xff);
}

/*
 * apic_sync_pv_eoi_to_guest - called before vmentry
 *
 * Detect whether it's safe to enable PV EOI and
 * if yes do so.
 */
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	if (!pv_eoi_enabled(vcpu) ||
	    /* IRR set or many bits in ISR: could be nested. */
	    apic->irr_pending ||
	    /* Cache not set: could be safe but we don't bother. */
	    apic->highest_isr_cache == -1 ||
	    /* Need EOI to update ioapic. */
	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
		/*
		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
		 * so we need not do anything here.
		 */
		return;
	}

	pv_eoi_set_pending(apic->vcpu);
}

void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
	u32 data, tpr;
	int max_irr, max_isr;
	struct kvm_lapic *apic = vcpu->arch.apic;
	void *vapic;

	apic_sync_pv_eoi_to_guest(vcpu, apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff;
	max_irr = apic_find_highest_irr(apic);
	if (max_irr < 0)
		max_irr = 0;
	max_isr = apic_find_highest_isr(apic);
	if (max_isr < 0)
		max_isr = 0;
	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);

	vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
	*(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
	kunmap_atomic(vapic);
}

void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
	vcpu->arch.apic->vapic_addr = vapic_addr;
	if (vapic_addr)
		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	else
		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
}

int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4;

	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
		return 1;

	/* if this is ICR write vector before command */
	if (msr == 0x830)
		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return apic_reg_write(apic, reg, (u32)data);
}

int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;

	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
		return 1;

	if (apic_reg_read(apic, reg, 4, &low))
		return 1;
	if (msr == 0x830)
		apic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu))
		return 1;

	/* if this is ICR write vector before command */
	if (reg == APIC_ICR)
		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return apic_reg_write(apic, reg, (u32)data);
}

int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 low, high = 0;

	if (!kvm_vcpu_has_lapic(vcpu))
		return 1;

	if (apic_reg_read(apic, reg, 4, &low))
		return 1;
	if (reg == APIC_ICR)
		apic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
{
	u64 addr = data & ~KVM_MSR_ENABLED;
	if (!IS_ALIGNED(addr, 4))
		return 1;

	vcpu->arch.pv_eoi.msr_val = data;
	if (!pv_eoi_enabled(vcpu))
		return 0;
	return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
					 addr);
}

void kvm_lapic_init(void)
{
	/* do not patch jump label more than once per second */
	jump_label_rate_limit(&apic_hw_disabled, HZ);
	jump_label_rate_limit(&apic_sw_disabled, HZ);
}