/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

#define APIC_BUS_CYCLE_NS 1

/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define apic_debug(fmt, arg...)

#define APIC_LVT_NUM			6
/* 14 is the version for Xeon and Pentium 8.4.8 */
#define APIC_VERSION			(0x14UL | ((APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH		(1 << 12)
/* the following defines are not in apicdef.h */
#define APIC_SHORT_MASK			0xc0000
#define APIC_DEST_NOSHORT		0x0
#define APIC_DEST_MASK			0x800
#define MAX_APIC_VECTOR			256
#define APIC_VECTORS_PER_REG		32
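
/*
 * The 256 interrupt vectors of the IRR/ISR/TMR are spread over eight
 * 32-bit registers spaced 16 bytes apart.  REG_POS() yields the byte
 * offset of the register holding a given vector and VEC_POS() the bit
 * inside it; e.g. vector 0x61 lives in bit 1 of the register at
 * offset 0x30.
 */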
#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)

static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
	*((u32 *) (apic->regs + reg_off)) = val;
}

static inline int apic_test_vector(int vec, void *bitmap)
{
	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
		apic_test_vector(vector, apic->regs + APIC_IRR);
}

static inline void apic_set_vector(int vec, void *bitmap)
{
	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline void apic_clear_vector(int vec, void *bitmap)
{
	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;

static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
	if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
		if (val & APIC_SPIV_APIC_ENABLED)
			static_key_slow_dec_deferred(&apic_sw_disabled);
		else
			static_key_slow_inc(&apic_sw_disabled.key);
	}
	apic_set_reg(apic, APIC_SPIV, val);
}

static inline int apic_enabled(struct kvm_lapic *apic)
{
	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline int kvm_apic_id(struct kvm_lapic *apic)
{
	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}

#define KVM_X2APIC_CID_BITS 0

static void recalculate_apic_map(struct kvm *kvm)
{
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	int i;

	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);

	mutex_lock(&kvm->arch.apic_map_lock);

	if (!new)
		goto out;

	new->ldr_bits = 8;
	/* flat mode is default */
	new->cid_shift = 8;
	new->cid_mask = 0;
	new->lid_mask = 0xff;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_lapic *apic = vcpu->arch.apic;
		u16 cid, lid;
		u32 ldr;

		if (!kvm_apic_present(vcpu))
			continue;

		/*
		 * All APICs have to be configured in the same mode by an OS.
		 * We take advantage of this while building the logical id
		 * lookup table.  After reset APICs are in xapic/flat mode,
		 * so if we find an apic with a different setting we assume
		 * this is the mode the OS wants all apics to be in; build
		 * the lookup table accordingly.
		 */
		if (apic_x2apic_mode(apic)) {
			new->ldr_bits = 32;
			new->cid_shift = 16;
			new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1;
			new->lid_mask = 0xffff;
		} else if (kvm_apic_sw_enabled(apic) &&
				!new->cid_mask /* flat mode */ &&
				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
			new->cid_shift = 4;
			new->cid_mask = 0xf;
			new->lid_mask = 0xf;
		}

		new->phys_map[kvm_apic_id(apic)] = apic;

		ldr = kvm_apic_get_reg(apic, APIC_LDR);
		cid = apic_cluster_id(new, ldr);
		lid = apic_logical_id(new, ldr);

		if (lid)
			new->logical_map[cid][ffs(lid) - 1] = apic;
	}
out:
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));
	rcu_assign_pointer(kvm->arch.apic_map, new);
	mutex_unlock(&kvm->arch.apic_map_lock);

	if (old)
		kfree_rcu(old, rcu);

	kvm_vcpu_request_scan_ioapic(kvm);
}

static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
	apic_set_reg(apic, APIC_ID, id << 24);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
	apic_set_reg(apic, APIC_LDR, id);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
{
	return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
}
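
/*
 * The LVTT timer mode lives in bits 18:17 of the LVT timer register
 * (the exact mask comes from CPUID, hence lapic_timer.timer_mode_mask):
 * 00b is one-shot, 01b periodic, 10b TSC-deadline.
 */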
static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
}

static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
}

static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) ==
			APIC_LVT_TIMER_TSCDEADLINE);
}

static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct kvm_cpuid_entry2 *feat;
	u32 v = APIC_VERSION;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
		v |= APIC_LVR_DIRECTED_EOI;
	apic_set_reg(apic, APIC_LVR, v);
}

static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
	LVT_MASK,		/* part LVTT mask, timer mode mask added at runtime */
	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
	LINT_MASK, LINT_MASK,	/* LVT0-1 */
	LVT_MASK		/* LVTERR */
};

static int find_highest_vector(void *bitmap)
{
	int vec;
	u32 *reg;

	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		if (*reg)
			return fls(*reg) - 1 + vec;
	}

	return -1;
}

static u8 count_vectors(void *bitmap)
{
	int vec;
	u32 *reg;
	u8 count = 0;

	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		count += hweight32(*reg);
	}

	return count;
}

void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
{
	u32 i, pir_val;
	struct kvm_lapic *apic = vcpu->arch.apic;

	for (i = 0; i <= 7; i++) {
		pir_val = xchg(&pir[i], 0);
		if (pir_val)
			*((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val;
	}
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);

static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = true;
	apic_set_vector(vec, apic->regs + APIC_IRR);
}

static inline int apic_search_irr(struct kvm_lapic *apic)
{
	return find_highest_vector(apic->regs + APIC_IRR);
}

static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
	int result;

	/*
	 * Note that irr_pending is just a hint. It will always be true
	 * with virtual interrupt delivery enabled.
	 */
	if (!apic->irr_pending)
		return -1;

	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
	result = apic_search_irr(apic);
	ASSERT(result == -1 || result >= 16);

	return result;
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = false;
	apic_clear_vector(vec, apic->regs + APIC_IRR);
	if (apic_search_irr(apic) != -1)
		apic->irr_pending = true;
}

static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
		++apic->isr_count;
	BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
	/*
	 * An ISR (in-service register) bit is set when an interrupt is
	 * injected; since the highest pending vector is the one injected,
	 * the most recently set bit matches the highest bit in the ISR.
	 */
	apic->highest_isr_cache = vec;
}

static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
		--apic->isr_count;
	BUG_ON(apic->isr_count < 0);
	apic->highest_isr_cache = -1;
}

int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
	int highest_irr;

	/* This may race with setting of irr in __apic_accept_irq() and
	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
	 * will cause vmexit immediately and the value will be recalculated
	 * on the next vmentry.
	 */
	if (!kvm_vcpu_has_lapic(vcpu))
		return 0;
	highest_irr = apic_find_highest_irr(vcpu->arch.apic);

	return highest_irr;
}

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     unsigned long *dest_map);

int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
		unsigned long *dest_map)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
			irq->level, irq->trig_mode, dest_map);
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{

	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
				      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{

	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
				      sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
	u8 val;
	if (pv_eoi_get_user(vcpu, &val) < 0)
		apic_debug("Can't read EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
	return val & 0x1;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
		apic_debug("Can't set EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
	int result;

	/* Note that isr_count is always 1 with vid enabled */
	if (!apic->isr_count)
		return -1;
	if (likely(apic->highest_isr_cache != -1))
		return apic->highest_isr_cache;

	result = find_highest_vector(apic->regs + APIC_ISR);
	ASSERT(result == -1 || result >= 16);

	return result;
}

void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int i;

	for (i = 0; i < 8; i++)
		apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
}

/*
 * The processor priority (PPR) is the higher of the task priority and the
 * priority class of the highest in-service vector; a drop in PPR may
 * unmask a pending IRR vector, hence the KVM_REQ_EVENT request below.
 */
static void apic_update_ppr(struct kvm_lapic *apic)
{
	u32 tpr, isrv, ppr, old_ppr;
	int isr;

	old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
	isr = apic_find_highest_isr(apic);
	isrv = (isr != -1) ? isr : 0;

	if ((tpr & 0xf0) >= (isrv & 0xf0))
		ppr = tpr & 0xff;
	else
		ppr = isrv & 0xf0;

	apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
		   apic, ppr, isr, isrv);

	if (old_ppr != ppr) {
		apic_set_reg(apic, APIC_PROCPRI, ppr);
		if (ppr < old_ppr)
			kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	}
}

static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
	apic_set_reg(apic, APIC_TASKPRI, tpr);
	apic_update_ppr(apic);
}

int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
{
	return dest == 0xff || kvm_apic_id(apic) == dest;
}

int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
{
	int result = 0;
	u32 logical_id;

	if (apic_x2apic_mode(apic)) {
		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
		return logical_id & mda;
	}

	logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));

	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
	case APIC_DFR_FLAT:
		if (logical_id & mda)
			result = 1;
		break;
	case APIC_DFR_CLUSTER:
		if (((logical_id >> 4) == (mda >> 0x4))
		    && (logical_id & mda & 0xf))
			result = 1;
		break;
	default:
		apic_debug("Bad DFR vcpu %d: %08x\n",
			   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
		break;
	}

	return result;
}

int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
			   int short_hand, int dest, int dest_mode)
{
	int result = 0;
	struct kvm_lapic *target = vcpu->arch.apic;

	apic_debug("target %p, source %p, dest 0x%x, "
		   "dest_mode 0x%x, short_hand 0x%x\n",
		   target, source, dest, dest_mode, short_hand);

	ASSERT(target);
	switch (short_hand) {
	case APIC_DEST_NOSHORT:
		if (dest_mode == 0)
			/* Physical mode. */
			result = kvm_apic_match_physical_addr(target, dest);
		else
			/* Logical mode. */
			result = kvm_apic_match_logical_addr(target, dest);
		break;
	case APIC_DEST_SELF:
		result = (target == source);
		break;
	case APIC_DEST_ALLINC:
		result = 1;
		break;
	case APIC_DEST_ALLBUT:
		result = (target != source);
		break;
	default:
		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
			   short_hand);
		break;
	}

	return result;
}
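
/*
 * Fast path for interrupt delivery: resolve the destination through the
 * RCU-protected apic_map instead of scanning every vcpu.  Returns false
 * when the map cannot answer the question (no map allocated, a shorthand
 * other than self, or physical-mode lowest-priority/broadcast delivery),
 * in which case the caller falls back to the slow scan of all vcpus.
 */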
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
{
	struct kvm_apic_map *map;
	unsigned long bitmap = 1;
	struct kvm_lapic **dst;
	int i;
	bool ret = false;

	*r = -1;

	if (irq->shorthand == APIC_DEST_SELF) {
		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
		return true;
	}

	if (irq->shorthand)
		return false;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	if (!map)
		goto out;

	if (irq->dest_mode == 0) { /* physical mode */
		if (irq->delivery_mode == APIC_DM_LOWEST ||
				irq->dest_id == 0xff)
			goto out;
		dst = &map->phys_map[irq->dest_id & 0xff];
	} else {
		u32 mda = irq->dest_id << (32 - map->ldr_bits);

		dst = map->logical_map[apic_cluster_id(map, mda)];

		bitmap = apic_logical_id(map, mda);

		if (irq->delivery_mode == APIC_DM_LOWEST) {
			/* pick one destination by arbitration priority */
			int l = -1;
			for_each_set_bit(i, &bitmap, 16) {
				if (!dst[i])
					continue;
				if (l < 0)
					l = i;
				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
					l = i;
			}

			bitmap = (l >= 0) ? 1 << l : 0;
		}
	}

	for_each_set_bit(i, &bitmap, 16) {
		if (!dst[i])
			continue;
		if (*r < 0)
			*r = 0;
		*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
	}

	ret = true;
out:
	rcu_read_unlock();
	return ret;
}

/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     unsigned long *dest_map)
{
	int result = 0;
	struct kvm_vcpu *vcpu = apic->vcpu;

	switch (delivery_mode) {
	case APIC_DM_LOWEST:
		vcpu->arch.apic_arb_prio++;
		/* fall through */
	case APIC_DM_FIXED:
		/* FIXME add logic for vcpu on reset */
		if (unlikely(!apic_enabled(apic)))
			break;

		result = 1;

		if (dest_map)
			__set_bit(vcpu->vcpu_id, dest_map);

		if (kvm_x86_ops->deliver_posted_interrupt)
			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
		else {
			apic_set_irr(vector, apic);

			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
					  trig_mode, vector, false);
		break;

	case APIC_DM_REMRD:
		result = 1;
		vcpu->arch.pv.pv_unhalted = 1;
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_SMI:
		apic_debug("Ignoring guest SMI\n");
		break;

	case APIC_DM_NMI:
		result = 1;
		kvm_inject_nmi(vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_INIT:
		if (!trig_mode || level) {
			result = 1;
			/* assumes that there are only KVM_APIC_INIT/SIPI */
			apic->pending_events = (1UL << KVM_APIC_INIT);
			/* make sure pending_events is visible before sending
			 * the request */
			smp_wmb();
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		} else {
			apic_debug("Ignoring de-assert INIT to vcpu %d\n",
				   vcpu->vcpu_id);
		}
		break;

	case APIC_DM_STARTUP:
		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
			   vcpu->vcpu_id, vector);
		result = 1;
		apic->sipi_vector = vector;
		/* make sure sipi_vector is visible for the receiver */
		smp_wmb();
		set_bit(KVM_APIC_SIPI, &apic->pending_events);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_EXTINT:
		/*
		 * Should only be called by kvm_apic_local_deliver() with LVT0,
		 * before NMI watchdog was enabled. Already handled by
		 * kvm_apic_accept_pic_intr().
		 */
		break;

	default:
		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
		       delivery_mode);
		break;
	}
	return result;
}

int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}

static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
		int trigger_mode;
		if (apic_test_vector(vector, apic->regs + APIC_TMR))
			trigger_mode = IOAPIC_LEVEL_TRIG;
		else
			trigger_mode = IOAPIC_EDGE_TRIG;
		kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
	}
}

static int apic_set_eoi(struct kvm_lapic *apic)
{
	int vector = apic_find_highest_isr(apic);

	trace_kvm_eoi(apic, vector);

	/*
	 * Not every write to EOI has a corresponding ISR bit set; one
	 * example is when the kernel checks the timer in setup_IO_APIC().
	 */
	if (vector == -1)
		return vector;

	apic_clear_isr(vector, apic);
	apic_update_ppr(apic);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	return vector;
}

/*
 * This interface assumes a trap-like exit, which has already finished
 * the desired side effects, including vISR and vPPR updates.
 */
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	trace_kvm_eoi(apic, vector);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);

static void apic_send_ipi(struct kvm_lapic *apic)
{
	u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
	u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
	struct kvm_lapic_irq irq;

	irq.vector = icr_low & APIC_VECTOR_MASK;
	irq.delivery_mode = icr_low & APIC_MODE_MASK;
	irq.dest_mode = icr_low & APIC_DEST_MASK;
	irq.level = icr_low & APIC_INT_ASSERT;
	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
	irq.shorthand = icr_low & APIC_SHORT_MASK;
	if (apic_x2apic_mode(apic))
		irq.dest_id = icr_high;
	else
		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

	trace_kvm_apic_ipi(icr_low, irq.dest_id);

	apic_debug("icr_high 0x%x, icr_low 0x%x, "
		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
		   icr_high, icr_low, irq.shorthand, irq.dest_id,
		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
		   irq.vector);

	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}

static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
	ktime_t remaining;
	s64 ns;
	u32 tmcct;

	ASSERT(apic != NULL);

	/* if initial count is 0, current count should also be 0 */
	if (kvm_apic_get_reg(apic, APIC_TMICT) == 0 ||
		apic->lapic_timer.period == 0)
		return 0;

	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
	if (ktime_to_ns(remaining) < 0)
		remaining = ktime_set(0, 0);

	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
	tmcct = div64_u64(ns,
			 (APIC_BUS_CYCLE_NS * apic->divide_count));

	return tmcct;
}

static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct kvm_run *run = vcpu->run;

	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
	run->tpr_access.rip = kvm_rip_read(vcpu);
	run->tpr_access.is_write = write;
}

static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
	if (apic->vcpu->arch.tpr_access_reporting)
		__report_tpr_access(apic, write);
}

static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
	u32 val = 0;

	if (offset >= LAPIC_MMIO_LENGTH)
		return 0;

	switch (offset) {
	case APIC_ID:
		if (apic_x2apic_mode(apic))
			val = kvm_apic_id(apic);
		else
			val = kvm_apic_id(apic) << 24;
		break;
	case APIC_ARBPRI:
		apic_debug("Access APIC ARBPRI register which is for P6\n");
		break;

	case APIC_TMCCT:	/* Timer CCR */
		if (apic_lvtt_tscdeadline(apic))
			return 0;

		val = apic_get_tmcct(apic);
		break;
	case APIC_PROCPRI:
		apic_update_ppr(apic);
		val = kvm_apic_get_reg(apic, offset);
		break;
	case APIC_TASKPRI:
		report_tpr_access(apic, false);
		/* fall thru */
	default:
		val = kvm_apic_get_reg(apic, offset);
		break;
	}

	return val;
}

static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_lapic, dev);
}

static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
		void *data)
{
	unsigned char alignment = offset & 0xf;
	u32 result;
	/* this bitmask has a bit cleared for each reserved register */
	static const u64 rmask = 0x43ff01ffffffe70cULL;

	if ((alignment + len) > 4) {
		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
			   offset, len);
		return 1;
	}

	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
		apic_debug("KVM_APIC_READ: read reserved register %x\n",
			   offset);
		return 1;
	}

	result = __apic_read(apic, offset & ~0xf);

	trace_kvm_apic_read(offset, result);

	switch (len) {
	case 1:
	case 2:
	case 4:
		memcpy(data, (char *)&result + alignment, len);
		break;
	default:
		printk(KERN_ERR "Local APIC read with len = %x, "
		       "should be 1,2, or 4 instead\n", len);
		break;
	}
	return 0;
}

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
	return kvm_apic_hw_enabled(apic) &&
	    addr >= apic->base_address &&
	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_io_device *this,
			   gpa_t address, int len, void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	u32 offset = address - apic->base_address;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	apic_reg_read(apic, offset, len, data);

	return 0;
}

static void update_divide_count(struct kvm_lapic *apic)
{
	u32 tmp1, tmp2, tdcr;

	tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
	tmp1 = tdcr & 0xf;
	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
	apic->divide_count = 0x1 << (tmp2 & 0x7);

	apic_debug("timer divide count is 0x%x\n",
		   apic->divide_count);
}

static void start_apic_timer(struct kvm_lapic *apic)
{
	ktime_t now;
	atomic_set(&apic->lapic_timer.pending, 0);

	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
		/* lapic timer in oneshot or periodic mode */
		now = apic->lapic_timer.timer.base->get_time();
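		/*
		 * The period is in nanoseconds: initial count times the 1ns
		 * bus cycle, scaled by the configured divider; e.g. a TMICT
		 * of 1000000 with divide-by-4 gives a 4ms period.
		 */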
		apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
			    * APIC_BUS_CYCLE_NS * apic->divide_count;

		if (!apic->lapic_timer.period)
			return;
		/*
		 * Do not allow the guest to program periodic timers with small
		 * interval, since the hrtimers are not throttled by the host
		 * scheduler.
		 */
		if (apic_lvtt_period(apic)) {
			s64 min_period = min_timer_period_us * 1000LL;

			if (apic->lapic_timer.period < min_period) {
				pr_info_ratelimited(
				    "kvm: vcpu %i: requested %lld ns "
				    "lapic timer period limited to %lld ns\n",
				    apic->vcpu->vcpu_id,
				    apic->lapic_timer.period, min_period);
				apic->lapic_timer.period = min_period;
			}
		}

		hrtimer_start(&apic->lapic_timer.timer,
			      ktime_add_ns(now, apic->lapic_timer.period),
			      HRTIMER_MODE_ABS);

		apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
			   PRIx64 ", "
			   "timer initial count 0x%x, period %lldns, "
			   "expire @ 0x%016" PRIx64 ".\n", __func__,
			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
			   kvm_apic_get_reg(apic, APIC_TMICT),
			   apic->lapic_timer.period,
			   ktime_to_ns(ktime_add_ns(now,
					apic->lapic_timer.period)));
	} else if (apic_lvtt_tscdeadline(apic)) {
		/* lapic timer in tsc deadline mode */
		u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
		u64 ns = 0;
		struct kvm_vcpu *vcpu = apic->vcpu;
		unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
		unsigned long flags;

		if (unlikely(!tscdeadline || !this_tsc_khz))
			return;

		local_irq_save(flags);

		now = apic->lapic_timer.timer.base->get_time();
		guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
		if (likely(tscdeadline > guest_tsc)) {
			ns = (tscdeadline - guest_tsc) * 1000000ULL;
			do_div(ns, this_tsc_khz);
		}
		hrtimer_start(&apic->lapic_timer.timer,
			ktime_add_ns(now, ns), HRTIMER_MODE_ABS);

		local_irq_restore(flags);
	}
}

static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
{
	int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0));

	if (apic_lvt_nmi_mode(lvt0_val)) {
		if (!nmi_wd_enabled) {
			apic_debug("Receive NMI setting on APIC_LVT0 "
				   "for cpu %d\n", apic->vcpu->vcpu_id);
			apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
		}
	} else if (nmi_wd_enabled)
		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
}
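
/*
 * Common register-write path, shared by the MMIO handler, the x2APIC MSR
 * interface and the Hyper-V vAPIC MSRs.  Returns a non-zero value for
 * writes to reserved or read-only registers.
 */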
static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
	int ret = 0;

	trace_kvm_apic_write(reg, val);

	switch (reg) {
	case APIC_ID:		/* Local APIC ID */
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_id(apic, val >> 24);
		else
			ret = 1;
		break;

	case APIC_TASKPRI:
		report_tpr_access(apic, true);
		apic_set_tpr(apic, val & 0xff);
		break;

	case APIC_EOI:
		apic_set_eoi(apic);
		break;

	case APIC_LDR:
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
		else
			ret = 1;
		break;

	case APIC_DFR:
		if (!apic_x2apic_mode(apic)) {
			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
			recalculate_apic_map(apic->vcpu->kvm);
		} else
			ret = 1;
		break;

	case APIC_SPIV: {
		u32 mask = 0x3ff;
		if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
			mask |= APIC_SPIV_DIRECTED_EOI;
		apic_set_spiv(apic, val & mask);
		if (!(val & APIC_SPIV_APIC_ENABLED)) {
			int i;
			u32 lvt_val;

			/* software disable: mask all LVT entries */
			for (i = 0; i < APIC_LVT_NUM; i++) {
				lvt_val = kvm_apic_get_reg(apic,
						       APIC_LVTT + 0x10 * i);
				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
					     lvt_val | APIC_LVT_MASKED);
			}
			atomic_set(&apic->lapic_timer.pending, 0);

		}
		break;
	}
	case APIC_ICR:
		/* No delay here, so we always clear the pending bit */
		apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
		apic_send_ipi(apic);
		break;

	case APIC_ICR2:
		if (!apic_x2apic_mode(apic))
			val &= 0xff000000;
		apic_set_reg(apic, APIC_ICR2, val);
		break;

	case APIC_LVT0:
		apic_manage_nmi_watchdog(apic, val);
		/* fall through */
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT1:
	case APIC_LVTERR:
		/* TODO: Check vector */
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;

		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
		apic_set_reg(apic, reg, val);

		break;

	case APIC_LVTT:
		if ((kvm_apic_get_reg(apic, APIC_LVTT) &
		    apic->lapic_timer.timer_mode_mask) !=
		   (val & apic->lapic_timer.timer_mode_mask))
			hrtimer_cancel(&apic->lapic_timer.timer);

		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;
		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
		apic_set_reg(apic, APIC_LVTT, val);
		break;

	case APIC_TMICT:
		if (apic_lvtt_tscdeadline(apic))
			break;

		hrtimer_cancel(&apic->lapic_timer.timer);
		apic_set_reg(apic, APIC_TMICT, val);
		start_apic_timer(apic);
		break;

	case APIC_TDCR:
		if (val & 4)
			apic_debug("KVM_WRITE:TDCR %x\n", val);
		apic_set_reg(apic, APIC_TDCR, val);
		update_divide_count(apic);
		break;

	case APIC_ESR:
		if (apic_x2apic_mode(apic) && val != 0) {
			apic_debug("KVM_WRITE:ESR not zero %x\n", val);
			ret = 1;
		}
		break;

	case APIC_SELF_IPI:
		if (apic_x2apic_mode(apic)) {
			apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
		} else
			ret = 1;
		break;
	default:
		ret = 1;
		break;
	}
	if (ret)
		apic_debug("Local APIC Write to read-only register %x\n", reg);
	return ret;
}

static int apic_mmio_write(struct kvm_io_device *this,
			    gpa_t address, int len, const void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	unsigned int offset = address - apic->base_address;
	u32 val;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	/*
	 * APIC registers are aligned on 128-bit boundaries; 32/64/128-bit
	 * registers must be accessed through 32-bit loads and stores.
	 * Refer to SDM 8.4.1.
	 */
	if (len != 4 || (offset & 0xf)) {
		/* Don't shout loud, $infamous_os would cause only noise. */
		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
		return 0;
	}

	val = *(u32*)data;

	/* too common printing */
	if (offset != APIC_EOI)
		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
			   "0x%x\n", __func__, offset, len, val);

	apic_reg_write(apic, offset & 0xff0, val);

	return 0;
}

void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_has_lapic(vcpu))
		apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);

/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
	u32 val = 0;

	/* hw has done the conditional check and inst decode */
	offset &= 0xff0;

	apic_reg_read(vcpu->arch.apic, offset, 4, &val);

	/* TODO: optimize to just emulate side effect w/o one more write */
	apic_reg_write(vcpu->arch.apic, offset, val);
}
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);

void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!vcpu->arch.apic)
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);

	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
		static_key_slow_dec_deferred(&apic_hw_disabled);

	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
		static_key_slow_dec_deferred(&apic_sw_disabled);

	if (apic->regs)
		free_page((unsigned long)apic->regs);

	kfree(apic);
}

/*
 *----------------------------------------------------------------------
 * LAPIC interface
 *----------------------------------------------------------------------
 */

u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
			apic_lvtt_period(apic))
		return 0;

	return apic->lapic_timer.tscdeadline;
}

void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
			apic_lvtt_period(apic))
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);
	apic->lapic_timer.tscdeadline = data;
	start_apic_timer(apic);
}

void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
		     | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
}

u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
	u64 tpr;

	if (!kvm_vcpu_has_lapic(vcpu))
		return 0;

	tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);

	return (tpr & 0xf0) >> 4;
}
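
/*
 * Changes to MSR_IA32_APICBASE land here: the apic_hw_disabled static key
 * follows the global enable bit, and an x2APIC enable transition derives
 * the initial LDR from the APIC id (cluster = id >> 4, bit = id & 0xf).
 */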
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
	u64 old_value = vcpu->arch.apic_base;
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic) {
		value |= MSR_IA32_APICBASE_BSP;
		vcpu->arch.apic_base = value;
		return;
	}

	if (!kvm_vcpu_is_bsp(apic->vcpu))
		value &= ~MSR_IA32_APICBASE_BSP;
	vcpu->arch.apic_base = value;

	/* update jump label if enable bit changes */
	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
		if (value & MSR_IA32_APICBASE_ENABLE)
			static_key_slow_dec_deferred(&apic_hw_disabled);
		else
			static_key_slow_inc(&apic_hw_disabled.key);
		recalculate_apic_map(vcpu->kvm);
	}

	if ((old_value ^ value) & X2APIC_ENABLE) {
		if (value & X2APIC_ENABLE) {
			u32 id = kvm_apic_id(apic);
			u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
			kvm_apic_set_ldr(apic, ldr);
			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
		} else
			kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
	}

	apic->base_address = apic->vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;

	/* with FSB delivery interrupt, we can restart APIC functionality */
	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);

}

void kvm_lapic_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;
	int i;

	apic_debug("%s\n", __func__);

	ASSERT(vcpu);
	apic = vcpu->arch.apic;
	ASSERT(apic != NULL);

	/* Stop the timer in case it's a reset to an active apic */
	hrtimer_cancel(&apic->lapic_timer.timer);

	kvm_apic_set_id(apic, vcpu->vcpu_id);
	kvm_apic_set_version(apic->vcpu);

	for (i = 0; i < APIC_LVT_NUM; i++)
		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
	apic_set_reg(apic, APIC_LVT0,
		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));

	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
	apic_set_spiv(apic, 0xff);
	apic_set_reg(apic, APIC_TASKPRI, 0);
	kvm_apic_set_ldr(apic, 0);
	apic_set_reg(apic, APIC_ESR, 0);
	apic_set_reg(apic, APIC_ICR, 0);
	apic_set_reg(apic, APIC_ICR2, 0);
	apic_set_reg(apic, APIC_TDCR, 0);
	apic_set_reg(apic, APIC_TMICT, 0);
	for (i = 0; i < 8; i++) {
		apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
	}
	apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
	apic->highest_isr_cache = -1;
	update_divide_count(apic);
	atomic_set(&apic->lapic_timer.pending, 0);
	if (kvm_vcpu_is_bsp(vcpu))
		kvm_lapic_set_base(vcpu,
				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
	vcpu->arch.pv_eoi.msr_val = 0;
	apic_update_ppr(apic);

	vcpu->arch.apic_arb_prio = 0;
	vcpu->arch.apic_attention = 0;

	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
		   vcpu, kvm_apic_id(apic),
		   vcpu->arch.apic_base, apic->base_address);
}

/*
 *----------------------------------------------------------------------
 * timer interface
 *----------------------------------------------------------------------
 */

static bool lapic_is_periodic(struct kvm_lapic *apic)
{
	return apic_lvtt_period(apic);
}

int apic_has_pending_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
			apic_lvt_enabled(apic, APIC_LVTT))
		return atomic_read(&apic->lapic_timer.pending);

	return 0;
}

int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
	u32 reg = kvm_apic_get_reg(apic, lvt_type);
	int vector, mode, trig_mode;

	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
		vector = reg & APIC_VECTOR_MASK;
		mode = reg & APIC_MODE_MASK;
		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
		return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
					NULL);
	}
	return 0;
}

void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic)
		kvm_apic_local_deliver(apic, APIC_LVT0);
}

static const struct kvm_io_device_ops apic_mmio_ops = {
	.read     = apic_mmio_read,
	.write    = apic_mmio_write,
};

static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
{
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
	struct kvm_vcpu *vcpu = apic->vcpu;
	wait_queue_head_t *q = &vcpu->wq;

	/*
	 * There is a race window between reading and incrementing, but we do
	 * not care about potentially losing timer events in the !reinject
	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
	 * in vcpu_enter_guest.
	 */
	if (!atomic_read(&ktimer->pending)) {
		atomic_inc(&ktimer->pending);
		/* FIXME: this code should not know anything about vcpus */
		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
	}

	if (waitqueue_active(q))
		wake_up_interruptible(q);

	if (lapic_is_periodic(apic)) {
		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
		return HRTIMER_RESTART;
	} else
		return HRTIMER_NORESTART;
}

int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;

	ASSERT(vcpu != NULL);
	apic_debug("apic_init %d\n", vcpu->vcpu_id);

	apic = kzalloc(sizeof(*apic), GFP_KERNEL);
	if (!apic)
		goto nomem;

	vcpu->arch.apic = apic;

	apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
	if (!apic->regs) {
		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
		       vcpu->vcpu_id);
		goto nomem_free_apic;
	}
	apic->vcpu = vcpu;

	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS);
	apic->lapic_timer.timer.function = apic_timer_fn;

	/*
	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
	 * thinking that APIC state has changed.
	 */
	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
	kvm_lapic_set_base(vcpu,
			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);

	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
	kvm_lapic_reset(vcpu);
	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);

	return 0;
nomem_free_apic:
	kfree(apic);
nomem:
	return -ENOMEM;
}

int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int highest_irr;

	if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic))
		return -1;

	apic_update_ppr(apic);
	highest_irr = apic_find_highest_irr(apic);
	if ((highest_irr == -1) ||
	    ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI)))
		return -1;
	return highest_irr;
}

int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
	int r = 0;

	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
		r = 1;
	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
		r = 1;
	return r;
}

void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	if (atomic_read(&apic->lapic_timer.pending) > 0) {
		kvm_apic_local_deliver(apic, APIC_LVTT);
		atomic_set(&apic->lapic_timer.pending, 0);
	}
}

int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
	int vector = kvm_apic_has_interrupt(vcpu);
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (vector == -1)
		return -1;

	apic_set_isr(vector, apic);
	apic_update_ppr(apic);
	apic_clear_irr(vector, apic);
	return vector;
}

void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
		struct kvm_lapic_state *s)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
	/* set SPIV separately to get count of SW disabled APICs right */
	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
	/* call kvm_apic_set_id() to put apic into apic_map */
	kvm_apic_set_id(apic, kvm_apic_id(apic));
	kvm_apic_set_version(vcpu);

	apic_update_ppr(apic);
	hrtimer_cancel(&apic->lapic_timer.timer);
	update_divide_count(apic);
	start_apic_timer(apic);
	apic->irr_pending = true;
	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
				1 : count_vectors(apic->regs + APIC_ISR);
	apic->highest_isr_cache = -1;
	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
	kvm_make_request(KVM_REQ_EVENT, vcpu);
	kvm_rtc_eoi_tracking_restore_one(vcpu);
}

void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
	struct hrtimer *timer;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	timer = &vcpu->arch.apic->lapic_timer.timer;
	if (hrtimer_cancel(timer))
		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}

/*
 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
 *
 * Detect whether the guest triggered PV EOI since the last entry.
 * If yes, set EOI on the guest's behalf.
 * Clear PV EOI in guest memory in any case.
 */
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	bool pending;
	int vector;
	/*
	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
	 *
	 * KVM_APIC_PV_EOI_PENDING is unset:
	 *	-> host disabled PV EOI.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
	 *	-> host enabled PV EOI, guest did not execute EOI yet.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
	 *	-> host enabled PV EOI, guest executed EOI.
	 */
	BUG_ON(!pv_eoi_enabled(vcpu));
	pending = pv_eoi_get_pending(vcpu);
	/*
	 * Clear pending bit in any case: it will be set again on vmentry.
	 * While this might not be ideal from a performance point of view,
	 * this makes sure pv eoi is only enabled when we know it's safe.
	 */
	pv_eoi_clr_pending(vcpu);
	if (pending)
		return;
	vector = apic_set_eoi(apic);
	trace_kvm_pv_eoi(apic, vector);
}

void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
	u32 data;

	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
				sizeof(u32));

	apic_set_tpr(vcpu->arch.apic, data & 0xff);
}

/*
 * apic_sync_pv_eoi_to_guest - called before vmentry
 *
 * Detect whether it's safe to enable PV EOI and
 * if yes do so.
 */
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	if (!pv_eoi_enabled(vcpu) ||
	    /* IRR set or many bits in ISR: could be nested. */
	    apic->irr_pending ||
	    /* Cache not set: could be safe but we don't bother. */
	    apic->highest_isr_cache == -1 ||
	    /* Need EOI to update ioapic. */
	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
		/*
		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
		 * so we need not do anything here.
		 */
		return;
	}

	pv_eoi_set_pending(apic->vcpu);
}

void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
	u32 data, tpr;
	int max_irr, max_isr;
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic_sync_pv_eoi_to_guest(vcpu, apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff;
	max_irr = apic_find_highest_irr(apic);
	if (max_irr < 0)
		max_irr = 0;
	max_isr = apic_find_highest_isr(apic);
	if (max_isr < 0)
		max_isr = 0;
	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);

	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
				sizeof(u32));
}

int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
	if (vapic_addr) {
		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
					&vcpu->arch.apic->vapic_cache,
					vapic_addr, sizeof(u32)))
			return -EINVAL;
		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	} else {
		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	}

	vcpu->arch.apic->vapic_addr = vapic_addr;
	return 0;
}

int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4;

	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
		return 1;

	/* if this is an ICR write, set the destination (ICR2) before the command */
	if (msr == 0x830)
		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return apic_reg_write(apic, reg, (u32)data);
}

int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;

	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
		return 1;

	if (apic_reg_read(apic, reg, 4, &low))
		return 1;
	if (msr == 0x830)
		apic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu))
		return 1;

	/* if this is an ICR write, set the destination (ICR2) before the command */
	if (reg == APIC_ICR)
		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return apic_reg_write(apic, reg, (u32)data);
}

int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 low, high = 0;

	if (!kvm_vcpu_has_lapic(vcpu))
		return 1;

	if (apic_reg_read(apic, reg, 4, &low))
		return 1;
	if (reg == APIC_ICR)
		apic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
{
	u64 addr = data & ~KVM_MSR_ENABLED;
	if (!IS_ALIGNED(addr, 4))
		return 1;

	vcpu->arch.pv_eoi.msr_val = data;
	if (!pv_eoi_enabled(vcpu))
		return 0;
	return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
					 addr, sizeof(u8));
}

void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	unsigned int sipi_vector;
	unsigned long pe;

	if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
		return;

	pe = xchg(&apic->pending_events, 0);

	if (test_bit(KVM_APIC_INIT, &pe)) {
		kvm_lapic_reset(vcpu);
		kvm_vcpu_reset(vcpu);
		if (kvm_vcpu_is_bsp(apic->vcpu))
			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
		else
			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
	}
	if (test_bit(KVM_APIC_SIPI, &pe) &&
	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
		/* evaluate pending_events before reading the vector */
		smp_rmb();
		sipi_vector = apic->sipi_vector;
		pr_debug("vcpu %d received sipi with vector # %x\n",
			 vcpu->vcpu_id, sipi_vector);
		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	}
}

void kvm_lapic_init(void)
{
	/* do not patch jump label more than once per second */
	jump_label_rate_limit(&apic_hw_disabled, HZ);
	jump_label_rate_limit(&apic_sw_disabled, HZ);
}