1 2 /* 3 * Local APIC virtualization 4 * 5 * Copyright (C) 2006 Qumranet, Inc. 6 * Copyright (C) 2007 Novell 7 * Copyright (C) 2007 Intel 8 * Copyright 2009 Red Hat, Inc. and/or its affiliates. 9 * 10 * Authors: 11 * Dor Laor <dor.laor@qumranet.com> 12 * Gregory Haskins <ghaskins@novell.com> 13 * Yaozu (Eddie) Dong <eddie.dong@intel.com> 14 * 15 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation. 16 * 17 * This work is licensed under the terms of the GNU GPL, version 2. See 18 * the COPYING file in the top-level directory. 19 */ 20 21 #include <linux/kvm_host.h> 22 #include <linux/kvm.h> 23 #include <linux/mm.h> 24 #include <linux/highmem.h> 25 #include <linux/smp.h> 26 #include <linux/hrtimer.h> 27 #include <linux/io.h> 28 #include <linux/module.h> 29 #include <linux/math64.h> 30 #include <linux/slab.h> 31 #include <asm/processor.h> 32 #include <asm/msr.h> 33 #include <asm/page.h> 34 #include <asm/current.h> 35 #include <asm/apicdef.h> 36 #include <linux/atomic.h> 37 #include <linux/jump_label.h> 38 #include "kvm_cache_regs.h" 39 #include "irq.h" 40 #include "trace.h" 41 #include "x86.h" 42 #include "cpuid.h" 43 44 #ifndef CONFIG_X86_64 45 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) 46 #else 47 #define mod_64(x, y) ((x) % (y)) 48 #endif 49 50 #define PRId64 "d" 51 #define PRIx64 "llx" 52 #define PRIu64 "u" 53 #define PRIo64 "o" 54 55 #define APIC_BUS_CYCLE_NS 1 56 57 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ 58 #define apic_debug(fmt, arg...) 59 60 #define APIC_LVT_NUM 6 61 /* 14 is the version for Xeon and Pentium 8.4.8*/ 62 #define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16)) 63 #define LAPIC_MMIO_LENGTH (1 << 12) 64 /* followed define is not in apicdef.h */ 65 #define APIC_SHORT_MASK 0xc0000 66 #define APIC_DEST_NOSHORT 0x0 67 #define APIC_DEST_MASK 0x800 68 #define MAX_APIC_VECTOR 256 69 #define APIC_VECTORS_PER_REG 32 70 71 #define VEC_POS(v) ((v) & (32 - 1)) 72 #define REG_POS(v) (((v) >> 5) << 4) 73 74 static unsigned int min_timer_period_us = 500; 75 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); 76 77 static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) 78 { 79 *((u32 *) (apic->regs + reg_off)) = val; 80 } 81 82 static inline int apic_test_vector(int vec, void *bitmap) 83 { 84 return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 85 } 86 87 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) 88 { 89 struct kvm_lapic *apic = vcpu->arch.apic; 90 91 return apic_test_vector(vector, apic->regs + APIC_ISR) || 92 apic_test_vector(vector, apic->regs + APIC_IRR); 93 } 94 95 static inline void apic_set_vector(int vec, void *bitmap) 96 { 97 set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 98 } 99 100 static inline void apic_clear_vector(int vec, void *bitmap) 101 { 102 clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 103 } 104 105 static inline int __apic_test_and_set_vector(int vec, void *bitmap) 106 { 107 return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 108 } 109 110 static inline int __apic_test_and_clear_vector(int vec, void *bitmap) 111 { 112 return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 113 } 114 115 struct static_key_deferred apic_hw_disabled __read_mostly; 116 struct static_key_deferred apic_sw_disabled __read_mostly; 117 118 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) 119 { 120 if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) { 121 if (val & APIC_SPIV_APIC_ENABLED) 122 
			static_key_slow_dec_deferred(&apic_sw_disabled);
		else
			static_key_slow_inc(&apic_sw_disabled.key);
	}
	apic_set_reg(apic, APIC_SPIV, val);
}

static inline int apic_enabled(struct kvm_lapic *apic)
{
	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline int kvm_apic_id(struct kvm_lapic *apic)
{
	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}

static void recalculate_apic_map(struct kvm *kvm)
{
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	int i;

	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);

	mutex_lock(&kvm->arch.apic_map_lock);

	if (!new)
		goto out;

	new->ldr_bits = 8;
	/* flat mode is default */
	new->cid_shift = 8;
	new->cid_mask = 0;
	new->lid_mask = 0xff;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_lapic *apic = vcpu->arch.apic;
		u16 cid, lid;
		u32 ldr;

		if (!kvm_apic_present(vcpu))
			continue;

		/*
		 * All APICs have to be configured in the same mode by an OS.
		 * We take advantage of this while building the logical id
		 * lookup table. After reset, APICs are in xapic/flat mode, so
		 * if we find an apic with a different setting we assume this
		 * is the mode the OS wants all apics to be in; build the
		 * lookup table accordingly.
		 */
		if (apic_x2apic_mode(apic)) {
			new->ldr_bits = 32;
			new->cid_shift = 16;
			new->cid_mask = new->lid_mask = 0xffff;
		} else if (kvm_apic_sw_enabled(apic) &&
				!new->cid_mask /* flat mode */ &&
				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
			new->cid_shift = 4;
			new->cid_mask = 0xf;
			new->lid_mask = 0xf;
		}

		new->phys_map[kvm_apic_id(apic)] = apic;

		ldr = kvm_apic_get_reg(apic, APIC_LDR);
		cid = apic_cluster_id(new, ldr);
		lid = apic_logical_id(new, ldr);

		if (lid)
			new->logical_map[cid][ffs(lid) - 1] = apic;
	}
out:
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));
	rcu_assign_pointer(kvm->arch.apic_map, new);
	mutex_unlock(&kvm->arch.apic_map_lock);

	if (old)
		kfree_rcu(old, rcu);

	kvm_vcpu_request_scan_ioapic(kvm);
}

static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
	apic_set_reg(apic, APIC_ID, id << 24);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
	apic_set_reg(apic, APIC_LDR, id);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
{
	return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
}

static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
}

static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
}

static inline int
apic_lvtt_tscdeadline(struct kvm_lapic *apic) 248 { 249 return ((kvm_apic_get_reg(apic, APIC_LVTT) & 250 apic->lapic_timer.timer_mode_mask) == 251 APIC_LVT_TIMER_TSCDEADLINE); 252 } 253 254 static inline int apic_lvt_nmi_mode(u32 lvt_val) 255 { 256 return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; 257 } 258 259 void kvm_apic_set_version(struct kvm_vcpu *vcpu) 260 { 261 struct kvm_lapic *apic = vcpu->arch.apic; 262 struct kvm_cpuid_entry2 *feat; 263 u32 v = APIC_VERSION; 264 265 if (!kvm_vcpu_has_lapic(vcpu)) 266 return; 267 268 feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); 269 if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31)))) 270 v |= APIC_LVR_DIRECTED_EOI; 271 apic_set_reg(apic, APIC_LVR, v); 272 } 273 274 static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = { 275 LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ 276 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ 277 LVT_MASK | APIC_MODE_MASK, /* LVTPC */ 278 LINT_MASK, LINT_MASK, /* LVT0-1 */ 279 LVT_MASK /* LVTERR */ 280 }; 281 282 static int find_highest_vector(void *bitmap) 283 { 284 int vec; 285 u32 *reg; 286 287 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; 288 vec >= 0; vec -= APIC_VECTORS_PER_REG) { 289 reg = bitmap + REG_POS(vec); 290 if (*reg) 291 return fls(*reg) - 1 + vec; 292 } 293 294 return -1; 295 } 296 297 static u8 count_vectors(void *bitmap) 298 { 299 int vec; 300 u32 *reg; 301 u8 count = 0; 302 303 for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) { 304 reg = bitmap + REG_POS(vec); 305 count += hweight32(*reg); 306 } 307 308 return count; 309 } 310 311 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) 312 { 313 u32 i, pir_val; 314 struct kvm_lapic *apic = vcpu->arch.apic; 315 316 for (i = 0; i <= 7; i++) { 317 pir_val = xchg(&pir[i], 0); 318 if (pir_val) 319 *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val; 320 } 321 } 322 EXPORT_SYMBOL_GPL(kvm_apic_update_irr); 323 324 static inline void apic_set_irr(int vec, struct kvm_lapic *apic) 325 { 326 apic->irr_pending = true; 327 apic_set_vector(vec, apic->regs + APIC_IRR); 328 } 329 330 static inline int apic_search_irr(struct kvm_lapic *apic) 331 { 332 return find_highest_vector(apic->regs + APIC_IRR); 333 } 334 335 static inline int apic_find_highest_irr(struct kvm_lapic *apic) 336 { 337 int result; 338 339 /* 340 * Note that irr_pending is just a hint. It will be always 341 * true with virtual interrupt delivery enabled. 342 */ 343 if (!apic->irr_pending) 344 return -1; 345 346 kvm_x86_ops->sync_pir_to_irr(apic->vcpu); 347 result = apic_search_irr(apic); 348 ASSERT(result == -1 || result >= 16); 349 350 return result; 351 } 352 353 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) 354 { 355 apic->irr_pending = false; 356 apic_clear_vector(vec, apic->regs + APIC_IRR); 357 if (apic_search_irr(apic) != -1) 358 apic->irr_pending = true; 359 } 360 361 static inline void apic_set_isr(int vec, struct kvm_lapic *apic) 362 { 363 if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) 364 ++apic->isr_count; 365 BUG_ON(apic->isr_count > MAX_APIC_VECTOR); 366 /* 367 * ISR (in service register) bit is set when injecting an interrupt. 368 * The highest vector is injected. Thus the latest bit set matches 369 * the highest bit in ISR. 
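	 * (The ISR, like the IRR and TMR, is laid out as eight 32-bit
	 * registers spaced 16 bytes apart; e.g. vector 0x31 lives at bit
	 * VEC_POS(0x31) = 17 of the word at APIC_ISR + REG_POS(0x31) =
	 * APIC_ISR + 0x10.)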
	 */
	apic->highest_isr_cache = vec;
}

static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
		--apic->isr_count;
	BUG_ON(apic->isr_count < 0);
	apic->highest_isr_cache = -1;
}

int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
	int highest_irr;

	/* This may race with setting of irr in __apic_accept_irq() and
	 * the value returned may be wrong, but kvm_vcpu_kick() in
	 * __apic_accept_irq will cause a vmexit immediately and the value
	 * will be recalculated on the next vmentry.
	 */
	if (!kvm_vcpu_has_lapic(vcpu))
		return 0;
	highest_irr = apic_find_highest_irr(vcpu->arch.apic);

	return highest_irr;
}

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     unsigned long *dest_map);

int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
		unsigned long *dest_map)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
			irq->level, irq->trig_mode, dest_map);
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{

	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
				      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{

	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
				      sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
	u8 val;
	if (pv_eoi_get_user(vcpu, &val) < 0)
		apic_debug("Can't read EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
	return val & 0x1;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
		apic_debug("Can't set EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
	int result;

	/* Note that isr_count is always 1 with vid enabled */
	if (!apic->isr_count)
		return -1;
	if (likely(apic->highest_isr_cache != -1))
		return apic->highest_isr_cache;

	result = find_highest_vector(apic->regs + APIC_ISR);
	ASSERT(result == -1 || result >= 16);

	return result;
}

void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int i;

	for (i = 0; i < 8; i++)
		apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
}

static void apic_update_ppr(struct kvm_lapic *apic)
{
	u32 tpr, isrv, ppr, old_ppr;
	int isr;

	old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
	isr = apic_find_highest_isr(apic);
	isrv = (isr != -1) ?
isr : 0; 493 494 if ((tpr & 0xf0) >= (isrv & 0xf0)) 495 ppr = tpr & 0xff; 496 else 497 ppr = isrv & 0xf0; 498 499 apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x", 500 apic, ppr, isr, isrv); 501 502 if (old_ppr != ppr) { 503 apic_set_reg(apic, APIC_PROCPRI, ppr); 504 if (ppr < old_ppr) 505 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 506 } 507 } 508 509 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) 510 { 511 apic_set_reg(apic, APIC_TASKPRI, tpr); 512 apic_update_ppr(apic); 513 } 514 515 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) 516 { 517 return dest == 0xff || kvm_apic_id(apic) == dest; 518 } 519 520 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) 521 { 522 int result = 0; 523 u32 logical_id; 524 525 if (apic_x2apic_mode(apic)) { 526 logical_id = kvm_apic_get_reg(apic, APIC_LDR); 527 return logical_id & mda; 528 } 529 530 logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR)); 531 532 switch (kvm_apic_get_reg(apic, APIC_DFR)) { 533 case APIC_DFR_FLAT: 534 if (logical_id & mda) 535 result = 1; 536 break; 537 case APIC_DFR_CLUSTER: 538 if (((logical_id >> 4) == (mda >> 0x4)) 539 && (logical_id & mda & 0xf)) 540 result = 1; 541 break; 542 default: 543 apic_debug("Bad DFR vcpu %d: %08x\n", 544 apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR)); 545 break; 546 } 547 548 return result; 549 } 550 551 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 552 int short_hand, int dest, int dest_mode) 553 { 554 int result = 0; 555 struct kvm_lapic *target = vcpu->arch.apic; 556 557 apic_debug("target %p, source %p, dest 0x%x, " 558 "dest_mode 0x%x, short_hand 0x%x\n", 559 target, source, dest, dest_mode, short_hand); 560 561 ASSERT(target); 562 switch (short_hand) { 563 case APIC_DEST_NOSHORT: 564 if (dest_mode == 0) 565 /* Physical mode. */ 566 result = kvm_apic_match_physical_addr(target, dest); 567 else 568 /* Logical mode. */ 569 result = kvm_apic_match_logical_addr(target, dest); 570 break; 571 case APIC_DEST_SELF: 572 result = (target == source); 573 break; 574 case APIC_DEST_ALLINC: 575 result = 1; 576 break; 577 case APIC_DEST_ALLBUT: 578 result = (target != source); 579 break; 580 default: 581 apic_debug("kvm: apic: Bad dest shorthand value %x\n", 582 short_hand); 583 break; 584 } 585 586 return result; 587 } 588 589 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, 590 struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map) 591 { 592 struct kvm_apic_map *map; 593 unsigned long bitmap = 1; 594 struct kvm_lapic **dst; 595 int i; 596 bool ret = false; 597 598 *r = -1; 599 600 if (irq->shorthand == APIC_DEST_SELF) { 601 *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); 602 return true; 603 } 604 605 if (irq->shorthand) 606 return false; 607 608 rcu_read_lock(); 609 map = rcu_dereference(kvm->arch.apic_map); 610 611 if (!map) 612 goto out; 613 614 if (irq->dest_mode == 0) { /* physical mode */ 615 if (irq->delivery_mode == APIC_DM_LOWEST || 616 irq->dest_id == 0xff) 617 goto out; 618 dst = &map->phys_map[irq->dest_id & 0xff]; 619 } else { 620 u32 mda = irq->dest_id << (32 - map->ldr_bits); 621 622 dst = map->logical_map[apic_cluster_id(map, mda)]; 623 624 bitmap = apic_logical_id(map, mda); 625 626 if (irq->delivery_mode == APIC_DM_LOWEST) { 627 int l = -1; 628 for_each_set_bit(i, &bitmap, 16) { 629 if (!dst[i]) 630 continue; 631 if (l < 0) 632 l = i; 633 else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0) 634 l = i; 635 } 636 637 bitmap = (l >= 0) ? 
1 << l : 0; 638 } 639 } 640 641 for_each_set_bit(i, &bitmap, 16) { 642 if (!dst[i]) 643 continue; 644 if (*r < 0) 645 *r = 0; 646 *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); 647 } 648 649 ret = true; 650 out: 651 rcu_read_unlock(); 652 return ret; 653 } 654 655 /* 656 * Add a pending IRQ into lapic. 657 * Return 1 if successfully added and 0 if discarded. 658 */ 659 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 660 int vector, int level, int trig_mode, 661 unsigned long *dest_map) 662 { 663 int result = 0; 664 struct kvm_vcpu *vcpu = apic->vcpu; 665 666 switch (delivery_mode) { 667 case APIC_DM_LOWEST: 668 vcpu->arch.apic_arb_prio++; 669 case APIC_DM_FIXED: 670 /* FIXME add logic for vcpu on reset */ 671 if (unlikely(!apic_enabled(apic))) 672 break; 673 674 result = 1; 675 676 if (dest_map) 677 __set_bit(vcpu->vcpu_id, dest_map); 678 679 if (kvm_x86_ops->deliver_posted_interrupt) 680 kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); 681 else { 682 apic_set_irr(vector, apic); 683 684 kvm_make_request(KVM_REQ_EVENT, vcpu); 685 kvm_vcpu_kick(vcpu); 686 } 687 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, 688 trig_mode, vector, false); 689 break; 690 691 case APIC_DM_REMRD: 692 result = 1; 693 vcpu->arch.pv.pv_unhalted = 1; 694 kvm_make_request(KVM_REQ_EVENT, vcpu); 695 kvm_vcpu_kick(vcpu); 696 break; 697 698 case APIC_DM_SMI: 699 apic_debug("Ignoring guest SMI\n"); 700 break; 701 702 case APIC_DM_NMI: 703 result = 1; 704 kvm_inject_nmi(vcpu); 705 kvm_vcpu_kick(vcpu); 706 break; 707 708 case APIC_DM_INIT: 709 if (!trig_mode || level) { 710 result = 1; 711 /* assumes that there are only KVM_APIC_INIT/SIPI */ 712 apic->pending_events = (1UL << KVM_APIC_INIT); 713 /* make sure pending_events is visible before sending 714 * the request */ 715 smp_wmb(); 716 kvm_make_request(KVM_REQ_EVENT, vcpu); 717 kvm_vcpu_kick(vcpu); 718 } else { 719 apic_debug("Ignoring de-assert INIT to vcpu %d\n", 720 vcpu->vcpu_id); 721 } 722 break; 723 724 case APIC_DM_STARTUP: 725 apic_debug("SIPI to vcpu %d vector 0x%02x\n", 726 vcpu->vcpu_id, vector); 727 result = 1; 728 apic->sipi_vector = vector; 729 /* make sure sipi_vector is visible for the receiver */ 730 smp_wmb(); 731 set_bit(KVM_APIC_SIPI, &apic->pending_events); 732 kvm_make_request(KVM_REQ_EVENT, vcpu); 733 kvm_vcpu_kick(vcpu); 734 break; 735 736 case APIC_DM_EXTINT: 737 /* 738 * Should only be called by kvm_apic_local_deliver() with LVT0, 739 * before NMI watchdog was enabled. Already handled by 740 * kvm_apic_accept_pic_intr(). 
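	 * (kvm_apic_accept_pic_intr() is what decides whether the PIC may
	 * inject through LVT0 when it is programmed for ExtINT, so there is
	 * nothing left to do for this delivery mode here.)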
741 */ 742 break; 743 744 default: 745 printk(KERN_ERR "TODO: unsupported delivery mode %x\n", 746 delivery_mode); 747 break; 748 } 749 return result; 750 } 751 752 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) 753 { 754 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 755 } 756 757 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) 758 { 759 if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && 760 kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { 761 int trigger_mode; 762 if (apic_test_vector(vector, apic->regs + APIC_TMR)) 763 trigger_mode = IOAPIC_LEVEL_TRIG; 764 else 765 trigger_mode = IOAPIC_EDGE_TRIG; 766 kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); 767 } 768 } 769 770 static int apic_set_eoi(struct kvm_lapic *apic) 771 { 772 int vector = apic_find_highest_isr(apic); 773 774 trace_kvm_eoi(apic, vector); 775 776 /* 777 * Not every write EOI will has corresponding ISR, 778 * one example is when Kernel check timer on setup_IO_APIC 779 */ 780 if (vector == -1) 781 return vector; 782 783 apic_clear_isr(vector, apic); 784 apic_update_ppr(apic); 785 786 kvm_ioapic_send_eoi(apic, vector); 787 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 788 return vector; 789 } 790 791 /* 792 * this interface assumes a trap-like exit, which has already finished 793 * desired side effect including vISR and vPPR update. 794 */ 795 void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) 796 { 797 struct kvm_lapic *apic = vcpu->arch.apic; 798 799 trace_kvm_eoi(apic, vector); 800 801 kvm_ioapic_send_eoi(apic, vector); 802 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 803 } 804 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); 805 806 static void apic_send_ipi(struct kvm_lapic *apic) 807 { 808 u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); 809 u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2); 810 struct kvm_lapic_irq irq; 811 812 irq.vector = icr_low & APIC_VECTOR_MASK; 813 irq.delivery_mode = icr_low & APIC_MODE_MASK; 814 irq.dest_mode = icr_low & APIC_DEST_MASK; 815 irq.level = icr_low & APIC_INT_ASSERT; 816 irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; 817 irq.shorthand = icr_low & APIC_SHORT_MASK; 818 if (apic_x2apic_mode(apic)) 819 irq.dest_id = icr_high; 820 else 821 irq.dest_id = GET_APIC_DEST_FIELD(icr_high); 822 823 trace_kvm_apic_ipi(icr_low, irq.dest_id); 824 825 apic_debug("icr_high 0x%x, icr_low 0x%x, " 826 "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " 827 "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", 828 icr_high, icr_low, irq.shorthand, irq.dest_id, 829 irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, 830 irq.vector); 831 832 kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); 833 } 834 835 static u32 apic_get_tmcct(struct kvm_lapic *apic) 836 { 837 ktime_t remaining; 838 s64 ns; 839 u32 tmcct; 840 841 ASSERT(apic != NULL); 842 843 /* if initial count is 0, current count should also be 0 */ 844 if (kvm_apic_get_reg(apic, APIC_TMICT) == 0) 845 return 0; 846 847 remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); 848 if (ktime_to_ns(remaining) < 0) 849 remaining = ktime_set(0, 0); 850 851 ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period); 852 tmcct = div64_u64(ns, 853 (APIC_BUS_CYCLE_NS * apic->divide_count)); 854 855 return tmcct; 856 } 857 858 static void __report_tpr_access(struct kvm_lapic *apic, bool write) 859 { 860 struct kvm_vcpu *vcpu = apic->vcpu; 861 struct kvm_run *run = vcpu->run; 862 863 
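	/*
	 * Flag the vcpu so that the next heavyweight exit reports
	 * KVM_EXIT_TPR_ACCESS to userspace, with the guest RIP and the
	 * access direction filled in below.
	 */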
kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu); 864 run->tpr_access.rip = kvm_rip_read(vcpu); 865 run->tpr_access.is_write = write; 866 } 867 868 static inline void report_tpr_access(struct kvm_lapic *apic, bool write) 869 { 870 if (apic->vcpu->arch.tpr_access_reporting) 871 __report_tpr_access(apic, write); 872 } 873 874 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) 875 { 876 u32 val = 0; 877 878 if (offset >= LAPIC_MMIO_LENGTH) 879 return 0; 880 881 switch (offset) { 882 case APIC_ID: 883 if (apic_x2apic_mode(apic)) 884 val = kvm_apic_id(apic); 885 else 886 val = kvm_apic_id(apic) << 24; 887 break; 888 case APIC_ARBPRI: 889 apic_debug("Access APIC ARBPRI register which is for P6\n"); 890 break; 891 892 case APIC_TMCCT: /* Timer CCR */ 893 if (apic_lvtt_tscdeadline(apic)) 894 return 0; 895 896 val = apic_get_tmcct(apic); 897 break; 898 case APIC_PROCPRI: 899 apic_update_ppr(apic); 900 val = kvm_apic_get_reg(apic, offset); 901 break; 902 case APIC_TASKPRI: 903 report_tpr_access(apic, false); 904 /* fall thru */ 905 default: 906 val = kvm_apic_get_reg(apic, offset); 907 break; 908 } 909 910 return val; 911 } 912 913 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev) 914 { 915 return container_of(dev, struct kvm_lapic, dev); 916 } 917 918 static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len, 919 void *data) 920 { 921 unsigned char alignment = offset & 0xf; 922 u32 result; 923 /* this bitmask has a bit cleared for each reserved register */ 924 static const u64 rmask = 0x43ff01ffffffe70cULL; 925 926 if ((alignment + len) > 4) { 927 apic_debug("KVM_APIC_READ: alignment error %x %d\n", 928 offset, len); 929 return 1; 930 } 931 932 if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) { 933 apic_debug("KVM_APIC_READ: read reserved register %x\n", 934 offset); 935 return 1; 936 } 937 938 result = __apic_read(apic, offset & ~0xf); 939 940 trace_kvm_apic_read(offset, result); 941 942 switch (len) { 943 case 1: 944 case 2: 945 case 4: 946 memcpy(data, (char *)&result + alignment, len); 947 break; 948 default: 949 printk(KERN_ERR "Local APIC read with len = %x, " 950 "should be 1,2, or 4 instead\n", len); 951 break; 952 } 953 return 0; 954 } 955 956 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) 957 { 958 return kvm_apic_hw_enabled(apic) && 959 addr >= apic->base_address && 960 addr < apic->base_address + LAPIC_MMIO_LENGTH; 961 } 962 963 static int apic_mmio_read(struct kvm_io_device *this, 964 gpa_t address, int len, void *data) 965 { 966 struct kvm_lapic *apic = to_lapic(this); 967 u32 offset = address - apic->base_address; 968 969 if (!apic_mmio_in_range(apic, address)) 970 return -EOPNOTSUPP; 971 972 apic_reg_read(apic, offset, len, data); 973 974 return 0; 975 } 976 977 static void update_divide_count(struct kvm_lapic *apic) 978 { 979 u32 tmp1, tmp2, tdcr; 980 981 tdcr = kvm_apic_get_reg(apic, APIC_TDCR); 982 tmp1 = tdcr & 0xf; 983 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; 984 apic->divide_count = 0x1 << (tmp2 & 0x7); 985 986 apic_debug("timer divide count is 0x%x\n", 987 apic->divide_count); 988 } 989 990 static void start_apic_timer(struct kvm_lapic *apic) 991 { 992 ktime_t now; 993 atomic_set(&apic->lapic_timer.pending, 0); 994 995 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { 996 /* lapic timer in oneshot or periodic mode */ 997 now = apic->lapic_timer.timer.base->get_time(); 998 apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT) 999 * APIC_BUS_CYCLE_NS * apic->divide_count; 1000 
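		/*
		 * The period is the initial count scaled by the bus cycle
		 * time and the TDCR divider, e.g. TMICT = 1000000 with
		 * divide-by-16 gives 1000000 * 1 ns * 16 = 16 ms.
		 */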
1001 if (!apic->lapic_timer.period) 1002 return; 1003 /* 1004 * Do not allow the guest to program periodic timers with small 1005 * interval, since the hrtimers are not throttled by the host 1006 * scheduler. 1007 */ 1008 if (apic_lvtt_period(apic)) { 1009 s64 min_period = min_timer_period_us * 1000LL; 1010 1011 if (apic->lapic_timer.period < min_period) { 1012 pr_info_ratelimited( 1013 "kvm: vcpu %i: requested %lld ns " 1014 "lapic timer period limited to %lld ns\n", 1015 apic->vcpu->vcpu_id, 1016 apic->lapic_timer.period, min_period); 1017 apic->lapic_timer.period = min_period; 1018 } 1019 } 1020 1021 hrtimer_start(&apic->lapic_timer.timer, 1022 ktime_add_ns(now, apic->lapic_timer.period), 1023 HRTIMER_MODE_ABS); 1024 1025 apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" 1026 PRIx64 ", " 1027 "timer initial count 0x%x, period %lldns, " 1028 "expire @ 0x%016" PRIx64 ".\n", __func__, 1029 APIC_BUS_CYCLE_NS, ktime_to_ns(now), 1030 kvm_apic_get_reg(apic, APIC_TMICT), 1031 apic->lapic_timer.period, 1032 ktime_to_ns(ktime_add_ns(now, 1033 apic->lapic_timer.period))); 1034 } else if (apic_lvtt_tscdeadline(apic)) { 1035 /* lapic timer in tsc deadline mode */ 1036 u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; 1037 u64 ns = 0; 1038 struct kvm_vcpu *vcpu = apic->vcpu; 1039 unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; 1040 unsigned long flags; 1041 1042 if (unlikely(!tscdeadline || !this_tsc_khz)) 1043 return; 1044 1045 local_irq_save(flags); 1046 1047 now = apic->lapic_timer.timer.base->get_time(); 1048 guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()); 1049 if (likely(tscdeadline > guest_tsc)) { 1050 ns = (tscdeadline - guest_tsc) * 1000000ULL; 1051 do_div(ns, this_tsc_khz); 1052 } 1053 hrtimer_start(&apic->lapic_timer.timer, 1054 ktime_add_ns(now, ns), HRTIMER_MODE_ABS); 1055 1056 local_irq_restore(flags); 1057 } 1058 } 1059 1060 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) 1061 { 1062 int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0)); 1063 1064 if (apic_lvt_nmi_mode(lvt0_val)) { 1065 if (!nmi_wd_enabled) { 1066 apic_debug("Receive NMI setting on APIC_LVT0 " 1067 "for cpu %d\n", apic->vcpu->vcpu_id); 1068 apic->vcpu->kvm->arch.vapics_in_nmi_mode++; 1069 } 1070 } else if (nmi_wd_enabled) 1071 apic->vcpu->kvm->arch.vapics_in_nmi_mode--; 1072 } 1073 1074 static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) 1075 { 1076 int ret = 0; 1077 1078 trace_kvm_apic_write(reg, val); 1079 1080 switch (reg) { 1081 case APIC_ID: /* Local APIC ID */ 1082 if (!apic_x2apic_mode(apic)) 1083 kvm_apic_set_id(apic, val >> 24); 1084 else 1085 ret = 1; 1086 break; 1087 1088 case APIC_TASKPRI: 1089 report_tpr_access(apic, true); 1090 apic_set_tpr(apic, val & 0xff); 1091 break; 1092 1093 case APIC_EOI: 1094 apic_set_eoi(apic); 1095 break; 1096 1097 case APIC_LDR: 1098 if (!apic_x2apic_mode(apic)) 1099 kvm_apic_set_ldr(apic, val & APIC_LDR_MASK); 1100 else 1101 ret = 1; 1102 break; 1103 1104 case APIC_DFR: 1105 if (!apic_x2apic_mode(apic)) { 1106 apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); 1107 recalculate_apic_map(apic->vcpu->kvm); 1108 } else 1109 ret = 1; 1110 break; 1111 1112 case APIC_SPIV: { 1113 u32 mask = 0x3ff; 1114 if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) 1115 mask |= APIC_SPIV_DIRECTED_EOI; 1116 apic_set_spiv(apic, val & mask); 1117 if (!(val & APIC_SPIV_APIC_ENABLED)) { 1118 int i; 1119 u32 lvt_val; 1120 1121 for (i = 0; i < APIC_LVT_NUM; i++) { 1122 lvt_val = 
kvm_apic_get_reg(apic, 1123 APIC_LVTT + 0x10 * i); 1124 apic_set_reg(apic, APIC_LVTT + 0x10 * i, 1125 lvt_val | APIC_LVT_MASKED); 1126 } 1127 atomic_set(&apic->lapic_timer.pending, 0); 1128 1129 } 1130 break; 1131 } 1132 case APIC_ICR: 1133 /* No delay here, so we always clear the pending bit */ 1134 apic_set_reg(apic, APIC_ICR, val & ~(1 << 12)); 1135 apic_send_ipi(apic); 1136 break; 1137 1138 case APIC_ICR2: 1139 if (!apic_x2apic_mode(apic)) 1140 val &= 0xff000000; 1141 apic_set_reg(apic, APIC_ICR2, val); 1142 break; 1143 1144 case APIC_LVT0: 1145 apic_manage_nmi_watchdog(apic, val); 1146 case APIC_LVTTHMR: 1147 case APIC_LVTPC: 1148 case APIC_LVT1: 1149 case APIC_LVTERR: 1150 /* TODO: Check vector */ 1151 if (!kvm_apic_sw_enabled(apic)) 1152 val |= APIC_LVT_MASKED; 1153 1154 val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; 1155 apic_set_reg(apic, reg, val); 1156 1157 break; 1158 1159 case APIC_LVTT: 1160 if ((kvm_apic_get_reg(apic, APIC_LVTT) & 1161 apic->lapic_timer.timer_mode_mask) != 1162 (val & apic->lapic_timer.timer_mode_mask)) 1163 hrtimer_cancel(&apic->lapic_timer.timer); 1164 1165 if (!kvm_apic_sw_enabled(apic)) 1166 val |= APIC_LVT_MASKED; 1167 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); 1168 apic_set_reg(apic, APIC_LVTT, val); 1169 break; 1170 1171 case APIC_TMICT: 1172 if (apic_lvtt_tscdeadline(apic)) 1173 break; 1174 1175 hrtimer_cancel(&apic->lapic_timer.timer); 1176 apic_set_reg(apic, APIC_TMICT, val); 1177 start_apic_timer(apic); 1178 break; 1179 1180 case APIC_TDCR: 1181 if (val & 4) 1182 apic_debug("KVM_WRITE:TDCR %x\n", val); 1183 apic_set_reg(apic, APIC_TDCR, val); 1184 update_divide_count(apic); 1185 break; 1186 1187 case APIC_ESR: 1188 if (apic_x2apic_mode(apic) && val != 0) { 1189 apic_debug("KVM_WRITE:ESR not zero %x\n", val); 1190 ret = 1; 1191 } 1192 break; 1193 1194 case APIC_SELF_IPI: 1195 if (apic_x2apic_mode(apic)) { 1196 apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff)); 1197 } else 1198 ret = 1; 1199 break; 1200 default: 1201 ret = 1; 1202 break; 1203 } 1204 if (ret) 1205 apic_debug("Local APIC Write to read-only register %x\n", reg); 1206 return ret; 1207 } 1208 1209 static int apic_mmio_write(struct kvm_io_device *this, 1210 gpa_t address, int len, const void *data) 1211 { 1212 struct kvm_lapic *apic = to_lapic(this); 1213 unsigned int offset = address - apic->base_address; 1214 u32 val; 1215 1216 if (!apic_mmio_in_range(apic, address)) 1217 return -EOPNOTSUPP; 1218 1219 /* 1220 * APIC register must be aligned on 128-bits boundary. 1221 * 32/64/128 bits registers must be accessed thru 32 bits. 1222 * Refer SDM 8.4.1 1223 */ 1224 if (len != 4 || (offset & 0xf)) { 1225 /* Don't shout loud, $infamous_os would cause only noise. 
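		 * A conforming access is a single 32-bit write to a 16-byte
		 * aligned offset (e.g. a 4-byte write to APIC_EOI at offset
		 * 0xb0); anything else is dropped here without being applied.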
*/ 1226 apic_debug("apic write: bad size=%d %lx\n", len, (long)address); 1227 return 0; 1228 } 1229 1230 val = *(u32*)data; 1231 1232 /* too common printing */ 1233 if (offset != APIC_EOI) 1234 apic_debug("%s: offset 0x%x with length 0x%x, and value is " 1235 "0x%x\n", __func__, offset, len, val); 1236 1237 apic_reg_write(apic, offset & 0xff0, val); 1238 1239 return 0; 1240 } 1241 1242 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) 1243 { 1244 if (kvm_vcpu_has_lapic(vcpu)) 1245 apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); 1246 } 1247 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); 1248 1249 /* emulate APIC access in a trap manner */ 1250 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) 1251 { 1252 u32 val = 0; 1253 1254 /* hw has done the conditional check and inst decode */ 1255 offset &= 0xff0; 1256 1257 apic_reg_read(vcpu->arch.apic, offset, 4, &val); 1258 1259 /* TODO: optimize to just emulate side effect w/o one more write */ 1260 apic_reg_write(vcpu->arch.apic, offset, val); 1261 } 1262 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); 1263 1264 void kvm_free_lapic(struct kvm_vcpu *vcpu) 1265 { 1266 struct kvm_lapic *apic = vcpu->arch.apic; 1267 1268 if (!vcpu->arch.apic) 1269 return; 1270 1271 hrtimer_cancel(&apic->lapic_timer.timer); 1272 1273 if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) 1274 static_key_slow_dec_deferred(&apic_hw_disabled); 1275 1276 if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED)) 1277 static_key_slow_dec_deferred(&apic_sw_disabled); 1278 1279 if (apic->regs) 1280 free_page((unsigned long)apic->regs); 1281 1282 kfree(apic); 1283 } 1284 1285 /* 1286 *---------------------------------------------------------------------- 1287 * LAPIC interface 1288 *---------------------------------------------------------------------- 1289 */ 1290 1291 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) 1292 { 1293 struct kvm_lapic *apic = vcpu->arch.apic; 1294 1295 if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || 1296 apic_lvtt_period(apic)) 1297 return 0; 1298 1299 return apic->lapic_timer.tscdeadline; 1300 } 1301 1302 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) 1303 { 1304 struct kvm_lapic *apic = vcpu->arch.apic; 1305 1306 if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || 1307 apic_lvtt_period(apic)) 1308 return; 1309 1310 hrtimer_cancel(&apic->lapic_timer.timer); 1311 apic->lapic_timer.tscdeadline = data; 1312 start_apic_timer(apic); 1313 } 1314 1315 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) 1316 { 1317 struct kvm_lapic *apic = vcpu->arch.apic; 1318 1319 if (!kvm_vcpu_has_lapic(vcpu)) 1320 return; 1321 1322 apic_set_tpr(apic, ((cr8 & 0x0f) << 4) 1323 | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4)); 1324 } 1325 1326 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) 1327 { 1328 u64 tpr; 1329 1330 if (!kvm_vcpu_has_lapic(vcpu)) 1331 return 0; 1332 1333 tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); 1334 1335 return (tpr & 0xf0) >> 4; 1336 } 1337 1338 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) 1339 { 1340 u64 old_value = vcpu->arch.apic_base; 1341 struct kvm_lapic *apic = vcpu->arch.apic; 1342 1343 if (!apic) { 1344 value |= MSR_IA32_APICBASE_BSP; 1345 vcpu->arch.apic_base = value; 1346 return; 1347 } 1348 1349 /* update jump label if enable bit changes */ 1350 if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) { 1351 if (value & MSR_IA32_APICBASE_ENABLE) 1352 static_key_slow_dec_deferred(&apic_hw_disabled); 1353 else 1354 
static_key_slow_inc(&apic_hw_disabled.key); 1355 recalculate_apic_map(vcpu->kvm); 1356 } 1357 1358 if (!kvm_vcpu_is_bsp(apic->vcpu)) 1359 value &= ~MSR_IA32_APICBASE_BSP; 1360 1361 vcpu->arch.apic_base = value; 1362 if ((old_value ^ value) & X2APIC_ENABLE) { 1363 if (value & X2APIC_ENABLE) { 1364 u32 id = kvm_apic_id(apic); 1365 u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); 1366 kvm_apic_set_ldr(apic, ldr); 1367 kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); 1368 } else 1369 kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); 1370 } 1371 1372 apic->base_address = apic->vcpu->arch.apic_base & 1373 MSR_IA32_APICBASE_BASE; 1374 1375 /* with FSB delivery interrupt, we can restart APIC functionality */ 1376 apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " 1377 "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address); 1378 1379 } 1380 1381 void kvm_lapic_reset(struct kvm_vcpu *vcpu) 1382 { 1383 struct kvm_lapic *apic; 1384 int i; 1385 1386 apic_debug("%s\n", __func__); 1387 1388 ASSERT(vcpu); 1389 apic = vcpu->arch.apic; 1390 ASSERT(apic != NULL); 1391 1392 /* Stop the timer in case it's a reset to an active apic */ 1393 hrtimer_cancel(&apic->lapic_timer.timer); 1394 1395 kvm_apic_set_id(apic, vcpu->vcpu_id); 1396 kvm_apic_set_version(apic->vcpu); 1397 1398 for (i = 0; i < APIC_LVT_NUM; i++) 1399 apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); 1400 apic_set_reg(apic, APIC_LVT0, 1401 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); 1402 1403 apic_set_reg(apic, APIC_DFR, 0xffffffffU); 1404 apic_set_spiv(apic, 0xff); 1405 apic_set_reg(apic, APIC_TASKPRI, 0); 1406 kvm_apic_set_ldr(apic, 0); 1407 apic_set_reg(apic, APIC_ESR, 0); 1408 apic_set_reg(apic, APIC_ICR, 0); 1409 apic_set_reg(apic, APIC_ICR2, 0); 1410 apic_set_reg(apic, APIC_TDCR, 0); 1411 apic_set_reg(apic, APIC_TMICT, 0); 1412 for (i = 0; i < 8; i++) { 1413 apic_set_reg(apic, APIC_IRR + 0x10 * i, 0); 1414 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 1415 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 1416 } 1417 apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm); 1418 apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm); 1419 apic->highest_isr_cache = -1; 1420 update_divide_count(apic); 1421 atomic_set(&apic->lapic_timer.pending, 0); 1422 if (kvm_vcpu_is_bsp(vcpu)) 1423 kvm_lapic_set_base(vcpu, 1424 vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP); 1425 vcpu->arch.pv_eoi.msr_val = 0; 1426 apic_update_ppr(apic); 1427 1428 vcpu->arch.apic_arb_prio = 0; 1429 vcpu->arch.apic_attention = 0; 1430 1431 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" 1432 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, 1433 vcpu, kvm_apic_id(apic), 1434 vcpu->arch.apic_base, apic->base_address); 1435 } 1436 1437 /* 1438 *---------------------------------------------------------------------- 1439 * timer interface 1440 *---------------------------------------------------------------------- 1441 */ 1442 1443 static bool lapic_is_periodic(struct kvm_lapic *apic) 1444 { 1445 return apic_lvtt_period(apic); 1446 } 1447 1448 int apic_has_pending_timer(struct kvm_vcpu *vcpu) 1449 { 1450 struct kvm_lapic *apic = vcpu->arch.apic; 1451 1452 if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) && 1453 apic_lvt_enabled(apic, APIC_LVTT)) 1454 return atomic_read(&apic->lapic_timer.pending); 1455 1456 return 0; 1457 } 1458 1459 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) 1460 { 1461 u32 reg = kvm_apic_get_reg(apic, lvt_type); 1462 int vector, mode, trig_mode; 1463 1464 if (kvm_apic_hw_enabled(apic) && !(reg & 
APIC_LVT_MASKED)) {
		vector = reg & APIC_VECTOR_MASK;
		mode = reg & APIC_MODE_MASK;
		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
		return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
					NULL);
	}
	return 0;
}

void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic)
		kvm_apic_local_deliver(apic, APIC_LVT0);
}

static const struct kvm_io_device_ops apic_mmio_ops = {
	.read     = apic_mmio_read,
	.write    = apic_mmio_write,
};

static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
{
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
	struct kvm_vcpu *vcpu = apic->vcpu;
	wait_queue_head_t *q = &vcpu->wq;

	/*
	 * There is a race window between reading and incrementing, but we do
	 * not care about potentially losing timer events in the !reinject
	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
	 * in vcpu_enter_guest.
	 */
	if (!atomic_read(&ktimer->pending)) {
		atomic_inc(&ktimer->pending);
		/* FIXME: this code should not know anything about vcpus */
		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
	}

	if (waitqueue_active(q))
		wake_up_interruptible(q);

	if (lapic_is_periodic(apic)) {
		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
		return HRTIMER_RESTART;
	} else
		return HRTIMER_NORESTART;
}

int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;

	ASSERT(vcpu != NULL);
	apic_debug("apic_init %d\n", vcpu->vcpu_id);

	apic = kzalloc(sizeof(*apic), GFP_KERNEL);
	if (!apic)
		goto nomem;

	vcpu->arch.apic = apic;

	apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
	if (!apic->regs) {
		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
		       vcpu->vcpu_id);
		goto nomem_free_apic;
	}
	apic->vcpu = vcpu;

	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS);
	apic->lapic_timer.timer.function = apic_timer_fn;

	/*
	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
	 * thinking that APIC state has changed.
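	 * In particular, MSR_IA32_APICBASE_ENABLE is already set below, so
	 * the kvm_lapic_set_base() call does not see the enable bit flip and
	 * leaves the apic_hw_disabled static key alone.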
1544 */ 1545 vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; 1546 kvm_lapic_set_base(vcpu, 1547 APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); 1548 1549 static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ 1550 kvm_lapic_reset(vcpu); 1551 kvm_iodevice_init(&apic->dev, &apic_mmio_ops); 1552 1553 return 0; 1554 nomem_free_apic: 1555 kfree(apic); 1556 nomem: 1557 return -ENOMEM; 1558 } 1559 1560 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 1561 { 1562 struct kvm_lapic *apic = vcpu->arch.apic; 1563 int highest_irr; 1564 1565 if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic)) 1566 return -1; 1567 1568 apic_update_ppr(apic); 1569 highest_irr = apic_find_highest_irr(apic); 1570 if ((highest_irr == -1) || 1571 ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI))) 1572 return -1; 1573 return highest_irr; 1574 } 1575 1576 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) 1577 { 1578 u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0); 1579 int r = 0; 1580 1581 if (!kvm_apic_hw_enabled(vcpu->arch.apic)) 1582 r = 1; 1583 if ((lvt0 & APIC_LVT_MASKED) == 0 && 1584 GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) 1585 r = 1; 1586 return r; 1587 } 1588 1589 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) 1590 { 1591 struct kvm_lapic *apic = vcpu->arch.apic; 1592 1593 if (!kvm_vcpu_has_lapic(vcpu)) 1594 return; 1595 1596 if (atomic_read(&apic->lapic_timer.pending) > 0) { 1597 kvm_apic_local_deliver(apic, APIC_LVTT); 1598 atomic_set(&apic->lapic_timer.pending, 0); 1599 } 1600 } 1601 1602 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) 1603 { 1604 int vector = kvm_apic_has_interrupt(vcpu); 1605 struct kvm_lapic *apic = vcpu->arch.apic; 1606 1607 if (vector == -1) 1608 return -1; 1609 1610 apic_set_isr(vector, apic); 1611 apic_update_ppr(apic); 1612 apic_clear_irr(vector, apic); 1613 return vector; 1614 } 1615 1616 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, 1617 struct kvm_lapic_state *s) 1618 { 1619 struct kvm_lapic *apic = vcpu->arch.apic; 1620 1621 kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); 1622 /* set SPIV separately to get count of SW disabled APICs right */ 1623 apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); 1624 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); 1625 /* call kvm_apic_set_id() to put apic into apic_map */ 1626 kvm_apic_set_id(apic, kvm_apic_id(apic)); 1627 kvm_apic_set_version(vcpu); 1628 1629 apic_update_ppr(apic); 1630 hrtimer_cancel(&apic->lapic_timer.timer); 1631 update_divide_count(apic); 1632 start_apic_timer(apic); 1633 apic->irr_pending = true; 1634 apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? 1635 1 : count_vectors(apic->regs + APIC_ISR); 1636 apic->highest_isr_cache = -1; 1637 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); 1638 kvm_make_request(KVM_REQ_EVENT, vcpu); 1639 kvm_rtc_eoi_tracking_restore_one(vcpu); 1640 } 1641 1642 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) 1643 { 1644 struct hrtimer *timer; 1645 1646 if (!kvm_vcpu_has_lapic(vcpu)) 1647 return; 1648 1649 timer = &vcpu->arch.apic->lapic_timer.timer; 1650 if (hrtimer_cancel(timer)) 1651 hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 1652 } 1653 1654 /* 1655 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt 1656 * 1657 * Detect whether guest triggered PV EOI since the 1658 * last entry. If yes, set EOI on guests's behalf. 1659 * Clear PV EOI in guest memory in any case. 
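 * (The guest signals a completed EOI by clearing KVM_PV_EOI_ENABLED in the
 * memory it registered via MSR_KVM_PV_EOI_EN, instead of writing the EOI
 * register; a flag that is still set here therefore means no EOI happened.)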
1660 */ 1661 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, 1662 struct kvm_lapic *apic) 1663 { 1664 bool pending; 1665 int vector; 1666 /* 1667 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host 1668 * and KVM_PV_EOI_ENABLED in guest memory as follows: 1669 * 1670 * KVM_APIC_PV_EOI_PENDING is unset: 1671 * -> host disabled PV EOI. 1672 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set: 1673 * -> host enabled PV EOI, guest did not execute EOI yet. 1674 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset: 1675 * -> host enabled PV EOI, guest executed EOI. 1676 */ 1677 BUG_ON(!pv_eoi_enabled(vcpu)); 1678 pending = pv_eoi_get_pending(vcpu); 1679 /* 1680 * Clear pending bit in any case: it will be set again on vmentry. 1681 * While this might not be ideal from performance point of view, 1682 * this makes sure pv eoi is only enabled when we know it's safe. 1683 */ 1684 pv_eoi_clr_pending(vcpu); 1685 if (pending) 1686 return; 1687 vector = apic_set_eoi(apic); 1688 trace_kvm_pv_eoi(apic, vector); 1689 } 1690 1691 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) 1692 { 1693 u32 data; 1694 void *vapic; 1695 1696 if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention)) 1697 apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic); 1698 1699 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 1700 return; 1701 1702 vapic = kmap_atomic(vcpu->arch.apic->vapic_page); 1703 data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)); 1704 kunmap_atomic(vapic); 1705 1706 apic_set_tpr(vcpu->arch.apic, data & 0xff); 1707 } 1708 1709 /* 1710 * apic_sync_pv_eoi_to_guest - called before vmentry 1711 * 1712 * Detect whether it's safe to enable PV EOI and 1713 * if yes do so. 1714 */ 1715 static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, 1716 struct kvm_lapic *apic) 1717 { 1718 if (!pv_eoi_enabled(vcpu) || 1719 /* IRR set or many bits in ISR: could be nested. */ 1720 apic->irr_pending || 1721 /* Cache not set: could be safe but we don't bother. */ 1722 apic->highest_isr_cache == -1 || 1723 /* Need EOI to update ioapic. */ 1724 kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) { 1725 /* 1726 * PV EOI was disabled by apic_sync_pv_eoi_from_guest 1727 * so we need not do anything here. 
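		 * Otherwise pv_eoi_set_pending() below re-arms it: it sets
		 * the flag in guest memory and KVM_APIC_PV_EOI_PENDING in
		 * apic_attention so the next exit syncs it back.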
1728 */ 1729 return; 1730 } 1731 1732 pv_eoi_set_pending(apic->vcpu); 1733 } 1734 1735 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) 1736 { 1737 u32 data, tpr; 1738 int max_irr, max_isr; 1739 struct kvm_lapic *apic = vcpu->arch.apic; 1740 void *vapic; 1741 1742 apic_sync_pv_eoi_to_guest(vcpu, apic); 1743 1744 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 1745 return; 1746 1747 tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff; 1748 max_irr = apic_find_highest_irr(apic); 1749 if (max_irr < 0) 1750 max_irr = 0; 1751 max_isr = apic_find_highest_isr(apic); 1752 if (max_isr < 0) 1753 max_isr = 0; 1754 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); 1755 1756 vapic = kmap_atomic(vcpu->arch.apic->vapic_page); 1757 *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data; 1758 kunmap_atomic(vapic); 1759 } 1760 1761 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) 1762 { 1763 vcpu->arch.apic->vapic_addr = vapic_addr; 1764 if (vapic_addr) 1765 __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); 1766 else 1767 __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); 1768 } 1769 1770 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1771 { 1772 struct kvm_lapic *apic = vcpu->arch.apic; 1773 u32 reg = (msr - APIC_BASE_MSR) << 4; 1774 1775 if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) 1776 return 1; 1777 1778 /* if this is ICR write vector before command */ 1779 if (msr == 0x830) 1780 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 1781 return apic_reg_write(apic, reg, (u32)data); 1782 } 1783 1784 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) 1785 { 1786 struct kvm_lapic *apic = vcpu->arch.apic; 1787 u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; 1788 1789 if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) 1790 return 1; 1791 1792 if (apic_reg_read(apic, reg, 4, &low)) 1793 return 1; 1794 if (msr == 0x830) 1795 apic_reg_read(apic, APIC_ICR2, 4, &high); 1796 1797 *data = (((u64)high) << 32) | low; 1798 1799 return 0; 1800 } 1801 1802 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) 1803 { 1804 struct kvm_lapic *apic = vcpu->arch.apic; 1805 1806 if (!kvm_vcpu_has_lapic(vcpu)) 1807 return 1; 1808 1809 /* if this is ICR write vector before command */ 1810 if (reg == APIC_ICR) 1811 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 1812 return apic_reg_write(apic, reg, (u32)data); 1813 } 1814 1815 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) 1816 { 1817 struct kvm_lapic *apic = vcpu->arch.apic; 1818 u32 low, high = 0; 1819 1820 if (!kvm_vcpu_has_lapic(vcpu)) 1821 return 1; 1822 1823 if (apic_reg_read(apic, reg, 4, &low)) 1824 return 1; 1825 if (reg == APIC_ICR) 1826 apic_reg_read(apic, APIC_ICR2, 4, &high); 1827 1828 *data = (((u64)high) << 32) | low; 1829 1830 return 0; 1831 } 1832 1833 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) 1834 { 1835 u64 addr = data & ~KVM_MSR_ENABLED; 1836 if (!IS_ALIGNED(addr, 4)) 1837 return 1; 1838 1839 vcpu->arch.pv_eoi.msr_val = data; 1840 if (!pv_eoi_enabled(vcpu)) 1841 return 0; 1842 return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, 1843 addr, sizeof(u8)); 1844 } 1845 1846 void kvm_apic_accept_events(struct kvm_vcpu *vcpu) 1847 { 1848 struct kvm_lapic *apic = vcpu->arch.apic; 1849 unsigned int sipi_vector; 1850 unsigned long pe; 1851 1852 if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) 1853 return; 1854 1855 pe = 
xchg(&apic->pending_events, 0); 1856 1857 if (test_bit(KVM_APIC_INIT, &pe)) { 1858 kvm_lapic_reset(vcpu); 1859 kvm_vcpu_reset(vcpu); 1860 if (kvm_vcpu_is_bsp(apic->vcpu)) 1861 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 1862 else 1863 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; 1864 } 1865 if (test_bit(KVM_APIC_SIPI, &pe) && 1866 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { 1867 /* evaluate pending_events before reading the vector */ 1868 smp_rmb(); 1869 sipi_vector = apic->sipi_vector; 1870 pr_debug("vcpu %d received sipi with vector # %x\n", 1871 vcpu->vcpu_id, sipi_vector); 1872 kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); 1873 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 1874 } 1875 } 1876 1877 void kvm_lapic_init(void) 1878 { 1879 /* do not patch jump label more than once per second */ 1880 jump_label_rate_limit(&apic_hw_disabled, HZ); 1881 jump_label_rate_limit(&apic_sw_disabled, HZ); 1882 } 1883
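
/*
 * kvm_x2apic_msr_read() and kvm_x2apic_msr_write() above map an x2APIC MSR
 * index onto the xAPIC register offset as reg = (msr - APIC_BASE_MSR) << 4;
 * e.g. MSR 0x808 -> APIC_TASKPRI (0x80), MSR 0x80b -> APIC_EOI (0xb0) and
 * MSR 0x830 -> APIC_ICR (0x300), with the 64-bit ICR value split across
 * APIC_ICR and APIC_ICR2.
 */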