/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

#define APIC_BUS_CYCLE_NS 1

/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define apic_debug(fmt, arg...)

#define APIC_LVT_NUM			6
/* 0x14 is the APIC version reported by Xeon and Pentium 4 (SDM 8.4.8) */
#define APIC_VERSION			(0x14UL | ((APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH		(1 << 12)
/* the following defines are not in apicdef.h */
#define APIC_SHORT_MASK			0xc0000
#define APIC_DEST_NOSHORT		0x0
#define APIC_DEST_MASK			0x800
#define MAX_APIC_VECTOR			256
#define APIC_VECTORS_PER_REG		32

/*
 * Vectors 0-255 live in eight 32-bit registers spaced 16 bytes apart
 * (IRR, ISR and TMR all use this layout): VEC_POS() is the bit within
 * a register, REG_POS() the byte offset of the register holding a
 * given vector.
 */
#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)

static unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);

static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
	*((u32 *) (apic->regs + reg_off)) = val;
}

static inline int apic_test_and_set_vector(int vec, void *bitmap)
{
	return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int apic_test_and_clear_vector(int vec, void *bitmap)
{
	return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int apic_test_vector(int vec, void *bitmap)
{
	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline void apic_set_vector(int vec, void *bitmap)
{
	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline void apic_clear_vector(int vec, void *bitmap)
{
	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;
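/*
 * apic_hw_disabled/apic_sw_disabled count vAPICs that are hardware- or
 * software-disabled.  While either count is non-zero the corresponding
 * kvm_apic_hw_enabled()/kvm_apic_sw_enabled() check takes the slow
 * branch; once every vAPIC is enabled, the jump label is patched so the
 * common case costs nothing.  apic_set_spiv() below touches the key
 * only when the APIC_SPIV_APIC_ENABLED bit actually flips (hence the
 * XOR test), keeping increments and decrements balanced.
 */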
static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
	if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
		if (val & APIC_SPIV_APIC_ENABLED)
			static_key_slow_dec_deferred(&apic_sw_disabled);
		else
			static_key_slow_inc(&apic_sw_disabled.key);
	}
	apic_set_reg(apic, APIC_SPIV, val);
}

static inline int apic_enabled(struct kvm_lapic *apic)
{
	return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline int apic_x2apic_mode(struct kvm_lapic *apic)
{
	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
}

static inline int kvm_apic_id(struct kvm_lapic *apic)
{
	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}

static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
{
	u16 cid;
	ldr >>= 32 - map->ldr_bits;
	cid = (ldr >> map->cid_shift) & map->cid_mask;

	BUG_ON(cid >= ARRAY_SIZE(map->logical_map));

	return cid;
}

static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
{
	ldr >>= (32 - map->ldr_bits);
	return ldr & map->lid_mask;
}
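/*
 * The kvm_apic_map gives O(1) interrupt routing: phys_map[] is indexed
 * by APIC ID, logical_map[cluster][bit] by the logical destination
 * decoded from the LDR using ldr_bits/cid_shift/cid_mask/lid_mask.
 * Illustrative example (xAPIC cluster mode, ldr_bits = 8, cid_shift = 4):
 * LDR = 0x21000000 decodes to 0x21, i.e. cluster 2 and logical bit 0,
 * so the apic lands in logical_map[2][0].
 */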
static void recalculate_apic_map(struct kvm *kvm)
{
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	int i;

	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);

	mutex_lock(&kvm->arch.apic_map_lock);

	if (!new)
		goto out;

	new->ldr_bits = 8;
	/* flat mode is default */
	new->cid_shift = 8;
	new->cid_mask = 0;
	new->lid_mask = 0xff;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_lapic *apic = vcpu->arch.apic;
		u16 cid, lid;
		u32 ldr;

		if (!kvm_apic_present(vcpu))
			continue;

		/*
		 * All APICs have to be configured in the same mode by an OS.
		 * We take advantage of this while building the logical id
		 * lookup table.  After reset, APICs are in xapic/flat mode,
		 * so if we find an apic with a different setting we assume
		 * this is the mode the OS wants all apics to be in; build
		 * the lookup table accordingly.
		 */
		if (apic_x2apic_mode(apic)) {
			new->ldr_bits = 32;
			new->cid_shift = 16;
			new->cid_mask = new->lid_mask = 0xffff;
		} else if (kvm_apic_sw_enabled(apic) &&
				!new->cid_mask /* flat mode */ &&
				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
			new->cid_shift = 4;
			new->cid_mask = 0xf;
			new->lid_mask = 0xf;
		}

		new->phys_map[kvm_apic_id(apic)] = apic;

		ldr = kvm_apic_get_reg(apic, APIC_LDR);
		cid = apic_cluster_id(new, ldr);
		lid = apic_logical_id(new, ldr);

		if (lid)
			new->logical_map[cid][ffs(lid) - 1] = apic;
	}
out:
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));
	rcu_assign_pointer(kvm->arch.apic_map, new);
	mutex_unlock(&kvm->arch.apic_map_lock);

	if (old)
		kfree_rcu(old, rcu);
}

static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
	apic_set_reg(apic, APIC_ID, id << 24);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
	apic_set_reg(apic, APIC_LDR, id);
	recalculate_apic_map(apic->vcpu->kvm);
}

static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
{
	return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
}

static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
}

static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
}

static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) ==
			APIC_LVT_TIMER_TSCDEADLINE);
}

static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct kvm_cpuid_entry2 *feat;
	u32 v = APIC_VERSION;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
		v |= APIC_LVR_DIRECTED_EOI;
	apic_set_reg(apic, APIC_LVR, v);
}

static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
	LVT_MASK,	/* part LVTT mask, timer mode mask added at runtime */
	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
	LINT_MASK, LINT_MASK,	/* LVT0-1 */
	LVT_MASK		/* LVTERR */
};

static int find_highest_vector(void *bitmap)
{
	int vec;
	u32 *reg;

	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		if (*reg)
			return fls(*reg) - 1 + vec;
	}

	return -1;
}

static u8 count_vectors(void *bitmap)
{
	int vec;
	u32 *reg;
	u8 count = 0;

	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
		reg = bitmap + REG_POS(vec);
		count += hweight32(*reg);
	}

	return count;
}
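/*
 * Cheap state caches used below: irr_pending is a hint that lets
 * apic_find_highest_irr() skip the 256-bit IRR scan when nothing is
 * pending, while isr_count and highest_isr_cache let
 * apic_find_highest_isr() avoid scanning the ISR in the common case of
 * a single in-service interrupt.  All three are rebuilt from the
 * register page in kvm_apic_post_state_restore().
 */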
static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = true;
	return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
}

static inline int apic_search_irr(struct kvm_lapic *apic)
{
	return find_highest_vector(apic->regs + APIC_IRR);
}

static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
	int result;

	if (!apic->irr_pending)
		return -1;

	result = apic_search_irr(apic);
	ASSERT(result == -1 || result >= 16);

	return result;
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = false;
	apic_clear_vector(vec, apic->regs + APIC_IRR);
	if (apic_search_irr(apic) != -1)
		apic->irr_pending = true;
}

static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
		++apic->isr_count;
	BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
	/*
	 * ISR (in service register) bit is set when injecting an interrupt.
	 * The highest vector is injected. Thus the latest bit set matches
	 * the highest bit in ISR.
	 */
	apic->highest_isr_cache = vec;
}

static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
		--apic->isr_count;
	BUG_ON(apic->isr_count < 0);
	apic->highest_isr_cache = -1;
}
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
	int highest_irr;

	/* This may race with setting of irr in __apic_accept_irq() and
	 * the value returned may be wrong, but kvm_vcpu_kick() in
	 * __apic_accept_irq will cause a vmexit immediately and the value
	 * will be recalculated on the next vmentry.
	 */
	if (!kvm_vcpu_has_lapic(vcpu))
		return 0;
	highest_irr = apic_find_highest_irr(vcpu->arch.apic);

	return highest_irr;
}

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode);

int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
			irq->level, irq->trig_mode);
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{

	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
				      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{

	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
				      sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
	u8 val;
	if (pv_eoi_get_user(vcpu, &val) < 0)
		apic_debug("Can't read EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
	return val & 0x1;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
		apic_debug("Can't set EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
	int result;
	if (!apic->isr_count)
		return -1;
	if (likely(apic->highest_isr_cache != -1))
		return apic->highest_isr_cache;

	result = find_highest_vector(apic->regs + APIC_ISR);
	ASSERT(result == -1 || result >= 16);

	return result;
}
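/*
 * The processor priority register is the higher of the task priority
 * and the priority class of the highest in-service vector:
 *
 *	PPR = TPR		if TPR[7:4] >= ISRV[7:4]
 *	PPR = ISRV & 0xf0	otherwise
 *
 * When the PPR drops, previously masked IRR bits may become deliverable,
 * hence the KVM_REQ_EVENT request below.
 */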
static void apic_update_ppr(struct kvm_lapic *apic)
{
	u32 tpr, isrv, ppr, old_ppr;
	int isr;

	old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
	isr = apic_find_highest_isr(apic);
	isrv = (isr != -1) ? isr : 0;

	if ((tpr & 0xf0) >= (isrv & 0xf0))
		ppr = tpr & 0xff;
	else
		ppr = isrv & 0xf0;

	apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
		   apic, ppr, isr, isrv);

	if (old_ppr != ppr) {
		apic_set_reg(apic, APIC_PROCPRI, ppr);
		if (ppr < old_ppr)
			kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	}
}

static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
	apic_set_reg(apic, APIC_TASKPRI, tpr);
	apic_update_ppr(apic);
}

int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
{
	return dest == 0xff || kvm_apic_id(apic) == dest;
}

int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
{
	int result = 0;
	u32 logical_id;

	if (apic_x2apic_mode(apic)) {
		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
		return logical_id & mda;
	}

	logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));

	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
	case APIC_DFR_FLAT:
		if (logical_id & mda)
			result = 1;
		break;
	case APIC_DFR_CLUSTER:
		if (((logical_id >> 4) == (mda >> 0x4))
		    && (logical_id & mda & 0xf))
			result = 1;
		break;
	default:
		apic_debug("Bad DFR vcpu %d: %08x\n",
			   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
		break;
	}

	return result;
}

int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
			int short_hand, int dest, int dest_mode)
{
	int result = 0;
	struct kvm_lapic *target = vcpu->arch.apic;

	apic_debug("target %p, source %p, dest 0x%x, "
		   "dest_mode 0x%x, short_hand 0x%x\n",
		   target, source, dest, dest_mode, short_hand);

	ASSERT(target);
	switch (short_hand) {
	case APIC_DEST_NOSHORT:
		if (dest_mode == 0)
			/* Physical mode. */
			result = kvm_apic_match_physical_addr(target, dest);
		else
			/* Logical mode. */
			result = kvm_apic_match_logical_addr(target, dest);
		break;
	case APIC_DEST_SELF:
		result = (target == source);
		break;
	case APIC_DEST_ALLINC:
		result = 1;
		break;
	case APIC_DEST_ALLBUT:
		result = (target != source);
		break;
	default:
		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
			   short_hand);
		break;
	}

	return result;
}
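/*
 * Fast interrupt delivery: look the destination up in the RCU-protected
 * kvm_apic_map instead of matching against every vcpu.  Returns false
 * (caller must fall back to the slow scan) when no map has been built
 * yet or the request is one the map cannot answer: a shorthand other
 * than self, a physical-mode broadcast (0xff), or lowest-priority
 * delivery in physical mode.
 */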
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
		struct kvm_lapic_irq *irq, int *r)
{
	struct kvm_apic_map *map;
	unsigned long bitmap = 1;
	struct kvm_lapic **dst;
	int i;
	bool ret = false;

	*r = -1;

	if (irq->shorthand == APIC_DEST_SELF) {
		*r = kvm_apic_set_irq(src->vcpu, irq);
		return true;
	}

	if (irq->shorthand)
		return false;

	rcu_read_lock();
	map = rcu_dereference(kvm->arch.apic_map);

	if (!map)
		goto out;

	if (irq->dest_mode == 0) { /* physical mode */
		if (irq->delivery_mode == APIC_DM_LOWEST ||
				irq->dest_id == 0xff)
			goto out;
		dst = &map->phys_map[irq->dest_id & 0xff];
	} else {
		u32 mda = irq->dest_id << (32 - map->ldr_bits);

		dst = map->logical_map[apic_cluster_id(map, mda)];

		bitmap = apic_logical_id(map, mda);

		if (irq->delivery_mode == APIC_DM_LOWEST) {
			int l = -1;
			for_each_set_bit(i, &bitmap, 16) {
				if (!dst[i])
					continue;
				if (l < 0)
					l = i;
				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
					l = i;
			}

			bitmap = (l >= 0) ? 1 << l : 0;
		}
	}

	for_each_set_bit(i, &bitmap, 16) {
		if (!dst[i])
			continue;
		if (*r < 0)
			*r = 0;
		*r += kvm_apic_set_irq(dst[i]->vcpu, irq);
	}

	ret = true;
out:
	rcu_read_unlock();
	return ret;
}
/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode)
{
	int result = 0;
	struct kvm_vcpu *vcpu = apic->vcpu;

	switch (delivery_mode) {
	case APIC_DM_LOWEST:
		vcpu->arch.apic_arb_prio++;
		/* fall through: the caller already arbitrated the target,
		 * delivery itself is the same as fixed mode */
	case APIC_DM_FIXED:
		/* FIXME add logic for vcpu on reset */
		if (unlikely(!apic_enabled(apic)))
			break;

		if (trig_mode) {
			apic_debug("level trig mode for vector %d", vector);
			apic_set_vector(vector, apic->regs + APIC_TMR);
		} else
			apic_clear_vector(vector, apic->regs + APIC_TMR);

		result = !apic_test_and_set_irr(vector, apic);
		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
					  trig_mode, vector, !result);
		if (!result) {
			if (trig_mode)
				apic_debug("level trig mode repeatedly for "
						"vector %d", vector);
			break;
		}

		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_REMRD:
		apic_debug("Ignoring delivery mode 3\n");
		break;

	case APIC_DM_SMI:
		apic_debug("Ignoring guest SMI\n");
		break;

	case APIC_DM_NMI:
		result = 1;
		kvm_inject_nmi(vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_INIT:
		if (!trig_mode || level) {
			result = 1;
			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		} else {
			apic_debug("Ignoring de-assert INIT to vcpu %d\n",
				   vcpu->vcpu_id);
		}
		break;

	case APIC_DM_STARTUP:
		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
			   vcpu->vcpu_id, vector);
		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
			result = 1;
			vcpu->arch.sipi_vector = vector;
			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		break;

	case APIC_DM_EXTINT:
		/*
		 * Should only be called by kvm_apic_local_deliver() with LVT0,
		 * before NMI watchdog was enabled. Already handled by
		 * kvm_apic_accept_pic_intr().
		 */
		break;

	default:
		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
		       delivery_mode);
		break;
	}
	return result;
}

int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}

static int apic_set_eoi(struct kvm_lapic *apic)
{
	int vector = apic_find_highest_isr(apic);

	trace_kvm_eoi(apic, vector);

	/*
	 * Not every EOI write has a corresponding ISR bit set; one
	 * example is when the kernel checks the timer in setup_IO_APIC.
	 */
	if (vector == -1)
		return vector;

	apic_clear_isr(vector, apic);
	apic_update_ppr(apic);

	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
		int trigger_mode;
		if (apic_test_vector(vector, apic->regs + APIC_TMR))
			trigger_mode = IOAPIC_LEVEL_TRIG;
		else
			trigger_mode = IOAPIC_EDGE_TRIG;
		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
	}
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	return vector;
}

static void apic_send_ipi(struct kvm_lapic *apic)
{
	u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
	u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
	struct kvm_lapic_irq irq;

	irq.vector = icr_low & APIC_VECTOR_MASK;
	irq.delivery_mode = icr_low & APIC_MODE_MASK;
	irq.dest_mode = icr_low & APIC_DEST_MASK;
	irq.level = icr_low & APIC_INT_ASSERT;
	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
	irq.shorthand = icr_low & APIC_SHORT_MASK;
	if (apic_x2apic_mode(apic))
		irq.dest_id = icr_high;
	else
		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

	trace_kvm_apic_ipi(icr_low, irq.dest_id);

	apic_debug("icr_high 0x%x, icr_low 0x%x, "
		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
		   icr_high, icr_low, irq.shorthand, irq.dest_id,
		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
		   irq.vector);

	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
}

static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
	ktime_t remaining;
	s64 ns;
	u32 tmcct;

	ASSERT(apic != NULL);

	/* if initial count is 0, current count should also be 0 */
	if (kvm_apic_get_reg(apic, APIC_TMICT) == 0)
		return 0;

	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
	if (ktime_to_ns(remaining) < 0)
		remaining = ktime_set(0, 0);

	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
	tmcct = div64_u64(ns,
			 (APIC_BUS_CYCLE_NS * apic->divide_count));

	return tmcct;
}

static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct kvm_run *run = vcpu->run;

	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
	run->tpr_access.rip = kvm_rip_read(vcpu);
	run->tpr_access.is_write = write;
}

static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
	if (apic->vcpu->arch.tpr_access_reporting)
		__report_tpr_access(apic, write);
}
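/*
 * Register reads are served from the backing page, with two exceptions:
 * PPR is recomputed on every read (it depends on the current TPR and
 * ISR), and TMCCT is derived from the remaining hrtimer time, since the
 * current count is never written back to the register page.
 */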
static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
	u32 val = 0;

	if (offset >= LAPIC_MMIO_LENGTH)
		return 0;

	switch (offset) {
	case APIC_ID:
		if (apic_x2apic_mode(apic))
			val = kvm_apic_id(apic);
		else
			val = kvm_apic_id(apic) << 24;
		break;
	case APIC_ARBPRI:
		apic_debug("Access APIC ARBPRI register which is for P6\n");
		break;

	case APIC_TMCCT:	/* Timer CCR */
		if (apic_lvtt_tscdeadline(apic))
			return 0;

		val = apic_get_tmcct(apic);
		break;
	case APIC_PROCPRI:
		apic_update_ppr(apic);
		val = kvm_apic_get_reg(apic, offset);
		break;
	case APIC_TASKPRI:
		report_tpr_access(apic, false);
		/* fall thru */
	default:
		val = kvm_apic_get_reg(apic, offset);
		break;
	}

	return val;
}

static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_lapic, dev);
}

static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
		void *data)
{
	unsigned char alignment = offset & 0xf;
	u32 result;
	/* this bitmask has a bit cleared for each reserved register */
	static const u64 rmask = 0x43ff01ffffffe70cULL;

	if ((alignment + len) > 4) {
		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
			   offset, len);
		return 1;
	}

	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
		apic_debug("KVM_APIC_READ: read reserved register %x\n",
			   offset);
		return 1;
	}

	result = __apic_read(apic, offset & ~0xf);

	trace_kvm_apic_read(offset, result);

	switch (len) {
	case 1:
	case 2:
	case 4:
		memcpy(data, (char *)&result + alignment, len);
		break;
	default:
		printk(KERN_ERR "Local APIC read with len = %x, "
		       "should be 1, 2, or 4 instead\n", len);
		break;
	}
	return 0;
}

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
	return kvm_apic_hw_enabled(apic) &&
	    addr >= apic->base_address &&
	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_io_device *this,
			   gpa_t address, int len, void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	u32 offset = address - apic->base_address;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	apic_reg_read(apic, offset, len, data);

	return 0;
}

static void update_divide_count(struct kvm_lapic *apic)
{
	u32 tmp1, tmp2, tdcr;

	tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
	tmp1 = tdcr & 0xf;
	/*
	 * TDCR bits 0, 1 and 3 encode the divisor: fold bit 3 down to
	 * bit 2 and add one to get log2(divisor), with 0b111 wrapping
	 * around to divide-by-1.  E.g. TDCR = 0x3 gives (3 | 0) + 1 = 4,
	 * i.e. divide by 16; TDCR = 0xb gives ((3 | 4) + 1) & 7 = 0,
	 * i.e. divide by 1.
	 */
	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
	apic->divide_count = 0x1 << (tmp2 & 0x7);

	apic_debug("timer divide count is 0x%x\n",
		   apic->divide_count);
}
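/*
 * Timer arithmetic used below, with APIC_BUS_CYCLE_NS == 1:
 *
 *	period_ns = TMICT * APIC_BUS_CYCLE_NS * divide_count
 *
 * e.g. TMICT = 1000000 with a divide-by-16 TDCR programs a 16 ms timer.
 * In TSC-deadline mode the expiry is converted from cycles instead:
 *
 *	ns = (tscdeadline - guest_tsc) * 1000000 / virtual_tsc_khz
 */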
static void start_apic_timer(struct kvm_lapic *apic)
{
	ktime_t now;
	atomic_set(&apic->lapic_timer.pending, 0);

	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
		/* lapic timer in oneshot or periodic mode */
		now = apic->lapic_timer.timer.base->get_time();
		apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
			    * APIC_BUS_CYCLE_NS * apic->divide_count;

		if (!apic->lapic_timer.period)
			return;
		/*
		 * Do not allow the guest to program periodic timers with small
		 * interval, since the hrtimers are not throttled by the host
		 * scheduler.
		 */
		if (apic_lvtt_period(apic)) {
			s64 min_period = min_timer_period_us * 1000LL;

			if (apic->lapic_timer.period < min_period) {
				pr_info_ratelimited(
				    "kvm: vcpu %i: requested %lld ns "
				    "lapic timer period limited to %lld ns\n",
				    apic->vcpu->vcpu_id,
				    apic->lapic_timer.period, min_period);
				apic->lapic_timer.period = min_period;
			}
		}

		hrtimer_start(&apic->lapic_timer.timer,
			      ktime_add_ns(now, apic->lapic_timer.period),
			      HRTIMER_MODE_ABS);

		apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
			   PRIx64 ", "
			   "timer initial count 0x%x, period %lldns, "
			   "expire @ 0x%016" PRIx64 ".\n", __func__,
			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
			   kvm_apic_get_reg(apic, APIC_TMICT),
			   apic->lapic_timer.period,
			   ktime_to_ns(ktime_add_ns(now,
					apic->lapic_timer.period)));
	} else if (apic_lvtt_tscdeadline(apic)) {
		/* lapic timer in tsc deadline mode */
		u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
		u64 ns = 0;
		struct kvm_vcpu *vcpu = apic->vcpu;
		unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
		unsigned long flags;

		if (unlikely(!tscdeadline || !this_tsc_khz))
			return;

		local_irq_save(flags);

		now = apic->lapic_timer.timer.base->get_time();
		guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
		if (likely(tscdeadline > guest_tsc)) {
			ns = (tscdeadline - guest_tsc) * 1000000ULL;
			do_div(ns, this_tsc_khz);
		}
		hrtimer_start(&apic->lapic_timer.timer,
			ktime_add_ns(now, ns), HRTIMER_MODE_ABS);

		local_irq_restore(flags);
	}
}

static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
{
	int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0));

	if (apic_lvt_nmi_mode(lvt0_val)) {
		if (!nmi_wd_enabled) {
			apic_debug("Receive NMI setting on APIC_LVT0 "
				   "for cpu %d\n", apic->vcpu->vcpu_id);
			apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
		}
	} else if (nmi_wd_enabled)
		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
}
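/*
 * All register writes funnel through apic_reg_write() so the side
 * effects stay in one place: ID, LDR and DFR rebuild the apic map,
 * SPIV toggles the sw-enabled jump label and masks every LVT entry on
 * software disable, ICR triggers the IPI, and LVTT/TMICT/TDCR restart
 * or retune the timer.  A non-zero return flags a write that is
 * invalid in the current mode (e.g. ID/LDR/DFR in x2apic) or targets
 * a read-only or reserved register.
 */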
static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
	int ret = 0;

	trace_kvm_apic_write(reg, val);

	switch (reg) {
	case APIC_ID:		/* Local APIC ID */
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_id(apic, val >> 24);
		else
			ret = 1;
		break;

	case APIC_TASKPRI:
		report_tpr_access(apic, true);
		apic_set_tpr(apic, val & 0xff);
		break;

	case APIC_EOI:
		apic_set_eoi(apic);
		break;

	case APIC_LDR:
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
		else
			ret = 1;
		break;

	case APIC_DFR:
		if (!apic_x2apic_mode(apic)) {
			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
			recalculate_apic_map(apic->vcpu->kvm);
		} else
			ret = 1;
		break;

	case APIC_SPIV: {
		u32 mask = 0x3ff;
		if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
			mask |= APIC_SPIV_DIRECTED_EOI;
		apic_set_spiv(apic, val & mask);
		if (!(val & APIC_SPIV_APIC_ENABLED)) {
			int i;
			u32 lvt_val;

			for (i = 0; i < APIC_LVT_NUM; i++) {
				lvt_val = kvm_apic_get_reg(apic,
						       APIC_LVTT + 0x10 * i);
				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
					     lvt_val | APIC_LVT_MASKED);
			}
			atomic_set(&apic->lapic_timer.pending, 0);

		}
		break;
	}
	case APIC_ICR:
		/* No delay here, so we always clear the pending bit */
		apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
		apic_send_ipi(apic);
		break;

	case APIC_ICR2:
		if (!apic_x2apic_mode(apic))
			val &= 0xff000000;
		apic_set_reg(apic, APIC_ICR2, val);
		break;

	case APIC_LVT0:
		apic_manage_nmi_watchdog(apic, val);
		/* fall through: LVT0 is masked and filtered like the rest */
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT1:
	case APIC_LVTERR:
		/* TODO: Check vector */
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;

		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
		apic_set_reg(apic, reg, val);

		break;

	case APIC_LVTT:
		if ((kvm_apic_get_reg(apic, APIC_LVTT) &
		    apic->lapic_timer.timer_mode_mask) !=
		   (val & apic->lapic_timer.timer_mode_mask))
			hrtimer_cancel(&apic->lapic_timer.timer);

		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;
		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
		apic_set_reg(apic, APIC_LVTT, val);
		break;

	case APIC_TMICT:
		if (apic_lvtt_tscdeadline(apic))
			break;

		hrtimer_cancel(&apic->lapic_timer.timer);
		apic_set_reg(apic, APIC_TMICT, val);
		start_apic_timer(apic);
		break;

	case APIC_TDCR:
		if (val & 4)
			apic_debug("KVM_WRITE:TDCR %x\n", val);
		apic_set_reg(apic, APIC_TDCR, val);
		update_divide_count(apic);
		break;

	case APIC_ESR:
		if (apic_x2apic_mode(apic) && val != 0) {
			apic_debug("KVM_WRITE:ESR not zero %x\n", val);
			ret = 1;
		}
		break;

	case APIC_SELF_IPI:
		if (apic_x2apic_mode(apic)) {
			apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
		} else
			ret = 1;
		break;
	default:
		ret = 1;
		break;
	}
	if (ret)
		apic_debug("Local APIC Write to read-only register %x\n", reg);
	return ret;
}
static int apic_mmio_write(struct kvm_io_device *this,
			    gpa_t address, int len, const void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	unsigned int offset = address - apic->base_address;
	u32 val;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	/*
	 * APIC register must be aligned on 128-bits boundary.
	 * 32/64/128 bits registers must be accessed thru 32 bits.
	 * Refer SDM 8.4.1
	 */
	if (len != 4 || (offset & 0xf)) {
		/* Don't shout loud, $infamous_os would cause only noise. */
		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
		return 0;
	}

	val = *(u32*)data;

	/* too common printing */
	if (offset != APIC_EOI)
		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
			   "0x%x\n", __func__, offset, len, val);

	apic_reg_write(apic, offset & 0xff0, val);

	return 0;
}

void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_has_lapic(vcpu))
		apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);

/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
	u32 val = 0;

	/* hw has done the conditional check and inst decode */
	offset &= 0xff0;

	apic_reg_read(vcpu->arch.apic, offset, 4, &val);

	/* TODO: optimize to just emulate side effect w/o one more write */
	apic_reg_write(vcpu->arch.apic, offset, val);
}
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);

void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!vcpu->arch.apic)
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);

	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
		static_key_slow_dec_deferred(&apic_hw_disabled);

	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
		static_key_slow_dec_deferred(&apic_sw_disabled);

	if (apic->regs)
		free_page((unsigned long)apic->regs);

	kfree(apic);
}

/*
 *----------------------------------------------------------------------
 * LAPIC interface
 *----------------------------------------------------------------------
 */

u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
			apic_lvtt_period(apic))
		return 0;

	return apic->lapic_timer.tscdeadline;
}

void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
			apic_lvtt_period(apic))
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);
	apic->lapic_timer.tscdeadline = data;
	start_apic_timer(apic);
}

void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
		     | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
}

u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
	u64 tpr;

	if (!kvm_vcpu_has_lapic(vcpu))
		return 0;

	tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);

	return (tpr & 0xf0) >> 4;
}
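/*
 * On the switch into x2apic mode the LDR becomes read-only and is
 * derived from the APIC ID: bits 31:16 hold the cluster (id >> 4) and
 * bits 15:0 a one-hot position within it (1 << (id & 0xf)).  For
 * example, id 0x26 yields LDR = (2 << 16) | (1 << 6) = 0x20040.
 */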
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic) {
		value |= MSR_IA32_APICBASE_BSP;
		vcpu->arch.apic_base = value;
		return;
	}

	/* update jump label if enable bit changes */
	if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) {
		if (value & MSR_IA32_APICBASE_ENABLE)
			static_key_slow_dec_deferred(&apic_hw_disabled);
		else
			static_key_slow_inc(&apic_hw_disabled.key);
		recalculate_apic_map(vcpu->kvm);
	}

	if (!kvm_vcpu_is_bsp(apic->vcpu))
		value &= ~MSR_IA32_APICBASE_BSP;

	vcpu->arch.apic_base = value;
	if (apic_x2apic_mode(apic)) {
		u32 id = kvm_apic_id(apic);
		u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
		kvm_apic_set_ldr(apic, ldr);
	}
	apic->base_address = apic->vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;

	/* with FSB interrupt delivery, we can restart APIC functionality */
	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);

}

void kvm_lapic_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;
	int i;

	apic_debug("%s\n", __func__);

	ASSERT(vcpu);
	apic = vcpu->arch.apic;
	ASSERT(apic != NULL);

	/* Stop the timer in case it's a reset to an active apic */
	hrtimer_cancel(&apic->lapic_timer.timer);

	kvm_apic_set_id(apic, vcpu->vcpu_id);
	kvm_apic_set_version(apic->vcpu);

	for (i = 0; i < APIC_LVT_NUM; i++)
		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
	apic_set_reg(apic, APIC_LVT0,
		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));

	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
	apic_set_spiv(apic, 0xff);
	apic_set_reg(apic, APIC_TASKPRI, 0);
	kvm_apic_set_ldr(apic, 0);
	apic_set_reg(apic, APIC_ESR, 0);
	apic_set_reg(apic, APIC_ICR, 0);
	apic_set_reg(apic, APIC_ICR2, 0);
	apic_set_reg(apic, APIC_TDCR, 0);
	apic_set_reg(apic, APIC_TMICT, 0);
	for (i = 0; i < 8; i++) {
		apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
	}
	apic->irr_pending = false;
	apic->isr_count = 0;
	apic->highest_isr_cache = -1;
	update_divide_count(apic);
	atomic_set(&apic->lapic_timer.pending, 0);
	if (kvm_vcpu_is_bsp(vcpu))
		kvm_lapic_set_base(vcpu,
				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
	vcpu->arch.pv_eoi.msr_val = 0;
	apic_update_ppr(apic);

	vcpu->arch.apic_arb_prio = 0;
	vcpu->arch.apic_attention = 0;

	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
		   vcpu, kvm_apic_id(apic),
		   vcpu->arch.apic_base, apic->base_address);
}

/*
 *----------------------------------------------------------------------
 * timer interface
 *----------------------------------------------------------------------
 */

static bool lapic_is_periodic(struct kvm_lapic *apic)
{
	return apic_lvtt_period(apic);
}

int apic_has_pending_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
			apic_lvt_enabled(apic, APIC_LVTT))
		return atomic_read(&apic->lapic_timer.pending);

	return 0;
}

int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
	u32 reg = kvm_apic_get_reg(apic, lvt_type);
	int vector, mode, trig_mode;

	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
		vector = reg & APIC_VECTOR_MASK;
		mode = reg & APIC_MODE_MASK;
		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
		return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
	}
	return 0;
}
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic)
		kvm_apic_local_deliver(apic, APIC_LVT0);
}

static const struct kvm_io_device_ops apic_mmio_ops = {
	.read     = apic_mmio_read,
	.write    = apic_mmio_write,
};

static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
{
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
	struct kvm_vcpu *vcpu = apic->vcpu;
	wait_queue_head_t *q = &vcpu->wq;

	/*
	 * There is a race window between reading and incrementing, but we do
	 * not care about potentially losing timer events in the !reinject
	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
	 * in vcpu_enter_guest.
	 */
	if (!atomic_read(&ktimer->pending)) {
		atomic_inc(&ktimer->pending);
		/* FIXME: this code should not know anything about vcpus */
		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
	}

	if (waitqueue_active(q))
		wake_up_interruptible(q);

	if (lapic_is_periodic(apic)) {
		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
		return HRTIMER_RESTART;
	} else
		return HRTIMER_NORESTART;
}
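/*
 * Creation and teardown keep the jump-label reference counts balanced:
 * kvm_create_lapic() takes one apic_sw_disabled reference (the APIC is
 * software-disabled after reset), and kvm_free_lapic() above drops
 * whichever hw/sw references are still held for a disabled APIC.
 */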
int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;

	ASSERT(vcpu != NULL);
	apic_debug("apic_init %d\n", vcpu->vcpu_id);

	apic = kzalloc(sizeof(*apic), GFP_KERNEL);
	if (!apic)
		goto nomem;

	vcpu->arch.apic = apic;

	apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
	if (!apic->regs) {
		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
		       vcpu->vcpu_id);
		goto nomem_free_apic;
	}
	apic->vcpu = vcpu;

	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS);
	apic->lapic_timer.timer.function = apic_timer_fn;

	/*
	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
	 * thinking that APIC state has changed.
	 */
	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
	kvm_lapic_set_base(vcpu,
			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);

	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
	kvm_lapic_reset(vcpu);
	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);

	return 0;
nomem_free_apic:
	kfree(apic);
nomem:
	return -ENOMEM;
}

int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int highest_irr;

	if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic))
		return -1;

	apic_update_ppr(apic);
	highest_irr = apic_find_highest_irr(apic);
	if ((highest_irr == -1) ||
	    ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI)))
		return -1;
	return highest_irr;
}

int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
	int r = 0;

	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
		r = 1;
	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
		r = 1;
	return r;
}

void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	if (atomic_read(&apic->lapic_timer.pending) > 0) {
		if (kvm_apic_local_deliver(apic, APIC_LVTT))
			atomic_dec(&apic->lapic_timer.pending);
	}
}

int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
	int vector = kvm_apic_has_interrupt(vcpu);
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (vector == -1)
		return -1;

	apic_set_isr(vector, apic);
	apic_update_ppr(apic);
	apic_clear_irr(vector, apic);
	return vector;
}

void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
		struct kvm_lapic_state *s)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
	/* set SPIV separately to get count of SW disabled APICs right */
	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
	/* call kvm_apic_set_id() to put apic into apic_map */
	kvm_apic_set_id(apic, kvm_apic_id(apic));
	kvm_apic_set_version(vcpu);

	apic_update_ppr(apic);
	hrtimer_cancel(&apic->lapic_timer.timer);
	update_divide_count(apic);
	start_apic_timer(apic);
	apic->irr_pending = true;
	apic->isr_count = count_vectors(apic->regs + APIC_ISR);
	apic->highest_isr_cache = -1;
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}

void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
	struct hrtimer *timer;

	if (!kvm_vcpu_has_lapic(vcpu))
		return;

	timer = &vcpu->arch.apic->lapic_timer.timer;
	if (hrtimer_cancel(timer))
		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
/*
 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
 *
 * Detect whether the guest triggered PV EOI since the
 * last entry. If yes, set EOI on the guest's behalf.
 * Clear PV EOI in guest memory in any case.
 */
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	bool pending;
	int vector;
	/*
	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
	 *
	 * KVM_APIC_PV_EOI_PENDING is unset:
	 *	-> host disabled PV EOI.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
	 *	-> host enabled PV EOI, guest did not execute EOI yet.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
	 *	-> host enabled PV EOI, guest executed EOI.
	 */
	BUG_ON(!pv_eoi_enabled(vcpu));
	pending = pv_eoi_get_pending(vcpu);
	/*
	 * Clear pending bit in any case: it will be set again on vmentry.
	 * While this might not be ideal from performance point of view,
	 * this makes sure pv eoi is only enabled when we know it's safe.
	 */
	pv_eoi_clr_pending(vcpu);
	if (pending)
		return;
	vector = apic_set_eoi(apic);
	trace_kvm_pv_eoi(apic, vector);
}

void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
	u32 data;
	void *vapic;

	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
	data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
	kunmap_atomic(vapic);

	apic_set_tpr(vcpu->arch.apic, data & 0xff);
}
/*
 * apic_sync_pv_eoi_to_guest - called before vmentry
 *
 * Detect whether it's safe to enable PV EOI and
 * if yes do so.
 */
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	if (!pv_eoi_enabled(vcpu) ||
	    /* IRR set or many bits in ISR: could be nested. */
	    apic->irr_pending ||
	    /* Cache not set: could be safe but we don't bother. */
	    apic->highest_isr_cache == -1 ||
	    /* Need EOI to update ioapic. */
	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
		/*
		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
		 * so we need not do anything here.
		 */
		return;
	}

	pv_eoi_set_pending(apic->vcpu);
}

void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
	u32 data, tpr;
	int max_irr, max_isr;
	struct kvm_lapic *apic = vcpu->arch.apic;
	void *vapic;

	apic_sync_pv_eoi_to_guest(vcpu, apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff;
	max_irr = apic_find_highest_irr(apic);
	if (max_irr < 0)
		max_irr = 0;
	max_isr = apic_find_highest_isr(apic);
	if (max_isr < 0)
		max_isr = 0;
	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);

	vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
	*(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
	kunmap_atomic(vapic);
}

void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
	vcpu->arch.apic->vapic_addr = vapic_addr;
	if (vapic_addr)
		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	else
		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
}

int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4;

	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
		return 1;

	/* if this is ICR write vector before command */
	if (msr == 0x830)
		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return apic_reg_write(apic, reg, (u32)data);
}

int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;

	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
		return 1;

	if (apic_reg_read(apic, reg, 4, &low))
		return 1;
	if (msr == 0x830)
		apic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!kvm_vcpu_has_lapic(vcpu))
		return 1;

	/* if this is ICR write vector before command */
	if (reg == APIC_ICR)
		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return apic_reg_write(apic, reg, (u32)data);
}

int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 low, high = 0;

	if (!kvm_vcpu_has_lapic(vcpu))
		return 1;

	if (apic_reg_read(apic, reg, 4, &low))
		return 1;
	if (reg == APIC_ICR)
		apic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
{
	u64 addr = data & ~KVM_MSR_ENABLED;
	if (!IS_ALIGNED(addr, 4))
		return 1;

	vcpu->arch.pv_eoi.msr_val = data;
	if (!pv_eoi_enabled(vcpu))
		return 0;
	return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
					 addr);
}

void kvm_lapic_init(void)
{
	/* do not patch jump label more than once per second */
	jump_label_rate_limit(&apic_hw_disabled, HZ);
	jump_label_rate_limit(&apic_sw_disabled, HZ);
}