/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <linux/atomic.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

#define APIC_BUS_CYCLE_NS 1

/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define apic_debug(fmt, arg...)

#define APIC_LVT_NUM			6
/* 0x14 is the APIC version for Xeon and Pentium 4 (see Intel SDM 8.4.8) */
#define APIC_VERSION			(0x14UL | ((APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH		(1 << 12)
/* the following defines are not in apicdef.h */
#define APIC_SHORT_MASK			0xc0000
#define APIC_DEST_NOSHORT		0x0
#define APIC_DEST_MASK			0x800
#define MAX_APIC_VECTOR			256

/*
 * IRR, ISR and TMR are 256-bit bitmaps stored as eight 32-bit registers
 * spaced 16 bytes apart.  VEC_POS() is a vector's bit position within one
 * of those registers, REG_POS() the register's byte offset (e.g. vector
 * 0x31 -> REG_POS 0x10, bit 17).
 */
#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)

static unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);

static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
{
	return *((u32 *) (apic->regs + reg_off));
}

static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
{
	*((u32 *) (apic->regs + reg_off)) = val;
}

static inline int apic_test_and_set_vector(int vec, void *bitmap)
{
	return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int apic_test_and_clear_vector(int vec, void *bitmap)
{
	return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int apic_test_vector(int vec, void *bitmap)
{
	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline void apic_set_vector(int vec, void *bitmap)
{
	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline void apic_clear_vector(int vec, void *bitmap)
{
	clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int apic_hw_enabled(struct kvm_lapic *apic)
{
	return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
}

static inline int apic_sw_enabled(struct kvm_lapic *apic)
{
	return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
}
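
/*
 * The local APIC can be disabled in two ways: globally, via the enable
 * bit in the IA32_APIC_BASE MSR (apic_hw_enabled), and per-APIC, via the
 * software-enable bit in the spurious-interrupt vector register
 * (apic_sw_enabled).  Interrupt acceptance requires both.
 */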
static inline int apic_enabled(struct kvm_lapic *apic)
{
	return apic_sw_enabled(apic) && apic_hw_enabled(apic);
}

#define LVT_MASK \
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK \
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline int kvm_apic_id(struct kvm_lapic *apic)
{
	return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}

static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
	return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
{
	return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
}

static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
	return ((apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
}

static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
	return ((apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
}

static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
	return ((apic_get_reg(apic, APIC_LVTT) &
		apic->lapic_timer.timer_mode_mask) ==
			APIC_LVT_TIMER_TSCDEADLINE);
}

static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct kvm_cpuid_entry2 *feat;
	u32 v = APIC_VERSION;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
		v |= APIC_LVR_DIRECTED_EOI;
	apic_set_reg(apic, APIC_LVR, v);
}

static inline int apic_x2apic_mode(struct kvm_lapic *apic)
{
	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
}

static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
	LVT_MASK,		/* part LVTT mask, timer mode mask added at runtime */
	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
	LINT_MASK, LINT_MASK,	/* LVT0-1 */
	LVT_MASK		/* LVTERR */
};

static int find_highest_vector(void *bitmap)
{
	u32 *word = bitmap;
	int word_offset = MAX_APIC_VECTOR >> 5;

	/* the "<< 2" stride skips to every fourth word: the 32-bit
	 * registers sit 16 bytes apart (see REG_POS) */
	while ((word_offset != 0) && (word[(--word_offset) << 2] == 0))
		continue;

	if (likely(!word_offset && !word[0]))
		return -1;
	else
		return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
}

static u8 count_vectors(void *bitmap)
{
	u32 *word = bitmap;
	int word_offset;
	u8 count = 0;
	for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
		count += hweight32(word[word_offset << 2]);
	return count;
}

static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = true;
	return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
}

static inline int apic_search_irr(struct kvm_lapic *apic)
{
	return find_highest_vector(apic->regs + APIC_IRR);
}

static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
	int result;

	if (!apic->irr_pending)
		return -1;

	result = apic_search_irr(apic);
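	/* vectors 0-15 are architecturally reserved, so a valid result
	 * is either "none" or at least 16 */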
	ASSERT(result == -1 || result >= 16);

	return result;
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
	apic->irr_pending = false;
	apic_clear_vector(vec, apic->regs + APIC_IRR);
	if (apic_search_irr(apic) != -1)
		apic->irr_pending = true;
}

static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
	if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
		++apic->isr_count;
	BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
	/*
	 * ISR (in service register) bit is set when injecting an interrupt.
	 * The highest vector is injected. Thus the latest bit set matches
	 * the highest bit in ISR.
	 */
	apic->highest_isr_cache = vec;
}

static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
	if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
		--apic->isr_count;
	BUG_ON(apic->isr_count < 0);
	apic->highest_isr_cache = -1;
}

int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int highest_irr;

	/* This may race with setting of irr in __apic_accept_irq() and
	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
	 * will cause vmexit immediately and the value will be recalculated
	 * on the next vmentry.
	 */
	if (!apic)
		return 0;
	highest_irr = apic_find_highest_irr(apic);

	return highest_irr;
}

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode);

int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
			irq->level, irq->trig_mode);
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{
	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
				      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{
	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
				     sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
	u8 val = 0;	/* report "not pending" if the read below fails */
	if (pv_eoi_get_user(vcpu, &val) < 0)
		apic_debug("Can't read EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
	return val & 0x1;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
		apic_debug("Can't set EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
		return;
	}
	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}
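
/*
 * isr_count and highest_isr_cache let the common case - at most one
 * interrupt in service - avoid rescanning the whole 256-bit ISR every
 * time the PPR is recomputed.
 */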
static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
	int result;
	if (!apic->isr_count)
		return -1;
	if (likely(apic->highest_isr_cache != -1))
		return apic->highest_isr_cache;

	result = find_highest_vector(apic->regs + APIC_ISR);
	ASSERT(result == -1 || result >= 16);

	return result;
}

static void apic_update_ppr(struct kvm_lapic *apic)
{
	u32 tpr, isrv, ppr, old_ppr;
	int isr;

	old_ppr = apic_get_reg(apic, APIC_PROCPRI);
	tpr = apic_get_reg(apic, APIC_TASKPRI);
	isr = apic_find_highest_isr(apic);
	isrv = (isr != -1) ? isr : 0;

	/* PPR is the higher of the TPR and the in-service vector's
	 * priority class (upper nibble) */
	if ((tpr & 0xf0) >= (isrv & 0xf0))
		ppr = tpr & 0xff;
	else
		ppr = isrv & 0xf0;

	apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
		   apic, ppr, isr, isrv);

	if (old_ppr != ppr) {
		apic_set_reg(apic, APIC_PROCPRI, ppr);
		if (ppr < old_ppr)
			kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	}
}

static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
	apic_set_reg(apic, APIC_TASKPRI, tpr);
	apic_update_ppr(apic);
}

int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
{
	return dest == 0xff || kvm_apic_id(apic) == dest;
}

int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
{
	int result = 0;
	u32 logical_id;

	if (apic_x2apic_mode(apic)) {
		logical_id = apic_get_reg(apic, APIC_LDR);
		return logical_id & mda;
	}

	logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));

	switch (apic_get_reg(apic, APIC_DFR)) {
	case APIC_DFR_FLAT:
		if (logical_id & mda)
			result = 1;
		break;
	case APIC_DFR_CLUSTER:
		if (((logical_id >> 4) == (mda >> 0x4))
		    && (logical_id & mda & 0xf))
			result = 1;
		break;
	default:
		apic_debug("Bad DFR vcpu %d: %08x\n",
			   apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
		break;
	}

	return result;
}
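
/*
 * Worked example for the xAPIC logical modes above: in flat mode an MDA
 * of 0x0c matches any APIC whose LDR has bit 2 or bit 3 set; in cluster
 * mode the upper nibble of the MDA selects the cluster and the lower
 * nibble is a bitmask of members within it.
 */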

int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
			int short_hand, int dest, int dest_mode)
{
	int result = 0;
	struct kvm_lapic *target = vcpu->arch.apic;

	apic_debug("target %p, source %p, dest 0x%x, "
		   "dest_mode 0x%x, short_hand 0x%x\n",
		   target, source, dest, dest_mode, short_hand);

	ASSERT(target);
	switch (short_hand) {
	case APIC_DEST_NOSHORT:
		if (dest_mode == 0)
			/* Physical mode. */
			result = kvm_apic_match_physical_addr(target, dest);
		else
			/* Logical mode. */
			result = kvm_apic_match_logical_addr(target, dest);
		break;
	case APIC_DEST_SELF:
		result = (target == source);
		break;
	case APIC_DEST_ALLINC:
		result = 1;
		break;
	case APIC_DEST_ALLBUT:
		result = (target != source);
		break;
	default:
		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
			   short_hand);
		break;
	}

	return result;
}

/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode)
{
	int result = 0;
	struct kvm_vcpu *vcpu = apic->vcpu;

	switch (delivery_mode) {
	case APIC_DM_LOWEST:
		vcpu->arch.apic_arb_prio++;
		/* fall through */
	case APIC_DM_FIXED:
		/* FIXME add logic for vcpu on reset */
		if (unlikely(!apic_enabled(apic)))
			break;

		if (trig_mode) {
			apic_debug("level trig mode for vector %d", vector);
			apic_set_vector(vector, apic->regs + APIC_TMR);
		} else
			apic_clear_vector(vector, apic->regs + APIC_TMR);

		result = !apic_test_and_set_irr(vector, apic);
		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
					  trig_mode, vector, !result);
		if (!result) {
			if (trig_mode)
				apic_debug("level trig mode repeatedly for "
						"vector %d", vector);
			break;
		}

		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_REMRD:
		apic_debug("Ignoring delivery mode 3\n");
		break;

	case APIC_DM_SMI:
		apic_debug("Ignoring guest SMI\n");
		break;

	case APIC_DM_NMI:
		result = 1;
		kvm_inject_nmi(vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_INIT:
		if (!trig_mode || level) {
			result = 1;
			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		} else {
			apic_debug("Ignoring de-assert INIT to vcpu %d\n",
				   vcpu->vcpu_id);
		}
		break;

	case APIC_DM_STARTUP:
		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
			   vcpu->vcpu_id, vector);
		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
			result = 1;
			vcpu->arch.sipi_vector = vector;
			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		break;
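
	/*
	 * Together, the INIT and STARTUP cases above implement the
	 * INIT-SIPI startup handshake: INIT parks the target vcpu in
	 * KVM_MP_STATE_INIT_RECEIVED, and a subsequent SIPI supplies the
	 * vector it starts executing from.
	 */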

	case APIC_DM_EXTINT:
		/*
		 * Should only be called by kvm_apic_local_deliver() with LVT0,
		 * before NMI watchdog was enabled. Already handled by
		 * kvm_apic_accept_pic_intr().
		 */
		break;

	default:
		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
		       delivery_mode);
		break;
	}
	return result;
}

int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}

static int apic_set_eoi(struct kvm_lapic *apic)
{
	int vector = apic_find_highest_isr(apic);

	trace_kvm_eoi(apic, vector);

	/*
	 * Not every EOI write has a corresponding bit set in the ISR;
	 * one example is when the kernel checks the timer in setup_IO_APIC.
	 */
	if (vector == -1)
		return vector;

	apic_clear_isr(vector, apic);
	apic_update_ppr(apic);

	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
		int trigger_mode;
		if (apic_test_vector(vector, apic->regs + APIC_TMR))
			trigger_mode = IOAPIC_LEVEL_TRIG;
		else
			trigger_mode = IOAPIC_EDGE_TRIG;
		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
	}
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
	return vector;
}

static void apic_send_ipi(struct kvm_lapic *apic)
{
	u32 icr_low = apic_get_reg(apic, APIC_ICR);
	u32 icr_high = apic_get_reg(apic, APIC_ICR2);
	struct kvm_lapic_irq irq;

	irq.vector = icr_low & APIC_VECTOR_MASK;
	irq.delivery_mode = icr_low & APIC_MODE_MASK;
	irq.dest_mode = icr_low & APIC_DEST_MASK;
	irq.level = icr_low & APIC_INT_ASSERT;
	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
	irq.shorthand = icr_low & APIC_SHORT_MASK;
	if (apic_x2apic_mode(apic))
		irq.dest_id = icr_high;
	else
		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

	trace_kvm_apic_ipi(icr_low, irq.dest_id);

	apic_debug("icr_high 0x%x, icr_low 0x%x, "
		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
		   icr_high, icr_low, irq.shorthand, irq.dest_id,
		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
		   irq.vector);

	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
}
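
/*
 * Current-count register: the time left until expiry, expressed in bus
 * cycles scaled by the divide configuration.  For example, with
 * divide_count == 2 and 1000 ns remaining, TMCCT reads back
 * 1000 / (APIC_BUS_CYCLE_NS * 2) == 500.
 */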
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
	ktime_t remaining;
	s64 ns;
	u32 tmcct;

	ASSERT(apic != NULL);

	/* if initial count is 0, current count should also be 0 */
	if (apic_get_reg(apic, APIC_TMICT) == 0)
		return 0;

	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
	if (ktime_to_ns(remaining) < 0)
		remaining = ktime_set(0, 0);

	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
	tmcct = div64_u64(ns,
			 (APIC_BUS_CYCLE_NS * apic->divide_count));

	return tmcct;
}

static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct kvm_run *run = vcpu->run;

	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
	run->tpr_access.rip = kvm_rip_read(vcpu);
	run->tpr_access.is_write = write;
}

static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
	if (apic->vcpu->arch.tpr_access_reporting)
		__report_tpr_access(apic, write);
}

static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
	u32 val = 0;

	if (offset >= LAPIC_MMIO_LENGTH)
		return 0;

	switch (offset) {
	case APIC_ID:
		if (apic_x2apic_mode(apic))
			val = kvm_apic_id(apic);
		else
			val = kvm_apic_id(apic) << 24;
		break;
	case APIC_ARBPRI:
		apic_debug("Access APIC ARBPRI register which is for P6\n");
		break;

	case APIC_TMCCT:	/* Timer CCR */
		if (apic_lvtt_tscdeadline(apic))
			return 0;

		val = apic_get_tmcct(apic);
		break;
	case APIC_PROCPRI:
		apic_update_ppr(apic);
		val = apic_get_reg(apic, offset);
		break;
	case APIC_TASKPRI:
		report_tpr_access(apic, false);
		/* fall thru */
	default:
		val = apic_get_reg(apic, offset);
		break;
	}

	return val;
}

static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_lapic, dev);
}

static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
		void *data)
{
	unsigned char alignment = offset & 0xf;
	u32 result;
	/* this bitmask has a bit cleared for each reserved register */
	static const u64 rmask = 0x43ff01ffffffe70cULL;

	if ((alignment + len) > 4) {
		apic_debug("KVM_APIC_READ: alignment error %x %d\n",
			   offset, len);
		return 1;
	}

	if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
		apic_debug("KVM_APIC_READ: read reserved register %x\n",
			   offset);
		return 1;
	}

	result = __apic_read(apic, offset & ~0xf);

	trace_kvm_apic_read(offset, result);

	switch (len) {
	case 1:
	case 2:
	case 4:
		memcpy(data, (char *)&result + alignment, len);
		break;
	default:
		printk(KERN_ERR "Local APIC read with len = %x, "
		       "should be 1,2, or 4 instead\n", len);
		break;
	}
	return 0;
}

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
	return apic_hw_enabled(apic) &&
	    addr >= apic->base_address &&
	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_io_device *this,
			   gpa_t address, int len, void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	u32 offset = address - apic->base_address;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	apic_reg_read(apic, offset, len, data);

	return 0;
}

static void update_divide_count(struct kvm_lapic *apic)
{
	u32 tmp1, tmp2, tdcr;

	tdcr = apic_get_reg(apic, APIC_TDCR);
	/* the 3-bit divide code lives in TDCR bits 0, 1 and 3;
	 * code + 1 (mod 8) gives the power of two to divide by,
	 * so code 0b111 means divide by 1 */
	tmp1 = tdcr & 0xf;
	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
	apic->divide_count = 0x1 << (tmp2 & 0x7);

	apic_debug("timer divide count is 0x%x\n",
		   apic->divide_count);
}
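
/*
 * The LVT timer has three modes, selected by the timer_mode_mask bits of
 * APIC_LVTT: one-shot and periodic count TMICT down in divided bus
 * cycles, while TSC-deadline mode arms the hrtimer from the distance
 * between the guest TSC and the programmed deadline.
 */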
static void start_apic_timer(struct kvm_lapic *apic)
{
	ktime_t now;
	atomic_set(&apic->lapic_timer.pending, 0);

	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
		/* lapic timer in oneshot or periodic mode */
		now = apic->lapic_timer.timer.base->get_time();
		apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
			    * APIC_BUS_CYCLE_NS * apic->divide_count;

		if (!apic->lapic_timer.period)
			return;
		/*
		 * Do not allow the guest to program periodic timers with small
		 * interval, since the hrtimers are not throttled by the host
		 * scheduler.
		 */
		if (apic_lvtt_period(apic)) {
			s64 min_period = min_timer_period_us * 1000LL;

			if (apic->lapic_timer.period < min_period) {
				pr_info_ratelimited(
				    "kvm: vcpu %i: requested %lld ns "
				    "lapic timer period limited to %lld ns\n",
				    apic->vcpu->vcpu_id,
				    apic->lapic_timer.period, min_period);
				apic->lapic_timer.period = min_period;
			}
		}

		hrtimer_start(&apic->lapic_timer.timer,
			      ktime_add_ns(now, apic->lapic_timer.period),
			      HRTIMER_MODE_ABS);

		apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
			   PRIx64 ", "
			   "timer initial count 0x%x, period %lldns, "
			   "expire @ 0x%016" PRIx64 ".\n", __func__,
			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
			   apic_get_reg(apic, APIC_TMICT),
			   apic->lapic_timer.period,
			   ktime_to_ns(ktime_add_ns(now,
					apic->lapic_timer.period)));
	} else if (apic_lvtt_tscdeadline(apic)) {
		/* lapic timer in tsc deadline mode */
		u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
		u64 ns = 0;
		struct kvm_vcpu *vcpu = apic->vcpu;
		unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
		unsigned long flags;

		if (unlikely(!tscdeadline || !this_tsc_khz))
			return;

		local_irq_save(flags);

		now = apic->lapic_timer.timer.base->get_time();
		guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
		if (likely(tscdeadline > guest_tsc)) {
			/* convert TSC ticks to ns: ticks * 10^6 / tsc_khz */
			ns = (tscdeadline - guest_tsc) * 1000000ULL;
			do_div(ns, this_tsc_khz);
		}
		hrtimer_start(&apic->lapic_timer.timer,
			ktime_add_ns(now, ns), HRTIMER_MODE_ABS);

		local_irq_restore(flags);
	}
}

static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
{
	int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));

	if (apic_lvt_nmi_mode(lvt0_val)) {
		if (!nmi_wd_enabled) {
			apic_debug("Receive NMI setting on APIC_LVT0 "
				   "for cpu %d\n", apic->vcpu->vcpu_id);
			apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
		}
	} else if (nmi_wd_enabled)
		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
}
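
/*
 * Returns 0 on success, nonzero for a write that is reserved or invalid
 * in the current mode (e.g. APIC_ID, APIC_LDR and APIC_DFR are not
 * writable in x2APIC mode); the x2APIC and Hyper-V MSR entry points
 * below propagate that result as an error to their callers.
 */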
static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
	int ret = 0;

	trace_kvm_apic_write(reg, val);

	switch (reg) {
	case APIC_ID:		/* Local APIC ID */
		if (!apic_x2apic_mode(apic))
			apic_set_reg(apic, APIC_ID, val);
		else
			ret = 1;
		break;

	case APIC_TASKPRI:
		report_tpr_access(apic, true);
		apic_set_tpr(apic, val & 0xff);
		break;

	case APIC_EOI:
		apic_set_eoi(apic);
		break;

	case APIC_LDR:
		if (!apic_x2apic_mode(apic))
			apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
		else
			ret = 1;
		break;

	case APIC_DFR:
		if (!apic_x2apic_mode(apic))
			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
		else
			ret = 1;
		break;

	case APIC_SPIV: {
		u32 mask = 0x3ff;
		if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
			mask |= APIC_SPIV_DIRECTED_EOI;
		apic_set_reg(apic, APIC_SPIV, val & mask);
		if (!(val & APIC_SPIV_APIC_ENABLED)) {
			int i;
			u32 lvt_val;

			for (i = 0; i < APIC_LVT_NUM; i++) {
				lvt_val = apic_get_reg(apic,
						       APIC_LVTT + 0x10 * i);
				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
					     lvt_val | APIC_LVT_MASKED);
			}
			atomic_set(&apic->lapic_timer.pending, 0);

		}
		break;
	}
	case APIC_ICR:
		/* No delay here, so we always clear the pending bit */
		apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
		apic_send_ipi(apic);
		break;

	case APIC_ICR2:
		if (!apic_x2apic_mode(apic))
			val &= 0xff000000;
		apic_set_reg(apic, APIC_ICR2, val);
		break;

	case APIC_LVT0:
		apic_manage_nmi_watchdog(apic, val);
		/* fall through */
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT1:
	case APIC_LVTERR:
		/* TODO: Check vector */
		if (!apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;

		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
		apic_set_reg(apic, reg, val);

		break;

	case APIC_LVTT:
		if ((apic_get_reg(apic, APIC_LVTT) &
		    apic->lapic_timer.timer_mode_mask) !=
		   (val & apic->lapic_timer.timer_mode_mask))
			hrtimer_cancel(&apic->lapic_timer.timer);

		if (!apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;
		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
		apic_set_reg(apic, APIC_LVTT, val);
		break;

	case APIC_TMICT:
		if (apic_lvtt_tscdeadline(apic))
			break;

		hrtimer_cancel(&apic->lapic_timer.timer);
		apic_set_reg(apic, APIC_TMICT, val);
		start_apic_timer(apic);
		break;

	case APIC_TDCR:
		if (val & 4)
			apic_debug("KVM_WRITE:TDCR %x\n", val);
		apic_set_reg(apic, APIC_TDCR, val);
		update_divide_count(apic);
		break;

	case APIC_ESR:
		if (apic_x2apic_mode(apic) && val != 0) {
			apic_debug("KVM_WRITE:ESR not zero %x\n", val);
			ret = 1;
		}
		break;

	case APIC_SELF_IPI:
		if (apic_x2apic_mode(apic)) {
			apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
		} else
			ret = 1;
		break;
	default:
		ret = 1;
		break;
	}
	if (ret)
		apic_debug("Local APIC Write to read-only register %x\n", reg);
	return ret;
}

static int apic_mmio_write(struct kvm_io_device *this,
			    gpa_t address, int len, const void *data)
{
	struct kvm_lapic *apic = to_lapic(this);
	unsigned int offset = address - apic->base_address;
	u32 val;

	if (!apic_mmio_in_range(apic, address))
		return -EOPNOTSUPP;

	/*
	 * APIC registers are aligned on 128-bit boundaries, and
	 * 32/64/128-bit registers must be accessed as 32-bit quantities.
	 * Refer to SDM 8.4.1.
	 */
	if (len != 4 || (offset & 0xf)) {
		/* Don't shout loud, $infamous_os would cause only noise. */
		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
		return 0;
	}

	val = *(u32 *)data;

	/* too common printing */
	if (offset != APIC_EOI)
		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
			   "0x%x\n", __func__, offset, len, val);

	apic_reg_write(apic, offset & 0xff0, val);

	return 0;
}

void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic)
		apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);

void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
	if (!vcpu->arch.apic)
		return;

	hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);

	if (vcpu->arch.apic->regs)
		free_page((unsigned long)vcpu->arch.apic->regs);

	kfree(vcpu->arch.apic);
}

/*
 *----------------------------------------------------------------------
 * LAPIC interface
 *----------------------------------------------------------------------
 */

u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	if (!apic)
		return 0;

	if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
		return 0;

	return apic->lapic_timer.tscdeadline;
}

void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	if (!apic)
		return;

	if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
		return;

	hrtimer_cancel(&apic->lapic_timer.timer);
	apic->lapic_timer.tscdeadline = data;
	start_apic_timer(apic);
}

void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic)
		return;
	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
}

u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u64 tpr;

	if (!apic)
		return 0;
	tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);

	return (tpr & 0xf0) >> 4;
}

void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic) {
		value |= MSR_IA32_APICBASE_BSP;
		vcpu->arch.apic_base = value;
		return;
	}

	if (!kvm_vcpu_is_bsp(apic->vcpu))
		value &= ~MSR_IA32_APICBASE_BSP;

	vcpu->arch.apic_base = value;
	if (apic_x2apic_mode(apic)) {
		/*
		 * Derive the logical ID per the x2APIC spec: bits 31:16
		 * are the cluster (id >> 4), bits 15:0 the member bit
		 * (e.g. id 5 -> LDR 0x00000020).
		 */
		u32 id = kvm_apic_id(apic);
		u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
		apic_set_reg(apic, APIC_LDR, ldr);
	}
	apic->base_address = apic->vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;

	/* with FSB-delivered interrupts, we can restart APIC functionality */
	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);

}
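
/*
 * Bring the APIC to its architectural power-up state: all LVT entries
 * masked, flat DFR, the APIC software-disabled (SPIV enable bit clear),
 * and every IRR/ISR/TMR word zeroed.
 */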
void kvm_lapic_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;
	int i;

	apic_debug("%s\n", __func__);

	ASSERT(vcpu);
	apic = vcpu->arch.apic;
	ASSERT(apic != NULL);

	/* Stop the timer in case it's a reset to an active apic */
	hrtimer_cancel(&apic->lapic_timer.timer);

	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
	kvm_apic_set_version(apic->vcpu);

	for (i = 0; i < APIC_LVT_NUM; i++)
		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
	apic_set_reg(apic, APIC_LVT0,
		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));

	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
	apic_set_reg(apic, APIC_SPIV, 0xff);
	apic_set_reg(apic, APIC_TASKPRI, 0);
	apic_set_reg(apic, APIC_LDR, 0);
	apic_set_reg(apic, APIC_ESR, 0);
	apic_set_reg(apic, APIC_ICR, 0);
	apic_set_reg(apic, APIC_ICR2, 0);
	apic_set_reg(apic, APIC_TDCR, 0);
	apic_set_reg(apic, APIC_TMICT, 0);
	for (i = 0; i < 8; i++) {
		apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
	}
	apic->irr_pending = false;
	apic->isr_count = 0;
	apic->highest_isr_cache = -1;
	update_divide_count(apic);
	atomic_set(&apic->lapic_timer.pending, 0);
	if (kvm_vcpu_is_bsp(vcpu))
		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
	vcpu->arch.pv_eoi.msr_val = 0;
	apic_update_ppr(apic);

	vcpu->arch.apic_arb_prio = 0;
	vcpu->arch.apic_attention = 0;

	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
		   vcpu, kvm_apic_id(apic),
		   vcpu->arch.apic_base, apic->base_address);
}

bool kvm_apic_present(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic);
}

int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
}

/*
 *----------------------------------------------------------------------
 * timer interface
 *----------------------------------------------------------------------
 */

static bool lapic_is_periodic(struct kvm_timer *ktimer)
{
	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
					      lapic_timer);
	return apic_lvtt_period(apic);
}

int apic_has_pending_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *lapic = vcpu->arch.apic;

	if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
		return atomic_read(&lapic->lapic_timer.pending);

	return 0;
}

int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
	u32 reg = apic_get_reg(apic, lvt_type);
	int vector, mode, trig_mode;

	if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
		vector = reg & APIC_VECTOR_MASK;
		mode = reg & APIC_MODE_MASK;
		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
		return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
	}
	return 0;
}

void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic)
		kvm_apic_local_deliver(apic, APIC_LVT0);
}

static struct kvm_timer_ops lapic_timer_ops = {
	.is_periodic = lapic_is_periodic,
};

static const struct kvm_io_device_ops apic_mmio_ops = {
	.read     = apic_mmio_read,
	.write    = apic_mmio_write,
};
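
/*
 * hrtimer callback: it only records the tick and pokes the vcpu; the
 * interrupt itself is injected later, from kvm_inject_apic_timer_irqs()
 * on the vcpu's own path.
 */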
static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
{
	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
	struct kvm_vcpu *vcpu = ktimer->vcpu;
	wait_queue_head_t *q = &vcpu->wq;

	/*
	 * There is a race window between reading and incrementing, but we do
	 * not care about potentially losing timer events in the !reinject
	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
	 * in vcpu_enter_guest.
	 */
	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
		atomic_inc(&ktimer->pending);
		/* FIXME: this code should not know anything about vcpus */
		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
	}

	if (waitqueue_active(q))
		wake_up_interruptible(q);

	if (ktimer->t_ops->is_periodic(ktimer)) {
		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
		return HRTIMER_RESTART;
	} else
		return HRTIMER_NORESTART;
}

int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic;

	ASSERT(vcpu != NULL);
	apic_debug("apic_init %d\n", vcpu->vcpu_id);

	apic = kzalloc(sizeof(*apic), GFP_KERNEL);
	if (!apic)
		goto nomem;

	vcpu->arch.apic = apic;

	apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
	if (!apic->regs) {
		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
		       vcpu->vcpu_id);
		goto nomem_free_apic;
	}
	apic->vcpu = vcpu;

	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS);
	apic->lapic_timer.timer.function = apic_timer_fn;
	apic->lapic_timer.t_ops = &lapic_timer_ops;
	apic->lapic_timer.kvm = vcpu->kvm;
	apic->lapic_timer.vcpu = vcpu;

	apic->base_address = APIC_DEFAULT_PHYS_BASE;
	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;

	kvm_lapic_reset(vcpu);
	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);

	return 0;
nomem_free_apic:
	kfree(apic);
nomem:
	return -ENOMEM;
}

int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int highest_irr;

	if (!apic || !apic_enabled(apic))
		return -1;

	apic_update_ppr(apic);
	highest_irr = apic_find_highest_irr(apic);
	if ((highest_irr == -1) ||
	    ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
		return -1;
	return highest_irr;
}

int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
	u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
	int r = 0;

	if (!apic_hw_enabled(vcpu->arch.apic))
		r = 1;
	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
		r = 1;
	return r;
}

void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic && atomic_read(&apic->lapic_timer.pending) > 0) {
		if (kvm_apic_local_deliver(apic, APIC_LVTT))
			atomic_dec(&apic->lapic_timer.pending);
	}
}

int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
	int vector = kvm_apic_has_interrupt(vcpu);
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (vector == -1)
		return -1;

	/* emulate the interrupt-acknowledge cycle: the highest pending
	 * vector moves from IRR to ISR and the PPR is recomputed */
	apic_set_isr(vector, apic);
	apic_update_ppr(apic);
	apic_clear_irr(vector, apic);
	return vector;
}
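
/*
 * Called after userspace has rewritten the register page (e.g. on
 * save/restore or migration): re-derive the state that lives outside
 * the page - the timer is re-armed, isr_count is recounted from the ISR,
 * the highest-ISR cache is invalidated and irr_pending is set
 * conservatively.
 */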
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic->base_address = vcpu->arch.apic_base &
			     MSR_IA32_APICBASE_BASE;
	kvm_apic_set_version(vcpu);

	apic_update_ppr(apic);
	hrtimer_cancel(&apic->lapic_timer.timer);
	update_divide_count(apic);
	start_apic_timer(apic);
	apic->irr_pending = true;
	apic->isr_count = count_vectors(apic->regs + APIC_ISR);
	apic->highest_isr_cache = -1;
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}

void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct hrtimer *timer;

	if (!apic)
		return;

	timer = &apic->lapic_timer.timer;
	if (hrtimer_cancel(timer))
		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}

/*
 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
 *
 * Detect whether guest triggered PV EOI since the
 * last entry. If yes, set EOI on guest's behalf.
 * Clear PV EOI in guest memory in any case.
 */
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	bool pending;
	int vector;
	/*
	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
	 *
	 * KVM_APIC_PV_EOI_PENDING is unset:
	 *	-> host disabled PV EOI.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
	 *	-> host enabled PV EOI, guest did not execute EOI yet.
	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
	 *	-> host enabled PV EOI, guest executed EOI.
	 */
	BUG_ON(!pv_eoi_enabled(vcpu));
	pending = pv_eoi_get_pending(vcpu);
	/*
	 * Clear pending bit in any case: it will be set again on vmentry.
	 * While this might not be ideal from performance point of view,
	 * this makes sure pv eoi is only enabled when we know it's safe.
	 */
	pv_eoi_clr_pending(vcpu);
	if (pending)
		return;
	vector = apic_set_eoi(apic);
	trace_kvm_pv_eoi(apic, vector);
}

void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
	u32 data;
	void *vapic;

	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
	data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
	kunmap_atomic(vapic);

	apic_set_tpr(vcpu->arch.apic, data & 0xff);
}

/*
 * apic_sync_pv_eoi_to_guest - called before vmentry
 *
 * Detect whether it's safe to enable PV EOI and
 * if yes do so.
 */
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
					struct kvm_lapic *apic)
{
	if (!pv_eoi_enabled(vcpu) ||
	    /* IRR set or many bits in ISR: could be nested. */
	    apic->irr_pending ||
	    /* Cache not set: could be safe but we don't bother. */
	    apic->highest_isr_cache == -1 ||
	    /* Need EOI to update ioapic. */
	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
		/*
		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
		 * so we need not do anything here.
		 */
		return;
	}

	pv_eoi_set_pending(apic->vcpu);
}

void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
	u32 data, tpr;
	int max_irr, max_isr;
	struct kvm_lapic *apic = vcpu->arch.apic;
	void *vapic;

	apic_sync_pv_eoi_to_guest(vcpu, apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
	max_irr = apic_find_highest_irr(apic);
	if (max_irr < 0)
		max_irr = 0;
	max_isr = apic_find_highest_isr(apic);
	if (max_isr < 0)
		max_isr = 0;
	/* vapic word layout: byte 0 = TPR, byte 1 = highest in-service
	 * priority class, byte 3 = highest pending vector */
	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);

	vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
	*(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
	kunmap_atomic(vapic);
}

void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
	vcpu->arch.apic->vapic_addr = vapic_addr;
	if (vapic_addr)
		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
	else
		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
}

int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4;

	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
		return 1;

	/* MSR 0x830 is the 64-bit x2APIC ICR: write the destination half
	 * (ICR2) before the command half, which triggers the IPI */
	if (msr == 0x830)
		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return apic_reg_write(apic, reg, (u32)data);
}

int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;

	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
		return 1;

	if (apic_reg_read(apic, reg, 4, &low))
		return 1;
	if (msr == 0x830)
		apic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!irqchip_in_kernel(vcpu->kvm))
		return 1;

	/* for the 64-bit ICR, write the destination half (ICR2) before
	 * the command half, which triggers the IPI */
	if (reg == APIC_ICR)
		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
	return apic_reg_write(apic, reg, (u32)data);
}

int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u32 low, high = 0;

	if (!irqchip_in_kernel(vcpu->kvm))
		return 1;

	if (apic_reg_read(apic, reg, 4, &low))
		return 1;
	if (reg == APIC_ICR)
		apic_reg_read(apic, APIC_ICR2, 4, &high);

	*data = (((u64)high) << 32) | low;

	return 0;
}

int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
{
	u64 addr = data & ~KVM_MSR_ENABLED;
	if (!IS_ALIGNED(addr, 4))
		return 1;

	vcpu->arch.pv_eoi.msr_val = data;
	if (!pv_eoi_enabled(vcpu))
		return 0;
	return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
					 addr);
}