1 2 /* 3 * Local APIC virtualization 4 * 5 * Copyright (C) 2006 Qumranet, Inc. 6 * Copyright (C) 2007 Novell 7 * Copyright (C) 2007 Intel 8 * 9 * Authors: 10 * Dor Laor <dor.laor@qumranet.com> 11 * Gregory Haskins <ghaskins@novell.com> 12 * Yaozu (Eddie) Dong <eddie.dong@intel.com> 13 * 14 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation. 15 * 16 * This work is licensed under the terms of the GNU GPL, version 2. See 17 * the COPYING file in the top-level directory. 18 */ 19 20 #include <linux/kvm_host.h> 21 #include <linux/kvm.h> 22 #include <linux/mm.h> 23 #include <linux/highmem.h> 24 #include <linux/smp.h> 25 #include <linux/hrtimer.h> 26 #include <linux/io.h> 27 #include <linux/module.h> 28 #include <linux/math64.h> 29 #include <asm/processor.h> 30 #include <asm/msr.h> 31 #include <asm/page.h> 32 #include <asm/current.h> 33 #include <asm/apicdef.h> 34 #include <asm/atomic.h> 35 #include <asm/apicdef.h> 36 #include "kvm_cache_regs.h" 37 #include "irq.h" 38 #include "trace.h" 39 #include "x86.h" 40 41 #ifndef CONFIG_X86_64 42 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) 43 #else 44 #define mod_64(x, y) ((x) % (y)) 45 #endif 46 47 #define PRId64 "d" 48 #define PRIx64 "llx" 49 #define PRIu64 "u" 50 #define PRIo64 "o" 51 52 #define APIC_BUS_CYCLE_NS 1 53 54 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ 55 #define apic_debug(fmt, arg...) 56 57 #define APIC_LVT_NUM 6 58 /* 14 is the version for Xeon and Pentium 8.4.8*/ 59 #define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16)) 60 #define LAPIC_MMIO_LENGTH (1 << 12) 61 /* followed define is not in apicdef.h */ 62 #define APIC_SHORT_MASK 0xc0000 63 #define APIC_DEST_NOSHORT 0x0 64 #define APIC_DEST_MASK 0x800 65 #define MAX_APIC_VECTOR 256 66 67 #define VEC_POS(v) ((v) & (32 - 1)) 68 #define REG_POS(v) (((v) >> 5) << 4) 69 70 static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) 71 { 72 return *((u32 *) (apic->regs + reg_off)); 73 } 74 75 static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) 76 { 77 *((u32 *) (apic->regs + reg_off)) = val; 78 } 79 80 static inline int apic_test_and_set_vector(int vec, void *bitmap) 81 { 82 return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 83 } 84 85 static inline int apic_test_and_clear_vector(int vec, void *bitmap) 86 { 87 return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 88 } 89 90 static inline void apic_set_vector(int vec, void *bitmap) 91 { 92 set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 93 } 94 95 static inline void apic_clear_vector(int vec, void *bitmap) 96 { 97 clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 98 } 99 100 static inline int apic_hw_enabled(struct kvm_lapic *apic) 101 { 102 return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; 103 } 104 105 static inline int apic_sw_enabled(struct kvm_lapic *apic) 106 { 107 return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; 108 } 109 110 static inline int apic_enabled(struct kvm_lapic *apic) 111 { 112 return apic_sw_enabled(apic) && apic_hw_enabled(apic); 113 } 114 115 #define LVT_MASK \ 116 (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK) 117 118 #define LINT_MASK \ 119 (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ 120 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) 121 122 static inline int kvm_apic_id(struct kvm_lapic *apic) 123 { 124 return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff; 125 } 126 127 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) 128 { 129 return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); 130 } 131 132 static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) 133 { 134 return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; 135 } 136 137 static inline int apic_lvtt_period(struct kvm_lapic *apic) 138 { 139 return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; 140 } 141 142 static inline int apic_lvt_nmi_mode(u32 lvt_val) 143 { 144 return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; 145 } 146 147 void kvm_apic_set_version(struct kvm_vcpu *vcpu) 148 { 149 struct kvm_lapic *apic = vcpu->arch.apic; 150 struct kvm_cpuid_entry2 *feat; 151 u32 v = APIC_VERSION; 152 153 if (!irqchip_in_kernel(vcpu->kvm)) 154 return; 155 156 feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); 157 if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31)))) 158 v |= APIC_LVR_DIRECTED_EOI; 159 apic_set_reg(apic, APIC_LVR, v); 160 } 161 162 static inline int apic_x2apic_mode(struct kvm_lapic *apic) 163 { 164 return apic->vcpu->arch.apic_base & X2APIC_ENABLE; 165 } 166 167 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { 168 LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ 169 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ 170 LVT_MASK | APIC_MODE_MASK, /* LVTPC */ 171 LINT_MASK, LINT_MASK, /* LVT0-1 */ 172 LVT_MASK /* LVTERR */ 173 }; 174 175 static int find_highest_vector(void *bitmap) 176 { 177 u32 *word = bitmap; 178 int word_offset = MAX_APIC_VECTOR >> 5; 179 180 while ((word_offset != 0) && (word[(--word_offset) << 2] == 0)) 181 continue; 182 183 if (likely(!word_offset && !word[0])) 184 return -1; 185 else 186 return fls(word[word_offset << 2]) - 1 + (word_offset << 5); 187 } 188 189 static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) 190 { 191 apic->irr_pending = true; 192 return apic_test_and_set_vector(vec, apic->regs + APIC_IRR); 193 } 194 195 static inline int apic_search_irr(struct kvm_lapic *apic) 196 { 197 return find_highest_vector(apic->regs + APIC_IRR); 198 } 199 200 static inline int apic_find_highest_irr(struct kvm_lapic *apic) 201 { 202 int result; 203 204 if (!apic->irr_pending) 205 return -1; 206 207 result = apic_search_irr(apic); 208 ASSERT(result == -1 || result >= 16); 209 210 return result; 211 } 212 213 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) 214 { 215 apic->irr_pending = false; 216 apic_clear_vector(vec, apic->regs + APIC_IRR); 217 if (apic_search_irr(apic) != -1) 218 apic->irr_pending = true; 219 } 220 221 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) 222 { 223 struct kvm_lapic *apic = vcpu->arch.apic; 224 int highest_irr; 225 226 /* This may race with setting of irr in __apic_accept_irq() and 227 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq 228 * will cause vmexit immediately and the value will be recalculated 229 * on the next vmentry. 230 */ 231 if (!apic) 232 return 0; 233 highest_irr = apic_find_highest_irr(apic); 234 235 return highest_irr; 236 } 237 238 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 239 int vector, int level, int trig_mode); 240 241 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) 242 { 243 struct kvm_lapic *apic = vcpu->arch.apic; 244 245 return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, 246 irq->level, irq->trig_mode); 247 } 248 249 static inline int apic_find_highest_isr(struct kvm_lapic *apic) 250 { 251 int result; 252 253 result = find_highest_vector(apic->regs + APIC_ISR); 254 ASSERT(result == -1 || result >= 16); 255 256 return result; 257 } 258 259 static void apic_update_ppr(struct kvm_lapic *apic) 260 { 261 u32 tpr, isrv, ppr; 262 int isr; 263 264 tpr = apic_get_reg(apic, APIC_TASKPRI); 265 isr = apic_find_highest_isr(apic); 266 isrv = (isr != -1) ? isr : 0; 267 268 if ((tpr & 0xf0) >= (isrv & 0xf0)) 269 ppr = tpr & 0xff; 270 else 271 ppr = isrv & 0xf0; 272 273 apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x", 274 apic, ppr, isr, isrv); 275 276 apic_set_reg(apic, APIC_PROCPRI, ppr); 277 } 278 279 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) 280 { 281 apic_set_reg(apic, APIC_TASKPRI, tpr); 282 apic_update_ppr(apic); 283 } 284 285 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) 286 { 287 return dest == 0xff || kvm_apic_id(apic) == dest; 288 } 289 290 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) 291 { 292 int result = 0; 293 u32 logical_id; 294 295 if (apic_x2apic_mode(apic)) { 296 logical_id = apic_get_reg(apic, APIC_LDR); 297 return logical_id & mda; 298 } 299 300 logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR)); 301 302 switch (apic_get_reg(apic, APIC_DFR)) { 303 case APIC_DFR_FLAT: 304 if (logical_id & mda) 305 result = 1; 306 break; 307 case APIC_DFR_CLUSTER: 308 if (((logical_id >> 4) == (mda >> 0x4)) 309 && (logical_id & mda & 0xf)) 310 result = 1; 311 break; 312 default: 313 printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n", 314 apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); 315 break; 316 } 317 318 return result; 319 } 320 321 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 322 int short_hand, int dest, int dest_mode) 323 { 324 int result = 0; 325 struct kvm_lapic *target = vcpu->arch.apic; 326 327 apic_debug("target %p, source %p, dest 0x%x, " 328 "dest_mode 0x%x, short_hand 0x%x\n", 329 target, source, dest, dest_mode, short_hand); 330 331 ASSERT(!target); 332 switch (short_hand) { 333 case APIC_DEST_NOSHORT: 334 if (dest_mode == 0) 335 /* Physical mode. */ 336 result = kvm_apic_match_physical_addr(target, dest); 337 else 338 /* Logical mode. */ 339 result = kvm_apic_match_logical_addr(target, dest); 340 break; 341 case APIC_DEST_SELF: 342 result = (target == source); 343 break; 344 case APIC_DEST_ALLINC: 345 result = 1; 346 break; 347 case APIC_DEST_ALLBUT: 348 result = (target != source); 349 break; 350 default: 351 printk(KERN_WARNING "Bad dest shorthand value %x\n", 352 short_hand); 353 break; 354 } 355 356 return result; 357 } 358 359 /* 360 * Add a pending IRQ into lapic. 361 * Return 1 if successfully added and 0 if discarded. 362 */ 363 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 364 int vector, int level, int trig_mode) 365 { 366 int result = 0; 367 struct kvm_vcpu *vcpu = apic->vcpu; 368 369 switch (delivery_mode) { 370 case APIC_DM_LOWEST: 371 vcpu->arch.apic_arb_prio++; 372 case APIC_DM_FIXED: 373 /* FIXME add logic for vcpu on reset */ 374 if (unlikely(!apic_enabled(apic))) 375 break; 376 377 result = !apic_test_and_set_irr(vector, apic); 378 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, 379 trig_mode, vector, !result); 380 if (!result) { 381 if (trig_mode) 382 apic_debug("level trig mode repeatedly for " 383 "vector %d", vector); 384 break; 385 } 386 387 if (trig_mode) { 388 apic_debug("level trig mode for vector %d", vector); 389 apic_set_vector(vector, apic->regs + APIC_TMR); 390 } else 391 apic_clear_vector(vector, apic->regs + APIC_TMR); 392 kvm_vcpu_kick(vcpu); 393 break; 394 395 case APIC_DM_REMRD: 396 printk(KERN_DEBUG "Ignoring delivery mode 3\n"); 397 break; 398 399 case APIC_DM_SMI: 400 printk(KERN_DEBUG "Ignoring guest SMI\n"); 401 break; 402 403 case APIC_DM_NMI: 404 result = 1; 405 kvm_inject_nmi(vcpu); 406 kvm_vcpu_kick(vcpu); 407 break; 408 409 case APIC_DM_INIT: 410 if (level) { 411 result = 1; 412 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) 413 printk(KERN_DEBUG 414 "INIT on a runnable vcpu %d\n", 415 vcpu->vcpu_id); 416 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; 417 kvm_vcpu_kick(vcpu); 418 } else { 419 apic_debug("Ignoring de-assert INIT to vcpu %d\n", 420 vcpu->vcpu_id); 421 } 422 break; 423 424 case APIC_DM_STARTUP: 425 apic_debug("SIPI to vcpu %d vector 0x%02x\n", 426 vcpu->vcpu_id, vector); 427 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { 428 result = 1; 429 vcpu->arch.sipi_vector = vector; 430 vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; 431 kvm_vcpu_kick(vcpu); 432 } 433 break; 434 435 case APIC_DM_EXTINT: 436 /* 437 * Should only be called by kvm_apic_local_deliver() with LVT0, 438 * before NMI watchdog was enabled. Already handled by 439 * kvm_apic_accept_pic_intr(). 440 */ 441 break; 442 443 default: 444 printk(KERN_ERR "TODO: unsupported delivery mode %x\n", 445 delivery_mode); 446 break; 447 } 448 return result; 449 } 450 451 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) 452 { 453 return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; 454 } 455 456 static void apic_set_eoi(struct kvm_lapic *apic) 457 { 458 int vector = apic_find_highest_isr(apic); 459 int trigger_mode; 460 /* 461 * Not every write EOI will has corresponding ISR, 462 * one example is when Kernel check timer on setup_IO_APIC 463 */ 464 if (vector == -1) 465 return; 466 467 apic_clear_vector(vector, apic->regs + APIC_ISR); 468 apic_update_ppr(apic); 469 470 if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) 471 trigger_mode = IOAPIC_LEVEL_TRIG; 472 else 473 trigger_mode = IOAPIC_EDGE_TRIG; 474 if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) { 475 mutex_lock(&apic->vcpu->kvm->irq_lock); 476 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); 477 mutex_unlock(&apic->vcpu->kvm->irq_lock); 478 } 479 } 480 481 static void apic_send_ipi(struct kvm_lapic *apic) 482 { 483 u32 icr_low = apic_get_reg(apic, APIC_ICR); 484 u32 icr_high = apic_get_reg(apic, APIC_ICR2); 485 struct kvm_lapic_irq irq; 486 487 irq.vector = icr_low & APIC_VECTOR_MASK; 488 irq.delivery_mode = icr_low & APIC_MODE_MASK; 489 irq.dest_mode = icr_low & APIC_DEST_MASK; 490 irq.level = icr_low & APIC_INT_ASSERT; 491 irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; 492 irq.shorthand = icr_low & APIC_SHORT_MASK; 493 if (apic_x2apic_mode(apic)) 494 irq.dest_id = icr_high; 495 else 496 irq.dest_id = GET_APIC_DEST_FIELD(icr_high); 497 498 trace_kvm_apic_ipi(icr_low, irq.dest_id); 499 500 apic_debug("icr_high 0x%x, icr_low 0x%x, " 501 "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " 502 "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", 503 icr_high, icr_low, irq.shorthand, irq.dest_id, 504 irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, 505 irq.vector); 506 507 mutex_lock(&apic->vcpu->kvm->irq_lock); 508 kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); 509 mutex_unlock(&apic->vcpu->kvm->irq_lock); 510 } 511 512 static u32 apic_get_tmcct(struct kvm_lapic *apic) 513 { 514 ktime_t remaining; 515 s64 ns; 516 u32 tmcct; 517 518 ASSERT(apic != NULL); 519 520 /* if initial count is 0, current count should also be 0 */ 521 if (apic_get_reg(apic, APIC_TMICT) == 0) 522 return 0; 523 524 remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer); 525 if (ktime_to_ns(remaining) < 0) 526 remaining = ktime_set(0, 0); 527 528 ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period); 529 tmcct = div64_u64(ns, 530 (APIC_BUS_CYCLE_NS * apic->divide_count)); 531 532 return tmcct; 533 } 534 535 static void __report_tpr_access(struct kvm_lapic *apic, bool write) 536 { 537 struct kvm_vcpu *vcpu = apic->vcpu; 538 struct kvm_run *run = vcpu->run; 539 540 set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); 541 run->tpr_access.rip = kvm_rip_read(vcpu); 542 run->tpr_access.is_write = write; 543 } 544 545 static inline void report_tpr_access(struct kvm_lapic *apic, bool write) 546 { 547 if (apic->vcpu->arch.tpr_access_reporting) 548 __report_tpr_access(apic, write); 549 } 550 551 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) 552 { 553 u32 val = 0; 554 555 if (offset >= LAPIC_MMIO_LENGTH) 556 return 0; 557 558 switch (offset) { 559 case APIC_ID: 560 if (apic_x2apic_mode(apic)) 561 val = kvm_apic_id(apic); 562 else 563 val = kvm_apic_id(apic) << 24; 564 break; 565 case APIC_ARBPRI: 566 printk(KERN_WARNING "Access APIC ARBPRI register " 567 "which is for P6\n"); 568 break; 569 570 case APIC_TMCCT: /* Timer CCR */ 571 val = apic_get_tmcct(apic); 572 break; 573 574 case APIC_TASKPRI: 575 report_tpr_access(apic, false); 576 /* fall thru */ 577 default: 578 apic_update_ppr(apic); 579 val = apic_get_reg(apic, offset); 580 break; 581 } 582 583 return val; 584 } 585 586 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev) 587 { 588 return container_of(dev, struct kvm_lapic, dev); 589 } 590 591 static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len, 592 void *data) 593 { 594 unsigned char alignment = offset & 0xf; 595 u32 result; 596 /* this bitmask has a bit cleared for each reserver register */ 597 static const u64 rmask = 0x43ff01ffffffe70cULL; 598 599 if ((alignment + len) > 4) { 600 apic_debug("KVM_APIC_READ: alignment error %x %d\n", 601 offset, len); 602 return 1; 603 } 604 605 if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) { 606 apic_debug("KVM_APIC_READ: read reserved register %x\n", 607 offset); 608 return 1; 609 } 610 611 result = __apic_read(apic, offset & ~0xf); 612 613 trace_kvm_apic_read(offset, result); 614 615 switch (len) { 616 case 1: 617 case 2: 618 case 4: 619 memcpy(data, (char *)&result + alignment, len); 620 break; 621 default: 622 printk(KERN_ERR "Local APIC read with len = %x, " 623 "should be 1,2, or 4 instead\n", len); 624 break; 625 } 626 return 0; 627 } 628 629 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) 630 { 631 return apic_hw_enabled(apic) && 632 addr >= apic->base_address && 633 addr < apic->base_address + LAPIC_MMIO_LENGTH; 634 } 635 636 static int apic_mmio_read(struct kvm_io_device *this, 637 gpa_t address, int len, void *data) 638 { 639 struct kvm_lapic *apic = to_lapic(this); 640 u32 offset = address - apic->base_address; 641 642 if (!apic_mmio_in_range(apic, address)) 643 return -EOPNOTSUPP; 644 645 apic_reg_read(apic, offset, len, data); 646 647 return 0; 648 } 649 650 static void update_divide_count(struct kvm_lapic *apic) 651 { 652 u32 tmp1, tmp2, tdcr; 653 654 tdcr = apic_get_reg(apic, APIC_TDCR); 655 tmp1 = tdcr & 0xf; 656 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; 657 apic->divide_count = 0x1 << (tmp2 & 0x7); 658 659 apic_debug("timer divide count is 0x%x\n", 660 apic->divide_count); 661 } 662 663 static void start_apic_timer(struct kvm_lapic *apic) 664 { 665 ktime_t now = apic->lapic_timer.timer.base->get_time(); 666 667 apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * 668 APIC_BUS_CYCLE_NS * apic->divide_count; 669 atomic_set(&apic->lapic_timer.pending, 0); 670 671 if (!apic->lapic_timer.period) 672 return; 673 /* 674 * Do not allow the guest to program periodic timers with small 675 * interval, since the hrtimers are not throttled by the host 676 * scheduler. 677 */ 678 if (apic_lvtt_period(apic)) { 679 if (apic->lapic_timer.period < NSEC_PER_MSEC/2) 680 apic->lapic_timer.period = NSEC_PER_MSEC/2; 681 } 682 683 hrtimer_start(&apic->lapic_timer.timer, 684 ktime_add_ns(now, apic->lapic_timer.period), 685 HRTIMER_MODE_ABS); 686 687 apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" 688 PRIx64 ", " 689 "timer initial count 0x%x, period %lldns, " 690 "expire @ 0x%016" PRIx64 ".\n", __func__, 691 APIC_BUS_CYCLE_NS, ktime_to_ns(now), 692 apic_get_reg(apic, APIC_TMICT), 693 apic->lapic_timer.period, 694 ktime_to_ns(ktime_add_ns(now, 695 apic->lapic_timer.period))); 696 } 697 698 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) 699 { 700 int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); 701 702 if (apic_lvt_nmi_mode(lvt0_val)) { 703 if (!nmi_wd_enabled) { 704 apic_debug("Receive NMI setting on APIC_LVT0 " 705 "for cpu %d\n", apic->vcpu->vcpu_id); 706 apic->vcpu->kvm->arch.vapics_in_nmi_mode++; 707 } 708 } else if (nmi_wd_enabled) 709 apic->vcpu->kvm->arch.vapics_in_nmi_mode--; 710 } 711 712 static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) 713 { 714 int ret = 0; 715 716 trace_kvm_apic_write(reg, val); 717 718 switch (reg) { 719 case APIC_ID: /* Local APIC ID */ 720 if (!apic_x2apic_mode(apic)) 721 apic_set_reg(apic, APIC_ID, val); 722 else 723 ret = 1; 724 break; 725 726 case APIC_TASKPRI: 727 report_tpr_access(apic, true); 728 apic_set_tpr(apic, val & 0xff); 729 break; 730 731 case APIC_EOI: 732 apic_set_eoi(apic); 733 break; 734 735 case APIC_LDR: 736 if (!apic_x2apic_mode(apic)) 737 apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK); 738 else 739 ret = 1; 740 break; 741 742 case APIC_DFR: 743 if (!apic_x2apic_mode(apic)) 744 apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); 745 else 746 ret = 1; 747 break; 748 749 case APIC_SPIV: { 750 u32 mask = 0x3ff; 751 if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) 752 mask |= APIC_SPIV_DIRECTED_EOI; 753 apic_set_reg(apic, APIC_SPIV, val & mask); 754 if (!(val & APIC_SPIV_APIC_ENABLED)) { 755 int i; 756 u32 lvt_val; 757 758 for (i = 0; i < APIC_LVT_NUM; i++) { 759 lvt_val = apic_get_reg(apic, 760 APIC_LVTT + 0x10 * i); 761 apic_set_reg(apic, APIC_LVTT + 0x10 * i, 762 lvt_val | APIC_LVT_MASKED); 763 } 764 atomic_set(&apic->lapic_timer.pending, 0); 765 766 } 767 break; 768 } 769 case APIC_ICR: 770 /* No delay here, so we always clear the pending bit */ 771 apic_set_reg(apic, APIC_ICR, val & ~(1 << 12)); 772 apic_send_ipi(apic); 773 break; 774 775 case APIC_ICR2: 776 if (!apic_x2apic_mode(apic)) 777 val &= 0xff000000; 778 apic_set_reg(apic, APIC_ICR2, val); 779 break; 780 781 case APIC_LVT0: 782 apic_manage_nmi_watchdog(apic, val); 783 case APIC_LVTT: 784 case APIC_LVTTHMR: 785 case APIC_LVTPC: 786 case APIC_LVT1: 787 case APIC_LVTERR: 788 /* TODO: Check vector */ 789 if (!apic_sw_enabled(apic)) 790 val |= APIC_LVT_MASKED; 791 792 val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; 793 apic_set_reg(apic, reg, val); 794 795 break; 796 797 case APIC_TMICT: 798 hrtimer_cancel(&apic->lapic_timer.timer); 799 apic_set_reg(apic, APIC_TMICT, val); 800 start_apic_timer(apic); 801 break; 802 803 case APIC_TDCR: 804 if (val & 4) 805 printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val); 806 apic_set_reg(apic, APIC_TDCR, val); 807 update_divide_count(apic); 808 break; 809 810 case APIC_ESR: 811 if (apic_x2apic_mode(apic) && val != 0) { 812 printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val); 813 ret = 1; 814 } 815 break; 816 817 case APIC_SELF_IPI: 818 if (apic_x2apic_mode(apic)) { 819 apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff)); 820 } else 821 ret = 1; 822 break; 823 default: 824 ret = 1; 825 break; 826 } 827 if (ret) 828 apic_debug("Local APIC Write to read-only register %x\n", reg); 829 return ret; 830 } 831 832 static int apic_mmio_write(struct kvm_io_device *this, 833 gpa_t address, int len, const void *data) 834 { 835 struct kvm_lapic *apic = to_lapic(this); 836 unsigned int offset = address - apic->base_address; 837 u32 val; 838 839 if (!apic_mmio_in_range(apic, address)) 840 return -EOPNOTSUPP; 841 842 /* 843 * APIC register must be aligned on 128-bits boundary. 844 * 32/64/128 bits registers must be accessed thru 32 bits. 845 * Refer SDM 8.4.1 846 */ 847 if (len != 4 || (offset & 0xf)) { 848 /* Don't shout loud, $infamous_os would cause only noise. */ 849 apic_debug("apic write: bad size=%d %lx\n", len, (long)address); 850 return 0; 851 } 852 853 val = *(u32*)data; 854 855 /* too common printing */ 856 if (offset != APIC_EOI) 857 apic_debug("%s: offset 0x%x with length 0x%x, and value is " 858 "0x%x\n", __func__, offset, len, val); 859 860 apic_reg_write(apic, offset & 0xff0, val); 861 862 return 0; 863 } 864 865 void kvm_free_lapic(struct kvm_vcpu *vcpu) 866 { 867 if (!vcpu->arch.apic) 868 return; 869 870 hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); 871 872 if (vcpu->arch.apic->regs_page) 873 __free_page(vcpu->arch.apic->regs_page); 874 875 kfree(vcpu->arch.apic); 876 } 877 878 /* 879 *---------------------------------------------------------------------- 880 * LAPIC interface 881 *---------------------------------------------------------------------- 882 */ 883 884 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) 885 { 886 struct kvm_lapic *apic = vcpu->arch.apic; 887 888 if (!apic) 889 return; 890 apic_set_tpr(apic, ((cr8 & 0x0f) << 4) 891 | (apic_get_reg(apic, APIC_TASKPRI) & 4)); 892 } 893 894 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) 895 { 896 struct kvm_lapic *apic = vcpu->arch.apic; 897 u64 tpr; 898 899 if (!apic) 900 return 0; 901 tpr = (u64) apic_get_reg(apic, APIC_TASKPRI); 902 903 return (tpr & 0xf0) >> 4; 904 } 905 906 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) 907 { 908 struct kvm_lapic *apic = vcpu->arch.apic; 909 910 if (!apic) { 911 value |= MSR_IA32_APICBASE_BSP; 912 vcpu->arch.apic_base = value; 913 return; 914 } 915 916 if (!kvm_vcpu_is_bsp(apic->vcpu)) 917 value &= ~MSR_IA32_APICBASE_BSP; 918 919 vcpu->arch.apic_base = value; 920 if (apic_x2apic_mode(apic)) { 921 u32 id = kvm_apic_id(apic); 922 u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf)); 923 apic_set_reg(apic, APIC_LDR, ldr); 924 } 925 apic->base_address = apic->vcpu->arch.apic_base & 926 MSR_IA32_APICBASE_BASE; 927 928 /* with FSB delivery interrupt, we can restart APIC functionality */ 929 apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " 930 "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address); 931 932 } 933 934 void kvm_lapic_reset(struct kvm_vcpu *vcpu) 935 { 936 struct kvm_lapic *apic; 937 int i; 938 939 apic_debug("%s\n", __func__); 940 941 ASSERT(vcpu); 942 apic = vcpu->arch.apic; 943 ASSERT(apic != NULL); 944 945 /* Stop the timer in case it's a reset to an active apic */ 946 hrtimer_cancel(&apic->lapic_timer.timer); 947 948 apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); 949 kvm_apic_set_version(apic->vcpu); 950 951 for (i = 0; i < APIC_LVT_NUM; i++) 952 apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); 953 apic_set_reg(apic, APIC_LVT0, 954 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); 955 956 apic_set_reg(apic, APIC_DFR, 0xffffffffU); 957 apic_set_reg(apic, APIC_SPIV, 0xff); 958 apic_set_reg(apic, APIC_TASKPRI, 0); 959 apic_set_reg(apic, APIC_LDR, 0); 960 apic_set_reg(apic, APIC_ESR, 0); 961 apic_set_reg(apic, APIC_ICR, 0); 962 apic_set_reg(apic, APIC_ICR2, 0); 963 apic_set_reg(apic, APIC_TDCR, 0); 964 apic_set_reg(apic, APIC_TMICT, 0); 965 for (i = 0; i < 8; i++) { 966 apic_set_reg(apic, APIC_IRR + 0x10 * i, 0); 967 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 968 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 969 } 970 apic->irr_pending = false; 971 update_divide_count(apic); 972 atomic_set(&apic->lapic_timer.pending, 0); 973 if (kvm_vcpu_is_bsp(vcpu)) 974 vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; 975 apic_update_ppr(apic); 976 977 vcpu->arch.apic_arb_prio = 0; 978 979 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" 980 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, 981 vcpu, kvm_apic_id(apic), 982 vcpu->arch.apic_base, apic->base_address); 983 } 984 985 bool kvm_apic_present(struct kvm_vcpu *vcpu) 986 { 987 return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic); 988 } 989 990 int kvm_lapic_enabled(struct kvm_vcpu *vcpu) 991 { 992 return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic); 993 } 994 995 /* 996 *---------------------------------------------------------------------- 997 * timer interface 998 *---------------------------------------------------------------------- 999 */ 1000 1001 static bool lapic_is_periodic(struct kvm_timer *ktimer) 1002 { 1003 struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, 1004 lapic_timer); 1005 return apic_lvtt_period(apic); 1006 } 1007 1008 int apic_has_pending_timer(struct kvm_vcpu *vcpu) 1009 { 1010 struct kvm_lapic *lapic = vcpu->arch.apic; 1011 1012 if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) 1013 return atomic_read(&lapic->lapic_timer.pending); 1014 1015 return 0; 1016 } 1017 1018 static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) 1019 { 1020 u32 reg = apic_get_reg(apic, lvt_type); 1021 int vector, mode, trig_mode; 1022 1023 if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { 1024 vector = reg & APIC_VECTOR_MASK; 1025 mode = reg & APIC_MODE_MASK; 1026 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; 1027 return __apic_accept_irq(apic, mode, vector, 1, trig_mode); 1028 } 1029 return 0; 1030 } 1031 1032 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) 1033 { 1034 struct kvm_lapic *apic = vcpu->arch.apic; 1035 1036 if (apic) 1037 kvm_apic_local_deliver(apic, APIC_LVT0); 1038 } 1039 1040 static struct kvm_timer_ops lapic_timer_ops = { 1041 .is_periodic = lapic_is_periodic, 1042 }; 1043 1044 static const struct kvm_io_device_ops apic_mmio_ops = { 1045 .read = apic_mmio_read, 1046 .write = apic_mmio_write, 1047 }; 1048 1049 int kvm_create_lapic(struct kvm_vcpu *vcpu) 1050 { 1051 struct kvm_lapic *apic; 1052 1053 ASSERT(vcpu != NULL); 1054 apic_debug("apic_init %d\n", vcpu->vcpu_id); 1055 1056 apic = kzalloc(sizeof(*apic), GFP_KERNEL); 1057 if (!apic) 1058 goto nomem; 1059 1060 vcpu->arch.apic = apic; 1061 1062 apic->regs_page = alloc_page(GFP_KERNEL); 1063 if (apic->regs_page == NULL) { 1064 printk(KERN_ERR "malloc apic regs error for vcpu %x\n", 1065 vcpu->vcpu_id); 1066 goto nomem_free_apic; 1067 } 1068 apic->regs = page_address(apic->regs_page); 1069 memset(apic->regs, 0, PAGE_SIZE); 1070 apic->vcpu = vcpu; 1071 1072 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, 1073 HRTIMER_MODE_ABS); 1074 apic->lapic_timer.timer.function = kvm_timer_fn; 1075 apic->lapic_timer.t_ops = &lapic_timer_ops; 1076 apic->lapic_timer.kvm = vcpu->kvm; 1077 apic->lapic_timer.vcpu = vcpu; 1078 1079 apic->base_address = APIC_DEFAULT_PHYS_BASE; 1080 vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; 1081 1082 kvm_lapic_reset(vcpu); 1083 kvm_iodevice_init(&apic->dev, &apic_mmio_ops); 1084 1085 return 0; 1086 nomem_free_apic: 1087 kfree(apic); 1088 nomem: 1089 return -ENOMEM; 1090 } 1091 1092 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) 1093 { 1094 struct kvm_lapic *apic = vcpu->arch.apic; 1095 int highest_irr; 1096 1097 if (!apic || !apic_enabled(apic)) 1098 return -1; 1099 1100 apic_update_ppr(apic); 1101 highest_irr = apic_find_highest_irr(apic); 1102 if ((highest_irr == -1) || 1103 ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) 1104 return -1; 1105 return highest_irr; 1106 } 1107 1108 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) 1109 { 1110 u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); 1111 int r = 0; 1112 1113 if (kvm_vcpu_is_bsp(vcpu)) { 1114 if (!apic_hw_enabled(vcpu->arch.apic)) 1115 r = 1; 1116 if ((lvt0 & APIC_LVT_MASKED) == 0 && 1117 GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) 1118 r = 1; 1119 } 1120 return r; 1121 } 1122 1123 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) 1124 { 1125 struct kvm_lapic *apic = vcpu->arch.apic; 1126 1127 if (apic && atomic_read(&apic->lapic_timer.pending) > 0) { 1128 if (kvm_apic_local_deliver(apic, APIC_LVTT)) 1129 atomic_dec(&apic->lapic_timer.pending); 1130 } 1131 } 1132 1133 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) 1134 { 1135 int vector = kvm_apic_has_interrupt(vcpu); 1136 struct kvm_lapic *apic = vcpu->arch.apic; 1137 1138 if (vector == -1) 1139 return -1; 1140 1141 apic_set_vector(vector, apic->regs + APIC_ISR); 1142 apic_update_ppr(apic); 1143 apic_clear_irr(vector, apic); 1144 return vector; 1145 } 1146 1147 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) 1148 { 1149 struct kvm_lapic *apic = vcpu->arch.apic; 1150 1151 apic->base_address = vcpu->arch.apic_base & 1152 MSR_IA32_APICBASE_BASE; 1153 kvm_apic_set_version(vcpu); 1154 1155 apic_update_ppr(apic); 1156 hrtimer_cancel(&apic->lapic_timer.timer); 1157 update_divide_count(apic); 1158 start_apic_timer(apic); 1159 } 1160 1161 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) 1162 { 1163 struct kvm_lapic *apic = vcpu->arch.apic; 1164 struct hrtimer *timer; 1165 1166 if (!apic) 1167 return; 1168 1169 timer = &apic->lapic_timer.timer; 1170 if (hrtimer_cancel(timer)) 1171 hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 1172 } 1173 1174 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) 1175 { 1176 u32 data; 1177 void *vapic; 1178 1179 if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) 1180 return; 1181 1182 vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0); 1183 data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)); 1184 kunmap_atomic(vapic, KM_USER0); 1185 1186 apic_set_tpr(vcpu->arch.apic, data & 0xff); 1187 } 1188 1189 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) 1190 { 1191 u32 data, tpr; 1192 int max_irr, max_isr; 1193 struct kvm_lapic *apic; 1194 void *vapic; 1195 1196 if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) 1197 return; 1198 1199 apic = vcpu->arch.apic; 1200 tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; 1201 max_irr = apic_find_highest_irr(apic); 1202 if (max_irr < 0) 1203 max_irr = 0; 1204 max_isr = apic_find_highest_isr(apic); 1205 if (max_isr < 0) 1206 max_isr = 0; 1207 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); 1208 1209 vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0); 1210 *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data; 1211 kunmap_atomic(vapic, KM_USER0); 1212 } 1213 1214 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) 1215 { 1216 if (!irqchip_in_kernel(vcpu->kvm)) 1217 return; 1218 1219 vcpu->arch.apic->vapic_addr = vapic_addr; 1220 } 1221 1222 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1223 { 1224 struct kvm_lapic *apic = vcpu->arch.apic; 1225 u32 reg = (msr - APIC_BASE_MSR) << 4; 1226 1227 if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) 1228 return 1; 1229 1230 /* if this is ICR write vector before command */ 1231 if (msr == 0x830) 1232 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 1233 return apic_reg_write(apic, reg, (u32)data); 1234 } 1235 1236 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) 1237 { 1238 struct kvm_lapic *apic = vcpu->arch.apic; 1239 u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; 1240 1241 if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) 1242 return 1; 1243 1244 if (apic_reg_read(apic, reg, 4, &low)) 1245 return 1; 1246 if (msr == 0x830) 1247 apic_reg_read(apic, APIC_ICR2, 4, &high); 1248 1249 *data = (((u64)high) << 32) | low; 1250 1251 return 0; 1252 } 1253