// SPDX-License-Identifier: GPL-2.0-only
/*
 * Local APIC handling, local APIC timers
 *
 * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
 *
 * Fixes
 * Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
 *				thanks to Eric Gilmore
 *				and Rolf G. Tews
 *				for testing these extensively.
 * Maciej W. Rozycki	:	Various updates and fixes.
 * Mikael Pettersson	:	Power Management for UP-APIC.
 * Pavel Machek and
 * Mikael Pettersson	:	PM converted to driver model.
 */

#include <linux/perf_event.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/memblock.h>
#include <linux/ftrace.h>
#include <linux/ioport.h>
#include <linux/export.h>
#include <linux/syscore_ops.h>
#include <linux/delay.h>
#include <linux/timex.h>
#include <linux/i8253.h>
#include <linux/dmar.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/dmi.h>
#include <linux/smp.h>
#include <linux/mm.h>

#include <asm/trace/irq_vectors.h>
#include <asm/irq_remapping.h>
#include <asm/pc-conf-reg.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <linux/atomic.h>
#include <asm/barrier.h>
#include <asm/mpspec.h>
#include <asm/i8259.h>
#include <asm/proto.h>
#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/acpi.h>
#include <asm/io_apic.h>
#include <asm/desc.h>
#include <asm/hpet.h>
#include <asm/mtrr.h>
#include <asm/time.h>
#include <asm/smp.h>
#include <asm/mce.h>
#include <asm/tsc.h>
#include <asm/hypervisor.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/irq_regs.h>
#include <asm/cpu.h>

unsigned int num_processors;

unsigned disabled_cpus;

/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);

u8 boot_cpu_apic_version __ro_after_init;

/*
 * The highest APIC ID seen during enumeration.
 */
static unsigned int max_physical_apicid;

/*
 * Bitmask of physically existing CPUs:
 */
physid_mask_t phys_cpu_present_map;

/*
 * Processor to be disabled specified by kernel parameter
 * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
 * avoid undefined behaviour caused by sending INIT from AP to BSP.
 */
static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;

/*
 * This variable controls which CPUs receive external NMIs. By default,
 * external NMIs are delivered only to the BSP.
 */
static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;

/*
 * Hypervisor supports 15 bits of APIC ID in MSI Extended Destination ID
 */
static bool virt_ext_dest_id __ro_after_init;

/* For parallel bootup. */
unsigned long apic_mmio_base __ro_after_init;

/*
 * Map cpu index to physical APIC ID
 */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);

#ifdef CONFIG_X86_32

/*
 * On x86_32, the mapping between cpu and logical apicid may vary
 * depending on apic in use. The following early percpu variable is
 * used for the mapping. This is where the behaviors of x86_64 and 32
 * actually diverge. Let's keep it ugly for now.
 */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);

/* Local APIC was disabled by the BIOS and enabled by the kernel */
static int enabled_via_apicbase __ro_after_init;

/*
 * Handle interrupt mode configuration register (IMCR).
 * This register controls whether the interrupt signals
 * that reach the BSP come from the master PIC or from the
 * local APIC. Before entering Symmetric I/O Mode, either
 * the BIOS or the operating system must switch out of
 * PIC Mode by changing the IMCR.
 */
static inline void imcr_pic_to_apic(void)
{
	/* NMI and 8259 INTR go through APIC */
	pc_conf_set(PC_CONF_MPS_IMCR, 0x01);
}

static inline void imcr_apic_to_pic(void)
{
	/* NMI and 8259 INTR go directly to BSP */
	pc_conf_set(PC_CONF_MPS_IMCR, 0x00);
}
#endif

/*
 * Knob to control our willingness to enable the local APIC.
 *
 * +1=force-enable
 */
static int force_enable_local_apic __initdata;

/*
 * APIC command line parameters
 */
static int __init parse_lapic(char *arg)
{
	if (IS_ENABLED(CONFIG_X86_32) && !arg)
		force_enable_local_apic = 1;
	else if (arg && !strncmp(arg, "notscdeadline", 13))
		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
	return 0;
}
early_param("lapic", parse_lapic);

#ifdef CONFIG_X86_64
static int apic_calibrate_pmtmr __initdata;
static __init int setup_apicpmtimer(char *s)
{
	apic_calibrate_pmtmr = 1;
	notsc_setup(NULL);
	return 1;
}
__setup("apicpmtimer", setup_apicpmtimer);
#endif

unsigned long mp_lapic_addr __ro_after_init;
int disable_apic __ro_after_init;
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
static int disable_apic_timer __initdata;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok __ro_after_init;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);

/*
 * Debug level, exported for io_apic.c
 */
int apic_verbosity __ro_after_init;

int pic_mode __ro_after_init;

/* Have we found an MP table */
int smp_found_config __ro_after_init;

static struct resource lapic_resource = {
	.name = "Local APIC",
	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};

unsigned int lapic_timer_period = 0;

static void apic_pm_activate(void);

static unsigned long apic_phys __ro_after_init;

/*
 * Get the LAPIC version
 */
static inline int lapic_get_version(void)
{
	return GET_APIC_VERSION(apic_read(APIC_LVR));
}
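/*
 * Note on version register semantics (see the SDM, "Local APIC Version
 * Register"): integrated (on-chip) APICs report a version of 0x1X,
 * while a discrete 82489DX reports 0x0X. lapic_is_integrated() below
 * relies on exactly this split via APIC_INTEGRATED().
 */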
/*
 * Check whether the APIC is integrated or a separate chip
 */
static inline int lapic_is_integrated(void)
{
	return APIC_INTEGRATED(lapic_get_version());
}

/*
 * Check whether this is a modern or a first generation APIC
 */
static int modern_apic(void)
{
	/* AMD systems use old APIC versions, so check the CPU */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
	    boot_cpu_data.x86 >= 0xf)
		return 1;

	/* Hygon systems use modern APIC */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		return 1;

	return lapic_get_version() >= 0x14;
}

/*
 * Right after this call the APIC becomes NOOP driven, so
 * apic->write()/read() don't do anything.
 */
static void __init apic_disable(void)
{
	pr_info("APIC: switched to apic NOOP\n");
	apic = &apic_noop;
}

void native_apic_wait_icr_idle(void)
{
	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
		cpu_relax();
}

u32 native_safe_apic_wait_icr_idle(void)
{
	u32 send_status;
	int timeout;

	timeout = 0;
	do {
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		if (!send_status)
			break;
		inc_irq_stat(icr_read_retry_count);
		udelay(100);
	} while (timeout++ < 1000);

	return send_status;
}

void native_apic_icr_write(u32 low, u32 id)
{
	unsigned long flags;

	local_irq_save(flags);
	apic_write(APIC_ICR2, SET_XAPIC_DEST_FIELD(id));
	apic_write(APIC_ICR, low);
	local_irq_restore(flags);
}

u64 native_apic_icr_read(void)
{
	u32 icr1, icr2;

	icr2 = apic_read(APIC_ICR2);
	icr1 = apic_read(APIC_ICR);

	return icr1 | ((u64)icr2 << 32);
}

#ifdef CONFIG_X86_32
/**
 * get_physical_broadcast - Get number of physical broadcast IDs
 */
int get_physical_broadcast(void)
{
	return modern_apic() ? 0xff : 0xf;
}
#endif

/**
 * lapic_get_maxlvt - get the maximum number of local vector table entries
 */
int lapic_get_maxlvt(void)
{
	/*
	 * - we always have APIC integrated on 64bit mode
	 * - 82489DXs do not report # of LVT entries
	 */
	return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
}

/*
 * Local APIC timer
 */

/* Clock divisor */
#define APIC_DIVISOR 16
#define TSC_DIVISOR  8

/* i82489DX specific */
#define I82489DX_BASE_DIVIDER		(((0x2) << 18))

/*
 * This function sets up the local APIC timer, with a timeout of
 * 'clocks' APIC bus clocks. During calibration we actually call
 * this function twice on the boot CPU, once with a bogus timeout
 * value, second time for real. The other (noncalibrating) CPUs
 * call this function only once, with the real, calibrated value.
 *
 * We do reads before writes even if unnecessary, to get around the
 * P5 APIC double write bug.
 */
static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
{
	unsigned int lvtt_value, tmp_value;

	lvtt_value = LOCAL_TIMER_VECTOR;
	if (!oneshot)
		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
	else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;

	/*
	 * The i82489DX APIC uses bits 18 and 19 for the base divider. This
	 * overlaps with bit 18 on integrated APICs, but is not documented
	 * in the SDM. No problem though. i82489DX equipped systems do not
	 * have a TSC deadline timer.
	 */
	if (!lapic_is_integrated())
		lvtt_value |= I82489DX_BASE_DIVIDER;

	if (!irqen)
		lvtt_value |= APIC_LVT_MASKED;

	apic_write(APIC_LVTT, lvtt_value);

	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
		/*
		 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
		 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
		 * According to Intel, MFENCE can do the serialization here.
		 */
		asm volatile("mfence" : : : "memory");
		return;
	}

	/*
	 * Divide PICLK by 16
	 */
	tmp_value = apic_read(APIC_TDCR);
	apic_write(APIC_TDCR,
		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
		APIC_TDR_DIV_16);

	if (!oneshot)
		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
}

/*
 * Setup extended LVT, AMD specific
 *
 * Software should use the LVT offsets the BIOS provides. The offsets
 * are determined by the subsystems using them, such as those for MCE
 * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts
 * are supported. Beginning with family 10h at least 4 offsets are
 * available.
 *
 * Since the offsets must be consistent for all cores, we keep track
 * of the LVT offsets in software and reserve the offset for the same
 * vector also to be used on other cores. An offset is freed by
 * setting the entry to APIC_EILVT_MASKED.
 *
 * If the BIOS is right, there should be no conflicts. Otherwise a
 * "[Firmware Bug]: ..." error message is generated. However, if
 * software does not properly determine the offsets, it is not
 * necessarily a BIOS bug.
 */

static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];

static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
{
	return (old & APIC_EILVT_MASKED)
		|| (new == APIC_EILVT_MASKED)
		|| ((new & ~APIC_EILVT_MASKED) == old);
}

static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
{
	unsigned int rsvd, vector;

	if (offset >= APIC_EILVT_NR_MAX)
		return ~0;

	rsvd = atomic_read(&eilvt_offsets[offset]);
	do {
		vector = rsvd & ~APIC_EILVT_MASKED;	/* 0: unassigned */
		if (vector && !eilvt_entry_is_changeable(vector, new))
			/* may not change if vectors are different */
			return rsvd;
	} while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));

	rsvd = new & ~APIC_EILVT_MASKED;
	if (rsvd && rsvd != vector)
		pr_info("LVT offset %d assigned for vector 0x%02x\n",
			offset, rsvd);

	return new;
}
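/*
 * Layout of an extended LVT entry as assembled by setup_APIC_eilvt()
 * below: vector in bits 0-7, message type in bits 8-10, mask in bit 16.
 * For instance, an unmasked NMI message type with vector 0 would be
 * (0 << 16) | (APIC_EILVT_MSG_NMI << 8) | 0.
 */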
/*
 * If mask=1, the LVT entry does not generate interrupts while mask=0
 * enables the vector. See also the BKDGs. Must be called with
 * preemption disabled.
 */

int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
{
	unsigned long reg = APIC_EILVTn(offset);
	unsigned int new, old, reserved;

	new = (mask << 16) | (msg_type << 8) | vector;
	old = apic_read(reg);
	reserved = reserve_eilvt_offset(offset, new);

	if (reserved != new) {
		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
		       "vector 0x%x, but the register is already in use for "
		       "vector 0x%x on another cpu\n",
		       smp_processor_id(), reg, offset, new, reserved);
		return -EINVAL;
	}

	if (!eilvt_entry_is_changeable(old, new)) {
		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
		       "vector 0x%x, but the register is already in use for "
		       "vector 0x%x on this cpu\n",
		       smp_processor_id(), reg, offset, new, old);
		return -EBUSY;
	}

	apic_write(reg, new);

	return 0;
}
EXPORT_SYMBOL_GPL(setup_APIC_eilvt);

/*
 * Program the next event, relative to now
 */
static int lapic_next_event(unsigned long delta,
			    struct clock_event_device *evt)
{
	apic_write(APIC_TMICT, delta);
	return 0;
}

static int lapic_next_deadline(unsigned long delta,
			       struct clock_event_device *evt)
{
	u64 tsc;

	/* This MSR is special and needs a special fence: */
	weak_wrmsr_fence();

	tsc = rdtsc();
	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
	return 0;
}

static int lapic_timer_shutdown(struct clock_event_device *evt)
{
	unsigned int v;

	/* Lapic used as dummy for broadcast ? */
	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
		return 0;

	v = apic_read(APIC_LVTT);
	v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
	apic_write(APIC_LVTT, v);
	apic_write(APIC_TMICT, 0);
	return 0;
}

static inline int
lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
{
	/* Lapic used as dummy for broadcast ? */
	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
		return 0;

	__setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
	return 0;
}

static int lapic_timer_set_periodic(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, false);
}

static int lapic_timer_set_oneshot(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, true);
}

/*
 * Local APIC timer broadcast function
 */
static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
	apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
#endif
}
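/*
 * The clockevent device below deliberately starts out with
 * CLOCK_EVT_FEAT_DUMMY set and a low rating: until calibration has
 * verified the timer, the clockevents core treats it as a placeholder
 * and keeps a broadcast device around. calibrate_APIC_clock() clears
 * the DUMMY bit once the timer has been validated.
 */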
/*
 * The local apic timer can be used for any function which is CPU local.
 */
static struct clock_event_device lapic_clockevent = {
	.name				= "lapic",
	.features			= CLOCK_EVT_FEAT_PERIODIC |
					  CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
					  | CLOCK_EVT_FEAT_DUMMY,
	.shift				= 32,
	.set_state_shutdown		= lapic_timer_shutdown,
	.set_state_periodic		= lapic_timer_set_periodic,
	.set_state_oneshot		= lapic_timer_set_oneshot,
	.set_state_oneshot_stopped	= lapic_timer_shutdown,
	.set_next_event			= lapic_next_event,
	.broadcast			= lapic_timer_broadcast,
	.rating				= 100,
	.irq				= -1,
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);

static const struct x86_cpu_id deadline_match[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */

	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X,	0x0b000020),

	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003),

	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0),

	X86_MATCH_INTEL_FAM6_MODEL( HASWELL,		0x22),
	X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L,		0x20),
	X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G,		0x17),

	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL,		0x25),
	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G,	0x17),

	X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L,		0xb2),
	X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE,		0xb2),

	X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L,		0x52),
	X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE,		0x52),

	{},
};

static __init bool apic_validate_deadline_timer(void)
{
	const struct x86_cpu_id *m;
	u32 rev;

	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return false;
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return true;

	m = x86_match_cpu(deadline_match);
	if (!m)
		return true;

	rev = (u32)m->driver_data;

	if (boot_cpu_data.microcode >= rev)
		return true;

	setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
	pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
	       "please update microcode to version: 0x%x (or later)\n", rev);
	return false;
}

/*
 * Setup the local APIC timer for this CPU. Copy the initialized values
 * of the boot CPU and register the clock event in the framework.
 */
static void setup_APIC_timer(void)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);

	if (this_cpu_has(X86_FEATURE_ARAT)) {
		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
		/* Make LAPIC timer preferable over percpu HPET */
		lapic_clockevent.rating = 150;
	}

	memcpy(levt, &lapic_clockevent, sizeof(*levt));
	levt->cpumask = cpumask_of(smp_processor_id());

	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
		levt->name = "lapic-deadline";
		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
				    CLOCK_EVT_FEAT_DUMMY);
		levt->set_next_event = lapic_next_deadline;
		clockevents_config_and_register(levt,
						tsc_khz * (1000 / TSC_DIVISOR),
						0xF, ~0UL);
	} else
		clockevents_register_device(levt);
}

/*
 * Install the updated TSC frequency from recalibration at the TSC
 * deadline clockevent devices.
 */
static void __lapic_update_tsc_freq(void *info)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);

	if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return;

	clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
}

void lapic_update_tsc_freq(void)
{
	/*
	 * The clockevent device's ->mult and ->shift can both be
	 * changed. In order to avoid races, schedule the frequency
	 * update code on each CPU.
	 */
	on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
}

/*
 * In this function we calibrate the APIC bus clocks to the external timer.
 *
 * We want to do the calibration only once since we want to have local timer
 * irqs synchronous. CPUs connected by the same APIC bus have the very same bus
 * frequency.
 *
 * This was previously done by reading the PIT/HPET and waiting for a wrap
 * around to find out that a tick has elapsed. I have a box where the PIT
 * readout is broken, so it never gets out of the wait loop again. This was
 * also reported by others.
 *
 * Monitoring the jiffies value is inaccurate and the clockevents
 * infrastructure allows us to do a simple substitution of the interrupt
 * handler.
 *
 * The calibration routine also uses the pm_timer when possible, as the PIT
 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
 * back to normal later in the boot process).
 */

#define LAPIC_CAL_LOOPS		(HZ/10)

static __initdata int lapic_cal_loops = -1;
static __initdata long lapic_cal_t1, lapic_cal_t2;
static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
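/*
 * Rough arithmetic behind the calibration: LAPIC_CAL_LOOPS = HZ/10
 * ticks, i.e. a 100ms measurement window (e.g. 25 ticks with HZ=250).
 * The APIC counter runs at the bus clock divided by APIC_DIVISOR (16),
 * so lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS
 * converts the observed counter delta back into bus clocks per tick.
 */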
/*
 * Temporary interrupt handler and polled calibration function.
 */
static void __init lapic_cal_handler(struct clock_event_device *dev)
{
	unsigned long long tsc = 0;
	long tapic = apic_read(APIC_TMCCT);
	unsigned long pm = acpi_pm_read_early();

	if (boot_cpu_has(X86_FEATURE_TSC))
		tsc = rdtsc();

	switch (lapic_cal_loops++) {
	case 0:
		lapic_cal_t1 = tapic;
		lapic_cal_tsc1 = tsc;
		lapic_cal_pm1 = pm;
		lapic_cal_j1 = jiffies;
		break;

	case LAPIC_CAL_LOOPS:
		lapic_cal_t2 = tapic;
		lapic_cal_tsc2 = tsc;
		if (pm < lapic_cal_pm1)
			pm += ACPI_PM_OVRRUN;
		lapic_cal_pm2 = pm;
		lapic_cal_j2 = jiffies;
		break;
	}
}

static int __init
calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
{
	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
	const long pm_thresh = pm_100ms / 100;
	unsigned long mult;
	u64 res;

#ifndef CONFIG_X86_PM_TIMER
	return -1;
#endif

	apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);

	/* Check if the PM timer is available */
	if (!deltapm)
		return -1;

	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);

	if (deltapm > (pm_100ms - pm_thresh) &&
	    deltapm < (pm_100ms + pm_thresh)) {
		apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
		return 0;
	}

	res = (((u64)deltapm) * mult) >> 22;
	do_div(res, 1000000);
	pr_warn("APIC calibration not consistent "
		"with PM-Timer: %ldms instead of 100ms\n", (long)res);

	/* Correct the lapic counter value */
	res = (((u64)(*delta)) * pm_100ms);
	do_div(res, deltapm);
	pr_info("APIC delta adjusted to PM-Timer: "
		"%lu (%ld)\n", (unsigned long)res, *delta);
	*delta = (long)res;

	/* Correct the tsc counter value */
	if (boot_cpu_has(X86_FEATURE_TSC)) {
		res = (((u64)(*deltatsc)) * pm_100ms);
		do_div(res, deltapm);
		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
			    "PM-Timer: %lu (%ld)\n",
			    (unsigned long)res, *deltatsc);
		*deltatsc = (long)res;
	}

	return 0;
}

static int __init lapic_init_clockevent(void)
{
	if (!lapic_timer_period)
		return -1;

	/* Calculate the scaled math multiplication factor */
	lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
					TICK_NSEC, lapic_clockevent.shift);
	lapic_clockevent.max_delta_ns =
		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
	lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
	lapic_clockevent.min_delta_ns =
		clockevent_delta2ns(0xF, &lapic_clockevent);
	lapic_clockevent.min_delta_ticks = 0xF;

	return 0;
}

bool __init apic_needs_pit(void)
{
	/*
	 * If the frequencies are not known, PIT is required for both TSC
	 * and apic timer calibration.
	 */
	if (!tsc_khz || !cpu_khz)
		return true;

	/* Is there an APIC at all or is it disabled? */
	if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
		return true;

	/*
	 * If interrupt delivery mode is legacy PIC or virtual wire without
	 * configuration, the local APIC timer won't be set up. Make sure
	 * that the PIT is initialized.
	 */
	if (apic_intr_mode == APIC_PIC ||
	    apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
		return true;

	/* Virt guests may lack ARAT, but still have DEADLINE */
	if (!boot_cpu_has(X86_FEATURE_ARAT))
		return true;

	/* Deadline timer is based on TSC so no further PIT action required */
	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return false;

	/* APIC timer disabled? */
	if (disable_apic_timer)
		return true;
	/*
	 * The APIC timer frequency is known already, no PIT calibration
	 * required. If unknown, let the PIT be initialized.
	 */
	return lapic_timer_period == 0;
}

static int __init calibrate_APIC_clock(void)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
	u64 tsc_perj = 0, tsc_start = 0;
	unsigned long jif_start;
	unsigned long deltaj;
	long delta, deltatsc;
	int pm_referenced = 0;

	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return 0;

	/*
	 * Check if lapic timer has already been calibrated by platform
	 * specific routine, such as tsc calibration code. If so just fill
	 * in the clockevent structure and return.
	 */
	if (!lapic_init_clockevent()) {
		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
			    lapic_timer_period);
		/*
		 * Direct calibration methods must have an always running
		 * local APIC timer, no need for broadcast timer.
		 */
		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
		return 0;
	}

	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
		    "calibrating APIC timer ...\n");

	/*
	 * There are platforms w/o global clockevent devices. Instead of
	 * making the calibration conditional on that, use a polling based
	 * approach everywhere.
	 */
	local_irq_disable();

	/*
	 * Setup the APIC counter to maximum. There is no way the lapic
	 * can underflow in the 100ms detection time frame.
	 */
	__setup_APIC_LVTT(0xffffffff, 0, 0);

	/*
	 * Methods to terminate the calibration loop:
	 *  1) Global clockevent if available (jiffies)
	 *  2) TSC if available and frequency is known
	 */
	jif_start = READ_ONCE(jiffies);

	if (tsc_khz) {
		tsc_start = rdtsc();
		tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
	}

	/*
	 * Enable interrupts so the tick can fire, if a global
	 * clockevent device is available
	 */
	local_irq_enable();

	while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
		/* Wait for a tick to elapse */
		while (1) {
			if (tsc_khz) {
				u64 tsc_now = rdtsc();
				if ((tsc_now - tsc_start) >= tsc_perj) {
					tsc_start += tsc_perj;
					break;
				}
			} else {
				unsigned long jif_now = READ_ONCE(jiffies);

				if (time_after(jif_now, jif_start)) {
					jif_start = jif_now;
					break;
				}
			}
			cpu_relax();
		}

		/* Invoke the calibration routine */
		local_irq_disable();
		lapic_cal_handler(NULL);
		local_irq_enable();
	}

	local_irq_disable();

	/* Build delta t1-t2 as apic timer counts down */
	delta = lapic_cal_t1 - lapic_cal_t2;
	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);

	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);

	/* we trust the PM based calibration if possible */
	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
					      &delta, &deltatsc);

	lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
	lapic_init_clockevent();

	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
		    lapic_timer_period);

	if (boot_cpu_has(X86_FEATURE_TSC)) {
		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
			    "%ld.%04ld MHz.\n",
			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
			    (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
	}

	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
		    "%u.%04u MHz.\n",
		    lapic_timer_period / (1000000 / HZ),
		    lapic_timer_period % (1000000 / HZ));

	/*
	 * Do a sanity check on the APIC calibration result
	 */
	if (lapic_timer_period < (1000000 / HZ)) {
		local_irq_enable();
		pr_warn("APIC frequency too slow, disabling apic timer\n");
		return -1;
	}

	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;

	/*
	 * PM timer calibration failed or not turned on, so let's try APIC
	 * timer based calibration, if a global clockevent device is
	 * available.
	 */
	if (!pm_referenced && global_clock_event) {
		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");

		/*
		 * Setup the apic timer manually
		 */
		levt->event_handler = lapic_cal_handler;
		lapic_timer_set_periodic(levt);
		lapic_cal_loops = -1;

		/* Let the interrupts run */
		local_irq_enable();

		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
			cpu_relax();

		/* Stop the lapic timer */
		local_irq_disable();
		lapic_timer_shutdown(levt);

		/* Jiffies delta */
		deltaj = lapic_cal_j2 - lapic_cal_j1;
		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);

		/* Check if the jiffies result is consistent */
		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
		else
			levt->features |= CLOCK_EVT_FEAT_DUMMY;
	}
	local_irq_enable();

	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
		pr_warn("APIC timer disabled due to verification failure\n");
		return -1;
	}

	return 0;
}

/*
 * Setup the boot APIC
 *
 * Calibrate and verify the result.
 */
void __init setup_boot_APIC_clock(void)
{
	/*
	 * The local apic timer can be disabled via the kernel
	 * commandline or from the CPU detection code. Register the lapic
	 * timer as a dummy clock event source on SMP systems, so the
	 * broadcast mechanism is used. On UP systems simply ignore it.
	 */
	if (disable_apic_timer) {
		pr_info("Disabling APIC timer\n");
		/* No broadcast on UP! */
		if (num_possible_cpus() > 1) {
			lapic_clockevent.mult = 1;
			setup_APIC_timer();
		}
		return;
	}

	if (calibrate_APIC_clock()) {
		/* No broadcast on UP! */
		if (num_possible_cpus() > 1)
			setup_APIC_timer();
		return;
	}

	/*
	 * If nmi_watchdog is set to IO_APIC, we need the
	 * PIT/HPET going. Otherwise register lapic as a dummy
	 * device.
	 */
	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;

	/* Setup the lapic or request the broadcast */
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}

void setup_secondary_APIC_clock(void)
{
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}

/*
 * The guts of the apic timer interrupt
 */
static void local_apic_timer_interrupt(void)
{
	struct clock_event_device *evt = this_cpu_ptr(&lapic_events);

	/*
	 * Normally we should not be here till the LAPIC has been
	 * initialized, but in some cases like kdump it's possible that
	 * there is a pending LAPIC timer interrupt from the previous
	 * kernel's context which is delivered in the new kernel the
	 * moment interrupts are enabled.
	 *
	 * Interrupts are enabled early and the LAPIC is set up much
	 * later, hence it's possible that when we get here
	 * evt->event_handler is NULL. Check for event_handler being NULL
	 * and discard the interrupt as spurious.
	 */
	if (!evt->event_handler) {
		pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
			smp_processor_id());
		/* Switch it off */
		lapic_timer_shutdown(evt);
		return;
	}

	/*
	 * The NMI deadlock-detector uses this.
	 */
	inc_irq_stat(apic_timer_irqs);

	evt->event_handler(evt);
}

/*
 * Local APIC timer interrupt. This is the most natural way for doing
 * local interrupts, but local timer interrupts can be emulated by
 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
 *
 * [ if a single-CPU system runs an SMP kernel then we call the local
 *   interrupt as well. Thus we cannot inline the local irq ... ]
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
{
	struct pt_regs *old_regs = set_irq_regs(regs);

	ack_APIC_irq();
	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
	local_apic_timer_interrupt();
	trace_local_timer_exit(LOCAL_TIMER_VECTOR);

	set_irq_regs(old_regs);
}

/*
 * Local APIC start and shutdown
 */

/**
 * clear_local_APIC - shutdown the local APIC
 *
 * This is called, when a CPU is disabled and before rebooting, so the state of
 * the local APIC has no dangling leftovers. Also used to clean out any BIOS
 * leftovers during boot.
 */
void clear_local_APIC(void)
{
	int maxlvt;
	u32 v;

	/* APIC hasn't been mapped yet */
	if (!x2apic_mode && !apic_phys)
		return;

	maxlvt = lapic_get_maxlvt();
	/*
	 * Masking an LVT entry can trigger a local APIC error
	 * if the vector is zero. Mask LVTERR first to prevent this.
	 */
	if (maxlvt >= 3) {
		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
	}
	/*
	 * Careful: we have to set masks only first to deassert
	 * any level-triggered sources.
	 */
	v = apic_read(APIC_LVTT);
	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
	v = apic_read(APIC_LVT0);
	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
	v = apic_read(APIC_LVT1);
	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
	if (maxlvt >= 4) {
		v = apic_read(APIC_LVTPC);
		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
	}

	/* let's not touch this if we didn't frob it */
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5) {
		v = apic_read(APIC_LVTTHMR);
		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
	}
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6) {
		v = apic_read(APIC_LVTCMCI);
		if (!(v & APIC_LVT_MASKED))
			apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
	}
#endif

	/*
	 * Clean APIC state for other OSs:
	 */
	apic_write(APIC_LVTT, APIC_LVT_MASKED);
	apic_write(APIC_LVT0, APIC_LVT_MASKED);
	apic_write(APIC_LVT1, APIC_LVT_MASKED);
	if (maxlvt >= 3)
		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
	if (maxlvt >= 4)
		apic_write(APIC_LVTPC, APIC_LVT_MASKED);

	/* Integrated APIC (!82489DX) ? */
	if (lapic_is_integrated()) {
		if (maxlvt > 3)
			/* Clear ESR due to Pentium errata 3AP and 11AP */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}
}

/**
 * apic_soft_disable - Clears and software disables the local APIC on hotplug
 *
 * Contrary to disable_local_APIC() this does not touch the enable bit in
 * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
 * bus would require a hardware reset as the APIC would lose track of bus
 * arbitration. On systems with FSB delivery APICBASE could be disabled,
 * but it has to be guaranteed that no interrupt is sent to the APIC while
 * in that state and it's not clear from the SDM whether it still responds
 * to INIT/SIPI messages. Stay on the safe side and use software disable.
 */
void apic_soft_disable(void)
{
	u32 value;

	clear_local_APIC();

	/* Soft disable APIC (implies clearing of registers for 82489DX!). */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_SPIV_APIC_ENABLED;
	apic_write(APIC_SPIV, value);
}

/**
 * disable_local_APIC - clear and disable the local APIC
 */
void disable_local_APIC(void)
{
	/* APIC hasn't been mapped yet */
	if (!x2apic_mode && !apic_phys)
		return;

	apic_soft_disable();

#ifdef CONFIG_X86_32
	/*
	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
	 * restore the disabled state.
	 */
	if (enabled_via_apicbase) {
		unsigned int l, h;

		rdmsr(MSR_IA32_APICBASE, l, h);
		l &= ~MSR_IA32_APICBASE_ENABLE;
		wrmsr(MSR_IA32_APICBASE, l, h);
	}
#endif
}

/*
 * If Linux enabled the LAPIC against the BIOS default, shut it down before
 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
 * not power-off. Additionally clear all LVT entries before disable_local_APIC
 * for the case where Linux didn't enable the LAPIC.
 */
void lapic_shutdown(void)
{
	unsigned long flags;

	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
		return;

	local_irq_save(flags);

#ifdef CONFIG_X86_32
	if (!enabled_via_apicbase)
		clear_local_APIC();
	else
#endif
		disable_local_APIC();

	local_irq_restore(flags);
}

/**
 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
 */
void __init sync_Arb_IDs(void)
{
	/*
	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1.
	 * Not needed on AMD.
	 */
	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		return;

	/*
	 * Wait for idle.
	 */
	apic_wait_icr_idle();

	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
	apic_write(APIC_ICR, APIC_DEST_ALLINC |
			APIC_INT_LEVELTRIG | APIC_DM_INIT);
}

enum apic_intr_mode_id apic_intr_mode __ro_after_init;

static int __init __apic_intr_mode_select(void)
{
	/* Check kernel option */
	if (disable_apic) {
		pr_info("APIC disabled via kernel command line\n");
		return APIC_PIC;
	}

	/* Check BIOS */
#ifdef CONFIG_X86_64
	/* On 64-bit, the APIC must be integrated. Check the local APIC only. */
	if (!boot_cpu_has(X86_FEATURE_APIC)) {
		disable_apic = 1;
		pr_info("APIC disabled by BIOS\n");
		return APIC_PIC;
	}
#else
	/* On 32-bit, the APIC may be integrated APIC or 82489DX */

	/* Neither 82489DX nor integrated APIC ? */
	if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
		disable_apic = 1;
		return APIC_PIC;
	}

	/* Does the BIOS pretend there is an integrated APIC ? */
	if (!boot_cpu_has(X86_FEATURE_APIC) &&
		APIC_INTEGRATED(boot_cpu_apic_version)) {
		disable_apic = 1;
		pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
				       boot_cpu_physical_apicid);
		return APIC_PIC;
	}
#endif

	/* Check MP table or ACPI MADT configuration */
	if (!smp_found_config) {
		disable_ioapic_support();
		if (!acpi_lapic) {
			pr_info("APIC: ACPI MADT or MP tables are not detected\n");
			return APIC_VIRTUAL_WIRE_NO_CONFIG;
		}
		return APIC_VIRTUAL_WIRE;
	}

#ifdef CONFIG_SMP
	/* If SMP should be disabled, then really disable it! */
	if (!setup_max_cpus) {
		pr_info("APIC: SMP mode deactivated\n");
		return APIC_SYMMETRIC_IO_NO_ROUTING;
	}

	if (read_apic_id() != boot_cpu_physical_apicid) {
		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
		      read_apic_id(), boot_cpu_physical_apicid);
		/* Or can we switch back to PIC here? */
	}
#endif

	return APIC_SYMMETRIC_IO;
}

/* Select the interrupt delivery mode for the BSP */
void __init apic_intr_mode_select(void)
{
	apic_intr_mode = __apic_intr_mode_select();
}
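/*
 * In virtual wire mode the 8259A PIC stays the interrupt source; its
 * INTR output is routed through the boot CPU's LINT0 pin, which is
 * programmed as ExtINT below, while LINT1 carries NMI. This is the
 * fallback when no MP/ACPI configuration for symmetric I/O mode is
 * available.
 */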
/*
 * An initial setup of the virtual wire mode.
 */
void __init init_bsp_APIC(void)
{
	unsigned int value;

	/*
	 * Don't do the setup now if we have an SMP BIOS as the
	 * through-I/O-APIC virtual wire mode might be active.
	 */
	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
		return;

	/*
	 * Do not trust the local APIC being empty at bootup.
	 */
	clear_local_APIC();

	/*
	 * Enable APIC.
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_VECTOR_MASK;
	value |= APIC_SPIV_APIC_ENABLED;

#ifdef CONFIG_X86_32
	/* This bit is reserved on P4/Xeon and should be cleared */
	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
	    (boot_cpu_data.x86 == 15))
		value &= ~APIC_SPIV_FOCUS_DISABLED;
	else
#endif
		value |= APIC_SPIV_FOCUS_DISABLED;
	value |= SPURIOUS_APIC_VECTOR;
	apic_write(APIC_SPIV, value);

	/*
	 * Set up the virtual wire mode.
	 */
	apic_write(APIC_LVT0, APIC_DM_EXTINT);
	value = APIC_DM_NMI;
	if (!lapic_is_integrated())		/* 82489DX */
		value |= APIC_LVT_LEVEL_TRIGGER;
	if (apic_extnmi == APIC_EXTNMI_NONE)
		value |= APIC_LVT_MASKED;
	apic_write(APIC_LVT1, value);
}

static void __init apic_bsp_setup(bool upmode);

/* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void)
{
	bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);

	switch (apic_intr_mode) {
	case APIC_PIC:
		pr_info("APIC: Keep in PIC mode(8259)\n");
		return;
	case APIC_VIRTUAL_WIRE:
		pr_info("APIC: Switch to virtual wire mode setup\n");
		break;
	case APIC_VIRTUAL_WIRE_NO_CONFIG:
		pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
		upmode = true;
		break;
	case APIC_SYMMETRIC_IO:
		pr_info("APIC: Switch to symmetric I/O mode setup\n");
		break;
	case APIC_SYMMETRIC_IO_NO_ROUTING:
		pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
		break;
	}

	default_setup_apic_routing();

	if (x86_platform.apic_post_init)
		x86_platform.apic_post_init();

	apic_bsp_setup(upmode);
}

static void lapic_setup_esr(void)
{
	unsigned int oldvalue, value, maxlvt;

	if (!lapic_is_integrated()) {
		pr_info("No ESR for 82489DX.\n");
		return;
	}

	if (apic->disable_esr) {
		/*
		 * Something untraceable is creating bad interrupts on
		 * secondary quads ... for the moment, just leave the
		 * ESR disabled - we can't do anything useful with the
		 * errors anyway - mbligh
		 */
		pr_info("Leaving ESR disabled.\n");
		return;
	}

	maxlvt = lapic_get_maxlvt();
	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
		apic_write(APIC_ESR, 0);
	oldvalue = apic_read(APIC_ESR);

	/* enables sending errors */
	value = ERROR_APIC_VECTOR;
	apic_write(APIC_LVTERR, value);

	/*
	 * The spec says to clear errors after enabling the vector.
	 */
	if (maxlvt > 3)
		apic_write(APIC_ESR, 0);
	value = apic_read(APIC_ESR);
	if (value != oldvalue)
		apic_printk(APIC_VERBOSE, "ESR value before enabling "
			"vector: 0x%08x  after: 0x%08x\n",
			oldvalue, value);
}

#define APIC_IR_REGS		APIC_ISR_NR
#define APIC_IR_BITS		(APIC_IR_REGS * 32)
#define APIC_IR_MAPSIZE		(APIC_IR_BITS / BITS_PER_LONG)

union apic_ir {
	unsigned long	map[APIC_IR_MAPSIZE];
	u32		regs[APIC_IR_REGS];
};
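/*
 * IRR and ISR are each a bank of APIC_ISR_NR 32-bit registers spaced
 * 0x10 apart, together covering the 256 possible vectors with one bit
 * per vector. The union above lets the same data be read register by
 * register and then scanned as a single bitmap.
 */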
static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
{
	int i, bit;

	/* Read the IRRs */
	for (i = 0; i < APIC_IR_REGS; i++)
		irr->regs[i] = apic_read(APIC_IRR + i * 0x10);

	/* Read the ISRs */
	for (i = 0; i < APIC_IR_REGS; i++)
		isr->regs[i] = apic_read(APIC_ISR + i * 0x10);

	/*
	 * If the ISR map is not empty, ACK the APIC and run another round
	 * to verify whether a pending IRR has been unblocked and turned
	 * into an ISR.
	 */
	if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
		/*
		 * There can be multiple ISR bits set when a high priority
		 * interrupt preempted a lower priority one. Issue an ACK
		 * per set bit.
		 */
		for_each_set_bit(bit, isr->map, APIC_IR_BITS)
			ack_APIC_irq();
		return true;
	}

	return !bitmap_empty(irr->map, APIC_IR_BITS);
}

/*
 * After a crash, we no longer service the interrupts and a pending
 * interrupt from the previous kernel might still have its ISR bit set.
 *
 * Most probably by now the CPU has serviced that pending interrupt and
 * it might not have done the ack_APIC_irq() because it thought the
 * interrupt came from the i8259 as ExtINT. The LAPIC did not get an EOI
 * so it does not clear the ISR bit and the CPU thinks it has already
 * serviced the interrupt. Hence a vector might get locked. It was
 * noticed for the timer irq (vector 0x31). Issue an extra EOI to clear
 * the ISR.
 *
 * If there are pending IRR bits they turn into ISR bits after a higher
 * priority ISR bit has been acked.
 */
static void apic_pending_intr_clear(void)
{
	union apic_ir irr, isr;
	unsigned int i;

	/* 512 loops are way oversized and give the APIC a chance to obey. */
	for (i = 0; i < 512; i++) {
		if (!apic_check_and_ack(&irr, &isr))
			return;
	}
	/* Dump the IRR/ISR content if that failed */
	pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
}

/**
 * setup_local_APIC - setup the local APIC
 *
 * Used to setup local APIC while initializing BSP or bringing up APs.
 * Always called with preemption disabled.
 */
static void setup_local_APIC(void)
{
	int cpu = smp_processor_id();
	unsigned int value;

	if (disable_apic) {
		disable_ioapic_support();
		return;
	}

	/*
	 * If this comes from kexec/kcrash the APIC might be enabled in
	 * SPIV. Soft disable it before doing further initialization.
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_SPIV_APIC_ENABLED;
	apic_write(APIC_SPIV, value);

#ifdef CONFIG_X86_32
	/* Pound the ESR really hard over the head with a big hammer - mbligh */
	if (lapic_is_integrated() && apic->disable_esr) {
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
		apic_write(APIC_ESR, 0);
	}
#endif
	/*
	 * Double-check whether this APIC is really registered.
	 * This is meaningless in clustered apic mode, so we skip it.
	 */
	BUG_ON(!apic->apic_id_registered());

	/*
	 * Intel recommends to set DFR, LDR and TPR before enabling
	 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
	 * document number 292116). So here it goes...
	 */
	apic->init_apic_ldr();

#ifdef CONFIG_X86_32
	if (apic->dest_mode_logical) {
		int logical_apicid, ldr_apicid;

		/*
		 * APIC LDR is initialized. If logical_apicid mapping was
		 * initialized during get_smp_config(), make sure it matches
		 * the actual value.
		 */
		logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
		ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
		if (logical_apicid != BAD_APICID)
			WARN_ON(logical_apicid != ldr_apicid);
		/* Always use the value from LDR. */
		early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
	}
#endif

	/*
	 * Set Task Priority to 'accept all except vectors 0-31'. An APIC
	 * vector in the 16-31 range could be delivered if TPR == 0, but we
	 * would think it's an exception and terrible things will happen. We
	 * never change this later on.
	 */
	value = apic_read(APIC_TASKPRI);
	value &= ~APIC_TPRI_MASK;
	value |= 0x10;
	apic_write(APIC_TASKPRI, value);

	/* Clear any possibly stale ISR/IRR bits */
	apic_pending_intr_clear();

	/*
	 * Now that we are all set up, enable the APIC
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_VECTOR_MASK;
	/*
	 * Enable APIC
	 */
	value |= APIC_SPIV_APIC_ENABLED;

#ifdef CONFIG_X86_32
	/*
	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
	 * certain networking cards. If high frequency interrupts are
	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
	 * entry is masked/unmasked at a high rate as well then sooner or
	 * later the IOAPIC line gets 'stuck', no more interrupts are received
	 * from the device. If the focus CPU is disabled then the hang goes
	 * away, oh well :-(
	 *
	 * [ This bug can be reproduced easily with level-triggered
	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
	 *   BX chipset. ]
	 */
	/*
	 * Actually disabling the focus CPU check just makes the hang less
	 * frequent as it makes the interrupt distribution model be more
	 * like LRU than MRU (the short-term load is more even across CPUs).
	 */

	/*
	 * - enable focus processor (bit==0)
	 * - 64bit mode always uses processor focus
	 *   so no need to set it
	 */
	value &= ~APIC_SPIV_FOCUS_DISABLED;
#endif

	/*
	 * Set spurious IRQ vector
	 */
	value |= SPURIOUS_APIC_VECTOR;
	apic_write(APIC_SPIV, value);

	perf_events_lapic_init();

	/*
	 * Set up LVT0, LVT1:
	 *
	 * set up through-local-APIC on the boot CPU's LINT0. This is not
	 * strictly necessary in pure symmetric-IO mode, but sometimes
	 * we delegate interrupts to the 8259A.
	 */
	/*
	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
	 */
	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
	if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
		value = APIC_DM_EXTINT;
		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
	} else {
		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
	}
	apic_write(APIC_LVT0, value);

	/*
	 * Only the BSP sees the LINT1 NMI signal by default. This can be
	 * modified by the apic_extnmi= boot option.
	 */
	if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
	    apic_extnmi == APIC_EXTNMI_ALL)
		value = APIC_DM_NMI;
	else
		value = APIC_DM_NMI | APIC_LVT_MASKED;

	/* Is this a 82489DX ? */
	if (!lapic_is_integrated())
		value |= APIC_LVT_LEVEL_TRIGGER;
	apic_write(APIC_LVT1, value);

#ifdef CONFIG_X86_MCE_INTEL
	/* Recheck CMCI information after local APIC is up on CPU #0 */
	if (!cpu)
		cmci_recheck();
#endif
}

static void end_local_APIC_setup(void)
{
	lapic_setup_esr();

#ifdef CONFIG_X86_32
	{
		unsigned int value;
		/* Disable the local apic timer */
		value = apic_read(APIC_LVTT);
		value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
		apic_write(APIC_LVTT, value);
	}
#endif

	apic_pm_activate();
}

/*
 * APIC setup function for application processors. Called from smpboot.c
 */
void apic_ap_setup(void)
{
	setup_local_APIC();
	end_local_APIC_setup();
}

#ifdef CONFIG_X86_X2APIC
int x2apic_mode;
EXPORT_SYMBOL_GPL(x2apic_mode);

enum {
	X2APIC_OFF,
	X2APIC_DISABLED,
	/* All states below here have X2APIC enabled */
	X2APIC_ON,
	X2APIC_ON_LOCKED
};
static int x2apic_state;

static bool x2apic_hw_locked(void)
{
	u64 ia32_cap;
	u64 msr;

	ia32_cap = x86_read_arch_cap_msr();
	if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
		rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
		return (msr & LEGACY_XAPIC_DISABLED);
	}
	return false;
}

static void __x2apic_disable(void)
{
	u64 msr;

	if (!boot_cpu_has(X86_FEATURE_APIC))
		return;

	rdmsrl(MSR_IA32_APICBASE, msr);
	if (!(msr & X2APIC_ENABLE))
		return;
	/* Disable xapic and x2apic first and then reenable xapic mode */
	wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
	printk_once(KERN_INFO "x2apic disabled\n");
}

static void __x2apic_enable(void)
{
	u64 msr;

	rdmsrl(MSR_IA32_APICBASE, msr);
	if (msr & X2APIC_ENABLE)
		return;
	wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
	printk_once(KERN_INFO "x2apic enabled\n");
}

static int __init setup_nox2apic(char *str)
{
	if (x2apic_enabled()) {
		int apicid = native_apic_msr_read(APIC_ID);

		if (apicid >= 255) {
			pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
				apicid);
			return 0;
		}
		if (x2apic_hw_locked()) {
			pr_warn("APIC locked in x2apic mode, can't disable\n");
			return 0;
		}
		pr_warn("x2apic already enabled.\n");
		__x2apic_disable();
	}
	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
	x2apic_state = X2APIC_DISABLED;
	x2apic_mode = 0;
	return 0;
}
early_param("nox2apic", setup_nox2apic);
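/*
 * The x2apic_state values are deliberately ordered so that "x2apic
 * enabled" can be tested with a plain comparison: everything >=
 * X2APIC_ON means enabled, which is what the "x2apic_state < X2APIC_ON"
 * check in x2apic_setup() below relies on.
 */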
/* Called from cpu_init() to enable x2apic on (secondary) cpus */
void x2apic_setup(void)
{
	/*
	 * Try to make the AP's APIC state match that of the BSP, but if the
	 * BSP is unlocked and the AP is locked then there is a state mismatch.
	 * Warn about the mismatch in case a GP fault occurs due to a locked AP
	 * trying to be turned off.
	 */
	if (x2apic_state != X2APIC_ON_LOCKED && x2apic_hw_locked())
		pr_warn("x2apic lock mismatch between BSP and AP.\n");
	/*
	 * If x2apic is not in ON or LOCKED state, disable it if already enabled
	 * from BIOS.
	 */
	if (x2apic_state < X2APIC_ON) {
		__x2apic_disable();
		return;
	}
	__x2apic_enable();
}

static __init void x2apic_disable(void)
{
	u32 x2apic_id, state = x2apic_state;

	x2apic_mode = 0;
	x2apic_state = X2APIC_DISABLED;

	if (state != X2APIC_ON)
		return;

	x2apic_id = read_apic_id();
	if (x2apic_id >= 255)
		panic("Cannot disable x2apic, id: %08x\n", x2apic_id);

	if (x2apic_hw_locked()) {
		pr_warn("Cannot disable locked x2apic, id: %08x\n", x2apic_id);
		return;
	}

	__x2apic_disable();
	register_lapic_address(mp_lapic_addr);
}

static __init void x2apic_enable(void)
{
	if (x2apic_state != X2APIC_OFF)
		return;

	x2apic_mode = 1;
	x2apic_state = X2APIC_ON;
	__x2apic_enable();
}

static __init void try_to_enable_x2apic(int remap_mode)
{
	if (x2apic_state == X2APIC_DISABLED)
		return;

	if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
		u32 apic_limit = 255;

		/*
		 * Using X2APIC without IR is not architecturally supported
		 * on bare metal but may be supported in guests.
		 */
		if (!x86_init.hyper.x2apic_available()) {
			pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
			x2apic_disable();
			return;
		}

		/*
		 * If the hypervisor supports extended destination ID in
		 * MSI, that increases the maximum APIC ID that can be
		 * used for non-remapped IRQ domains.
		 */
		if (x86_init.hyper.msi_ext_dest_id()) {
			virt_ext_dest_id = 1;
			apic_limit = 32767;
		}

		/*
		 * Without IR, all CPUs can be addressed by IOAPIC/MSI only
		 * in physical mode, and CPUs with an APIC ID that cannot
		 * be addressed must not be brought online.
		 */
		x2apic_set_max_apicid(apic_limit);
		x2apic_phys = 1;
	}
	x2apic_enable();
}

void __init check_x2apic(void)
{
	if (x2apic_enabled()) {
		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
		x2apic_mode = 1;
		if (x2apic_hw_locked())
			x2apic_state = X2APIC_ON_LOCKED;
		else
			x2apic_state = X2APIC_ON;
	} else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
		x2apic_state = X2APIC_DISABLED;
	}
}
#else /* CONFIG_X86_X2APIC */
void __init check_x2apic(void)
{
	if (!apic_is_x2apic_enabled())
		return;
	/*
	 * Checkme: Can we simply turn off x2APIC here instead of disabling the APIC?
	 */
	pr_err("Kernel does not support x2APIC, please recompile with CONFIG_X86_X2APIC.\n");
	pr_err("Disabling APIC, expect reduced performance and functionality.\n");

	disable_apic = 1;
	setup_clear_cpu_cap(X86_FEATURE_APIC);
}

static inline void try_to_enable_x2apic(int remap_mode) { }
static inline void __x2apic_enable(void) { }
#endif /* !CONFIG_X86_X2APIC */

void __init enable_IR_x2apic(void)
{
	unsigned long flags;
	int ret, ir_stat;

	if (skip_ioapic_setup) {
		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
		return;
	}

	ir_stat = irq_remapping_prepare();
	if (ir_stat < 0 && !x2apic_supported())
		return;

	ret = save_ioapic_entries();
	if (ret) {
		pr_info("Saving IO-APIC state failed: %d\n", ret);
		return;
	}

	local_irq_save(flags);
	legacy_pic->mask_all();
	mask_ioapic_entries();

	/* If irq_remapping_prepare() succeeded, try to enable it */
	if (ir_stat >= 0)
		ir_stat = irq_remapping_enable();
	/* ir_stat contains the remap mode or an error code */
	try_to_enable_x2apic(ir_stat);

	if (ir_stat < 0)
		restore_ioapic_entries();
	legacy_pic->restore_mask();
	local_irq_restore(flags);
}

#ifdef CONFIG_X86_64
/*
 * Detect and enable local APICs on non-SMP boards.
 * Original code written by Keir Fraser.
 * On AMD64 we trust the BIOS - if it says no APIC it is likely
 * not correctly set up (usually the APIC timer won't work etc.)
 */
static int __init detect_init_APIC(void)
{
	if (!boot_cpu_has(X86_FEATURE_APIC)) {
		pr_info("No local APIC present\n");
		return -1;
	}

	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
	return 0;
}
#else

static int __init apic_verify(void)
{
	u32 features, h, l;

	/*
	 * The APIC feature bit should now be enabled
	 * in `cpuid'
	 */
	features = cpuid_edx(1);
	if (!(features & (1 << X86_FEATURE_APIC))) {
		pr_warn("Could not enable APIC!\n");
		return -1;
	}
	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;

	/* The BIOS may have set up the APIC at some other address */
	if (boot_cpu_data.x86 >= 6) {
		rdmsr(MSR_IA32_APICBASE, l, h);
		if (l & MSR_IA32_APICBASE_ENABLE)
			mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
	}

	pr_info("Found and enabled local APIC!\n");
	return 0;
}

int __init apic_force_enable(unsigned long addr)
{
	u32 h, l;

	if (disable_apic)
		return -1;

	/*
	 * Some BIOSes disable the local APIC in the APIC_BASE
	 * MSR. This can only be done in software for Intel P6 or later
	 * and AMD K7 (Model > 1) or later.
	 */
	if (boot_cpu_data.x86 >= 6) {
		rdmsr(MSR_IA32_APICBASE, l, h);
		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
			l &= ~MSR_IA32_APICBASE_BASE;
			l |= MSR_IA32_APICBASE_ENABLE | addr;
			wrmsr(MSR_IA32_APICBASE, l, h);
			enabled_via_apicbase = 1;
		}
	}
	return apic_verify();
}
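/*
 * For reference: in MSR_IA32_APICBASE, bit 11 is the global enable bit
 * (MSR_IA32_APICBASE_ENABLE) and the bits from 12 upwards hold the
 * physical base address (MSR_IA32_APICBASE_BASE), which is what
 * apic_verify() and apic_force_enable() above read and manipulate.
 */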
/*
 * Detect and initialize APIC
 */
static int __init detect_init_APIC(void)
{
	/* Disabled by kernel option? */
	if (disable_apic)
		return -1;

	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
		    (boot_cpu_data.x86 >= 15))
			break;
		goto no_apic;
	case X86_VENDOR_HYGON:
		break;
	case X86_VENDOR_INTEL:
		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
		    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
			break;
		goto no_apic;
	default:
		goto no_apic;
	}

	if (!boot_cpu_has(X86_FEATURE_APIC)) {
		/*
		 * Override the BIOS and try to enable the local APIC
		 * only if "lapic" was specified.
		 */
		if (!force_enable_local_apic) {
			pr_info("Local APIC disabled by BIOS -- "
				"you can enable it with \"lapic\"\n");
			return -1;
		}
		if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
			return -1;
	} else {
		if (apic_verify())
			return -1;
	}

	apic_pm_activate();

	return 0;

no_apic:
	pr_info("No local APIC present or hardware disabled\n");
	return -1;
}
#endif

/**
 * init_apic_mappings - initialize APIC mappings
 */
void __init init_apic_mappings(void)
{
	unsigned int new_apicid;

	if (apic_validate_deadline_timer())
		pr_info("TSC deadline timer available\n");

	if (x2apic_mode) {
		boot_cpu_physical_apicid = read_apic_id();
		return;
	}

	/* If no local APIC can be found, return early */
	if (!smp_found_config && detect_init_APIC()) {
		/* Let's NOP'ify APIC operations */
		pr_info("APIC: disable apic facility\n");
		apic_disable();
	} else {
		apic_phys = mp_lapic_addr;

		/*
		 * If the system has ACPI MADT tables or MP info, the LAPIC
		 * address is already registered.
		 */
		if (!acpi_lapic && !smp_found_config)
			register_lapic_address(apic_phys);
	}

	/*
	 * Fetch the APIC ID of the BSP in case we have a
	 * default configuration (or the MP table is broken).
	 */
	new_apicid = read_apic_id();
	if (boot_cpu_physical_apicid != new_apicid) {
		boot_cpu_physical_apicid = new_apicid;
		/*
		 * We lie about apic_version here if the APIC was disabled
		 * via a boot option. This is not a problem for an
		 * SMP-compiled kernel, since apic_intr_mode_select() is
		 * prepared for such a case and disables SMP mode.
		 */
		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
	}
}

void __init register_lapic_address(unsigned long address)
{
	mp_lapic_addr = address;

	if (!x2apic_mode) {
		set_fixmap_nocache(FIX_APIC_BASE, address);
		apic_mmio_base = APIC_BASE;
		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
			    APIC_BASE, address);
	}
	if (boot_cpu_physical_apicid == -1U) {
		boot_cpu_physical_apicid = read_apic_id();
		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
	}
}

/*
 * Local APIC interrupts
 */
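
/*
 * A short refresher for the ISR bitmap access below (standard APIC
 * register layout, not specific to this file): vectors 0-255 are
 * spread across eight 32-bit registers placed 0x10 apart, so vector V
 * lives in the register at offset ((V & ~0x1f) >> 1) from the base of
 * the array, at bit position (V & 0x1f). For example, vector 0x27 is
 * bit 7 of the second register (offset 0x10).
 */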

/*
 * Common handling code for spurious_interrupt and spurious_vector entry
 * points below. No point in allowing the compiler to inline it twice.
 */
static noinline void handle_spurious_interrupt(u8 vector)
{
	u32 v;

	trace_spurious_apic_entry(vector);

	inc_irq_stat(irq_spurious_count);

	/*
	 * If this is a spurious interrupt then do not acknowledge
	 */
	if (vector == SPURIOUS_APIC_VECTOR) {
		/* See SDM vol 3 */
		pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
			smp_processor_id());
		goto out;
	}

	/*
	 * If it is a vectored one, verify it's set in the ISR. If set,
	 * acknowledge it.
	 */
	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
	if (v & (1 << (vector & 0x1f))) {
		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
			vector, smp_processor_id());
		ack_APIC_irq();
	} else {
		pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
			vector, smp_processor_id());
	}
out:
	trace_spurious_apic_exit(vector);
}

/**
 * spurious_interrupt - Catch all for interrupts raised on unused vectors
 * @regs:	Pointer to pt_regs on stack
 * @vector:	The vector number
 *
 * This is invoked from ASM entry code to catch all interrupts which
 * trigger on an entry which is routed to the common_spurious idtentry
 * point.
 */
DEFINE_IDTENTRY_IRQ(spurious_interrupt)
{
	handle_spurious_interrupt(vector);
}

DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)
{
	handle_spurious_interrupt(SPURIOUS_APIC_VECTOR);
}

/*
 * This interrupt should never happen with our APIC/SMP architecture
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)
{
	static const char * const error_interrupt_reason[] = {
		"Send CS error",		/* APIC Error Bit 0 */
		"Receive CS error",		/* APIC Error Bit 1 */
		"Send accept error",		/* APIC Error Bit 2 */
		"Receive accept error",		/* APIC Error Bit 3 */
		"Redirectable IPI",		/* APIC Error Bit 4 */
		"Send illegal vector",		/* APIC Error Bit 5 */
		"Received illegal vector",	/* APIC Error Bit 6 */
		"Illegal register address",	/* APIC Error Bit 7 */
	};
	u32 v, i = 0;

	trace_error_apic_entry(ERROR_APIC_VECTOR);

	/* First tickle the hardware, only then report what went on. -- REW */
	if (lapic_get_maxlvt() > 3)	/* Due to the Pentium erratum 3AP. */
		apic_write(APIC_ESR, 0);
	v = apic_read(APIC_ESR);
	ack_APIC_irq();
	atomic_inc(&irq_err_count);

	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
		    smp_processor_id(), v);

	v &= 0xff;
	while (v) {
		if (v & 0x1)
			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
		i++;
		v >>= 1;
	}

	apic_printk(APIC_DEBUG, KERN_CONT "\n");

	trace_error_apic_exit(ERROR_APIC_VECTOR);
}

/**
 * connect_bsp_APIC - attach the APIC to the interrupt system
 */
static void __init connect_bsp_APIC(void)
{
#ifdef CONFIG_X86_32
	if (pic_mode) {
		/*
		 * Do not trust the local APIC being empty at bootup.
		 */
		clear_local_APIC();
		/*
		 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
		 * local APIC to INT and NMI lines.
		 */
		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
				"enabling APIC mode.\n");
		imcr_pic_to_apic();
	}
#endif
}

/**
 * disconnect_bsp_APIC - detach the APIC from the interrupt system
 * @virt_wire_setup:	indicates whether virtual wire mode is selected
 *
 * Virtual wire mode is necessary to deliver legacy interrupts even when the
 * APIC is disabled.
 */
void disconnect_bsp_APIC(int virt_wire_setup)
{
	unsigned int value;

#ifdef CONFIG_X86_32
	if (pic_mode) {
		/*
		 * Put the board back into PIC mode (has an effect only on
		 * certain older boards). Note that APIC interrupts, including
		 * IPIs, won't work beyond this point! The only exception are
		 * INIT IPIs.
		 */
		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
				"entering PIC mode.\n");
		imcr_apic_to_pic();
		return;
	}
#endif

	/* Go back to Virtual Wire compatibility mode */

	/* For the spurious interrupt use vector F, and enable it */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_VECTOR_MASK;
	value |= APIC_SPIV_APIC_ENABLED;
	value |= 0xf;
	apic_write(APIC_SPIV, value);

	if (!virt_wire_setup) {
		/*
		 * For LVT0 make it edge triggered, active high,
		 * external and enabled
		 */
		value = apic_read(APIC_LVT0);
		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
		apic_write(APIC_LVT0, value);
	} else {
		/* Disable LVT0 */
		apic_write(APIC_LVT0, APIC_LVT_MASKED);
	}

	/*
	 * For LVT1 make it edge triggered, active high,
	 * nmi and enabled
	 */
	value = apic_read(APIC_LVT1);
	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
	apic_write(APIC_LVT1, value);
}

/*
 * The number of allocated logical CPU IDs. Since logical CPU IDs are
 * allocated contiguously, it equals the currently allocated maximum
 * logical CPU ID plus 1. All allocated CPU IDs should be in the
 * [0, nr_logical_cpuids) range, so the maximum of nr_logical_cpuids
 * is nr_cpu_ids.
 *
 * NOTE: Reserve 0 for BSP.
 */
static int nr_logical_cpuids = 1;

/*
 * Used to store mapping between logical CPU IDs and APIC IDs.
 */
int cpuid_to_apicid[] = {
	[0 ... NR_CPUS - 1] = -1,
};

bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
	return phys_id == cpuid_to_apicid[cpu];
}

#ifdef CONFIG_SMP
static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
	/* Isolate the SMT bit(s) in the APICID and check for 0 */
	u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1;

	if (smp_num_siblings == 1 || !(apicid & mask))
		cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
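
/*
 * A worked example of the mask above (illustrative only): with
 * smp_num_siblings == 2, fls(2) == 2, so mask == 0x1 and the low APIC
 * ID bit selects the thread: APIC ID 0x10 is a primary thread, 0x11 is
 * its SMT sibling. With smp_num_siblings == 1 every CPU is primary.
 */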

/*
 * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid
 * during early boot. Initialize the primary thread mask before SMP
 * bringup.
 */
static int __init smp_init_primary_thread_mask(void)
{
	unsigned int cpu;

	for (cpu = 0; cpu < nr_logical_cpuids; cpu++)
		cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]);
	return 0;
}
early_initcall(smp_init_primary_thread_mask);
#else
static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif

/*
 * Use this API to allocate logical CPU IDs, so that nr_logical_cpuids
 * and cpuid_to_apicid[] stay synchronized.
 */
static int allocate_logical_cpuid(int apicid)
{
	int i;

	/*
	 * cpuid <-> apicid mapping is persistent, so when a cpu is up,
	 * check if the kernel has allocated a cpuid for it.
	 */
	for (i = 0; i < nr_logical_cpuids; i++) {
		if (cpuid_to_apicid[i] == apicid)
			return i;
	}

	/* Allocate a new cpuid. */
	if (nr_logical_cpuids >= nr_cpu_ids) {
		WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. "
			     "Processor %d/0x%x and the rest are ignored.\n",
			     nr_cpu_ids, nr_logical_cpuids, apicid);
		return -EINVAL;
	}

	cpuid_to_apicid[nr_logical_cpuids] = apicid;
	return nr_logical_cpuids++;
}
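
/*
 * Illustrative call sequence (not from this file): assuming the BSP has
 * already claimed logical CPU 0, the first allocate_logical_cpuid(0x10)
 * returns 1 and records the mapping; calling it again with 0x10 finds
 * the existing entry and returns 1 again, keeping the mapping stable
 * across repeated enumeration of the same APIC ID.
 */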
" 2506 "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); 2507 2508 disabled_cpus++; 2509 return -EINVAL; 2510 } 2511 2512 if (apicid == boot_cpu_physical_apicid) { 2513 /* 2514 * x86_bios_cpu_apicid is required to have processors listed 2515 * in same order as logical cpu numbers. Hence the first 2516 * entry is BSP, and so on. 2517 * boot_cpu_init() already hold bit 0 in cpu_present_mask 2518 * for BSP. 2519 */ 2520 cpu = 0; 2521 2522 /* Logical cpuid 0 is reserved for BSP. */ 2523 cpuid_to_apicid[0] = apicid; 2524 } else { 2525 cpu = allocate_logical_cpuid(apicid); 2526 if (cpu < 0) { 2527 disabled_cpus++; 2528 return -EINVAL; 2529 } 2530 } 2531 2532 /* 2533 * Validate version 2534 */ 2535 if (version == 0x0) { 2536 pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", 2537 cpu, apicid); 2538 version = 0x10; 2539 } 2540 2541 if (version != boot_cpu_apic_version) { 2542 pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", 2543 boot_cpu_apic_version, cpu, version); 2544 } 2545 2546 if (apicid > max_physical_apicid) 2547 max_physical_apicid = apicid; 2548 2549 #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) 2550 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; 2551 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 2552 #endif 2553 #ifdef CONFIG_X86_32 2554 early_per_cpu(x86_cpu_to_logical_apicid, cpu) = 2555 apic->x86_32_early_logical_apicid(cpu); 2556 #endif 2557 set_cpu_possible(cpu, true); 2558 physid_set(apicid, phys_cpu_present_map); 2559 set_cpu_present(cpu, true); 2560 num_processors++; 2561 2562 if (system_state != SYSTEM_BOOTING) 2563 cpu_mark_primary_thread(cpu, apicid); 2564 2565 return cpu; 2566 } 2567 2568 int hard_smp_processor_id(void) 2569 { 2570 return read_apic_id(); 2571 } 2572 2573 void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, 2574 bool dmar) 2575 { 2576 memset(msg, 0, sizeof(*msg)); 2577 2578 msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; 2579 msg->arch_addr_lo.dest_mode_logical = apic->dest_mode_logical; 2580 msg->arch_addr_lo.destid_0_7 = cfg->dest_apicid & 0xFF; 2581 2582 msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_FIXED; 2583 msg->arch_data.vector = cfg->vector; 2584 2585 msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; 2586 /* 2587 * Only the IOMMU itself can use the trick of putting destination 2588 * APIC ID into the high bits of the address. Anything else would 2589 * just be writing to memory if it tried that, and needs IR to 2590 * address APICs which can't be addressed in the normal 32-bit 2591 * address range at 0xFFExxxxx. That is typically just 8 bits, but 2592 * some hypervisors allow the extended destination ID field in bits 2593 * 5-11 to be used, giving support for 15 bits of APIC IDs in total. 
	 */
	if (dmar)
		msg->arch_addr_hi.destid_8_31 = cfg->dest_apicid >> 8;
	else if (virt_ext_dest_id && cfg->dest_apicid < 0x8000)
		msg->arch_addr_lo.virt_destid_8_14 = cfg->dest_apicid >> 8;
	else
		WARN_ON_ONCE(cfg->dest_apicid > 0xFF);
}

u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid)
{
	u32 dest = msg->arch_addr_lo.destid_0_7;

	if (extid)
		dest |= msg->arch_addr_hi.destid_8_31 << 8;
	return dest;
}
EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid);

#ifdef CONFIG_X86_64
void __init acpi_wake_cpu_handler_update(wakeup_cpu_handler handler)
{
	struct apic **drv;

	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++)
		(*drv)->wakeup_secondary_cpu_64 = handler;
}
#endif

/*
 * Override the generic EOI implementation with an optimized version.
 * Only called during early boot when only one CPU is active and with
 * interrupts disabled, so we know this does not race with actual APIC driver
 * use.
 */
void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
{
	struct apic **drv;

	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
		/* Should happen once for each apic */
		WARN_ON((*drv)->eoi_write == eoi_write);
		(*drv)->native_eoi_write = (*drv)->eoi_write;
		(*drv)->eoi_write = eoi_write;
	}
}

static void __init apic_bsp_up_setup(void)
{
#ifdef CONFIG_X86_64
	apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid));
#else
	/*
	 * Hack: In case of kdump, after a crash, the kernel might be
	 * booting on a CPU with a non-zero LAPIC id. But
	 * boot_cpu_physical_apicid might be zero if read from MP tables.
	 * Get it from the LAPIC.
	 */
# ifdef CONFIG_CRASH_DUMP
	boot_cpu_physical_apicid = read_apic_id();
# endif
#endif
	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
}

/**
 * apic_bsp_setup - Setup function for local apic and io-apic
 * @upmode:	Force UP mode (for APIC_init_uniprocessor)
 */
static void __init apic_bsp_setup(bool upmode)
{
	connect_bsp_APIC();
	if (upmode)
		apic_bsp_up_setup();
	setup_local_APIC();

	enable_IO_APIC();
	end_local_APIC_setup();
	irq_remap_enable_fault_handling();
	setup_IO_APIC();
	lapic_update_legacy_vectors();
}

#ifdef CONFIG_UP_LATE_INIT
void __init up_late_init(void)
{
	if (apic_intr_mode == APIC_PIC)
		return;

	/* Setup local timer */
	x86_init.timers.setup_percpu_clockev();
}
#endif

/*
 * Power management
 */
#ifdef CONFIG_PM

static struct {
	/*
	 * 'active' is true if the local APIC was enabled by us and
	 * not the BIOS; this signifies that we are also responsible
	 * for disabling it before entering apm/acpi suspend
	 */
	int active;
	/* r/w apic fields */
	unsigned int apic_id;
	unsigned int apic_taskpri;
	unsigned int apic_ldr;
	unsigned int apic_dfr;
	unsigned int apic_spiv;
	unsigned int apic_lvtt;
	unsigned int apic_lvtpc;
	unsigned int apic_lvt0;
	unsigned int apic_lvt1;
	unsigned int apic_lvterr;
	unsigned int apic_tmict;
	unsigned int apic_tdcr;
	unsigned int apic_thmr;
	unsigned int apic_cmci;
} apic_pm_state;

static int lapic_suspend(void)
{
	unsigned long flags;
	int maxlvt;

	if (!apic_pm_state.active)
		return 0;

	maxlvt = lapic_get_maxlvt();

	apic_pm_state.apic_id = apic_read(APIC_ID);
	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
	if (maxlvt >= 4)
		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5)
		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6)
		apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
#endif

	local_irq_save(flags);

	/*
	 * Mask IOAPIC before disabling the local APIC to prevent stale IRR
	 * entries on some implementations.
	 */
	mask_ioapic_entries();

	disable_local_APIC();

	irq_remapping_disable();

	local_irq_restore(flags);
	return 0;
}
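
/*
 * For orientation (a summary of the save/restore paths above and below,
 * not new policy): LVTT, LVT0/1 and LVTERR always exist; the performance
 * counter entry (LVTPC) is only handled when maxlvt >= 4, the thermal
 * entry (LVTTHMR) when maxlvt >= 5, and the CMCI entry (LVTCMCI) when
 * maxlvt >= 6, matching the number of LVT entries the CPU reports.
 */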
static void lapic_resume(void)
{
	unsigned int l, h;
	unsigned long flags;
	int maxlvt;

	if (!apic_pm_state.active)
		return;

	local_irq_save(flags);

	/*
	 * IO-APIC and PIC have their own resume routines.
	 * We just mask them here to make sure the interrupt
	 * subsystem is completely quiet while we enable x2apic
	 * and interrupt-remapping.
	 */
	mask_ioapic_entries();
	legacy_pic->mask_all();

	if (x2apic_mode) {
		__x2apic_enable();
	} else {
		/*
		 * Make sure the APICBASE points to the right address
		 *
		 * FIXME! This will be wrong if we ever support suspend on
		 * SMP! We'll need to do this as part of the CPU restore!
		 */
		if (boot_cpu_data.x86 >= 6) {
			rdmsr(MSR_IA32_APICBASE, l, h);
			l &= ~MSR_IA32_APICBASE_BASE;
			l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
			wrmsr(MSR_IA32_APICBASE, l, h);
		}
	}

	maxlvt = lapic_get_maxlvt();
	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
	apic_write(APIC_ID, apic_pm_state.apic_id);
	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5)
		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6)
		apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
#endif
	if (maxlvt >= 4)
		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
	apic_write(APIC_ESR, 0);
	apic_read(APIC_ESR);
	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
	apic_write(APIC_ESR, 0);
	apic_read(APIC_ESR);

	irq_remapping_reenable(x2apic_mode);

	local_irq_restore(flags);
}

/*
 * This device has no shutdown method - fully functioning local APICs
 * are needed on every CPU up until machine_halt/restart/poweroff.
 */

static struct syscore_ops lapic_syscore_ops = {
	.resume		= lapic_resume,
	.suspend	= lapic_suspend,
};

static void apic_pm_activate(void)
{
	apic_pm_state.active = 1;
}

static int __init init_lapic_sysfs(void)
{
	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
	if (boot_cpu_has(X86_FEATURE_APIC))
		register_syscore_ops(&lapic_syscore_ops);

	return 0;
}

/* Local APIC needs to resume before other devices access its registers. */
core_initcall(init_lapic_sysfs);

#else	/* CONFIG_PM */

static void apic_pm_activate(void) { }

#endif	/* CONFIG_PM */

#ifdef CONFIG_X86_64

static int multi_checked;
static int multi;

static int set_multi(const struct dmi_system_id *d)
{
	if (multi)
		return 0;
	pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
	multi = 1;
	return 0;
}

static const struct dmi_system_id multi_dmi_table[] = {
	{
		.callback = set_multi,
		.ident = "IBM System Summit2",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
			DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
		},
	},
	{}
};

static void dmi_check_multi(void)
{
	if (multi_checked)
		return;

	dmi_check_system(multi_dmi_table);
	multi_checked = 1;
}

/*
 * apic_is_clustered_box() -- Check if we can expect good TSC
 *
 * Thus far, the major user of this is IBM's Summit2 series:
 * clustered boxes may have unsynced TSC problems if they are
 * multi-chassis. Use DMI to check for them.
 */
int apic_is_clustered_box(void)
{
	dmi_check_multi();
	return multi;
}
#endif

/*
 * APIC command line parameters
 */
static int __init setup_disableapic(char *arg)
{
	disable_apic = 1;
	setup_clear_cpu_cap(X86_FEATURE_APIC);
	return 0;
}
early_param("disableapic", setup_disableapic);

/* same as disableapic, for compatibility */
static int __init setup_nolapic(char *arg)
{
	return setup_disableapic(arg);
}
early_param("nolapic", setup_nolapic);

static int __init parse_lapic_timer_c2_ok(char *arg)
{
	local_apic_timer_c2_ok = 1;
	return 0;
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);

static int __init parse_disable_apic_timer(char *arg)
{
	disable_apic_timer = 1;
	return 0;
}
early_param("noapictimer", parse_disable_apic_timer);

static int __init parse_nolapic_timer(char *arg)
{
	disable_apic_timer = 1;
	return 0;
}
early_param("nolapic_timer", parse_nolapic_timer);

static int __init apic_set_verbosity(char *arg)
{
	if (!arg) {
#ifdef CONFIG_X86_64
		skip_ioapic_setup = 0;
		return 0;
#endif
		return -EINVAL;
	}

	if (strcmp("debug", arg) == 0)
		apic_verbosity = APIC_DEBUG;
	else if (strcmp("verbose", arg) == 0)
		apic_verbosity = APIC_VERBOSE;
#ifdef CONFIG_X86_64
	else {
		pr_warn("APIC Verbosity level %s not recognised, use apic=verbose or apic=debug\n",
			arg);
		return -EINVAL;
	}
#endif

	return 0;
}
early_param("apic", apic_set_verbosity);

static int __init lapic_insert_resource(void)
{
	if (!apic_phys)
		return -1;

	/* Put local APIC into the resource map. */
	lapic_resource.start = apic_phys;
	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
	insert_resource(&iomem_resource, &lapic_resource);

	return 0;
}
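
/*
 * Note for readers (illustrative, values from the architecture rather
 * than this file): the reserved window is one page, so with the default
 * base the resource covers 0xFEE00000-0xFEE00FFF and shows up in
 * /proc/iomem as "Local APIC".
 */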

/*
 * We need to call this after e820__reserve_resources(), which uses
 * request_resource().
 */
late_initcall(lapic_insert_resource);

static int __init apic_set_disabled_cpu_apicid(char *arg)
{
	if (!arg || !get_option(&arg, &disabled_cpu_apicid))
		return -EINVAL;

	return 0;
}
early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);

static int __init apic_set_extnmi(char *arg)
{
	if (!arg)
		return -EINVAL;

	if (!strncmp("all", arg, 3))
		apic_extnmi = APIC_EXTNMI_ALL;
	else if (!strncmp("none", arg, 4))
		apic_extnmi = APIC_EXTNMI_NONE;
	else if (!strncmp("bsp", arg, 3))
		apic_extnmi = APIC_EXTNMI_BSP;
	else {
		pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
		return -EINVAL;
	}

	return 0;
}
early_param("apic_extnmi", apic_set_extnmi);