// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts. This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During a storm, we reset this counter to INITIAL_CHECK_INTERVAL whenever
 * we've logged an error; otherwise we decrement it by one. We signal the end
 * of the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD		1
#define CMCI_POLL_INTERVAL	(30 * HZ)
#define CMCI_STORM_INTERVAL	(HZ)
#define CMCI_STORM_THRESHOLD	15

static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
	CMCI_STORM_NONE,
	CMCI_STORM_ACTIVE,
	CMCI_STORM_SUBSIDED,
};

static atomic_t cmci_storm_on_cpus;

static int cmci_supported(int *banks)
{
	u64 cap;

	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
		return 0;

	/*
	 * The vendor check is not strictly needed here, but the initial
	 * MCE setup is vendor keyed and this check makes sure that none
	 * of the vendor-specific backdoors are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
		return 0;

	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
		return 0;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
	return !!(cap & MCG_CMCI_P);
}

static bool lmce_supported(void)
{
	u64 tmp;

	if (mca_cfg.lmce_disabled)
		return false;

	rdmsrl(MSR_IA32_MCG_CAP, tmp);

	/*
	 * LMCE depends on recovery support in the processor. Hence both
	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
	 */
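	/*
	 * For reference, per the SDM and <asm/mce.h>: MCG_SER_P is
	 * MCG_CAP[24] (software error recovery supported) and MCG_LMCE_P
	 * is MCG_CAP[27] (local machine check supported). The combined
	 * mask-and-compare below rejects the case where only one of the
	 * two bits is set.
	 */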
	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
		   (MCG_SER_P | MCG_LMCE_P))
		return false;

	/*
	 * BIOS should indicate support for LMCE by setting bit 20 in
	 * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP
	 * fault. The MSR must also be locked for LMCE_ENABLED to take effect.
	 * WARN if the MSR isn't locked as init_ia32_feat_ctl() unconditionally
	 * locks the MSR in the event that it wasn't already locked by BIOS.
	 */
	rdmsrl(MSR_IA32_FEAT_CTL, tmp);
	if (WARN_ON_ONCE(!(tmp & FEAT_CTL_LOCKED)))
		return false;

	return tmp & FEAT_CTL_LMCE_ENABLED;
}

bool mce_intel_cmci_poll(void)
{
	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
		return false;

	/*
	 * Reset the counter if we've logged an error in the last poll
	 * during the storm.
	 */
	if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
	else
		this_cpu_dec(cmci_backoff_cnt);

	return true;
}

void mce_intel_hcpu_update(unsigned long cpu)
{
	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
		atomic_dec(&cmci_storm_on_cpus);

	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

static void cmci_toggle_interrupt_mode(bool on)
{
	unsigned long flags, *owned;
	int bank;
	u64 val;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	owned = this_cpu_ptr(mce_banks_owned);
	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

		if (on)
			val |= MCI_CTL2_CMCI_EN;
		else
			val &= ~MCI_CTL2_CMCI_EN;

		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
		mce_notify_irq();
		return CMCI_STORM_INTERVAL;
	}

	switch (__this_cpu_read(cmci_storm_state)) {
	case CMCI_STORM_ACTIVE:

		/*
		 * We switch back to interrupt mode once the poll timer has
		 * silenced itself. That means no events recorded and the timer
		 * interval is back to our poll interval.
		 */
		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
			pr_notice("CMCI storm subsided: switching to interrupt mode\n");

		/* FALLTHROUGH */

	case CMCI_STORM_SUBSIDED:
		/*
		 * We wait for all CPUs to go back to SUBSIDED state. When that
		 * happens we switch back to interrupt mode.
		 */
		if (!atomic_read(&cmci_storm_on_cpus)) {
			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
			cmci_toggle_interrupt_mode(true);
			cmci_recheck();
		}
		return CMCI_POLL_INTERVAL;
	default:

		/*
		 * We have shiny weather. Let the poll do whatever it
		 * thinks.
		 */
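		/*
		 * This is the CMCI_STORM_NONE case: no storm is in progress,
		 * so the interval chosen by the generic MCE poll timer is
		 * returned unchanged.
		 */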
		return interval;
	}
}

static bool cmci_storm_detect(void)
{
	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
	unsigned long ts = __this_cpu_read(cmci_time_stamp);
	unsigned long now = jiffies;
	int r;

	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
		return true;

	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
		cnt++;
	} else {
		cnt = 1;
		__this_cpu_write(cmci_time_stamp, now);
	}
	__this_cpu_write(cmci_storm_cnt, cnt);

	if (cnt <= CMCI_STORM_THRESHOLD)
		return false;

	cmci_toggle_interrupt_mode(false);
	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
	r = atomic_add_return(1, &cmci_storm_on_cpus);
	mce_timer_kick(CMCI_STORM_INTERVAL);
	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

	if (r == 1)
		pr_notice("CMCI storm detected: switching to poll mode\n");
	return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	if (cmci_storm_detect())
		return;

	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
	unsigned long flags;
	int i;
	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;

		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			clear_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			continue;
		}

		if (!mca_cfg.bios_cmci_threshold) {
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If the bios_cmci_threshold boot option was specified
			 * but the threshold is zero, we'll try to initialize
			 * it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			set_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			/*
			 * We are able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note down now and report later.
			 */
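			/*
			 * For reference, per the SDM: in IA32_MCi_CTL2 the
			 * CMCI threshold lives in bits 14:0
			 * (MCI_CTL2_CMCI_THRESHOLD_MASK) and the enable bit
			 * is bit 30 (MCI_CTL2_CMCI_EN). A non-zero threshold
			 * read back here means the CMCI_THRESHOLD written
			 * above stuck even though the BIOS had left it zero.
			 */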
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;

	local_irq_save(flags);
	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same number */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Re-enable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with the APIC still disabled, but that's OK
	 * because only the vector is set up here. We do another check of
	 * the banks later for CPU #0 to make sure no events are missed.
	 */
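	/*
	 * Program the local APIC LVT CMCI entry below with fixed delivery
	 * mode and THRESHOLD_APIC_VECTOR, left unmasked. That vector's
	 * handler in turn calls mce_threshold_vector, which was pointed at
	 * intel_threshold_interrupt() above.
	 */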
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}

void intel_init_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);

	if (!(val & MCG_EXT_CTL_LMCE_EN))
		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

void intel_clear_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
	val &= ~MCG_EXT_CTL_LMCE_EN;
	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}

static void intel_ppin_init(struct cpuinfo_x86 *c)
{
	unsigned long long val;

	/*
	 * Even though testing for the presence of the MSR would be enough,
	 * we don't want to risk the situation where other models reuse this
	 * MSR for other purposes.
	 */
	switch (c->x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
	case INTEL_FAM6_HASWELL_X:
	case INTEL_FAM6_BROADWELL_D:
	case INTEL_FAM6_BROADWELL_X:
	case INTEL_FAM6_SKYLAKE_X:
	case INTEL_FAM6_ICELAKE_X:
	case INTEL_FAM6_XEON_PHI_KNL:
	case INTEL_FAM6_XEON_PHI_KNM:

		if (rdmsrl_safe(MSR_PPIN_CTL, &val))
			return;

		if ((val & 3UL) == 1UL) {
			/* PPIN locked in disabled mode, cannot enable it: */
			return;
		}

		/* If PPIN is disabled, but not locked, try to enable: */
		if (!(val & 3UL)) {
			wrmsrl_safe(MSR_PPIN_CTL, val | 2UL);
			rdmsrl_safe(MSR_PPIN_CTL, &val);
		}

		if ((val & 3UL) == 2UL)
			set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
	}
}

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
	intel_init_lmce();
	intel_ppin_init(c);
}

void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}
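
/*
 * Note on the MSR_PPIN_CTL bits used in intel_ppin_init() above (per the
 * SDM): bit 0 is LockOut and bit 1 is Enable_PPIN. Thus (val & 3) == 1 means
 * the MSR is locked with PPIN disabled and cannot be re-enabled until reset,
 * writing bit 1 while the lock bit is clear enables PPIN, and
 * (val & 3) == 2 read back confirms PPIN is enabled.
 */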