// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've
 * encountered an error. If not, we decrement it by one. We signal the end of
 * the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD		1
#define CMCI_POLL_INTERVAL	(30 * HZ)
#define CMCI_STORM_INTERVAL	(HZ)
#define CMCI_STORM_THRESHOLD	15

static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
	CMCI_STORM_NONE,
	CMCI_STORM_ACTIVE,
	CMCI_STORM_SUBSIDED,
};

static atomic_t cmci_storm_on_cpus;

/*
 * Check whether this CPU can use CMCI. Reports the number of MCA banks
 * through *banks.
 */
static int cmci_supported(int *banks)
{
	u64 cap;

	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
		return 0;

	/*
	 * Vendor check is not strictly needed, but the initial
	 * initialization is vendor keyed and this
	 * makes sure none of the backdoors are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
		return 0;

	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
		return 0;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
	return !!(cap & MCG_CMCI_P);
}

/* Check whether the CPU and the BIOS support Local MCE (LMCE). */
static bool lmce_supported(void)
{
	u64 tmp;

	if (mca_cfg.lmce_disabled)
		return false;

	rdmsrl(MSR_IA32_MCG_CAP, tmp);

	/*
	 * LMCE depends on recovery support in the processor. Hence both
	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
	 */
	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
		   (MCG_SER_P | MCG_LMCE_P))
		return false;

	/*
	 * BIOS should indicate support for LMCE by setting bit 20 in
	 * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
	 * generate a #GP fault.
	 */
	rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
	if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
		   (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
		return true;

	return false;
}

/*
 * Poll the banks owned by this CPU if a CMCI storm is in progress.
 * Returns true if a storm was in progress, false otherwise.
 */
bool mce_intel_cmci_poll(void)
{
	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
		return false;

	/*
	 * Reset the counter if we've logged an error in the last poll
	 * during the storm.
	 */
	if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
	else
		this_cpu_dec(cmci_backoff_cnt);

	return true;
}

/* Clear the storm state of @cpu and drop it from the count of storming CPUs. */
void mce_intel_hcpu_update(unsigned long cpu)
{
	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
		atomic_dec(&cmci_storm_on_cpus);

	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

/* Enable or disable the CMCI interrupt on all banks owned by this CPU. */
static void cmci_toggle_interrupt_mode(bool on)
{
	unsigned long flags, *owned;
	int bank;
	u64 val;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	owned = this_cpu_ptr(mce_banks_owned);
	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

		if (on)
			val |= MCI_CTL2_CMCI_EN;
		else
			val &= ~MCI_CTL2_CMCI_EN;

		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
		mce_notify_irq();
		return CMCI_STORM_INTERVAL;
	}

	switch (__this_cpu_read(cmci_storm_state)) {
	case CMCI_STORM_ACTIVE:

		/*
		 * We switch back to interrupt mode once the poll timer has
		 * silenced itself. That means no events recorded and the timer
		 * interval is back to our poll interval.
		 */
		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
			pr_notice("CMCI storm subsided: switching to interrupt mode\n");

		/* FALLTHROUGH */

	case CMCI_STORM_SUBSIDED:
		/*
		 * We wait for all CPUs to go back to SUBSIDED state. When that
		 * happens we switch back to interrupt mode.
		 */
		if (!atomic_read(&cmci_storm_on_cpus)) {
			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
			cmci_toggle_interrupt_mode(true);
			cmci_recheck();
		}
		return CMCI_POLL_INTERVAL;
	default:

		/*
		 * We have shiny weather. Let the poll do whatever it thinks.
		 */
		return interval;
	}
}

/*
 * Return true if this CPU is in a CMCI storm. More than
 * CMCI_STORM_THRESHOLD CMCIs within a CMCI_STORM_INTERVAL window switch
 * the CPU to poll mode.
 */
static bool cmci_storm_detect(void)
{
	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
	unsigned long ts = __this_cpu_read(cmci_time_stamp);
	unsigned long now = jiffies;
	int r;

	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
		return true;

	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
		cnt++;
	} else {
		cnt = 1;
		__this_cpu_write(cmci_time_stamp, now);
	}
	__this_cpu_write(cmci_storm_cnt, cnt);

	if (cnt <= CMCI_STORM_THRESHOLD)
		return false;

	cmci_toggle_interrupt_mode(false);
	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
	r = atomic_add_return(1, &cmci_storm_on_cpus);
	mce_timer_kick(CMCI_STORM_INTERVAL);
	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

	if (r == 1)
		pr_notice("CMCI storm detected: switching to poll mode\n");
	return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	if (cmci_storm_detect())
		return;

	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
	unsigned long flags;
	int i;
	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;

		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			clear_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			continue;
		}

		if (!mca_cfg.bios_cmci_threshold) {
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If bios_cmci_threshold boot option was specified
			 * but the threshold is zero, we'll try to initialize
			 * it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			set_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			/*
			 * We are able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note down now and report later.
			 */
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;

	local_irq_save(flags);
	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same banks */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

/* After a CPU went down cycle through all the others and rediscover */
void cmci_rediscover(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;

	if (cmci_supported(&banks))
		cmci_discover(banks);
}

void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with still disabled APIC, but that's
	 * ok because only the vector is set up. We still do another
	 * check for the banks later for CPU #0 just to make sure
	 * we don't miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}

void intel_init_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);

	if (!(val & MCG_EXT_CTL_LMCE_EN))
		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

void intel_clear_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
	val &= ~MCG_EXT_CTL_LMCE_EN;
	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}

static void intel_ppin_init(struct cpuinfo_x86 *c)
{
	unsigned long long val;

	/*
	 * Even if testing the presence of the MSR would be enough, we don't
	 * want to risk the situation where other models reuse this MSR for
	 * other purposes.
	 */
	switch (c->x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
	case INTEL_FAM6_HASWELL_X:
	case INTEL_FAM6_BROADWELL_D:
	case INTEL_FAM6_BROADWELL_X:
	case INTEL_FAM6_SKYLAKE_X:
	case INTEL_FAM6_ICELAKE_X:
	case INTEL_FAM6_XEON_PHI_KNL:
	case INTEL_FAM6_XEON_PHI_KNM:

		if (rdmsrl_safe(MSR_PPIN_CTL, &val))
			return;

		if ((val & 3UL) == 1UL) {
			/* PPIN available but disabled: */
			return;
		}

		/* If PPIN is disabled, but not locked, try to enable: */
		if (!(val & 3UL)) {
			wrmsrl_safe(MSR_PPIN_CTL, val | 2UL);
			rdmsrl_safe(MSR_PPIN_CTL, &val);
		}

		if ((val & 3UL) == 2UL)
			set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
	}
}

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
	intel_init_lmce();
	intel_ppin_init(c);
}

void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}