// SPDX-License-Identifier: GPL-2.0
/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During a storm, we reset this counter to INITIAL_CHECK_INTERVAL whenever
 * the last poll encountered an error. If not, we decrement it by one. We
 * signal the end of the CMCI storm when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD		1
#define CMCI_POLL_INTERVAL	(30 * HZ)
#define CMCI_STORM_INTERVAL	(HZ)
#define CMCI_STORM_THRESHOLD	15

static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
	CMCI_STORM_NONE,
	CMCI_STORM_ACTIVE,
	CMCI_STORM_SUBSIDED,
};

static atomic_t cmci_storm_on_cpus;

static int cmci_supported(int *banks)
{
	u64 cap;

	if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
		return 0;

	/*
	 * Vendor check is not strictly needed, but the initialization
	 * is vendor keyed and this check makes sure none of the
	 * backdoors are entered otherwise.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;
	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
		return 0;
	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
	return !!(cap & MCG_CMCI_P);
}
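/*
 * Local MCE (LMCE) delivers a machine check only to the logical CPU that
 * observed the error instead of broadcasting it. It can only be used when
 * the processor advertises it in IA32_MCG_CAP and the BIOS has opted in
 * via IA32_FEATURE_CONTROL.
 */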
static bool lmce_supported(void)
{
	u64 tmp;

	if (mca_cfg.lmce_disabled)
		return false;

	rdmsrl(MSR_IA32_MCG_CAP, tmp);

	/*
	 * LMCE depends on recovery support in the processor. Hence both
	 * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
	 */
	if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
		   (MCG_SER_P | MCG_LMCE_P))
		return false;

	/*
	 * BIOS should indicate support for LMCE by setting bit 20 in
	 * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
	 * generate a #GP fault.
	 */
	rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
	if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
		   (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
		return true;

	return false;
}

bool mce_intel_cmci_poll(void)
{
	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
		return false;

	/*
	 * Reset the counter if we've logged an error in the last poll
	 * during the storm.
	 */
	if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
		this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
	else
		this_cpu_dec(cmci_backoff_cnt);

	return true;
}

void mce_intel_hcpu_update(unsigned long cpu)
{
	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
		atomic_dec(&cmci_storm_on_cpus);

	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

static void cmci_toggle_interrupt_mode(bool on)
{
	unsigned long flags, *owned;
	int bank;
	u64 val;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	owned = this_cpu_ptr(mce_banks_owned);
	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

		if (on)
			val |= MCI_CTL2_CMCI_EN;
		else
			val &= ~MCI_CTL2_CMCI_EN;

		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
	    (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
		mce_notify_irq();
		return CMCI_STORM_INTERVAL;
	}

	switch (__this_cpu_read(cmci_storm_state)) {
	case CMCI_STORM_ACTIVE:

		/*
		 * We switch back to interrupt mode once the poll timer has
		 * silenced itself. That means no events recorded and the timer
		 * interval is back to our poll interval.
		 */
		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
		if (!atomic_sub_return(1, &cmci_storm_on_cpus))
			pr_notice("CMCI storm subsided: switching to interrupt mode\n");

		/* FALLTHROUGH */

	case CMCI_STORM_SUBSIDED:
		/*
		 * We wait for all CPUs to go back to SUBSIDED state. When that
		 * happens we switch back to interrupt mode.
		 */
		if (!atomic_read(&cmci_storm_on_cpus)) {
			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
			cmci_toggle_interrupt_mode(true);
			cmci_recheck();
		}
		return CMCI_POLL_INTERVAL;
	default:

		/* We have shiny weather. Let the poll do whatever it thinks. */
		return interval;
	}
}
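/*
 * Count CMCIs per CMCI_STORM_INTERVAL on this CPU. Once more than
 * CMCI_STORM_THRESHOLD interrupts arrive within one interval, declare a
 * storm: disable CMCI on the banks owned by this CPU and fall back to
 * polling until cmci_intel_adjust_timer() sees the storm subside.
 */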
static bool cmci_storm_detect(void)
{
	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
	unsigned long ts = __this_cpu_read(cmci_time_stamp);
	unsigned long now = jiffies;
	int r;

	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
		return true;

	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
		cnt++;
	} else {
		cnt = 1;
		__this_cpu_write(cmci_time_stamp, now);
	}
	__this_cpu_write(cmci_storm_cnt, cnt);

	if (cnt <= CMCI_STORM_THRESHOLD)
		return false;

	cmci_toggle_interrupt_mode(false);
	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
	r = atomic_add_return(1, &cmci_storm_on_cpus);
	mce_timer_kick(CMCI_STORM_INTERVAL);
	this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

	if (r == 1)
		pr_notice("CMCI storm detected: switching to poll mode\n");
	return true;
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
	if (cmci_storm_detect())
		return;

	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
	unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
	unsigned long flags;
	int i;
	int bios_wrong_thresh = 0;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++) {
		u64 val;
		int bios_zero_thresh = 0;

		if (test_bit(i, owned))
			continue;

		/* Skip banks in firmware first mode */
		if (test_bit(i, mce_banks_ce_disabled))
			continue;

		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Already owned by someone else? */
		if (val & MCI_CTL2_CMCI_EN) {
			clear_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			continue;
		}

		if (!mca_cfg.bios_cmci_threshold) {
			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
			val |= CMCI_THRESHOLD;
		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
			/*
			 * If the bios_cmci_threshold boot option was specified
			 * but the threshold is zero, we'll try to initialize
			 * it to 1.
			 */
			bios_zero_thresh = 1;
			val |= CMCI_THRESHOLD;
		}

		val |= MCI_CTL2_CMCI_EN;
		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
		rdmsrl(MSR_IA32_MCx_CTL2(i), val);

		/* Did the enable bit stick? -- the bank supports CMCI */
		if (val & MCI_CTL2_CMCI_EN) {
			set_bit(i, owned);
			__clear_bit(i, this_cpu_ptr(mce_poll_banks));
			/*
			 * We were able to set thresholds for some banks that
			 * had a threshold of 0. This means the BIOS has not
			 * set the thresholds properly or does not work with
			 * this boot option. Note it down now and report it
			 * later.
			 */
			if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
			    (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
				bios_wrong_thresh = 1;
		} else {
			WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
		}
	}
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
	if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
		pr_info_once(
			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
		pr_info_once(
			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
	}
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
	unsigned long flags;
	int banks;

	if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
		return;

	local_irq_save(flags);
	machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
	local_irq_restore(flags);
}

/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
	u64 val;

	if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
		return;
	rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
	val &= ~MCI_CTL2_CMCI_EN;
	wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
	__clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
	unsigned long flags;
	int i;
	int banks;

	if (!cmci_supported(&banks))
		return;
	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	for (i = 0; i < banks; i++)
		__cmci_disable_bank(i);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
	int banks;

	if (cmci_supported(&banks))
		cmci_discover(banks);
}
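/* Disable CMCI on a single bank on this CPU and drop ownership of it. */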
void cmci_disable_bank(int bank)
{
	int banks;
	unsigned long flags;

	if (!cmci_supported(&banks))
		return;

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	__cmci_disable_bank(bank);
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void intel_init_cmci(void)
{
	int banks;

	if (!cmci_supported(&banks))
		return;

	mce_threshold_vector = intel_threshold_interrupt;
	cmci_discover(banks);
	/*
	 * For CPU #0 this runs with the APIC still disabled, but that's
	 * OK because only the vector is set up here. We still do another
	 * check of the banks later for CPU #0 just to make sure we don't
	 * miss any events.
	 */
	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
	cmci_recheck();
}
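/* Enable LMCE in MCG_EXT_CTL if the CPU and BIOS support it. */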
static void intel_init_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);

	if (!(val & MCG_EXT_CTL_LMCE_EN))
		wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

static void intel_clear_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
	val &= ~MCG_EXT_CTL_LMCE_EN;
	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}

static void intel_ppin_init(struct cpuinfo_x86 *c)
{
	unsigned long long val;

	/*
	 * Even though testing the presence of the MSR would be enough, we
	 * don't want to risk the situation where other models reuse this
	 * MSR for other purposes. So limit this to the known-good list
	 * below.
	 */
	switch (c->x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
	case INTEL_FAM6_HASWELL_X:
	case INTEL_FAM6_BROADWELL_XEON_D:
	case INTEL_FAM6_BROADWELL_X:
	case INTEL_FAM6_SKYLAKE_X:
	case INTEL_FAM6_XEON_PHI_KNL:
	case INTEL_FAM6_XEON_PHI_KNM:

		if (rdmsrl_safe(MSR_PPIN_CTL, &val))
			return;

		if ((val & 3UL) == 1UL) {
			/* PPIN locked in disabled mode: */
			return;
		}

		/* If PPIN is disabled, but not locked, try to enable: */
		if (!(val & 3UL)) {
			wrmsrl_safe(MSR_PPIN_CTL, val | 2UL);
			rdmsrl_safe(MSR_PPIN_CTL, &val);
		}

		if ((val & 3UL) == 2UL)
			set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
	}
}

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
	intel_init_lmce();
	intel_ppin_init(c);
}

void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}