121afaf18SBorislav Petkov // SPDX-License-Identifier: GPL-2.0 221afaf18SBorislav Petkov /* 321afaf18SBorislav Petkov * Intel specific MCE features. 421afaf18SBorislav Petkov * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> 521afaf18SBorislav Petkov * Copyright (C) 2008, 2009 Intel Corporation 621afaf18SBorislav Petkov * Author: Andi Kleen 721afaf18SBorislav Petkov */ 821afaf18SBorislav Petkov 921afaf18SBorislav Petkov #include <linux/gfp.h> 1021afaf18SBorislav Petkov #include <linux/interrupt.h> 1121afaf18SBorislav Petkov #include <linux/percpu.h> 1221afaf18SBorislav Petkov #include <linux/sched.h> 1321afaf18SBorislav Petkov #include <linux/cpumask.h> 1421afaf18SBorislav Petkov #include <asm/apic.h> 1521afaf18SBorislav Petkov #include <asm/cpufeature.h> 1621afaf18SBorislav Petkov #include <asm/intel-family.h> 1721afaf18SBorislav Petkov #include <asm/processor.h> 1821afaf18SBorislav Petkov #include <asm/msr.h> 1921afaf18SBorislav Petkov #include <asm/mce.h> 2021afaf18SBorislav Petkov 2121afaf18SBorislav Petkov #include "internal.h" 2221afaf18SBorislav Petkov 2321afaf18SBorislav Petkov /* 2421afaf18SBorislav Petkov * Support for Intel Correct Machine Check Interrupts. This allows 2521afaf18SBorislav Petkov * the CPU to raise an interrupt when a corrected machine check happened. 2621afaf18SBorislav Petkov * Normally we pick those up using a regular polling timer. 2721afaf18SBorislav Petkov * Also supports reliable discovery of shared banks. 2821afaf18SBorislav Petkov */ 2921afaf18SBorislav Petkov 3021afaf18SBorislav Petkov /* 3121afaf18SBorislav Petkov * CMCI can be delivered to multiple cpus that share a machine check bank 3221afaf18SBorislav Petkov * so we need to designate a single cpu to process errors logged in each bank 3321afaf18SBorislav Petkov * in the interrupt handler (otherwise we would have many races and potential 3421afaf18SBorislav Petkov * double reporting of the same error). 3521afaf18SBorislav Petkov * Note that this can change when a cpu is offlined or brought online since 3621afaf18SBorislav Petkov * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear() 3721afaf18SBorislav Petkov * disables CMCI on all banks owned by the cpu and clears this bitfield. At 3821afaf18SBorislav Petkov * this point, cmci_rediscover() kicks in and a different cpu may end up 3921afaf18SBorislav Petkov * taking ownership of some of the shared MCA banks that were previously 4021afaf18SBorislav Petkov * owned by the offlined cpu. 4121afaf18SBorislav Petkov */ 4221afaf18SBorislav Petkov static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); 4321afaf18SBorislav Petkov 4421afaf18SBorislav Petkov /* 4521afaf18SBorislav Petkov * CMCI storm detection backoff counter 4621afaf18SBorislav Petkov * 4721afaf18SBorislav Petkov * During storm, we reset this counter to INITIAL_CHECK_INTERVAL in case we've 4821afaf18SBorislav Petkov * encountered an error. If not, we decrement it by one. We signal the end of 4921afaf18SBorislav Petkov * the CMCI storm when it reaches 0. 5021afaf18SBorislav Petkov */ 5121afaf18SBorislav Petkov static DEFINE_PER_CPU(int, cmci_backoff_cnt); 5221afaf18SBorislav Petkov 5321afaf18SBorislav Petkov /* 5421afaf18SBorislav Petkov * cmci_discover_lock protects against parallel discovery attempts 5521afaf18SBorislav Petkov * which could race against each other. 5621afaf18SBorislav Petkov */ 5721afaf18SBorislav Petkov static DEFINE_RAW_SPINLOCK(cmci_discover_lock); 5821afaf18SBorislav Petkov 5921afaf18SBorislav Petkov #define CMCI_THRESHOLD 1 6021afaf18SBorislav Petkov #define CMCI_POLL_INTERVAL (30 * HZ) 6121afaf18SBorislav Petkov #define CMCI_STORM_INTERVAL (HZ) 6221afaf18SBorislav Petkov #define CMCI_STORM_THRESHOLD 15 6321afaf18SBorislav Petkov 6421afaf18SBorislav Petkov static DEFINE_PER_CPU(unsigned long, cmci_time_stamp); 6521afaf18SBorislav Petkov static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt); 6621afaf18SBorislav Petkov static DEFINE_PER_CPU(unsigned int, cmci_storm_state); 6721afaf18SBorislav Petkov 6821afaf18SBorislav Petkov enum { 6921afaf18SBorislav Petkov CMCI_STORM_NONE, 7021afaf18SBorislav Petkov CMCI_STORM_ACTIVE, 7121afaf18SBorislav Petkov CMCI_STORM_SUBSIDED, 7221afaf18SBorislav Petkov }; 7321afaf18SBorislav Petkov 7421afaf18SBorislav Petkov static atomic_t cmci_storm_on_cpus; 7521afaf18SBorislav Petkov 7621afaf18SBorislav Petkov static int cmci_supported(int *banks) 7721afaf18SBorislav Petkov { 7821afaf18SBorislav Petkov u64 cap; 7921afaf18SBorislav Petkov 8021afaf18SBorislav Petkov if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce) 8121afaf18SBorislav Petkov return 0; 8221afaf18SBorislav Petkov 8321afaf18SBorislav Petkov /* 8421afaf18SBorislav Petkov * Vendor check is not strictly needed, but the initial 8521afaf18SBorislav Petkov * initialization is vendor keyed and this 8621afaf18SBorislav Petkov * makes sure none of the backdoors are entered otherwise. 8721afaf18SBorislav Petkov */ 885a3d56a0STony W Wang-oc if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && 895a3d56a0STony W Wang-oc boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN) 9021afaf18SBorislav Petkov return 0; 915a3d56a0STony W Wang-oc 9221afaf18SBorislav Petkov if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6) 9321afaf18SBorislav Petkov return 0; 9421afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCG_CAP, cap); 9521afaf18SBorislav Petkov *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); 9621afaf18SBorislav Petkov return !!(cap & MCG_CMCI_P); 9721afaf18SBorislav Petkov } 9821afaf18SBorislav Petkov 9921afaf18SBorislav Petkov static bool lmce_supported(void) 10021afaf18SBorislav Petkov { 10121afaf18SBorislav Petkov u64 tmp; 10221afaf18SBorislav Petkov 10321afaf18SBorislav Petkov if (mca_cfg.lmce_disabled) 10421afaf18SBorislav Petkov return false; 10521afaf18SBorislav Petkov 10621afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCG_CAP, tmp); 10721afaf18SBorislav Petkov 10821afaf18SBorislav Petkov /* 10921afaf18SBorislav Petkov * LMCE depends on recovery support in the processor. Hence both 11021afaf18SBorislav Petkov * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP. 11121afaf18SBorislav Petkov */ 11221afaf18SBorislav Petkov if ((tmp & (MCG_SER_P | MCG_LMCE_P)) != 11321afaf18SBorislav Petkov (MCG_SER_P | MCG_LMCE_P)) 11421afaf18SBorislav Petkov return false; 11521afaf18SBorislav Petkov 11621afaf18SBorislav Petkov /* 11721afaf18SBorislav Petkov * BIOS should indicate support for LMCE by setting bit 20 in 11821afaf18SBorislav Petkov * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will 11921afaf18SBorislav Petkov * generate a #GP fault. 12021afaf18SBorislav Petkov */ 12121afaf18SBorislav Petkov rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp); 12221afaf18SBorislav Petkov if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) == 12321afaf18SBorislav Petkov (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) 12421afaf18SBorislav Petkov return true; 12521afaf18SBorislav Petkov 12621afaf18SBorislav Petkov return false; 12721afaf18SBorislav Petkov } 12821afaf18SBorislav Petkov 12921afaf18SBorislav Petkov bool mce_intel_cmci_poll(void) 13021afaf18SBorislav Petkov { 13121afaf18SBorislav Petkov if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) 13221afaf18SBorislav Petkov return false; 13321afaf18SBorislav Petkov 13421afaf18SBorislav Petkov /* 13521afaf18SBorislav Petkov * Reset the counter if we've logged an error in the last poll 13621afaf18SBorislav Petkov * during the storm. 13721afaf18SBorislav Petkov */ 13821afaf18SBorislav Petkov if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned))) 13921afaf18SBorislav Petkov this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL); 14021afaf18SBorislav Petkov else 14121afaf18SBorislav Petkov this_cpu_dec(cmci_backoff_cnt); 14221afaf18SBorislav Petkov 14321afaf18SBorislav Petkov return true; 14421afaf18SBorislav Petkov } 14521afaf18SBorislav Petkov 14621afaf18SBorislav Petkov void mce_intel_hcpu_update(unsigned long cpu) 14721afaf18SBorislav Petkov { 14821afaf18SBorislav Petkov if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE) 14921afaf18SBorislav Petkov atomic_dec(&cmci_storm_on_cpus); 15021afaf18SBorislav Petkov 15121afaf18SBorislav Petkov per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; 15221afaf18SBorislav Petkov } 15321afaf18SBorislav Petkov 15421afaf18SBorislav Petkov static void cmci_toggle_interrupt_mode(bool on) 15521afaf18SBorislav Petkov { 15621afaf18SBorislav Petkov unsigned long flags, *owned; 15721afaf18SBorislav Petkov int bank; 15821afaf18SBorislav Petkov u64 val; 15921afaf18SBorislav Petkov 16021afaf18SBorislav Petkov raw_spin_lock_irqsave(&cmci_discover_lock, flags); 16121afaf18SBorislav Petkov owned = this_cpu_ptr(mce_banks_owned); 16221afaf18SBorislav Petkov for_each_set_bit(bank, owned, MAX_NR_BANKS) { 16321afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCx_CTL2(bank), val); 16421afaf18SBorislav Petkov 16521afaf18SBorislav Petkov if (on) 16621afaf18SBorislav Petkov val |= MCI_CTL2_CMCI_EN; 16721afaf18SBorislav Petkov else 16821afaf18SBorislav Petkov val &= ~MCI_CTL2_CMCI_EN; 16921afaf18SBorislav Petkov 17021afaf18SBorislav Petkov wrmsrl(MSR_IA32_MCx_CTL2(bank), val); 17121afaf18SBorislav Petkov } 17221afaf18SBorislav Petkov raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 17321afaf18SBorislav Petkov } 17421afaf18SBorislav Petkov 17521afaf18SBorislav Petkov unsigned long cmci_intel_adjust_timer(unsigned long interval) 17621afaf18SBorislav Petkov { 17721afaf18SBorislav Petkov if ((this_cpu_read(cmci_backoff_cnt) > 0) && 17821afaf18SBorislav Petkov (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) { 17921afaf18SBorislav Petkov mce_notify_irq(); 18021afaf18SBorislav Petkov return CMCI_STORM_INTERVAL; 18121afaf18SBorislav Petkov } 18221afaf18SBorislav Petkov 18321afaf18SBorislav Petkov switch (__this_cpu_read(cmci_storm_state)) { 18421afaf18SBorislav Petkov case CMCI_STORM_ACTIVE: 18521afaf18SBorislav Petkov 18621afaf18SBorislav Petkov /* 18721afaf18SBorislav Petkov * We switch back to interrupt mode once the poll timer has 18821afaf18SBorislav Petkov * silenced itself. That means no events recorded and the timer 18921afaf18SBorislav Petkov * interval is back to our poll interval. 19021afaf18SBorislav Petkov */ 19121afaf18SBorislav Petkov __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED); 19221afaf18SBorislav Petkov if (!atomic_sub_return(1, &cmci_storm_on_cpus)) 19321afaf18SBorislav Petkov pr_notice("CMCI storm subsided: switching to interrupt mode\n"); 19421afaf18SBorislav Petkov 19521afaf18SBorislav Petkov /* FALLTHROUGH */ 19621afaf18SBorislav Petkov 19721afaf18SBorislav Petkov case CMCI_STORM_SUBSIDED: 19821afaf18SBorislav Petkov /* 19921afaf18SBorislav Petkov * We wait for all CPUs to go back to SUBSIDED state. When that 20021afaf18SBorislav Petkov * happens we switch back to interrupt mode. 20121afaf18SBorislav Petkov */ 20221afaf18SBorislav Petkov if (!atomic_read(&cmci_storm_on_cpus)) { 20321afaf18SBorislav Petkov __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); 20421afaf18SBorislav Petkov cmci_toggle_interrupt_mode(true); 20521afaf18SBorislav Petkov cmci_recheck(); 20621afaf18SBorislav Petkov } 20721afaf18SBorislav Petkov return CMCI_POLL_INTERVAL; 20821afaf18SBorislav Petkov default: 20921afaf18SBorislav Petkov 21021afaf18SBorislav Petkov /* We have shiny weather. Let the poll do whatever it thinks. */ 21121afaf18SBorislav Petkov return interval; 21221afaf18SBorislav Petkov } 21321afaf18SBorislav Petkov } 21421afaf18SBorislav Petkov 21521afaf18SBorislav Petkov static bool cmci_storm_detect(void) 21621afaf18SBorislav Petkov { 21721afaf18SBorislav Petkov unsigned int cnt = __this_cpu_read(cmci_storm_cnt); 21821afaf18SBorislav Petkov unsigned long ts = __this_cpu_read(cmci_time_stamp); 21921afaf18SBorislav Petkov unsigned long now = jiffies; 22021afaf18SBorislav Petkov int r; 22121afaf18SBorislav Petkov 22221afaf18SBorislav Petkov if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE) 22321afaf18SBorislav Petkov return true; 22421afaf18SBorislav Petkov 22521afaf18SBorislav Petkov if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) { 22621afaf18SBorislav Petkov cnt++; 22721afaf18SBorislav Petkov } else { 22821afaf18SBorislav Petkov cnt = 1; 22921afaf18SBorislav Petkov __this_cpu_write(cmci_time_stamp, now); 23021afaf18SBorislav Petkov } 23121afaf18SBorislav Petkov __this_cpu_write(cmci_storm_cnt, cnt); 23221afaf18SBorislav Petkov 23321afaf18SBorislav Petkov if (cnt <= CMCI_STORM_THRESHOLD) 23421afaf18SBorislav Petkov return false; 23521afaf18SBorislav Petkov 23621afaf18SBorislav Petkov cmci_toggle_interrupt_mode(false); 23721afaf18SBorislav Petkov __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); 23821afaf18SBorislav Petkov r = atomic_add_return(1, &cmci_storm_on_cpus); 23921afaf18SBorislav Petkov mce_timer_kick(CMCI_STORM_INTERVAL); 24021afaf18SBorislav Petkov this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL); 24121afaf18SBorislav Petkov 24221afaf18SBorislav Petkov if (r == 1) 24321afaf18SBorislav Petkov pr_notice("CMCI storm detected: switching to poll mode\n"); 24421afaf18SBorislav Petkov return true; 24521afaf18SBorislav Petkov } 24621afaf18SBorislav Petkov 24721afaf18SBorislav Petkov /* 24821afaf18SBorislav Petkov * The interrupt handler. This is called on every event. 24921afaf18SBorislav Petkov * Just call the poller directly to log any events. 25021afaf18SBorislav Petkov * This could in theory increase the threshold under high load, 25121afaf18SBorislav Petkov * but doesn't for now. 25221afaf18SBorislav Petkov */ 25321afaf18SBorislav Petkov static void intel_threshold_interrupt(void) 25421afaf18SBorislav Petkov { 25521afaf18SBorislav Petkov if (cmci_storm_detect()) 25621afaf18SBorislav Petkov return; 25721afaf18SBorislav Petkov 25821afaf18SBorislav Petkov machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); 25921afaf18SBorislav Petkov } 26021afaf18SBorislav Petkov 26121afaf18SBorislav Petkov /* 26221afaf18SBorislav Petkov * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks 26321afaf18SBorislav Petkov * on this CPU. Use the algorithm recommended in the SDM to discover shared 26421afaf18SBorislav Petkov * banks. 26521afaf18SBorislav Petkov */ 26621afaf18SBorislav Petkov static void cmci_discover(int banks) 26721afaf18SBorislav Petkov { 26821afaf18SBorislav Petkov unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned); 26921afaf18SBorislav Petkov unsigned long flags; 27021afaf18SBorislav Petkov int i; 27121afaf18SBorislav Petkov int bios_wrong_thresh = 0; 27221afaf18SBorislav Petkov 27321afaf18SBorislav Petkov raw_spin_lock_irqsave(&cmci_discover_lock, flags); 27421afaf18SBorislav Petkov for (i = 0; i < banks; i++) { 27521afaf18SBorislav Petkov u64 val; 27621afaf18SBorislav Petkov int bios_zero_thresh = 0; 27721afaf18SBorislav Petkov 27821afaf18SBorislav Petkov if (test_bit(i, owned)) 27921afaf18SBorislav Petkov continue; 28021afaf18SBorislav Petkov 28121afaf18SBorislav Petkov /* Skip banks in firmware first mode */ 28221afaf18SBorislav Petkov if (test_bit(i, mce_banks_ce_disabled)) 28321afaf18SBorislav Petkov continue; 28421afaf18SBorislav Petkov 28521afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCx_CTL2(i), val); 28621afaf18SBorislav Petkov 28721afaf18SBorislav Petkov /* Already owned by someone else? */ 28821afaf18SBorislav Petkov if (val & MCI_CTL2_CMCI_EN) { 28921afaf18SBorislav Petkov clear_bit(i, owned); 29021afaf18SBorislav Petkov __clear_bit(i, this_cpu_ptr(mce_poll_banks)); 29121afaf18SBorislav Petkov continue; 29221afaf18SBorislav Petkov } 29321afaf18SBorislav Petkov 29421afaf18SBorislav Petkov if (!mca_cfg.bios_cmci_threshold) { 29521afaf18SBorislav Petkov val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; 29621afaf18SBorislav Petkov val |= CMCI_THRESHOLD; 29721afaf18SBorislav Petkov } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { 29821afaf18SBorislav Petkov /* 29921afaf18SBorislav Petkov * If bios_cmci_threshold boot option was specified 30021afaf18SBorislav Petkov * but the threshold is zero, we'll try to initialize 30121afaf18SBorislav Petkov * it to 1. 30221afaf18SBorislav Petkov */ 30321afaf18SBorislav Petkov bios_zero_thresh = 1; 30421afaf18SBorislav Petkov val |= CMCI_THRESHOLD; 30521afaf18SBorislav Petkov } 30621afaf18SBorislav Petkov 30721afaf18SBorislav Petkov val |= MCI_CTL2_CMCI_EN; 30821afaf18SBorislav Petkov wrmsrl(MSR_IA32_MCx_CTL2(i), val); 30921afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCx_CTL2(i), val); 31021afaf18SBorislav Petkov 31121afaf18SBorislav Petkov /* Did the enable bit stick? -- the bank supports CMCI */ 31221afaf18SBorislav Petkov if (val & MCI_CTL2_CMCI_EN) { 31321afaf18SBorislav Petkov set_bit(i, owned); 31421afaf18SBorislav Petkov __clear_bit(i, this_cpu_ptr(mce_poll_banks)); 31521afaf18SBorislav Petkov /* 31621afaf18SBorislav Petkov * We are able to set thresholds for some banks that 31721afaf18SBorislav Petkov * had a threshold of 0. This means the BIOS has not 31821afaf18SBorislav Petkov * set the thresholds properly or does not work with 31921afaf18SBorislav Petkov * this boot option. Note down now and report later. 32021afaf18SBorislav Petkov */ 32121afaf18SBorislav Petkov if (mca_cfg.bios_cmci_threshold && bios_zero_thresh && 32221afaf18SBorislav Petkov (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) 32321afaf18SBorislav Petkov bios_wrong_thresh = 1; 32421afaf18SBorislav Petkov } else { 32521afaf18SBorislav Petkov WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks))); 32621afaf18SBorislav Petkov } 32721afaf18SBorislav Petkov } 32821afaf18SBorislav Petkov raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 32921afaf18SBorislav Petkov if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) { 33021afaf18SBorislav Petkov pr_info_once( 33121afaf18SBorislav Petkov "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); 33221afaf18SBorislav Petkov pr_info_once( 33321afaf18SBorislav Petkov "bios_cmci_threshold: Make sure your BIOS supports this boot option\n"); 33421afaf18SBorislav Petkov } 33521afaf18SBorislav Petkov } 33621afaf18SBorislav Petkov 33721afaf18SBorislav Petkov /* 33821afaf18SBorislav Petkov * Just in case we missed an event during initialization check 33921afaf18SBorislav Petkov * all the CMCI owned banks. 34021afaf18SBorislav Petkov */ 34121afaf18SBorislav Petkov void cmci_recheck(void) 34221afaf18SBorislav Petkov { 34321afaf18SBorislav Petkov unsigned long flags; 34421afaf18SBorislav Petkov int banks; 34521afaf18SBorislav Petkov 34621afaf18SBorislav Petkov if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) 34721afaf18SBorislav Petkov return; 34821afaf18SBorislav Petkov 34921afaf18SBorislav Petkov local_irq_save(flags); 35021afaf18SBorislav Petkov machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)); 35121afaf18SBorislav Petkov local_irq_restore(flags); 35221afaf18SBorislav Petkov } 35321afaf18SBorislav Petkov 35421afaf18SBorislav Petkov /* Caller must hold the lock on cmci_discover_lock */ 35521afaf18SBorislav Petkov static void __cmci_disable_bank(int bank) 35621afaf18SBorislav Petkov { 35721afaf18SBorislav Petkov u64 val; 35821afaf18SBorislav Petkov 35921afaf18SBorislav Petkov if (!test_bit(bank, this_cpu_ptr(mce_banks_owned))) 36021afaf18SBorislav Petkov return; 36121afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCx_CTL2(bank), val); 36221afaf18SBorislav Petkov val &= ~MCI_CTL2_CMCI_EN; 36321afaf18SBorislav Petkov wrmsrl(MSR_IA32_MCx_CTL2(bank), val); 36421afaf18SBorislav Petkov __clear_bit(bank, this_cpu_ptr(mce_banks_owned)); 36521afaf18SBorislav Petkov } 36621afaf18SBorislav Petkov 36721afaf18SBorislav Petkov /* 36821afaf18SBorislav Petkov * Disable CMCI on this CPU for all banks it owns when it goes down. 36921afaf18SBorislav Petkov * This allows other CPUs to claim the banks on rediscovery. 37021afaf18SBorislav Petkov */ 37121afaf18SBorislav Petkov void cmci_clear(void) 37221afaf18SBorislav Petkov { 37321afaf18SBorislav Petkov unsigned long flags; 37421afaf18SBorislav Petkov int i; 37521afaf18SBorislav Petkov int banks; 37621afaf18SBorislav Petkov 37721afaf18SBorislav Petkov if (!cmci_supported(&banks)) 37821afaf18SBorislav Petkov return; 37921afaf18SBorislav Petkov raw_spin_lock_irqsave(&cmci_discover_lock, flags); 38021afaf18SBorislav Petkov for (i = 0; i < banks; i++) 38121afaf18SBorislav Petkov __cmci_disable_bank(i); 38221afaf18SBorislav Petkov raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 38321afaf18SBorislav Petkov } 38421afaf18SBorislav Petkov 38521afaf18SBorislav Petkov static void cmci_rediscover_work_func(void *arg) 38621afaf18SBorislav Petkov { 38721afaf18SBorislav Petkov int banks; 38821afaf18SBorislav Petkov 38921afaf18SBorislav Petkov /* Recheck banks in case CPUs don't all have the same */ 39021afaf18SBorislav Petkov if (cmci_supported(&banks)) 39121afaf18SBorislav Petkov cmci_discover(banks); 39221afaf18SBorislav Petkov } 39321afaf18SBorislav Petkov 39421afaf18SBorislav Petkov /* After a CPU went down cycle through all the others and rediscover */ 39521afaf18SBorislav Petkov void cmci_rediscover(void) 39621afaf18SBorislav Petkov { 39721afaf18SBorislav Petkov int banks; 39821afaf18SBorislav Petkov 39921afaf18SBorislav Petkov if (!cmci_supported(&banks)) 40021afaf18SBorislav Petkov return; 40121afaf18SBorislav Petkov 40221afaf18SBorislav Petkov on_each_cpu(cmci_rediscover_work_func, NULL, 1); 40321afaf18SBorislav Petkov } 40421afaf18SBorislav Petkov 40521afaf18SBorislav Petkov /* 40621afaf18SBorislav Petkov * Reenable CMCI on this CPU in case a CPU down failed. 40721afaf18SBorislav Petkov */ 40821afaf18SBorislav Petkov void cmci_reenable(void) 40921afaf18SBorislav Petkov { 41021afaf18SBorislav Petkov int banks; 41121afaf18SBorislav Petkov if (cmci_supported(&banks)) 41221afaf18SBorislav Petkov cmci_discover(banks); 41321afaf18SBorislav Petkov } 41421afaf18SBorislav Petkov 41521afaf18SBorislav Petkov void cmci_disable_bank(int bank) 41621afaf18SBorislav Petkov { 41721afaf18SBorislav Petkov int banks; 41821afaf18SBorislav Petkov unsigned long flags; 41921afaf18SBorislav Petkov 42021afaf18SBorislav Petkov if (!cmci_supported(&banks)) 42121afaf18SBorislav Petkov return; 42221afaf18SBorislav Petkov 42321afaf18SBorislav Petkov raw_spin_lock_irqsave(&cmci_discover_lock, flags); 42421afaf18SBorislav Petkov __cmci_disable_bank(bank); 42521afaf18SBorislav Petkov raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 42621afaf18SBorislav Petkov } 42721afaf18SBorislav Petkov 4285a3d56a0STony W Wang-oc void intel_init_cmci(void) 42921afaf18SBorislav Petkov { 43021afaf18SBorislav Petkov int banks; 43121afaf18SBorislav Petkov 43221afaf18SBorislav Petkov if (!cmci_supported(&banks)) 43321afaf18SBorislav Petkov return; 43421afaf18SBorislav Petkov 43521afaf18SBorislav Petkov mce_threshold_vector = intel_threshold_interrupt; 43621afaf18SBorislav Petkov cmci_discover(banks); 43721afaf18SBorislav Petkov /* 43821afaf18SBorislav Petkov * For CPU #0 this runs with still disabled APIC, but that's 43921afaf18SBorislav Petkov * ok because only the vector is set up. We still do another 44021afaf18SBorislav Petkov * check for the banks later for CPU #0 just to make sure 44121afaf18SBorislav Petkov * to not miss any events. 44221afaf18SBorislav Petkov */ 44321afaf18SBorislav Petkov apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); 44421afaf18SBorislav Petkov cmci_recheck(); 44521afaf18SBorislav Petkov } 44621afaf18SBorislav Petkov 44770f0c230STony W Wang-oc void intel_init_lmce(void) 44821afaf18SBorislav Petkov { 44921afaf18SBorislav Petkov u64 val; 45021afaf18SBorislav Petkov 45121afaf18SBorislav Petkov if (!lmce_supported()) 45221afaf18SBorislav Petkov return; 45321afaf18SBorislav Petkov 45421afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCG_EXT_CTL, val); 45521afaf18SBorislav Petkov 45621afaf18SBorislav Petkov if (!(val & MCG_EXT_CTL_LMCE_EN)) 45721afaf18SBorislav Petkov wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN); 45821afaf18SBorislav Petkov } 45921afaf18SBorislav Petkov 46070f0c230STony W Wang-oc void intel_clear_lmce(void) 46121afaf18SBorislav Petkov { 46221afaf18SBorislav Petkov u64 val; 46321afaf18SBorislav Petkov 46421afaf18SBorislav Petkov if (!lmce_supported()) 46521afaf18SBorislav Petkov return; 46621afaf18SBorislav Petkov 46721afaf18SBorislav Petkov rdmsrl(MSR_IA32_MCG_EXT_CTL, val); 46821afaf18SBorislav Petkov val &= ~MCG_EXT_CTL_LMCE_EN; 46921afaf18SBorislav Petkov wrmsrl(MSR_IA32_MCG_EXT_CTL, val); 47021afaf18SBorislav Petkov } 47121afaf18SBorislav Petkov 47221afaf18SBorislav Petkov static void intel_ppin_init(struct cpuinfo_x86 *c) 47321afaf18SBorislav Petkov { 47421afaf18SBorislav Petkov unsigned long long val; 47521afaf18SBorislav Petkov 47621afaf18SBorislav Petkov /* 47721afaf18SBorislav Petkov * Even if testing the presence of the MSR would be enough, we don't 47821afaf18SBorislav Petkov * want to risk the situation where other models reuse this MSR for 47921afaf18SBorislav Petkov * other purposes. 48021afaf18SBorislav Petkov */ 48121afaf18SBorislav Petkov switch (c->x86_model) { 48221afaf18SBorislav Petkov case INTEL_FAM6_IVYBRIDGE_X: 48321afaf18SBorislav Petkov case INTEL_FAM6_HASWELL_X: 4845ebb34edSPeter Zijlstra case INTEL_FAM6_BROADWELL_D: 48521afaf18SBorislav Petkov case INTEL_FAM6_BROADWELL_X: 48621afaf18SBorislav Petkov case INTEL_FAM6_SKYLAKE_X: 487dc6b025dSTony Luck case INTEL_FAM6_ICELAKE_X: 48821afaf18SBorislav Petkov case INTEL_FAM6_XEON_PHI_KNL: 48921afaf18SBorislav Petkov case INTEL_FAM6_XEON_PHI_KNM: 49021afaf18SBorislav Petkov 49121afaf18SBorislav Petkov if (rdmsrl_safe(MSR_PPIN_CTL, &val)) 49221afaf18SBorislav Petkov return; 49321afaf18SBorislav Petkov 49421afaf18SBorislav Petkov if ((val & 3UL) == 1UL) { 49521afaf18SBorislav Petkov /* PPIN available but disabled: */ 49621afaf18SBorislav Petkov return; 49721afaf18SBorislav Petkov } 49821afaf18SBorislav Petkov 49921afaf18SBorislav Petkov /* If PPIN is disabled, but not locked, try to enable: */ 50021afaf18SBorislav Petkov if (!(val & 3UL)) { 50121afaf18SBorislav Petkov wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); 50221afaf18SBorislav Petkov rdmsrl_safe(MSR_PPIN_CTL, &val); 50321afaf18SBorislav Petkov } 50421afaf18SBorislav Petkov 50521afaf18SBorislav Petkov if ((val & 3UL) == 2UL) 50621afaf18SBorislav Petkov set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); 50721afaf18SBorislav Petkov } 50821afaf18SBorislav Petkov } 50921afaf18SBorislav Petkov 51021afaf18SBorislav Petkov void mce_intel_feature_init(struct cpuinfo_x86 *c) 51121afaf18SBorislav Petkov { 51221afaf18SBorislav Petkov intel_init_thermal(c); 51321afaf18SBorislav Petkov intel_init_cmci(); 51421afaf18SBorislav Petkov intel_init_lmce(); 51521afaf18SBorislav Petkov intel_ppin_init(c); 51621afaf18SBorislav Petkov } 51721afaf18SBorislav Petkov 51821afaf18SBorislav Petkov void mce_intel_feature_clear(struct cpuinfo_x86 *c) 51921afaf18SBorislav Petkov { 52021afaf18SBorislav Petkov intel_clear_lmce(); 52121afaf18SBorislav Petkov } 522