xref: /openbmc/linux/arch/x86/kernel/cpu/mce/internal.h (revision a20eefae)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __X86_MCE_INTERNAL_H__
3 #define __X86_MCE_INTERNAL_H__
4 
5 #undef pr_fmt
6 #define pr_fmt(fmt) "mce: " fmt
7 
8 #include <linux/device.h>
9 #include <asm/mce.h>
10 
/*
 * Severity grades assigned to a machine-check record by the severity
 * grading code.  Entries appear to be ordered from least to most severe
 * (NO -> PANIC) — NOTE(review): grading likely compares enumerator values,
 * so do not reorder; confirm against severity.c before changing.
 */
enum severity_level {
	MCE_NO_SEVERITY,	/* record carries no actionable error */
	MCE_DEFERRED_SEVERITY,	/* (AMD) deferred, i.e. not yet consumed, error */
	MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY, /* (Intel) UCNA: uncorrected,
						    * no action required — same
						    * grade as AMD deferred */
	MCE_KEEP_SEVERITY,	/* keep the record for later evaluation */
	MCE_SOME_SEVERITY,
	MCE_AO_SEVERITY,	/* uncorrected, Action Optional */
	MCE_UC_SEVERITY,	/* uncorrected */
	MCE_AR_SEVERITY,	/* uncorrected, Action Required */
	MCE_PANIC_SEVERITY,	/* fatal — machine must panic */
};
22 
23 extern struct blocking_notifier_head x86_mce_decoder_chain;
24 
/*
 * Default machine-check poll interval: 5 minutes.
 * Parenthesized so the macro expands safely inside larger expressions
 * (e.g. division or multiplication by the interval).
 */
#define INITIAL_CHECK_INTERVAL	(5 * 60) /* 5 minutes */
26 
/*
 * Node type for the lock-less list used by the MCE event gen pool
 * (see the mce_gen_pool_* functions below): one MCE record plus its
 * llist linkage.
 */
struct mce_evt_llist {
	struct llist_node llnode;	/* linkage into the lock-less list */
	struct mce mce;			/* the logged machine-check record */
};
31 
32 void mce_gen_pool_process(struct work_struct *__unused);
33 bool mce_gen_pool_empty(void);
34 int mce_gen_pool_add(struct mce *mce);
35 int mce_gen_pool_init(void);
36 struct llist_node *mce_gen_pool_prepare_records(void);
37 
38 extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
39 struct dentry *mce_get_debugfs_dir(void);
40 
41 extern mce_banks_t mce_banks_ce_disabled;
42 
#ifdef CONFIG_X86_MCE_INTEL
unsigned long cmci_intel_adjust_timer(unsigned long interval);
bool mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu);
void cmci_disable_bank(int bank);
#else
/*
 * Stubs for !CONFIG_X86_MCE_INTEL: the timer keeps its default adjustment,
 * CMCI polling reports "nothing polled", and the hotplug/bank hooks are
 * no-ops.
 */
# define cmci_intel_adjust_timer mce_adjust_timer_default
static inline bool mce_intel_cmci_poll(void) { return false; }
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
static inline void cmci_disable_bank(int bank) { }
#endif
54 
55 void mce_timer_kick(unsigned long interval);
56 
#ifdef CONFIG_ACPI_APEI
int apei_write_mce(struct mce *m);
ssize_t apei_read_mce(struct mce *m, u64 *record_id);
int apei_check_mce(void);
int apei_clear_mce(u64 record_id);
#else
/*
 * Stubs for !CONFIG_ACPI_APEI: there is no persistent APEI/ERST backing
 * store, so writes and clears fail with -EINVAL while read/check report
 * zero records available.
 */
static inline int apei_write_mce(struct mce *m)
{
	return -EINVAL;
}
static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id)
{
	return 0;
}
static inline int apei_check_mce(void)
{
	return 0;
}
static inline int apei_clear_mce(u64 record_id)
{
	return -EINVAL;
}
#endif
80 
81 void mce_inject_log(struct mce *m);
82 
83 /*
84  * We consider records to be equivalent if bank+status+addr+misc all match.
85  * This is only used when the system is going down because of a fatal error
86  * to avoid cluttering the console log with essentially repeated information.
87  * In normal processing all errors seen are logged.
88  */
89 static inline bool mce_cmp(struct mce *m1, struct mce *m2)
90 {
91 	return m1->bank != m2->bank ||
92 		m1->status != m2->status ||
93 		m1->addr != m2->addr ||
94 		m1->misc != m2->misc;
95 }
96 
97 extern struct device_attribute dev_attr_trigger;
98 
#ifdef CONFIG_X86_MCELOG_LEGACY
void mce_work_trigger(void);
void mce_register_injector_chain(struct notifier_block *nb);
void mce_unregister_injector_chain(struct notifier_block *nb);
#else
/*
 * Stubs for !CONFIG_X86_MCELOG_LEGACY: without the legacy /dev/mcelog
 * device there is no trigger work to schedule and no injector notifier
 * chain to (un)register.
 */
static inline void mce_work_trigger(void)	{ }
static inline void mce_register_injector_chain(struct notifier_block *nb)	{ }
static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }
#endif
108 
/*
 * Global MCA configuration, typically set from the "mce=" command line
 * and sysfs — NOTE(review): exact sources are outside this header; see
 * core.c.  Field names read literally: *_disabled/ignore_* suppress the
 * named feature, dont_log_ce suppresses logging of corrected errors.
 */
struct mca_config {
	bool dont_log_ce;	/* don't log corrected errors */
	bool cmci_disabled;	/* CMCI disabled */
	bool ignore_ce;		/* ignore corrected errors entirely */

	/* single-bit feature/override flags packed into one u64 */
	__u64 lmce_disabled		: 1,	/* local MCE disabled */
	      disabled			: 1,	/* whole MCE subsystem off */
	      ser			: 1,	/* software error recovery supported */
	      recovery			: 1,	/* recovery mode forced on */
	      bios_cmci_threshold	: 1,	/* keep BIOS-set CMCI threshold */
	      __reserved		: 59;

	s8 bootlog;		/* log boot-time machine checks (tristate) */
	int tolerant;		/* tolerance level for uncorrected errors */
	int monarch_timeout;	/* timeout waiting for other CPUs, see core.c */
	int panic_timeout;	/* panic timeout on machine check */
	u32 rip_msr;		/* MSR holding the faulting RIP, if any */
};
127 
128 extern struct mca_config mca_cfg;
129 DECLARE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
130 
/* Vendor-specific MCA capability bits, filled in at init from CPUID. */
struct mce_vendor_flags {
	/*
	 * Indicates that overflow conditions are not fatal, when set.
	 */
	__u64 overflow_recov	: 1,

	/*
	 * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
	 * Recovery. It indicates support for data poisoning in HW and deferred
	 * error interrupts.
	 */
	      succor		: 1,

	/*
	 * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
	 * the register space for each MCA bank and also increases number of
	 * banks. Also, to accommodate the new banks and registers, the MCA
	 * register space is moved to a new MSR range.
	 */
	      smca		: 1,

	      __reserved_0	: 61;	/* unused; keeps the u64 fully defined */
};
154 
155 extern struct mce_vendor_flags mce_flags;
156 
/*
 * Per-vendor MSR-number accessors: each callback maps a bank index to the
 * MSR number of that bank's CTL/STATUS/ADDR/MISC register.  Indirection
 * exists because SMCA parts use a different MSR layout — NOTE(review):
 * concrete implementations live outside this header; confirm in core.c.
 */
struct mca_msr_regs {
	u32 (*ctl)	(int bank);	/* MCi_CTL MSR for @bank */
	u32 (*status)	(int bank);	/* MCi_STATUS MSR for @bank */
	u32 (*addr)	(int bank);	/* MCi_ADDR MSR for @bank */
	u32 (*misc)	(int bank);	/* MCi_MISC MSR for @bank */
};
163 
164 extern struct mca_msr_regs msr_ops;
165 
166 /* Decide whether to add MCE record to MCE event pool or filter it out. */
167 extern bool filter_mce(struct mce *m);
168 
#ifdef CONFIG_X86_MCE_AMD
extern bool amd_filter_mce(struct mce *m);
#else
/* Without AMD MCE support no record is ever filtered out. */
static inline bool amd_filter_mce(struct mce *m)			{ return false; }
#endif
174 
175 #endif /* __X86_MCE_INTERNAL_H__ */
176