xref: /openbmc/linux/drivers/edac/edac_mc.h (revision e6dec923)
/*
 * Defines, structures, APIs for edac_mc module
 *
 * (C) 2007 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * NMI handling support added by
 *     Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
 *
 * Refactored for multi-source files:
 *	Doug Thompson <norsk5@xmission.com>
 *
 * Please look at Documentation/driver-api/edac.rst for more info about
 * EDAC core structs and functions.
 */
21 
22 #ifndef _EDAC_MC_H_
23 #define _EDAC_MC_H_
24 
25 #include <linux/kernel.h>
26 #include <linux/types.h>
27 #include <linux/module.h>
28 #include <linux/spinlock.h>
29 #include <linux/smp.h>
30 #include <linux/pci.h>
31 #include <linux/time.h>
32 #include <linux/nmi.h>
33 #include <linux/rcupdate.h>
34 #include <linux/completion.h>
35 #include <linux/kobject.h>
36 #include <linux/platform_device.h>
37 #include <linux/workqueue.h>
38 #include <linux/edac.h>
39 
/*
 * Convert between a number of pages and a size in MiB (2^20 bytes).
 *
 * A page is 2^PAGE_SHIFT bytes, so the conversion is a shift by the
 * difference between 20 and PAGE_SHIFT; the direction depends on whether a
 * page is smaller than 1 MiB.  The right-shifting variants truncate.
 */
#if PAGE_SHIFT < 20
#define PAGES_TO_MiB(pages)	((pages) >> (20 - PAGE_SHIFT))
#define MiB_TO_PAGES(mb)	((mb) << (20 - PAGE_SHIFT))
#else				/* PAGE_SHIFT >= 20 */
#define PAGES_TO_MiB(pages)	((pages) << (PAGE_SHIFT - 20))
#define MiB_TO_PAGES(mb)	((mb) >> (PAGE_SHIFT - 20))
#endif
47 
/*
 * edac_printk() - printk() a message tagged "EDAC <prefix>: ".
 *
 * @level, @prefix and @fmt are pasted together by string-literal
 * concatenation, so all three must be string literals.
 */
#define edac_printk(level, prefix, fmt, arg...) \
	printk(level "EDAC " prefix ": " fmt, ##arg)
50 
/*
 * edac_mc_printk() - printk() a message tagged "EDAC MC<n>: ", where <n> is
 * the mc_idx member of @mci.
 *
 * @level and @fmt must be string literals (they are concatenated with the
 * tag).  @mci is parenthesized so that any pointer expression can be passed.
 */
#define edac_mc_printk(mci, level, fmt, arg...) \
	printk(level "EDAC MC%d: " fmt, (mci)->mc_idx, ##arg)
53 
/*
 * edac_mc_chipset_printk() - printk() a message tagged
 * "EDAC <prefix> MC<n>: ", where <n> is the mc_idx member of @mci.
 *
 * @level, @prefix and @fmt must be string literals (they are concatenated
 * with the tag).  @mci is parenthesized so that any pointer expression can
 * be passed.
 */
#define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
	printk(level "EDAC " prefix " MC%d: " fmt, (mci)->mc_idx, ##arg)
56 
/*
 * edac_device_printk() - printk() a message tagged "EDAC DEVICE<n>: ",
 * where <n> is the dev_idx member of @ctl.
 *
 * @level and @fmt must be string literals (they are concatenated with the
 * tag).  @ctl is parenthesized so that any pointer expression can be passed.
 */
#define edac_device_printk(ctl, level, fmt, arg...) \
	printk(level "EDAC DEVICE%d: " fmt, (ctl)->dev_idx, ##arg)
59 
/*
 * edac_pci_printk() - printk() a message tagged "EDAC PCI<n>: ", where <n>
 * is the pci_idx member of @ctl.
 *
 * @level and @fmt must be string literals (they are concatenated with the
 * tag).  @ctl is parenthesized so that any pointer expression can be passed.
 */
#define edac_pci_printk(ctl, level, fmt, arg...) \
	printk(level "EDAC PCI%d: " fmt, (ctl)->pci_idx, ##arg)
62 
/*
 * String-literal prefixes for the "prefix" argument of edac_printk() and
 * edac_mc_chipset_printk()
 */
#define EDAC_MC "MC"
#define EDAC_PCI "PCI"
#define EDAC_DEBUG "DEBUG"
67 
/*
 * Read-only table of constant strings, defined outside this header.
 * NOTE(review): presumably the printable names of the memory types, indexed
 * by the memory type enumeration - confirm against the definition.
 */
extern const char * const edac_mem_types[];
69 
/*
 * edac_dbg() - emit a KERN_DEBUG message tagged "EDAC DEBUG: <function>: ".
 *
 * @level:	verbosity of this message
 * @fmt:	printk() format string literal, followed by its arguments
 *
 * With CONFIG_EDAC_DEBUG the message is printed only when @level does not
 * exceed edac_debug_level.  Without it the call sits behind "if (0)", so
 * nothing is printed, but the compiler still sees the format string and
 * its arguments.
 */
#ifdef CONFIG_EDAC_DEBUG
extern int edac_debug_level;

#define edac_dbg(level, fmt, ...)					\
do {									\
	if ((level) <= edac_debug_level)				\
		edac_printk(KERN_DEBUG, EDAC_DEBUG,			\
			    "%s: " fmt, __func__, ##__VA_ARGS__);	\
} while (0)

#else				/* !CONFIG_EDAC_DEBUG */

#define edac_dbg(level, fmt, ...)					\
do {									\
	if (0)								\
		edac_printk(KERN_DEBUG, EDAC_DEBUG,			\
			    "%s: " fmt, __func__, ##__VA_ARGS__);	\
} while (0)

#endif				/* !CONFIG_EDAC_DEBUG */
90 
/*
 * PCI_VEND_DEV() - expand to the comma-separated pair
 * PCI_VENDOR_ID_<vend>, PCI_DEVICE_ID_<vend>_<dev>.
 *
 * Both arguments are pasted as bare tokens, so they must be the literal
 * name fragments, not expressions.
 */
#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
	PCI_DEVICE_ID_ ## vend ## _ ## dev
93 
/* edac_dev_name() - the dev_name member of the structure @dev points to. */
#define edac_dev_name(dev) ((dev)->dev_name)
95 
/*
 * to_mci() - get the struct mem_ctl_info that contains @k, where @k points
 * to that structure's "dev" member.
 */
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
97 
/**
 * edac_mc_alloc() - Allocate and partially fill a struct &mem_ctl_info.
 *
 * @mc_num:		Memory controller number
 * @n_layers:		Number of MC hierarchy layers
 * @layers:		Describes each layer as seen by the Memory Controller
 * @sz_pvt:		Size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * This can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * .. note::
 *
 *   drivers handle multi-rank memories in different ways: in some
 *   drivers, one multi-rank memory stick is mapped as one entry, while, in
 *   others, a single multi-rank memory stick would be mapped into several
 *   entries. Currently, this function will allocate multiple struct dimm_info
 *   in such scenarios, as grouping the multiple ranks requires driver changes.
 *
 * Returns:
 *	On success, a pointer to the allocated struct mem_ctl_info;
 *	%NULL otherwise
 */
struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
				   unsigned int n_layers,
				   struct edac_mc_layer *layers,
				   unsigned int sz_pvt);
129 
/**
 * edac_mc_add_mc_with_groups() - Insert the @mci structure into the mci
 *	global list and create sysfs entries associated with @mci structure.
 *
 * @mci: pointer to the mci structure to be added to the list
 * @groups: optional attribute groups for the driver-specific sysfs entries
 *
 * Returns:
 *	0 on success, or an error code on failure
 */
int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
			       const struct attribute_group **groups);

/* Shorthand for adding @mci without any driver-specific attribute groups. */
#define edac_mc_add_mc(mci)	edac_mc_add_mc_with_groups(mci, NULL)
143 
/**
 * edac_mc_free() - Frees a previously allocated @mci structure
 *
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci);
150 
/**
 * edac_has_mcs() - Check if any MCs have been allocated.
 *
 * Returns:
 *	True if MC instances have been registered successfully.
 *	False otherwise.
 */
bool edac_has_mcs(void);
159 
/**
 * edac_mc_find() - Search for a mem_ctl_info structure whose index is @idx.
 *
 * @idx: index to search for
 *
 * Returns: a pointer to the structure if found; %NULL otherwise.
 */
struct mem_ctl_info *edac_mc_find(int idx);
169 
/**
 * find_mci_by_dev() - Scan list of controllers looking for the one that
 *	manages the @dev device.
 *
 * @dev: pointer to a struct device related with the MCI
 *
 * Returns: on success, returns a pointer to struct &mem_ctl_info;
 * %NULL otherwise.
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev);
180 
/**
 * edac_mc_del_mc() - Remove sysfs entries for mci structure associated with
 *	@dev and remove mci structure from global list.
 *
 * @dev: Pointer to struct &device representing mci structure to remove.
 *
 * Returns: pointer to removed mci structure, or %NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
190 
/**
 * edac_mc_find_csrow_by_page() - Ancillary routine to identify what csrow
 *	contains a memory page.
 *
 * @mci: pointer to a struct mem_ctl_info structure
 * @page: memory page to find
 *
 * Returns: the csrow on success; -1 if not found.
 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
			       unsigned long page);
202 
/**
 * edac_raw_mc_handle_error() - Reports a memory event to userspace without
 *	doing anything to discover the error location.
 *
 * @type:		severity of the error (CE/UE/Fatal)
 * @mci:		a struct mem_ctl_info pointer
 * @e:			error description
 *
 * This raw function is used internally by edac_mc_handle_error(). It should
 * only be called directly when the hardware error comes directly from BIOS,
 * like in the case of APEI GHES driver.
 */
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
			      struct mem_ctl_info *mci,
			      struct edac_raw_error_desc *e);
218 
219 /**
220  * edac_mc_handle_error() - Reports a memory event to userspace.
221  *
222  * @type:		severity of the error (CE/UE/Fatal)
223  * @mci:		a struct mem_ctl_info pointer
224  * @error_count:	Number of errors of the same type
225  * @page_frame_number:	mem page where the error occurred
226  * @offset_in_page:	offset of the error inside the page
227  * @syndrome:		ECC syndrome
228  * @top_layer:		Memory layer[0] position
229  * @mid_layer:		Memory layer[1] position
230  * @low_layer:		Memory layer[2] position
231  * @msg:		Message meaningful to the end users that
232  *			explains the event
233  * @other_detail:	Technical details about the event that
234  *			may help hardware manufacturers and
235  *			EDAC developers to analyse the event
236  */
237 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
238 			  struct mem_ctl_info *mci,
239 			  const u16 error_count,
240 			  const unsigned long page_frame_number,
241 			  const unsigned long offset_in_page,
242 			  const unsigned long syndrome,
243 			  const int top_layer,
244 			  const int mid_layer,
245 			  const int low_layer,
246 			  const char *msg,
247 			  const char *other_detail);
248 
/*
 * edac misc APIs
 */

/*
 * Takes an integer op_state and returns a string.
 * NOTE(review): presumably a printable name for the operating state -
 * confirm the accepted values and string ownership against the definition.
 */
char *edac_op_state_to_string(int op_state);
253 
254 #endif				/* _EDAC_MC_H_ */
255