xref: /openbmc/linux/drivers/infiniband/hw/hfi1/fault.c (revision a9d85efb)
1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
2 /*
3  * Copyright(c) 2018 Intel Corporation.
4  */
5 
6 #include <linux/debugfs.h>
7 #include <linux/seq_file.h>
8 #include <linux/kernel.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/bitmap.h>
12 
13 #include "debugfs.h"
14 #include "fault.h"
15 #include "trace.h"
16 
/*
 * Direction flags for fault injection.  __hfi1_should_fault() ANDs the
 * caller's direction against fault->direction; the TX and RX entry points
 * pass HFI1_FAULT_DIR_TX and HFI1_FAULT_DIR_RX respectively, and
 * hfi1_fault_init_debugfs() defaults the mask to both directions.
 */
#define HFI1_FAULT_DIR_TX   BIT(0)
#define HFI1_FAULT_DIR_RX   BIT(1)
#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_TX | HFI1_FAULT_DIR_RX)
20 
21 static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
22 {
23 	struct hfi1_opcode_stats_perctx *opstats;
24 
25 	if (*pos >= ARRAY_SIZE(opstats->stats))
26 		return NULL;
27 	return pos;
28 }
29 
30 static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
31 {
32 	struct hfi1_opcode_stats_perctx *opstats;
33 
34 	++*pos;
35 	if (*pos >= ARRAY_SIZE(opstats->stats))
36 		return NULL;
37 	return pos;
38 }
39 
/*
 * seq_file stop callback for "fault_stats".
 *
 * Intentionally empty: the start/next callbacks in this file only compare
 * the position against the array size, so there is nothing to release here.
 */
static void _fault_stats_seq_stop(struct seq_file *s, void *v)
{
}
43 
44 static int _fault_stats_seq_show(struct seq_file *s, void *v)
45 {
46 	loff_t *spos = v;
47 	loff_t i = *spos, j;
48 	u64 n_packets = 0, n_bytes = 0;
49 	struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
50 	struct hfi1_devdata *dd = dd_from_dev(ibd);
51 	struct hfi1_ctxtdata *rcd;
52 
53 	for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
54 		rcd = hfi1_rcd_get_by_index(dd, j);
55 		if (rcd) {
56 			n_packets += rcd->opstats->stats[i].n_packets;
57 			n_bytes += rcd->opstats->stats[i].n_bytes;
58 		}
59 		hfi1_rcd_put(rcd);
60 	}
61 	for_each_possible_cpu(j) {
62 		struct hfi1_opcode_stats_perctx *sp =
63 			per_cpu_ptr(dd->tx_opstats, j);
64 
65 		n_packets += sp->stats[i].n_packets;
66 		n_bytes += sp->stats[i].n_bytes;
67 	}
68 	if (!n_packets && !n_bytes)
69 		return SEQ_SKIP;
70 	if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i])
71 		return SEQ_SKIP;
72 	seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
73 		   (unsigned long long)n_packets,
74 		   (unsigned long long)n_bytes,
75 		   (unsigned long long)ibd->fault->n_rxfaults[i],
76 		   (unsigned long long)ibd->fault->n_txfaults[i]);
77 	return 0;
78 }
79 
/*
 * Instantiate the debugfs boilerplate for the "fault_stats" file.
 * NOTE(review): these macros are defined outside this file (presumably in
 * debugfs.h); they appear to bind the _fault_stats_seq_* callbacks above and
 * to define the _fault_stats_file_ops object that hfi1_fault_init_debugfs()
 * registers - confirm against the macro definitions.
 */
DEBUGFS_SEQ_FILE_OPS(fault_stats);
DEBUGFS_SEQ_FILE_OPEN(fault_stats);
DEBUGFS_FILE_OPS(fault_stats);
83 
84 static int fault_opcodes_open(struct inode *inode, struct file *file)
85 {
86 	file->private_data = inode->i_private;
87 	return nonseekable_open(inode, file);
88 }
89 
90 static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
91 				   size_t len, loff_t *pos)
92 {
93 	ssize_t ret = 0;
94 	/* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */
95 	size_t copy, datalen = 1280;
96 	char *data, *token, *ptr, *end;
97 	struct fault *fault = file->private_data;
98 
99 	data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
100 	if (!data)
101 		return -ENOMEM;
102 	copy = min(len, datalen - 1);
103 	if (copy_from_user(data, buf, copy)) {
104 		ret = -EFAULT;
105 		goto free_data;
106 	}
107 
108 	ret = debugfs_file_get(file->f_path.dentry);
109 	if (unlikely(ret))
110 		goto free_data;
111 	ptr = data;
112 	token = ptr;
113 	for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
114 		char *dash;
115 		unsigned long range_start, range_end, i;
116 		bool remove = false;
117 		unsigned long bound = 1U << BITS_PER_BYTE;
118 
119 		end = strchr(ptr, ',');
120 		if (end)
121 			*end = '\0';
122 		if (token[0] == '-') {
123 			remove = true;
124 			token++;
125 		}
126 		dash = strchr(token, '-');
127 		if (dash)
128 			*dash = '\0';
129 		if (kstrtoul(token, 0, &range_start))
130 			break;
131 		if (dash) {
132 			token = dash + 1;
133 			if (kstrtoul(token, 0, &range_end))
134 				break;
135 		} else {
136 			range_end = range_start;
137 		}
138 		if (range_start == range_end && range_start == -1UL) {
139 			bitmap_zero(fault->opcodes, sizeof(fault->opcodes) *
140 				    BITS_PER_BYTE);
141 			break;
142 		}
143 		/* Check the inputs */
144 		if (range_start >= bound || range_end >= bound)
145 			break;
146 
147 		for (i = range_start; i <= range_end; i++) {
148 			if (remove)
149 				clear_bit(i, fault->opcodes);
150 			else
151 				set_bit(i, fault->opcodes);
152 		}
153 		if (!end)
154 			break;
155 	}
156 	ret = len;
157 
158 	debugfs_file_put(file->f_path.dentry);
159 free_data:
160 	kfree(data);
161 	return ret;
162 }
163 
164 static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
165 				  size_t len, loff_t *pos)
166 {
167 	ssize_t ret = 0;
168 	char *data;
169 	size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */
170 	unsigned long bit = 0, zero = 0;
171 	struct fault *fault = file->private_data;
172 	size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE;
173 
174 	data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
175 	if (!data)
176 		return -ENOMEM;
177 	ret = debugfs_file_get(file->f_path.dentry);
178 	if (unlikely(ret))
179 		goto free_data;
180 	bit = find_first_bit(fault->opcodes, bitsize);
181 	while (bit < bitsize) {
182 		zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
183 		if (zero - 1 != bit)
184 			size += scnprintf(data + size,
185 					 datalen - size - 1,
186 					 "0x%lx-0x%lx,", bit, zero - 1);
187 		else
188 			size += scnprintf(data + size,
189 					 datalen - size - 1, "0x%lx,",
190 					 bit);
191 		bit = find_next_bit(fault->opcodes, bitsize, zero);
192 	}
193 	debugfs_file_put(file->f_path.dentry);
194 	data[size - 1] = '\n';
195 	data[size] = '\0';
196 	ret = simple_read_from_buffer(buf, len, pos, data, size);
197 free_data:
198 	kfree(data);
199 	return ret;
200 }
201 
202 static const struct file_operations __fault_opcodes_fops = {
203 	.owner = THIS_MODULE,
204 	.open = fault_opcodes_open,
205 	.read = fault_opcodes_read,
206 	.write = fault_opcodes_write,
207 	.llseek = no_llseek
208 };
209 
210 void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
211 {
212 	if (ibd->fault)
213 		debugfs_remove_recursive(ibd->fault->dir);
214 	kfree(ibd->fault);
215 	ibd->fault = NULL;
216 }
217 
218 int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
219 {
220 	struct dentry *parent = ibd->hfi1_ibdev_dbg;
221 	struct dentry *fault_dir;
222 
223 	ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
224 	if (!ibd->fault)
225 		return -ENOMEM;
226 
227 	ibd->fault->attr.interval = 1;
228 	ibd->fault->attr.require_end = ULONG_MAX;
229 	ibd->fault->attr.stacktrace_depth = 32;
230 	ibd->fault->attr.dname = NULL;
231 	ibd->fault->attr.verbose = 0;
232 	ibd->fault->enable = false;
233 	ibd->fault->opcode = false;
234 	ibd->fault->fault_skip = 0;
235 	ibd->fault->skip = 0;
236 	ibd->fault->direction = HFI1_FAULT_DIR_TXRX;
237 	ibd->fault->suppress_err = false;
238 	bitmap_zero(ibd->fault->opcodes,
239 		    sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
240 
241 	fault_dir =
242 		fault_create_debugfs_attr("fault", parent, &ibd->fault->attr);
243 	if (IS_ERR(fault_dir)) {
244 		kfree(ibd->fault);
245 		ibd->fault = NULL;
246 		return -ENOENT;
247 	}
248 	ibd->fault->dir = fault_dir;
249 
250 	debugfs_create_file("fault_stats", 0444, fault_dir, ibd,
251 			    &_fault_stats_file_ops);
252 	debugfs_create_bool("enable", 0600, fault_dir, &ibd->fault->enable);
253 	debugfs_create_bool("suppress_err", 0600, fault_dir,
254 			    &ibd->fault->suppress_err);
255 	debugfs_create_bool("opcode_mode", 0600, fault_dir,
256 			    &ibd->fault->opcode);
257 	debugfs_create_file("opcodes", 0600, fault_dir, ibd->fault,
258 			    &__fault_opcodes_fops);
259 	debugfs_create_u64("skip_pkts", 0600, fault_dir,
260 			   &ibd->fault->fault_skip);
261 	debugfs_create_u64("skip_usec", 0600, fault_dir,
262 			   &ibd->fault->fault_skip_usec);
263 	debugfs_create_u8("direction", 0600, fault_dir, &ibd->fault->direction);
264 
265 	return 0;
266 }
267 
268 bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
269 {
270 	if (ibd->fault)
271 		return ibd->fault->suppress_err;
272 	return false;
273 }
274 
275 static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode,
276 				u8 direction)
277 {
278 	bool ret = false;
279 
280 	if (!ibd->fault || !ibd->fault->enable)
281 		return false;
282 	if (!(ibd->fault->direction & direction))
283 		return false;
284 	if (ibd->fault->opcode) {
285 		if (bitmap_empty(ibd->fault->opcodes,
286 				 (sizeof(ibd->fault->opcodes) *
287 				  BITS_PER_BYTE)))
288 			return false;
289 		if (!(test_bit(opcode, ibd->fault->opcodes)))
290 			return false;
291 	}
292 	if (ibd->fault->fault_skip_usec &&
293 	    time_before(jiffies, ibd->fault->skip_usec))
294 		return false;
295 	if (ibd->fault->fault_skip && ibd->fault->skip) {
296 		ibd->fault->skip--;
297 		return false;
298 	}
299 	ret = should_fail(&ibd->fault->attr, 1);
300 	if (ret) {
301 		ibd->fault->skip = ibd->fault->fault_skip;
302 		ibd->fault->skip_usec = jiffies +
303 			usecs_to_jiffies(ibd->fault->fault_skip_usec);
304 	}
305 	return ret;
306 }
307 
308 bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode)
309 {
310 	struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
311 
312 	if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) {
313 		trace_hfi1_fault_opcode(qp, opcode);
314 		ibd->fault->n_txfaults[opcode]++;
315 		return true;
316 	}
317 	return false;
318 }
319 
320 bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
321 {
322 	struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev;
323 
324 	if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) {
325 		trace_hfi1_fault_packet(packet);
326 		ibd->fault->n_rxfaults[packet->opcode]++;
327 		return true;
328 	}
329 	return false;
330 }
331