xref: /openbmc/linux/drivers/infiniband/hw/hfi1/fault.c (revision 145eba1a)
1*145eba1aSCai Huoqing // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
2a74d5307SMitko Haralanov /*
3a74d5307SMitko Haralanov  * Copyright(c) 2018 Intel Corporation.
4a74d5307SMitko Haralanov  */
5*145eba1aSCai Huoqing 
6a74d5307SMitko Haralanov #include <linux/debugfs.h>
7a74d5307SMitko Haralanov #include <linux/seq_file.h>
8a74d5307SMitko Haralanov #include <linux/kernel.h>
9a74d5307SMitko Haralanov #include <linux/module.h>
10a74d5307SMitko Haralanov #include <linux/types.h>
11a74d5307SMitko Haralanov #include <linux/bitmap.h>
12a74d5307SMitko Haralanov 
13a74d5307SMitko Haralanov #include "debugfs.h"
14a74d5307SMitko Haralanov #include "fault.h"
15a74d5307SMitko Haralanov #include "trace.h"
16a74d5307SMitko Haralanov 
/*
 * Direction selectors for fault injection.  They are bit flags: the
 * configured direction mask is AND-ed against the direction of the packet
 * being considered, and TXRX selects both.
 */
#define HFI1_FAULT_DIR_TX   BIT(0)
#define HFI1_FAULT_DIR_RX   BIT(1)
#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_RX | HFI1_FAULT_DIR_TX)
20a74d5307SMitko Haralanov 
21a74d5307SMitko Haralanov static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
22a74d5307SMitko Haralanov {
23a74d5307SMitko Haralanov 	struct hfi1_opcode_stats_perctx *opstats;
24a74d5307SMitko Haralanov 
25a74d5307SMitko Haralanov 	if (*pos >= ARRAY_SIZE(opstats->stats))
26a74d5307SMitko Haralanov 		return NULL;
27a74d5307SMitko Haralanov 	return pos;
28a74d5307SMitko Haralanov }
29a74d5307SMitko Haralanov 
30a74d5307SMitko Haralanov static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
31a74d5307SMitko Haralanov {
32a74d5307SMitko Haralanov 	struct hfi1_opcode_stats_perctx *opstats;
33a74d5307SMitko Haralanov 
34a74d5307SMitko Haralanov 	++*pos;
35a74d5307SMitko Haralanov 	if (*pos >= ARRAY_SIZE(opstats->stats))
36a74d5307SMitko Haralanov 		return NULL;
37a74d5307SMitko Haralanov 	return pos;
38a74d5307SMitko Haralanov }
39a74d5307SMitko Haralanov 
/*
 * Stop step of the "fault_stats" sequence iterator.
 *
 * The start/next steps only hand back the position pointer and acquire
 * nothing, so there is nothing to release here.
 */
static void _fault_stats_seq_stop(struct seq_file *s, void *v)
{
	/* intentionally empty */
}
43a74d5307SMitko Haralanov 
44a74d5307SMitko Haralanov static int _fault_stats_seq_show(struct seq_file *s, void *v)
45a74d5307SMitko Haralanov {
46a74d5307SMitko Haralanov 	loff_t *spos = v;
47a74d5307SMitko Haralanov 	loff_t i = *spos, j;
48a74d5307SMitko Haralanov 	u64 n_packets = 0, n_bytes = 0;
49a74d5307SMitko Haralanov 	struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
50a74d5307SMitko Haralanov 	struct hfi1_devdata *dd = dd_from_dev(ibd);
51a74d5307SMitko Haralanov 	struct hfi1_ctxtdata *rcd;
52a74d5307SMitko Haralanov 
53a74d5307SMitko Haralanov 	for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
54a74d5307SMitko Haralanov 		rcd = hfi1_rcd_get_by_index(dd, j);
55a74d5307SMitko Haralanov 		if (rcd) {
56a74d5307SMitko Haralanov 			n_packets += rcd->opstats->stats[i].n_packets;
57a74d5307SMitko Haralanov 			n_bytes += rcd->opstats->stats[i].n_bytes;
58a74d5307SMitko Haralanov 		}
59a74d5307SMitko Haralanov 		hfi1_rcd_put(rcd);
60a74d5307SMitko Haralanov 	}
61a74d5307SMitko Haralanov 	for_each_possible_cpu(j) {
62a74d5307SMitko Haralanov 		struct hfi1_opcode_stats_perctx *sp =
63a74d5307SMitko Haralanov 			per_cpu_ptr(dd->tx_opstats, j);
64a74d5307SMitko Haralanov 
65a74d5307SMitko Haralanov 		n_packets += sp->stats[i].n_packets;
66a74d5307SMitko Haralanov 		n_bytes += sp->stats[i].n_bytes;
67a74d5307SMitko Haralanov 	}
68a74d5307SMitko Haralanov 	if (!n_packets && !n_bytes)
69a74d5307SMitko Haralanov 		return SEQ_SKIP;
70a74d5307SMitko Haralanov 	if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i])
71a74d5307SMitko Haralanov 		return SEQ_SKIP;
72a74d5307SMitko Haralanov 	seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
73a74d5307SMitko Haralanov 		   (unsigned long long)n_packets,
74a74d5307SMitko Haralanov 		   (unsigned long long)n_bytes,
75a74d5307SMitko Haralanov 		   (unsigned long long)ibd->fault->n_rxfaults[i],
76a74d5307SMitko Haralanov 		   (unsigned long long)ibd->fault->n_txfaults[i]);
77a74d5307SMitko Haralanov 	return 0;
78a74d5307SMitko Haralanov }
79a74d5307SMitko Haralanov 
/*
 * Instantiate the debugfs plumbing for the "fault_stats" file.
 * hfi1_fault_init_debugfs() registers that file with _fault_stats_file_ops,
 * which is presumably what these helper macros generate from the
 * _fault_stats_seq_{start,next,stop,show} callbacks above -- the macro
 * definitions are not in this file (likely "debugfs.h"; TODO confirm).
 * The invocation order is kept as-is because later macros may refer to
 * symbols produced by earlier ones.
 */
80a74d5307SMitko Haralanov DEBUGFS_SEQ_FILE_OPS(fault_stats);
81a74d5307SMitko Haralanov DEBUGFS_SEQ_FILE_OPEN(fault_stats);
82a74d5307SMitko Haralanov DEBUGFS_FILE_OPS(fault_stats);
83a74d5307SMitko Haralanov 
84a74d5307SMitko Haralanov static int fault_opcodes_open(struct inode *inode, struct file *file)
85a74d5307SMitko Haralanov {
86a74d5307SMitko Haralanov 	file->private_data = inode->i_private;
87a74d5307SMitko Haralanov 	return nonseekable_open(inode, file);
88a74d5307SMitko Haralanov }
89a74d5307SMitko Haralanov 
90a74d5307SMitko Haralanov static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
91a74d5307SMitko Haralanov 				   size_t len, loff_t *pos)
92a74d5307SMitko Haralanov {
93a74d5307SMitko Haralanov 	ssize_t ret = 0;
94a74d5307SMitko Haralanov 	/* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */
95a74d5307SMitko Haralanov 	size_t copy, datalen = 1280;
96a74d5307SMitko Haralanov 	char *data, *token, *ptr, *end;
97a74d5307SMitko Haralanov 	struct fault *fault = file->private_data;
98a74d5307SMitko Haralanov 
99a74d5307SMitko Haralanov 	data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
100a74d5307SMitko Haralanov 	if (!data)
101a74d5307SMitko Haralanov 		return -ENOMEM;
102a74d5307SMitko Haralanov 	copy = min(len, datalen - 1);
1032323d7baSWenwen Wang 	if (copy_from_user(data, buf, copy)) {
1042323d7baSWenwen Wang 		ret = -EFAULT;
1052323d7baSWenwen Wang 		goto free_data;
1062323d7baSWenwen Wang 	}
107a74d5307SMitko Haralanov 
108a74d5307SMitko Haralanov 	ret = debugfs_file_get(file->f_path.dentry);
109a74d5307SMitko Haralanov 	if (unlikely(ret))
1102323d7baSWenwen Wang 		goto free_data;
111a74d5307SMitko Haralanov 	ptr = data;
112a74d5307SMitko Haralanov 	token = ptr;
113a74d5307SMitko Haralanov 	for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
114a74d5307SMitko Haralanov 		char *dash;
115a74d5307SMitko Haralanov 		unsigned long range_start, range_end, i;
116a74d5307SMitko Haralanov 		bool remove = false;
1175f90677eSKaike Wan 		unsigned long bound = 1U << BITS_PER_BYTE;
118a74d5307SMitko Haralanov 
119a74d5307SMitko Haralanov 		end = strchr(ptr, ',');
120a74d5307SMitko Haralanov 		if (end)
121a74d5307SMitko Haralanov 			*end = '\0';
122a74d5307SMitko Haralanov 		if (token[0] == '-') {
123a74d5307SMitko Haralanov 			remove = true;
124a74d5307SMitko Haralanov 			token++;
125a74d5307SMitko Haralanov 		}
126a74d5307SMitko Haralanov 		dash = strchr(token, '-');
127a74d5307SMitko Haralanov 		if (dash)
128a74d5307SMitko Haralanov 			*dash = '\0';
129a74d5307SMitko Haralanov 		if (kstrtoul(token, 0, &range_start))
130a74d5307SMitko Haralanov 			break;
131a74d5307SMitko Haralanov 		if (dash) {
132a74d5307SMitko Haralanov 			token = dash + 1;
133a74d5307SMitko Haralanov 			if (kstrtoul(token, 0, &range_end))
134a74d5307SMitko Haralanov 				break;
135a74d5307SMitko Haralanov 		} else {
136a74d5307SMitko Haralanov 			range_end = range_start;
137a74d5307SMitko Haralanov 		}
138a74d5307SMitko Haralanov 		if (range_start == range_end && range_start == -1UL) {
139a74d5307SMitko Haralanov 			bitmap_zero(fault->opcodes, sizeof(fault->opcodes) *
140a74d5307SMitko Haralanov 				    BITS_PER_BYTE);
141a74d5307SMitko Haralanov 			break;
142a74d5307SMitko Haralanov 		}
1435f90677eSKaike Wan 		/* Check the inputs */
1445f90677eSKaike Wan 		if (range_start >= bound || range_end >= bound)
1455f90677eSKaike Wan 			break;
1465f90677eSKaike Wan 
147a74d5307SMitko Haralanov 		for (i = range_start; i <= range_end; i++) {
148a74d5307SMitko Haralanov 			if (remove)
149a74d5307SMitko Haralanov 				clear_bit(i, fault->opcodes);
150a74d5307SMitko Haralanov 			else
151a74d5307SMitko Haralanov 				set_bit(i, fault->opcodes);
152a74d5307SMitko Haralanov 		}
153a74d5307SMitko Haralanov 		if (!end)
154a74d5307SMitko Haralanov 			break;
155a74d5307SMitko Haralanov 	}
156a74d5307SMitko Haralanov 	ret = len;
157a74d5307SMitko Haralanov 
158a74d5307SMitko Haralanov 	debugfs_file_put(file->f_path.dentry);
1592323d7baSWenwen Wang free_data:
160a74d5307SMitko Haralanov 	kfree(data);
161a74d5307SMitko Haralanov 	return ret;
162a74d5307SMitko Haralanov }
163a74d5307SMitko Haralanov 
164a74d5307SMitko Haralanov static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
165a74d5307SMitko Haralanov 				  size_t len, loff_t *pos)
166a74d5307SMitko Haralanov {
167a74d5307SMitko Haralanov 	ssize_t ret = 0;
168a74d5307SMitko Haralanov 	char *data;
169a74d5307SMitko Haralanov 	size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */
170a74d5307SMitko Haralanov 	unsigned long bit = 0, zero = 0;
171a74d5307SMitko Haralanov 	struct fault *fault = file->private_data;
172a74d5307SMitko Haralanov 	size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE;
173a74d5307SMitko Haralanov 
174a74d5307SMitko Haralanov 	data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
175a74d5307SMitko Haralanov 	if (!data)
176a74d5307SMitko Haralanov 		return -ENOMEM;
177a74d5307SMitko Haralanov 	ret = debugfs_file_get(file->f_path.dentry);
178a74d5307SMitko Haralanov 	if (unlikely(ret))
179b08afa06SWenwen Wang 		goto free_data;
180a74d5307SMitko Haralanov 	bit = find_first_bit(fault->opcodes, bitsize);
181a74d5307SMitko Haralanov 	while (bit < bitsize) {
182a74d5307SMitko Haralanov 		zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
183a74d5307SMitko Haralanov 		if (zero - 1 != bit)
18423ab5261STakashi Iwai 			size += scnprintf(data + size,
185a74d5307SMitko Haralanov 					 datalen - size - 1,
186a74d5307SMitko Haralanov 					 "0x%lx-0x%lx,", bit, zero - 1);
187a74d5307SMitko Haralanov 		else
18823ab5261STakashi Iwai 			size += scnprintf(data + size,
189a74d5307SMitko Haralanov 					 datalen - size - 1, "0x%lx,",
190a74d5307SMitko Haralanov 					 bit);
191a74d5307SMitko Haralanov 		bit = find_next_bit(fault->opcodes, bitsize, zero);
192a74d5307SMitko Haralanov 	}
193a74d5307SMitko Haralanov 	debugfs_file_put(file->f_path.dentry);
194a74d5307SMitko Haralanov 	data[size - 1] = '\n';
195a74d5307SMitko Haralanov 	data[size] = '\0';
196a74d5307SMitko Haralanov 	ret = simple_read_from_buffer(buf, len, pos, data, size);
197b08afa06SWenwen Wang free_data:
198a74d5307SMitko Haralanov 	kfree(data);
199a74d5307SMitko Haralanov 	return ret;
200a74d5307SMitko Haralanov }
201a74d5307SMitko Haralanov 
202a74d5307SMitko Haralanov static const struct file_operations __fault_opcodes_fops = {
203a74d5307SMitko Haralanov 	.owner = THIS_MODULE,
204a74d5307SMitko Haralanov 	.open = fault_opcodes_open,
205a74d5307SMitko Haralanov 	.read = fault_opcodes_read,
206a74d5307SMitko Haralanov 	.write = fault_opcodes_write,
207a74d5307SMitko Haralanov 	.llseek = no_llseek
208a74d5307SMitko Haralanov };
209a74d5307SMitko Haralanov 
210a74d5307SMitko Haralanov void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
211a74d5307SMitko Haralanov {
212a74d5307SMitko Haralanov 	if (ibd->fault)
213a74d5307SMitko Haralanov 		debugfs_remove_recursive(ibd->fault->dir);
214a74d5307SMitko Haralanov 	kfree(ibd->fault);
215a74d5307SMitko Haralanov 	ibd->fault = NULL;
216a74d5307SMitko Haralanov }
217a74d5307SMitko Haralanov 
218a74d5307SMitko Haralanov int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
219a74d5307SMitko Haralanov {
220a74d5307SMitko Haralanov 	struct dentry *parent = ibd->hfi1_ibdev_dbg;
221e7751180SGreg Kroah-Hartman 	struct dentry *fault_dir;
222a74d5307SMitko Haralanov 
223a74d5307SMitko Haralanov 	ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
224a74d5307SMitko Haralanov 	if (!ibd->fault)
225a74d5307SMitko Haralanov 		return -ENOMEM;
226a74d5307SMitko Haralanov 
227a74d5307SMitko Haralanov 	ibd->fault->attr.interval = 1;
228a74d5307SMitko Haralanov 	ibd->fault->attr.require_end = ULONG_MAX;
229a74d5307SMitko Haralanov 	ibd->fault->attr.stacktrace_depth = 32;
230a74d5307SMitko Haralanov 	ibd->fault->attr.dname = NULL;
231a74d5307SMitko Haralanov 	ibd->fault->attr.verbose = 0;
232a74d5307SMitko Haralanov 	ibd->fault->enable = false;
233a74d5307SMitko Haralanov 	ibd->fault->opcode = false;
234a74d5307SMitko Haralanov 	ibd->fault->fault_skip = 0;
235a74d5307SMitko Haralanov 	ibd->fault->skip = 0;
236a74d5307SMitko Haralanov 	ibd->fault->direction = HFI1_FAULT_DIR_TXRX;
237a74d5307SMitko Haralanov 	ibd->fault->suppress_err = false;
238a74d5307SMitko Haralanov 	bitmap_zero(ibd->fault->opcodes,
239a74d5307SMitko Haralanov 		    sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
240a74d5307SMitko Haralanov 
241e7751180SGreg Kroah-Hartman 	fault_dir =
242e7751180SGreg Kroah-Hartman 		fault_create_debugfs_attr("fault", parent, &ibd->fault->attr);
243e7751180SGreg Kroah-Hartman 	if (IS_ERR(fault_dir)) {
244a74d5307SMitko Haralanov 		kfree(ibd->fault);
245a74d5307SMitko Haralanov 		ibd->fault = NULL;
246a74d5307SMitko Haralanov 		return -ENOENT;
247a74d5307SMitko Haralanov 	}
248e7751180SGreg Kroah-Hartman 	ibd->fault->dir = fault_dir;
249a74d5307SMitko Haralanov 
250e7751180SGreg Kroah-Hartman 	debugfs_create_file("fault_stats", 0444, fault_dir, ibd,
2515c432764SGreg Kroah-Hartman 			    &_fault_stats_file_ops);
252e7751180SGreg Kroah-Hartman 	debugfs_create_bool("enable", 0600, fault_dir, &ibd->fault->enable);
253e7751180SGreg Kroah-Hartman 	debugfs_create_bool("suppress_err", 0600, fault_dir,
254e7751180SGreg Kroah-Hartman 			    &ibd->fault->suppress_err);
255e7751180SGreg Kroah-Hartman 	debugfs_create_bool("opcode_mode", 0600, fault_dir,
256e7751180SGreg Kroah-Hartman 			    &ibd->fault->opcode);
257e7751180SGreg Kroah-Hartman 	debugfs_create_file("opcodes", 0600, fault_dir, ibd->fault,
258e7751180SGreg Kroah-Hartman 			    &__fault_opcodes_fops);
259e7751180SGreg Kroah-Hartman 	debugfs_create_u64("skip_pkts", 0600, fault_dir,
260e7751180SGreg Kroah-Hartman 			   &ibd->fault->fault_skip);
261e7751180SGreg Kroah-Hartman 	debugfs_create_u64("skip_usec", 0600, fault_dir,
262e7751180SGreg Kroah-Hartman 			   &ibd->fault->fault_skip_usec);
263e7751180SGreg Kroah-Hartman 	debugfs_create_u8("direction", 0600, fault_dir, &ibd->fault->direction);
264a74d5307SMitko Haralanov 
265a74d5307SMitko Haralanov 	return 0;
266a74d5307SMitko Haralanov }
267a74d5307SMitko Haralanov 
268a74d5307SMitko Haralanov bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
269a74d5307SMitko Haralanov {
270a74d5307SMitko Haralanov 	if (ibd->fault)
271a74d5307SMitko Haralanov 		return ibd->fault->suppress_err;
272a74d5307SMitko Haralanov 	return false;
273a74d5307SMitko Haralanov }
274a74d5307SMitko Haralanov 
275a74d5307SMitko Haralanov static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode,
276a74d5307SMitko Haralanov 				u8 direction)
277a74d5307SMitko Haralanov {
278a74d5307SMitko Haralanov 	bool ret = false;
279a74d5307SMitko Haralanov 
280a74d5307SMitko Haralanov 	if (!ibd->fault || !ibd->fault->enable)
281a74d5307SMitko Haralanov 		return false;
282a74d5307SMitko Haralanov 	if (!(ibd->fault->direction & direction))
283a74d5307SMitko Haralanov 		return false;
284a74d5307SMitko Haralanov 	if (ibd->fault->opcode) {
285a74d5307SMitko Haralanov 		if (bitmap_empty(ibd->fault->opcodes,
286a74d5307SMitko Haralanov 				 (sizeof(ibd->fault->opcodes) *
287a74d5307SMitko Haralanov 				  BITS_PER_BYTE)))
288a74d5307SMitko Haralanov 			return false;
289a74d5307SMitko Haralanov 		if (!(test_bit(opcode, ibd->fault->opcodes)))
290a74d5307SMitko Haralanov 			return false;
291a74d5307SMitko Haralanov 	}
292a74d5307SMitko Haralanov 	if (ibd->fault->fault_skip_usec &&
293a74d5307SMitko Haralanov 	    time_before(jiffies, ibd->fault->skip_usec))
294a74d5307SMitko Haralanov 		return false;
295a74d5307SMitko Haralanov 	if (ibd->fault->fault_skip && ibd->fault->skip) {
296a74d5307SMitko Haralanov 		ibd->fault->skip--;
297a74d5307SMitko Haralanov 		return false;
298a74d5307SMitko Haralanov 	}
299a74d5307SMitko Haralanov 	ret = should_fail(&ibd->fault->attr, 1);
300a74d5307SMitko Haralanov 	if (ret) {
301a74d5307SMitko Haralanov 		ibd->fault->skip = ibd->fault->fault_skip;
302a74d5307SMitko Haralanov 		ibd->fault->skip_usec = jiffies +
303a74d5307SMitko Haralanov 			usecs_to_jiffies(ibd->fault->fault_skip_usec);
304a74d5307SMitko Haralanov 	}
305a74d5307SMitko Haralanov 	return ret;
306a74d5307SMitko Haralanov }
307a74d5307SMitko Haralanov 
308a74d5307SMitko Haralanov bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode)
309a74d5307SMitko Haralanov {
310a74d5307SMitko Haralanov 	struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
311a74d5307SMitko Haralanov 
312a74d5307SMitko Haralanov 	if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) {
313a74d5307SMitko Haralanov 		trace_hfi1_fault_opcode(qp, opcode);
314a74d5307SMitko Haralanov 		ibd->fault->n_txfaults[opcode]++;
315a74d5307SMitko Haralanov 		return true;
316a74d5307SMitko Haralanov 	}
317a74d5307SMitko Haralanov 	return false;
318a74d5307SMitko Haralanov }
319a74d5307SMitko Haralanov 
320a74d5307SMitko Haralanov bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
321a74d5307SMitko Haralanov {
322a74d5307SMitko Haralanov 	struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev;
323a74d5307SMitko Haralanov 
324a74d5307SMitko Haralanov 	if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) {
325a74d5307SMitko Haralanov 		trace_hfi1_fault_packet(packet);
326a74d5307SMitko Haralanov 		ibd->fault->n_rxfaults[packet->opcode]++;
327a74d5307SMitko Haralanov 		return true;
328a74d5307SMitko Haralanov 	}
329a74d5307SMitko Haralanov 	return false;
330a74d5307SMitko Haralanov }
331