xref: /openbmc/linux/drivers/infiniband/hw/hfi1/fault.c (revision ba61bb17)
1 /*
2  * Copyright(c) 2018 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47 #include <linux/debugfs.h>
48 #include <linux/seq_file.h>
49 #include <linux/kernel.h>
50 #include <linux/module.h>
51 #include <linux/types.h>
52 #include <linux/bitmap.h>
53 
54 #include "debugfs.h"
55 #include "fault.h"
56 #include "trace.h"
57 
/*
 * Bits of the fault "direction" mask: they are tested against the caller's
 * direction in __hfi1_should_fault() to decide whether a path is eligible
 * for fault injection.
 */
#define HFI1_FAULT_DIR_TX   BIT(0)	/* passed by hfi1_dbg_should_fault_tx() */
#define HFI1_FAULT_DIR_RX   BIT(1)	/* passed by hfi1_dbg_should_fault_rx() */
/* Both directions; the value installed by hfi1_fault_init_debugfs() */
#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_RX | HFI1_FAULT_DIR_TX)
61 
62 static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
63 {
64 	struct hfi1_opcode_stats_perctx *opstats;
65 
66 	if (*pos >= ARRAY_SIZE(opstats->stats))
67 		return NULL;
68 	return pos;
69 }
70 
71 static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
72 {
73 	struct hfi1_opcode_stats_perctx *opstats;
74 
75 	++*pos;
76 	if (*pos >= ARRAY_SIZE(opstats->stats))
77 		return NULL;
78 	return pos;
79 }
80 
/*
 * seq_file stop hook for the fault_stats file.
 *
 * The start/next hooks in this file only hand back the position pointer and
 * acquire nothing, so there is nothing to release here.
 */
static void _fault_stats_seq_stop(struct seq_file *s, void *v)
{
	/* intentionally empty */
}
84 
85 static int _fault_stats_seq_show(struct seq_file *s, void *v)
86 {
87 	loff_t *spos = v;
88 	loff_t i = *spos, j;
89 	u64 n_packets = 0, n_bytes = 0;
90 	struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
91 	struct hfi1_devdata *dd = dd_from_dev(ibd);
92 	struct hfi1_ctxtdata *rcd;
93 
94 	for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
95 		rcd = hfi1_rcd_get_by_index(dd, j);
96 		if (rcd) {
97 			n_packets += rcd->opstats->stats[i].n_packets;
98 			n_bytes += rcd->opstats->stats[i].n_bytes;
99 		}
100 		hfi1_rcd_put(rcd);
101 	}
102 	for_each_possible_cpu(j) {
103 		struct hfi1_opcode_stats_perctx *sp =
104 			per_cpu_ptr(dd->tx_opstats, j);
105 
106 		n_packets += sp->stats[i].n_packets;
107 		n_bytes += sp->stats[i].n_bytes;
108 	}
109 	if (!n_packets && !n_bytes)
110 		return SEQ_SKIP;
111 	if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i])
112 		return SEQ_SKIP;
113 	seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
114 		   (unsigned long long)n_packets,
115 		   (unsigned long long)n_bytes,
116 		   (unsigned long long)ibd->fault->n_rxfaults[i],
117 		   (unsigned long long)ibd->fault->n_txfaults[i]);
118 	return 0;
119 }
120 
/*
 * Glue for the "fault_stats" file, built by macros from "debugfs.h".
 * NOTE(review): presumably these expand to the seq_operations table, the
 * open handler and the file_operations wired to the _fault_stats_seq_*
 * callbacks in this file -- confirm against debugfs.h.
 */
DEBUGFS_SEQ_FILE_OPS(fault_stats);
DEBUGFS_SEQ_FILE_OPEN(fault_stats);
DEBUGFS_FILE_OPS(fault_stats);
124 
125 static int fault_opcodes_open(struct inode *inode, struct file *file)
126 {
127 	file->private_data = inode->i_private;
128 	return nonseekable_open(inode, file);
129 }
130 
131 static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
132 				   size_t len, loff_t *pos)
133 {
134 	ssize_t ret = 0;
135 	/* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */
136 	size_t copy, datalen = 1280;
137 	char *data, *token, *ptr, *end;
138 	struct fault *fault = file->private_data;
139 
140 	data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
141 	if (!data)
142 		return -ENOMEM;
143 	copy = min(len, datalen - 1);
144 	if (copy_from_user(data, buf, copy))
145 		return -EFAULT;
146 
147 	ret = debugfs_file_get(file->f_path.dentry);
148 	if (unlikely(ret))
149 		return ret;
150 	ptr = data;
151 	token = ptr;
152 	for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
153 		char *dash;
154 		unsigned long range_start, range_end, i;
155 		bool remove = false;
156 
157 		end = strchr(ptr, ',');
158 		if (end)
159 			*end = '\0';
160 		if (token[0] == '-') {
161 			remove = true;
162 			token++;
163 		}
164 		dash = strchr(token, '-');
165 		if (dash)
166 			*dash = '\0';
167 		if (kstrtoul(token, 0, &range_start))
168 			break;
169 		if (dash) {
170 			token = dash + 1;
171 			if (kstrtoul(token, 0, &range_end))
172 				break;
173 		} else {
174 			range_end = range_start;
175 		}
176 		if (range_start == range_end && range_start == -1UL) {
177 			bitmap_zero(fault->opcodes, sizeof(fault->opcodes) *
178 				    BITS_PER_BYTE);
179 			break;
180 		}
181 		for (i = range_start; i <= range_end; i++) {
182 			if (remove)
183 				clear_bit(i, fault->opcodes);
184 			else
185 				set_bit(i, fault->opcodes);
186 		}
187 		if (!end)
188 			break;
189 	}
190 	ret = len;
191 
192 	debugfs_file_put(file->f_path.dentry);
193 	kfree(data);
194 	return ret;
195 }
196 
197 static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
198 				  size_t len, loff_t *pos)
199 {
200 	ssize_t ret = 0;
201 	char *data;
202 	size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */
203 	unsigned long bit = 0, zero = 0;
204 	struct fault *fault = file->private_data;
205 	size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE;
206 
207 	data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
208 	if (!data)
209 		return -ENOMEM;
210 	ret = debugfs_file_get(file->f_path.dentry);
211 	if (unlikely(ret))
212 		return ret;
213 	bit = find_first_bit(fault->opcodes, bitsize);
214 	while (bit < bitsize) {
215 		zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
216 		if (zero - 1 != bit)
217 			size += snprintf(data + size,
218 					 datalen - size - 1,
219 					 "0x%lx-0x%lx,", bit, zero - 1);
220 		else
221 			size += snprintf(data + size,
222 					 datalen - size - 1, "0x%lx,",
223 					 bit);
224 		bit = find_next_bit(fault->opcodes, bitsize, zero);
225 	}
226 	debugfs_file_put(file->f_path.dentry);
227 	data[size - 1] = '\n';
228 	data[size] = '\0';
229 	ret = simple_read_from_buffer(buf, len, pos, data, size);
230 	kfree(data);
231 	return ret;
232 }
233 
234 static const struct file_operations __fault_opcodes_fops = {
235 	.owner = THIS_MODULE,
236 	.open = fault_opcodes_open,
237 	.read = fault_opcodes_read,
238 	.write = fault_opcodes_write,
239 	.llseek = no_llseek
240 };
241 
242 void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
243 {
244 	if (ibd->fault)
245 		debugfs_remove_recursive(ibd->fault->dir);
246 	kfree(ibd->fault);
247 	ibd->fault = NULL;
248 }
249 
250 int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
251 {
252 	struct dentry *parent = ibd->hfi1_ibdev_dbg;
253 
254 	ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
255 	if (!ibd->fault)
256 		return -ENOMEM;
257 
258 	ibd->fault->attr.interval = 1;
259 	ibd->fault->attr.require_end = ULONG_MAX;
260 	ibd->fault->attr.stacktrace_depth = 32;
261 	ibd->fault->attr.dname = NULL;
262 	ibd->fault->attr.verbose = 0;
263 	ibd->fault->enable = false;
264 	ibd->fault->opcode = false;
265 	ibd->fault->fault_skip = 0;
266 	ibd->fault->skip = 0;
267 	ibd->fault->direction = HFI1_FAULT_DIR_TXRX;
268 	ibd->fault->suppress_err = false;
269 	bitmap_zero(ibd->fault->opcodes,
270 		    sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
271 
272 	ibd->fault->dir =
273 		fault_create_debugfs_attr("fault", parent,
274 					  &ibd->fault->attr);
275 	if (IS_ERR(ibd->fault->dir)) {
276 		kfree(ibd->fault);
277 		ibd->fault = NULL;
278 		return -ENOENT;
279 	}
280 
281 	DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault->dir, ibd);
282 	if (!debugfs_create_bool("enable", 0600, ibd->fault->dir,
283 				 &ibd->fault->enable))
284 		goto fail;
285 	if (!debugfs_create_bool("suppress_err", 0600,
286 				 ibd->fault->dir,
287 				 &ibd->fault->suppress_err))
288 		goto fail;
289 	if (!debugfs_create_bool("opcode_mode", 0600, ibd->fault->dir,
290 				 &ibd->fault->opcode))
291 		goto fail;
292 	if (!debugfs_create_file("opcodes", 0600, ibd->fault->dir,
293 				 ibd->fault, &__fault_opcodes_fops))
294 		goto fail;
295 	if (!debugfs_create_u64("skip_pkts", 0600,
296 				ibd->fault->dir,
297 				&ibd->fault->fault_skip))
298 		goto fail;
299 	if (!debugfs_create_u64("skip_usec", 0600,
300 				ibd->fault->dir,
301 				&ibd->fault->fault_skip_usec))
302 		goto fail;
303 	if (!debugfs_create_u8("direction", 0600, ibd->fault->dir,
304 			       &ibd->fault->direction))
305 		goto fail;
306 
307 	return 0;
308 fail:
309 	hfi1_fault_exit_debugfs(ibd);
310 	return -ENOMEM;
311 }
312 
313 bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
314 {
315 	if (ibd->fault)
316 		return ibd->fault->suppress_err;
317 	return false;
318 }
319 
320 static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode,
321 				u8 direction)
322 {
323 	bool ret = false;
324 
325 	if (!ibd->fault || !ibd->fault->enable)
326 		return false;
327 	if (!(ibd->fault->direction & direction))
328 		return false;
329 	if (ibd->fault->opcode) {
330 		if (bitmap_empty(ibd->fault->opcodes,
331 				 (sizeof(ibd->fault->opcodes) *
332 				  BITS_PER_BYTE)))
333 			return false;
334 		if (!(test_bit(opcode, ibd->fault->opcodes)))
335 			return false;
336 	}
337 	if (ibd->fault->fault_skip_usec &&
338 	    time_before(jiffies, ibd->fault->skip_usec))
339 		return false;
340 	if (ibd->fault->fault_skip && ibd->fault->skip) {
341 		ibd->fault->skip--;
342 		return false;
343 	}
344 	ret = should_fail(&ibd->fault->attr, 1);
345 	if (ret) {
346 		ibd->fault->skip = ibd->fault->fault_skip;
347 		ibd->fault->skip_usec = jiffies +
348 			usecs_to_jiffies(ibd->fault->fault_skip_usec);
349 	}
350 	return ret;
351 }
352 
353 bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode)
354 {
355 	struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
356 
357 	if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) {
358 		trace_hfi1_fault_opcode(qp, opcode);
359 		ibd->fault->n_txfaults[opcode]++;
360 		return true;
361 	}
362 	return false;
363 }
364 
365 bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
366 {
367 	struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev;
368 
369 	if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) {
370 		trace_hfi1_fault_packet(packet);
371 		ibd->fault->n_rxfaults[packet->opcode]++;
372 		return true;
373 	}
374 	return false;
375 }
376