xref: /openbmc/linux/drivers/infiniband/core/nldev.c (revision ddc141e5)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <net/netlink.h>
37 #include <rdma/rdma_netlink.h>
38 
39 #include "core_priv.h"
40 
/*
 * Netlink attribute policy handed to nlmsg_parse() by every request handler
 * in this file. It declares the expected type (and, for strings, the maximum
 * length) of the RDMA_NLDEV_ATTR_* attributes listed below.
 *
 * Attributes without an entry here (for example RDMA_NLDEV_ATTR_CAP_FLAGS,
 * which this file only ever emits) are left zero-initialized.
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_DEV_INDEX]     = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DEV_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IB_DEVICE_NAME_MAX - 1},
	[RDMA_NLDEV_ATTR_PORT_INDEX]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_FW_VERSION]	= { .type = NLA_NUL_STRING,
					    .len = IB_FW_VERSION_NAME_MAX - 1},
	[RDMA_NLDEV_ATTR_NODE_GUID]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_LID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SM_LID]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LMC]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
	/* Resource summary: a nested table of (name, current count) entries */
	[RDMA_NLDEV_ATTR_RES_SUMMARY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
					     .len = 16 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
	/* Per-QP resource dump: a nested table of QP entries */
	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
						    .len = TASK_COMM_LEN },
};
75 
76 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
77 {
78 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
79 		return -EMSGSIZE;
80 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
81 		return -EMSGSIZE;
82 
83 	return 0;
84 }
85 
86 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
87 {
88 	char fw[IB_FW_VERSION_NAME_MAX];
89 
90 	if (fill_nldev_handle(msg, device))
91 		return -EMSGSIZE;
92 
93 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
94 		return -EMSGSIZE;
95 
96 	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
97 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
98 			      device->attrs.device_cap_flags, 0))
99 		return -EMSGSIZE;
100 
101 	ib_get_device_fw_str(device, fw);
102 	/* Device without FW has strlen(fw) */
103 	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
104 		return -EMSGSIZE;
105 
106 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
107 			      be64_to_cpu(device->node_guid), 0))
108 		return -EMSGSIZE;
109 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
110 			      be64_to_cpu(device->attrs.sys_image_guid), 0))
111 		return -EMSGSIZE;
112 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
113 		return -EMSGSIZE;
114 	return 0;
115 }
116 
117 static int fill_port_info(struct sk_buff *msg,
118 			  struct ib_device *device, u32 port)
119 {
120 	struct ib_port_attr attr;
121 	int ret;
122 
123 	if (fill_nldev_handle(msg, device))
124 		return -EMSGSIZE;
125 
126 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
127 		return -EMSGSIZE;
128 
129 	ret = ib_query_port(device, port, &attr);
130 	if (ret)
131 		return ret;
132 
133 	BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
134 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
135 			      (u64)attr.port_cap_flags, 0))
136 		return -EMSGSIZE;
137 	if (rdma_protocol_ib(device, port) &&
138 	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
139 			      attr.subnet_prefix, 0))
140 		return -EMSGSIZE;
141 	if (rdma_protocol_ib(device, port)) {
142 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
143 			return -EMSGSIZE;
144 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
145 			return -EMSGSIZE;
146 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
147 			return -EMSGSIZE;
148 	}
149 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
150 		return -EMSGSIZE;
151 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
152 		return -EMSGSIZE;
153 	return 0;
154 }
155 
156 static int fill_res_info_entry(struct sk_buff *msg,
157 			       const char *name, u64 curr)
158 {
159 	struct nlattr *entry_attr;
160 
161 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
162 	if (!entry_attr)
163 		return -EMSGSIZE;
164 
165 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
166 		goto err;
167 	if (nla_put_u64_64bit(msg,
168 			      RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
169 		goto err;
170 
171 	nla_nest_end(msg, entry_attr);
172 	return 0;
173 
174 err:
175 	nla_nest_cancel(msg, entry_attr);
176 	return -EMSGSIZE;
177 }
178 
179 static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
180 {
181 	static const char * const names[RDMA_RESTRACK_MAX] = {
182 		[RDMA_RESTRACK_PD] = "pd",
183 		[RDMA_RESTRACK_CQ] = "cq",
184 		[RDMA_RESTRACK_QP] = "qp",
185 	};
186 
187 	struct rdma_restrack_root *res = &device->res;
188 	struct nlattr *table_attr;
189 	int ret, i, curr;
190 
191 	if (fill_nldev_handle(msg, device))
192 		return -EMSGSIZE;
193 
194 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
195 	if (!table_attr)
196 		return -EMSGSIZE;
197 
198 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
199 		if (!names[i])
200 			continue;
201 		curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
202 		ret = fill_res_info_entry(msg, names[i], curr);
203 		if (ret)
204 			goto err;
205 	}
206 
207 	nla_nest_end(msg, table_attr);
208 	return 0;
209 
210 err:
211 	nla_nest_cancel(msg, table_attr);
212 	return ret;
213 }
214 
215 static int fill_res_qp_entry(struct sk_buff *msg,
216 			     struct ib_qp *qp, uint32_t port)
217 {
218 	struct rdma_restrack_entry *res = &qp->res;
219 	struct ib_qp_init_attr qp_init_attr;
220 	struct nlattr *entry_attr;
221 	struct ib_qp_attr qp_attr;
222 	int ret;
223 
224 	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
225 	if (ret)
226 		return ret;
227 
228 	if (port && port != qp_attr.port_num)
229 		return 0;
230 
231 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
232 	if (!entry_attr)
233 		goto out;
234 
235 	/* In create_qp() port is not set yet */
236 	if (qp_attr.port_num &&
237 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
238 		goto err;
239 
240 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
241 		goto err;
242 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
243 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
244 				qp_attr.dest_qp_num))
245 			goto err;
246 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
247 				qp_attr.rq_psn))
248 			goto err;
249 	}
250 
251 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
252 		goto err;
253 
254 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
255 	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
256 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
257 			       qp_attr.path_mig_state))
258 			goto err;
259 	}
260 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
261 		goto err;
262 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
263 		goto err;
264 
265 	/*
266 	 * Existence of task means that it is user QP and netlink
267 	 * user is invited to go and read /proc/PID/comm to get name
268 	 * of the task file and res->task_com should be NULL.
269 	 */
270 	if (rdma_is_kernel_res(res)) {
271 		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name))
272 			goto err;
273 	} else {
274 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task)))
275 			goto err;
276 	}
277 
278 	nla_nest_end(msg, entry_attr);
279 	return 0;
280 
281 err:
282 	nla_nest_cancel(msg, entry_attr);
283 out:
284 	return -EMSGSIZE;
285 }
286 
287 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
288 			  struct netlink_ext_ack *extack)
289 {
290 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
291 	struct ib_device *device;
292 	struct sk_buff *msg;
293 	u32 index;
294 	int err;
295 
296 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
297 			  nldev_policy, extack);
298 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
299 		return -EINVAL;
300 
301 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
302 
303 	device = ib_device_get_by_index(index);
304 	if (!device)
305 		return -EINVAL;
306 
307 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
308 	if (!msg) {
309 		err = -ENOMEM;
310 		goto err;
311 	}
312 
313 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
314 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
315 			0, 0);
316 
317 	err = fill_dev_info(msg, device);
318 	if (err)
319 		goto err_free;
320 
321 	nlmsg_end(msg, nlh);
322 
323 	put_device(&device->dev);
324 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
325 
326 err_free:
327 	nlmsg_free(msg);
328 err:
329 	put_device(&device->dev);
330 	return err;
331 }
332 
333 static int _nldev_get_dumpit(struct ib_device *device,
334 			     struct sk_buff *skb,
335 			     struct netlink_callback *cb,
336 			     unsigned int idx)
337 {
338 	int start = cb->args[0];
339 	struct nlmsghdr *nlh;
340 
341 	if (idx < start)
342 		return 0;
343 
344 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
345 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
346 			0, NLM_F_MULTI);
347 
348 	if (fill_dev_info(skb, device)) {
349 		nlmsg_cancel(skb, nlh);
350 		goto out;
351 	}
352 
353 	nlmsg_end(skb, nlh);
354 
355 	idx++;
356 
357 out:	cb->args[0] = idx;
358 	return skb->len;
359 }
360 
/*
 * Dump handler for RDMA_NLDEV_CMD_GET: delegates to ib_enum_all_devs(),
 * which is given _nldev_get_dumpit() as the per-device callback, and
 * returns its result.
 */
static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	/*
	 * There is no need to take lock, because
	 * we are relying on ib_core's lists_rwsem
	 */
	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}
369 
370 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
371 			       struct netlink_ext_ack *extack)
372 {
373 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
374 	struct ib_device *device;
375 	struct sk_buff *msg;
376 	u32 index;
377 	u32 port;
378 	int err;
379 
380 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
381 			  nldev_policy, extack);
382 	if (err ||
383 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
384 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
385 		return -EINVAL;
386 
387 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
388 	device = ib_device_get_by_index(index);
389 	if (!device)
390 		return -EINVAL;
391 
392 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
393 	if (!rdma_is_port_valid(device, port)) {
394 		err = -EINVAL;
395 		goto err;
396 	}
397 
398 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
399 	if (!msg) {
400 		err = -ENOMEM;
401 		goto err;
402 	}
403 
404 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
405 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
406 			0, 0);
407 
408 	err = fill_port_info(msg, device, port);
409 	if (err)
410 		goto err_free;
411 
412 	nlmsg_end(msg, nlh);
413 	put_device(&device->dev);
414 
415 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
416 
417 err_free:
418 	nlmsg_free(msg);
419 err:
420 	put_device(&device->dev);
421 	return err;
422 }
423 
424 static int nldev_port_get_dumpit(struct sk_buff *skb,
425 				 struct netlink_callback *cb)
426 {
427 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
428 	struct ib_device *device;
429 	int start = cb->args[0];
430 	struct nlmsghdr *nlh;
431 	u32 idx = 0;
432 	u32 ifindex;
433 	int err;
434 	u32 p;
435 
436 	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
437 			  nldev_policy, NULL);
438 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
439 		return -EINVAL;
440 
441 	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
442 	device = ib_device_get_by_index(ifindex);
443 	if (!device)
444 		return -EINVAL;
445 
446 	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
447 		/*
448 		 * The dumpit function returns all information from specific
449 		 * index. This specific index is taken from the netlink
450 		 * messages request sent by user and it is available
451 		 * in cb->args[0].
452 		 *
453 		 * Usually, the user doesn't fill this field and it causes
454 		 * to return everything.
455 		 *
456 		 */
457 		if (idx < start) {
458 			idx++;
459 			continue;
460 		}
461 
462 		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
463 				cb->nlh->nlmsg_seq,
464 				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
465 						 RDMA_NLDEV_CMD_PORT_GET),
466 				0, NLM_F_MULTI);
467 
468 		if (fill_port_info(skb, device, p)) {
469 			nlmsg_cancel(skb, nlh);
470 			goto out;
471 		}
472 		idx++;
473 		nlmsg_end(skb, nlh);
474 	}
475 
476 out:
477 	put_device(&device->dev);
478 	cb->args[0] = idx;
479 	return skb->len;
480 }
481 
482 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
483 			      struct netlink_ext_ack *extack)
484 {
485 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
486 	struct ib_device *device;
487 	struct sk_buff *msg;
488 	u32 index;
489 	int ret;
490 
491 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
492 			  nldev_policy, extack);
493 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
494 		return -EINVAL;
495 
496 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
497 	device = ib_device_get_by_index(index);
498 	if (!device)
499 		return -EINVAL;
500 
501 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
502 	if (!msg) {
503 		ret = -ENOMEM;
504 		goto err;
505 	}
506 
507 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
508 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
509 			0, 0);
510 
511 	ret = fill_res_info(msg, device);
512 	if (ret)
513 		goto err_free;
514 
515 	nlmsg_end(msg, nlh);
516 	put_device(&device->dev);
517 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
518 
519 err_free:
520 	nlmsg_free(msg);
521 err:
522 	put_device(&device->dev);
523 	return ret;
524 }
525 
526 static int _nldev_res_get_dumpit(struct ib_device *device,
527 				 struct sk_buff *skb,
528 				 struct netlink_callback *cb,
529 				 unsigned int idx)
530 {
531 	int start = cb->args[0];
532 	struct nlmsghdr *nlh;
533 
534 	if (idx < start)
535 		return 0;
536 
537 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
538 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
539 			0, NLM_F_MULTI);
540 
541 	if (fill_res_info(skb, device)) {
542 		nlmsg_cancel(skb, nlh);
543 		goto out;
544 	}
545 
546 	nlmsg_end(skb, nlh);
547 
548 	idx++;
549 
550 out:
551 	cb->args[0] = idx;
552 	return skb->len;
553 }
554 
/*
 * Dump handler for RDMA_NLDEV_CMD_RES_GET: delegates to ib_enum_all_devs(),
 * which is given _nldev_res_get_dumpit() as the per-device callback, and
 * returns its result.
 */
static int nldev_res_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}
560 
561 static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
562 				   struct netlink_callback *cb)
563 {
564 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
565 	struct rdma_restrack_entry *res;
566 	int err, ret = 0, idx = 0;
567 	struct nlattr *table_attr;
568 	struct ib_device *device;
569 	int start = cb->args[0];
570 	struct ib_qp *qp = NULL;
571 	struct nlmsghdr *nlh;
572 	u32 index, port = 0;
573 
574 	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
575 			  nldev_policy, NULL);
576 	/*
577 	 * Right now, we are expecting the device index to get QP information,
578 	 * but it is possible to extend this code to return all devices in
579 	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
580 	 * if it doesn't exist, we will iterate over all devices.
581 	 *
582 	 * But it is not needed for now.
583 	 */
584 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
585 		return -EINVAL;
586 
587 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
588 	device = ib_device_get_by_index(index);
589 	if (!device)
590 		return -EINVAL;
591 
592 	/*
593 	 * If no PORT_INDEX is supplied, we will return all QPs from that device
594 	 */
595 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
596 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
597 		if (!rdma_is_port_valid(device, port)) {
598 			ret = -EINVAL;
599 			goto err_index;
600 		}
601 	}
602 
603 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
604 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
605 			0, NLM_F_MULTI);
606 
607 	if (fill_nldev_handle(skb, device)) {
608 		ret = -EMSGSIZE;
609 		goto err;
610 	}
611 
612 	table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
613 	if (!table_attr) {
614 		ret = -EMSGSIZE;
615 		goto err;
616 	}
617 
618 	down_read(&device->res.rwsem);
619 	hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
620 		if (idx < start)
621 			goto next;
622 
623 		if ((rdma_is_kernel_res(res) &&
624 		     task_active_pid_ns(current) != &init_pid_ns) ||
625 		    (!rdma_is_kernel_res(res) &&
626 		     task_active_pid_ns(current) != task_active_pid_ns(res->task)))
627 			/*
628 			 * 1. Kernel QPs should be visible in init namspace only
629 			 * 2. Present only QPs visible in the current namespace
630 			 */
631 			goto next;
632 
633 		if (!rdma_restrack_get(res))
634 			/*
635 			 * Resource is under release now, but we are not
636 			 * relesing lock now, so it will be released in
637 			 * our next pass, once we will get ->next pointer.
638 			 */
639 			goto next;
640 
641 		qp = container_of(res, struct ib_qp, res);
642 
643 		up_read(&device->res.rwsem);
644 		ret = fill_res_qp_entry(skb, qp, port);
645 		down_read(&device->res.rwsem);
646 		/*
647 		 * Return resource back, but it won't be released till
648 		 * the &device->res.rwsem will be released for write.
649 		 */
650 		rdma_restrack_put(res);
651 
652 		if (ret == -EMSGSIZE)
653 			/*
654 			 * There is a chance to optimize here.
655 			 * It can be done by using list_prepare_entry
656 			 * and list_for_each_entry_continue afterwards.
657 			 */
658 			break;
659 		if (ret)
660 			goto res_err;
661 next:		idx++;
662 	}
663 	up_read(&device->res.rwsem);
664 
665 	nla_nest_end(skb, table_attr);
666 	nlmsg_end(skb, nlh);
667 	cb->args[0] = idx;
668 
669 	/*
670 	 * No more QPs to fill, cancel the message and
671 	 * return 0 to mark end of dumpit.
672 	 */
673 	if (!qp)
674 		goto err;
675 
676 	put_device(&device->dev);
677 	return skb->len;
678 
679 res_err:
680 	nla_nest_cancel(skb, table_attr);
681 	up_read(&device->res.rwsem);
682 
683 err:
684 	nlmsg_cancel(skb, nlh);
685 
686 err_index:
687 	put_device(&device->dev);
688 	return ret;
689 }
690 
/*
 * Callback table registered for RDMA_NL_NLDEV by nldev_init(), indexed by
 * RDMA_NLDEV_CMD_*: .doit serves a request for a single object, .dump
 * serves a multi-part dump.
 */
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
	[RDMA_NLDEV_CMD_GET] = {
		.doit = nldev_get_doit,
		.dump = nldev_get_dumpit,
	},
	[RDMA_NLDEV_CMD_PORT_GET] = {
		.doit = nldev_port_get_doit,
		.dump = nldev_port_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_GET] = {
		.doit = nldev_res_get_doit,
		.dump = nldev_res_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET] = {
		.dump = nldev_res_get_qp_dumpit,
		/*
		 * .doit is not implemented yet for two reasons:
		 * 1. It is not needed yet.
		 * 2. There is a need to provide identifier, while it is easy
		 * for the QPs (device index + port index + LQPN), it is not
		 * the case for the rest of resources (PD and CQ). Because it
		 * is better to provide similar interface for all resources,
		 * let's wait till we will have other resources implemented
		 * too.
		 */
	},
};
718 
/* Register nldev_cb_table as the callback table for RDMA_NL_NLDEV. */
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
723 
/* Unregister the RDMA_NL_NLDEV callbacks installed by nldev_init(). */
void __exit nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}
728 
/*
 * NOTE(review): the literal 5 presumably has to match the numeric value of
 * RDMA_NL_NLDEV so the module alias string can be built at preprocessing
 * time - confirm against rdma_netlink.h.
 */
MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
730