xref: /openbmc/linux/drivers/infiniband/core/nldev.c (revision 9c6d26df1fae6ad4718d51c48e6517913304ed27)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <net/netlink.h>
37 #include <rdma/rdma_cm.h>
38 #include <rdma/rdma_netlink.h>
39 
40 #include "core_priv.h"
41 #include "cma_priv.h"
42 
43 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
44 	[RDMA_NLDEV_ATTR_DEV_INDEX]     = { .type = NLA_U32 },
45 	[RDMA_NLDEV_ATTR_DEV_NAME]	= { .type = NLA_NUL_STRING,
46 					    .len = IB_DEVICE_NAME_MAX - 1},
47 	[RDMA_NLDEV_ATTR_PORT_INDEX]	= { .type = NLA_U32 },
48 	[RDMA_NLDEV_ATTR_FW_VERSION]	= { .type = NLA_NUL_STRING,
49 					    .len = IB_FW_VERSION_NAME_MAX - 1},
50 	[RDMA_NLDEV_ATTR_NODE_GUID]	= { .type = NLA_U64 },
51 	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
52 	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]	= { .type = NLA_U64 },
53 	[RDMA_NLDEV_ATTR_LID]		= { .type = NLA_U32 },
54 	[RDMA_NLDEV_ATTR_SM_LID]	= { .type = NLA_U32 },
55 	[RDMA_NLDEV_ATTR_LMC]		= { .type = NLA_U8 },
56 	[RDMA_NLDEV_ATTR_PORT_STATE]	= { .type = NLA_U8 },
57 	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
58 	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
59 	[RDMA_NLDEV_ATTR_RES_SUMMARY]	= { .type = NLA_NESTED },
60 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
61 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
62 					     .len = 16 },
63 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
64 	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
65 	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
66 	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
67 	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
68 	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
69 	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
70 	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
71 	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
72 	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
73 	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
74 	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
75 						    .len = TASK_COMM_LEN },
76 	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
77 	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
78 	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
79 	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]	= {
80 			.len = sizeof(struct __kernel_sockaddr_storage) },
81 	[RDMA_NLDEV_ATTR_RES_DST_ADDR]	= {
82 			.len = sizeof(struct __kernel_sockaddr_storage) },
83 	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
84 	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
85 	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
86 	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
87 	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
88 	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
89 	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
90 	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
91 	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
92 	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
93 	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
94 	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
95 	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
96 	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
97 	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
98 	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
99 	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
100 						    .len = IFNAMSIZ },
101 };
102 
103 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
104 {
105 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
106 		return -EMSGSIZE;
107 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
108 		return -EMSGSIZE;
109 
110 	return 0;
111 }
112 
113 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
114 {
115 	char fw[IB_FW_VERSION_NAME_MAX];
116 
117 	if (fill_nldev_handle(msg, device))
118 		return -EMSGSIZE;
119 
120 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
121 		return -EMSGSIZE;
122 
123 	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
124 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
125 			      device->attrs.device_cap_flags, 0))
126 		return -EMSGSIZE;
127 
128 	ib_get_device_fw_str(device, fw);
129 	/* Device without FW has strlen(fw) = 0 */
130 	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
131 		return -EMSGSIZE;
132 
133 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
134 			      be64_to_cpu(device->node_guid), 0))
135 		return -EMSGSIZE;
136 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
137 			      be64_to_cpu(device->attrs.sys_image_guid), 0))
138 		return -EMSGSIZE;
139 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
140 		return -EMSGSIZE;
141 	return 0;
142 }
143 
144 static int fill_port_info(struct sk_buff *msg,
145 			  struct ib_device *device, u32 port,
146 			  const struct net *net)
147 {
148 	struct net_device *netdev = NULL;
149 	struct ib_port_attr attr;
150 	int ret;
151 
152 	if (fill_nldev_handle(msg, device))
153 		return -EMSGSIZE;
154 
155 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
156 		return -EMSGSIZE;
157 
158 	ret = ib_query_port(device, port, &attr);
159 	if (ret)
160 		return ret;
161 
162 	BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
163 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
164 			      (u64)attr.port_cap_flags, 0))
165 		return -EMSGSIZE;
166 	if (rdma_protocol_ib(device, port) &&
167 	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
168 			      attr.subnet_prefix, 0))
169 		return -EMSGSIZE;
170 	if (rdma_protocol_ib(device, port)) {
171 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
172 			return -EMSGSIZE;
173 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
174 			return -EMSGSIZE;
175 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
176 			return -EMSGSIZE;
177 	}
178 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
179 		return -EMSGSIZE;
180 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
181 		return -EMSGSIZE;
182 
183 	if (device->get_netdev)
184 		netdev = device->get_netdev(device, port);
185 
186 	if (netdev && net_eq(dev_net(netdev), net)) {
187 		ret = nla_put_u32(msg,
188 				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
189 		if (ret)
190 			goto out;
191 		ret = nla_put_string(msg,
192 				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
193 	}
194 
195 out:
196 	if (netdev)
197 		dev_put(netdev);
198 	return ret;
199 }
200 
201 static int fill_res_info_entry(struct sk_buff *msg,
202 			       const char *name, u64 curr)
203 {
204 	struct nlattr *entry_attr;
205 
206 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
207 	if (!entry_attr)
208 		return -EMSGSIZE;
209 
210 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
211 		goto err;
212 	if (nla_put_u64_64bit(msg,
213 			      RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
214 		goto err;
215 
216 	nla_nest_end(msg, entry_attr);
217 	return 0;
218 
219 err:
220 	nla_nest_cancel(msg, entry_attr);
221 	return -EMSGSIZE;
222 }
223 
224 static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
225 {
226 	static const char * const names[RDMA_RESTRACK_MAX] = {
227 		[RDMA_RESTRACK_PD] = "pd",
228 		[RDMA_RESTRACK_CQ] = "cq",
229 		[RDMA_RESTRACK_QP] = "qp",
230 		[RDMA_RESTRACK_CM_ID] = "cm_id",
231 		[RDMA_RESTRACK_MR] = "mr",
232 	};
233 
234 	struct rdma_restrack_root *res = &device->res;
235 	struct nlattr *table_attr;
236 	int ret, i, curr;
237 
238 	if (fill_nldev_handle(msg, device))
239 		return -EMSGSIZE;
240 
241 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
242 	if (!table_attr)
243 		return -EMSGSIZE;
244 
245 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
246 		if (!names[i])
247 			continue;
248 		curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
249 		ret = fill_res_info_entry(msg, names[i], curr);
250 		if (ret)
251 			goto err;
252 	}
253 
254 	nla_nest_end(msg, table_attr);
255 	return 0;
256 
257 err:
258 	nla_nest_cancel(msg, table_attr);
259 	return ret;
260 }
261 
262 static int fill_res_name_pid(struct sk_buff *msg,
263 			     struct rdma_restrack_entry *res)
264 {
265 	/*
266 	 * For user resources, user is should read /proc/PID/comm to get the
267 	 * name of the task file.
268 	 */
269 	if (rdma_is_kernel_res(res)) {
270 		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
271 		    res->kern_name))
272 			return -EMSGSIZE;
273 	} else {
274 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
275 		    task_pid_vnr(res->task)))
276 			return -EMSGSIZE;
277 	}
278 	return 0;
279 }
280 
281 static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
282 			     struct rdma_restrack_entry *res, uint32_t port)
283 {
284 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
285 	struct ib_qp_init_attr qp_init_attr;
286 	struct nlattr *entry_attr;
287 	struct ib_qp_attr qp_attr;
288 	int ret;
289 
290 	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
291 	if (ret)
292 		return ret;
293 
294 	if (port && port != qp_attr.port_num)
295 		return 0;
296 
297 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
298 	if (!entry_attr)
299 		goto out;
300 
301 	/* In create_qp() port is not set yet */
302 	if (qp_attr.port_num &&
303 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
304 		goto err;
305 
306 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
307 		goto err;
308 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
309 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
310 				qp_attr.dest_qp_num))
311 			goto err;
312 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
313 				qp_attr.rq_psn))
314 			goto err;
315 	}
316 
317 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
318 		goto err;
319 
320 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
321 	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
322 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
323 			       qp_attr.path_mig_state))
324 			goto err;
325 	}
326 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
327 		goto err;
328 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
329 		goto err;
330 
331 	if (fill_res_name_pid(msg, res))
332 		goto err;
333 
334 	nla_nest_end(msg, entry_attr);
335 	return 0;
336 
337 err:
338 	nla_nest_cancel(msg, entry_attr);
339 out:
340 	return -EMSGSIZE;
341 }
342 
343 static int fill_res_cm_id_entry(struct sk_buff *msg,
344 				struct netlink_callback *cb,
345 				struct rdma_restrack_entry *res, uint32_t port)
346 {
347 	struct rdma_id_private *id_priv =
348 				container_of(res, struct rdma_id_private, res);
349 	struct rdma_cm_id *cm_id = &id_priv->id;
350 	struct nlattr *entry_attr;
351 
352 	if (port && port != cm_id->port_num)
353 		return 0;
354 
355 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
356 	if (!entry_attr)
357 		goto out;
358 
359 	if (cm_id->port_num &&
360 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
361 		goto err;
362 
363 	if (id_priv->qp_num) {
364 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
365 			goto err;
366 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
367 			goto err;
368 	}
369 
370 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
371 		goto err;
372 
373 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
374 		goto err;
375 
376 	if (cm_id->route.addr.src_addr.ss_family &&
377 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
378 		    sizeof(cm_id->route.addr.src_addr),
379 		    &cm_id->route.addr.src_addr))
380 		goto err;
381 	if (cm_id->route.addr.dst_addr.ss_family &&
382 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
383 		    sizeof(cm_id->route.addr.dst_addr),
384 		    &cm_id->route.addr.dst_addr))
385 		goto err;
386 
387 	if (fill_res_name_pid(msg, res))
388 		goto err;
389 
390 	nla_nest_end(msg, entry_attr);
391 	return 0;
392 
393 err:
394 	nla_nest_cancel(msg, entry_attr);
395 out:
396 	return -EMSGSIZE;
397 }
398 
399 static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
400 			     struct rdma_restrack_entry *res, uint32_t port)
401 {
402 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
403 	struct nlattr *entry_attr;
404 
405 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
406 	if (!entry_attr)
407 		goto out;
408 
409 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
410 		goto err;
411 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
412 			      atomic_read(&cq->usecnt), 0))
413 		goto err;
414 
415 	/* Poll context is only valid for kernel CQs */
416 	if (rdma_is_kernel_res(res) &&
417 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
418 		goto err;
419 
420 	if (fill_res_name_pid(msg, res))
421 		goto err;
422 
423 	nla_nest_end(msg, entry_attr);
424 	return 0;
425 
426 err:
427 	nla_nest_cancel(msg, entry_attr);
428 out:
429 	return -EMSGSIZE;
430 }
431 
432 static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
433 			     struct rdma_restrack_entry *res, uint32_t port)
434 {
435 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
436 	struct nlattr *entry_attr;
437 
438 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
439 	if (!entry_attr)
440 		goto out;
441 
442 	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
443 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
444 			goto err;
445 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
446 			goto err;
447 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
448 				      mr->iova, 0))
449 			goto err;
450 	}
451 
452 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
453 		goto err;
454 
455 	if (fill_res_name_pid(msg, res))
456 		goto err;
457 
458 	nla_nest_end(msg, entry_attr);
459 	return 0;
460 
461 err:
462 	nla_nest_cancel(msg, entry_attr);
463 out:
464 	return -EMSGSIZE;
465 }
466 
467 static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
468 			     struct rdma_restrack_entry *res, uint32_t port)
469 {
470 	struct ib_pd *pd = container_of(res, struct ib_pd, res);
471 	struct nlattr *entry_attr;
472 
473 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
474 	if (!entry_attr)
475 		goto out;
476 
477 	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
478 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
479 				pd->local_dma_lkey))
480 			goto err;
481 		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
482 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
483 				pd->unsafe_global_rkey))
484 			goto err;
485 	}
486 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
487 			      atomic_read(&pd->usecnt), 0))
488 		goto err;
489 	if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
490 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
491 			pd->unsafe_global_rkey))
492 		goto err;
493 
494 	if (fill_res_name_pid(msg, res))
495 		goto err;
496 
497 	nla_nest_end(msg, entry_attr);
498 	return 0;
499 
500 err:
501 	nla_nest_cancel(msg, entry_attr);
502 out:
503 	return -EMSGSIZE;
504 }
505 
506 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
507 			  struct netlink_ext_ack *extack)
508 {
509 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
510 	struct ib_device *device;
511 	struct sk_buff *msg;
512 	u32 index;
513 	int err;
514 
515 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
516 			  nldev_policy, extack);
517 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
518 		return -EINVAL;
519 
520 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
521 
522 	device = ib_device_get_by_index(index);
523 	if (!device)
524 		return -EINVAL;
525 
526 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
527 	if (!msg) {
528 		err = -ENOMEM;
529 		goto err;
530 	}
531 
532 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
533 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
534 			0, 0);
535 
536 	err = fill_dev_info(msg, device);
537 	if (err)
538 		goto err_free;
539 
540 	nlmsg_end(msg, nlh);
541 
542 	put_device(&device->dev);
543 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
544 
545 err_free:
546 	nlmsg_free(msg);
547 err:
548 	put_device(&device->dev);
549 	return err;
550 }
551 
552 static int _nldev_get_dumpit(struct ib_device *device,
553 			     struct sk_buff *skb,
554 			     struct netlink_callback *cb,
555 			     unsigned int idx)
556 {
557 	int start = cb->args[0];
558 	struct nlmsghdr *nlh;
559 
560 	if (idx < start)
561 		return 0;
562 
563 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
564 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
565 			0, NLM_F_MULTI);
566 
567 	if (fill_dev_info(skb, device)) {
568 		nlmsg_cancel(skb, nlh);
569 		goto out;
570 	}
571 
572 	nlmsg_end(skb, nlh);
573 
574 	idx++;
575 
576 out:	cb->args[0] = idx;
577 	return skb->len;
578 }
579 
580 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
581 {
582 	/*
583 	 * There is no need to take lock, because
584 	 * we are relying on ib_core's lists_rwsem
585 	 */
586 	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
587 }
588 
589 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
590 			       struct netlink_ext_ack *extack)
591 {
592 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
593 	struct ib_device *device;
594 	struct sk_buff *msg;
595 	u32 index;
596 	u32 port;
597 	int err;
598 
599 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
600 			  nldev_policy, extack);
601 	if (err ||
602 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
603 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
604 		return -EINVAL;
605 
606 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
607 	device = ib_device_get_by_index(index);
608 	if (!device)
609 		return -EINVAL;
610 
611 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
612 	if (!rdma_is_port_valid(device, port)) {
613 		err = -EINVAL;
614 		goto err;
615 	}
616 
617 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
618 	if (!msg) {
619 		err = -ENOMEM;
620 		goto err;
621 	}
622 
623 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
624 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
625 			0, 0);
626 
627 	err = fill_port_info(msg, device, port, sock_net(skb->sk));
628 	if (err)
629 		goto err_free;
630 
631 	nlmsg_end(msg, nlh);
632 	put_device(&device->dev);
633 
634 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
635 
636 err_free:
637 	nlmsg_free(msg);
638 err:
639 	put_device(&device->dev);
640 	return err;
641 }
642 
643 static int nldev_port_get_dumpit(struct sk_buff *skb,
644 				 struct netlink_callback *cb)
645 {
646 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
647 	struct ib_device *device;
648 	int start = cb->args[0];
649 	struct nlmsghdr *nlh;
650 	u32 idx = 0;
651 	u32 ifindex;
652 	int err;
653 	u32 p;
654 
655 	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
656 			  nldev_policy, NULL);
657 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
658 		return -EINVAL;
659 
660 	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
661 	device = ib_device_get_by_index(ifindex);
662 	if (!device)
663 		return -EINVAL;
664 
665 	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
666 		/*
667 		 * The dumpit function returns all information from specific
668 		 * index. This specific index is taken from the netlink
669 		 * messages request sent by user and it is available
670 		 * in cb->args[0].
671 		 *
672 		 * Usually, the user doesn't fill this field and it causes
673 		 * to return everything.
674 		 *
675 		 */
676 		if (idx < start) {
677 			idx++;
678 			continue;
679 		}
680 
681 		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
682 				cb->nlh->nlmsg_seq,
683 				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
684 						 RDMA_NLDEV_CMD_PORT_GET),
685 				0, NLM_F_MULTI);
686 
687 		if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
688 			nlmsg_cancel(skb, nlh);
689 			goto out;
690 		}
691 		idx++;
692 		nlmsg_end(skb, nlh);
693 	}
694 
695 out:
696 	put_device(&device->dev);
697 	cb->args[0] = idx;
698 	return skb->len;
699 }
700 
701 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
702 			      struct netlink_ext_ack *extack)
703 {
704 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
705 	struct ib_device *device;
706 	struct sk_buff *msg;
707 	u32 index;
708 	int ret;
709 
710 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
711 			  nldev_policy, extack);
712 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
713 		return -EINVAL;
714 
715 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
716 	device = ib_device_get_by_index(index);
717 	if (!device)
718 		return -EINVAL;
719 
720 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
721 	if (!msg) {
722 		ret = -ENOMEM;
723 		goto err;
724 	}
725 
726 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
727 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
728 			0, 0);
729 
730 	ret = fill_res_info(msg, device);
731 	if (ret)
732 		goto err_free;
733 
734 	nlmsg_end(msg, nlh);
735 	put_device(&device->dev);
736 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
737 
738 err_free:
739 	nlmsg_free(msg);
740 err:
741 	put_device(&device->dev);
742 	return ret;
743 }
744 
745 static int _nldev_res_get_dumpit(struct ib_device *device,
746 				 struct sk_buff *skb,
747 				 struct netlink_callback *cb,
748 				 unsigned int idx)
749 {
750 	int start = cb->args[0];
751 	struct nlmsghdr *nlh;
752 
753 	if (idx < start)
754 		return 0;
755 
756 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
757 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
758 			0, NLM_F_MULTI);
759 
760 	if (fill_res_info(skb, device)) {
761 		nlmsg_cancel(skb, nlh);
762 		goto out;
763 	}
764 
765 	nlmsg_end(skb, nlh);
766 
767 	idx++;
768 
769 out:
770 	cb->args[0] = idx;
771 	return skb->len;
772 }
773 
774 static int nldev_res_get_dumpit(struct sk_buff *skb,
775 				struct netlink_callback *cb)
776 {
777 	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
778 }
779 
780 struct nldev_fill_res_entry {
781 	int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb,
782 			     struct rdma_restrack_entry *res, u32 port);
783 	enum rdma_nldev_attr nldev_attr;
784 	enum rdma_nldev_command nldev_cmd;
785 };
786 
787 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
788 	[RDMA_RESTRACK_QP] = {
789 		.fill_res_func = fill_res_qp_entry,
790 		.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
791 		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
792 	},
793 	[RDMA_RESTRACK_CM_ID] = {
794 		.fill_res_func = fill_res_cm_id_entry,
795 		.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
796 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
797 	},
798 	[RDMA_RESTRACK_CQ] = {
799 		.fill_res_func = fill_res_cq_entry,
800 		.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
801 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
802 	},
803 	[RDMA_RESTRACK_MR] = {
804 		.fill_res_func = fill_res_mr_entry,
805 		.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
806 		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
807 	},
808 	[RDMA_RESTRACK_PD] = {
809 		.fill_res_func = fill_res_pd_entry,
810 		.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
811 		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
812 	},
813 };
814 
815 static int res_get_common_dumpit(struct sk_buff *skb,
816 				 struct netlink_callback *cb,
817 				 enum rdma_restrack_type res_type)
818 {
819 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
820 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
821 	struct rdma_restrack_entry *res;
822 	int err, ret = 0, idx = 0;
823 	struct nlattr *table_attr;
824 	struct ib_device *device;
825 	int start = cb->args[0];
826 	struct nlmsghdr *nlh;
827 	u32 index, port = 0;
828 	bool filled = false;
829 
830 	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
831 			  nldev_policy, NULL);
832 	/*
833 	 * Right now, we are expecting the device index to get res information,
834 	 * but it is possible to extend this code to return all devices in
835 	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
836 	 * if it doesn't exist, we will iterate over all devices.
837 	 *
838 	 * But it is not needed for now.
839 	 */
840 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
841 		return -EINVAL;
842 
843 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
844 	device = ib_device_get_by_index(index);
845 	if (!device)
846 		return -EINVAL;
847 
848 	/*
849 	 * If no PORT_INDEX is supplied, we will return all QPs from that device
850 	 */
851 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
852 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
853 		if (!rdma_is_port_valid(device, port)) {
854 			ret = -EINVAL;
855 			goto err_index;
856 		}
857 	}
858 
859 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
860 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
861 			0, NLM_F_MULTI);
862 
863 	if (fill_nldev_handle(skb, device)) {
864 		ret = -EMSGSIZE;
865 		goto err;
866 	}
867 
868 	table_attr = nla_nest_start(skb, fe->nldev_attr);
869 	if (!table_attr) {
870 		ret = -EMSGSIZE;
871 		goto err;
872 	}
873 
874 	down_read(&device->res.rwsem);
875 	hash_for_each_possible(device->res.hash, res, node, res_type) {
876 		if (idx < start)
877 			goto next;
878 
879 		if ((rdma_is_kernel_res(res) &&
880 		     task_active_pid_ns(current) != &init_pid_ns) ||
881 		    (!rdma_is_kernel_res(res) && task_active_pid_ns(current) !=
882 		     task_active_pid_ns(res->task)))
883 			/*
884 			 * 1. Kern resources should be visible in init
885 			 *    namspace only
886 			 * 2. Present only resources visible in the current
887 			 *    namespace
888 			 */
889 			goto next;
890 
891 		if (!rdma_restrack_get(res))
892 			/*
893 			 * Resource is under release now, but we are not
894 			 * relesing lock now, so it will be released in
895 			 * our next pass, once we will get ->next pointer.
896 			 */
897 			goto next;
898 
899 		filled = true;
900 
901 		up_read(&device->res.rwsem);
902 		ret = fe->fill_res_func(skb, cb, res, port);
903 		down_read(&device->res.rwsem);
904 		/*
905 		 * Return resource back, but it won't be released till
906 		 * the &device->res.rwsem will be released for write.
907 		 */
908 		rdma_restrack_put(res);
909 
910 		if (ret == -EMSGSIZE)
911 			/*
912 			 * There is a chance to optimize here.
913 			 * It can be done by using list_prepare_entry
914 			 * and list_for_each_entry_continue afterwards.
915 			 */
916 			break;
917 		if (ret)
918 			goto res_err;
919 next:		idx++;
920 	}
921 	up_read(&device->res.rwsem);
922 
923 	nla_nest_end(skb, table_attr);
924 	nlmsg_end(skb, nlh);
925 	cb->args[0] = idx;
926 
927 	/*
928 	 * No more entries to fill, cancel the message and
929 	 * return 0 to mark end of dumpit.
930 	 */
931 	if (!filled)
932 		goto err;
933 
934 	put_device(&device->dev);
935 	return skb->len;
936 
937 res_err:
938 	nla_nest_cancel(skb, table_attr);
939 	up_read(&device->res.rwsem);
940 
941 err:
942 	nlmsg_cancel(skb, nlh);
943 
944 err_index:
945 	put_device(&device->dev);
946 	return ret;
947 }
948 
949 static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
950 				   struct netlink_callback *cb)
951 {
952 	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP);
953 }
954 
955 static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
956 				      struct netlink_callback *cb)
957 {
958 	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID);
959 }
960 
961 static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
962 				   struct netlink_callback *cb)
963 {
964 	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ);
965 }
966 
967 static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
968 				   struct netlink_callback *cb)
969 {
970 	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR);
971 }
972 
973 static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
974 				   struct netlink_callback *cb)
975 {
976 	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
977 }
978 
979 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
980 	[RDMA_NLDEV_CMD_GET] = {
981 		.doit = nldev_get_doit,
982 		.dump = nldev_get_dumpit,
983 	},
984 	[RDMA_NLDEV_CMD_PORT_GET] = {
985 		.doit = nldev_port_get_doit,
986 		.dump = nldev_port_get_dumpit,
987 	},
988 	[RDMA_NLDEV_CMD_RES_GET] = {
989 		.doit = nldev_res_get_doit,
990 		.dump = nldev_res_get_dumpit,
991 	},
992 	[RDMA_NLDEV_CMD_RES_QP_GET] = {
993 		.dump = nldev_res_get_qp_dumpit,
994 		/*
995 		 * .doit is not implemented yet for two reasons:
996 		 * 1. It is not needed yet.
997 		 * 2. There is a need to provide identifier, while it is easy
998 		 * for the QPs (device index + port index + LQPN), it is not
999 		 * the case for the rest of resources (PD and CQ). Because it
1000 		 * is better to provide similar interface for all resources,
1001 		 * let's wait till we will have other resources implemented
1002 		 * too.
1003 		 */
1004 	},
1005 	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
1006 		.dump = nldev_res_get_cm_id_dumpit,
1007 	},
1008 	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
1009 		.dump = nldev_res_get_cq_dumpit,
1010 	},
1011 	[RDMA_NLDEV_CMD_RES_MR_GET] = {
1012 		.dump = nldev_res_get_mr_dumpit,
1013 	},
1014 	[RDMA_NLDEV_CMD_RES_PD_GET] = {
1015 		.dump = nldev_res_get_pd_dumpit,
1016 	},
1017 };
1018 
1019 void __init nldev_init(void)
1020 {
1021 	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
1022 }
1023 
1024 void __exit nldev_exit(void)
1025 {
1026 	rdma_nl_unregister(RDMA_NL_NLDEV);
1027 }
1028 
1029 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
1030