xref: /openbmc/linux/drivers/infiniband/core/nldev.c (revision 2f0f2441b4a10948e2ec042b48fef13680387f7c)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40 
41 #include "core_priv.h"
42 #include "cma_priv.h"
43 #include "restrack.h"
44 
45 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
46 	[RDMA_NLDEV_ATTR_DEV_INDEX]     = { .type = NLA_U32 },
47 	[RDMA_NLDEV_ATTR_DEV_NAME]	= { .type = NLA_NUL_STRING,
48 					    .len = IB_DEVICE_NAME_MAX - 1},
49 	[RDMA_NLDEV_ATTR_PORT_INDEX]	= { .type = NLA_U32 },
50 	[RDMA_NLDEV_ATTR_FW_VERSION]	= { .type = NLA_NUL_STRING,
51 					    .len = IB_FW_VERSION_NAME_MAX - 1},
52 	[RDMA_NLDEV_ATTR_NODE_GUID]	= { .type = NLA_U64 },
53 	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
54 	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]	= { .type = NLA_U64 },
55 	[RDMA_NLDEV_ATTR_LID]		= { .type = NLA_U32 },
56 	[RDMA_NLDEV_ATTR_SM_LID]	= { .type = NLA_U32 },
57 	[RDMA_NLDEV_ATTR_LMC]		= { .type = NLA_U8 },
58 	[RDMA_NLDEV_ATTR_PORT_STATE]	= { .type = NLA_U8 },
59 	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
60 	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
61 	[RDMA_NLDEV_ATTR_RES_SUMMARY]	= { .type = NLA_NESTED },
62 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
63 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
64 					     .len = 16 },
65 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
66 	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
67 	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
68 	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
69 	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
70 	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
71 	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
72 	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
73 	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
74 	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
75 	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
76 	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
77 						    .len = TASK_COMM_LEN },
78 	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
79 	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
80 	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
81 	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]	= {
82 			.len = sizeof(struct __kernel_sockaddr_storage) },
83 	[RDMA_NLDEV_ATTR_RES_DST_ADDR]	= {
84 			.len = sizeof(struct __kernel_sockaddr_storage) },
85 	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
86 	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
87 	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
88 	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
89 	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
90 	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
91 	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
92 	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
93 	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
94 	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
95 	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
96 	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
97 	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
98 	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
99 	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
100 	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
101 	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
102 						    .len = IFNAMSIZ },
103 	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
104 	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
105 	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
106 				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
107 	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
108 	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
109 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
110 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
111 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
112 	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
113 	[RDMA_NLDEV_ATTR_RES_CQN]               = { .type = NLA_U32 },
114 	[RDMA_NLDEV_ATTR_RES_MRN]               = { .type = NLA_U32 },
115 	[RDMA_NLDEV_ATTR_RES_CM_IDN]            = { .type = NLA_U32 },
116 	[RDMA_NLDEV_ATTR_RES_CTXN]              = { .type = NLA_U32 },
117 	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
118 				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
119 	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
120 	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
121 				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
122 	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
123 };
124 
125 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
126 				      enum rdma_nldev_print_type print_type)
127 {
128 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
129 		return -EMSGSIZE;
130 	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
131 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
132 		return -EMSGSIZE;
133 
134 	return 0;
135 }
136 
137 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
138 				   enum rdma_nldev_print_type print_type,
139 				   u32 value)
140 {
141 	if (put_driver_name_print_type(msg, name, print_type))
142 		return -EMSGSIZE;
143 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
144 		return -EMSGSIZE;
145 
146 	return 0;
147 }
148 
149 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
150 				   enum rdma_nldev_print_type print_type,
151 				   u64 value)
152 {
153 	if (put_driver_name_print_type(msg, name, print_type))
154 		return -EMSGSIZE;
155 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
156 			      RDMA_NLDEV_ATTR_PAD))
157 		return -EMSGSIZE;
158 
159 	return 0;
160 }
161 
162 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
163 {
164 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
165 				       value);
166 }
167 EXPORT_SYMBOL(rdma_nl_put_driver_u32);
168 
169 int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
170 			       u32 value)
171 {
172 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
173 				       value);
174 }
175 EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
176 
177 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
178 {
179 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
180 				       value);
181 }
182 EXPORT_SYMBOL(rdma_nl_put_driver_u64);
183 
184 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
185 {
186 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
187 				       value);
188 }
189 EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
190 
191 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
192 {
193 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
194 		return -EMSGSIZE;
195 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
196 			   dev_name(&device->dev)))
197 		return -EMSGSIZE;
198 
199 	return 0;
200 }
201 
202 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
203 {
204 	char fw[IB_FW_VERSION_NAME_MAX];
205 	int ret = 0;
206 	u8 port;
207 
208 	if (fill_nldev_handle(msg, device))
209 		return -EMSGSIZE;
210 
211 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
212 		return -EMSGSIZE;
213 
214 	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
215 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
216 			      device->attrs.device_cap_flags,
217 			      RDMA_NLDEV_ATTR_PAD))
218 		return -EMSGSIZE;
219 
220 	ib_get_device_fw_str(device, fw);
221 	/* Device without FW has strlen(fw) = 0 */
222 	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
223 		return -EMSGSIZE;
224 
225 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
226 			      be64_to_cpu(device->node_guid),
227 			      RDMA_NLDEV_ATTR_PAD))
228 		return -EMSGSIZE;
229 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
230 			      be64_to_cpu(device->attrs.sys_image_guid),
231 			      RDMA_NLDEV_ATTR_PAD))
232 		return -EMSGSIZE;
233 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
234 		return -EMSGSIZE;
235 
236 	/*
237 	 * Link type is determined on first port and mlx4 device
238 	 * which can potentially have two different link type for the same
239 	 * IB device is considered as better to be avoided in the future,
240 	 */
241 	port = rdma_start_port(device);
242 	if (rdma_cap_opa_mad(device, port))
243 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
244 	else if (rdma_protocol_ib(device, port))
245 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
246 	else if (rdma_protocol_iwarp(device, port))
247 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
248 	else if (rdma_protocol_roce(device, port))
249 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
250 	else if (rdma_protocol_usnic(device, port))
251 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
252 				     "usnic");
253 	return ret;
254 }
255 
256 static int fill_port_info(struct sk_buff *msg,
257 			  struct ib_device *device, u32 port,
258 			  const struct net *net)
259 {
260 	struct net_device *netdev = NULL;
261 	struct ib_port_attr attr;
262 	int ret;
263 	u64 cap_flags = 0;
264 
265 	if (fill_nldev_handle(msg, device))
266 		return -EMSGSIZE;
267 
268 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
269 		return -EMSGSIZE;
270 
271 	ret = ib_query_port(device, port, &attr);
272 	if (ret)
273 		return ret;
274 
275 	if (rdma_protocol_ib(device, port)) {
276 		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
277 				sizeof(attr.port_cap_flags2)) > sizeof(u64));
278 		cap_flags = attr.port_cap_flags |
279 			((u64)attr.port_cap_flags2 << 32);
280 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
281 				      cap_flags, RDMA_NLDEV_ATTR_PAD))
282 			return -EMSGSIZE;
283 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
284 				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
285 			return -EMSGSIZE;
286 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
287 			return -EMSGSIZE;
288 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
289 			return -EMSGSIZE;
290 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
291 			return -EMSGSIZE;
292 	}
293 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
294 		return -EMSGSIZE;
295 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
296 		return -EMSGSIZE;
297 
298 	netdev = ib_device_get_netdev(device, port);
299 	if (netdev && net_eq(dev_net(netdev), net)) {
300 		ret = nla_put_u32(msg,
301 				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
302 		if (ret)
303 			goto out;
304 		ret = nla_put_string(msg,
305 				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
306 	}
307 
308 out:
309 	if (netdev)
310 		dev_put(netdev);
311 	return ret;
312 }
313 
314 static int fill_res_info_entry(struct sk_buff *msg,
315 			       const char *name, u64 curr)
316 {
317 	struct nlattr *entry_attr;
318 
319 	entry_attr = nla_nest_start_noflag(msg,
320 					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
321 	if (!entry_attr)
322 		return -EMSGSIZE;
323 
324 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
325 		goto err;
326 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
327 			      RDMA_NLDEV_ATTR_PAD))
328 		goto err;
329 
330 	nla_nest_end(msg, entry_attr);
331 	return 0;
332 
333 err:
334 	nla_nest_cancel(msg, entry_attr);
335 	return -EMSGSIZE;
336 }
337 
338 static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
339 {
340 	static const char * const names[RDMA_RESTRACK_MAX] = {
341 		[RDMA_RESTRACK_PD] = "pd",
342 		[RDMA_RESTRACK_CQ] = "cq",
343 		[RDMA_RESTRACK_QP] = "qp",
344 		[RDMA_RESTRACK_CM_ID] = "cm_id",
345 		[RDMA_RESTRACK_MR] = "mr",
346 		[RDMA_RESTRACK_CTX] = "ctx",
347 	};
348 
349 	struct nlattr *table_attr;
350 	int ret, i, curr;
351 
352 	if (fill_nldev_handle(msg, device))
353 		return -EMSGSIZE;
354 
355 	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
356 	if (!table_attr)
357 		return -EMSGSIZE;
358 
359 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
360 		if (!names[i])
361 			continue;
362 		curr = rdma_restrack_count(device, i,
363 					   task_active_pid_ns(current));
364 		ret = fill_res_info_entry(msg, names[i], curr);
365 		if (ret)
366 			goto err;
367 	}
368 
369 	nla_nest_end(msg, table_attr);
370 	return 0;
371 
372 err:
373 	nla_nest_cancel(msg, table_attr);
374 	return ret;
375 }
376 
377 static int fill_res_name_pid(struct sk_buff *msg,
378 			     struct rdma_restrack_entry *res)
379 {
380 	/*
381 	 * For user resources, user is should read /proc/PID/comm to get the
382 	 * name of the task file.
383 	 */
384 	if (rdma_is_kernel_res(res)) {
385 		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
386 		    res->kern_name))
387 			return -EMSGSIZE;
388 	} else {
389 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
390 		    task_pid_vnr(res->task)))
391 			return -EMSGSIZE;
392 	}
393 	return 0;
394 }
395 
396 static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
397 			   struct rdma_restrack_entry *res)
398 {
399 	if (!dev->ops.fill_res_entry)
400 		return false;
401 	return dev->ops.fill_res_entry(msg, res);
402 }
403 
404 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
405 			     struct rdma_restrack_entry *res, uint32_t port)
406 {
407 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
408 	struct ib_device *dev = qp->device;
409 	struct ib_qp_init_attr qp_init_attr;
410 	struct ib_qp_attr qp_attr;
411 	int ret;
412 
413 	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
414 	if (ret)
415 		return ret;
416 
417 	if (port && port != qp_attr.port_num)
418 		return -EAGAIN;
419 
420 	/* In create_qp() port is not set yet */
421 	if (qp_attr.port_num &&
422 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
423 		goto err;
424 
425 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
426 		goto err;
427 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
428 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
429 				qp_attr.dest_qp_num))
430 			goto err;
431 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
432 				qp_attr.rq_psn))
433 			goto err;
434 	}
435 
436 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
437 		goto err;
438 
439 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
440 	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
441 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
442 			       qp_attr.path_mig_state))
443 			goto err;
444 	}
445 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
446 		goto err;
447 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
448 		goto err;
449 
450 	if (!rdma_is_kernel_res(res) &&
451 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
452 		goto err;
453 
454 	if (fill_res_name_pid(msg, res))
455 		goto err;
456 
457 	if (fill_res_entry(dev, msg, res))
458 		goto err;
459 
460 	return 0;
461 
462 err:	return -EMSGSIZE;
463 }
464 
465 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
466 				struct rdma_restrack_entry *res, uint32_t port)
467 {
468 	struct rdma_id_private *id_priv =
469 				container_of(res, struct rdma_id_private, res);
470 	struct ib_device *dev = id_priv->id.device;
471 	struct rdma_cm_id *cm_id = &id_priv->id;
472 
473 	if (port && port != cm_id->port_num)
474 		return 0;
475 
476 	if (cm_id->port_num &&
477 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
478 		goto err;
479 
480 	if (id_priv->qp_num) {
481 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
482 			goto err;
483 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
484 			goto err;
485 	}
486 
487 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
488 		goto err;
489 
490 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
491 		goto err;
492 
493 	if (cm_id->route.addr.src_addr.ss_family &&
494 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
495 		    sizeof(cm_id->route.addr.src_addr),
496 		    &cm_id->route.addr.src_addr))
497 		goto err;
498 	if (cm_id->route.addr.dst_addr.ss_family &&
499 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
500 		    sizeof(cm_id->route.addr.dst_addr),
501 		    &cm_id->route.addr.dst_addr))
502 		goto err;
503 
504 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
505 		goto err;
506 
507 	if (fill_res_name_pid(msg, res))
508 		goto err;
509 
510 	if (fill_res_entry(dev, msg, res))
511 		goto err;
512 
513 	return 0;
514 
515 err: return -EMSGSIZE;
516 }
517 
518 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
519 			     struct rdma_restrack_entry *res, uint32_t port)
520 {
521 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
522 	struct ib_device *dev = cq->device;
523 
524 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
525 		goto err;
526 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
527 			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
528 		goto err;
529 
530 	/* Poll context is only valid for kernel CQs */
531 	if (rdma_is_kernel_res(res) &&
532 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
533 		goto err;
534 
535 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
536 		goto err;
537 	if (!rdma_is_kernel_res(res) &&
538 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
539 			cq->uobject->context->res.id))
540 		goto err;
541 
542 	if (fill_res_name_pid(msg, res))
543 		goto err;
544 
545 	if (fill_res_entry(dev, msg, res))
546 		goto err;
547 
548 	return 0;
549 
550 err:	return -EMSGSIZE;
551 }
552 
553 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
554 			     struct rdma_restrack_entry *res, uint32_t port)
555 {
556 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
557 	struct ib_device *dev = mr->pd->device;
558 
559 	if (has_cap_net_admin) {
560 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
561 			goto err;
562 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
563 			goto err;
564 	}
565 
566 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
567 			      RDMA_NLDEV_ATTR_PAD))
568 		goto err;
569 
570 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
571 		goto err;
572 
573 	if (!rdma_is_kernel_res(res) &&
574 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
575 		goto err;
576 
577 	if (fill_res_name_pid(msg, res))
578 		goto err;
579 
580 	if (fill_res_entry(dev, msg, res))
581 		goto err;
582 
583 	return 0;
584 
585 err:	return -EMSGSIZE;
586 }
587 
588 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
589 			     struct rdma_restrack_entry *res, uint32_t port)
590 {
591 	struct ib_pd *pd = container_of(res, struct ib_pd, res);
592 	struct ib_device *dev = pd->device;
593 
594 	if (has_cap_net_admin) {
595 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
596 				pd->local_dma_lkey))
597 			goto err;
598 		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
599 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
600 				pd->unsafe_global_rkey))
601 			goto err;
602 	}
603 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
604 			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
605 		goto err;
606 
607 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
608 		goto err;
609 
610 	if (!rdma_is_kernel_res(res) &&
611 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
612 			pd->uobject->context->res.id))
613 		goto err;
614 
615 	if (fill_res_name_pid(msg, res))
616 		goto err;
617 
618 	if (fill_res_entry(dev, msg, res))
619 		goto err;
620 
621 	return 0;
622 
623 err:	return -EMSGSIZE;
624 }
625 
626 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
627 			  struct netlink_ext_ack *extack)
628 {
629 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
630 	struct ib_device *device;
631 	struct sk_buff *msg;
632 	u32 index;
633 	int err;
634 
635 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
636 				     nldev_policy, extack);
637 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
638 		return -EINVAL;
639 
640 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
641 
642 	device = ib_device_get_by_index(sock_net(skb->sk), index);
643 	if (!device)
644 		return -EINVAL;
645 
646 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
647 	if (!msg) {
648 		err = -ENOMEM;
649 		goto err;
650 	}
651 
652 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
653 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
654 			0, 0);
655 
656 	err = fill_dev_info(msg, device);
657 	if (err)
658 		goto err_free;
659 
660 	nlmsg_end(msg, nlh);
661 
662 	ib_device_put(device);
663 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
664 
665 err_free:
666 	nlmsg_free(msg);
667 err:
668 	ib_device_put(device);
669 	return err;
670 }
671 
672 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
673 			  struct netlink_ext_ack *extack)
674 {
675 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
676 	struct ib_device *device;
677 	u32 index;
678 	int err;
679 
680 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
681 				     nldev_policy, extack);
682 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
683 		return -EINVAL;
684 
685 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
686 	device = ib_device_get_by_index(sock_net(skb->sk), index);
687 	if (!device)
688 		return -EINVAL;
689 
690 	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
691 		char name[IB_DEVICE_NAME_MAX] = {};
692 
693 		nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
694 			    IB_DEVICE_NAME_MAX);
695 		err = ib_device_rename(device, name);
696 		goto done;
697 	}
698 
699 	if (tb[RDMA_NLDEV_NET_NS_FD]) {
700 		u32 ns_fd;
701 
702 		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
703 		err = ib_device_set_netns_put(skb, device, ns_fd);
704 		goto put_done;
705 	}
706 
707 done:
708 	ib_device_put(device);
709 put_done:
710 	return err;
711 }
712 
713 static int _nldev_get_dumpit(struct ib_device *device,
714 			     struct sk_buff *skb,
715 			     struct netlink_callback *cb,
716 			     unsigned int idx)
717 {
718 	int start = cb->args[0];
719 	struct nlmsghdr *nlh;
720 
721 	if (idx < start)
722 		return 0;
723 
724 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
725 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
726 			0, NLM_F_MULTI);
727 
728 	if (fill_dev_info(skb, device)) {
729 		nlmsg_cancel(skb, nlh);
730 		goto out;
731 	}
732 
733 	nlmsg_end(skb, nlh);
734 
735 	idx++;
736 
737 out:	cb->args[0] = idx;
738 	return skb->len;
739 }
740 
741 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
742 {
743 	/*
744 	 * There is no need to take lock, because
745 	 * we are relying on ib_core's locking.
746 	 */
747 	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
748 }
749 
750 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
751 			       struct netlink_ext_ack *extack)
752 {
753 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
754 	struct ib_device *device;
755 	struct sk_buff *msg;
756 	u32 index;
757 	u32 port;
758 	int err;
759 
760 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
761 				     nldev_policy, extack);
762 	if (err ||
763 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
764 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
765 		return -EINVAL;
766 
767 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
768 	device = ib_device_get_by_index(sock_net(skb->sk), index);
769 	if (!device)
770 		return -EINVAL;
771 
772 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
773 	if (!rdma_is_port_valid(device, port)) {
774 		err = -EINVAL;
775 		goto err;
776 	}
777 
778 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
779 	if (!msg) {
780 		err = -ENOMEM;
781 		goto err;
782 	}
783 
784 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
785 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
786 			0, 0);
787 
788 	err = fill_port_info(msg, device, port, sock_net(skb->sk));
789 	if (err)
790 		goto err_free;
791 
792 	nlmsg_end(msg, nlh);
793 	ib_device_put(device);
794 
795 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
796 
797 err_free:
798 	nlmsg_free(msg);
799 err:
800 	ib_device_put(device);
801 	return err;
802 }
803 
804 static int nldev_port_get_dumpit(struct sk_buff *skb,
805 				 struct netlink_callback *cb)
806 {
807 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
808 	struct ib_device *device;
809 	int start = cb->args[0];
810 	struct nlmsghdr *nlh;
811 	u32 idx = 0;
812 	u32 ifindex;
813 	int err;
814 	unsigned int p;
815 
816 	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
817 				     nldev_policy, NULL);
818 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
819 		return -EINVAL;
820 
821 	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
822 	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
823 	if (!device)
824 		return -EINVAL;
825 
826 	rdma_for_each_port (device, p) {
827 		/*
828 		 * The dumpit function returns all information from specific
829 		 * index. This specific index is taken from the netlink
830 		 * messages request sent by user and it is available
831 		 * in cb->args[0].
832 		 *
833 		 * Usually, the user doesn't fill this field and it causes
834 		 * to return everything.
835 		 *
836 		 */
837 		if (idx < start) {
838 			idx++;
839 			continue;
840 		}
841 
842 		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
843 				cb->nlh->nlmsg_seq,
844 				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
845 						 RDMA_NLDEV_CMD_PORT_GET),
846 				0, NLM_F_MULTI);
847 
848 		if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
849 			nlmsg_cancel(skb, nlh);
850 			goto out;
851 		}
852 		idx++;
853 		nlmsg_end(skb, nlh);
854 	}
855 
856 out:
857 	ib_device_put(device);
858 	cb->args[0] = idx;
859 	return skb->len;
860 }
861 
862 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
863 			      struct netlink_ext_ack *extack)
864 {
865 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
866 	struct ib_device *device;
867 	struct sk_buff *msg;
868 	u32 index;
869 	int ret;
870 
871 	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
872 				     nldev_policy, extack);
873 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
874 		return -EINVAL;
875 
876 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
877 	device = ib_device_get_by_index(sock_net(skb->sk), index);
878 	if (!device)
879 		return -EINVAL;
880 
881 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
882 	if (!msg) {
883 		ret = -ENOMEM;
884 		goto err;
885 	}
886 
887 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
888 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
889 			0, 0);
890 
891 	ret = fill_res_info(msg, device);
892 	if (ret)
893 		goto err_free;
894 
895 	nlmsg_end(msg, nlh);
896 	ib_device_put(device);
897 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
898 
899 err_free:
900 	nlmsg_free(msg);
901 err:
902 	ib_device_put(device);
903 	return ret;
904 }
905 
906 static int _nldev_res_get_dumpit(struct ib_device *device,
907 				 struct sk_buff *skb,
908 				 struct netlink_callback *cb,
909 				 unsigned int idx)
910 {
911 	int start = cb->args[0];
912 	struct nlmsghdr *nlh;
913 
914 	if (idx < start)
915 		return 0;
916 
917 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
918 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
919 			0, NLM_F_MULTI);
920 
921 	if (fill_res_info(skb, device)) {
922 		nlmsg_cancel(skb, nlh);
923 		goto out;
924 	}
925 	nlmsg_end(skb, nlh);
926 
927 	idx++;
928 
929 out:
930 	cb->args[0] = idx;
931 	return skb->len;
932 }
933 
934 static int nldev_res_get_dumpit(struct sk_buff *skb,
935 				struct netlink_callback *cb)
936 {
937 	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
938 }
939 
940 struct nldev_fill_res_entry {
941 	int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin,
942 			     struct rdma_restrack_entry *res, u32 port);
943 	enum rdma_nldev_attr nldev_attr;
944 	enum rdma_nldev_command nldev_cmd;
945 	u8 flags;
946 	u32 entry;
947 	u32 id;
948 };
949 
/* Flag bits stored in the u8 flags member of struct nldev_fill_res_entry */
enum nldev_res_flags {
	/*
	 * Set on the CQ, MR and PD entries of fill_entries[]; doit requests
	 * for such types are rejected when a port index is supplied.
	 */
	NLDEV_PER_DEV = 1 << 0,
};
953 
954 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
955 	[RDMA_RESTRACK_QP] = {
956 		.fill_res_func = fill_res_qp_entry,
957 		.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
958 		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
959 		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
960 		.id = RDMA_NLDEV_ATTR_RES_LQPN,
961 	},
962 	[RDMA_RESTRACK_CM_ID] = {
963 		.fill_res_func = fill_res_cm_id_entry,
964 		.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
965 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
966 		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
967 		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
968 	},
969 	[RDMA_RESTRACK_CQ] = {
970 		.fill_res_func = fill_res_cq_entry,
971 		.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
972 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
973 		.flags = NLDEV_PER_DEV,
974 		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
975 		.id = RDMA_NLDEV_ATTR_RES_CQN,
976 	},
977 	[RDMA_RESTRACK_MR] = {
978 		.fill_res_func = fill_res_mr_entry,
979 		.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
980 		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
981 		.flags = NLDEV_PER_DEV,
982 		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
983 		.id = RDMA_NLDEV_ATTR_RES_MRN,
984 	},
985 	[RDMA_RESTRACK_PD] = {
986 		.fill_res_func = fill_res_pd_entry,
987 		.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
988 		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
989 		.flags = NLDEV_PER_DEV,
990 		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
991 		.id = RDMA_NLDEV_ATTR_RES_PDN,
992 	},
993 };
994 
995 static bool is_visible_in_pid_ns(struct rdma_restrack_entry *res)
996 {
997 	/*
998 	 * 1. Kern resources should be visible in init name space only
999 	 * 2. Present only resources visible in the current namespace
1000 	 */
1001 	if (rdma_is_kernel_res(res))
1002 		return task_active_pid_ns(current) == &init_pid_ns;
1003 	return task_active_pid_ns(current) == task_active_pid_ns(res->task);
1004 }
1005 
1006 static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1007 			       struct netlink_ext_ack *extack,
1008 			       enum rdma_restrack_type res_type)
1009 {
1010 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1011 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1012 	struct rdma_restrack_entry *res;
1013 	struct ib_device *device;
1014 	u32 index, id, port = 0;
1015 	bool has_cap_net_admin;
1016 	struct sk_buff *msg;
1017 	int ret;
1018 
1019 	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1020 				     nldev_policy, extack);
1021 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
1022 		return -EINVAL;
1023 
1024 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1025 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1026 	if (!device)
1027 		return -EINVAL;
1028 
1029 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1030 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1031 		if (!rdma_is_port_valid(device, port)) {
1032 			ret = -EINVAL;
1033 			goto err;
1034 		}
1035 	}
1036 
1037 	if ((port && fe->flags & NLDEV_PER_DEV) ||
1038 	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
1039 		ret = -EINVAL;
1040 		goto err;
1041 	}
1042 
1043 	id = nla_get_u32(tb[fe->id]);
1044 	res = rdma_restrack_get_byid(device, res_type, id);
1045 	if (IS_ERR(res)) {
1046 		ret = PTR_ERR(res);
1047 		goto err;
1048 	}
1049 
1050 	if (!is_visible_in_pid_ns(res)) {
1051 		ret = -ENOENT;
1052 		goto err_get;
1053 	}
1054 
1055 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1056 	if (!msg) {
1057 		ret = -ENOMEM;
1058 		goto err;
1059 	}
1060 
1061 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1062 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
1063 			0, 0);
1064 
1065 	if (fill_nldev_handle(msg, device)) {
1066 		ret = -EMSGSIZE;
1067 		goto err_free;
1068 	}
1069 
1070 	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
1071 	ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
1072 	rdma_restrack_put(res);
1073 	if (ret)
1074 		goto err_free;
1075 
1076 	nlmsg_end(msg, nlh);
1077 	ib_device_put(device);
1078 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1079 
1080 err_free:
1081 	nlmsg_free(msg);
1082 err_get:
1083 	rdma_restrack_put(res);
1084 err:
1085 	ib_device_put(device);
1086 	return ret;
1087 }
1088 
1089 static int res_get_common_dumpit(struct sk_buff *skb,
1090 				 struct netlink_callback *cb,
1091 				 enum rdma_restrack_type res_type)
1092 {
1093 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1094 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1095 	struct rdma_restrack_entry *res;
1096 	struct rdma_restrack_root *rt;
1097 	int err, ret = 0, idx = 0;
1098 	struct nlattr *table_attr;
1099 	struct nlattr *entry_attr;
1100 	struct ib_device *device;
1101 	int start = cb->args[0];
1102 	bool has_cap_net_admin;
1103 	struct nlmsghdr *nlh;
1104 	unsigned long id;
1105 	u32 index, port = 0;
1106 	bool filled = false;
1107 
1108 	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1109 				     nldev_policy, NULL);
1110 	/*
1111 	 * Right now, we are expecting the device index to get res information,
1112 	 * but it is possible to extend this code to return all devices in
1113 	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
1114 	 * if it doesn't exist, we will iterate over all devices.
1115 	 *
1116 	 * But it is not needed for now.
1117 	 */
1118 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1119 		return -EINVAL;
1120 
1121 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1122 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1123 	if (!device)
1124 		return -EINVAL;
1125 
1126 	/*
1127 	 * If no PORT_INDEX is supplied, we will return all QPs from that device
1128 	 */
1129 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1130 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1131 		if (!rdma_is_port_valid(device, port)) {
1132 			ret = -EINVAL;
1133 			goto err_index;
1134 		}
1135 	}
1136 
1137 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1138 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
1139 			0, NLM_F_MULTI);
1140 
1141 	if (fill_nldev_handle(skb, device)) {
1142 		ret = -EMSGSIZE;
1143 		goto err;
1144 	}
1145 
1146 	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1147 	if (!table_attr) {
1148 		ret = -EMSGSIZE;
1149 		goto err;
1150 	}
1151 
1152 	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1153 
1154 	rt = &device->res[res_type];
1155 	xa_lock(&rt->xa);
1156 	/*
1157 	 * FIXME: if the skip ahead is something common this loop should
1158 	 * use xas_for_each & xas_pause to optimize, we can have a lot of
1159 	 * objects.
1160 	 */
1161 	xa_for_each(&rt->xa, id, res) {
1162 		if (!is_visible_in_pid_ns(res))
1163 			continue;
1164 
1165 		if (idx < start || !rdma_restrack_get(res))
1166 			goto next;
1167 
1168 		xa_unlock(&rt->xa);
1169 
1170 		filled = true;
1171 
1172 		entry_attr = nla_nest_start_noflag(skb, fe->entry);
1173 		if (!entry_attr) {
1174 			ret = -EMSGSIZE;
1175 			rdma_restrack_put(res);
1176 			goto msg_full;
1177 		}
1178 
1179 		ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
1180 		rdma_restrack_put(res);
1181 
1182 		if (ret) {
1183 			nla_nest_cancel(skb, entry_attr);
1184 			if (ret == -EMSGSIZE)
1185 				goto msg_full;
1186 			if (ret == -EAGAIN)
1187 				goto again;
1188 			goto res_err;
1189 		}
1190 		nla_nest_end(skb, entry_attr);
1191 again:		xa_lock(&rt->xa);
1192 next:		idx++;
1193 	}
1194 	xa_unlock(&rt->xa);
1195 
1196 msg_full:
1197 	nla_nest_end(skb, table_attr);
1198 	nlmsg_end(skb, nlh);
1199 	cb->args[0] = idx;
1200 
1201 	/*
1202 	 * No more entries to fill, cancel the message and
1203 	 * return 0 to mark end of dumpit.
1204 	 */
1205 	if (!filled)
1206 		goto err;
1207 
1208 	ib_device_put(device);
1209 	return skb->len;
1210 
1211 res_err:
1212 	nla_nest_cancel(skb, table_attr);
1213 
1214 err:
1215 	nlmsg_cancel(skb, nlh);
1216 
1217 err_index:
1218 	ib_device_put(device);
1219 	return ret;
1220 }
1221 
1222 #define RES_GET_FUNCS(name, type)                                              \
1223 	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
1224 						 struct netlink_callback *cb)  \
1225 	{                                                                      \
1226 		return res_get_common_dumpit(skb, cb, type);                   \
1227 	}                                                                      \
1228 	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
1229 					       struct nlmsghdr *nlh,           \
1230 					       struct netlink_ext_ack *extack) \
1231 	{                                                                      \
1232 		return res_get_common_doit(skb, nlh, extack, type);            \
1233 	}
1234 
1235 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
1236 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
1237 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
1238 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
1239 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
1240 
/*
 * Registered rdma_link_ops, searched by type name in link_ops_get().
 * link_ops_rwsem is held for write while the list is modified
 * (rdma_link_register()/rdma_link_unregister()) and for read while
 * nldev_newlink() looks up and invokes an entry.
 */
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
1243 
1244 static const struct rdma_link_ops *link_ops_get(const char *type)
1245 {
1246 	const struct rdma_link_ops *ops;
1247 
1248 	list_for_each_entry(ops, &link_ops, list) {
1249 		if (!strcmp(ops->type, type))
1250 			goto out;
1251 	}
1252 	ops = NULL;
1253 out:
1254 	return ops;
1255 }
1256 
1257 void rdma_link_register(struct rdma_link_ops *ops)
1258 {
1259 	down_write(&link_ops_rwsem);
1260 	if (WARN_ON_ONCE(link_ops_get(ops->type)))
1261 		goto out;
1262 	list_add(&ops->list, &link_ops);
1263 out:
1264 	up_write(&link_ops_rwsem);
1265 }
1266 EXPORT_SYMBOL(rdma_link_register);
1267 
/*
 * rdma_link_unregister() - remove @ops from the list of known link types.
 *
 * The list entry is deleted under the write side of link_ops_rwsem.  No
 * check is made here that @ops is currently on the list.
 */
void rdma_link_unregister(struct rdma_link_ops *ops)
{
	down_write(&link_ops_rwsem);
	list_del(&ops->list);
	up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_unregister);
1275 
1276 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1277 			  struct netlink_ext_ack *extack)
1278 {
1279 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1280 	char ibdev_name[IB_DEVICE_NAME_MAX];
1281 	const struct rdma_link_ops *ops;
1282 	char ndev_name[IFNAMSIZ];
1283 	struct net_device *ndev;
1284 	char type[IFNAMSIZ];
1285 	int err;
1286 
1287 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1288 				     nldev_policy, extack);
1289 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1290 	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1291 		return -EINVAL;
1292 
1293 	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1294 		    sizeof(ibdev_name));
1295 	if (strchr(ibdev_name, '%'))
1296 		return -EINVAL;
1297 
1298 	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1299 	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1300 		    sizeof(ndev_name));
1301 
1302 	ndev = dev_get_by_name(&init_net, ndev_name);
1303 	if (!ndev)
1304 		return -ENODEV;
1305 
1306 	down_read(&link_ops_rwsem);
1307 	ops = link_ops_get(type);
1308 #ifdef CONFIG_MODULES
1309 	if (!ops) {
1310 		up_read(&link_ops_rwsem);
1311 		request_module("rdma-link-%s", type);
1312 		down_read(&link_ops_rwsem);
1313 		ops = link_ops_get(type);
1314 	}
1315 #endif
1316 	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1317 	up_read(&link_ops_rwsem);
1318 	dev_put(ndev);
1319 
1320 	return err;
1321 }
1322 
1323 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1324 			  struct netlink_ext_ack *extack)
1325 {
1326 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1327 	struct ib_device *device;
1328 	u32 index;
1329 	int err;
1330 
1331 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1332 				     nldev_policy, extack);
1333 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1334 		return -EINVAL;
1335 
1336 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1337 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1338 	if (!device)
1339 		return -EINVAL;
1340 
1341 	if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
1342 		ib_device_put(device);
1343 		return -EINVAL;
1344 	}
1345 
1346 	ib_unregister_device_and_put(device);
1347 	return 0;
1348 }
1349 
1350 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1351 			      struct netlink_ext_ack *extack)
1352 {
1353 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1354 	struct sk_buff *msg;
1355 	int err;
1356 
1357 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1358 			  nldev_policy, extack);
1359 	if (err)
1360 		return err;
1361 
1362 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1363 	if (!msg)
1364 		return -ENOMEM;
1365 
1366 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1367 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1368 					 RDMA_NLDEV_CMD_SYS_GET),
1369 			0, 0);
1370 
1371 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1372 			 (u8)ib_devices_shared_netns);
1373 	if (err) {
1374 		nlmsg_free(msg);
1375 		return err;
1376 	}
1377 	nlmsg_end(msg, nlh);
1378 	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
1379 }
1380 
1381 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1382 				  struct netlink_ext_ack *extack)
1383 {
1384 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1385 	u8 enable;
1386 	int err;
1387 
1388 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1389 			  nldev_policy, extack);
1390 	if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
1391 		return -EINVAL;
1392 
1393 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1394 	/* Only 0 and 1 are supported */
1395 	if (enable > 1)
1396 		return -EINVAL;
1397 
1398 	err = rdma_compatdev_set(enable);
1399 	return err;
1400 }
1401 
1402 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
1403 	[RDMA_NLDEV_CMD_GET] = {
1404 		.doit = nldev_get_doit,
1405 		.dump = nldev_get_dumpit,
1406 	},
1407 	[RDMA_NLDEV_CMD_SET] = {
1408 		.doit = nldev_set_doit,
1409 		.flags = RDMA_NL_ADMIN_PERM,
1410 	},
1411 	[RDMA_NLDEV_CMD_NEWLINK] = {
1412 		.doit = nldev_newlink,
1413 		.flags = RDMA_NL_ADMIN_PERM,
1414 	},
1415 	[RDMA_NLDEV_CMD_DELLINK] = {
1416 		.doit = nldev_dellink,
1417 		.flags = RDMA_NL_ADMIN_PERM,
1418 	},
1419 	[RDMA_NLDEV_CMD_PORT_GET] = {
1420 		.doit = nldev_port_get_doit,
1421 		.dump = nldev_port_get_dumpit,
1422 	},
1423 	[RDMA_NLDEV_CMD_RES_GET] = {
1424 		.doit = nldev_res_get_doit,
1425 		.dump = nldev_res_get_dumpit,
1426 	},
1427 	[RDMA_NLDEV_CMD_RES_QP_GET] = {
1428 		.doit = nldev_res_get_qp_doit,
1429 		.dump = nldev_res_get_qp_dumpit,
1430 	},
1431 	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
1432 		.doit = nldev_res_get_cm_id_doit,
1433 		.dump = nldev_res_get_cm_id_dumpit,
1434 	},
1435 	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
1436 		.doit = nldev_res_get_cq_doit,
1437 		.dump = nldev_res_get_cq_dumpit,
1438 	},
1439 	[RDMA_NLDEV_CMD_RES_MR_GET] = {
1440 		.doit = nldev_res_get_mr_doit,
1441 		.dump = nldev_res_get_mr_dumpit,
1442 	},
1443 	[RDMA_NLDEV_CMD_RES_PD_GET] = {
1444 		.doit = nldev_res_get_pd_doit,
1445 		.dump = nldev_res_get_pd_dumpit,
1446 	},
1447 	[RDMA_NLDEV_CMD_SYS_GET] = {
1448 		.doit = nldev_sys_get_doit,
1449 	},
1450 	[RDMA_NLDEV_CMD_SYS_SET] = {
1451 		.doit = nldev_set_sys_set_doit,
1452 		.flags = RDMA_NL_ADMIN_PERM,
1453 	},
1454 };
1455 
/* Register nldev_cb_table as the handler table for RDMA_NL_NLDEV. */
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
1460 
/* Undo nldev_init(): unregister the RDMA_NL_NLDEV handlers. */
void __exit nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}
1465 
/*
 * Module alias for the RDMA_NL_NLDEV netlink client.
 * NOTE(review): the literal 5 is presumably the numeric value of
 * RDMA_NL_NLDEV - confirm against the macro and enum definitions.
 */
MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
1467