xref: /openbmc/linux/drivers/infiniband/core/nldev.c (revision fcbd8037f7df694aa7bfb7ce82c0c7f5e53e7b7b)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40 
41 #include "core_priv.h"
42 #include "cma_priv.h"
43 #include "restrack.h"
44 
/*
 * Netlink attribute validation policy shared by the nldev request parsers
 * in this file (passed to nlmsg_parse_deprecated()).
 *
 * Keep the array elements sorted by the netlink attribute name.
 * NOTE(review): the RDMA_NLDEV_ATTR_STAT_* group currently sits after
 * RDMA_NLDEV_ATTR_SUBNET_PREFIX, which is not strictly alphabetical.
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_CHARDEV]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_ABI]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_CHARDEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
	[RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = IB_DEVICE_NAME_MAX },
	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_FW_VERSION]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_LID]			= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_LMC]			= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
					.len = IFNAMSIZ },
	[RDMA_NLDEV_ATTR_NODE_GUID]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CM_IDN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CTXN]		= { .type = NLA_U32 },
	/* Raw socket address blob: only a length is given, no .type */
	[RDMA_NLDEV_ATTR_RES_DST_ADDR]		= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_MRN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
	/* Raw socket address blob: only a length is given, no .type */
	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]		= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SM_LID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_MODE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_RES]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_COUNTER]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
	/* NOTE(review): unlike the other string attributes, no .len is set */
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]	= { .type = NLA_U32 },
	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
};
145 
146 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
147 				      enum rdma_nldev_print_type print_type)
148 {
149 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
150 		return -EMSGSIZE;
151 	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
152 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
153 		return -EMSGSIZE;
154 
155 	return 0;
156 }
157 
158 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
159 				   enum rdma_nldev_print_type print_type,
160 				   u32 value)
161 {
162 	if (put_driver_name_print_type(msg, name, print_type))
163 		return -EMSGSIZE;
164 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
165 		return -EMSGSIZE;
166 
167 	return 0;
168 }
169 
170 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
171 				   enum rdma_nldev_print_type print_type,
172 				   u64 value)
173 {
174 	if (put_driver_name_print_type(msg, name, print_type))
175 		return -EMSGSIZE;
176 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
177 			      RDMA_NLDEV_ATTR_PAD))
178 		return -EMSGSIZE;
179 
180 	return 0;
181 }
182 
/*
 * Exported helper: add a named u32 driver value to @msg with no print-type
 * hint (RDMA_NLDEV_PRINT_TYPE_UNSPEC).
 *
 * Returns 0 on success or -EMSGSIZE if the attributes could not be added.
 */
int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);
189 
/*
 * Exported helper: add a named u32 driver value to @msg tagged with the
 * RDMA_NLDEV_PRINT_TYPE_HEX print-type hint.
 *
 * Returns 0 on success or -EMSGSIZE if the attributes could not be added.
 */
int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
			       u32 value)
{
	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
197 
/*
 * Exported helper: add a named u64 driver value to @msg with no print-type
 * hint (RDMA_NLDEV_PRINT_TYPE_UNSPEC).
 *
 * Returns 0 on success or -EMSGSIZE if the attributes could not be added.
 */
int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);
204 
/*
 * Exported helper: add a named u64 driver value to @msg tagged with the
 * RDMA_NLDEV_PRINT_TYPE_HEX print-type hint.
 *
 * Returns 0 on success or -EMSGSIZE if the attributes could not be added.
 */
int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
				       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
211 
212 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
213 {
214 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
215 		return -EMSGSIZE;
216 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
217 			   dev_name(&device->dev)))
218 		return -EMSGSIZE;
219 
220 	return 0;
221 }
222 
223 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
224 {
225 	char fw[IB_FW_VERSION_NAME_MAX];
226 	int ret = 0;
227 	u8 port;
228 
229 	if (fill_nldev_handle(msg, device))
230 		return -EMSGSIZE;
231 
232 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
233 		return -EMSGSIZE;
234 
235 	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
236 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
237 			      device->attrs.device_cap_flags,
238 			      RDMA_NLDEV_ATTR_PAD))
239 		return -EMSGSIZE;
240 
241 	ib_get_device_fw_str(device, fw);
242 	/* Device without FW has strlen(fw) = 0 */
243 	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
244 		return -EMSGSIZE;
245 
246 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
247 			      be64_to_cpu(device->node_guid),
248 			      RDMA_NLDEV_ATTR_PAD))
249 		return -EMSGSIZE;
250 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
251 			      be64_to_cpu(device->attrs.sys_image_guid),
252 			      RDMA_NLDEV_ATTR_PAD))
253 		return -EMSGSIZE;
254 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
255 		return -EMSGSIZE;
256 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
257 		return -EMSGSIZE;
258 
259 	/*
260 	 * Link type is determined on first port and mlx4 device
261 	 * which can potentially have two different link type for the same
262 	 * IB device is considered as better to be avoided in the future,
263 	 */
264 	port = rdma_start_port(device);
265 	if (rdma_cap_opa_mad(device, port))
266 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
267 	else if (rdma_protocol_ib(device, port))
268 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
269 	else if (rdma_protocol_iwarp(device, port))
270 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
271 	else if (rdma_protocol_roce(device, port))
272 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
273 	else if (rdma_protocol_usnic(device, port))
274 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
275 				     "usnic");
276 	return ret;
277 }
278 
279 static int fill_port_info(struct sk_buff *msg,
280 			  struct ib_device *device, u32 port,
281 			  const struct net *net)
282 {
283 	struct net_device *netdev = NULL;
284 	struct ib_port_attr attr;
285 	int ret;
286 	u64 cap_flags = 0;
287 
288 	if (fill_nldev_handle(msg, device))
289 		return -EMSGSIZE;
290 
291 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
292 		return -EMSGSIZE;
293 
294 	ret = ib_query_port(device, port, &attr);
295 	if (ret)
296 		return ret;
297 
298 	if (rdma_protocol_ib(device, port)) {
299 		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
300 				sizeof(attr.port_cap_flags2)) > sizeof(u64));
301 		cap_flags = attr.port_cap_flags |
302 			((u64)attr.port_cap_flags2 << 32);
303 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
304 				      cap_flags, RDMA_NLDEV_ATTR_PAD))
305 			return -EMSGSIZE;
306 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
307 				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
308 			return -EMSGSIZE;
309 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
310 			return -EMSGSIZE;
311 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
312 			return -EMSGSIZE;
313 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
314 			return -EMSGSIZE;
315 	}
316 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
317 		return -EMSGSIZE;
318 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
319 		return -EMSGSIZE;
320 
321 	netdev = ib_device_get_netdev(device, port);
322 	if (netdev && net_eq(dev_net(netdev), net)) {
323 		ret = nla_put_u32(msg,
324 				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
325 		if (ret)
326 			goto out;
327 		ret = nla_put_string(msg,
328 				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
329 	}
330 
331 out:
332 	if (netdev)
333 		dev_put(netdev);
334 	return ret;
335 }
336 
337 static int fill_res_info_entry(struct sk_buff *msg,
338 			       const char *name, u64 curr)
339 {
340 	struct nlattr *entry_attr;
341 
342 	entry_attr = nla_nest_start_noflag(msg,
343 					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
344 	if (!entry_attr)
345 		return -EMSGSIZE;
346 
347 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
348 		goto err;
349 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
350 			      RDMA_NLDEV_ATTR_PAD))
351 		goto err;
352 
353 	nla_nest_end(msg, entry_attr);
354 	return 0;
355 
356 err:
357 	nla_nest_cancel(msg, entry_attr);
358 	return -EMSGSIZE;
359 }
360 
361 static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
362 {
363 	static const char * const names[RDMA_RESTRACK_MAX] = {
364 		[RDMA_RESTRACK_PD] = "pd",
365 		[RDMA_RESTRACK_CQ] = "cq",
366 		[RDMA_RESTRACK_QP] = "qp",
367 		[RDMA_RESTRACK_CM_ID] = "cm_id",
368 		[RDMA_RESTRACK_MR] = "mr",
369 		[RDMA_RESTRACK_CTX] = "ctx",
370 	};
371 
372 	struct nlattr *table_attr;
373 	int ret, i, curr;
374 
375 	if (fill_nldev_handle(msg, device))
376 		return -EMSGSIZE;
377 
378 	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
379 	if (!table_attr)
380 		return -EMSGSIZE;
381 
382 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
383 		if (!names[i])
384 			continue;
385 		curr = rdma_restrack_count(device, i);
386 		ret = fill_res_info_entry(msg, names[i], curr);
387 		if (ret)
388 			goto err;
389 	}
390 
391 	nla_nest_end(msg, table_attr);
392 	return 0;
393 
394 err:
395 	nla_nest_cancel(msg, table_attr);
396 	return ret;
397 }
398 
399 static int fill_res_name_pid(struct sk_buff *msg,
400 			     struct rdma_restrack_entry *res)
401 {
402 	/*
403 	 * For user resources, user is should read /proc/PID/comm to get the
404 	 * name of the task file.
405 	 */
406 	if (rdma_is_kernel_res(res)) {
407 		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
408 		    res->kern_name))
409 			return -EMSGSIZE;
410 	} else {
411 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
412 		    task_pid_vnr(res->task)))
413 			return -EMSGSIZE;
414 	}
415 	return 0;
416 }
417 
418 static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
419 			   struct rdma_restrack_entry *res)
420 {
421 	if (!dev->ops.fill_res_entry)
422 		return false;
423 	return dev->ops.fill_res_entry(msg, res);
424 }
425 
426 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
427 			     struct rdma_restrack_entry *res, uint32_t port)
428 {
429 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
430 	struct ib_device *dev = qp->device;
431 	struct ib_qp_init_attr qp_init_attr;
432 	struct ib_qp_attr qp_attr;
433 	int ret;
434 
435 	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
436 	if (ret)
437 		return ret;
438 
439 	if (port && port != qp_attr.port_num)
440 		return -EAGAIN;
441 
442 	/* In create_qp() port is not set yet */
443 	if (qp_attr.port_num &&
444 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
445 		goto err;
446 
447 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
448 		goto err;
449 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
450 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
451 				qp_attr.dest_qp_num))
452 			goto err;
453 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
454 				qp_attr.rq_psn))
455 			goto err;
456 	}
457 
458 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
459 		goto err;
460 
461 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
462 	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
463 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
464 			       qp_attr.path_mig_state))
465 			goto err;
466 	}
467 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
468 		goto err;
469 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
470 		goto err;
471 
472 	if (!rdma_is_kernel_res(res) &&
473 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
474 		goto err;
475 
476 	if (fill_res_name_pid(msg, res))
477 		goto err;
478 
479 	if (fill_res_entry(dev, msg, res))
480 		goto err;
481 
482 	return 0;
483 
484 err:	return -EMSGSIZE;
485 }
486 
487 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
488 				struct rdma_restrack_entry *res, uint32_t port)
489 {
490 	struct rdma_id_private *id_priv =
491 				container_of(res, struct rdma_id_private, res);
492 	struct ib_device *dev = id_priv->id.device;
493 	struct rdma_cm_id *cm_id = &id_priv->id;
494 
495 	if (port && port != cm_id->port_num)
496 		return 0;
497 
498 	if (cm_id->port_num &&
499 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
500 		goto err;
501 
502 	if (id_priv->qp_num) {
503 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
504 			goto err;
505 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
506 			goto err;
507 	}
508 
509 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
510 		goto err;
511 
512 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
513 		goto err;
514 
515 	if (cm_id->route.addr.src_addr.ss_family &&
516 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
517 		    sizeof(cm_id->route.addr.src_addr),
518 		    &cm_id->route.addr.src_addr))
519 		goto err;
520 	if (cm_id->route.addr.dst_addr.ss_family &&
521 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
522 		    sizeof(cm_id->route.addr.dst_addr),
523 		    &cm_id->route.addr.dst_addr))
524 		goto err;
525 
526 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
527 		goto err;
528 
529 	if (fill_res_name_pid(msg, res))
530 		goto err;
531 
532 	if (fill_res_entry(dev, msg, res))
533 		goto err;
534 
535 	return 0;
536 
537 err: return -EMSGSIZE;
538 }
539 
540 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
541 			     struct rdma_restrack_entry *res, uint32_t port)
542 {
543 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
544 	struct ib_device *dev = cq->device;
545 
546 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
547 		goto err;
548 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
549 			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
550 		goto err;
551 
552 	/* Poll context is only valid for kernel CQs */
553 	if (rdma_is_kernel_res(res) &&
554 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
555 		goto err;
556 
557 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
558 		goto err;
559 
560 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
561 		goto err;
562 	if (!rdma_is_kernel_res(res) &&
563 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
564 			cq->uobject->context->res.id))
565 		goto err;
566 
567 	if (fill_res_name_pid(msg, res))
568 		goto err;
569 
570 	if (fill_res_entry(dev, msg, res))
571 		goto err;
572 
573 	return 0;
574 
575 err:	return -EMSGSIZE;
576 }
577 
578 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
579 			     struct rdma_restrack_entry *res, uint32_t port)
580 {
581 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
582 	struct ib_device *dev = mr->pd->device;
583 
584 	if (has_cap_net_admin) {
585 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
586 			goto err;
587 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
588 			goto err;
589 	}
590 
591 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
592 			      RDMA_NLDEV_ATTR_PAD))
593 		goto err;
594 
595 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
596 		goto err;
597 
598 	if (!rdma_is_kernel_res(res) &&
599 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
600 		goto err;
601 
602 	if (fill_res_name_pid(msg, res))
603 		goto err;
604 
605 	if (fill_res_entry(dev, msg, res))
606 		goto err;
607 
608 	return 0;
609 
610 err:	return -EMSGSIZE;
611 }
612 
613 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
614 			     struct rdma_restrack_entry *res, uint32_t port)
615 {
616 	struct ib_pd *pd = container_of(res, struct ib_pd, res);
617 	struct ib_device *dev = pd->device;
618 
619 	if (has_cap_net_admin) {
620 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
621 				pd->local_dma_lkey))
622 			goto err;
623 		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
624 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
625 				pd->unsafe_global_rkey))
626 			goto err;
627 	}
628 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
629 			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
630 		goto err;
631 
632 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
633 		goto err;
634 
635 	if (!rdma_is_kernel_res(res) &&
636 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
637 			pd->uobject->context->res.id))
638 		goto err;
639 
640 	if (fill_res_name_pid(msg, res))
641 		goto err;
642 
643 	if (fill_res_entry(dev, msg, res))
644 		goto err;
645 
646 	return 0;
647 
648 err:	return -EMSGSIZE;
649 }
650 
651 static int fill_stat_counter_mode(struct sk_buff *msg,
652 				  struct rdma_counter *counter)
653 {
654 	struct rdma_counter_mode *m = &counter->mode;
655 
656 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
657 		return -EMSGSIZE;
658 
659 	if (m->mode == RDMA_COUNTER_MODE_AUTO)
660 		if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
661 		    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
662 			return -EMSGSIZE;
663 
664 	return 0;
665 }
666 
667 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
668 {
669 	struct nlattr *entry_attr;
670 
671 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
672 	if (!entry_attr)
673 		return -EMSGSIZE;
674 
675 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
676 		goto err;
677 
678 	nla_nest_end(msg, entry_attr);
679 	return 0;
680 
681 err:
682 	nla_nest_cancel(msg, entry_attr);
683 	return -EMSGSIZE;
684 }
685 
686 static int fill_stat_counter_qps(struct sk_buff *msg,
687 				 struct rdma_counter *counter)
688 {
689 	struct rdma_restrack_entry *res;
690 	struct rdma_restrack_root *rt;
691 	struct nlattr *table_attr;
692 	struct ib_qp *qp = NULL;
693 	unsigned long id = 0;
694 	int ret = 0;
695 
696 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
697 
698 	rt = &counter->device->res[RDMA_RESTRACK_QP];
699 	xa_lock(&rt->xa);
700 	xa_for_each(&rt->xa, id, res) {
701 		if (!rdma_is_visible_in_pid_ns(res))
702 			continue;
703 
704 		qp = container_of(res, struct ib_qp, res);
705 		if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
706 			continue;
707 
708 		if (!qp->counter || (qp->counter->id != counter->id))
709 			continue;
710 
711 		ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
712 		if (ret)
713 			goto err;
714 	}
715 
716 	xa_unlock(&rt->xa);
717 	nla_nest_end(msg, table_attr);
718 	return 0;
719 
720 err:
721 	xa_unlock(&rt->xa);
722 	nla_nest_cancel(msg, table_attr);
723 	return ret;
724 }
725 
726 static int fill_stat_hwcounter_entry(struct sk_buff *msg,
727 				     const char *name, u64 value)
728 {
729 	struct nlattr *entry_attr;
730 
731 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
732 	if (!entry_attr)
733 		return -EMSGSIZE;
734 
735 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
736 			   name))
737 		goto err;
738 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
739 			      value, RDMA_NLDEV_ATTR_PAD))
740 		goto err;
741 
742 	nla_nest_end(msg, entry_attr);
743 	return 0;
744 
745 err:
746 	nla_nest_cancel(msg, entry_attr);
747 	return -EMSGSIZE;
748 }
749 
750 static int fill_stat_counter_hwcounters(struct sk_buff *msg,
751 					struct rdma_counter *counter)
752 {
753 	struct rdma_hw_stats *st = counter->stats;
754 	struct nlattr *table_attr;
755 	int i;
756 
757 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
758 	if (!table_attr)
759 		return -EMSGSIZE;
760 
761 	for (i = 0; i < st->num_counters; i++)
762 		if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
763 			goto err;
764 
765 	nla_nest_end(msg, table_attr);
766 	return 0;
767 
768 err:
769 	nla_nest_cancel(msg, table_attr);
770 	return -EMSGSIZE;
771 }
772 
773 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
774 				  struct rdma_restrack_entry *res,
775 				  uint32_t port)
776 {
777 	struct rdma_counter *counter =
778 		container_of(res, struct rdma_counter, res);
779 
780 	if (port && port != counter->port)
781 		return -EAGAIN;
782 
783 	/* Dump it even query failed */
784 	rdma_counter_query_stats(counter);
785 
786 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
787 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
788 	    fill_res_name_pid(msg, &counter->res) ||
789 	    fill_stat_counter_mode(msg, counter) ||
790 	    fill_stat_counter_qps(msg, counter) ||
791 	    fill_stat_counter_hwcounters(msg, counter))
792 		return -EMSGSIZE;
793 
794 	return 0;
795 }
796 
797 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
798 			  struct netlink_ext_ack *extack)
799 {
800 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
801 	struct ib_device *device;
802 	struct sk_buff *msg;
803 	u32 index;
804 	int err;
805 
806 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
807 				     nldev_policy, extack);
808 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
809 		return -EINVAL;
810 
811 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
812 
813 	device = ib_device_get_by_index(sock_net(skb->sk), index);
814 	if (!device)
815 		return -EINVAL;
816 
817 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
818 	if (!msg) {
819 		err = -ENOMEM;
820 		goto err;
821 	}
822 
823 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
824 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
825 			0, 0);
826 
827 	err = fill_dev_info(msg, device);
828 	if (err)
829 		goto err_free;
830 
831 	nlmsg_end(msg, nlh);
832 
833 	ib_device_put(device);
834 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
835 
836 err_free:
837 	nlmsg_free(msg);
838 err:
839 	ib_device_put(device);
840 	return err;
841 }
842 
843 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
844 			  struct netlink_ext_ack *extack)
845 {
846 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
847 	struct ib_device *device;
848 	u32 index;
849 	int err;
850 
851 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
852 				     nldev_policy, extack);
853 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
854 		return -EINVAL;
855 
856 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
857 	device = ib_device_get_by_index(sock_net(skb->sk), index);
858 	if (!device)
859 		return -EINVAL;
860 
861 	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
862 		char name[IB_DEVICE_NAME_MAX] = {};
863 
864 		nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
865 			    IB_DEVICE_NAME_MAX);
866 		err = ib_device_rename(device, name);
867 		goto done;
868 	}
869 
870 	if (tb[RDMA_NLDEV_NET_NS_FD]) {
871 		u32 ns_fd;
872 
873 		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
874 		err = ib_device_set_netns_put(skb, device, ns_fd);
875 		goto put_done;
876 	}
877 
878 	if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
879 		u8 use_dim;
880 
881 		use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
882 		err = ib_device_set_dim(device,  use_dim);
883 		goto done;
884 	}
885 
886 done:
887 	ib_device_put(device);
888 put_done:
889 	return err;
890 }
891 
892 static int _nldev_get_dumpit(struct ib_device *device,
893 			     struct sk_buff *skb,
894 			     struct netlink_callback *cb,
895 			     unsigned int idx)
896 {
897 	int start = cb->args[0];
898 	struct nlmsghdr *nlh;
899 
900 	if (idx < start)
901 		return 0;
902 
903 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
904 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
905 			0, NLM_F_MULTI);
906 
907 	if (fill_dev_info(skb, device)) {
908 		nlmsg_cancel(skb, nlh);
909 		goto out;
910 	}
911 
912 	nlmsg_end(skb, nlh);
913 
914 	idx++;
915 
916 out:	cb->args[0] = idx;
917 	return skb->len;
918 }
919 
/*
 * Dump handler for device GET: delegates to ib_enum_all_devs(), passing
 * _nldev_get_dumpit() as the per-device callback, and returns its result.
 */
static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	/*
	 * There is no need to take lock, because
	 * we are relying on ib_core's locking.
	 */
	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}
928 
929 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
930 			       struct netlink_ext_ack *extack)
931 {
932 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
933 	struct ib_device *device;
934 	struct sk_buff *msg;
935 	u32 index;
936 	u32 port;
937 	int err;
938 
939 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
940 				     nldev_policy, extack);
941 	if (err ||
942 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
943 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
944 		return -EINVAL;
945 
946 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
947 	device = ib_device_get_by_index(sock_net(skb->sk), index);
948 	if (!device)
949 		return -EINVAL;
950 
951 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
952 	if (!rdma_is_port_valid(device, port)) {
953 		err = -EINVAL;
954 		goto err;
955 	}
956 
957 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
958 	if (!msg) {
959 		err = -ENOMEM;
960 		goto err;
961 	}
962 
963 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
964 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
965 			0, 0);
966 
967 	err = fill_port_info(msg, device, port, sock_net(skb->sk));
968 	if (err)
969 		goto err_free;
970 
971 	nlmsg_end(msg, nlh);
972 	ib_device_put(device);
973 
974 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
975 
976 err_free:
977 	nlmsg_free(msg);
978 err:
979 	ib_device_put(device);
980 	return err;
981 }
982 
983 static int nldev_port_get_dumpit(struct sk_buff *skb,
984 				 struct netlink_callback *cb)
985 {
986 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
987 	struct ib_device *device;
988 	int start = cb->args[0];
989 	struct nlmsghdr *nlh;
990 	u32 idx = 0;
991 	u32 ifindex;
992 	int err;
993 	unsigned int p;
994 
995 	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
996 				     nldev_policy, NULL);
997 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
998 		return -EINVAL;
999 
1000 	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1001 	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
1002 	if (!device)
1003 		return -EINVAL;
1004 
1005 	rdma_for_each_port (device, p) {
1006 		/*
1007 		 * The dumpit function returns all information from specific
1008 		 * index. This specific index is taken from the netlink
1009 		 * messages request sent by user and it is available
1010 		 * in cb->args[0].
1011 		 *
1012 		 * Usually, the user doesn't fill this field and it causes
1013 		 * to return everything.
1014 		 *
1015 		 */
1016 		if (idx < start) {
1017 			idx++;
1018 			continue;
1019 		}
1020 
1021 		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
1022 				cb->nlh->nlmsg_seq,
1023 				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1024 						 RDMA_NLDEV_CMD_PORT_GET),
1025 				0, NLM_F_MULTI);
1026 
1027 		if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
1028 			nlmsg_cancel(skb, nlh);
1029 			goto out;
1030 		}
1031 		idx++;
1032 		nlmsg_end(skb, nlh);
1033 	}
1034 
1035 out:
1036 	ib_device_put(device);
1037 	cb->args[0] = idx;
1038 	return skb->len;
1039 }
1040 
1041 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1042 			      struct netlink_ext_ack *extack)
1043 {
1044 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1045 	struct ib_device *device;
1046 	struct sk_buff *msg;
1047 	u32 index;
1048 	int ret;
1049 
1050 	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1051 				     nldev_policy, extack);
1052 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1053 		return -EINVAL;
1054 
1055 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1056 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1057 	if (!device)
1058 		return -EINVAL;
1059 
1060 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1061 	if (!msg) {
1062 		ret = -ENOMEM;
1063 		goto err;
1064 	}
1065 
1066 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1067 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1068 			0, 0);
1069 
1070 	ret = fill_res_info(msg, device);
1071 	if (ret)
1072 		goto err_free;
1073 
1074 	nlmsg_end(msg, nlh);
1075 	ib_device_put(device);
1076 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1077 
1078 err_free:
1079 	nlmsg_free(msg);
1080 err:
1081 	ib_device_put(device);
1082 	return ret;
1083 }
1084 
1085 static int _nldev_res_get_dumpit(struct ib_device *device,
1086 				 struct sk_buff *skb,
1087 				 struct netlink_callback *cb,
1088 				 unsigned int idx)
1089 {
1090 	int start = cb->args[0];
1091 	struct nlmsghdr *nlh;
1092 
1093 	if (idx < start)
1094 		return 0;
1095 
1096 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1097 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1098 			0, NLM_F_MULTI);
1099 
1100 	if (fill_res_info(skb, device)) {
1101 		nlmsg_cancel(skb, nlh);
1102 		goto out;
1103 	}
1104 	nlmsg_end(skb, nlh);
1105 
1106 	idx++;
1107 
1108 out:
1109 	cb->args[0] = idx;
1110 	return skb->len;
1111 }
1112 
1113 static int nldev_res_get_dumpit(struct sk_buff *skb,
1114 				struct netlink_callback *cb)
1115 {
1116 	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
1117 }
1118 
/*
 * Describes how one tracked resource type is reported over netlink.
 * Consumed by res_get_common_doit() and res_get_common_dumpit().
 */
struct nldev_fill_res_entry {
	/* Writes the attributes of one resource @res into @msg. */
	int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, u32 port);
	/* Nest attribute that wraps the whole table in a dump reply. */
	enum rdma_nldev_attr nldev_attr;
	/* Command used as the netlink message type of the reply. */
	enum rdma_nldev_command nldev_cmd;
	/* Bitmask of enum nldev_res_flags values. */
	u8 flags;
	/* Nest attribute that wraps each individual entry in a dump. */
	u32 entry;
	/* Attribute carrying the resource id in a doit request. */
	u32 id;
};
1128 
/* Flag bits for nldev_fill_res_entry.flags. */
enum nldev_res_flags {
	/*
	 * res_get_common_doit() rejects a request that carries a port index
	 * for types with this flag, and requires one for types without it.
	 */
	NLDEV_PER_DEV = 1 << 0,
};
1132 
/*
 * Reporting description for each resource type, indexed by
 * enum rdma_restrack_type. CQ, MR and PD are flagged NLDEV_PER_DEV;
 * QP, CM_ID and COUNTER are not.
 */
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.fill_res_func = fill_res_qp_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_LQPN,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.fill_res_func = fill_res_cm_id_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
	},
	[RDMA_RESTRACK_CQ] = {
		.fill_res_func = fill_res_cq_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_CQN,
	},
	[RDMA_RESTRACK_MR] = {
		.fill_res_func = fill_res_mr_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_MRN,
	},
	[RDMA_RESTRACK_PD] = {
		.fill_res_func = fill_res_pd_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
		.flags = NLDEV_PER_DEV,
		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
		.id = RDMA_NLDEV_ATTR_RES_PDN,
	},
	/* Counters are reported through the STAT_GET command. */
	[RDMA_RESTRACK_COUNTER] = {
		.fill_res_func = fill_res_counter_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
	},
};
1180 
1181 static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1182 			       struct netlink_ext_ack *extack,
1183 			       enum rdma_restrack_type res_type)
1184 {
1185 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1186 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1187 	struct rdma_restrack_entry *res;
1188 	struct ib_device *device;
1189 	u32 index, id, port = 0;
1190 	bool has_cap_net_admin;
1191 	struct sk_buff *msg;
1192 	int ret;
1193 
1194 	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1195 				     nldev_policy, extack);
1196 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
1197 		return -EINVAL;
1198 
1199 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1200 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1201 	if (!device)
1202 		return -EINVAL;
1203 
1204 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1205 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1206 		if (!rdma_is_port_valid(device, port)) {
1207 			ret = -EINVAL;
1208 			goto err;
1209 		}
1210 	}
1211 
1212 	if ((port && fe->flags & NLDEV_PER_DEV) ||
1213 	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
1214 		ret = -EINVAL;
1215 		goto err;
1216 	}
1217 
1218 	id = nla_get_u32(tb[fe->id]);
1219 	res = rdma_restrack_get_byid(device, res_type, id);
1220 	if (IS_ERR(res)) {
1221 		ret = PTR_ERR(res);
1222 		goto err;
1223 	}
1224 
1225 	if (!rdma_is_visible_in_pid_ns(res)) {
1226 		ret = -ENOENT;
1227 		goto err_get;
1228 	}
1229 
1230 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1231 	if (!msg) {
1232 		ret = -ENOMEM;
1233 		goto err_get;
1234 	}
1235 
1236 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1237 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
1238 			0, 0);
1239 
1240 	if (fill_nldev_handle(msg, device)) {
1241 		ret = -EMSGSIZE;
1242 		goto err_free;
1243 	}
1244 
1245 	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
1246 	ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
1247 	rdma_restrack_put(res);
1248 	if (ret)
1249 		goto err_free;
1250 
1251 	nlmsg_end(msg, nlh);
1252 	ib_device_put(device);
1253 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1254 
1255 err_free:
1256 	nlmsg_free(msg);
1257 err_get:
1258 	rdma_restrack_put(res);
1259 err:
1260 	ib_device_put(device);
1261 	return ret;
1262 }
1263 
1264 static int res_get_common_dumpit(struct sk_buff *skb,
1265 				 struct netlink_callback *cb,
1266 				 enum rdma_restrack_type res_type)
1267 {
1268 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1269 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1270 	struct rdma_restrack_entry *res;
1271 	struct rdma_restrack_root *rt;
1272 	int err, ret = 0, idx = 0;
1273 	struct nlattr *table_attr;
1274 	struct nlattr *entry_attr;
1275 	struct ib_device *device;
1276 	int start = cb->args[0];
1277 	bool has_cap_net_admin;
1278 	struct nlmsghdr *nlh;
1279 	unsigned long id;
1280 	u32 index, port = 0;
1281 	bool filled = false;
1282 
1283 	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1284 				     nldev_policy, NULL);
1285 	/*
1286 	 * Right now, we are expecting the device index to get res information,
1287 	 * but it is possible to extend this code to return all devices in
1288 	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
1289 	 * if it doesn't exist, we will iterate over all devices.
1290 	 *
1291 	 * But it is not needed for now.
1292 	 */
1293 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1294 		return -EINVAL;
1295 
1296 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1297 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1298 	if (!device)
1299 		return -EINVAL;
1300 
1301 	/*
1302 	 * If no PORT_INDEX is supplied, we will return all QPs from that device
1303 	 */
1304 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1305 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1306 		if (!rdma_is_port_valid(device, port)) {
1307 			ret = -EINVAL;
1308 			goto err_index;
1309 		}
1310 	}
1311 
1312 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1313 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
1314 			0, NLM_F_MULTI);
1315 
1316 	if (fill_nldev_handle(skb, device)) {
1317 		ret = -EMSGSIZE;
1318 		goto err;
1319 	}
1320 
1321 	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1322 	if (!table_attr) {
1323 		ret = -EMSGSIZE;
1324 		goto err;
1325 	}
1326 
1327 	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1328 
1329 	rt = &device->res[res_type];
1330 	xa_lock(&rt->xa);
1331 	/*
1332 	 * FIXME: if the skip ahead is something common this loop should
1333 	 * use xas_for_each & xas_pause to optimize, we can have a lot of
1334 	 * objects.
1335 	 */
1336 	xa_for_each(&rt->xa, id, res) {
1337 		if (!rdma_is_visible_in_pid_ns(res))
1338 			continue;
1339 
1340 		if (idx < start || !rdma_restrack_get(res))
1341 			goto next;
1342 
1343 		xa_unlock(&rt->xa);
1344 
1345 		filled = true;
1346 
1347 		entry_attr = nla_nest_start_noflag(skb, fe->entry);
1348 		if (!entry_attr) {
1349 			ret = -EMSGSIZE;
1350 			rdma_restrack_put(res);
1351 			goto msg_full;
1352 		}
1353 
1354 		ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
1355 		rdma_restrack_put(res);
1356 
1357 		if (ret) {
1358 			nla_nest_cancel(skb, entry_attr);
1359 			if (ret == -EMSGSIZE)
1360 				goto msg_full;
1361 			if (ret == -EAGAIN)
1362 				goto again;
1363 			goto res_err;
1364 		}
1365 		nla_nest_end(skb, entry_attr);
1366 again:		xa_lock(&rt->xa);
1367 next:		idx++;
1368 	}
1369 	xa_unlock(&rt->xa);
1370 
1371 msg_full:
1372 	nla_nest_end(skb, table_attr);
1373 	nlmsg_end(skb, nlh);
1374 	cb->args[0] = idx;
1375 
1376 	/*
1377 	 * No more entries to fill, cancel the message and
1378 	 * return 0 to mark end of dumpit.
1379 	 */
1380 	if (!filled)
1381 		goto err;
1382 
1383 	ib_device_put(device);
1384 	return skb->len;
1385 
1386 res_err:
1387 	nla_nest_cancel(skb, table_attr);
1388 
1389 err:
1390 	nlmsg_cancel(skb, nlh);
1391 
1392 err_index:
1393 	ib_device_put(device);
1394 	return ret;
1395 }
1396 
/*
 * Generate the nldev_res_get_<name>_dumpit() and nldev_res_get_<name>_doit()
 * handlers for one resource type. Both are thin wrappers that forward to
 * res_get_common_dumpit() / res_get_common_doit() with @type.
 */
#define RES_GET_FUNCS(name, type)                                              \
	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
						 struct netlink_callback *cb)  \
	{                                                                      \
		return res_get_common_dumpit(skb, cb, type);                   \
	}                                                                      \
	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
					       struct nlmsghdr *nlh,           \
					       struct netlink_ext_ack *extack) \
	{                                                                      \
		return res_get_common_doit(skb, nlh, extack, type);            \
	}

/* One handler pair per tracked resource type. */
RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
1416 
/* List of registered rdma_link_ops, linked through their ->list member. */
static LIST_HEAD(link_ops);
/* Held for write while link_ops is modified, for read while it is used. */
static DECLARE_RWSEM(link_ops_rwsem);
1419 
1420 static const struct rdma_link_ops *link_ops_get(const char *type)
1421 {
1422 	const struct rdma_link_ops *ops;
1423 
1424 	list_for_each_entry(ops, &link_ops, list) {
1425 		if (!strcmp(ops->type, type))
1426 			goto out;
1427 	}
1428 	ops = NULL;
1429 out:
1430 	return ops;
1431 }
1432 
1433 void rdma_link_register(struct rdma_link_ops *ops)
1434 {
1435 	down_write(&link_ops_rwsem);
1436 	if (WARN_ON_ONCE(link_ops_get(ops->type)))
1437 		goto out;
1438 	list_add(&ops->list, &link_ops);
1439 out:
1440 	up_write(&link_ops_rwsem);
1441 }
1442 EXPORT_SYMBOL(rdma_link_register);
1443 
1444 void rdma_link_unregister(struct rdma_link_ops *ops)
1445 {
1446 	down_write(&link_ops_rwsem);
1447 	list_del(&ops->list);
1448 	up_write(&link_ops_rwsem);
1449 }
1450 EXPORT_SYMBOL(rdma_link_unregister);
1451 
1452 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1453 			  struct netlink_ext_ack *extack)
1454 {
1455 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1456 	char ibdev_name[IB_DEVICE_NAME_MAX];
1457 	const struct rdma_link_ops *ops;
1458 	char ndev_name[IFNAMSIZ];
1459 	struct net_device *ndev;
1460 	char type[IFNAMSIZ];
1461 	int err;
1462 
1463 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1464 				     nldev_policy, extack);
1465 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1466 	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1467 		return -EINVAL;
1468 
1469 	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1470 		    sizeof(ibdev_name));
1471 	if (strchr(ibdev_name, '%'))
1472 		return -EINVAL;
1473 
1474 	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1475 	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1476 		    sizeof(ndev_name));
1477 
1478 	ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
1479 	if (!ndev)
1480 		return -ENODEV;
1481 
1482 	down_read(&link_ops_rwsem);
1483 	ops = link_ops_get(type);
1484 #ifdef CONFIG_MODULES
1485 	if (!ops) {
1486 		up_read(&link_ops_rwsem);
1487 		request_module("rdma-link-%s", type);
1488 		down_read(&link_ops_rwsem);
1489 		ops = link_ops_get(type);
1490 	}
1491 #endif
1492 	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1493 	up_read(&link_ops_rwsem);
1494 	dev_put(ndev);
1495 
1496 	return err;
1497 }
1498 
1499 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1500 			  struct netlink_ext_ack *extack)
1501 {
1502 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1503 	struct ib_device *device;
1504 	u32 index;
1505 	int err;
1506 
1507 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1508 				     nldev_policy, extack);
1509 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1510 		return -EINVAL;
1511 
1512 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1513 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1514 	if (!device)
1515 		return -EINVAL;
1516 
1517 	if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
1518 		ib_device_put(device);
1519 		return -EINVAL;
1520 	}
1521 
1522 	ib_unregister_device_and_put(device);
1523 	return 0;
1524 }
1525 
1526 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1527 			     struct netlink_ext_ack *extack)
1528 {
1529 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1530 	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1531 	struct ib_client_nl_info data = {};
1532 	struct ib_device *ibdev = NULL;
1533 	struct sk_buff *msg;
1534 	u32 index;
1535 	int err;
1536 
1537 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1538 			  extack);
1539 	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1540 		return -EINVAL;
1541 
1542 	nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1543 		    sizeof(client_name));
1544 
1545 	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1546 		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1547 		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1548 		if (!ibdev)
1549 			return -EINVAL;
1550 
1551 		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1552 			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1553 			if (!rdma_is_port_valid(ibdev, data.port)) {
1554 				err = -EINVAL;
1555 				goto out_put;
1556 			}
1557 		} else {
1558 			data.port = -1;
1559 		}
1560 	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1561 		return -EINVAL;
1562 	}
1563 
1564 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1565 	if (!msg) {
1566 		err = -ENOMEM;
1567 		goto out_put;
1568 	}
1569 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1570 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1571 					 RDMA_NLDEV_CMD_GET_CHARDEV),
1572 			0, 0);
1573 
1574 	data.nl_msg = msg;
1575 	err = ib_get_client_nl_info(ibdev, client_name, &data);
1576 	if (err)
1577 		goto out_nlmsg;
1578 
1579 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1580 				huge_encode_dev(data.cdev->devt),
1581 				RDMA_NLDEV_ATTR_PAD);
1582 	if (err)
1583 		goto out_data;
1584 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1585 				RDMA_NLDEV_ATTR_PAD);
1586 	if (err)
1587 		goto out_data;
1588 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1589 			   dev_name(data.cdev))) {
1590 		err = -EMSGSIZE;
1591 		goto out_data;
1592 	}
1593 
1594 	nlmsg_end(msg, nlh);
1595 	put_device(data.cdev);
1596 	if (ibdev)
1597 		ib_device_put(ibdev);
1598 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1599 
1600 out_data:
1601 	put_device(data.cdev);
1602 out_nlmsg:
1603 	nlmsg_free(msg);
1604 out_put:
1605 	if (ibdev)
1606 		ib_device_put(ibdev);
1607 	return err;
1608 }
1609 
1610 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1611 			      struct netlink_ext_ack *extack)
1612 {
1613 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1614 	struct sk_buff *msg;
1615 	int err;
1616 
1617 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1618 			  nldev_policy, extack);
1619 	if (err)
1620 		return err;
1621 
1622 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1623 	if (!msg)
1624 		return -ENOMEM;
1625 
1626 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1627 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1628 					 RDMA_NLDEV_CMD_SYS_GET),
1629 			0, 0);
1630 
1631 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1632 			 (u8)ib_devices_shared_netns);
1633 	if (err) {
1634 		nlmsg_free(msg);
1635 		return err;
1636 	}
1637 	nlmsg_end(msg, nlh);
1638 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1639 }
1640 
1641 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1642 				  struct netlink_ext_ack *extack)
1643 {
1644 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1645 	u8 enable;
1646 	int err;
1647 
1648 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1649 			  nldev_policy, extack);
1650 	if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
1651 		return -EINVAL;
1652 
1653 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1654 	/* Only 0 and 1 are supported */
1655 	if (enable > 1)
1656 		return -EINVAL;
1657 
1658 	err = rdma_compatdev_set(enable);
1659 	return err;
1660 }
1661 
1662 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1663 			       struct netlink_ext_ack *extack)
1664 {
1665 	u32 index, port, mode, mask = 0, qpn, cntn = 0;
1666 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1667 	struct ib_device *device;
1668 	struct sk_buff *msg;
1669 	int ret;
1670 
1671 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1672 			  nldev_policy, extack);
1673 	/* Currently only counter for QP is supported */
1674 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1675 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1676 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
1677 		return -EINVAL;
1678 
1679 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1680 		return -EINVAL;
1681 
1682 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1683 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1684 	if (!device)
1685 		return -EINVAL;
1686 
1687 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1688 	if (!rdma_is_port_valid(device, port)) {
1689 		ret = -EINVAL;
1690 		goto err;
1691 	}
1692 
1693 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1694 	if (!msg) {
1695 		ret = -ENOMEM;
1696 		goto err;
1697 	}
1698 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1699 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1700 					 RDMA_NLDEV_CMD_STAT_SET),
1701 			0, 0);
1702 
1703 	mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
1704 	if (mode == RDMA_COUNTER_MODE_AUTO) {
1705 		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
1706 			mask = nla_get_u32(
1707 				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
1708 
1709 		ret = rdma_counter_set_auto_mode(device, port,
1710 						 mask ? true : false, mask);
1711 		if (ret)
1712 			goto err_msg;
1713 	} else {
1714 		qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1715 		if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
1716 			cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1717 			ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
1718 		} else {
1719 			ret = rdma_counter_bind_qpn_alloc(device, port,
1720 							  qpn, &cntn);
1721 		}
1722 		if (ret)
1723 			goto err_msg;
1724 
1725 		if (fill_nldev_handle(msg, device) ||
1726 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1727 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1728 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1729 			ret = -EMSGSIZE;
1730 			goto err_fill;
1731 		}
1732 	}
1733 
1734 	nlmsg_end(msg, nlh);
1735 	ib_device_put(device);
1736 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1737 
1738 err_fill:
1739 	rdma_counter_unbind_qpn(device, port, qpn, cntn);
1740 err_msg:
1741 	nlmsg_free(msg);
1742 err:
1743 	ib_device_put(device);
1744 	return ret;
1745 }
1746 
1747 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1748 			       struct netlink_ext_ack *extack)
1749 {
1750 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1751 	struct ib_device *device;
1752 	struct sk_buff *msg;
1753 	u32 index, port, qpn, cntn;
1754 	int ret;
1755 
1756 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1757 			  nldev_policy, extack);
1758 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1759 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
1760 	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
1761 	    !tb[RDMA_NLDEV_ATTR_RES_LQPN])
1762 		return -EINVAL;
1763 
1764 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1765 		return -EINVAL;
1766 
1767 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1768 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1769 	if (!device)
1770 		return -EINVAL;
1771 
1772 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1773 	if (!rdma_is_port_valid(device, port)) {
1774 		ret = -EINVAL;
1775 		goto err;
1776 	}
1777 
1778 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1779 	if (!msg) {
1780 		ret = -ENOMEM;
1781 		goto err;
1782 	}
1783 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1784 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1785 					 RDMA_NLDEV_CMD_STAT_SET),
1786 			0, 0);
1787 
1788 	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1789 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1790 	if (fill_nldev_handle(msg, device) ||
1791 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1792 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1793 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1794 		ret = -EMSGSIZE;
1795 		goto err_fill;
1796 	}
1797 
1798 	ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
1799 	if (ret)
1800 		goto err_fill;
1801 
1802 	nlmsg_end(msg, nlh);
1803 	ib_device_put(device);
1804 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1805 
1806 err_fill:
1807 	nlmsg_free(msg);
1808 err:
1809 	ib_device_put(device);
1810 	return ret;
1811 }
1812 
1813 static int stat_get_doit_default_counter(struct sk_buff *skb,
1814 					 struct nlmsghdr *nlh,
1815 					 struct netlink_ext_ack *extack,
1816 					 struct nlattr *tb[])
1817 {
1818 	struct rdma_hw_stats *stats;
1819 	struct nlattr *table_attr;
1820 	struct ib_device *device;
1821 	int ret, num_cnts, i;
1822 	struct sk_buff *msg;
1823 	u32 index, port;
1824 	u64 v;
1825 
1826 	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1827 		return -EINVAL;
1828 
1829 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1830 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1831 	if (!device)
1832 		return -EINVAL;
1833 
1834 	if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) {
1835 		ret = -EINVAL;
1836 		goto err;
1837 	}
1838 
1839 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1840 	if (!rdma_is_port_valid(device, port)) {
1841 		ret = -EINVAL;
1842 		goto err;
1843 	}
1844 
1845 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1846 	if (!msg) {
1847 		ret = -ENOMEM;
1848 		goto err;
1849 	}
1850 
1851 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1852 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1853 					 RDMA_NLDEV_CMD_STAT_GET),
1854 			0, 0);
1855 
1856 	if (fill_nldev_handle(msg, device) ||
1857 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
1858 		ret = -EMSGSIZE;
1859 		goto err_msg;
1860 	}
1861 
1862 	stats = device->port_data ? device->port_data[port].hw_stats : NULL;
1863 	if (stats == NULL) {
1864 		ret = -EINVAL;
1865 		goto err_msg;
1866 	}
1867 	mutex_lock(&stats->lock);
1868 
1869 	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
1870 	if (num_cnts < 0) {
1871 		ret = -EINVAL;
1872 		goto err_stats;
1873 	}
1874 
1875 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
1876 	if (!table_attr) {
1877 		ret = -EMSGSIZE;
1878 		goto err_stats;
1879 	}
1880 	for (i = 0; i < num_cnts; i++) {
1881 		v = stats->value[i] +
1882 			rdma_counter_get_hwstat_value(device, port, i);
1883 		if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) {
1884 			ret = -EMSGSIZE;
1885 			goto err_table;
1886 		}
1887 	}
1888 	nla_nest_end(msg, table_attr);
1889 
1890 	mutex_unlock(&stats->lock);
1891 	nlmsg_end(msg, nlh);
1892 	ib_device_put(device);
1893 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1894 
1895 err_table:
1896 	nla_nest_cancel(msg, table_attr);
1897 err_stats:
1898 	mutex_unlock(&stats->lock);
1899 err_msg:
1900 	nlmsg_free(msg);
1901 err:
1902 	ib_device_put(device);
1903 	return ret;
1904 }
1905 
1906 static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
1907 			    struct netlink_ext_ack *extack, struct nlattr *tb[])
1908 
1909 {
1910 	static enum rdma_nl_counter_mode mode;
1911 	static enum rdma_nl_counter_mask mask;
1912 	struct ib_device *device;
1913 	struct sk_buff *msg;
1914 	u32 index, port;
1915 	int ret;
1916 
1917 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
1918 		return nldev_res_get_counter_doit(skb, nlh, extack);
1919 
1920 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
1921 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1922 		return -EINVAL;
1923 
1924 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1925 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1926 	if (!device)
1927 		return -EINVAL;
1928 
1929 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1930 	if (!rdma_is_port_valid(device, port)) {
1931 		ret = -EINVAL;
1932 		goto err;
1933 	}
1934 
1935 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1936 	if (!msg) {
1937 		ret = -ENOMEM;
1938 		goto err;
1939 	}
1940 
1941 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1942 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1943 					 RDMA_NLDEV_CMD_STAT_GET),
1944 			0, 0);
1945 
1946 	ret = rdma_counter_get_mode(device, port, &mode, &mask);
1947 	if (ret)
1948 		goto err_msg;
1949 
1950 	if (fill_nldev_handle(msg, device) ||
1951 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1952 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
1953 		ret = -EMSGSIZE;
1954 		goto err_msg;
1955 	}
1956 
1957 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
1958 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
1959 		ret = -EMSGSIZE;
1960 		goto err_msg;
1961 	}
1962 
1963 	nlmsg_end(msg, nlh);
1964 	ib_device_put(device);
1965 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1966 
1967 err_msg:
1968 	nlmsg_free(msg);
1969 err:
1970 	ib_device_put(device);
1971 	return ret;
1972 }
1973 
1974 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1975 			       struct netlink_ext_ack *extack)
1976 {
1977 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1978 	int ret;
1979 
1980 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1981 			  nldev_policy, extack);
1982 	if (ret)
1983 		return -EINVAL;
1984 
1985 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
1986 		return stat_get_doit_default_counter(skb, nlh, extack, tb);
1987 
1988 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
1989 	case RDMA_NLDEV_ATTR_RES_QP:
1990 		ret = stat_get_doit_qp(skb, nlh, extack, tb);
1991 		break;
1992 
1993 	default:
1994 		ret = -EINVAL;
1995 		break;
1996 	}
1997 
1998 	return ret;
1999 }
2000 
2001 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2002 				 struct netlink_callback *cb)
2003 {
2004 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2005 	int ret;
2006 
2007 	ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2008 			  nldev_policy, NULL);
2009 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2010 		return -EINVAL;
2011 
2012 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2013 	case RDMA_NLDEV_ATTR_RES_QP:
2014 		ret = nldev_res_get_counter_dumpit(skb, cb);
2015 		break;
2016 
2017 	default:
2018 		ret = -EINVAL;
2019 		break;
2020 	}
2021 
2022 	return ret;
2023 }
2024 
2025 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
2026 	[RDMA_NLDEV_CMD_GET] = {
2027 		.doit = nldev_get_doit,
2028 		.dump = nldev_get_dumpit,
2029 	},
2030 	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
2031 		.doit = nldev_get_chardev,
2032 	},
2033 	[RDMA_NLDEV_CMD_SET] = {
2034 		.doit = nldev_set_doit,
2035 		.flags = RDMA_NL_ADMIN_PERM,
2036 	},
2037 	[RDMA_NLDEV_CMD_NEWLINK] = {
2038 		.doit = nldev_newlink,
2039 		.flags = RDMA_NL_ADMIN_PERM,
2040 	},
2041 	[RDMA_NLDEV_CMD_DELLINK] = {
2042 		.doit = nldev_dellink,
2043 		.flags = RDMA_NL_ADMIN_PERM,
2044 	},
2045 	[RDMA_NLDEV_CMD_PORT_GET] = {
2046 		.doit = nldev_port_get_doit,
2047 		.dump = nldev_port_get_dumpit,
2048 	},
2049 	[RDMA_NLDEV_CMD_RES_GET] = {
2050 		.doit = nldev_res_get_doit,
2051 		.dump = nldev_res_get_dumpit,
2052 	},
2053 	[RDMA_NLDEV_CMD_RES_QP_GET] = {
2054 		.doit = nldev_res_get_qp_doit,
2055 		.dump = nldev_res_get_qp_dumpit,
2056 	},
2057 	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
2058 		.doit = nldev_res_get_cm_id_doit,
2059 		.dump = nldev_res_get_cm_id_dumpit,
2060 	},
2061 	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
2062 		.doit = nldev_res_get_cq_doit,
2063 		.dump = nldev_res_get_cq_dumpit,
2064 	},
2065 	[RDMA_NLDEV_CMD_RES_MR_GET] = {
2066 		.doit = nldev_res_get_mr_doit,
2067 		.dump = nldev_res_get_mr_dumpit,
2068 	},
2069 	[RDMA_NLDEV_CMD_RES_PD_GET] = {
2070 		.doit = nldev_res_get_pd_doit,
2071 		.dump = nldev_res_get_pd_dumpit,
2072 	},
2073 	[RDMA_NLDEV_CMD_SYS_GET] = {
2074 		.doit = nldev_sys_get_doit,
2075 	},
2076 	[RDMA_NLDEV_CMD_SYS_SET] = {
2077 		.doit = nldev_set_sys_set_doit,
2078 	},
2079 	[RDMA_NLDEV_CMD_STAT_SET] = {
2080 		.doit = nldev_stat_set_doit,
2081 		.flags = RDMA_NL_ADMIN_PERM,
2082 	},
2083 	[RDMA_NLDEV_CMD_STAT_GET] = {
2084 		.doit = nldev_stat_get_doit,
2085 		.dump = nldev_stat_get_dumpit,
2086 	},
2087 	[RDMA_NLDEV_CMD_STAT_DEL] = {
2088 		.doit = nldev_stat_del_doit,
2089 		.flags = RDMA_NL_ADMIN_PERM,
2090 	},
2091 };
2092 
/* Register nldev_cb_table as the handler table of the RDMA_NL_NLDEV client. */
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
2097 
/* Unregister the RDMA_NL_NLDEV client registered by nldev_init(). */
void __exit nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}
2102 
/*
 * Module alias for the RDMA_NL_NLDEV netlink client.
 * NOTE(review): 5 is presumably the numeric value of RDMA_NL_NLDEV - confirm
 * against the uapi header.
 */
MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
2104