/*
 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/module.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <net/netlink.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>

#include "core_priv.h"
#include "cma_priv.h"

static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
	[RDMA_NLDEV_ATTR_DEV_INDEX]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_DEV_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IB_DEVICE_NAME_MAX - 1},
	[RDMA_NLDEV_ATTR_PORT_INDEX]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_FW_VERSION]	= { .type = NLA_NUL_STRING,
					    .len = IB_FW_VERSION_NAME_MAX - 1},
	[RDMA_NLDEV_ATTR_NODE_GUID]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]	= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_LID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_SM_LID]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_LMC]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
						      .len = 16 },
	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
						    .len = TASK_COMM_LEN },
	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]	= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_DST_ADDR]	= {
			.len = sizeof(struct __kernel_sockaddr_storage) },
	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
						    .len = IFNAMSIZ },
};

static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
		return -EMSGSIZE;
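	/* The device name is reported alongside the index so that userspace
	 * can identify the device by either handle.
	 */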
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
		return -EMSGSIZE;

	return 0;
}

static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
	char fw[IB_FW_VERSION_NAME_MAX];

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
		return -EMSGSIZE;

	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      device->attrs.device_cap_flags, 0))
		return -EMSGSIZE;

	ib_get_device_fw_str(device, fw);
	/* Device without FW has strlen(fw) = 0 */
	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
			      be64_to_cpu(device->node_guid), 0))
		return -EMSGSIZE;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
			      be64_to_cpu(device->attrs.sys_image_guid), 0))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
		return -EMSGSIZE;
	return 0;
}

static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct net_device *netdev = NULL;
	struct ib_port_attr attr;
	int ret;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	ret = ib_query_port(device, port, &attr);
	if (ret)
		return ret;

	BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
			      (u64)attr.port_cap_flags, 0))
		return -EMSGSIZE;
	if (rdma_protocol_ib(device, port) &&
	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
			      attr.subnet_prefix, 0))
		return -EMSGSIZE;
	if (rdma_protocol_ib(device, port)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
			return -EMSGSIZE;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
			return -EMSGSIZE;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
			return -EMSGSIZE;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
		return -EMSGSIZE;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
		return -EMSGSIZE;

	if (device->get_netdev)
		netdev = device->get_netdev(device, port);

	if (netdev && net_eq(dev_net(netdev), net)) {
		ret = nla_put_u32(msg,
				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
		if (ret)
			goto out;
		ret = nla_put_string(msg,
				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
	}

out:
	if (netdev)
		dev_put(netdev);
	return ret;
}

static int fill_res_info_entry(struct sk_buff *msg,
			       const char *name, u64 curr)
{
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
	if (!entry_attr)
		return -EMSGSIZE;

	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
		goto err;
	if (nla_put_u64_64bit(msg,
			      RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
	return -EMSGSIZE;
}

static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
{
	static const char * const names[RDMA_RESTRACK_MAX] = {
		[RDMA_RESTRACK_PD] = "pd",
		[RDMA_RESTRACK_CQ] = "cq",
		[RDMA_RESTRACK_QP] = "qp",
		[RDMA_RESTRACK_CM_ID] = "cm_id",
		[RDMA_RESTRACK_MR] = "mr",
	};

	struct rdma_restrack_root *res = &device->res;
	struct nlattr *table_attr;
	int ret, i, curr;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
	if (!table_attr)
		return -EMSGSIZE;

	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
		if (!names[i])
			continue;
		curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
		ret = fill_res_info_entry(msg, names[i], curr);
		if (ret)
			goto err;
	}

	nla_nest_end(msg, table_attr);
	return 0;

err:
	nla_nest_cancel(msg, table_attr);
	return ret;
}

static int fill_res_name_pid(struct sk_buff *msg,
			     struct rdma_restrack_entry *res)
{
	/*
	 * For user resources, user space should read /proc/PID/comm
	 * to get the name of the task.
	 */
	if (rdma_is_kernel_res(res)) {
		if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
				   res->kern_name))
			return -EMSGSIZE;
	} else {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
				task_pid_vnr(res->task)))
			return -EMSGSIZE;
	}
	return 0;
}

static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_qp_init_attr qp_init_attr;
	struct nlattr *entry_attr;
	struct ib_qp_attr qp_attr;
	int ret;

	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
	if (ret)
		return ret;

	if (port && port != qp_attr.port_num)
		return 0;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!entry_attr)
		goto out;

	/* In create_qp() port is not set yet */
	if (qp_attr.port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
		goto err;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		goto err;
	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
				qp_attr.dest_qp_num))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
				qp_attr.rq_psn))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
		goto err;

	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
			       qp_attr.path_mig_state))
			goto err;
	}
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
		goto err;
	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
out:
	return -EMSGSIZE;
}

static int fill_res_cm_id_entry(struct sk_buff *msg,
				struct netlink_callback *cb,
				struct rdma_restrack_entry *res, uint32_t port)
{
	struct rdma_id_private *id_priv =
				container_of(res, struct rdma_id_private, res);
	struct rdma_cm_id *cm_id = &id_priv->id;
	struct nlattr *entry_attr;

	if (port && port != cm_id->port_num)
		return 0;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
	if (!entry_attr)
		goto out;

	if (cm_id->port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
		goto err;

	if (id_priv->qp_num) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
			goto err;
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
			goto err;
	}

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
		goto err;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
		goto err;

	if (cm_id->route.addr.src_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
		    sizeof(cm_id->route.addr.src_addr),
		    &cm_id->route.addr.src_addr))
		goto err;
	if (cm_id->route.addr.dst_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
		    sizeof(cm_id->route.addr.dst_addr),
		    &cm_id->route.addr.dst_addr))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
out:
	return -EMSGSIZE;
}

static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
	if (!entry_attr)
		goto out;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
		goto err;
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&cq->usecnt), 0))
		goto err;

	/* Poll context is only valid for kernel CQs */
	if (rdma_is_kernel_res(res) &&
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
out:
	return -EMSGSIZE;
}

static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
	if (!entry_attr)
		goto out;

	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
			goto err;
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
			goto err;
		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
				      mr->iova, 0))
			goto err;
	}

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
out:
	return -EMSGSIZE;
}

static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_pd *pd = container_of(res, struct ib_pd, res);
	struct nlattr *entry_attr;

	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
	if (!entry_attr)
		goto out;

	if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
				pd->local_dma_lkey))
			goto err;
		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
				pd->unsafe_global_rkey))
			goto err;
	}
	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&pd->usecnt), 0))
		goto err;
	if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
			pd->unsafe_global_rkey))
		goto err;

	if (fill_res_name_pid(msg, res))
		goto err;

	nla_nest_end(msg, entry_attr);
	return 0;

err:
	nla_nest_cancel(msg, entry_attr);
out:
	return -EMSGSIZE;
}

static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);

	device = ib_device_get_by_index(index);
	if (!device)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);

	err = fill_dev_info(msg, device);
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);

	put_device(&device->dev);
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	put_device(&device->dev);
	return err;
}

static int _nldev_get_dumpit(struct ib_device *device,
			     struct sk_buff *skb,
			     struct netlink_callback *cb,
			     unsigned int idx)
{
	int start = cb->args[0];
	struct nlmsghdr *nlh;

	if (idx < start)
		return 0;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, NLM_F_MULTI);

	if (fill_dev_info(skb, device)) {
		nlmsg_cancel(skb, nlh);
		goto out;
	}

	nlmsg_end(skb, nlh);

	idx++;

out:
	cb->args[0] = idx;
	return skb->len;
}

static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	/*
	 * There is no need to take lock, because
	 * we are relying on ib_core's lists_rwsem
	 */
	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}

static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	u32 port;
	int err;

	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (err ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		err = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
			0, 0);

	err = fill_port_info(msg, device, port, sock_net(skb->sk));
	if (err)
		goto err_free;

	nlmsg_end(msg, nlh);
	put_device(&device->dev);

	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	put_device(&device->dev);
	return err;
}

static int nldev_port_get_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	int start = cb->args[0];
	struct nlmsghdr *nlh;
	u32 idx = 0;
	u32 ifindex;
	int err;
	u32 p;

	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, NULL);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(ifindex);
	if (!device)
		return -EINVAL;

	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
		/*
		 * The dumpit function returns all information from a specific
		 * index. This specific index is taken from the netlink
		 * request sent by the user and is available in cb->args[0].
		 *
		 * Usually, the user doesn't fill this field, so everything
		 * is returned.
		 */
		if (idx < start) {
			idx++;
			continue;
		}

		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq,
				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
						 RDMA_NLDEV_CMD_PORT_GET),
				0, NLM_F_MULTI);

		if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
			nlmsg_cancel(skb, nlh);
			goto out;
		}
		idx++;
		nlmsg_end(skb, nlh);
	}

out:
	put_device(&device->dev);
	cb->args[0] = idx;
	return skb->len;
}

static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index;
	int ret;

	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(index);
	if (!device)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
			0, 0);

	ret = fill_res_info(msg, device);
	if (ret)
		goto err_free;

	nlmsg_end(msg, nlh);
	put_device(&device->dev);
	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
	nlmsg_free(msg);
err:
	put_device(&device->dev);
	return ret;
}

static int _nldev_res_get_dumpit(struct ib_device *device,
				 struct sk_buff *skb,
				 struct netlink_callback *cb,
				 unsigned int idx)
{
	int start = cb->args[0];
	struct nlmsghdr *nlh;

	if (idx < start)
		return 0;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
			0, NLM_F_MULTI);

	if (fill_res_info(skb, device)) {
		nlmsg_cancel(skb, nlh);
		goto out;
	}

	nlmsg_end(skb, nlh);

	idx++;

out:
	cb->args[0] = idx;
	return skb->len;
}

static int nldev_res_get_dumpit(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}

struct nldev_fill_res_entry {
	int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb,
			     struct rdma_restrack_entry *res, u32 port);
	enum rdma_nldev_attr nldev_attr;
	enum rdma_nldev_command nldev_cmd;
};

static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = {
		.fill_res_func = fill_res_qp_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
	},
	[RDMA_RESTRACK_CM_ID] = {
		.fill_res_func = fill_res_cm_id_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
	},
	[RDMA_RESTRACK_CQ] = {
		.fill_res_func = fill_res_cq_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
	},
	[RDMA_RESTRACK_MR] = {
		.fill_res_func = fill_res_mr_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
	},
	[RDMA_RESTRACK_PD] = {
		.fill_res_func = fill_res_pd_entry,
		.nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
	},
};

static int res_get_common_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb,
				 enum rdma_restrack_type res_type)
{
	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct rdma_restrack_entry *res;
	int err, ret = 0, idx = 0;
	struct nlattr *table_attr;
	struct ib_device *device;
	int start = cb->args[0];
	struct nlmsghdr *nlh;
	u32 index, port = 0;
	bool filled = false;

	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, NULL);
	/*
	 * Right now, we are expecting the device index to get res information,
	 * but it is possible to extend this code to return all devices in
	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
	 * If it doesn't exist, we will iterate over all devices.
	 *
	 * But it is not needed for now.
	 */
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(index);
	if (!device)
		return -EINVAL;

	/*
	 * If no PORT_INDEX is supplied, we will return all the resources
	 * from that device.
	 */
	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
		if (!rdma_is_port_valid(device, port)) {
			ret = -EINVAL;
			goto err_index;
		}
	}

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
			0, NLM_F_MULTI);

	if (fill_nldev_handle(skb, device)) {
		ret = -EMSGSIZE;
		goto err;
	}

	table_attr = nla_nest_start(skb, fe->nldev_attr);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err;
	}

	down_read(&device->res.rwsem);
	hash_for_each_possible(device->res.hash, res, node, res_type) {
		if (idx < start)
			goto next;

		if ((rdma_is_kernel_res(res) &&
		     task_active_pid_ns(current) != &init_pid_ns) ||
		    (!rdma_is_kernel_res(res) && task_active_pid_ns(current) !=
		     task_active_pid_ns(res->task)))
			/*
			 * 1. Kernel resources should be visible in the init
			 *    namespace only
			 * 2. Present only resources visible in the current
			 *    namespace
			 */
			goto next;

		if (!rdma_restrack_get(res))
			/*
			 * Resource is under release now, but we are not
			 * releasing the lock now, so it will be released in
			 * our next pass, once we will get ->next pointer.
			 */
			goto next;

		filled = true;

		up_read(&device->res.rwsem);
		ret = fe->fill_res_func(skb, cb, res, port);
		down_read(&device->res.rwsem);
		/*
		 * Return resource back, but it won't be released till
		 * the &device->res.rwsem will be released for write.
		 */
		rdma_restrack_put(res);

		if (ret == -EMSGSIZE)
			/*
			 * There is a chance to optimize here.
			 * It can be done by using list_prepare_entry
			 * and list_for_each_entry_continue afterwards.
			 */
			break;
		if (ret)
			goto res_err;
next:		idx++;
	}
	up_read(&device->res.rwsem);

	nla_nest_end(skb, table_attr);
	nlmsg_end(skb, nlh);
	cb->args[0] = idx;

	/*
	 * No more entries to fill, cancel the message and
	 * return 0 to mark the end of dumpit.
	 */
	if (!filled)
		goto err;

	put_device(&device->dev);
	return skb->len;

res_err:
	nla_nest_cancel(skb, table_attr);
	up_read(&device->res.rwsem);

err:
	nlmsg_cancel(skb, nlh);

err_index:
	put_device(&device->dev);
	return ret;
}

static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP);
}

static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
				      struct netlink_callback *cb)
{
	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID);
}

static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ);
}

static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR);
}

static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
}

static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
	[RDMA_NLDEV_CMD_GET] = {
		.doit = nldev_get_doit,
		.dump = nldev_get_dumpit,
	},
	[RDMA_NLDEV_CMD_PORT_GET] = {
		.doit = nldev_port_get_doit,
		.dump = nldev_port_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_GET] = {
		.doit = nldev_res_get_doit,
		.dump = nldev_res_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET] = {
		.dump = nldev_res_get_qp_dumpit,
		/*
		 * .doit is not implemented yet for two reasons:
		 * 1. It is not needed yet.
		 * 2. There is a need to provide an identifier; while it is
		 *    easy for QPs (device index + port index + LQPN), it is
		 *    not the case for the rest of the resources (PD and CQ).
		 *    Because it is better to provide a similar interface for
		 *    all resources, let's wait till we have the other
		 *    resources implemented too.
		 */
	},
	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
		.dump = nldev_res_get_cm_id_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
		.dump = nldev_res_get_cq_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET] = {
		.dump = nldev_res_get_mr_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_PD_GET] = {
		.dump = nldev_res_get_pd_dumpit,
	},
};

void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}

void __exit nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);