1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Generic netlink support functions to configure an SMC-R PNET table 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/module.h> 13 #include <linux/list.h> 14 #include <linux/ctype.h> 15 #include <net/netlink.h> 16 #include <net/genetlink.h> 17 18 #include <uapi/linux/if.h> 19 #include <uapi/linux/smc.h> 20 21 #include <rdma/ib_verbs.h> 22 23 #include <net/netns/generic.h> 24 #include "smc_netns.h" 25 26 #include "smc_pnet.h" 27 #include "smc_ib.h" 28 #include "smc_ism.h" 29 #include "smc_core.h" 30 31 #define SMC_ASCII_BLANK 32 32 33 static struct net_device *pnet_find_base_ndev(struct net_device *ndev); 34 35 static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { 36 [SMC_PNETID_NAME] = { 37 .type = NLA_NUL_STRING, 38 .len = SMC_MAX_PNETID_LEN 39 }, 40 [SMC_PNETID_ETHNAME] = { 41 .type = NLA_NUL_STRING, 42 .len = IFNAMSIZ - 1 43 }, 44 [SMC_PNETID_IBNAME] = { 45 .type = NLA_NUL_STRING, 46 .len = IB_DEVICE_NAME_MAX - 1 47 }, 48 [SMC_PNETID_IBPORT] = { .type = NLA_U8 } 49 }; 50 51 static struct genl_family smc_pnet_nl_family; 52 53 enum smc_pnet_nametype { 54 SMC_PNET_ETH = 1, 55 SMC_PNET_IB = 2, 56 }; 57 58 /* pnet entry stored in pnet table */ 59 struct smc_pnetentry { 60 struct list_head list; 61 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 62 enum smc_pnet_nametype type; 63 union { 64 struct { 65 char eth_name[IFNAMSIZ + 1]; 66 struct net_device *ndev; 67 }; 68 struct { 69 char ib_name[IB_DEVICE_NAME_MAX + 1]; 70 u8 ib_port; 71 }; 72 }; 73 }; 74 75 /* Check if two given pnetids match */ 76 static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) 77 { 78 int i; 79 80 for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { 81 if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) && 82 (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK)) 83 break; 84 if (pnetid1[i] != pnetid2[i]) 85 return false; 86 } 87 return true; 88 } 89 90 /* Remove a pnetid from the pnet table. 91 */ 92 static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) 93 { 94 struct smc_pnetentry *pnetelem, *tmp_pe; 95 struct smc_pnettable *pnettable; 96 struct smc_ib_device *ibdev; 97 struct smcd_dev *smcd_dev; 98 struct smc_net *sn; 99 int rc = -ENOENT; 100 int ibport; 101 102 /* get pnettable for namespace */ 103 sn = net_generic(net, smc_net_id); 104 pnettable = &sn->pnettable; 105 106 /* remove table entry */ 107 write_lock(&pnettable->lock); 108 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, 109 list) { 110 if (!pnet_name || 111 smc_pnet_match(pnetelem->pnet_name, pnet_name)) { 112 list_del(&pnetelem->list); 113 if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) { 114 dev_put(pnetelem->ndev); 115 pr_warn_ratelimited("smc: net device %s " 116 "erased user defined " 117 "pnetid %.16s\n", 118 pnetelem->eth_name, 119 pnetelem->pnet_name); 120 } 121 kfree(pnetelem); 122 rc = 0; 123 } 124 } 125 write_unlock(&pnettable->lock); 126 127 /* if this is not the initial namespace, stop here */ 128 if (net != &init_net) 129 return rc; 130 131 /* remove ib devices */ 132 spin_lock(&smc_ib_devices.lock); 133 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 134 for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { 135 if (ibdev->pnetid_by_user[ibport] && 136 (!pnet_name || 137 smc_pnet_match(pnet_name, 138 ibdev->pnetid[ibport]))) { 139 pr_warn_ratelimited("smc: ib device %s ibport " 140 "%d erased user defined " 141 "pnetid %.16s\n", 142 ibdev->ibdev->name, 143 ibport + 1, 144 ibdev->pnetid[ibport]); 145 memset(ibdev->pnetid[ibport], 0, 146 SMC_MAX_PNETID_LEN); 147 ibdev->pnetid_by_user[ibport] = false; 148 rc = 0; 149 } 150 } 151 } 152 spin_unlock(&smc_ib_devices.lock); 153 /* remove smcd devices */ 154 spin_lock(&smcd_dev_list.lock); 155 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 156 if (smcd_dev->pnetid_by_user && 157 (!pnet_name || 158 smc_pnet_match(pnet_name, smcd_dev->pnetid))) { 159 pr_warn_ratelimited("smc: smcd device %s " 160 "erased user defined pnetid " 161 "%.16s\n", dev_name(&smcd_dev->dev), 162 smcd_dev->pnetid); 163 memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN); 164 smcd_dev->pnetid_by_user = false; 165 rc = 0; 166 } 167 } 168 spin_unlock(&smcd_dev_list.lock); 169 return rc; 170 } 171 172 /* Add the reference to a given network device to the pnet table. 173 */ 174 static int smc_pnet_add_by_ndev(struct net_device *ndev) 175 { 176 struct smc_pnetentry *pnetelem, *tmp_pe; 177 struct smc_pnettable *pnettable; 178 struct net *net = dev_net(ndev); 179 struct smc_net *sn; 180 int rc = -ENOENT; 181 182 /* get pnettable for namespace */ 183 sn = net_generic(net, smc_net_id); 184 pnettable = &sn->pnettable; 185 186 write_lock(&pnettable->lock); 187 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { 188 if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && 189 !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { 190 dev_hold(ndev); 191 pnetelem->ndev = ndev; 192 rc = 0; 193 pr_warn_ratelimited("smc: adding net device %s with " 194 "user defined pnetid %.16s\n", 195 pnetelem->eth_name, 196 pnetelem->pnet_name); 197 break; 198 } 199 } 200 write_unlock(&pnettable->lock); 201 return rc; 202 } 203 204 /* Remove the reference to a given network device from the pnet table. 205 */ 206 static int smc_pnet_remove_by_ndev(struct net_device *ndev) 207 { 208 struct smc_pnetentry *pnetelem, *tmp_pe; 209 struct smc_pnettable *pnettable; 210 struct net *net = dev_net(ndev); 211 struct smc_net *sn; 212 int rc = -ENOENT; 213 214 /* get pnettable for namespace */ 215 sn = net_generic(net, smc_net_id); 216 pnettable = &sn->pnettable; 217 218 write_lock(&pnettable->lock); 219 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { 220 if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { 221 dev_put(pnetelem->ndev); 222 pnetelem->ndev = NULL; 223 rc = 0; 224 pr_warn_ratelimited("smc: removing net device %s with " 225 "user defined pnetid %.16s\n", 226 pnetelem->eth_name, 227 pnetelem->pnet_name); 228 break; 229 } 230 } 231 write_unlock(&pnettable->lock); 232 return rc; 233 } 234 235 /* Apply pnetid to ib device when no pnetid is set. 236 */ 237 static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, 238 char *pnet_name) 239 { 240 u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; 241 bool applied = false; 242 243 spin_lock(&smc_ib_devices.lock); 244 if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { 245 memcpy(ib_dev->pnetid[ib_port - 1], pnet_name, 246 SMC_MAX_PNETID_LEN); 247 ib_dev->pnetid_by_user[ib_port - 1] = true; 248 applied = true; 249 } 250 spin_unlock(&smc_ib_devices.lock); 251 return applied; 252 } 253 254 /* Apply pnetid to smcd device when no pnetid is set. 255 */ 256 static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name) 257 { 258 u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; 259 bool applied = false; 260 261 spin_lock(&smcd_dev_list.lock); 262 if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { 263 memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN); 264 smcd_dev->pnetid_by_user = true; 265 applied = true; 266 } 267 spin_unlock(&smcd_dev_list.lock); 268 return applied; 269 } 270 271 /* The limit for pnetid is 16 characters. 272 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. 273 * Lower case letters are converted to upper case. 274 * Interior blanks should not be used. 275 */ 276 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) 277 { 278 char *bf = skip_spaces(pnet_name); 279 size_t len = strlen(bf); 280 char *end = bf + len; 281 282 if (!len) 283 return false; 284 while (--end >= bf && isspace(*end)) 285 ; 286 if (end - bf >= SMC_MAX_PNETID_LEN) 287 return false; 288 while (bf <= end) { 289 if (!isalnum(*bf)) 290 return false; 291 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; 292 bf++; 293 } 294 *pnetid = '\0'; 295 return true; 296 } 297 298 /* Find an infiniband device by a given name. The device might not exist. */ 299 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) 300 { 301 struct smc_ib_device *ibdev; 302 303 spin_lock(&smc_ib_devices.lock); 304 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 305 if (!strncmp(ibdev->ibdev->name, ib_name, 306 sizeof(ibdev->ibdev->name)) || 307 !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, 308 IB_DEVICE_NAME_MAX - 1)) { 309 goto out; 310 } 311 } 312 ibdev = NULL; 313 out: 314 spin_unlock(&smc_ib_devices.lock); 315 return ibdev; 316 } 317 318 /* Find an smcd device by a given name. The device might not exist. */ 319 static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) 320 { 321 struct smcd_dev *smcd_dev; 322 323 spin_lock(&smcd_dev_list.lock); 324 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 325 if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, 326 IB_DEVICE_NAME_MAX - 1)) 327 goto out; 328 } 329 smcd_dev = NULL; 330 out: 331 spin_unlock(&smcd_dev_list.lock); 332 return smcd_dev; 333 } 334 335 static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, 336 char *eth_name, char *pnet_name) 337 { 338 struct smc_pnetentry *tmp_pe, *new_pe; 339 struct net_device *ndev, *base_ndev; 340 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 341 bool new_netdev; 342 int rc; 343 344 /* check if (base) netdev already has a pnetid. If there is one, we do 345 * not want to add a pnet table entry 346 */ 347 rc = -EEXIST; 348 ndev = dev_get_by_name(net, eth_name); /* dev_hold() */ 349 if (ndev) { 350 base_ndev = pnet_find_base_ndev(ndev); 351 if (!smc_pnetid_by_dev_port(base_ndev->dev.parent, 352 base_ndev->dev_port, ndev_pnetid)) 353 goto out_put; 354 } 355 356 /* add a new netdev entry to the pnet table if there isn't one */ 357 rc = -ENOMEM; 358 new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); 359 if (!new_pe) 360 goto out_put; 361 new_pe->type = SMC_PNET_ETH; 362 memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); 363 strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); 364 new_pe->ndev = ndev; 365 366 rc = -EEXIST; 367 new_netdev = true; 368 write_lock(&pnettable->lock); 369 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 370 if (tmp_pe->type == SMC_PNET_ETH && 371 !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) { 372 new_netdev = false; 373 break; 374 } 375 } 376 if (new_netdev) { 377 list_add_tail(&new_pe->list, &pnettable->pnetlist); 378 write_unlock(&pnettable->lock); 379 } else { 380 write_unlock(&pnettable->lock); 381 kfree(new_pe); 382 goto out_put; 383 } 384 if (ndev) 385 pr_warn_ratelimited("smc: net device %s " 386 "applied user defined pnetid %.16s\n", 387 new_pe->eth_name, new_pe->pnet_name); 388 return 0; 389 390 out_put: 391 if (ndev) 392 dev_put(ndev); 393 return rc; 394 } 395 396 static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, 397 u8 ib_port, char *pnet_name) 398 { 399 struct smc_pnetentry *tmp_pe, *new_pe; 400 struct smc_ib_device *ib_dev; 401 bool smcddev_applied = true; 402 bool ibdev_applied = true; 403 struct smcd_dev *smcd_dev; 404 bool new_ibdev; 405 406 /* try to apply the pnetid to active devices */ 407 ib_dev = smc_pnet_find_ib(ib_name); 408 if (ib_dev) { 409 ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name); 410 if (ibdev_applied) 411 pr_warn_ratelimited("smc: ib device %s ibport %d " 412 "applied user defined pnetid " 413 "%.16s\n", ib_dev->ibdev->name, 414 ib_port, 415 ib_dev->pnetid[ib_port - 1]); 416 } 417 smcd_dev = smc_pnet_find_smcd(ib_name); 418 if (smcd_dev) { 419 smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name); 420 if (smcddev_applied) 421 pr_warn_ratelimited("smc: smcd device %s " 422 "applied user defined pnetid " 423 "%.16s\n", dev_name(&smcd_dev->dev), 424 smcd_dev->pnetid); 425 } 426 /* Apply fails when a device has a hardware-defined pnetid set, do not 427 * add a pnet table entry in that case. 428 */ 429 if (!ibdev_applied || !smcddev_applied) 430 return -EEXIST; 431 432 /* add a new ib entry to the pnet table if there isn't one */ 433 new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); 434 if (!new_pe) 435 return -ENOMEM; 436 new_pe->type = SMC_PNET_IB; 437 memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); 438 strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX); 439 new_pe->ib_port = ib_port; 440 441 new_ibdev = true; 442 write_lock(&pnettable->lock); 443 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 444 if (tmp_pe->type == SMC_PNET_IB && 445 !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { 446 new_ibdev = false; 447 break; 448 } 449 } 450 if (new_ibdev) { 451 list_add_tail(&new_pe->list, &pnettable->pnetlist); 452 write_unlock(&pnettable->lock); 453 } else { 454 write_unlock(&pnettable->lock); 455 kfree(new_pe); 456 } 457 return (new_ibdev) ? 0 : -EEXIST; 458 } 459 460 /* Append a pnetid to the end of the pnet table if not already on this list. 461 */ 462 static int smc_pnet_enter(struct net *net, struct nlattr *tb[]) 463 { 464 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 465 struct smc_pnettable *pnettable; 466 bool new_netdev = false; 467 bool new_ibdev = false; 468 struct smc_net *sn; 469 u8 ibport = 1; 470 char *string; 471 int rc; 472 473 /* get pnettable for namespace */ 474 sn = net_generic(net, smc_net_id); 475 pnettable = &sn->pnettable; 476 477 rc = -EINVAL; 478 if (!tb[SMC_PNETID_NAME]) 479 goto error; 480 string = (char *)nla_data(tb[SMC_PNETID_NAME]); 481 if (!smc_pnetid_valid(string, pnet_name)) 482 goto error; 483 484 if (tb[SMC_PNETID_ETHNAME]) { 485 string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); 486 rc = smc_pnet_add_eth(pnettable, net, string, pnet_name); 487 if (!rc) 488 new_netdev = true; 489 else if (rc != -EEXIST) 490 goto error; 491 } 492 493 /* if this is not the initial namespace, stop here */ 494 if (net != &init_net) 495 return new_netdev ? 0 : -EEXIST; 496 497 rc = -EINVAL; 498 if (tb[SMC_PNETID_IBNAME]) { 499 string = (char *)nla_data(tb[SMC_PNETID_IBNAME]); 500 string = strim(string); 501 if (tb[SMC_PNETID_IBPORT]) { 502 ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]); 503 if (ibport < 1 || ibport > SMC_MAX_PORTS) 504 goto error; 505 } 506 rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name); 507 if (!rc) 508 new_ibdev = true; 509 else if (rc != -EEXIST) 510 goto error; 511 } 512 return (new_netdev || new_ibdev) ? 0 : -EEXIST; 513 514 error: 515 return rc; 516 } 517 518 /* Convert an smc_pnetentry to a netlink attribute sequence */ 519 static int smc_pnet_set_nla(struct sk_buff *msg, 520 struct smc_pnetentry *pnetelem) 521 { 522 if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) 523 return -1; 524 if (pnetelem->type == SMC_PNET_ETH) { 525 if (nla_put_string(msg, SMC_PNETID_ETHNAME, 526 pnetelem->eth_name)) 527 return -1; 528 } else { 529 if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) 530 return -1; 531 } 532 if (pnetelem->type == SMC_PNET_IB) { 533 if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) || 534 nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) 535 return -1; 536 } else { 537 if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || 538 nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) 539 return -1; 540 } 541 542 return 0; 543 } 544 545 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) 546 { 547 struct net *net = genl_info_net(info); 548 549 return smc_pnet_enter(net, info->attrs); 550 } 551 552 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) 553 { 554 struct net *net = genl_info_net(info); 555 556 if (!info->attrs[SMC_PNETID_NAME]) 557 return -EINVAL; 558 return smc_pnet_remove_by_pnetid(net, 559 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 560 } 561 562 static int smc_pnet_dump_start(struct netlink_callback *cb) 563 { 564 cb->args[0] = 0; 565 return 0; 566 } 567 568 static int smc_pnet_dumpinfo(struct sk_buff *skb, 569 u32 portid, u32 seq, u32 flags, 570 struct smc_pnetentry *pnetelem) 571 { 572 void *hdr; 573 574 hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, 575 flags, SMC_PNETID_GET); 576 if (!hdr) 577 return -ENOMEM; 578 if (smc_pnet_set_nla(skb, pnetelem) < 0) { 579 genlmsg_cancel(skb, hdr); 580 return -EMSGSIZE; 581 } 582 genlmsg_end(skb, hdr); 583 return 0; 584 } 585 586 static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, 587 u32 seq, u8 *pnetid, int start_idx) 588 { 589 struct smc_pnettable *pnettable; 590 struct smc_pnetentry *pnetelem; 591 struct smc_net *sn; 592 int idx = 0; 593 594 /* get pnettable for namespace */ 595 sn = net_generic(net, smc_net_id); 596 pnettable = &sn->pnettable; 597 598 /* dump pnettable entries */ 599 read_lock(&pnettable->lock); 600 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 601 if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) 602 continue; 603 if (idx++ < start_idx) 604 continue; 605 /* if this is not the initial namespace, dump only netdev */ 606 if (net != &init_net && pnetelem->type != SMC_PNET_ETH) 607 continue; 608 if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, 609 pnetelem)) { 610 --idx; 611 break; 612 } 613 } 614 read_unlock(&pnettable->lock); 615 return idx; 616 } 617 618 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) 619 { 620 struct net *net = sock_net(skb->sk); 621 int idx; 622 623 idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, 624 cb->nlh->nlmsg_seq, NULL, cb->args[0]); 625 626 cb->args[0] = idx; 627 return skb->len; 628 } 629 630 /* Retrieve one PNETID entry */ 631 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) 632 { 633 struct net *net = genl_info_net(info); 634 struct sk_buff *msg; 635 void *hdr; 636 637 if (!info->attrs[SMC_PNETID_NAME]) 638 return -EINVAL; 639 640 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 641 if (!msg) 642 return -ENOMEM; 643 644 _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, 645 nla_data(info->attrs[SMC_PNETID_NAME]), 0); 646 647 /* finish multi part message and send it */ 648 hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, 649 NLM_F_MULTI); 650 if (!hdr) { 651 nlmsg_free(msg); 652 return -EMSGSIZE; 653 } 654 return genlmsg_reply(msg, info); 655 } 656 657 /* Remove and delete all pnetids from pnet table. 658 */ 659 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) 660 { 661 struct net *net = genl_info_net(info); 662 663 smc_pnet_remove_by_pnetid(net, NULL); 664 return 0; 665 } 666 667 /* SMC_PNETID generic netlink operation definition */ 668 static const struct genl_ops smc_pnet_ops[] = { 669 { 670 .cmd = SMC_PNETID_GET, 671 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 672 /* can be retrieved by unprivileged users */ 673 .doit = smc_pnet_get, 674 .dumpit = smc_pnet_dump, 675 .start = smc_pnet_dump_start 676 }, 677 { 678 .cmd = SMC_PNETID_ADD, 679 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 680 .flags = GENL_ADMIN_PERM, 681 .doit = smc_pnet_add 682 }, 683 { 684 .cmd = SMC_PNETID_DEL, 685 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 686 .flags = GENL_ADMIN_PERM, 687 .doit = smc_pnet_del 688 }, 689 { 690 .cmd = SMC_PNETID_FLUSH, 691 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 692 .flags = GENL_ADMIN_PERM, 693 .doit = smc_pnet_flush 694 } 695 }; 696 697 /* SMC_PNETID family definition */ 698 static struct genl_family smc_pnet_nl_family __ro_after_init = { 699 .hdrsize = 0, 700 .name = SMCR_GENL_FAMILY_NAME, 701 .version = SMCR_GENL_FAMILY_VERSION, 702 .maxattr = SMC_PNETID_MAX, 703 .policy = smc_pnet_policy, 704 .netnsok = true, 705 .module = THIS_MODULE, 706 .ops = smc_pnet_ops, 707 .n_ops = ARRAY_SIZE(smc_pnet_ops) 708 }; 709 710 static int smc_pnet_netdev_event(struct notifier_block *this, 711 unsigned long event, void *ptr) 712 { 713 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 714 715 switch (event) { 716 case NETDEV_REBOOT: 717 case NETDEV_UNREGISTER: 718 smc_pnet_remove_by_ndev(event_dev); 719 return NOTIFY_OK; 720 case NETDEV_REGISTER: 721 smc_pnet_add_by_ndev(event_dev); 722 return NOTIFY_OK; 723 default: 724 return NOTIFY_DONE; 725 } 726 } 727 728 static struct notifier_block smc_netdev_notifier = { 729 .notifier_call = smc_pnet_netdev_event 730 }; 731 732 /* init network namespace */ 733 int smc_pnet_net_init(struct net *net) 734 { 735 struct smc_net *sn = net_generic(net, smc_net_id); 736 struct smc_pnettable *pnettable = &sn->pnettable; 737 738 INIT_LIST_HEAD(&pnettable->pnetlist); 739 rwlock_init(&pnettable->lock); 740 741 return 0; 742 } 743 744 int __init smc_pnet_init(void) 745 { 746 int rc; 747 748 rc = genl_register_family(&smc_pnet_nl_family); 749 if (rc) 750 return rc; 751 rc = register_netdevice_notifier(&smc_netdev_notifier); 752 if (rc) 753 genl_unregister_family(&smc_pnet_nl_family); 754 return rc; 755 } 756 757 /* exit network namespace */ 758 void smc_pnet_net_exit(struct net *net) 759 { 760 /* flush pnet table */ 761 smc_pnet_remove_by_pnetid(net, NULL); 762 } 763 764 void smc_pnet_exit(void) 765 { 766 unregister_netdevice_notifier(&smc_netdev_notifier); 767 genl_unregister_family(&smc_pnet_nl_family); 768 } 769 770 /* Determine one base device for stacked net devices. 771 * If the lower device level contains more than one devices 772 * (for instance with bonding slaves), just the first device 773 * is used to reach a base device. 774 */ 775 static struct net_device *pnet_find_base_ndev(struct net_device *ndev) 776 { 777 int i, nest_lvl; 778 779 rtnl_lock(); 780 nest_lvl = ndev->lower_level; 781 for (i = 0; i < nest_lvl; i++) { 782 struct list_head *lower = &ndev->adj_list.lower; 783 784 if (list_empty(lower)) 785 break; 786 lower = lower->next; 787 ndev = netdev_lower_get_next(ndev, &lower); 788 } 789 rtnl_unlock(); 790 return ndev; 791 } 792 793 static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, 794 u8 *pnetid) 795 { 796 struct smc_pnettable *pnettable; 797 struct net *net = dev_net(ndev); 798 struct smc_pnetentry *pnetelem; 799 struct smc_net *sn; 800 int rc = -ENOENT; 801 802 /* get pnettable for namespace */ 803 sn = net_generic(net, smc_net_id); 804 pnettable = &sn->pnettable; 805 806 read_lock(&pnettable->lock); 807 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 808 if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) { 809 /* get pnetid of netdev device */ 810 memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); 811 rc = 0; 812 break; 813 } 814 } 815 read_unlock(&pnettable->lock); 816 return rc; 817 } 818 819 /* find a roce device for the given pnetid */ 820 static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, 821 struct smc_init_info *ini, 822 struct smc_ib_device *known_dev) 823 { 824 struct smc_ib_device *ibdev; 825 int i; 826 827 ini->ib_dev = NULL; 828 spin_lock(&smc_ib_devices.lock); 829 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 830 if (ibdev == known_dev) 831 continue; 832 for (i = 1; i <= SMC_MAX_PORTS; i++) { 833 if (!rdma_is_port_valid(ibdev->ibdev, i)) 834 continue; 835 if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) && 836 smc_ib_port_active(ibdev, i) && 837 !test_bit(i - 1, ibdev->ports_going_away) && 838 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 839 ini->ib_gid, NULL)) { 840 ini->ib_dev = ibdev; 841 ini->ib_port = i; 842 goto out; 843 } 844 } 845 } 846 out: 847 spin_unlock(&smc_ib_devices.lock); 848 } 849 850 /* find alternate roce device with same pnet_id and vlan_id */ 851 void smc_pnet_find_alt_roce(struct smc_link_group *lgr, 852 struct smc_init_info *ini, 853 struct smc_ib_device *known_dev) 854 { 855 _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev); 856 } 857 858 /* if handshake network device belongs to a roce device, return its 859 * IB device and port 860 */ 861 static void smc_pnet_find_rdma_dev(struct net_device *netdev, 862 struct smc_init_info *ini) 863 { 864 struct smc_ib_device *ibdev; 865 866 spin_lock(&smc_ib_devices.lock); 867 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 868 struct net_device *ndev; 869 int i; 870 871 for (i = 1; i <= SMC_MAX_PORTS; i++) { 872 if (!rdma_is_port_valid(ibdev->ibdev, i)) 873 continue; 874 if (!ibdev->ibdev->ops.get_netdev) 875 continue; 876 ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); 877 if (!ndev) 878 continue; 879 dev_put(ndev); 880 if (netdev == ndev && 881 smc_ib_port_active(ibdev, i) && 882 !test_bit(i - 1, ibdev->ports_going_away) && 883 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 884 ini->ib_gid, NULL)) { 885 ini->ib_dev = ibdev; 886 ini->ib_port = i; 887 break; 888 } 889 } 890 } 891 spin_unlock(&smc_ib_devices.lock); 892 } 893 894 /* Determine the corresponding IB device port based on the hardware PNETID. 895 * Searching stops at the first matching active IB device port with vlan_id 896 * configured. 897 * If nothing found, check pnetid table. 898 * If nothing found, try to use handshake device 899 */ 900 static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, 901 struct smc_init_info *ini) 902 { 903 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 904 905 ndev = pnet_find_base_ndev(ndev); 906 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 907 ndev_pnetid) && 908 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { 909 smc_pnet_find_rdma_dev(ndev, ini); 910 return; /* pnetid could not be determined */ 911 } 912 _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL); 913 } 914 915 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, 916 struct smc_init_info *ini) 917 { 918 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 919 struct smcd_dev *ismdev; 920 921 ndev = pnet_find_base_ndev(ndev); 922 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 923 ndev_pnetid) && 924 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) 925 return; /* pnetid could not be determined */ 926 927 spin_lock(&smcd_dev_list.lock); 928 list_for_each_entry(ismdev, &smcd_dev_list.list, list) { 929 if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && 930 !ismdev->going_away) { 931 ini->ism_dev = ismdev; 932 break; 933 } 934 } 935 spin_unlock(&smcd_dev_list.lock); 936 } 937 938 /* PNET table analysis for a given sock: 939 * determine ib_device and port belonging to used internal TCP socket 940 * ethernet interface. 941 */ 942 void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) 943 { 944 struct dst_entry *dst = sk_dst_get(sk); 945 946 ini->ib_dev = NULL; 947 ini->ib_port = 0; 948 if (!dst) 949 goto out; 950 if (!dst->dev) 951 goto out_rel; 952 953 smc_pnet_find_roce_by_pnetid(dst->dev, ini); 954 955 out_rel: 956 dst_release(dst); 957 out: 958 return; 959 } 960 961 void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) 962 { 963 struct dst_entry *dst = sk_dst_get(sk); 964 965 ini->ism_dev = NULL; 966 if (!dst) 967 goto out; 968 if (!dst->dev) 969 goto out_rel; 970 971 smc_pnet_find_ism_by_pnetid(dst->dev, ini); 972 973 out_rel: 974 dst_release(dst); 975 out: 976 return; 977 } 978 979 /* Lookup and apply a pnet table entry to the given ib device. 980 */ 981 int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) 982 { 983 char *ib_name = smcibdev->ibdev->name; 984 struct smc_pnettable *pnettable; 985 struct smc_pnetentry *tmp_pe; 986 struct smc_net *sn; 987 int rc = -ENOENT; 988 989 /* get pnettable for init namespace */ 990 sn = net_generic(&init_net, smc_net_id); 991 pnettable = &sn->pnettable; 992 993 read_lock(&pnettable->lock); 994 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 995 if (tmp_pe->type == SMC_PNET_IB && 996 !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) && 997 tmp_pe->ib_port == ib_port) { 998 smc_pnet_apply_ib(smcibdev, ib_port, tmp_pe->pnet_name); 999 rc = 0; 1000 break; 1001 } 1002 } 1003 read_unlock(&pnettable->lock); 1004 1005 return rc; 1006 } 1007 1008 /* Lookup and apply a pnet table entry to the given smcd device. 1009 */ 1010 int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) 1011 { 1012 const char *ib_name = dev_name(&smcddev->dev); 1013 struct smc_pnettable *pnettable; 1014 struct smc_pnetentry *tmp_pe; 1015 struct smc_net *sn; 1016 int rc = -ENOENT; 1017 1018 /* get pnettable for init namespace */ 1019 sn = net_generic(&init_net, smc_net_id); 1020 pnettable = &sn->pnettable; 1021 1022 read_lock(&pnettable->lock); 1023 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 1024 if (tmp_pe->type == SMC_PNET_IB && 1025 !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { 1026 smc_pnet_apply_smcd(smcddev, tmp_pe->pnet_name); 1027 rc = 0; 1028 break; 1029 } 1030 } 1031 read_unlock(&pnettable->lock); 1032 1033 return rc; 1034 } 1035