1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Generic netlink support functions to configure an SMC-R PNET table 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/module.h> 13 #include <linux/list.h> 14 #include <linux/ctype.h> 15 #include <linux/mutex.h> 16 #include <net/netlink.h> 17 #include <net/genetlink.h> 18 19 #include <uapi/linux/if.h> 20 #include <uapi/linux/smc.h> 21 22 #include <rdma/ib_verbs.h> 23 24 #include <net/netns/generic.h> 25 #include "smc_netns.h" 26 27 #include "smc_pnet.h" 28 #include "smc_ib.h" 29 #include "smc_ism.h" 30 #include "smc_core.h" 31 32 #define SMC_ASCII_BLANK 32 33 34 static struct net_device *pnet_find_base_ndev(struct net_device *ndev); 35 36 static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { 37 [SMC_PNETID_NAME] = { 38 .type = NLA_NUL_STRING, 39 .len = SMC_MAX_PNETID_LEN 40 }, 41 [SMC_PNETID_ETHNAME] = { 42 .type = NLA_NUL_STRING, 43 .len = IFNAMSIZ - 1 44 }, 45 [SMC_PNETID_IBNAME] = { 46 .type = NLA_NUL_STRING, 47 .len = IB_DEVICE_NAME_MAX - 1 48 }, 49 [SMC_PNETID_IBPORT] = { .type = NLA_U8 } 50 }; 51 52 static struct genl_family smc_pnet_nl_family; 53 54 enum smc_pnet_nametype { 55 SMC_PNET_ETH = 1, 56 SMC_PNET_IB = 2, 57 }; 58 59 /* pnet entry stored in pnet table */ 60 struct smc_pnetentry { 61 struct list_head list; 62 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 63 enum smc_pnet_nametype type; 64 union { 65 struct { 66 char eth_name[IFNAMSIZ + 1]; 67 struct net_device *ndev; 68 }; 69 struct { 70 char ib_name[IB_DEVICE_NAME_MAX + 1]; 71 u8 ib_port; 72 }; 73 }; 74 }; 75 76 /* Check if two given pnetids match */ 77 static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) 78 { 79 int i; 80 81 for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { 82 if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) && 83 (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK)) 84 break; 85 if (pnetid1[i] != pnetid2[i]) 86 return false; 87 } 88 return true; 89 } 90 91 /* Remove a pnetid from the pnet table. 92 */ 93 static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) 94 { 95 struct smc_pnetentry *pnetelem, *tmp_pe; 96 struct smc_pnettable *pnettable; 97 struct smc_ib_device *ibdev; 98 struct smcd_dev *smcd_dev; 99 struct smc_net *sn; 100 int rc = -ENOENT; 101 int ibport; 102 103 /* get pnettable for namespace */ 104 sn = net_generic(net, smc_net_id); 105 pnettable = &sn->pnettable; 106 107 /* remove table entry */ 108 write_lock(&pnettable->lock); 109 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, 110 list) { 111 if (!pnet_name || 112 smc_pnet_match(pnetelem->pnet_name, pnet_name)) { 113 list_del(&pnetelem->list); 114 if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) { 115 dev_put(pnetelem->ndev); 116 pr_warn_ratelimited("smc: net device %s " 117 "erased user defined " 118 "pnetid %.16s\n", 119 pnetelem->eth_name, 120 pnetelem->pnet_name); 121 } 122 kfree(pnetelem); 123 rc = 0; 124 } 125 } 126 write_unlock(&pnettable->lock); 127 128 /* if this is not the initial namespace, stop here */ 129 if (net != &init_net) 130 return rc; 131 132 /* remove ib devices */ 133 mutex_lock(&smc_ib_devices.mutex); 134 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 135 for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { 136 if (ibdev->pnetid_by_user[ibport] && 137 (!pnet_name || 138 smc_pnet_match(pnet_name, 139 ibdev->pnetid[ibport]))) { 140 pr_warn_ratelimited("smc: ib device %s ibport " 141 "%d erased user defined " 142 "pnetid %.16s\n", 143 ibdev->ibdev->name, 144 ibport + 1, 145 ibdev->pnetid[ibport]); 146 memset(ibdev->pnetid[ibport], 0, 147 SMC_MAX_PNETID_LEN); 148 ibdev->pnetid_by_user[ibport] = false; 149 rc = 0; 150 } 151 } 152 } 153 mutex_unlock(&smc_ib_devices.mutex); 154 /* remove smcd devices */ 155 mutex_lock(&smcd_dev_list.mutex); 156 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 157 if (smcd_dev->pnetid_by_user && 158 (!pnet_name || 159 smc_pnet_match(pnet_name, smcd_dev->pnetid))) { 160 pr_warn_ratelimited("smc: smcd device %s " 161 "erased user defined pnetid " 162 "%.16s\n", dev_name(&smcd_dev->dev), 163 smcd_dev->pnetid); 164 memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN); 165 smcd_dev->pnetid_by_user = false; 166 rc = 0; 167 } 168 } 169 mutex_unlock(&smcd_dev_list.mutex); 170 return rc; 171 } 172 173 /* Add the reference to a given network device to the pnet table. 174 */ 175 static int smc_pnet_add_by_ndev(struct net_device *ndev) 176 { 177 struct smc_pnetentry *pnetelem, *tmp_pe; 178 struct smc_pnettable *pnettable; 179 struct net *net = dev_net(ndev); 180 struct smc_net *sn; 181 int rc = -ENOENT; 182 183 /* get pnettable for namespace */ 184 sn = net_generic(net, smc_net_id); 185 pnettable = &sn->pnettable; 186 187 write_lock(&pnettable->lock); 188 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { 189 if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && 190 !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { 191 dev_hold(ndev); 192 pnetelem->ndev = ndev; 193 rc = 0; 194 pr_warn_ratelimited("smc: adding net device %s with " 195 "user defined pnetid %.16s\n", 196 pnetelem->eth_name, 197 pnetelem->pnet_name); 198 break; 199 } 200 } 201 write_unlock(&pnettable->lock); 202 return rc; 203 } 204 205 /* Remove the reference to a given network device from the pnet table. 206 */ 207 static int smc_pnet_remove_by_ndev(struct net_device *ndev) 208 { 209 struct smc_pnetentry *pnetelem, *tmp_pe; 210 struct smc_pnettable *pnettable; 211 struct net *net = dev_net(ndev); 212 struct smc_net *sn; 213 int rc = -ENOENT; 214 215 /* get pnettable for namespace */ 216 sn = net_generic(net, smc_net_id); 217 pnettable = &sn->pnettable; 218 219 write_lock(&pnettable->lock); 220 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { 221 if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { 222 dev_put(pnetelem->ndev); 223 pnetelem->ndev = NULL; 224 rc = 0; 225 pr_warn_ratelimited("smc: removing net device %s with " 226 "user defined pnetid %.16s\n", 227 pnetelem->eth_name, 228 pnetelem->pnet_name); 229 break; 230 } 231 } 232 write_unlock(&pnettable->lock); 233 return rc; 234 } 235 236 /* Apply pnetid to ib device when no pnetid is set. 237 */ 238 static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, 239 char *pnet_name) 240 { 241 u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; 242 bool applied = false; 243 244 mutex_lock(&smc_ib_devices.mutex); 245 if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { 246 memcpy(ib_dev->pnetid[ib_port - 1], pnet_name, 247 SMC_MAX_PNETID_LEN); 248 ib_dev->pnetid_by_user[ib_port - 1] = true; 249 applied = true; 250 } 251 mutex_unlock(&smc_ib_devices.mutex); 252 return applied; 253 } 254 255 /* Apply pnetid to smcd device when no pnetid is set. 256 */ 257 static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name) 258 { 259 u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; 260 bool applied = false; 261 262 mutex_lock(&smcd_dev_list.mutex); 263 if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { 264 memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN); 265 smcd_dev->pnetid_by_user = true; 266 applied = true; 267 } 268 mutex_unlock(&smcd_dev_list.mutex); 269 return applied; 270 } 271 272 /* The limit for pnetid is 16 characters. 273 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. 274 * Lower case letters are converted to upper case. 275 * Interior blanks should not be used. 276 */ 277 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) 278 { 279 char *bf = skip_spaces(pnet_name); 280 size_t len = strlen(bf); 281 char *end = bf + len; 282 283 if (!len) 284 return false; 285 while (--end >= bf && isspace(*end)) 286 ; 287 if (end - bf >= SMC_MAX_PNETID_LEN) 288 return false; 289 while (bf <= end) { 290 if (!isalnum(*bf)) 291 return false; 292 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; 293 bf++; 294 } 295 *pnetid = '\0'; 296 return true; 297 } 298 299 /* Find an infiniband device by a given name. The device might not exist. */ 300 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) 301 { 302 struct smc_ib_device *ibdev; 303 304 mutex_lock(&smc_ib_devices.mutex); 305 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 306 if (!strncmp(ibdev->ibdev->name, ib_name, 307 sizeof(ibdev->ibdev->name)) || 308 !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, 309 IB_DEVICE_NAME_MAX - 1)) { 310 goto out; 311 } 312 } 313 ibdev = NULL; 314 out: 315 mutex_unlock(&smc_ib_devices.mutex); 316 return ibdev; 317 } 318 319 /* Find an smcd device by a given name. The device might not exist. */ 320 static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) 321 { 322 struct smcd_dev *smcd_dev; 323 324 mutex_lock(&smcd_dev_list.mutex); 325 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 326 if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, 327 IB_DEVICE_NAME_MAX - 1)) 328 goto out; 329 } 330 smcd_dev = NULL; 331 out: 332 mutex_unlock(&smcd_dev_list.mutex); 333 return smcd_dev; 334 } 335 336 static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, 337 char *eth_name, char *pnet_name) 338 { 339 struct smc_pnetentry *tmp_pe, *new_pe; 340 struct net_device *ndev, *base_ndev; 341 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 342 bool new_netdev; 343 int rc; 344 345 /* check if (base) netdev already has a pnetid. If there is one, we do 346 * not want to add a pnet table entry 347 */ 348 rc = -EEXIST; 349 ndev = dev_get_by_name(net, eth_name); /* dev_hold() */ 350 if (ndev) { 351 base_ndev = pnet_find_base_ndev(ndev); 352 if (!smc_pnetid_by_dev_port(base_ndev->dev.parent, 353 base_ndev->dev_port, ndev_pnetid)) 354 goto out_put; 355 } 356 357 /* add a new netdev entry to the pnet table if there isn't one */ 358 rc = -ENOMEM; 359 new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); 360 if (!new_pe) 361 goto out_put; 362 new_pe->type = SMC_PNET_ETH; 363 memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); 364 strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); 365 new_pe->ndev = ndev; 366 367 rc = -EEXIST; 368 new_netdev = true; 369 write_lock(&pnettable->lock); 370 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 371 if (tmp_pe->type == SMC_PNET_ETH && 372 !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) { 373 new_netdev = false; 374 break; 375 } 376 } 377 if (new_netdev) { 378 list_add_tail(&new_pe->list, &pnettable->pnetlist); 379 write_unlock(&pnettable->lock); 380 } else { 381 write_unlock(&pnettable->lock); 382 kfree(new_pe); 383 goto out_put; 384 } 385 if (ndev) 386 pr_warn_ratelimited("smc: net device %s " 387 "applied user defined pnetid %.16s\n", 388 new_pe->eth_name, new_pe->pnet_name); 389 return 0; 390 391 out_put: 392 if (ndev) 393 dev_put(ndev); 394 return rc; 395 } 396 397 static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, 398 u8 ib_port, char *pnet_name) 399 { 400 struct smc_pnetentry *tmp_pe, *new_pe; 401 struct smc_ib_device *ib_dev; 402 bool smcddev_applied = true; 403 bool ibdev_applied = true; 404 struct smcd_dev *smcd_dev; 405 bool new_ibdev; 406 407 /* try to apply the pnetid to active devices */ 408 ib_dev = smc_pnet_find_ib(ib_name); 409 if (ib_dev) { 410 ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name); 411 if (ibdev_applied) 412 pr_warn_ratelimited("smc: ib device %s ibport %d " 413 "applied user defined pnetid " 414 "%.16s\n", ib_dev->ibdev->name, 415 ib_port, 416 ib_dev->pnetid[ib_port - 1]); 417 } 418 smcd_dev = smc_pnet_find_smcd(ib_name); 419 if (smcd_dev) { 420 smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name); 421 if (smcddev_applied) 422 pr_warn_ratelimited("smc: smcd device %s " 423 "applied user defined pnetid " 424 "%.16s\n", dev_name(&smcd_dev->dev), 425 smcd_dev->pnetid); 426 } 427 /* Apply fails when a device has a hardware-defined pnetid set, do not 428 * add a pnet table entry in that case. 429 */ 430 if (!ibdev_applied || !smcddev_applied) 431 return -EEXIST; 432 433 /* add a new ib entry to the pnet table if there isn't one */ 434 new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); 435 if (!new_pe) 436 return -ENOMEM; 437 new_pe->type = SMC_PNET_IB; 438 memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); 439 strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX); 440 new_pe->ib_port = ib_port; 441 442 new_ibdev = true; 443 write_lock(&pnettable->lock); 444 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 445 if (tmp_pe->type == SMC_PNET_IB && 446 !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { 447 new_ibdev = false; 448 break; 449 } 450 } 451 if (new_ibdev) { 452 list_add_tail(&new_pe->list, &pnettable->pnetlist); 453 write_unlock(&pnettable->lock); 454 } else { 455 write_unlock(&pnettable->lock); 456 kfree(new_pe); 457 } 458 return (new_ibdev) ? 0 : -EEXIST; 459 } 460 461 /* Append a pnetid to the end of the pnet table if not already on this list. 462 */ 463 static int smc_pnet_enter(struct net *net, struct nlattr *tb[]) 464 { 465 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 466 struct smc_pnettable *pnettable; 467 bool new_netdev = false; 468 bool new_ibdev = false; 469 struct smc_net *sn; 470 u8 ibport = 1; 471 char *string; 472 int rc; 473 474 /* get pnettable for namespace */ 475 sn = net_generic(net, smc_net_id); 476 pnettable = &sn->pnettable; 477 478 rc = -EINVAL; 479 if (!tb[SMC_PNETID_NAME]) 480 goto error; 481 string = (char *)nla_data(tb[SMC_PNETID_NAME]); 482 if (!smc_pnetid_valid(string, pnet_name)) 483 goto error; 484 485 if (tb[SMC_PNETID_ETHNAME]) { 486 string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); 487 rc = smc_pnet_add_eth(pnettable, net, string, pnet_name); 488 if (!rc) 489 new_netdev = true; 490 else if (rc != -EEXIST) 491 goto error; 492 } 493 494 /* if this is not the initial namespace, stop here */ 495 if (net != &init_net) 496 return new_netdev ? 0 : -EEXIST; 497 498 rc = -EINVAL; 499 if (tb[SMC_PNETID_IBNAME]) { 500 string = (char *)nla_data(tb[SMC_PNETID_IBNAME]); 501 string = strim(string); 502 if (tb[SMC_PNETID_IBPORT]) { 503 ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]); 504 if (ibport < 1 || ibport > SMC_MAX_PORTS) 505 goto error; 506 } 507 rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name); 508 if (!rc) 509 new_ibdev = true; 510 else if (rc != -EEXIST) 511 goto error; 512 } 513 return (new_netdev || new_ibdev) ? 0 : -EEXIST; 514 515 error: 516 return rc; 517 } 518 519 /* Convert an smc_pnetentry to a netlink attribute sequence */ 520 static int smc_pnet_set_nla(struct sk_buff *msg, 521 struct smc_pnetentry *pnetelem) 522 { 523 if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) 524 return -1; 525 if (pnetelem->type == SMC_PNET_ETH) { 526 if (nla_put_string(msg, SMC_PNETID_ETHNAME, 527 pnetelem->eth_name)) 528 return -1; 529 } else { 530 if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) 531 return -1; 532 } 533 if (pnetelem->type == SMC_PNET_IB) { 534 if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) || 535 nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) 536 return -1; 537 } else { 538 if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || 539 nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) 540 return -1; 541 } 542 543 return 0; 544 } 545 546 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) 547 { 548 struct net *net = genl_info_net(info); 549 550 return smc_pnet_enter(net, info->attrs); 551 } 552 553 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) 554 { 555 struct net *net = genl_info_net(info); 556 557 if (!info->attrs[SMC_PNETID_NAME]) 558 return -EINVAL; 559 return smc_pnet_remove_by_pnetid(net, 560 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 561 } 562 563 static int smc_pnet_dump_start(struct netlink_callback *cb) 564 { 565 cb->args[0] = 0; 566 return 0; 567 } 568 569 static int smc_pnet_dumpinfo(struct sk_buff *skb, 570 u32 portid, u32 seq, u32 flags, 571 struct smc_pnetentry *pnetelem) 572 { 573 void *hdr; 574 575 hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, 576 flags, SMC_PNETID_GET); 577 if (!hdr) 578 return -ENOMEM; 579 if (smc_pnet_set_nla(skb, pnetelem) < 0) { 580 genlmsg_cancel(skb, hdr); 581 return -EMSGSIZE; 582 } 583 genlmsg_end(skb, hdr); 584 return 0; 585 } 586 587 static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, 588 u32 seq, u8 *pnetid, int start_idx) 589 { 590 struct smc_pnettable *pnettable; 591 struct smc_pnetentry *pnetelem; 592 struct smc_net *sn; 593 int idx = 0; 594 595 /* get pnettable for namespace */ 596 sn = net_generic(net, smc_net_id); 597 pnettable = &sn->pnettable; 598 599 /* dump pnettable entries */ 600 read_lock(&pnettable->lock); 601 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 602 if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) 603 continue; 604 if (idx++ < start_idx) 605 continue; 606 /* if this is not the initial namespace, dump only netdev */ 607 if (net != &init_net && pnetelem->type != SMC_PNET_ETH) 608 continue; 609 if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, 610 pnetelem)) { 611 --idx; 612 break; 613 } 614 } 615 read_unlock(&pnettable->lock); 616 return idx; 617 } 618 619 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) 620 { 621 struct net *net = sock_net(skb->sk); 622 int idx; 623 624 idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, 625 cb->nlh->nlmsg_seq, NULL, cb->args[0]); 626 627 cb->args[0] = idx; 628 return skb->len; 629 } 630 631 /* Retrieve one PNETID entry */ 632 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) 633 { 634 struct net *net = genl_info_net(info); 635 struct sk_buff *msg; 636 void *hdr; 637 638 if (!info->attrs[SMC_PNETID_NAME]) 639 return -EINVAL; 640 641 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 642 if (!msg) 643 return -ENOMEM; 644 645 _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, 646 nla_data(info->attrs[SMC_PNETID_NAME]), 0); 647 648 /* finish multi part message and send it */ 649 hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, 650 NLM_F_MULTI); 651 if (!hdr) { 652 nlmsg_free(msg); 653 return -EMSGSIZE; 654 } 655 return genlmsg_reply(msg, info); 656 } 657 658 /* Remove and delete all pnetids from pnet table. 659 */ 660 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) 661 { 662 struct net *net = genl_info_net(info); 663 664 smc_pnet_remove_by_pnetid(net, NULL); 665 return 0; 666 } 667 668 /* SMC_PNETID generic netlink operation definition */ 669 static const struct genl_ops smc_pnet_ops[] = { 670 { 671 .cmd = SMC_PNETID_GET, 672 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 673 /* can be retrieved by unprivileged users */ 674 .doit = smc_pnet_get, 675 .dumpit = smc_pnet_dump, 676 .start = smc_pnet_dump_start 677 }, 678 { 679 .cmd = SMC_PNETID_ADD, 680 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 681 .flags = GENL_ADMIN_PERM, 682 .doit = smc_pnet_add 683 }, 684 { 685 .cmd = SMC_PNETID_DEL, 686 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 687 .flags = GENL_ADMIN_PERM, 688 .doit = smc_pnet_del 689 }, 690 { 691 .cmd = SMC_PNETID_FLUSH, 692 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 693 .flags = GENL_ADMIN_PERM, 694 .doit = smc_pnet_flush 695 } 696 }; 697 698 /* SMC_PNETID family definition */ 699 static struct genl_family smc_pnet_nl_family __ro_after_init = { 700 .hdrsize = 0, 701 .name = SMCR_GENL_FAMILY_NAME, 702 .version = SMCR_GENL_FAMILY_VERSION, 703 .maxattr = SMC_PNETID_MAX, 704 .policy = smc_pnet_policy, 705 .netnsok = true, 706 .module = THIS_MODULE, 707 .ops = smc_pnet_ops, 708 .n_ops = ARRAY_SIZE(smc_pnet_ops) 709 }; 710 711 static int smc_pnet_netdev_event(struct notifier_block *this, 712 unsigned long event, void *ptr) 713 { 714 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 715 716 switch (event) { 717 case NETDEV_REBOOT: 718 case NETDEV_UNREGISTER: 719 smc_pnet_remove_by_ndev(event_dev); 720 return NOTIFY_OK; 721 case NETDEV_REGISTER: 722 smc_pnet_add_by_ndev(event_dev); 723 return NOTIFY_OK; 724 default: 725 return NOTIFY_DONE; 726 } 727 } 728 729 static struct notifier_block smc_netdev_notifier = { 730 .notifier_call = smc_pnet_netdev_event 731 }; 732 733 /* init network namespace */ 734 int smc_pnet_net_init(struct net *net) 735 { 736 struct smc_net *sn = net_generic(net, smc_net_id); 737 struct smc_pnettable *pnettable = &sn->pnettable; 738 739 INIT_LIST_HEAD(&pnettable->pnetlist); 740 rwlock_init(&pnettable->lock); 741 742 return 0; 743 } 744 745 int __init smc_pnet_init(void) 746 { 747 int rc; 748 749 rc = genl_register_family(&smc_pnet_nl_family); 750 if (rc) 751 return rc; 752 rc = register_netdevice_notifier(&smc_netdev_notifier); 753 if (rc) 754 genl_unregister_family(&smc_pnet_nl_family); 755 return rc; 756 } 757 758 /* exit network namespace */ 759 void smc_pnet_net_exit(struct net *net) 760 { 761 /* flush pnet table */ 762 smc_pnet_remove_by_pnetid(net, NULL); 763 } 764 765 void smc_pnet_exit(void) 766 { 767 unregister_netdevice_notifier(&smc_netdev_notifier); 768 genl_unregister_family(&smc_pnet_nl_family); 769 } 770 771 /* Determine one base device for stacked net devices. 772 * If the lower device level contains more than one devices 773 * (for instance with bonding slaves), just the first device 774 * is used to reach a base device. 775 */ 776 static struct net_device *pnet_find_base_ndev(struct net_device *ndev) 777 { 778 int i, nest_lvl; 779 780 rtnl_lock(); 781 nest_lvl = ndev->lower_level; 782 for (i = 0; i < nest_lvl; i++) { 783 struct list_head *lower = &ndev->adj_list.lower; 784 785 if (list_empty(lower)) 786 break; 787 lower = lower->next; 788 ndev = netdev_lower_get_next(ndev, &lower); 789 } 790 rtnl_unlock(); 791 return ndev; 792 } 793 794 static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, 795 u8 *pnetid) 796 { 797 struct smc_pnettable *pnettable; 798 struct net *net = dev_net(ndev); 799 struct smc_pnetentry *pnetelem; 800 struct smc_net *sn; 801 int rc = -ENOENT; 802 803 /* get pnettable for namespace */ 804 sn = net_generic(net, smc_net_id); 805 pnettable = &sn->pnettable; 806 807 read_lock(&pnettable->lock); 808 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 809 if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) { 810 /* get pnetid of netdev device */ 811 memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); 812 rc = 0; 813 break; 814 } 815 } 816 read_unlock(&pnettable->lock); 817 return rc; 818 } 819 820 /* find a roce device for the given pnetid */ 821 static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, 822 struct smc_init_info *ini, 823 struct smc_ib_device *known_dev) 824 { 825 struct smc_ib_device *ibdev; 826 int i; 827 828 ini->ib_dev = NULL; 829 mutex_lock(&smc_ib_devices.mutex); 830 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 831 if (ibdev == known_dev) 832 continue; 833 for (i = 1; i <= SMC_MAX_PORTS; i++) { 834 if (!rdma_is_port_valid(ibdev->ibdev, i)) 835 continue; 836 if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) && 837 smc_ib_port_active(ibdev, i) && 838 !test_bit(i - 1, ibdev->ports_going_away) && 839 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 840 ini->ib_gid, NULL)) { 841 ini->ib_dev = ibdev; 842 ini->ib_port = i; 843 goto out; 844 } 845 } 846 } 847 out: 848 mutex_unlock(&smc_ib_devices.mutex); 849 } 850 851 /* find alternate roce device with same pnet_id and vlan_id */ 852 void smc_pnet_find_alt_roce(struct smc_link_group *lgr, 853 struct smc_init_info *ini, 854 struct smc_ib_device *known_dev) 855 { 856 _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev); 857 } 858 859 /* if handshake network device belongs to a roce device, return its 860 * IB device and port 861 */ 862 static void smc_pnet_find_rdma_dev(struct net_device *netdev, 863 struct smc_init_info *ini) 864 { 865 struct smc_ib_device *ibdev; 866 867 mutex_lock(&smc_ib_devices.mutex); 868 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 869 struct net_device *ndev; 870 int i; 871 872 for (i = 1; i <= SMC_MAX_PORTS; i++) { 873 if (!rdma_is_port_valid(ibdev->ibdev, i)) 874 continue; 875 if (!ibdev->ibdev->ops.get_netdev) 876 continue; 877 ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); 878 if (!ndev) 879 continue; 880 dev_put(ndev); 881 if (netdev == ndev && 882 smc_ib_port_active(ibdev, i) && 883 !test_bit(i - 1, ibdev->ports_going_away) && 884 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 885 ini->ib_gid, NULL)) { 886 ini->ib_dev = ibdev; 887 ini->ib_port = i; 888 break; 889 } 890 } 891 } 892 mutex_unlock(&smc_ib_devices.mutex); 893 } 894 895 /* Determine the corresponding IB device port based on the hardware PNETID. 896 * Searching stops at the first matching active IB device port with vlan_id 897 * configured. 898 * If nothing found, check pnetid table. 899 * If nothing found, try to use handshake device 900 */ 901 static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, 902 struct smc_init_info *ini) 903 { 904 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 905 906 ndev = pnet_find_base_ndev(ndev); 907 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 908 ndev_pnetid) && 909 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { 910 smc_pnet_find_rdma_dev(ndev, ini); 911 return; /* pnetid could not be determined */ 912 } 913 _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL); 914 } 915 916 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, 917 struct smc_init_info *ini) 918 { 919 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 920 struct smcd_dev *ismdev; 921 922 ndev = pnet_find_base_ndev(ndev); 923 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 924 ndev_pnetid) && 925 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) 926 return; /* pnetid could not be determined */ 927 928 mutex_lock(&smcd_dev_list.mutex); 929 list_for_each_entry(ismdev, &smcd_dev_list.list, list) { 930 if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && 931 !ismdev->going_away) { 932 ini->ism_dev = ismdev; 933 break; 934 } 935 } 936 mutex_unlock(&smcd_dev_list.mutex); 937 } 938 939 /* PNET table analysis for a given sock: 940 * determine ib_device and port belonging to used internal TCP socket 941 * ethernet interface. 942 */ 943 void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) 944 { 945 struct dst_entry *dst = sk_dst_get(sk); 946 947 ini->ib_dev = NULL; 948 ini->ib_port = 0; 949 if (!dst) 950 goto out; 951 if (!dst->dev) 952 goto out_rel; 953 954 smc_pnet_find_roce_by_pnetid(dst->dev, ini); 955 956 out_rel: 957 dst_release(dst); 958 out: 959 return; 960 } 961 962 void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) 963 { 964 struct dst_entry *dst = sk_dst_get(sk); 965 966 ini->ism_dev = NULL; 967 if (!dst) 968 goto out; 969 if (!dst->dev) 970 goto out_rel; 971 972 smc_pnet_find_ism_by_pnetid(dst->dev, ini); 973 974 out_rel: 975 dst_release(dst); 976 out: 977 return; 978 } 979 980 /* Lookup and apply a pnet table entry to the given ib device. 981 */ 982 int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) 983 { 984 char *ib_name = smcibdev->ibdev->name; 985 struct smc_pnettable *pnettable; 986 struct smc_pnetentry *tmp_pe; 987 struct smc_net *sn; 988 int rc = -ENOENT; 989 990 /* get pnettable for init namespace */ 991 sn = net_generic(&init_net, smc_net_id); 992 pnettable = &sn->pnettable; 993 994 read_lock(&pnettable->lock); 995 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 996 if (tmp_pe->type == SMC_PNET_IB && 997 !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) && 998 tmp_pe->ib_port == ib_port) { 999 smc_pnet_apply_ib(smcibdev, ib_port, tmp_pe->pnet_name); 1000 rc = 0; 1001 break; 1002 } 1003 } 1004 read_unlock(&pnettable->lock); 1005 1006 return rc; 1007 } 1008 1009 /* Lookup and apply a pnet table entry to the given smcd device. 1010 */ 1011 int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) 1012 { 1013 const char *ib_name = dev_name(&smcddev->dev); 1014 struct smc_pnettable *pnettable; 1015 struct smc_pnetentry *tmp_pe; 1016 struct smc_net *sn; 1017 int rc = -ENOENT; 1018 1019 /* get pnettable for init namespace */ 1020 sn = net_generic(&init_net, smc_net_id); 1021 pnettable = &sn->pnettable; 1022 1023 read_lock(&pnettable->lock); 1024 list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { 1025 if (tmp_pe->type == SMC_PNET_IB && 1026 !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { 1027 smc_pnet_apply_smcd(smcddev, tmp_pe->pnet_name); 1028 rc = 0; 1029 break; 1030 } 1031 } 1032 read_unlock(&pnettable->lock); 1033 1034 return rc; 1035 } 1036