1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Generic netlink support functions to configure an SMC-R PNET table 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/module.h> 13 #include <linux/list.h> 14 #include <linux/ctype.h> 15 #include <net/netlink.h> 16 #include <net/genetlink.h> 17 18 #include <uapi/linux/if.h> 19 #include <uapi/linux/smc.h> 20 21 #include <rdma/ib_verbs.h> 22 23 #include <net/netns/generic.h> 24 #include "smc_netns.h" 25 26 #include "smc_pnet.h" 27 #include "smc_ib.h" 28 #include "smc_ism.h" 29 #include "smc_core.h" 30 31 #define SMC_ASCII_BLANK 32 32 33 static struct net_device *pnet_find_base_ndev(struct net_device *ndev); 34 35 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { 36 [SMC_PNETID_NAME] = { 37 .type = NLA_NUL_STRING, 38 .len = SMC_MAX_PNETID_LEN 39 }, 40 [SMC_PNETID_ETHNAME] = { 41 .type = NLA_NUL_STRING, 42 .len = IFNAMSIZ - 1 43 }, 44 [SMC_PNETID_IBNAME] = { 45 .type = NLA_NUL_STRING, 46 .len = IB_DEVICE_NAME_MAX - 1 47 }, 48 [SMC_PNETID_IBPORT] = { .type = NLA_U8 } 49 }; 50 51 static struct genl_family smc_pnet_nl_family; 52 53 /** 54 * struct smc_user_pnetentry - pnet identifier name entry for/from user 55 * @list: List node. 56 * @pnet_name: Pnet identifier name 57 * @ndev: pointer to network device. 58 * @smcibdev: Pointer to IB device. 59 * @ib_port: Port of IB device. 60 * @smcd_dev: Pointer to smcd device. 61 */ 62 struct smc_user_pnetentry { 63 struct list_head list; 64 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 65 struct net_device *ndev; 66 struct smc_ib_device *smcibdev; 67 u8 ib_port; 68 struct smcd_dev *smcd_dev; 69 }; 70 71 /* pnet entry stored in pnet table */ 72 struct smc_pnetentry { 73 struct list_head list; 74 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 75 struct net_device *ndev; 76 }; 77 78 /* Check if two given pnetids match */ 79 static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) 80 { 81 int i; 82 83 for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { 84 if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) && 85 (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK)) 86 break; 87 if (pnetid1[i] != pnetid2[i]) 88 return false; 89 } 90 return true; 91 } 92 93 /* Remove a pnetid from the pnet table. 94 */ 95 static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) 96 { 97 struct smc_pnetentry *pnetelem, *tmp_pe; 98 struct smc_pnettable *pnettable; 99 struct smc_ib_device *ibdev; 100 struct smcd_dev *smcd_dev; 101 struct smc_net *sn; 102 int rc = -ENOENT; 103 int ibport; 104 105 /* get pnettable for namespace */ 106 sn = net_generic(net, smc_net_id); 107 pnettable = &sn->pnettable; 108 109 /* remove netdevices */ 110 write_lock(&pnettable->lock); 111 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, 112 list) { 113 if (!pnet_name || 114 smc_pnet_match(pnetelem->pnet_name, pnet_name)) { 115 list_del(&pnetelem->list); 116 dev_put(pnetelem->ndev); 117 kfree(pnetelem); 118 rc = 0; 119 } 120 } 121 write_unlock(&pnettable->lock); 122 123 /* if this is not the initial namespace, stop here */ 124 if (net != &init_net) 125 return rc; 126 127 /* remove ib devices */ 128 spin_lock(&smc_ib_devices.lock); 129 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 130 for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { 131 if (ibdev->pnetid_by_user[ibport] && 132 (!pnet_name || 133 smc_pnet_match(pnet_name, 134 ibdev->pnetid[ibport]))) { 135 memset(ibdev->pnetid[ibport], 0, 136 SMC_MAX_PNETID_LEN); 137 ibdev->pnetid_by_user[ibport] = false; 138 rc = 0; 139 } 140 } 141 } 142 spin_unlock(&smc_ib_devices.lock); 143 /* remove smcd devices */ 144 spin_lock(&smcd_dev_list.lock); 145 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 146 if (smcd_dev->pnetid_by_user && 147 (!pnet_name || 148 smc_pnet_match(pnet_name, smcd_dev->pnetid))) { 149 memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN); 150 smcd_dev->pnetid_by_user = false; 151 rc = 0; 152 } 153 } 154 spin_unlock(&smcd_dev_list.lock); 155 return rc; 156 } 157 158 /* Remove a pnet entry mentioning a given network device from the pnet table. 159 */ 160 static int smc_pnet_remove_by_ndev(struct net_device *ndev) 161 { 162 struct smc_pnetentry *pnetelem, *tmp_pe; 163 struct smc_pnettable *pnettable; 164 struct net *net = dev_net(ndev); 165 struct smc_net *sn; 166 int rc = -ENOENT; 167 168 /* get pnettable for namespace */ 169 sn = net_generic(net, smc_net_id); 170 pnettable = &sn->pnettable; 171 172 write_lock(&pnettable->lock); 173 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { 174 if (pnetelem->ndev == ndev) { 175 list_del(&pnetelem->list); 176 dev_put(pnetelem->ndev); 177 kfree(pnetelem); 178 rc = 0; 179 break; 180 } 181 } 182 write_unlock(&pnettable->lock); 183 return rc; 184 } 185 186 /* Append a pnetid to the end of the pnet table if not already on this list. 187 */ 188 static int smc_pnet_enter(struct smc_pnettable *pnettable, 189 struct smc_user_pnetentry *new_pnetelem) 190 { 191 u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; 192 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 193 struct smc_pnetentry *tmp_pnetelem; 194 struct smc_pnetentry *pnetelem; 195 bool new_smcddev = false; 196 struct net_device *ndev; 197 bool new_netdev = true; 198 bool new_ibdev = false; 199 200 if (new_pnetelem->smcibdev) { 201 struct smc_ib_device *ib_dev = new_pnetelem->smcibdev; 202 int ib_port = new_pnetelem->ib_port; 203 204 spin_lock(&smc_ib_devices.lock); 205 if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { 206 memcpy(ib_dev->pnetid[ib_port - 1], 207 new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN); 208 ib_dev->pnetid_by_user[ib_port - 1] = true; 209 new_ibdev = true; 210 } 211 spin_unlock(&smc_ib_devices.lock); 212 } 213 if (new_pnetelem->smcd_dev) { 214 struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev; 215 216 spin_lock(&smcd_dev_list.lock); 217 if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { 218 memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name, 219 SMC_MAX_PNETID_LEN); 220 smcd_dev->pnetid_by_user = true; 221 new_smcddev = true; 222 } 223 spin_unlock(&smcd_dev_list.lock); 224 } 225 226 if (!new_pnetelem->ndev) 227 return (new_ibdev || new_smcddev) ? 0 : -EEXIST; 228 229 /* check if (base) netdev already has a pnetid. If there is one, we do 230 * not want to add a pnet table entry 231 */ 232 ndev = pnet_find_base_ndev(new_pnetelem->ndev); 233 if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 234 ndev_pnetid)) 235 return (new_ibdev || new_smcddev) ? 0 : -EEXIST; 236 237 /* add a new netdev entry to the pnet table if there isn't one */ 238 tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); 239 if (!tmp_pnetelem) 240 return -ENOMEM; 241 memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name, 242 SMC_MAX_PNETID_LEN); 243 tmp_pnetelem->ndev = new_pnetelem->ndev; 244 245 write_lock(&pnettable->lock); 246 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 247 if (pnetelem->ndev == new_pnetelem->ndev) 248 new_netdev = false; 249 } 250 if (new_netdev) { 251 dev_hold(tmp_pnetelem->ndev); 252 list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist); 253 write_unlock(&pnettable->lock); 254 } else { 255 write_unlock(&pnettable->lock); 256 kfree(tmp_pnetelem); 257 } 258 259 return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST; 260 } 261 262 /* The limit for pnetid is 16 characters. 263 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. 264 * Lower case letters are converted to upper case. 265 * Interior blanks should not be used. 266 */ 267 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) 268 { 269 char *bf = skip_spaces(pnet_name); 270 size_t len = strlen(bf); 271 char *end = bf + len; 272 273 if (!len) 274 return false; 275 while (--end >= bf && isspace(*end)) 276 ; 277 if (end - bf >= SMC_MAX_PNETID_LEN) 278 return false; 279 while (bf <= end) { 280 if (!isalnum(*bf)) 281 return false; 282 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; 283 bf++; 284 } 285 *pnetid = '\0'; 286 return true; 287 } 288 289 /* Find an infiniband device by a given name. The device might not exist. */ 290 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) 291 { 292 struct smc_ib_device *ibdev; 293 294 spin_lock(&smc_ib_devices.lock); 295 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 296 if (!strncmp(ibdev->ibdev->name, ib_name, 297 sizeof(ibdev->ibdev->name)) || 298 !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, 299 IB_DEVICE_NAME_MAX - 1)) { 300 goto out; 301 } 302 } 303 ibdev = NULL; 304 out: 305 spin_unlock(&smc_ib_devices.lock); 306 return ibdev; 307 } 308 309 /* Find an smcd device by a given name. The device might not exist. */ 310 static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) 311 { 312 struct smcd_dev *smcd_dev; 313 314 spin_lock(&smcd_dev_list.lock); 315 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 316 if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, 317 IB_DEVICE_NAME_MAX - 1)) 318 goto out; 319 } 320 smcd_dev = NULL; 321 out: 322 spin_unlock(&smcd_dev_list.lock); 323 return smcd_dev; 324 } 325 326 /* Parse the supplied netlink attributes and fill a pnetentry structure. 327 * For ethernet and infiniband device names verify that the devices exist. 328 */ 329 static int smc_pnet_fill_entry(struct net *net, 330 struct smc_user_pnetentry *pnetelem, 331 struct nlattr *tb[]) 332 { 333 char *string, *ibname; 334 int rc; 335 336 memset(pnetelem, 0, sizeof(*pnetelem)); 337 INIT_LIST_HEAD(&pnetelem->list); 338 339 rc = -EINVAL; 340 if (!tb[SMC_PNETID_NAME]) 341 goto error; 342 string = (char *)nla_data(tb[SMC_PNETID_NAME]); 343 if (!smc_pnetid_valid(string, pnetelem->pnet_name)) 344 goto error; 345 346 rc = -EINVAL; 347 if (tb[SMC_PNETID_ETHNAME]) { 348 string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); 349 pnetelem->ndev = dev_get_by_name(net, string); 350 if (!pnetelem->ndev) 351 goto error; 352 } 353 354 /* if this is not the initial namespace, stop here */ 355 if (net != &init_net) 356 return 0; 357 358 rc = -EINVAL; 359 if (tb[SMC_PNETID_IBNAME]) { 360 ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); 361 ibname = strim(ibname); 362 pnetelem->smcibdev = smc_pnet_find_ib(ibname); 363 pnetelem->smcd_dev = smc_pnet_find_smcd(ibname); 364 if (!pnetelem->smcibdev && !pnetelem->smcd_dev) 365 goto error; 366 if (pnetelem->smcibdev) { 367 if (!tb[SMC_PNETID_IBPORT]) 368 goto error; 369 pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); 370 if (pnetelem->ib_port < 1 || 371 pnetelem->ib_port > SMC_MAX_PORTS) 372 goto error; 373 } 374 } 375 376 return 0; 377 378 error: 379 if (pnetelem->ndev) 380 dev_put(pnetelem->ndev); 381 return rc; 382 } 383 384 /* Convert an smc_pnetentry to a netlink attribute sequence */ 385 static int smc_pnet_set_nla(struct sk_buff *msg, 386 struct smc_user_pnetentry *pnetelem) 387 { 388 if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) 389 return -1; 390 if (pnetelem->ndev) { 391 if (nla_put_string(msg, SMC_PNETID_ETHNAME, 392 pnetelem->ndev->name)) 393 return -1; 394 } else { 395 if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) 396 return -1; 397 } 398 if (pnetelem->smcibdev) { 399 if (nla_put_string(msg, SMC_PNETID_IBNAME, 400 dev_name(pnetelem->smcibdev->ibdev->dev.parent)) || 401 nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) 402 return -1; 403 } else if (pnetelem->smcd_dev) { 404 if (nla_put_string(msg, SMC_PNETID_IBNAME, 405 dev_name(&pnetelem->smcd_dev->dev)) || 406 nla_put_u8(msg, SMC_PNETID_IBPORT, 1)) 407 return -1; 408 } else { 409 if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || 410 nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) 411 return -1; 412 } 413 414 return 0; 415 } 416 417 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) 418 { 419 struct net *net = genl_info_net(info); 420 struct smc_user_pnetentry pnetelem; 421 struct smc_pnettable *pnettable; 422 struct smc_net *sn; 423 int rc; 424 425 /* get pnettable for namespace */ 426 sn = net_generic(net, smc_net_id); 427 pnettable = &sn->pnettable; 428 429 rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs); 430 if (!rc) 431 rc = smc_pnet_enter(pnettable, &pnetelem); 432 if (pnetelem.ndev) 433 dev_put(pnetelem.ndev); 434 return rc; 435 } 436 437 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) 438 { 439 struct net *net = genl_info_net(info); 440 441 if (!info->attrs[SMC_PNETID_NAME]) 442 return -EINVAL; 443 return smc_pnet_remove_by_pnetid(net, 444 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 445 } 446 447 static int smc_pnet_dump_start(struct netlink_callback *cb) 448 { 449 cb->args[0] = 0; 450 return 0; 451 } 452 453 static int smc_pnet_dumpinfo(struct sk_buff *skb, 454 u32 portid, u32 seq, u32 flags, 455 struct smc_user_pnetentry *pnetelem) 456 { 457 void *hdr; 458 459 hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, 460 flags, SMC_PNETID_GET); 461 if (!hdr) 462 return -ENOMEM; 463 if (smc_pnet_set_nla(skb, pnetelem) < 0) { 464 genlmsg_cancel(skb, hdr); 465 return -EMSGSIZE; 466 } 467 genlmsg_end(skb, hdr); 468 return 0; 469 } 470 471 static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, 472 u32 seq, u8 *pnetid, int start_idx) 473 { 474 struct smc_user_pnetentry tmp_entry; 475 struct smc_pnettable *pnettable; 476 struct smc_pnetentry *pnetelem; 477 struct smc_ib_device *ibdev; 478 struct smcd_dev *smcd_dev; 479 struct smc_net *sn; 480 int idx = 0; 481 int ibport; 482 483 /* get pnettable for namespace */ 484 sn = net_generic(net, smc_net_id); 485 pnettable = &sn->pnettable; 486 487 /* dump netdevices */ 488 read_lock(&pnettable->lock); 489 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 490 if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) 491 continue; 492 if (idx++ < start_idx) 493 continue; 494 memset(&tmp_entry, 0, sizeof(tmp_entry)); 495 memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name, 496 SMC_MAX_PNETID_LEN); 497 tmp_entry.ndev = pnetelem->ndev; 498 if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, 499 &tmp_entry)) { 500 --idx; 501 break; 502 } 503 } 504 read_unlock(&pnettable->lock); 505 506 /* if this is not the initial namespace, stop here */ 507 if (net != &init_net) 508 return idx; 509 510 /* dump ib devices */ 511 spin_lock(&smc_ib_devices.lock); 512 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 513 for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { 514 if (ibdev->pnetid_by_user[ibport]) { 515 if (pnetid && 516 !smc_pnet_match(ibdev->pnetid[ibport], 517 pnetid)) 518 continue; 519 if (idx++ < start_idx) 520 continue; 521 memset(&tmp_entry, 0, sizeof(tmp_entry)); 522 memcpy(&tmp_entry.pnet_name, 523 ibdev->pnetid[ibport], 524 SMC_MAX_PNETID_LEN); 525 tmp_entry.smcibdev = ibdev; 526 tmp_entry.ib_port = ibport + 1; 527 if (smc_pnet_dumpinfo(skb, portid, seq, 528 NLM_F_MULTI, 529 &tmp_entry)) { 530 --idx; 531 break; 532 } 533 } 534 } 535 } 536 spin_unlock(&smc_ib_devices.lock); 537 538 /* dump smcd devices */ 539 spin_lock(&smcd_dev_list.lock); 540 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 541 if (smcd_dev->pnetid_by_user) { 542 if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid)) 543 continue; 544 if (idx++ < start_idx) 545 continue; 546 memset(&tmp_entry, 0, sizeof(tmp_entry)); 547 memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid, 548 SMC_MAX_PNETID_LEN); 549 tmp_entry.smcd_dev = smcd_dev; 550 if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, 551 &tmp_entry)) { 552 --idx; 553 break; 554 } 555 } 556 } 557 spin_unlock(&smcd_dev_list.lock); 558 559 return idx; 560 } 561 562 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) 563 { 564 struct net *net = sock_net(skb->sk); 565 int idx; 566 567 idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, 568 cb->nlh->nlmsg_seq, NULL, cb->args[0]); 569 570 cb->args[0] = idx; 571 return skb->len; 572 } 573 574 /* Retrieve one PNETID entry */ 575 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) 576 { 577 struct net *net = genl_info_net(info); 578 struct sk_buff *msg; 579 void *hdr; 580 581 if (!info->attrs[SMC_PNETID_NAME]) 582 return -EINVAL; 583 584 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 585 if (!msg) 586 return -ENOMEM; 587 588 _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, 589 nla_data(info->attrs[SMC_PNETID_NAME]), 0); 590 591 /* finish multi part message and send it */ 592 hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, 593 NLM_F_MULTI); 594 if (!hdr) { 595 nlmsg_free(msg); 596 return -EMSGSIZE; 597 } 598 return genlmsg_reply(msg, info); 599 } 600 601 /* Remove and delete all pnetids from pnet table. 602 */ 603 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) 604 { 605 struct net *net = genl_info_net(info); 606 607 smc_pnet_remove_by_pnetid(net, NULL); 608 return 0; 609 } 610 611 /* SMC_PNETID generic netlink operation definition */ 612 static const struct genl_ops smc_pnet_ops[] = { 613 { 614 .cmd = SMC_PNETID_GET, 615 .flags = GENL_ADMIN_PERM, 616 .doit = smc_pnet_get, 617 .dumpit = smc_pnet_dump, 618 .start = smc_pnet_dump_start 619 }, 620 { 621 .cmd = SMC_PNETID_ADD, 622 .flags = GENL_ADMIN_PERM, 623 .doit = smc_pnet_add 624 }, 625 { 626 .cmd = SMC_PNETID_DEL, 627 .flags = GENL_ADMIN_PERM, 628 .doit = smc_pnet_del 629 }, 630 { 631 .cmd = SMC_PNETID_FLUSH, 632 .flags = GENL_ADMIN_PERM, 633 .doit = smc_pnet_flush 634 } 635 }; 636 637 /* SMC_PNETID family definition */ 638 static struct genl_family smc_pnet_nl_family __ro_after_init = { 639 .hdrsize = 0, 640 .name = SMCR_GENL_FAMILY_NAME, 641 .version = SMCR_GENL_FAMILY_VERSION, 642 .maxattr = SMC_PNETID_MAX, 643 .policy = smc_pnet_policy, 644 .netnsok = true, 645 .module = THIS_MODULE, 646 .ops = smc_pnet_ops, 647 .n_ops = ARRAY_SIZE(smc_pnet_ops) 648 }; 649 650 static int smc_pnet_netdev_event(struct notifier_block *this, 651 unsigned long event, void *ptr) 652 { 653 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 654 655 switch (event) { 656 case NETDEV_REBOOT: 657 case NETDEV_UNREGISTER: 658 smc_pnet_remove_by_ndev(event_dev); 659 return NOTIFY_OK; 660 default: 661 return NOTIFY_DONE; 662 } 663 } 664 665 static struct notifier_block smc_netdev_notifier = { 666 .notifier_call = smc_pnet_netdev_event 667 }; 668 669 /* init network namespace */ 670 int smc_pnet_net_init(struct net *net) 671 { 672 struct smc_net *sn = net_generic(net, smc_net_id); 673 struct smc_pnettable *pnettable = &sn->pnettable; 674 675 INIT_LIST_HEAD(&pnettable->pnetlist); 676 rwlock_init(&pnettable->lock); 677 678 return 0; 679 } 680 681 int __init smc_pnet_init(void) 682 { 683 int rc; 684 685 rc = genl_register_family(&smc_pnet_nl_family); 686 if (rc) 687 return rc; 688 rc = register_netdevice_notifier(&smc_netdev_notifier); 689 if (rc) 690 genl_unregister_family(&smc_pnet_nl_family); 691 return rc; 692 } 693 694 /* exit network namespace */ 695 void smc_pnet_net_exit(struct net *net) 696 { 697 /* flush pnet table */ 698 smc_pnet_remove_by_pnetid(net, NULL); 699 } 700 701 void smc_pnet_exit(void) 702 { 703 unregister_netdevice_notifier(&smc_netdev_notifier); 704 genl_unregister_family(&smc_pnet_nl_family); 705 } 706 707 /* Determine one base device for stacked net devices. 708 * If the lower device level contains more than one devices 709 * (for instance with bonding slaves), just the first device 710 * is used to reach a base device. 711 */ 712 static struct net_device *pnet_find_base_ndev(struct net_device *ndev) 713 { 714 int i, nest_lvl; 715 716 rtnl_lock(); 717 nest_lvl = dev_get_nest_level(ndev); 718 for (i = 0; i < nest_lvl; i++) { 719 struct list_head *lower = &ndev->adj_list.lower; 720 721 if (list_empty(lower)) 722 break; 723 lower = lower->next; 724 ndev = netdev_lower_get_next(ndev, &lower); 725 } 726 rtnl_unlock(); 727 return ndev; 728 } 729 730 static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, 731 u8 *pnetid) 732 { 733 struct smc_pnettable *pnettable; 734 struct net *net = dev_net(ndev); 735 struct smc_pnetentry *pnetelem; 736 struct smc_net *sn; 737 int rc = -ENOENT; 738 739 /* get pnettable for namespace */ 740 sn = net_generic(net, smc_net_id); 741 pnettable = &sn->pnettable; 742 743 read_lock(&pnettable->lock); 744 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 745 if (ndev == pnetelem->ndev) { 746 /* get pnetid of netdev device */ 747 memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); 748 rc = 0; 749 break; 750 } 751 } 752 read_unlock(&pnettable->lock); 753 return rc; 754 } 755 756 /* if handshake network device belongs to a roce device, return its 757 * IB device and port 758 */ 759 static void smc_pnet_find_rdma_dev(struct net_device *netdev, 760 struct smc_init_info *ini) 761 { 762 struct smc_ib_device *ibdev; 763 764 spin_lock(&smc_ib_devices.lock); 765 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 766 struct net_device *ndev; 767 int i; 768 769 for (i = 1; i <= SMC_MAX_PORTS; i++) { 770 if (!rdma_is_port_valid(ibdev->ibdev, i)) 771 continue; 772 if (!ibdev->ibdev->ops.get_netdev) 773 continue; 774 ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); 775 if (!ndev) 776 continue; 777 dev_put(ndev); 778 if (netdev == ndev && 779 smc_ib_port_active(ibdev, i) && 780 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 781 ini->ib_gid, NULL)) { 782 ini->ib_dev = ibdev; 783 ini->ib_port = i; 784 break; 785 } 786 } 787 } 788 spin_unlock(&smc_ib_devices.lock); 789 } 790 791 /* Determine the corresponding IB device port based on the hardware PNETID. 792 * Searching stops at the first matching active IB device port with vlan_id 793 * configured. 794 * If nothing found, check pnetid table. 795 * If nothing found, try to use handshake device 796 */ 797 static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, 798 struct smc_init_info *ini) 799 { 800 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 801 struct smc_ib_device *ibdev; 802 int i; 803 804 ndev = pnet_find_base_ndev(ndev); 805 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 806 ndev_pnetid) && 807 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { 808 smc_pnet_find_rdma_dev(ndev, ini); 809 return; /* pnetid could not be determined */ 810 } 811 812 spin_lock(&smc_ib_devices.lock); 813 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 814 for (i = 1; i <= SMC_MAX_PORTS; i++) { 815 if (!rdma_is_port_valid(ibdev->ibdev, i)) 816 continue; 817 if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) && 818 smc_ib_port_active(ibdev, i) && 819 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 820 ini->ib_gid, NULL)) { 821 ini->ib_dev = ibdev; 822 ini->ib_port = i; 823 goto out; 824 } 825 } 826 } 827 out: 828 spin_unlock(&smc_ib_devices.lock); 829 } 830 831 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, 832 struct smc_init_info *ini) 833 { 834 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 835 struct smcd_dev *ismdev; 836 837 ndev = pnet_find_base_ndev(ndev); 838 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 839 ndev_pnetid) && 840 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) 841 return; /* pnetid could not be determined */ 842 843 spin_lock(&smcd_dev_list.lock); 844 list_for_each_entry(ismdev, &smcd_dev_list.list, list) { 845 if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) { 846 ini->ism_dev = ismdev; 847 break; 848 } 849 } 850 spin_unlock(&smcd_dev_list.lock); 851 } 852 853 /* PNET table analysis for a given sock: 854 * determine ib_device and port belonging to used internal TCP socket 855 * ethernet interface. 856 */ 857 void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) 858 { 859 struct dst_entry *dst = sk_dst_get(sk); 860 861 ini->ib_dev = NULL; 862 ini->ib_port = 0; 863 if (!dst) 864 goto out; 865 if (!dst->dev) 866 goto out_rel; 867 868 smc_pnet_find_roce_by_pnetid(dst->dev, ini); 869 870 out_rel: 871 dst_release(dst); 872 out: 873 return; 874 } 875 876 void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) 877 { 878 struct dst_entry *dst = sk_dst_get(sk); 879 880 ini->ism_dev = NULL; 881 if (!dst) 882 goto out; 883 if (!dst->dev) 884 goto out_rel; 885 886 smc_pnet_find_ism_by_pnetid(dst->dev, ini); 887 888 out_rel: 889 dst_release(dst); 890 out: 891 return; 892 } 893