1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Generic netlink support functions to configure an SMC-R PNET table 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/module.h> 13 #include <linux/list.h> 14 #include <linux/ctype.h> 15 #include <net/netlink.h> 16 #include <net/genetlink.h> 17 18 #include <uapi/linux/if.h> 19 #include <uapi/linux/smc.h> 20 21 #include <rdma/ib_verbs.h> 22 23 #include <net/netns/generic.h> 24 #include "smc_netns.h" 25 26 #include "smc_pnet.h" 27 #include "smc_ib.h" 28 #include "smc_ism.h" 29 #include "smc_core.h" 30 31 #define SMC_ASCII_BLANK 32 32 33 static struct net_device *pnet_find_base_ndev(struct net_device *ndev); 34 35 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { 36 [SMC_PNETID_NAME] = { 37 .type = NLA_NUL_STRING, 38 .len = SMC_MAX_PNETID_LEN 39 }, 40 [SMC_PNETID_ETHNAME] = { 41 .type = NLA_NUL_STRING, 42 .len = IFNAMSIZ - 1 43 }, 44 [SMC_PNETID_IBNAME] = { 45 .type = NLA_NUL_STRING, 46 .len = IB_DEVICE_NAME_MAX - 1 47 }, 48 [SMC_PNETID_IBPORT] = { .type = NLA_U8 } 49 }; 50 51 static struct genl_family smc_pnet_nl_family; 52 53 /** 54 * struct smc_user_pnetentry - pnet identifier name entry for/from user 55 * @list: List node. 56 * @pnet_name: Pnet identifier name 57 * @ndev: pointer to network device. 58 * @smcibdev: Pointer to IB device. 59 * @ib_port: Port of IB device. 60 * @smcd_dev: Pointer to smcd device. 61 */ 62 struct smc_user_pnetentry { 63 struct list_head list; 64 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 65 struct net_device *ndev; 66 struct smc_ib_device *smcibdev; 67 u8 ib_port; 68 struct smcd_dev *smcd_dev; 69 }; 70 71 /* pnet entry stored in pnet table */ 72 struct smc_pnetentry { 73 struct list_head list; 74 char pnet_name[SMC_MAX_PNETID_LEN + 1]; 75 struct net_device *ndev; 76 }; 77 78 /* Check if two given pnetids match */ 79 static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2) 80 { 81 int i; 82 83 for (i = 0; i < SMC_MAX_PNETID_LEN; i++) { 84 if ((pnetid1[i] == 0 || pnetid1[i] == SMC_ASCII_BLANK) && 85 (pnetid2[i] == 0 || pnetid2[i] == SMC_ASCII_BLANK)) 86 break; 87 if (pnetid1[i] != pnetid2[i]) 88 return false; 89 } 90 return true; 91 } 92 93 /* Remove a pnetid from the pnet table. 94 */ 95 static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) 96 { 97 struct smc_pnetentry *pnetelem, *tmp_pe; 98 struct smc_pnettable *pnettable; 99 struct smc_ib_device *ibdev; 100 struct smcd_dev *smcd_dev; 101 struct smc_net *sn; 102 int rc = -ENOENT; 103 int ibport; 104 105 /* get pnettable for namespace */ 106 sn = net_generic(net, smc_net_id); 107 pnettable = &sn->pnettable; 108 109 /* remove netdevices */ 110 write_lock(&pnettable->lock); 111 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, 112 list) { 113 if (!pnet_name || 114 smc_pnet_match(pnetelem->pnet_name, pnet_name)) { 115 list_del(&pnetelem->list); 116 dev_put(pnetelem->ndev); 117 kfree(pnetelem); 118 rc = 0; 119 } 120 } 121 write_unlock(&pnettable->lock); 122 123 /* if this is not the initial namespace, stop here */ 124 if (net != &init_net) 125 return rc; 126 127 /* remove ib devices */ 128 spin_lock(&smc_ib_devices.lock); 129 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 130 for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { 131 if (ibdev->pnetid_by_user[ibport] && 132 (!pnet_name || 133 smc_pnet_match(pnet_name, 134 ibdev->pnetid[ibport]))) { 135 memset(ibdev->pnetid[ibport], 0, 136 SMC_MAX_PNETID_LEN); 137 ibdev->pnetid_by_user[ibport] = false; 138 rc = 0; 139 } 140 } 141 } 142 spin_unlock(&smc_ib_devices.lock); 143 /* remove smcd devices */ 144 spin_lock(&smcd_dev_list.lock); 145 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 146 if (smcd_dev->pnetid_by_user && 147 (!pnet_name || 148 smc_pnet_match(pnet_name, smcd_dev->pnetid))) { 149 memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN); 150 smcd_dev->pnetid_by_user = false; 151 rc = 0; 152 } 153 } 154 spin_unlock(&smcd_dev_list.lock); 155 return rc; 156 } 157 158 /* Remove a pnet entry mentioning a given network device from the pnet table. 159 */ 160 static int smc_pnet_remove_by_ndev(struct net_device *ndev) 161 { 162 struct smc_pnetentry *pnetelem, *tmp_pe; 163 struct smc_pnettable *pnettable; 164 struct net *net = dev_net(ndev); 165 struct smc_net *sn; 166 int rc = -ENOENT; 167 168 /* get pnettable for namespace */ 169 sn = net_generic(net, smc_net_id); 170 pnettable = &sn->pnettable; 171 172 write_lock(&pnettable->lock); 173 list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { 174 if (pnetelem->ndev == ndev) { 175 list_del(&pnetelem->list); 176 dev_put(pnetelem->ndev); 177 kfree(pnetelem); 178 rc = 0; 179 break; 180 } 181 } 182 write_unlock(&pnettable->lock); 183 return rc; 184 } 185 186 /* Append a pnetid to the end of the pnet table if not already on this list. 187 */ 188 static int smc_pnet_enter(struct smc_pnettable *pnettable, 189 struct smc_user_pnetentry *new_pnetelem) 190 { 191 u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; 192 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 193 struct smc_pnetentry *tmp_pnetelem; 194 struct smc_pnetentry *pnetelem; 195 bool new_smcddev = false; 196 struct net_device *ndev; 197 bool new_netdev = true; 198 bool new_ibdev = false; 199 200 if (new_pnetelem->smcibdev) { 201 struct smc_ib_device *ib_dev = new_pnetelem->smcibdev; 202 int ib_port = new_pnetelem->ib_port; 203 204 spin_lock(&smc_ib_devices.lock); 205 if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { 206 memcpy(ib_dev->pnetid[ib_port - 1], 207 new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN); 208 ib_dev->pnetid_by_user[ib_port - 1] = true; 209 new_ibdev = true; 210 } 211 spin_unlock(&smc_ib_devices.lock); 212 } 213 if (new_pnetelem->smcd_dev) { 214 struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev; 215 216 spin_lock(&smcd_dev_list.lock); 217 if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { 218 memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name, 219 SMC_MAX_PNETID_LEN); 220 smcd_dev->pnetid_by_user = true; 221 new_smcddev = true; 222 } 223 spin_unlock(&smcd_dev_list.lock); 224 } 225 226 if (!new_pnetelem->ndev) 227 return (new_ibdev || new_smcddev) ? 0 : -EEXIST; 228 229 /* check if (base) netdev already has a pnetid. If there is one, we do 230 * not want to add a pnet table entry 231 */ 232 ndev = pnet_find_base_ndev(new_pnetelem->ndev); 233 if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 234 ndev_pnetid)) 235 return (new_ibdev || new_smcddev) ? 0 : -EEXIST; 236 237 /* add a new netdev entry to the pnet table if there isn't one */ 238 tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); 239 if (!tmp_pnetelem) 240 return -ENOMEM; 241 memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name, 242 SMC_MAX_PNETID_LEN); 243 tmp_pnetelem->ndev = new_pnetelem->ndev; 244 245 write_lock(&pnettable->lock); 246 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 247 if (pnetelem->ndev == new_pnetelem->ndev) 248 new_netdev = false; 249 } 250 if (new_netdev) { 251 dev_hold(tmp_pnetelem->ndev); 252 list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist); 253 write_unlock(&pnettable->lock); 254 } else { 255 write_unlock(&pnettable->lock); 256 kfree(tmp_pnetelem); 257 } 258 259 return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST; 260 } 261 262 /* The limit for pnetid is 16 characters. 263 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. 264 * Lower case letters are converted to upper case. 265 * Interior blanks should not be used. 266 */ 267 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) 268 { 269 char *bf = skip_spaces(pnet_name); 270 size_t len = strlen(bf); 271 char *end = bf + len; 272 273 if (!len) 274 return false; 275 while (--end >= bf && isspace(*end)) 276 ; 277 if (end - bf >= SMC_MAX_PNETID_LEN) 278 return false; 279 while (bf <= end) { 280 if (!isalnum(*bf)) 281 return false; 282 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; 283 bf++; 284 } 285 *pnetid = '\0'; 286 return true; 287 } 288 289 /* Find an infiniband device by a given name. The device might not exist. */ 290 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) 291 { 292 struct smc_ib_device *ibdev; 293 294 spin_lock(&smc_ib_devices.lock); 295 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 296 if (!strncmp(ibdev->ibdev->name, ib_name, 297 sizeof(ibdev->ibdev->name)) || 298 !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name, 299 IB_DEVICE_NAME_MAX - 1)) { 300 goto out; 301 } 302 } 303 ibdev = NULL; 304 out: 305 spin_unlock(&smc_ib_devices.lock); 306 return ibdev; 307 } 308 309 /* Find an smcd device by a given name. The device might not exist. */ 310 static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name) 311 { 312 struct smcd_dev *smcd_dev; 313 314 spin_lock(&smcd_dev_list.lock); 315 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 316 if (!strncmp(dev_name(&smcd_dev->dev), smcd_name, 317 IB_DEVICE_NAME_MAX - 1)) 318 goto out; 319 } 320 smcd_dev = NULL; 321 out: 322 spin_unlock(&smcd_dev_list.lock); 323 return smcd_dev; 324 } 325 326 /* Parse the supplied netlink attributes and fill a pnetentry structure. 327 * For ethernet and infiniband device names verify that the devices exist. 328 */ 329 static int smc_pnet_fill_entry(struct net *net, 330 struct smc_user_pnetentry *pnetelem, 331 struct nlattr *tb[]) 332 { 333 char *string, *ibname; 334 int rc; 335 336 memset(pnetelem, 0, sizeof(*pnetelem)); 337 INIT_LIST_HEAD(&pnetelem->list); 338 339 rc = -EINVAL; 340 if (!tb[SMC_PNETID_NAME]) 341 goto error; 342 string = (char *)nla_data(tb[SMC_PNETID_NAME]); 343 if (!smc_pnetid_valid(string, pnetelem->pnet_name)) 344 goto error; 345 346 rc = -EINVAL; 347 if (tb[SMC_PNETID_ETHNAME]) { 348 string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); 349 pnetelem->ndev = dev_get_by_name(net, string); 350 if (!pnetelem->ndev) 351 goto error; 352 } 353 354 /* if this is not the initial namespace, stop here */ 355 if (net != &init_net) 356 return 0; 357 358 rc = -EINVAL; 359 if (tb[SMC_PNETID_IBNAME]) { 360 ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); 361 ibname = strim(ibname); 362 pnetelem->smcibdev = smc_pnet_find_ib(ibname); 363 pnetelem->smcd_dev = smc_pnet_find_smcd(ibname); 364 if (!pnetelem->smcibdev && !pnetelem->smcd_dev) 365 goto error; 366 if (pnetelem->smcibdev) { 367 if (!tb[SMC_PNETID_IBPORT]) 368 goto error; 369 pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); 370 if (pnetelem->ib_port < 1 || 371 pnetelem->ib_port > SMC_MAX_PORTS) 372 goto error; 373 } 374 } 375 376 return 0; 377 378 error: 379 return rc; 380 } 381 382 /* Convert an smc_pnetentry to a netlink attribute sequence */ 383 static int smc_pnet_set_nla(struct sk_buff *msg, 384 struct smc_user_pnetentry *pnetelem) 385 { 386 if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) 387 return -1; 388 if (pnetelem->ndev) { 389 if (nla_put_string(msg, SMC_PNETID_ETHNAME, 390 pnetelem->ndev->name)) 391 return -1; 392 } else { 393 if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) 394 return -1; 395 } 396 if (pnetelem->smcibdev) { 397 if (nla_put_string(msg, SMC_PNETID_IBNAME, 398 dev_name(pnetelem->smcibdev->ibdev->dev.parent)) || 399 nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) 400 return -1; 401 } else if (pnetelem->smcd_dev) { 402 if (nla_put_string(msg, SMC_PNETID_IBNAME, 403 dev_name(&pnetelem->smcd_dev->dev)) || 404 nla_put_u8(msg, SMC_PNETID_IBPORT, 1)) 405 return -1; 406 } else { 407 if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || 408 nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) 409 return -1; 410 } 411 412 return 0; 413 } 414 415 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) 416 { 417 struct net *net = genl_info_net(info); 418 struct smc_user_pnetentry pnetelem; 419 struct smc_pnettable *pnettable; 420 struct smc_net *sn; 421 int rc; 422 423 /* get pnettable for namespace */ 424 sn = net_generic(net, smc_net_id); 425 pnettable = &sn->pnettable; 426 427 rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs); 428 if (!rc) 429 rc = smc_pnet_enter(pnettable, &pnetelem); 430 if (pnetelem.ndev) 431 dev_put(pnetelem.ndev); 432 return rc; 433 } 434 435 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) 436 { 437 struct net *net = genl_info_net(info); 438 439 if (!info->attrs[SMC_PNETID_NAME]) 440 return -EINVAL; 441 return smc_pnet_remove_by_pnetid(net, 442 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 443 } 444 445 static int smc_pnet_dump_start(struct netlink_callback *cb) 446 { 447 cb->args[0] = 0; 448 return 0; 449 } 450 451 static int smc_pnet_dumpinfo(struct sk_buff *skb, 452 u32 portid, u32 seq, u32 flags, 453 struct smc_user_pnetentry *pnetelem) 454 { 455 void *hdr; 456 457 hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, 458 flags, SMC_PNETID_GET); 459 if (!hdr) 460 return -ENOMEM; 461 if (smc_pnet_set_nla(skb, pnetelem) < 0) { 462 genlmsg_cancel(skb, hdr); 463 return -EMSGSIZE; 464 } 465 genlmsg_end(skb, hdr); 466 return 0; 467 } 468 469 static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, 470 u32 seq, u8 *pnetid, int start_idx) 471 { 472 struct smc_user_pnetentry tmp_entry; 473 struct smc_pnettable *pnettable; 474 struct smc_pnetentry *pnetelem; 475 struct smc_ib_device *ibdev; 476 struct smcd_dev *smcd_dev; 477 struct smc_net *sn; 478 int idx = 0; 479 int ibport; 480 481 /* get pnettable for namespace */ 482 sn = net_generic(net, smc_net_id); 483 pnettable = &sn->pnettable; 484 485 /* dump netdevices */ 486 read_lock(&pnettable->lock); 487 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 488 if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) 489 continue; 490 if (idx++ < start_idx) 491 continue; 492 memset(&tmp_entry, 0, sizeof(tmp_entry)); 493 memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name, 494 SMC_MAX_PNETID_LEN); 495 tmp_entry.ndev = pnetelem->ndev; 496 if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, 497 &tmp_entry)) { 498 --idx; 499 break; 500 } 501 } 502 read_unlock(&pnettable->lock); 503 504 /* if this is not the initial namespace, stop here */ 505 if (net != &init_net) 506 return idx; 507 508 /* dump ib devices */ 509 spin_lock(&smc_ib_devices.lock); 510 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 511 for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { 512 if (ibdev->pnetid_by_user[ibport]) { 513 if (pnetid && 514 !smc_pnet_match(ibdev->pnetid[ibport], 515 pnetid)) 516 continue; 517 if (idx++ < start_idx) 518 continue; 519 memset(&tmp_entry, 0, sizeof(tmp_entry)); 520 memcpy(&tmp_entry.pnet_name, 521 ibdev->pnetid[ibport], 522 SMC_MAX_PNETID_LEN); 523 tmp_entry.smcibdev = ibdev; 524 tmp_entry.ib_port = ibport + 1; 525 if (smc_pnet_dumpinfo(skb, portid, seq, 526 NLM_F_MULTI, 527 &tmp_entry)) { 528 --idx; 529 break; 530 } 531 } 532 } 533 } 534 spin_unlock(&smc_ib_devices.lock); 535 536 /* dump smcd devices */ 537 spin_lock(&smcd_dev_list.lock); 538 list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { 539 if (smcd_dev->pnetid_by_user) { 540 if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid)) 541 continue; 542 if (idx++ < start_idx) 543 continue; 544 memset(&tmp_entry, 0, sizeof(tmp_entry)); 545 memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid, 546 SMC_MAX_PNETID_LEN); 547 tmp_entry.smcd_dev = smcd_dev; 548 if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, 549 &tmp_entry)) { 550 --idx; 551 break; 552 } 553 } 554 } 555 spin_unlock(&smcd_dev_list.lock); 556 557 return idx; 558 } 559 560 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) 561 { 562 struct net *net = sock_net(skb->sk); 563 int idx; 564 565 idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid, 566 cb->nlh->nlmsg_seq, NULL, cb->args[0]); 567 568 cb->args[0] = idx; 569 return skb->len; 570 } 571 572 /* Retrieve one PNETID entry */ 573 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) 574 { 575 struct net *net = genl_info_net(info); 576 struct sk_buff *msg; 577 void *hdr; 578 579 if (!info->attrs[SMC_PNETID_NAME]) 580 return -EINVAL; 581 582 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 583 if (!msg) 584 return -ENOMEM; 585 586 _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq, 587 nla_data(info->attrs[SMC_PNETID_NAME]), 0); 588 589 /* finish multi part message and send it */ 590 hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0, 591 NLM_F_MULTI); 592 if (!hdr) { 593 nlmsg_free(msg); 594 return -EMSGSIZE; 595 } 596 return genlmsg_reply(msg, info); 597 } 598 599 /* Remove and delete all pnetids from pnet table. 600 */ 601 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) 602 { 603 struct net *net = genl_info_net(info); 604 605 smc_pnet_remove_by_pnetid(net, NULL); 606 return 0; 607 } 608 609 /* SMC_PNETID generic netlink operation definition */ 610 static const struct genl_ops smc_pnet_ops[] = { 611 { 612 .cmd = SMC_PNETID_GET, 613 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 614 /* can be retrieved by unprivileged users */ 615 .doit = smc_pnet_get, 616 .dumpit = smc_pnet_dump, 617 .start = smc_pnet_dump_start 618 }, 619 { 620 .cmd = SMC_PNETID_ADD, 621 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 622 .flags = GENL_ADMIN_PERM, 623 .doit = smc_pnet_add 624 }, 625 { 626 .cmd = SMC_PNETID_DEL, 627 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 628 .flags = GENL_ADMIN_PERM, 629 .doit = smc_pnet_del 630 }, 631 { 632 .cmd = SMC_PNETID_FLUSH, 633 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 634 .flags = GENL_ADMIN_PERM, 635 .doit = smc_pnet_flush 636 } 637 }; 638 639 /* SMC_PNETID family definition */ 640 static struct genl_family smc_pnet_nl_family __ro_after_init = { 641 .hdrsize = 0, 642 .name = SMCR_GENL_FAMILY_NAME, 643 .version = SMCR_GENL_FAMILY_VERSION, 644 .maxattr = SMC_PNETID_MAX, 645 .policy = smc_pnet_policy, 646 .netnsok = true, 647 .module = THIS_MODULE, 648 .ops = smc_pnet_ops, 649 .n_ops = ARRAY_SIZE(smc_pnet_ops) 650 }; 651 652 static int smc_pnet_netdev_event(struct notifier_block *this, 653 unsigned long event, void *ptr) 654 { 655 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 656 657 switch (event) { 658 case NETDEV_REBOOT: 659 case NETDEV_UNREGISTER: 660 smc_pnet_remove_by_ndev(event_dev); 661 return NOTIFY_OK; 662 default: 663 return NOTIFY_DONE; 664 } 665 } 666 667 static struct notifier_block smc_netdev_notifier = { 668 .notifier_call = smc_pnet_netdev_event 669 }; 670 671 /* init network namespace */ 672 int smc_pnet_net_init(struct net *net) 673 { 674 struct smc_net *sn = net_generic(net, smc_net_id); 675 struct smc_pnettable *pnettable = &sn->pnettable; 676 677 INIT_LIST_HEAD(&pnettable->pnetlist); 678 rwlock_init(&pnettable->lock); 679 680 return 0; 681 } 682 683 int __init smc_pnet_init(void) 684 { 685 int rc; 686 687 rc = genl_register_family(&smc_pnet_nl_family); 688 if (rc) 689 return rc; 690 rc = register_netdevice_notifier(&smc_netdev_notifier); 691 if (rc) 692 genl_unregister_family(&smc_pnet_nl_family); 693 return rc; 694 } 695 696 /* exit network namespace */ 697 void smc_pnet_net_exit(struct net *net) 698 { 699 /* flush pnet table */ 700 smc_pnet_remove_by_pnetid(net, NULL); 701 } 702 703 void smc_pnet_exit(void) 704 { 705 unregister_netdevice_notifier(&smc_netdev_notifier); 706 genl_unregister_family(&smc_pnet_nl_family); 707 } 708 709 /* Determine one base device for stacked net devices. 710 * If the lower device level contains more than one devices 711 * (for instance with bonding slaves), just the first device 712 * is used to reach a base device. 713 */ 714 static struct net_device *pnet_find_base_ndev(struct net_device *ndev) 715 { 716 int i, nest_lvl; 717 718 rtnl_lock(); 719 nest_lvl = ndev->lower_level; 720 for (i = 0; i < nest_lvl; i++) { 721 struct list_head *lower = &ndev->adj_list.lower; 722 723 if (list_empty(lower)) 724 break; 725 lower = lower->next; 726 ndev = netdev_lower_get_next(ndev, &lower); 727 } 728 rtnl_unlock(); 729 return ndev; 730 } 731 732 static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, 733 u8 *pnetid) 734 { 735 struct smc_pnettable *pnettable; 736 struct net *net = dev_net(ndev); 737 struct smc_pnetentry *pnetelem; 738 struct smc_net *sn; 739 int rc = -ENOENT; 740 741 /* get pnettable for namespace */ 742 sn = net_generic(net, smc_net_id); 743 pnettable = &sn->pnettable; 744 745 read_lock(&pnettable->lock); 746 list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { 747 if (ndev == pnetelem->ndev) { 748 /* get pnetid of netdev device */ 749 memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); 750 rc = 0; 751 break; 752 } 753 } 754 read_unlock(&pnettable->lock); 755 return rc; 756 } 757 758 /* if handshake network device belongs to a roce device, return its 759 * IB device and port 760 */ 761 static void smc_pnet_find_rdma_dev(struct net_device *netdev, 762 struct smc_init_info *ini) 763 { 764 struct smc_ib_device *ibdev; 765 766 spin_lock(&smc_ib_devices.lock); 767 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 768 struct net_device *ndev; 769 int i; 770 771 for (i = 1; i <= SMC_MAX_PORTS; i++) { 772 if (!rdma_is_port_valid(ibdev->ibdev, i)) 773 continue; 774 if (!ibdev->ibdev->ops.get_netdev) 775 continue; 776 ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); 777 if (!ndev) 778 continue; 779 dev_put(ndev); 780 if (netdev == ndev && 781 smc_ib_port_active(ibdev, i) && 782 !test_bit(i - 1, ibdev->ports_going_away) && 783 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 784 ini->ib_gid, NULL)) { 785 ini->ib_dev = ibdev; 786 ini->ib_port = i; 787 break; 788 } 789 } 790 } 791 spin_unlock(&smc_ib_devices.lock); 792 } 793 794 /* Determine the corresponding IB device port based on the hardware PNETID. 795 * Searching stops at the first matching active IB device port with vlan_id 796 * configured. 797 * If nothing found, check pnetid table. 798 * If nothing found, try to use handshake device 799 */ 800 static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, 801 struct smc_init_info *ini) 802 { 803 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 804 struct smc_ib_device *ibdev; 805 int i; 806 807 ndev = pnet_find_base_ndev(ndev); 808 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 809 ndev_pnetid) && 810 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { 811 smc_pnet_find_rdma_dev(ndev, ini); 812 return; /* pnetid could not be determined */ 813 } 814 815 spin_lock(&smc_ib_devices.lock); 816 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 817 for (i = 1; i <= SMC_MAX_PORTS; i++) { 818 if (!rdma_is_port_valid(ibdev->ibdev, i)) 819 continue; 820 if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) && 821 smc_ib_port_active(ibdev, i) && 822 !test_bit(i - 1, ibdev->ports_going_away) && 823 !smc_ib_determine_gid(ibdev, i, ini->vlan_id, 824 ini->ib_gid, NULL)) { 825 ini->ib_dev = ibdev; 826 ini->ib_port = i; 827 goto out; 828 } 829 } 830 } 831 out: 832 spin_unlock(&smc_ib_devices.lock); 833 } 834 835 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, 836 struct smc_init_info *ini) 837 { 838 u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; 839 struct smcd_dev *ismdev; 840 841 ndev = pnet_find_base_ndev(ndev); 842 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, 843 ndev_pnetid) && 844 smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) 845 return; /* pnetid could not be determined */ 846 847 spin_lock(&smcd_dev_list.lock); 848 list_for_each_entry(ismdev, &smcd_dev_list.list, list) { 849 if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && 850 !ismdev->going_away) { 851 ini->ism_dev = ismdev; 852 break; 853 } 854 } 855 spin_unlock(&smcd_dev_list.lock); 856 } 857 858 /* PNET table analysis for a given sock: 859 * determine ib_device and port belonging to used internal TCP socket 860 * ethernet interface. 861 */ 862 void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini) 863 { 864 struct dst_entry *dst = sk_dst_get(sk); 865 866 ini->ib_dev = NULL; 867 ini->ib_port = 0; 868 if (!dst) 869 goto out; 870 if (!dst->dev) 871 goto out_rel; 872 873 smc_pnet_find_roce_by_pnetid(dst->dev, ini); 874 875 out_rel: 876 dst_release(dst); 877 out: 878 return; 879 } 880 881 void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini) 882 { 883 struct dst_entry *dst = sk_dst_get(sk); 884 885 ini->ism_dev = NULL; 886 if (!dst) 887 goto out; 888 if (!dst->dev) 889 goto out_rel; 890 891 smc_pnet_find_ism_by_pnetid(dst->dev, ini); 892 893 out_rel: 894 dst_release(dst); 895 out: 896 return; 897 } 898