1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Generic netlink support functions to configure an SMC-R PNET table 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/module.h> 13 #include <linux/list.h> 14 #include <linux/ctype.h> 15 #include <net/netlink.h> 16 #include <net/genetlink.h> 17 18 #include <uapi/linux/if.h> 19 #include <uapi/linux/smc.h> 20 21 #include <rdma/ib_verbs.h> 22 23 #include "smc_pnet.h" 24 #include "smc_ib.h" 25 26 #define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */ 27 28 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { 29 [SMC_PNETID_NAME] = { 30 .type = NLA_NUL_STRING, 31 .len = SMC_MAX_PNET_ID_LEN - 1 32 }, 33 [SMC_PNETID_ETHNAME] = { 34 .type = NLA_NUL_STRING, 35 .len = IFNAMSIZ - 1 36 }, 37 [SMC_PNETID_IBNAME] = { 38 .type = NLA_NUL_STRING, 39 .len = IB_DEVICE_NAME_MAX - 1 40 }, 41 [SMC_PNETID_IBPORT] = { .type = NLA_U8 } 42 }; 43 44 static struct genl_family smc_pnet_nl_family; 45 46 /** 47 * struct smc_pnettable - SMC PNET table anchor 48 * @lock: Lock for list action 49 * @pnetlist: List of PNETIDs 50 */ 51 static struct smc_pnettable { 52 rwlock_t lock; 53 struct list_head pnetlist; 54 } smc_pnettable = { 55 .pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist), 56 .lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock) 57 }; 58 59 /** 60 * struct smc_pnetentry - pnet identifier name entry 61 * @list: List node. 62 * @pnet_name: Pnet identifier name 63 * @ndev: pointer to network device. 64 * @smcibdev: Pointer to IB device. 65 */ 66 struct smc_pnetentry { 67 struct list_head list; 68 char pnet_name[SMC_MAX_PNET_ID_LEN + 1]; 69 struct net_device *ndev; 70 struct smc_ib_device *smcibdev; 71 u8 ib_port; 72 }; 73 74 /* Check if two RDMA device entries are identical. Use device name and port 75 * number for comparison. 76 */ 77 static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname, 78 u8 ibport) 79 { 80 return pnetelem->ib_port == ibport && 81 !strncmp(pnetelem->smcibdev->ibdev->name, ibname, 82 sizeof(pnetelem->smcibdev->ibdev->name)); 83 } 84 85 /* Find a pnetid in the pnet table. 86 */ 87 static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name) 88 { 89 struct smc_pnetentry *pnetelem, *found_pnetelem = NULL; 90 91 read_lock(&smc_pnettable.lock); 92 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 93 if (!strncmp(pnetelem->pnet_name, pnet_name, 94 sizeof(pnetelem->pnet_name))) { 95 found_pnetelem = pnetelem; 96 break; 97 } 98 } 99 read_unlock(&smc_pnettable.lock); 100 return found_pnetelem; 101 } 102 103 /* Remove a pnetid from the pnet table. 104 */ 105 static int smc_pnet_remove_by_pnetid(char *pnet_name) 106 { 107 struct smc_pnetentry *pnetelem, *tmp_pe; 108 int rc = -ENOENT; 109 110 write_lock(&smc_pnettable.lock); 111 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 112 list) { 113 if (!strncmp(pnetelem->pnet_name, pnet_name, 114 sizeof(pnetelem->pnet_name))) { 115 list_del(&pnetelem->list); 116 dev_put(pnetelem->ndev); 117 kfree(pnetelem); 118 rc = 0; 119 break; 120 } 121 } 122 write_unlock(&smc_pnettable.lock); 123 return rc; 124 } 125 126 /* Remove a pnet entry mentioning a given network device from the pnet table. 127 */ 128 static int smc_pnet_remove_by_ndev(struct net_device *ndev) 129 { 130 struct smc_pnetentry *pnetelem, *tmp_pe; 131 int rc = -ENOENT; 132 133 write_lock(&smc_pnettable.lock); 134 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 135 list) { 136 if (pnetelem->ndev == ndev) { 137 list_del(&pnetelem->list); 138 dev_put(pnetelem->ndev); 139 kfree(pnetelem); 140 rc = 0; 141 break; 142 } 143 } 144 write_unlock(&smc_pnettable.lock); 145 return rc; 146 } 147 148 /* Remove a pnet entry mentioning a given ib device from the pnet table. 149 */ 150 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev) 151 { 152 struct smc_pnetentry *pnetelem, *tmp_pe; 153 int rc = -ENOENT; 154 155 write_lock(&smc_pnettable.lock); 156 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 157 list) { 158 if (pnetelem->smcibdev == ibdev) { 159 list_del(&pnetelem->list); 160 dev_put(pnetelem->ndev); 161 kfree(pnetelem); 162 rc = 0; 163 break; 164 } 165 } 166 write_unlock(&smc_pnettable.lock); 167 return rc; 168 } 169 170 /* Append a pnetid to the end of the pnet table if not already on this list. 171 */ 172 static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem) 173 { 174 struct smc_pnetentry *pnetelem; 175 int rc = -EEXIST; 176 177 write_lock(&smc_pnettable.lock); 178 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 179 if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name, 180 sizeof(new_pnetelem->pnet_name)) || 181 !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name, 182 sizeof(new_pnetelem->ndev->name)) || 183 smc_pnet_same_ibname(pnetelem, 184 new_pnetelem->smcibdev->ibdev->name, 185 new_pnetelem->ib_port)) { 186 dev_put(pnetelem->ndev); 187 goto found; 188 } 189 } 190 list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist); 191 rc = 0; 192 found: 193 write_unlock(&smc_pnettable.lock); 194 return rc; 195 } 196 197 /* The limit for pnetid is 16 characters. 198 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. 199 * Lower case letters are converted to upper case. 200 * Interior blanks should not be used. 201 */ 202 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) 203 { 204 char *bf = skip_spaces(pnet_name); 205 size_t len = strlen(bf); 206 char *end = bf + len; 207 208 if (!len) 209 return false; 210 while (--end >= bf && isspace(*end)) 211 ; 212 if (end - bf >= SMC_MAX_PNET_ID_LEN) 213 return false; 214 while (bf <= end) { 215 if (!isalnum(*bf)) 216 return false; 217 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; 218 bf++; 219 } 220 *pnetid = '\0'; 221 return true; 222 } 223 224 /* Find an infiniband device by a given name. The device might not exist. */ 225 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) 226 { 227 struct smc_ib_device *ibdev; 228 229 spin_lock(&smc_ib_devices.lock); 230 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 231 if (!strncmp(ibdev->ibdev->name, ib_name, 232 sizeof(ibdev->ibdev->name))) { 233 goto out; 234 } 235 } 236 ibdev = NULL; 237 out: 238 spin_unlock(&smc_ib_devices.lock); 239 return ibdev; 240 } 241 242 /* Parse the supplied netlink attributes and fill a pnetentry structure. 243 * For ethernet and infiniband device names verify that the devices exist. 244 */ 245 static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem, 246 struct nlattr *tb[]) 247 { 248 char *string, *ibname; 249 int rc; 250 251 memset(pnetelem, 0, sizeof(*pnetelem)); 252 INIT_LIST_HEAD(&pnetelem->list); 253 254 rc = -EINVAL; 255 if (!tb[SMC_PNETID_NAME]) 256 goto error; 257 string = (char *)nla_data(tb[SMC_PNETID_NAME]); 258 if (!smc_pnetid_valid(string, pnetelem->pnet_name)) 259 goto error; 260 261 rc = -EINVAL; 262 if (!tb[SMC_PNETID_ETHNAME]) 263 goto error; 264 rc = -ENOENT; 265 string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); 266 pnetelem->ndev = dev_get_by_name(net, string); 267 if (!pnetelem->ndev) 268 goto error; 269 270 rc = -EINVAL; 271 if (!tb[SMC_PNETID_IBNAME]) 272 goto error; 273 rc = -ENOENT; 274 ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); 275 ibname = strim(ibname); 276 pnetelem->smcibdev = smc_pnet_find_ib(ibname); 277 if (!pnetelem->smcibdev) 278 goto error; 279 280 rc = -EINVAL; 281 if (!tb[SMC_PNETID_IBPORT]) 282 goto error; 283 pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); 284 if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS) 285 goto error; 286 287 return 0; 288 289 error: 290 if (pnetelem->ndev) 291 dev_put(pnetelem->ndev); 292 return rc; 293 } 294 295 /* Convert an smc_pnetentry to a netlink attribute sequence */ 296 static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem) 297 { 298 if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) || 299 nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) || 300 nla_put_string(msg, SMC_PNETID_IBNAME, 301 pnetelem->smcibdev->ibdev->name) || 302 nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) 303 return -1; 304 return 0; 305 } 306 307 /* Retrieve one PNETID entry */ 308 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) 309 { 310 struct smc_pnetentry *pnetelem; 311 struct sk_buff *msg; 312 void *hdr; 313 int rc; 314 315 if (!info->attrs[SMC_PNETID_NAME]) 316 return -EINVAL; 317 pnetelem = smc_pnet_find_pnetid( 318 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 319 if (!pnetelem) 320 return -ENOENT; 321 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 322 if (!msg) 323 return -ENOMEM; 324 325 hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, 326 &smc_pnet_nl_family, 0, SMC_PNETID_GET); 327 if (!hdr) { 328 rc = -EMSGSIZE; 329 goto err_out; 330 } 331 332 if (smc_pnet_set_nla(msg, pnetelem)) { 333 rc = -ENOBUFS; 334 goto err_out; 335 } 336 337 genlmsg_end(msg, hdr); 338 return genlmsg_reply(msg, info); 339 340 err_out: 341 nlmsg_free(msg); 342 return rc; 343 } 344 345 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) 346 { 347 struct net *net = genl_info_net(info); 348 struct smc_pnetentry *pnetelem; 349 int rc; 350 351 pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); 352 if (!pnetelem) 353 return -ENOMEM; 354 rc = smc_pnet_fill_entry(net, pnetelem, info->attrs); 355 if (!rc) 356 rc = smc_pnet_enter(pnetelem); 357 if (rc) { 358 kfree(pnetelem); 359 return rc; 360 } 361 rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port); 362 if (rc) 363 smc_pnet_remove_by_pnetid(pnetelem->pnet_name); 364 return rc; 365 } 366 367 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) 368 { 369 if (!info->attrs[SMC_PNETID_NAME]) 370 return -EINVAL; 371 return smc_pnet_remove_by_pnetid( 372 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 373 } 374 375 static int smc_pnet_dump_start(struct netlink_callback *cb) 376 { 377 cb->args[0] = 0; 378 return 0; 379 } 380 381 static int smc_pnet_dumpinfo(struct sk_buff *skb, 382 u32 portid, u32 seq, u32 flags, 383 struct smc_pnetentry *pnetelem) 384 { 385 void *hdr; 386 387 hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, 388 flags, SMC_PNETID_GET); 389 if (!hdr) 390 return -ENOMEM; 391 if (smc_pnet_set_nla(skb, pnetelem) < 0) { 392 genlmsg_cancel(skb, hdr); 393 return -EMSGSIZE; 394 } 395 genlmsg_end(skb, hdr); 396 return 0; 397 } 398 399 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) 400 { 401 struct smc_pnetentry *pnetelem; 402 int idx = 0; 403 404 read_lock(&smc_pnettable.lock); 405 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 406 if (idx++ < cb->args[0]) 407 continue; 408 if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid, 409 cb->nlh->nlmsg_seq, NLM_F_MULTI, 410 pnetelem)) { 411 --idx; 412 break; 413 } 414 } 415 cb->args[0] = idx; 416 read_unlock(&smc_pnettable.lock); 417 return skb->len; 418 } 419 420 /* Remove and delete all pnetids from pnet table. 421 */ 422 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) 423 { 424 struct smc_pnetentry *pnetelem, *tmp_pe; 425 426 write_lock(&smc_pnettable.lock); 427 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 428 list) { 429 list_del(&pnetelem->list); 430 dev_put(pnetelem->ndev); 431 kfree(pnetelem); 432 } 433 write_unlock(&smc_pnettable.lock); 434 return 0; 435 } 436 437 /* SMC_PNETID generic netlink operation definition */ 438 static const struct genl_ops smc_pnet_ops[] = { 439 { 440 .cmd = SMC_PNETID_GET, 441 .flags = GENL_ADMIN_PERM, 442 .policy = smc_pnet_policy, 443 .doit = smc_pnet_get, 444 .dumpit = smc_pnet_dump, 445 .start = smc_pnet_dump_start 446 }, 447 { 448 .cmd = SMC_PNETID_ADD, 449 .flags = GENL_ADMIN_PERM, 450 .policy = smc_pnet_policy, 451 .doit = smc_pnet_add 452 }, 453 { 454 .cmd = SMC_PNETID_DEL, 455 .flags = GENL_ADMIN_PERM, 456 .policy = smc_pnet_policy, 457 .doit = smc_pnet_del 458 }, 459 { 460 .cmd = SMC_PNETID_FLUSH, 461 .flags = GENL_ADMIN_PERM, 462 .policy = smc_pnet_policy, 463 .doit = smc_pnet_flush 464 } 465 }; 466 467 /* SMC_PNETID family definition */ 468 static struct genl_family smc_pnet_nl_family = { 469 .hdrsize = 0, 470 .name = SMCR_GENL_FAMILY_NAME, 471 .version = SMCR_GENL_FAMILY_VERSION, 472 .maxattr = SMC_PNETID_MAX, 473 .netnsok = true, 474 .module = THIS_MODULE, 475 .ops = smc_pnet_ops, 476 .n_ops = ARRAY_SIZE(smc_pnet_ops) 477 }; 478 479 static int smc_pnet_netdev_event(struct notifier_block *this, 480 unsigned long event, void *ptr) 481 { 482 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 483 484 switch (event) { 485 case NETDEV_REBOOT: 486 case NETDEV_UNREGISTER: 487 smc_pnet_remove_by_ndev(event_dev); 488 default: 489 break; 490 } 491 return NOTIFY_DONE; 492 } 493 494 static struct notifier_block smc_netdev_notifier = { 495 .notifier_call = smc_pnet_netdev_event 496 }; 497 498 int __init smc_pnet_init(void) 499 { 500 int rc; 501 502 rc = genl_register_family(&smc_pnet_nl_family); 503 if (rc) 504 return rc; 505 rc = register_netdevice_notifier(&smc_netdev_notifier); 506 if (rc) 507 genl_unregister_family(&smc_pnet_nl_family); 508 return rc; 509 } 510 511 void smc_pnet_exit(void) 512 { 513 smc_pnet_flush(NULL, NULL); 514 unregister_netdevice_notifier(&smc_netdev_notifier); 515 genl_unregister_family(&smc_pnet_nl_family); 516 } 517 518 /* PNET table analysis for a given sock: 519 * determine ib_device and port belonging to used internal TCP socket 520 * ethernet interface. 521 */ 522 void smc_pnet_find_roce_resource(struct sock *sk, 523 struct smc_ib_device **smcibdev, u8 *ibport) 524 { 525 struct dst_entry *dst = sk_dst_get(sk); 526 struct smc_pnetentry *pnetelem; 527 528 *smcibdev = NULL; 529 *ibport = 0; 530 531 if (!dst) 532 return; 533 if (!dst->dev) 534 goto out_rel; 535 read_lock(&smc_pnettable.lock); 536 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 537 if (dst->dev == pnetelem->ndev) { 538 if (smc_ib_port_active(pnetelem->smcibdev, 539 pnetelem->ib_port)) { 540 *smcibdev = pnetelem->smcibdev; 541 *ibport = pnetelem->ib_port; 542 } 543 break; 544 } 545 } 546 read_unlock(&smc_pnettable.lock); 547 out_rel: 548 dst_release(dst); 549 } 550