1 /* 2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * 4 * Generic netlink support functions to configure an SMC-R PNET table 5 * 6 * Copyright IBM Corp. 2016 7 * 8 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> 9 */ 10 11 #include <linux/module.h> 12 #include <linux/list.h> 13 #include <linux/ctype.h> 14 #include <net/netlink.h> 15 #include <net/genetlink.h> 16 17 #include <uapi/linux/if.h> 18 #include <uapi/linux/smc.h> 19 20 #include <rdma/ib_verbs.h> 21 22 #include "smc_pnet.h" 23 #include "smc_ib.h" 24 25 #define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */ 26 27 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { 28 [SMC_PNETID_NAME] = { 29 .type = NLA_NUL_STRING, 30 .len = SMC_MAX_PNET_ID_LEN - 1 31 }, 32 [SMC_PNETID_ETHNAME] = { 33 .type = NLA_NUL_STRING, 34 .len = IFNAMSIZ - 1 35 }, 36 [SMC_PNETID_IBNAME] = { 37 .type = NLA_NUL_STRING, 38 .len = IB_DEVICE_NAME_MAX - 1 39 }, 40 [SMC_PNETID_IBPORT] = { .type = NLA_U8 } 41 }; 42 43 static struct genl_family smc_pnet_nl_family; 44 45 /** 46 * struct smc_pnettable - SMC PNET table anchor 47 * @lock: Lock for list action 48 * @pnetlist: List of PNETIDs 49 */ 50 static struct smc_pnettable { 51 rwlock_t lock; 52 struct list_head pnetlist; 53 } smc_pnettable = { 54 .pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist), 55 .lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock) 56 }; 57 58 /** 59 * struct smc_pnetentry - pnet identifier name entry 60 * @list: List node. 61 * @pnet_name: Pnet identifier name 62 * @ndev: pointer to network device. 63 * @smcibdev: Pointer to IB device. 64 */ 65 struct smc_pnetentry { 66 struct list_head list; 67 char pnet_name[SMC_MAX_PNET_ID_LEN + 1]; 68 struct net_device *ndev; 69 struct smc_ib_device *smcibdev; 70 u8 ib_port; 71 }; 72 73 /* Check if two RDMA device entries are identical. Use device name and port 74 * number for comparison. 75 */ 76 static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname, 77 u8 ibport) 78 { 79 return pnetelem->ib_port == ibport && 80 !strncmp(pnetelem->smcibdev->ibdev->name, ibname, 81 sizeof(pnetelem->smcibdev->ibdev->name)); 82 } 83 84 /* Find a pnetid in the pnet table. 85 */ 86 static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name) 87 { 88 struct smc_pnetentry *pnetelem, *found_pnetelem = NULL; 89 90 read_lock(&smc_pnettable.lock); 91 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 92 if (!strncmp(pnetelem->pnet_name, pnet_name, 93 sizeof(pnetelem->pnet_name))) { 94 found_pnetelem = pnetelem; 95 break; 96 } 97 } 98 read_unlock(&smc_pnettable.lock); 99 return found_pnetelem; 100 } 101 102 /* Remove a pnetid from the pnet table. 103 */ 104 static int smc_pnet_remove_by_pnetid(char *pnet_name) 105 { 106 struct smc_pnetentry *pnetelem, *tmp_pe; 107 int rc = -ENOENT; 108 109 write_lock(&smc_pnettable.lock); 110 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 111 list) { 112 if (!strncmp(pnetelem->pnet_name, pnet_name, 113 sizeof(pnetelem->pnet_name))) { 114 list_del(&pnetelem->list); 115 dev_put(pnetelem->ndev); 116 kfree(pnetelem); 117 rc = 0; 118 break; 119 } 120 } 121 write_unlock(&smc_pnettable.lock); 122 return rc; 123 } 124 125 /* Remove a pnet entry mentioning a given network device from the pnet table. 126 */ 127 static int smc_pnet_remove_by_ndev(struct net_device *ndev) 128 { 129 struct smc_pnetentry *pnetelem, *tmp_pe; 130 int rc = -ENOENT; 131 132 write_lock(&smc_pnettable.lock); 133 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 134 list) { 135 if (pnetelem->ndev == ndev) { 136 list_del(&pnetelem->list); 137 dev_put(pnetelem->ndev); 138 kfree(pnetelem); 139 rc = 0; 140 break; 141 } 142 } 143 write_unlock(&smc_pnettable.lock); 144 return rc; 145 } 146 147 /* Remove a pnet entry mentioning a given ib device from the pnet table. 148 */ 149 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev) 150 { 151 struct smc_pnetentry *pnetelem, *tmp_pe; 152 int rc = -ENOENT; 153 154 write_lock(&smc_pnettable.lock); 155 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 156 list) { 157 if (pnetelem->smcibdev == ibdev) { 158 list_del(&pnetelem->list); 159 dev_put(pnetelem->ndev); 160 kfree(pnetelem); 161 rc = 0; 162 break; 163 } 164 } 165 write_unlock(&smc_pnettable.lock); 166 return rc; 167 } 168 169 /* Append a pnetid to the end of the pnet table if not already on this list. 170 */ 171 static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem) 172 { 173 struct smc_pnetentry *pnetelem; 174 int rc = -EEXIST; 175 176 write_lock(&smc_pnettable.lock); 177 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 178 if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name, 179 sizeof(new_pnetelem->pnet_name)) || 180 !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name, 181 sizeof(new_pnetelem->ndev->name)) || 182 smc_pnet_same_ibname(pnetelem, 183 new_pnetelem->smcibdev->ibdev->name, 184 new_pnetelem->ib_port)) 185 goto found; 186 } 187 list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist); 188 rc = 0; 189 found: 190 write_unlock(&smc_pnettable.lock); 191 return rc; 192 } 193 194 /* The limit for pnetid is 16 characters. 195 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. 196 * Lower case letters are converted to upper case. 197 * Interior blanks should not be used. 198 */ 199 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) 200 { 201 char *bf = skip_spaces(pnet_name); 202 size_t len = strlen(bf); 203 char *end = bf + len; 204 205 if (!len) 206 return false; 207 while (--end >= bf && isspace(*end)) 208 ; 209 if (end - bf >= SMC_MAX_PNET_ID_LEN) 210 return false; 211 while (bf <= end) { 212 if (!isalnum(*bf)) 213 return false; 214 *pnetid++ = islower(*bf) ? toupper(*bf) : *bf; 215 bf++; 216 } 217 *pnetid = '\0'; 218 return true; 219 } 220 221 /* Find an infiniband device by a given name. The device might not exist. */ 222 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name) 223 { 224 struct smc_ib_device *ibdev; 225 226 spin_lock(&smc_ib_devices.lock); 227 list_for_each_entry(ibdev, &smc_ib_devices.list, list) { 228 if (!strncmp(ibdev->ibdev->name, ib_name, 229 sizeof(ibdev->ibdev->name))) { 230 goto out; 231 } 232 } 233 ibdev = NULL; 234 out: 235 spin_unlock(&smc_ib_devices.lock); 236 return ibdev; 237 } 238 239 /* Parse the supplied netlink attributes and fill a pnetentry structure. 240 * For ethernet and infiniband device names verify that the devices exist. 241 */ 242 static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem, 243 struct nlattr *tb[]) 244 { 245 char *string, *ibname = NULL; 246 int rc = 0; 247 248 memset(pnetelem, 0, sizeof(*pnetelem)); 249 INIT_LIST_HEAD(&pnetelem->list); 250 if (tb[SMC_PNETID_NAME]) { 251 string = (char *)nla_data(tb[SMC_PNETID_NAME]); 252 if (!smc_pnetid_valid(string, pnetelem->pnet_name)) { 253 rc = -EINVAL; 254 goto error; 255 } 256 } 257 if (tb[SMC_PNETID_ETHNAME]) { 258 string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); 259 pnetelem->ndev = dev_get_by_name(net, string); 260 if (!pnetelem->ndev) 261 return -ENOENT; 262 } 263 if (tb[SMC_PNETID_IBNAME]) { 264 ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); 265 ibname = strim(ibname); 266 pnetelem->smcibdev = smc_pnet_find_ib(ibname); 267 if (!pnetelem->smcibdev) { 268 rc = -ENOENT; 269 goto error; 270 } 271 } 272 if (tb[SMC_PNETID_IBPORT]) { 273 pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); 274 if (pnetelem->ib_port > SMC_MAX_PORTS) { 275 rc = -EINVAL; 276 goto error; 277 } 278 } 279 return 0; 280 281 error: 282 if (pnetelem->ndev) 283 dev_put(pnetelem->ndev); 284 return rc; 285 } 286 287 /* Convert an smc_pnetentry to a netlink attribute sequence */ 288 static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem) 289 { 290 if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) || 291 nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) || 292 nla_put_string(msg, SMC_PNETID_IBNAME, 293 pnetelem->smcibdev->ibdev->name) || 294 nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) 295 return -1; 296 return 0; 297 } 298 299 /* Retrieve one PNETID entry */ 300 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) 301 { 302 struct smc_pnetentry *pnetelem; 303 struct sk_buff *msg; 304 void *hdr; 305 int rc; 306 307 pnetelem = smc_pnet_find_pnetid( 308 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 309 if (!pnetelem) 310 return -ENOENT; 311 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 312 if (!msg) 313 return -ENOMEM; 314 315 hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, 316 &smc_pnet_nl_family, 0, SMC_PNETID_GET); 317 if (!hdr) { 318 rc = -EMSGSIZE; 319 goto err_out; 320 } 321 322 if (smc_pnet_set_nla(msg, pnetelem)) { 323 rc = -ENOBUFS; 324 goto err_out; 325 } 326 327 genlmsg_end(msg, hdr); 328 return genlmsg_reply(msg, info); 329 330 err_out: 331 nlmsg_free(msg); 332 return rc; 333 } 334 335 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) 336 { 337 struct net *net = genl_info_net(info); 338 struct smc_pnetentry *pnetelem; 339 int rc; 340 341 pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); 342 if (!pnetelem) 343 return -ENOMEM; 344 rc = smc_pnet_fill_entry(net, pnetelem, info->attrs); 345 if (!rc) 346 rc = smc_pnet_enter(pnetelem); 347 if (rc) { 348 kfree(pnetelem); 349 return rc; 350 } 351 rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port); 352 if (rc) 353 smc_pnet_remove_by_pnetid(pnetelem->pnet_name); 354 return rc; 355 } 356 357 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) 358 { 359 return smc_pnet_remove_by_pnetid( 360 (char *)nla_data(info->attrs[SMC_PNETID_NAME])); 361 } 362 363 static int smc_pnet_dump_start(struct netlink_callback *cb) 364 { 365 cb->args[0] = 0; 366 return 0; 367 } 368 369 static int smc_pnet_dumpinfo(struct sk_buff *skb, 370 u32 portid, u32 seq, u32 flags, 371 struct smc_pnetentry *pnetelem) 372 { 373 void *hdr; 374 375 hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family, 376 flags, SMC_PNETID_GET); 377 if (!hdr) 378 return -ENOMEM; 379 if (smc_pnet_set_nla(skb, pnetelem) < 0) { 380 genlmsg_cancel(skb, hdr); 381 return -EMSGSIZE; 382 } 383 genlmsg_end(skb, hdr); 384 return 0; 385 } 386 387 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb) 388 { 389 struct smc_pnetentry *pnetelem; 390 int idx = 0; 391 392 read_lock(&smc_pnettable.lock); 393 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 394 if (idx++ < cb->args[0]) 395 continue; 396 if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid, 397 cb->nlh->nlmsg_seq, NLM_F_MULTI, 398 pnetelem)) { 399 --idx; 400 break; 401 } 402 } 403 cb->args[0] = idx; 404 read_unlock(&smc_pnettable.lock); 405 return skb->len; 406 } 407 408 /* Remove and delete all pnetids from pnet table. 409 */ 410 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info) 411 { 412 struct smc_pnetentry *pnetelem, *tmp_pe; 413 414 write_lock(&smc_pnettable.lock); 415 list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist, 416 list) { 417 list_del(&pnetelem->list); 418 dev_put(pnetelem->ndev); 419 kfree(pnetelem); 420 } 421 write_unlock(&smc_pnettable.lock); 422 return 0; 423 } 424 425 /* SMC_PNETID generic netlink operation definition */ 426 static const struct genl_ops smc_pnet_ops[] = { 427 { 428 .cmd = SMC_PNETID_GET, 429 .flags = GENL_ADMIN_PERM, 430 .policy = smc_pnet_policy, 431 .doit = smc_pnet_get, 432 .dumpit = smc_pnet_dump, 433 .start = smc_pnet_dump_start 434 }, 435 { 436 .cmd = SMC_PNETID_ADD, 437 .flags = GENL_ADMIN_PERM, 438 .policy = smc_pnet_policy, 439 .doit = smc_pnet_add 440 }, 441 { 442 .cmd = SMC_PNETID_DEL, 443 .flags = GENL_ADMIN_PERM, 444 .policy = smc_pnet_policy, 445 .doit = smc_pnet_del 446 }, 447 { 448 .cmd = SMC_PNETID_FLUSH, 449 .flags = GENL_ADMIN_PERM, 450 .policy = smc_pnet_policy, 451 .doit = smc_pnet_flush 452 } 453 }; 454 455 /* SMC_PNETID family definition */ 456 static struct genl_family smc_pnet_nl_family = { 457 .hdrsize = 0, 458 .name = SMCR_GENL_FAMILY_NAME, 459 .version = SMCR_GENL_FAMILY_VERSION, 460 .maxattr = SMC_PNETID_MAX, 461 .netnsok = true, 462 .module = THIS_MODULE, 463 .ops = smc_pnet_ops, 464 .n_ops = ARRAY_SIZE(smc_pnet_ops) 465 }; 466 467 static int smc_pnet_netdev_event(struct notifier_block *this, 468 unsigned long event, void *ptr) 469 { 470 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); 471 472 switch (event) { 473 case NETDEV_REBOOT: 474 case NETDEV_UNREGISTER: 475 smc_pnet_remove_by_ndev(event_dev); 476 default: 477 break; 478 } 479 return NOTIFY_DONE; 480 } 481 482 static struct notifier_block smc_netdev_notifier = { 483 .notifier_call = smc_pnet_netdev_event 484 }; 485 486 int __init smc_pnet_init(void) 487 { 488 int rc; 489 490 rc = genl_register_family(&smc_pnet_nl_family); 491 if (rc) 492 return rc; 493 rc = register_netdevice_notifier(&smc_netdev_notifier); 494 if (rc) 495 genl_unregister_family(&smc_pnet_nl_family); 496 return rc; 497 } 498 499 void smc_pnet_exit(void) 500 { 501 smc_pnet_flush(NULL, NULL); 502 unregister_netdevice_notifier(&smc_netdev_notifier); 503 genl_unregister_family(&smc_pnet_nl_family); 504 } 505 506 /* PNET table analysis for a given sock: 507 * determine ib_device and port belonging to used internal TCP socket 508 * ethernet interface. 509 */ 510 void smc_pnet_find_roce_resource(struct sock *sk, 511 struct smc_ib_device **smcibdev, u8 *ibport) 512 { 513 struct dst_entry *dst = sk_dst_get(sk); 514 struct smc_pnetentry *pnetelem; 515 516 *smcibdev = NULL; 517 *ibport = 0; 518 519 if (!dst) 520 return; 521 if (!dst->dev) 522 goto out_rel; 523 read_lock(&smc_pnettable.lock); 524 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 525 if (dst->dev == pnetelem->ndev) { 526 if (smc_ib_port_active(pnetelem->smcibdev, 527 pnetelem->ib_port)) { 528 *smcibdev = pnetelem->smcibdev; 529 *ibport = pnetelem->ib_port; 530 } 531 break; 532 } 533 } 534 read_unlock(&smc_pnettable.lock); 535 out_rel: 536 dst_release(dst); 537 } 538