xref: /openbmc/linux/net/smc/smc_pnet.c (revision a17922de)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Generic netlink support functions to configure an SMC-R PNET table
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
10  */
11 
12 #include <linux/module.h>
13 #include <linux/list.h>
14 #include <linux/ctype.h>
15 #include <net/netlink.h>
16 #include <net/genetlink.h>
17 
18 #include <uapi/linux/if.h>
19 #include <uapi/linux/smc.h>
20 
21 #include <rdma/ib_verbs.h>
22 
23 #include "smc_pnet.h"
24 #include "smc_ib.h"
25 #include "smc_ism.h"
26 
27 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
28 	[SMC_PNETID_NAME] = {
29 		.type = NLA_NUL_STRING,
30 		.len = SMC_MAX_PNETID_LEN - 1
31 	},
32 	[SMC_PNETID_ETHNAME] = {
33 		.type = NLA_NUL_STRING,
34 		.len = IFNAMSIZ - 1
35 	},
36 	[SMC_PNETID_IBNAME] = {
37 		.type = NLA_NUL_STRING,
38 		.len = IB_DEVICE_NAME_MAX - 1
39 	},
40 	[SMC_PNETID_IBPORT] = { .type = NLA_U8 }
41 };
42 
43 static struct genl_family smc_pnet_nl_family;
44 
/**
 * struct smc_pnettable - SMC PNET table anchor
 * @lock: Lock for list action
 * @pnetlist: List of PNETIDs
 *
 * The type is defined together with its single, statically initialized
 * instance of the same name. Functions in this file that only walk @pnetlist
 * take @lock with read_lock(); functions that add or remove entries take it
 * with write_lock().
 */
static struct smc_pnettable {
	rwlock_t lock;
	struct list_head pnetlist;
} smc_pnettable = {
	.pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist),
	.lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock)
};
57 
/**
 * struct smc_pnetentry - pnet identifier name entry
 * @list: List node.
 * @pnet_name: Pnet identifier name, NUL-terminated. Entries filled from
 *	netlink store the name as produced by smc_pnetid_valid(), i.e. trimmed
 *	and converted to upper case.
 * @ndev: pointer to network device. Entries filled from netlink hold a
 *	reference obtained with dev_get_by_name(); it is dropped with dev_put()
 *	when the entry is removed from the table.
 * @smcibdev: Pointer to IB device.
 * @ib_port: Port number of the IB device. Entries filled from netlink are
 *	only accepted with a value in the range 1..SMC_MAX_PORTS.
 */
struct smc_pnetentry {
	struct list_head list;
	char pnet_name[SMC_MAX_PNETID_LEN + 1];
	struct net_device *ndev;
	struct smc_ib_device *smcibdev;
	u8 ib_port;
};
72 
73 /* Check if two RDMA device entries are identical. Use device name and port
74  * number for comparison.
75  */
76 static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname,
77 				 u8 ibport)
78 {
79 	return pnetelem->ib_port == ibport &&
80 	       !strncmp(pnetelem->smcibdev->ibdev->name, ibname,
81 			sizeof(pnetelem->smcibdev->ibdev->name));
82 }
83 
84 /* Find a pnetid in the pnet table.
85  */
86 static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name)
87 {
88 	struct smc_pnetentry *pnetelem, *found_pnetelem = NULL;
89 
90 	read_lock(&smc_pnettable.lock);
91 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
92 		if (!strncmp(pnetelem->pnet_name, pnet_name,
93 			     sizeof(pnetelem->pnet_name))) {
94 			found_pnetelem = pnetelem;
95 			break;
96 		}
97 	}
98 	read_unlock(&smc_pnettable.lock);
99 	return found_pnetelem;
100 }
101 
102 /* Remove a pnetid from the pnet table.
103  */
104 static int smc_pnet_remove_by_pnetid(char *pnet_name)
105 {
106 	struct smc_pnetentry *pnetelem, *tmp_pe;
107 	int rc = -ENOENT;
108 
109 	write_lock(&smc_pnettable.lock);
110 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
111 				 list) {
112 		if (!strncmp(pnetelem->pnet_name, pnet_name,
113 			     sizeof(pnetelem->pnet_name))) {
114 			list_del(&pnetelem->list);
115 			dev_put(pnetelem->ndev);
116 			kfree(pnetelem);
117 			rc = 0;
118 			break;
119 		}
120 	}
121 	write_unlock(&smc_pnettable.lock);
122 	return rc;
123 }
124 
125 /* Remove a pnet entry mentioning a given network device from the pnet table.
126  */
127 static int smc_pnet_remove_by_ndev(struct net_device *ndev)
128 {
129 	struct smc_pnetentry *pnetelem, *tmp_pe;
130 	int rc = -ENOENT;
131 
132 	write_lock(&smc_pnettable.lock);
133 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
134 				 list) {
135 		if (pnetelem->ndev == ndev) {
136 			list_del(&pnetelem->list);
137 			dev_put(pnetelem->ndev);
138 			kfree(pnetelem);
139 			rc = 0;
140 			break;
141 		}
142 	}
143 	write_unlock(&smc_pnettable.lock);
144 	return rc;
145 }
146 
147 /* Remove a pnet entry mentioning a given ib device from the pnet table.
148  */
149 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev)
150 {
151 	struct smc_pnetentry *pnetelem, *tmp_pe;
152 	int rc = -ENOENT;
153 
154 	write_lock(&smc_pnettable.lock);
155 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
156 				 list) {
157 		if (pnetelem->smcibdev == ibdev) {
158 			list_del(&pnetelem->list);
159 			dev_put(pnetelem->ndev);
160 			kfree(pnetelem);
161 			rc = 0;
162 			break;
163 		}
164 	}
165 	write_unlock(&smc_pnettable.lock);
166 	return rc;
167 }
168 
169 /* Append a pnetid to the end of the pnet table if not already on this list.
170  */
171 static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
172 {
173 	struct smc_pnetentry *pnetelem;
174 	int rc = -EEXIST;
175 
176 	write_lock(&smc_pnettable.lock);
177 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
178 		if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name,
179 			     sizeof(new_pnetelem->pnet_name)) ||
180 		    !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name,
181 			     sizeof(new_pnetelem->ndev->name)) ||
182 		    smc_pnet_same_ibname(pnetelem,
183 					 new_pnetelem->smcibdev->ibdev->name,
184 					 new_pnetelem->ib_port)) {
185 			dev_put(pnetelem->ndev);
186 			goto found;
187 		}
188 	}
189 	list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
190 	rc = 0;
191 found:
192 	write_unlock(&smc_pnettable.lock);
193 	return rc;
194 }
195 
196 /* The limit for pnetid is 16 characters.
197  * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
198  * Lower case letters are converted to upper case.
199  * Interior blanks should not be used.
200  */
201 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
202 {
203 	char *bf = skip_spaces(pnet_name);
204 	size_t len = strlen(bf);
205 	char *end = bf + len;
206 
207 	if (!len)
208 		return false;
209 	while (--end >= bf && isspace(*end))
210 		;
211 	if (end - bf >= SMC_MAX_PNETID_LEN)
212 		return false;
213 	while (bf <= end) {
214 		if (!isalnum(*bf))
215 			return false;
216 		*pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
217 		bf++;
218 	}
219 	*pnetid = '\0';
220 	return true;
221 }
222 
223 /* Find an infiniband device by a given name. The device might not exist. */
224 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
225 {
226 	struct smc_ib_device *ibdev;
227 
228 	spin_lock(&smc_ib_devices.lock);
229 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
230 		if (!strncmp(ibdev->ibdev->name, ib_name,
231 			     sizeof(ibdev->ibdev->name))) {
232 			goto out;
233 		}
234 	}
235 	ibdev = NULL;
236 out:
237 	spin_unlock(&smc_ib_devices.lock);
238 	return ibdev;
239 }
240 
241 /* Parse the supplied netlink attributes and fill a pnetentry structure.
242  * For ethernet and infiniband device names verify that the devices exist.
243  */
244 static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
245 			       struct nlattr *tb[])
246 {
247 	char *string, *ibname;
248 	int rc;
249 
250 	memset(pnetelem, 0, sizeof(*pnetelem));
251 	INIT_LIST_HEAD(&pnetelem->list);
252 
253 	rc = -EINVAL;
254 	if (!tb[SMC_PNETID_NAME])
255 		goto error;
256 	string = (char *)nla_data(tb[SMC_PNETID_NAME]);
257 	if (!smc_pnetid_valid(string, pnetelem->pnet_name))
258 		goto error;
259 
260 	rc = -EINVAL;
261 	if (!tb[SMC_PNETID_ETHNAME])
262 		goto error;
263 	rc = -ENOENT;
264 	string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
265 	pnetelem->ndev = dev_get_by_name(net, string);
266 	if (!pnetelem->ndev)
267 		goto error;
268 
269 	rc = -EINVAL;
270 	if (!tb[SMC_PNETID_IBNAME])
271 		goto error;
272 	rc = -ENOENT;
273 	ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
274 	ibname = strim(ibname);
275 	pnetelem->smcibdev = smc_pnet_find_ib(ibname);
276 	if (!pnetelem->smcibdev)
277 		goto error;
278 
279 	rc = -EINVAL;
280 	if (!tb[SMC_PNETID_IBPORT])
281 		goto error;
282 	pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
283 	if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
284 		goto error;
285 
286 	return 0;
287 
288 error:
289 	if (pnetelem->ndev)
290 		dev_put(pnetelem->ndev);
291 	return rc;
292 }
293 
294 /* Convert an smc_pnetentry to a netlink attribute sequence */
295 static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem)
296 {
297 	if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) ||
298 	    nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) ||
299 	    nla_put_string(msg, SMC_PNETID_IBNAME,
300 			   pnetelem->smcibdev->ibdev->name) ||
301 	    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
302 		return -1;
303 	return 0;
304 }
305 
306 /* Retrieve one PNETID entry */
307 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
308 {
309 	struct smc_pnetentry *pnetelem;
310 	struct sk_buff *msg;
311 	void *hdr;
312 	int rc;
313 
314 	if (!info->attrs[SMC_PNETID_NAME])
315 		return -EINVAL;
316 	pnetelem = smc_pnet_find_pnetid(
317 				(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
318 	if (!pnetelem)
319 		return -ENOENT;
320 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
321 	if (!msg)
322 		return -ENOMEM;
323 
324 	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
325 			  &smc_pnet_nl_family, 0, SMC_PNETID_GET);
326 	if (!hdr) {
327 		rc = -EMSGSIZE;
328 		goto err_out;
329 	}
330 
331 	if (smc_pnet_set_nla(msg, pnetelem)) {
332 		rc = -ENOBUFS;
333 		goto err_out;
334 	}
335 
336 	genlmsg_end(msg, hdr);
337 	return genlmsg_reply(msg, info);
338 
339 err_out:
340 	nlmsg_free(msg);
341 	return rc;
342 }
343 
344 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
345 {
346 	struct net *net = genl_info_net(info);
347 	struct smc_pnetentry *pnetelem;
348 	int rc;
349 
350 	pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
351 	if (!pnetelem)
352 		return -ENOMEM;
353 	rc = smc_pnet_fill_entry(net, pnetelem, info->attrs);
354 	if (!rc)
355 		rc = smc_pnet_enter(pnetelem);
356 	if (rc) {
357 		kfree(pnetelem);
358 		return rc;
359 	}
360 	return rc;
361 }
362 
363 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
364 {
365 	if (!info->attrs[SMC_PNETID_NAME])
366 		return -EINVAL;
367 	return smc_pnet_remove_by_pnetid(
368 				(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
369 }
370 
/* Start callback of the SMC_PNETID_GET dump: reset cb->args[0], which
 * smc_pnet_dump() uses as the index of the next table entry to dump.
 * Always returns 0.
 */
static int smc_pnet_dump_start(struct netlink_callback *cb)
{
	cb->args[0] = 0;
	return 0;
}
376 
377 static int smc_pnet_dumpinfo(struct sk_buff *skb,
378 			     u32 portid, u32 seq, u32 flags,
379 			     struct smc_pnetentry *pnetelem)
380 {
381 	void *hdr;
382 
383 	hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family,
384 			  flags, SMC_PNETID_GET);
385 	if (!hdr)
386 		return -ENOMEM;
387 	if (smc_pnet_set_nla(skb, pnetelem) < 0) {
388 		genlmsg_cancel(skb, hdr);
389 		return -EMSGSIZE;
390 	}
391 	genlmsg_end(skb, hdr);
392 	return 0;
393 }
394 
395 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
396 {
397 	struct smc_pnetentry *pnetelem;
398 	int idx = 0;
399 
400 	read_lock(&smc_pnettable.lock);
401 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
402 		if (idx++ < cb->args[0])
403 			continue;
404 		if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid,
405 				      cb->nlh->nlmsg_seq, NLM_F_MULTI,
406 				      pnetelem)) {
407 			--idx;
408 			break;
409 		}
410 	}
411 	cb->args[0] = idx;
412 	read_unlock(&smc_pnettable.lock);
413 	return skb->len;
414 }
415 
416 /* Remove and delete all pnetids from pnet table.
417  */
418 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
419 {
420 	struct smc_pnetentry *pnetelem, *tmp_pe;
421 
422 	write_lock(&smc_pnettable.lock);
423 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
424 				 list) {
425 		list_del(&pnetelem->list);
426 		dev_put(pnetelem->ndev);
427 		kfree(pnetelem);
428 	}
429 	write_unlock(&smc_pnettable.lock);
430 	return 0;
431 }
432 
/* SMC_PNETID generic netlink operation definition.
 * All four commands carry the GENL_ADMIN_PERM flag and use smc_pnet_policy
 * for attribute validation. SMC_PNETID_GET is the only command that also
 * provides dump callbacks in addition to its doit handler.
 */
static const struct genl_ops smc_pnet_ops[] = {
	{
		/* single entry lookup (doit) or table dump (start/dumpit) */
		.cmd = SMC_PNETID_GET,
		.flags = GENL_ADMIN_PERM,
		.policy = smc_pnet_policy,
		.doit = smc_pnet_get,
		.dumpit = smc_pnet_dump,
		.start = smc_pnet_dump_start
	},
	{
		/* add one entry */
		.cmd = SMC_PNETID_ADD,
		.flags = GENL_ADMIN_PERM,
		.policy = smc_pnet_policy,
		.doit = smc_pnet_add
	},
	{
		/* delete one entry by pnet name */
		.cmd = SMC_PNETID_DEL,
		.flags = GENL_ADMIN_PERM,
		.policy = smc_pnet_policy,
		.doit = smc_pnet_del
	},
	{
		/* delete all entries */
		.cmd = SMC_PNETID_FLUSH,
		.flags = GENL_ADMIN_PERM,
		.policy = smc_pnet_policy,
		.doit = smc_pnet_flush
	}
};
462 
/* SMC_PNETID family definition.
 * Registered in smc_pnet_init() and unregistered in smc_pnet_exit(); the
 * operations of the family are those listed in smc_pnet_ops.
 */
static struct genl_family smc_pnet_nl_family = {
	.hdrsize = 0,
	.name = SMCR_GENL_FAMILY_NAME,
	.version = SMCR_GENL_FAMILY_VERSION,
	.maxattr = SMC_PNETID_MAX,
	.netnsok = true,
	.module = THIS_MODULE,
	.ops = smc_pnet_ops,
	.n_ops =  ARRAY_SIZE(smc_pnet_ops)
};
474 
475 static int smc_pnet_netdev_event(struct notifier_block *this,
476 				 unsigned long event, void *ptr)
477 {
478 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
479 
480 	switch (event) {
481 	case NETDEV_REBOOT:
482 	case NETDEV_UNREGISTER:
483 		smc_pnet_remove_by_ndev(event_dev);
484 		return NOTIFY_OK;
485 	default:
486 		return NOTIFY_DONE;
487 	}
488 }
489 
/* Notifier block hooking smc_pnet_netdev_event() into net device events;
 * registered in smc_pnet_init() and unregistered in smc_pnet_exit().
 */
static struct notifier_block smc_netdev_notifier = {
	.notifier_call = smc_pnet_netdev_event
};
493 
494 int __init smc_pnet_init(void)
495 {
496 	int rc;
497 
498 	rc = genl_register_family(&smc_pnet_nl_family);
499 	if (rc)
500 		return rc;
501 	rc = register_netdevice_notifier(&smc_netdev_notifier);
502 	if (rc)
503 		genl_unregister_family(&smc_pnet_nl_family);
504 	return rc;
505 }
506 
/* Tear down pnet table handling: remove all entries from the pnet table,
 * then unregister the net device notifier and the generic netlink family.
 */
void smc_pnet_exit(void)
{
	/* smc_pnet_flush() does not use its arguments */
	smc_pnet_flush(NULL, NULL);
	unregister_netdevice_notifier(&smc_netdev_notifier);
	genl_unregister_family(&smc_pnet_nl_family);
}
513 
514 /* Determine one base device for stacked net devices.
515  * If the lower device level contains more than one devices
516  * (for instance with bonding slaves), just the first device
517  * is used to reach a base device.
518  */
519 static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
520 {
521 	int i, nest_lvl;
522 
523 	rtnl_lock();
524 	nest_lvl = dev_get_nest_level(ndev);
525 	for (i = 0; i < nest_lvl; i++) {
526 		struct list_head *lower = &ndev->adj_list.lower;
527 
528 		if (list_empty(lower))
529 			break;
530 		lower = lower->next;
531 		ndev = netdev_lower_get_next(ndev, &lower);
532 	}
533 	rtnl_unlock();
534 	return ndev;
535 }
536 
537 /* Determine the corresponding IB device port based on the hardware PNETID.
538  * Searching stops at the first matching active IB device port.
539  */
540 static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
541 					 struct smc_ib_device **smcibdev,
542 					 u8 *ibport)
543 {
544 	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
545 	struct smc_ib_device *ibdev;
546 	int i;
547 
548 	ndev = pnet_find_base_ndev(ndev);
549 	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
550 				   ndev_pnetid))
551 		return; /* pnetid could not be determined */
552 
553 	spin_lock(&smc_ib_devices.lock);
554 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
555 		for (i = 1; i <= SMC_MAX_PORTS; i++) {
556 			if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
557 				    SMC_MAX_PNETID_LEN) &&
558 			    smc_ib_port_active(ibdev, i)) {
559 				*smcibdev = ibdev;
560 				*ibport = i;
561 				break;
562 			}
563 		}
564 	}
565 	spin_unlock(&smc_ib_devices.lock);
566 }
567 
568 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
569 					struct smcd_dev **smcismdev)
570 {
571 	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
572 	struct smcd_dev *ismdev;
573 
574 	ndev = pnet_find_base_ndev(ndev);
575 	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
576 				   ndev_pnetid))
577 		return; /* pnetid could not be determined */
578 
579 	spin_lock(&smcd_dev_list.lock);
580 	list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
581 		if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) {
582 			*smcismdev = ismdev;
583 			break;
584 		}
585 	}
586 	spin_unlock(&smcd_dev_list.lock);
587 }
588 
589 /* Lookup of coupled ib_device via SMC pnet table */
590 static void smc_pnet_find_roce_by_table(struct net_device *netdev,
591 					struct smc_ib_device **smcibdev,
592 					u8 *ibport)
593 {
594 	struct smc_pnetentry *pnetelem;
595 
596 	read_lock(&smc_pnettable.lock);
597 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
598 		if (netdev == pnetelem->ndev) {
599 			if (smc_ib_port_active(pnetelem->smcibdev,
600 					       pnetelem->ib_port)) {
601 				*smcibdev = pnetelem->smcibdev;
602 				*ibport = pnetelem->ib_port;
603 			}
604 			break;
605 		}
606 	}
607 	read_unlock(&smc_pnettable.lock);
608 }
609 
610 /* PNET table analysis for a given sock:
611  * determine ib_device and port belonging to used internal TCP socket
612  * ethernet interface.
613  */
614 void smc_pnet_find_roce_resource(struct sock *sk,
615 				 struct smc_ib_device **smcibdev, u8 *ibport)
616 {
617 	struct dst_entry *dst = sk_dst_get(sk);
618 
619 	*smcibdev = NULL;
620 	*ibport = 0;
621 
622 	if (!dst)
623 		goto out;
624 	if (!dst->dev)
625 		goto out_rel;
626 
627 	/* if possible, lookup via hardware-defined pnetid */
628 	smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport);
629 	if (*smcibdev)
630 		goto out_rel;
631 
632 	/* lookup via SMC PNET table */
633 	smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport);
634 
635 out_rel:
636 	dst_release(dst);
637 out:
638 	return;
639 }
640 
641 void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
642 {
643 	struct dst_entry *dst = sk_dst_get(sk);
644 
645 	*smcismdev = NULL;
646 	if (!dst)
647 		goto out;
648 	if (!dst->dev)
649 		goto out_rel;
650 
651 	/* if possible, lookup via hardware-defined pnetid */
652 	smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
653 
654 out_rel:
655 	dst_release(dst);
656 out:
657 	return;
658 }
659