xref: /openbmc/linux/net/smc/smc_pnet.c (revision 530e7a660fb795452357b36cce26b839a9a187a9)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Generic netlink support functions to configure an SMC-R PNET table
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Thomas Richter <tmricht@linux.vnet.ibm.com>
10  */
11 
12 #include <linux/module.h>
13 #include <linux/list.h>
14 #include <linux/ctype.h>
15 #include <net/netlink.h>
16 #include <net/genetlink.h>
17 
18 #include <uapi/linux/if.h>
19 #include <uapi/linux/smc.h>
20 
21 #include <rdma/ib_verbs.h>
22 
23 #include "smc_pnet.h"
24 #include "smc_ib.h"
25 
26 #define SMC_MAX_PNET_ID_LEN	16	/* Max. length of PNET id */
27 
28 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
29 	[SMC_PNETID_NAME] = {
30 		.type = NLA_NUL_STRING,
31 		.len = SMC_MAX_PNET_ID_LEN - 1
32 	},
33 	[SMC_PNETID_ETHNAME] = {
34 		.type = NLA_NUL_STRING,
35 		.len = IFNAMSIZ - 1
36 	},
37 	[SMC_PNETID_IBNAME] = {
38 		.type = NLA_NUL_STRING,
39 		.len = IB_DEVICE_NAME_MAX - 1
40 	},
41 	[SMC_PNETID_IBPORT] = { .type = NLA_U8 }
42 };
43 
44 static struct genl_family smc_pnet_nl_family;
45 
46 /**
47  * struct smc_pnettable - SMC PNET table anchor
48  * @lock: Lock for list action
49  * @pnetlist: List of PNETIDs
50  */
51 static struct smc_pnettable {
52 	rwlock_t lock;
53 	struct list_head pnetlist;
54 } smc_pnettable = {
55 	.pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist),
56 	.lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock)
57 };
58 
59 /**
60  * struct smc_pnetentry - pnet identifier name entry
61  * @list: List node.
62  * @pnet_name: Pnet identifier name
63  * @ndev: pointer to network device.
64  * @smcibdev: Pointer to IB device.
65  */
66 struct smc_pnetentry {
67 	struct list_head list;
68 	char pnet_name[SMC_MAX_PNET_ID_LEN + 1];
69 	struct net_device *ndev;
70 	struct smc_ib_device *smcibdev;
71 	u8 ib_port;
72 };
73 
74 /* Check if two RDMA device entries are identical. Use device name and port
75  * number for comparison.
76  */
77 static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname,
78 				 u8 ibport)
79 {
80 	return pnetelem->ib_port == ibport &&
81 	       !strncmp(pnetelem->smcibdev->ibdev->name, ibname,
82 			sizeof(pnetelem->smcibdev->ibdev->name));
83 }
84 
85 /* Find a pnetid in the pnet table.
86  */
87 static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name)
88 {
89 	struct smc_pnetentry *pnetelem, *found_pnetelem = NULL;
90 
91 	read_lock(&smc_pnettable.lock);
92 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
93 		if (!strncmp(pnetelem->pnet_name, pnet_name,
94 			     sizeof(pnetelem->pnet_name))) {
95 			found_pnetelem = pnetelem;
96 			break;
97 		}
98 	}
99 	read_unlock(&smc_pnettable.lock);
100 	return found_pnetelem;
101 }
102 
103 /* Remove a pnetid from the pnet table.
104  */
105 static int smc_pnet_remove_by_pnetid(char *pnet_name)
106 {
107 	struct smc_pnetentry *pnetelem, *tmp_pe;
108 	int rc = -ENOENT;
109 
110 	write_lock(&smc_pnettable.lock);
111 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
112 				 list) {
113 		if (!strncmp(pnetelem->pnet_name, pnet_name,
114 			     sizeof(pnetelem->pnet_name))) {
115 			list_del(&pnetelem->list);
116 			dev_put(pnetelem->ndev);
117 			kfree(pnetelem);
118 			rc = 0;
119 			break;
120 		}
121 	}
122 	write_unlock(&smc_pnettable.lock);
123 	return rc;
124 }
125 
126 /* Remove a pnet entry mentioning a given network device from the pnet table.
127  */
128 static int smc_pnet_remove_by_ndev(struct net_device *ndev)
129 {
130 	struct smc_pnetentry *pnetelem, *tmp_pe;
131 	int rc = -ENOENT;
132 
133 	write_lock(&smc_pnettable.lock);
134 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
135 				 list) {
136 		if (pnetelem->ndev == ndev) {
137 			list_del(&pnetelem->list);
138 			dev_put(pnetelem->ndev);
139 			kfree(pnetelem);
140 			rc = 0;
141 			break;
142 		}
143 	}
144 	write_unlock(&smc_pnettable.lock);
145 	return rc;
146 }
147 
148 /* Remove a pnet entry mentioning a given ib device from the pnet table.
149  */
150 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev)
151 {
152 	struct smc_pnetentry *pnetelem, *tmp_pe;
153 	int rc = -ENOENT;
154 
155 	write_lock(&smc_pnettable.lock);
156 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
157 				 list) {
158 		if (pnetelem->smcibdev == ibdev) {
159 			list_del(&pnetelem->list);
160 			dev_put(pnetelem->ndev);
161 			kfree(pnetelem);
162 			rc = 0;
163 			break;
164 		}
165 	}
166 	write_unlock(&smc_pnettable.lock);
167 	return rc;
168 }
169 
170 /* Append a pnetid to the end of the pnet table if not already on this list.
171  */
172 static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
173 {
174 	struct smc_pnetentry *pnetelem;
175 	int rc = -EEXIST;
176 
177 	write_lock(&smc_pnettable.lock);
178 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
179 		if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name,
180 			     sizeof(new_pnetelem->pnet_name)) ||
181 		    !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name,
182 			     sizeof(new_pnetelem->ndev->name)) ||
183 		    smc_pnet_same_ibname(pnetelem,
184 					 new_pnetelem->smcibdev->ibdev->name,
185 					 new_pnetelem->ib_port)) {
186 			dev_put(pnetelem->ndev);
187 			goto found;
188 		}
189 	}
190 	list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
191 	rc = 0;
192 found:
193 	write_unlock(&smc_pnettable.lock);
194 	return rc;
195 }
196 
197 /* The limit for pnetid is 16 characters.
198  * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
199  * Lower case letters are converted to upper case.
200  * Interior blanks should not be used.
201  */
202 static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
203 {
204 	char *bf = skip_spaces(pnet_name);
205 	size_t len = strlen(bf);
206 	char *end = bf + len;
207 
208 	if (!len)
209 		return false;
210 	while (--end >= bf && isspace(*end))
211 		;
212 	if (end - bf >= SMC_MAX_PNET_ID_LEN)
213 		return false;
214 	while (bf <= end) {
215 		if (!isalnum(*bf))
216 			return false;
217 		*pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
218 		bf++;
219 	}
220 	*pnetid = '\0';
221 	return true;
222 }
223 
224 /* Find an infiniband device by a given name. The device might not exist. */
225 static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
226 {
227 	struct smc_ib_device *ibdev;
228 
229 	spin_lock(&smc_ib_devices.lock);
230 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
231 		if (!strncmp(ibdev->ibdev->name, ib_name,
232 			     sizeof(ibdev->ibdev->name))) {
233 			goto out;
234 		}
235 	}
236 	ibdev = NULL;
237 out:
238 	spin_unlock(&smc_ib_devices.lock);
239 	return ibdev;
240 }
241 
242 /* Parse the supplied netlink attributes and fill a pnetentry structure.
243  * For ethernet and infiniband device names verify that the devices exist.
244  */
245 static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
246 			       struct nlattr *tb[])
247 {
248 	char *string, *ibname;
249 	int rc;
250 
251 	memset(pnetelem, 0, sizeof(*pnetelem));
252 	INIT_LIST_HEAD(&pnetelem->list);
253 
254 	rc = -EINVAL;
255 	if (!tb[SMC_PNETID_NAME])
256 		goto error;
257 	string = (char *)nla_data(tb[SMC_PNETID_NAME]);
258 	if (!smc_pnetid_valid(string, pnetelem->pnet_name))
259 		goto error;
260 
261 	rc = -EINVAL;
262 	if (!tb[SMC_PNETID_ETHNAME])
263 		goto error;
264 	rc = -ENOENT;
265 	string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
266 	pnetelem->ndev = dev_get_by_name(net, string);
267 	if (!pnetelem->ndev)
268 		goto error;
269 
270 	rc = -EINVAL;
271 	if (!tb[SMC_PNETID_IBNAME])
272 		goto error;
273 	rc = -ENOENT;
274 	ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
275 	ibname = strim(ibname);
276 	pnetelem->smcibdev = smc_pnet_find_ib(ibname);
277 	if (!pnetelem->smcibdev)
278 		goto error;
279 
280 	rc = -EINVAL;
281 	if (!tb[SMC_PNETID_IBPORT])
282 		goto error;
283 	pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
284 	if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
285 		goto error;
286 
287 	return 0;
288 
289 error:
290 	if (pnetelem->ndev)
291 		dev_put(pnetelem->ndev);
292 	return rc;
293 }
294 
295 /* Convert an smc_pnetentry to a netlink attribute sequence */
296 static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem)
297 {
298 	if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) ||
299 	    nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) ||
300 	    nla_put_string(msg, SMC_PNETID_IBNAME,
301 			   pnetelem->smcibdev->ibdev->name) ||
302 	    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
303 		return -1;
304 	return 0;
305 }
306 
307 /* Retrieve one PNETID entry */
308 static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
309 {
310 	struct smc_pnetentry *pnetelem;
311 	struct sk_buff *msg;
312 	void *hdr;
313 	int rc;
314 
315 	if (!info->attrs[SMC_PNETID_NAME])
316 		return -EINVAL;
317 	pnetelem = smc_pnet_find_pnetid(
318 				(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
319 	if (!pnetelem)
320 		return -ENOENT;
321 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
322 	if (!msg)
323 		return -ENOMEM;
324 
325 	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
326 			  &smc_pnet_nl_family, 0, SMC_PNETID_GET);
327 	if (!hdr) {
328 		rc = -EMSGSIZE;
329 		goto err_out;
330 	}
331 
332 	if (smc_pnet_set_nla(msg, pnetelem)) {
333 		rc = -ENOBUFS;
334 		goto err_out;
335 	}
336 
337 	genlmsg_end(msg, hdr);
338 	return genlmsg_reply(msg, info);
339 
340 err_out:
341 	nlmsg_free(msg);
342 	return rc;
343 }
344 
345 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
346 {
347 	struct net *net = genl_info_net(info);
348 	struct smc_pnetentry *pnetelem;
349 	int rc;
350 
351 	pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
352 	if (!pnetelem)
353 		return -ENOMEM;
354 	rc = smc_pnet_fill_entry(net, pnetelem, info->attrs);
355 	if (!rc)
356 		rc = smc_pnet_enter(pnetelem);
357 	if (rc) {
358 		kfree(pnetelem);
359 		return rc;
360 	}
361 	rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port);
362 	if (rc)
363 		smc_pnet_remove_by_pnetid(pnetelem->pnet_name);
364 	return rc;
365 }
366 
367 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
368 {
369 	if (!info->attrs[SMC_PNETID_NAME])
370 		return -EINVAL;
371 	return smc_pnet_remove_by_pnetid(
372 				(char *)nla_data(info->attrs[SMC_PNETID_NAME]));
373 }
374 
375 static int smc_pnet_dump_start(struct netlink_callback *cb)
376 {
377 	cb->args[0] = 0;
378 	return 0;
379 }
380 
381 static int smc_pnet_dumpinfo(struct sk_buff *skb,
382 			     u32 portid, u32 seq, u32 flags,
383 			     struct smc_pnetentry *pnetelem)
384 {
385 	void *hdr;
386 
387 	hdr = genlmsg_put(skb, portid, seq, &smc_pnet_nl_family,
388 			  flags, SMC_PNETID_GET);
389 	if (!hdr)
390 		return -ENOMEM;
391 	if (smc_pnet_set_nla(skb, pnetelem) < 0) {
392 		genlmsg_cancel(skb, hdr);
393 		return -EMSGSIZE;
394 	}
395 	genlmsg_end(skb, hdr);
396 	return 0;
397 }
398 
399 static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
400 {
401 	struct smc_pnetentry *pnetelem;
402 	int idx = 0;
403 
404 	read_lock(&smc_pnettable.lock);
405 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
406 		if (idx++ < cb->args[0])
407 			continue;
408 		if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid,
409 				      cb->nlh->nlmsg_seq, NLM_F_MULTI,
410 				      pnetelem)) {
411 			--idx;
412 			break;
413 		}
414 	}
415 	cb->args[0] = idx;
416 	read_unlock(&smc_pnettable.lock);
417 	return skb->len;
418 }
419 
420 /* Remove and delete all pnetids from pnet table.
421  */
422 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
423 {
424 	struct smc_pnetentry *pnetelem, *tmp_pe;
425 
426 	write_lock(&smc_pnettable.lock);
427 	list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
428 				 list) {
429 		list_del(&pnetelem->list);
430 		dev_put(pnetelem->ndev);
431 		kfree(pnetelem);
432 	}
433 	write_unlock(&smc_pnettable.lock);
434 	return 0;
435 }
436 
437 /* SMC_PNETID generic netlink operation definition */
438 static const struct genl_ops smc_pnet_ops[] = {
439 	{
440 		.cmd = SMC_PNETID_GET,
441 		.flags = GENL_ADMIN_PERM,
442 		.policy = smc_pnet_policy,
443 		.doit = smc_pnet_get,
444 		.dumpit = smc_pnet_dump,
445 		.start = smc_pnet_dump_start
446 	},
447 	{
448 		.cmd = SMC_PNETID_ADD,
449 		.flags = GENL_ADMIN_PERM,
450 		.policy = smc_pnet_policy,
451 		.doit = smc_pnet_add
452 	},
453 	{
454 		.cmd = SMC_PNETID_DEL,
455 		.flags = GENL_ADMIN_PERM,
456 		.policy = smc_pnet_policy,
457 		.doit = smc_pnet_del
458 	},
459 	{
460 		.cmd = SMC_PNETID_FLUSH,
461 		.flags = GENL_ADMIN_PERM,
462 		.policy = smc_pnet_policy,
463 		.doit = smc_pnet_flush
464 	}
465 };
466 
467 /* SMC_PNETID family definition */
468 static struct genl_family smc_pnet_nl_family = {
469 	.hdrsize = 0,
470 	.name = SMCR_GENL_FAMILY_NAME,
471 	.version = SMCR_GENL_FAMILY_VERSION,
472 	.maxattr = SMC_PNETID_MAX,
473 	.netnsok = true,
474 	.module = THIS_MODULE,
475 	.ops = smc_pnet_ops,
476 	.n_ops =  ARRAY_SIZE(smc_pnet_ops)
477 };
478 
479 static int smc_pnet_netdev_event(struct notifier_block *this,
480 				 unsigned long event, void *ptr)
481 {
482 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
483 
484 	switch (event) {
485 	case NETDEV_REBOOT:
486 	case NETDEV_UNREGISTER:
487 		smc_pnet_remove_by_ndev(event_dev);
488 	default:
489 		break;
490 	}
491 	return NOTIFY_DONE;
492 }
493 
494 static struct notifier_block smc_netdev_notifier = {
495 	.notifier_call = smc_pnet_netdev_event
496 };
497 
498 int __init smc_pnet_init(void)
499 {
500 	int rc;
501 
502 	rc = genl_register_family(&smc_pnet_nl_family);
503 	if (rc)
504 		return rc;
505 	rc = register_netdevice_notifier(&smc_netdev_notifier);
506 	if (rc)
507 		genl_unregister_family(&smc_pnet_nl_family);
508 	return rc;
509 }
510 
511 void smc_pnet_exit(void)
512 {
513 	smc_pnet_flush(NULL, NULL);
514 	unregister_netdevice_notifier(&smc_netdev_notifier);
515 	genl_unregister_family(&smc_pnet_nl_family);
516 }
517 
518 /* PNET table analysis for a given sock:
519  * determine ib_device and port belonging to used internal TCP socket
520  * ethernet interface.
521  */
522 void smc_pnet_find_roce_resource(struct sock *sk,
523 				 struct smc_ib_device **smcibdev, u8 *ibport)
524 {
525 	struct dst_entry *dst = sk_dst_get(sk);
526 	struct smc_pnetentry *pnetelem;
527 
528 	*smcibdev = NULL;
529 	*ibport = 0;
530 
531 	if (!dst)
532 		return;
533 	if (!dst->dev)
534 		goto out_rel;
535 	read_lock(&smc_pnettable.lock);
536 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
537 		if (dst->dev == pnetelem->ndev) {
538 			if (smc_ib_port_active(pnetelem->smcibdev,
539 					       pnetelem->ib_port)) {
540 				*smcibdev = pnetelem->smcibdev;
541 				*ibport = pnetelem->ib_port;
542 			}
543 			break;
544 		}
545 	}
546 	read_unlock(&smc_pnettable.lock);
547 out_rel:
548 	dst_release(dst);
549 }
550