xref: /openbmc/linux/net/bridge/br_mdb.c (revision cc8bbe1a)
1 #include <linux/err.h>
2 #include <linux/igmp.h>
3 #include <linux/kernel.h>
4 #include <linux/netdevice.h>
5 #include <linux/rculist.h>
6 #include <linux/skbuff.h>
7 #include <linux/if_ether.h>
8 #include <net/ip.h>
9 #include <net/netlink.h>
10 #include <net/switchdev.h>
11 #if IS_ENABLED(CONFIG_IPV6)
12 #include <net/ipv6.h>
13 #include <net/addrconf.h>
14 #endif
15 
16 #include "br_private.h"
17 
18 static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
19 			       struct net_device *dev)
20 {
21 	struct net_bridge *br = netdev_priv(dev);
22 	struct net_bridge_port *p;
23 	struct nlattr *nest;
24 
25 	if (!br->multicast_router || hlist_empty(&br->router_list))
26 		return 0;
27 
28 	nest = nla_nest_start(skb, MDBA_ROUTER);
29 	if (nest == NULL)
30 		return -EMSGSIZE;
31 
32 	hlist_for_each_entry_rcu(p, &br->router_list, rlist) {
33 		if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex))
34 			goto fail;
35 	}
36 
37 	nla_nest_end(skb, nest);
38 	return 0;
39 fail:
40 	nla_nest_cancel(skb, nest);
41 	return -EMSGSIZE;
42 }
43 
44 static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
45 			    struct net_device *dev)
46 {
47 	struct net_bridge *br = netdev_priv(dev);
48 	struct net_bridge_mdb_htable *mdb;
49 	struct nlattr *nest, *nest2;
50 	int i, err = 0;
51 	int idx = 0, s_idx = cb->args[1];
52 
53 	if (br->multicast_disabled)
54 		return 0;
55 
56 	mdb = rcu_dereference(br->mdb);
57 	if (!mdb)
58 		return 0;
59 
60 	nest = nla_nest_start(skb, MDBA_MDB);
61 	if (nest == NULL)
62 		return -EMSGSIZE;
63 
64 	for (i = 0; i < mdb->max; i++) {
65 		struct net_bridge_mdb_entry *mp;
66 		struct net_bridge_port_group *p;
67 		struct net_bridge_port_group __rcu **pp;
68 		struct net_bridge_port *port;
69 
70 		hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
71 			if (idx < s_idx)
72 				goto skip;
73 
74 			nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
75 			if (nest2 == NULL) {
76 				err = -EMSGSIZE;
77 				goto out;
78 			}
79 
80 			for (pp = &mp->ports;
81 			     (p = rcu_dereference(*pp)) != NULL;
82 			      pp = &p->next) {
83 				port = p->port;
84 				if (port) {
85 					struct br_mdb_entry e;
86 					memset(&e, 0, sizeof(e));
87 					e.ifindex = port->dev->ifindex;
88 					e.state = p->state;
89 					e.vid = p->addr.vid;
90 					if (p->addr.proto == htons(ETH_P_IP))
91 						e.addr.u.ip4 = p->addr.u.ip4;
92 #if IS_ENABLED(CONFIG_IPV6)
93 					if (p->addr.proto == htons(ETH_P_IPV6))
94 						e.addr.u.ip6 = p->addr.u.ip6;
95 #endif
96 					e.addr.proto = p->addr.proto;
97 					if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) {
98 						nla_nest_cancel(skb, nest2);
99 						err = -EMSGSIZE;
100 						goto out;
101 					}
102 				}
103 			}
104 			nla_nest_end(skb, nest2);
105 		skip:
106 			idx++;
107 		}
108 	}
109 
110 out:
111 	cb->args[1] = idx;
112 	nla_nest_end(skb, nest);
113 	return err;
114 }
115 
116 static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
117 {
118 	struct net_device *dev;
119 	struct net *net = sock_net(skb->sk);
120 	struct nlmsghdr *nlh = NULL;
121 	int idx = 0, s_idx;
122 
123 	s_idx = cb->args[0];
124 
125 	rcu_read_lock();
126 
127 	/* In theory this could be wrapped to 0... */
128 	cb->seq = net->dev_base_seq + br_mdb_rehash_seq;
129 
130 	for_each_netdev_rcu(net, dev) {
131 		if (dev->priv_flags & IFF_EBRIDGE) {
132 			struct br_port_msg *bpm;
133 
134 			if (idx < s_idx)
135 				goto skip;
136 
137 			nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
138 					cb->nlh->nlmsg_seq, RTM_GETMDB,
139 					sizeof(*bpm), NLM_F_MULTI);
140 			if (nlh == NULL)
141 				break;
142 
143 			bpm = nlmsg_data(nlh);
144 			memset(bpm, 0, sizeof(*bpm));
145 			bpm->ifindex = dev->ifindex;
146 			if (br_mdb_fill_info(skb, cb, dev) < 0)
147 				goto out;
148 			if (br_rports_fill_info(skb, cb, dev) < 0)
149 				goto out;
150 
151 			cb->args[1] = 0;
152 			nlmsg_end(skb, nlh);
153 		skip:
154 			idx++;
155 		}
156 	}
157 
158 out:
159 	if (nlh)
160 		nlmsg_end(skb, nlh);
161 	rcu_read_unlock();
162 	cb->args[0] = idx;
163 	return skb->len;
164 }
165 
166 static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
167 				   struct net_device *dev,
168 				   struct br_mdb_entry *entry, u32 pid,
169 				   u32 seq, int type, unsigned int flags)
170 {
171 	struct nlmsghdr *nlh;
172 	struct br_port_msg *bpm;
173 	struct nlattr *nest, *nest2;
174 
175 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
176 	if (!nlh)
177 		return -EMSGSIZE;
178 
179 	bpm = nlmsg_data(nlh);
180 	memset(bpm, 0, sizeof(*bpm));
181 	bpm->family  = AF_BRIDGE;
182 	bpm->ifindex = dev->ifindex;
183 	nest = nla_nest_start(skb, MDBA_MDB);
184 	if (nest == NULL)
185 		goto cancel;
186 	nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
187 	if (nest2 == NULL)
188 		goto end;
189 
190 	if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(*entry), entry))
191 		goto end;
192 
193 	nla_nest_end(skb, nest2);
194 	nla_nest_end(skb, nest);
195 	nlmsg_end(skb, nlh);
196 	return 0;
197 
198 end:
199 	nla_nest_end(skb, nest);
200 cancel:
201 	nlmsg_cancel(skb, nlh);
202 	return -EMSGSIZE;
203 }
204 
205 static inline size_t rtnl_mdb_nlmsg_size(void)
206 {
207 	return NLMSG_ALIGN(sizeof(struct br_port_msg))
208 		+ nla_total_size(sizeof(struct br_mdb_entry));
209 }
210 
211 static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
212 			    int type)
213 {
214 	struct switchdev_obj_port_mdb mdb = {
215 		.obj = {
216 			.id = SWITCHDEV_OBJ_ID_PORT_MDB,
217 			.flags = SWITCHDEV_F_DEFER,
218 		},
219 		.vid = entry->vid,
220 	};
221 	struct net_device *port_dev;
222 	struct net *net = dev_net(dev);
223 	struct sk_buff *skb;
224 	int err = -ENOBUFS;
225 
226 	port_dev = __dev_get_by_index(net, entry->ifindex);
227 	if (entry->addr.proto == htons(ETH_P_IP))
228 		ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
229 #if IS_ENABLED(CONFIG_IPV6)
230 	else
231 		ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
232 #endif
233 
234 	mdb.obj.orig_dev = port_dev;
235 	if (port_dev && type == RTM_NEWMDB)
236 		switchdev_port_obj_add(port_dev, &mdb.obj);
237 	else if (port_dev && type == RTM_DELMDB)
238 		switchdev_port_obj_del(port_dev, &mdb.obj);
239 
240 	skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
241 	if (!skb)
242 		goto errout;
243 
244 	err = nlmsg_populate_mdb_fill(skb, dev, entry, 0, 0, type, NTF_SELF);
245 	if (err < 0) {
246 		kfree_skb(skb);
247 		goto errout;
248 	}
249 
250 	rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC);
251 	return;
252 errout:
253 	rtnl_set_sk_err(net, RTNLGRP_MDB, err);
254 }
255 
256 void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
257 		   struct br_ip *group, int type, u8 state)
258 {
259 	struct br_mdb_entry entry;
260 
261 	memset(&entry, 0, sizeof(entry));
262 	entry.ifindex = port->dev->ifindex;
263 	entry.addr.proto = group->proto;
264 	entry.addr.u.ip4 = group->u.ip4;
265 #if IS_ENABLED(CONFIG_IPV6)
266 	entry.addr.u.ip6 = group->u.ip6;
267 #endif
268 	entry.state = state;
269 	entry.vid = group->vid;
270 	__br_mdb_notify(dev, &entry, type);
271 }
272 
273 static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
274 				   struct net_device *dev,
275 				   int ifindex, u32 pid,
276 				   u32 seq, int type, unsigned int flags)
277 {
278 	struct br_port_msg *bpm;
279 	struct nlmsghdr *nlh;
280 	struct nlattr *nest;
281 
282 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI);
283 	if (!nlh)
284 		return -EMSGSIZE;
285 
286 	bpm = nlmsg_data(nlh);
287 	memset(bpm, 0, sizeof(*bpm));
288 	bpm->family = AF_BRIDGE;
289 	bpm->ifindex = dev->ifindex;
290 	nest = nla_nest_start(skb, MDBA_ROUTER);
291 	if (!nest)
292 		goto cancel;
293 
294 	if (nla_put_u32(skb, MDBA_ROUTER_PORT, ifindex))
295 		goto end;
296 
297 	nla_nest_end(skb, nest);
298 	nlmsg_end(skb, nlh);
299 	return 0;
300 
301 end:
302 	nla_nest_end(skb, nest);
303 cancel:
304 	nlmsg_cancel(skb, nlh);
305 	return -EMSGSIZE;
306 }
307 
308 static inline size_t rtnl_rtr_nlmsg_size(void)
309 {
310 	return NLMSG_ALIGN(sizeof(struct br_port_msg))
311 		+ nla_total_size(sizeof(__u32));
312 }
313 
314 void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
315 		   int type)
316 {
317 	struct net *net = dev_net(dev);
318 	struct sk_buff *skb;
319 	int err = -ENOBUFS;
320 	int ifindex;
321 
322 	ifindex = port ? port->dev->ifindex : 0;
323 	skb = nlmsg_new(rtnl_rtr_nlmsg_size(), GFP_ATOMIC);
324 	if (!skb)
325 		goto errout;
326 
327 	err = nlmsg_populate_rtr_fill(skb, dev, ifindex, 0, 0, type, NTF_SELF);
328 	if (err < 0) {
329 		kfree_skb(skb);
330 		goto errout;
331 	}
332 
333 	rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC);
334 	return;
335 
336 errout:
337 	rtnl_set_sk_err(net, RTNLGRP_MDB, err);
338 }
339 
340 static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
341 {
342 	if (entry->ifindex == 0)
343 		return false;
344 
345 	if (entry->addr.proto == htons(ETH_P_IP)) {
346 		if (!ipv4_is_multicast(entry->addr.u.ip4))
347 			return false;
348 		if (ipv4_is_local_multicast(entry->addr.u.ip4))
349 			return false;
350 #if IS_ENABLED(CONFIG_IPV6)
351 	} else if (entry->addr.proto == htons(ETH_P_IPV6)) {
352 		if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6))
353 			return false;
354 #endif
355 	} else
356 		return false;
357 	if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY)
358 		return false;
359 	if (entry->vid >= VLAN_VID_MASK)
360 		return false;
361 
362 	return true;
363 }
364 
365 static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
366 			struct net_device **pdev, struct br_mdb_entry **pentry)
367 {
368 	struct net *net = sock_net(skb->sk);
369 	struct br_mdb_entry *entry;
370 	struct br_port_msg *bpm;
371 	struct nlattr *tb[MDBA_SET_ENTRY_MAX+1];
372 	struct net_device *dev;
373 	int err;
374 
375 	err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, NULL);
376 	if (err < 0)
377 		return err;
378 
379 	bpm = nlmsg_data(nlh);
380 	if (bpm->ifindex == 0) {
381 		pr_info("PF_BRIDGE: br_mdb_parse() with invalid ifindex\n");
382 		return -EINVAL;
383 	}
384 
385 	dev = __dev_get_by_index(net, bpm->ifindex);
386 	if (dev == NULL) {
387 		pr_info("PF_BRIDGE: br_mdb_parse() with unknown ifindex\n");
388 		return -ENODEV;
389 	}
390 
391 	if (!(dev->priv_flags & IFF_EBRIDGE)) {
392 		pr_info("PF_BRIDGE: br_mdb_parse() with non-bridge\n");
393 		return -EOPNOTSUPP;
394 	}
395 
396 	*pdev = dev;
397 
398 	if (!tb[MDBA_SET_ENTRY] ||
399 	    nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
400 		pr_info("PF_BRIDGE: br_mdb_parse() with invalid attr\n");
401 		return -EINVAL;
402 	}
403 
404 	entry = nla_data(tb[MDBA_SET_ENTRY]);
405 	if (!is_valid_mdb_entry(entry)) {
406 		pr_info("PF_BRIDGE: br_mdb_parse() with invalid entry\n");
407 		return -EINVAL;
408 	}
409 
410 	*pentry = entry;
411 	return 0;
412 }
413 
414 static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
415 			    struct br_ip *group, unsigned char state)
416 {
417 	struct net_bridge_mdb_entry *mp;
418 	struct net_bridge_port_group *p;
419 	struct net_bridge_port_group __rcu **pp;
420 	struct net_bridge_mdb_htable *mdb;
421 	unsigned long now = jiffies;
422 	int err;
423 
424 	mdb = mlock_dereference(br->mdb, br);
425 	mp = br_mdb_ip_get(mdb, group);
426 	if (!mp) {
427 		mp = br_multicast_new_group(br, port, group);
428 		err = PTR_ERR(mp);
429 		if (IS_ERR(mp))
430 			return err;
431 	}
432 
433 	for (pp = &mp->ports;
434 	     (p = mlock_dereference(*pp, br)) != NULL;
435 	     pp = &p->next) {
436 		if (p->port == port)
437 			return -EEXIST;
438 		if ((unsigned long)p->port < (unsigned long)port)
439 			break;
440 	}
441 
442 	p = br_multicast_new_port_group(port, group, *pp, state);
443 	if (unlikely(!p))
444 		return -ENOMEM;
445 	rcu_assign_pointer(*pp, p);
446 	if (state == MDB_TEMPORARY)
447 		mod_timer(&p->timer, now + br->multicast_membership_interval);
448 
449 	return 0;
450 }
451 
452 static int __br_mdb_add(struct net *net, struct net_bridge *br,
453 			struct br_mdb_entry *entry)
454 {
455 	struct br_ip ip;
456 	struct net_device *dev;
457 	struct net_bridge_port *p;
458 	int ret;
459 
460 	if (!netif_running(br->dev) || br->multicast_disabled)
461 		return -EINVAL;
462 
463 	dev = __dev_get_by_index(net, entry->ifindex);
464 	if (!dev)
465 		return -ENODEV;
466 
467 	p = br_port_get_rtnl(dev);
468 	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
469 		return -EINVAL;
470 
471 	memset(&ip, 0, sizeof(ip));
472 	ip.vid = entry->vid;
473 	ip.proto = entry->addr.proto;
474 	if (ip.proto == htons(ETH_P_IP))
475 		ip.u.ip4 = entry->addr.u.ip4;
476 #if IS_ENABLED(CONFIG_IPV6)
477 	else
478 		ip.u.ip6 = entry->addr.u.ip6;
479 #endif
480 
481 	spin_lock_bh(&br->multicast_lock);
482 	ret = br_mdb_add_group(br, p, &ip, entry->state);
483 	spin_unlock_bh(&br->multicast_lock);
484 	return ret;
485 }
486 
487 static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
488 {
489 	struct net *net = sock_net(skb->sk);
490 	struct net_bridge_vlan_group *vg;
491 	struct net_device *dev, *pdev;
492 	struct br_mdb_entry *entry;
493 	struct net_bridge_port *p;
494 	struct net_bridge_vlan *v;
495 	struct net_bridge *br;
496 	int err;
497 
498 	err = br_mdb_parse(skb, nlh, &dev, &entry);
499 	if (err < 0)
500 		return err;
501 
502 	br = netdev_priv(dev);
503 
504 	/* If vlan filtering is enabled and VLAN is not specified
505 	 * install mdb entry on all vlans configured on the port.
506 	 */
507 	pdev = __dev_get_by_index(net, entry->ifindex);
508 	if (!pdev)
509 		return -ENODEV;
510 
511 	p = br_port_get_rtnl(pdev);
512 	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
513 		return -EINVAL;
514 
515 	vg = nbp_vlan_group(p);
516 	if (br_vlan_enabled(br) && vg && entry->vid == 0) {
517 		list_for_each_entry(v, &vg->vlan_list, vlist) {
518 			entry->vid = v->vid;
519 			err = __br_mdb_add(net, br, entry);
520 			if (err)
521 				break;
522 			__br_mdb_notify(dev, entry, RTM_NEWMDB);
523 		}
524 	} else {
525 		err = __br_mdb_add(net, br, entry);
526 		if (!err)
527 			__br_mdb_notify(dev, entry, RTM_NEWMDB);
528 	}
529 
530 	return err;
531 }
532 
533 static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
534 {
535 	struct net_bridge_mdb_htable *mdb;
536 	struct net_bridge_mdb_entry *mp;
537 	struct net_bridge_port_group *p;
538 	struct net_bridge_port_group __rcu **pp;
539 	struct br_ip ip;
540 	int err = -EINVAL;
541 
542 	if (!netif_running(br->dev) || br->multicast_disabled)
543 		return -EINVAL;
544 
545 	memset(&ip, 0, sizeof(ip));
546 	ip.vid = entry->vid;
547 	ip.proto = entry->addr.proto;
548 	if (ip.proto == htons(ETH_P_IP))
549 		ip.u.ip4 = entry->addr.u.ip4;
550 #if IS_ENABLED(CONFIG_IPV6)
551 	else
552 		ip.u.ip6 = entry->addr.u.ip6;
553 #endif
554 
555 	spin_lock_bh(&br->multicast_lock);
556 	mdb = mlock_dereference(br->mdb, br);
557 
558 	mp = br_mdb_ip_get(mdb, &ip);
559 	if (!mp)
560 		goto unlock;
561 
562 	for (pp = &mp->ports;
563 	     (p = mlock_dereference(*pp, br)) != NULL;
564 	     pp = &p->next) {
565 		if (!p->port || p->port->dev->ifindex != entry->ifindex)
566 			continue;
567 
568 		if (p->port->state == BR_STATE_DISABLED)
569 			goto unlock;
570 
571 		entry->state = p->state;
572 		rcu_assign_pointer(*pp, p->next);
573 		hlist_del_init(&p->mglist);
574 		del_timer(&p->timer);
575 		call_rcu_bh(&p->rcu, br_multicast_free_pg);
576 		err = 0;
577 
578 		if (!mp->ports && !mp->mglist &&
579 		    netif_running(br->dev))
580 			mod_timer(&mp->timer, jiffies);
581 		break;
582 	}
583 
584 unlock:
585 	spin_unlock_bh(&br->multicast_lock);
586 	return err;
587 }
588 
589 static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
590 {
591 	struct net *net = sock_net(skb->sk);
592 	struct net_bridge_vlan_group *vg;
593 	struct net_device *dev, *pdev;
594 	struct br_mdb_entry *entry;
595 	struct net_bridge_port *p;
596 	struct net_bridge_vlan *v;
597 	struct net_bridge *br;
598 	int err;
599 
600 	err = br_mdb_parse(skb, nlh, &dev, &entry);
601 	if (err < 0)
602 		return err;
603 
604 	br = netdev_priv(dev);
605 
606 	/* If vlan filtering is enabled and VLAN is not specified
607 	 * delete mdb entry on all vlans configured on the port.
608 	 */
609 	pdev = __dev_get_by_index(net, entry->ifindex);
610 	if (!pdev)
611 		return -ENODEV;
612 
613 	p = br_port_get_rtnl(pdev);
614 	if (!p || p->br != br || p->state == BR_STATE_DISABLED)
615 		return -EINVAL;
616 
617 	vg = nbp_vlan_group(p);
618 	if (br_vlan_enabled(br) && vg && entry->vid == 0) {
619 		list_for_each_entry(v, &vg->vlan_list, vlist) {
620 			entry->vid = v->vid;
621 			err = __br_mdb_del(br, entry);
622 			if (!err)
623 				__br_mdb_notify(dev, entry, RTM_DELMDB);
624 		}
625 	} else {
626 		err = __br_mdb_del(br, entry);
627 		if (!err)
628 			__br_mdb_notify(dev, entry, RTM_DELMDB);
629 	}
630 
631 	return err;
632 }
633 
634 void br_mdb_init(void)
635 {
636 	rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
637 	rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL);
638 	rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL);
639 }
640 
641 void br_mdb_uninit(void)
642 {
643 	rtnl_unregister(PF_BRIDGE, RTM_GETMDB);
644 	rtnl_unregister(PF_BRIDGE, RTM_NEWMDB);
645 	rtnl_unregister(PF_BRIDGE, RTM_DELMDB);
646 }
647