xref: /openbmc/linux/net/switchdev/switchdev.c (revision eb3fcf00)
1 /*
2  * net/switchdev/switchdev.c - Switch device API
3  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
4  * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/types.h>
14 #include <linux/init.h>
15 #include <linux/mutex.h>
16 #include <linux/notifier.h>
17 #include <linux/netdevice.h>
18 #include <linux/if_bridge.h>
19 #include <net/ip_fib.h>
20 #include <net/switchdev.h>
21 
22 /**
23  *	switchdev_port_attr_get - Get port attribute
24  *
25  *	@dev: port device
26  *	@attr: attribute to get
27  */
28 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
29 {
30 	const struct switchdev_ops *ops = dev->switchdev_ops;
31 	struct net_device *lower_dev;
32 	struct list_head *iter;
33 	struct switchdev_attr first = {
34 		.id = SWITCHDEV_ATTR_UNDEFINED
35 	};
36 	int err = -EOPNOTSUPP;
37 
38 	if (ops && ops->switchdev_port_attr_get)
39 		return ops->switchdev_port_attr_get(dev, attr);
40 
41 	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
42 		return err;
43 
44 	/* Switch device port(s) may be stacked under
45 	 * bond/team/vlan dev, so recurse down to get attr on
46 	 * each port.  Return -ENODATA if attr values don't
47 	 * compare across ports.
48 	 */
49 
50 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
51 		err = switchdev_port_attr_get(lower_dev, attr);
52 		if (err)
53 			break;
54 		if (first.id == SWITCHDEV_ATTR_UNDEFINED)
55 			first = *attr;
56 		else if (memcmp(&first, attr, sizeof(*attr)))
57 			return -ENODATA;
58 	}
59 
60 	return err;
61 }
62 EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
63 
64 static int __switchdev_port_attr_set(struct net_device *dev,
65 				     struct switchdev_attr *attr)
66 {
67 	const struct switchdev_ops *ops = dev->switchdev_ops;
68 	struct net_device *lower_dev;
69 	struct list_head *iter;
70 	int err = -EOPNOTSUPP;
71 
72 	if (ops && ops->switchdev_port_attr_set)
73 		return ops->switchdev_port_attr_set(dev, attr);
74 
75 	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
76 		return err;
77 
78 	/* Switch device port(s) may be stacked under
79 	 * bond/team/vlan dev, so recurse down to set attr on
80 	 * each port.
81 	 */
82 
83 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
84 		err = __switchdev_port_attr_set(lower_dev, attr);
85 		if (err)
86 			break;
87 	}
88 
89 	return err;
90 }
91 
/* Work item used to defer an attribute set to a worker.
 *
 * @work: embedded work_struct; the handler recovers the enclosing
 *	  structure from it with container_of()
 * @dev:  target device (a reference is taken with dev_hold() when the
 *	  item is queued and released by the handler)
 * @attr: private copy of the attribute to set
 */
struct switchdev_attr_set_work {
	struct work_struct work;
	struct net_device *dev;
	struct switchdev_attr attr;
};
97 
98 static void switchdev_port_attr_set_work(struct work_struct *work)
99 {
100 	struct switchdev_attr_set_work *asw =
101 		container_of(work, struct switchdev_attr_set_work, work);
102 	int err;
103 
104 	rtnl_lock();
105 	err = switchdev_port_attr_set(asw->dev, &asw->attr);
106 	if (err && err != -EOPNOTSUPP)
107 		netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
108 			   err, asw->attr.id);
109 	rtnl_unlock();
110 
111 	dev_put(asw->dev);
112 	kfree(work);
113 }
114 
115 static int switchdev_port_attr_set_defer(struct net_device *dev,
116 					 struct switchdev_attr *attr)
117 {
118 	struct switchdev_attr_set_work *asw;
119 
120 	asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
121 	if (!asw)
122 		return -ENOMEM;
123 
124 	INIT_WORK(&asw->work, switchdev_port_attr_set_work);
125 
126 	dev_hold(dev);
127 	asw->dev = dev;
128 	memcpy(&asw->attr, attr, sizeof(asw->attr));
129 
130 	schedule_work(&asw->work);
131 
132 	return 0;
133 }
134 
135 /**
136  *	switchdev_port_attr_set - Set port attribute
137  *
138  *	@dev: port device
139  *	@attr: attribute to set
140  *
141  *	Use a 2-phase prepare-commit transaction model to ensure
142  *	system is not left in a partially updated state due to
143  *	failure from driver/device.
144  */
145 int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
146 {
147 	int err;
148 
149 	if (!rtnl_is_locked()) {
150 		/* Running prepare-commit transaction across stacked
151 		 * devices requires nothing moves, so if rtnl_lock is
152 		 * not held, schedule a worker thread to hold rtnl_lock
153 		 * while setting attr.
154 		 */
155 
156 		return switchdev_port_attr_set_defer(dev, attr);
157 	}
158 
159 	/* Phase I: prepare for attr set. Driver/device should fail
160 	 * here if there are going to be issues in the commit phase,
161 	 * such as lack of resources or support.  The driver/device
162 	 * should reserve resources needed for the commit phase here,
163 	 * but should not commit the attr.
164 	 */
165 
166 	attr->trans = SWITCHDEV_TRANS_PREPARE;
167 	err = __switchdev_port_attr_set(dev, attr);
168 	if (err) {
169 		/* Prepare phase failed: abort the transaction.  Any
170 		 * resources reserved in the prepare phase are
171 		 * released.
172 		 */
173 
174 		if (err != -EOPNOTSUPP) {
175 			attr->trans = SWITCHDEV_TRANS_ABORT;
176 			__switchdev_port_attr_set(dev, attr);
177 		}
178 
179 		return err;
180 	}
181 
182 	/* Phase II: commit attr set.  This cannot fail as a fault
183 	 * of driver/device.  If it does, it's a bug in the driver/device
184 	 * because the driver said everythings was OK in phase I.
185 	 */
186 
187 	attr->trans = SWITCHDEV_TRANS_COMMIT;
188 	err = __switchdev_port_attr_set(dev, attr);
189 	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
190 	     dev->name, attr->id);
191 
192 	return err;
193 }
194 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
195 
196 static int __switchdev_port_obj_add(struct net_device *dev,
197 				    struct switchdev_obj *obj)
198 {
199 	const struct switchdev_ops *ops = dev->switchdev_ops;
200 	struct net_device *lower_dev;
201 	struct list_head *iter;
202 	int err = -EOPNOTSUPP;
203 
204 	if (ops && ops->switchdev_port_obj_add)
205 		return ops->switchdev_port_obj_add(dev, obj);
206 
207 	/* Switch device port(s) may be stacked under
208 	 * bond/team/vlan dev, so recurse down to add object on
209 	 * each port.
210 	 */
211 
212 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
213 		err = __switchdev_port_obj_add(lower_dev, obj);
214 		if (err)
215 			break;
216 	}
217 
218 	return err;
219 }
220 
221 /**
222  *	switchdev_port_obj_add - Add port object
223  *
224  *	@dev: port device
225  *	@obj: object to add
226  *
227  *	Use a 2-phase prepare-commit transaction model to ensure
228  *	system is not left in a partially updated state due to
229  *	failure from driver/device.
230  *
231  *	rtnl_lock must be held.
232  */
233 int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
234 {
235 	int err;
236 
237 	ASSERT_RTNL();
238 
239 	/* Phase I: prepare for obj add. Driver/device should fail
240 	 * here if there are going to be issues in the commit phase,
241 	 * such as lack of resources or support.  The driver/device
242 	 * should reserve resources needed for the commit phase here,
243 	 * but should not commit the obj.
244 	 */
245 
246 	obj->trans = SWITCHDEV_TRANS_PREPARE;
247 	err = __switchdev_port_obj_add(dev, obj);
248 	if (err) {
249 		/* Prepare phase failed: abort the transaction.  Any
250 		 * resources reserved in the prepare phase are
251 		 * released.
252 		 */
253 
254 		if (err != -EOPNOTSUPP) {
255 			obj->trans = SWITCHDEV_TRANS_ABORT;
256 			__switchdev_port_obj_add(dev, obj);
257 		}
258 
259 		return err;
260 	}
261 
262 	/* Phase II: commit obj add.  This cannot fail as a fault
263 	 * of driver/device.  If it does, it's a bug in the driver/device
264 	 * because the driver said everythings was OK in phase I.
265 	 */
266 
267 	obj->trans = SWITCHDEV_TRANS_COMMIT;
268 	err = __switchdev_port_obj_add(dev, obj);
269 	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
270 
271 	return err;
272 }
273 EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
274 
275 /**
276  *	switchdev_port_obj_del - Delete port object
277  *
278  *	@dev: port device
279  *	@obj: object to delete
280  */
281 int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
282 {
283 	const struct switchdev_ops *ops = dev->switchdev_ops;
284 	struct net_device *lower_dev;
285 	struct list_head *iter;
286 	int err = -EOPNOTSUPP;
287 
288 	if (ops && ops->switchdev_port_obj_del)
289 		return ops->switchdev_port_obj_del(dev, obj);
290 
291 	/* Switch device port(s) may be stacked under
292 	 * bond/team/vlan dev, so recurse down to delete object on
293 	 * each port.
294 	 */
295 
296 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
297 		err = switchdev_port_obj_del(lower_dev, obj);
298 		if (err)
299 			break;
300 	}
301 
302 	return err;
303 }
304 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
305 
306 /**
307  *	switchdev_port_obj_dump - Dump port objects
308  *
309  *	@dev: port device
310  *	@obj: object to dump
311  */
312 int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
313 {
314 	const struct switchdev_ops *ops = dev->switchdev_ops;
315 	struct net_device *lower_dev;
316 	struct list_head *iter;
317 	int err = -EOPNOTSUPP;
318 
319 	if (ops && ops->switchdev_port_obj_dump)
320 		return ops->switchdev_port_obj_dump(dev, obj);
321 
322 	/* Switch device port(s) may be stacked under
323 	 * bond/team/vlan dev, so recurse down to dump objects on
324 	 * first port at bottom of stack.
325 	 */
326 
327 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
328 		err = switchdev_port_obj_dump(lower_dev, obj);
329 		break;
330 	}
331 
332 	return err;
333 }
334 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
335 
/* Notifier chain for switchdev events.  switchdev_mutex is taken around
 * every register, unregister and call operation on the chain in this
 * file.
 */
static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
338 
339 /**
340  *	register_switchdev_notifier - Register notifier
341  *	@nb: notifier_block
342  *
343  *	Register switch device notifier. This should be used by code
344  *	which needs to monitor events happening in particular device.
345  *	Return values are same as for atomic_notifier_chain_register().
346  */
347 int register_switchdev_notifier(struct notifier_block *nb)
348 {
349 	int err;
350 
351 	mutex_lock(&switchdev_mutex);
352 	err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
353 	mutex_unlock(&switchdev_mutex);
354 	return err;
355 }
356 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
357 
358 /**
359  *	unregister_switchdev_notifier - Unregister notifier
360  *	@nb: notifier_block
361  *
362  *	Unregister switch device notifier.
363  *	Return values are same as for atomic_notifier_chain_unregister().
364  */
365 int unregister_switchdev_notifier(struct notifier_block *nb)
366 {
367 	int err;
368 
369 	mutex_lock(&switchdev_mutex);
370 	err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
371 	mutex_unlock(&switchdev_mutex);
372 	return err;
373 }
374 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
375 
376 /**
377  *	call_switchdev_notifiers - Call notifiers
378  *	@val: value passed unmodified to notifier function
379  *	@dev: port device
380  *	@info: notifier information data
381  *
382  *	Call all network notifier blocks. This should be called by driver
383  *	when it needs to propagate hardware event.
384  *	Return values are same as for atomic_notifier_call_chain().
385  */
386 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
387 			     struct switchdev_notifier_info *info)
388 {
389 	int err;
390 
391 	info->dev = dev;
392 	mutex_lock(&switchdev_mutex);
393 	err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
394 	mutex_unlock(&switchdev_mutex);
395 	return err;
396 }
397 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
398 
/* State carried through a VLAN dump.
 *
 * @obj:         embedded switchdev object; the dump callback recovers
 *		 this structure from it with container_of()
 * @skb:         netlink message the VLAN info attributes are put into
 * @filter_mask: RTEXT_FILTER_* bits selecting per-VLAN or compressed
 *		 (range) output
 * @flags:       bridge VLAN flags of the pending entry/range
 * @begin:       first VID of the pending entry/range
 * @end:         last VID of the pending entry/range
 */
struct switchdev_vlan_dump {
	struct switchdev_obj obj;
	struct sk_buff *skb;
	u32 filter_mask;
	u16 flags;
	u16 begin;
	u16 end;
};
407 
408 static int switchdev_port_vlan_dump_put(struct net_device *dev,
409 					struct switchdev_vlan_dump *dump)
410 {
411 	struct bridge_vlan_info vinfo;
412 
413 	vinfo.flags = dump->flags;
414 
415 	if (dump->begin == 0 && dump->end == 0) {
416 		return 0;
417 	} else if (dump->begin == dump->end) {
418 		vinfo.vid = dump->begin;
419 		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
420 			    sizeof(vinfo), &vinfo))
421 			return -EMSGSIZE;
422 	} else {
423 		vinfo.vid = dump->begin;
424 		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
425 		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
426 			    sizeof(vinfo), &vinfo))
427 			return -EMSGSIZE;
428 		vinfo.vid = dump->end;
429 		vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
430 		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
431 		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
432 			    sizeof(vinfo), &vinfo))
433 			return -EMSGSIZE;
434 	}
435 
436 	return 0;
437 }
438 
439 static int switchdev_port_vlan_dump_cb(struct net_device *dev,
440 				       struct switchdev_obj *obj)
441 {
442 	struct switchdev_vlan_dump *dump =
443 		container_of(obj, struct switchdev_vlan_dump, obj);
444 	struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
445 	int err = 0;
446 
447 	if (vlan->vid_begin > vlan->vid_end)
448 		return -EINVAL;
449 
450 	if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
451 		dump->flags = vlan->flags;
452 		for (dump->begin = dump->end = vlan->vid_begin;
453 		     dump->begin <= vlan->vid_end;
454 		     dump->begin++, dump->end++) {
455 			err = switchdev_port_vlan_dump_put(dev, dump);
456 			if (err)
457 				return err;
458 		}
459 	} else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
460 		if (dump->begin > vlan->vid_begin &&
461 		    dump->begin >= vlan->vid_end) {
462 			if ((dump->begin - 1) == vlan->vid_end &&
463 			    dump->flags == vlan->flags) {
464 				/* prepend */
465 				dump->begin = vlan->vid_begin;
466 			} else {
467 				err = switchdev_port_vlan_dump_put(dev, dump);
468 				dump->flags = vlan->flags;
469 				dump->begin = vlan->vid_begin;
470 				dump->end = vlan->vid_end;
471 			}
472 		} else if (dump->end <= vlan->vid_begin &&
473 		           dump->end < vlan->vid_end) {
474 			if ((dump->end  + 1) == vlan->vid_begin &&
475 			    dump->flags == vlan->flags) {
476 				/* append */
477 				dump->end = vlan->vid_end;
478 			} else {
479 				err = switchdev_port_vlan_dump_put(dev, dump);
480 				dump->flags = vlan->flags;
481 				dump->begin = vlan->vid_begin;
482 				dump->end = vlan->vid_end;
483 			}
484 		} else {
485 			err = -EINVAL;
486 		}
487 	}
488 
489 	return err;
490 }
491 
492 static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
493 				    u32 filter_mask)
494 {
495 	struct switchdev_vlan_dump dump = {
496 		.obj = {
497 			.id = SWITCHDEV_OBJ_PORT_VLAN,
498 			.cb = switchdev_port_vlan_dump_cb,
499 		},
500 		.skb = skb,
501 		.filter_mask = filter_mask,
502 	};
503 	int err = 0;
504 
505 	if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
506 	    (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
507 		err = switchdev_port_obj_dump(dev, &dump.obj);
508 		if (err)
509 			goto err_out;
510 		if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
511 			/* last one */
512 			err = switchdev_port_vlan_dump_put(dev, &dump);
513 	}
514 
515 err_out:
516 	return err == -EOPNOTSUPP ? 0 : err;
517 }
518 
519 /**
520  *	switchdev_port_bridge_getlink - Get bridge port attributes
521  *
522  *	@dev: port device
523  *
524  *	Called for SELF on rtnl_bridge_getlink to get bridge port
525  *	attributes.
526  */
527 int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
528 				  struct net_device *dev, u32 filter_mask,
529 				  int nlflags)
530 {
531 	struct switchdev_attr attr = {
532 		.id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
533 	};
534 	u16 mode = BRIDGE_MODE_UNDEF;
535 	u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
536 	int err;
537 
538 	err = switchdev_port_attr_get(dev, &attr);
539 	if (err && err != -EOPNOTSUPP)
540 		return err;
541 
542 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
543 				       attr.u.brport_flags, mask, nlflags,
544 				       filter_mask, switchdev_port_vlan_fill);
545 }
546 EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
547 
548 static int switchdev_port_br_setflag(struct net_device *dev,
549 				     struct nlattr *nlattr,
550 				     unsigned long brport_flag)
551 {
552 	struct switchdev_attr attr = {
553 		.id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
554 	};
555 	u8 flag = nla_get_u8(nlattr);
556 	int err;
557 
558 	err = switchdev_port_attr_get(dev, &attr);
559 	if (err)
560 		return err;
561 
562 	if (flag)
563 		attr.u.brport_flags |= brport_flag;
564 	else
565 		attr.u.brport_flags &= ~brport_flag;
566 
567 	return switchdev_port_attr_set(dev, &attr);
568 }
569 
/* Expected netlink types of the IFLA_BRPORT_* attributes; used to
 * validate the nested IFLA_PROTINFO payload before it is applied.
 */
static const struct nla_policy
switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
	[IFLA_BRPORT_STATE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_COST]		= { .type = NLA_U32 },
	[IFLA_BRPORT_PRIORITY]		= { .type = NLA_U16 },
	[IFLA_BRPORT_MODE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_GUARD]		= { .type = NLA_U8 },
	[IFLA_BRPORT_PROTECT]		= { .type = NLA_U8 },
	[IFLA_BRPORT_FAST_LEAVE]	= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING]		= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING_SYNC]	= { .type = NLA_U8 },
	[IFLA_BRPORT_UNICAST_FLOOD]	= { .type = NLA_U8 },
};
583 
584 static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
585 					      struct nlattr *protinfo)
586 {
587 	struct nlattr *attr;
588 	int rem;
589 	int err;
590 
591 	err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
592 				  switchdev_port_bridge_policy);
593 	if (err)
594 		return err;
595 
596 	nla_for_each_nested(attr, protinfo, rem) {
597 		switch (nla_type(attr)) {
598 		case IFLA_BRPORT_LEARNING:
599 			err = switchdev_port_br_setflag(dev, attr,
600 							BR_LEARNING);
601 			break;
602 		case IFLA_BRPORT_LEARNING_SYNC:
603 			err = switchdev_port_br_setflag(dev, attr,
604 							BR_LEARNING_SYNC);
605 			break;
606 		default:
607 			err = -EOPNOTSUPP;
608 			break;
609 		}
610 		if (err)
611 			return err;
612 	}
613 
614 	return 0;
615 }
616 
617 static int switchdev_port_br_afspec(struct net_device *dev,
618 				    struct nlattr *afspec,
619 				    int (*f)(struct net_device *dev,
620 					     struct switchdev_obj *obj))
621 {
622 	struct nlattr *attr;
623 	struct bridge_vlan_info *vinfo;
624 	struct switchdev_obj obj = {
625 		.id = SWITCHDEV_OBJ_PORT_VLAN,
626 	};
627 	struct switchdev_obj_vlan *vlan = &obj.u.vlan;
628 	int rem;
629 	int err;
630 
631 	nla_for_each_nested(attr, afspec, rem) {
632 		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
633 			continue;
634 		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
635 			return -EINVAL;
636 		vinfo = nla_data(attr);
637 		vlan->flags = vinfo->flags;
638 		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
639 			if (vlan->vid_begin)
640 				return -EINVAL;
641 			vlan->vid_begin = vinfo->vid;
642 		} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
643 			if (!vlan->vid_begin)
644 				return -EINVAL;
645 			vlan->vid_end = vinfo->vid;
646 			if (vlan->vid_end <= vlan->vid_begin)
647 				return -EINVAL;
648 			err = f(dev, &obj);
649 			if (err)
650 				return err;
651 			memset(vlan, 0, sizeof(*vlan));
652 		} else {
653 			if (vlan->vid_begin)
654 				return -EINVAL;
655 			vlan->vid_begin = vinfo->vid;
656 			vlan->vid_end = vinfo->vid;
657 			err = f(dev, &obj);
658 			if (err)
659 				return err;
660 			memset(vlan, 0, sizeof(*vlan));
661 		}
662 	}
663 
664 	return 0;
665 }
666 
667 /**
668  *	switchdev_port_bridge_setlink - Set bridge port attributes
669  *
670  *	@dev: port device
671  *	@nlh: netlink header
672  *	@flags: netlink flags
673  *
674  *	Called for SELF on rtnl_bridge_setlink to set bridge port
675  *	attributes.
676  */
677 int switchdev_port_bridge_setlink(struct net_device *dev,
678 				  struct nlmsghdr *nlh, u16 flags)
679 {
680 	struct nlattr *protinfo;
681 	struct nlattr *afspec;
682 	int err = 0;
683 
684 	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
685 				   IFLA_PROTINFO);
686 	if (protinfo) {
687 		err = switchdev_port_br_setlink_protinfo(dev, protinfo);
688 		if (err)
689 			return err;
690 	}
691 
692 	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
693 				 IFLA_AF_SPEC);
694 	if (afspec)
695 		err = switchdev_port_br_afspec(dev, afspec,
696 					       switchdev_port_obj_add);
697 
698 	return err;
699 }
700 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
701 
702 /**
703  *	switchdev_port_bridge_dellink - Set bridge port attributes
704  *
705  *	@dev: port device
706  *	@nlh: netlink header
707  *	@flags: netlink flags
708  *
709  *	Called for SELF on rtnl_bridge_dellink to set bridge port
710  *	attributes.
711  */
712 int switchdev_port_bridge_dellink(struct net_device *dev,
713 				  struct nlmsghdr *nlh, u16 flags)
714 {
715 	struct nlattr *afspec;
716 
717 	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
718 				 IFLA_AF_SPEC);
719 	if (afspec)
720 		return switchdev_port_br_afspec(dev, afspec,
721 						switchdev_port_obj_del);
722 
723 	return 0;
724 }
725 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
726 
727 /**
728  *	switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
729  *
730  *	@ndmsg: netlink hdr
731  *	@nlattr: netlink attributes
732  *	@dev: port device
733  *	@addr: MAC address to add
734  *	@vid: VLAN to add
735  *
736  *	Add FDB entry to switch device.
737  */
738 int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
739 			   struct net_device *dev, const unsigned char *addr,
740 			   u16 vid, u16 nlm_flags)
741 {
742 	struct switchdev_obj obj = {
743 		.id = SWITCHDEV_OBJ_PORT_FDB,
744 		.u.fdb = {
745 			.addr = addr,
746 			.vid = vid,
747 		},
748 	};
749 
750 	return switchdev_port_obj_add(dev, &obj);
751 }
752 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
753 
754 /**
755  *	switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
756  *
757  *	@ndmsg: netlink hdr
758  *	@nlattr: netlink attributes
759  *	@dev: port device
760  *	@addr: MAC address to delete
761  *	@vid: VLAN to delete
762  *
763  *	Delete FDB entry from switch device.
764  */
765 int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
766 			   struct net_device *dev, const unsigned char *addr,
767 			   u16 vid)
768 {
769 	struct switchdev_obj obj = {
770 		.id = SWITCHDEV_OBJ_PORT_FDB,
771 		.u.fdb = {
772 			.addr = addr,
773 			.vid = vid,
774 		},
775 	};
776 
777 	return switchdev_port_obj_del(dev, &obj);
778 }
779 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
780 
/* State carried through an FDB dump.
 *
 * @obj: embedded switchdev object; the dump callback recovers this
 *	 structure from it with container_of()
 * @skb: netlink message the entries are written into
 * @cb:  netlink dump callback; supplies portid, sequence number and
 *	 the resume index in args[0]
 * @idx: running entry index, incremented for every entry seen
 */
struct switchdev_fdb_dump {
	struct switchdev_obj obj;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	int idx;
};
787 
788 static int switchdev_port_fdb_dump_cb(struct net_device *dev,
789 				      struct switchdev_obj *obj)
790 {
791 	struct switchdev_fdb_dump *dump =
792 		container_of(obj, struct switchdev_fdb_dump, obj);
793 	u32 portid = NETLINK_CB(dump->cb->skb).portid;
794 	u32 seq = dump->cb->nlh->nlmsg_seq;
795 	struct nlmsghdr *nlh;
796 	struct ndmsg *ndm;
797 
798 	if (dump->idx < dump->cb->args[0])
799 		goto skip;
800 
801 	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
802 			sizeof(*ndm), NLM_F_MULTI);
803 	if (!nlh)
804 		return -EMSGSIZE;
805 
806 	ndm = nlmsg_data(nlh);
807 	ndm->ndm_family  = AF_BRIDGE;
808 	ndm->ndm_pad1    = 0;
809 	ndm->ndm_pad2    = 0;
810 	ndm->ndm_flags   = NTF_SELF;
811 	ndm->ndm_type    = 0;
812 	ndm->ndm_ifindex = dev->ifindex;
813 	ndm->ndm_state   = obj->u.fdb.ndm_state;
814 
815 	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
816 		goto nla_put_failure;
817 
818 	if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
819 		goto nla_put_failure;
820 
821 	nlmsg_end(dump->skb, nlh);
822 
823 skip:
824 	dump->idx++;
825 	return 0;
826 
827 nla_put_failure:
828 	nlmsg_cancel(dump->skb, nlh);
829 	return -EMSGSIZE;
830 }
831 
832 /**
833  *	switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
834  *
835  *	@skb: netlink skb
836  *	@cb: netlink callback
837  *	@dev: port device
838  *	@filter_dev: filter device
839  *	@idx:
840  *
841  *	Delete FDB entry from switch device.
842  */
843 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
844 			    struct net_device *dev,
845 			    struct net_device *filter_dev, int idx)
846 {
847 	struct switchdev_fdb_dump dump = {
848 		.obj = {
849 			.id = SWITCHDEV_OBJ_PORT_FDB,
850 			.cb = switchdev_port_fdb_dump_cb,
851 		},
852 		.skb = skb,
853 		.cb = cb,
854 		.idx = idx,
855 	};
856 
857 	switchdev_port_obj_dump(dev, &dump.obj);
858 	return dump.idx;
859 }
860 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
861 
862 static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
863 {
864 	const struct switchdev_ops *ops = dev->switchdev_ops;
865 	struct net_device *lower_dev;
866 	struct net_device *port_dev;
867 	struct list_head *iter;
868 
869 	/* Recusively search down until we find a sw port dev.
870 	 * (A sw port dev supports switchdev_port_attr_get).
871 	 */
872 
873 	if (ops && ops->switchdev_port_attr_get)
874 		return dev;
875 
876 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
877 		port_dev = switchdev_get_lowest_dev(lower_dev);
878 		if (port_dev)
879 			return port_dev;
880 	}
881 
882 	return NULL;
883 }
884 
885 static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
886 {
887 	struct switchdev_attr attr = {
888 		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
889 	};
890 	struct switchdev_attr prev_attr;
891 	struct net_device *dev = NULL;
892 	int nhsel;
893 
894 	/* For this route, all nexthop devs must be on the same switch. */
895 
896 	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
897 		const struct fib_nh *nh = &fi->fib_nh[nhsel];
898 
899 		if (!nh->nh_dev)
900 			return NULL;
901 
902 		dev = switchdev_get_lowest_dev(nh->nh_dev);
903 		if (!dev)
904 			return NULL;
905 
906 		if (switchdev_port_attr_get(dev, &attr))
907 			return NULL;
908 
909 		if (nhsel > 0 &&
910 		    !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
911 				return NULL;
912 
913 		prev_attr = attr;
914 	}
915 
916 	return dev;
917 }
918 
919 /**
920  *	switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
921  *
922  *	@dst: route's IPv4 destination address
923  *	@dst_len: destination address length (prefix length)
924  *	@fi: route FIB info structure
925  *	@tos: route TOS
926  *	@type: route type
927  *	@nlflags: netlink flags passed in (NLM_F_*)
928  *	@tb_id: route table ID
929  *
930  *	Add/modify switch IPv4 route entry.
931  */
932 int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
933 			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
934 {
935 	struct switchdev_obj fib_obj = {
936 		.id = SWITCHDEV_OBJ_IPV4_FIB,
937 		.u.ipv4_fib = {
938 			.dst = dst,
939 			.dst_len = dst_len,
940 			.fi = fi,
941 			.tos = tos,
942 			.type = type,
943 			.nlflags = nlflags,
944 			.tb_id = tb_id,
945 		},
946 	};
947 	struct net_device *dev;
948 	int err = 0;
949 
950 	/* Don't offload route if using custom ip rules or if
951 	 * IPv4 FIB offloading has been disabled completely.
952 	 */
953 
954 #ifdef CONFIG_IP_MULTIPLE_TABLES
955 	if (fi->fib_net->ipv4.fib_has_custom_rules)
956 		return 0;
957 #endif
958 
959 	if (fi->fib_net->ipv4.fib_offload_disabled)
960 		return 0;
961 
962 	dev = switchdev_get_dev_by_nhs(fi);
963 	if (!dev)
964 		return 0;
965 
966 	err = switchdev_port_obj_add(dev, &fib_obj);
967 	if (!err)
968 		fi->fib_flags |= RTNH_F_OFFLOAD;
969 
970 	return err == -EOPNOTSUPP ? 0 : err;
971 }
972 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
973 
974 /**
975  *	switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
976  *
977  *	@dst: route's IPv4 destination address
978  *	@dst_len: destination address length (prefix length)
979  *	@fi: route FIB info structure
980  *	@tos: route TOS
981  *	@type: route type
982  *	@tb_id: route table ID
983  *
984  *	Delete IPv4 route entry from switch device.
985  */
986 int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
987 			   u8 tos, u8 type, u32 tb_id)
988 {
989 	struct switchdev_obj fib_obj = {
990 		.id = SWITCHDEV_OBJ_IPV4_FIB,
991 		.u.ipv4_fib = {
992 			.dst = dst,
993 			.dst_len = dst_len,
994 			.fi = fi,
995 			.tos = tos,
996 			.type = type,
997 			.nlflags = 0,
998 			.tb_id = tb_id,
999 		},
1000 	};
1001 	struct net_device *dev;
1002 	int err = 0;
1003 
1004 	if (!(fi->fib_flags & RTNH_F_OFFLOAD))
1005 		return 0;
1006 
1007 	dev = switchdev_get_dev_by_nhs(fi);
1008 	if (!dev)
1009 		return 0;
1010 
1011 	err = switchdev_port_obj_del(dev, &fib_obj);
1012 	if (!err)
1013 		fi->fib_flags &= ~RTNH_F_OFFLOAD;
1014 
1015 	return err == -EOPNOTSUPP ? 0 : err;
1016 }
1017 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
1018 
1019 /**
1020  *	switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
1021  *
1022  *	@fi: route FIB info structure
1023  */
1024 void switchdev_fib_ipv4_abort(struct fib_info *fi)
1025 {
1026 	/* There was a problem installing this route to the offload
1027 	 * device.  For now, until we come up with more refined
1028 	 * policy handling, abruptly end IPv4 fib offloading for
1029 	 * for entire net by flushing offload device(s) of all
1030 	 * IPv4 routes, and mark IPv4 fib offloading broken from
1031 	 * this point forward.
1032 	 */
1033 
1034 	fib_flush_external(fi->fib_net);
1035 	fi->fib_net->ipv4.fib_offload_disabled = true;
1036 }
1037 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
1038 
1039 static bool switchdev_port_same_parent_id(struct net_device *a,
1040 					  struct net_device *b)
1041 {
1042 	struct switchdev_attr a_attr = {
1043 		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
1044 		.flags = SWITCHDEV_F_NO_RECURSE,
1045 	};
1046 	struct switchdev_attr b_attr = {
1047 		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
1048 		.flags = SWITCHDEV_F_NO_RECURSE,
1049 	};
1050 
1051 	if (switchdev_port_attr_get(a, &a_attr) ||
1052 	    switchdev_port_attr_get(b, &b_attr))
1053 		return false;
1054 
1055 	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
1056 }
1057 
1058 static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
1059 				       struct net_device *group_dev)
1060 {
1061 	struct net_device *lower_dev;
1062 	struct list_head *iter;
1063 
1064 	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1065 		if (lower_dev == dev)
1066 			continue;
1067 		if (switchdev_port_same_parent_id(dev, lower_dev))
1068 			return lower_dev->offload_fwd_mark;
1069 		return switchdev_port_fwd_mark_get(dev, lower_dev);
1070 	}
1071 
1072 	return dev->ifindex;
1073 }
1074 
1075 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
1076 					  u32 old_mark, u32 *reset_mark)
1077 {
1078 	struct net_device *lower_dev;
1079 	struct list_head *iter;
1080 
1081 	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1082 		if (lower_dev->offload_fwd_mark == old_mark) {
1083 			if (!*reset_mark)
1084 				*reset_mark = lower_dev->ifindex;
1085 			lower_dev->offload_fwd_mark = *reset_mark;
1086 		}
1087 		switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
1088 	}
1089 }
1090 
1091 /**
1092  *	switchdev_port_fwd_mark_set - Set port offload forwarding mark
1093  *
1094  *	@dev: port device
1095  *	@group_dev: containing device
1096  *	@joining: true if dev is joining group; false if leaving group
1097  *
1098  *	An ungrouped port's offload mark is just its ifindex.  A grouped
1099  *	port's (member of a bridge, for example) offload mark is the ifindex
1100  *	of one of the ports in the group with the same parent (switch) ID.
1101  *	Ports on the same device in the same group will have the same mark.
1102  *
1103  *	Example:
1104  *
1105  *		br0		ifindex=9
1106  *		  sw1p1		ifindex=2	mark=2
1107  *		  sw1p2		ifindex=3	mark=2
1108  *		  sw2p1		ifindex=4	mark=5
1109  *		  sw2p2		ifindex=5	mark=5
1110  *
1111  *	If sw2p2 leaves the bridge, we'll have:
1112  *
1113  *		br0		ifindex=9
1114  *		  sw1p1		ifindex=2	mark=2
1115  *		  sw1p2		ifindex=3	mark=2
1116  *		  sw2p1		ifindex=4	mark=4
1117  *		sw2p2		ifindex=5	mark=5
1118  */
1119 void switchdev_port_fwd_mark_set(struct net_device *dev,
1120 				 struct net_device *group_dev,
1121 				 bool joining)
1122 {
1123 	u32 mark = dev->ifindex;
1124 	u32 reset_mark = 0;
1125 
1126 	if (group_dev && joining) {
1127 		mark = switchdev_port_fwd_mark_get(dev, group_dev);
1128 	} else if (group_dev && !joining) {
1129 		if (dev->offload_fwd_mark == mark)
1130 			/* Ohoh, this port was the mark reference port,
1131 			 * but it's leaving the group, so reset the
1132 			 * mark for the remaining ports in the group.
1133 			 */
1134 			switchdev_port_fwd_mark_reset(group_dev, mark,
1135 						      &reset_mark);
1136 	}
1137 
1138 	dev->offload_fwd_mark = mark;
1139 }
1140 EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
1141