xref: /openbmc/linux/net/switchdev/switchdev.c (revision ca55b2fef3a9373fcfc30f82fd26bc7fccbda732)
1 /*
2  * net/switchdev/switchdev.c - Switch device API
3  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
4  * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/types.h>
14 #include <linux/init.h>
15 #include <linux/mutex.h>
16 #include <linux/notifier.h>
17 #include <linux/netdevice.h>
18 #include <linux/if_bridge.h>
19 #include <linux/if_vlan.h>
20 #include <net/ip_fib.h>
21 #include <net/switchdev.h>
22 
23 /**
24  *	switchdev_port_attr_get - Get port attribute
25  *
26  *	@dev: port device
27  *	@attr: attribute to get
28  */
29 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
30 {
31 	const struct switchdev_ops *ops = dev->switchdev_ops;
32 	struct net_device *lower_dev;
33 	struct list_head *iter;
34 	struct switchdev_attr first = {
35 		.id = SWITCHDEV_ATTR_UNDEFINED
36 	};
37 	int err = -EOPNOTSUPP;
38 
39 	if (ops && ops->switchdev_port_attr_get)
40 		return ops->switchdev_port_attr_get(dev, attr);
41 
42 	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
43 		return err;
44 
45 	/* Switch device port(s) may be stacked under
46 	 * bond/team/vlan dev, so recurse down to get attr on
47 	 * each port.  Return -ENODATA if attr values don't
48 	 * compare across ports.
49 	 */
50 
51 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
52 		err = switchdev_port_attr_get(lower_dev, attr);
53 		if (err)
54 			break;
55 		if (first.id == SWITCHDEV_ATTR_UNDEFINED)
56 			first = *attr;
57 		else if (memcmp(&first, attr, sizeof(*attr)))
58 			return -ENODATA;
59 	}
60 
61 	return err;
62 }
63 EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
64 
65 static int __switchdev_port_attr_set(struct net_device *dev,
66 				     struct switchdev_attr *attr)
67 {
68 	const struct switchdev_ops *ops = dev->switchdev_ops;
69 	struct net_device *lower_dev;
70 	struct list_head *iter;
71 	int err = -EOPNOTSUPP;
72 
73 	if (ops && ops->switchdev_port_attr_set)
74 		return ops->switchdev_port_attr_set(dev, attr);
75 
76 	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
77 		return err;
78 
79 	/* Switch device port(s) may be stacked under
80 	 * bond/team/vlan dev, so recurse down to set attr on
81 	 * each port.
82 	 */
83 
84 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
85 		err = __switchdev_port_attr_set(lower_dev, attr);
86 		if (err)
87 			break;
88 	}
89 
90 	return err;
91 }
92 
93 struct switchdev_attr_set_work {
94 	struct work_struct work;
95 	struct net_device *dev;
96 	struct switchdev_attr attr;
97 };
98 
99 static void switchdev_port_attr_set_work(struct work_struct *work)
100 {
101 	struct switchdev_attr_set_work *asw =
102 		container_of(work, struct switchdev_attr_set_work, work);
103 	int err;
104 
105 	rtnl_lock();
106 	err = switchdev_port_attr_set(asw->dev, &asw->attr);
107 	if (err && err != -EOPNOTSUPP)
108 		netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
109 			   err, asw->attr.id);
110 	rtnl_unlock();
111 
112 	dev_put(asw->dev);
113 	kfree(work);
114 }
115 
116 static int switchdev_port_attr_set_defer(struct net_device *dev,
117 					 struct switchdev_attr *attr)
118 {
119 	struct switchdev_attr_set_work *asw;
120 
121 	asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
122 	if (!asw)
123 		return -ENOMEM;
124 
125 	INIT_WORK(&asw->work, switchdev_port_attr_set_work);
126 
127 	dev_hold(dev);
128 	asw->dev = dev;
129 	memcpy(&asw->attr, attr, sizeof(asw->attr));
130 
131 	schedule_work(&asw->work);
132 
133 	return 0;
134 }
135 
136 /**
137  *	switchdev_port_attr_set - Set port attribute
138  *
139  *	@dev: port device
140  *	@attr: attribute to set
141  *
142  *	Use a 2-phase prepare-commit transaction model to ensure
143  *	system is not left in a partially updated state due to
144  *	failure from driver/device.
145  */
146 int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
147 {
148 	int err;
149 
150 	if (!rtnl_is_locked()) {
151 		/* Running prepare-commit transaction across stacked
152 		 * devices requires nothing moves, so if rtnl_lock is
153 		 * not held, schedule a worker thread to hold rtnl_lock
154 		 * while setting attr.
155 		 */
156 
157 		return switchdev_port_attr_set_defer(dev, attr);
158 	}
159 
160 	/* Phase I: prepare for attr set. Driver/device should fail
161 	 * here if there are going to be issues in the commit phase,
162 	 * such as lack of resources or support.  The driver/device
163 	 * should reserve resources needed for the commit phase here,
164 	 * but should not commit the attr.
165 	 */
166 
167 	attr->trans = SWITCHDEV_TRANS_PREPARE;
168 	err = __switchdev_port_attr_set(dev, attr);
169 	if (err) {
170 		/* Prepare phase failed: abort the transaction.  Any
171 		 * resources reserved in the prepare phase are
172 		 * released.
173 		 */
174 
175 		if (err != -EOPNOTSUPP) {
176 			attr->trans = SWITCHDEV_TRANS_ABORT;
177 			__switchdev_port_attr_set(dev, attr);
178 		}
179 
180 		return err;
181 	}
182 
183 	/* Phase II: commit attr set.  This cannot fail as a fault
184 	 * of driver/device.  If it does, it's a bug in the driver/device
185 	 * because the driver said everythings was OK in phase I.
186 	 */
187 
188 	attr->trans = SWITCHDEV_TRANS_COMMIT;
189 	err = __switchdev_port_attr_set(dev, attr);
190 	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
191 	     dev->name, attr->id);
192 
193 	return err;
194 }
195 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
196 
197 static int __switchdev_port_obj_add(struct net_device *dev,
198 				    struct switchdev_obj *obj)
199 {
200 	const struct switchdev_ops *ops = dev->switchdev_ops;
201 	struct net_device *lower_dev;
202 	struct list_head *iter;
203 	int err = -EOPNOTSUPP;
204 
205 	if (ops && ops->switchdev_port_obj_add)
206 		return ops->switchdev_port_obj_add(dev, obj);
207 
208 	/* Switch device port(s) may be stacked under
209 	 * bond/team/vlan dev, so recurse down to add object on
210 	 * each port.
211 	 */
212 
213 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
214 		err = __switchdev_port_obj_add(lower_dev, obj);
215 		if (err)
216 			break;
217 	}
218 
219 	return err;
220 }
221 
222 /**
223  *	switchdev_port_obj_add - Add port object
224  *
225  *	@dev: port device
226  *	@obj: object to add
227  *
228  *	Use a 2-phase prepare-commit transaction model to ensure
229  *	system is not left in a partially updated state due to
230  *	failure from driver/device.
231  *
232  *	rtnl_lock must be held.
233  */
234 int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
235 {
236 	int err;
237 
238 	ASSERT_RTNL();
239 
240 	/* Phase I: prepare for obj add. Driver/device should fail
241 	 * here if there are going to be issues in the commit phase,
242 	 * such as lack of resources or support.  The driver/device
243 	 * should reserve resources needed for the commit phase here,
244 	 * but should not commit the obj.
245 	 */
246 
247 	obj->trans = SWITCHDEV_TRANS_PREPARE;
248 	err = __switchdev_port_obj_add(dev, obj);
249 	if (err) {
250 		/* Prepare phase failed: abort the transaction.  Any
251 		 * resources reserved in the prepare phase are
252 		 * released.
253 		 */
254 
255 		if (err != -EOPNOTSUPP) {
256 			obj->trans = SWITCHDEV_TRANS_ABORT;
257 			__switchdev_port_obj_add(dev, obj);
258 		}
259 
260 		return err;
261 	}
262 
263 	/* Phase II: commit obj add.  This cannot fail as a fault
264 	 * of driver/device.  If it does, it's a bug in the driver/device
265 	 * because the driver said everythings was OK in phase I.
266 	 */
267 
268 	obj->trans = SWITCHDEV_TRANS_COMMIT;
269 	err = __switchdev_port_obj_add(dev, obj);
270 	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
271 
272 	return err;
273 }
274 EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
275 
276 /**
277  *	switchdev_port_obj_del - Delete port object
278  *
279  *	@dev: port device
280  *	@obj: object to delete
281  */
282 int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
283 {
284 	const struct switchdev_ops *ops = dev->switchdev_ops;
285 	struct net_device *lower_dev;
286 	struct list_head *iter;
287 	int err = -EOPNOTSUPP;
288 
289 	if (ops && ops->switchdev_port_obj_del)
290 		return ops->switchdev_port_obj_del(dev, obj);
291 
292 	/* Switch device port(s) may be stacked under
293 	 * bond/team/vlan dev, so recurse down to delete object on
294 	 * each port.
295 	 */
296 
297 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
298 		err = switchdev_port_obj_del(lower_dev, obj);
299 		if (err)
300 			break;
301 	}
302 
303 	return err;
304 }
305 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
306 
307 /**
308  *	switchdev_port_obj_dump - Dump port objects
309  *
310  *	@dev: port device
311  *	@obj: object to dump
312  */
313 int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
314 {
315 	const struct switchdev_ops *ops = dev->switchdev_ops;
316 	struct net_device *lower_dev;
317 	struct list_head *iter;
318 	int err = -EOPNOTSUPP;
319 
320 	if (ops && ops->switchdev_port_obj_dump)
321 		return ops->switchdev_port_obj_dump(dev, obj);
322 
323 	/* Switch device port(s) may be stacked under
324 	 * bond/team/vlan dev, so recurse down to dump objects on
325 	 * first port at bottom of stack.
326 	 */
327 
328 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
329 		err = switchdev_port_obj_dump(lower_dev, obj);
330 		break;
331 	}
332 
333 	return err;
334 }
335 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
336 
/* Taken around every access to switchdev_notif_chain in this file. */
static DEFINE_MUTEX(switchdev_mutex);
/* Notifier chain used to deliver switchdev events. */
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
339 
340 /**
341  *	register_switchdev_notifier - Register notifier
342  *	@nb: notifier_block
343  *
344  *	Register switch device notifier. This should be used by code
345  *	which needs to monitor events happening in particular device.
346  *	Return values are same as for atomic_notifier_chain_register().
347  */
348 int register_switchdev_notifier(struct notifier_block *nb)
349 {
350 	int err;
351 
352 	mutex_lock(&switchdev_mutex);
353 	err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
354 	mutex_unlock(&switchdev_mutex);
355 	return err;
356 }
357 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
358 
359 /**
360  *	unregister_switchdev_notifier - Unregister notifier
361  *	@nb: notifier_block
362  *
363  *	Unregister switch device notifier.
364  *	Return values are same as for atomic_notifier_chain_unregister().
365  */
366 int unregister_switchdev_notifier(struct notifier_block *nb)
367 {
368 	int err;
369 
370 	mutex_lock(&switchdev_mutex);
371 	err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
372 	mutex_unlock(&switchdev_mutex);
373 	return err;
374 }
375 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
376 
377 /**
378  *	call_switchdev_notifiers - Call notifiers
379  *	@val: value passed unmodified to notifier function
380  *	@dev: port device
381  *	@info: notifier information data
382  *
383  *	Call all network notifier blocks. This should be called by driver
384  *	when it needs to propagate hardware event.
385  *	Return values are same as for atomic_notifier_call_chain().
386  */
387 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
388 			     struct switchdev_notifier_info *info)
389 {
390 	int err;
391 
392 	info->dev = dev;
393 	mutex_lock(&switchdev_mutex);
394 	err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
395 	mutex_unlock(&switchdev_mutex);
396 	return err;
397 }
398 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
399 
400 struct switchdev_vlan_dump {
401 	struct switchdev_obj obj;
402 	struct sk_buff *skb;
403 	u32 filter_mask;
404 	u16 flags;
405 	u16 begin;
406 	u16 end;
407 };
408 
409 static int switchdev_port_vlan_dump_put(struct net_device *dev,
410 					struct switchdev_vlan_dump *dump)
411 {
412 	struct bridge_vlan_info vinfo;
413 
414 	vinfo.flags = dump->flags;
415 
416 	if (dump->begin == 0 && dump->end == 0) {
417 		return 0;
418 	} else if (dump->begin == dump->end) {
419 		vinfo.vid = dump->begin;
420 		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
421 			    sizeof(vinfo), &vinfo))
422 			return -EMSGSIZE;
423 	} else {
424 		vinfo.vid = dump->begin;
425 		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
426 		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
427 			    sizeof(vinfo), &vinfo))
428 			return -EMSGSIZE;
429 		vinfo.vid = dump->end;
430 		vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
431 		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
432 		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
433 			    sizeof(vinfo), &vinfo))
434 			return -EMSGSIZE;
435 	}
436 
437 	return 0;
438 }
439 
440 static int switchdev_port_vlan_dump_cb(struct net_device *dev,
441 				       struct switchdev_obj *obj)
442 {
443 	struct switchdev_vlan_dump *dump =
444 		container_of(obj, struct switchdev_vlan_dump, obj);
445 	struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
446 	int err = 0;
447 
448 	if (vlan->vid_begin > vlan->vid_end)
449 		return -EINVAL;
450 
451 	if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
452 		dump->flags = vlan->flags;
453 		for (dump->begin = dump->end = vlan->vid_begin;
454 		     dump->begin <= vlan->vid_end;
455 		     dump->begin++, dump->end++) {
456 			err = switchdev_port_vlan_dump_put(dev, dump);
457 			if (err)
458 				return err;
459 		}
460 	} else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
461 		if (dump->begin > vlan->vid_begin &&
462 		    dump->begin >= vlan->vid_end) {
463 			if ((dump->begin - 1) == vlan->vid_end &&
464 			    dump->flags == vlan->flags) {
465 				/* prepend */
466 				dump->begin = vlan->vid_begin;
467 			} else {
468 				err = switchdev_port_vlan_dump_put(dev, dump);
469 				dump->flags = vlan->flags;
470 				dump->begin = vlan->vid_begin;
471 				dump->end = vlan->vid_end;
472 			}
473 		} else if (dump->end <= vlan->vid_begin &&
474 		           dump->end < vlan->vid_end) {
475 			if ((dump->end  + 1) == vlan->vid_begin &&
476 			    dump->flags == vlan->flags) {
477 				/* append */
478 				dump->end = vlan->vid_end;
479 			} else {
480 				err = switchdev_port_vlan_dump_put(dev, dump);
481 				dump->flags = vlan->flags;
482 				dump->begin = vlan->vid_begin;
483 				dump->end = vlan->vid_end;
484 			}
485 		} else {
486 			err = -EINVAL;
487 		}
488 	}
489 
490 	return err;
491 }
492 
493 static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
494 				    u32 filter_mask)
495 {
496 	struct switchdev_vlan_dump dump = {
497 		.obj = {
498 			.id = SWITCHDEV_OBJ_PORT_VLAN,
499 			.cb = switchdev_port_vlan_dump_cb,
500 		},
501 		.skb = skb,
502 		.filter_mask = filter_mask,
503 	};
504 	int err = 0;
505 
506 	if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
507 	    (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
508 		err = switchdev_port_obj_dump(dev, &dump.obj);
509 		if (err)
510 			goto err_out;
511 		if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
512 			/* last one */
513 			err = switchdev_port_vlan_dump_put(dev, &dump);
514 	}
515 
516 err_out:
517 	return err == -EOPNOTSUPP ? 0 : err;
518 }
519 
520 /**
521  *	switchdev_port_bridge_getlink - Get bridge port attributes
522  *
523  *	@dev: port device
524  *
525  *	Called for SELF on rtnl_bridge_getlink to get bridge port
526  *	attributes.
527  */
528 int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
529 				  struct net_device *dev, u32 filter_mask,
530 				  int nlflags)
531 {
532 	struct switchdev_attr attr = {
533 		.id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
534 	};
535 	u16 mode = BRIDGE_MODE_UNDEF;
536 	u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
537 	int err;
538 
539 	err = switchdev_port_attr_get(dev, &attr);
540 	if (err && err != -EOPNOTSUPP)
541 		return err;
542 
543 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
544 				       attr.u.brport_flags, mask, nlflags,
545 				       filter_mask, switchdev_port_vlan_fill);
546 }
547 EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
548 
549 static int switchdev_port_br_setflag(struct net_device *dev,
550 				     struct nlattr *nlattr,
551 				     unsigned long brport_flag)
552 {
553 	struct switchdev_attr attr = {
554 		.id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
555 	};
556 	u8 flag = nla_get_u8(nlattr);
557 	int err;
558 
559 	err = switchdev_port_attr_get(dev, &attr);
560 	if (err)
561 		return err;
562 
563 	if (flag)
564 		attr.u.brport_flags |= brport_flag;
565 	else
566 		attr.u.brport_flags &= ~brport_flag;
567 
568 	return switchdev_port_attr_set(dev, &attr);
569 }
570 
571 static const struct nla_policy
572 switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
573 	[IFLA_BRPORT_STATE]		= { .type = NLA_U8 },
574 	[IFLA_BRPORT_COST]		= { .type = NLA_U32 },
575 	[IFLA_BRPORT_PRIORITY]		= { .type = NLA_U16 },
576 	[IFLA_BRPORT_MODE]		= { .type = NLA_U8 },
577 	[IFLA_BRPORT_GUARD]		= { .type = NLA_U8 },
578 	[IFLA_BRPORT_PROTECT]		= { .type = NLA_U8 },
579 	[IFLA_BRPORT_FAST_LEAVE]	= { .type = NLA_U8 },
580 	[IFLA_BRPORT_LEARNING]		= { .type = NLA_U8 },
581 	[IFLA_BRPORT_LEARNING_SYNC]	= { .type = NLA_U8 },
582 	[IFLA_BRPORT_UNICAST_FLOOD]	= { .type = NLA_U8 },
583 };
584 
585 static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
586 					      struct nlattr *protinfo)
587 {
588 	struct nlattr *attr;
589 	int rem;
590 	int err;
591 
592 	err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
593 				  switchdev_port_bridge_policy);
594 	if (err)
595 		return err;
596 
597 	nla_for_each_nested(attr, protinfo, rem) {
598 		switch (nla_type(attr)) {
599 		case IFLA_BRPORT_LEARNING:
600 			err = switchdev_port_br_setflag(dev, attr,
601 							BR_LEARNING);
602 			break;
603 		case IFLA_BRPORT_LEARNING_SYNC:
604 			err = switchdev_port_br_setflag(dev, attr,
605 							BR_LEARNING_SYNC);
606 			break;
607 		default:
608 			err = -EOPNOTSUPP;
609 			break;
610 		}
611 		if (err)
612 			return err;
613 	}
614 
615 	return 0;
616 }
617 
618 static int switchdev_port_br_afspec(struct net_device *dev,
619 				    struct nlattr *afspec,
620 				    int (*f)(struct net_device *dev,
621 					     struct switchdev_obj *obj))
622 {
623 	struct nlattr *attr;
624 	struct bridge_vlan_info *vinfo;
625 	struct switchdev_obj obj = {
626 		.id = SWITCHDEV_OBJ_PORT_VLAN,
627 	};
628 	struct switchdev_obj_vlan *vlan = &obj.u.vlan;
629 	int rem;
630 	int err;
631 
632 	nla_for_each_nested(attr, afspec, rem) {
633 		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
634 			continue;
635 		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
636 			return -EINVAL;
637 		vinfo = nla_data(attr);
638 		if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
639 			return -EINVAL;
640 		vlan->flags = vinfo->flags;
641 		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
642 			if (vlan->vid_begin)
643 				return -EINVAL;
644 			vlan->vid_begin = vinfo->vid;
645 		} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
646 			if (!vlan->vid_begin)
647 				return -EINVAL;
648 			vlan->vid_end = vinfo->vid;
649 			if (vlan->vid_end <= vlan->vid_begin)
650 				return -EINVAL;
651 			err = f(dev, &obj);
652 			if (err)
653 				return err;
654 			memset(vlan, 0, sizeof(*vlan));
655 		} else {
656 			if (vlan->vid_begin)
657 				return -EINVAL;
658 			vlan->vid_begin = vinfo->vid;
659 			vlan->vid_end = vinfo->vid;
660 			err = f(dev, &obj);
661 			if (err)
662 				return err;
663 			memset(vlan, 0, sizeof(*vlan));
664 		}
665 	}
666 
667 	return 0;
668 }
669 
670 /**
671  *	switchdev_port_bridge_setlink - Set bridge port attributes
672  *
673  *	@dev: port device
674  *	@nlh: netlink header
675  *	@flags: netlink flags
676  *
677  *	Called for SELF on rtnl_bridge_setlink to set bridge port
678  *	attributes.
679  */
680 int switchdev_port_bridge_setlink(struct net_device *dev,
681 				  struct nlmsghdr *nlh, u16 flags)
682 {
683 	struct nlattr *protinfo;
684 	struct nlattr *afspec;
685 	int err = 0;
686 
687 	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
688 				   IFLA_PROTINFO);
689 	if (protinfo) {
690 		err = switchdev_port_br_setlink_protinfo(dev, protinfo);
691 		if (err)
692 			return err;
693 	}
694 
695 	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
696 				 IFLA_AF_SPEC);
697 	if (afspec)
698 		err = switchdev_port_br_afspec(dev, afspec,
699 					       switchdev_port_obj_add);
700 
701 	return err;
702 }
703 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
704 
705 /**
706  *	switchdev_port_bridge_dellink - Set bridge port attributes
707  *
708  *	@dev: port device
709  *	@nlh: netlink header
710  *	@flags: netlink flags
711  *
712  *	Called for SELF on rtnl_bridge_dellink to set bridge port
713  *	attributes.
714  */
715 int switchdev_port_bridge_dellink(struct net_device *dev,
716 				  struct nlmsghdr *nlh, u16 flags)
717 {
718 	struct nlattr *afspec;
719 
720 	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
721 				 IFLA_AF_SPEC);
722 	if (afspec)
723 		return switchdev_port_br_afspec(dev, afspec,
724 						switchdev_port_obj_del);
725 
726 	return 0;
727 }
728 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
729 
730 /**
731  *	switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
732  *
733  *	@ndmsg: netlink hdr
734  *	@nlattr: netlink attributes
735  *	@dev: port device
736  *	@addr: MAC address to add
737  *	@vid: VLAN to add
738  *
739  *	Add FDB entry to switch device.
740  */
741 int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
742 			   struct net_device *dev, const unsigned char *addr,
743 			   u16 vid, u16 nlm_flags)
744 {
745 	struct switchdev_obj obj = {
746 		.id = SWITCHDEV_OBJ_PORT_FDB,
747 		.u.fdb = {
748 			.addr = addr,
749 			.vid = vid,
750 		},
751 	};
752 
753 	return switchdev_port_obj_add(dev, &obj);
754 }
755 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
756 
757 /**
758  *	switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
759  *
760  *	@ndmsg: netlink hdr
761  *	@nlattr: netlink attributes
762  *	@dev: port device
763  *	@addr: MAC address to delete
764  *	@vid: VLAN to delete
765  *
766  *	Delete FDB entry from switch device.
767  */
768 int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
769 			   struct net_device *dev, const unsigned char *addr,
770 			   u16 vid)
771 {
772 	struct switchdev_obj obj = {
773 		.id = SWITCHDEV_OBJ_PORT_FDB,
774 		.u.fdb = {
775 			.addr = addr,
776 			.vid = vid,
777 		},
778 	};
779 
780 	return switchdev_port_obj_del(dev, &obj);
781 }
782 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
783 
784 struct switchdev_fdb_dump {
785 	struct switchdev_obj obj;
786 	struct sk_buff *skb;
787 	struct netlink_callback *cb;
788 	int idx;
789 };
790 
791 static int switchdev_port_fdb_dump_cb(struct net_device *dev,
792 				      struct switchdev_obj *obj)
793 {
794 	struct switchdev_fdb_dump *dump =
795 		container_of(obj, struct switchdev_fdb_dump, obj);
796 	u32 portid = NETLINK_CB(dump->cb->skb).portid;
797 	u32 seq = dump->cb->nlh->nlmsg_seq;
798 	struct nlmsghdr *nlh;
799 	struct ndmsg *ndm;
800 
801 	if (dump->idx < dump->cb->args[0])
802 		goto skip;
803 
804 	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
805 			sizeof(*ndm), NLM_F_MULTI);
806 	if (!nlh)
807 		return -EMSGSIZE;
808 
809 	ndm = nlmsg_data(nlh);
810 	ndm->ndm_family  = AF_BRIDGE;
811 	ndm->ndm_pad1    = 0;
812 	ndm->ndm_pad2    = 0;
813 	ndm->ndm_flags   = NTF_SELF;
814 	ndm->ndm_type    = 0;
815 	ndm->ndm_ifindex = dev->ifindex;
816 	ndm->ndm_state   = obj->u.fdb.ndm_state;
817 
818 	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
819 		goto nla_put_failure;
820 
821 	if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
822 		goto nla_put_failure;
823 
824 	nlmsg_end(dump->skb, nlh);
825 
826 skip:
827 	dump->idx++;
828 	return 0;
829 
830 nla_put_failure:
831 	nlmsg_cancel(dump->skb, nlh);
832 	return -EMSGSIZE;
833 }
834 
835 /**
836  *	switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
837  *
838  *	@skb: netlink skb
839  *	@cb: netlink callback
840  *	@dev: port device
841  *	@filter_dev: filter device
842  *	@idx:
843  *
844  *	Delete FDB entry from switch device.
845  */
846 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
847 			    struct net_device *dev,
848 			    struct net_device *filter_dev, int idx)
849 {
850 	struct switchdev_fdb_dump dump = {
851 		.obj = {
852 			.id = SWITCHDEV_OBJ_PORT_FDB,
853 			.cb = switchdev_port_fdb_dump_cb,
854 		},
855 		.skb = skb,
856 		.cb = cb,
857 		.idx = idx,
858 	};
859 
860 	switchdev_port_obj_dump(dev, &dump.obj);
861 	return dump.idx;
862 }
863 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
864 
865 static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
866 {
867 	const struct switchdev_ops *ops = dev->switchdev_ops;
868 	struct net_device *lower_dev;
869 	struct net_device *port_dev;
870 	struct list_head *iter;
871 
872 	/* Recusively search down until we find a sw port dev.
873 	 * (A sw port dev supports switchdev_port_attr_get).
874 	 */
875 
876 	if (ops && ops->switchdev_port_attr_get)
877 		return dev;
878 
879 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
880 		port_dev = switchdev_get_lowest_dev(lower_dev);
881 		if (port_dev)
882 			return port_dev;
883 	}
884 
885 	return NULL;
886 }
887 
888 static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
889 {
890 	struct switchdev_attr attr = {
891 		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
892 	};
893 	struct switchdev_attr prev_attr;
894 	struct net_device *dev = NULL;
895 	int nhsel;
896 
897 	/* For this route, all nexthop devs must be on the same switch. */
898 
899 	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
900 		const struct fib_nh *nh = &fi->fib_nh[nhsel];
901 
902 		if (!nh->nh_dev)
903 			return NULL;
904 
905 		dev = switchdev_get_lowest_dev(nh->nh_dev);
906 		if (!dev)
907 			return NULL;
908 
909 		if (switchdev_port_attr_get(dev, &attr))
910 			return NULL;
911 
912 		if (nhsel > 0 &&
913 		    !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
914 				return NULL;
915 
916 		prev_attr = attr;
917 	}
918 
919 	return dev;
920 }
921 
922 /**
923  *	switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
924  *
925  *	@dst: route's IPv4 destination address
926  *	@dst_len: destination address length (prefix length)
927  *	@fi: route FIB info structure
928  *	@tos: route TOS
929  *	@type: route type
930  *	@nlflags: netlink flags passed in (NLM_F_*)
931  *	@tb_id: route table ID
932  *
933  *	Add/modify switch IPv4 route entry.
934  */
935 int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
936 			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
937 {
938 	struct switchdev_obj fib_obj = {
939 		.id = SWITCHDEV_OBJ_IPV4_FIB,
940 		.u.ipv4_fib = {
941 			.dst = dst,
942 			.dst_len = dst_len,
943 			.fi = fi,
944 			.tos = tos,
945 			.type = type,
946 			.nlflags = nlflags,
947 			.tb_id = tb_id,
948 		},
949 	};
950 	struct net_device *dev;
951 	int err = 0;
952 
953 	/* Don't offload route if using custom ip rules or if
954 	 * IPv4 FIB offloading has been disabled completely.
955 	 */
956 
957 #ifdef CONFIG_IP_MULTIPLE_TABLES
958 	if (fi->fib_net->ipv4.fib_has_custom_rules)
959 		return 0;
960 #endif
961 
962 	if (fi->fib_net->ipv4.fib_offload_disabled)
963 		return 0;
964 
965 	dev = switchdev_get_dev_by_nhs(fi);
966 	if (!dev)
967 		return 0;
968 
969 	err = switchdev_port_obj_add(dev, &fib_obj);
970 	if (!err)
971 		fi->fib_flags |= RTNH_F_OFFLOAD;
972 
973 	return err == -EOPNOTSUPP ? 0 : err;
974 }
975 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
976 
977 /**
978  *	switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
979  *
980  *	@dst: route's IPv4 destination address
981  *	@dst_len: destination address length (prefix length)
982  *	@fi: route FIB info structure
983  *	@tos: route TOS
984  *	@type: route type
985  *	@tb_id: route table ID
986  *
987  *	Delete IPv4 route entry from switch device.
988  */
989 int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
990 			   u8 tos, u8 type, u32 tb_id)
991 {
992 	struct switchdev_obj fib_obj = {
993 		.id = SWITCHDEV_OBJ_IPV4_FIB,
994 		.u.ipv4_fib = {
995 			.dst = dst,
996 			.dst_len = dst_len,
997 			.fi = fi,
998 			.tos = tos,
999 			.type = type,
1000 			.nlflags = 0,
1001 			.tb_id = tb_id,
1002 		},
1003 	};
1004 	struct net_device *dev;
1005 	int err = 0;
1006 
1007 	if (!(fi->fib_flags & RTNH_F_OFFLOAD))
1008 		return 0;
1009 
1010 	dev = switchdev_get_dev_by_nhs(fi);
1011 	if (!dev)
1012 		return 0;
1013 
1014 	err = switchdev_port_obj_del(dev, &fib_obj);
1015 	if (!err)
1016 		fi->fib_flags &= ~RTNH_F_OFFLOAD;
1017 
1018 	return err == -EOPNOTSUPP ? 0 : err;
1019 }
1020 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
1021 
1022 /**
1023  *	switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
1024  *
1025  *	@fi: route FIB info structure
1026  */
1027 void switchdev_fib_ipv4_abort(struct fib_info *fi)
1028 {
1029 	/* There was a problem installing this route to the offload
1030 	 * device.  For now, until we come up with more refined
1031 	 * policy handling, abruptly end IPv4 fib offloading for
1032 	 * for entire net by flushing offload device(s) of all
1033 	 * IPv4 routes, and mark IPv4 fib offloading broken from
1034 	 * this point forward.
1035 	 */
1036 
1037 	fib_flush_external(fi->fib_net);
1038 	fi->fib_net->ipv4.fib_offload_disabled = true;
1039 }
1040 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
1041 
1042 static bool switchdev_port_same_parent_id(struct net_device *a,
1043 					  struct net_device *b)
1044 {
1045 	struct switchdev_attr a_attr = {
1046 		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
1047 		.flags = SWITCHDEV_F_NO_RECURSE,
1048 	};
1049 	struct switchdev_attr b_attr = {
1050 		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
1051 		.flags = SWITCHDEV_F_NO_RECURSE,
1052 	};
1053 
1054 	if (switchdev_port_attr_get(a, &a_attr) ||
1055 	    switchdev_port_attr_get(b, &b_attr))
1056 		return false;
1057 
1058 	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
1059 }
1060 
1061 static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
1062 				       struct net_device *group_dev)
1063 {
1064 	struct net_device *lower_dev;
1065 	struct list_head *iter;
1066 
1067 	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1068 		if (lower_dev == dev)
1069 			continue;
1070 		if (switchdev_port_same_parent_id(dev, lower_dev))
1071 			return lower_dev->offload_fwd_mark;
1072 		return switchdev_port_fwd_mark_get(dev, lower_dev);
1073 	}
1074 
1075 	return dev->ifindex;
1076 }
1077 
1078 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
1079 					  u32 old_mark, u32 *reset_mark)
1080 {
1081 	struct net_device *lower_dev;
1082 	struct list_head *iter;
1083 
1084 	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
1085 		if (lower_dev->offload_fwd_mark == old_mark) {
1086 			if (!*reset_mark)
1087 				*reset_mark = lower_dev->ifindex;
1088 			lower_dev->offload_fwd_mark = *reset_mark;
1089 		}
1090 		switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
1091 	}
1092 }
1093 
1094 /**
1095  *	switchdev_port_fwd_mark_set - Set port offload forwarding mark
1096  *
1097  *	@dev: port device
1098  *	@group_dev: containing device
1099  *	@joining: true if dev is joining group; false if leaving group
1100  *
1101  *	An ungrouped port's offload mark is just its ifindex.  A grouped
1102  *	port's (member of a bridge, for example) offload mark is the ifindex
1103  *	of one of the ports in the group with the same parent (switch) ID.
1104  *	Ports on the same device in the same group will have the same mark.
1105  *
1106  *	Example:
1107  *
1108  *		br0		ifindex=9
1109  *		  sw1p1		ifindex=2	mark=2
1110  *		  sw1p2		ifindex=3	mark=2
1111  *		  sw2p1		ifindex=4	mark=5
1112  *		  sw2p2		ifindex=5	mark=5
1113  *
1114  *	If sw2p2 leaves the bridge, we'll have:
1115  *
1116  *		br0		ifindex=9
1117  *		  sw1p1		ifindex=2	mark=2
1118  *		  sw1p2		ifindex=3	mark=2
1119  *		  sw2p1		ifindex=4	mark=4
1120  *		sw2p2		ifindex=5	mark=5
1121  */
1122 void switchdev_port_fwd_mark_set(struct net_device *dev,
1123 				 struct net_device *group_dev,
1124 				 bool joining)
1125 {
1126 	u32 mark = dev->ifindex;
1127 	u32 reset_mark = 0;
1128 
1129 	if (group_dev && joining) {
1130 		mark = switchdev_port_fwd_mark_get(dev, group_dev);
1131 	} else if (group_dev && !joining) {
1132 		if (dev->offload_fwd_mark == mark)
1133 			/* Ohoh, this port was the mark reference port,
1134 			 * but it's leaving the group, so reset the
1135 			 * mark for the remaining ports in the group.
1136 			 */
1137 			switchdev_port_fwd_mark_reset(group_dev, mark,
1138 						      &reset_mark);
1139 	}
1140 
1141 	dev->offload_fwd_mark = mark;
1142 }
1143 EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
1144