xref: /openbmc/linux/net/switchdev/switchdev.c (revision 62e59c4e)
1 /*
2  * net/switchdev/switchdev.c - Switch device API
3  * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
4  * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/types.h>
14 #include <linux/init.h>
15 #include <linux/mutex.h>
16 #include <linux/notifier.h>
17 #include <linux/netdevice.h>
18 #include <linux/etherdevice.h>
19 #include <linux/if_bridge.h>
20 #include <linux/list.h>
21 #include <linux/workqueue.h>
22 #include <linux/if_vlan.h>
23 #include <linux/rtnetlink.h>
24 #include <net/switchdev.h>
25 
/* Queue of pending deferred operations.  Items are appended at the tail
 * and removed from the head, so they are processed in FIFO order.
 */
static LIST_HEAD(deferred);
/* Taken (with bottom halves disabled) around every manipulation of the
 * 'deferred' list in this file.
 */
static DEFINE_SPINLOCK(deferred_lock);
28 
/* Signature of a deferred operation.  switchdev_deferred_process() calls
 * it with the device recorded in the queued item and a pointer to the
 * payload that was copied into that item at enqueue time.
 */
typedef void switchdev_deferred_func_t(struct net_device *dev,
				       const void *data);
31 
32 struct switchdev_deferred_item {
33 	struct list_head list;
34 	struct net_device *dev;
35 	switchdev_deferred_func_t *func;
36 	unsigned long data[0];
37 };
38 
39 static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
40 {
41 	struct switchdev_deferred_item *dfitem;
42 
43 	spin_lock_bh(&deferred_lock);
44 	if (list_empty(&deferred)) {
45 		dfitem = NULL;
46 		goto unlock;
47 	}
48 	dfitem = list_first_entry(&deferred,
49 				  struct switchdev_deferred_item, list);
50 	list_del(&dfitem->list);
51 unlock:
52 	spin_unlock_bh(&deferred_lock);
53 	return dfitem;
54 }
55 
56 /**
57  *	switchdev_deferred_process - Process ops in deferred queue
58  *
59  *	Called to flush the ops currently queued in deferred ops queue.
60  *	rtnl_lock must be held.
61  */
62 void switchdev_deferred_process(void)
63 {
64 	struct switchdev_deferred_item *dfitem;
65 
66 	ASSERT_RTNL();
67 
68 	while ((dfitem = switchdev_deferred_dequeue())) {
69 		dfitem->func(dfitem->dev, dfitem->data);
70 		dev_put(dfitem->dev);
71 		kfree(dfitem);
72 	}
73 }
74 EXPORT_SYMBOL_GPL(switchdev_deferred_process);
75 
/* Work handler: takes rtnl_lock, flushes the deferred queue and releases
 * the lock again.  @work is not used.
 */
static void switchdev_deferred_process_work(struct work_struct *work)
{
	rtnl_lock();
	switchdev_deferred_process();
	rtnl_unlock();
}

/* Work item bound to the handler above; scheduled on every enqueue. */
static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);
84 
85 static int switchdev_deferred_enqueue(struct net_device *dev,
86 				      const void *data, size_t data_len,
87 				      switchdev_deferred_func_t *func)
88 {
89 	struct switchdev_deferred_item *dfitem;
90 
91 	dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
92 	if (!dfitem)
93 		return -ENOMEM;
94 	dfitem->dev = dev;
95 	dfitem->func = func;
96 	memcpy(dfitem->data, data, data_len);
97 	dev_hold(dev);
98 	spin_lock_bh(&deferred_lock);
99 	list_add_tail(&dfitem->list, &deferred);
100 	spin_unlock_bh(&deferred_lock);
101 	schedule_work(&deferred_process_work);
102 	return 0;
103 }
104 
105 static int switchdev_port_attr_notify(enum switchdev_notifier_type nt,
106 				      struct net_device *dev,
107 				      const struct switchdev_attr *attr,
108 				      struct switchdev_trans *trans)
109 {
110 	int err;
111 	int rc;
112 
113 	struct switchdev_notifier_port_attr_info attr_info = {
114 		.attr = attr,
115 		.trans = trans,
116 		.handled = false,
117 	};
118 
119 	rc = call_switchdev_blocking_notifiers(nt, dev,
120 					       &attr_info.info, NULL);
121 	err = notifier_to_errno(rc);
122 	if (err) {
123 		WARN_ON(!attr_info.handled);
124 		return err;
125 	}
126 
127 	if (!attr_info.handled)
128 		return -EOPNOTSUPP;
129 
130 	return 0;
131 }
132 
133 static int switchdev_port_attr_set_now(struct net_device *dev,
134 				       const struct switchdev_attr *attr)
135 {
136 	struct switchdev_trans trans;
137 	int err;
138 
139 	/* Phase I: prepare for attr set. Driver/device should fail
140 	 * here if there are going to be issues in the commit phase,
141 	 * such as lack of resources or support.  The driver/device
142 	 * should reserve resources needed for the commit phase here,
143 	 * but should not commit the attr.
144 	 */
145 
146 	trans.ph_prepare = true;
147 	err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
148 					 &trans);
149 	if (err)
150 		return err;
151 
152 	/* Phase II: commit attr set.  This cannot fail as a fault
153 	 * of driver/device.  If it does, it's a bug in the driver/device
154 	 * because the driver said everythings was OK in phase I.
155 	 */
156 
157 	trans.ph_prepare = false;
158 	err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
159 					 &trans);
160 	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
161 	     dev->name, attr->id);
162 
163 	return err;
164 }
165 
166 static void switchdev_port_attr_set_deferred(struct net_device *dev,
167 					     const void *data)
168 {
169 	const struct switchdev_attr *attr = data;
170 	int err;
171 
172 	err = switchdev_port_attr_set_now(dev, attr);
173 	if (err && err != -EOPNOTSUPP)
174 		netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
175 			   err, attr->id);
176 	if (attr->complete)
177 		attr->complete(dev, err, attr->complete_priv);
178 }
179 
180 static int switchdev_port_attr_set_defer(struct net_device *dev,
181 					 const struct switchdev_attr *attr)
182 {
183 	return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
184 					  switchdev_port_attr_set_deferred);
185 }
186 
187 /**
188  *	switchdev_port_attr_set - Set port attribute
189  *
190  *	@dev: port device
191  *	@attr: attribute to set
192  *
193  *	Use a 2-phase prepare-commit transaction model to ensure
194  *	system is not left in a partially updated state due to
195  *	failure from driver/device.
196  *
197  *	rtnl_lock must be held and must not be in atomic section,
198  *	in case SWITCHDEV_F_DEFER flag is not set.
199  */
200 int switchdev_port_attr_set(struct net_device *dev,
201 			    const struct switchdev_attr *attr)
202 {
203 	if (attr->flags & SWITCHDEV_F_DEFER)
204 		return switchdev_port_attr_set_defer(dev, attr);
205 	ASSERT_RTNL();
206 	return switchdev_port_attr_set_now(dev, attr);
207 }
208 EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
209 
210 static size_t switchdev_obj_size(const struct switchdev_obj *obj)
211 {
212 	switch (obj->id) {
213 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
214 		return sizeof(struct switchdev_obj_port_vlan);
215 	case SWITCHDEV_OBJ_ID_PORT_MDB:
216 		return sizeof(struct switchdev_obj_port_mdb);
217 	case SWITCHDEV_OBJ_ID_HOST_MDB:
218 		return sizeof(struct switchdev_obj_port_mdb);
219 	default:
220 		BUG();
221 	}
222 	return 0;
223 }
224 
225 static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
226 				     struct net_device *dev,
227 				     const struct switchdev_obj *obj,
228 				     struct switchdev_trans *trans,
229 				     struct netlink_ext_ack *extack)
230 {
231 	int rc;
232 	int err;
233 
234 	struct switchdev_notifier_port_obj_info obj_info = {
235 		.obj = obj,
236 		.trans = trans,
237 		.handled = false,
238 	};
239 
240 	rc = call_switchdev_blocking_notifiers(nt, dev, &obj_info.info, extack);
241 	err = notifier_to_errno(rc);
242 	if (err) {
243 		WARN_ON(!obj_info.handled);
244 		return err;
245 	}
246 	if (!obj_info.handled)
247 		return -EOPNOTSUPP;
248 	return 0;
249 }
250 
251 static int switchdev_port_obj_add_now(struct net_device *dev,
252 				      const struct switchdev_obj *obj,
253 				      struct netlink_ext_ack *extack)
254 {
255 	struct switchdev_trans trans;
256 	int err;
257 
258 	ASSERT_RTNL();
259 
260 	/* Phase I: prepare for obj add. Driver/device should fail
261 	 * here if there are going to be issues in the commit phase,
262 	 * such as lack of resources or support.  The driver/device
263 	 * should reserve resources needed for the commit phase here,
264 	 * but should not commit the obj.
265 	 */
266 
267 	trans.ph_prepare = true;
268 	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
269 					dev, obj, &trans, extack);
270 	if (err)
271 		return err;
272 
273 	/* Phase II: commit obj add.  This cannot fail as a fault
274 	 * of driver/device.  If it does, it's a bug in the driver/device
275 	 * because the driver said everythings was OK in phase I.
276 	 */
277 
278 	trans.ph_prepare = false;
279 	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
280 					dev, obj, &trans, extack);
281 	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
282 
283 	return err;
284 }
285 
286 static void switchdev_port_obj_add_deferred(struct net_device *dev,
287 					    const void *data)
288 {
289 	const struct switchdev_obj *obj = data;
290 	int err;
291 
292 	err = switchdev_port_obj_add_now(dev, obj, NULL);
293 	if (err && err != -EOPNOTSUPP)
294 		netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
295 			   err, obj->id);
296 	if (obj->complete)
297 		obj->complete(dev, err, obj->complete_priv);
298 }
299 
300 static int switchdev_port_obj_add_defer(struct net_device *dev,
301 					const struct switchdev_obj *obj)
302 {
303 	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
304 					  switchdev_port_obj_add_deferred);
305 }
306 
307 /**
308  *	switchdev_port_obj_add - Add port object
309  *
310  *	@dev: port device
311  *	@id: object ID
312  *	@obj: object to add
313  *
314  *	Use a 2-phase prepare-commit transaction model to ensure
315  *	system is not left in a partially updated state due to
316  *	failure from driver/device.
317  *
318  *	rtnl_lock must be held and must not be in atomic section,
319  *	in case SWITCHDEV_F_DEFER flag is not set.
320  */
321 int switchdev_port_obj_add(struct net_device *dev,
322 			   const struct switchdev_obj *obj,
323 			   struct netlink_ext_ack *extack)
324 {
325 	if (obj->flags & SWITCHDEV_F_DEFER)
326 		return switchdev_port_obj_add_defer(dev, obj);
327 	ASSERT_RTNL();
328 	return switchdev_port_obj_add_now(dev, obj, extack);
329 }
330 EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
331 
332 static int switchdev_port_obj_del_now(struct net_device *dev,
333 				      const struct switchdev_obj *obj)
334 {
335 	return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL,
336 					 dev, obj, NULL, NULL);
337 }
338 
339 static void switchdev_port_obj_del_deferred(struct net_device *dev,
340 					    const void *data)
341 {
342 	const struct switchdev_obj *obj = data;
343 	int err;
344 
345 	err = switchdev_port_obj_del_now(dev, obj);
346 	if (err && err != -EOPNOTSUPP)
347 		netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
348 			   err, obj->id);
349 	if (obj->complete)
350 		obj->complete(dev, err, obj->complete_priv);
351 }
352 
353 static int switchdev_port_obj_del_defer(struct net_device *dev,
354 					const struct switchdev_obj *obj)
355 {
356 	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
357 					  switchdev_port_obj_del_deferred);
358 }
359 
360 /**
361  *	switchdev_port_obj_del - Delete port object
362  *
363  *	@dev: port device
364  *	@id: object ID
365  *	@obj: object to delete
366  *
367  *	rtnl_lock must be held and must not be in atomic section,
368  *	in case SWITCHDEV_F_DEFER flag is not set.
369  */
370 int switchdev_port_obj_del(struct net_device *dev,
371 			   const struct switchdev_obj *obj)
372 {
373 	if (obj->flags & SWITCHDEV_F_DEFER)
374 		return switchdev_port_obj_del_defer(dev, obj);
375 	ASSERT_RTNL();
376 	return switchdev_port_obj_del_now(dev, obj);
377 }
378 EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
379 
/* Atomic chain used by {register,unregister,call}_switchdev_notifier(s)(). */
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
/* Blocking chain used by the *_switchdev_blocking_notifier(s)() variants. */
static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
382 
383 /**
384  *	register_switchdev_notifier - Register notifier
385  *	@nb: notifier_block
386  *
387  *	Register switch device notifier.
388  */
389 int register_switchdev_notifier(struct notifier_block *nb)
390 {
391 	return atomic_notifier_chain_register(&switchdev_notif_chain, nb);
392 }
393 EXPORT_SYMBOL_GPL(register_switchdev_notifier);
394 
395 /**
396  *	unregister_switchdev_notifier - Unregister notifier
397  *	@nb: notifier_block
398  *
399  *	Unregister switch device notifier.
400  */
401 int unregister_switchdev_notifier(struct notifier_block *nb)
402 {
403 	return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb);
404 }
405 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
406 
407 /**
408  *	call_switchdev_notifiers - Call notifiers
409  *	@val: value passed unmodified to notifier function
410  *	@dev: port device
411  *	@info: notifier information data
412  *
413  *	Call all network notifier blocks.
414  */
415 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
416 			     struct switchdev_notifier_info *info,
417 			     struct netlink_ext_ack *extack)
418 {
419 	info->dev = dev;
420 	info->extack = extack;
421 	return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
422 }
423 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
424 
425 int register_switchdev_blocking_notifier(struct notifier_block *nb)
426 {
427 	struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
428 
429 	return blocking_notifier_chain_register(chain, nb);
430 }
431 EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);
432 
433 int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
434 {
435 	struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
436 
437 	return blocking_notifier_chain_unregister(chain, nb);
438 }
439 EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);
440 
441 int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
442 				      struct switchdev_notifier_info *info,
443 				      struct netlink_ext_ack *extack)
444 {
445 	info->dev = dev;
446 	info->extack = extack;
447 	return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
448 					    val, info);
449 }
450 EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
451 
452 static int __switchdev_handle_port_obj_add(struct net_device *dev,
453 			struct switchdev_notifier_port_obj_info *port_obj_info,
454 			bool (*check_cb)(const struct net_device *dev),
455 			int (*add_cb)(struct net_device *dev,
456 				      const struct switchdev_obj *obj,
457 				      struct switchdev_trans *trans,
458 				      struct netlink_ext_ack *extack))
459 {
460 	struct netlink_ext_ack *extack;
461 	struct net_device *lower_dev;
462 	struct list_head *iter;
463 	int err = -EOPNOTSUPP;
464 
465 	extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
466 
467 	if (check_cb(dev)) {
468 		/* This flag is only checked if the return value is success. */
469 		port_obj_info->handled = true;
470 		return add_cb(dev, port_obj_info->obj, port_obj_info->trans,
471 			      extack);
472 	}
473 
474 	/* Switch ports might be stacked under e.g. a LAG. Ignore the
475 	 * unsupported devices, another driver might be able to handle them. But
476 	 * propagate to the callers any hard errors.
477 	 *
478 	 * If the driver does its own bookkeeping of stacked ports, it's not
479 	 * necessary to go through this helper.
480 	 */
481 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
482 		err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info,
483 						      check_cb, add_cb);
484 		if (err && err != -EOPNOTSUPP)
485 			return err;
486 	}
487 
488 	return err;
489 }
490 
491 int switchdev_handle_port_obj_add(struct net_device *dev,
492 			struct switchdev_notifier_port_obj_info *port_obj_info,
493 			bool (*check_cb)(const struct net_device *dev),
494 			int (*add_cb)(struct net_device *dev,
495 				      const struct switchdev_obj *obj,
496 				      struct switchdev_trans *trans,
497 				      struct netlink_ext_ack *extack))
498 {
499 	int err;
500 
501 	err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
502 					      add_cb);
503 	if (err == -EOPNOTSUPP)
504 		err = 0;
505 	return err;
506 }
507 EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
508 
509 static int __switchdev_handle_port_obj_del(struct net_device *dev,
510 			struct switchdev_notifier_port_obj_info *port_obj_info,
511 			bool (*check_cb)(const struct net_device *dev),
512 			int (*del_cb)(struct net_device *dev,
513 				      const struct switchdev_obj *obj))
514 {
515 	struct net_device *lower_dev;
516 	struct list_head *iter;
517 	int err = -EOPNOTSUPP;
518 
519 	if (check_cb(dev)) {
520 		/* This flag is only checked if the return value is success. */
521 		port_obj_info->handled = true;
522 		return del_cb(dev, port_obj_info->obj);
523 	}
524 
525 	/* Switch ports might be stacked under e.g. a LAG. Ignore the
526 	 * unsupported devices, another driver might be able to handle them. But
527 	 * propagate to the callers any hard errors.
528 	 *
529 	 * If the driver does its own bookkeeping of stacked ports, it's not
530 	 * necessary to go through this helper.
531 	 */
532 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
533 		err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info,
534 						      check_cb, del_cb);
535 		if (err && err != -EOPNOTSUPP)
536 			return err;
537 	}
538 
539 	return err;
540 }
541 
542 int switchdev_handle_port_obj_del(struct net_device *dev,
543 			struct switchdev_notifier_port_obj_info *port_obj_info,
544 			bool (*check_cb)(const struct net_device *dev),
545 			int (*del_cb)(struct net_device *dev,
546 				      const struct switchdev_obj *obj))
547 {
548 	int err;
549 
550 	err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
551 					      del_cb);
552 	if (err == -EOPNOTSUPP)
553 		err = 0;
554 	return err;
555 }
556 EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
557 
558 static int __switchdev_handle_port_attr_set(struct net_device *dev,
559 			struct switchdev_notifier_port_attr_info *port_attr_info,
560 			bool (*check_cb)(const struct net_device *dev),
561 			int (*set_cb)(struct net_device *dev,
562 				      const struct switchdev_attr *attr,
563 				      struct switchdev_trans *trans))
564 {
565 	struct net_device *lower_dev;
566 	struct list_head *iter;
567 	int err = -EOPNOTSUPP;
568 
569 	if (check_cb(dev)) {
570 		port_attr_info->handled = true;
571 		return set_cb(dev, port_attr_info->attr,
572 			      port_attr_info->trans);
573 	}
574 
575 	/* Switch ports might be stacked under e.g. a LAG. Ignore the
576 	 * unsupported devices, another driver might be able to handle them. But
577 	 * propagate to the callers any hard errors.
578 	 *
579 	 * If the driver does its own bookkeeping of stacked ports, it's not
580 	 * necessary to go through this helper.
581 	 */
582 	netdev_for_each_lower_dev(dev, lower_dev, iter) {
583 		err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info,
584 						       check_cb, set_cb);
585 		if (err && err != -EOPNOTSUPP)
586 			return err;
587 	}
588 
589 	return err;
590 }
591 
592 int switchdev_handle_port_attr_set(struct net_device *dev,
593 			struct switchdev_notifier_port_attr_info *port_attr_info,
594 			bool (*check_cb)(const struct net_device *dev),
595 			int (*set_cb)(struct net_device *dev,
596 				      const struct switchdev_attr *attr,
597 				      struct switchdev_trans *trans))
598 {
599 	int err;
600 
601 	err = __switchdev_handle_port_attr_set(dev, port_attr_info, check_cb,
602 					       set_cb);
603 	if (err == -EOPNOTSUPP)
604 		err = 0;
605 	return err;
606 }
607 EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
608