/*
 * net/switchdev/switchdev.c - Switch device API
 * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/if_vlan.h>
#include <linux/rtnetlink.h>
#include <net/switchdev.h>

static LIST_HEAD(deferred);
static DEFINE_SPINLOCK(deferred_lock);

typedef void switchdev_deferred_func_t(struct net_device *dev,
				       const void *data);

struct switchdev_deferred_item {
	struct list_head list;
	struct net_device *dev;
	switchdev_deferred_func_t *func;
	unsigned long data[0];
};

static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
{
	struct switchdev_deferred_item *dfitem;

	spin_lock_bh(&deferred_lock);
	if (list_empty(&deferred)) {
		dfitem = NULL;
		goto unlock;
	}
	dfitem = list_first_entry(&deferred,
				  struct switchdev_deferred_item, list);
	list_del(&dfitem->list);
unlock:
	spin_unlock_bh(&deferred_lock);
	return dfitem;
}

/**
 * switchdev_deferred_process - Process ops in deferred queue
 *
 * Called to flush the ops currently queued in deferred ops queue.
 * rtnl_lock must be held.
 */
void switchdev_deferred_process(void)
{
	struct switchdev_deferred_item *dfitem;

	ASSERT_RTNL();

	while ((dfitem = switchdev_deferred_dequeue())) {
		dfitem->func(dfitem->dev, dfitem->data);
		dev_put(dfitem->dev);
		kfree(dfitem);
	}
}
EXPORT_SYMBOL_GPL(switchdev_deferred_process);

static void switchdev_deferred_process_work(struct work_struct *work)
{
	rtnl_lock();
	switchdev_deferred_process();
	rtnl_unlock();
}

static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);

static int switchdev_deferred_enqueue(struct net_device *dev,
				      const void *data, size_t data_len,
				      switchdev_deferred_func_t *func)
{
	struct switchdev_deferred_item *dfitem;

	dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
	if (!dfitem)
		return -ENOMEM;
	dfitem->dev = dev;
	dfitem->func = func;
	memcpy(dfitem->data, data, data_len);
	dev_hold(dev);
	spin_lock_bh(&deferred_lock);
	list_add_tail(&dfitem->list, &deferred);
	spin_unlock_bh(&deferred_lock);
	schedule_work(&deferred_process_work);
	return 0;
}

static int switchdev_port_attr_notify(enum switchdev_notifier_type nt,
				      struct net_device *dev,
				      const struct switchdev_attr *attr,
				      struct switchdev_trans *trans)
{
	int err;
	int rc;

	struct switchdev_notifier_port_attr_info attr_info = {
		.attr = attr,
		.trans = trans,
		.handled = false,
	};

	rc = call_switchdev_blocking_notifiers(nt, dev,
					       &attr_info.info, NULL);
	err = notifier_to_errno(rc);
	if (err) {
		WARN_ON(!attr_info.handled);
		return err;
	}

	if (!attr_info.handled)
		return -EOPNOTSUPP;

	return 0;
}
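
/*
 * Illustrative sketch (not part of this file's API): how a caller typically
 * reaches the deferred path above.  An operation flagged with
 * SWITCHDEV_F_DEFER is copied by switchdev_deferred_enqueue() with
 * GFP_ATOMIC, so the attr/obj may live on the caller's stack and the caller
 * need not be able to sleep or take rtnl_lock; deferred_process_work then
 * replays the op under rtnl_lock via switchdev_deferred_process().  The
 * attribute ID and union member are taken from include/net/switchdev.h of
 * this kernel generation and the value is hypothetical; double-check the
 * header before relying on the exact names.
 *
 *	struct switchdev_attr attr = {
 *		.orig_dev = br_dev,
 *		.id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
 *		.flags = SWITCHDEV_F_DEFER,
 *		.u.ageing_time = clock_t_to_jiffies(ageing_time),
 *	};
 *
 *	// Returns once the item is queued; the actual notifier call happens
 *	// later from the rtnl-holding work item.
 *	int err = switchdev_port_attr_set(port_dev, &attr);
 */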
static int switchdev_port_attr_set_now(struct net_device *dev,
				       const struct switchdev_attr *attr)
{
	struct switchdev_trans trans;
	int err;

	/* Phase I: prepare for attr set. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support. The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the attr.
	 */

	trans.ph_prepare = true;
	err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
					 &trans);
	if (err)
		return err;

	/* Phase II: commit attr set. This cannot fail as a fault
	 * of driver/device. If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
					 &trans);
	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
	     dev->name, attr->id);

	return err;
}

static void switchdev_port_attr_set_deferred(struct net_device *dev,
					     const void *data)
{
	const struct switchdev_attr *attr = data;
	int err;

	err = switchdev_port_attr_set_now(dev, attr);
	if (err && err != -EOPNOTSUPP)
		netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
			   err, attr->id);
	if (attr->complete)
		attr->complete(dev, err, attr->complete_priv);
}

static int switchdev_port_attr_set_defer(struct net_device *dev,
					 const struct switchdev_attr *attr)
{
	return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
					  switchdev_port_attr_set_deferred);
}
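
/*
 * Illustrative sketch of the driver side of the prepare/commit contract
 * enforced by switchdev_port_attr_set_now() above.  The foo_* names are
 * hypothetical; only trans->ph_prepare and the SWITCHDEV_ATTR_ID_* /
 * switchdev_attr layout come from this kernel's switchdev headers, so
 * verify them against include/net/switchdev.h.
 *
 *	static int foo_port_attr_set(struct net_device *dev,
 *				     const struct switchdev_attr *attr,
 *				     struct switchdev_trans *trans)
 *	{
 *		switch (attr->id) {
 *		case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
 *			if (trans->ph_prepare)
 *				// Phase I: only validate and reserve; must
 *				// not touch device state yet.
 *				return foo_stp_state_check(dev, attr->u.stp_state);
 *			// Phase II: apply; a failure here trips the WARN()
 *			// in switchdev_port_attr_set_now().
 *			return foo_stp_state_apply(dev, attr->u.stp_state);
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}
 */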
/**
 * switchdev_port_attr_set - Set port attribute
 *
 * @dev: port device
 * @attr: attribute to set
 *
 * Use a 2-phase prepare-commit transaction model to ensure
 * the system is not left in a partially updated state due to
 * failure from driver/device.
 *
 * rtnl_lock must be held and the caller must not be in an atomic
 * section if the SWITCHDEV_F_DEFER flag is not set.
 */
int switchdev_port_attr_set(struct net_device *dev,
			    const struct switchdev_attr *attr)
{
	if (attr->flags & SWITCHDEV_F_DEFER)
		return switchdev_port_attr_set_defer(dev, attr);
	ASSERT_RTNL();
	return switchdev_port_attr_set_now(dev, attr);
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);

static size_t switchdev_obj_size(const struct switchdev_obj *obj)
{
	switch (obj->id) {
	case SWITCHDEV_OBJ_ID_PORT_VLAN:
		return sizeof(struct switchdev_obj_port_vlan);
	case SWITCHDEV_OBJ_ID_PORT_MDB:
		return sizeof(struct switchdev_obj_port_mdb);
	case SWITCHDEV_OBJ_ID_HOST_MDB:
		return sizeof(struct switchdev_obj_port_mdb);
	default:
		BUG();
	}
	return 0;
}

static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
				     struct net_device *dev,
				     const struct switchdev_obj *obj,
				     struct switchdev_trans *trans,
				     struct netlink_ext_ack *extack)
{
	int rc;
	int err;

	struct switchdev_notifier_port_obj_info obj_info = {
		.obj = obj,
		.trans = trans,
		.handled = false,
	};

	rc = call_switchdev_blocking_notifiers(nt, dev, &obj_info.info, extack);
	err = notifier_to_errno(rc);
	if (err) {
		WARN_ON(!obj_info.handled);
		return err;
	}
	if (!obj_info.handled)
		return -EOPNOTSUPP;
	return 0;
}

static int switchdev_port_obj_add_now(struct net_device *dev,
				      const struct switchdev_obj *obj,
				      struct netlink_ext_ack *extack)
{
	struct switchdev_trans trans;
	int err;

	ASSERT_RTNL();

	/* Phase I: prepare for obj add. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support. The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the obj.
	 */

	trans.ph_prepare = true;
	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
					dev, obj, &trans, extack);
	if (err)
		return err;

	/* Phase II: commit obj add. This cannot fail as a fault
	 * of driver/device. If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
					dev, obj, &trans, extack);
	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);

	return err;
}

static void switchdev_port_obj_add_deferred(struct net_device *dev,
					    const void *data)
{
	const struct switchdev_obj *obj = data;
	int err;

	err = switchdev_port_obj_add_now(dev, obj, NULL);
	if (err && err != -EOPNOTSUPP)
		netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
			   err, obj->id);
	if (obj->complete)
		obj->complete(dev, err, obj->complete_priv);
}

static int switchdev_port_obj_add_defer(struct net_device *dev,
					const struct switchdev_obj *obj)
{
	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
					  switchdev_port_obj_add_deferred);
}
/**
 * switchdev_port_obj_add - Add port object
 *
 * @dev: port device
 * @obj: object to add
 * @extack: netlink extended ack
 *
 * Use a 2-phase prepare-commit transaction model to ensure
 * the system is not left in a partially updated state due to
 * failure from driver/device.
 *
 * rtnl_lock must be held and the caller must not be in an atomic
 * section if the SWITCHDEV_F_DEFER flag is not set.
 */
int switchdev_port_obj_add(struct net_device *dev,
			   const struct switchdev_obj *obj,
			   struct netlink_ext_ack *extack)
{
	if (obj->flags & SWITCHDEV_F_DEFER)
		return switchdev_port_obj_add_defer(dev, obj);
	ASSERT_RTNL();
	return switchdev_port_obj_add_now(dev, obj, extack);
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
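
/*
 * Illustrative sketch of a hypothetical caller (e.g. bridge-like code)
 * adding a port object through switchdev_port_obj_add().  The
 * switchdev_obj_port_vlan layout is the one this file sizes in
 * switchdev_obj_size(); the VLAN range fields and flag values below are
 * assumptions to be checked against include/net/switchdev.h and
 * linux/if_bridge.h.
 *
 *	struct switchdev_obj_port_vlan vlan = {
 *		.obj.orig_dev = port_dev,
 *		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
 *		.obj.flags = SWITCHDEV_F_DEFER,
 *		.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED,
 *		.vid_begin = 10,
 *		.vid_end = 10,
 *	};
 *
 *	// With SWITCHDEV_F_DEFER the object is queued and added later under
 *	// rtnl_lock; without it the caller must already hold rtnl_lock.
 *	int err = switchdev_port_obj_add(port_dev, &vlan.obj, extack);
 */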
static int switchdev_port_obj_del_now(struct net_device *dev,
				      const struct switchdev_obj *obj)
{
	return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL,
					 dev, obj, NULL, NULL);
}

static void switchdev_port_obj_del_deferred(struct net_device *dev,
					    const void *data)
{
	const struct switchdev_obj *obj = data;
	int err;

	err = switchdev_port_obj_del_now(dev, obj);
	if (err && err != -EOPNOTSUPP)
		netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
			   err, obj->id);
	if (obj->complete)
		obj->complete(dev, err, obj->complete_priv);
}

static int switchdev_port_obj_del_defer(struct net_device *dev,
					const struct switchdev_obj *obj)
{
	return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
					  switchdev_port_obj_del_deferred);
}

/**
 * switchdev_port_obj_del - Delete port object
 *
 * @dev: port device
 * @obj: object to delete
 *
 * rtnl_lock must be held and the caller must not be in an atomic
 * section if the SWITCHDEV_F_DEFER flag is not set.
 */
int switchdev_port_obj_del(struct net_device *dev,
			   const struct switchdev_obj *obj)
{
	if (obj->flags & SWITCHDEV_F_DEFER)
		return switchdev_port_obj_del_defer(dev, obj);
	ASSERT_RTNL();
	return switchdev_port_obj_del_now(dev, obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);

static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);

/**
 * register_switchdev_notifier - Register notifier
 * @nb: notifier_block
 *
 * Register switch device notifier.
 */
int register_switchdev_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&switchdev_notif_chain, nb);
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);

/**
 * unregister_switchdev_notifier - Unregister notifier
 * @nb: notifier_block
 *
 * Unregister switch device notifier.
 */
int unregister_switchdev_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);

/**
 * call_switchdev_notifiers - Call notifiers
 * @val: value passed unmodified to notifier function
 * @dev: port device
 * @info: notifier information data
 * @extack: netlink extended ack
 *
 * Call all switch device notifier blocks.
 */
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
			     struct switchdev_notifier_info *info,
			     struct netlink_ext_ack *extack)
{
	info->dev = dev;
	info->extack = extack;
	return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);

int register_switchdev_blocking_notifier(struct notifier_block *nb)
{
	struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;

	return blocking_notifier_chain_register(chain, nb);
}
EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);

int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
{
	struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;

	return blocking_notifier_chain_unregister(chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);

int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
				      struct switchdev_notifier_info *info,
				      struct netlink_ext_ack *extack)
{
	info->dev = dev;
	info->extack = extack;
	return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
					    val, info);
}
EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
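
/*
 * Illustrative sketch of the other direction: a driver notifying upper
 * layers (e.g. the bridge) over the atomic chain when its hardware learns
 * an FDB entry.  SWITCHDEV_FDB_ADD_TO_BRIDGE and struct
 * switchdev_notifier_fdb_info live in include/net/switchdev.h of this
 * kernel generation; the surrounding driver code is hypothetical.  Such a
 * call may be made from atomic (e.g. interrupt) context, which is why the
 * chain above is an atomic notifier chain.
 *
 *	struct switchdev_notifier_fdb_info fdb_info = {
 *		.addr = mac,
 *		.vid = vid,
 *	};
 *
 *	call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
 *				 port_dev, &fdb_info.info, NULL);
 */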
static int __switchdev_handle_port_obj_add(struct net_device *dev,
			struct switchdev_notifier_port_obj_info *port_obj_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*add_cb)(struct net_device *dev,
				      const struct switchdev_obj *obj,
				      struct switchdev_trans *trans,
				      struct netlink_ext_ack *extack))
{
	struct netlink_ext_ack *extack;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	extack = switchdev_notifier_info_to_extack(&port_obj_info->info);

	if (check_cb(dev)) {
		/* This flag is only checked if the return value is success. */
		port_obj_info->handled = true;
		return add_cb(dev, port_obj_info->obj, port_obj_info->trans,
			      extack);
	}

	/* Switch ports might be stacked under e.g. a LAG. Ignore the
	 * unsupported devices; another driver might be able to handle them.
	 * But propagate to the callers any hard errors.
	 *
	 * If the driver does its own bookkeeping of stacked ports, it's not
	 * necessary to go through this helper.
	 */
	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info,
						      check_cb, add_cb);
		if (err && err != -EOPNOTSUPP)
			return err;
	}

	return err;
}

int switchdev_handle_port_obj_add(struct net_device *dev,
			struct switchdev_notifier_port_obj_info *port_obj_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*add_cb)(struct net_device *dev,
				      const struct switchdev_obj *obj,
				      struct switchdev_trans *trans,
				      struct netlink_ext_ack *extack))
{
	int err;

	err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
					      add_cb);
	if (err == -EOPNOTSUPP)
		err = 0;
	return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);

static int __switchdev_handle_port_obj_del(struct net_device *dev,
			struct switchdev_notifier_port_obj_info *port_obj_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*del_cb)(struct net_device *dev,
				      const struct switchdev_obj *obj))
{
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (check_cb(dev)) {
		/* This flag is only checked if the return value is success. */
		port_obj_info->handled = true;
		return del_cb(dev, port_obj_info->obj);
	}

	/* Switch ports might be stacked under e.g. a LAG. Ignore the
	 * unsupported devices; another driver might be able to handle them.
	 * But propagate to the callers any hard errors.
	 *
	 * If the driver does its own bookkeeping of stacked ports, it's not
	 * necessary to go through this helper.
	 */
	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info,
						      check_cb, del_cb);
		if (err && err != -EOPNOTSUPP)
			return err;
	}

	return err;
}

int switchdev_handle_port_obj_del(struct net_device *dev,
			struct switchdev_notifier_port_obj_info *port_obj_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*del_cb)(struct net_device *dev,
				      const struct switchdev_obj *obj))
{
	int err;

	err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
					      del_cb);
	if (err == -EOPNOTSUPP)
		err = 0;
	return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
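
/*
 * Illustrative sketch of a check_cb for the helpers above and below: it
 * answers "is this netdev one of my ports?" so the recursion through lower
 * devices (a bridge above a LAG above the switch ports) stops at the right
 * level.  The ops structure name is hypothetical; comparing netdev_ops is
 * just one common way drivers identify their own ports.
 *
 *	static bool foo_port_dev_check(const struct net_device *dev)
 *	{
 *		return dev->netdev_ops == &foo_port_netdev_ops;
 *	}
 */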
static int __switchdev_handle_port_attr_set(struct net_device *dev,
			struct switchdev_notifier_port_attr_info *port_attr_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*set_cb)(struct net_device *dev,
				      const struct switchdev_attr *attr,
				      struct switchdev_trans *trans))
{
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (check_cb(dev)) {
		port_attr_info->handled = true;
		return set_cb(dev, port_attr_info->attr,
			      port_attr_info->trans);
	}

	/* Switch ports might be stacked under e.g. a LAG. Ignore the
	 * unsupported devices; another driver might be able to handle them.
	 * But propagate to the callers any hard errors.
	 *
	 * If the driver does its own bookkeeping of stacked ports, it's not
	 * necessary to go through this helper.
	 */
	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info,
						       check_cb, set_cb);
		if (err && err != -EOPNOTSUPP)
			return err;
	}

	return err;
}

int switchdev_handle_port_attr_set(struct net_device *dev,
			struct switchdev_notifier_port_attr_info *port_attr_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*set_cb)(struct net_device *dev,
				      const struct switchdev_attr *attr,
				      struct switchdev_trans *trans))
{
	int err;

	err = __switchdev_handle_port_attr_set(dev, port_attr_info, check_cb,
					       set_cb);
	if (err == -EOPNOTSUPP)
		err = 0;
	return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
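
/*
 * Illustrative sketch tying the pieces together: a driver registers a
 * blocking notifier and dispatches the port object/attribute events to the
 * switchdev_handle_*() helpers above, which call back into the driver only
 * for netdevs accepted by its check_cb.  All foo_* names are hypothetical;
 * the event types and helpers are the ones defined in this file and in
 * include/net/switchdev.h.
 *
 *	static int foo_switchdev_blocking_event(struct notifier_block *nb,
 *						unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
 *		int err;
 *
 *		switch (event) {
 *		case SWITCHDEV_PORT_OBJ_ADD:
 *			err = switchdev_handle_port_obj_add(dev, ptr,
 *							    foo_port_dev_check,
 *							    foo_port_obj_add);
 *			return notifier_from_errno(err);
 *		case SWITCHDEV_PORT_OBJ_DEL:
 *			err = switchdev_handle_port_obj_del(dev, ptr,
 *							    foo_port_dev_check,
 *							    foo_port_obj_del);
 *			return notifier_from_errno(err);
 *		case SWITCHDEV_PORT_ATTR_SET:
 *			err = switchdev_handle_port_attr_set(dev, ptr,
 *							     foo_port_dev_check,
 *							     foo_port_attr_set);
 *			return notifier_from_errno(err);
 *		}
 *
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block foo_switchdev_blocking_nb = {
 *		.notifier_call = foo_switchdev_blocking_event,
 *	};
 *
 *	// At probe time (with the matching unregister on remove):
 *	err = register_switchdev_blocking_notifier(&foo_switchdev_blocking_nb);
 */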