1 /* 2 * net/switchdev/switchdev.c - Switch device API 3 * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us> 4 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/types.h> 14 #include <linux/init.h> 15 #include <linux/mutex.h> 16 #include <linux/notifier.h> 17 #include <linux/netdevice.h> 18 #include <linux/etherdevice.h> 19 #include <linux/if_bridge.h> 20 #include <linux/list.h> 21 #include <linux/workqueue.h> 22 #include <linux/if_vlan.h> 23 #include <linux/rtnetlink.h> 24 #include <net/switchdev.h> 25 26 /** 27 * switchdev_trans_item_enqueue - Enqueue data item to transaction queue 28 * 29 * @trans: transaction 30 * @data: pointer to data being queued 31 * @destructor: data destructor 32 * @tritem: transaction item being queued 33 * 34 * Enqeueue data item to transaction queue. tritem is typically placed in 35 * cointainter pointed at by data pointer. Destructor is called on 36 * transaction abort and after successful commit phase in case 37 * the caller did not dequeue the item before. 38 */ 39 void switchdev_trans_item_enqueue(struct switchdev_trans *trans, 40 void *data, void (*destructor)(void const *), 41 struct switchdev_trans_item *tritem) 42 { 43 tritem->data = data; 44 tritem->destructor = destructor; 45 list_add_tail(&tritem->list, &trans->item_list); 46 } 47 EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue); 48 49 static struct switchdev_trans_item * 50 __switchdev_trans_item_dequeue(struct switchdev_trans *trans) 51 { 52 struct switchdev_trans_item *tritem; 53 54 if (list_empty(&trans->item_list)) 55 return NULL; 56 tritem = list_first_entry(&trans->item_list, 57 struct switchdev_trans_item, list); 58 list_del(&tritem->list); 59 return tritem; 60 } 61 62 /** 63 * switchdev_trans_item_dequeue - Dequeue data item from transaction queue 64 * 65 * @trans: transaction 66 */ 67 void *switchdev_trans_item_dequeue(struct switchdev_trans *trans) 68 { 69 struct switchdev_trans_item *tritem; 70 71 tritem = __switchdev_trans_item_dequeue(trans); 72 BUG_ON(!tritem); 73 return tritem->data; 74 } 75 EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue); 76 77 static void switchdev_trans_init(struct switchdev_trans *trans) 78 { 79 INIT_LIST_HEAD(&trans->item_list); 80 } 81 82 static void switchdev_trans_items_destroy(struct switchdev_trans *trans) 83 { 84 struct switchdev_trans_item *tritem; 85 86 while ((tritem = __switchdev_trans_item_dequeue(trans))) 87 tritem->destructor(tritem->data); 88 } 89 90 static void switchdev_trans_items_warn_destroy(struct net_device *dev, 91 struct switchdev_trans *trans) 92 { 93 WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n", 94 dev->name); 95 switchdev_trans_items_destroy(trans); 96 } 97 98 static LIST_HEAD(deferred); 99 static DEFINE_SPINLOCK(deferred_lock); 100 101 typedef void switchdev_deferred_func_t(struct net_device *dev, 102 const void *data); 103 104 struct switchdev_deferred_item { 105 struct list_head list; 106 struct net_device *dev; 107 switchdev_deferred_func_t *func; 108 unsigned long data[0]; 109 }; 110 111 static struct switchdev_deferred_item *switchdev_deferred_dequeue(void) 112 { 113 struct switchdev_deferred_item *dfitem; 114 115 spin_lock_bh(&deferred_lock); 116 if (list_empty(&deferred)) { 117 dfitem = NULL; 118 goto unlock; 119 } 120 dfitem = list_first_entry(&deferred, 121 struct switchdev_deferred_item, list); 122 list_del(&dfitem->list); 123 unlock: 124 spin_unlock_bh(&deferred_lock); 125 return dfitem; 126 } 127 128 /** 129 * switchdev_deferred_process - Process ops in deferred queue 130 * 131 * Called to flush the ops currently queued in deferred ops queue. 132 * rtnl_lock must be held. 133 */ 134 void switchdev_deferred_process(void) 135 { 136 struct switchdev_deferred_item *dfitem; 137 138 ASSERT_RTNL(); 139 140 while ((dfitem = switchdev_deferred_dequeue())) { 141 dfitem->func(dfitem->dev, dfitem->data); 142 dev_put(dfitem->dev); 143 kfree(dfitem); 144 } 145 } 146 EXPORT_SYMBOL_GPL(switchdev_deferred_process); 147 148 static void switchdev_deferred_process_work(struct work_struct *work) 149 { 150 rtnl_lock(); 151 switchdev_deferred_process(); 152 rtnl_unlock(); 153 } 154 155 static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work); 156 157 static int switchdev_deferred_enqueue(struct net_device *dev, 158 const void *data, size_t data_len, 159 switchdev_deferred_func_t *func) 160 { 161 struct switchdev_deferred_item *dfitem; 162 163 dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC); 164 if (!dfitem) 165 return -ENOMEM; 166 dfitem->dev = dev; 167 dfitem->func = func; 168 memcpy(dfitem->data, data, data_len); 169 dev_hold(dev); 170 spin_lock_bh(&deferred_lock); 171 list_add_tail(&dfitem->list, &deferred); 172 spin_unlock_bh(&deferred_lock); 173 schedule_work(&deferred_process_work); 174 return 0; 175 } 176 177 /** 178 * switchdev_port_attr_get - Get port attribute 179 * 180 * @dev: port device 181 * @attr: attribute to get 182 */ 183 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) 184 { 185 const struct switchdev_ops *ops = dev->switchdev_ops; 186 struct net_device *lower_dev; 187 struct list_head *iter; 188 struct switchdev_attr first = { 189 .id = SWITCHDEV_ATTR_ID_UNDEFINED 190 }; 191 int err = -EOPNOTSUPP; 192 193 if (ops && ops->switchdev_port_attr_get) 194 return ops->switchdev_port_attr_get(dev, attr); 195 196 if (attr->flags & SWITCHDEV_F_NO_RECURSE) 197 return err; 198 199 /* Switch device port(s) may be stacked under 200 * bond/team/vlan dev, so recurse down to get attr on 201 * each port. Return -ENODATA if attr values don't 202 * compare across ports. 203 */ 204 205 netdev_for_each_lower_dev(dev, lower_dev, iter) { 206 err = switchdev_port_attr_get(lower_dev, attr); 207 if (err) 208 break; 209 if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED) 210 first = *attr; 211 else if (memcmp(&first, attr, sizeof(*attr))) 212 return -ENODATA; 213 } 214 215 return err; 216 } 217 EXPORT_SYMBOL_GPL(switchdev_port_attr_get); 218 219 static int __switchdev_port_attr_set(struct net_device *dev, 220 const struct switchdev_attr *attr, 221 struct switchdev_trans *trans) 222 { 223 const struct switchdev_ops *ops = dev->switchdev_ops; 224 struct net_device *lower_dev; 225 struct list_head *iter; 226 int err = -EOPNOTSUPP; 227 228 if (ops && ops->switchdev_port_attr_set) { 229 err = ops->switchdev_port_attr_set(dev, attr, trans); 230 goto done; 231 } 232 233 if (attr->flags & SWITCHDEV_F_NO_RECURSE) 234 goto done; 235 236 /* Switch device port(s) may be stacked under 237 * bond/team/vlan dev, so recurse down to set attr on 238 * each port. 239 */ 240 241 netdev_for_each_lower_dev(dev, lower_dev, iter) { 242 err = __switchdev_port_attr_set(lower_dev, attr, trans); 243 if (err) 244 break; 245 } 246 247 done: 248 if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) 249 err = 0; 250 251 return err; 252 } 253 254 static int switchdev_port_attr_set_now(struct net_device *dev, 255 const struct switchdev_attr *attr) 256 { 257 struct switchdev_trans trans; 258 int err; 259 260 switchdev_trans_init(&trans); 261 262 /* Phase I: prepare for attr set. Driver/device should fail 263 * here if there are going to be issues in the commit phase, 264 * such as lack of resources or support. The driver/device 265 * should reserve resources needed for the commit phase here, 266 * but should not commit the attr. 267 */ 268 269 trans.ph_prepare = true; 270 err = __switchdev_port_attr_set(dev, attr, &trans); 271 if (err) { 272 /* Prepare phase failed: abort the transaction. Any 273 * resources reserved in the prepare phase are 274 * released. 275 */ 276 277 if (err != -EOPNOTSUPP) 278 switchdev_trans_items_destroy(&trans); 279 280 return err; 281 } 282 283 /* Phase II: commit attr set. This cannot fail as a fault 284 * of driver/device. If it does, it's a bug in the driver/device 285 * because the driver said everythings was OK in phase I. 286 */ 287 288 trans.ph_prepare = false; 289 err = __switchdev_port_attr_set(dev, attr, &trans); 290 WARN(err, "%s: Commit of attribute (id=%d) failed.\n", 291 dev->name, attr->id); 292 switchdev_trans_items_warn_destroy(dev, &trans); 293 294 return err; 295 } 296 297 static void switchdev_port_attr_set_deferred(struct net_device *dev, 298 const void *data) 299 { 300 const struct switchdev_attr *attr = data; 301 int err; 302 303 err = switchdev_port_attr_set_now(dev, attr); 304 if (err && err != -EOPNOTSUPP) 305 netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n", 306 err, attr->id); 307 if (attr->complete) 308 attr->complete(dev, err, attr->complete_priv); 309 } 310 311 static int switchdev_port_attr_set_defer(struct net_device *dev, 312 const struct switchdev_attr *attr) 313 { 314 return switchdev_deferred_enqueue(dev, attr, sizeof(*attr), 315 switchdev_port_attr_set_deferred); 316 } 317 318 /** 319 * switchdev_port_attr_set - Set port attribute 320 * 321 * @dev: port device 322 * @attr: attribute to set 323 * 324 * Use a 2-phase prepare-commit transaction model to ensure 325 * system is not left in a partially updated state due to 326 * failure from driver/device. 327 * 328 * rtnl_lock must be held and must not be in atomic section, 329 * in case SWITCHDEV_F_DEFER flag is not set. 330 */ 331 int switchdev_port_attr_set(struct net_device *dev, 332 const struct switchdev_attr *attr) 333 { 334 if (attr->flags & SWITCHDEV_F_DEFER) 335 return switchdev_port_attr_set_defer(dev, attr); 336 ASSERT_RTNL(); 337 return switchdev_port_attr_set_now(dev, attr); 338 } 339 EXPORT_SYMBOL_GPL(switchdev_port_attr_set); 340 341 static size_t switchdev_obj_size(const struct switchdev_obj *obj) 342 { 343 switch (obj->id) { 344 case SWITCHDEV_OBJ_ID_PORT_VLAN: 345 return sizeof(struct switchdev_obj_port_vlan); 346 case SWITCHDEV_OBJ_ID_PORT_MDB: 347 return sizeof(struct switchdev_obj_port_mdb); 348 case SWITCHDEV_OBJ_ID_HOST_MDB: 349 return sizeof(struct switchdev_obj_port_mdb); 350 default: 351 BUG(); 352 } 353 return 0; 354 } 355 356 static int __switchdev_port_obj_add(struct net_device *dev, 357 const struct switchdev_obj *obj, 358 struct switchdev_trans *trans) 359 { 360 const struct switchdev_ops *ops = dev->switchdev_ops; 361 struct net_device *lower_dev; 362 struct list_head *iter; 363 int err = -EOPNOTSUPP; 364 365 if (ops && ops->switchdev_port_obj_add) 366 return ops->switchdev_port_obj_add(dev, obj, trans); 367 368 /* Switch device port(s) may be stacked under 369 * bond/team/vlan dev, so recurse down to add object on 370 * each port. 371 */ 372 373 netdev_for_each_lower_dev(dev, lower_dev, iter) { 374 err = __switchdev_port_obj_add(lower_dev, obj, trans); 375 if (err) 376 break; 377 } 378 379 return err; 380 } 381 382 static int switchdev_port_obj_add_now(struct net_device *dev, 383 const struct switchdev_obj *obj) 384 { 385 struct switchdev_trans trans; 386 int err; 387 388 ASSERT_RTNL(); 389 390 switchdev_trans_init(&trans); 391 392 /* Phase I: prepare for obj add. Driver/device should fail 393 * here if there are going to be issues in the commit phase, 394 * such as lack of resources or support. The driver/device 395 * should reserve resources needed for the commit phase here, 396 * but should not commit the obj. 397 */ 398 399 trans.ph_prepare = true; 400 err = __switchdev_port_obj_add(dev, obj, &trans); 401 if (err) { 402 /* Prepare phase failed: abort the transaction. Any 403 * resources reserved in the prepare phase are 404 * released. 405 */ 406 407 if (err != -EOPNOTSUPP) 408 switchdev_trans_items_destroy(&trans); 409 410 return err; 411 } 412 413 /* Phase II: commit obj add. This cannot fail as a fault 414 * of driver/device. If it does, it's a bug in the driver/device 415 * because the driver said everythings was OK in phase I. 416 */ 417 418 trans.ph_prepare = false; 419 err = __switchdev_port_obj_add(dev, obj, &trans); 420 WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); 421 switchdev_trans_items_warn_destroy(dev, &trans); 422 423 return err; 424 } 425 426 static void switchdev_port_obj_add_deferred(struct net_device *dev, 427 const void *data) 428 { 429 const struct switchdev_obj *obj = data; 430 int err; 431 432 err = switchdev_port_obj_add_now(dev, obj); 433 if (err && err != -EOPNOTSUPP) 434 netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", 435 err, obj->id); 436 if (obj->complete) 437 obj->complete(dev, err, obj->complete_priv); 438 } 439 440 static int switchdev_port_obj_add_defer(struct net_device *dev, 441 const struct switchdev_obj *obj) 442 { 443 return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), 444 switchdev_port_obj_add_deferred); 445 } 446 447 /** 448 * switchdev_port_obj_add - Add port object 449 * 450 * @dev: port device 451 * @id: object ID 452 * @obj: object to add 453 * 454 * Use a 2-phase prepare-commit transaction model to ensure 455 * system is not left in a partially updated state due to 456 * failure from driver/device. 457 * 458 * rtnl_lock must be held and must not be in atomic section, 459 * in case SWITCHDEV_F_DEFER flag is not set. 460 */ 461 int switchdev_port_obj_add(struct net_device *dev, 462 const struct switchdev_obj *obj) 463 { 464 if (obj->flags & SWITCHDEV_F_DEFER) 465 return switchdev_port_obj_add_defer(dev, obj); 466 ASSERT_RTNL(); 467 return switchdev_port_obj_add_now(dev, obj); 468 } 469 EXPORT_SYMBOL_GPL(switchdev_port_obj_add); 470 471 static int switchdev_port_obj_del_now(struct net_device *dev, 472 const struct switchdev_obj *obj) 473 { 474 const struct switchdev_ops *ops = dev->switchdev_ops; 475 struct net_device *lower_dev; 476 struct list_head *iter; 477 int err = -EOPNOTSUPP; 478 479 if (ops && ops->switchdev_port_obj_del) 480 return ops->switchdev_port_obj_del(dev, obj); 481 482 /* Switch device port(s) may be stacked under 483 * bond/team/vlan dev, so recurse down to delete object on 484 * each port. 485 */ 486 487 netdev_for_each_lower_dev(dev, lower_dev, iter) { 488 err = switchdev_port_obj_del_now(lower_dev, obj); 489 if (err) 490 break; 491 } 492 493 return err; 494 } 495 496 static void switchdev_port_obj_del_deferred(struct net_device *dev, 497 const void *data) 498 { 499 const struct switchdev_obj *obj = data; 500 int err; 501 502 err = switchdev_port_obj_del_now(dev, obj); 503 if (err && err != -EOPNOTSUPP) 504 netdev_err(dev, "failed (err=%d) to del object (id=%d)\n", 505 err, obj->id); 506 if (obj->complete) 507 obj->complete(dev, err, obj->complete_priv); 508 } 509 510 static int switchdev_port_obj_del_defer(struct net_device *dev, 511 const struct switchdev_obj *obj) 512 { 513 return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), 514 switchdev_port_obj_del_deferred); 515 } 516 517 /** 518 * switchdev_port_obj_del - Delete port object 519 * 520 * @dev: port device 521 * @id: object ID 522 * @obj: object to delete 523 * 524 * rtnl_lock must be held and must not be in atomic section, 525 * in case SWITCHDEV_F_DEFER flag is not set. 526 */ 527 int switchdev_port_obj_del(struct net_device *dev, 528 const struct switchdev_obj *obj) 529 { 530 if (obj->flags & SWITCHDEV_F_DEFER) 531 return switchdev_port_obj_del_defer(dev, obj); 532 ASSERT_RTNL(); 533 return switchdev_port_obj_del_now(dev, obj); 534 } 535 EXPORT_SYMBOL_GPL(switchdev_port_obj_del); 536 537 static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); 538 539 /** 540 * register_switchdev_notifier - Register notifier 541 * @nb: notifier_block 542 * 543 * Register switch device notifier. 544 */ 545 int register_switchdev_notifier(struct notifier_block *nb) 546 { 547 return atomic_notifier_chain_register(&switchdev_notif_chain, nb); 548 } 549 EXPORT_SYMBOL_GPL(register_switchdev_notifier); 550 551 /** 552 * unregister_switchdev_notifier - Unregister notifier 553 * @nb: notifier_block 554 * 555 * Unregister switch device notifier. 556 */ 557 int unregister_switchdev_notifier(struct notifier_block *nb) 558 { 559 return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb); 560 } 561 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); 562 563 /** 564 * call_switchdev_notifiers - Call notifiers 565 * @val: value passed unmodified to notifier function 566 * @dev: port device 567 * @info: notifier information data 568 * 569 * Call all network notifier blocks. 570 */ 571 int call_switchdev_notifiers(unsigned long val, struct net_device *dev, 572 struct switchdev_notifier_info *info) 573 { 574 info->dev = dev; 575 return atomic_notifier_call_chain(&switchdev_notif_chain, val, info); 576 } 577 EXPORT_SYMBOL_GPL(call_switchdev_notifiers); 578 579 bool switchdev_port_same_parent_id(struct net_device *a, 580 struct net_device *b) 581 { 582 struct switchdev_attr a_attr = { 583 .orig_dev = a, 584 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, 585 }; 586 struct switchdev_attr b_attr = { 587 .orig_dev = b, 588 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, 589 }; 590 591 if (switchdev_port_attr_get(a, &a_attr) || 592 switchdev_port_attr_get(b, &b_attr)) 593 return false; 594 595 return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); 596 } 597 EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); 598