1 /* 2 * net/switchdev/switchdev.c - Switch device API 3 * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us> 4 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/types.h> 14 #include <linux/init.h> 15 #include <linux/mutex.h> 16 #include <linux/notifier.h> 17 #include <linux/netdevice.h> 18 #include <linux/etherdevice.h> 19 #include <linux/if_bridge.h> 20 #include <linux/list.h> 21 #include <linux/workqueue.h> 22 #include <linux/if_vlan.h> 23 #include <linux/rtnetlink.h> 24 #include <net/switchdev.h> 25 26 /** 27 * switchdev_trans_item_enqueue - Enqueue data item to transaction queue 28 * 29 * @trans: transaction 30 * @data: pointer to data being queued 31 * @destructor: data destructor 32 * @tritem: transaction item being queued 33 * 34 * Enqeueue data item to transaction queue. tritem is typically placed in 35 * cointainter pointed at by data pointer. Destructor is called on 36 * transaction abort and after successful commit phase in case 37 * the caller did not dequeue the item before. 38 */ 39 void switchdev_trans_item_enqueue(struct switchdev_trans *trans, 40 void *data, void (*destructor)(void const *), 41 struct switchdev_trans_item *tritem) 42 { 43 tritem->data = data; 44 tritem->destructor = destructor; 45 list_add_tail(&tritem->list, &trans->item_list); 46 } 47 EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue); 48 49 static struct switchdev_trans_item * 50 __switchdev_trans_item_dequeue(struct switchdev_trans *trans) 51 { 52 struct switchdev_trans_item *tritem; 53 54 if (list_empty(&trans->item_list)) 55 return NULL; 56 tritem = list_first_entry(&trans->item_list, 57 struct switchdev_trans_item, list); 58 list_del(&tritem->list); 59 return tritem; 60 } 61 62 /** 63 * switchdev_trans_item_dequeue - Dequeue data item from transaction queue 64 * 65 * @trans: transaction 66 */ 67 void *switchdev_trans_item_dequeue(struct switchdev_trans *trans) 68 { 69 struct switchdev_trans_item *tritem; 70 71 tritem = __switchdev_trans_item_dequeue(trans); 72 BUG_ON(!tritem); 73 return tritem->data; 74 } 75 EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue); 76 77 static void switchdev_trans_init(struct switchdev_trans *trans) 78 { 79 INIT_LIST_HEAD(&trans->item_list); 80 } 81 82 static void switchdev_trans_items_destroy(struct switchdev_trans *trans) 83 { 84 struct switchdev_trans_item *tritem; 85 86 while ((tritem = __switchdev_trans_item_dequeue(trans))) 87 tritem->destructor(tritem->data); 88 } 89 90 static void switchdev_trans_items_warn_destroy(struct net_device *dev, 91 struct switchdev_trans *trans) 92 { 93 WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n", 94 dev->name); 95 switchdev_trans_items_destroy(trans); 96 } 97 98 static LIST_HEAD(deferred); 99 static DEFINE_SPINLOCK(deferred_lock); 100 101 typedef void switchdev_deferred_func_t(struct net_device *dev, 102 const void *data); 103 104 struct switchdev_deferred_item { 105 struct list_head list; 106 struct net_device *dev; 107 switchdev_deferred_func_t *func; 108 unsigned long data[0]; 109 }; 110 111 static struct switchdev_deferred_item *switchdev_deferred_dequeue(void) 112 { 113 struct switchdev_deferred_item *dfitem; 114 115 spin_lock_bh(&deferred_lock); 116 if (list_empty(&deferred)) { 117 dfitem = NULL; 118 goto unlock; 119 } 120 dfitem = list_first_entry(&deferred, 121 struct switchdev_deferred_item, list); 122 list_del(&dfitem->list); 123 unlock: 124 spin_unlock_bh(&deferred_lock); 125 return dfitem; 126 } 127 128 /** 129 * switchdev_deferred_process - Process ops in deferred queue 130 * 131 * Called to flush the ops currently queued in deferred ops queue. 132 * rtnl_lock must be held. 133 */ 134 void switchdev_deferred_process(void) 135 { 136 struct switchdev_deferred_item *dfitem; 137 138 ASSERT_RTNL(); 139 140 while ((dfitem = switchdev_deferred_dequeue())) { 141 dfitem->func(dfitem->dev, dfitem->data); 142 dev_put(dfitem->dev); 143 kfree(dfitem); 144 } 145 } 146 EXPORT_SYMBOL_GPL(switchdev_deferred_process); 147 148 static void switchdev_deferred_process_work(struct work_struct *work) 149 { 150 rtnl_lock(); 151 switchdev_deferred_process(); 152 rtnl_unlock(); 153 } 154 155 static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work); 156 157 static int switchdev_deferred_enqueue(struct net_device *dev, 158 const void *data, size_t data_len, 159 switchdev_deferred_func_t *func) 160 { 161 struct switchdev_deferred_item *dfitem; 162 163 dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC); 164 if (!dfitem) 165 return -ENOMEM; 166 dfitem->dev = dev; 167 dfitem->func = func; 168 memcpy(dfitem->data, data, data_len); 169 dev_hold(dev); 170 spin_lock_bh(&deferred_lock); 171 list_add_tail(&dfitem->list, &deferred); 172 spin_unlock_bh(&deferred_lock); 173 schedule_work(&deferred_process_work); 174 return 0; 175 } 176 177 /** 178 * switchdev_port_attr_get - Get port attribute 179 * 180 * @dev: port device 181 * @attr: attribute to get 182 */ 183 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) 184 { 185 const struct switchdev_ops *ops = dev->switchdev_ops; 186 struct net_device *lower_dev; 187 struct list_head *iter; 188 struct switchdev_attr first = { 189 .id = SWITCHDEV_ATTR_ID_UNDEFINED 190 }; 191 int err = -EOPNOTSUPP; 192 193 if (ops && ops->switchdev_port_attr_get) 194 return ops->switchdev_port_attr_get(dev, attr); 195 196 if (attr->flags & SWITCHDEV_F_NO_RECURSE) 197 return err; 198 199 /* Switch device port(s) may be stacked under 200 * bond/team/vlan dev, so recurse down to get attr on 201 * each port. Return -ENODATA if attr values don't 202 * compare across ports. 203 */ 204 205 netdev_for_each_lower_dev(dev, lower_dev, iter) { 206 err = switchdev_port_attr_get(lower_dev, attr); 207 if (err) 208 break; 209 if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED) 210 first = *attr; 211 else if (memcmp(&first, attr, sizeof(*attr))) 212 return -ENODATA; 213 } 214 215 return err; 216 } 217 EXPORT_SYMBOL_GPL(switchdev_port_attr_get); 218 219 static int __switchdev_port_attr_set(struct net_device *dev, 220 const struct switchdev_attr *attr, 221 struct switchdev_trans *trans) 222 { 223 const struct switchdev_ops *ops = dev->switchdev_ops; 224 struct net_device *lower_dev; 225 struct list_head *iter; 226 int err = -EOPNOTSUPP; 227 228 if (ops && ops->switchdev_port_attr_set) { 229 err = ops->switchdev_port_attr_set(dev, attr, trans); 230 goto done; 231 } 232 233 if (attr->flags & SWITCHDEV_F_NO_RECURSE) 234 goto done; 235 236 /* Switch device port(s) may be stacked under 237 * bond/team/vlan dev, so recurse down to set attr on 238 * each port. 239 */ 240 241 netdev_for_each_lower_dev(dev, lower_dev, iter) { 242 err = __switchdev_port_attr_set(lower_dev, attr, trans); 243 if (err) 244 break; 245 } 246 247 done: 248 if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) 249 err = 0; 250 251 return err; 252 } 253 254 static int switchdev_port_attr_set_now(struct net_device *dev, 255 const struct switchdev_attr *attr) 256 { 257 struct switchdev_trans trans; 258 int err; 259 260 switchdev_trans_init(&trans); 261 262 /* Phase I: prepare for attr set. Driver/device should fail 263 * here if there are going to be issues in the commit phase, 264 * such as lack of resources or support. The driver/device 265 * should reserve resources needed for the commit phase here, 266 * but should not commit the attr. 267 */ 268 269 trans.ph_prepare = true; 270 err = __switchdev_port_attr_set(dev, attr, &trans); 271 if (err) { 272 /* Prepare phase failed: abort the transaction. Any 273 * resources reserved in the prepare phase are 274 * released. 275 */ 276 277 if (err != -EOPNOTSUPP) 278 switchdev_trans_items_destroy(&trans); 279 280 return err; 281 } 282 283 /* Phase II: commit attr set. This cannot fail as a fault 284 * of driver/device. If it does, it's a bug in the driver/device 285 * because the driver said everythings was OK in phase I. 286 */ 287 288 trans.ph_prepare = false; 289 err = __switchdev_port_attr_set(dev, attr, &trans); 290 WARN(err, "%s: Commit of attribute (id=%d) failed.\n", 291 dev->name, attr->id); 292 switchdev_trans_items_warn_destroy(dev, &trans); 293 294 return err; 295 } 296 297 static void switchdev_port_attr_set_deferred(struct net_device *dev, 298 const void *data) 299 { 300 const struct switchdev_attr *attr = data; 301 int err; 302 303 err = switchdev_port_attr_set_now(dev, attr); 304 if (err && err != -EOPNOTSUPP) 305 netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n", 306 err, attr->id); 307 if (attr->complete) 308 attr->complete(dev, err, attr->complete_priv); 309 } 310 311 static int switchdev_port_attr_set_defer(struct net_device *dev, 312 const struct switchdev_attr *attr) 313 { 314 return switchdev_deferred_enqueue(dev, attr, sizeof(*attr), 315 switchdev_port_attr_set_deferred); 316 } 317 318 /** 319 * switchdev_port_attr_set - Set port attribute 320 * 321 * @dev: port device 322 * @attr: attribute to set 323 * 324 * Use a 2-phase prepare-commit transaction model to ensure 325 * system is not left in a partially updated state due to 326 * failure from driver/device. 327 * 328 * rtnl_lock must be held and must not be in atomic section, 329 * in case SWITCHDEV_F_DEFER flag is not set. 330 */ 331 int switchdev_port_attr_set(struct net_device *dev, 332 const struct switchdev_attr *attr) 333 { 334 if (attr->flags & SWITCHDEV_F_DEFER) 335 return switchdev_port_attr_set_defer(dev, attr); 336 ASSERT_RTNL(); 337 return switchdev_port_attr_set_now(dev, attr); 338 } 339 EXPORT_SYMBOL_GPL(switchdev_port_attr_set); 340 341 static size_t switchdev_obj_size(const struct switchdev_obj *obj) 342 { 343 switch (obj->id) { 344 case SWITCHDEV_OBJ_ID_PORT_VLAN: 345 return sizeof(struct switchdev_obj_port_vlan); 346 case SWITCHDEV_OBJ_ID_PORT_MDB: 347 return sizeof(struct switchdev_obj_port_mdb); 348 default: 349 BUG(); 350 } 351 return 0; 352 } 353 354 static int __switchdev_port_obj_add(struct net_device *dev, 355 const struct switchdev_obj *obj, 356 struct switchdev_trans *trans) 357 { 358 const struct switchdev_ops *ops = dev->switchdev_ops; 359 struct net_device *lower_dev; 360 struct list_head *iter; 361 int err = -EOPNOTSUPP; 362 363 if (ops && ops->switchdev_port_obj_add) 364 return ops->switchdev_port_obj_add(dev, obj, trans); 365 366 /* Switch device port(s) may be stacked under 367 * bond/team/vlan dev, so recurse down to add object on 368 * each port. 369 */ 370 371 netdev_for_each_lower_dev(dev, lower_dev, iter) { 372 err = __switchdev_port_obj_add(lower_dev, obj, trans); 373 if (err) 374 break; 375 } 376 377 return err; 378 } 379 380 static int switchdev_port_obj_add_now(struct net_device *dev, 381 const struct switchdev_obj *obj) 382 { 383 struct switchdev_trans trans; 384 int err; 385 386 ASSERT_RTNL(); 387 388 switchdev_trans_init(&trans); 389 390 /* Phase I: prepare for obj add. Driver/device should fail 391 * here if there are going to be issues in the commit phase, 392 * such as lack of resources or support. The driver/device 393 * should reserve resources needed for the commit phase here, 394 * but should not commit the obj. 395 */ 396 397 trans.ph_prepare = true; 398 err = __switchdev_port_obj_add(dev, obj, &trans); 399 if (err) { 400 /* Prepare phase failed: abort the transaction. Any 401 * resources reserved in the prepare phase are 402 * released. 403 */ 404 405 if (err != -EOPNOTSUPP) 406 switchdev_trans_items_destroy(&trans); 407 408 return err; 409 } 410 411 /* Phase II: commit obj add. This cannot fail as a fault 412 * of driver/device. If it does, it's a bug in the driver/device 413 * because the driver said everythings was OK in phase I. 414 */ 415 416 trans.ph_prepare = false; 417 err = __switchdev_port_obj_add(dev, obj, &trans); 418 WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); 419 switchdev_trans_items_warn_destroy(dev, &trans); 420 421 return err; 422 } 423 424 static void switchdev_port_obj_add_deferred(struct net_device *dev, 425 const void *data) 426 { 427 const struct switchdev_obj *obj = data; 428 int err; 429 430 err = switchdev_port_obj_add_now(dev, obj); 431 if (err && err != -EOPNOTSUPP) 432 netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", 433 err, obj->id); 434 if (obj->complete) 435 obj->complete(dev, err, obj->complete_priv); 436 } 437 438 static int switchdev_port_obj_add_defer(struct net_device *dev, 439 const struct switchdev_obj *obj) 440 { 441 return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), 442 switchdev_port_obj_add_deferred); 443 } 444 445 /** 446 * switchdev_port_obj_add - Add port object 447 * 448 * @dev: port device 449 * @id: object ID 450 * @obj: object to add 451 * 452 * Use a 2-phase prepare-commit transaction model to ensure 453 * system is not left in a partially updated state due to 454 * failure from driver/device. 455 * 456 * rtnl_lock must be held and must not be in atomic section, 457 * in case SWITCHDEV_F_DEFER flag is not set. 458 */ 459 int switchdev_port_obj_add(struct net_device *dev, 460 const struct switchdev_obj *obj) 461 { 462 if (obj->flags & SWITCHDEV_F_DEFER) 463 return switchdev_port_obj_add_defer(dev, obj); 464 ASSERT_RTNL(); 465 return switchdev_port_obj_add_now(dev, obj); 466 } 467 EXPORT_SYMBOL_GPL(switchdev_port_obj_add); 468 469 static int switchdev_port_obj_del_now(struct net_device *dev, 470 const struct switchdev_obj *obj) 471 { 472 const struct switchdev_ops *ops = dev->switchdev_ops; 473 struct net_device *lower_dev; 474 struct list_head *iter; 475 int err = -EOPNOTSUPP; 476 477 if (ops && ops->switchdev_port_obj_del) 478 return ops->switchdev_port_obj_del(dev, obj); 479 480 /* Switch device port(s) may be stacked under 481 * bond/team/vlan dev, so recurse down to delete object on 482 * each port. 483 */ 484 485 netdev_for_each_lower_dev(dev, lower_dev, iter) { 486 err = switchdev_port_obj_del_now(lower_dev, obj); 487 if (err) 488 break; 489 } 490 491 return err; 492 } 493 494 static void switchdev_port_obj_del_deferred(struct net_device *dev, 495 const void *data) 496 { 497 const struct switchdev_obj *obj = data; 498 int err; 499 500 err = switchdev_port_obj_del_now(dev, obj); 501 if (err && err != -EOPNOTSUPP) 502 netdev_err(dev, "failed (err=%d) to del object (id=%d)\n", 503 err, obj->id); 504 if (obj->complete) 505 obj->complete(dev, err, obj->complete_priv); 506 } 507 508 static int switchdev_port_obj_del_defer(struct net_device *dev, 509 const struct switchdev_obj *obj) 510 { 511 return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), 512 switchdev_port_obj_del_deferred); 513 } 514 515 /** 516 * switchdev_port_obj_del - Delete port object 517 * 518 * @dev: port device 519 * @id: object ID 520 * @obj: object to delete 521 * 522 * rtnl_lock must be held and must not be in atomic section, 523 * in case SWITCHDEV_F_DEFER flag is not set. 524 */ 525 int switchdev_port_obj_del(struct net_device *dev, 526 const struct switchdev_obj *obj) 527 { 528 if (obj->flags & SWITCHDEV_F_DEFER) 529 return switchdev_port_obj_del_defer(dev, obj); 530 ASSERT_RTNL(); 531 return switchdev_port_obj_del_now(dev, obj); 532 } 533 EXPORT_SYMBOL_GPL(switchdev_port_obj_del); 534 535 static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); 536 537 /** 538 * register_switchdev_notifier - Register notifier 539 * @nb: notifier_block 540 * 541 * Register switch device notifier. 542 */ 543 int register_switchdev_notifier(struct notifier_block *nb) 544 { 545 return atomic_notifier_chain_register(&switchdev_notif_chain, nb); 546 } 547 EXPORT_SYMBOL_GPL(register_switchdev_notifier); 548 549 /** 550 * unregister_switchdev_notifier - Unregister notifier 551 * @nb: notifier_block 552 * 553 * Unregister switch device notifier. 554 */ 555 int unregister_switchdev_notifier(struct notifier_block *nb) 556 { 557 return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb); 558 } 559 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); 560 561 /** 562 * call_switchdev_notifiers - Call notifiers 563 * @val: value passed unmodified to notifier function 564 * @dev: port device 565 * @info: notifier information data 566 * 567 * Call all network notifier blocks. 568 */ 569 int call_switchdev_notifiers(unsigned long val, struct net_device *dev, 570 struct switchdev_notifier_info *info) 571 { 572 info->dev = dev; 573 return atomic_notifier_call_chain(&switchdev_notif_chain, val, info); 574 } 575 EXPORT_SYMBOL_GPL(call_switchdev_notifiers); 576 577 bool switchdev_port_same_parent_id(struct net_device *a, 578 struct net_device *b) 579 { 580 struct switchdev_attr a_attr = { 581 .orig_dev = a, 582 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, 583 }; 584 struct switchdev_attr b_attr = { 585 .orig_dev = b, 586 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, 587 }; 588 589 if (switchdev_port_attr_get(a, &a_attr) || 590 switchdev_port_attr_get(b, &b_attr)) 591 return false; 592 593 return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); 594 } 595 EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); 596