1 /* 2 * net/switchdev/switchdev.c - Switch device API 3 * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us> 4 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/types.h> 14 #include <linux/init.h> 15 #include <linux/mutex.h> 16 #include <linux/notifier.h> 17 #include <linux/netdevice.h> 18 #include <linux/etherdevice.h> 19 #include <linux/if_bridge.h> 20 #include <linux/list.h> 21 #include <linux/workqueue.h> 22 #include <linux/if_vlan.h> 23 #include <net/ip_fib.h> 24 #include <net/switchdev.h> 25 26 /** 27 * switchdev_trans_item_enqueue - Enqueue data item to transaction queue 28 * 29 * @trans: transaction 30 * @data: pointer to data being queued 31 * @destructor: data destructor 32 * @tritem: transaction item being queued 33 * 34 * Enqeueue data item to transaction queue. tritem is typically placed in 35 * cointainter pointed at by data pointer. Destructor is called on 36 * transaction abort and after successful commit phase in case 37 * the caller did not dequeue the item before. 38 */ 39 void switchdev_trans_item_enqueue(struct switchdev_trans *trans, 40 void *data, void (*destructor)(void const *), 41 struct switchdev_trans_item *tritem) 42 { 43 tritem->data = data; 44 tritem->destructor = destructor; 45 list_add_tail(&tritem->list, &trans->item_list); 46 } 47 EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue); 48 49 static struct switchdev_trans_item * 50 __switchdev_trans_item_dequeue(struct switchdev_trans *trans) 51 { 52 struct switchdev_trans_item *tritem; 53 54 if (list_empty(&trans->item_list)) 55 return NULL; 56 tritem = list_first_entry(&trans->item_list, 57 struct switchdev_trans_item, list); 58 list_del(&tritem->list); 59 return tritem; 60 } 61 62 /** 63 * switchdev_trans_item_dequeue - Dequeue data item from transaction queue 64 * 65 * @trans: transaction 66 */ 67 void *switchdev_trans_item_dequeue(struct switchdev_trans *trans) 68 { 69 struct switchdev_trans_item *tritem; 70 71 tritem = __switchdev_trans_item_dequeue(trans); 72 BUG_ON(!tritem); 73 return tritem->data; 74 } 75 EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue); 76 77 static void switchdev_trans_init(struct switchdev_trans *trans) 78 { 79 INIT_LIST_HEAD(&trans->item_list); 80 } 81 82 static void switchdev_trans_items_destroy(struct switchdev_trans *trans) 83 { 84 struct switchdev_trans_item *tritem; 85 86 while ((tritem = __switchdev_trans_item_dequeue(trans))) 87 tritem->destructor(tritem->data); 88 } 89 90 static void switchdev_trans_items_warn_destroy(struct net_device *dev, 91 struct switchdev_trans *trans) 92 { 93 WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n", 94 dev->name); 95 switchdev_trans_items_destroy(trans); 96 } 97 98 static LIST_HEAD(deferred); 99 static DEFINE_SPINLOCK(deferred_lock); 100 101 typedef void switchdev_deferred_func_t(struct net_device *dev, 102 const void *data); 103 104 struct switchdev_deferred_item { 105 struct list_head list; 106 struct net_device *dev; 107 switchdev_deferred_func_t *func; 108 unsigned long data[0]; 109 }; 110 111 static struct switchdev_deferred_item *switchdev_deferred_dequeue(void) 112 { 113 struct switchdev_deferred_item *dfitem; 114 115 spin_lock_bh(&deferred_lock); 116 if (list_empty(&deferred)) { 117 dfitem = NULL; 118 goto unlock; 119 } 120 dfitem = list_first_entry(&deferred, 121 struct switchdev_deferred_item, list); 122 list_del(&dfitem->list); 123 unlock: 124 spin_unlock_bh(&deferred_lock); 125 return dfitem; 126 } 127 128 /** 129 * switchdev_deferred_process - Process ops in deferred queue 130 * 131 * Called to flush the ops currently queued in deferred ops queue. 132 * rtnl_lock must be held. 133 */ 134 void switchdev_deferred_process(void) 135 { 136 struct switchdev_deferred_item *dfitem; 137 138 ASSERT_RTNL(); 139 140 while ((dfitem = switchdev_deferred_dequeue())) { 141 dfitem->func(dfitem->dev, dfitem->data); 142 dev_put(dfitem->dev); 143 kfree(dfitem); 144 } 145 } 146 EXPORT_SYMBOL_GPL(switchdev_deferred_process); 147 148 static void switchdev_deferred_process_work(struct work_struct *work) 149 { 150 rtnl_lock(); 151 switchdev_deferred_process(); 152 rtnl_unlock(); 153 } 154 155 static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work); 156 157 static int switchdev_deferred_enqueue(struct net_device *dev, 158 const void *data, size_t data_len, 159 switchdev_deferred_func_t *func) 160 { 161 struct switchdev_deferred_item *dfitem; 162 163 dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC); 164 if (!dfitem) 165 return -ENOMEM; 166 dfitem->dev = dev; 167 dfitem->func = func; 168 memcpy(dfitem->data, data, data_len); 169 dev_hold(dev); 170 spin_lock_bh(&deferred_lock); 171 list_add_tail(&dfitem->list, &deferred); 172 spin_unlock_bh(&deferred_lock); 173 schedule_work(&deferred_process_work); 174 return 0; 175 } 176 177 /** 178 * switchdev_port_attr_get - Get port attribute 179 * 180 * @dev: port device 181 * @attr: attribute to get 182 */ 183 int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) 184 { 185 const struct switchdev_ops *ops = dev->switchdev_ops; 186 struct net_device *lower_dev; 187 struct list_head *iter; 188 struct switchdev_attr first = { 189 .id = SWITCHDEV_ATTR_ID_UNDEFINED 190 }; 191 int err = -EOPNOTSUPP; 192 193 if (ops && ops->switchdev_port_attr_get) 194 return ops->switchdev_port_attr_get(dev, attr); 195 196 if (attr->flags & SWITCHDEV_F_NO_RECURSE) 197 return err; 198 199 /* Switch device port(s) may be stacked under 200 * bond/team/vlan dev, so recurse down to get attr on 201 * each port. Return -ENODATA if attr values don't 202 * compare across ports. 203 */ 204 205 netdev_for_each_lower_dev(dev, lower_dev, iter) { 206 err = switchdev_port_attr_get(lower_dev, attr); 207 if (err) 208 break; 209 if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED) 210 first = *attr; 211 else if (memcmp(&first, attr, sizeof(*attr))) 212 return -ENODATA; 213 } 214 215 return err; 216 } 217 EXPORT_SYMBOL_GPL(switchdev_port_attr_get); 218 219 static int __switchdev_port_attr_set(struct net_device *dev, 220 const struct switchdev_attr *attr, 221 struct switchdev_trans *trans) 222 { 223 const struct switchdev_ops *ops = dev->switchdev_ops; 224 struct net_device *lower_dev; 225 struct list_head *iter; 226 int err = -EOPNOTSUPP; 227 228 if (ops && ops->switchdev_port_attr_set) { 229 err = ops->switchdev_port_attr_set(dev, attr, trans); 230 goto done; 231 } 232 233 if (attr->flags & SWITCHDEV_F_NO_RECURSE) 234 goto done; 235 236 /* Switch device port(s) may be stacked under 237 * bond/team/vlan dev, so recurse down to set attr on 238 * each port. 239 */ 240 241 netdev_for_each_lower_dev(dev, lower_dev, iter) { 242 err = __switchdev_port_attr_set(lower_dev, attr, trans); 243 if (err) 244 break; 245 } 246 247 done: 248 if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) 249 err = 0; 250 251 return err; 252 } 253 254 static int switchdev_port_attr_set_now(struct net_device *dev, 255 const struct switchdev_attr *attr) 256 { 257 struct switchdev_trans trans; 258 int err; 259 260 switchdev_trans_init(&trans); 261 262 /* Phase I: prepare for attr set. Driver/device should fail 263 * here if there are going to be issues in the commit phase, 264 * such as lack of resources or support. The driver/device 265 * should reserve resources needed for the commit phase here, 266 * but should not commit the attr. 267 */ 268 269 trans.ph_prepare = true; 270 err = __switchdev_port_attr_set(dev, attr, &trans); 271 if (err) { 272 /* Prepare phase failed: abort the transaction. Any 273 * resources reserved in the prepare phase are 274 * released. 275 */ 276 277 if (err != -EOPNOTSUPP) 278 switchdev_trans_items_destroy(&trans); 279 280 return err; 281 } 282 283 /* Phase II: commit attr set. This cannot fail as a fault 284 * of driver/device. If it does, it's a bug in the driver/device 285 * because the driver said everythings was OK in phase I. 286 */ 287 288 trans.ph_prepare = false; 289 err = __switchdev_port_attr_set(dev, attr, &trans); 290 WARN(err, "%s: Commit of attribute (id=%d) failed.\n", 291 dev->name, attr->id); 292 switchdev_trans_items_warn_destroy(dev, &trans); 293 294 return err; 295 } 296 297 static void switchdev_port_attr_set_deferred(struct net_device *dev, 298 const void *data) 299 { 300 const struct switchdev_attr *attr = data; 301 int err; 302 303 err = switchdev_port_attr_set_now(dev, attr); 304 if (err && err != -EOPNOTSUPP) 305 netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n", 306 err, attr->id); 307 } 308 309 static int switchdev_port_attr_set_defer(struct net_device *dev, 310 const struct switchdev_attr *attr) 311 { 312 return switchdev_deferred_enqueue(dev, attr, sizeof(*attr), 313 switchdev_port_attr_set_deferred); 314 } 315 316 /** 317 * switchdev_port_attr_set - Set port attribute 318 * 319 * @dev: port device 320 * @attr: attribute to set 321 * 322 * Use a 2-phase prepare-commit transaction model to ensure 323 * system is not left in a partially updated state due to 324 * failure from driver/device. 325 * 326 * rtnl_lock must be held and must not be in atomic section, 327 * in case SWITCHDEV_F_DEFER flag is not set. 328 */ 329 int switchdev_port_attr_set(struct net_device *dev, 330 const struct switchdev_attr *attr) 331 { 332 if (attr->flags & SWITCHDEV_F_DEFER) 333 return switchdev_port_attr_set_defer(dev, attr); 334 ASSERT_RTNL(); 335 return switchdev_port_attr_set_now(dev, attr); 336 } 337 EXPORT_SYMBOL_GPL(switchdev_port_attr_set); 338 339 static size_t switchdev_obj_size(const struct switchdev_obj *obj) 340 { 341 switch (obj->id) { 342 case SWITCHDEV_OBJ_ID_PORT_VLAN: 343 return sizeof(struct switchdev_obj_port_vlan); 344 case SWITCHDEV_OBJ_ID_IPV4_FIB: 345 return sizeof(struct switchdev_obj_ipv4_fib); 346 case SWITCHDEV_OBJ_ID_PORT_FDB: 347 return sizeof(struct switchdev_obj_port_fdb); 348 default: 349 BUG(); 350 } 351 return 0; 352 } 353 354 static int __switchdev_port_obj_add(struct net_device *dev, 355 const struct switchdev_obj *obj, 356 struct switchdev_trans *trans) 357 { 358 const struct switchdev_ops *ops = dev->switchdev_ops; 359 struct net_device *lower_dev; 360 struct list_head *iter; 361 int err = -EOPNOTSUPP; 362 363 if (ops && ops->switchdev_port_obj_add) 364 return ops->switchdev_port_obj_add(dev, obj, trans); 365 366 /* Switch device port(s) may be stacked under 367 * bond/team/vlan dev, so recurse down to add object on 368 * each port. 369 */ 370 371 netdev_for_each_lower_dev(dev, lower_dev, iter) { 372 err = __switchdev_port_obj_add(lower_dev, obj, trans); 373 if (err) 374 break; 375 } 376 377 return err; 378 } 379 380 static int switchdev_port_obj_add_now(struct net_device *dev, 381 const struct switchdev_obj *obj) 382 { 383 struct switchdev_trans trans; 384 int err; 385 386 ASSERT_RTNL(); 387 388 switchdev_trans_init(&trans); 389 390 /* Phase I: prepare for obj add. Driver/device should fail 391 * here if there are going to be issues in the commit phase, 392 * such as lack of resources or support. The driver/device 393 * should reserve resources needed for the commit phase here, 394 * but should not commit the obj. 395 */ 396 397 trans.ph_prepare = true; 398 err = __switchdev_port_obj_add(dev, obj, &trans); 399 if (err) { 400 /* Prepare phase failed: abort the transaction. Any 401 * resources reserved in the prepare phase are 402 * released. 403 */ 404 405 if (err != -EOPNOTSUPP) 406 switchdev_trans_items_destroy(&trans); 407 408 return err; 409 } 410 411 /* Phase II: commit obj add. This cannot fail as a fault 412 * of driver/device. If it does, it's a bug in the driver/device 413 * because the driver said everythings was OK in phase I. 414 */ 415 416 trans.ph_prepare = false; 417 err = __switchdev_port_obj_add(dev, obj, &trans); 418 WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); 419 switchdev_trans_items_warn_destroy(dev, &trans); 420 421 return err; 422 } 423 424 static void switchdev_port_obj_add_deferred(struct net_device *dev, 425 const void *data) 426 { 427 const struct switchdev_obj *obj = data; 428 int err; 429 430 err = switchdev_port_obj_add_now(dev, obj); 431 if (err && err != -EOPNOTSUPP) 432 netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", 433 err, obj->id); 434 } 435 436 static int switchdev_port_obj_add_defer(struct net_device *dev, 437 const struct switchdev_obj *obj) 438 { 439 return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), 440 switchdev_port_obj_add_deferred); 441 } 442 443 /** 444 * switchdev_port_obj_add - Add port object 445 * 446 * @dev: port device 447 * @id: object ID 448 * @obj: object to add 449 * 450 * Use a 2-phase prepare-commit transaction model to ensure 451 * system is not left in a partially updated state due to 452 * failure from driver/device. 453 * 454 * rtnl_lock must be held and must not be in atomic section, 455 * in case SWITCHDEV_F_DEFER flag is not set. 456 */ 457 int switchdev_port_obj_add(struct net_device *dev, 458 const struct switchdev_obj *obj) 459 { 460 if (obj->flags & SWITCHDEV_F_DEFER) 461 return switchdev_port_obj_add_defer(dev, obj); 462 ASSERT_RTNL(); 463 return switchdev_port_obj_add_now(dev, obj); 464 } 465 EXPORT_SYMBOL_GPL(switchdev_port_obj_add); 466 467 static int switchdev_port_obj_del_now(struct net_device *dev, 468 const struct switchdev_obj *obj) 469 { 470 const struct switchdev_ops *ops = dev->switchdev_ops; 471 struct net_device *lower_dev; 472 struct list_head *iter; 473 int err = -EOPNOTSUPP; 474 475 if (ops && ops->switchdev_port_obj_del) 476 return ops->switchdev_port_obj_del(dev, obj); 477 478 /* Switch device port(s) may be stacked under 479 * bond/team/vlan dev, so recurse down to delete object on 480 * each port. 481 */ 482 483 netdev_for_each_lower_dev(dev, lower_dev, iter) { 484 err = switchdev_port_obj_del_now(lower_dev, obj); 485 if (err) 486 break; 487 } 488 489 return err; 490 } 491 492 static void switchdev_port_obj_del_deferred(struct net_device *dev, 493 const void *data) 494 { 495 const struct switchdev_obj *obj = data; 496 int err; 497 498 err = switchdev_port_obj_del_now(dev, obj); 499 if (err && err != -EOPNOTSUPP) 500 netdev_err(dev, "failed (err=%d) to del object (id=%d)\n", 501 err, obj->id); 502 } 503 504 static int switchdev_port_obj_del_defer(struct net_device *dev, 505 const struct switchdev_obj *obj) 506 { 507 return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), 508 switchdev_port_obj_del_deferred); 509 } 510 511 /** 512 * switchdev_port_obj_del - Delete port object 513 * 514 * @dev: port device 515 * @id: object ID 516 * @obj: object to delete 517 * 518 * rtnl_lock must be held and must not be in atomic section, 519 * in case SWITCHDEV_F_DEFER flag is not set. 520 */ 521 int switchdev_port_obj_del(struct net_device *dev, 522 const struct switchdev_obj *obj) 523 { 524 if (obj->flags & SWITCHDEV_F_DEFER) 525 return switchdev_port_obj_del_defer(dev, obj); 526 ASSERT_RTNL(); 527 return switchdev_port_obj_del_now(dev, obj); 528 } 529 EXPORT_SYMBOL_GPL(switchdev_port_obj_del); 530 531 /** 532 * switchdev_port_obj_dump - Dump port objects 533 * 534 * @dev: port device 535 * @id: object ID 536 * @obj: object to dump 537 * @cb: function to call with a filled object 538 * 539 * rtnl_lock must be held. 540 */ 541 int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, 542 switchdev_obj_dump_cb_t *cb) 543 { 544 const struct switchdev_ops *ops = dev->switchdev_ops; 545 struct net_device *lower_dev; 546 struct list_head *iter; 547 int err = -EOPNOTSUPP; 548 549 ASSERT_RTNL(); 550 551 if (ops && ops->switchdev_port_obj_dump) 552 return ops->switchdev_port_obj_dump(dev, obj, cb); 553 554 /* Switch device port(s) may be stacked under 555 * bond/team/vlan dev, so recurse down to dump objects on 556 * first port at bottom of stack. 557 */ 558 559 netdev_for_each_lower_dev(dev, lower_dev, iter) { 560 err = switchdev_port_obj_dump(lower_dev, obj, cb); 561 break; 562 } 563 564 return err; 565 } 566 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); 567 568 static DEFINE_MUTEX(switchdev_mutex); 569 static RAW_NOTIFIER_HEAD(switchdev_notif_chain); 570 571 /** 572 * register_switchdev_notifier - Register notifier 573 * @nb: notifier_block 574 * 575 * Register switch device notifier. This should be used by code 576 * which needs to monitor events happening in particular device. 577 * Return values are same as for atomic_notifier_chain_register(). 578 */ 579 int register_switchdev_notifier(struct notifier_block *nb) 580 { 581 int err; 582 583 mutex_lock(&switchdev_mutex); 584 err = raw_notifier_chain_register(&switchdev_notif_chain, nb); 585 mutex_unlock(&switchdev_mutex); 586 return err; 587 } 588 EXPORT_SYMBOL_GPL(register_switchdev_notifier); 589 590 /** 591 * unregister_switchdev_notifier - Unregister notifier 592 * @nb: notifier_block 593 * 594 * Unregister switch device notifier. 595 * Return values are same as for atomic_notifier_chain_unregister(). 596 */ 597 int unregister_switchdev_notifier(struct notifier_block *nb) 598 { 599 int err; 600 601 mutex_lock(&switchdev_mutex); 602 err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); 603 mutex_unlock(&switchdev_mutex); 604 return err; 605 } 606 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); 607 608 /** 609 * call_switchdev_notifiers - Call notifiers 610 * @val: value passed unmodified to notifier function 611 * @dev: port device 612 * @info: notifier information data 613 * 614 * Call all network notifier blocks. This should be called by driver 615 * when it needs to propagate hardware event. 616 * Return values are same as for atomic_notifier_call_chain(). 617 */ 618 int call_switchdev_notifiers(unsigned long val, struct net_device *dev, 619 struct switchdev_notifier_info *info) 620 { 621 int err; 622 623 info->dev = dev; 624 mutex_lock(&switchdev_mutex); 625 err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); 626 mutex_unlock(&switchdev_mutex); 627 return err; 628 } 629 EXPORT_SYMBOL_GPL(call_switchdev_notifiers); 630 631 struct switchdev_vlan_dump { 632 struct switchdev_obj_port_vlan vlan; 633 struct sk_buff *skb; 634 u32 filter_mask; 635 u16 flags; 636 u16 begin; 637 u16 end; 638 }; 639 640 static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump) 641 { 642 struct bridge_vlan_info vinfo; 643 644 vinfo.flags = dump->flags; 645 646 if (dump->begin == 0 && dump->end == 0) { 647 return 0; 648 } else if (dump->begin == dump->end) { 649 vinfo.vid = dump->begin; 650 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, 651 sizeof(vinfo), &vinfo)) 652 return -EMSGSIZE; 653 } else { 654 vinfo.vid = dump->begin; 655 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN; 656 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, 657 sizeof(vinfo), &vinfo)) 658 return -EMSGSIZE; 659 vinfo.vid = dump->end; 660 vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; 661 vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END; 662 if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, 663 sizeof(vinfo), &vinfo)) 664 return -EMSGSIZE; 665 } 666 667 return 0; 668 } 669 670 static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj) 671 { 672 struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); 673 struct switchdev_vlan_dump *dump = 674 container_of(vlan, struct switchdev_vlan_dump, vlan); 675 int err = 0; 676 677 if (vlan->vid_begin > vlan->vid_end) 678 return -EINVAL; 679 680 if (dump->filter_mask & RTEXT_FILTER_BRVLAN) { 681 dump->flags = vlan->flags; 682 for (dump->begin = dump->end = vlan->vid_begin; 683 dump->begin <= vlan->vid_end; 684 dump->begin++, dump->end++) { 685 err = switchdev_port_vlan_dump_put(dump); 686 if (err) 687 return err; 688 } 689 } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) { 690 if (dump->begin > vlan->vid_begin && 691 dump->begin >= vlan->vid_end) { 692 if ((dump->begin - 1) == vlan->vid_end && 693 dump->flags == vlan->flags) { 694 /* prepend */ 695 dump->begin = vlan->vid_begin; 696 } else { 697 err = switchdev_port_vlan_dump_put(dump); 698 dump->flags = vlan->flags; 699 dump->begin = vlan->vid_begin; 700 dump->end = vlan->vid_end; 701 } 702 } else if (dump->end <= vlan->vid_begin && 703 dump->end < vlan->vid_end) { 704 if ((dump->end + 1) == vlan->vid_begin && 705 dump->flags == vlan->flags) { 706 /* append */ 707 dump->end = vlan->vid_end; 708 } else { 709 err = switchdev_port_vlan_dump_put(dump); 710 dump->flags = vlan->flags; 711 dump->begin = vlan->vid_begin; 712 dump->end = vlan->vid_end; 713 } 714 } else { 715 err = -EINVAL; 716 } 717 } 718 719 return err; 720 } 721 722 static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, 723 u32 filter_mask) 724 { 725 struct switchdev_vlan_dump dump = { 726 .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, 727 .skb = skb, 728 .filter_mask = filter_mask, 729 }; 730 int err = 0; 731 732 if ((filter_mask & RTEXT_FILTER_BRVLAN) || 733 (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { 734 err = switchdev_port_obj_dump(dev, &dump.vlan.obj, 735 switchdev_port_vlan_dump_cb); 736 if (err) 737 goto err_out; 738 if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) 739 /* last one */ 740 err = switchdev_port_vlan_dump_put(&dump); 741 } 742 743 err_out: 744 return err == -EOPNOTSUPP ? 0 : err; 745 } 746 747 /** 748 * switchdev_port_bridge_getlink - Get bridge port attributes 749 * 750 * @dev: port device 751 * 752 * Called for SELF on rtnl_bridge_getlink to get bridge port 753 * attributes. 754 */ 755 int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, 756 struct net_device *dev, u32 filter_mask, 757 int nlflags) 758 { 759 struct switchdev_attr attr = { 760 .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, 761 }; 762 u16 mode = BRIDGE_MODE_UNDEF; 763 u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD; 764 int err; 765 766 err = switchdev_port_attr_get(dev, &attr); 767 if (err && err != -EOPNOTSUPP) 768 return err; 769 770 return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 771 attr.u.brport_flags, mask, nlflags, 772 filter_mask, switchdev_port_vlan_fill); 773 } 774 EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink); 775 776 static int switchdev_port_br_setflag(struct net_device *dev, 777 struct nlattr *nlattr, 778 unsigned long brport_flag) 779 { 780 struct switchdev_attr attr = { 781 .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, 782 }; 783 u8 flag = nla_get_u8(nlattr); 784 int err; 785 786 err = switchdev_port_attr_get(dev, &attr); 787 if (err) 788 return err; 789 790 if (flag) 791 attr.u.brport_flags |= brport_flag; 792 else 793 attr.u.brport_flags &= ~brport_flag; 794 795 return switchdev_port_attr_set(dev, &attr); 796 } 797 798 static const struct nla_policy 799 switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = { 800 [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, 801 [IFLA_BRPORT_COST] = { .type = NLA_U32 }, 802 [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, 803 [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, 804 [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, 805 [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, 806 [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 }, 807 [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, 808 [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 }, 809 [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, 810 }; 811 812 static int switchdev_port_br_setlink_protinfo(struct net_device *dev, 813 struct nlattr *protinfo) 814 { 815 struct nlattr *attr; 816 int rem; 817 int err; 818 819 err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX, 820 switchdev_port_bridge_policy); 821 if (err) 822 return err; 823 824 nla_for_each_nested(attr, protinfo, rem) { 825 switch (nla_type(attr)) { 826 case IFLA_BRPORT_LEARNING: 827 err = switchdev_port_br_setflag(dev, attr, 828 BR_LEARNING); 829 break; 830 case IFLA_BRPORT_LEARNING_SYNC: 831 err = switchdev_port_br_setflag(dev, attr, 832 BR_LEARNING_SYNC); 833 break; 834 case IFLA_BRPORT_UNICAST_FLOOD: 835 err = switchdev_port_br_setflag(dev, attr, BR_FLOOD); 836 break; 837 default: 838 err = -EOPNOTSUPP; 839 break; 840 } 841 if (err) 842 return err; 843 } 844 845 return 0; 846 } 847 848 static int switchdev_port_br_afspec(struct net_device *dev, 849 struct nlattr *afspec, 850 int (*f)(struct net_device *dev, 851 const struct switchdev_obj *obj)) 852 { 853 struct nlattr *attr; 854 struct bridge_vlan_info *vinfo; 855 struct switchdev_obj_port_vlan vlan = { 856 .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, 857 }; 858 int rem; 859 int err; 860 861 nla_for_each_nested(attr, afspec, rem) { 862 if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) 863 continue; 864 if (nla_len(attr) != sizeof(struct bridge_vlan_info)) 865 return -EINVAL; 866 vinfo = nla_data(attr); 867 if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) 868 return -EINVAL; 869 vlan.flags = vinfo->flags; 870 if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { 871 if (vlan.vid_begin) 872 return -EINVAL; 873 vlan.vid_begin = vinfo->vid; 874 /* don't allow range of pvids */ 875 if (vlan.flags & BRIDGE_VLAN_INFO_PVID) 876 return -EINVAL; 877 } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { 878 if (!vlan.vid_begin) 879 return -EINVAL; 880 vlan.vid_end = vinfo->vid; 881 if (vlan.vid_end <= vlan.vid_begin) 882 return -EINVAL; 883 err = f(dev, &vlan.obj); 884 if (err) 885 return err; 886 vlan.vid_begin = 0; 887 } else { 888 if (vlan.vid_begin) 889 return -EINVAL; 890 vlan.vid_begin = vinfo->vid; 891 vlan.vid_end = vinfo->vid; 892 err = f(dev, &vlan.obj); 893 if (err) 894 return err; 895 vlan.vid_begin = 0; 896 } 897 } 898 899 return 0; 900 } 901 902 /** 903 * switchdev_port_bridge_setlink - Set bridge port attributes 904 * 905 * @dev: port device 906 * @nlh: netlink header 907 * @flags: netlink flags 908 * 909 * Called for SELF on rtnl_bridge_setlink to set bridge port 910 * attributes. 911 */ 912 int switchdev_port_bridge_setlink(struct net_device *dev, 913 struct nlmsghdr *nlh, u16 flags) 914 { 915 struct nlattr *protinfo; 916 struct nlattr *afspec; 917 int err = 0; 918 919 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), 920 IFLA_PROTINFO); 921 if (protinfo) { 922 err = switchdev_port_br_setlink_protinfo(dev, protinfo); 923 if (err) 924 return err; 925 } 926 927 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), 928 IFLA_AF_SPEC); 929 if (afspec) 930 err = switchdev_port_br_afspec(dev, afspec, 931 switchdev_port_obj_add); 932 933 return err; 934 } 935 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink); 936 937 /** 938 * switchdev_port_bridge_dellink - Set bridge port attributes 939 * 940 * @dev: port device 941 * @nlh: netlink header 942 * @flags: netlink flags 943 * 944 * Called for SELF on rtnl_bridge_dellink to set bridge port 945 * attributes. 946 */ 947 int switchdev_port_bridge_dellink(struct net_device *dev, 948 struct nlmsghdr *nlh, u16 flags) 949 { 950 struct nlattr *afspec; 951 952 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), 953 IFLA_AF_SPEC); 954 if (afspec) 955 return switchdev_port_br_afspec(dev, afspec, 956 switchdev_port_obj_del); 957 958 return 0; 959 } 960 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); 961 962 /** 963 * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port 964 * 965 * @ndmsg: netlink hdr 966 * @nlattr: netlink attributes 967 * @dev: port device 968 * @addr: MAC address to add 969 * @vid: VLAN to add 970 * 971 * Add FDB entry to switch device. 972 */ 973 int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], 974 struct net_device *dev, const unsigned char *addr, 975 u16 vid, u16 nlm_flags) 976 { 977 struct switchdev_obj_port_fdb fdb = { 978 .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, 979 .vid = vid, 980 }; 981 982 ether_addr_copy(fdb.addr, addr); 983 return switchdev_port_obj_add(dev, &fdb.obj); 984 } 985 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); 986 987 /** 988 * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port 989 * 990 * @ndmsg: netlink hdr 991 * @nlattr: netlink attributes 992 * @dev: port device 993 * @addr: MAC address to delete 994 * @vid: VLAN to delete 995 * 996 * Delete FDB entry from switch device. 997 */ 998 int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], 999 struct net_device *dev, const unsigned char *addr, 1000 u16 vid) 1001 { 1002 struct switchdev_obj_port_fdb fdb = { 1003 .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, 1004 .vid = vid, 1005 }; 1006 1007 ether_addr_copy(fdb.addr, addr); 1008 return switchdev_port_obj_del(dev, &fdb.obj); 1009 } 1010 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); 1011 1012 struct switchdev_fdb_dump { 1013 struct switchdev_obj_port_fdb fdb; 1014 struct net_device *dev; 1015 struct sk_buff *skb; 1016 struct netlink_callback *cb; 1017 int idx; 1018 }; 1019 1020 static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj) 1021 { 1022 struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj); 1023 struct switchdev_fdb_dump *dump = 1024 container_of(fdb, struct switchdev_fdb_dump, fdb); 1025 u32 portid = NETLINK_CB(dump->cb->skb).portid; 1026 u32 seq = dump->cb->nlh->nlmsg_seq; 1027 struct nlmsghdr *nlh; 1028 struct ndmsg *ndm; 1029 1030 if (dump->idx < dump->cb->args[0]) 1031 goto skip; 1032 1033 nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, 1034 sizeof(*ndm), NLM_F_MULTI); 1035 if (!nlh) 1036 return -EMSGSIZE; 1037 1038 ndm = nlmsg_data(nlh); 1039 ndm->ndm_family = AF_BRIDGE; 1040 ndm->ndm_pad1 = 0; 1041 ndm->ndm_pad2 = 0; 1042 ndm->ndm_flags = NTF_SELF; 1043 ndm->ndm_type = 0; 1044 ndm->ndm_ifindex = dump->dev->ifindex; 1045 ndm->ndm_state = fdb->ndm_state; 1046 1047 if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr)) 1048 goto nla_put_failure; 1049 1050 if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid)) 1051 goto nla_put_failure; 1052 1053 nlmsg_end(dump->skb, nlh); 1054 1055 skip: 1056 dump->idx++; 1057 return 0; 1058 1059 nla_put_failure: 1060 nlmsg_cancel(dump->skb, nlh); 1061 return -EMSGSIZE; 1062 } 1063 1064 /** 1065 * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries 1066 * 1067 * @skb: netlink skb 1068 * @cb: netlink callback 1069 * @dev: port device 1070 * @filter_dev: filter device 1071 * @idx: 1072 * 1073 * Delete FDB entry from switch device. 1074 */ 1075 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, 1076 struct net_device *dev, 1077 struct net_device *filter_dev, int idx) 1078 { 1079 struct switchdev_fdb_dump dump = { 1080 .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, 1081 .dev = dev, 1082 .skb = skb, 1083 .cb = cb, 1084 .idx = idx, 1085 }; 1086 1087 switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb); 1088 return dump.idx; 1089 } 1090 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); 1091 1092 static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) 1093 { 1094 const struct switchdev_ops *ops = dev->switchdev_ops; 1095 struct net_device *lower_dev; 1096 struct net_device *port_dev; 1097 struct list_head *iter; 1098 1099 /* Recusively search down until we find a sw port dev. 1100 * (A sw port dev supports switchdev_port_attr_get). 1101 */ 1102 1103 if (ops && ops->switchdev_port_attr_get) 1104 return dev; 1105 1106 netdev_for_each_lower_dev(dev, lower_dev, iter) { 1107 port_dev = switchdev_get_lowest_dev(lower_dev); 1108 if (port_dev) 1109 return port_dev; 1110 } 1111 1112 return NULL; 1113 } 1114 1115 static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) 1116 { 1117 struct switchdev_attr attr = { 1118 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, 1119 }; 1120 struct switchdev_attr prev_attr; 1121 struct net_device *dev = NULL; 1122 int nhsel; 1123 1124 ASSERT_RTNL(); 1125 1126 /* For this route, all nexthop devs must be on the same switch. */ 1127 1128 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 1129 const struct fib_nh *nh = &fi->fib_nh[nhsel]; 1130 1131 if (!nh->nh_dev) 1132 return NULL; 1133 1134 dev = switchdev_get_lowest_dev(nh->nh_dev); 1135 if (!dev) 1136 return NULL; 1137 1138 if (switchdev_port_attr_get(dev, &attr)) 1139 return NULL; 1140 1141 if (nhsel > 0 && 1142 !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) 1143 return NULL; 1144 1145 prev_attr = attr; 1146 } 1147 1148 return dev; 1149 } 1150 1151 /** 1152 * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry 1153 * 1154 * @dst: route's IPv4 destination address 1155 * @dst_len: destination address length (prefix length) 1156 * @fi: route FIB info structure 1157 * @tos: route TOS 1158 * @type: route type 1159 * @nlflags: netlink flags passed in (NLM_F_*) 1160 * @tb_id: route table ID 1161 * 1162 * Add/modify switch IPv4 route entry. 1163 */ 1164 int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, 1165 u8 tos, u8 type, u32 nlflags, u32 tb_id) 1166 { 1167 struct switchdev_obj_ipv4_fib ipv4_fib = { 1168 .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, 1169 .dst = dst, 1170 .dst_len = dst_len, 1171 .tos = tos, 1172 .type = type, 1173 .nlflags = nlflags, 1174 .tb_id = tb_id, 1175 }; 1176 struct net_device *dev; 1177 int err = 0; 1178 1179 memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); 1180 1181 /* Don't offload route if using custom ip rules or if 1182 * IPv4 FIB offloading has been disabled completely. 1183 */ 1184 1185 #ifdef CONFIG_IP_MULTIPLE_TABLES 1186 if (fi->fib_net->ipv4.fib_has_custom_rules) 1187 return 0; 1188 #endif 1189 1190 if (fi->fib_net->ipv4.fib_offload_disabled) 1191 return 0; 1192 1193 dev = switchdev_get_dev_by_nhs(fi); 1194 if (!dev) 1195 return 0; 1196 1197 err = switchdev_port_obj_add(dev, &ipv4_fib.obj); 1198 if (!err) 1199 fi->fib_flags |= RTNH_F_OFFLOAD; 1200 1201 return err == -EOPNOTSUPP ? 0 : err; 1202 } 1203 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); 1204 1205 /** 1206 * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch 1207 * 1208 * @dst: route's IPv4 destination address 1209 * @dst_len: destination address length (prefix length) 1210 * @fi: route FIB info structure 1211 * @tos: route TOS 1212 * @type: route type 1213 * @tb_id: route table ID 1214 * 1215 * Delete IPv4 route entry from switch device. 1216 */ 1217 int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, 1218 u8 tos, u8 type, u32 tb_id) 1219 { 1220 struct switchdev_obj_ipv4_fib ipv4_fib = { 1221 .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, 1222 .dst = dst, 1223 .dst_len = dst_len, 1224 .tos = tos, 1225 .type = type, 1226 .nlflags = 0, 1227 .tb_id = tb_id, 1228 }; 1229 struct net_device *dev; 1230 int err = 0; 1231 1232 memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); 1233 1234 if (!(fi->fib_flags & RTNH_F_OFFLOAD)) 1235 return 0; 1236 1237 dev = switchdev_get_dev_by_nhs(fi); 1238 if (!dev) 1239 return 0; 1240 1241 err = switchdev_port_obj_del(dev, &ipv4_fib.obj); 1242 if (!err) 1243 fi->fib_flags &= ~RTNH_F_OFFLOAD; 1244 1245 return err == -EOPNOTSUPP ? 0 : err; 1246 } 1247 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); 1248 1249 /** 1250 * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation 1251 * 1252 * @fi: route FIB info structure 1253 */ 1254 void switchdev_fib_ipv4_abort(struct fib_info *fi) 1255 { 1256 /* There was a problem installing this route to the offload 1257 * device. For now, until we come up with more refined 1258 * policy handling, abruptly end IPv4 fib offloading for 1259 * for entire net by flushing offload device(s) of all 1260 * IPv4 routes, and mark IPv4 fib offloading broken from 1261 * this point forward. 1262 */ 1263 1264 fib_flush_external(fi->fib_net); 1265 fi->fib_net->ipv4.fib_offload_disabled = true; 1266 } 1267 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); 1268 1269 static bool switchdev_port_same_parent_id(struct net_device *a, 1270 struct net_device *b) 1271 { 1272 struct switchdev_attr a_attr = { 1273 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, 1274 .flags = SWITCHDEV_F_NO_RECURSE, 1275 }; 1276 struct switchdev_attr b_attr = { 1277 .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, 1278 .flags = SWITCHDEV_F_NO_RECURSE, 1279 }; 1280 1281 if (switchdev_port_attr_get(a, &a_attr) || 1282 switchdev_port_attr_get(b, &b_attr)) 1283 return false; 1284 1285 return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); 1286 } 1287 1288 static u32 switchdev_port_fwd_mark_get(struct net_device *dev, 1289 struct net_device *group_dev) 1290 { 1291 struct net_device *lower_dev; 1292 struct list_head *iter; 1293 1294 netdev_for_each_lower_dev(group_dev, lower_dev, iter) { 1295 if (lower_dev == dev) 1296 continue; 1297 if (switchdev_port_same_parent_id(dev, lower_dev)) 1298 return lower_dev->offload_fwd_mark; 1299 return switchdev_port_fwd_mark_get(dev, lower_dev); 1300 } 1301 1302 return dev->ifindex; 1303 } 1304 1305 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, 1306 u32 old_mark, u32 *reset_mark) 1307 { 1308 struct net_device *lower_dev; 1309 struct list_head *iter; 1310 1311 netdev_for_each_lower_dev(group_dev, lower_dev, iter) { 1312 if (lower_dev->offload_fwd_mark == old_mark) { 1313 if (!*reset_mark) 1314 *reset_mark = lower_dev->ifindex; 1315 lower_dev->offload_fwd_mark = *reset_mark; 1316 } 1317 switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); 1318 } 1319 } 1320 1321 /** 1322 * switchdev_port_fwd_mark_set - Set port offload forwarding mark 1323 * 1324 * @dev: port device 1325 * @group_dev: containing device 1326 * @joining: true if dev is joining group; false if leaving group 1327 * 1328 * An ungrouped port's offload mark is just its ifindex. A grouped 1329 * port's (member of a bridge, for example) offload mark is the ifindex 1330 * of one of the ports in the group with the same parent (switch) ID. 1331 * Ports on the same device in the same group will have the same mark. 1332 * 1333 * Example: 1334 * 1335 * br0 ifindex=9 1336 * sw1p1 ifindex=2 mark=2 1337 * sw1p2 ifindex=3 mark=2 1338 * sw2p1 ifindex=4 mark=5 1339 * sw2p2 ifindex=5 mark=5 1340 * 1341 * If sw2p2 leaves the bridge, we'll have: 1342 * 1343 * br0 ifindex=9 1344 * sw1p1 ifindex=2 mark=2 1345 * sw1p2 ifindex=3 mark=2 1346 * sw2p1 ifindex=4 mark=4 1347 * sw2p2 ifindex=5 mark=5 1348 */ 1349 void switchdev_port_fwd_mark_set(struct net_device *dev, 1350 struct net_device *group_dev, 1351 bool joining) 1352 { 1353 u32 mark = dev->ifindex; 1354 u32 reset_mark = 0; 1355 1356 if (group_dev) { 1357 ASSERT_RTNL(); 1358 if (joining) 1359 mark = switchdev_port_fwd_mark_get(dev, group_dev); 1360 else if (dev->offload_fwd_mark == mark) 1361 /* Ohoh, this port was the mark reference port, 1362 * but it's leaving the group, so reset the 1363 * mark for the remaining ports in the group. 1364 */ 1365 switchdev_port_fwd_mark_reset(group_dev, mark, 1366 &reset_mark); 1367 } 1368 1369 dev->offload_fwd_mark = mark; 1370 } 1371 EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set); 1372