/*
 * net/switchdev/switchdev.c - Switch device API
 * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>

/**
 * switchdev_port_attr_get - Get port attribute
 *
 * @dev: port device
 * @attr: attribute to get
 */
int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	struct switchdev_attr first = {
		.id = SWITCHDEV_ATTR_UNDEFINED
	};
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_get)
		return ops->switchdev_port_attr_get(dev, attr);

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		return err;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to get attr on
	 * each port.  Return -ENODATA if attr values don't
	 * compare across ports.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_attr_get(lower_dev, attr);
		if (err)
			break;
		if (first.id == SWITCHDEV_ATTR_UNDEFINED)
			first = *attr;
		else if (memcmp(&first, attr, sizeof(*attr)))
			return -ENODATA;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
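/* Illustrative usage (a sketch, not part of the original file): fetching
 * the switch (parent) ID of a port, e.g. to tell whether two ports sit
 * on the same switch device.  "dev" is assumed to be a switch port
 * netdev and the caller is assumed to hold rtnl_lock:
 *
 *	struct switchdev_attr attr = {
 *		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
 *	};
 *
 *	err = switchdev_port_attr_get(dev, &attr);
 *
 * On success, attr.u.ppid holds the switch ID.
 */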
static int __switchdev_port_attr_set(struct net_device *dev,
				     struct switchdev_attr *attr)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_set)
		return ops->switchdev_port_attr_set(dev, attr);

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		return err;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to set attr on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_port_attr_set(lower_dev, attr);
		if (err)
			break;
	}

	return err;
}

struct switchdev_attr_set_work {
	struct work_struct work;
	struct net_device *dev;
	struct switchdev_attr attr;
};

static void switchdev_port_attr_set_work(struct work_struct *work)
{
	struct switchdev_attr_set_work *asw =
		container_of(work, struct switchdev_attr_set_work, work);
	int err;

	rtnl_lock();
	err = switchdev_port_attr_set(asw->dev, &asw->attr);
	if (err && err != -EOPNOTSUPP)
		netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
			   err, asw->attr.id);
	rtnl_unlock();

	dev_put(asw->dev);
	kfree(work);
}

static int switchdev_port_attr_set_defer(struct net_device *dev,
					 struct switchdev_attr *attr)
{
	struct switchdev_attr_set_work *asw;

	asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
	if (!asw)
		return -ENOMEM;

	INIT_WORK(&asw->work, switchdev_port_attr_set_work);

	dev_hold(dev);
	asw->dev = dev;
	memcpy(&asw->attr, attr, sizeof(asw->attr));

	schedule_work(&asw->work);

	return 0;
}

/**
 * switchdev_port_attr_set - Set port attribute
 *
 * @dev: port device
 * @attr: attribute to set
 *
 * Use a 2-phase prepare-commit transaction model to ensure
 * system is not left in a partially updated state due to
 * failure from driver/device.
 */
int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
{
	int err;

	if (!rtnl_is_locked()) {
		/* Running a prepare-commit transaction across stacked
		 * devices requires that nothing moves, so if rtnl_lock
		 * is not held, schedule a worker thread to hold
		 * rtnl_lock while setting attr.
		 */

		return switchdev_port_attr_set_defer(dev, attr);
	}

	/* Phase I: prepare for attr set.  Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support.  The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the attr.
	 */

	attr->trans = SWITCHDEV_TRANS_PREPARE;
	err = __switchdev_port_attr_set(dev, attr);
	if (err) {
		/* Prepare phase failed: abort the transaction.  Any
		 * resources reserved in the prepare phase are
		 * released.
		 */

		if (err != -EOPNOTSUPP) {
			attr->trans = SWITCHDEV_TRANS_ABORT;
			__switchdev_port_attr_set(dev, attr);
		}

		return err;
	}

	/* Phase II: commit attr set.  This cannot fail as a fault
	 * of driver/device.  If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	attr->trans = SWITCHDEV_TRANS_COMMIT;
	err = __switchdev_port_attr_set(dev, attr);
	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
	     dev->name, attr->id);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
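/* Driver-side view of the transaction (a sketch; the foo_* helpers are
 * hypothetical).  A driver's switchdev_port_attr_set op is called once
 * per phase and dispatches on attr->trans:
 *
 *	static int foo_port_attr_set(struct net_device *dev,
 *				     struct switchdev_attr *attr)
 *	{
 *		switch (attr->trans) {
 *		case SWITCHDEV_TRANS_PREPARE:
 *			return foo_reserve_resources(dev, attr);
 *		case SWITCHDEV_TRANS_ABORT:
 *			foo_release_resources(dev, attr);
 *			return 0;
 *		case SWITCHDEV_TRANS_COMMIT:
 *			return foo_commit_attr(dev, attr);
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}
 */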
static int __switchdev_port_obj_add(struct net_device *dev,
				    struct switchdev_obj *obj)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_add)
		return ops->switchdev_port_obj_add(dev, obj);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to add object on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_port_obj_add(lower_dev, obj);
		if (err)
			break;
	}

	return err;
}

/**
 * switchdev_port_obj_add - Add port object
 *
 * @dev: port device
 * @obj: object to add
 *
 * Use a 2-phase prepare-commit transaction model to ensure
 * system is not left in a partially updated state due to
 * failure from driver/device.
 *
 * rtnl_lock must be held.
 */
int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
{
	int err;

	ASSERT_RTNL();

	/* Phase I: prepare for obj add.  Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support.  The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the obj.
	 */

	obj->trans = SWITCHDEV_TRANS_PREPARE;
	err = __switchdev_port_obj_add(dev, obj);
	if (err) {
		/* Prepare phase failed: abort the transaction.  Any
		 * resources reserved in the prepare phase are
		 * released.
		 */

		if (err != -EOPNOTSUPP) {
			obj->trans = SWITCHDEV_TRANS_ABORT;
			__switchdev_port_obj_add(dev, obj);
		}

		return err;
	}

	/* Phase II: commit obj add.  This cannot fail as a fault
	 * of driver/device.  If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	obj->trans = SWITCHDEV_TRANS_COMMIT;
	err = __switchdev_port_obj_add(dev, obj);
	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
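/* Illustrative usage (sketch): adding VLANs 10-20 as untagged members
 * of a port, with rtnl_lock held by the caller:
 *
 *	struct switchdev_obj obj = {
 *		.id = SWITCHDEV_OBJ_PORT_VLAN,
 *		.u.vlan = {
 *			.flags = BRIDGE_VLAN_INFO_UNTAGGED,
 *			.vid_begin = 10,
 *			.vid_end = 20,
 *		},
 *	};
 *
 *	err = switchdev_port_obj_add(dev, &obj);
 *
 * The same obj, passed to switchdev_port_obj_del(), removes the VLANs.
 */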
/**
 * switchdev_port_obj_del - Delete port object
 *
 * @dev: port device
 * @obj: object to delete
 */
int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_del)
		return ops->switchdev_port_obj_del(dev, obj);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to delete object on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_obj_del(lower_dev, obj);
		if (err)
			break;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);

/**
 * switchdev_port_obj_dump - Dump port objects
 *
 * @dev: port device
 * @obj: object to dump
 */
int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_dump)
		return ops->switchdev_port_obj_dump(dev, obj);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to dump objects on
	 * first port at bottom of stack.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_obj_dump(lower_dev, obj);
		break;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
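/* Driver-side view of an object dump (a sketch; the foo_* names and the
 * port->vlan_bitmap table are hypothetical).  For a
 * SWITCHDEV_OBJ_PORT_VLAN dump, the driver walks its hardware state,
 * fills in obj->u.vlan for each entry, and invokes the caller's obj->cb:
 *
 *	static int foo_port_obj_dump(struct net_device *dev,
 *				     struct switchdev_obj *obj)
 *	{
 *		struct foo_port *port = netdev_priv(dev);
 *		struct switchdev_obj_vlan *vlan = &obj->u.vlan;
 *		int err = 0;
 *		u16 vid;
 *
 *		for (vid = 1; vid < VLAN_N_VID; vid++) {
 *			if (!test_bit(vid, port->vlan_bitmap))
 *				continue;
 *			vlan->flags = 0;
 *			vlan->vid_begin = vid;
 *			vlan->vid_end = vid;
 *			err = obj->cb(dev, obj);
 *			if (err)
 *				break;
 *		}
 *
 *		return err;
 *	}
 */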
static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);

/**
 * register_switchdev_notifier - Register notifier
 * @nb: notifier_block
 *
 * Register switch device notifier.  This should be used by code
 * which needs to monitor events happening in a particular device.
 * Return values are same as for atomic_notifier_chain_register().
 */
int register_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	mutex_lock(&switchdev_mutex);
	err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);

/**
 * unregister_switchdev_notifier - Unregister notifier
 * @nb: notifier_block
 *
 * Unregister switch device notifier.
 * Return values are same as for atomic_notifier_chain_unregister().
 */
int unregister_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	mutex_lock(&switchdev_mutex);
	err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);

/**
 * call_switchdev_notifiers - Call notifiers
 * @val: value passed unmodified to notifier function
 * @dev: port device
 * @info: notifier information data
 *
 * Call all network notifier blocks.  This should be called by a driver
 * when it needs to propagate a hardware event.
 * Return values are same as for atomic_notifier_call_chain().
 */
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
			     struct switchdev_notifier_info *info)
{
	int err;

	info->dev = dev;
	mutex_lock(&switchdev_mutex);
	err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
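/* Illustrative notifier flow (sketch).  A driver that learns a MAC/VLAN
 * in hardware propagates it up the chain:
 *
 *	struct switchdev_notifier_fdb_info info;
 *
 *	info.addr = addr;
 *	info.vid = vid;
 *	call_switchdev_notifiers(SWITCHDEV_FDB_ADD, dev, &info.info);
 *
 * A consumer (the bridge, for example) registers a notifier_block to
 * receive such events; foo_switchdev_event is hypothetical:
 *
 *	static struct notifier_block foo_switchdev_notifier = {
 *		.notifier_call = foo_switchdev_event,
 *	};
 *
 *	err = register_switchdev_notifier(&foo_switchdev_notifier);
 */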
struct switchdev_vlan_dump {
	struct switchdev_obj obj;
	struct sk_buff *skb;
	u32 filter_mask;
	u16 flags;
	u16 begin;
	u16 end;
};

static int switchdev_port_vlan_dump_put(struct net_device *dev,
					struct switchdev_vlan_dump *dump)
{
	struct bridge_vlan_info vinfo;

	vinfo.flags = dump->flags;

	if (dump->begin == 0 && dump->end == 0) {
		return 0;
	} else if (dump->begin == dump->end) {
		vinfo.vid = dump->begin;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
	} else {
		vinfo.vid = dump->begin;
		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
		vinfo.vid = dump->end;
		vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
	}

	return 0;
}

static int switchdev_port_vlan_dump_cb(struct net_device *dev,
				       struct switchdev_obj *obj)
{
	struct switchdev_vlan_dump *dump =
		container_of(obj, struct switchdev_vlan_dump, obj);
	struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
	int err = 0;

	if (vlan->vid_begin > vlan->vid_end)
		return -EINVAL;

	if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
		dump->flags = vlan->flags;
		for (dump->begin = dump->end = vlan->vid_begin;
		     dump->begin <= vlan->vid_end;
		     dump->begin++, dump->end++) {
			err = switchdev_port_vlan_dump_put(dev, dump);
			if (err)
				return err;
		}
	} else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
		if (dump->begin > vlan->vid_begin &&
		    dump->begin >= vlan->vid_end) {
			if ((dump->begin - 1) == vlan->vid_end &&
			    dump->flags == vlan->flags) {
				/* prepend */
				dump->begin = vlan->vid_begin;
			} else {
				err = switchdev_port_vlan_dump_put(dev, dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else if (dump->end <= vlan->vid_begin &&
			   dump->end < vlan->vid_end) {
			if ((dump->end + 1) == vlan->vid_begin &&
			    dump->flags == vlan->flags) {
				/* append */
				dump->end = vlan->vid_end;
			} else {
				err = switchdev_port_vlan_dump_put(dev, dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else {
			err = -EINVAL;
		}
	}

	return err;
}

static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
				    u32 filter_mask)
{
	struct switchdev_vlan_dump dump = {
		.obj = {
			.id = SWITCHDEV_OBJ_PORT_VLAN,
			.cb = switchdev_port_vlan_dump_cb,
		},
		.skb = skb,
		.filter_mask = filter_mask,
	};
	int err = 0;

	if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
	    (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
		err = switchdev_port_obj_dump(dev, &dump.obj);
		if (err)
			goto err_out;
		if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
			/* last one */
			err = switchdev_port_vlan_dump_put(dev, &dump);
	}

err_out:
	return err == -EOPNOTSUPP ? 0 : err;
}
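/* Worked example: with RTEXT_FILTER_BRVLAN_COMPRESSED, contiguous VLANs
 * that share flags are coalesced by the callback above, so VIDs 10, 11
 * and 12 are emitted as a single range, i.e. one IFLA_BRIDGE_VLAN_INFO
 * pair:
 *
 *	{ .vid = 10, .flags = f | BRIDGE_VLAN_INFO_RANGE_BEGIN }
 *	{ .vid = 12, .flags = f | BRIDGE_VLAN_INFO_RANGE_END }
 *
 * With plain RTEXT_FILTER_BRVLAN, each VID is emitted individually.
 */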
/**
 * switchdev_port_bridge_getlink - Get bridge port attributes
 *
 * @dev: port device
 *
 * Called for SELF on rtnl_bridge_getlink to get bridge port
 * attributes.
 */
int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
				  struct net_device *dev, u32 filter_mask,
				  int nlflags)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
	};
	u16 mode = BRIDGE_MODE_UNDEF;
	u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
	int err;

	err = switchdev_port_attr_get(dev, &attr);
	if (err && err != -EOPNOTSUPP)
		return err;

	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
				       attr.u.brport_flags, mask, nlflags,
				       filter_mask, switchdev_port_vlan_fill);
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);

static int switchdev_port_br_setflag(struct net_device *dev,
				     struct nlattr *nlattr,
				     unsigned long brport_flag)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
	};
	u8 flag = nla_get_u8(nlattr);
	int err;

	err = switchdev_port_attr_get(dev, &attr);
	if (err)
		return err;

	if (flag)
		attr.u.brport_flags |= brport_flag;
	else
		attr.u.brport_flags &= ~brport_flag;

	return switchdev_port_attr_set(dev, &attr);
}

static const struct nla_policy
switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
	[IFLA_BRPORT_STATE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_COST]		= { .type = NLA_U32 },
	[IFLA_BRPORT_PRIORITY]		= { .type = NLA_U16 },
	[IFLA_BRPORT_MODE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_GUARD]		= { .type = NLA_U8 },
	[IFLA_BRPORT_PROTECT]		= { .type = NLA_U8 },
	[IFLA_BRPORT_FAST_LEAVE]	= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING]		= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING_SYNC]	= { .type = NLA_U8 },
	[IFLA_BRPORT_UNICAST_FLOOD]	= { .type = NLA_U8 },
};

static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
					      struct nlattr *protinfo)
{
	struct nlattr *attr;
	int rem;
	int err;

	err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
				  switchdev_port_bridge_policy);
	if (err)
		return err;

	nla_for_each_nested(attr, protinfo, rem) {
		switch (nla_type(attr)) {
		case IFLA_BRPORT_LEARNING:
			err = switchdev_port_br_setflag(dev, attr,
							BR_LEARNING);
			break;
		case IFLA_BRPORT_LEARNING_SYNC:
			err = switchdev_port_br_setflag(dev, attr,
							BR_LEARNING_SYNC);
			break;
		default:
			err = -EOPNOTSUPP;
			break;
		}
		if (err)
			return err;
	}

	return 0;
}

static int switchdev_port_br_afspec(struct net_device *dev,
				    struct nlattr *afspec,
				    int (*f)(struct net_device *dev,
					     struct switchdev_obj *obj))
{
	struct nlattr *attr;
	struct bridge_vlan_info *vinfo;
	struct switchdev_obj obj = {
		.id = SWITCHDEV_OBJ_PORT_VLAN,
	};
	struct switchdev_obj_vlan *vlan = &obj.u.vlan;
	int rem;
	int err;

	nla_for_each_nested(attr, afspec, rem) {
		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
			continue;
		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
			return -EINVAL;
		vinfo = nla_data(attr);
		vlan->flags = vinfo->flags;
		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
			if (vlan->vid_begin)
				return -EINVAL;
			vlan->vid_begin = vinfo->vid;
		} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
			if (!vlan->vid_begin)
				return -EINVAL;
			vlan->vid_end = vinfo->vid;
			if (vlan->vid_end <= vlan->vid_begin)
				return -EINVAL;
			err = f(dev, &obj);
			if (err)
				return err;
			memset(vlan, 0, sizeof(*vlan));
		} else {
			if (vlan->vid_begin)
				return -EINVAL;
			vlan->vid_begin = vinfo->vid;
			vlan->vid_end = vinfo->vid;
			err = f(dev, &obj);
			if (err)
				return err;
			memset(vlan, 0, sizeof(*vlan));
		}
	}

	return 0;
}

/**
 * switchdev_port_bridge_setlink - Set bridge port attributes
 *
 * @dev: port device
 * @nlh: netlink header
 * @flags: netlink flags
 *
 * Called for SELF on rtnl_bridge_setlink to set bridge port
 * attributes.
 */
int switchdev_port_bridge_setlink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	struct nlattr *protinfo;
	struct nlattr *afspec;
	int err = 0;

	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				   IFLA_PROTINFO);
	if (protinfo) {
		err = switchdev_port_br_setlink_protinfo(dev, protinfo);
		if (err)
			return err;
	}

	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				 IFLA_AF_SPEC);
	if (afspec)
		err = switchdev_port_br_afspec(dev, afspec,
					       switchdev_port_obj_add);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);

/**
 * switchdev_port_bridge_dellink - Delete bridge port attributes
 *
 * @dev: port device
 * @nlh: netlink header
 * @flags: netlink flags
 *
 * Called for SELF on rtnl_bridge_dellink to delete bridge port
 * attributes.
 */
int switchdev_port_bridge_dellink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	struct nlattr *afspec;

	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				 IFLA_AF_SPEC);
	if (afspec)
		return switchdev_port_br_afspec(dev, afspec,
						switchdev_port_obj_del);

	return 0;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);

/**
 * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
 *
 * @ndm: netlink hdr
 * @tb: netlink attributes
 * @dev: port device
 * @addr: MAC address to add
 * @vid: VLAN to add
 * @nlm_flags: netlink flags passed in (NLM_F_*)
 *
 * Add FDB entry to switch device.
 */
int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
			   struct net_device *dev, const unsigned char *addr,
			   u16 vid, u16 nlm_flags)
{
	struct switchdev_obj obj = {
		.id = SWITCHDEV_OBJ_PORT_FDB,
		.u.fdb = {
			.addr = addr,
			.vid = vid,
		},
	};

	return switchdev_port_obj_add(dev, &obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);

/**
 * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
 *
 * @ndm: netlink hdr
 * @tb: netlink attributes
 * @dev: port device
 * @addr: MAC address to delete
 * @vid: VLAN to delete
 *
 * Delete FDB entry from switch device.
 */
int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
			   struct net_device *dev, const unsigned char *addr,
			   u16 vid)
{
	struct switchdev_obj obj = {
		.id = SWITCHDEV_OBJ_PORT_FDB,
		.u.fdb = {
			.addr = addr,
			.vid = vid,
		},
	};

	return switchdev_port_obj_del(dev, &obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
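/* Illustrative wiring (sketch): a switchdev driver can plug these
 * helpers straight into its net_device_ops so that "bridge link" and
 * "bridge fdb ... self" operations are serviced by the switch device;
 * foo_port_netdev_ops is hypothetical:
 *
 *	static const struct net_device_ops foo_port_netdev_ops = {
 *		...
 *		.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
 *		.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
 *		.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
 *		.ndo_fdb_add		= switchdev_port_fdb_add,
 *		.ndo_fdb_del		= switchdev_port_fdb_del,
 *		.ndo_fdb_dump		= switchdev_port_fdb_dump,
 *	};
 */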
struct switchdev_fdb_dump {
	struct switchdev_obj obj;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	int idx;
};

static int switchdev_port_fdb_dump_cb(struct net_device *dev,
				      struct switchdev_obj *obj)
{
	struct switchdev_fdb_dump *dump =
		container_of(obj, struct switchdev_fdb_dump, obj);
	u32 portid = NETLINK_CB(dump->cb->skb).portid;
	u32 seq = dump->cb->nlh->nlmsg_seq;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	if (dump->idx < dump->cb->args[0])
		goto skip;

	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
			sizeof(*ndm), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = AF_BRIDGE;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = NTF_SELF;
	ndm->ndm_type = 0;
	ndm->ndm_ifindex = dev->ifindex;
	ndm->ndm_state = obj->u.fdb.ndm_state;

	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
		goto nla_put_failure;

	if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
		goto nla_put_failure;

	nlmsg_end(dump->skb, nlh);

skip:
	dump->idx++;
	return 0;

nla_put_failure:
	nlmsg_cancel(dump->skb, nlh);
	return -EMSGSIZE;
}

/**
 * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
 *
 * @skb: netlink skb
 * @cb: netlink callback
 * @dev: port device
 * @filter_dev: filter device
 * @idx: index to start dumping at
 *
 * Dump FDB entries from switch device.
 */
int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
			    struct net_device *dev,
			    struct net_device *filter_dev, int idx)
{
	struct switchdev_fdb_dump dump = {
		.obj = {
			.id = SWITCHDEV_OBJ_PORT_FDB,
			.cb = switchdev_port_fdb_dump_cb,
		},
		.skb = skb,
		.cb = cb,
		.idx = idx,
	};

	switchdev_port_obj_dump(dev, &dump.obj);
	return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);

static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct net_device *port_dev;
	struct list_head *iter;

	/* Recursively search down until we find a sw port dev.
	 * (A sw port dev supports switchdev_port_attr_get).
	 */

	if (ops && ops->switchdev_port_attr_get)
		return dev;

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		port_dev = switchdev_get_lowest_dev(lower_dev);
		if (port_dev)
			return port_dev;
	}

	return NULL;
}

static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
	};
	struct switchdev_attr prev_attr;
	struct net_device *dev = NULL;
	int nhsel;

	/* For this route, all nexthop devs must be on the same switch. */

	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
		const struct fib_nh *nh = &fi->fib_nh[nhsel];

		if (!nh->nh_dev)
			return NULL;

		dev = switchdev_get_lowest_dev(nh->nh_dev);
		if (!dev)
			return NULL;

		if (switchdev_port_attr_get(dev, &attr))
			return NULL;

		if (nhsel > 0 &&
		    !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
			return NULL;

		prev_attr = attr;
	}

	return dev;
}
/**
 * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
 *
 * @dst: route's IPv4 destination address
 * @dst_len: destination address length (prefix length)
 * @fi: route FIB info structure
 * @tos: route TOS
 * @type: route type
 * @nlflags: netlink flags passed in (NLM_F_*)
 * @tb_id: route table ID
 *
 * Add/modify switch IPv4 route entry.
 */
int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
{
	struct switchdev_obj fib_obj = {
		.id = SWITCHDEV_OBJ_IPV4_FIB,
		.u.ipv4_fib = {
			.dst = dst,
			.dst_len = dst_len,
			.fi = fi,
			.tos = tos,
			.type = type,
			.nlflags = nlflags,
			.tb_id = tb_id,
		},
	};
	struct net_device *dev;
	int err = 0;

	/* Don't offload route if using custom ip rules or if
	 * IPv4 FIB offloading has been disabled completely.
	 */

#ifdef CONFIG_IP_MULTIPLE_TABLES
	if (fi->fib_net->ipv4.fib_has_custom_rules)
		return 0;
#endif

	if (fi->fib_net->ipv4.fib_offload_disabled)
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;

	err = switchdev_port_obj_add(dev, &fib_obj);
	if (!err)
		fi->fib_flags |= RTNH_F_OFFLOAD;

	return err == -EOPNOTSUPP ? 0 : err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);

/**
 * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
 *
 * @dst: route's IPv4 destination address
 * @dst_len: destination address length (prefix length)
 * @fi: route FIB info structure
 * @tos: route TOS
 * @type: route type
 * @tb_id: route table ID
 *
 * Delete IPv4 route entry from switch device.
 */
int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 tb_id)
{
	struct switchdev_obj fib_obj = {
		.id = SWITCHDEV_OBJ_IPV4_FIB,
		.u.ipv4_fib = {
			.dst = dst,
			.dst_len = dst_len,
			.fi = fi,
			.tos = tos,
			.type = type,
			.nlflags = 0,
			.tb_id = tb_id,
		},
	};
	struct net_device *dev;
	int err = 0;

	if (!(fi->fib_flags & RTNH_F_OFFLOAD))
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;

	err = switchdev_port_obj_del(dev, &fib_obj);
	if (!err)
		fi->fib_flags &= ~RTNH_F_OFFLOAD;

	return err == -EOPNOTSUPP ? 0 : err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);

/**
 * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
 *
 * @fi: route FIB info structure
 */
void switchdev_fib_ipv4_abort(struct fib_info *fi)
{
	/* There was a problem installing this route to the offload
	 * device.  For now, until we come up with more refined
	 * policy handling, abruptly end IPv4 fib offloading for
	 * the entire net by flushing offload device(s) of all
	 * IPv4 routes, and mark IPv4 fib offloading broken from
	 * this point forward.
	 */

	fib_flush_external(fi->fib_net);
	fi->fib_net->ipv4.fib_offload_disabled = true;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
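/* Illustrative call pattern (a sketch of how the IPv4 FIB code uses the
 * helpers above): on route insertion, try to offload and fall back to
 * aborting offload on failure; key, plen, cfg and tb are assumed from
 * the FIB insert context:
 *
 *	err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type,
 *				     cfg->fc_nlflags, tb->tb_id);
 *	if (err)
 *		switchdev_fib_ipv4_abort(fi);
 */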
static bool switchdev_port_same_parent_id(struct net_device *a,
					  struct net_device *b)
{
	struct switchdev_attr a_attr = {
		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
		.flags = SWITCHDEV_F_NO_RECURSE,
	};
	struct switchdev_attr b_attr = {
		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
		.flags = SWITCHDEV_F_NO_RECURSE,
	};

	if (switchdev_port_attr_get(a, &a_attr) ||
	    switchdev_port_attr_get(b, &b_attr))
		return false;

	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
}

static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
				       struct net_device *group_dev)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev == dev)
			continue;
		if (switchdev_port_same_parent_id(dev, lower_dev))
			return lower_dev->offload_fwd_mark;
		return switchdev_port_fwd_mark_get(dev, lower_dev);
	}

	return dev->ifindex;
}

static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
					  u32 old_mark, u32 *reset_mark)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev->offload_fwd_mark == old_mark) {
			if (!*reset_mark)
				*reset_mark = lower_dev->ifindex;
			lower_dev->offload_fwd_mark = *reset_mark;
		}
		switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
	}
}

/**
 * switchdev_port_fwd_mark_set - Set port offload forwarding mark
 *
 * @dev: port device
 * @group_dev: containing device
 * @joining: true if dev is joining group; false if leaving group
 *
 * An ungrouped port's offload mark is just its ifindex.  A grouped
 * port's (member of a bridge, for example) offload mark is the ifindex
 * of one of the ports in the group with the same parent (switch) ID.
 * Ports on the same device in the same group will have the same mark.
 *
 * Example:
 *
 *	br0		ifindex=9
 *	  sw1p1		ifindex=2	mark=2
 *	  sw1p2		ifindex=3	mark=2
 *	  sw2p1		ifindex=4	mark=5
 *	  sw2p2		ifindex=5	mark=5
 *
 * If sw2p2 leaves the bridge, we'll have:
 *
 *	br0		ifindex=9
 *	  sw1p1		ifindex=2	mark=2
 *	  sw1p2		ifindex=3	mark=2
 *	  sw2p1		ifindex=4	mark=4
 *	sw2p2		ifindex=5	mark=5
 */
void switchdev_port_fwd_mark_set(struct net_device *dev,
				 struct net_device *group_dev,
				 bool joining)
{
	u32 mark = dev->ifindex;
	u32 reset_mark = 0;

	if (group_dev && joining) {
		mark = switchdev_port_fwd_mark_get(dev, group_dev);
	} else if (group_dev && !joining) {
		if (dev->offload_fwd_mark == mark)
			/* Ohoh, this port was the mark reference port,
			 * but it's leaving the group, so reset the
			 * mark for the remaining ports in the group.
			 */
			switchdev_port_fwd_mark_reset(group_dev, mark,
						      &reset_mark);
	}

	dev->offload_fwd_mark = mark;
}
EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
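/* Illustrative usage (sketch): a driver would typically call this from
 * its NETDEV_CHANGEUPPER handler when a port joins or leaves a bridge;
 * "port_dev", "upper_dev" and "joining" are assumed from the notifier
 * context:
 *
 *	switchdev_port_fwd_mark_set(port_dev, upper_dev, joining);
 */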