1 /* 2 * originally based on the dummy device. 3 * 4 * Copyright 1999, Thomas Davis, tadavis@lbl.gov. 5 * Licensed under the GPL. Based on dummy.c, and eql.c devices. 6 * 7 * bonding.c: an Ethernet Bonding driver 8 * 9 * This is useful to talk to a Cisco EtherChannel compatible equipment: 10 * Cisco 5500 11 * Sun Trunking (Solaris) 12 * Alteon AceDirector Trunks 13 * Linux Bonding 14 * and probably many L2 switches ... 15 * 16 * How it works: 17 * ifconfig bond0 ipaddress netmask up 18 * will setup a network device, with an ip address. No mac address 19 * will be assigned at this time. The hw mac address will come from 20 * the first slave bonded to the channel. All slaves will then use 21 * this hw mac address. 22 * 23 * ifconfig bond0 down 24 * will release all slaves, marking them as down. 25 * 26 * ifenslave bond0 eth0 27 * will attach eth0 to bond0 as a slave. eth0 hw mac address will either 28 * a: be used as initial mac address 29 * b: if a hw mac address already is there, eth0's hw mac address 30 * will then be set from bond0. 
31 * 32 */ 33 34 //#define BONDING_DEBUG 1 35 36 #include <linux/kernel.h> 37 #include <linux/module.h> 38 #include <linux/types.h> 39 #include <linux/fcntl.h> 40 #include <linux/interrupt.h> 41 #include <linux/ptrace.h> 42 #include <linux/ioport.h> 43 #include <linux/in.h> 44 #include <net/ip.h> 45 #include <linux/ip.h> 46 #include <linux/tcp.h> 47 #include <linux/udp.h> 48 #include <linux/slab.h> 49 #include <linux/string.h> 50 #include <linux/init.h> 51 #include <linux/timer.h> 52 #include <linux/socket.h> 53 #include <linux/ctype.h> 54 #include <linux/inet.h> 55 #include <linux/bitops.h> 56 #include <asm/system.h> 57 #include <asm/io.h> 58 #include <asm/dma.h> 59 #include <asm/uaccess.h> 60 #include <linux/errno.h> 61 #include <linux/netdevice.h> 62 #include <linux/inetdevice.h> 63 #include <linux/igmp.h> 64 #include <linux/etherdevice.h> 65 #include <linux/skbuff.h> 66 #include <net/sock.h> 67 #include <linux/rtnetlink.h> 68 #include <linux/proc_fs.h> 69 #include <linux/seq_file.h> 70 #include <linux/smp.h> 71 #include <linux/if_ether.h> 72 #include <net/arp.h> 73 #include <linux/mii.h> 74 #include <linux/ethtool.h> 75 #include <linux/if_vlan.h> 76 #include <linux/if_bonding.h> 77 #include <linux/jiffies.h> 78 #include <net/route.h> 79 #include <net/net_namespace.h> 80 #include "bonding.h" 81 #include "bond_3ad.h" 82 #include "bond_alb.h" 83 84 /*---------------------------- Module parameters ----------------------------*/ 85 86 /* monitor all links that often (in milliseconds). 
<=0 disables monitoring */ 87 #define BOND_LINK_MON_INTERV 0 88 #define BOND_LINK_ARP_INTERV 0 89 90 static int max_bonds = BOND_DEFAULT_MAX_BONDS; 91 static int miimon = BOND_LINK_MON_INTERV; 92 static int updelay = 0; 93 static int downdelay = 0; 94 static int use_carrier = 1; 95 static char *mode = NULL; 96 static char *primary = NULL; 97 static char *lacp_rate = NULL; 98 static char *xmit_hash_policy = NULL; 99 static int arp_interval = BOND_LINK_ARP_INTERV; 100 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; 101 static char *arp_validate = NULL; 102 static int fail_over_mac = 0; 103 struct bond_params bonding_defaults; 104 105 module_param(max_bonds, int, 0); 106 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); 107 module_param(miimon, int, 0); 108 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); 109 module_param(updelay, int, 0); 110 MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); 111 module_param(downdelay, int, 0); 112 MODULE_PARM_DESC(downdelay, "Delay before considering link down, " 113 "in milliseconds"); 114 module_param(use_carrier, int, 0); 115 MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; " 116 "0 for off, 1 for on (default)"); 117 module_param(mode, charp, 0); 118 MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, " 119 "1 for active-backup, 2 for balance-xor, " 120 "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, " 121 "6 for balance-alb"); 122 module_param(primary, charp, 0); 123 MODULE_PARM_DESC(primary, "Primary network device to use"); 124 module_param(lacp_rate, charp, 0); 125 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner " 126 "(slow/fast)"); 127 module_param(xmit_hash_policy, charp, 0); 128 MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)" 129 ", 1 for layer 3+4"); 130 module_param(arp_interval, int, 0); 131 MODULE_PARM_DESC(arp_interval, "arp interval in 
milliseconds"); 132 module_param_array(arp_ip_target, charp, NULL, 0); 133 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); 134 module_param(arp_validate, charp, 0); 135 MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); 136 module_param(fail_over_mac, int, 0); 137 MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. 0 of off (default), 1 for on."); 138 139 /*----------------------------- Global variables ----------------------------*/ 140 141 static const char * const version = 142 DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; 143 144 LIST_HEAD(bond_dev_list); 145 146 #ifdef CONFIG_PROC_FS 147 static struct proc_dir_entry *bond_proc_dir = NULL; 148 #endif 149 150 extern struct rw_semaphore bonding_rwsem; 151 static __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ; 152 static int arp_ip_count = 0; 153 static int bond_mode = BOND_MODE_ROUNDROBIN; 154 static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2; 155 static int lacp_fast = 0; 156 157 158 struct bond_parm_tbl bond_lacp_tbl[] = { 159 { "slow", AD_LACP_SLOW}, 160 { "fast", AD_LACP_FAST}, 161 { NULL, -1}, 162 }; 163 164 struct bond_parm_tbl bond_mode_tbl[] = { 165 { "balance-rr", BOND_MODE_ROUNDROBIN}, 166 { "active-backup", BOND_MODE_ACTIVEBACKUP}, 167 { "balance-xor", BOND_MODE_XOR}, 168 { "broadcast", BOND_MODE_BROADCAST}, 169 { "802.3ad", BOND_MODE_8023AD}, 170 { "balance-tlb", BOND_MODE_TLB}, 171 { "balance-alb", BOND_MODE_ALB}, 172 { NULL, -1}, 173 }; 174 175 struct bond_parm_tbl xmit_hashtype_tbl[] = { 176 { "layer2", BOND_XMIT_POLICY_LAYER2}, 177 { "layer3+4", BOND_XMIT_POLICY_LAYER34}, 178 { "layer2+3", BOND_XMIT_POLICY_LAYER23}, 179 { NULL, -1}, 180 }; 181 182 struct bond_parm_tbl arp_validate_tbl[] = { 183 { "none", BOND_ARP_VALIDATE_NONE}, 184 { "active", BOND_ARP_VALIDATE_ACTIVE}, 185 { "backup", BOND_ARP_VALIDATE_BACKUP}, 186 { "all", BOND_ARP_VALIDATE_ALL}, 187 { NULL, -1}, 188 }; 
189 190 /*-------------------------- Forward declarations ---------------------------*/ 191 192 static void bond_send_gratuitous_arp(struct bonding *bond); 193 static void bond_deinit(struct net_device *bond_dev); 194 195 /*---------------------------- General routines -----------------------------*/ 196 197 static const char *bond_mode_name(int mode) 198 { 199 switch (mode) { 200 case BOND_MODE_ROUNDROBIN : 201 return "load balancing (round-robin)"; 202 case BOND_MODE_ACTIVEBACKUP : 203 return "fault-tolerance (active-backup)"; 204 case BOND_MODE_XOR : 205 return "load balancing (xor)"; 206 case BOND_MODE_BROADCAST : 207 return "fault-tolerance (broadcast)"; 208 case BOND_MODE_8023AD: 209 return "IEEE 802.3ad Dynamic link aggregation"; 210 case BOND_MODE_TLB: 211 return "transmit load balancing"; 212 case BOND_MODE_ALB: 213 return "adaptive load balancing"; 214 default: 215 return "unknown"; 216 } 217 } 218 219 /*---------------------------------- VLAN -----------------------------------*/ 220 221 /** 222 * bond_add_vlan - add a new vlan id on bond 223 * @bond: bond that got the notification 224 * @vlan_id: the vlan id to add 225 * 226 * Returns -ENOMEM if allocation failed. 227 */ 228 static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) 229 { 230 struct vlan_entry *vlan; 231 232 dprintk("bond: %s, vlan id %d\n", 233 (bond ? 
bond->dev->name: "None"), vlan_id); 234 235 vlan = kmalloc(sizeof(struct vlan_entry), GFP_KERNEL); 236 if (!vlan) { 237 return -ENOMEM; 238 } 239 240 INIT_LIST_HEAD(&vlan->vlan_list); 241 vlan->vlan_id = vlan_id; 242 vlan->vlan_ip = 0; 243 244 write_lock_bh(&bond->lock); 245 246 list_add_tail(&vlan->vlan_list, &bond->vlan_list); 247 248 write_unlock_bh(&bond->lock); 249 250 dprintk("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name); 251 252 return 0; 253 } 254 255 /** 256 * bond_del_vlan - delete a vlan id from bond 257 * @bond: bond that got the notification 258 * @vlan_id: the vlan id to delete 259 * 260 * returns -ENODEV if @vlan_id was not found in @bond. 261 */ 262 static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) 263 { 264 struct vlan_entry *vlan, *next; 265 int res = -ENODEV; 266 267 dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); 268 269 write_lock_bh(&bond->lock); 270 271 list_for_each_entry_safe(vlan, next, &bond->vlan_list, vlan_list) { 272 if (vlan->vlan_id == vlan_id) { 273 list_del(&vlan->vlan_list); 274 275 if ((bond->params.mode == BOND_MODE_TLB) || 276 (bond->params.mode == BOND_MODE_ALB)) { 277 bond_alb_clear_vlan(bond, vlan_id); 278 } 279 280 dprintk("removed VLAN ID %d from bond %s\n", vlan_id, 281 bond->dev->name); 282 283 kfree(vlan); 284 285 if (list_empty(&bond->vlan_list) && 286 (bond->slave_cnt == 0)) { 287 /* Last VLAN removed and no slaves, so 288 * restore block on adding VLANs. This will 289 * be removed once new slaves that are not 290 * VLAN challenged will be added. 291 */ 292 bond->dev->features |= NETIF_F_VLAN_CHALLENGED; 293 } 294 295 res = 0; 296 goto out; 297 } 298 } 299 300 dprintk("couldn't find VLAN ID %d in bond %s\n", vlan_id, 301 bond->dev->name); 302 303 out: 304 write_unlock_bh(&bond->lock); 305 return res; 306 } 307 308 /** 309 * bond_has_challenged_slaves 310 * @bond: the bond we're working on 311 * 312 * Searches the slave list. 
Returns 1 if a vlan challenged slave 313 * was found, 0 otherwise. 314 * 315 * Assumes bond->lock is held. 316 */ 317 static int bond_has_challenged_slaves(struct bonding *bond) 318 { 319 struct slave *slave; 320 int i; 321 322 bond_for_each_slave(bond, slave, i) { 323 if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) { 324 dprintk("found VLAN challenged slave - %s\n", 325 slave->dev->name); 326 return 1; 327 } 328 } 329 330 dprintk("no VLAN challenged slaves found\n"); 331 return 0; 332 } 333 334 /** 335 * bond_next_vlan - safely skip to the next item in the vlans list. 336 * @bond: the bond we're working on 337 * @curr: item we're advancing from 338 * 339 * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL, 340 * or @curr->next otherwise (even if it is @curr itself again). 341 * 342 * Caller must hold bond->lock 343 */ 344 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) 345 { 346 struct vlan_entry *next, *last; 347 348 if (list_empty(&bond->vlan_list)) { 349 return NULL; 350 } 351 352 if (!curr) { 353 next = list_entry(bond->vlan_list.next, 354 struct vlan_entry, vlan_list); 355 } else { 356 last = list_entry(bond->vlan_list.prev, 357 struct vlan_entry, vlan_list); 358 if (last == curr) { 359 next = list_entry(bond->vlan_list.next, 360 struct vlan_entry, vlan_list); 361 } else { 362 next = list_entry(curr->vlan_list.next, 363 struct vlan_entry, vlan_list); 364 } 365 } 366 367 return next; 368 } 369 370 /** 371 * bond_dev_queue_xmit - Prepare skb for xmit. 372 * 373 * @bond: bond device that got this skb for tx. 374 * @skb: hw accel VLAN tagged skb to transmit 375 * @slave_dev: slave that is supposed to xmit this skbuff 376 * 377 * When the bond gets an skb to transmit that is 378 * already hardware accelerated VLAN tagged, and it 379 * needs to relay this skb to a slave that is not 380 * hw accel capable, the skb needs to be "unaccelerated", 381 * i.e. 
strip the hwaccel tag and re-insert it as part
 * of the payload.
 */
int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev)
{
	unsigned short uninitialized_var(vlan_id);
	int retag;

	/* Decide whether the tag has to be moved into the payload while
	 * skb->dev is still untouched.
	 * NOTE(review): vlan_get_tag() may consult skb->dev, so this test
	 * must stay ahead of the skb->dev assignment below - confirm before
	 * reordering.
	 */
	retag = !list_empty(&bond->vlan_list) &&
		!(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
		vlan_get_tag(skb, &vlan_id) == 0;

	skb->dev = slave_dev;

	if (retag) {
		skb = vlan_put_tag(skb, vlan_id);
		/* vlan_put_tag() frees the skb in case of error,
		 * so return success here so the calling functions
		 * won't attempt to free it again.
		 */
		if (!skb)
			return 0;
	}

	skb->priority = 1;
	dev_queue_xmit(skb);

	return 0;
}

/*
 * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid
 * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a
 * lock because:
 * a. This operation is performed in IOCTL context,
 * b. The operation is protected by the RTNL semaphore in the 8021q code,
 * c. Holding a lock with BH disabled while directly calling a base driver
 *    entry point is generally a BAD idea.
 *
 * The design of synchronization/protection for this operation in the 8021q
 * module is good for one or more VLAN devices over a single physical device
 * and cannot be extended for a teaming solution like bonding, so there is a
 * potential race condition here where a net device from the vlan group might
 * be referenced (either by a base driver or the 8021q code) while it is being
 * removed from the system. However, it turns out we're not making matters
 * worse, and if it works for regular VLAN usage it will work here too.
426 */ 427 428 /** 429 * bond_vlan_rx_register - Propagates registration to slaves 430 * @bond_dev: bonding net device that got called 431 * @grp: vlan group being registered 432 */ 433 static void bond_vlan_rx_register(struct net_device *bond_dev, struct vlan_group *grp) 434 { 435 struct bonding *bond = bond_dev->priv; 436 struct slave *slave; 437 int i; 438 439 bond->vlgrp = grp; 440 441 bond_for_each_slave(bond, slave, i) { 442 struct net_device *slave_dev = slave->dev; 443 444 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 445 slave_dev->vlan_rx_register) { 446 slave_dev->vlan_rx_register(slave_dev, grp); 447 } 448 } 449 } 450 451 /** 452 * bond_vlan_rx_add_vid - Propagates adding an id to slaves 453 * @bond_dev: bonding net device that got called 454 * @vid: vlan id being added 455 */ 456 static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) 457 { 458 struct bonding *bond = bond_dev->priv; 459 struct slave *slave; 460 int i, res; 461 462 bond_for_each_slave(bond, slave, i) { 463 struct net_device *slave_dev = slave->dev; 464 465 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 466 slave_dev->vlan_rx_add_vid) { 467 slave_dev->vlan_rx_add_vid(slave_dev, vid); 468 } 469 } 470 471 res = bond_add_vlan(bond, vid); 472 if (res) { 473 printk(KERN_ERR DRV_NAME 474 ": %s: Error: Failed to add vlan id %d\n", 475 bond_dev->name, vid); 476 } 477 } 478 479 /** 480 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves 481 * @bond_dev: bonding net device that got called 482 * @vid: vlan id being removed 483 */ 484 static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) 485 { 486 struct bonding *bond = bond_dev->priv; 487 struct slave *slave; 488 struct net_device *vlan_dev; 489 int i, res; 490 491 bond_for_each_slave(bond, slave, i) { 492 struct net_device *slave_dev = slave->dev; 493 494 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 495 slave_dev->vlan_rx_kill_vid) { 496 /* Save and then restore vlan_dev in 
the grp array, 497 * since the slave's driver might clear it. 498 */ 499 vlan_dev = vlan_group_get_device(bond->vlgrp, vid); 500 slave_dev->vlan_rx_kill_vid(slave_dev, vid); 501 vlan_group_set_device(bond->vlgrp, vid, vlan_dev); 502 } 503 } 504 505 res = bond_del_vlan(bond, vid); 506 if (res) { 507 printk(KERN_ERR DRV_NAME 508 ": %s: Error: Failed to remove vlan id %d\n", 509 bond_dev->name, vid); 510 } 511 } 512 513 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev) 514 { 515 struct vlan_entry *vlan; 516 517 write_lock_bh(&bond->lock); 518 519 if (list_empty(&bond->vlan_list)) { 520 goto out; 521 } 522 523 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 524 slave_dev->vlan_rx_register) { 525 slave_dev->vlan_rx_register(slave_dev, bond->vlgrp); 526 } 527 528 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 529 !(slave_dev->vlan_rx_add_vid)) { 530 goto out; 531 } 532 533 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 534 slave_dev->vlan_rx_add_vid(slave_dev, vlan->vlan_id); 535 } 536 537 out: 538 write_unlock_bh(&bond->lock); 539 } 540 541 static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *slave_dev) 542 { 543 struct vlan_entry *vlan; 544 struct net_device *vlan_dev; 545 546 write_lock_bh(&bond->lock); 547 548 if (list_empty(&bond->vlan_list)) { 549 goto out; 550 } 551 552 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 553 !(slave_dev->vlan_rx_kill_vid)) { 554 goto unreg; 555 } 556 557 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 558 /* Save and then restore vlan_dev in the grp array, 559 * since the slave's driver might clear it. 
560 */ 561 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 562 slave_dev->vlan_rx_kill_vid(slave_dev, vlan->vlan_id); 563 vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev); 564 } 565 566 unreg: 567 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 568 slave_dev->vlan_rx_register) { 569 slave_dev->vlan_rx_register(slave_dev, NULL); 570 } 571 572 out: 573 write_unlock_bh(&bond->lock); 574 } 575 576 /*------------------------------- Link status -------------------------------*/ 577 578 /* 579 * Set the carrier state for the master according to the state of its 580 * slaves. If any slaves are up, the master is up. In 802.3ad mode, 581 * do special 802.3ad magic. 582 * 583 * Returns zero if carrier state does not change, nonzero if it does. 584 */ 585 static int bond_set_carrier(struct bonding *bond) 586 { 587 struct slave *slave; 588 int i; 589 590 if (bond->slave_cnt == 0) 591 goto down; 592 593 if (bond->params.mode == BOND_MODE_8023AD) 594 return bond_3ad_set_carrier(bond); 595 596 bond_for_each_slave(bond, slave, i) { 597 if (slave->link == BOND_LINK_UP) { 598 if (!netif_carrier_ok(bond->dev)) { 599 netif_carrier_on(bond->dev); 600 return 1; 601 } 602 return 0; 603 } 604 } 605 606 down: 607 if (netif_carrier_ok(bond->dev)) { 608 netif_carrier_off(bond->dev); 609 return 1; 610 } 611 return 0; 612 } 613 614 /* 615 * Get link speed and duplex from the slave's base driver 616 * using ethtool. If for some reason the call fails or the 617 * values are invalid, fake speed and duplex to 100/Full 618 * and return error. 
619 */ 620 static int bond_update_speed_duplex(struct slave *slave) 621 { 622 struct net_device *slave_dev = slave->dev; 623 struct ethtool_cmd etool; 624 int res; 625 626 /* Fake speed and duplex */ 627 slave->speed = SPEED_100; 628 slave->duplex = DUPLEX_FULL; 629 630 if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings) 631 return -1; 632 633 res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool); 634 if (res < 0) 635 return -1; 636 637 switch (etool.speed) { 638 case SPEED_10: 639 case SPEED_100: 640 case SPEED_1000: 641 case SPEED_10000: 642 break; 643 default: 644 return -1; 645 } 646 647 switch (etool.duplex) { 648 case DUPLEX_FULL: 649 case DUPLEX_HALF: 650 break; 651 default: 652 return -1; 653 } 654 655 slave->speed = etool.speed; 656 slave->duplex = etool.duplex; 657 658 return 0; 659 } 660 661 /* 662 * if <dev> supports MII link status reporting, check its link status. 663 * 664 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), 665 * depening upon the setting of the use_carrier parameter. 666 * 667 * Return either BMSR_LSTATUS, meaning that the link is up (or we 668 * can't tell and just pretend it is), or 0, meaning that the link is 669 * down. 670 * 671 * If reporting is non-zero, instead of faking link up, return -1 if 672 * both ETHTOOL and MII ioctls fail (meaning the device does not 673 * support them). If use_carrier is set, return whatever it says. 674 * It'd be nice if there was a good way to tell if a driver supports 675 * netif_carrier, but there really isn't. 676 */ 677 static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) 678 { 679 static int (* ioctl)(struct net_device *, struct ifreq *, int); 680 struct ifreq ifr; 681 struct mii_ioctl_data *mii; 682 683 if (bond->params.use_carrier) { 684 return netif_carrier_ok(slave_dev) ? 
BMSR_LSTATUS : 0; 685 } 686 687 ioctl = slave_dev->do_ioctl; 688 if (ioctl) { 689 /* TODO: set pointer to correct ioctl on a per team member */ 690 /* bases to make this more efficient. that is, once */ 691 /* we determine the correct ioctl, we will always */ 692 /* call it and not the others for that team */ 693 /* member. */ 694 695 /* 696 * We cannot assume that SIOCGMIIPHY will also read a 697 * register; not all network drivers (e.g., e100) 698 * support that. 699 */ 700 701 /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ 702 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 703 mii = if_mii(&ifr); 704 if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) { 705 mii->reg_num = MII_BMSR; 706 if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) { 707 return (mii->val_out & BMSR_LSTATUS); 708 } 709 } 710 } 711 712 /* 713 * Some drivers cache ETHTOOL_GLINK for a period of time so we only 714 * attempt to get link status from it if the above MII ioctls fail. 715 */ 716 if (slave_dev->ethtool_ops) { 717 if (slave_dev->ethtool_ops->get_link) { 718 u32 link; 719 720 link = slave_dev->ethtool_ops->get_link(slave_dev); 721 722 return link ? BMSR_LSTATUS : 0; 723 } 724 } 725 726 /* 727 * If reporting, report that either there's no dev->do_ioctl, 728 * or both SIOCGMIIREG and get_link failed (meaning that we 729 * cannot report link status). If not reporting, pretend 730 * we're ok. 731 */ 732 return (reporting ? 
-1 : BMSR_LSTATUS); 733 } 734 735 /*----------------------------- Multicast list ------------------------------*/ 736 737 /* 738 * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise 739 */ 740 static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) 741 { 742 return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && 743 dmi1->dmi_addrlen == dmi2->dmi_addrlen; 744 } 745 746 /* 747 * returns dmi entry if found, NULL otherwise 748 */ 749 static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) 750 { 751 struct dev_mc_list *idmi; 752 753 for (idmi = mc_list; idmi; idmi = idmi->next) { 754 if (bond_is_dmi_same(dmi, idmi)) { 755 return idmi; 756 } 757 } 758 759 return NULL; 760 } 761 762 /* 763 * Push the promiscuity flag down to appropriate slaves 764 */ 765 static void bond_set_promiscuity(struct bonding *bond, int inc) 766 { 767 if (USES_PRIMARY(bond->params.mode)) { 768 /* write lock already acquired */ 769 if (bond->curr_active_slave) { 770 dev_set_promiscuity(bond->curr_active_slave->dev, inc); 771 } 772 } else { 773 struct slave *slave; 774 int i; 775 bond_for_each_slave(bond, slave, i) { 776 dev_set_promiscuity(slave->dev, inc); 777 } 778 } 779 } 780 781 /* 782 * Push the allmulti flag down to all slaves 783 */ 784 static void bond_set_allmulti(struct bonding *bond, int inc) 785 { 786 if (USES_PRIMARY(bond->params.mode)) { 787 /* write lock already acquired */ 788 if (bond->curr_active_slave) { 789 dev_set_allmulti(bond->curr_active_slave->dev, inc); 790 } 791 } else { 792 struct slave *slave; 793 int i; 794 bond_for_each_slave(bond, slave, i) { 795 dev_set_allmulti(slave->dev, inc); 796 } 797 } 798 } 799 800 /* 801 * Add a Multicast address to slaves 802 * according to mode 803 */ 804 static void bond_mc_add(struct bonding *bond, void *addr, int alen) 805 { 806 if (USES_PRIMARY(bond->params.mode)) { 807 /* write lock already acquired */ 808 if 
(bond->curr_active_slave) { 809 dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0); 810 } 811 } else { 812 struct slave *slave; 813 int i; 814 bond_for_each_slave(bond, slave, i) { 815 dev_mc_add(slave->dev, addr, alen, 0); 816 } 817 } 818 } 819 820 /* 821 * Remove a multicast address from slave 822 * according to mode 823 */ 824 static void bond_mc_delete(struct bonding *bond, void *addr, int alen) 825 { 826 if (USES_PRIMARY(bond->params.mode)) { 827 /* write lock already acquired */ 828 if (bond->curr_active_slave) { 829 dev_mc_delete(bond->curr_active_slave->dev, addr, alen, 0); 830 } 831 } else { 832 struct slave *slave; 833 int i; 834 bond_for_each_slave(bond, slave, i) { 835 dev_mc_delete(slave->dev, addr, alen, 0); 836 } 837 } 838 } 839 840 841 /* 842 * Retrieve the list of registered multicast addresses for the bonding 843 * device and retransmit an IGMP JOIN request to the current active 844 * slave. 845 */ 846 static void bond_resend_igmp_join_requests(struct bonding *bond) 847 { 848 struct in_device *in_dev; 849 struct ip_mc_list *im; 850 851 rcu_read_lock(); 852 in_dev = __in_dev_get_rcu(bond->dev); 853 if (in_dev) { 854 for (im = in_dev->mc_list; im; im = im->next) { 855 ip_mc_rejoin_group(im); 856 } 857 } 858 859 rcu_read_unlock(); 860 } 861 862 /* 863 * Totally destroys the mc_list in bond 864 */ 865 static void bond_mc_list_destroy(struct bonding *bond) 866 { 867 struct dev_mc_list *dmi; 868 869 dmi = bond->mc_list; 870 while (dmi) { 871 bond->mc_list = dmi->next; 872 kfree(dmi); 873 dmi = bond->mc_list; 874 } 875 bond->mc_list = NULL; 876 } 877 878 /* 879 * Copy all the Multicast addresses from src to the bonding device dst 880 */ 881 static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, 882 gfp_t gfp_flag) 883 { 884 struct dev_mc_list *dmi, *new_dmi; 885 886 for (dmi = mc_list; dmi; dmi = dmi->next) { 887 new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag); 888 889 if (!new_dmi) { 890 /* FIXME: Potential memory 
leak !!! */ 891 return -ENOMEM; 892 } 893 894 new_dmi->next = bond->mc_list; 895 bond->mc_list = new_dmi; 896 new_dmi->dmi_addrlen = dmi->dmi_addrlen; 897 memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); 898 new_dmi->dmi_users = dmi->dmi_users; 899 new_dmi->dmi_gusers = dmi->dmi_gusers; 900 } 901 902 return 0; 903 } 904 905 /* 906 * flush all members of flush->mc_list from device dev->mc_list 907 */ 908 static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev) 909 { 910 struct bonding *bond = bond_dev->priv; 911 struct dev_mc_list *dmi; 912 913 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 914 dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 915 } 916 917 if (bond->params.mode == BOND_MODE_8023AD) { 918 /* del lacpdu mc addr from mc list */ 919 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 920 921 dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 922 } 923 } 924 925 /*--------------------------- Active slave change ---------------------------*/ 926 927 /* 928 * Update the mc list and multicast-related flags for the new and 929 * old active slaves (if any) according to the multicast mode, and 930 * promiscuous flags unconditionally. 
931 */ 932 static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active) 933 { 934 struct dev_mc_list *dmi; 935 936 if (!USES_PRIMARY(bond->params.mode)) { 937 /* nothing to do - mc list is already up-to-date on 938 * all slaves 939 */ 940 return; 941 } 942 943 if (old_active) { 944 if (bond->dev->flags & IFF_PROMISC) { 945 dev_set_promiscuity(old_active->dev, -1); 946 } 947 948 if (bond->dev->flags & IFF_ALLMULTI) { 949 dev_set_allmulti(old_active->dev, -1); 950 } 951 952 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 953 dev_mc_delete(old_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 954 } 955 } 956 957 if (new_active) { 958 if (bond->dev->flags & IFF_PROMISC) { 959 dev_set_promiscuity(new_active->dev, 1); 960 } 961 962 if (bond->dev->flags & IFF_ALLMULTI) { 963 dev_set_allmulti(new_active->dev, 1); 964 } 965 966 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 967 dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 968 } 969 bond_resend_igmp_join_requests(bond); 970 } 971 } 972 973 /** 974 * find_best_interface - select the best available slave to be the active one 975 * @bond: our bonding struct 976 * 977 * Warning: Caller must hold curr_slave_lock for writing. 
978 */ 979 static struct slave *bond_find_best_slave(struct bonding *bond) 980 { 981 struct slave *new_active, *old_active; 982 struct slave *bestslave = NULL; 983 int mintime = bond->params.updelay; 984 int i; 985 986 new_active = old_active = bond->curr_active_slave; 987 988 if (!new_active) { /* there were no active slaves left */ 989 if (bond->slave_cnt > 0) { /* found one slave */ 990 new_active = bond->first_slave; 991 } else { 992 return NULL; /* still no slave, return NULL */ 993 } 994 } 995 996 /* first try the primary link; if arping, a link must tx/rx traffic 997 * before it can be considered the curr_active_slave - also, we would skip 998 * slaves between the curr_active_slave and primary_slave that may be up 999 * and able to arp 1000 */ 1001 if ((bond->primary_slave) && 1002 (!bond->params.arp_interval) && 1003 (IS_UP(bond->primary_slave->dev))) { 1004 new_active = bond->primary_slave; 1005 } 1006 1007 /* remember where to stop iterating over the slaves */ 1008 old_active = new_active; 1009 1010 bond_for_each_slave_from(bond, new_active, i, old_active) { 1011 if (IS_UP(new_active->dev)) { 1012 if (new_active->link == BOND_LINK_UP) { 1013 return new_active; 1014 } else if (new_active->link == BOND_LINK_BACK) { 1015 /* link up, but waiting for stabilization */ 1016 if (new_active->delay < mintime) { 1017 mintime = new_active->delay; 1018 bestslave = new_active; 1019 } 1020 } 1021 } 1022 } 1023 1024 return bestslave; 1025 } 1026 1027 /** 1028 * change_active_interface - change the active slave into the specified one 1029 * @bond: our bonding struct 1030 * @new: the new slave to make the active one 1031 * 1032 * Set the new slave to the bond's settings and unset them on the old 1033 * curr_active_slave. 1034 * Setting include flags, mc-list, promiscuity, allmulti, etc. 
1035 * 1036 * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, 1037 * because it is apparently the best available slave we have, even though its 1038 * updelay hasn't timed out yet. 1039 * 1040 * Warning: Caller must hold curr_slave_lock for writing. 1041 */ 1042 void bond_change_active_slave(struct bonding *bond, struct slave *new_active) 1043 { 1044 struct slave *old_active = bond->curr_active_slave; 1045 1046 if (old_active == new_active) { 1047 return; 1048 } 1049 1050 if (new_active) { 1051 if (new_active->link == BOND_LINK_BACK) { 1052 if (USES_PRIMARY(bond->params.mode)) { 1053 printk(KERN_INFO DRV_NAME 1054 ": %s: making interface %s the new " 1055 "active one %d ms earlier.\n", 1056 bond->dev->name, new_active->dev->name, 1057 (bond->params.updelay - new_active->delay) * bond->params.miimon); 1058 } 1059 1060 new_active->delay = 0; 1061 new_active->link = BOND_LINK_UP; 1062 new_active->jiffies = jiffies; 1063 1064 if (bond->params.mode == BOND_MODE_8023AD) { 1065 bond_3ad_handle_link_change(new_active, BOND_LINK_UP); 1066 } 1067 1068 if ((bond->params.mode == BOND_MODE_TLB) || 1069 (bond->params.mode == BOND_MODE_ALB)) { 1070 bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); 1071 } 1072 } else { 1073 if (USES_PRIMARY(bond->params.mode)) { 1074 printk(KERN_INFO DRV_NAME 1075 ": %s: making interface %s the new " 1076 "active one.\n", 1077 bond->dev->name, new_active->dev->name); 1078 } 1079 } 1080 } 1081 1082 if (USES_PRIMARY(bond->params.mode)) { 1083 bond_mc_swap(bond, new_active, old_active); 1084 } 1085 1086 if ((bond->params.mode == BOND_MODE_TLB) || 1087 (bond->params.mode == BOND_MODE_ALB)) { 1088 bond_alb_handle_active_change(bond, new_active); 1089 if (old_active) 1090 bond_set_slave_inactive_flags(old_active); 1091 if (new_active) 1092 bond_set_slave_active_flags(new_active); 1093 } else { 1094 bond->curr_active_slave = new_active; 1095 } 1096 1097 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 1098 if 
(old_active) { 1099 bond_set_slave_inactive_flags(old_active); 1100 } 1101 1102 if (new_active) { 1103 bond_set_slave_active_flags(new_active); 1104 } 1105 1106 /* when bonding does not set the slave MAC address, the bond MAC 1107 * address is the one of the active slave. 1108 */ 1109 if (new_active && bond->params.fail_over_mac) 1110 memcpy(bond->dev->dev_addr, new_active->dev->dev_addr, 1111 new_active->dev->addr_len); 1112 if (bond->curr_active_slave && 1113 test_bit(__LINK_STATE_LINKWATCH_PENDING, 1114 &bond->curr_active_slave->dev->state)) { 1115 dprintk("delaying gratuitous arp on %s\n", 1116 bond->curr_active_slave->dev->name); 1117 bond->send_grat_arp = 1; 1118 } else 1119 bond_send_gratuitous_arp(bond); 1120 } 1121 } 1122 1123 /** 1124 * bond_select_active_slave - select a new active slave, if needed 1125 * @bond: our bonding struct 1126 * 1127 * This functions shoud be called when one of the following occurs: 1128 * - The old curr_active_slave has been released or lost its link. 1129 * - The primary_slave has got its link back. 1130 * - A slave has got its link back and there's no old curr_active_slave. 1131 * 1132 * Warning: Caller must hold curr_slave_lock for writing. 1133 */ 1134 void bond_select_active_slave(struct bonding *bond) 1135 { 1136 struct slave *best_slave; 1137 int rv; 1138 1139 best_slave = bond_find_best_slave(bond); 1140 if (best_slave != bond->curr_active_slave) { 1141 bond_change_active_slave(bond, best_slave); 1142 rv = bond_set_carrier(bond); 1143 if (!rv) 1144 return; 1145 1146 if (netif_carrier_ok(bond->dev)) { 1147 printk(KERN_INFO DRV_NAME 1148 ": %s: first active interface up!\n", 1149 bond->dev->name); 1150 } else { 1151 printk(KERN_INFO DRV_NAME ": %s: " 1152 "now running without any active interface !\n", 1153 bond->dev->name); 1154 } 1155 } 1156 } 1157 1158 /*--------------------------- slave list handling ---------------------------*/ 1159 1160 /* 1161 * This function attaches the slave to the end of list. 
1162 * 1163 * bond->lock held for writing by caller. 1164 */ 1165 static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) 1166 { 1167 if (bond->first_slave == NULL) { /* attaching the first slave */ 1168 new_slave->next = new_slave; 1169 new_slave->prev = new_slave; 1170 bond->first_slave = new_slave; 1171 } else { 1172 new_slave->next = bond->first_slave; 1173 new_slave->prev = bond->first_slave->prev; 1174 new_slave->next->prev = new_slave; 1175 new_slave->prev->next = new_slave; 1176 } 1177 1178 bond->slave_cnt++; 1179 } 1180 1181 /* 1182 * This function detaches the slave from the list. 1183 * WARNING: no check is made to verify if the slave effectively 1184 * belongs to <bond>. 1185 * Nothing is freed on return, structures are just unchained. 1186 * If any slave pointer in bond was pointing to <slave>, 1187 * it should be changed by the calling function. 1188 * 1189 * bond->lock held for writing by caller. 1190 */ 1191 static void bond_detach_slave(struct bonding *bond, struct slave *slave) 1192 { 1193 if (slave->next) { 1194 slave->next->prev = slave->prev; 1195 } 1196 1197 if (slave->prev) { 1198 slave->prev->next = slave->next; 1199 } 1200 1201 if (bond->first_slave == slave) { /* slave is the first slave */ 1202 if (bond->slave_cnt > 1) { /* there are more slave */ 1203 bond->first_slave = slave->next; 1204 } else { 1205 bond->first_slave = NULL; /* slave was the last one */ 1206 } 1207 } 1208 1209 slave->next = NULL; 1210 slave->prev = NULL; 1211 bond->slave_cnt--; 1212 } 1213 1214 /*---------------------------------- IOCTL ----------------------------------*/ 1215 1216 static int bond_sethwaddr(struct net_device *bond_dev, 1217 struct net_device *slave_dev) 1218 { 1219 dprintk("bond_dev=%p\n", bond_dev); 1220 dprintk("slave_dev=%p\n", slave_dev); 1221 dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len); 1222 memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); 1223 return 0; 1224 } 1225 1226 #define 
BOND_VLAN_FEATURES \ 1227 (NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \ 1228 NETIF_F_HW_VLAN_FILTER) 1229 1230 /* 1231 * Compute the common dev->feature set available to all slaves. Some 1232 * feature bits are managed elsewhere, so preserve those feature bits 1233 * on the master device. 1234 */ 1235 static int bond_compute_features(struct bonding *bond) 1236 { 1237 struct slave *slave; 1238 struct net_device *bond_dev = bond->dev; 1239 unsigned long features = bond_dev->features; 1240 unsigned short max_hard_header_len = max((u16)ETH_HLEN, 1241 bond_dev->hard_header_len); 1242 int i; 1243 1244 features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); 1245 features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | 1246 NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; 1247 1248 bond_for_each_slave(bond, slave, i) { 1249 features = netdev_compute_features(features, 1250 slave->dev->features); 1251 if (slave->dev->hard_header_len > max_hard_header_len) 1252 max_hard_header_len = slave->dev->hard_header_len; 1253 } 1254 1255 features |= (bond_dev->features & BOND_VLAN_FEATURES); 1256 bond_dev->features = features; 1257 bond_dev->hard_header_len = max_hard_header_len; 1258 1259 return 0; 1260 } 1261 1262 1263 static void bond_setup_by_slave(struct net_device *bond_dev, 1264 struct net_device *slave_dev) 1265 { 1266 struct bonding *bond = bond_dev->priv; 1267 1268 bond_dev->neigh_setup = slave_dev->neigh_setup; 1269 bond_dev->header_ops = slave_dev->header_ops; 1270 1271 bond_dev->type = slave_dev->type; 1272 bond_dev->hard_header_len = slave_dev->hard_header_len; 1273 bond_dev->addr_len = slave_dev->addr_len; 1274 1275 memcpy(bond_dev->broadcast, slave_dev->broadcast, 1276 slave_dev->addr_len); 1277 bond->setup_by_slave = 1; 1278 } 1279 1280 /* enslave device <slave> to bond device <master> */ 1281 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) 1282 { 1283 struct bonding *bond = bond_dev->priv; 1284 struct slave *new_slave = 
NULL; 1285 struct dev_mc_list *dmi; 1286 struct sockaddr addr; 1287 int link_reporting; 1288 int old_features = bond_dev->features; 1289 int res = 0; 1290 1291 if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL && 1292 slave_dev->do_ioctl == NULL) { 1293 printk(KERN_WARNING DRV_NAME 1294 ": %s: Warning: no link monitoring support for %s\n", 1295 bond_dev->name, slave_dev->name); 1296 } 1297 1298 /* bond must be initialized by bond_open() before enslaving */ 1299 if (!(bond_dev->flags & IFF_UP)) { 1300 printk(KERN_WARNING DRV_NAME 1301 " %s: master_dev is not up in bond_enslave\n", 1302 bond_dev->name); 1303 } 1304 1305 /* already enslaved */ 1306 if (slave_dev->flags & IFF_SLAVE) { 1307 dprintk("Error, Device was already enslaved\n"); 1308 return -EBUSY; 1309 } 1310 1311 /* vlan challenged mutual exclusion */ 1312 /* no need to lock since we're protected by rtnl_lock */ 1313 if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { 1314 dprintk("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1315 if (!list_empty(&bond->vlan_list)) { 1316 printk(KERN_ERR DRV_NAME 1317 ": %s: Error: cannot enslave VLAN " 1318 "challenged slave %s on VLAN enabled " 1319 "bond %s\n", bond_dev->name, slave_dev->name, 1320 bond_dev->name); 1321 return -EPERM; 1322 } else { 1323 printk(KERN_WARNING DRV_NAME 1324 ": %s: Warning: enslaved VLAN challenged " 1325 "slave %s. Adding VLANs will be blocked as " 1326 "long as %s is part of bond %s\n", 1327 bond_dev->name, slave_dev->name, slave_dev->name, 1328 bond_dev->name); 1329 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1330 } 1331 } else { 1332 dprintk("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1333 if (bond->slave_cnt == 0) { 1334 /* First slave, and it is not VLAN challenged, 1335 * so remove the block of adding VLANs over the bond. 1336 */ 1337 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1338 } 1339 } 1340 1341 /* 1342 * Old ifenslave binaries are no longer supported. 
These can 1343 * be identified with moderate accurary by the state of the slave: 1344 * the current ifenslave will set the interface down prior to 1345 * enslaving it; the old ifenslave will not. 1346 */ 1347 if ((slave_dev->flags & IFF_UP)) { 1348 printk(KERN_ERR DRV_NAME ": %s is up. " 1349 "This may be due to an out of date ifenslave.\n", 1350 slave_dev->name); 1351 res = -EPERM; 1352 goto err_undo_flags; 1353 } 1354 1355 /* set bonding device ether type by slave - bonding netdevices are 1356 * created with ether_setup, so when the slave type is not ARPHRD_ETHER 1357 * there is a need to override some of the type dependent attribs/funcs. 1358 * 1359 * bond ether type mutual exclusion - don't allow slaves of dissimilar 1360 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond 1361 */ 1362 if (bond->slave_cnt == 0) { 1363 if (slave_dev->type != ARPHRD_ETHER) 1364 bond_setup_by_slave(bond_dev, slave_dev); 1365 } else if (bond_dev->type != slave_dev->type) { 1366 printk(KERN_ERR DRV_NAME ": %s ether type (%d) is different " 1367 "from other slaves (%d), can not enslave it.\n", 1368 slave_dev->name, 1369 slave_dev->type, bond_dev->type); 1370 res = -EINVAL; 1371 goto err_undo_flags; 1372 } 1373 1374 if (slave_dev->set_mac_address == NULL) { 1375 if (bond->slave_cnt == 0) { 1376 printk(KERN_WARNING DRV_NAME 1377 ": %s: Warning: The first slave device " 1378 "specified does not support setting the MAC " 1379 "address. 
Enabling the fail_over_mac option.", 1380 bond_dev->name); 1381 bond->params.fail_over_mac = 1; 1382 } else if (!bond->params.fail_over_mac) { 1383 printk(KERN_ERR DRV_NAME 1384 ": %s: Error: The slave device specified " 1385 "does not support setting the MAC address, " 1386 "but fail_over_mac is not enabled.\n" 1387 , bond_dev->name); 1388 res = -EOPNOTSUPP; 1389 goto err_undo_flags; 1390 } 1391 } 1392 1393 new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); 1394 if (!new_slave) { 1395 res = -ENOMEM; 1396 goto err_undo_flags; 1397 } 1398 1399 /* save slave's original flags before calling 1400 * netdev_set_master and dev_open 1401 */ 1402 new_slave->original_flags = slave_dev->flags; 1403 1404 /* 1405 * Save slave's original ("permanent") mac address for modes 1406 * that need it, and for restoring it upon release, and then 1407 * set it to the master's address 1408 */ 1409 memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); 1410 1411 if (!bond->params.fail_over_mac) { 1412 /* 1413 * Set slave to master's mac address. 
The application already 1414 * set the master's mac address to that of the first slave 1415 */ 1416 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 1417 addr.sa_family = slave_dev->type; 1418 res = dev_set_mac_address(slave_dev, &addr); 1419 if (res) { 1420 dprintk("Error %d calling set_mac_address\n", res); 1421 goto err_free; 1422 } 1423 } 1424 1425 res = netdev_set_master(slave_dev, bond_dev); 1426 if (res) { 1427 dprintk("Error %d calling netdev_set_master\n", res); 1428 goto err_restore_mac; 1429 } 1430 /* open the slave since the application closed it */ 1431 res = dev_open(slave_dev); 1432 if (res) { 1433 dprintk("Openning slave %s failed\n", slave_dev->name); 1434 goto err_unset_master; 1435 } 1436 1437 new_slave->dev = slave_dev; 1438 slave_dev->priv_flags |= IFF_BONDING; 1439 1440 if ((bond->params.mode == BOND_MODE_TLB) || 1441 (bond->params.mode == BOND_MODE_ALB)) { 1442 /* bond_alb_init_slave() must be called before all other stages since 1443 * it might fail and we do not want to have to undo everything 1444 */ 1445 res = bond_alb_init_slave(bond, new_slave); 1446 if (res) { 1447 goto err_close; 1448 } 1449 } 1450 1451 /* If the mode USES_PRIMARY, then the new slave gets the 1452 * master's promisc (and mc) settings only if it becomes the 1453 * curr_active_slave, and that is taken care of later when calling 1454 * bond_change_active() 1455 */ 1456 if (!USES_PRIMARY(bond->params.mode)) { 1457 /* set promiscuity level to new slave */ 1458 if (bond_dev->flags & IFF_PROMISC) { 1459 dev_set_promiscuity(slave_dev, 1); 1460 } 1461 1462 /* set allmulti level to new slave */ 1463 if (bond_dev->flags & IFF_ALLMULTI) { 1464 dev_set_allmulti(slave_dev, 1); 1465 } 1466 1467 netif_tx_lock_bh(bond_dev); 1468 /* upload master's mc_list to new slave */ 1469 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 1470 dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 1471 } 1472 netif_tx_unlock_bh(bond_dev); 1473 } 1474 1475 if (bond->params.mode 
== BOND_MODE_8023AD) { 1476 /* add lacpdu mc addr to mc list */ 1477 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 1478 1479 dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 1480 } 1481 1482 bond_add_vlans_on_slave(bond, slave_dev); 1483 1484 write_lock_bh(&bond->lock); 1485 1486 bond_attach_slave(bond, new_slave); 1487 1488 new_slave->delay = 0; 1489 new_slave->link_failure_count = 0; 1490 1491 bond_compute_features(bond); 1492 1493 new_slave->last_arp_rx = jiffies; 1494 1495 if (bond->params.miimon && !bond->params.use_carrier) { 1496 link_reporting = bond_check_dev_link(bond, slave_dev, 1); 1497 1498 if ((link_reporting == -1) && !bond->params.arp_interval) { 1499 /* 1500 * miimon is set but a bonded network driver 1501 * does not support ETHTOOL/MII and 1502 * arp_interval is not set. Note: if 1503 * use_carrier is enabled, we will never go 1504 * here (because netif_carrier is always 1505 * supported); thus, we don't need to change 1506 * the messages for netif_carrier. 1507 */ 1508 printk(KERN_WARNING DRV_NAME 1509 ": %s: Warning: MII and ETHTOOL support not " 1510 "available for interface %s, and " 1511 "arp_interval/arp_ip_target module parameters " 1512 "not specified, thus bonding will not detect " 1513 "link failures! 
see bonding.txt for details.\n", 1514 bond_dev->name, slave_dev->name); 1515 } else if (link_reporting == -1) { 1516 /* unable get link status using mii/ethtool */ 1517 printk(KERN_WARNING DRV_NAME 1518 ": %s: Warning: can't get link status from " 1519 "interface %s; the network driver associated " 1520 "with this interface does not support MII or " 1521 "ETHTOOL link status reporting, thus miimon " 1522 "has no effect on this interface.\n", 1523 bond_dev->name, slave_dev->name); 1524 } 1525 } 1526 1527 /* check for initial state */ 1528 if (!bond->params.miimon || 1529 (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) { 1530 if (bond->params.updelay) { 1531 dprintk("Initial state of slave_dev is " 1532 "BOND_LINK_BACK\n"); 1533 new_slave->link = BOND_LINK_BACK; 1534 new_slave->delay = bond->params.updelay; 1535 } else { 1536 dprintk("Initial state of slave_dev is " 1537 "BOND_LINK_UP\n"); 1538 new_slave->link = BOND_LINK_UP; 1539 } 1540 new_slave->jiffies = jiffies; 1541 } else { 1542 dprintk("Initial state of slave_dev is " 1543 "BOND_LINK_DOWN\n"); 1544 new_slave->link = BOND_LINK_DOWN; 1545 } 1546 1547 if (bond_update_speed_duplex(new_slave) && 1548 (new_slave->link != BOND_LINK_DOWN)) { 1549 printk(KERN_WARNING DRV_NAME 1550 ": %s: Warning: failed to get speed and duplex from %s, " 1551 "assumed to be 100Mb/sec and Full.\n", 1552 bond_dev->name, new_slave->dev->name); 1553 1554 if (bond->params.mode == BOND_MODE_8023AD) { 1555 printk(KERN_WARNING DRV_NAME 1556 ": %s: Warning: Operation of 802.3ad mode requires ETHTOOL " 1557 "support in base driver for proper aggregator " 1558 "selection.\n", bond_dev->name); 1559 } 1560 } 1561 1562 if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { 1563 /* if there is a primary slave, remember it */ 1564 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { 1565 bond->primary_slave = new_slave; 1566 } 1567 } 1568 1569 switch (bond->params.mode) { 1570 case BOND_MODE_ACTIVEBACKUP: 1571 
bond_set_slave_inactive_flags(new_slave); 1572 bond_select_active_slave(bond); 1573 break; 1574 case BOND_MODE_8023AD: 1575 /* in 802.3ad mode, the internal mechanism 1576 * will activate the slaves in the selected 1577 * aggregator 1578 */ 1579 bond_set_slave_inactive_flags(new_slave); 1580 /* if this is the first slave */ 1581 if (bond->slave_cnt == 1) { 1582 SLAVE_AD_INFO(new_slave).id = 1; 1583 /* Initialize AD with the number of times that the AD timer is called in 1 second 1584 * can be called only after the mac address of the bond is set 1585 */ 1586 bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, 1587 bond->params.lacp_fast); 1588 } else { 1589 SLAVE_AD_INFO(new_slave).id = 1590 SLAVE_AD_INFO(new_slave->prev).id + 1; 1591 } 1592 1593 bond_3ad_bind_slave(new_slave); 1594 break; 1595 case BOND_MODE_TLB: 1596 case BOND_MODE_ALB: 1597 new_slave->state = BOND_STATE_ACTIVE; 1598 bond_set_slave_inactive_flags(new_slave); 1599 break; 1600 default: 1601 dprintk("This slave is always active in trunk mode\n"); 1602 1603 /* always active in trunk mode */ 1604 new_slave->state = BOND_STATE_ACTIVE; 1605 1606 /* In trunking mode there is little meaning to curr_active_slave 1607 * anyway (it holds no special properties of the bond device), 1608 * so we can change it without calling change_active_interface() 1609 */ 1610 if (!bond->curr_active_slave) { 1611 bond->curr_active_slave = new_slave; 1612 } 1613 break; 1614 } /* switch(bond_mode) */ 1615 1616 bond_set_carrier(bond); 1617 1618 write_unlock_bh(&bond->lock); 1619 1620 res = bond_create_slave_symlinks(bond_dev, slave_dev); 1621 if (res) 1622 goto err_close; 1623 1624 printk(KERN_INFO DRV_NAME 1625 ": %s: enslaving %s as a%s interface with a%s link.\n", 1626 bond_dev->name, slave_dev->name, 1627 new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", 1628 new_slave->link != BOND_LINK_DOWN ? 
"n up" : " down"); 1629 1630 /* enslave is successful */ 1631 return 0; 1632 1633 /* Undo stages on error */ 1634 err_close: 1635 dev_close(slave_dev); 1636 1637 err_unset_master: 1638 netdev_set_master(slave_dev, NULL); 1639 1640 err_restore_mac: 1641 if (!bond->params.fail_over_mac) { 1642 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); 1643 addr.sa_family = slave_dev->type; 1644 dev_set_mac_address(slave_dev, &addr); 1645 } 1646 1647 err_free: 1648 kfree(new_slave); 1649 1650 err_undo_flags: 1651 bond_dev->features = old_features; 1652 1653 return res; 1654 } 1655 1656 /* 1657 * Try to release the slave device <slave> from the bond device <master> 1658 * It is legal to access curr_active_slave without a lock because all the function 1659 * is write-locked. 1660 * 1661 * The rules for slave state should be: 1662 * for Active/Backup: 1663 * Active stays on all backups go down 1664 * for Bonded connections: 1665 * The first up interface should be left on and all others downed. 1666 */ 1667 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) 1668 { 1669 struct bonding *bond = bond_dev->priv; 1670 struct slave *slave, *oldcurrent; 1671 struct sockaddr addr; 1672 int mac_addr_differ; 1673 DECLARE_MAC_BUF(mac); 1674 1675 /* slave is not a slave or master is not master of this slave */ 1676 if (!(slave_dev->flags & IFF_SLAVE) || 1677 (slave_dev->master != bond_dev)) { 1678 printk(KERN_ERR DRV_NAME 1679 ": %s: Error: cannot release %s.\n", 1680 bond_dev->name, slave_dev->name); 1681 return -EINVAL; 1682 } 1683 1684 write_lock_bh(&bond->lock); 1685 1686 slave = bond_get_slave_by_dev(bond, slave_dev); 1687 if (!slave) { 1688 /* not a slave of this bond */ 1689 printk(KERN_INFO DRV_NAME 1690 ": %s: %s not enslaved\n", 1691 bond_dev->name, slave_dev->name); 1692 write_unlock_bh(&bond->lock); 1693 return -EINVAL; 1694 } 1695 1696 mac_addr_differ = memcmp(bond_dev->dev_addr, 1697 slave->perm_hwaddr, 1698 ETH_ALEN); 1699 if (!mac_addr_differ && 
(bond->slave_cnt > 1)) { 1700 printk(KERN_WARNING DRV_NAME 1701 ": %s: Warning: the permanent HWaddr of %s - " 1702 "%s - is still in use by %s. " 1703 "Set the HWaddr of %s to a different address " 1704 "to avoid conflicts.\n", 1705 bond_dev->name, 1706 slave_dev->name, 1707 print_mac(mac, slave->perm_hwaddr), 1708 bond_dev->name, 1709 slave_dev->name); 1710 } 1711 1712 /* Inform AD package of unbinding of slave. */ 1713 if (bond->params.mode == BOND_MODE_8023AD) { 1714 /* must be called before the slave is 1715 * detached from the list 1716 */ 1717 bond_3ad_unbind_slave(slave); 1718 } 1719 1720 printk(KERN_INFO DRV_NAME 1721 ": %s: releasing %s interface %s\n", 1722 bond_dev->name, 1723 (slave->state == BOND_STATE_ACTIVE) 1724 ? "active" : "backup", 1725 slave_dev->name); 1726 1727 oldcurrent = bond->curr_active_slave; 1728 1729 bond->current_arp_slave = NULL; 1730 1731 /* release the slave from its bond */ 1732 bond_detach_slave(bond, slave); 1733 1734 bond_compute_features(bond); 1735 1736 if (bond->primary_slave == slave) { 1737 bond->primary_slave = NULL; 1738 } 1739 1740 if (oldcurrent == slave) { 1741 bond_change_active_slave(bond, NULL); 1742 } 1743 1744 if ((bond->params.mode == BOND_MODE_TLB) || 1745 (bond->params.mode == BOND_MODE_ALB)) { 1746 /* Must be called only after the slave has been 1747 * detached from the list and the curr_active_slave 1748 * has been cleared (if our_slave == old_current), 1749 * but before a new active slave is selected. 1750 */ 1751 write_unlock_bh(&bond->lock); 1752 bond_alb_deinit_slave(bond, slave); 1753 write_lock_bh(&bond->lock); 1754 } 1755 1756 if (oldcurrent == slave) { 1757 /* 1758 * Note that we hold RTNL over this sequence, so there 1759 * is no concern that another slave add/remove event 1760 * will interfere. 
1761 */ 1762 write_unlock_bh(&bond->lock); 1763 read_lock(&bond->lock); 1764 write_lock_bh(&bond->curr_slave_lock); 1765 1766 bond_select_active_slave(bond); 1767 1768 write_unlock_bh(&bond->curr_slave_lock); 1769 read_unlock(&bond->lock); 1770 write_lock_bh(&bond->lock); 1771 } 1772 1773 if (bond->slave_cnt == 0) { 1774 bond_set_carrier(bond); 1775 1776 /* if the last slave was removed, zero the mac address 1777 * of the master so it will be set by the application 1778 * to the mac address of the first slave 1779 */ 1780 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 1781 1782 if (list_empty(&bond->vlan_list)) { 1783 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1784 } else { 1785 printk(KERN_WARNING DRV_NAME 1786 ": %s: Warning: clearing HW address of %s while it " 1787 "still has VLANs.\n", 1788 bond_dev->name, bond_dev->name); 1789 printk(KERN_WARNING DRV_NAME 1790 ": %s: When re-adding slaves, make sure the bond's " 1791 "HW address matches its VLANs'.\n", 1792 bond_dev->name); 1793 } 1794 } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) && 1795 !bond_has_challenged_slaves(bond)) { 1796 printk(KERN_INFO DRV_NAME 1797 ": %s: last VLAN challenged slave %s " 1798 "left bond %s. 
VLAN blocking is removed\n", 1799 bond_dev->name, slave_dev->name, bond_dev->name); 1800 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1801 } 1802 1803 write_unlock_bh(&bond->lock); 1804 1805 /* must do this from outside any spinlocks */ 1806 bond_destroy_slave_symlinks(bond_dev, slave_dev); 1807 1808 bond_del_vlans_from_slave(bond, slave_dev); 1809 1810 /* If the mode USES_PRIMARY, then we should only remove its 1811 * promisc and mc settings if it was the curr_active_slave, but that was 1812 * already taken care of above when we detached the slave 1813 */ 1814 if (!USES_PRIMARY(bond->params.mode)) { 1815 /* unset promiscuity level from slave */ 1816 if (bond_dev->flags & IFF_PROMISC) { 1817 dev_set_promiscuity(slave_dev, -1); 1818 } 1819 1820 /* unset allmulti level from slave */ 1821 if (bond_dev->flags & IFF_ALLMULTI) { 1822 dev_set_allmulti(slave_dev, -1); 1823 } 1824 1825 /* flush master's mc_list from slave */ 1826 netif_tx_lock_bh(bond_dev); 1827 bond_mc_list_flush(bond_dev, slave_dev); 1828 netif_tx_unlock_bh(bond_dev); 1829 } 1830 1831 netdev_set_master(slave_dev, NULL); 1832 1833 /* close slave before restoring its mac address */ 1834 dev_close(slave_dev); 1835 1836 if (!bond->params.fail_over_mac) { 1837 /* restore original ("permanent") mac address */ 1838 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1839 addr.sa_family = slave_dev->type; 1840 dev_set_mac_address(slave_dev, &addr); 1841 } 1842 1843 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | 1844 IFF_SLAVE_INACTIVE | IFF_BONDING | 1845 IFF_SLAVE_NEEDARP); 1846 1847 kfree(slave); 1848 1849 return 0; /* deletion OK */ 1850 } 1851 1852 /* 1853 * Destroy a bonding device. 1854 * Must be under rtnl_lock when this function is called. 
1855 */ 1856 void bond_destroy(struct bonding *bond) 1857 { 1858 bond_deinit(bond->dev); 1859 bond_destroy_sysfs_entry(bond); 1860 unregister_netdevice(bond->dev); 1861 } 1862 1863 /* 1864 * First release a slave and than destroy the bond if no more slaves iare left. 1865 * Must be under rtnl_lock when this function is called. 1866 */ 1867 int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev) 1868 { 1869 struct bonding *bond = bond_dev->priv; 1870 int ret; 1871 1872 ret = bond_release(bond_dev, slave_dev); 1873 if ((ret == 0) && (bond->slave_cnt == 0)) { 1874 printk(KERN_INFO DRV_NAME ": %s: destroying bond %s.\n", 1875 bond_dev->name, bond_dev->name); 1876 bond_destroy(bond); 1877 } 1878 return ret; 1879 } 1880 1881 /* 1882 * This function releases all slaves. 1883 */ 1884 static int bond_release_all(struct net_device *bond_dev) 1885 { 1886 struct bonding *bond = bond_dev->priv; 1887 struct slave *slave; 1888 struct net_device *slave_dev; 1889 struct sockaddr addr; 1890 1891 write_lock_bh(&bond->lock); 1892 1893 netif_carrier_off(bond_dev); 1894 1895 if (bond->slave_cnt == 0) { 1896 goto out; 1897 } 1898 1899 bond->current_arp_slave = NULL; 1900 bond->primary_slave = NULL; 1901 bond_change_active_slave(bond, NULL); 1902 1903 while ((slave = bond->first_slave) != NULL) { 1904 /* Inform AD package of unbinding of slave 1905 * before slave is detached from the list. 1906 */ 1907 if (bond->params.mode == BOND_MODE_8023AD) { 1908 bond_3ad_unbind_slave(slave); 1909 } 1910 1911 slave_dev = slave->dev; 1912 bond_detach_slave(bond, slave); 1913 1914 /* now that the slave is detached, unlock and perform 1915 * all the undo steps that should not be called from 1916 * within a lock. 
1917 */ 1918 write_unlock_bh(&bond->lock); 1919 1920 if ((bond->params.mode == BOND_MODE_TLB) || 1921 (bond->params.mode == BOND_MODE_ALB)) { 1922 /* must be called only after the slave 1923 * has been detached from the list 1924 */ 1925 bond_alb_deinit_slave(bond, slave); 1926 } 1927 1928 bond_compute_features(bond); 1929 1930 bond_destroy_slave_symlinks(bond_dev, slave_dev); 1931 bond_del_vlans_from_slave(bond, slave_dev); 1932 1933 /* If the mode USES_PRIMARY, then we should only remove its 1934 * promisc and mc settings if it was the curr_active_slave, but that was 1935 * already taken care of above when we detached the slave 1936 */ 1937 if (!USES_PRIMARY(bond->params.mode)) { 1938 /* unset promiscuity level from slave */ 1939 if (bond_dev->flags & IFF_PROMISC) { 1940 dev_set_promiscuity(slave_dev, -1); 1941 } 1942 1943 /* unset allmulti level from slave */ 1944 if (bond_dev->flags & IFF_ALLMULTI) { 1945 dev_set_allmulti(slave_dev, -1); 1946 } 1947 1948 /* flush master's mc_list from slave */ 1949 netif_tx_lock_bh(bond_dev); 1950 bond_mc_list_flush(bond_dev, slave_dev); 1951 netif_tx_unlock_bh(bond_dev); 1952 } 1953 1954 netdev_set_master(slave_dev, NULL); 1955 1956 /* close slave before restoring its mac address */ 1957 dev_close(slave_dev); 1958 1959 if (!bond->params.fail_over_mac) { 1960 /* restore original ("permanent") mac address*/ 1961 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1962 addr.sa_family = slave_dev->type; 1963 dev_set_mac_address(slave_dev, &addr); 1964 } 1965 1966 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | 1967 IFF_SLAVE_INACTIVE); 1968 1969 kfree(slave); 1970 1971 /* re-acquire the lock before getting the next slave */ 1972 write_lock_bh(&bond->lock); 1973 } 1974 1975 /* zero the mac address of the master so it will be 1976 * set by the application to the mac address of the 1977 * first slave 1978 */ 1979 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 1980 1981 if (list_empty(&bond->vlan_list)) { 1982 
bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1983 } else { 1984 printk(KERN_WARNING DRV_NAME 1985 ": %s: Warning: clearing HW address of %s while it " 1986 "still has VLANs.\n", 1987 bond_dev->name, bond_dev->name); 1988 printk(KERN_WARNING DRV_NAME 1989 ": %s: When re-adding slaves, make sure the bond's " 1990 "HW address matches its VLANs'.\n", 1991 bond_dev->name); 1992 } 1993 1994 printk(KERN_INFO DRV_NAME 1995 ": %s: released all slaves\n", 1996 bond_dev->name); 1997 1998 out: 1999 write_unlock_bh(&bond->lock); 2000 2001 return 0; 2002 } 2003 2004 /* 2005 * This function changes the active slave to slave <slave_dev>. 2006 * It returns -EINVAL in the following cases. 2007 * - <slave_dev> is not found in the list. 2008 * - There is not active slave now. 2009 * - <slave_dev> is already active. 2010 * - The link state of <slave_dev> is not BOND_LINK_UP. 2011 * - <slave_dev> is not running. 2012 * In these cases, this fuction does nothing. 2013 * In the other cases, currnt_slave pointer is changed and 0 is returned. 2014 */ 2015 static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) 2016 { 2017 struct bonding *bond = bond_dev->priv; 2018 struct slave *old_active = NULL; 2019 struct slave *new_active = NULL; 2020 int res = 0; 2021 2022 if (!USES_PRIMARY(bond->params.mode)) { 2023 return -EINVAL; 2024 } 2025 2026 /* Verify that master_dev is indeed the master of slave_dev */ 2027 if (!(slave_dev->flags & IFF_SLAVE) || 2028 (slave_dev->master != bond_dev)) { 2029 return -EINVAL; 2030 } 2031 2032 read_lock(&bond->lock); 2033 2034 read_lock(&bond->curr_slave_lock); 2035 old_active = bond->curr_active_slave; 2036 read_unlock(&bond->curr_slave_lock); 2037 2038 new_active = bond_get_slave_by_dev(bond, slave_dev); 2039 2040 /* 2041 * Changing to the current active: do nothing; return success. 
2042 */ 2043 if (new_active && (new_active == old_active)) { 2044 read_unlock(&bond->lock); 2045 return 0; 2046 } 2047 2048 if ((new_active) && 2049 (old_active) && 2050 (new_active->link == BOND_LINK_UP) && 2051 IS_UP(new_active->dev)) { 2052 write_lock_bh(&bond->curr_slave_lock); 2053 bond_change_active_slave(bond, new_active); 2054 write_unlock_bh(&bond->curr_slave_lock); 2055 } else { 2056 res = -EINVAL; 2057 } 2058 2059 read_unlock(&bond->lock); 2060 2061 return res; 2062 } 2063 2064 static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) 2065 { 2066 struct bonding *bond = bond_dev->priv; 2067 2068 info->bond_mode = bond->params.mode; 2069 info->miimon = bond->params.miimon; 2070 2071 read_lock(&bond->lock); 2072 info->num_slaves = bond->slave_cnt; 2073 read_unlock(&bond->lock); 2074 2075 return 0; 2076 } 2077 2078 static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) 2079 { 2080 struct bonding *bond = bond_dev->priv; 2081 struct slave *slave; 2082 int i, found = 0; 2083 2084 if (info->slave_id < 0) { 2085 return -ENODEV; 2086 } 2087 2088 read_lock(&bond->lock); 2089 2090 bond_for_each_slave(bond, slave, i) { 2091 if (i == (int)info->slave_id) { 2092 found = 1; 2093 break; 2094 } 2095 } 2096 2097 read_unlock(&bond->lock); 2098 2099 if (found) { 2100 strcpy(info->slave_name, slave->dev->name); 2101 info->link = slave->link; 2102 info->state = slave->state; 2103 info->link_failure_count = slave->link_failure_count; 2104 } else { 2105 return -ENODEV; 2106 } 2107 2108 return 0; 2109 } 2110 2111 /*-------------------------------- Monitoring -------------------------------*/ 2112 2113 /* 2114 * if !have_locks, return nonzero if a failover is necessary. if 2115 * have_locks, do whatever failover activities are needed. 
2116 * 2117 * This is to separate the inspection and failover steps for locking 2118 * purposes; failover requires rtnl, but acquiring it for every 2119 * inspection is undesirable, so a wrapper first does inspection, and 2120 * the acquires the necessary locks and calls again to perform 2121 * failover if needed. Since all locks are dropped, a complete 2122 * restart is needed between calls. 2123 */ 2124 static int __bond_mii_monitor(struct bonding *bond, int have_locks) 2125 { 2126 struct slave *slave, *oldcurrent; 2127 int do_failover = 0; 2128 int i; 2129 2130 if (bond->slave_cnt == 0) 2131 goto out; 2132 2133 /* we will try to read the link status of each of our slaves, and 2134 * set their IFF_RUNNING flag appropriately. For each slave not 2135 * supporting MII status, we won't do anything so that a user-space 2136 * program could monitor the link itself if needed. 2137 */ 2138 2139 if (bond->send_grat_arp) { 2140 if (bond->curr_active_slave && test_bit(__LINK_STATE_LINKWATCH_PENDING, 2141 &bond->curr_active_slave->dev->state)) 2142 dprintk("Needs to send gratuitous arp but not yet\n"); 2143 else { 2144 dprintk("sending delayed gratuitous arp on on %s\n", 2145 bond->curr_active_slave->dev->name); 2146 bond_send_gratuitous_arp(bond); 2147 bond->send_grat_arp = 0; 2148 } 2149 } 2150 read_lock(&bond->curr_slave_lock); 2151 oldcurrent = bond->curr_active_slave; 2152 read_unlock(&bond->curr_slave_lock); 2153 2154 bond_for_each_slave(bond, slave, i) { 2155 struct net_device *slave_dev = slave->dev; 2156 int link_state; 2157 u16 old_speed = slave->speed; 2158 u8 old_duplex = slave->duplex; 2159 2160 link_state = bond_check_dev_link(bond, slave_dev, 0); 2161 2162 switch (slave->link) { 2163 case BOND_LINK_UP: /* the link was up */ 2164 if (link_state == BMSR_LSTATUS) { 2165 if (!oldcurrent) { 2166 if (!have_locks) 2167 return 1; 2168 do_failover = 1; 2169 } 2170 break; 2171 } else { /* link going down */ 2172 slave->link = BOND_LINK_FAIL; 2173 slave->delay = 
bond->params.downdelay; 2174 2175 if (slave->link_failure_count < UINT_MAX) { 2176 slave->link_failure_count++; 2177 } 2178 2179 if (bond->params.downdelay) { 2180 printk(KERN_INFO DRV_NAME 2181 ": %s: link status down for %s " 2182 "interface %s, disabling it in " 2183 "%d ms.\n", 2184 bond->dev->name, 2185 IS_UP(slave_dev) 2186 ? ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) 2187 ? ((slave == oldcurrent) 2188 ? "active " : "backup ") 2189 : "") 2190 : "idle ", 2191 slave_dev->name, 2192 bond->params.downdelay * bond->params.miimon); 2193 } 2194 } 2195 /* no break ! fall through the BOND_LINK_FAIL test to 2196 ensure proper action to be taken 2197 */ 2198 case BOND_LINK_FAIL: /* the link has just gone down */ 2199 if (link_state != BMSR_LSTATUS) { 2200 /* link stays down */ 2201 if (slave->delay <= 0) { 2202 if (!have_locks) 2203 return 1; 2204 2205 /* link down for too long time */ 2206 slave->link = BOND_LINK_DOWN; 2207 2208 /* in active/backup mode, we must 2209 * completely disable this interface 2210 */ 2211 if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) || 2212 (bond->params.mode == BOND_MODE_8023AD)) { 2213 bond_set_slave_inactive_flags(slave); 2214 } 2215 2216 printk(KERN_INFO DRV_NAME 2217 ": %s: link status definitely " 2218 "down for interface %s, " 2219 "disabling it\n", 2220 bond->dev->name, 2221 slave_dev->name); 2222 2223 /* notify ad that the link status has changed */ 2224 if (bond->params.mode == BOND_MODE_8023AD) { 2225 bond_3ad_handle_link_change(slave, BOND_LINK_DOWN); 2226 } 2227 2228 if ((bond->params.mode == BOND_MODE_TLB) || 2229 (bond->params.mode == BOND_MODE_ALB)) { 2230 bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN); 2231 } 2232 2233 if (slave == oldcurrent) { 2234 do_failover = 1; 2235 } 2236 } else { 2237 slave->delay--; 2238 } 2239 } else { 2240 /* link up again */ 2241 slave->link = BOND_LINK_UP; 2242 slave->jiffies = jiffies; 2243 printk(KERN_INFO DRV_NAME 2244 ": %s: link status up again after %d " 2245 "ms for 
interface %s.\n", 2246 bond->dev->name, 2247 (bond->params.downdelay - slave->delay) * bond->params.miimon, 2248 slave_dev->name); 2249 } 2250 break; 2251 case BOND_LINK_DOWN: /* the link was down */ 2252 if (link_state != BMSR_LSTATUS) { 2253 /* the link stays down, nothing more to do */ 2254 break; 2255 } else { /* link going up */ 2256 slave->link = BOND_LINK_BACK; 2257 slave->delay = bond->params.updelay; 2258 2259 if (bond->params.updelay) { 2260 /* if updelay == 0, no need to 2261 advertise about a 0 ms delay */ 2262 printk(KERN_INFO DRV_NAME 2263 ": %s: link status up for " 2264 "interface %s, enabling it " 2265 "in %d ms.\n", 2266 bond->dev->name, 2267 slave_dev->name, 2268 bond->params.updelay * bond->params.miimon); 2269 } 2270 } 2271 /* no break ! fall through the BOND_LINK_BACK state in 2272 case there's something to do. 2273 */ 2274 case BOND_LINK_BACK: /* the link has just come back */ 2275 if (link_state != BMSR_LSTATUS) { 2276 /* link down again */ 2277 slave->link = BOND_LINK_DOWN; 2278 2279 printk(KERN_INFO DRV_NAME 2280 ": %s: link status down again after %d " 2281 "ms for interface %s.\n", 2282 bond->dev->name, 2283 (bond->params.updelay - slave->delay) * bond->params.miimon, 2284 slave_dev->name); 2285 } else { 2286 /* link stays up */ 2287 if (slave->delay == 0) { 2288 if (!have_locks) 2289 return 1; 2290 2291 /* now the link has been up for long time enough */ 2292 slave->link = BOND_LINK_UP; 2293 slave->jiffies = jiffies; 2294 2295 if (bond->params.mode == BOND_MODE_8023AD) { 2296 /* prevent it from being the active one */ 2297 slave->state = BOND_STATE_BACKUP; 2298 } else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { 2299 /* make it immediately active */ 2300 slave->state = BOND_STATE_ACTIVE; 2301 } else if (slave != bond->primary_slave) { 2302 /* prevent it from being the active one */ 2303 slave->state = BOND_STATE_BACKUP; 2304 } 2305 2306 printk(KERN_INFO DRV_NAME 2307 ": %s: link status definitely " 2308 "up for interface %s.\n", 
2309 bond->dev->name, 2310 slave_dev->name); 2311 2312 /* notify ad that the link status has changed */ 2313 if (bond->params.mode == BOND_MODE_8023AD) { 2314 bond_3ad_handle_link_change(slave, BOND_LINK_UP); 2315 } 2316 2317 if ((bond->params.mode == BOND_MODE_TLB) || 2318 (bond->params.mode == BOND_MODE_ALB)) { 2319 bond_alb_handle_link_change(bond, slave, BOND_LINK_UP); 2320 } 2321 2322 if ((!oldcurrent) || 2323 (slave == bond->primary_slave)) { 2324 do_failover = 1; 2325 } 2326 } else { 2327 slave->delay--; 2328 } 2329 } 2330 break; 2331 default: 2332 /* Should not happen */ 2333 printk(KERN_ERR DRV_NAME 2334 ": %s: Error: %s Illegal value (link=%d)\n", 2335 bond->dev->name, 2336 slave->dev->name, 2337 slave->link); 2338 goto out; 2339 } /* end of switch (slave->link) */ 2340 2341 bond_update_speed_duplex(slave); 2342 2343 if (bond->params.mode == BOND_MODE_8023AD) { 2344 if (old_speed != slave->speed) { 2345 bond_3ad_adapter_speed_changed(slave); 2346 } 2347 2348 if (old_duplex != slave->duplex) { 2349 bond_3ad_adapter_duplex_changed(slave); 2350 } 2351 } 2352 2353 } /* end of for */ 2354 2355 if (do_failover) { 2356 ASSERT_RTNL(); 2357 2358 write_lock_bh(&bond->curr_slave_lock); 2359 2360 bond_select_active_slave(bond); 2361 2362 write_unlock_bh(&bond->curr_slave_lock); 2363 2364 } else 2365 bond_set_carrier(bond); 2366 2367 out: 2368 return 0; 2369 } 2370 2371 /* 2372 * bond_mii_monitor 2373 * 2374 * Really a wrapper that splits the mii monitor into two phases: an 2375 * inspection, then (if inspection indicates something needs to be 2376 * done) an acquisition of appropriate locks followed by another pass 2377 * to implement whatever link state changes are indicated. 
2378 */ 2379 void bond_mii_monitor(struct work_struct *work) 2380 { 2381 struct bonding *bond = container_of(work, struct bonding, 2382 mii_work.work); 2383 unsigned long delay; 2384 2385 read_lock(&bond->lock); 2386 if (bond->kill_timers) { 2387 read_unlock(&bond->lock); 2388 return; 2389 } 2390 if (__bond_mii_monitor(bond, 0)) { 2391 read_unlock(&bond->lock); 2392 rtnl_lock(); 2393 read_lock(&bond->lock); 2394 __bond_mii_monitor(bond, 1); 2395 read_unlock(&bond->lock); 2396 rtnl_unlock(); /* might sleep, hold no other locks */ 2397 read_lock(&bond->lock); 2398 } 2399 2400 delay = ((bond->params.miimon * HZ) / 1000) ? : 1; 2401 read_unlock(&bond->lock); 2402 queue_delayed_work(bond->wq, &bond->mii_work, delay); 2403 } 2404 2405 static __be32 bond_glean_dev_ip(struct net_device *dev) 2406 { 2407 struct in_device *idev; 2408 struct in_ifaddr *ifa; 2409 __be32 addr = 0; 2410 2411 if (!dev) 2412 return 0; 2413 2414 rcu_read_lock(); 2415 idev = __in_dev_get_rcu(dev); 2416 if (!idev) 2417 goto out; 2418 2419 ifa = idev->ifa_list; 2420 if (!ifa) 2421 goto out; 2422 2423 addr = ifa->ifa_local; 2424 out: 2425 rcu_read_unlock(); 2426 return addr; 2427 } 2428 2429 static int bond_has_ip(struct bonding *bond) 2430 { 2431 struct vlan_entry *vlan, *vlan_next; 2432 2433 if (bond->master_ip) 2434 return 1; 2435 2436 if (list_empty(&bond->vlan_list)) 2437 return 0; 2438 2439 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2440 vlan_list) { 2441 if (vlan->vlan_ip) 2442 return 1; 2443 } 2444 2445 return 0; 2446 } 2447 2448 static int bond_has_this_ip(struct bonding *bond, __be32 ip) 2449 { 2450 struct vlan_entry *vlan, *vlan_next; 2451 2452 if (ip == bond->master_ip) 2453 return 1; 2454 2455 if (list_empty(&bond->vlan_list)) 2456 return 0; 2457 2458 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2459 vlan_list) { 2460 if (ip == vlan->vlan_ip) 2461 return 1; 2462 } 2463 2464 return 0; 2465 } 2466 2467 /* 2468 * We go to the (large) trouble of VLAN tagging ARP 
frames because 2469 * switches in VLAN mode (especially if ports are configured as 2470 * "native" to a VLAN) might not pass non-tagged frames. 2471 */ 2472 static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id) 2473 { 2474 struct sk_buff *skb; 2475 2476 dprintk("arp %d on slave %s: dst %x src %x vid %d\n", arp_op, 2477 slave_dev->name, dest_ip, src_ip, vlan_id); 2478 2479 skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, 2480 NULL, slave_dev->dev_addr, NULL); 2481 2482 if (!skb) { 2483 printk(KERN_ERR DRV_NAME ": ARP packet allocation failed\n"); 2484 return; 2485 } 2486 if (vlan_id) { 2487 skb = vlan_put_tag(skb, vlan_id); 2488 if (!skb) { 2489 printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n"); 2490 return; 2491 } 2492 } 2493 arp_xmit(skb); 2494 } 2495 2496 2497 static void bond_arp_send_all(struct bonding *bond, struct slave *slave) 2498 { 2499 int i, vlan_id, rv; 2500 __be32 *targets = bond->params.arp_targets; 2501 struct vlan_entry *vlan, *vlan_next; 2502 struct net_device *vlan_dev; 2503 struct flowi fl; 2504 struct rtable *rt; 2505 2506 for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { 2507 if (!targets[i]) 2508 continue; 2509 dprintk("basa: target %x\n", targets[i]); 2510 if (list_empty(&bond->vlan_list)) { 2511 dprintk("basa: empty vlan: arp_send\n"); 2512 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2513 bond->master_ip, 0); 2514 continue; 2515 } 2516 2517 /* 2518 * If VLANs are configured, we do a route lookup to 2519 * determine which VLAN interface would be used, so we 2520 * can tag the ARP with the proper VLAN tag. 
2521 */ 2522 memset(&fl, 0, sizeof(fl)); 2523 fl.fl4_dst = targets[i]; 2524 fl.fl4_tos = RTO_ONLINK; 2525 2526 rv = ip_route_output_key(&init_net, &rt, &fl); 2527 if (rv) { 2528 if (net_ratelimit()) { 2529 printk(KERN_WARNING DRV_NAME 2530 ": %s: no route to arp_ip_target %u.%u.%u.%u\n", 2531 bond->dev->name, NIPQUAD(fl.fl4_dst)); 2532 } 2533 continue; 2534 } 2535 2536 /* 2537 * This target is not on a VLAN 2538 */ 2539 if (rt->u.dst.dev == bond->dev) { 2540 ip_rt_put(rt); 2541 dprintk("basa: rtdev == bond->dev: arp_send\n"); 2542 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2543 bond->master_ip, 0); 2544 continue; 2545 } 2546 2547 vlan_id = 0; 2548 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2549 vlan_list) { 2550 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 2551 if (vlan_dev == rt->u.dst.dev) { 2552 vlan_id = vlan->vlan_id; 2553 dprintk("basa: vlan match on %s %d\n", 2554 vlan_dev->name, vlan_id); 2555 break; 2556 } 2557 } 2558 2559 if (vlan_id) { 2560 ip_rt_put(rt); 2561 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2562 vlan->vlan_ip, vlan_id); 2563 continue; 2564 } 2565 2566 if (net_ratelimit()) { 2567 printk(KERN_WARNING DRV_NAME 2568 ": %s: no path to arp_ip_target %u.%u.%u.%u via rt.dev %s\n", 2569 bond->dev->name, NIPQUAD(fl.fl4_dst), 2570 rt->u.dst.dev ? rt->u.dst.dev->name : "NULL"); 2571 } 2572 ip_rt_put(rt); 2573 } 2574 } 2575 2576 /* 2577 * Kick out a gratuitous ARP for an IP on the bonding master plus one 2578 * for each VLAN above us. 2579 */ 2580 static void bond_send_gratuitous_arp(struct bonding *bond) 2581 { 2582 struct slave *slave = bond->curr_active_slave; 2583 struct vlan_entry *vlan; 2584 struct net_device *vlan_dev; 2585 2586 dprintk("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name, 2587 slave ? 
slave->dev->name : "NULL"); 2588 if (!slave) 2589 return; 2590 2591 if (bond->master_ip) { 2592 bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, 2593 bond->master_ip, 0); 2594 } 2595 2596 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 2597 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 2598 if (vlan->vlan_ip) { 2599 bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip, 2600 vlan->vlan_ip, vlan->vlan_id); 2601 } 2602 } 2603 } 2604 2605 static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip) 2606 { 2607 int i; 2608 __be32 *targets = bond->params.arp_targets; 2609 2610 targets = bond->params.arp_targets; 2611 for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) { 2612 dprintk("bva: sip %u.%u.%u.%u tip %u.%u.%u.%u t[%d] " 2613 "%u.%u.%u.%u bhti(tip) %d\n", 2614 NIPQUAD(sip), NIPQUAD(tip), i, NIPQUAD(targets[i]), 2615 bond_has_this_ip(bond, tip)); 2616 if (sip == targets[i]) { 2617 if (bond_has_this_ip(bond, tip)) 2618 slave->last_arp_rx = jiffies; 2619 return; 2620 } 2621 } 2622 } 2623 2624 static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 2625 { 2626 struct arphdr *arp; 2627 struct slave *slave; 2628 struct bonding *bond; 2629 unsigned char *arp_ptr; 2630 __be32 sip, tip; 2631 2632 if (dev_net(dev) != &init_net) 2633 goto out; 2634 2635 if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) 2636 goto out; 2637 2638 bond = dev->priv; 2639 read_lock(&bond->lock); 2640 2641 dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n", 2642 bond->dev->name, skb->dev ? skb->dev->name : "NULL", 2643 orig_dev ? 
orig_dev->name : "NULL"); 2644 2645 slave = bond_get_slave_by_dev(bond, orig_dev); 2646 if (!slave || !slave_do_arp_validate(bond, slave)) 2647 goto out_unlock; 2648 2649 if (!pskb_may_pull(skb, arp_hdr_len(dev))) 2650 goto out_unlock; 2651 2652 arp = arp_hdr(skb); 2653 if (arp->ar_hln != dev->addr_len || 2654 skb->pkt_type == PACKET_OTHERHOST || 2655 skb->pkt_type == PACKET_LOOPBACK || 2656 arp->ar_hrd != htons(ARPHRD_ETHER) || 2657 arp->ar_pro != htons(ETH_P_IP) || 2658 arp->ar_pln != 4) 2659 goto out_unlock; 2660 2661 arp_ptr = (unsigned char *)(arp + 1); 2662 arp_ptr += dev->addr_len; 2663 memcpy(&sip, arp_ptr, 4); 2664 arp_ptr += 4 + dev->addr_len; 2665 memcpy(&tip, arp_ptr, 4); 2666 2667 dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %u.%u.%u.%u" 2668 " tip %u.%u.%u.%u\n", bond->dev->name, slave->dev->name, 2669 slave->state, bond->params.arp_validate, 2670 slave_do_arp_validate(bond, slave), NIPQUAD(sip), NIPQUAD(tip)); 2671 2672 /* 2673 * Backup slaves won't see the ARP reply, but do come through 2674 * here for each ARP probe (so we swap the sip/tip to validate 2675 * the probe). In a "redundant switch, common router" type of 2676 * configuration, the ARP probe will (hopefully) travel from 2677 * the active, through one switch, the router, then the other 2678 * switch before reaching the backup. 2679 */ 2680 if (slave->state == BOND_STATE_ACTIVE) 2681 bond_validate_arp(bond, slave, sip, tip); 2682 else 2683 bond_validate_arp(bond, slave, tip, sip); 2684 2685 out_unlock: 2686 read_unlock(&bond->lock); 2687 out: 2688 dev_kfree_skb(skb); 2689 return NET_RX_SUCCESS; 2690 } 2691 2692 /* 2693 * this function is called regularly to monitor each slave's link 2694 * ensuring that traffic is being sent and received when arp monitoring 2695 * is used in load-balancing mode. if the adapter has been dormant, then an 2696 * arp is transmitted to generate traffic. see activebackup_arp_monitor for 2697 * arp monitoring in active backup mode. 
2698 */ 2699 void bond_loadbalance_arp_mon(struct work_struct *work) 2700 { 2701 struct bonding *bond = container_of(work, struct bonding, 2702 arp_work.work); 2703 struct slave *slave, *oldcurrent; 2704 int do_failover = 0; 2705 int delta_in_ticks; 2706 int i; 2707 2708 read_lock(&bond->lock); 2709 2710 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2711 2712 if (bond->kill_timers) { 2713 goto out; 2714 } 2715 2716 if (bond->slave_cnt == 0) { 2717 goto re_arm; 2718 } 2719 2720 read_lock(&bond->curr_slave_lock); 2721 oldcurrent = bond->curr_active_slave; 2722 read_unlock(&bond->curr_slave_lock); 2723 2724 /* see if any of the previous devices are up now (i.e. they have 2725 * xmt and rcv traffic). the curr_active_slave does not come into 2726 * the picture unless it is null. also, slave->jiffies is not needed 2727 * here because we send an arp on each slave and give a slave as 2728 * long as it needs to get the tx/rx within the delta. 2729 * TODO: what about up/down delay in arp mode? it wasn't here before 2730 * so it can wait 2731 */ 2732 bond_for_each_slave(bond, slave, i) { 2733 if (slave->link != BOND_LINK_UP) { 2734 if (time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks) && 2735 time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) { 2736 2737 slave->link = BOND_LINK_UP; 2738 slave->state = BOND_STATE_ACTIVE; 2739 2740 /* primary_slave has no meaning in round-robin 2741 * mode. the window of a slave being up and 2742 * curr_active_slave being null after enslaving 2743 * is closed. 
2744 */ 2745 if (!oldcurrent) { 2746 printk(KERN_INFO DRV_NAME 2747 ": %s: link status definitely " 2748 "up for interface %s, ", 2749 bond->dev->name, 2750 slave->dev->name); 2751 do_failover = 1; 2752 } else { 2753 printk(KERN_INFO DRV_NAME 2754 ": %s: interface %s is now up\n", 2755 bond->dev->name, 2756 slave->dev->name); 2757 } 2758 } 2759 } else { 2760 /* slave->link == BOND_LINK_UP */ 2761 2762 /* not all switches will respond to an arp request 2763 * when the source ip is 0, so don't take the link down 2764 * if we don't know our ip yet 2765 */ 2766 if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) || 2767 (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks) && 2768 bond_has_ip(bond))) { 2769 2770 slave->link = BOND_LINK_DOWN; 2771 slave->state = BOND_STATE_BACKUP; 2772 2773 if (slave->link_failure_count < UINT_MAX) { 2774 slave->link_failure_count++; 2775 } 2776 2777 printk(KERN_INFO DRV_NAME 2778 ": %s: interface %s is now down.\n", 2779 bond->dev->name, 2780 slave->dev->name); 2781 2782 if (slave == oldcurrent) { 2783 do_failover = 1; 2784 } 2785 } 2786 } 2787 2788 /* note: if switch is in round-robin mode, all links 2789 * must tx arp to ensure all links rx an arp - otherwise 2790 * links may oscillate or not come up at all; if switch is 2791 * in something like xor mode, there is nothing we can 2792 * do - all replies will be rx'ed on same link causing slaves 2793 * to be unstable during low/no traffic periods 2794 */ 2795 if (IS_UP(slave->dev)) { 2796 bond_arp_send_all(bond, slave); 2797 } 2798 } 2799 2800 if (do_failover) { 2801 write_lock_bh(&bond->curr_slave_lock); 2802 2803 bond_select_active_slave(bond); 2804 2805 write_unlock_bh(&bond->curr_slave_lock); 2806 } 2807 2808 re_arm: 2809 if (bond->params.arp_interval) 2810 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); 2811 out: 2812 read_unlock(&bond->lock); 2813 } 2814 2815 /* 2816 * When using arp monitoring in active-backup mode, this function is 
2817 * called to determine if any backup slaves have went down or a new 2818 * current slave needs to be found. 2819 * The backup slaves never generate traffic, they are considered up by merely 2820 * receiving traffic. If the current slave goes down, each backup slave will 2821 * be given the opportunity to tx/rx an arp before being taken down - this 2822 * prevents all slaves from being taken down due to the current slave not 2823 * sending any traffic for the backups to receive. The arps are not necessarily 2824 * necessary, any tx and rx traffic will keep the current slave up. While any 2825 * rx traffic will keep the backup slaves up, the current slave is responsible 2826 * for generating traffic to keep them up regardless of any other traffic they 2827 * may have received. 2828 * see loadbalance_arp_monitor for arp monitoring in load balancing mode 2829 */ 2830 void bond_activebackup_arp_mon(struct work_struct *work) 2831 { 2832 struct bonding *bond = container_of(work, struct bonding, 2833 arp_work.work); 2834 struct slave *slave; 2835 int delta_in_ticks; 2836 int i; 2837 2838 read_lock(&bond->lock); 2839 2840 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2841 2842 if (bond->kill_timers) { 2843 goto out; 2844 } 2845 2846 if (bond->slave_cnt == 0) { 2847 goto re_arm; 2848 } 2849 2850 /* determine if any slave has come up or any backup slave has 2851 * gone down 2852 * TODO: what about up/down delay in arp mode? 
it wasn't here before 2853 * so it can wait 2854 */ 2855 bond_for_each_slave(bond, slave, i) { 2856 if (slave->link != BOND_LINK_UP) { 2857 if (time_before_eq(jiffies, 2858 slave_last_rx(bond, slave) + delta_in_ticks)) { 2859 2860 slave->link = BOND_LINK_UP; 2861 2862 write_lock_bh(&bond->curr_slave_lock); 2863 2864 if ((!bond->curr_active_slave) && 2865 time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks)) { 2866 bond_change_active_slave(bond, slave); 2867 bond->current_arp_slave = NULL; 2868 } else if (bond->curr_active_slave != slave) { 2869 /* this slave has just come up but we 2870 * already have a current slave; this 2871 * can also happen if bond_enslave adds 2872 * a new slave that is up while we are 2873 * searching for a new slave 2874 */ 2875 bond_set_slave_inactive_flags(slave); 2876 bond->current_arp_slave = NULL; 2877 } 2878 2879 bond_set_carrier(bond); 2880 2881 if (slave == bond->curr_active_slave) { 2882 printk(KERN_INFO DRV_NAME 2883 ": %s: %s is up and now the " 2884 "active interface\n", 2885 bond->dev->name, 2886 slave->dev->name); 2887 netif_carrier_on(bond->dev); 2888 } else { 2889 printk(KERN_INFO DRV_NAME 2890 ": %s: backup interface %s is " 2891 "now up\n", 2892 bond->dev->name, 2893 slave->dev->name); 2894 } 2895 2896 write_unlock_bh(&bond->curr_slave_lock); 2897 } 2898 } else { 2899 read_lock(&bond->curr_slave_lock); 2900 2901 if ((slave != bond->curr_active_slave) && 2902 (!bond->current_arp_slave) && 2903 (time_after_eq(jiffies, slave_last_rx(bond, slave) + 3*delta_in_ticks) && 2904 bond_has_ip(bond))) { 2905 /* a backup slave has gone down; three times 2906 * the delta allows the current slave to be 2907 * taken out before the backup slave. 
2908 * note: a non-null current_arp_slave indicates 2909 * the curr_active_slave went down and we are 2910 * searching for a new one; under this 2911 * condition we only take the curr_active_slave 2912 * down - this gives each slave a chance to 2913 * tx/rx traffic before being taken out 2914 */ 2915 2916 read_unlock(&bond->curr_slave_lock); 2917 2918 slave->link = BOND_LINK_DOWN; 2919 2920 if (slave->link_failure_count < UINT_MAX) { 2921 slave->link_failure_count++; 2922 } 2923 2924 bond_set_slave_inactive_flags(slave); 2925 2926 printk(KERN_INFO DRV_NAME 2927 ": %s: backup interface %s is now down\n", 2928 bond->dev->name, 2929 slave->dev->name); 2930 } else { 2931 read_unlock(&bond->curr_slave_lock); 2932 } 2933 } 2934 } 2935 2936 read_lock(&bond->curr_slave_lock); 2937 slave = bond->curr_active_slave; 2938 read_unlock(&bond->curr_slave_lock); 2939 2940 if (slave) { 2941 /* if we have sent traffic in the past 2*arp_intervals but 2942 * haven't xmit and rx traffic in that time interval, select 2943 * a different slave. slave->jiffies is only updated when 2944 * a slave first becomes the curr_active_slave - not necessarily 2945 * after every arp; this ensures the slave has a full 2*delta 2946 * before being taken out. 
if a primary is being used, check 2947 * if it is up and needs to take over as the curr_active_slave 2948 */ 2949 if ((time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) || 2950 (time_after_eq(jiffies, slave_last_rx(bond, slave) + 2*delta_in_ticks) && 2951 bond_has_ip(bond))) && 2952 time_after_eq(jiffies, slave->jiffies + 2*delta_in_ticks)) { 2953 2954 slave->link = BOND_LINK_DOWN; 2955 2956 if (slave->link_failure_count < UINT_MAX) { 2957 slave->link_failure_count++; 2958 } 2959 2960 printk(KERN_INFO DRV_NAME 2961 ": %s: link status down for active interface " 2962 "%s, disabling it\n", 2963 bond->dev->name, 2964 slave->dev->name); 2965 2966 write_lock_bh(&bond->curr_slave_lock); 2967 2968 bond_select_active_slave(bond); 2969 slave = bond->curr_active_slave; 2970 2971 write_unlock_bh(&bond->curr_slave_lock); 2972 2973 bond->current_arp_slave = slave; 2974 2975 if (slave) { 2976 slave->jiffies = jiffies; 2977 } 2978 } else if ((bond->primary_slave) && 2979 (bond->primary_slave != slave) && 2980 (bond->primary_slave->link == BOND_LINK_UP)) { 2981 /* at this point, slave is the curr_active_slave */ 2982 printk(KERN_INFO DRV_NAME 2983 ": %s: changing from interface %s to primary " 2984 "interface %s\n", 2985 bond->dev->name, 2986 slave->dev->name, 2987 bond->primary_slave->dev->name); 2988 2989 /* primary is up so switch to it */ 2990 write_lock_bh(&bond->curr_slave_lock); 2991 bond_change_active_slave(bond, bond->primary_slave); 2992 write_unlock_bh(&bond->curr_slave_lock); 2993 2994 slave = bond->primary_slave; 2995 slave->jiffies = jiffies; 2996 } else { 2997 bond->current_arp_slave = NULL; 2998 } 2999 3000 /* the current slave must tx an arp to ensure backup slaves 3001 * rx traffic 3002 */ 3003 if (slave && bond_has_ip(bond)) { 3004 bond_arp_send_all(bond, slave); 3005 } 3006 } 3007 3008 /* if we don't have a curr_active_slave, search for the next available 3009 * backup slave from the current_arp_slave and make it the candidate 3010 * for 
becoming the curr_active_slave 3011 */ 3012 if (!slave) { 3013 if (!bond->current_arp_slave) { 3014 bond->current_arp_slave = bond->first_slave; 3015 } 3016 3017 if (bond->current_arp_slave) { 3018 bond_set_slave_inactive_flags(bond->current_arp_slave); 3019 3020 /* search for next candidate */ 3021 bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { 3022 if (IS_UP(slave->dev)) { 3023 slave->link = BOND_LINK_BACK; 3024 bond_set_slave_active_flags(slave); 3025 bond_arp_send_all(bond, slave); 3026 slave->jiffies = jiffies; 3027 bond->current_arp_slave = slave; 3028 break; 3029 } 3030 3031 /* if the link state is up at this point, we 3032 * mark it down - this can happen if we have 3033 * simultaneous link failures and 3034 * reselect_active_interface doesn't make this 3035 * one the current slave so it is still marked 3036 * up when it is actually down 3037 */ 3038 if (slave->link == BOND_LINK_UP) { 3039 slave->link = BOND_LINK_DOWN; 3040 if (slave->link_failure_count < UINT_MAX) { 3041 slave->link_failure_count++; 3042 } 3043 3044 bond_set_slave_inactive_flags(slave); 3045 3046 printk(KERN_INFO DRV_NAME 3047 ": %s: backup interface %s is " 3048 "now down.\n", 3049 bond->dev->name, 3050 slave->dev->name); 3051 } 3052 } 3053 } 3054 } 3055 3056 re_arm: 3057 if (bond->params.arp_interval) { 3058 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); 3059 } 3060 out: 3061 read_unlock(&bond->lock); 3062 } 3063 3064 /*------------------------------ proc/seq_file-------------------------------*/ 3065 3066 #ifdef CONFIG_PROC_FS 3067 3068 static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) 3069 { 3070 struct bonding *bond = seq->private; 3071 loff_t off = 0; 3072 struct slave *slave; 3073 int i; 3074 3075 /* make sure the bond won't be taken away */ 3076 read_lock(&dev_base_lock); 3077 read_lock(&bond->lock); 3078 3079 if (*pos == 0) { 3080 return SEQ_START_TOKEN; 3081 } 3082 3083 bond_for_each_slave(bond, slave, i) { 3084 if 
(++off == *pos) { 3085 return slave; 3086 } 3087 } 3088 3089 return NULL; 3090 } 3091 3092 static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3093 { 3094 struct bonding *bond = seq->private; 3095 struct slave *slave = v; 3096 3097 ++*pos; 3098 if (v == SEQ_START_TOKEN) { 3099 return bond->first_slave; 3100 } 3101 3102 slave = slave->next; 3103 3104 return (slave == bond->first_slave) ? NULL : slave; 3105 } 3106 3107 static void bond_info_seq_stop(struct seq_file *seq, void *v) 3108 { 3109 struct bonding *bond = seq->private; 3110 3111 read_unlock(&bond->lock); 3112 read_unlock(&dev_base_lock); 3113 } 3114 3115 static void bond_info_show_master(struct seq_file *seq) 3116 { 3117 struct bonding *bond = seq->private; 3118 struct slave *curr; 3119 int i; 3120 u32 target; 3121 3122 read_lock(&bond->curr_slave_lock); 3123 curr = bond->curr_active_slave; 3124 read_unlock(&bond->curr_slave_lock); 3125 3126 seq_printf(seq, "Bonding Mode: %s", 3127 bond_mode_name(bond->params.mode)); 3128 3129 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP && 3130 bond->params.fail_over_mac) 3131 seq_printf(seq, " (fail_over_mac)"); 3132 3133 seq_printf(seq, "\n"); 3134 3135 if (bond->params.mode == BOND_MODE_XOR || 3136 bond->params.mode == BOND_MODE_8023AD) { 3137 seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", 3138 xmit_hashtype_tbl[bond->params.xmit_policy].modename, 3139 bond->params.xmit_policy); 3140 } 3141 3142 if (USES_PRIMARY(bond->params.mode)) { 3143 seq_printf(seq, "Primary Slave: %s\n", 3144 (bond->primary_slave) ? 3145 bond->primary_slave->dev->name : "None"); 3146 3147 seq_printf(seq, "Currently Active Slave: %s\n", 3148 (curr) ? curr->dev->name : "None"); 3149 } 3150 3151 seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ? 
3152 "up" : "down"); 3153 seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); 3154 seq_printf(seq, "Up Delay (ms): %d\n", 3155 bond->params.updelay * bond->params.miimon); 3156 seq_printf(seq, "Down Delay (ms): %d\n", 3157 bond->params.downdelay * bond->params.miimon); 3158 3159 3160 /* ARP information */ 3161 if(bond->params.arp_interval > 0) { 3162 int printed=0; 3163 seq_printf(seq, "ARP Polling Interval (ms): %d\n", 3164 bond->params.arp_interval); 3165 3166 seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); 3167 3168 for(i = 0; (i < BOND_MAX_ARP_TARGETS) ;i++) { 3169 if (!bond->params.arp_targets[i]) 3170 continue; 3171 if (printed) 3172 seq_printf(seq, ","); 3173 target = ntohl(bond->params.arp_targets[i]); 3174 seq_printf(seq, " %d.%d.%d.%d", HIPQUAD(target)); 3175 printed = 1; 3176 } 3177 seq_printf(seq, "\n"); 3178 } 3179 3180 if (bond->params.mode == BOND_MODE_8023AD) { 3181 struct ad_info ad_info; 3182 DECLARE_MAC_BUF(mac); 3183 3184 seq_puts(seq, "\n802.3ad info\n"); 3185 seq_printf(seq, "LACP rate: %s\n", 3186 (bond->params.lacp_fast) ? 
"fast" : "slow"); 3187 3188 if (bond_3ad_get_active_agg_info(bond, &ad_info)) { 3189 seq_printf(seq, "bond %s has no active aggregator\n", 3190 bond->dev->name); 3191 } else { 3192 seq_printf(seq, "Active Aggregator Info:\n"); 3193 3194 seq_printf(seq, "\tAggregator ID: %d\n", 3195 ad_info.aggregator_id); 3196 seq_printf(seq, "\tNumber of ports: %d\n", 3197 ad_info.ports); 3198 seq_printf(seq, "\tActor Key: %d\n", 3199 ad_info.actor_key); 3200 seq_printf(seq, "\tPartner Key: %d\n", 3201 ad_info.partner_key); 3202 seq_printf(seq, "\tPartner Mac Address: %s\n", 3203 print_mac(mac, ad_info.partner_system)); 3204 } 3205 } 3206 } 3207 3208 static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) 3209 { 3210 struct bonding *bond = seq->private; 3211 DECLARE_MAC_BUF(mac); 3212 3213 seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); 3214 seq_printf(seq, "MII Status: %s\n", 3215 (slave->link == BOND_LINK_UP) ? "up" : "down"); 3216 seq_printf(seq, "Link Failure Count: %u\n", 3217 slave->link_failure_count); 3218 3219 seq_printf(seq, 3220 "Permanent HW addr: %s\n", 3221 print_mac(mac, slave->perm_hwaddr)); 3222 3223 if (bond->params.mode == BOND_MODE_8023AD) { 3224 const struct aggregator *agg 3225 = SLAVE_AD_INFO(slave).port.aggregator; 3226 3227 if (agg) { 3228 seq_printf(seq, "Aggregator ID: %d\n", 3229 agg->aggregator_identifier); 3230 } else { 3231 seq_puts(seq, "Aggregator ID: N/A\n"); 3232 } 3233 } 3234 } 3235 3236 static int bond_info_seq_show(struct seq_file *seq, void *v) 3237 { 3238 if (v == SEQ_START_TOKEN) { 3239 seq_printf(seq, "%s\n", version); 3240 bond_info_show_master(seq); 3241 } else { 3242 bond_info_show_slave(seq, v); 3243 } 3244 3245 return 0; 3246 } 3247 3248 static struct seq_operations bond_info_seq_ops = { 3249 .start = bond_info_seq_start, 3250 .next = bond_info_seq_next, 3251 .stop = bond_info_seq_stop, 3252 .show = bond_info_seq_show, 3253 }; 3254 3255 static int bond_info_open(struct inode *inode, struct file 
*file) 3256 { 3257 struct seq_file *seq; 3258 struct proc_dir_entry *proc; 3259 int res; 3260 3261 res = seq_open(file, &bond_info_seq_ops); 3262 if (!res) { 3263 /* recover the pointer buried in proc_dir_entry data */ 3264 seq = file->private_data; 3265 proc = PDE(inode); 3266 seq->private = proc->data; 3267 } 3268 3269 return res; 3270 } 3271 3272 static const struct file_operations bond_info_fops = { 3273 .owner = THIS_MODULE, 3274 .open = bond_info_open, 3275 .read = seq_read, 3276 .llseek = seq_lseek, 3277 .release = seq_release, 3278 }; 3279 3280 static int bond_create_proc_entry(struct bonding *bond) 3281 { 3282 struct net_device *bond_dev = bond->dev; 3283 3284 if (bond_proc_dir) { 3285 bond->proc_entry = proc_create_data(bond_dev->name, 3286 S_IRUGO, bond_proc_dir, 3287 &bond_info_fops, bond); 3288 if (bond->proc_entry == NULL) { 3289 printk(KERN_WARNING DRV_NAME 3290 ": Warning: Cannot create /proc/net/%s/%s\n", 3291 DRV_NAME, bond_dev->name); 3292 } else { 3293 memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); 3294 } 3295 } 3296 3297 return 0; 3298 } 3299 3300 static void bond_remove_proc_entry(struct bonding *bond) 3301 { 3302 if (bond_proc_dir && bond->proc_entry) { 3303 remove_proc_entry(bond->proc_file_name, bond_proc_dir); 3304 memset(bond->proc_file_name, 0, IFNAMSIZ); 3305 bond->proc_entry = NULL; 3306 } 3307 } 3308 3309 /* Create the bonding directory under /proc/net, if doesn't exist yet. 3310 * Caller must hold rtnl_lock. 
3311 */ 3312 static void bond_create_proc_dir(void) 3313 { 3314 int len = strlen(DRV_NAME); 3315 3316 for (bond_proc_dir = init_net.proc_net->subdir; bond_proc_dir; 3317 bond_proc_dir = bond_proc_dir->next) { 3318 if ((bond_proc_dir->namelen == len) && 3319 !memcmp(bond_proc_dir->name, DRV_NAME, len)) { 3320 break; 3321 } 3322 } 3323 3324 if (!bond_proc_dir) { 3325 bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net); 3326 if (bond_proc_dir) { 3327 bond_proc_dir->owner = THIS_MODULE; 3328 } else { 3329 printk(KERN_WARNING DRV_NAME 3330 ": Warning: cannot create /proc/net/%s\n", 3331 DRV_NAME); 3332 } 3333 } 3334 } 3335 3336 /* Destroy the bonding directory under /proc/net, if empty. 3337 * Caller must hold rtnl_lock. 3338 */ 3339 static void bond_destroy_proc_dir(void) 3340 { 3341 struct proc_dir_entry *de; 3342 3343 if (!bond_proc_dir) { 3344 return; 3345 } 3346 3347 /* verify that the /proc dir is empty */ 3348 for (de = bond_proc_dir->subdir; de; de = de->next) { 3349 /* ignore . and .. 
*/ 3350 if (*(de->name) != '.') { 3351 break; 3352 } 3353 } 3354 3355 if (de) { 3356 if (bond_proc_dir->owner == THIS_MODULE) { 3357 bond_proc_dir->owner = NULL; 3358 } 3359 } else { 3360 remove_proc_entry(DRV_NAME, init_net.proc_net); 3361 bond_proc_dir = NULL; 3362 } 3363 } 3364 #endif /* CONFIG_PROC_FS */ 3365 3366 /*-------------------------- netdev event handling --------------------------*/ 3367 3368 /* 3369 * Change device name 3370 */ 3371 static int bond_event_changename(struct bonding *bond) 3372 { 3373 #ifdef CONFIG_PROC_FS 3374 bond_remove_proc_entry(bond); 3375 bond_create_proc_entry(bond); 3376 #endif 3377 down_write(&(bonding_rwsem)); 3378 bond_destroy_sysfs_entry(bond); 3379 bond_create_sysfs_entry(bond); 3380 up_write(&(bonding_rwsem)); 3381 return NOTIFY_DONE; 3382 } 3383 3384 static int bond_master_netdev_event(unsigned long event, struct net_device *bond_dev) 3385 { 3386 struct bonding *event_bond = bond_dev->priv; 3387 3388 switch (event) { 3389 case NETDEV_CHANGENAME: 3390 return bond_event_changename(event_bond); 3391 case NETDEV_UNREGISTER: 3392 bond_release_all(event_bond->dev); 3393 break; 3394 default: 3395 break; 3396 } 3397 3398 return NOTIFY_DONE; 3399 } 3400 3401 static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) 3402 { 3403 struct net_device *bond_dev = slave_dev->master; 3404 struct bonding *bond = bond_dev->priv; 3405 3406 switch (event) { 3407 case NETDEV_UNREGISTER: 3408 if (bond_dev) { 3409 if (bond->setup_by_slave) 3410 bond_release_and_destroy(bond_dev, slave_dev); 3411 else 3412 bond_release(bond_dev, slave_dev); 3413 } 3414 break; 3415 case NETDEV_CHANGE: 3416 /* 3417 * TODO: is this what we get if somebody 3418 * sets up a hierarchical bond, then rmmod's 3419 * one of the slave bonding devices? 3420 */ 3421 break; 3422 case NETDEV_DOWN: 3423 /* 3424 * ... Or is it this? 
3425 */ 3426 break; 3427 case NETDEV_CHANGEMTU: 3428 /* 3429 * TODO: Should slaves be allowed to 3430 * independently alter their MTU? For 3431 * an active-backup bond, slaves need 3432 * not be the same type of device, so 3433 * MTUs may vary. For other modes, 3434 * slaves arguably should have the 3435 * same MTUs. To do this, we'd need to 3436 * take over the slave's change_mtu 3437 * function for the duration of their 3438 * servitude. 3439 */ 3440 break; 3441 case NETDEV_CHANGENAME: 3442 /* 3443 * TODO: handle changing the primary's name 3444 */ 3445 break; 3446 case NETDEV_FEAT_CHANGE: 3447 bond_compute_features(bond); 3448 break; 3449 default: 3450 break; 3451 } 3452 3453 return NOTIFY_DONE; 3454 } 3455 3456 /* 3457 * bond_netdev_event: handle netdev notifier chain events. 3458 * 3459 * This function receives events for the netdev chain. The caller (an 3460 * ioctl handler calling blocking_notifier_call_chain) holds the necessary 3461 * locks for us to safely manipulate the slave devices (RTNL lock, 3462 * dev_probe_lock). 3463 */ 3464 static int bond_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 3465 { 3466 struct net_device *event_dev = (struct net_device *)ptr; 3467 3468 if (dev_net(event_dev) != &init_net) 3469 return NOTIFY_DONE; 3470 3471 dprintk("event_dev: %s, event: %lx\n", 3472 (event_dev ? event_dev->name : "None"), 3473 event); 3474 3475 if (!(event_dev->priv_flags & IFF_BONDING)) 3476 return NOTIFY_DONE; 3477 3478 if (event_dev->flags & IFF_MASTER) { 3479 dprintk("IFF_MASTER\n"); 3480 return bond_master_netdev_event(event, event_dev); 3481 } 3482 3483 if (event_dev->flags & IFF_SLAVE) { 3484 dprintk("IFF_SLAVE\n"); 3485 return bond_slave_netdev_event(event, event_dev); 3486 } 3487 3488 return NOTIFY_DONE; 3489 } 3490 3491 /* 3492 * bond_inetaddr_event: handle inetaddr notifier chain events. 
3493 * 3494 * We keep track of device IPs primarily to use as source addresses in 3495 * ARP monitor probes (rather than spewing out broadcasts all the time). 3496 * 3497 * We track one IP for the main device (if it has one), plus one per VLAN. 3498 */ 3499 static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 3500 { 3501 struct in_ifaddr *ifa = ptr; 3502 struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; 3503 struct bonding *bond, *bond_next; 3504 struct vlan_entry *vlan, *vlan_next; 3505 3506 if (dev_net(ifa->ifa_dev->dev) != &init_net) 3507 return NOTIFY_DONE; 3508 3509 list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) { 3510 if (bond->dev == event_dev) { 3511 switch (event) { 3512 case NETDEV_UP: 3513 bond->master_ip = ifa->ifa_local; 3514 return NOTIFY_OK; 3515 case NETDEV_DOWN: 3516 bond->master_ip = bond_glean_dev_ip(bond->dev); 3517 return NOTIFY_OK; 3518 default: 3519 return NOTIFY_DONE; 3520 } 3521 } 3522 3523 if (list_empty(&bond->vlan_list)) 3524 continue; 3525 3526 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 3527 vlan_list) { 3528 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 3529 if (vlan_dev == event_dev) { 3530 switch (event) { 3531 case NETDEV_UP: 3532 vlan->vlan_ip = ifa->ifa_local; 3533 return NOTIFY_OK; 3534 case NETDEV_DOWN: 3535 vlan->vlan_ip = 3536 bond_glean_dev_ip(vlan_dev); 3537 return NOTIFY_OK; 3538 default: 3539 return NOTIFY_DONE; 3540 } 3541 } 3542 } 3543 } 3544 return NOTIFY_DONE; 3545 } 3546 3547 static struct notifier_block bond_netdev_notifier = { 3548 .notifier_call = bond_netdev_event, 3549 }; 3550 3551 static struct notifier_block bond_inetaddr_notifier = { 3552 .notifier_call = bond_inetaddr_event, 3553 }; 3554 3555 /*-------------------------- Packet type handling ---------------------------*/ 3556 3557 /* register to receive lacpdus on a bond */ 3558 static void bond_register_lacpdu(struct bonding *bond) 3559 { 3560 struct 
packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); 3561 3562 /* initialize packet type */ 3563 pk_type->type = PKT_TYPE_LACPDU; 3564 pk_type->dev = bond->dev; 3565 pk_type->func = bond_3ad_lacpdu_recv; 3566 3567 dev_add_pack(pk_type); 3568 } 3569 3570 /* unregister to receive lacpdus on a bond */ 3571 static void bond_unregister_lacpdu(struct bonding *bond) 3572 { 3573 dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); 3574 } 3575 3576 void bond_register_arp(struct bonding *bond) 3577 { 3578 struct packet_type *pt = &bond->arp_mon_pt; 3579 3580 if (pt->type) 3581 return; 3582 3583 pt->type = htons(ETH_P_ARP); 3584 pt->dev = bond->dev; 3585 pt->func = bond_arp_rcv; 3586 dev_add_pack(pt); 3587 } 3588 3589 void bond_unregister_arp(struct bonding *bond) 3590 { 3591 struct packet_type *pt = &bond->arp_mon_pt; 3592 3593 dev_remove_pack(pt); 3594 pt->type = 0; 3595 } 3596 3597 /*---------------------------- Hashing Policies -----------------------------*/ 3598 3599 /* 3600 * Hash for the output device based upon layer 2 and layer 3 data. If 3601 * the packet is not IP mimic bond_xmit_hash_policy_l2() 3602 */ 3603 static int bond_xmit_hash_policy_l23(struct sk_buff *skb, 3604 struct net_device *bond_dev, int count) 3605 { 3606 struct ethhdr *data = (struct ethhdr *)skb->data; 3607 struct iphdr *iph = ip_hdr(skb); 3608 3609 if (skb->protocol == __constant_htons(ETH_P_IP)) { 3610 return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ 3611 (data->h_dest[5] ^ bond_dev->dev_addr[5])) % count; 3612 } 3613 3614 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3615 } 3616 3617 /* 3618 * Hash for the output device based upon layer 3 and layer 4 data. If 3619 * the packet is a frag or not TCP or UDP, just use layer 3 data. 
If it is 3620 * altogether not IP, mimic bond_xmit_hash_policy_l2() 3621 */ 3622 static int bond_xmit_hash_policy_l34(struct sk_buff *skb, 3623 struct net_device *bond_dev, int count) 3624 { 3625 struct ethhdr *data = (struct ethhdr *)skb->data; 3626 struct iphdr *iph = ip_hdr(skb); 3627 __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); 3628 int layer4_xor = 0; 3629 3630 if (skb->protocol == __constant_htons(ETH_P_IP)) { 3631 if (!(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) && 3632 (iph->protocol == IPPROTO_TCP || 3633 iph->protocol == IPPROTO_UDP)) { 3634 layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1))); 3635 } 3636 return (layer4_xor ^ 3637 ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; 3638 3639 } 3640 3641 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3642 } 3643 3644 /* 3645 * Hash for the output device based upon layer 2 data 3646 */ 3647 static int bond_xmit_hash_policy_l2(struct sk_buff *skb, 3648 struct net_device *bond_dev, int count) 3649 { 3650 struct ethhdr *data = (struct ethhdr *)skb->data; 3651 3652 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3653 } 3654 3655 /*-------------------------- Device entry points ----------------------------*/ 3656 3657 static int bond_open(struct net_device *bond_dev) 3658 { 3659 struct bonding *bond = bond_dev->priv; 3660 3661 bond->kill_timers = 0; 3662 3663 if ((bond->params.mode == BOND_MODE_TLB) || 3664 (bond->params.mode == BOND_MODE_ALB)) { 3665 /* bond_alb_initialize must be called before the timer 3666 * is started. 3667 */ 3668 if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) { 3669 /* something went wrong - fail the open operation */ 3670 return -1; 3671 } 3672 3673 INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); 3674 queue_delayed_work(bond->wq, &bond->alb_work, 0); 3675 } 3676 3677 if (bond->params.miimon) { /* link check interval, in milliseconds. 
*/ 3678 INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); 3679 queue_delayed_work(bond->wq, &bond->mii_work, 0); 3680 } 3681 3682 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ 3683 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) 3684 INIT_DELAYED_WORK(&bond->arp_work, 3685 bond_activebackup_arp_mon); 3686 else 3687 INIT_DELAYED_WORK(&bond->arp_work, 3688 bond_loadbalance_arp_mon); 3689 3690 queue_delayed_work(bond->wq, &bond->arp_work, 0); 3691 if (bond->params.arp_validate) 3692 bond_register_arp(bond); 3693 } 3694 3695 if (bond->params.mode == BOND_MODE_8023AD) { 3696 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); 3697 queue_delayed_work(bond->wq, &bond->ad_work, 0); 3698 /* register to receive LACPDUs */ 3699 bond_register_lacpdu(bond); 3700 } 3701 3702 return 0; 3703 } 3704 3705 static int bond_close(struct net_device *bond_dev) 3706 { 3707 struct bonding *bond = bond_dev->priv; 3708 3709 if (bond->params.mode == BOND_MODE_8023AD) { 3710 /* Unregister the receive of LACPDUs */ 3711 bond_unregister_lacpdu(bond); 3712 } 3713 3714 if (bond->params.arp_validate) 3715 bond_unregister_arp(bond); 3716 3717 write_lock_bh(&bond->lock); 3718 3719 3720 /* signal timers not to re-arm */ 3721 bond->kill_timers = 1; 3722 3723 write_unlock_bh(&bond->lock); 3724 3725 if (bond->params.miimon) { /* link check interval, in milliseconds. */ 3726 cancel_delayed_work(&bond->mii_work); 3727 } 3728 3729 if (bond->params.arp_interval) { /* arp interval, in milliseconds. 
*/ 3730 cancel_delayed_work(&bond->arp_work); 3731 } 3732 3733 switch (bond->params.mode) { 3734 case BOND_MODE_8023AD: 3735 cancel_delayed_work(&bond->ad_work); 3736 break; 3737 case BOND_MODE_TLB: 3738 case BOND_MODE_ALB: 3739 cancel_delayed_work(&bond->alb_work); 3740 break; 3741 default: 3742 break; 3743 } 3744 3745 3746 if ((bond->params.mode == BOND_MODE_TLB) || 3747 (bond->params.mode == BOND_MODE_ALB)) { 3748 /* Must be called only after all 3749 * slaves have been released 3750 */ 3751 bond_alb_deinitialize(bond); 3752 } 3753 3754 return 0; 3755 } 3756 3757 static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) 3758 { 3759 struct bonding *bond = bond_dev->priv; 3760 struct net_device_stats *stats = &(bond->stats), *sstats; 3761 struct net_device_stats local_stats; 3762 struct slave *slave; 3763 int i; 3764 3765 memset(&local_stats, 0, sizeof(struct net_device_stats)); 3766 3767 read_lock_bh(&bond->lock); 3768 3769 bond_for_each_slave(bond, slave, i) { 3770 sstats = slave->dev->get_stats(slave->dev); 3771 local_stats.rx_packets += sstats->rx_packets; 3772 local_stats.rx_bytes += sstats->rx_bytes; 3773 local_stats.rx_errors += sstats->rx_errors; 3774 local_stats.rx_dropped += sstats->rx_dropped; 3775 3776 local_stats.tx_packets += sstats->tx_packets; 3777 local_stats.tx_bytes += sstats->tx_bytes; 3778 local_stats.tx_errors += sstats->tx_errors; 3779 local_stats.tx_dropped += sstats->tx_dropped; 3780 3781 local_stats.multicast += sstats->multicast; 3782 local_stats.collisions += sstats->collisions; 3783 3784 local_stats.rx_length_errors += sstats->rx_length_errors; 3785 local_stats.rx_over_errors += sstats->rx_over_errors; 3786 local_stats.rx_crc_errors += sstats->rx_crc_errors; 3787 local_stats.rx_frame_errors += sstats->rx_frame_errors; 3788 local_stats.rx_fifo_errors += sstats->rx_fifo_errors; 3789 local_stats.rx_missed_errors += sstats->rx_missed_errors; 3790 3791 local_stats.tx_aborted_errors += sstats->tx_aborted_errors; 3792 
local_stats.tx_carrier_errors += sstats->tx_carrier_errors; 3793 local_stats.tx_fifo_errors += sstats->tx_fifo_errors; 3794 local_stats.tx_heartbeat_errors += sstats->tx_heartbeat_errors; 3795 local_stats.tx_window_errors += sstats->tx_window_errors; 3796 } 3797 3798 memcpy(stats, &local_stats, sizeof(struct net_device_stats)); 3799 3800 read_unlock_bh(&bond->lock); 3801 3802 return stats; 3803 } 3804 3805 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) 3806 { 3807 struct net_device *slave_dev = NULL; 3808 struct ifbond k_binfo; 3809 struct ifbond __user *u_binfo = NULL; 3810 struct ifslave k_sinfo; 3811 struct ifslave __user *u_sinfo = NULL; 3812 struct mii_ioctl_data *mii = NULL; 3813 int res = 0; 3814 3815 dprintk("bond_ioctl: master=%s, cmd=%d\n", 3816 bond_dev->name, cmd); 3817 3818 switch (cmd) { 3819 case SIOCGMIIPHY: 3820 mii = if_mii(ifr); 3821 if (!mii) { 3822 return -EINVAL; 3823 } 3824 mii->phy_id = 0; 3825 /* Fall Through */ 3826 case SIOCGMIIREG: 3827 /* 3828 * We do this again just in case we were called by SIOCGMIIREG 3829 * instead of SIOCGMIIPHY. 
3830 */ 3831 mii = if_mii(ifr); 3832 if (!mii) { 3833 return -EINVAL; 3834 } 3835 3836 if (mii->reg_num == 1) { 3837 struct bonding *bond = bond_dev->priv; 3838 mii->val_out = 0; 3839 read_lock(&bond->lock); 3840 read_lock(&bond->curr_slave_lock); 3841 if (netif_carrier_ok(bond->dev)) { 3842 mii->val_out = BMSR_LSTATUS; 3843 } 3844 read_unlock(&bond->curr_slave_lock); 3845 read_unlock(&bond->lock); 3846 } 3847 3848 return 0; 3849 case BOND_INFO_QUERY_OLD: 3850 case SIOCBONDINFOQUERY: 3851 u_binfo = (struct ifbond __user *)ifr->ifr_data; 3852 3853 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { 3854 return -EFAULT; 3855 } 3856 3857 res = bond_info_query(bond_dev, &k_binfo); 3858 if (res == 0) { 3859 if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { 3860 return -EFAULT; 3861 } 3862 } 3863 3864 return res; 3865 case BOND_SLAVE_INFO_QUERY_OLD: 3866 case SIOCBONDSLAVEINFOQUERY: 3867 u_sinfo = (struct ifslave __user *)ifr->ifr_data; 3868 3869 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { 3870 return -EFAULT; 3871 } 3872 3873 res = bond_slave_info_query(bond_dev, &k_sinfo); 3874 if (res == 0) { 3875 if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { 3876 return -EFAULT; 3877 } 3878 } 3879 3880 return res; 3881 default: 3882 /* Go on */ 3883 break; 3884 } 3885 3886 if (!capable(CAP_NET_ADMIN)) { 3887 return -EPERM; 3888 } 3889 3890 down_write(&(bonding_rwsem)); 3891 slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave); 3892 3893 dprintk("slave_dev=%p: \n", slave_dev); 3894 3895 if (!slave_dev) { 3896 res = -ENODEV; 3897 } else { 3898 dprintk("slave_dev->name=%s: \n", slave_dev->name); 3899 switch (cmd) { 3900 case BOND_ENSLAVE_OLD: 3901 case SIOCBONDENSLAVE: 3902 res = bond_enslave(bond_dev, slave_dev); 3903 break; 3904 case BOND_RELEASE_OLD: 3905 case SIOCBONDRELEASE: 3906 res = bond_release(bond_dev, slave_dev); 3907 break; 3908 case BOND_SETHWADDR_OLD: 3909 case SIOCBONDSETHWADDR: 3910 res = bond_sethwaddr(bond_dev, slave_dev); 3911 
break; 3912 case BOND_CHANGE_ACTIVE_OLD: 3913 case SIOCBONDCHANGEACTIVE: 3914 res = bond_ioctl_change_active(bond_dev, slave_dev); 3915 break; 3916 default: 3917 res = -EOPNOTSUPP; 3918 } 3919 3920 dev_put(slave_dev); 3921 } 3922 3923 up_write(&(bonding_rwsem)); 3924 return res; 3925 } 3926 3927 static void bond_set_multicast_list(struct net_device *bond_dev) 3928 { 3929 struct bonding *bond = bond_dev->priv; 3930 struct dev_mc_list *dmi; 3931 3932 /* 3933 * Do promisc before checking multicast_mode 3934 */ 3935 if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) { 3936 bond_set_promiscuity(bond, 1); 3937 } 3938 3939 if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) { 3940 bond_set_promiscuity(bond, -1); 3941 } 3942 3943 /* set allmulti flag to slaves */ 3944 if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) { 3945 bond_set_allmulti(bond, 1); 3946 } 3947 3948 if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) { 3949 bond_set_allmulti(bond, -1); 3950 } 3951 3952 read_lock(&bond->lock); 3953 3954 bond->flags = bond_dev->flags; 3955 3956 /* looking for addresses to add to slaves' mc list */ 3957 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 3958 if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) { 3959 bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3960 } 3961 } 3962 3963 /* looking for addresses to delete from slaves' list */ 3964 for (dmi = bond->mc_list; dmi; dmi = dmi->next) { 3965 if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) { 3966 bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3967 } 3968 } 3969 3970 /* save master's multicast list */ 3971 bond_mc_list_destroy(bond); 3972 bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC); 3973 3974 read_unlock(&bond->lock); 3975 } 3976 3977 /* 3978 * Change the MTU of all of a master's slaves to match the master 3979 */ 3980 static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) 3981 { 3982 struct bonding 
*bond = bond_dev->priv; 3983 struct slave *slave, *stop_at; 3984 int res = 0; 3985 int i; 3986 3987 dprintk("bond=%p, name=%s, new_mtu=%d\n", bond, 3988 (bond_dev ? bond_dev->name : "None"), new_mtu); 3989 3990 /* Can't hold bond->lock with bh disabled here since 3991 * some base drivers panic. On the other hand we can't 3992 * hold bond->lock without bh disabled because we'll 3993 * deadlock. The only solution is to rely on the fact 3994 * that we're under rtnl_lock here, and the slaves 3995 * list won't change. This doesn't solve the problem 3996 * of setting the slave's MTU while it is 3997 * transmitting, but the assumption is that the base 3998 * driver can handle that. 3999 * 4000 * TODO: figure out a way to safely iterate the slaves 4001 * list, but without holding a lock around the actual 4002 * call to the base driver. 4003 */ 4004 4005 bond_for_each_slave(bond, slave, i) { 4006 dprintk("s %p s->p %p c_m %p\n", slave, 4007 slave->prev, slave->dev->change_mtu); 4008 4009 res = dev_set_mtu(slave->dev, new_mtu); 4010 4011 if (res) { 4012 /* If we failed to set the slave's mtu to the new value 4013 * we must abort the operation even in ACTIVE_BACKUP 4014 * mode, because if we allow the backup slaves to have 4015 * different mtu values than the active slave we'll 4016 * need to change their mtu when doing a failover. That 4017 * means changing their mtu from timer context, which 4018 * is probably not a good idea. 
4019 */ 4020 dprintk("err %d %s\n", res, slave->dev->name); 4021 goto unwind; 4022 } 4023 } 4024 4025 bond_dev->mtu = new_mtu; 4026 4027 return 0; 4028 4029 unwind: 4030 /* unwind from head to the slave that failed */ 4031 stop_at = slave; 4032 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 4033 int tmp_res; 4034 4035 tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); 4036 if (tmp_res) { 4037 dprintk("unwind err %d dev %s\n", tmp_res, 4038 slave->dev->name); 4039 } 4040 } 4041 4042 return res; 4043 } 4044 4045 /* 4046 * Change HW address 4047 * 4048 * Note that many devices must be down to change the HW address, and 4049 * downing the master releases all slaves. We can make bonds full of 4050 * bonding devices to test this, however. 4051 */ 4052 static int bond_set_mac_address(struct net_device *bond_dev, void *addr) 4053 { 4054 struct bonding *bond = bond_dev->priv; 4055 struct sockaddr *sa = addr, tmp_sa; 4056 struct slave *slave, *stop_at; 4057 int res = 0; 4058 int i; 4059 4060 dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); 4061 4062 /* 4063 * If fail_over_mac is enabled, do nothing and return success. 4064 * Returning an error causes ifenslave to fail. 4065 */ 4066 if (bond->params.fail_over_mac) 4067 return 0; 4068 4069 if (!is_valid_ether_addr(sa->sa_data)) { 4070 return -EADDRNOTAVAIL; 4071 } 4072 4073 /* Can't hold bond->lock with bh disabled here since 4074 * some base drivers panic. On the other hand we can't 4075 * hold bond->lock without bh disabled because we'll 4076 * deadlock. The only solution is to rely on the fact 4077 * that we're under rtnl_lock here, and the slaves 4078 * list won't change. This doesn't solve the problem 4079 * of setting the slave's hw address while it is 4080 * transmitting, but the assumption is that the base 4081 * driver can handle that. 
4082 * 4083 * TODO: figure out a way to safely iterate the slaves 4084 * list, but without holding a lock around the actual 4085 * call to the base driver. 4086 */ 4087 4088 bond_for_each_slave(bond, slave, i) { 4089 dprintk("slave %p %s\n", slave, slave->dev->name); 4090 4091 if (slave->dev->set_mac_address == NULL) { 4092 res = -EOPNOTSUPP; 4093 dprintk("EOPNOTSUPP %s\n", slave->dev->name); 4094 goto unwind; 4095 } 4096 4097 res = dev_set_mac_address(slave->dev, addr); 4098 if (res) { 4099 /* TODO: consider downing the slave 4100 * and retry ? 4101 * User should expect communications 4102 * breakage anyway until ARP finish 4103 * updating, so... 4104 */ 4105 dprintk("err %d %s\n", res, slave->dev->name); 4106 goto unwind; 4107 } 4108 } 4109 4110 /* success */ 4111 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 4112 return 0; 4113 4114 unwind: 4115 memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 4116 tmp_sa.sa_family = bond_dev->type; 4117 4118 /* unwind from head to the slave that failed */ 4119 stop_at = slave; 4120 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 4121 int tmp_res; 4122 4123 tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); 4124 if (tmp_res) { 4125 dprintk("unwind err %d dev %s\n", tmp_res, 4126 slave->dev->name); 4127 } 4128 } 4129 4130 return res; 4131 } 4132 4133 static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) 4134 { 4135 struct bonding *bond = bond_dev->priv; 4136 struct slave *slave, *start_at; 4137 int i, slave_no, res = 1; 4138 4139 read_lock(&bond->lock); 4140 4141 if (!BOND_IS_OK(bond)) { 4142 goto out; 4143 } 4144 4145 /* 4146 * Concurrent TX may collide on rr_tx_counter; we accept that 4147 * as being rare enough not to justify using an atomic op here 4148 */ 4149 slave_no = bond->rr_tx_counter++ % bond->slave_cnt; 4150 4151 bond_for_each_slave(bond, slave, i) { 4152 slave_no--; 4153 if (slave_no < 0) { 4154 break; 4155 } 4156 } 4157 4158 
start_at = slave; 4159 bond_for_each_slave_from(bond, slave, i, start_at) { 4160 if (IS_UP(slave->dev) && 4161 (slave->link == BOND_LINK_UP) && 4162 (slave->state == BOND_STATE_ACTIVE)) { 4163 res = bond_dev_queue_xmit(bond, skb, slave->dev); 4164 break; 4165 } 4166 } 4167 4168 out: 4169 if (res) { 4170 /* no suitable interface, frame not sent */ 4171 dev_kfree_skb(skb); 4172 } 4173 read_unlock(&bond->lock); 4174 return 0; 4175 } 4176 4177 4178 /* 4179 * in active-backup mode, we know that bond->curr_active_slave is always valid if 4180 * the bond has a usable interface. 4181 */ 4182 static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) 4183 { 4184 struct bonding *bond = bond_dev->priv; 4185 int res = 1; 4186 4187 read_lock(&bond->lock); 4188 read_lock(&bond->curr_slave_lock); 4189 4190 if (!BOND_IS_OK(bond)) { 4191 goto out; 4192 } 4193 4194 if (!bond->curr_active_slave) 4195 goto out; 4196 4197 res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev); 4198 4199 out: 4200 if (res) { 4201 /* no suitable interface, frame not sent */ 4202 dev_kfree_skb(skb); 4203 } 4204 read_unlock(&bond->curr_slave_lock); 4205 read_unlock(&bond->lock); 4206 return 0; 4207 } 4208 4209 /* 4210 * In bond_xmit_xor() , we determine the output device by using a pre- 4211 * determined xmit_hash_policy(), If the selected device is not enabled, 4212 * find the next active slave. 
4213 */ 4214 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) 4215 { 4216 struct bonding *bond = bond_dev->priv; 4217 struct slave *slave, *start_at; 4218 int slave_no; 4219 int i; 4220 int res = 1; 4221 4222 read_lock(&bond->lock); 4223 4224 if (!BOND_IS_OK(bond)) { 4225 goto out; 4226 } 4227 4228 slave_no = bond->xmit_hash_policy(skb, bond_dev, bond->slave_cnt); 4229 4230 bond_for_each_slave(bond, slave, i) { 4231 slave_no--; 4232 if (slave_no < 0) { 4233 break; 4234 } 4235 } 4236 4237 start_at = slave; 4238 4239 bond_for_each_slave_from(bond, slave, i, start_at) { 4240 if (IS_UP(slave->dev) && 4241 (slave->link == BOND_LINK_UP) && 4242 (slave->state == BOND_STATE_ACTIVE)) { 4243 res = bond_dev_queue_xmit(bond, skb, slave->dev); 4244 break; 4245 } 4246 } 4247 4248 out: 4249 if (res) { 4250 /* no suitable interface, frame not sent */ 4251 dev_kfree_skb(skb); 4252 } 4253 read_unlock(&bond->lock); 4254 return 0; 4255 } 4256 4257 /* 4258 * in broadcast mode, we send everything to all usable interfaces. 
4259 */ 4260 static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) 4261 { 4262 struct bonding *bond = bond_dev->priv; 4263 struct slave *slave, *start_at; 4264 struct net_device *tx_dev = NULL; 4265 int i; 4266 int res = 1; 4267 4268 read_lock(&bond->lock); 4269 4270 if (!BOND_IS_OK(bond)) { 4271 goto out; 4272 } 4273 4274 read_lock(&bond->curr_slave_lock); 4275 start_at = bond->curr_active_slave; 4276 read_unlock(&bond->curr_slave_lock); 4277 4278 if (!start_at) { 4279 goto out; 4280 } 4281 4282 bond_for_each_slave_from(bond, slave, i, start_at) { 4283 if (IS_UP(slave->dev) && 4284 (slave->link == BOND_LINK_UP) && 4285 (slave->state == BOND_STATE_ACTIVE)) { 4286 if (tx_dev) { 4287 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 4288 if (!skb2) { 4289 printk(KERN_ERR DRV_NAME 4290 ": %s: Error: bond_xmit_broadcast(): " 4291 "skb_clone() failed\n", 4292 bond_dev->name); 4293 continue; 4294 } 4295 4296 res = bond_dev_queue_xmit(bond, skb2, tx_dev); 4297 if (res) { 4298 dev_kfree_skb(skb2); 4299 continue; 4300 } 4301 } 4302 tx_dev = slave->dev; 4303 } 4304 } 4305 4306 if (tx_dev) { 4307 res = bond_dev_queue_xmit(bond, skb, tx_dev); 4308 } 4309 4310 out: 4311 if (res) { 4312 /* no suitable interface, frame not sent */ 4313 dev_kfree_skb(skb); 4314 } 4315 /* frame sent to all suitable interfaces */ 4316 read_unlock(&bond->lock); 4317 return 0; 4318 } 4319 4320 /*------------------------- Device initialization ---------------------------*/ 4321 4322 static void bond_set_xmit_hash_policy(struct bonding *bond) 4323 { 4324 switch (bond->params.xmit_policy) { 4325 case BOND_XMIT_POLICY_LAYER23: 4326 bond->xmit_hash_policy = bond_xmit_hash_policy_l23; 4327 break; 4328 case BOND_XMIT_POLICY_LAYER34: 4329 bond->xmit_hash_policy = bond_xmit_hash_policy_l34; 4330 break; 4331 case BOND_XMIT_POLICY_LAYER2: 4332 default: 4333 bond->xmit_hash_policy = bond_xmit_hash_policy_l2; 4334 break; 4335 } 4336 } 4337 4338 /* 4339 * set bond mode specific net 
device operations 4340 */ 4341 void bond_set_mode_ops(struct bonding *bond, int mode) 4342 { 4343 struct net_device *bond_dev = bond->dev; 4344 4345 switch (mode) { 4346 case BOND_MODE_ROUNDROBIN: 4347 bond_dev->hard_start_xmit = bond_xmit_roundrobin; 4348 break; 4349 case BOND_MODE_ACTIVEBACKUP: 4350 bond_dev->hard_start_xmit = bond_xmit_activebackup; 4351 break; 4352 case BOND_MODE_XOR: 4353 bond_dev->hard_start_xmit = bond_xmit_xor; 4354 bond_set_xmit_hash_policy(bond); 4355 break; 4356 case BOND_MODE_BROADCAST: 4357 bond_dev->hard_start_xmit = bond_xmit_broadcast; 4358 break; 4359 case BOND_MODE_8023AD: 4360 bond_set_master_3ad_flags(bond); 4361 bond_dev->hard_start_xmit = bond_3ad_xmit_xor; 4362 bond_set_xmit_hash_policy(bond); 4363 break; 4364 case BOND_MODE_ALB: 4365 bond_set_master_alb_flags(bond); 4366 /* FALLTHRU */ 4367 case BOND_MODE_TLB: 4368 bond_dev->hard_start_xmit = bond_alb_xmit; 4369 bond_dev->set_mac_address = bond_alb_set_mac_address; 4370 break; 4371 default: 4372 /* Should never happen, mode already checked */ 4373 printk(KERN_ERR DRV_NAME 4374 ": %s: Error: Unknown bonding mode %d\n", 4375 bond_dev->name, 4376 mode); 4377 break; 4378 } 4379 } 4380 4381 static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, 4382 struct ethtool_drvinfo *drvinfo) 4383 { 4384 strncpy(drvinfo->driver, DRV_NAME, 32); 4385 strncpy(drvinfo->version, DRV_VERSION, 32); 4386 snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION); 4387 } 4388 4389 static const struct ethtool_ops bond_ethtool_ops = { 4390 .get_drvinfo = bond_ethtool_get_drvinfo, 4391 }; 4392 4393 /* 4394 * Does not allocate but creates a /proc entry. 4395 * Allowed to fail. 
4396 */ 4397 static int bond_init(struct net_device *bond_dev, struct bond_params *params) 4398 { 4399 struct bonding *bond = bond_dev->priv; 4400 4401 dprintk("Begin bond_init for %s\n", bond_dev->name); 4402 4403 /* initialize rwlocks */ 4404 rwlock_init(&bond->lock); 4405 rwlock_init(&bond->curr_slave_lock); 4406 4407 bond->params = *params; /* copy params struct */ 4408 4409 bond->wq = create_singlethread_workqueue(bond_dev->name); 4410 if (!bond->wq) 4411 return -ENOMEM; 4412 4413 /* Initialize pointers */ 4414 bond->first_slave = NULL; 4415 bond->curr_active_slave = NULL; 4416 bond->current_arp_slave = NULL; 4417 bond->primary_slave = NULL; 4418 bond->dev = bond_dev; 4419 bond->send_grat_arp = 0; 4420 bond->setup_by_slave = 0; 4421 INIT_LIST_HEAD(&bond->vlan_list); 4422 4423 /* Initialize the device entry points */ 4424 bond_dev->open = bond_open; 4425 bond_dev->stop = bond_close; 4426 bond_dev->get_stats = bond_get_stats; 4427 bond_dev->do_ioctl = bond_do_ioctl; 4428 bond_dev->ethtool_ops = &bond_ethtool_ops; 4429 bond_dev->set_multicast_list = bond_set_multicast_list; 4430 bond_dev->change_mtu = bond_change_mtu; 4431 bond_dev->set_mac_address = bond_set_mac_address; 4432 bond_dev->validate_addr = NULL; 4433 4434 bond_set_mode_ops(bond, bond->params.mode); 4435 4436 bond_dev->destructor = free_netdev; 4437 4438 /* Initialize the device options */ 4439 bond_dev->tx_queue_len = 0; 4440 bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; 4441 bond_dev->priv_flags |= IFF_BONDING; 4442 4443 /* At first, we block adding VLANs. That's the only way to 4444 * prevent problems that occur when adding VLANs over an 4445 * empty bond. The block will be removed once non-challenged 4446 * slaves are enslaved. 
4447 */ 4448 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 4449 4450 /* don't acquire bond device's netif_tx_lock when 4451 * transmitting */ 4452 bond_dev->features |= NETIF_F_LLTX; 4453 4454 /* By default, we declare the bond to be fully 4455 * VLAN hardware accelerated capable. Special 4456 * care is taken in the various xmit functions 4457 * when there are slaves that are not hw accel 4458 * capable 4459 */ 4460 bond_dev->vlan_rx_register = bond_vlan_rx_register; 4461 bond_dev->vlan_rx_add_vid = bond_vlan_rx_add_vid; 4462 bond_dev->vlan_rx_kill_vid = bond_vlan_rx_kill_vid; 4463 bond_dev->features |= (NETIF_F_HW_VLAN_TX | 4464 NETIF_F_HW_VLAN_RX | 4465 NETIF_F_HW_VLAN_FILTER); 4466 4467 #ifdef CONFIG_PROC_FS 4468 bond_create_proc_entry(bond); 4469 #endif 4470 list_add_tail(&bond->bond_list, &bond_dev_list); 4471 4472 return 0; 4473 } 4474 4475 /* De-initialize device specific data. 4476 * Caller must hold rtnl_lock. 4477 */ 4478 static void bond_deinit(struct net_device *bond_dev) 4479 { 4480 struct bonding *bond = bond_dev->priv; 4481 4482 list_del(&bond->bond_list); 4483 4484 #ifdef CONFIG_PROC_FS 4485 bond_remove_proc_entry(bond); 4486 #endif 4487 } 4488 4489 static void bond_work_cancel_all(struct bonding *bond) 4490 { 4491 write_lock_bh(&bond->lock); 4492 bond->kill_timers = 1; 4493 write_unlock_bh(&bond->lock); 4494 4495 if (bond->params.miimon && delayed_work_pending(&bond->mii_work)) 4496 cancel_delayed_work(&bond->mii_work); 4497 4498 if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work)) 4499 cancel_delayed_work(&bond->arp_work); 4500 4501 if (bond->params.mode == BOND_MODE_ALB && 4502 delayed_work_pending(&bond->alb_work)) 4503 cancel_delayed_work(&bond->alb_work); 4504 4505 if (bond->params.mode == BOND_MODE_8023AD && 4506 delayed_work_pending(&bond->ad_work)) 4507 cancel_delayed_work(&bond->ad_work); 4508 } 4509 4510 /* Unregister and free all bond devices. 4511 * Caller must hold rtnl_lock. 
4512 */ 4513 static void bond_free_all(void) 4514 { 4515 struct bonding *bond, *nxt; 4516 4517 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { 4518 struct net_device *bond_dev = bond->dev; 4519 4520 bond_work_cancel_all(bond); 4521 netif_tx_lock_bh(bond_dev); 4522 bond_mc_list_destroy(bond); 4523 netif_tx_unlock_bh(bond_dev); 4524 /* Release the bonded slaves */ 4525 bond_release_all(bond_dev); 4526 bond_destroy(bond); 4527 } 4528 4529 #ifdef CONFIG_PROC_FS 4530 bond_destroy_proc_dir(); 4531 #endif 4532 } 4533 4534 /*------------------------- Module initialization ---------------------------*/ 4535 4536 /* 4537 * Convert string input module parms. Accept either the 4538 * number of the mode or its string name. A bit complicated because 4539 * some mode names are substrings of other names, and calls from sysfs 4540 * may have whitespace in the name (trailing newlines, for example). 4541 */ 4542 int bond_parse_parm(const char *buf, struct bond_parm_tbl *tbl) 4543 { 4544 int mode = -1, i, rv; 4545 char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, }; 4546 4547 for (p = (char *)buf; *p; p++) 4548 if (!(isdigit(*p) || isspace(*p))) 4549 break; 4550 4551 if (*p) 4552 rv = sscanf(buf, "%20s", modestr); 4553 else 4554 rv = sscanf(buf, "%d", &mode); 4555 4556 if (!rv) 4557 return -1; 4558 4559 for (i = 0; tbl[i].modename; i++) { 4560 if (mode == tbl[i].mode) 4561 return tbl[i].mode; 4562 if (strcmp(modestr, tbl[i].modename) == 0) 4563 return tbl[i].mode; 4564 } 4565 4566 return -1; 4567 } 4568 4569 static int bond_check_params(struct bond_params *params) 4570 { 4571 int arp_validate_value; 4572 4573 /* 4574 * Convert string parameters. 4575 */ 4576 if (mode) { 4577 bond_mode = bond_parse_parm(mode, bond_mode_tbl); 4578 if (bond_mode == -1) { 4579 printk(KERN_ERR DRV_NAME 4580 ": Error: Invalid bonding mode \"%s\"\n", 4581 mode == NULL ? 
"NULL" : mode); 4582 return -EINVAL; 4583 } 4584 } 4585 4586 if (xmit_hash_policy) { 4587 if ((bond_mode != BOND_MODE_XOR) && 4588 (bond_mode != BOND_MODE_8023AD)) { 4589 printk(KERN_INFO DRV_NAME 4590 ": xor_mode param is irrelevant in mode %s\n", 4591 bond_mode_name(bond_mode)); 4592 } else { 4593 xmit_hashtype = bond_parse_parm(xmit_hash_policy, 4594 xmit_hashtype_tbl); 4595 if (xmit_hashtype == -1) { 4596 printk(KERN_ERR DRV_NAME 4597 ": Error: Invalid xmit_hash_policy \"%s\"\n", 4598 xmit_hash_policy == NULL ? "NULL" : 4599 xmit_hash_policy); 4600 return -EINVAL; 4601 } 4602 } 4603 } 4604 4605 if (lacp_rate) { 4606 if (bond_mode != BOND_MODE_8023AD) { 4607 printk(KERN_INFO DRV_NAME 4608 ": lacp_rate param is irrelevant in mode %s\n", 4609 bond_mode_name(bond_mode)); 4610 } else { 4611 lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); 4612 if (lacp_fast == -1) { 4613 printk(KERN_ERR DRV_NAME 4614 ": Error: Invalid lacp rate \"%s\"\n", 4615 lacp_rate == NULL ? "NULL" : lacp_rate); 4616 return -EINVAL; 4617 } 4618 } 4619 } 4620 4621 if (max_bonds < 1 || max_bonds > INT_MAX) { 4622 printk(KERN_WARNING DRV_NAME 4623 ": Warning: max_bonds (%d) not in range %d-%d, so it " 4624 "was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", 4625 max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS); 4626 max_bonds = BOND_DEFAULT_MAX_BONDS; 4627 } 4628 4629 if (miimon < 0) { 4630 printk(KERN_WARNING DRV_NAME 4631 ": Warning: miimon module parameter (%d), " 4632 "not in range 0-%d, so it was reset to %d\n", 4633 miimon, INT_MAX, BOND_LINK_MON_INTERV); 4634 miimon = BOND_LINK_MON_INTERV; 4635 } 4636 4637 if (updelay < 0) { 4638 printk(KERN_WARNING DRV_NAME 4639 ": Warning: updelay module parameter (%d), " 4640 "not in range 0-%d, so it was reset to 0\n", 4641 updelay, INT_MAX); 4642 updelay = 0; 4643 } 4644 4645 if (downdelay < 0) { 4646 printk(KERN_WARNING DRV_NAME 4647 ": Warning: downdelay module parameter (%d), " 4648 "not in range 0-%d, so it was reset to 0\n", 4649 downdelay, 
INT_MAX); 4650 downdelay = 0; 4651 } 4652 4653 if ((use_carrier != 0) && (use_carrier != 1)) { 4654 printk(KERN_WARNING DRV_NAME 4655 ": Warning: use_carrier module parameter (%d), " 4656 "not of valid value (0/1), so it was set to 1\n", 4657 use_carrier); 4658 use_carrier = 1; 4659 } 4660 4661 /* reset values for 802.3ad */ 4662 if (bond_mode == BOND_MODE_8023AD) { 4663 if (!miimon) { 4664 printk(KERN_WARNING DRV_NAME 4665 ": Warning: miimon must be specified, " 4666 "otherwise bonding will not detect link " 4667 "failure, speed and duplex which are " 4668 "essential for 802.3ad operation\n"); 4669 printk(KERN_WARNING "Forcing miimon to 100msec\n"); 4670 miimon = 100; 4671 } 4672 } 4673 4674 /* reset values for TLB/ALB */ 4675 if ((bond_mode == BOND_MODE_TLB) || 4676 (bond_mode == BOND_MODE_ALB)) { 4677 if (!miimon) { 4678 printk(KERN_WARNING DRV_NAME 4679 ": Warning: miimon must be specified, " 4680 "otherwise bonding will not detect link " 4681 "failure and link speed which are essential " 4682 "for TLB/ALB load balancing\n"); 4683 printk(KERN_WARNING "Forcing miimon to 100msec\n"); 4684 miimon = 100; 4685 } 4686 } 4687 4688 if (bond_mode == BOND_MODE_ALB) { 4689 printk(KERN_NOTICE DRV_NAME 4690 ": In ALB mode you might experience client " 4691 "disconnections upon reconnection of a link if the " 4692 "bonding module updelay parameter (%d msec) is " 4693 "incompatible with the forwarding delay time of the " 4694 "switch\n", 4695 updelay); 4696 } 4697 4698 if (!miimon) { 4699 if (updelay || downdelay) { 4700 /* just warn the user the up/down delay will have 4701 * no effect since miimon is zero... 
4702 */ 4703 printk(KERN_WARNING DRV_NAME 4704 ": Warning: miimon module parameter not set " 4705 "and updelay (%d) or downdelay (%d) module " 4706 "parameter is set; updelay and downdelay have " 4707 "no effect unless miimon is set\n", 4708 updelay, downdelay); 4709 } 4710 } else { 4711 /* don't allow arp monitoring */ 4712 if (arp_interval) { 4713 printk(KERN_WARNING DRV_NAME 4714 ": Warning: miimon (%d) and arp_interval (%d) " 4715 "can't be used simultaneously, disabling ARP " 4716 "monitoring\n", 4717 miimon, arp_interval); 4718 arp_interval = 0; 4719 } 4720 4721 if ((updelay % miimon) != 0) { 4722 printk(KERN_WARNING DRV_NAME 4723 ": Warning: updelay (%d) is not a multiple " 4724 "of miimon (%d), updelay rounded to %d ms\n", 4725 updelay, miimon, (updelay / miimon) * miimon); 4726 } 4727 4728 updelay /= miimon; 4729 4730 if ((downdelay % miimon) != 0) { 4731 printk(KERN_WARNING DRV_NAME 4732 ": Warning: downdelay (%d) is not a multiple " 4733 "of miimon (%d), downdelay rounded to %d ms\n", 4734 downdelay, miimon, 4735 (downdelay / miimon) * miimon); 4736 } 4737 4738 downdelay /= miimon; 4739 } 4740 4741 if (arp_interval < 0) { 4742 printk(KERN_WARNING DRV_NAME 4743 ": Warning: arp_interval module parameter (%d) " 4744 ", not in range 0-%d, so it was reset to %d\n", 4745 arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); 4746 arp_interval = BOND_LINK_ARP_INTERV; 4747 } 4748 4749 for (arp_ip_count = 0; 4750 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count]; 4751 arp_ip_count++) { 4752 /* not complete check, but should be good enough to 4753 catch mistakes */ 4754 if (!isdigit(arp_ip_target[arp_ip_count][0])) { 4755 printk(KERN_WARNING DRV_NAME 4756 ": Warning: bad arp_ip_target module parameter " 4757 "(%s), ARP monitoring will not be performed\n", 4758 arp_ip_target[arp_ip_count]); 4759 arp_interval = 0; 4760 } else { 4761 __be32 ip = in_aton(arp_ip_target[arp_ip_count]); 4762 arp_target[arp_ip_count] = ip; 4763 } 4764 } 4765 4766 if 
(arp_interval && !arp_ip_count) { 4767 /* don't allow arping if no arp_ip_target given... */ 4768 printk(KERN_WARNING DRV_NAME 4769 ": Warning: arp_interval module parameter (%d) " 4770 "specified without providing an arp_ip_target " 4771 "parameter, arp_interval was reset to 0\n", 4772 arp_interval); 4773 arp_interval = 0; 4774 } 4775 4776 if (arp_validate) { 4777 if (bond_mode != BOND_MODE_ACTIVEBACKUP) { 4778 printk(KERN_ERR DRV_NAME 4779 ": arp_validate only supported in active-backup mode\n"); 4780 return -EINVAL; 4781 } 4782 if (!arp_interval) { 4783 printk(KERN_ERR DRV_NAME 4784 ": arp_validate requires arp_interval\n"); 4785 return -EINVAL; 4786 } 4787 4788 arp_validate_value = bond_parse_parm(arp_validate, 4789 arp_validate_tbl); 4790 if (arp_validate_value == -1) { 4791 printk(KERN_ERR DRV_NAME 4792 ": Error: invalid arp_validate \"%s\"\n", 4793 arp_validate == NULL ? "NULL" : arp_validate); 4794 return -EINVAL; 4795 } 4796 } else 4797 arp_validate_value = 0; 4798 4799 if (miimon) { 4800 printk(KERN_INFO DRV_NAME 4801 ": MII link monitoring set to %d ms\n", 4802 miimon); 4803 } else if (arp_interval) { 4804 int i; 4805 4806 printk(KERN_INFO DRV_NAME 4807 ": ARP monitoring set to %d ms, validate %s, with %d target(s):", 4808 arp_interval, 4809 arp_validate_tbl[arp_validate_value].modename, 4810 arp_ip_count); 4811 4812 for (i = 0; i < arp_ip_count; i++) 4813 printk (" %s", arp_ip_target[i]); 4814 4815 printk("\n"); 4816 4817 } else { 4818 /* miimon and arp_interval not set, we need one so things 4819 * work as expected, see bonding.txt for details 4820 */ 4821 printk(KERN_WARNING DRV_NAME 4822 ": Warning: either miimon or arp_interval and " 4823 "arp_ip_target module parameters must be specified, " 4824 "otherwise bonding will not detect link failures! 
see " 4825 "bonding.txt for details.\n"); 4826 } 4827 4828 if (primary && !USES_PRIMARY(bond_mode)) { 4829 /* currently, using a primary only makes sense 4830 * in active backup, TLB or ALB modes 4831 */ 4832 printk(KERN_WARNING DRV_NAME 4833 ": Warning: %s primary device specified but has no " 4834 "effect in %s mode\n", 4835 primary, bond_mode_name(bond_mode)); 4836 primary = NULL; 4837 } 4838 4839 if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP)) 4840 printk(KERN_WARNING DRV_NAME 4841 ": Warning: fail_over_mac only affects " 4842 "active-backup mode.\n"); 4843 4844 /* fill params struct with the proper values */ 4845 params->mode = bond_mode; 4846 params->xmit_policy = xmit_hashtype; 4847 params->miimon = miimon; 4848 params->arp_interval = arp_interval; 4849 params->arp_validate = arp_validate_value; 4850 params->updelay = updelay; 4851 params->downdelay = downdelay; 4852 params->use_carrier = use_carrier; 4853 params->lacp_fast = lacp_fast; 4854 params->primary[0] = 0; 4855 params->fail_over_mac = fail_over_mac; 4856 4857 if (primary) { 4858 strncpy(params->primary, primary, IFNAMSIZ); 4859 params->primary[IFNAMSIZ - 1] = 0; 4860 } 4861 4862 memcpy(params->arp_targets, arp_target, sizeof(arp_target)); 4863 4864 return 0; 4865 } 4866 4867 static struct lock_class_key bonding_netdev_xmit_lock_key; 4868 4869 /* Create a new bond based on the specified name and bonding parameters. 4870 * If name is NULL, obtain a suitable "bond%d" name for us. 4871 * Caller must NOT hold rtnl_lock; we need to release it here before we 4872 * set up our sysfs entries. 4873 */ 4874 int bond_create(char *name, struct bond_params *params, struct bonding **newbond) 4875 { 4876 struct net_device *bond_dev; 4877 struct bonding *bond, *nxt; 4878 int res; 4879 4880 rtnl_lock(); 4881 down_write(&bonding_rwsem); 4882 4883 /* Check to see if the bond already exists. 
*/ 4884 if (name) { 4885 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) 4886 if (strnicmp(bond->dev->name, name, IFNAMSIZ) == 0) { 4887 printk(KERN_ERR DRV_NAME 4888 ": cannot add bond %s; it already exists\n", 4889 name); 4890 res = -EPERM; 4891 goto out_rtnl; 4892 } 4893 } 4894 4895 bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "", 4896 ether_setup); 4897 if (!bond_dev) { 4898 printk(KERN_ERR DRV_NAME 4899 ": %s: eek! can't alloc netdev!\n", 4900 name); 4901 res = -ENOMEM; 4902 goto out_rtnl; 4903 } 4904 4905 if (!name) { 4906 res = dev_alloc_name(bond_dev, "bond%d"); 4907 if (res < 0) 4908 goto out_netdev; 4909 } 4910 4911 /* bond_init() must be called after dev_alloc_name() (for the 4912 * /proc files), but before register_netdevice(), because we 4913 * need to set function pointers. 4914 */ 4915 4916 res = bond_init(bond_dev, params); 4917 if (res < 0) { 4918 goto out_netdev; 4919 } 4920 4921 res = register_netdevice(bond_dev); 4922 if (res < 0) { 4923 goto out_bond; 4924 } 4925 4926 lockdep_set_class(&bond_dev->_xmit_lock, &bonding_netdev_xmit_lock_key); 4927 4928 if (newbond) 4929 *newbond = bond_dev->priv; 4930 4931 netif_carrier_off(bond_dev); 4932 4933 up_write(&bonding_rwsem); 4934 rtnl_unlock(); /* allows sysfs registration of net device */ 4935 res = bond_create_sysfs_entry(bond_dev->priv); 4936 if (res < 0) { 4937 rtnl_lock(); 4938 down_write(&bonding_rwsem); 4939 bond_deinit(bond_dev); 4940 unregister_netdevice(bond_dev); 4941 goto out_rtnl; 4942 } 4943 4944 return 0; 4945 4946 out_bond: 4947 bond_deinit(bond_dev); 4948 out_netdev: 4949 free_netdev(bond_dev); 4950 out_rtnl: 4951 up_write(&bonding_rwsem); 4952 rtnl_unlock(); 4953 return res; 4954 } 4955 4956 static int __init bonding_init(void) 4957 { 4958 int i; 4959 int res; 4960 struct bonding *bond, *nxt; 4961 4962 printk(KERN_INFO "%s", version); 4963 4964 res = bond_check_params(&bonding_defaults); 4965 if (res) { 4966 goto out; 4967 } 4968 4969 #ifdef 
CONFIG_PROC_FS 4970 bond_create_proc_dir(); 4971 #endif 4972 4973 init_rwsem(&bonding_rwsem); 4974 4975 for (i = 0; i < max_bonds; i++) { 4976 res = bond_create(NULL, &bonding_defaults, NULL); 4977 if (res) 4978 goto err; 4979 } 4980 4981 res = bond_create_sysfs(); 4982 if (res) 4983 goto err; 4984 4985 register_netdevice_notifier(&bond_netdev_notifier); 4986 register_inetaddr_notifier(&bond_inetaddr_notifier); 4987 4988 goto out; 4989 err: 4990 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { 4991 bond_work_cancel_all(bond); 4992 destroy_workqueue(bond->wq); 4993 } 4994 4995 bond_destroy_sysfs(); 4996 4997 rtnl_lock(); 4998 bond_free_all(); 4999 rtnl_unlock(); 5000 out: 5001 return res; 5002 5003 } 5004 5005 static void __exit bonding_exit(void) 5006 { 5007 unregister_netdevice_notifier(&bond_netdev_notifier); 5008 unregister_inetaddr_notifier(&bond_inetaddr_notifier); 5009 5010 bond_destroy_sysfs(); 5011 5012 rtnl_lock(); 5013 bond_free_all(); 5014 rtnl_unlock(); 5015 } 5016 5017 module_init(bonding_init); 5018 module_exit(bonding_exit); 5019 MODULE_LICENSE("GPL"); 5020 MODULE_VERSION(DRV_VERSION); 5021 MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); 5022 MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); 5023 MODULE_SUPPORTED_DEVICE("most ethernet devices"); 5024 5025 /* 5026 * Local variables: 5027 * c-indent-level: 8 5028 * c-basic-offset: 8 5029 * tab-width: 8 5030 * End: 5031 */ 5032 5033