/*
 * originally based on the dummy device.
 *
 * Copyright 1999, Thomas Davis, tadavis@lbl.gov.
 * Licensed under the GPL. Based on dummy.c, and eql.c devices.
 *
 * bonding.c: an Ethernet Bonding driver
 *
 * This is useful to talk to Cisco EtherChannel compatible equipment:
 *	Cisco 5500
 *	Sun Trunking (Solaris)
 *	Alteon AceDirector Trunks
 *	Linux Bonding
 *	and probably many L2 switches ...
 *
 * How it works:
 *    ifconfig bond0 ipaddress netmask up
 *	will setup a network device, with an ip address. No mac address
 *	will be assigned at this time. The hw mac address will come from
 *	the first slave bonded to the channel. All slaves will then use
 *	this hw mac address.
 *
 *    ifconfig bond0 down
 *	will release all slaves, marking them as down.
 *
 *    ifenslave bond0 eth0
 *	will attach eth0 to bond0 as a slave. eth0 hw mac address will either
 *	a: be used as initial mac address
 *	b: if a hw mac address already is there, eth0's hw mac address
 *	   will then be set from bond0.
31 * 32 */ 33 34 //#define BONDING_DEBUG 1 35 36 #include <linux/kernel.h> 37 #include <linux/module.h> 38 #include <linux/types.h> 39 #include <linux/fcntl.h> 40 #include <linux/interrupt.h> 41 #include <linux/ptrace.h> 42 #include <linux/ioport.h> 43 #include <linux/in.h> 44 #include <net/ip.h> 45 #include <linux/ip.h> 46 #include <linux/tcp.h> 47 #include <linux/udp.h> 48 #include <linux/slab.h> 49 #include <linux/string.h> 50 #include <linux/init.h> 51 #include <linux/timer.h> 52 #include <linux/socket.h> 53 #include <linux/ctype.h> 54 #include <linux/inet.h> 55 #include <linux/bitops.h> 56 #include <asm/system.h> 57 #include <asm/io.h> 58 #include <asm/dma.h> 59 #include <asm/uaccess.h> 60 #include <linux/errno.h> 61 #include <linux/netdevice.h> 62 #include <linux/inetdevice.h> 63 #include <linux/igmp.h> 64 #include <linux/etherdevice.h> 65 #include <linux/skbuff.h> 66 #include <net/sock.h> 67 #include <linux/rtnetlink.h> 68 #include <linux/proc_fs.h> 69 #include <linux/seq_file.h> 70 #include <linux/smp.h> 71 #include <linux/if_ether.h> 72 #include <net/arp.h> 73 #include <linux/mii.h> 74 #include <linux/ethtool.h> 75 #include <linux/if_vlan.h> 76 #include <linux/if_bonding.h> 77 #include <linux/jiffies.h> 78 #include <net/route.h> 79 #include <net/net_namespace.h> 80 #include "bonding.h" 81 #include "bond_3ad.h" 82 #include "bond_alb.h" 83 84 /*---------------------------- Module parameters ----------------------------*/ 85 86 /* monitor all links that often (in milliseconds). 
<=0 disables monitoring */ 87 #define BOND_LINK_MON_INTERV 0 88 #define BOND_LINK_ARP_INTERV 0 89 90 static int max_bonds = BOND_DEFAULT_MAX_BONDS; 91 static int miimon = BOND_LINK_MON_INTERV; 92 static int updelay = 0; 93 static int downdelay = 0; 94 static int use_carrier = 1; 95 static char *mode = NULL; 96 static char *primary = NULL; 97 static char *lacp_rate = NULL; 98 static char *xmit_hash_policy = NULL; 99 static int arp_interval = BOND_LINK_ARP_INTERV; 100 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; 101 static char *arp_validate = NULL; 102 static int fail_over_mac = 0; 103 struct bond_params bonding_defaults; 104 105 module_param(max_bonds, int, 0); 106 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); 107 module_param(miimon, int, 0); 108 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); 109 module_param(updelay, int, 0); 110 MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); 111 module_param(downdelay, int, 0); 112 MODULE_PARM_DESC(downdelay, "Delay before considering link down, " 113 "in milliseconds"); 114 module_param(use_carrier, int, 0); 115 MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; " 116 "0 for off, 1 for on (default)"); 117 module_param(mode, charp, 0); 118 MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, " 119 "1 for active-backup, 2 for balance-xor, " 120 "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, " 121 "6 for balance-alb"); 122 module_param(primary, charp, 0); 123 MODULE_PARM_DESC(primary, "Primary network device to use"); 124 module_param(lacp_rate, charp, 0); 125 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner " 126 "(slow/fast)"); 127 module_param(xmit_hash_policy, charp, 0); 128 MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)" 129 ", 1 for layer 3+4"); 130 module_param(arp_interval, int, 0); 131 MODULE_PARM_DESC(arp_interval, "arp interval in 
milliseconds"); 132 module_param_array(arp_ip_target, charp, NULL, 0); 133 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); 134 module_param(arp_validate, charp, 0); 135 MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); 136 module_param(fail_over_mac, int, 0); 137 MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. 0 of off (default), 1 for on."); 138 139 /*----------------------------- Global variables ----------------------------*/ 140 141 static const char * const version = 142 DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; 143 144 LIST_HEAD(bond_dev_list); 145 146 #ifdef CONFIG_PROC_FS 147 static struct proc_dir_entry *bond_proc_dir = NULL; 148 #endif 149 150 extern struct rw_semaphore bonding_rwsem; 151 static __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ; 152 static int arp_ip_count = 0; 153 static int bond_mode = BOND_MODE_ROUNDROBIN; 154 static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2; 155 static int lacp_fast = 0; 156 157 158 struct bond_parm_tbl bond_lacp_tbl[] = { 159 { "slow", AD_LACP_SLOW}, 160 { "fast", AD_LACP_FAST}, 161 { NULL, -1}, 162 }; 163 164 struct bond_parm_tbl bond_mode_tbl[] = { 165 { "balance-rr", BOND_MODE_ROUNDROBIN}, 166 { "active-backup", BOND_MODE_ACTIVEBACKUP}, 167 { "balance-xor", BOND_MODE_XOR}, 168 { "broadcast", BOND_MODE_BROADCAST}, 169 { "802.3ad", BOND_MODE_8023AD}, 170 { "balance-tlb", BOND_MODE_TLB}, 171 { "balance-alb", BOND_MODE_ALB}, 172 { NULL, -1}, 173 }; 174 175 struct bond_parm_tbl xmit_hashtype_tbl[] = { 176 { "layer2", BOND_XMIT_POLICY_LAYER2}, 177 { "layer3+4", BOND_XMIT_POLICY_LAYER34}, 178 { "layer2+3", BOND_XMIT_POLICY_LAYER23}, 179 { NULL, -1}, 180 }; 181 182 struct bond_parm_tbl arp_validate_tbl[] = { 183 { "none", BOND_ARP_VALIDATE_NONE}, 184 { "active", BOND_ARP_VALIDATE_ACTIVE}, 185 { "backup", BOND_ARP_VALIDATE_BACKUP}, 186 { "all", BOND_ARP_VALIDATE_ALL}, 187 { NULL, -1}, 188 }; 
189 190 /*-------------------------- Forward declarations ---------------------------*/ 191 192 static void bond_send_gratuitous_arp(struct bonding *bond); 193 static void bond_deinit(struct net_device *bond_dev); 194 195 /*---------------------------- General routines -----------------------------*/ 196 197 static const char *bond_mode_name(int mode) 198 { 199 switch (mode) { 200 case BOND_MODE_ROUNDROBIN : 201 return "load balancing (round-robin)"; 202 case BOND_MODE_ACTIVEBACKUP : 203 return "fault-tolerance (active-backup)"; 204 case BOND_MODE_XOR : 205 return "load balancing (xor)"; 206 case BOND_MODE_BROADCAST : 207 return "fault-tolerance (broadcast)"; 208 case BOND_MODE_8023AD: 209 return "IEEE 802.3ad Dynamic link aggregation"; 210 case BOND_MODE_TLB: 211 return "transmit load balancing"; 212 case BOND_MODE_ALB: 213 return "adaptive load balancing"; 214 default: 215 return "unknown"; 216 } 217 } 218 219 /*---------------------------------- VLAN -----------------------------------*/ 220 221 /** 222 * bond_add_vlan - add a new vlan id on bond 223 * @bond: bond that got the notification 224 * @vlan_id: the vlan id to add 225 * 226 * Returns -ENOMEM if allocation failed. 227 */ 228 static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) 229 { 230 struct vlan_entry *vlan; 231 232 dprintk("bond: %s, vlan id %d\n", 233 (bond ? 
bond->dev->name: "None"), vlan_id); 234 235 vlan = kmalloc(sizeof(struct vlan_entry), GFP_KERNEL); 236 if (!vlan) { 237 return -ENOMEM; 238 } 239 240 INIT_LIST_HEAD(&vlan->vlan_list); 241 vlan->vlan_id = vlan_id; 242 vlan->vlan_ip = 0; 243 244 write_lock_bh(&bond->lock); 245 246 list_add_tail(&vlan->vlan_list, &bond->vlan_list); 247 248 write_unlock_bh(&bond->lock); 249 250 dprintk("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name); 251 252 return 0; 253 } 254 255 /** 256 * bond_del_vlan - delete a vlan id from bond 257 * @bond: bond that got the notification 258 * @vlan_id: the vlan id to delete 259 * 260 * returns -ENODEV if @vlan_id was not found in @bond. 261 */ 262 static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) 263 { 264 struct vlan_entry *vlan, *next; 265 int res = -ENODEV; 266 267 dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); 268 269 write_lock_bh(&bond->lock); 270 271 list_for_each_entry_safe(vlan, next, &bond->vlan_list, vlan_list) { 272 if (vlan->vlan_id == vlan_id) { 273 list_del(&vlan->vlan_list); 274 275 if ((bond->params.mode == BOND_MODE_TLB) || 276 (bond->params.mode == BOND_MODE_ALB)) { 277 bond_alb_clear_vlan(bond, vlan_id); 278 } 279 280 dprintk("removed VLAN ID %d from bond %s\n", vlan_id, 281 bond->dev->name); 282 283 kfree(vlan); 284 285 if (list_empty(&bond->vlan_list) && 286 (bond->slave_cnt == 0)) { 287 /* Last VLAN removed and no slaves, so 288 * restore block on adding VLANs. This will 289 * be removed once new slaves that are not 290 * VLAN challenged will be added. 291 */ 292 bond->dev->features |= NETIF_F_VLAN_CHALLENGED; 293 } 294 295 res = 0; 296 goto out; 297 } 298 } 299 300 dprintk("couldn't find VLAN ID %d in bond %s\n", vlan_id, 301 bond->dev->name); 302 303 out: 304 write_unlock_bh(&bond->lock); 305 return res; 306 } 307 308 /** 309 * bond_has_challenged_slaves 310 * @bond: the bond we're working on 311 * 312 * Searches the slave list. 
Returns 1 if a vlan challenged slave 313 * was found, 0 otherwise. 314 * 315 * Assumes bond->lock is held. 316 */ 317 static int bond_has_challenged_slaves(struct bonding *bond) 318 { 319 struct slave *slave; 320 int i; 321 322 bond_for_each_slave(bond, slave, i) { 323 if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) { 324 dprintk("found VLAN challenged slave - %s\n", 325 slave->dev->name); 326 return 1; 327 } 328 } 329 330 dprintk("no VLAN challenged slaves found\n"); 331 return 0; 332 } 333 334 /** 335 * bond_next_vlan - safely skip to the next item in the vlans list. 336 * @bond: the bond we're working on 337 * @curr: item we're advancing from 338 * 339 * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL, 340 * or @curr->next otherwise (even if it is @curr itself again). 341 * 342 * Caller must hold bond->lock 343 */ 344 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) 345 { 346 struct vlan_entry *next, *last; 347 348 if (list_empty(&bond->vlan_list)) { 349 return NULL; 350 } 351 352 if (!curr) { 353 next = list_entry(bond->vlan_list.next, 354 struct vlan_entry, vlan_list); 355 } else { 356 last = list_entry(bond->vlan_list.prev, 357 struct vlan_entry, vlan_list); 358 if (last == curr) { 359 next = list_entry(bond->vlan_list.next, 360 struct vlan_entry, vlan_list); 361 } else { 362 next = list_entry(curr->vlan_list.next, 363 struct vlan_entry, vlan_list); 364 } 365 } 366 367 return next; 368 } 369 370 /** 371 * bond_dev_queue_xmit - Prepare skb for xmit. 372 * 373 * @bond: bond device that got this skb for tx. 374 * @skb: hw accel VLAN tagged skb to transmit 375 * @slave_dev: slave that is supposed to xmit this skbuff 376 * 377 * When the bond gets an skb to transmit that is 378 * already hardware accelerated VLAN tagged, and it 379 * needs to relay this skb to a slave that is not 380 * hw accel capable, the skb needs to be "unaccelerated", 381 * i.e. 
strip the hwaccel tag and re-insert it as part 382 * of the payload. 383 */ 384 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev) 385 { 386 unsigned short vlan_id; 387 388 if (!list_empty(&bond->vlan_list) && 389 !(slave_dev->features & NETIF_F_HW_VLAN_TX) && 390 vlan_get_tag(skb, &vlan_id) == 0) { 391 skb->dev = slave_dev; 392 skb = vlan_put_tag(skb, vlan_id); 393 if (!skb) { 394 /* vlan_put_tag() frees the skb in case of error, 395 * so return success here so the calling functions 396 * won't attempt to free is again. 397 */ 398 return 0; 399 } 400 } else { 401 skb->dev = slave_dev; 402 } 403 404 skb->priority = 1; 405 dev_queue_xmit(skb); 406 407 return 0; 408 } 409 410 /* 411 * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid 412 * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a 413 * lock because: 414 * a. This operation is performed in IOCTL context, 415 * b. The operation is protected by the RTNL semaphore in the 8021q code, 416 * c. Holding a lock with BH disabled while directly calling a base driver 417 * entry point is generally a BAD idea. 418 * 419 * The design of synchronization/protection for this operation in the 8021q 420 * module is good for one or more VLAN devices over a single physical device 421 * and cannot be extended for a teaming solution like bonding, so there is a 422 * potential race condition here where a net device from the vlan group might 423 * be referenced (either by a base driver or the 8021q code) while it is being 424 * removed from the system. However, it turns out we're not making matters 425 * worse, and if it works for regular VLAN usage it will work here too. 
426 */ 427 428 /** 429 * bond_vlan_rx_register - Propagates registration to slaves 430 * @bond_dev: bonding net device that got called 431 * @grp: vlan group being registered 432 */ 433 static void bond_vlan_rx_register(struct net_device *bond_dev, struct vlan_group *grp) 434 { 435 struct bonding *bond = bond_dev->priv; 436 struct slave *slave; 437 int i; 438 439 bond->vlgrp = grp; 440 441 bond_for_each_slave(bond, slave, i) { 442 struct net_device *slave_dev = slave->dev; 443 444 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 445 slave_dev->vlan_rx_register) { 446 slave_dev->vlan_rx_register(slave_dev, grp); 447 } 448 } 449 } 450 451 /** 452 * bond_vlan_rx_add_vid - Propagates adding an id to slaves 453 * @bond_dev: bonding net device that got called 454 * @vid: vlan id being added 455 */ 456 static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) 457 { 458 struct bonding *bond = bond_dev->priv; 459 struct slave *slave; 460 int i, res; 461 462 bond_for_each_slave(bond, slave, i) { 463 struct net_device *slave_dev = slave->dev; 464 465 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 466 slave_dev->vlan_rx_add_vid) { 467 slave_dev->vlan_rx_add_vid(slave_dev, vid); 468 } 469 } 470 471 res = bond_add_vlan(bond, vid); 472 if (res) { 473 printk(KERN_ERR DRV_NAME 474 ": %s: Error: Failed to add vlan id %d\n", 475 bond_dev->name, vid); 476 } 477 } 478 479 /** 480 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves 481 * @bond_dev: bonding net device that got called 482 * @vid: vlan id being removed 483 */ 484 static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) 485 { 486 struct bonding *bond = bond_dev->priv; 487 struct slave *slave; 488 struct net_device *vlan_dev; 489 int i, res; 490 491 bond_for_each_slave(bond, slave, i) { 492 struct net_device *slave_dev = slave->dev; 493 494 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 495 slave_dev->vlan_rx_kill_vid) { 496 /* Save and then restore vlan_dev in 
the grp array, 497 * since the slave's driver might clear it. 498 */ 499 vlan_dev = vlan_group_get_device(bond->vlgrp, vid); 500 slave_dev->vlan_rx_kill_vid(slave_dev, vid); 501 vlan_group_set_device(bond->vlgrp, vid, vlan_dev); 502 } 503 } 504 505 res = bond_del_vlan(bond, vid); 506 if (res) { 507 printk(KERN_ERR DRV_NAME 508 ": %s: Error: Failed to remove vlan id %d\n", 509 bond_dev->name, vid); 510 } 511 } 512 513 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev) 514 { 515 struct vlan_entry *vlan; 516 517 write_lock_bh(&bond->lock); 518 519 if (list_empty(&bond->vlan_list)) { 520 goto out; 521 } 522 523 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 524 slave_dev->vlan_rx_register) { 525 slave_dev->vlan_rx_register(slave_dev, bond->vlgrp); 526 } 527 528 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 529 !(slave_dev->vlan_rx_add_vid)) { 530 goto out; 531 } 532 533 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 534 slave_dev->vlan_rx_add_vid(slave_dev, vlan->vlan_id); 535 } 536 537 out: 538 write_unlock_bh(&bond->lock); 539 } 540 541 static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *slave_dev) 542 { 543 struct vlan_entry *vlan; 544 struct net_device *vlan_dev; 545 546 write_lock_bh(&bond->lock); 547 548 if (list_empty(&bond->vlan_list)) { 549 goto out; 550 } 551 552 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 553 !(slave_dev->vlan_rx_kill_vid)) { 554 goto unreg; 555 } 556 557 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 558 /* Save and then restore vlan_dev in the grp array, 559 * since the slave's driver might clear it. 
560 */ 561 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 562 slave_dev->vlan_rx_kill_vid(slave_dev, vlan->vlan_id); 563 vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev); 564 } 565 566 unreg: 567 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 568 slave_dev->vlan_rx_register) { 569 slave_dev->vlan_rx_register(slave_dev, NULL); 570 } 571 572 out: 573 write_unlock_bh(&bond->lock); 574 } 575 576 /*------------------------------- Link status -------------------------------*/ 577 578 /* 579 * Set the carrier state for the master according to the state of its 580 * slaves. If any slaves are up, the master is up. In 802.3ad mode, 581 * do special 802.3ad magic. 582 * 583 * Returns zero if carrier state does not change, nonzero if it does. 584 */ 585 static int bond_set_carrier(struct bonding *bond) 586 { 587 struct slave *slave; 588 int i; 589 590 if (bond->slave_cnt == 0) 591 goto down; 592 593 if (bond->params.mode == BOND_MODE_8023AD) 594 return bond_3ad_set_carrier(bond); 595 596 bond_for_each_slave(bond, slave, i) { 597 if (slave->link == BOND_LINK_UP) { 598 if (!netif_carrier_ok(bond->dev)) { 599 netif_carrier_on(bond->dev); 600 return 1; 601 } 602 return 0; 603 } 604 } 605 606 down: 607 if (netif_carrier_ok(bond->dev)) { 608 netif_carrier_off(bond->dev); 609 return 1; 610 } 611 return 0; 612 } 613 614 /* 615 * Get link speed and duplex from the slave's base driver 616 * using ethtool. If for some reason the call fails or the 617 * values are invalid, fake speed and duplex to 100/Full 618 * and return error. 
619 */ 620 static int bond_update_speed_duplex(struct slave *slave) 621 { 622 struct net_device *slave_dev = slave->dev; 623 struct ethtool_cmd etool; 624 int res; 625 626 /* Fake speed and duplex */ 627 slave->speed = SPEED_100; 628 slave->duplex = DUPLEX_FULL; 629 630 if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings) 631 return -1; 632 633 res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool); 634 if (res < 0) 635 return -1; 636 637 switch (etool.speed) { 638 case SPEED_10: 639 case SPEED_100: 640 case SPEED_1000: 641 case SPEED_10000: 642 break; 643 default: 644 return -1; 645 } 646 647 switch (etool.duplex) { 648 case DUPLEX_FULL: 649 case DUPLEX_HALF: 650 break; 651 default: 652 return -1; 653 } 654 655 slave->speed = etool.speed; 656 slave->duplex = etool.duplex; 657 658 return 0; 659 } 660 661 /* 662 * if <dev> supports MII link status reporting, check its link status. 663 * 664 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), 665 * depening upon the setting of the use_carrier parameter. 666 * 667 * Return either BMSR_LSTATUS, meaning that the link is up (or we 668 * can't tell and just pretend it is), or 0, meaning that the link is 669 * down. 670 * 671 * If reporting is non-zero, instead of faking link up, return -1 if 672 * both ETHTOOL and MII ioctls fail (meaning the device does not 673 * support them). If use_carrier is set, return whatever it says. 674 * It'd be nice if there was a good way to tell if a driver supports 675 * netif_carrier, but there really isn't. 676 */ 677 static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) 678 { 679 static int (* ioctl)(struct net_device *, struct ifreq *, int); 680 struct ifreq ifr; 681 struct mii_ioctl_data *mii; 682 683 if (bond->params.use_carrier) { 684 return netif_carrier_ok(slave_dev) ? 
BMSR_LSTATUS : 0; 685 } 686 687 ioctl = slave_dev->do_ioctl; 688 if (ioctl) { 689 /* TODO: set pointer to correct ioctl on a per team member */ 690 /* bases to make this more efficient. that is, once */ 691 /* we determine the correct ioctl, we will always */ 692 /* call it and not the others for that team */ 693 /* member. */ 694 695 /* 696 * We cannot assume that SIOCGMIIPHY will also read a 697 * register; not all network drivers (e.g., e100) 698 * support that. 699 */ 700 701 /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ 702 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 703 mii = if_mii(&ifr); 704 if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) { 705 mii->reg_num = MII_BMSR; 706 if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) { 707 return (mii->val_out & BMSR_LSTATUS); 708 } 709 } 710 } 711 712 /* 713 * Some drivers cache ETHTOOL_GLINK for a period of time so we only 714 * attempt to get link status from it if the above MII ioctls fail. 715 */ 716 if (slave_dev->ethtool_ops) { 717 if (slave_dev->ethtool_ops->get_link) { 718 u32 link; 719 720 link = slave_dev->ethtool_ops->get_link(slave_dev); 721 722 return link ? BMSR_LSTATUS : 0; 723 } 724 } 725 726 /* 727 * If reporting, report that either there's no dev->do_ioctl, 728 * or both SIOCGMIIREG and get_link failed (meaning that we 729 * cannot report link status). If not reporting, pretend 730 * we're ok. 731 */ 732 return (reporting ? 
-1 : BMSR_LSTATUS); 733 } 734 735 /*----------------------------- Multicast list ------------------------------*/ 736 737 /* 738 * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise 739 */ 740 static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) 741 { 742 return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && 743 dmi1->dmi_addrlen == dmi2->dmi_addrlen; 744 } 745 746 /* 747 * returns dmi entry if found, NULL otherwise 748 */ 749 static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) 750 { 751 struct dev_mc_list *idmi; 752 753 for (idmi = mc_list; idmi; idmi = idmi->next) { 754 if (bond_is_dmi_same(dmi, idmi)) { 755 return idmi; 756 } 757 } 758 759 return NULL; 760 } 761 762 /* 763 * Push the promiscuity flag down to appropriate slaves 764 */ 765 static void bond_set_promiscuity(struct bonding *bond, int inc) 766 { 767 if (USES_PRIMARY(bond->params.mode)) { 768 /* write lock already acquired */ 769 if (bond->curr_active_slave) { 770 dev_set_promiscuity(bond->curr_active_slave->dev, inc); 771 } 772 } else { 773 struct slave *slave; 774 int i; 775 bond_for_each_slave(bond, slave, i) { 776 dev_set_promiscuity(slave->dev, inc); 777 } 778 } 779 } 780 781 /* 782 * Push the allmulti flag down to all slaves 783 */ 784 static void bond_set_allmulti(struct bonding *bond, int inc) 785 { 786 if (USES_PRIMARY(bond->params.mode)) { 787 /* write lock already acquired */ 788 if (bond->curr_active_slave) { 789 dev_set_allmulti(bond->curr_active_slave->dev, inc); 790 } 791 } else { 792 struct slave *slave; 793 int i; 794 bond_for_each_slave(bond, slave, i) { 795 dev_set_allmulti(slave->dev, inc); 796 } 797 } 798 } 799 800 /* 801 * Add a Multicast address to slaves 802 * according to mode 803 */ 804 static void bond_mc_add(struct bonding *bond, void *addr, int alen) 805 { 806 if (USES_PRIMARY(bond->params.mode)) { 807 /* write lock already acquired */ 808 if 
(bond->curr_active_slave) { 809 dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0); 810 } 811 } else { 812 struct slave *slave; 813 int i; 814 bond_for_each_slave(bond, slave, i) { 815 dev_mc_add(slave->dev, addr, alen, 0); 816 } 817 } 818 } 819 820 /* 821 * Remove a multicast address from slave 822 * according to mode 823 */ 824 static void bond_mc_delete(struct bonding *bond, void *addr, int alen) 825 { 826 if (USES_PRIMARY(bond->params.mode)) { 827 /* write lock already acquired */ 828 if (bond->curr_active_slave) { 829 dev_mc_delete(bond->curr_active_slave->dev, addr, alen, 0); 830 } 831 } else { 832 struct slave *slave; 833 int i; 834 bond_for_each_slave(bond, slave, i) { 835 dev_mc_delete(slave->dev, addr, alen, 0); 836 } 837 } 838 } 839 840 841 /* 842 * Retrieve the list of registered multicast addresses for the bonding 843 * device and retransmit an IGMP JOIN request to the current active 844 * slave. 845 */ 846 static void bond_resend_igmp_join_requests(struct bonding *bond) 847 { 848 struct in_device *in_dev; 849 struct ip_mc_list *im; 850 851 rcu_read_lock(); 852 in_dev = __in_dev_get_rcu(bond->dev); 853 if (in_dev) { 854 for (im = in_dev->mc_list; im; im = im->next) { 855 ip_mc_rejoin_group(im); 856 } 857 } 858 859 rcu_read_unlock(); 860 } 861 862 /* 863 * Totally destroys the mc_list in bond 864 */ 865 static void bond_mc_list_destroy(struct bonding *bond) 866 { 867 struct dev_mc_list *dmi; 868 869 dmi = bond->mc_list; 870 while (dmi) { 871 bond->mc_list = dmi->next; 872 kfree(dmi); 873 dmi = bond->mc_list; 874 } 875 bond->mc_list = NULL; 876 } 877 878 /* 879 * Copy all the Multicast addresses from src to the bonding device dst 880 */ 881 static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, 882 gfp_t gfp_flag) 883 { 884 struct dev_mc_list *dmi, *new_dmi; 885 886 for (dmi = mc_list; dmi; dmi = dmi->next) { 887 new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag); 888 889 if (!new_dmi) { 890 /* FIXME: Potential memory 
leak !!! */ 891 return -ENOMEM; 892 } 893 894 new_dmi->next = bond->mc_list; 895 bond->mc_list = new_dmi; 896 new_dmi->dmi_addrlen = dmi->dmi_addrlen; 897 memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); 898 new_dmi->dmi_users = dmi->dmi_users; 899 new_dmi->dmi_gusers = dmi->dmi_gusers; 900 } 901 902 return 0; 903 } 904 905 /* 906 * flush all members of flush->mc_list from device dev->mc_list 907 */ 908 static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev) 909 { 910 struct bonding *bond = bond_dev->priv; 911 struct dev_mc_list *dmi; 912 913 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 914 dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 915 } 916 917 if (bond->params.mode == BOND_MODE_8023AD) { 918 /* del lacpdu mc addr from mc list */ 919 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 920 921 dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 922 } 923 } 924 925 /*--------------------------- Active slave change ---------------------------*/ 926 927 /* 928 * Update the mc list and multicast-related flags for the new and 929 * old active slaves (if any) according to the multicast mode, and 930 * promiscuous flags unconditionally. 
931 */ 932 static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active) 933 { 934 struct dev_mc_list *dmi; 935 936 if (!USES_PRIMARY(bond->params.mode)) { 937 /* nothing to do - mc list is already up-to-date on 938 * all slaves 939 */ 940 return; 941 } 942 943 if (old_active) { 944 if (bond->dev->flags & IFF_PROMISC) { 945 dev_set_promiscuity(old_active->dev, -1); 946 } 947 948 if (bond->dev->flags & IFF_ALLMULTI) { 949 dev_set_allmulti(old_active->dev, -1); 950 } 951 952 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 953 dev_mc_delete(old_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 954 } 955 } 956 957 if (new_active) { 958 if (bond->dev->flags & IFF_PROMISC) { 959 dev_set_promiscuity(new_active->dev, 1); 960 } 961 962 if (bond->dev->flags & IFF_ALLMULTI) { 963 dev_set_allmulti(new_active->dev, 1); 964 } 965 966 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 967 dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 968 } 969 bond_resend_igmp_join_requests(bond); 970 } 971 } 972 973 /** 974 * find_best_interface - select the best available slave to be the active one 975 * @bond: our bonding struct 976 * 977 * Warning: Caller must hold curr_slave_lock for writing. 
978 */ 979 static struct slave *bond_find_best_slave(struct bonding *bond) 980 { 981 struct slave *new_active, *old_active; 982 struct slave *bestslave = NULL; 983 int mintime = bond->params.updelay; 984 int i; 985 986 new_active = old_active = bond->curr_active_slave; 987 988 if (!new_active) { /* there were no active slaves left */ 989 if (bond->slave_cnt > 0) { /* found one slave */ 990 new_active = bond->first_slave; 991 } else { 992 return NULL; /* still no slave, return NULL */ 993 } 994 } 995 996 /* first try the primary link; if arping, a link must tx/rx traffic 997 * before it can be considered the curr_active_slave - also, we would skip 998 * slaves between the curr_active_slave and primary_slave that may be up 999 * and able to arp 1000 */ 1001 if ((bond->primary_slave) && 1002 (!bond->params.arp_interval) && 1003 (IS_UP(bond->primary_slave->dev))) { 1004 new_active = bond->primary_slave; 1005 } 1006 1007 /* remember where to stop iterating over the slaves */ 1008 old_active = new_active; 1009 1010 bond_for_each_slave_from(bond, new_active, i, old_active) { 1011 if (IS_UP(new_active->dev)) { 1012 if (new_active->link == BOND_LINK_UP) { 1013 return new_active; 1014 } else if (new_active->link == BOND_LINK_BACK) { 1015 /* link up, but waiting for stabilization */ 1016 if (new_active->delay < mintime) { 1017 mintime = new_active->delay; 1018 bestslave = new_active; 1019 } 1020 } 1021 } 1022 } 1023 1024 return bestslave; 1025 } 1026 1027 /** 1028 * change_active_interface - change the active slave into the specified one 1029 * @bond: our bonding struct 1030 * @new: the new slave to make the active one 1031 * 1032 * Set the new slave to the bond's settings and unset them on the old 1033 * curr_active_slave. 1034 * Setting include flags, mc-list, promiscuity, allmulti, etc. 
1035 * 1036 * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, 1037 * because it is apparently the best available slave we have, even though its 1038 * updelay hasn't timed out yet. 1039 * 1040 * Warning: Caller must hold curr_slave_lock for writing. 1041 */ 1042 void bond_change_active_slave(struct bonding *bond, struct slave *new_active) 1043 { 1044 struct slave *old_active = bond->curr_active_slave; 1045 1046 if (old_active == new_active) { 1047 return; 1048 } 1049 1050 if (new_active) { 1051 if (new_active->link == BOND_LINK_BACK) { 1052 if (USES_PRIMARY(bond->params.mode)) { 1053 printk(KERN_INFO DRV_NAME 1054 ": %s: making interface %s the new " 1055 "active one %d ms earlier.\n", 1056 bond->dev->name, new_active->dev->name, 1057 (bond->params.updelay - new_active->delay) * bond->params.miimon); 1058 } 1059 1060 new_active->delay = 0; 1061 new_active->link = BOND_LINK_UP; 1062 new_active->jiffies = jiffies; 1063 1064 if (bond->params.mode == BOND_MODE_8023AD) { 1065 bond_3ad_handle_link_change(new_active, BOND_LINK_UP); 1066 } 1067 1068 if ((bond->params.mode == BOND_MODE_TLB) || 1069 (bond->params.mode == BOND_MODE_ALB)) { 1070 bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); 1071 } 1072 } else { 1073 if (USES_PRIMARY(bond->params.mode)) { 1074 printk(KERN_INFO DRV_NAME 1075 ": %s: making interface %s the new " 1076 "active one.\n", 1077 bond->dev->name, new_active->dev->name); 1078 } 1079 } 1080 } 1081 1082 if (USES_PRIMARY(bond->params.mode)) { 1083 bond_mc_swap(bond, new_active, old_active); 1084 } 1085 1086 if ((bond->params.mode == BOND_MODE_TLB) || 1087 (bond->params.mode == BOND_MODE_ALB)) { 1088 bond_alb_handle_active_change(bond, new_active); 1089 if (old_active) 1090 bond_set_slave_inactive_flags(old_active); 1091 if (new_active) 1092 bond_set_slave_active_flags(new_active); 1093 } else { 1094 bond->curr_active_slave = new_active; 1095 } 1096 1097 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 1098 if 
(old_active) { 1099 bond_set_slave_inactive_flags(old_active); 1100 } 1101 1102 if (new_active) { 1103 bond_set_slave_active_flags(new_active); 1104 } 1105 1106 /* when bonding does not set the slave MAC address, the bond MAC 1107 * address is the one of the active slave. 1108 */ 1109 if (new_active && bond->params.fail_over_mac) 1110 memcpy(bond->dev->dev_addr, new_active->dev->dev_addr, 1111 new_active->dev->addr_len); 1112 if (bond->curr_active_slave && 1113 test_bit(__LINK_STATE_LINKWATCH_PENDING, 1114 &bond->curr_active_slave->dev->state)) { 1115 dprintk("delaying gratuitous arp on %s\n", 1116 bond->curr_active_slave->dev->name); 1117 bond->send_grat_arp = 1; 1118 } else 1119 bond_send_gratuitous_arp(bond); 1120 } 1121 } 1122 1123 /** 1124 * bond_select_active_slave - select a new active slave, if needed 1125 * @bond: our bonding struct 1126 * 1127 * This functions shoud be called when one of the following occurs: 1128 * - The old curr_active_slave has been released or lost its link. 1129 * - The primary_slave has got its link back. 1130 * - A slave has got its link back and there's no old curr_active_slave. 1131 * 1132 * Warning: Caller must hold curr_slave_lock for writing. 1133 */ 1134 void bond_select_active_slave(struct bonding *bond) 1135 { 1136 struct slave *best_slave; 1137 int rv; 1138 1139 best_slave = bond_find_best_slave(bond); 1140 if (best_slave != bond->curr_active_slave) { 1141 bond_change_active_slave(bond, best_slave); 1142 rv = bond_set_carrier(bond); 1143 if (!rv) 1144 return; 1145 1146 if (netif_carrier_ok(bond->dev)) { 1147 printk(KERN_INFO DRV_NAME 1148 ": %s: first active interface up!\n", 1149 bond->dev->name); 1150 } else { 1151 printk(KERN_INFO DRV_NAME ": %s: " 1152 "now running without any active interface !\n", 1153 bond->dev->name); 1154 } 1155 } 1156 } 1157 1158 /*--------------------------- slave list handling ---------------------------*/ 1159 1160 /* 1161 * This function attaches the slave to the end of list. 
1162 * 1163 * bond->lock held for writing by caller. 1164 */ 1165 static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) 1166 { 1167 if (bond->first_slave == NULL) { /* attaching the first slave */ 1168 new_slave->next = new_slave; 1169 new_slave->prev = new_slave; 1170 bond->first_slave = new_slave; 1171 } else { 1172 new_slave->next = bond->first_slave; 1173 new_slave->prev = bond->first_slave->prev; 1174 new_slave->next->prev = new_slave; 1175 new_slave->prev->next = new_slave; 1176 } 1177 1178 bond->slave_cnt++; 1179 } 1180 1181 /* 1182 * This function detaches the slave from the list. 1183 * WARNING: no check is made to verify if the slave effectively 1184 * belongs to <bond>. 1185 * Nothing is freed on return, structures are just unchained. 1186 * If any slave pointer in bond was pointing to <slave>, 1187 * it should be changed by the calling function. 1188 * 1189 * bond->lock held for writing by caller. 1190 */ 1191 static void bond_detach_slave(struct bonding *bond, struct slave *slave) 1192 { 1193 if (slave->next) { 1194 slave->next->prev = slave->prev; 1195 } 1196 1197 if (slave->prev) { 1198 slave->prev->next = slave->next; 1199 } 1200 1201 if (bond->first_slave == slave) { /* slave is the first slave */ 1202 if (bond->slave_cnt > 1) { /* there are more slave */ 1203 bond->first_slave = slave->next; 1204 } else { 1205 bond->first_slave = NULL; /* slave was the last one */ 1206 } 1207 } 1208 1209 slave->next = NULL; 1210 slave->prev = NULL; 1211 bond->slave_cnt--; 1212 } 1213 1214 /*---------------------------------- IOCTL ----------------------------------*/ 1215 1216 static int bond_sethwaddr(struct net_device *bond_dev, 1217 struct net_device *slave_dev) 1218 { 1219 dprintk("bond_dev=%p\n", bond_dev); 1220 dprintk("slave_dev=%p\n", slave_dev); 1221 dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len); 1222 memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); 1223 return 0; 1224 } 1225 1226 #define 
BOND_VLAN_FEATURES \ 1227 (NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \ 1228 NETIF_F_HW_VLAN_FILTER) 1229 1230 /* 1231 * Compute the common dev->feature set available to all slaves. Some 1232 * feature bits are managed elsewhere, so preserve those feature bits 1233 * on the master device. 1234 */ 1235 static int bond_compute_features(struct bonding *bond) 1236 { 1237 struct slave *slave; 1238 struct net_device *bond_dev = bond->dev; 1239 unsigned long features = bond_dev->features; 1240 unsigned short max_hard_header_len = max((u16)ETH_HLEN, 1241 bond_dev->hard_header_len); 1242 int i; 1243 1244 features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); 1245 features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | 1246 NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; 1247 1248 bond_for_each_slave(bond, slave, i) { 1249 features = netdev_compute_features(features, 1250 slave->dev->features); 1251 if (slave->dev->hard_header_len > max_hard_header_len) 1252 max_hard_header_len = slave->dev->hard_header_len; 1253 } 1254 1255 features |= (bond_dev->features & BOND_VLAN_FEATURES); 1256 bond_dev->features = features; 1257 bond_dev->hard_header_len = max_hard_header_len; 1258 1259 return 0; 1260 } 1261 1262 1263 static void bond_setup_by_slave(struct net_device *bond_dev, 1264 struct net_device *slave_dev) 1265 { 1266 struct bonding *bond = bond_dev->priv; 1267 1268 bond_dev->neigh_setup = slave_dev->neigh_setup; 1269 bond_dev->header_ops = slave_dev->header_ops; 1270 1271 bond_dev->type = slave_dev->type; 1272 bond_dev->hard_header_len = slave_dev->hard_header_len; 1273 bond_dev->addr_len = slave_dev->addr_len; 1274 1275 memcpy(bond_dev->broadcast, slave_dev->broadcast, 1276 slave_dev->addr_len); 1277 bond->setup_by_slave = 1; 1278 } 1279 1280 /* enslave device <slave> to bond device <master> */ 1281 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) 1282 { 1283 struct bonding *bond = bond_dev->priv; 1284 struct slave *new_slave = 
NULL; 1285 struct dev_mc_list *dmi; 1286 struct sockaddr addr; 1287 int link_reporting; 1288 int old_features = bond_dev->features; 1289 int res = 0; 1290 1291 if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL && 1292 slave_dev->do_ioctl == NULL) { 1293 printk(KERN_WARNING DRV_NAME 1294 ": %s: Warning: no link monitoring support for %s\n", 1295 bond_dev->name, slave_dev->name); 1296 } 1297 1298 /* bond must be initialized by bond_open() before enslaving */ 1299 if (!(bond_dev->flags & IFF_UP)) { 1300 printk(KERN_WARNING DRV_NAME 1301 " %s: master_dev is not up in bond_enslave\n", 1302 bond_dev->name); 1303 } 1304 1305 /* already enslaved */ 1306 if (slave_dev->flags & IFF_SLAVE) { 1307 dprintk("Error, Device was already enslaved\n"); 1308 return -EBUSY; 1309 } 1310 1311 /* vlan challenged mutual exclusion */ 1312 /* no need to lock since we're protected by rtnl_lock */ 1313 if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { 1314 dprintk("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1315 if (!list_empty(&bond->vlan_list)) { 1316 printk(KERN_ERR DRV_NAME 1317 ": %s: Error: cannot enslave VLAN " 1318 "challenged slave %s on VLAN enabled " 1319 "bond %s\n", bond_dev->name, slave_dev->name, 1320 bond_dev->name); 1321 return -EPERM; 1322 } else { 1323 printk(KERN_WARNING DRV_NAME 1324 ": %s: Warning: enslaved VLAN challenged " 1325 "slave %s. Adding VLANs will be blocked as " 1326 "long as %s is part of bond %s\n", 1327 bond_dev->name, slave_dev->name, slave_dev->name, 1328 bond_dev->name); 1329 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1330 } 1331 } else { 1332 dprintk("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1333 if (bond->slave_cnt == 0) { 1334 /* First slave, and it is not VLAN challenged, 1335 * so remove the block of adding VLANs over the bond. 1336 */ 1337 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1338 } 1339 } 1340 1341 /* 1342 * Old ifenslave binaries are no longer supported. 
These can 1343 * be identified with moderate accurary by the state of the slave: 1344 * the current ifenslave will set the interface down prior to 1345 * enslaving it; the old ifenslave will not. 1346 */ 1347 if ((slave_dev->flags & IFF_UP)) { 1348 printk(KERN_ERR DRV_NAME ": %s is up. " 1349 "This may be due to an out of date ifenslave.\n", 1350 slave_dev->name); 1351 res = -EPERM; 1352 goto err_undo_flags; 1353 } 1354 1355 /* set bonding device ether type by slave - bonding netdevices are 1356 * created with ether_setup, so when the slave type is not ARPHRD_ETHER 1357 * there is a need to override some of the type dependent attribs/funcs. 1358 * 1359 * bond ether type mutual exclusion - don't allow slaves of dissimilar 1360 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond 1361 */ 1362 if (bond->slave_cnt == 0) { 1363 if (slave_dev->type != ARPHRD_ETHER) 1364 bond_setup_by_slave(bond_dev, slave_dev); 1365 } else if (bond_dev->type != slave_dev->type) { 1366 printk(KERN_ERR DRV_NAME ": %s ether type (%d) is different " 1367 "from other slaves (%d), can not enslave it.\n", 1368 slave_dev->name, 1369 slave_dev->type, bond_dev->type); 1370 res = -EINVAL; 1371 goto err_undo_flags; 1372 } 1373 1374 if (slave_dev->set_mac_address == NULL) { 1375 if (bond->slave_cnt == 0) { 1376 printk(KERN_WARNING DRV_NAME 1377 ": %s: Warning: The first slave device " 1378 "specified does not support setting the MAC " 1379 "address. 
Enabling the fail_over_mac option.", 1380 bond_dev->name); 1381 bond->params.fail_over_mac = 1; 1382 } else if (!bond->params.fail_over_mac) { 1383 printk(KERN_ERR DRV_NAME 1384 ": %s: Error: The slave device specified " 1385 "does not support setting the MAC address, " 1386 "but fail_over_mac is not enabled.\n" 1387 , bond_dev->name); 1388 res = -EOPNOTSUPP; 1389 goto err_undo_flags; 1390 } 1391 } 1392 1393 new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); 1394 if (!new_slave) { 1395 res = -ENOMEM; 1396 goto err_undo_flags; 1397 } 1398 1399 /* save slave's original flags before calling 1400 * netdev_set_master and dev_open 1401 */ 1402 new_slave->original_flags = slave_dev->flags; 1403 1404 /* 1405 * Save slave's original ("permanent") mac address for modes 1406 * that need it, and for restoring it upon release, and then 1407 * set it to the master's address 1408 */ 1409 memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); 1410 1411 if (!bond->params.fail_over_mac) { 1412 /* 1413 * Set slave to master's mac address. 
The application already 1414 * set the master's mac address to that of the first slave 1415 */ 1416 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 1417 addr.sa_family = slave_dev->type; 1418 res = dev_set_mac_address(slave_dev, &addr); 1419 if (res) { 1420 dprintk("Error %d calling set_mac_address\n", res); 1421 goto err_free; 1422 } 1423 } 1424 1425 res = netdev_set_master(slave_dev, bond_dev); 1426 if (res) { 1427 dprintk("Error %d calling netdev_set_master\n", res); 1428 goto err_close; 1429 } 1430 /* open the slave since the application closed it */ 1431 res = dev_open(slave_dev); 1432 if (res) { 1433 dprintk("Openning slave %s failed\n", slave_dev->name); 1434 goto err_restore_mac; 1435 } 1436 1437 new_slave->dev = slave_dev; 1438 slave_dev->priv_flags |= IFF_BONDING; 1439 1440 if ((bond->params.mode == BOND_MODE_TLB) || 1441 (bond->params.mode == BOND_MODE_ALB)) { 1442 /* bond_alb_init_slave() must be called before all other stages since 1443 * it might fail and we do not want to have to undo everything 1444 */ 1445 res = bond_alb_init_slave(bond, new_slave); 1446 if (res) { 1447 goto err_unset_master; 1448 } 1449 } 1450 1451 /* If the mode USES_PRIMARY, then the new slave gets the 1452 * master's promisc (and mc) settings only if it becomes the 1453 * curr_active_slave, and that is taken care of later when calling 1454 * bond_change_active() 1455 */ 1456 if (!USES_PRIMARY(bond->params.mode)) { 1457 /* set promiscuity level to new slave */ 1458 if (bond_dev->flags & IFF_PROMISC) { 1459 dev_set_promiscuity(slave_dev, 1); 1460 } 1461 1462 /* set allmulti level to new slave */ 1463 if (bond_dev->flags & IFF_ALLMULTI) { 1464 dev_set_allmulti(slave_dev, 1); 1465 } 1466 1467 /* upload master's mc_list to new slave */ 1468 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 1469 dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 1470 } 1471 } 1472 1473 if (bond->params.mode == BOND_MODE_8023AD) { 1474 /* add lacpdu mc addr to mc list */ 
1475 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 1476 1477 dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 1478 } 1479 1480 bond_add_vlans_on_slave(bond, slave_dev); 1481 1482 write_lock_bh(&bond->lock); 1483 1484 bond_attach_slave(bond, new_slave); 1485 1486 new_slave->delay = 0; 1487 new_slave->link_failure_count = 0; 1488 1489 bond_compute_features(bond); 1490 1491 new_slave->last_arp_rx = jiffies; 1492 1493 if (bond->params.miimon && !bond->params.use_carrier) { 1494 link_reporting = bond_check_dev_link(bond, slave_dev, 1); 1495 1496 if ((link_reporting == -1) && !bond->params.arp_interval) { 1497 /* 1498 * miimon is set but a bonded network driver 1499 * does not support ETHTOOL/MII and 1500 * arp_interval is not set. Note: if 1501 * use_carrier is enabled, we will never go 1502 * here (because netif_carrier is always 1503 * supported); thus, we don't need to change 1504 * the messages for netif_carrier. 1505 */ 1506 printk(KERN_WARNING DRV_NAME 1507 ": %s: Warning: MII and ETHTOOL support not " 1508 "available for interface %s, and " 1509 "arp_interval/arp_ip_target module parameters " 1510 "not specified, thus bonding will not detect " 1511 "link failures! 
see bonding.txt for details.\n", 1512 bond_dev->name, slave_dev->name); 1513 } else if (link_reporting == -1) { 1514 /* unable get link status using mii/ethtool */ 1515 printk(KERN_WARNING DRV_NAME 1516 ": %s: Warning: can't get link status from " 1517 "interface %s; the network driver associated " 1518 "with this interface does not support MII or " 1519 "ETHTOOL link status reporting, thus miimon " 1520 "has no effect on this interface.\n", 1521 bond_dev->name, slave_dev->name); 1522 } 1523 } 1524 1525 /* check for initial state */ 1526 if (!bond->params.miimon || 1527 (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) { 1528 if (bond->params.updelay) { 1529 dprintk("Initial state of slave_dev is " 1530 "BOND_LINK_BACK\n"); 1531 new_slave->link = BOND_LINK_BACK; 1532 new_slave->delay = bond->params.updelay; 1533 } else { 1534 dprintk("Initial state of slave_dev is " 1535 "BOND_LINK_UP\n"); 1536 new_slave->link = BOND_LINK_UP; 1537 } 1538 new_slave->jiffies = jiffies; 1539 } else { 1540 dprintk("Initial state of slave_dev is " 1541 "BOND_LINK_DOWN\n"); 1542 new_slave->link = BOND_LINK_DOWN; 1543 } 1544 1545 if (bond_update_speed_duplex(new_slave) && 1546 (new_slave->link != BOND_LINK_DOWN)) { 1547 printk(KERN_WARNING DRV_NAME 1548 ": %s: Warning: failed to get speed and duplex from %s, " 1549 "assumed to be 100Mb/sec and Full.\n", 1550 bond_dev->name, new_slave->dev->name); 1551 1552 if (bond->params.mode == BOND_MODE_8023AD) { 1553 printk(KERN_WARNING DRV_NAME 1554 ": %s: Warning: Operation of 802.3ad mode requires ETHTOOL " 1555 "support in base driver for proper aggregator " 1556 "selection.\n", bond_dev->name); 1557 } 1558 } 1559 1560 if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { 1561 /* if there is a primary slave, remember it */ 1562 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { 1563 bond->primary_slave = new_slave; 1564 } 1565 } 1566 1567 switch (bond->params.mode) { 1568 case BOND_MODE_ACTIVEBACKUP: 1569 
bond_set_slave_inactive_flags(new_slave); 1570 bond_select_active_slave(bond); 1571 break; 1572 case BOND_MODE_8023AD: 1573 /* in 802.3ad mode, the internal mechanism 1574 * will activate the slaves in the selected 1575 * aggregator 1576 */ 1577 bond_set_slave_inactive_flags(new_slave); 1578 /* if this is the first slave */ 1579 if (bond->slave_cnt == 1) { 1580 SLAVE_AD_INFO(new_slave).id = 1; 1581 /* Initialize AD with the number of times that the AD timer is called in 1 second 1582 * can be called only after the mac address of the bond is set 1583 */ 1584 bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, 1585 bond->params.lacp_fast); 1586 } else { 1587 SLAVE_AD_INFO(new_slave).id = 1588 SLAVE_AD_INFO(new_slave->prev).id + 1; 1589 } 1590 1591 bond_3ad_bind_slave(new_slave); 1592 break; 1593 case BOND_MODE_TLB: 1594 case BOND_MODE_ALB: 1595 new_slave->state = BOND_STATE_ACTIVE; 1596 bond_set_slave_inactive_flags(new_slave); 1597 break; 1598 default: 1599 dprintk("This slave is always active in trunk mode\n"); 1600 1601 /* always active in trunk mode */ 1602 new_slave->state = BOND_STATE_ACTIVE; 1603 1604 /* In trunking mode there is little meaning to curr_active_slave 1605 * anyway (it holds no special properties of the bond device), 1606 * so we can change it without calling change_active_interface() 1607 */ 1608 if (!bond->curr_active_slave) { 1609 bond->curr_active_slave = new_slave; 1610 } 1611 break; 1612 } /* switch(bond_mode) */ 1613 1614 bond_set_carrier(bond); 1615 1616 write_unlock_bh(&bond->lock); 1617 1618 res = bond_create_slave_symlinks(bond_dev, slave_dev); 1619 if (res) 1620 goto err_unset_master; 1621 1622 printk(KERN_INFO DRV_NAME 1623 ": %s: enslaving %s as a%s interface with a%s link.\n", 1624 bond_dev->name, slave_dev->name, 1625 new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", 1626 new_slave->link != BOND_LINK_DOWN ? 
"n up" : " down"); 1627 1628 /* enslave is successful */ 1629 return 0; 1630 1631 /* Undo stages on error */ 1632 err_unset_master: 1633 netdev_set_master(slave_dev, NULL); 1634 1635 err_close: 1636 dev_close(slave_dev); 1637 1638 err_restore_mac: 1639 if (!bond->params.fail_over_mac) { 1640 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); 1641 addr.sa_family = slave_dev->type; 1642 dev_set_mac_address(slave_dev, &addr); 1643 } 1644 1645 err_free: 1646 kfree(new_slave); 1647 1648 err_undo_flags: 1649 bond_dev->features = old_features; 1650 1651 return res; 1652 } 1653 1654 /* 1655 * Try to release the slave device <slave> from the bond device <master> 1656 * It is legal to access curr_active_slave without a lock because all the function 1657 * is write-locked. 1658 * 1659 * The rules for slave state should be: 1660 * for Active/Backup: 1661 * Active stays on all backups go down 1662 * for Bonded connections: 1663 * The first up interface should be left on and all others downed. 1664 */ 1665 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) 1666 { 1667 struct bonding *bond = bond_dev->priv; 1668 struct slave *slave, *oldcurrent; 1669 struct sockaddr addr; 1670 int mac_addr_differ; 1671 DECLARE_MAC_BUF(mac); 1672 1673 /* slave is not a slave or master is not master of this slave */ 1674 if (!(slave_dev->flags & IFF_SLAVE) || 1675 (slave_dev->master != bond_dev)) { 1676 printk(KERN_ERR DRV_NAME 1677 ": %s: Error: cannot release %s.\n", 1678 bond_dev->name, slave_dev->name); 1679 return -EINVAL; 1680 } 1681 1682 write_lock_bh(&bond->lock); 1683 1684 slave = bond_get_slave_by_dev(bond, slave_dev); 1685 if (!slave) { 1686 /* not a slave of this bond */ 1687 printk(KERN_INFO DRV_NAME 1688 ": %s: %s not enslaved\n", 1689 bond_dev->name, slave_dev->name); 1690 write_unlock_bh(&bond->lock); 1691 return -EINVAL; 1692 } 1693 1694 mac_addr_differ = memcmp(bond_dev->dev_addr, 1695 slave->perm_hwaddr, 1696 ETH_ALEN); 1697 if (!mac_addr_differ && 
(bond->slave_cnt > 1)) { 1698 printk(KERN_WARNING DRV_NAME 1699 ": %s: Warning: the permanent HWaddr of %s - " 1700 "%s - is still in use by %s. " 1701 "Set the HWaddr of %s to a different address " 1702 "to avoid conflicts.\n", 1703 bond_dev->name, 1704 slave_dev->name, 1705 print_mac(mac, slave->perm_hwaddr), 1706 bond_dev->name, 1707 slave_dev->name); 1708 } 1709 1710 /* Inform AD package of unbinding of slave. */ 1711 if (bond->params.mode == BOND_MODE_8023AD) { 1712 /* must be called before the slave is 1713 * detached from the list 1714 */ 1715 bond_3ad_unbind_slave(slave); 1716 } 1717 1718 printk(KERN_INFO DRV_NAME 1719 ": %s: releasing %s interface %s\n", 1720 bond_dev->name, 1721 (slave->state == BOND_STATE_ACTIVE) 1722 ? "active" : "backup", 1723 slave_dev->name); 1724 1725 oldcurrent = bond->curr_active_slave; 1726 1727 bond->current_arp_slave = NULL; 1728 1729 /* release the slave from its bond */ 1730 bond_detach_slave(bond, slave); 1731 1732 bond_compute_features(bond); 1733 1734 if (bond->primary_slave == slave) { 1735 bond->primary_slave = NULL; 1736 } 1737 1738 if (oldcurrent == slave) { 1739 bond_change_active_slave(bond, NULL); 1740 } 1741 1742 if ((bond->params.mode == BOND_MODE_TLB) || 1743 (bond->params.mode == BOND_MODE_ALB)) { 1744 /* Must be called only after the slave has been 1745 * detached from the list and the curr_active_slave 1746 * has been cleared (if our_slave == old_current), 1747 * but before a new active slave is selected. 1748 */ 1749 write_unlock_bh(&bond->lock); 1750 bond_alb_deinit_slave(bond, slave); 1751 write_lock_bh(&bond->lock); 1752 } 1753 1754 if (oldcurrent == slave) { 1755 /* 1756 * Note that we hold RTNL over this sequence, so there 1757 * is no concern that another slave add/remove event 1758 * will interfere. 
1759 */ 1760 write_unlock_bh(&bond->lock); 1761 read_lock(&bond->lock); 1762 write_lock_bh(&bond->curr_slave_lock); 1763 1764 bond_select_active_slave(bond); 1765 1766 write_unlock_bh(&bond->curr_slave_lock); 1767 read_unlock(&bond->lock); 1768 write_lock_bh(&bond->lock); 1769 } 1770 1771 if (bond->slave_cnt == 0) { 1772 bond_set_carrier(bond); 1773 1774 /* if the last slave was removed, zero the mac address 1775 * of the master so it will be set by the application 1776 * to the mac address of the first slave 1777 */ 1778 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 1779 1780 if (list_empty(&bond->vlan_list)) { 1781 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1782 } else { 1783 printk(KERN_WARNING DRV_NAME 1784 ": %s: Warning: clearing HW address of %s while it " 1785 "still has VLANs.\n", 1786 bond_dev->name, bond_dev->name); 1787 printk(KERN_WARNING DRV_NAME 1788 ": %s: When re-adding slaves, make sure the bond's " 1789 "HW address matches its VLANs'.\n", 1790 bond_dev->name); 1791 } 1792 } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) && 1793 !bond_has_challenged_slaves(bond)) { 1794 printk(KERN_INFO DRV_NAME 1795 ": %s: last VLAN challenged slave %s " 1796 "left bond %s. 
VLAN blocking is removed\n", 1797 bond_dev->name, slave_dev->name, bond_dev->name); 1798 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1799 } 1800 1801 write_unlock_bh(&bond->lock); 1802 1803 /* must do this from outside any spinlocks */ 1804 bond_destroy_slave_symlinks(bond_dev, slave_dev); 1805 1806 bond_del_vlans_from_slave(bond, slave_dev); 1807 1808 /* If the mode USES_PRIMARY, then we should only remove its 1809 * promisc and mc settings if it was the curr_active_slave, but that was 1810 * already taken care of above when we detached the slave 1811 */ 1812 if (!USES_PRIMARY(bond->params.mode)) { 1813 /* unset promiscuity level from slave */ 1814 if (bond_dev->flags & IFF_PROMISC) { 1815 dev_set_promiscuity(slave_dev, -1); 1816 } 1817 1818 /* unset allmulti level from slave */ 1819 if (bond_dev->flags & IFF_ALLMULTI) { 1820 dev_set_allmulti(slave_dev, -1); 1821 } 1822 1823 /* flush master's mc_list from slave */ 1824 bond_mc_list_flush(bond_dev, slave_dev); 1825 } 1826 1827 netdev_set_master(slave_dev, NULL); 1828 1829 /* close slave before restoring its mac address */ 1830 dev_close(slave_dev); 1831 1832 if (!bond->params.fail_over_mac) { 1833 /* restore original ("permanent") mac address */ 1834 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1835 addr.sa_family = slave_dev->type; 1836 dev_set_mac_address(slave_dev, &addr); 1837 } 1838 1839 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | 1840 IFF_SLAVE_INACTIVE | IFF_BONDING | 1841 IFF_SLAVE_NEEDARP); 1842 1843 kfree(slave); 1844 1845 return 0; /* deletion OK */ 1846 } 1847 1848 /* 1849 * Destroy a bonding device. 1850 * Must be under rtnl_lock when this function is called. 1851 */ 1852 void bond_destroy(struct bonding *bond) 1853 { 1854 bond_deinit(bond->dev); 1855 bond_destroy_sysfs_entry(bond); 1856 unregister_netdevice(bond->dev); 1857 } 1858 1859 /* 1860 * First release a slave and than destroy the bond if no more slaves iare left. 
1861 * Must be under rtnl_lock when this function is called. 1862 */ 1863 int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev) 1864 { 1865 struct bonding *bond = bond_dev->priv; 1866 int ret; 1867 1868 ret = bond_release(bond_dev, slave_dev); 1869 if ((ret == 0) && (bond->slave_cnt == 0)) { 1870 printk(KERN_INFO DRV_NAME ": %s: destroying bond %s.\n", 1871 bond_dev->name, bond_dev->name); 1872 bond_destroy(bond); 1873 } 1874 return ret; 1875 } 1876 1877 /* 1878 * This function releases all slaves. 1879 */ 1880 static int bond_release_all(struct net_device *bond_dev) 1881 { 1882 struct bonding *bond = bond_dev->priv; 1883 struct slave *slave; 1884 struct net_device *slave_dev; 1885 struct sockaddr addr; 1886 1887 write_lock_bh(&bond->lock); 1888 1889 netif_carrier_off(bond_dev); 1890 1891 if (bond->slave_cnt == 0) { 1892 goto out; 1893 } 1894 1895 bond->current_arp_slave = NULL; 1896 bond->primary_slave = NULL; 1897 bond_change_active_slave(bond, NULL); 1898 1899 while ((slave = bond->first_slave) != NULL) { 1900 /* Inform AD package of unbinding of slave 1901 * before slave is detached from the list. 1902 */ 1903 if (bond->params.mode == BOND_MODE_8023AD) { 1904 bond_3ad_unbind_slave(slave); 1905 } 1906 1907 slave_dev = slave->dev; 1908 bond_detach_slave(bond, slave); 1909 1910 /* now that the slave is detached, unlock and perform 1911 * all the undo steps that should not be called from 1912 * within a lock. 
1913 */ 1914 write_unlock_bh(&bond->lock); 1915 1916 if ((bond->params.mode == BOND_MODE_TLB) || 1917 (bond->params.mode == BOND_MODE_ALB)) { 1918 /* must be called only after the slave 1919 * has been detached from the list 1920 */ 1921 bond_alb_deinit_slave(bond, slave); 1922 } 1923 1924 bond_compute_features(bond); 1925 1926 bond_destroy_slave_symlinks(bond_dev, slave_dev); 1927 bond_del_vlans_from_slave(bond, slave_dev); 1928 1929 /* If the mode USES_PRIMARY, then we should only remove its 1930 * promisc and mc settings if it was the curr_active_slave, but that was 1931 * already taken care of above when we detached the slave 1932 */ 1933 if (!USES_PRIMARY(bond->params.mode)) { 1934 /* unset promiscuity level from slave */ 1935 if (bond_dev->flags & IFF_PROMISC) { 1936 dev_set_promiscuity(slave_dev, -1); 1937 } 1938 1939 /* unset allmulti level from slave */ 1940 if (bond_dev->flags & IFF_ALLMULTI) { 1941 dev_set_allmulti(slave_dev, -1); 1942 } 1943 1944 /* flush master's mc_list from slave */ 1945 bond_mc_list_flush(bond_dev, slave_dev); 1946 } 1947 1948 netdev_set_master(slave_dev, NULL); 1949 1950 /* close slave before restoring its mac address */ 1951 dev_close(slave_dev); 1952 1953 if (!bond->params.fail_over_mac) { 1954 /* restore original ("permanent") mac address*/ 1955 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1956 addr.sa_family = slave_dev->type; 1957 dev_set_mac_address(slave_dev, &addr); 1958 } 1959 1960 slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | 1961 IFF_SLAVE_INACTIVE); 1962 1963 kfree(slave); 1964 1965 /* re-acquire the lock before getting the next slave */ 1966 write_lock_bh(&bond->lock); 1967 } 1968 1969 /* zero the mac address of the master so it will be 1970 * set by the application to the mac address of the 1971 * first slave 1972 */ 1973 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 1974 1975 if (list_empty(&bond->vlan_list)) { 1976 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1977 } else { 1978 
printk(KERN_WARNING DRV_NAME 1979 ": %s: Warning: clearing HW address of %s while it " 1980 "still has VLANs.\n", 1981 bond_dev->name, bond_dev->name); 1982 printk(KERN_WARNING DRV_NAME 1983 ": %s: When re-adding slaves, make sure the bond's " 1984 "HW address matches its VLANs'.\n", 1985 bond_dev->name); 1986 } 1987 1988 printk(KERN_INFO DRV_NAME 1989 ": %s: released all slaves\n", 1990 bond_dev->name); 1991 1992 out: 1993 write_unlock_bh(&bond->lock); 1994 1995 return 0; 1996 } 1997 1998 /* 1999 * This function changes the active slave to slave <slave_dev>. 2000 * It returns -EINVAL in the following cases. 2001 * - <slave_dev> is not found in the list. 2002 * - There is not active slave now. 2003 * - <slave_dev> is already active. 2004 * - The link state of <slave_dev> is not BOND_LINK_UP. 2005 * - <slave_dev> is not running. 2006 * In these cases, this fuction does nothing. 2007 * In the other cases, currnt_slave pointer is changed and 0 is returned. 2008 */ 2009 static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) 2010 { 2011 struct bonding *bond = bond_dev->priv; 2012 struct slave *old_active = NULL; 2013 struct slave *new_active = NULL; 2014 int res = 0; 2015 2016 if (!USES_PRIMARY(bond->params.mode)) { 2017 return -EINVAL; 2018 } 2019 2020 /* Verify that master_dev is indeed the master of slave_dev */ 2021 if (!(slave_dev->flags & IFF_SLAVE) || 2022 (slave_dev->master != bond_dev)) { 2023 return -EINVAL; 2024 } 2025 2026 read_lock(&bond->lock); 2027 2028 read_lock(&bond->curr_slave_lock); 2029 old_active = bond->curr_active_slave; 2030 read_unlock(&bond->curr_slave_lock); 2031 2032 new_active = bond_get_slave_by_dev(bond, slave_dev); 2033 2034 /* 2035 * Changing to the current active: do nothing; return success. 
2036 */ 2037 if (new_active && (new_active == old_active)) { 2038 read_unlock(&bond->lock); 2039 return 0; 2040 } 2041 2042 if ((new_active) && 2043 (old_active) && 2044 (new_active->link == BOND_LINK_UP) && 2045 IS_UP(new_active->dev)) { 2046 write_lock_bh(&bond->curr_slave_lock); 2047 bond_change_active_slave(bond, new_active); 2048 write_unlock_bh(&bond->curr_slave_lock); 2049 } else { 2050 res = -EINVAL; 2051 } 2052 2053 read_unlock(&bond->lock); 2054 2055 return res; 2056 } 2057 2058 static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) 2059 { 2060 struct bonding *bond = bond_dev->priv; 2061 2062 info->bond_mode = bond->params.mode; 2063 info->miimon = bond->params.miimon; 2064 2065 read_lock(&bond->lock); 2066 info->num_slaves = bond->slave_cnt; 2067 read_unlock(&bond->lock); 2068 2069 return 0; 2070 } 2071 2072 static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) 2073 { 2074 struct bonding *bond = bond_dev->priv; 2075 struct slave *slave; 2076 int i, found = 0; 2077 2078 if (info->slave_id < 0) { 2079 return -ENODEV; 2080 } 2081 2082 read_lock(&bond->lock); 2083 2084 bond_for_each_slave(bond, slave, i) { 2085 if (i == (int)info->slave_id) { 2086 found = 1; 2087 break; 2088 } 2089 } 2090 2091 read_unlock(&bond->lock); 2092 2093 if (found) { 2094 strcpy(info->slave_name, slave->dev->name); 2095 info->link = slave->link; 2096 info->state = slave->state; 2097 info->link_failure_count = slave->link_failure_count; 2098 } else { 2099 return -ENODEV; 2100 } 2101 2102 return 0; 2103 } 2104 2105 /*-------------------------------- Monitoring -------------------------------*/ 2106 2107 /* 2108 * if !have_locks, return nonzero if a failover is necessary. if 2109 * have_locks, do whatever failover activities are needed. 
2110 * 2111 * This is to separate the inspection and failover steps for locking 2112 * purposes; failover requires rtnl, but acquiring it for every 2113 * inspection is undesirable, so a wrapper first does inspection, and 2114 * the acquires the necessary locks and calls again to perform 2115 * failover if needed. Since all locks are dropped, a complete 2116 * restart is needed between calls. 2117 */ 2118 static int __bond_mii_monitor(struct bonding *bond, int have_locks) 2119 { 2120 struct slave *slave, *oldcurrent; 2121 int do_failover = 0; 2122 int i; 2123 2124 if (bond->slave_cnt == 0) 2125 goto out; 2126 2127 /* we will try to read the link status of each of our slaves, and 2128 * set their IFF_RUNNING flag appropriately. For each slave not 2129 * supporting MII status, we won't do anything so that a user-space 2130 * program could monitor the link itself if needed. 2131 */ 2132 2133 if (bond->send_grat_arp) { 2134 if (bond->curr_active_slave && test_bit(__LINK_STATE_LINKWATCH_PENDING, 2135 &bond->curr_active_slave->dev->state)) 2136 dprintk("Needs to send gratuitous arp but not yet\n"); 2137 else { 2138 dprintk("sending delayed gratuitous arp on on %s\n", 2139 bond->curr_active_slave->dev->name); 2140 bond_send_gratuitous_arp(bond); 2141 bond->send_grat_arp = 0; 2142 } 2143 } 2144 read_lock(&bond->curr_slave_lock); 2145 oldcurrent = bond->curr_active_slave; 2146 read_unlock(&bond->curr_slave_lock); 2147 2148 bond_for_each_slave(bond, slave, i) { 2149 struct net_device *slave_dev = slave->dev; 2150 int link_state; 2151 u16 old_speed = slave->speed; 2152 u8 old_duplex = slave->duplex; 2153 2154 link_state = bond_check_dev_link(bond, slave_dev, 0); 2155 2156 switch (slave->link) { 2157 case BOND_LINK_UP: /* the link was up */ 2158 if (link_state == BMSR_LSTATUS) { 2159 if (!oldcurrent) { 2160 if (!have_locks) 2161 return 1; 2162 do_failover = 1; 2163 } 2164 break; 2165 } else { /* link going down */ 2166 slave->link = BOND_LINK_FAIL; 2167 slave->delay = 
bond->params.downdelay; 2168 2169 if (slave->link_failure_count < UINT_MAX) { 2170 slave->link_failure_count++; 2171 } 2172 2173 if (bond->params.downdelay) { 2174 printk(KERN_INFO DRV_NAME 2175 ": %s: link status down for %s " 2176 "interface %s, disabling it in " 2177 "%d ms.\n", 2178 bond->dev->name, 2179 IS_UP(slave_dev) 2180 ? ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) 2181 ? ((slave == oldcurrent) 2182 ? "active " : "backup ") 2183 : "") 2184 : "idle ", 2185 slave_dev->name, 2186 bond->params.downdelay * bond->params.miimon); 2187 } 2188 } 2189 /* no break ! fall through the BOND_LINK_FAIL test to 2190 ensure proper action to be taken 2191 */ 2192 case BOND_LINK_FAIL: /* the link has just gone down */ 2193 if (link_state != BMSR_LSTATUS) { 2194 /* link stays down */ 2195 if (slave->delay <= 0) { 2196 if (!have_locks) 2197 return 1; 2198 2199 /* link down for too long time */ 2200 slave->link = BOND_LINK_DOWN; 2201 2202 /* in active/backup mode, we must 2203 * completely disable this interface 2204 */ 2205 if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) || 2206 (bond->params.mode == BOND_MODE_8023AD)) { 2207 bond_set_slave_inactive_flags(slave); 2208 } 2209 2210 printk(KERN_INFO DRV_NAME 2211 ": %s: link status definitely " 2212 "down for interface %s, " 2213 "disabling it\n", 2214 bond->dev->name, 2215 slave_dev->name); 2216 2217 /* notify ad that the link status has changed */ 2218 if (bond->params.mode == BOND_MODE_8023AD) { 2219 bond_3ad_handle_link_change(slave, BOND_LINK_DOWN); 2220 } 2221 2222 if ((bond->params.mode == BOND_MODE_TLB) || 2223 (bond->params.mode == BOND_MODE_ALB)) { 2224 bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN); 2225 } 2226 2227 if (slave == oldcurrent) { 2228 do_failover = 1; 2229 } 2230 } else { 2231 slave->delay--; 2232 } 2233 } else { 2234 /* link up again */ 2235 slave->link = BOND_LINK_UP; 2236 slave->jiffies = jiffies; 2237 printk(KERN_INFO DRV_NAME 2238 ": %s: link status up again after %d " 2239 "ms for 
interface %s.\n", 2240 bond->dev->name, 2241 (bond->params.downdelay - slave->delay) * bond->params.miimon, 2242 slave_dev->name); 2243 } 2244 break; 2245 case BOND_LINK_DOWN: /* the link was down */ 2246 if (link_state != BMSR_LSTATUS) { 2247 /* the link stays down, nothing more to do */ 2248 break; 2249 } else { /* link going up */ 2250 slave->link = BOND_LINK_BACK; 2251 slave->delay = bond->params.updelay; 2252 2253 if (bond->params.updelay) { 2254 /* if updelay == 0, no need to 2255 advertise about a 0 ms delay */ 2256 printk(KERN_INFO DRV_NAME 2257 ": %s: link status up for " 2258 "interface %s, enabling it " 2259 "in %d ms.\n", 2260 bond->dev->name, 2261 slave_dev->name, 2262 bond->params.updelay * bond->params.miimon); 2263 } 2264 } 2265 /* no break ! fall through the BOND_LINK_BACK state in 2266 case there's something to do. 2267 */ 2268 case BOND_LINK_BACK: /* the link has just come back */ 2269 if (link_state != BMSR_LSTATUS) { 2270 /* link down again */ 2271 slave->link = BOND_LINK_DOWN; 2272 2273 printk(KERN_INFO DRV_NAME 2274 ": %s: link status down again after %d " 2275 "ms for interface %s.\n", 2276 bond->dev->name, 2277 (bond->params.updelay - slave->delay) * bond->params.miimon, 2278 slave_dev->name); 2279 } else { 2280 /* link stays up */ 2281 if (slave->delay == 0) { 2282 if (!have_locks) 2283 return 1; 2284 2285 /* now the link has been up for long time enough */ 2286 slave->link = BOND_LINK_UP; 2287 slave->jiffies = jiffies; 2288 2289 if (bond->params.mode == BOND_MODE_8023AD) { 2290 /* prevent it from being the active one */ 2291 slave->state = BOND_STATE_BACKUP; 2292 } else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { 2293 /* make it immediately active */ 2294 slave->state = BOND_STATE_ACTIVE; 2295 } else if (slave != bond->primary_slave) { 2296 /* prevent it from being the active one */ 2297 slave->state = BOND_STATE_BACKUP; 2298 } 2299 2300 printk(KERN_INFO DRV_NAME 2301 ": %s: link status definitely " 2302 "up for interface %s.\n", 
2303 bond->dev->name, 2304 slave_dev->name); 2305 2306 /* notify ad that the link status has changed */ 2307 if (bond->params.mode == BOND_MODE_8023AD) { 2308 bond_3ad_handle_link_change(slave, BOND_LINK_UP); 2309 } 2310 2311 if ((bond->params.mode == BOND_MODE_TLB) || 2312 (bond->params.mode == BOND_MODE_ALB)) { 2313 bond_alb_handle_link_change(bond, slave, BOND_LINK_UP); 2314 } 2315 2316 if ((!oldcurrent) || 2317 (slave == bond->primary_slave)) { 2318 do_failover = 1; 2319 } 2320 } else { 2321 slave->delay--; 2322 } 2323 } 2324 break; 2325 default: 2326 /* Should not happen */ 2327 printk(KERN_ERR DRV_NAME 2328 ": %s: Error: %s Illegal value (link=%d)\n", 2329 bond->dev->name, 2330 slave->dev->name, 2331 slave->link); 2332 goto out; 2333 } /* end of switch (slave->link) */ 2334 2335 bond_update_speed_duplex(slave); 2336 2337 if (bond->params.mode == BOND_MODE_8023AD) { 2338 if (old_speed != slave->speed) { 2339 bond_3ad_adapter_speed_changed(slave); 2340 } 2341 2342 if (old_duplex != slave->duplex) { 2343 bond_3ad_adapter_duplex_changed(slave); 2344 } 2345 } 2346 2347 } /* end of for */ 2348 2349 if (do_failover) { 2350 ASSERT_RTNL(); 2351 2352 write_lock_bh(&bond->curr_slave_lock); 2353 2354 bond_select_active_slave(bond); 2355 2356 write_unlock_bh(&bond->curr_slave_lock); 2357 2358 } else 2359 bond_set_carrier(bond); 2360 2361 out: 2362 return 0; 2363 } 2364 2365 /* 2366 * bond_mii_monitor 2367 * 2368 * Really a wrapper that splits the mii monitor into two phases: an 2369 * inspection, then (if inspection indicates something needs to be 2370 * done) an acquisition of appropriate locks followed by another pass 2371 * to implement whatever link state changes are indicated. 
2372 */ 2373 void bond_mii_monitor(struct work_struct *work) 2374 { 2375 struct bonding *bond = container_of(work, struct bonding, 2376 mii_work.work); 2377 unsigned long delay; 2378 2379 read_lock(&bond->lock); 2380 if (bond->kill_timers) { 2381 read_unlock(&bond->lock); 2382 return; 2383 } 2384 if (__bond_mii_monitor(bond, 0)) { 2385 read_unlock(&bond->lock); 2386 rtnl_lock(); 2387 read_lock(&bond->lock); 2388 __bond_mii_monitor(bond, 1); 2389 read_unlock(&bond->lock); 2390 rtnl_unlock(); /* might sleep, hold no other locks */ 2391 read_lock(&bond->lock); 2392 } 2393 2394 delay = ((bond->params.miimon * HZ) / 1000) ? : 1; 2395 read_unlock(&bond->lock); 2396 queue_delayed_work(bond->wq, &bond->mii_work, delay); 2397 } 2398 2399 static __be32 bond_glean_dev_ip(struct net_device *dev) 2400 { 2401 struct in_device *idev; 2402 struct in_ifaddr *ifa; 2403 __be32 addr = 0; 2404 2405 if (!dev) 2406 return 0; 2407 2408 rcu_read_lock(); 2409 idev = __in_dev_get_rcu(dev); 2410 if (!idev) 2411 goto out; 2412 2413 ifa = idev->ifa_list; 2414 if (!ifa) 2415 goto out; 2416 2417 addr = ifa->ifa_local; 2418 out: 2419 rcu_read_unlock(); 2420 return addr; 2421 } 2422 2423 static int bond_has_ip(struct bonding *bond) 2424 { 2425 struct vlan_entry *vlan, *vlan_next; 2426 2427 if (bond->master_ip) 2428 return 1; 2429 2430 if (list_empty(&bond->vlan_list)) 2431 return 0; 2432 2433 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2434 vlan_list) { 2435 if (vlan->vlan_ip) 2436 return 1; 2437 } 2438 2439 return 0; 2440 } 2441 2442 static int bond_has_this_ip(struct bonding *bond, __be32 ip) 2443 { 2444 struct vlan_entry *vlan, *vlan_next; 2445 2446 if (ip == bond->master_ip) 2447 return 1; 2448 2449 if (list_empty(&bond->vlan_list)) 2450 return 0; 2451 2452 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2453 vlan_list) { 2454 if (ip == vlan->vlan_ip) 2455 return 1; 2456 } 2457 2458 return 0; 2459 } 2460 2461 /* 2462 * We go to the (large) trouble of VLAN tagging ARP 
frames because 2463 * switches in VLAN mode (especially if ports are configured as 2464 * "native" to a VLAN) might not pass non-tagged frames. 2465 */ 2466 static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id) 2467 { 2468 struct sk_buff *skb; 2469 2470 dprintk("arp %d on slave %s: dst %x src %x vid %d\n", arp_op, 2471 slave_dev->name, dest_ip, src_ip, vlan_id); 2472 2473 skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, 2474 NULL, slave_dev->dev_addr, NULL); 2475 2476 if (!skb) { 2477 printk(KERN_ERR DRV_NAME ": ARP packet allocation failed\n"); 2478 return; 2479 } 2480 if (vlan_id) { 2481 skb = vlan_put_tag(skb, vlan_id); 2482 if (!skb) { 2483 printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n"); 2484 return; 2485 } 2486 } 2487 arp_xmit(skb); 2488 } 2489 2490 2491 static void bond_arp_send_all(struct bonding *bond, struct slave *slave) 2492 { 2493 int i, vlan_id, rv; 2494 __be32 *targets = bond->params.arp_targets; 2495 struct vlan_entry *vlan, *vlan_next; 2496 struct net_device *vlan_dev; 2497 struct flowi fl; 2498 struct rtable *rt; 2499 2500 for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { 2501 if (!targets[i]) 2502 continue; 2503 dprintk("basa: target %x\n", targets[i]); 2504 if (list_empty(&bond->vlan_list)) { 2505 dprintk("basa: empty vlan: arp_send\n"); 2506 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2507 bond->master_ip, 0); 2508 continue; 2509 } 2510 2511 /* 2512 * If VLANs are configured, we do a route lookup to 2513 * determine which VLAN interface would be used, so we 2514 * can tag the ARP with the proper VLAN tag. 
2515 */ 2516 memset(&fl, 0, sizeof(fl)); 2517 fl.fl4_dst = targets[i]; 2518 fl.fl4_tos = RTO_ONLINK; 2519 2520 rv = ip_route_output_key(&init_net, &rt, &fl); 2521 if (rv) { 2522 if (net_ratelimit()) { 2523 printk(KERN_WARNING DRV_NAME 2524 ": %s: no route to arp_ip_target %u.%u.%u.%u\n", 2525 bond->dev->name, NIPQUAD(fl.fl4_dst)); 2526 } 2527 continue; 2528 } 2529 2530 /* 2531 * This target is not on a VLAN 2532 */ 2533 if (rt->u.dst.dev == bond->dev) { 2534 ip_rt_put(rt); 2535 dprintk("basa: rtdev == bond->dev: arp_send\n"); 2536 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2537 bond->master_ip, 0); 2538 continue; 2539 } 2540 2541 vlan_id = 0; 2542 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2543 vlan_list) { 2544 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 2545 if (vlan_dev == rt->u.dst.dev) { 2546 vlan_id = vlan->vlan_id; 2547 dprintk("basa: vlan match on %s %d\n", 2548 vlan_dev->name, vlan_id); 2549 break; 2550 } 2551 } 2552 2553 if (vlan_id) { 2554 ip_rt_put(rt); 2555 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2556 vlan->vlan_ip, vlan_id); 2557 continue; 2558 } 2559 2560 if (net_ratelimit()) { 2561 printk(KERN_WARNING DRV_NAME 2562 ": %s: no path to arp_ip_target %u.%u.%u.%u via rt.dev %s\n", 2563 bond->dev->name, NIPQUAD(fl.fl4_dst), 2564 rt->u.dst.dev ? rt->u.dst.dev->name : "NULL"); 2565 } 2566 ip_rt_put(rt); 2567 } 2568 } 2569 2570 /* 2571 * Kick out a gratuitous ARP for an IP on the bonding master plus one 2572 * for each VLAN above us. 2573 */ 2574 static void bond_send_gratuitous_arp(struct bonding *bond) 2575 { 2576 struct slave *slave = bond->curr_active_slave; 2577 struct vlan_entry *vlan; 2578 struct net_device *vlan_dev; 2579 2580 dprintk("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name, 2581 slave ? 
slave->dev->name : "NULL"); 2582 if (!slave) 2583 return; 2584 2585 if (bond->master_ip) { 2586 bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, 2587 bond->master_ip, 0); 2588 } 2589 2590 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 2591 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 2592 if (vlan->vlan_ip) { 2593 bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip, 2594 vlan->vlan_ip, vlan->vlan_id); 2595 } 2596 } 2597 } 2598 2599 static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip) 2600 { 2601 int i; 2602 __be32 *targets = bond->params.arp_targets; 2603 2604 targets = bond->params.arp_targets; 2605 for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) { 2606 dprintk("bva: sip %u.%u.%u.%u tip %u.%u.%u.%u t[%d] " 2607 "%u.%u.%u.%u bhti(tip) %d\n", 2608 NIPQUAD(sip), NIPQUAD(tip), i, NIPQUAD(targets[i]), 2609 bond_has_this_ip(bond, tip)); 2610 if (sip == targets[i]) { 2611 if (bond_has_this_ip(bond, tip)) 2612 slave->last_arp_rx = jiffies; 2613 return; 2614 } 2615 } 2616 } 2617 2618 static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 2619 { 2620 struct arphdr *arp; 2621 struct slave *slave; 2622 struct bonding *bond; 2623 unsigned char *arp_ptr; 2624 __be32 sip, tip; 2625 2626 if (dev->nd_net != &init_net) 2627 goto out; 2628 2629 if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) 2630 goto out; 2631 2632 bond = dev->priv; 2633 read_lock(&bond->lock); 2634 2635 dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n", 2636 bond->dev->name, skb->dev ? skb->dev->name : "NULL", 2637 orig_dev ? orig_dev->name : "NULL"); 2638 2639 slave = bond_get_slave_by_dev(bond, orig_dev); 2640 if (!slave || !slave_do_arp_validate(bond, slave)) 2641 goto out_unlock; 2642 2643 /* ARP header, plus 2 device addresses, plus 2 IP addresses. 
*/ 2644 if (!pskb_may_pull(skb, (sizeof(struct arphdr) + 2645 (2 * dev->addr_len) + 2646 (2 * sizeof(u32))))) 2647 goto out_unlock; 2648 2649 arp = arp_hdr(skb); 2650 if (arp->ar_hln != dev->addr_len || 2651 skb->pkt_type == PACKET_OTHERHOST || 2652 skb->pkt_type == PACKET_LOOPBACK || 2653 arp->ar_hrd != htons(ARPHRD_ETHER) || 2654 arp->ar_pro != htons(ETH_P_IP) || 2655 arp->ar_pln != 4) 2656 goto out_unlock; 2657 2658 arp_ptr = (unsigned char *)(arp + 1); 2659 arp_ptr += dev->addr_len; 2660 memcpy(&sip, arp_ptr, 4); 2661 arp_ptr += 4 + dev->addr_len; 2662 memcpy(&tip, arp_ptr, 4); 2663 2664 dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %u.%u.%u.%u" 2665 " tip %u.%u.%u.%u\n", bond->dev->name, slave->dev->name, 2666 slave->state, bond->params.arp_validate, 2667 slave_do_arp_validate(bond, slave), NIPQUAD(sip), NIPQUAD(tip)); 2668 2669 /* 2670 * Backup slaves won't see the ARP reply, but do come through 2671 * here for each ARP probe (so we swap the sip/tip to validate 2672 * the probe). In a "redundant switch, common router" type of 2673 * configuration, the ARP probe will (hopefully) travel from 2674 * the active, through one switch, the router, then the other 2675 * switch before reaching the backup. 2676 */ 2677 if (slave->state == BOND_STATE_ACTIVE) 2678 bond_validate_arp(bond, slave, sip, tip); 2679 else 2680 bond_validate_arp(bond, slave, tip, sip); 2681 2682 out_unlock: 2683 read_unlock(&bond->lock); 2684 out: 2685 dev_kfree_skb(skb); 2686 return NET_RX_SUCCESS; 2687 } 2688 2689 /* 2690 * this function is called regularly to monitor each slave's link 2691 * ensuring that traffic is being sent and received when arp monitoring 2692 * is used in load-balancing mode. if the adapter has been dormant, then an 2693 * arp is transmitted to generate traffic. see activebackup_arp_monitor for 2694 * arp monitoring in active backup mode. 
2695 */ 2696 void bond_loadbalance_arp_mon(struct work_struct *work) 2697 { 2698 struct bonding *bond = container_of(work, struct bonding, 2699 arp_work.work); 2700 struct slave *slave, *oldcurrent; 2701 int do_failover = 0; 2702 int delta_in_ticks; 2703 int i; 2704 2705 read_lock(&bond->lock); 2706 2707 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2708 2709 if (bond->kill_timers) { 2710 goto out; 2711 } 2712 2713 if (bond->slave_cnt == 0) { 2714 goto re_arm; 2715 } 2716 2717 read_lock(&bond->curr_slave_lock); 2718 oldcurrent = bond->curr_active_slave; 2719 read_unlock(&bond->curr_slave_lock); 2720 2721 /* see if any of the previous devices are up now (i.e. they have 2722 * xmt and rcv traffic). the curr_active_slave does not come into 2723 * the picture unless it is null. also, slave->jiffies is not needed 2724 * here because we send an arp on each slave and give a slave as 2725 * long as it needs to get the tx/rx within the delta. 2726 * TODO: what about up/down delay in arp mode? it wasn't here before 2727 * so it can wait 2728 */ 2729 bond_for_each_slave(bond, slave, i) { 2730 if (slave->link != BOND_LINK_UP) { 2731 if (time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks) && 2732 time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) { 2733 2734 slave->link = BOND_LINK_UP; 2735 slave->state = BOND_STATE_ACTIVE; 2736 2737 /* primary_slave has no meaning in round-robin 2738 * mode. the window of a slave being up and 2739 * curr_active_slave being null after enslaving 2740 * is closed. 
2741 */ 2742 if (!oldcurrent) { 2743 printk(KERN_INFO DRV_NAME 2744 ": %s: link status definitely " 2745 "up for interface %s, ", 2746 bond->dev->name, 2747 slave->dev->name); 2748 do_failover = 1; 2749 } else { 2750 printk(KERN_INFO DRV_NAME 2751 ": %s: interface %s is now up\n", 2752 bond->dev->name, 2753 slave->dev->name); 2754 } 2755 } 2756 } else { 2757 /* slave->link == BOND_LINK_UP */ 2758 2759 /* not all switches will respond to an arp request 2760 * when the source ip is 0, so don't take the link down 2761 * if we don't know our ip yet 2762 */ 2763 if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) || 2764 (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks) && 2765 bond_has_ip(bond))) { 2766 2767 slave->link = BOND_LINK_DOWN; 2768 slave->state = BOND_STATE_BACKUP; 2769 2770 if (slave->link_failure_count < UINT_MAX) { 2771 slave->link_failure_count++; 2772 } 2773 2774 printk(KERN_INFO DRV_NAME 2775 ": %s: interface %s is now down.\n", 2776 bond->dev->name, 2777 slave->dev->name); 2778 2779 if (slave == oldcurrent) { 2780 do_failover = 1; 2781 } 2782 } 2783 } 2784 2785 /* note: if switch is in round-robin mode, all links 2786 * must tx arp to ensure all links rx an arp - otherwise 2787 * links may oscillate or not come up at all; if switch is 2788 * in something like xor mode, there is nothing we can 2789 * do - all replies will be rx'ed on same link causing slaves 2790 * to be unstable during low/no traffic periods 2791 */ 2792 if (IS_UP(slave->dev)) { 2793 bond_arp_send_all(bond, slave); 2794 } 2795 } 2796 2797 if (do_failover) { 2798 rtnl_lock(); 2799 write_lock_bh(&bond->curr_slave_lock); 2800 2801 bond_select_active_slave(bond); 2802 2803 write_unlock_bh(&bond->curr_slave_lock); 2804 rtnl_unlock(); 2805 2806 } 2807 2808 re_arm: 2809 if (bond->params.arp_interval) 2810 queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); 2811 out: 2812 read_unlock(&bond->lock); 2813 } 2814 2815 /* 2816 * When using arp monitoring 
in active-backup mode, this function is
 * called to determine if any backup slaves have gone down or a new
 * current slave needs to be found.
 * The backup slaves never generate traffic, they are considered up by merely
 * receiving traffic. If the current slave goes down, each backup slave will
 * be given the opportunity to tx/rx an arp before being taken down - this
 * prevents all slaves from being taken down due to the current slave not
 * sending any traffic for the backups to receive. The arps are not necessarily
 * necessary, any tx and rx traffic will keep the current slave up. While any
 * rx traffic will keep the backup slaves up, the current slave is responsible
 * for generating traffic to keep them up regardless of any other traffic they
 * may have received.
 * see loadbalance_arp_monitor for arp monitoring in load balancing mode
 *
 * Runs as delayed work; bond->lock is read-held for the whole function
 * and the work re-queues itself every arp_interval unless kill_timers
 * is set.
 *
 * NOTE(review): rtnl_lock()/rtnl_unlock() are called below in three
 * places while bond->lock is still read-held.  bond_mii_monitor() in
 * this file drops bond->lock around both calls ("might sleep, hold no
 * other locks") - confirm whether the ordering used here is intended.
 */
void bond_activebackup_arp_mon(struct work_struct *work)
{
	struct bonding *bond = container_of(work, struct bonding,
					    arp_work.work);
	struct slave *slave;
	int delta_in_ticks;
	int i;

	read_lock(&bond->lock);

	/* arp_interval is in milliseconds */
	delta_in_ticks = (bond->params.arp_interval * HZ) / 1000;

	if (bond->kill_timers) {
		goto out;
	}

	if (bond->slave_cnt == 0) {
		goto re_arm;
	}

	/* determine if any slave has come up or any backup slave has
	 * gone down
	 * TODO: what about up/down delay in arp mode? it wasn't here before
	 *       so it can wait
	 */
	bond_for_each_slave(bond, slave, i) {
		if (slave->link != BOND_LINK_UP) {
			/* a slave that is not up comes up as soon as it has
			 * received something within the last interval
			 */
			if (time_before_eq(jiffies,
			    slave_last_rx(bond, slave) + delta_in_ticks)) {

				slave->link = BOND_LINK_UP;

				/* NOTE(review): taken with bond->lock held */
				rtnl_lock();

				write_lock_bh(&bond->curr_slave_lock);

				if ((!bond->curr_active_slave) &&
				    time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks)) {
					bond_change_active_slave(bond, slave);
					bond->current_arp_slave = NULL;
				} else if (bond->curr_active_slave != slave) {
					/* this slave has just come up but we
					 * already have a current slave; this
					 * can also happen if bond_enslave adds
					 * a new slave that is up while we are
					 * searching for a new slave
					 */
					bond_set_slave_inactive_flags(slave);
					bond->current_arp_slave = NULL;
				}

				bond_set_carrier(bond);

				if (slave == bond->curr_active_slave) {
					printk(KERN_INFO DRV_NAME
					       ": %s: %s is up and now the "
					       "active interface\n",
					       bond->dev->name,
					       slave->dev->name);
					netif_carrier_on(bond->dev);
				} else {
					printk(KERN_INFO DRV_NAME
					       ": %s: backup interface %s is "
					       "now up\n",
					       bond->dev->name,
					       slave->dev->name);
				}

				write_unlock_bh(&bond->curr_slave_lock);
				rtnl_unlock();
			}
		} else {
			read_lock(&bond->curr_slave_lock);

			if ((slave != bond->curr_active_slave) &&
			    (!bond->current_arp_slave) &&
			    (time_after_eq(jiffies, slave_last_rx(bond, slave) + 3*delta_in_ticks) &&
			     bond_has_ip(bond))) {
				/* a backup slave has gone down; three times
				 * the delta allows the current slave to be
				 * taken out before the backup slave.
				 * note: a non-null current_arp_slave indicates
				 * the curr_active_slave went down and we are
				 * searching for a new one; under this
				 * condition we only take the curr_active_slave
				 * down - this gives each slave a chance to
				 * tx/rx traffic before being taken out
				 */

				read_unlock(&bond->curr_slave_lock);

				slave->link  = BOND_LINK_DOWN;

				/* saturating counter */
				if (slave->link_failure_count < UINT_MAX) {
					slave->link_failure_count++;
				}

				bond_set_slave_inactive_flags(slave);

				printk(KERN_INFO DRV_NAME
				       ": %s: backup interface %s is now down\n",
				       bond->dev->name,
				       slave->dev->name);
			} else {
				read_unlock(&bond->curr_slave_lock);
			}
		}
	}

	/* from here on, slave is the current active slave (may be NULL) */
	read_lock(&bond->curr_slave_lock);
	slave = bond->curr_active_slave;
	read_unlock(&bond->curr_slave_lock);

	if (slave) {
		/* if we have sent traffic in the past 2*arp_intervals but
		 * haven't xmit and rx traffic in that time interval, select
		 * a different slave. slave->jiffies is only updated when
		 * a slave first becomes the curr_active_slave - not necessarily
		 * after every arp; this ensures the slave has a full 2*delta
		 * before being taken out. if a primary is being used, check
		 * if it is up and needs to take over as the curr_active_slave
		 */
		if ((time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) ||
		     (time_after_eq(jiffies, slave_last_rx(bond, slave) + 2*delta_in_ticks) &&
		      bond_has_ip(bond))) &&
		    time_after_eq(jiffies, slave->jiffies + 2*delta_in_ticks)) {

			slave->link  = BOND_LINK_DOWN;

			if (slave->link_failure_count < UINT_MAX) {
				slave->link_failure_count++;
			}

			printk(KERN_INFO DRV_NAME
			       ": %s: link status down for active interface "
			       "%s, disabling it\n",
			       bond->dev->name,
			       slave->dev->name);

			/* NOTE(review): taken with bond->lock held */
			rtnl_lock();
			write_lock_bh(&bond->curr_slave_lock);

			bond_select_active_slave(bond);
			slave = bond->curr_active_slave;

			write_unlock_bh(&bond->curr_slave_lock);

			rtnl_unlock();

			bond->current_arp_slave = slave;

			if (slave) {
				slave->jiffies = jiffies;
			}
		} else if ((bond->primary_slave) &&
			   (bond->primary_slave != slave) &&
			   (bond->primary_slave->link == BOND_LINK_UP)) {
			/* at this point, slave is the curr_active_slave */
			printk(KERN_INFO DRV_NAME
			       ": %s: changing from interface %s to primary "
			       "interface %s\n",
			       bond->dev->name,
			       slave->dev->name,
			       bond->primary_slave->dev->name);

			/* primary is up so switch to it */
			/* NOTE(review): taken with bond->lock held */
			rtnl_lock();
			write_lock_bh(&bond->curr_slave_lock);
			bond_change_active_slave(bond, bond->primary_slave);
			write_unlock_bh(&bond->curr_slave_lock);

			rtnl_unlock();

			slave = bond->primary_slave;
			slave->jiffies = jiffies;
		} else {
			bond->current_arp_slave = NULL;
		}

		/* the current slave must tx an arp to ensure backup slaves
		 * rx traffic
		 */
		if (slave && bond_has_ip(bond)) {
			bond_arp_send_all(bond, slave);
		}
	}

	/* if we don't have a curr_active_slave, search for the next available
	 * backup slave from the current_arp_slave and make it the candidate
	 * for becoming the curr_active_slave
	 */
	if (!slave) {
		if (!bond->current_arp_slave) {
			bond->current_arp_slave = bond->first_slave;
		}

		if (bond->current_arp_slave) {
			bond_set_slave_inactive_flags(bond->current_arp_slave);

			/* search for next candidate */
			bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) {
				if (IS_UP(slave->dev)) {
					slave->link = BOND_LINK_BACK;
					bond_set_slave_active_flags(slave);
					bond_arp_send_all(bond, slave);
					slave->jiffies = jiffies;
					bond->current_arp_slave = slave;
					break;
				}

				/* if the link state is up at this point, we
				 * mark it down - this can happen if we have
				 * simultaneous link failures and
				 * reselect_active_interface doesn't make this
				 * one the current slave so it is still marked
				 * up when it is actually down
				 */
				if (slave->link == BOND_LINK_UP) {
					slave->link  = BOND_LINK_DOWN;
					if (slave->link_failure_count < UINT_MAX) {
						slave->link_failure_count++;
					}

					bond_set_slave_inactive_flags(slave);

					printk(KERN_INFO DRV_NAME
					       ": %s: backup interface %s is "
					       "now down.\n",
					       bond->dev->name,
					       slave->dev->name);
				}
			}
		}
	}

re_arm:
	if (bond->params.arp_interval) {
		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
	}
out:
	read_unlock(&bond->lock);
}

/*------------------------------ proc/seq_file-------------------------------*/

#ifdef CONFIG_PROC_FS

/* iterator value standing for the master ("header") record at position 0 */
#define SEQ_START_TOKEN ((void *)1)

/*
 * seq_file start: takes dev_base_lock and bond->lock for reading (both
 * are released in bond_info_seq_stop) and returns SEQ_START_TOKEN for
 * position 0, the slave at 1-based position *pos otherwise, or NULL
 * when *pos is past the last slave.
 */
static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bonding *bond = seq->private;
	loff_t off = 0;
	struct slave *slave;
	int i;

	/* make sure the bond won't be taken away */
	read_lock(&dev_base_lock);
	read_lock(&bond->lock);

	if (*pos == 0) {
		return SEQ_START_TOKEN;
	}

	bond_for_each_slave(bond, slave, i) {
		if (++off == *pos) {
			return slave;
		}
	}

	return NULL;
}

/*
 * seq_file next: advances *pos and returns the first slave after the
 * start token, the following slave otherwise, or NULL once the walk
 * is back at bond->first_slave.
 */
static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bonding *bond = seq->private;
	struct slave *slave = v;

	++*pos;
	if (v == SEQ_START_TOKEN) {
		return bond->first_slave;
	}

	slave = slave->next;

	/* reaching first_slave again ends the iteration */
	return (slave == bond->first_slave) ? NULL : slave;
}

/* seq_file stop: releases the two locks taken in bond_info_seq_start */
static void bond_info_seq_stop(struct seq_file *seq, void *v)
{
	struct bonding *bond = seq->private;

	read_unlock(&bond->lock);
	read_unlock(&dev_base_lock);
}

/*
 * Print the bond-wide part of the /proc record: mode, hash policy,
 * primary/active slave, MII and ARP monitor settings and, in 802.3ad
 * mode, the active aggregator information.
 */
static void bond_info_show_master(struct seq_file *seq)
{
	struct bonding *bond = seq->private;
	struct slave *curr;
	int i;
	u32 target;

	read_lock(&bond->curr_slave_lock);
	curr = bond->curr_active_slave;
	read_unlock(&bond->curr_slave_lock);

	seq_printf(seq, "Bonding Mode: %s",
		   bond_mode_name(bond->params.mode));

	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP &&
	    bond->params.fail_over_mac)
		seq_printf(seq, " (fail_over_mac)");

	seq_printf(seq, "\n");

	if (bond->params.mode == BOND_MODE_XOR ||
		bond->params.mode == BOND_MODE_8023AD) {
		seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
			xmit_hashtype_tbl[bond->params.xmit_policy].modename,
			bond->params.xmit_policy);
	}

	if (USES_PRIMARY(bond->params.mode)) {
		seq_printf(seq, "Primary Slave: %s\n",
			   (bond->primary_slave) ?
			   bond->primary_slave->dev->name : "None");

		seq_printf(seq, "Currently Active Slave: %s\n",
			   (curr) ? curr->dev->name : "None");
	}

	seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ?
		   "up" : "down");
	seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon);
	/* updelay/downdelay are multiplied by miimon to print milliseconds */
	seq_printf(seq, "Up Delay (ms): %d\n",
		   bond->params.updelay * bond->params.miimon);
	seq_printf(seq, "Down Delay (ms): %d\n",
		   bond->params.downdelay * bond->params.miimon);


	/* ARP information */
	if(bond->params.arp_interval > 0) {
		int printed=0;
		seq_printf(seq, "ARP Polling Interval (ms): %d\n",
				bond->params.arp_interval);

		seq_printf(seq, "ARP IP target/s (n.n.n.n form):");

		/* empty (zero) slots are skipped, not treated as the end */
		for(i = 0; (i < BOND_MAX_ARP_TARGETS) ;i++) {
			if (!bond->params.arp_targets[i])
				continue;
			if (printed)
				seq_printf(seq, ",");
			target = ntohl(bond->params.arp_targets[i]);
			seq_printf(seq, " %d.%d.%d.%d", HIPQUAD(target));
			printed = 1;
		}
		seq_printf(seq, "\n");
	}

	if (bond->params.mode == BOND_MODE_8023AD) {
		struct ad_info ad_info;
		DECLARE_MAC_BUF(mac);

		seq_puts(seq, "\n802.3ad info\n");
		seq_printf(seq, "LACP rate: %s\n",
			   (bond->params.lacp_fast) ? "fast" : "slow");

		/* a nonzero return is reported as "no active aggregator" */
		if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
			seq_printf(seq, "bond %s has no active aggregator\n",
				   bond->dev->name);
		} else {
			seq_printf(seq, "Active Aggregator Info:\n");

			seq_printf(seq, "\tAggregator ID: %d\n",
				   ad_info.aggregator_id);
			seq_printf(seq, "\tNumber of ports: %d\n",
				   ad_info.ports);
			seq_printf(seq, "\tActor Key: %d\n",
				   ad_info.actor_key);
			seq_printf(seq, "\tPartner Key: %d\n",
				   ad_info.partner_key);
			seq_printf(seq, "\tPartner Mac Address: %s\n",
				   print_mac(mac, ad_info.partner_system));
		}
	}
}

/*
 * Print one slave's part of the /proc record: name, link state,
 * failure count, permanent hardware address and, in 802.3ad mode, the
 * aggregator it belongs to.
 */
static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave)
{
	struct bonding *bond = seq->private;
	DECLARE_MAC_BUF(mac);

	seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
	/* every link state other than BOND_LINK_UP is shown as "down" */
	seq_printf(seq, "MII Status: %s\n",
		   (slave->link == BOND_LINK_UP) ?  "up" : "down");
	seq_printf(seq, "Link Failure Count: %u\n",
		   slave->link_failure_count);

	seq_printf(seq,
		   "Permanent HW addr: %s\n",
		   print_mac(mac, slave->perm_hwaddr));

	if (bond->params.mode == BOND_MODE_8023AD) {
		const struct aggregator *agg
			= SLAVE_AD_INFO(slave).port.aggregator;

		if (agg) {
			seq_printf(seq, "Aggregator ID: %d\n",
				   agg->aggregator_identifier);
		} else {
			seq_puts(seq, "Aggregator ID: N/A\n");
		}
	}
}

/*
 * seq_file show: the start token prints the driver version and the
 * master record, any other value is a slave and prints its record.
 */
static int bond_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%s\n", version);
		bond_info_show_master(seq);
	} else {
		bond_info_show_slave(seq, v);
	}

	return 0;
}

static struct seq_operations bond_info_seq_ops = {
	.start = bond_info_seq_start,
	.next  = bond_info_seq_next,
	.stop  = bond_info_seq_stop,
	.show  = bond_info_seq_show,
};

/*
 * open handler for a bond's /proc file: sets up the seq_file and, on
 * success, stores the proc entry's data pointer as seq->private.
 * Returns the result of seq_open().
 */
static int bond_info_open(struct inode *inode, struct file *file)
{
	struct seq_file *seq;
	struct proc_dir_entry *proc;
	int res;

	res = seq_open(file, &bond_info_seq_ops);
	if (!res) {
		/* recover the pointer buried in proc_dir_entry data */
		seq = file->private_data;
		proc = PDE(inode);
		seq->private = proc->data;
	}

	return res;
}

static const struct file_operations bond_info_fops = {
	.owner   = THIS_MODULE,
	.open    = bond_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

/*
 * Create the per-bond entry (named after the bond device) below
 * bond_proc_dir, if that directory exists.  A failure is only logged;
 * the function returns 0 in every case.
 */
static int bond_create_proc_entry(struct bonding *bond)
{
	struct net_device *bond_dev = bond->dev;

	if (bond_proc_dir) {
		bond->proc_entry = create_proc_entry(bond_dev->name,
						     S_IRUGO,
						     bond_proc_dir);
		if (bond->proc_entry == NULL) {
			printk(KERN_WARNING DRV_NAME
			       ": Warning: Cannot create /proc/net/%s/%s\n",
			       DRV_NAME, bond_dev->name);
		} else {
			bond->proc_entry->data = bond;
			bond->proc_entry->proc_fops = &bond_info_fops;
			bond->proc_entry->owner = THIS_MODULE;
			/* keep the name the entry was created under;
			 * bond_remove_proc_entry removes it by this name
			 */
			memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ);
		}
	}

	return 0;
}

/*
 * Remove the per-bond entry created by bond_create_proc_entry, using
 * the name saved in bond->proc_file_name, and clear the saved state.
 */
static void bond_remove_proc_entry(struct bonding *bond)
{
	if (bond_proc_dir && bond->proc_entry) {
		remove_proc_entry(bond->proc_file_name, bond_proc_dir);
		memset(bond->proc_file_name, 0, IFNAMSIZ);
		bond->proc_entry = NULL;
	}
}

/* Create the bonding directory under /proc/net, if doesn't exist yet.
 * Caller must hold rtnl_lock.
3325 */ 3326 static void bond_create_proc_dir(void) 3327 { 3328 int len = strlen(DRV_NAME); 3329 3330 for (bond_proc_dir = init_net.proc_net->subdir; bond_proc_dir; 3331 bond_proc_dir = bond_proc_dir->next) { 3332 if ((bond_proc_dir->namelen == len) && 3333 !memcmp(bond_proc_dir->name, DRV_NAME, len)) { 3334 break; 3335 } 3336 } 3337 3338 if (!bond_proc_dir) { 3339 bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net); 3340 if (bond_proc_dir) { 3341 bond_proc_dir->owner = THIS_MODULE; 3342 } else { 3343 printk(KERN_WARNING DRV_NAME 3344 ": Warning: cannot create /proc/net/%s\n", 3345 DRV_NAME); 3346 } 3347 } 3348 } 3349 3350 /* Destroy the bonding directory under /proc/net, if empty. 3351 * Caller must hold rtnl_lock. 3352 */ 3353 static void bond_destroy_proc_dir(void) 3354 { 3355 struct proc_dir_entry *de; 3356 3357 if (!bond_proc_dir) { 3358 return; 3359 } 3360 3361 /* verify that the /proc dir is empty */ 3362 for (de = bond_proc_dir->subdir; de; de = de->next) { 3363 /* ignore . and .. 
*/ 3364 if (*(de->name) != '.') { 3365 break; 3366 } 3367 } 3368 3369 if (de) { 3370 if (bond_proc_dir->owner == THIS_MODULE) { 3371 bond_proc_dir->owner = NULL; 3372 } 3373 } else { 3374 remove_proc_entry(DRV_NAME, init_net.proc_net); 3375 bond_proc_dir = NULL; 3376 } 3377 } 3378 #endif /* CONFIG_PROC_FS */ 3379 3380 /*-------------------------- netdev event handling --------------------------*/ 3381 3382 /* 3383 * Change device name 3384 */ 3385 static int bond_event_changename(struct bonding *bond) 3386 { 3387 #ifdef CONFIG_PROC_FS 3388 bond_remove_proc_entry(bond); 3389 bond_create_proc_entry(bond); 3390 #endif 3391 down_write(&(bonding_rwsem)); 3392 bond_destroy_sysfs_entry(bond); 3393 bond_create_sysfs_entry(bond); 3394 up_write(&(bonding_rwsem)); 3395 return NOTIFY_DONE; 3396 } 3397 3398 static int bond_master_netdev_event(unsigned long event, struct net_device *bond_dev) 3399 { 3400 struct bonding *event_bond = bond_dev->priv; 3401 3402 switch (event) { 3403 case NETDEV_CHANGENAME: 3404 return bond_event_changename(event_bond); 3405 case NETDEV_UNREGISTER: 3406 bond_release_all(event_bond->dev); 3407 break; 3408 default: 3409 break; 3410 } 3411 3412 return NOTIFY_DONE; 3413 } 3414 3415 static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) 3416 { 3417 struct net_device *bond_dev = slave_dev->master; 3418 struct bonding *bond = bond_dev->priv; 3419 3420 switch (event) { 3421 case NETDEV_UNREGISTER: 3422 if (bond_dev) { 3423 if (bond->setup_by_slave) 3424 bond_release_and_destroy(bond_dev, slave_dev); 3425 else 3426 bond_release(bond_dev, slave_dev); 3427 } 3428 break; 3429 case NETDEV_CHANGE: 3430 /* 3431 * TODO: is this what we get if somebody 3432 * sets up a hierarchical bond, then rmmod's 3433 * one of the slave bonding devices? 3434 */ 3435 break; 3436 case NETDEV_DOWN: 3437 /* 3438 * ... Or is it this? 
3439 */ 3440 break; 3441 case NETDEV_CHANGEMTU: 3442 /* 3443 * TODO: Should slaves be allowed to 3444 * independently alter their MTU? For 3445 * an active-backup bond, slaves need 3446 * not be the same type of device, so 3447 * MTUs may vary. For other modes, 3448 * slaves arguably should have the 3449 * same MTUs. To do this, we'd need to 3450 * take over the slave's change_mtu 3451 * function for the duration of their 3452 * servitude. 3453 */ 3454 break; 3455 case NETDEV_CHANGENAME: 3456 /* 3457 * TODO: handle changing the primary's name 3458 */ 3459 break; 3460 case NETDEV_FEAT_CHANGE: 3461 bond_compute_features(bond); 3462 break; 3463 default: 3464 break; 3465 } 3466 3467 return NOTIFY_DONE; 3468 } 3469 3470 /* 3471 * bond_netdev_event: handle netdev notifier chain events. 3472 * 3473 * This function receives events for the netdev chain. The caller (an 3474 * ioctl handler calling blocking_notifier_call_chain) holds the necessary 3475 * locks for us to safely manipulate the slave devices (RTNL lock, 3476 * dev_probe_lock). 3477 */ 3478 static int bond_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 3479 { 3480 struct net_device *event_dev = (struct net_device *)ptr; 3481 3482 if (event_dev->nd_net != &init_net) 3483 return NOTIFY_DONE; 3484 3485 dprintk("event_dev: %s, event: %lx\n", 3486 (event_dev ? event_dev->name : "None"), 3487 event); 3488 3489 if (!(event_dev->priv_flags & IFF_BONDING)) 3490 return NOTIFY_DONE; 3491 3492 if (event_dev->flags & IFF_MASTER) { 3493 dprintk("IFF_MASTER\n"); 3494 return bond_master_netdev_event(event, event_dev); 3495 } 3496 3497 if (event_dev->flags & IFF_SLAVE) { 3498 dprintk("IFF_SLAVE\n"); 3499 return bond_slave_netdev_event(event, event_dev); 3500 } 3501 3502 return NOTIFY_DONE; 3503 } 3504 3505 /* 3506 * bond_inetaddr_event: handle inetaddr notifier chain events. 
3507 * 3508 * We keep track of device IPs primarily to use as source addresses in 3509 * ARP monitor probes (rather than spewing out broadcasts all the time). 3510 * 3511 * We track one IP for the main device (if it has one), plus one per VLAN. 3512 */ 3513 static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 3514 { 3515 struct in_ifaddr *ifa = ptr; 3516 struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; 3517 struct bonding *bond, *bond_next; 3518 struct vlan_entry *vlan, *vlan_next; 3519 3520 list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) { 3521 if (bond->dev == event_dev) { 3522 switch (event) { 3523 case NETDEV_UP: 3524 bond->master_ip = ifa->ifa_local; 3525 return NOTIFY_OK; 3526 case NETDEV_DOWN: 3527 bond->master_ip = bond_glean_dev_ip(bond->dev); 3528 return NOTIFY_OK; 3529 default: 3530 return NOTIFY_DONE; 3531 } 3532 } 3533 3534 if (list_empty(&bond->vlan_list)) 3535 continue; 3536 3537 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 3538 vlan_list) { 3539 vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); 3540 if (vlan_dev == event_dev) { 3541 switch (event) { 3542 case NETDEV_UP: 3543 vlan->vlan_ip = ifa->ifa_local; 3544 return NOTIFY_OK; 3545 case NETDEV_DOWN: 3546 vlan->vlan_ip = 3547 bond_glean_dev_ip(vlan_dev); 3548 return NOTIFY_OK; 3549 default: 3550 return NOTIFY_DONE; 3551 } 3552 } 3553 } 3554 } 3555 return NOTIFY_DONE; 3556 } 3557 3558 static struct notifier_block bond_netdev_notifier = { 3559 .notifier_call = bond_netdev_event, 3560 }; 3561 3562 static struct notifier_block bond_inetaddr_notifier = { 3563 .notifier_call = bond_inetaddr_event, 3564 }; 3565 3566 /*-------------------------- Packet type handling ---------------------------*/ 3567 3568 /* register to receive lacpdus on a bond */ 3569 static void bond_register_lacpdu(struct bonding *bond) 3570 { 3571 struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); 3572 3573 /* initialize 
packet type */ 3574 pk_type->type = PKT_TYPE_LACPDU; 3575 pk_type->dev = bond->dev; 3576 pk_type->func = bond_3ad_lacpdu_recv; 3577 3578 dev_add_pack(pk_type); 3579 } 3580 3581 /* unregister to receive lacpdus on a bond */ 3582 static void bond_unregister_lacpdu(struct bonding *bond) 3583 { 3584 dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); 3585 } 3586 3587 void bond_register_arp(struct bonding *bond) 3588 { 3589 struct packet_type *pt = &bond->arp_mon_pt; 3590 3591 if (pt->type) 3592 return; 3593 3594 pt->type = htons(ETH_P_ARP); 3595 pt->dev = bond->dev; 3596 pt->func = bond_arp_rcv; 3597 dev_add_pack(pt); 3598 } 3599 3600 void bond_unregister_arp(struct bonding *bond) 3601 { 3602 struct packet_type *pt = &bond->arp_mon_pt; 3603 3604 dev_remove_pack(pt); 3605 pt->type = 0; 3606 } 3607 3608 /*---------------------------- Hashing Policies -----------------------------*/ 3609 3610 /* 3611 * Hash for the output device based upon layer 2 and layer 3 data. If 3612 * the packet is not IP mimic bond_xmit_hash_policy_l2() 3613 */ 3614 static int bond_xmit_hash_policy_l23(struct sk_buff *skb, 3615 struct net_device *bond_dev, int count) 3616 { 3617 struct ethhdr *data = (struct ethhdr *)skb->data; 3618 struct iphdr *iph = ip_hdr(skb); 3619 3620 if (skb->protocol == __constant_htons(ETH_P_IP)) { 3621 return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ 3622 (data->h_dest[5] ^ bond_dev->dev_addr[5])) % count; 3623 } 3624 3625 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3626 } 3627 3628 /* 3629 * Hash for the output device based upon layer 3 and layer 4 data. If 3630 * the packet is a frag or not TCP or UDP, just use layer 3 data. 
If it is 3631 * altogether not IP, mimic bond_xmit_hash_policy_l2() 3632 */ 3633 static int bond_xmit_hash_policy_l34(struct sk_buff *skb, 3634 struct net_device *bond_dev, int count) 3635 { 3636 struct ethhdr *data = (struct ethhdr *)skb->data; 3637 struct iphdr *iph = ip_hdr(skb); 3638 __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); 3639 int layer4_xor = 0; 3640 3641 if (skb->protocol == __constant_htons(ETH_P_IP)) { 3642 if (!(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) && 3643 (iph->protocol == IPPROTO_TCP || 3644 iph->protocol == IPPROTO_UDP)) { 3645 layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1))); 3646 } 3647 return (layer4_xor ^ 3648 ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; 3649 3650 } 3651 3652 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3653 } 3654 3655 /* 3656 * Hash for the output device based upon layer 2 data 3657 */ 3658 static int bond_xmit_hash_policy_l2(struct sk_buff *skb, 3659 struct net_device *bond_dev, int count) 3660 { 3661 struct ethhdr *data = (struct ethhdr *)skb->data; 3662 3663 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3664 } 3665 3666 /*-------------------------- Device entry points ----------------------------*/ 3667 3668 static int bond_open(struct net_device *bond_dev) 3669 { 3670 struct bonding *bond = bond_dev->priv; 3671 3672 bond->kill_timers = 0; 3673 3674 if ((bond->params.mode == BOND_MODE_TLB) || 3675 (bond->params.mode == BOND_MODE_ALB)) { 3676 /* bond_alb_initialize must be called before the timer 3677 * is started. 3678 */ 3679 if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) { 3680 /* something went wrong - fail the open operation */ 3681 return -1; 3682 } 3683 3684 INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); 3685 queue_delayed_work(bond->wq, &bond->alb_work, 0); 3686 } 3687 3688 if (bond->params.miimon) { /* link check interval, in milliseconds. 
*/ 3689 INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); 3690 queue_delayed_work(bond->wq, &bond->mii_work, 0); 3691 } 3692 3693 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ 3694 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) 3695 INIT_DELAYED_WORK(&bond->arp_work, 3696 bond_activebackup_arp_mon); 3697 else 3698 INIT_DELAYED_WORK(&bond->arp_work, 3699 bond_loadbalance_arp_mon); 3700 3701 queue_delayed_work(bond->wq, &bond->arp_work, 0); 3702 if (bond->params.arp_validate) 3703 bond_register_arp(bond); 3704 } 3705 3706 if (bond->params.mode == BOND_MODE_8023AD) { 3707 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); 3708 queue_delayed_work(bond->wq, &bond->ad_work, 0); 3709 /* register to receive LACPDUs */ 3710 bond_register_lacpdu(bond); 3711 } 3712 3713 return 0; 3714 } 3715 3716 static int bond_close(struct net_device *bond_dev) 3717 { 3718 struct bonding *bond = bond_dev->priv; 3719 3720 if (bond->params.mode == BOND_MODE_8023AD) { 3721 /* Unregister the receive of LACPDUs */ 3722 bond_unregister_lacpdu(bond); 3723 } 3724 3725 if (bond->params.arp_validate) 3726 bond_unregister_arp(bond); 3727 3728 write_lock_bh(&bond->lock); 3729 3730 3731 /* signal timers not to re-arm */ 3732 bond->kill_timers = 1; 3733 3734 write_unlock_bh(&bond->lock); 3735 3736 if (bond->params.miimon) { /* link check interval, in milliseconds. */ 3737 cancel_delayed_work(&bond->mii_work); 3738 } 3739 3740 if (bond->params.arp_interval) { /* arp interval, in milliseconds. 
*/ 3741 cancel_delayed_work(&bond->arp_work); 3742 } 3743 3744 switch (bond->params.mode) { 3745 case BOND_MODE_8023AD: 3746 cancel_delayed_work(&bond->ad_work); 3747 break; 3748 case BOND_MODE_TLB: 3749 case BOND_MODE_ALB: 3750 cancel_delayed_work(&bond->alb_work); 3751 break; 3752 default: 3753 break; 3754 } 3755 3756 3757 if ((bond->params.mode == BOND_MODE_TLB) || 3758 (bond->params.mode == BOND_MODE_ALB)) { 3759 /* Must be called only after all 3760 * slaves have been released 3761 */ 3762 bond_alb_deinitialize(bond); 3763 } 3764 3765 return 0; 3766 } 3767 3768 static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) 3769 { 3770 struct bonding *bond = bond_dev->priv; 3771 struct net_device_stats *stats = &(bond->stats), *sstats; 3772 struct slave *slave; 3773 int i; 3774 3775 memset(stats, 0, sizeof(struct net_device_stats)); 3776 3777 read_lock_bh(&bond->lock); 3778 3779 bond_for_each_slave(bond, slave, i) { 3780 sstats = slave->dev->get_stats(slave->dev); 3781 stats->rx_packets += sstats->rx_packets; 3782 stats->rx_bytes += sstats->rx_bytes; 3783 stats->rx_errors += sstats->rx_errors; 3784 stats->rx_dropped += sstats->rx_dropped; 3785 3786 stats->tx_packets += sstats->tx_packets; 3787 stats->tx_bytes += sstats->tx_bytes; 3788 stats->tx_errors += sstats->tx_errors; 3789 stats->tx_dropped += sstats->tx_dropped; 3790 3791 stats->multicast += sstats->multicast; 3792 stats->collisions += sstats->collisions; 3793 3794 stats->rx_length_errors += sstats->rx_length_errors; 3795 stats->rx_over_errors += sstats->rx_over_errors; 3796 stats->rx_crc_errors += sstats->rx_crc_errors; 3797 stats->rx_frame_errors += sstats->rx_frame_errors; 3798 stats->rx_fifo_errors += sstats->rx_fifo_errors; 3799 stats->rx_missed_errors += sstats->rx_missed_errors; 3800 3801 stats->tx_aborted_errors += sstats->tx_aborted_errors; 3802 stats->tx_carrier_errors += sstats->tx_carrier_errors; 3803 stats->tx_fifo_errors += sstats->tx_fifo_errors; 3804 stats->tx_heartbeat_errors 
+= sstats->tx_heartbeat_errors; 3805 stats->tx_window_errors += sstats->tx_window_errors; 3806 } 3807 3808 read_unlock_bh(&bond->lock); 3809 3810 return stats; 3811 } 3812 3813 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) 3814 { 3815 struct net_device *slave_dev = NULL; 3816 struct ifbond k_binfo; 3817 struct ifbond __user *u_binfo = NULL; 3818 struct ifslave k_sinfo; 3819 struct ifslave __user *u_sinfo = NULL; 3820 struct mii_ioctl_data *mii = NULL; 3821 int res = 0; 3822 3823 dprintk("bond_ioctl: master=%s, cmd=%d\n", 3824 bond_dev->name, cmd); 3825 3826 switch (cmd) { 3827 case SIOCGMIIPHY: 3828 mii = if_mii(ifr); 3829 if (!mii) { 3830 return -EINVAL; 3831 } 3832 mii->phy_id = 0; 3833 /* Fall Through */ 3834 case SIOCGMIIREG: 3835 /* 3836 * We do this again just in case we were called by SIOCGMIIREG 3837 * instead of SIOCGMIIPHY. 3838 */ 3839 mii = if_mii(ifr); 3840 if (!mii) { 3841 return -EINVAL; 3842 } 3843 3844 if (mii->reg_num == 1) { 3845 struct bonding *bond = bond_dev->priv; 3846 mii->val_out = 0; 3847 read_lock(&bond->lock); 3848 read_lock(&bond->curr_slave_lock); 3849 if (netif_carrier_ok(bond->dev)) { 3850 mii->val_out = BMSR_LSTATUS; 3851 } 3852 read_unlock(&bond->curr_slave_lock); 3853 read_unlock(&bond->lock); 3854 } 3855 3856 return 0; 3857 case BOND_INFO_QUERY_OLD: 3858 case SIOCBONDINFOQUERY: 3859 u_binfo = (struct ifbond __user *)ifr->ifr_data; 3860 3861 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { 3862 return -EFAULT; 3863 } 3864 3865 res = bond_info_query(bond_dev, &k_binfo); 3866 if (res == 0) { 3867 if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { 3868 return -EFAULT; 3869 } 3870 } 3871 3872 return res; 3873 case BOND_SLAVE_INFO_QUERY_OLD: 3874 case SIOCBONDSLAVEINFOQUERY: 3875 u_sinfo = (struct ifslave __user *)ifr->ifr_data; 3876 3877 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { 3878 return -EFAULT; 3879 } 3880 3881 res = bond_slave_info_query(bond_dev, &k_sinfo); 3882 if 
(res == 0) { 3883 if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { 3884 return -EFAULT; 3885 } 3886 } 3887 3888 return res; 3889 default: 3890 /* Go on */ 3891 break; 3892 } 3893 3894 if (!capable(CAP_NET_ADMIN)) { 3895 return -EPERM; 3896 } 3897 3898 down_write(&(bonding_rwsem)); 3899 slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave); 3900 3901 dprintk("slave_dev=%p: \n", slave_dev); 3902 3903 if (!slave_dev) { 3904 res = -ENODEV; 3905 } else { 3906 dprintk("slave_dev->name=%s: \n", slave_dev->name); 3907 switch (cmd) { 3908 case BOND_ENSLAVE_OLD: 3909 case SIOCBONDENSLAVE: 3910 res = bond_enslave(bond_dev, slave_dev); 3911 break; 3912 case BOND_RELEASE_OLD: 3913 case SIOCBONDRELEASE: 3914 res = bond_release(bond_dev, slave_dev); 3915 break; 3916 case BOND_SETHWADDR_OLD: 3917 case SIOCBONDSETHWADDR: 3918 res = bond_sethwaddr(bond_dev, slave_dev); 3919 break; 3920 case BOND_CHANGE_ACTIVE_OLD: 3921 case SIOCBONDCHANGEACTIVE: 3922 res = bond_ioctl_change_active(bond_dev, slave_dev); 3923 break; 3924 default: 3925 res = -EOPNOTSUPP; 3926 } 3927 3928 dev_put(slave_dev); 3929 } 3930 3931 up_write(&(bonding_rwsem)); 3932 return res; 3933 } 3934 3935 static void bond_set_multicast_list(struct net_device *bond_dev) 3936 { 3937 struct bonding *bond = bond_dev->priv; 3938 struct dev_mc_list *dmi; 3939 3940 write_lock_bh(&bond->lock); 3941 3942 /* 3943 * Do promisc before checking multicast_mode 3944 */ 3945 if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) { 3946 bond_set_promiscuity(bond, 1); 3947 } 3948 3949 if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) { 3950 bond_set_promiscuity(bond, -1); 3951 } 3952 3953 /* set allmulti flag to slaves */ 3954 if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) { 3955 bond_set_allmulti(bond, 1); 3956 } 3957 3958 if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) { 3959 bond_set_allmulti(bond, -1); 3960 } 3961 3962 bond->flags = bond_dev->flags; 
3963 3964 /* looking for addresses to add to slaves' mc list */ 3965 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 3966 if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) { 3967 bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3968 } 3969 } 3970 3971 /* looking for addresses to delete from slaves' list */ 3972 for (dmi = bond->mc_list; dmi; dmi = dmi->next) { 3973 if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) { 3974 bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3975 } 3976 } 3977 3978 /* save master's multicast list */ 3979 bond_mc_list_destroy(bond); 3980 bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC); 3981 3982 write_unlock_bh(&bond->lock); 3983 } 3984 3985 /* 3986 * Change the MTU of all of a master's slaves to match the master 3987 */ 3988 static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) 3989 { 3990 struct bonding *bond = bond_dev->priv; 3991 struct slave *slave, *stop_at; 3992 int res = 0; 3993 int i; 3994 3995 dprintk("bond=%p, name=%s, new_mtu=%d\n", bond, 3996 (bond_dev ? bond_dev->name : "None"), new_mtu); 3997 3998 /* Can't hold bond->lock with bh disabled here since 3999 * some base drivers panic. On the other hand we can't 4000 * hold bond->lock without bh disabled because we'll 4001 * deadlock. The only solution is to rely on the fact 4002 * that we're under rtnl_lock here, and the slaves 4003 * list won't change. This doesn't solve the problem 4004 * of setting the slave's MTU while it is 4005 * transmitting, but the assumption is that the base 4006 * driver can handle that. 4007 * 4008 * TODO: figure out a way to safely iterate the slaves 4009 * list, but without holding a lock around the actual 4010 * call to the base driver. 
4011 */ 4012 4013 bond_for_each_slave(bond, slave, i) { 4014 dprintk("s %p s->p %p c_m %p\n", slave, 4015 slave->prev, slave->dev->change_mtu); 4016 4017 res = dev_set_mtu(slave->dev, new_mtu); 4018 4019 if (res) { 4020 /* If we failed to set the slave's mtu to the new value 4021 * we must abort the operation even in ACTIVE_BACKUP 4022 * mode, because if we allow the backup slaves to have 4023 * different mtu values than the active slave we'll 4024 * need to change their mtu when doing a failover. That 4025 * means changing their mtu from timer context, which 4026 * is probably not a good idea. 4027 */ 4028 dprintk("err %d %s\n", res, slave->dev->name); 4029 goto unwind; 4030 } 4031 } 4032 4033 bond_dev->mtu = new_mtu; 4034 4035 return 0; 4036 4037 unwind: 4038 /* unwind from head to the slave that failed */ 4039 stop_at = slave; 4040 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 4041 int tmp_res; 4042 4043 tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); 4044 if (tmp_res) { 4045 dprintk("unwind err %d dev %s\n", tmp_res, 4046 slave->dev->name); 4047 } 4048 } 4049 4050 return res; 4051 } 4052 4053 /* 4054 * Change HW address 4055 * 4056 * Note that many devices must be down to change the HW address, and 4057 * downing the master releases all slaves. We can make bonds full of 4058 * bonding devices to test this, however. 4059 */ 4060 static int bond_set_mac_address(struct net_device *bond_dev, void *addr) 4061 { 4062 struct bonding *bond = bond_dev->priv; 4063 struct sockaddr *sa = addr, tmp_sa; 4064 struct slave *slave, *stop_at; 4065 int res = 0; 4066 int i; 4067 4068 dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); 4069 4070 /* 4071 * If fail_over_mac is enabled, do nothing and return success. 4072 * Returning an error causes ifenslave to fail. 
4073 */ 4074 if (bond->params.fail_over_mac) 4075 return 0; 4076 4077 if (!is_valid_ether_addr(sa->sa_data)) { 4078 return -EADDRNOTAVAIL; 4079 } 4080 4081 /* Can't hold bond->lock with bh disabled here since 4082 * some base drivers panic. On the other hand we can't 4083 * hold bond->lock without bh disabled because we'll 4084 * deadlock. The only solution is to rely on the fact 4085 * that we're under rtnl_lock here, and the slaves 4086 * list won't change. This doesn't solve the problem 4087 * of setting the slave's hw address while it is 4088 * transmitting, but the assumption is that the base 4089 * driver can handle that. 4090 * 4091 * TODO: figure out a way to safely iterate the slaves 4092 * list, but without holding a lock around the actual 4093 * call to the base driver. 4094 */ 4095 4096 bond_for_each_slave(bond, slave, i) { 4097 dprintk("slave %p %s\n", slave, slave->dev->name); 4098 4099 if (slave->dev->set_mac_address == NULL) { 4100 res = -EOPNOTSUPP; 4101 dprintk("EOPNOTSUPP %s\n", slave->dev->name); 4102 goto unwind; 4103 } 4104 4105 res = dev_set_mac_address(slave->dev, addr); 4106 if (res) { 4107 /* TODO: consider downing the slave 4108 * and retry ? 4109 * User should expect communications 4110 * breakage anyway until ARP finish 4111 * updating, so... 
4112 */ 4113 dprintk("err %d %s\n", res, slave->dev->name); 4114 goto unwind; 4115 } 4116 } 4117 4118 /* success */ 4119 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 4120 return 0; 4121 4122 unwind: 4123 memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 4124 tmp_sa.sa_family = bond_dev->type; 4125 4126 /* unwind from head to the slave that failed */ 4127 stop_at = slave; 4128 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 4129 int tmp_res; 4130 4131 tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); 4132 if (tmp_res) { 4133 dprintk("unwind err %d dev %s\n", tmp_res, 4134 slave->dev->name); 4135 } 4136 } 4137 4138 return res; 4139 } 4140 4141 static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) 4142 { 4143 struct bonding *bond = bond_dev->priv; 4144 struct slave *slave, *start_at; 4145 int i, slave_no, res = 1; 4146 4147 read_lock(&bond->lock); 4148 4149 if (!BOND_IS_OK(bond)) { 4150 goto out; 4151 } 4152 4153 /* 4154 * Concurrent TX may collide on rr_tx_counter; we accept that 4155 * as being rare enough not to justify using an atomic op here 4156 */ 4157 slave_no = bond->rr_tx_counter++ % bond->slave_cnt; 4158 4159 bond_for_each_slave(bond, slave, i) { 4160 slave_no--; 4161 if (slave_no < 0) { 4162 break; 4163 } 4164 } 4165 4166 start_at = slave; 4167 bond_for_each_slave_from(bond, slave, i, start_at) { 4168 if (IS_UP(slave->dev) && 4169 (slave->link == BOND_LINK_UP) && 4170 (slave->state == BOND_STATE_ACTIVE)) { 4171 res = bond_dev_queue_xmit(bond, skb, slave->dev); 4172 break; 4173 } 4174 } 4175 4176 out: 4177 if (res) { 4178 /* no suitable interface, frame not sent */ 4179 dev_kfree_skb(skb); 4180 } 4181 read_unlock(&bond->lock); 4182 return 0; 4183 } 4184 4185 4186 /* 4187 * in active-backup mode, we know that bond->curr_active_slave is always valid if 4188 * the bond has a usable interface. 
4189 */ 4190 static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) 4191 { 4192 struct bonding *bond = bond_dev->priv; 4193 int res = 1; 4194 4195 read_lock(&bond->lock); 4196 read_lock(&bond->curr_slave_lock); 4197 4198 if (!BOND_IS_OK(bond)) { 4199 goto out; 4200 } 4201 4202 if (!bond->curr_active_slave) 4203 goto out; 4204 4205 res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev); 4206 4207 out: 4208 if (res) { 4209 /* no suitable interface, frame not sent */ 4210 dev_kfree_skb(skb); 4211 } 4212 read_unlock(&bond->curr_slave_lock); 4213 read_unlock(&bond->lock); 4214 return 0; 4215 } 4216 4217 /* 4218 * In bond_xmit_xor() , we determine the output device by using a pre- 4219 * determined xmit_hash_policy(), If the selected device is not enabled, 4220 * find the next active slave. 4221 */ 4222 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) 4223 { 4224 struct bonding *bond = bond_dev->priv; 4225 struct slave *slave, *start_at; 4226 int slave_no; 4227 int i; 4228 int res = 1; 4229 4230 read_lock(&bond->lock); 4231 4232 if (!BOND_IS_OK(bond)) { 4233 goto out; 4234 } 4235 4236 slave_no = bond->xmit_hash_policy(skb, bond_dev, bond->slave_cnt); 4237 4238 bond_for_each_slave(bond, slave, i) { 4239 slave_no--; 4240 if (slave_no < 0) { 4241 break; 4242 } 4243 } 4244 4245 start_at = slave; 4246 4247 bond_for_each_slave_from(bond, slave, i, start_at) { 4248 if (IS_UP(slave->dev) && 4249 (slave->link == BOND_LINK_UP) && 4250 (slave->state == BOND_STATE_ACTIVE)) { 4251 res = bond_dev_queue_xmit(bond, skb, slave->dev); 4252 break; 4253 } 4254 } 4255 4256 out: 4257 if (res) { 4258 /* no suitable interface, frame not sent */ 4259 dev_kfree_skb(skb); 4260 } 4261 read_unlock(&bond->lock); 4262 return 0; 4263 } 4264 4265 /* 4266 * in broadcast mode, we send everything to all usable interfaces. 
4267 */ 4268 static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) 4269 { 4270 struct bonding *bond = bond_dev->priv; 4271 struct slave *slave, *start_at; 4272 struct net_device *tx_dev = NULL; 4273 int i; 4274 int res = 1; 4275 4276 read_lock(&bond->lock); 4277 4278 if (!BOND_IS_OK(bond)) { 4279 goto out; 4280 } 4281 4282 read_lock(&bond->curr_slave_lock); 4283 start_at = bond->curr_active_slave; 4284 read_unlock(&bond->curr_slave_lock); 4285 4286 if (!start_at) { 4287 goto out; 4288 } 4289 4290 bond_for_each_slave_from(bond, slave, i, start_at) { 4291 if (IS_UP(slave->dev) && 4292 (slave->link == BOND_LINK_UP) && 4293 (slave->state == BOND_STATE_ACTIVE)) { 4294 if (tx_dev) { 4295 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 4296 if (!skb2) { 4297 printk(KERN_ERR DRV_NAME 4298 ": %s: Error: bond_xmit_broadcast(): " 4299 "skb_clone() failed\n", 4300 bond_dev->name); 4301 continue; 4302 } 4303 4304 res = bond_dev_queue_xmit(bond, skb2, tx_dev); 4305 if (res) { 4306 dev_kfree_skb(skb2); 4307 continue; 4308 } 4309 } 4310 tx_dev = slave->dev; 4311 } 4312 } 4313 4314 if (tx_dev) { 4315 res = bond_dev_queue_xmit(bond, skb, tx_dev); 4316 } 4317 4318 out: 4319 if (res) { 4320 /* no suitable interface, frame not sent */ 4321 dev_kfree_skb(skb); 4322 } 4323 /* frame sent to all suitable interfaces */ 4324 read_unlock(&bond->lock); 4325 return 0; 4326 } 4327 4328 /*------------------------- Device initialization ---------------------------*/ 4329 4330 static void bond_set_xmit_hash_policy(struct bonding *bond) 4331 { 4332 switch (bond->params.xmit_policy) { 4333 case BOND_XMIT_POLICY_LAYER23: 4334 bond->xmit_hash_policy = bond_xmit_hash_policy_l23; 4335 break; 4336 case BOND_XMIT_POLICY_LAYER34: 4337 bond->xmit_hash_policy = bond_xmit_hash_policy_l34; 4338 break; 4339 case BOND_XMIT_POLICY_LAYER2: 4340 default: 4341 bond->xmit_hash_policy = bond_xmit_hash_policy_l2; 4342 break; 4343 } 4344 } 4345 4346 /* 4347 * set bond mode specific net 
device operations 4348 */ 4349 void bond_set_mode_ops(struct bonding *bond, int mode) 4350 { 4351 struct net_device *bond_dev = bond->dev; 4352 4353 switch (mode) { 4354 case BOND_MODE_ROUNDROBIN: 4355 bond_dev->hard_start_xmit = bond_xmit_roundrobin; 4356 break; 4357 case BOND_MODE_ACTIVEBACKUP: 4358 bond_dev->hard_start_xmit = bond_xmit_activebackup; 4359 break; 4360 case BOND_MODE_XOR: 4361 bond_dev->hard_start_xmit = bond_xmit_xor; 4362 bond_set_xmit_hash_policy(bond); 4363 break; 4364 case BOND_MODE_BROADCAST: 4365 bond_dev->hard_start_xmit = bond_xmit_broadcast; 4366 break; 4367 case BOND_MODE_8023AD: 4368 bond_set_master_3ad_flags(bond); 4369 bond_dev->hard_start_xmit = bond_3ad_xmit_xor; 4370 bond_set_xmit_hash_policy(bond); 4371 break; 4372 case BOND_MODE_ALB: 4373 bond_set_master_alb_flags(bond); 4374 /* FALLTHRU */ 4375 case BOND_MODE_TLB: 4376 bond_dev->hard_start_xmit = bond_alb_xmit; 4377 bond_dev->set_mac_address = bond_alb_set_mac_address; 4378 break; 4379 default: 4380 /* Should never happen, mode already checked */ 4381 printk(KERN_ERR DRV_NAME 4382 ": %s: Error: Unknown bonding mode %d\n", 4383 bond_dev->name, 4384 mode); 4385 break; 4386 } 4387 } 4388 4389 static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, 4390 struct ethtool_drvinfo *drvinfo) 4391 { 4392 strncpy(drvinfo->driver, DRV_NAME, 32); 4393 strncpy(drvinfo->version, DRV_VERSION, 32); 4394 snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION); 4395 } 4396 4397 static const struct ethtool_ops bond_ethtool_ops = { 4398 .get_drvinfo = bond_ethtool_get_drvinfo, 4399 }; 4400 4401 /* 4402 * Does not allocate but creates a /proc entry. 4403 * Allowed to fail. 
4404 */ 4405 static int bond_init(struct net_device *bond_dev, struct bond_params *params) 4406 { 4407 struct bonding *bond = bond_dev->priv; 4408 4409 dprintk("Begin bond_init for %s\n", bond_dev->name); 4410 4411 /* initialize rwlocks */ 4412 rwlock_init(&bond->lock); 4413 rwlock_init(&bond->curr_slave_lock); 4414 4415 bond->params = *params; /* copy params struct */ 4416 4417 bond->wq = create_singlethread_workqueue(bond_dev->name); 4418 if (!bond->wq) 4419 return -ENOMEM; 4420 4421 /* Initialize pointers */ 4422 bond->first_slave = NULL; 4423 bond->curr_active_slave = NULL; 4424 bond->current_arp_slave = NULL; 4425 bond->primary_slave = NULL; 4426 bond->dev = bond_dev; 4427 bond->send_grat_arp = 0; 4428 bond->setup_by_slave = 0; 4429 INIT_LIST_HEAD(&bond->vlan_list); 4430 4431 /* Initialize the device entry points */ 4432 bond_dev->open = bond_open; 4433 bond_dev->stop = bond_close; 4434 bond_dev->get_stats = bond_get_stats; 4435 bond_dev->do_ioctl = bond_do_ioctl; 4436 bond_dev->ethtool_ops = &bond_ethtool_ops; 4437 bond_dev->set_multicast_list = bond_set_multicast_list; 4438 bond_dev->change_mtu = bond_change_mtu; 4439 bond_dev->set_mac_address = bond_set_mac_address; 4440 bond_dev->validate_addr = NULL; 4441 4442 bond_set_mode_ops(bond, bond->params.mode); 4443 4444 bond_dev->destructor = free_netdev; 4445 4446 /* Initialize the device options */ 4447 bond_dev->tx_queue_len = 0; 4448 bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; 4449 bond_dev->priv_flags |= IFF_BONDING; 4450 4451 /* At first, we block adding VLANs. That's the only way to 4452 * prevent problems that occur when adding VLANs over an 4453 * empty bond. The block will be removed once non-challenged 4454 * slaves are enslaved. 
4455 */ 4456 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 4457 4458 /* don't acquire bond device's netif_tx_lock when 4459 * transmitting */ 4460 bond_dev->features |= NETIF_F_LLTX; 4461 4462 /* By default, we declare the bond to be fully 4463 * VLAN hardware accelerated capable. Special 4464 * care is taken in the various xmit functions 4465 * when there are slaves that are not hw accel 4466 * capable 4467 */ 4468 bond_dev->vlan_rx_register = bond_vlan_rx_register; 4469 bond_dev->vlan_rx_add_vid = bond_vlan_rx_add_vid; 4470 bond_dev->vlan_rx_kill_vid = bond_vlan_rx_kill_vid; 4471 bond_dev->features |= (NETIF_F_HW_VLAN_TX | 4472 NETIF_F_HW_VLAN_RX | 4473 NETIF_F_HW_VLAN_FILTER); 4474 4475 #ifdef CONFIG_PROC_FS 4476 bond_create_proc_entry(bond); 4477 #endif 4478 list_add_tail(&bond->bond_list, &bond_dev_list); 4479 4480 return 0; 4481 } 4482 4483 /* De-initialize device specific data. 4484 * Caller must hold rtnl_lock. 4485 */ 4486 static void bond_deinit(struct net_device *bond_dev) 4487 { 4488 struct bonding *bond = bond_dev->priv; 4489 4490 list_del(&bond->bond_list); 4491 4492 #ifdef CONFIG_PROC_FS 4493 bond_remove_proc_entry(bond); 4494 #endif 4495 } 4496 4497 static void bond_work_cancel_all(struct bonding *bond) 4498 { 4499 write_lock_bh(&bond->lock); 4500 bond->kill_timers = 1; 4501 write_unlock_bh(&bond->lock); 4502 4503 if (bond->params.miimon && delayed_work_pending(&bond->mii_work)) 4504 cancel_delayed_work(&bond->mii_work); 4505 4506 if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work)) 4507 cancel_delayed_work(&bond->arp_work); 4508 4509 if (bond->params.mode == BOND_MODE_ALB && 4510 delayed_work_pending(&bond->alb_work)) 4511 cancel_delayed_work(&bond->alb_work); 4512 4513 if (bond->params.mode == BOND_MODE_8023AD && 4514 delayed_work_pending(&bond->ad_work)) 4515 cancel_delayed_work(&bond->ad_work); 4516 } 4517 4518 /* Unregister and free all bond devices. 4519 * Caller must hold rtnl_lock. 
4520 */ 4521 static void bond_free_all(void) 4522 { 4523 struct bonding *bond, *nxt; 4524 4525 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { 4526 struct net_device *bond_dev = bond->dev; 4527 4528 bond_work_cancel_all(bond); 4529 bond_mc_list_destroy(bond); 4530 /* Release the bonded slaves */ 4531 bond_release_all(bond_dev); 4532 bond_deinit(bond_dev); 4533 unregister_netdevice(bond_dev); 4534 } 4535 4536 #ifdef CONFIG_PROC_FS 4537 bond_destroy_proc_dir(); 4538 #endif 4539 } 4540 4541 /*------------------------- Module initialization ---------------------------*/ 4542 4543 /* 4544 * Convert string input module parms. Accept either the 4545 * number of the mode or its string name. A bit complicated because 4546 * some mode names are substrings of other names, and calls from sysfs 4547 * may have whitespace in the name (trailing newlines, for example). 4548 */ 4549 int bond_parse_parm(const char *buf, struct bond_parm_tbl *tbl) 4550 { 4551 int mode = -1, i, rv; 4552 char modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, }; 4553 4554 rv = sscanf(buf, "%d", &mode); 4555 if (!rv) { 4556 rv = sscanf(buf, "%20s", modestr); 4557 if (!rv) 4558 return -1; 4559 } 4560 4561 for (i = 0; tbl[i].modename; i++) { 4562 if (mode == tbl[i].mode) 4563 return tbl[i].mode; 4564 if (strcmp(modestr, tbl[i].modename) == 0) 4565 return tbl[i].mode; 4566 } 4567 4568 return -1; 4569 } 4570 4571 static int bond_check_params(struct bond_params *params) 4572 { 4573 int arp_validate_value; 4574 4575 /* 4576 * Convert string parameters. 4577 */ 4578 if (mode) { 4579 bond_mode = bond_parse_parm(mode, bond_mode_tbl); 4580 if (bond_mode == -1) { 4581 printk(KERN_ERR DRV_NAME 4582 ": Error: Invalid bonding mode \"%s\"\n", 4583 mode == NULL ? 
"NULL" : mode); 4584 return -EINVAL; 4585 } 4586 } 4587 4588 if (xmit_hash_policy) { 4589 if ((bond_mode != BOND_MODE_XOR) && 4590 (bond_mode != BOND_MODE_8023AD)) { 4591 printk(KERN_INFO DRV_NAME 4592 ": xor_mode param is irrelevant in mode %s\n", 4593 bond_mode_name(bond_mode)); 4594 } else { 4595 xmit_hashtype = bond_parse_parm(xmit_hash_policy, 4596 xmit_hashtype_tbl); 4597 if (xmit_hashtype == -1) { 4598 printk(KERN_ERR DRV_NAME 4599 ": Error: Invalid xmit_hash_policy \"%s\"\n", 4600 xmit_hash_policy == NULL ? "NULL" : 4601 xmit_hash_policy); 4602 return -EINVAL; 4603 } 4604 } 4605 } 4606 4607 if (lacp_rate) { 4608 if (bond_mode != BOND_MODE_8023AD) { 4609 printk(KERN_INFO DRV_NAME 4610 ": lacp_rate param is irrelevant in mode %s\n", 4611 bond_mode_name(bond_mode)); 4612 } else { 4613 lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); 4614 if (lacp_fast == -1) { 4615 printk(KERN_ERR DRV_NAME 4616 ": Error: Invalid lacp rate \"%s\"\n", 4617 lacp_rate == NULL ? "NULL" : lacp_rate); 4618 return -EINVAL; 4619 } 4620 } 4621 } 4622 4623 if (max_bonds < 1 || max_bonds > INT_MAX) { 4624 printk(KERN_WARNING DRV_NAME 4625 ": Warning: max_bonds (%d) not in range %d-%d, so it " 4626 "was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", 4627 max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS); 4628 max_bonds = BOND_DEFAULT_MAX_BONDS; 4629 } 4630 4631 if (miimon < 0) { 4632 printk(KERN_WARNING DRV_NAME 4633 ": Warning: miimon module parameter (%d), " 4634 "not in range 0-%d, so it was reset to %d\n", 4635 miimon, INT_MAX, BOND_LINK_MON_INTERV); 4636 miimon = BOND_LINK_MON_INTERV; 4637 } 4638 4639 if (updelay < 0) { 4640 printk(KERN_WARNING DRV_NAME 4641 ": Warning: updelay module parameter (%d), " 4642 "not in range 0-%d, so it was reset to 0\n", 4643 updelay, INT_MAX); 4644 updelay = 0; 4645 } 4646 4647 if (downdelay < 0) { 4648 printk(KERN_WARNING DRV_NAME 4649 ": Warning: downdelay module parameter (%d), " 4650 "not in range 0-%d, so it was reset to 0\n", 4651 downdelay, 
INT_MAX); 4652 downdelay = 0; 4653 } 4654 4655 if ((use_carrier != 0) && (use_carrier != 1)) { 4656 printk(KERN_WARNING DRV_NAME 4657 ": Warning: use_carrier module parameter (%d), " 4658 "not of valid value (0/1), so it was set to 1\n", 4659 use_carrier); 4660 use_carrier = 1; 4661 } 4662 4663 /* reset values for 802.3ad */ 4664 if (bond_mode == BOND_MODE_8023AD) { 4665 if (!miimon) { 4666 printk(KERN_WARNING DRV_NAME 4667 ": Warning: miimon must be specified, " 4668 "otherwise bonding will not detect link " 4669 "failure, speed and duplex which are " 4670 "essential for 802.3ad operation\n"); 4671 printk(KERN_WARNING "Forcing miimon to 100msec\n"); 4672 miimon = 100; 4673 } 4674 } 4675 4676 /* reset values for TLB/ALB */ 4677 if ((bond_mode == BOND_MODE_TLB) || 4678 (bond_mode == BOND_MODE_ALB)) { 4679 if (!miimon) { 4680 printk(KERN_WARNING DRV_NAME 4681 ": Warning: miimon must be specified, " 4682 "otherwise bonding will not detect link " 4683 "failure and link speed which are essential " 4684 "for TLB/ALB load balancing\n"); 4685 printk(KERN_WARNING "Forcing miimon to 100msec\n"); 4686 miimon = 100; 4687 } 4688 } 4689 4690 if (bond_mode == BOND_MODE_ALB) { 4691 printk(KERN_NOTICE DRV_NAME 4692 ": In ALB mode you might experience client " 4693 "disconnections upon reconnection of a link if the " 4694 "bonding module updelay parameter (%d msec) is " 4695 "incompatible with the forwarding delay time of the " 4696 "switch\n", 4697 updelay); 4698 } 4699 4700 if (!miimon) { 4701 if (updelay || downdelay) { 4702 /* just warn the user the up/down delay will have 4703 * no effect since miimon is zero... 
4704 */ 4705 printk(KERN_WARNING DRV_NAME 4706 ": Warning: miimon module parameter not set " 4707 "and updelay (%d) or downdelay (%d) module " 4708 "parameter is set; updelay and downdelay have " 4709 "no effect unless miimon is set\n", 4710 updelay, downdelay); 4711 } 4712 } else { 4713 /* don't allow arp monitoring */ 4714 if (arp_interval) { 4715 printk(KERN_WARNING DRV_NAME 4716 ": Warning: miimon (%d) and arp_interval (%d) " 4717 "can't be used simultaneously, disabling ARP " 4718 "monitoring\n", 4719 miimon, arp_interval); 4720 arp_interval = 0; 4721 } 4722 4723 if ((updelay % miimon) != 0) { 4724 printk(KERN_WARNING DRV_NAME 4725 ": Warning: updelay (%d) is not a multiple " 4726 "of miimon (%d), updelay rounded to %d ms\n", 4727 updelay, miimon, (updelay / miimon) * miimon); 4728 } 4729 4730 updelay /= miimon; 4731 4732 if ((downdelay % miimon) != 0) { 4733 printk(KERN_WARNING DRV_NAME 4734 ": Warning: downdelay (%d) is not a multiple " 4735 "of miimon (%d), downdelay rounded to %d ms\n", 4736 downdelay, miimon, 4737 (downdelay / miimon) * miimon); 4738 } 4739 4740 downdelay /= miimon; 4741 } 4742 4743 if (arp_interval < 0) { 4744 printk(KERN_WARNING DRV_NAME 4745 ": Warning: arp_interval module parameter (%d) " 4746 ", not in range 0-%d, so it was reset to %d\n", 4747 arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); 4748 arp_interval = BOND_LINK_ARP_INTERV; 4749 } 4750 4751 for (arp_ip_count = 0; 4752 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count]; 4753 arp_ip_count++) { 4754 /* not complete check, but should be good enough to 4755 catch mistakes */ 4756 if (!isdigit(arp_ip_target[arp_ip_count][0])) { 4757 printk(KERN_WARNING DRV_NAME 4758 ": Warning: bad arp_ip_target module parameter " 4759 "(%s), ARP monitoring will not be performed\n", 4760 arp_ip_target[arp_ip_count]); 4761 arp_interval = 0; 4762 } else { 4763 __be32 ip = in_aton(arp_ip_target[arp_ip_count]); 4764 arp_target[arp_ip_count] = ip; 4765 } 4766 } 4767 4768 if 
(arp_interval && !arp_ip_count) { 4769 /* don't allow arping if no arp_ip_target given... */ 4770 printk(KERN_WARNING DRV_NAME 4771 ": Warning: arp_interval module parameter (%d) " 4772 "specified without providing an arp_ip_target " 4773 "parameter, arp_interval was reset to 0\n", 4774 arp_interval); 4775 arp_interval = 0; 4776 } 4777 4778 if (arp_validate) { 4779 if (bond_mode != BOND_MODE_ACTIVEBACKUP) { 4780 printk(KERN_ERR DRV_NAME 4781 ": arp_validate only supported in active-backup mode\n"); 4782 return -EINVAL; 4783 } 4784 if (!arp_interval) { 4785 printk(KERN_ERR DRV_NAME 4786 ": arp_validate requires arp_interval\n"); 4787 return -EINVAL; 4788 } 4789 4790 arp_validate_value = bond_parse_parm(arp_validate, 4791 arp_validate_tbl); 4792 if (arp_validate_value == -1) { 4793 printk(KERN_ERR DRV_NAME 4794 ": Error: invalid arp_validate \"%s\"\n", 4795 arp_validate == NULL ? "NULL" : arp_validate); 4796 return -EINVAL; 4797 } 4798 } else 4799 arp_validate_value = 0; 4800 4801 if (miimon) { 4802 printk(KERN_INFO DRV_NAME 4803 ": MII link monitoring set to %d ms\n", 4804 miimon); 4805 } else if (arp_interval) { 4806 int i; 4807 4808 printk(KERN_INFO DRV_NAME 4809 ": ARP monitoring set to %d ms, validate %s, with %d target(s):", 4810 arp_interval, 4811 arp_validate_tbl[arp_validate_value].modename, 4812 arp_ip_count); 4813 4814 for (i = 0; i < arp_ip_count; i++) 4815 printk (" %s", arp_ip_target[i]); 4816 4817 printk("\n"); 4818 4819 } else { 4820 /* miimon and arp_interval not set, we need one so things 4821 * work as expected, see bonding.txt for details 4822 */ 4823 printk(KERN_WARNING DRV_NAME 4824 ": Warning: either miimon or arp_interval and " 4825 "arp_ip_target module parameters must be specified, " 4826 "otherwise bonding will not detect link failures! 
see " 4827 "bonding.txt for details.\n"); 4828 } 4829 4830 if (primary && !USES_PRIMARY(bond_mode)) { 4831 /* currently, using a primary only makes sense 4832 * in active backup, TLB or ALB modes 4833 */ 4834 printk(KERN_WARNING DRV_NAME 4835 ": Warning: %s primary device specified but has no " 4836 "effect in %s mode\n", 4837 primary, bond_mode_name(bond_mode)); 4838 primary = NULL; 4839 } 4840 4841 if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP)) 4842 printk(KERN_WARNING DRV_NAME 4843 ": Warning: fail_over_mac only affects " 4844 "active-backup mode.\n"); 4845 4846 /* fill params struct with the proper values */ 4847 params->mode = bond_mode; 4848 params->xmit_policy = xmit_hashtype; 4849 params->miimon = miimon; 4850 params->arp_interval = arp_interval; 4851 params->arp_validate = arp_validate_value; 4852 params->updelay = updelay; 4853 params->downdelay = downdelay; 4854 params->use_carrier = use_carrier; 4855 params->lacp_fast = lacp_fast; 4856 params->primary[0] = 0; 4857 params->fail_over_mac = fail_over_mac; 4858 4859 if (primary) { 4860 strncpy(params->primary, primary, IFNAMSIZ); 4861 params->primary[IFNAMSIZ - 1] = 0; 4862 } 4863 4864 memcpy(params->arp_targets, arp_target, sizeof(arp_target)); 4865 4866 return 0; 4867 } 4868 4869 static struct lock_class_key bonding_netdev_xmit_lock_key; 4870 4871 /* Create a new bond based on the specified name and bonding parameters. 4872 * If name is NULL, obtain a suitable "bond%d" name for us. 4873 * Caller must NOT hold rtnl_lock; we need to release it here before we 4874 * set up our sysfs entries. 4875 */ 4876 int bond_create(char *name, struct bond_params *params, struct bonding **newbond) 4877 { 4878 struct net_device *bond_dev; 4879 struct bonding *bond, *nxt; 4880 int res; 4881 4882 rtnl_lock(); 4883 down_write(&bonding_rwsem); 4884 4885 /* Check to see if the bond already exists. 
*/ 4886 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) 4887 if (strnicmp(bond->dev->name, name, IFNAMSIZ) == 0) { 4888 printk(KERN_ERR DRV_NAME 4889 ": cannot add bond %s; it already exists\n", 4890 name); 4891 res = -EPERM; 4892 goto out_rtnl; 4893 } 4894 4895 bond_dev = alloc_netdev(sizeof(struct bonding), name ? name : "", 4896 ether_setup); 4897 if (!bond_dev) { 4898 printk(KERN_ERR DRV_NAME 4899 ": %s: eek! can't alloc netdev!\n", 4900 name); 4901 res = -ENOMEM; 4902 goto out_rtnl; 4903 } 4904 4905 if (!name) { 4906 res = dev_alloc_name(bond_dev, "bond%d"); 4907 if (res < 0) 4908 goto out_netdev; 4909 } 4910 4911 /* bond_init() must be called after dev_alloc_name() (for the 4912 * /proc files), but before register_netdevice(), because we 4913 * need to set function pointers. 4914 */ 4915 4916 res = bond_init(bond_dev, params); 4917 if (res < 0) { 4918 goto out_netdev; 4919 } 4920 4921 res = register_netdevice(bond_dev); 4922 if (res < 0) { 4923 goto out_bond; 4924 } 4925 4926 lockdep_set_class(&bond_dev->_xmit_lock, &bonding_netdev_xmit_lock_key); 4927 4928 if (newbond) 4929 *newbond = bond_dev->priv; 4930 4931 netif_carrier_off(bond_dev); 4932 4933 up_write(&bonding_rwsem); 4934 rtnl_unlock(); /* allows sysfs registration of net device */ 4935 res = bond_create_sysfs_entry(bond_dev->priv); 4936 if (res < 0) { 4937 rtnl_lock(); 4938 down_write(&bonding_rwsem); 4939 goto out_bond; 4940 } 4941 4942 return 0; 4943 4944 out_bond: 4945 bond_deinit(bond_dev); 4946 out_netdev: 4947 free_netdev(bond_dev); 4948 out_rtnl: 4949 up_write(&bonding_rwsem); 4950 rtnl_unlock(); 4951 return res; 4952 } 4953 4954 static int __init bonding_init(void) 4955 { 4956 int i; 4957 int res; 4958 struct bonding *bond, *nxt; 4959 4960 printk(KERN_INFO "%s", version); 4961 4962 res = bond_check_params(&bonding_defaults); 4963 if (res) { 4964 goto out; 4965 } 4966 4967 #ifdef CONFIG_PROC_FS 4968 bond_create_proc_dir(); 4969 #endif 4970 4971 init_rwsem(&bonding_rwsem); 4972 
4973 for (i = 0; i < max_bonds; i++) { 4974 res = bond_create(NULL, &bonding_defaults, NULL); 4975 if (res) 4976 goto err; 4977 } 4978 4979 res = bond_create_sysfs(); 4980 if (res) 4981 goto err; 4982 4983 register_netdevice_notifier(&bond_netdev_notifier); 4984 register_inetaddr_notifier(&bond_inetaddr_notifier); 4985 4986 goto out; 4987 err: 4988 list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { 4989 bond_work_cancel_all(bond); 4990 destroy_workqueue(bond->wq); 4991 } 4992 4993 rtnl_lock(); 4994 bond_free_all(); 4995 bond_destroy_sysfs(); 4996 rtnl_unlock(); 4997 out: 4998 return res; 4999 5000 } 5001 5002 static void __exit bonding_exit(void) 5003 { 5004 unregister_netdevice_notifier(&bond_netdev_notifier); 5005 unregister_inetaddr_notifier(&bond_inetaddr_notifier); 5006 5007 rtnl_lock(); 5008 bond_free_all(); 5009 bond_destroy_sysfs(); 5010 rtnl_unlock(); 5011 } 5012 5013 module_init(bonding_init); 5014 module_exit(bonding_exit); 5015 MODULE_LICENSE("GPL"); 5016 MODULE_VERSION(DRV_VERSION); 5017 MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); 5018 MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); 5019 MODULE_SUPPORTED_DEVICE("most ethernet devices"); 5020 5021 /* 5022 * Local variables: 5023 * c-indent-level: 8 5024 * c-basic-offset: 8 5025 * tab-width: 8 5026 * End: 5027 */ 5028 5029