1 /* 2 * originally based on the dummy device. 3 * 4 * Copyright 1999, Thomas Davis, tadavis@lbl.gov. 5 * Licensed under the GPL. Based on dummy.c, and eql.c devices. 6 * 7 * bonding.c: an Ethernet Bonding driver 8 * 9 * This is useful to talk to a Cisco EtherChannel compatible equipment: 10 * Cisco 5500 11 * Sun Trunking (Solaris) 12 * Alteon AceDirector Trunks 13 * Linux Bonding 14 * and probably many L2 switches ... 15 * 16 * How it works: 17 * ifconfig bond0 ipaddress netmask up 18 * will setup a network device, with an ip address. No mac address 19 * will be assigned at this time. The hw mac address will come from 20 * the first slave bonded to the channel. All slaves will then use 21 * this hw mac address. 22 * 23 * ifconfig bond0 down 24 * will release all slaves, marking them as down. 25 * 26 * ifenslave bond0 eth0 27 * will attach eth0 to bond0 as a slave. eth0 hw mac address will either 28 * a: be used as initial mac address 29 * b: if a hw mac address already is there, eth0's hw mac address 30 * will then be set from bond0. 31 * 32 */ 33 34 //#define BONDING_DEBUG 1 35 36 #include <linux/config.h> 37 #include <linux/kernel.h> 38 #include <linux/module.h> 39 #include <linux/sched.h> 40 #include <linux/types.h> 41 #include <linux/fcntl.h> 42 #include <linux/interrupt.h> 43 #include <linux/ptrace.h> 44 #include <linux/ioport.h> 45 #include <linux/in.h> 46 #include <net/ip.h> 47 #include <linux/ip.h> 48 #include <linux/tcp.h> 49 #include <linux/udp.h> 50 #include <linux/slab.h> 51 #include <linux/string.h> 52 #include <linux/init.h> 53 #include <linux/timer.h> 54 #include <linux/socket.h> 55 #include <linux/ctype.h> 56 #include <linux/inet.h> 57 #include <linux/bitops.h> 58 #include <asm/system.h> 59 #include <asm/io.h> 60 #include <asm/dma.h> 61 #include <asm/uaccess.h> 62 #include <linux/errno.h> 63 #include <linux/netdevice.h> 64 #include <linux/inetdevice.h> 65 #include <linux/etherdevice.h> 66 #include <linux/skbuff.h> 67 #include <net/sock.h> 68 #include <linux/rtnetlink.h> 69 #include <linux/proc_fs.h> 70 #include <linux/seq_file.h> 71 #include <linux/smp.h> 72 #include <linux/if_ether.h> 73 #include <net/arp.h> 74 #include <linux/mii.h> 75 #include <linux/ethtool.h> 76 #include <linux/if_vlan.h> 77 #include <linux/if_bonding.h> 78 #include <net/route.h> 79 #include "bonding.h" 80 #include "bond_3ad.h" 81 #include "bond_alb.h" 82 83 /*---------------------------- Module parameters ----------------------------*/ 84 85 /* monitor all links that often (in milliseconds). 
<=0 disables monitoring */ 86 #define BOND_LINK_MON_INTERV 0 87 #define BOND_LINK_ARP_INTERV 0 88 89 static int max_bonds = BOND_DEFAULT_MAX_BONDS; 90 static int miimon = BOND_LINK_MON_INTERV; 91 static int updelay = 0; 92 static int downdelay = 0; 93 static int use_carrier = 1; 94 static char *mode = NULL; 95 static char *primary = NULL; 96 static char *lacp_rate = NULL; 97 static char *xmit_hash_policy = NULL; 98 static int arp_interval = BOND_LINK_ARP_INTERV; 99 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; 100 struct bond_params bonding_defaults; 101 102 module_param(max_bonds, int, 0); 103 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); 104 module_param(miimon, int, 0); 105 MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); 106 module_param(updelay, int, 0); 107 MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); 108 module_param(downdelay, int, 0); 109 MODULE_PARM_DESC(downdelay, "Delay before considering link down, " 110 "in milliseconds"); 111 module_param(use_carrier, int, 0); 112 MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; " 113 "0 for off, 1 for on (default)"); 114 module_param(mode, charp, 0); 115 MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, " 116 "1 for active-backup, 2 for balance-xor, " 117 "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, " 118 "6 for balance-alb"); 119 module_param(primary, charp, 0); 120 MODULE_PARM_DESC(primary, "Primary network device to use"); 121 module_param(lacp_rate, charp, 0); 122 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner " 123 "(slow/fast)"); 124 module_param(xmit_hash_policy, charp, 0); 125 MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)" 126 ", 1 for layer 3+4"); 127 module_param(arp_interval, int, 0); 128 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); 129 module_param_array(arp_ip_target, charp, NULL, 0); 130 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); 131 132 /*----------------------------- Global variables ----------------------------*/ 133 134 static const char *version = 135 DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; 136 137 LIST_HEAD(bond_dev_list); 138 139 #ifdef CONFIG_PROC_FS 140 static struct proc_dir_entry *bond_proc_dir = NULL; 141 #endif 142 143 extern struct rw_semaphore bonding_rwsem; 144 static u32 arp_target[BOND_MAX_ARP_TARGETS] = { 0, } ; 145 static int arp_ip_count = 0; 146 static int bond_mode = BOND_MODE_ROUNDROBIN; 147 static int xmit_hashtype= BOND_XMIT_POLICY_LAYER2; 148 static int lacp_fast = 0; 149 150 151 struct bond_parm_tbl bond_lacp_tbl[] = { 152 { "slow", AD_LACP_SLOW}, 153 { "fast", AD_LACP_FAST}, 154 { NULL, -1}, 155 }; 156 157 struct bond_parm_tbl bond_mode_tbl[] = { 158 { "balance-rr", BOND_MODE_ROUNDROBIN}, 159 { "active-backup", BOND_MODE_ACTIVEBACKUP}, 160 { "balance-xor", BOND_MODE_XOR}, 161 { "broadcast", BOND_MODE_BROADCAST}, 162 { "802.3ad", BOND_MODE_8023AD}, 163 { "balance-tlb", BOND_MODE_TLB}, 164 { "balance-alb", BOND_MODE_ALB}, 165 { NULL, -1}, 166 }; 167 168 struct bond_parm_tbl xmit_hashtype_tbl[] = { 169 { "layer2", BOND_XMIT_POLICY_LAYER2}, 170 { "layer3+4", BOND_XMIT_POLICY_LAYER34}, 171 { NULL, -1}, 172 }; 173 174 /*-------------------------- Forward declarations ---------------------------*/ 175 176 static void bond_send_gratuitous_arp(struct bonding *bond); 177 178 /*---------------------------- General routines -----------------------------*/ 179 180 
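/*
 * Illustrative sketch only, not part of the original driver flow shown
 * here: how a string module parameter such as mode="balance-rr" or
 * lacp_rate="fast" could be matched against one of the bond_parm_tbl
 * arrays declared above.  The helper name and the modename/mode member
 * names are assumptions inferred from the positional initializers.
 */
static inline int bond_parm_tbl_lookup(const char *arg,
				       const struct bond_parm_tbl *tbl)
{
	int i;

	/* every table above is terminated by a { NULL, -1 } entry */
	for (i = 0; tbl[i].modename; i++) {
		if (strcmp(arg, tbl[i].modename) == 0)
			return tbl[i].mode;
	}

	return -1;	/* not found; caller keeps its built-in default */
}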
const char *bond_mode_name(int mode) 181 { 182 switch (mode) { 183 case BOND_MODE_ROUNDROBIN : 184 return "load balancing (round-robin)"; 185 case BOND_MODE_ACTIVEBACKUP : 186 return "fault-tolerance (active-backup)"; 187 case BOND_MODE_XOR : 188 return "load balancing (xor)"; 189 case BOND_MODE_BROADCAST : 190 return "fault-tolerance (broadcast)"; 191 case BOND_MODE_8023AD: 192 return "IEEE 802.3ad Dynamic link aggregation"; 193 case BOND_MODE_TLB: 194 return "transmit load balancing"; 195 case BOND_MODE_ALB: 196 return "adaptive load balancing"; 197 default: 198 return "unknown"; 199 } 200 } 201 202 /*---------------------------------- VLAN -----------------------------------*/ 203 204 /** 205 * bond_add_vlan - add a new vlan id on bond 206 * @bond: bond that got the notification 207 * @vlan_id: the vlan id to add 208 * 209 * Returns -ENOMEM if allocation failed. 210 */ 211 static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) 212 { 213 struct vlan_entry *vlan; 214 215 dprintk("bond: %s, vlan id %d\n", 216 (bond ? bond->dev->name: "None"), vlan_id); 217 218 vlan = kmalloc(sizeof(struct vlan_entry), GFP_KERNEL); 219 if (!vlan) { 220 return -ENOMEM; 221 } 222 223 INIT_LIST_HEAD(&vlan->vlan_list); 224 vlan->vlan_id = vlan_id; 225 vlan->vlan_ip = 0; 226 227 write_lock_bh(&bond->lock); 228 229 list_add_tail(&vlan->vlan_list, &bond->vlan_list); 230 231 write_unlock_bh(&bond->lock); 232 233 dprintk("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name); 234 235 return 0; 236 } 237 238 /** 239 * bond_del_vlan - delete a vlan id from bond 240 * @bond: bond that got the notification 241 * @vlan_id: the vlan id to delete 242 * 243 * returns -ENODEV if @vlan_id was not found in @bond. 244 */ 245 static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) 246 { 247 struct vlan_entry *vlan, *next; 248 int res = -ENODEV; 249 250 dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); 251 252 write_lock_bh(&bond->lock); 253 254 list_for_each_entry_safe(vlan, next, &bond->vlan_list, vlan_list) { 255 if (vlan->vlan_id == vlan_id) { 256 list_del(&vlan->vlan_list); 257 258 if ((bond->params.mode == BOND_MODE_TLB) || 259 (bond->params.mode == BOND_MODE_ALB)) { 260 bond_alb_clear_vlan(bond, vlan_id); 261 } 262 263 dprintk("removed VLAN ID %d from bond %s\n", vlan_id, 264 bond->dev->name); 265 266 kfree(vlan); 267 268 if (list_empty(&bond->vlan_list) && 269 (bond->slave_cnt == 0)) { 270 /* Last VLAN removed and no slaves, so 271 * restore block on adding VLANs. This will 272 * be removed once new slaves that are not 273 * VLAN challenged will be added. 274 */ 275 bond->dev->features |= NETIF_F_VLAN_CHALLENGED; 276 } 277 278 res = 0; 279 goto out; 280 } 281 } 282 283 dprintk("couldn't find VLAN ID %d in bond %s\n", vlan_id, 284 bond->dev->name); 285 286 out: 287 write_unlock_bh(&bond->lock); 288 return res; 289 } 290 291 /** 292 * bond_has_challenged_slaves 293 * @bond: the bond we're working on 294 * 295 * Searches the slave list. Returns 1 if a vlan challenged slave 296 * was found, 0 otherwise. 297 * 298 * Assumes bond->lock is held. 
299 */ 300 static int bond_has_challenged_slaves(struct bonding *bond) 301 { 302 struct slave *slave; 303 int i; 304 305 bond_for_each_slave(bond, slave, i) { 306 if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) { 307 dprintk("found VLAN challenged slave - %s\n", 308 slave->dev->name); 309 return 1; 310 } 311 } 312 313 dprintk("no VLAN challenged slaves found\n"); 314 return 0; 315 } 316 317 /** 318 * bond_next_vlan - safely skip to the next item in the vlans list. 319 * @bond: the bond we're working on 320 * @curr: item we're advancing from 321 * 322 * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL, 323 * or @curr->next otherwise (even if it is @curr itself again). 324 * 325 * Caller must hold bond->lock 326 */ 327 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) 328 { 329 struct vlan_entry *next, *last; 330 331 if (list_empty(&bond->vlan_list)) { 332 return NULL; 333 } 334 335 if (!curr) { 336 next = list_entry(bond->vlan_list.next, 337 struct vlan_entry, vlan_list); 338 } else { 339 last = list_entry(bond->vlan_list.prev, 340 struct vlan_entry, vlan_list); 341 if (last == curr) { 342 next = list_entry(bond->vlan_list.next, 343 struct vlan_entry, vlan_list); 344 } else { 345 next = list_entry(curr->vlan_list.next, 346 struct vlan_entry, vlan_list); 347 } 348 } 349 350 return next; 351 } 352 353 /** 354 * bond_dev_queue_xmit - Prepare skb for xmit. 355 * 356 * @bond: bond device that got this skb for tx. 357 * @skb: hw accel VLAN tagged skb to transmit 358 * @slave_dev: slave that is supposed to xmit this skbuff 359 * 360 * When the bond gets an skb to transmit that is 361 * already hardware accelerated VLAN tagged, and it 362 * needs to relay this skb to a slave that is not 363 * hw accel capable, the skb needs to be "unaccelerated", 364 * i.e. strip the hwaccel tag and re-insert it as part 365 * of the payload. 366 */ 367 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev) 368 { 369 unsigned short vlan_id; 370 371 if (!list_empty(&bond->vlan_list) && 372 !(slave_dev->features & NETIF_F_HW_VLAN_TX) && 373 vlan_get_tag(skb, &vlan_id) == 0) { 374 skb->dev = slave_dev; 375 skb = vlan_put_tag(skb, vlan_id); 376 if (!skb) { 377 /* vlan_put_tag() frees the skb in case of error, 378 * so return success here so the calling functions 379 * won't attempt to free is again. 380 */ 381 return 0; 382 } 383 } else { 384 skb->dev = slave_dev; 385 } 386 387 skb->priority = 1; 388 dev_queue_xmit(skb); 389 390 return 0; 391 } 392 393 /* 394 * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid 395 * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a 396 * lock because: 397 * a. This operation is performed in IOCTL context, 398 * b. The operation is protected by the RTNL semaphore in the 8021q code, 399 * c. Holding a lock with BH disabled while directly calling a base driver 400 * entry point is generally a BAD idea. 401 * 402 * The design of synchronization/protection for this operation in the 8021q 403 * module is good for one or more VLAN devices over a single physical device 404 * and cannot be extended for a teaming solution like bonding, so there is a 405 * potential race condition here where a net device from the vlan group might 406 * be referenced (either by a base driver or the 8021q code) while it is being 407 * removed from the system. 
However, it turns out we're not making matters 408 * worse, and if it works for regular VLAN usage it will work here too. 409 */ 410 411 /** 412 * bond_vlan_rx_register - Propagates registration to slaves 413 * @bond_dev: bonding net device that got called 414 * @grp: vlan group being registered 415 */ 416 static void bond_vlan_rx_register(struct net_device *bond_dev, struct vlan_group *grp) 417 { 418 struct bonding *bond = bond_dev->priv; 419 struct slave *slave; 420 int i; 421 422 bond->vlgrp = grp; 423 424 bond_for_each_slave(bond, slave, i) { 425 struct net_device *slave_dev = slave->dev; 426 427 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 428 slave_dev->vlan_rx_register) { 429 slave_dev->vlan_rx_register(slave_dev, grp); 430 } 431 } 432 } 433 434 /** 435 * bond_vlan_rx_add_vid - Propagates adding an id to slaves 436 * @bond_dev: bonding net device that got called 437 * @vid: vlan id being added 438 */ 439 static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) 440 { 441 struct bonding *bond = bond_dev->priv; 442 struct slave *slave; 443 int i, res; 444 445 bond_for_each_slave(bond, slave, i) { 446 struct net_device *slave_dev = slave->dev; 447 448 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 449 slave_dev->vlan_rx_add_vid) { 450 slave_dev->vlan_rx_add_vid(slave_dev, vid); 451 } 452 } 453 454 res = bond_add_vlan(bond, vid); 455 if (res) { 456 printk(KERN_ERR DRV_NAME 457 ": %s: Error: Failed to add vlan id %d\n", 458 bond_dev->name, vid); 459 } 460 } 461 462 /** 463 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves 464 * @bond_dev: bonding net device that got called 465 * @vid: vlan id being removed 466 */ 467 static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) 468 { 469 struct bonding *bond = bond_dev->priv; 470 struct slave *slave; 471 struct net_device *vlan_dev; 472 int i, res; 473 474 bond_for_each_slave(bond, slave, i) { 475 struct net_device *slave_dev = slave->dev; 476 477 if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && 478 slave_dev->vlan_rx_kill_vid) { 479 /* Save and then restore vlan_dev in the grp array, 480 * since the slave's driver might clear it. 
481 */ 482 vlan_dev = bond->vlgrp->vlan_devices[vid]; 483 slave_dev->vlan_rx_kill_vid(slave_dev, vid); 484 bond->vlgrp->vlan_devices[vid] = vlan_dev; 485 } 486 } 487 488 res = bond_del_vlan(bond, vid); 489 if (res) { 490 printk(KERN_ERR DRV_NAME 491 ": %s: Error: Failed to remove vlan id %d\n", 492 bond_dev->name, vid); 493 } 494 } 495 496 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev) 497 { 498 struct vlan_entry *vlan; 499 500 write_lock_bh(&bond->lock); 501 502 if (list_empty(&bond->vlan_list)) { 503 goto out; 504 } 505 506 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 507 slave_dev->vlan_rx_register) { 508 slave_dev->vlan_rx_register(slave_dev, bond->vlgrp); 509 } 510 511 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 512 !(slave_dev->vlan_rx_add_vid)) { 513 goto out; 514 } 515 516 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 517 slave_dev->vlan_rx_add_vid(slave_dev, vlan->vlan_id); 518 } 519 520 out: 521 write_unlock_bh(&bond->lock); 522 } 523 524 static void bond_del_vlans_from_slave(struct bonding *bond, struct net_device *slave_dev) 525 { 526 struct vlan_entry *vlan; 527 struct net_device *vlan_dev; 528 529 write_lock_bh(&bond->lock); 530 531 if (list_empty(&bond->vlan_list)) { 532 goto out; 533 } 534 535 if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || 536 !(slave_dev->vlan_rx_kill_vid)) { 537 goto unreg; 538 } 539 540 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 541 /* Save and then restore vlan_dev in the grp array, 542 * since the slave's driver might clear it. 543 */ 544 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 545 slave_dev->vlan_rx_kill_vid(slave_dev, vlan->vlan_id); 546 bond->vlgrp->vlan_devices[vlan->vlan_id] = vlan_dev; 547 } 548 549 unreg: 550 if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && 551 slave_dev->vlan_rx_register) { 552 slave_dev->vlan_rx_register(slave_dev, NULL); 553 } 554 555 out: 556 write_unlock_bh(&bond->lock); 557 } 558 559 /*------------------------------- Link status -------------------------------*/ 560 561 /* 562 * Get link speed and duplex from the slave's base driver 563 * using ethtool. If for some reason the call fails or the 564 * values are invalid, fake speed and duplex to 100/Full 565 * and return error. 
566 */ 567 static int bond_update_speed_duplex(struct slave *slave) 568 { 569 struct net_device *slave_dev = slave->dev; 570 static int (* ioctl)(struct net_device *, struct ifreq *, int); 571 struct ifreq ifr; 572 struct ethtool_cmd etool; 573 574 /* Fake speed and duplex */ 575 slave->speed = SPEED_100; 576 slave->duplex = DUPLEX_FULL; 577 578 if (slave_dev->ethtool_ops) { 579 u32 res; 580 581 if (!slave_dev->ethtool_ops->get_settings) { 582 return -1; 583 } 584 585 res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool); 586 if (res < 0) { 587 return -1; 588 } 589 590 goto verify; 591 } 592 593 ioctl = slave_dev->do_ioctl; 594 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 595 etool.cmd = ETHTOOL_GSET; 596 ifr.ifr_data = (char*)&etool; 597 if (!ioctl || (IOCTL(slave_dev, &ifr, SIOCETHTOOL) < 0)) { 598 return -1; 599 } 600 601 verify: 602 switch (etool.speed) { 603 case SPEED_10: 604 case SPEED_100: 605 case SPEED_1000: 606 break; 607 default: 608 return -1; 609 } 610 611 switch (etool.duplex) { 612 case DUPLEX_FULL: 613 case DUPLEX_HALF: 614 break; 615 default: 616 return -1; 617 } 618 619 slave->speed = etool.speed; 620 slave->duplex = etool.duplex; 621 622 return 0; 623 } 624 625 /* 626 * if <dev> supports MII link status reporting, check its link status. 627 * 628 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), 629 * depening upon the setting of the use_carrier parameter. 630 * 631 * Return either BMSR_LSTATUS, meaning that the link is up (or we 632 * can't tell and just pretend it is), or 0, meaning that the link is 633 * down. 634 * 635 * If reporting is non-zero, instead of faking link up, return -1 if 636 * both ETHTOOL and MII ioctls fail (meaning the device does not 637 * support them). If use_carrier is set, return whatever it says. 638 * It'd be nice if there was a good way to tell if a driver supports 639 * netif_carrier, but there really isn't. 640 */ 641 static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) 642 { 643 static int (* ioctl)(struct net_device *, struct ifreq *, int); 644 struct ifreq ifr; 645 struct mii_ioctl_data *mii; 646 struct ethtool_value etool; 647 648 if (bond->params.use_carrier) { 649 return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0; 650 } 651 652 ioctl = slave_dev->do_ioctl; 653 if (ioctl) { 654 /* TODO: set pointer to correct ioctl on a per team member */ 655 /* bases to make this more efficient. that is, once */ 656 /* we determine the correct ioctl, we will always */ 657 /* call it and not the others for that team */ 658 /* member. */ 659 660 /* 661 * We cannot assume that SIOCGMIIPHY will also read a 662 * register; not all network drivers (e.g., e100) 663 * support that. 664 */ 665 666 /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ 667 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 668 mii = if_mii(&ifr); 669 if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) { 670 mii->reg_num = MII_BMSR; 671 if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) { 672 return (mii->val_out & BMSR_LSTATUS); 673 } 674 } 675 } 676 677 /* try SIOCETHTOOL ioctl, some drivers cache ETHTOOL_GLINK */ 678 /* for a period of time so we attempt to get link status */ 679 /* from it last if the above MII ioctls fail... */ 680 if (slave_dev->ethtool_ops) { 681 if (slave_dev->ethtool_ops->get_link) { 682 u32 link; 683 684 link = slave_dev->ethtool_ops->get_link(slave_dev); 685 686 return link ? 
BMSR_LSTATUS : 0; 687 } 688 } 689 690 if (ioctl) { 691 strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); 692 etool.cmd = ETHTOOL_GLINK; 693 ifr.ifr_data = (char*)&etool; 694 if (IOCTL(slave_dev, &ifr, SIOCETHTOOL) == 0) { 695 if (etool.data == 1) { 696 return BMSR_LSTATUS; 697 } else { 698 dprintk("SIOCETHTOOL shows link down\n"); 699 return 0; 700 } 701 } 702 } 703 704 /* 705 * If reporting, report that either there's no dev->do_ioctl, 706 * or both SIOCGMIIREG and SIOCETHTOOL failed (meaning that we 707 * cannot report link status). If not reporting, pretend 708 * we're ok. 709 */ 710 return (reporting ? -1 : BMSR_LSTATUS); 711 } 712 713 /*----------------------------- Multicast list ------------------------------*/ 714 715 /* 716 * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise 717 */ 718 static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) 719 { 720 return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && 721 dmi1->dmi_addrlen == dmi2->dmi_addrlen; 722 } 723 724 /* 725 * returns dmi entry if found, NULL otherwise 726 */ 727 static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) 728 { 729 struct dev_mc_list *idmi; 730 731 for (idmi = mc_list; idmi; idmi = idmi->next) { 732 if (bond_is_dmi_same(dmi, idmi)) { 733 return idmi; 734 } 735 } 736 737 return NULL; 738 } 739 740 /* 741 * Push the promiscuity flag down to appropriate slaves 742 */ 743 static void bond_set_promiscuity(struct bonding *bond, int inc) 744 { 745 if (USES_PRIMARY(bond->params.mode)) { 746 /* write lock already acquired */ 747 if (bond->curr_active_slave) { 748 dev_set_promiscuity(bond->curr_active_slave->dev, inc); 749 } 750 } else { 751 struct slave *slave; 752 int i; 753 bond_for_each_slave(bond, slave, i) { 754 dev_set_promiscuity(slave->dev, inc); 755 } 756 } 757 } 758 759 /* 760 * Push the allmulti flag down to all slaves 761 */ 762 static void bond_set_allmulti(struct bonding *bond, int inc) 763 { 764 if (USES_PRIMARY(bond->params.mode)) { 765 /* write lock already acquired */ 766 if (bond->curr_active_slave) { 767 dev_set_allmulti(bond->curr_active_slave->dev, inc); 768 } 769 } else { 770 struct slave *slave; 771 int i; 772 bond_for_each_slave(bond, slave, i) { 773 dev_set_allmulti(slave->dev, inc); 774 } 775 } 776 } 777 778 /* 779 * Add a Multicast address to slaves 780 * according to mode 781 */ 782 static void bond_mc_add(struct bonding *bond, void *addr, int alen) 783 { 784 if (USES_PRIMARY(bond->params.mode)) { 785 /* write lock already acquired */ 786 if (bond->curr_active_slave) { 787 dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0); 788 } 789 } else { 790 struct slave *slave; 791 int i; 792 bond_for_each_slave(bond, slave, i) { 793 dev_mc_add(slave->dev, addr, alen, 0); 794 } 795 } 796 } 797 798 /* 799 * Remove a multicast address from slave 800 * according to mode 801 */ 802 static void bond_mc_delete(struct bonding *bond, void *addr, int alen) 803 { 804 if (USES_PRIMARY(bond->params.mode)) { 805 /* write lock already acquired */ 806 if (bond->curr_active_slave) { 807 dev_mc_delete(bond->curr_active_slave->dev, addr, alen, 0); 808 } 809 } else { 810 struct slave *slave; 811 int i; 812 bond_for_each_slave(bond, slave, i) { 813 dev_mc_delete(slave->dev, addr, alen, 0); 814 } 815 } 816 } 817 818 /* 819 * Totally destroys the mc_list in bond 820 */ 821 static void bond_mc_list_destroy(struct bonding *bond) 822 { 823 struct dev_mc_list *dmi; 824 825 dmi = bond->mc_list; 826 while (dmi) { 827 
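		/* unlink the current head, free it, then continue from
		 * the new head until the list is empty
		 */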
bond->mc_list = dmi->next; 828 kfree(dmi); 829 dmi = bond->mc_list; 830 } 831 } 832 833 /* 834 * Copy all the Multicast addresses from src to the bonding device dst 835 */ 836 static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, 837 gfp_t gfp_flag) 838 { 839 struct dev_mc_list *dmi, *new_dmi; 840 841 for (dmi = mc_list; dmi; dmi = dmi->next) { 842 new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag); 843 844 if (!new_dmi) { 845 /* FIXME: Potential memory leak !!! */ 846 return -ENOMEM; 847 } 848 849 new_dmi->next = bond->mc_list; 850 bond->mc_list = new_dmi; 851 new_dmi->dmi_addrlen = dmi->dmi_addrlen; 852 memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); 853 new_dmi->dmi_users = dmi->dmi_users; 854 new_dmi->dmi_gusers = dmi->dmi_gusers; 855 } 856 857 return 0; 858 } 859 860 /* 861 * flush all members of flush->mc_list from device dev->mc_list 862 */ 863 static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev) 864 { 865 struct bonding *bond = bond_dev->priv; 866 struct dev_mc_list *dmi; 867 868 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 869 dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 870 } 871 872 if (bond->params.mode == BOND_MODE_8023AD) { 873 /* del lacpdu mc addr from mc list */ 874 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 875 876 dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 877 } 878 } 879 880 /*--------------------------- Active slave change ---------------------------*/ 881 882 /* 883 * Update the mc list and multicast-related flags for the new and 884 * old active slaves (if any) according to the multicast mode, and 885 * promiscuous flags unconditionally. 886 */ 887 static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active) 888 { 889 struct dev_mc_list *dmi; 890 891 if (!USES_PRIMARY(bond->params.mode)) { 892 /* nothing to do - mc list is already up-to-date on 893 * all slaves 894 */ 895 return; 896 } 897 898 if (old_active) { 899 if (bond->dev->flags & IFF_PROMISC) { 900 dev_set_promiscuity(old_active->dev, -1); 901 } 902 903 if (bond->dev->flags & IFF_ALLMULTI) { 904 dev_set_allmulti(old_active->dev, -1); 905 } 906 907 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 908 dev_mc_delete(old_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 909 } 910 } 911 912 if (new_active) { 913 if (bond->dev->flags & IFF_PROMISC) { 914 dev_set_promiscuity(new_active->dev, 1); 915 } 916 917 if (bond->dev->flags & IFF_ALLMULTI) { 918 dev_set_allmulti(new_active->dev, 1); 919 } 920 921 for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next) { 922 dev_mc_add(new_active->dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 923 } 924 } 925 } 926 927 /** 928 * find_best_interface - select the best available slave to be the active one 929 * @bond: our bonding struct 930 * 931 * Warning: Caller must hold curr_slave_lock for writing. 
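 *
 * Selection order, summarizing the code below: the primary slave is
 * preferred when it is up and ARP monitoring is not in use; otherwise the
 * slaves are scanned starting from the current active one, returning the
 * first slave whose link is fully up, or failing that the BOND_LINK_BACK
 * slave with the least remaining updelay.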
932 */ 933 static struct slave *bond_find_best_slave(struct bonding *bond) 934 { 935 struct slave *new_active, *old_active; 936 struct slave *bestslave = NULL; 937 int mintime = bond->params.updelay; 938 int i; 939 940 new_active = old_active = bond->curr_active_slave; 941 942 if (!new_active) { /* there were no active slaves left */ 943 if (bond->slave_cnt > 0) { /* found one slave */ 944 new_active = bond->first_slave; 945 } else { 946 return NULL; /* still no slave, return NULL */ 947 } 948 } 949 950 /* first try the primary link; if arping, a link must tx/rx traffic 951 * before it can be considered the curr_active_slave - also, we would skip 952 * slaves between the curr_active_slave and primary_slave that may be up 953 * and able to arp 954 */ 955 if ((bond->primary_slave) && 956 (!bond->params.arp_interval) && 957 (IS_UP(bond->primary_slave->dev))) { 958 new_active = bond->primary_slave; 959 } 960 961 /* remember where to stop iterating over the slaves */ 962 old_active = new_active; 963 964 bond_for_each_slave_from(bond, new_active, i, old_active) { 965 if (IS_UP(new_active->dev)) { 966 if (new_active->link == BOND_LINK_UP) { 967 return new_active; 968 } else if (new_active->link == BOND_LINK_BACK) { 969 /* link up, but waiting for stabilization */ 970 if (new_active->delay < mintime) { 971 mintime = new_active->delay; 972 bestslave = new_active; 973 } 974 } 975 } 976 } 977 978 return bestslave; 979 } 980 981 /** 982 * change_active_interface - change the active slave into the specified one 983 * @bond: our bonding struct 984 * @new: the new slave to make the active one 985 * 986 * Set the new slave to the bond's settings and unset them on the old 987 * curr_active_slave. 988 * Setting include flags, mc-list, promiscuity, allmulti, etc. 989 * 990 * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, 991 * because it is apparently the best available slave we have, even though its 992 * updelay hasn't timed out yet. 993 * 994 * Warning: Caller must hold curr_slave_lock for writing. 
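 *
 * Illustrative call pattern, matching the locking rule above (compare the
 * failover path in bond_mii_monitor() below):
 *
 *	write_lock(&bond->curr_slave_lock);
 *	bond_change_active_slave(bond, new_active);
 *	write_unlock(&bond->curr_slave_lock);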
995 */ 996 void bond_change_active_slave(struct bonding *bond, struct slave *new_active) 997 { 998 struct slave *old_active = bond->curr_active_slave; 999 1000 if (old_active == new_active) { 1001 return; 1002 } 1003 1004 if (new_active) { 1005 if (new_active->link == BOND_LINK_BACK) { 1006 if (USES_PRIMARY(bond->params.mode)) { 1007 printk(KERN_INFO DRV_NAME 1008 ": %s: making interface %s the new " 1009 "active one %d ms earlier.\n", 1010 bond->dev->name, new_active->dev->name, 1011 (bond->params.updelay - new_active->delay) * bond->params.miimon); 1012 } 1013 1014 new_active->delay = 0; 1015 new_active->link = BOND_LINK_UP; 1016 new_active->jiffies = jiffies; 1017 1018 if (bond->params.mode == BOND_MODE_8023AD) { 1019 bond_3ad_handle_link_change(new_active, BOND_LINK_UP); 1020 } 1021 1022 if ((bond->params.mode == BOND_MODE_TLB) || 1023 (bond->params.mode == BOND_MODE_ALB)) { 1024 bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); 1025 } 1026 } else { 1027 if (USES_PRIMARY(bond->params.mode)) { 1028 printk(KERN_INFO DRV_NAME 1029 ": %s: making interface %s the new " 1030 "active one.\n", 1031 bond->dev->name, new_active->dev->name); 1032 } 1033 } 1034 } 1035 1036 if (USES_PRIMARY(bond->params.mode)) { 1037 bond_mc_swap(bond, new_active, old_active); 1038 } 1039 1040 if ((bond->params.mode == BOND_MODE_TLB) || 1041 (bond->params.mode == BOND_MODE_ALB)) { 1042 bond_alb_handle_active_change(bond, new_active); 1043 } else { 1044 bond->curr_active_slave = new_active; 1045 } 1046 1047 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 1048 if (old_active) { 1049 bond_set_slave_inactive_flags(old_active); 1050 } 1051 1052 if (new_active) { 1053 bond_set_slave_active_flags(new_active); 1054 } 1055 bond_send_gratuitous_arp(bond); 1056 } 1057 } 1058 1059 /** 1060 * bond_select_active_slave - select a new active slave, if needed 1061 * @bond: our bonding struct 1062 * 1063 * This functions shoud be called when one of the following occurs: 1064 * - The old curr_active_slave has been released or lost its link. 1065 * - The primary_slave has got its link back. 1066 * - A slave has got its link back and there's no old curr_active_slave. 1067 * 1068 * Warning: Caller must hold curr_slave_lock for writing. 1069 */ 1070 void bond_select_active_slave(struct bonding *bond) 1071 { 1072 struct slave *best_slave; 1073 1074 best_slave = bond_find_best_slave(bond); 1075 if (best_slave != bond->curr_active_slave) { 1076 bond_change_active_slave(bond, best_slave); 1077 } 1078 } 1079 1080 /*--------------------------- slave list handling ---------------------------*/ 1081 1082 /* 1083 * This function attaches the slave to the end of list. 1084 * 1085 * bond->lock held for writing by caller. 1086 */ 1087 static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) 1088 { 1089 if (bond->first_slave == NULL) { /* attaching the first slave */ 1090 new_slave->next = new_slave; 1091 new_slave->prev = new_slave; 1092 bond->first_slave = new_slave; 1093 } else { 1094 new_slave->next = bond->first_slave; 1095 new_slave->prev = bond->first_slave->prev; 1096 new_slave->next->prev = new_slave; 1097 new_slave->prev->next = new_slave; 1098 } 1099 1100 bond->slave_cnt++; 1101 } 1102 1103 /* 1104 * This function detaches the slave from the list. 1105 * WARNING: no check is made to verify if the slave effectively 1106 * belongs to <bond>. 1107 * Nothing is freed on return, structures are just unchained. 
1108 * If any slave pointer in bond was pointing to <slave>, 1109 * it should be changed by the calling function. 1110 * 1111 * bond->lock held for writing by caller. 1112 */ 1113 static void bond_detach_slave(struct bonding *bond, struct slave *slave) 1114 { 1115 if (slave->next) { 1116 slave->next->prev = slave->prev; 1117 } 1118 1119 if (slave->prev) { 1120 slave->prev->next = slave->next; 1121 } 1122 1123 if (bond->first_slave == slave) { /* slave is the first slave */ 1124 if (bond->slave_cnt > 1) { /* there are more slave */ 1125 bond->first_slave = slave->next; 1126 } else { 1127 bond->first_slave = NULL; /* slave was the last one */ 1128 } 1129 } 1130 1131 slave->next = NULL; 1132 slave->prev = NULL; 1133 bond->slave_cnt--; 1134 } 1135 1136 /*---------------------------------- IOCTL ----------------------------------*/ 1137 1138 int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_dev) 1139 { 1140 dprintk("bond_dev=%p\n", bond_dev); 1141 dprintk("slave_dev=%p\n", slave_dev); 1142 dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len); 1143 memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); 1144 return 0; 1145 } 1146 1147 #define BOND_INTERSECT_FEATURES \ 1148 (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) 1149 1150 /* 1151 * Compute the common dev->feature set available to all slaves. Some 1152 * feature bits are managed elsewhere, so preserve feature bits set on 1153 * master device that are not part of the examined set. 1154 */ 1155 static int bond_compute_features(struct bonding *bond) 1156 { 1157 unsigned long features = BOND_INTERSECT_FEATURES; 1158 struct slave *slave; 1159 struct net_device *bond_dev = bond->dev; 1160 int i; 1161 1162 bond_for_each_slave(bond, slave, i) 1163 features &= (slave->dev->features & BOND_INTERSECT_FEATURES); 1164 1165 if ((features & NETIF_F_SG) && 1166 !(features & (NETIF_F_IP_CSUM | 1167 NETIF_F_NO_CSUM | 1168 NETIF_F_HW_CSUM))) 1169 features &= ~NETIF_F_SG; 1170 1171 features |= (bond_dev->features & ~BOND_INTERSECT_FEATURES); 1172 bond_dev->features = features; 1173 1174 return 0; 1175 } 1176 1177 /* enslave device <slave> to bond device <master> */ 1178 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) 1179 { 1180 struct bonding *bond = bond_dev->priv; 1181 struct slave *new_slave = NULL; 1182 struct dev_mc_list *dmi; 1183 struct sockaddr addr; 1184 int link_reporting; 1185 int old_features = bond_dev->features; 1186 int res = 0; 1187 1188 if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL && 1189 slave_dev->do_ioctl == NULL) { 1190 printk(KERN_WARNING DRV_NAME 1191 ": %s: Warning: no link monitoring support for %s\n", 1192 bond_dev->name, slave_dev->name); 1193 } 1194 1195 /* bond must be initialized by bond_open() before enslaving */ 1196 if (!(bond_dev->flags & IFF_UP)) { 1197 dprintk("Error, master_dev is not up\n"); 1198 return -EPERM; 1199 } 1200 1201 /* already enslaved */ 1202 if (slave_dev->flags & IFF_SLAVE) { 1203 dprintk("Error, Device was already enslaved\n"); 1204 return -EBUSY; 1205 } 1206 1207 /* vlan challenged mutual exclusion */ 1208 /* no need to lock since we're protected by rtnl_lock */ 1209 if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { 1210 dprintk("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1211 if (!list_empty(&bond->vlan_list)) { 1212 printk(KERN_ERR DRV_NAME 1213 ": %s: Error: cannot enslave VLAN " 1214 "challenged slave %s on VLAN enabled " 1215 "bond %s\n", bond_dev->name, slave_dev->name, 1216 
bond_dev->name); 1217 return -EPERM; 1218 } else { 1219 printk(KERN_WARNING DRV_NAME 1220 ": %s: Warning: enslaved VLAN challenged " 1221 "slave %s. Adding VLANs will be blocked as " 1222 "long as %s is part of bond %s\n", 1223 bond_dev->name, slave_dev->name, slave_dev->name, 1224 bond_dev->name); 1225 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1226 } 1227 } else { 1228 dprintk("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); 1229 if (bond->slave_cnt == 0) { 1230 /* First slave, and it is not VLAN challenged, 1231 * so remove the block of adding VLANs over the bond. 1232 */ 1233 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1234 } 1235 } 1236 1237 /* 1238 * Old ifenslave binaries are no longer supported. These can 1239 * be identified with moderate accurary by the state of the slave: 1240 * the current ifenslave will set the interface down prior to 1241 * enslaving it; the old ifenslave will not. 1242 */ 1243 if ((slave_dev->flags & IFF_UP)) { 1244 printk(KERN_ERR DRV_NAME ": %s is up. " 1245 "This may be due to an out of date ifenslave.\n", 1246 slave_dev->name); 1247 res = -EPERM; 1248 goto err_undo_flags; 1249 } 1250 1251 if (slave_dev->set_mac_address == NULL) { 1252 printk(KERN_ERR DRV_NAME 1253 ": %s: Error: The slave device you specified does " 1254 "not support setting the MAC address. " 1255 "Your kernel likely does not support slave " 1256 "devices.\n", bond_dev->name); 1257 res = -EOPNOTSUPP; 1258 goto err_undo_flags; 1259 } 1260 1261 new_slave = kmalloc(sizeof(struct slave), GFP_KERNEL); 1262 if (!new_slave) { 1263 res = -ENOMEM; 1264 goto err_undo_flags; 1265 } 1266 1267 memset(new_slave, 0, sizeof(struct slave)); 1268 1269 /* save slave's original flags before calling 1270 * netdev_set_master and dev_open 1271 */ 1272 new_slave->original_flags = slave_dev->flags; 1273 1274 /* 1275 * Save slave's original ("permanent") mac address for modes 1276 * that need it, and for restoring it upon release, and then 1277 * set it to the master's address 1278 */ 1279 memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); 1280 1281 /* 1282 * Set slave to master's mac address. 
The application already 1283 * set the master's mac address to that of the first slave 1284 */ 1285 memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 1286 addr.sa_family = slave_dev->type; 1287 res = dev_set_mac_address(slave_dev, &addr); 1288 if (res) { 1289 dprintk("Error %d calling set_mac_address\n", res); 1290 goto err_free; 1291 } 1292 1293 /* open the slave since the application closed it */ 1294 res = dev_open(slave_dev); 1295 if (res) { 1296 dprintk("Openning slave %s failed\n", slave_dev->name); 1297 goto err_restore_mac; 1298 } 1299 1300 res = netdev_set_master(slave_dev, bond_dev); 1301 if (res) { 1302 dprintk("Error %d calling netdev_set_master\n", res); 1303 goto err_close; 1304 } 1305 1306 new_slave->dev = slave_dev; 1307 1308 if ((bond->params.mode == BOND_MODE_TLB) || 1309 (bond->params.mode == BOND_MODE_ALB)) { 1310 /* bond_alb_init_slave() must be called before all other stages since 1311 * it might fail and we do not want to have to undo everything 1312 */ 1313 res = bond_alb_init_slave(bond, new_slave); 1314 if (res) { 1315 goto err_unset_master; 1316 } 1317 } 1318 1319 /* If the mode USES_PRIMARY, then the new slave gets the 1320 * master's promisc (and mc) settings only if it becomes the 1321 * curr_active_slave, and that is taken care of later when calling 1322 * bond_change_active() 1323 */ 1324 if (!USES_PRIMARY(bond->params.mode)) { 1325 /* set promiscuity level to new slave */ 1326 if (bond_dev->flags & IFF_PROMISC) { 1327 dev_set_promiscuity(slave_dev, 1); 1328 } 1329 1330 /* set allmulti level to new slave */ 1331 if (bond_dev->flags & IFF_ALLMULTI) { 1332 dev_set_allmulti(slave_dev, 1); 1333 } 1334 1335 /* upload master's mc_list to new slave */ 1336 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 1337 dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); 1338 } 1339 } 1340 1341 if (bond->params.mode == BOND_MODE_8023AD) { 1342 /* add lacpdu mc addr to mc list */ 1343 u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; 1344 1345 dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0); 1346 } 1347 1348 bond_add_vlans_on_slave(bond, slave_dev); 1349 1350 write_lock_bh(&bond->lock); 1351 1352 bond_attach_slave(bond, new_slave); 1353 1354 new_slave->delay = 0; 1355 new_slave->link_failure_count = 0; 1356 1357 bond_compute_features(bond); 1358 1359 if (bond->params.miimon && !bond->params.use_carrier) { 1360 link_reporting = bond_check_dev_link(bond, slave_dev, 1); 1361 1362 if ((link_reporting == -1) && !bond->params.arp_interval) { 1363 /* 1364 * miimon is set but a bonded network driver 1365 * does not support ETHTOOL/MII and 1366 * arp_interval is not set. Note: if 1367 * use_carrier is enabled, we will never go 1368 * here (because netif_carrier is always 1369 * supported); thus, we don't need to change 1370 * the messages for netif_carrier. 1371 */ 1372 printk(KERN_WARNING DRV_NAME 1373 ": %s: Warning: MII and ETHTOOL support not " 1374 "available for interface %s, and " 1375 "arp_interval/arp_ip_target module parameters " 1376 "not specified, thus bonding will not detect " 1377 "link failures! 
see bonding.txt for details.\n", 1378 bond_dev->name, slave_dev->name); 1379 } else if (link_reporting == -1) { 1380 /* unable get link status using mii/ethtool */ 1381 printk(KERN_WARNING DRV_NAME 1382 ": %s: Warning: can't get link status from " 1383 "interface %s; the network driver associated " 1384 "with this interface does not support MII or " 1385 "ETHTOOL link status reporting, thus miimon " 1386 "has no effect on this interface.\n", 1387 bond_dev->name, slave_dev->name); 1388 } 1389 } 1390 1391 /* check for initial state */ 1392 if (!bond->params.miimon || 1393 (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) { 1394 if (bond->params.updelay) { 1395 dprintk("Initial state of slave_dev is " 1396 "BOND_LINK_BACK\n"); 1397 new_slave->link = BOND_LINK_BACK; 1398 new_slave->delay = bond->params.updelay; 1399 } else { 1400 dprintk("Initial state of slave_dev is " 1401 "BOND_LINK_UP\n"); 1402 new_slave->link = BOND_LINK_UP; 1403 } 1404 new_slave->jiffies = jiffies; 1405 } else { 1406 dprintk("Initial state of slave_dev is " 1407 "BOND_LINK_DOWN\n"); 1408 new_slave->link = BOND_LINK_DOWN; 1409 } 1410 1411 if (bond_update_speed_duplex(new_slave) && 1412 (new_slave->link != BOND_LINK_DOWN)) { 1413 printk(KERN_WARNING DRV_NAME 1414 ": %s: Warning: failed to get speed and duplex from %s, " 1415 "assumed to be 100Mb/sec and Full.\n", 1416 bond_dev->name, new_slave->dev->name); 1417 1418 if (bond->params.mode == BOND_MODE_8023AD) { 1419 printk(KERN_WARNING DRV_NAME 1420 ": %s: Warning: Operation of 802.3ad mode requires ETHTOOL " 1421 "support in base driver for proper aggregator " 1422 "selection.\n", bond_dev->name); 1423 } 1424 } 1425 1426 if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { 1427 /* if there is a primary slave, remember it */ 1428 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { 1429 bond->primary_slave = new_slave; 1430 } 1431 } 1432 1433 switch (bond->params.mode) { 1434 case BOND_MODE_ACTIVEBACKUP: 1435 /* if we're in active-backup mode, we need one and only one active 1436 * interface. The backup interfaces will have their NOARP flag set 1437 * because we need them to be completely deaf and not to respond to 1438 * any ARP request on the network to avoid fooling a switch. Thus, 1439 * since we guarantee that curr_active_slave always point to the last 1440 * usable interface, we just have to verify this interface's flag. 
1441 */ 1442 if (((!bond->curr_active_slave) || 1443 (bond->curr_active_slave->dev->flags & IFF_NOARP)) && 1444 (new_slave->link != BOND_LINK_DOWN)) { 1445 dprintk("This is the first active slave\n"); 1446 /* first slave or no active slave yet, and this link 1447 is OK, so make this interface the active one */ 1448 bond_change_active_slave(bond, new_slave); 1449 } else { 1450 dprintk("This is just a backup slave\n"); 1451 bond_set_slave_inactive_flags(new_slave); 1452 } 1453 break; 1454 case BOND_MODE_8023AD: 1455 /* in 802.3ad mode, the internal mechanism 1456 * will activate the slaves in the selected 1457 * aggregator 1458 */ 1459 bond_set_slave_inactive_flags(new_slave); 1460 /* if this is the first slave */ 1461 if (bond->slave_cnt == 1) { 1462 SLAVE_AD_INFO(new_slave).id = 1; 1463 /* Initialize AD with the number of times that the AD timer is called in 1 second 1464 * can be called only after the mac address of the bond is set 1465 */ 1466 bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, 1467 bond->params.lacp_fast); 1468 } else { 1469 SLAVE_AD_INFO(new_slave).id = 1470 SLAVE_AD_INFO(new_slave->prev).id + 1; 1471 } 1472 1473 bond_3ad_bind_slave(new_slave); 1474 break; 1475 case BOND_MODE_TLB: 1476 case BOND_MODE_ALB: 1477 new_slave->state = BOND_STATE_ACTIVE; 1478 if ((!bond->curr_active_slave) && 1479 (new_slave->link != BOND_LINK_DOWN)) { 1480 /* first slave or no active slave yet, and this link 1481 * is OK, so make this interface the active one 1482 */ 1483 bond_change_active_slave(bond, new_slave); 1484 } 1485 break; 1486 default: 1487 dprintk("This slave is always active in trunk mode\n"); 1488 1489 /* always active in trunk mode */ 1490 new_slave->state = BOND_STATE_ACTIVE; 1491 1492 /* In trunking mode there is little meaning to curr_active_slave 1493 * anyway (it holds no special properties of the bond device), 1494 * so we can change it without calling change_active_interface() 1495 */ 1496 if (!bond->curr_active_slave) { 1497 bond->curr_active_slave = new_slave; 1498 } 1499 break; 1500 } /* switch(bond_mode) */ 1501 1502 write_unlock_bh(&bond->lock); 1503 1504 res = bond_create_slave_symlinks(bond_dev, slave_dev); 1505 if (res) 1506 goto err_unset_master; 1507 1508 printk(KERN_INFO DRV_NAME 1509 ": %s: enslaving %s as a%s interface with a%s link.\n", 1510 bond_dev->name, slave_dev->name, 1511 new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", 1512 new_slave->link != BOND_LINK_DOWN ? "n up" : " down"); 1513 1514 /* enslave is successful */ 1515 return 0; 1516 1517 /* Undo stages on error */ 1518 err_unset_master: 1519 netdev_set_master(slave_dev, NULL); 1520 1521 err_close: 1522 dev_close(slave_dev); 1523 1524 err_restore_mac: 1525 memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); 1526 addr.sa_family = slave_dev->type; 1527 dev_set_mac_address(slave_dev, &addr); 1528 1529 err_free: 1530 kfree(new_slave); 1531 1532 err_undo_flags: 1533 bond_dev->features = old_features; 1534 1535 return res; 1536 } 1537 1538 /* 1539 * Try to release the slave device <slave> from the bond device <master> 1540 * It is legal to access curr_active_slave without a lock because all the function 1541 * is write-locked. 1542 * 1543 * The rules for slave state should be: 1544 * for Active/Backup: 1545 * Active stays on all backups go down 1546 * for Bonded connections: 1547 * The first up interface should be left on and all others downed. 
1548 */ 1549 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) 1550 { 1551 struct bonding *bond = bond_dev->priv; 1552 struct slave *slave, *oldcurrent; 1553 struct sockaddr addr; 1554 int mac_addr_differ; 1555 1556 /* slave is not a slave or master is not master of this slave */ 1557 if (!(slave_dev->flags & IFF_SLAVE) || 1558 (slave_dev->master != bond_dev)) { 1559 printk(KERN_ERR DRV_NAME 1560 ": %s: Error: cannot release %s.\n", 1561 bond_dev->name, slave_dev->name); 1562 return -EINVAL; 1563 } 1564 1565 write_lock_bh(&bond->lock); 1566 1567 slave = bond_get_slave_by_dev(bond, slave_dev); 1568 if (!slave) { 1569 /* not a slave of this bond */ 1570 printk(KERN_INFO DRV_NAME 1571 ": %s: %s not enslaved\n", 1572 bond_dev->name, slave_dev->name); 1573 return -EINVAL; 1574 } 1575 1576 mac_addr_differ = memcmp(bond_dev->dev_addr, 1577 slave->perm_hwaddr, 1578 ETH_ALEN); 1579 if (!mac_addr_differ && (bond->slave_cnt > 1)) { 1580 printk(KERN_WARNING DRV_NAME 1581 ": %s: Warning: the permanent HWaddr of %s " 1582 "- %02X:%02X:%02X:%02X:%02X:%02X - is " 1583 "still in use by %s. Set the HWaddr of " 1584 "%s to a different address to avoid " 1585 "conflicts.\n", 1586 bond_dev->name, 1587 slave_dev->name, 1588 slave->perm_hwaddr[0], 1589 slave->perm_hwaddr[1], 1590 slave->perm_hwaddr[2], 1591 slave->perm_hwaddr[3], 1592 slave->perm_hwaddr[4], 1593 slave->perm_hwaddr[5], 1594 bond_dev->name, 1595 slave_dev->name); 1596 } 1597 1598 /* Inform AD package of unbinding of slave. */ 1599 if (bond->params.mode == BOND_MODE_8023AD) { 1600 /* must be called before the slave is 1601 * detached from the list 1602 */ 1603 bond_3ad_unbind_slave(slave); 1604 } 1605 1606 printk(KERN_INFO DRV_NAME 1607 ": %s: releasing %s interface %s\n", 1608 bond_dev->name, 1609 (slave->state == BOND_STATE_ACTIVE) 1610 ? "active" : "backup", 1611 slave_dev->name); 1612 1613 oldcurrent = bond->curr_active_slave; 1614 1615 bond->current_arp_slave = NULL; 1616 1617 /* release the slave from its bond */ 1618 bond_detach_slave(bond, slave); 1619 1620 bond_compute_features(bond); 1621 1622 if (bond->primary_slave == slave) { 1623 bond->primary_slave = NULL; 1624 } 1625 1626 if (oldcurrent == slave) { 1627 bond_change_active_slave(bond, NULL); 1628 } 1629 1630 if ((bond->params.mode == BOND_MODE_TLB) || 1631 (bond->params.mode == BOND_MODE_ALB)) { 1632 /* Must be called only after the slave has been 1633 * detached from the list and the curr_active_slave 1634 * has been cleared (if our_slave == old_current), 1635 * but before a new active slave is selected. 
1636 */ 1637 bond_alb_deinit_slave(bond, slave); 1638 } 1639 1640 if (oldcurrent == slave) { 1641 bond_select_active_slave(bond); 1642 1643 if (!bond->curr_active_slave) { 1644 printk(KERN_INFO DRV_NAME 1645 ": %s: now running without any active " 1646 "interface !\n", 1647 bond_dev->name); 1648 } 1649 } 1650 1651 if (bond->slave_cnt == 0) { 1652 /* if the last slave was removed, zero the mac address 1653 * of the master so it will be set by the application 1654 * to the mac address of the first slave 1655 */ 1656 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 1657 1658 if (list_empty(&bond->vlan_list)) { 1659 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1660 } else { 1661 printk(KERN_WARNING DRV_NAME 1662 ": %s: Warning: clearing HW address of %s while it " 1663 "still has VLANs.\n", 1664 bond_dev->name, bond_dev->name); 1665 printk(KERN_WARNING DRV_NAME 1666 ": %s: When re-adding slaves, make sure the bond's " 1667 "HW address matches its VLANs'.\n", 1668 bond_dev->name); 1669 } 1670 } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) && 1671 !bond_has_challenged_slaves(bond)) { 1672 printk(KERN_INFO DRV_NAME 1673 ": %s: last VLAN challenged slave %s " 1674 "left bond %s. VLAN blocking is removed\n", 1675 bond_dev->name, slave_dev->name, bond_dev->name); 1676 bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; 1677 } 1678 1679 write_unlock_bh(&bond->lock); 1680 1681 /* must do this from outside any spinlocks */ 1682 bond_destroy_slave_symlinks(bond_dev, slave_dev); 1683 1684 bond_del_vlans_from_slave(bond, slave_dev); 1685 1686 /* If the mode USES_PRIMARY, then we should only remove its 1687 * promisc and mc settings if it was the curr_active_slave, but that was 1688 * already taken care of above when we detached the slave 1689 */ 1690 if (!USES_PRIMARY(bond->params.mode)) { 1691 /* unset promiscuity level from slave */ 1692 if (bond_dev->flags & IFF_PROMISC) { 1693 dev_set_promiscuity(slave_dev, -1); 1694 } 1695 1696 /* unset allmulti level from slave */ 1697 if (bond_dev->flags & IFF_ALLMULTI) { 1698 dev_set_allmulti(slave_dev, -1); 1699 } 1700 1701 /* flush master's mc_list from slave */ 1702 bond_mc_list_flush(bond_dev, slave_dev); 1703 } 1704 1705 netdev_set_master(slave_dev, NULL); 1706 1707 /* close slave before restoring its mac address */ 1708 dev_close(slave_dev); 1709 1710 /* restore original ("permanent") mac address */ 1711 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1712 addr.sa_family = slave_dev->type; 1713 dev_set_mac_address(slave_dev, &addr); 1714 1715 /* restore the original state of the 1716 * IFF_NOARP flag that might have been 1717 * set by bond_set_slave_inactive_flags() 1718 */ 1719 if ((slave->original_flags & IFF_NOARP) == 0) { 1720 slave_dev->flags &= ~IFF_NOARP; 1721 } 1722 1723 kfree(slave); 1724 1725 return 0; /* deletion OK */ 1726 } 1727 1728 /* 1729 * This function releases all slaves. 1730 */ 1731 static int bond_release_all(struct net_device *bond_dev) 1732 { 1733 struct bonding *bond = bond_dev->priv; 1734 struct slave *slave; 1735 struct net_device *slave_dev; 1736 struct sockaddr addr; 1737 1738 write_lock_bh(&bond->lock); 1739 1740 if (bond->slave_cnt == 0) { 1741 goto out; 1742 } 1743 1744 bond->current_arp_slave = NULL; 1745 bond->primary_slave = NULL; 1746 bond_change_active_slave(bond, NULL); 1747 1748 while ((slave = bond->first_slave) != NULL) { 1749 /* Inform AD package of unbinding of slave 1750 * before slave is detached from the list. 
1751 */ 1752 if (bond->params.mode == BOND_MODE_8023AD) { 1753 bond_3ad_unbind_slave(slave); 1754 } 1755 1756 slave_dev = slave->dev; 1757 bond_detach_slave(bond, slave); 1758 1759 if ((bond->params.mode == BOND_MODE_TLB) || 1760 (bond->params.mode == BOND_MODE_ALB)) { 1761 /* must be called only after the slave 1762 * has been detached from the list 1763 */ 1764 bond_alb_deinit_slave(bond, slave); 1765 } 1766 1767 bond_compute_features(bond); 1768 1769 /* now that the slave is detached, unlock and perform 1770 * all the undo steps that should not be called from 1771 * within a lock. 1772 */ 1773 write_unlock_bh(&bond->lock); 1774 1775 bond_destroy_slave_symlinks(bond_dev, slave_dev); 1776 bond_del_vlans_from_slave(bond, slave_dev); 1777 1778 /* If the mode USES_PRIMARY, then we should only remove its 1779 * promisc and mc settings if it was the curr_active_slave, but that was 1780 * already taken care of above when we detached the slave 1781 */ 1782 if (!USES_PRIMARY(bond->params.mode)) { 1783 /* unset promiscuity level from slave */ 1784 if (bond_dev->flags & IFF_PROMISC) { 1785 dev_set_promiscuity(slave_dev, -1); 1786 } 1787 1788 /* unset allmulti level from slave */ 1789 if (bond_dev->flags & IFF_ALLMULTI) { 1790 dev_set_allmulti(slave_dev, -1); 1791 } 1792 1793 /* flush master's mc_list from slave */ 1794 bond_mc_list_flush(bond_dev, slave_dev); 1795 } 1796 1797 netdev_set_master(slave_dev, NULL); 1798 1799 /* close slave before restoring its mac address */ 1800 dev_close(slave_dev); 1801 1802 /* restore original ("permanent") mac address*/ 1803 memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); 1804 addr.sa_family = slave_dev->type; 1805 dev_set_mac_address(slave_dev, &addr); 1806 1807 /* restore the original state of the IFF_NOARP flag that might have 1808 * been set by bond_set_slave_inactive_flags() 1809 */ 1810 if ((slave->original_flags & IFF_NOARP) == 0) { 1811 slave_dev->flags &= ~IFF_NOARP; 1812 } 1813 1814 kfree(slave); 1815 1816 /* re-acquire the lock before getting the next slave */ 1817 write_lock_bh(&bond->lock); 1818 } 1819 1820 /* zero the mac address of the master so it will be 1821 * set by the application to the mac address of the 1822 * first slave 1823 */ 1824 memset(bond_dev->dev_addr, 0, bond_dev->addr_len); 1825 1826 if (list_empty(&bond->vlan_list)) { 1827 bond_dev->features |= NETIF_F_VLAN_CHALLENGED; 1828 } else { 1829 printk(KERN_WARNING DRV_NAME 1830 ": %s: Warning: clearing HW address of %s while it " 1831 "still has VLANs.\n", 1832 bond_dev->name, bond_dev->name); 1833 printk(KERN_WARNING DRV_NAME 1834 ": %s: When re-adding slaves, make sure the bond's " 1835 "HW address matches its VLANs'.\n", 1836 bond_dev->name); 1837 } 1838 1839 printk(KERN_INFO DRV_NAME 1840 ": %s: released all slaves\n", 1841 bond_dev->name); 1842 1843 out: 1844 write_unlock_bh(&bond->lock); 1845 1846 return 0; 1847 } 1848 1849 /* 1850 * This function changes the active slave to slave <slave_dev>. 1851 * It returns -EINVAL in the following cases. 1852 * - <slave_dev> is not found in the list. 1853 * - There is not active slave now. 1854 * - <slave_dev> is already active. 1855 * - The link state of <slave_dev> is not BOND_LINK_UP. 1856 * - <slave_dev> is not running. 1857 * In these cases, this fuction does nothing. 1858 * In the other cases, currnt_slave pointer is changed and 0 is returned. 
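 *
 * Note: this is normally reached from user space through the bonding
 * change-active ioctl (e.g. ifenslave's -c/--change-active option); the
 * ioctl dispatch itself is outside this excerpt.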
1859 */ 1860 static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) 1861 { 1862 struct bonding *bond = bond_dev->priv; 1863 struct slave *old_active = NULL; 1864 struct slave *new_active = NULL; 1865 int res = 0; 1866 1867 if (!USES_PRIMARY(bond->params.mode)) { 1868 return -EINVAL; 1869 } 1870 1871 /* Verify that master_dev is indeed the master of slave_dev */ 1872 if (!(slave_dev->flags & IFF_SLAVE) || 1873 (slave_dev->master != bond_dev)) { 1874 return -EINVAL; 1875 } 1876 1877 write_lock_bh(&bond->lock); 1878 1879 old_active = bond->curr_active_slave; 1880 new_active = bond_get_slave_by_dev(bond, slave_dev); 1881 1882 /* 1883 * Changing to the current active: do nothing; return success. 1884 */ 1885 if (new_active && (new_active == old_active)) { 1886 write_unlock_bh(&bond->lock); 1887 return 0; 1888 } 1889 1890 if ((new_active) && 1891 (old_active) && 1892 (new_active->link == BOND_LINK_UP) && 1893 IS_UP(new_active->dev)) { 1894 bond_change_active_slave(bond, new_active); 1895 } else { 1896 res = -EINVAL; 1897 } 1898 1899 write_unlock_bh(&bond->lock); 1900 1901 return res; 1902 } 1903 1904 static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) 1905 { 1906 struct bonding *bond = bond_dev->priv; 1907 1908 info->bond_mode = bond->params.mode; 1909 info->miimon = bond->params.miimon; 1910 1911 read_lock_bh(&bond->lock); 1912 info->num_slaves = bond->slave_cnt; 1913 read_unlock_bh(&bond->lock); 1914 1915 return 0; 1916 } 1917 1918 static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) 1919 { 1920 struct bonding *bond = bond_dev->priv; 1921 struct slave *slave; 1922 int i, found = 0; 1923 1924 if (info->slave_id < 0) { 1925 return -ENODEV; 1926 } 1927 1928 read_lock_bh(&bond->lock); 1929 1930 bond_for_each_slave(bond, slave, i) { 1931 if (i == (int)info->slave_id) { 1932 found = 1; 1933 break; 1934 } 1935 } 1936 1937 read_unlock_bh(&bond->lock); 1938 1939 if (found) { 1940 strcpy(info->slave_name, slave->dev->name); 1941 info->link = slave->link; 1942 info->state = slave->state; 1943 info->link_failure_count = slave->link_failure_count; 1944 } else { 1945 return -ENODEV; 1946 } 1947 1948 return 0; 1949 } 1950 1951 /*-------------------------------- Monitoring -------------------------------*/ 1952 1953 /* this function is called regularly to monitor each slave's link. */ 1954 void bond_mii_monitor(struct net_device *bond_dev) 1955 { 1956 struct bonding *bond = bond_dev->priv; 1957 struct slave *slave, *oldcurrent; 1958 int do_failover = 0; 1959 int delta_in_ticks; 1960 int i; 1961 1962 read_lock(&bond->lock); 1963 1964 delta_in_ticks = (bond->params.miimon * HZ) / 1000; 1965 1966 if (bond->kill_timers) { 1967 goto out; 1968 } 1969 1970 if (bond->slave_cnt == 0) { 1971 goto re_arm; 1972 } 1973 1974 /* we will try to read the link status of each of our slaves, and 1975 * set their IFF_RUNNING flag appropriately. For each slave not 1976 * supporting MII status, we won't do anything so that a user-space 1977 * program could monitor the link itself if needed. 
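 *
 * The per-slave state machine below transitions as follows (delays are
 * counted in miimon intervals):
 *
 *	BOND_LINK_UP   -> BOND_LINK_FAIL  carrier lost, downdelay starts
 *	BOND_LINK_FAIL -> BOND_LINK_DOWN  downdelay expired
 *	               -> BOND_LINK_UP    carrier came back in time
 *	BOND_LINK_DOWN -> BOND_LINK_BACK  carrier detected, updelay starts
 *	BOND_LINK_BACK -> BOND_LINK_UP    updelay expired
 *	               -> BOND_LINK_DOWN  carrier lost again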
1978 */ 1979 1980 read_lock(&bond->curr_slave_lock); 1981 oldcurrent = bond->curr_active_slave; 1982 read_unlock(&bond->curr_slave_lock); 1983 1984 bond_for_each_slave(bond, slave, i) { 1985 struct net_device *slave_dev = slave->dev; 1986 int link_state; 1987 u16 old_speed = slave->speed; 1988 u8 old_duplex = slave->duplex; 1989 1990 link_state = bond_check_dev_link(bond, slave_dev, 0); 1991 1992 switch (slave->link) { 1993 case BOND_LINK_UP: /* the link was up */ 1994 if (link_state == BMSR_LSTATUS) { 1995 /* link stays up, nothing more to do */ 1996 break; 1997 } else { /* link going down */ 1998 slave->link = BOND_LINK_FAIL; 1999 slave->delay = bond->params.downdelay; 2000 2001 if (slave->link_failure_count < UINT_MAX) { 2002 slave->link_failure_count++; 2003 } 2004 2005 if (bond->params.downdelay) { 2006 printk(KERN_INFO DRV_NAME 2007 ": %s: link status down for %s " 2008 "interface %s, disabling it in " 2009 "%d ms.\n", 2010 bond_dev->name, 2011 IS_UP(slave_dev) 2012 ? ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) 2013 ? ((slave == oldcurrent) 2014 ? "active " : "backup ") 2015 : "") 2016 : "idle ", 2017 slave_dev->name, 2018 bond->params.downdelay * bond->params.miimon); 2019 } 2020 } 2021 /* no break ! fall through the BOND_LINK_FAIL test to 2022 ensure proper action to be taken 2023 */ 2024 case BOND_LINK_FAIL: /* the link has just gone down */ 2025 if (link_state != BMSR_LSTATUS) { 2026 /* link stays down */ 2027 if (slave->delay <= 0) { 2028 /* link down for too long time */ 2029 slave->link = BOND_LINK_DOWN; 2030 2031 /* in active/backup mode, we must 2032 * completely disable this interface 2033 */ 2034 if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) || 2035 (bond->params.mode == BOND_MODE_8023AD)) { 2036 bond_set_slave_inactive_flags(slave); 2037 } 2038 2039 printk(KERN_INFO DRV_NAME 2040 ": %s: link status definitely " 2041 "down for interface %s, " 2042 "disabling it\n", 2043 bond_dev->name, 2044 slave_dev->name); 2045 2046 /* notify ad that the link status has changed */ 2047 if (bond->params.mode == BOND_MODE_8023AD) { 2048 bond_3ad_handle_link_change(slave, BOND_LINK_DOWN); 2049 } 2050 2051 if ((bond->params.mode == BOND_MODE_TLB) || 2052 (bond->params.mode == BOND_MODE_ALB)) { 2053 bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN); 2054 } 2055 2056 if (slave == oldcurrent) { 2057 do_failover = 1; 2058 } 2059 } else { 2060 slave->delay--; 2061 } 2062 } else { 2063 /* link up again */ 2064 slave->link = BOND_LINK_UP; 2065 slave->jiffies = jiffies; 2066 printk(KERN_INFO DRV_NAME 2067 ": %s: link status up again after %d " 2068 "ms for interface %s.\n", 2069 bond_dev->name, 2070 (bond->params.downdelay - slave->delay) * bond->params.miimon, 2071 slave_dev->name); 2072 } 2073 break; 2074 case BOND_LINK_DOWN: /* the link was down */ 2075 if (link_state != BMSR_LSTATUS) { 2076 /* the link stays down, nothing more to do */ 2077 break; 2078 } else { /* link going up */ 2079 slave->link = BOND_LINK_BACK; 2080 slave->delay = bond->params.updelay; 2081 2082 if (bond->params.updelay) { 2083 /* if updelay == 0, no need to 2084 advertise about a 0 ms delay */ 2085 printk(KERN_INFO DRV_NAME 2086 ": %s: link status up for " 2087 "interface %s, enabling it " 2088 "in %d ms.\n", 2089 bond_dev->name, 2090 slave_dev->name, 2091 bond->params.updelay * bond->params.miimon); 2092 } 2093 } 2094 /* no break ! fall through the BOND_LINK_BACK state in 2095 case there's something to do. 
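 * (a slave in BOND_LINK_BACK that loses link again before updelay
 * expires is simply put back to BOND_LINK_DOWN below)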
2096 */ 2097 case BOND_LINK_BACK: /* the link has just come back */ 2098 if (link_state != BMSR_LSTATUS) { 2099 /* link down again */ 2100 slave->link = BOND_LINK_DOWN; 2101 2102 printk(KERN_INFO DRV_NAME 2103 ": %s: link status down again after %d " 2104 "ms for interface %s.\n", 2105 bond_dev->name, 2106 (bond->params.updelay - slave->delay) * bond->params.miimon, 2107 slave_dev->name); 2108 } else { 2109 /* link stays up */ 2110 if (slave->delay == 0) { 2111 /* now the link has been up for long time enough */ 2112 slave->link = BOND_LINK_UP; 2113 slave->jiffies = jiffies; 2114 2115 if (bond->params.mode == BOND_MODE_8023AD) { 2116 /* prevent it from being the active one */ 2117 slave->state = BOND_STATE_BACKUP; 2118 } else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { 2119 /* make it immediately active */ 2120 slave->state = BOND_STATE_ACTIVE; 2121 } else if (slave != bond->primary_slave) { 2122 /* prevent it from being the active one */ 2123 slave->state = BOND_STATE_BACKUP; 2124 } 2125 2126 printk(KERN_INFO DRV_NAME 2127 ": %s: link status definitely " 2128 "up for interface %s.\n", 2129 bond_dev->name, 2130 slave_dev->name); 2131 2132 /* notify ad that the link status has changed */ 2133 if (bond->params.mode == BOND_MODE_8023AD) { 2134 bond_3ad_handle_link_change(slave, BOND_LINK_UP); 2135 } 2136 2137 if ((bond->params.mode == BOND_MODE_TLB) || 2138 (bond->params.mode == BOND_MODE_ALB)) { 2139 bond_alb_handle_link_change(bond, slave, BOND_LINK_UP); 2140 } 2141 2142 if ((!oldcurrent) || 2143 (slave == bond->primary_slave)) { 2144 do_failover = 1; 2145 } 2146 } else { 2147 slave->delay--; 2148 } 2149 } 2150 break; 2151 default: 2152 /* Should not happen */ 2153 printk(KERN_ERR DRV_NAME 2154 ": %s: Error: %s Illegal value (link=%d)\n", 2155 bond_dev->name, 2156 slave->dev->name, 2157 slave->link); 2158 goto out; 2159 } /* end of switch (slave->link) */ 2160 2161 bond_update_speed_duplex(slave); 2162 2163 if (bond->params.mode == BOND_MODE_8023AD) { 2164 if (old_speed != slave->speed) { 2165 bond_3ad_adapter_speed_changed(slave); 2166 } 2167 2168 if (old_duplex != slave->duplex) { 2169 bond_3ad_adapter_duplex_changed(slave); 2170 } 2171 } 2172 2173 } /* end of for */ 2174 2175 if (do_failover) { 2176 write_lock(&bond->curr_slave_lock); 2177 2178 bond_select_active_slave(bond); 2179 2180 if (oldcurrent && !bond->curr_active_slave) { 2181 printk(KERN_INFO DRV_NAME 2182 ": %s: now running without any active " 2183 "interface !\n", 2184 bond_dev->name); 2185 } 2186 2187 write_unlock(&bond->curr_slave_lock); 2188 } 2189 2190 re_arm: 2191 if (bond->params.miimon) { 2192 mod_timer(&bond->mii_timer, jiffies + delta_in_ticks); 2193 } 2194 out: 2195 read_unlock(&bond->lock); 2196 } 2197 2198 2199 static u32 bond_glean_dev_ip(struct net_device *dev) 2200 { 2201 struct in_device *idev; 2202 struct in_ifaddr *ifa; 2203 u32 addr = 0; 2204 2205 if (!dev) 2206 return 0; 2207 2208 rcu_read_lock(); 2209 idev = __in_dev_get_rcu(dev); 2210 if (!idev) 2211 goto out; 2212 2213 ifa = idev->ifa_list; 2214 if (!ifa) 2215 goto out; 2216 2217 addr = ifa->ifa_local; 2218 out: 2219 rcu_read_unlock(); 2220 return addr; 2221 } 2222 2223 static int bond_has_ip(struct bonding *bond) 2224 { 2225 struct vlan_entry *vlan, *vlan_next; 2226 2227 if (bond->master_ip) 2228 return 1; 2229 2230 if (list_empty(&bond->vlan_list)) 2231 return 0; 2232 2233 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2234 vlan_list) { 2235 if (vlan->vlan_ip) 2236 return 1; 2237 } 2238 2239 return 0; 2240 } 2241 2242 /* 2243 * We 
go to the (large) trouble of VLAN tagging ARP frames because 2244 * switches in VLAN mode (especially if ports are configured as 2245 * "native" to a VLAN) might not pass non-tagged frames. 2246 */ 2247 static void bond_arp_send(struct net_device *slave_dev, int arp_op, u32 dest_ip, u32 src_ip, unsigned short vlan_id) 2248 { 2249 struct sk_buff *skb; 2250 2251 dprintk("arp %d on slave %s: dst %x src %x vid %d\n", arp_op, 2252 slave_dev->name, dest_ip, src_ip, vlan_id); 2253 2254 skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, 2255 NULL, slave_dev->dev_addr, NULL); 2256 2257 if (!skb) { 2258 printk(KERN_ERR DRV_NAME ": ARP packet allocation failed\n"); 2259 return; 2260 } 2261 if (vlan_id) { 2262 skb = vlan_put_tag(skb, vlan_id); 2263 if (!skb) { 2264 printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n"); 2265 return; 2266 } 2267 } 2268 arp_xmit(skb); 2269 } 2270 2271 2272 static void bond_arp_send_all(struct bonding *bond, struct slave *slave) 2273 { 2274 int i, vlan_id, rv; 2275 u32 *targets = bond->params.arp_targets; 2276 struct vlan_entry *vlan, *vlan_next; 2277 struct net_device *vlan_dev; 2278 struct flowi fl; 2279 struct rtable *rt; 2280 2281 for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { 2282 if (!targets[i]) 2283 continue; 2284 dprintk("basa: target %x\n", targets[i]); 2285 if (list_empty(&bond->vlan_list)) { 2286 dprintk("basa: empty vlan: arp_send\n"); 2287 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2288 bond->master_ip, 0); 2289 continue; 2290 } 2291 2292 /* 2293 * If VLANs are configured, we do a route lookup to 2294 * determine which VLAN interface would be used, so we 2295 * can tag the ARP with the proper VLAN tag. 2296 */ 2297 memset(&fl, 0, sizeof(fl)); 2298 fl.fl4_dst = targets[i]; 2299 fl.fl4_tos = RTO_ONLINK; 2300 2301 rv = ip_route_output_key(&rt, &fl); 2302 if (rv) { 2303 if (net_ratelimit()) { 2304 printk(KERN_WARNING DRV_NAME 2305 ": %s: no route to arp_ip_target %u.%u.%u.%u\n", 2306 bond->dev->name, NIPQUAD(fl.fl4_dst)); 2307 } 2308 continue; 2309 } 2310 2311 /* 2312 * This target is not on a VLAN 2313 */ 2314 if (rt->u.dst.dev == bond->dev) { 2315 ip_rt_put(rt); 2316 dprintk("basa: rtdev == bond->dev: arp_send\n"); 2317 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2318 bond->master_ip, 0); 2319 continue; 2320 } 2321 2322 vlan_id = 0; 2323 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 2324 vlan_list) { 2325 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 2326 if (vlan_dev == rt->u.dst.dev) { 2327 vlan_id = vlan->vlan_id; 2328 dprintk("basa: vlan match on %s %d\n", 2329 vlan_dev->name, vlan_id); 2330 break; 2331 } 2332 } 2333 2334 if (vlan_id) { 2335 ip_rt_put(rt); 2336 bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 2337 vlan->vlan_ip, vlan_id); 2338 continue; 2339 } 2340 2341 if (net_ratelimit()) { 2342 printk(KERN_WARNING DRV_NAME 2343 ": %s: no path to arp_ip_target %u.%u.%u.%u via rt.dev %s\n", 2344 bond->dev->name, NIPQUAD(fl.fl4_dst), 2345 rt->u.dst.dev ? rt->u.dst.dev->name : "NULL"); 2346 } 2347 ip_rt_put(rt); 2348 } 2349 } 2350 2351 /* 2352 * Kick out a gratuitous ARP for an IP on the bonding master plus one 2353 * for each VLAN above us. 2354 */ 2355 static void bond_send_gratuitous_arp(struct bonding *bond) 2356 { 2357 struct slave *slave = bond->curr_active_slave; 2358 struct vlan_entry *vlan; 2359 struct net_device *vlan_dev; 2360 2361 dprintk("bond_send_grat_arp: bond %s slave %s\n", bond->dev->name, 2362 slave ? 
slave->dev->name : "NULL"); 2363 if (!slave) 2364 return; 2365 2366 if (bond->master_ip) { 2367 bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, 2368 bond->master_ip, 0); 2369 } 2370 2371 list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { 2372 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 2373 if (vlan->vlan_ip) { 2374 bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip, 2375 vlan->vlan_ip, vlan->vlan_id); 2376 } 2377 } 2378 } 2379 2380 /* 2381 * this function is called regularly to monitor each slave's link 2382 * ensuring that traffic is being sent and received when arp monitoring 2383 * is used in load-balancing mode. if the adapter has been dormant, then an 2384 * arp is transmitted to generate traffic. see activebackup_arp_monitor for 2385 * arp monitoring in active backup mode. 2386 */ 2387 void bond_loadbalance_arp_mon(struct net_device *bond_dev) 2388 { 2389 struct bonding *bond = bond_dev->priv; 2390 struct slave *slave, *oldcurrent; 2391 int do_failover = 0; 2392 int delta_in_ticks; 2393 int i; 2394 2395 read_lock(&bond->lock); 2396 2397 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2398 2399 if (bond->kill_timers) { 2400 goto out; 2401 } 2402 2403 if (bond->slave_cnt == 0) { 2404 goto re_arm; 2405 } 2406 2407 read_lock(&bond->curr_slave_lock); 2408 oldcurrent = bond->curr_active_slave; 2409 read_unlock(&bond->curr_slave_lock); 2410 2411 /* see if any of the previous devices are up now (i.e. they have 2412 * xmt and rcv traffic). the curr_active_slave does not come into 2413 * the picture unless it is null. also, slave->jiffies is not needed 2414 * here because we send an arp on each slave and give a slave as 2415 * long as it needs to get the tx/rx within the delta. 2416 * TODO: what about up/down delay in arp mode? it wasn't here before 2417 * so it can wait 2418 */ 2419 bond_for_each_slave(bond, slave, i) { 2420 if (slave->link != BOND_LINK_UP) { 2421 if (((jiffies - slave->dev->trans_start) <= delta_in_ticks) && 2422 ((jiffies - slave->dev->last_rx) <= delta_in_ticks)) { 2423 2424 slave->link = BOND_LINK_UP; 2425 slave->state = BOND_STATE_ACTIVE; 2426 2427 /* primary_slave has no meaning in round-robin 2428 * mode. the window of a slave being up and 2429 * curr_active_slave being null after enslaving 2430 * is closed. 
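 * a failover is therefore only forced here when there is currently
 * no active slave at all; otherwise the newly-up link just becomes
 * usable again.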
2431 */ 2432 if (!oldcurrent) { 2433 printk(KERN_INFO DRV_NAME 2434 ": %s: link status definitely " 2435 "up for interface %s.\n", 2436 bond_dev->name, 2437 slave->dev->name); 2438 do_failover = 1; 2439 } else { 2440 printk(KERN_INFO DRV_NAME 2441 ": %s: interface %s is now up\n", 2442 bond_dev->name, 2443 slave->dev->name); 2444 } 2445 } 2446 } else { 2447 /* slave->link == BOND_LINK_UP */ 2448 2449 /* not all switches will respond to an arp request 2450 * when the source ip is 0, so don't take the link down 2451 * if we don't know our ip yet 2452 */ 2453 if (((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || 2454 (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && 2455 bond_has_ip(bond))) { 2456 2457 slave->link = BOND_LINK_DOWN; 2458 slave->state = BOND_STATE_BACKUP; 2459 2460 if (slave->link_failure_count < UINT_MAX) { 2461 slave->link_failure_count++; 2462 } 2463 2464 printk(KERN_INFO DRV_NAME 2465 ": %s: interface %s is now down.\n", 2466 bond_dev->name, 2467 slave->dev->name); 2468 2469 if (slave == oldcurrent) { 2470 do_failover = 1; 2471 } 2472 } 2473 } 2474 2475 /* note: if switch is in round-robin mode, all links 2476 * must tx arp to ensure all links rx an arp - otherwise 2477 * links may oscillate or not come up at all; if switch is 2478 * in something like xor mode, there is nothing we can 2479 * do - all replies will be rx'ed on same link causing slaves 2480 * to be unstable during low/no traffic periods 2481 */ 2482 if (IS_UP(slave->dev)) { 2483 bond_arp_send_all(bond, slave); 2484 } 2485 } 2486 2487 if (do_failover) { 2488 write_lock(&bond->curr_slave_lock); 2489 2490 bond_select_active_slave(bond); 2491 2492 if (oldcurrent && !bond->curr_active_slave) { 2493 printk(KERN_INFO DRV_NAME 2494 ": %s: now running without any active " 2495 "interface !\n", 2496 bond_dev->name); 2497 } 2498 2499 write_unlock(&bond->curr_slave_lock); 2500 } 2501 2502 re_arm: 2503 if (bond->params.arp_interval) { 2504 mod_timer(&bond->arp_timer, jiffies + delta_in_ticks); 2505 } 2506 out: 2507 read_unlock(&bond->lock); 2508 } 2509 2510 /* 2511 * When using arp monitoring in active-backup mode, this function is 2512 * called to determine if any backup slaves have gone down or a new 2513 * current slave needs to be found. 2514 * The backup slaves never generate traffic, they are considered up by merely 2515 * receiving traffic. If the current slave goes down, each backup slave will 2516 * be given the opportunity to tx/rx an arp before being taken down - this 2517 * prevents all slaves from being taken down due to the current slave not 2518 * sending any traffic for the backups to receive. The arps are not strictly 2519 * necessary; any tx and rx traffic will keep the current slave up. While any 2520 * rx traffic will keep the backup slaves up, the current slave is responsible 2521 * for generating traffic to keep them up regardless of any other traffic they 2522 * may have received.
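 * In short: a backup slave is brought up as soon as it receives
 * anything within arp_interval, the current slave is taken down after
 * roughly 2*arp_interval of tx/rx silence, and when no active slave is
 * left the backups are probed one at a time via current_arp_slave.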
2523 * see loadbalance_arp_monitor for arp monitoring in load balancing mode 2524 */ 2525 void bond_activebackup_arp_mon(struct net_device *bond_dev) 2526 { 2527 struct bonding *bond = bond_dev->priv; 2528 struct slave *slave; 2529 int delta_in_ticks; 2530 int i; 2531 2532 read_lock(&bond->lock); 2533 2534 delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; 2535 2536 if (bond->kill_timers) { 2537 goto out; 2538 } 2539 2540 if (bond->slave_cnt == 0) { 2541 goto re_arm; 2542 } 2543 2544 /* determine if any slave has come up or any backup slave has 2545 * gone down 2546 * TODO: what about up/down delay in arp mode? it wasn't here before 2547 * so it can wait 2548 */ 2549 bond_for_each_slave(bond, slave, i) { 2550 if (slave->link != BOND_LINK_UP) { 2551 if ((jiffies - slave->dev->last_rx) <= delta_in_ticks) { 2552 2553 slave->link = BOND_LINK_UP; 2554 2555 write_lock(&bond->curr_slave_lock); 2556 2557 if ((!bond->curr_active_slave) && 2558 ((jiffies - slave->dev->trans_start) <= delta_in_ticks)) { 2559 bond_change_active_slave(bond, slave); 2560 bond->current_arp_slave = NULL; 2561 } else if (bond->curr_active_slave != slave) { 2562 /* this slave has just come up but we 2563 * already have a current slave; this 2564 * can also happen if bond_enslave adds 2565 * a new slave that is up while we are 2566 * searching for a new slave 2567 */ 2568 bond_set_slave_inactive_flags(slave); 2569 bond->current_arp_slave = NULL; 2570 } 2571 2572 if (slave == bond->curr_active_slave) { 2573 printk(KERN_INFO DRV_NAME 2574 ": %s: %s is up and now the " 2575 "active interface\n", 2576 bond_dev->name, 2577 slave->dev->name); 2578 } else { 2579 printk(KERN_INFO DRV_NAME 2580 ": %s: backup interface %s is " 2581 "now up\n", 2582 bond_dev->name, 2583 slave->dev->name); 2584 } 2585 2586 write_unlock(&bond->curr_slave_lock); 2587 } 2588 } else { 2589 read_lock(&bond->curr_slave_lock); 2590 2591 if ((slave != bond->curr_active_slave) && 2592 (!bond->current_arp_slave) && 2593 (((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) && 2594 bond_has_ip(bond))) { 2595 /* a backup slave has gone down; three times 2596 * the delta allows the current slave to be 2597 * taken out before the backup slave. 2598 * note: a non-null current_arp_slave indicates 2599 * the curr_active_slave went down and we are 2600 * searching for a new one; under this 2601 * condition we only take the curr_active_slave 2602 * down - this gives each slave a chance to 2603 * tx/rx traffic before being taken out 2604 */ 2605 2606 read_unlock(&bond->curr_slave_lock); 2607 2608 slave->link = BOND_LINK_DOWN; 2609 2610 if (slave->link_failure_count < UINT_MAX) { 2611 slave->link_failure_count++; 2612 } 2613 2614 bond_set_slave_inactive_flags(slave); 2615 2616 printk(KERN_INFO DRV_NAME 2617 ": %s: backup interface %s is now down\n", 2618 bond_dev->name, 2619 slave->dev->name); 2620 } else { 2621 read_unlock(&bond->curr_slave_lock); 2622 } 2623 } 2624 } 2625 2626 read_lock(&bond->curr_slave_lock); 2627 slave = bond->curr_active_slave; 2628 read_unlock(&bond->curr_slave_lock); 2629 2630 if (slave) { 2631 /* if we have sent traffic in the past 2*arp_intervals but 2632 * haven't xmit and rx traffic in that time interval, select 2633 * a different slave. slave->jiffies is only updated when 2634 * a slave first becomes the curr_active_slave - not necessarily 2635 * after every arp; this ensures the slave has a full 2*delta 2636 * before being taken out. 
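 * (slave->jiffies is refreshed below each time a slave becomes the
 * current one, which is what grants it that full window.)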
if a primary is being used, check 2637 * if it is up and needs to take over as the curr_active_slave 2638 */ 2639 if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || 2640 (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && 2641 bond_has_ip(bond))) && 2642 ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) { 2643 2644 slave->link = BOND_LINK_DOWN; 2645 2646 if (slave->link_failure_count < UINT_MAX) { 2647 slave->link_failure_count++; 2648 } 2649 2650 printk(KERN_INFO DRV_NAME 2651 ": %s: link status down for active interface " 2652 "%s, disabling it\n", 2653 bond_dev->name, 2654 slave->dev->name); 2655 2656 write_lock(&bond->curr_slave_lock); 2657 2658 bond_select_active_slave(bond); 2659 slave = bond->curr_active_slave; 2660 2661 write_unlock(&bond->curr_slave_lock); 2662 2663 bond->current_arp_slave = slave; 2664 2665 if (slave) { 2666 slave->jiffies = jiffies; 2667 } 2668 } else if ((bond->primary_slave) && 2669 (bond->primary_slave != slave) && 2670 (bond->primary_slave->link == BOND_LINK_UP)) { 2671 /* at this point, slave is the curr_active_slave */ 2672 printk(KERN_INFO DRV_NAME 2673 ": %s: changing from interface %s to primary " 2674 "interface %s\n", 2675 bond_dev->name, 2676 slave->dev->name, 2677 bond->primary_slave->dev->name); 2678 2679 /* primary is up so switch to it */ 2680 write_lock(&bond->curr_slave_lock); 2681 bond_change_active_slave(bond, bond->primary_slave); 2682 write_unlock(&bond->curr_slave_lock); 2683 2684 slave = bond->primary_slave; 2685 slave->jiffies = jiffies; 2686 } else { 2687 bond->current_arp_slave = NULL; 2688 } 2689 2690 /* the current slave must tx an arp to ensure backup slaves 2691 * rx traffic 2692 */ 2693 if (slave && bond_has_ip(bond)) { 2694 bond_arp_send_all(bond, slave); 2695 } 2696 } 2697 2698 /* if we don't have a curr_active_slave, search for the next available 2699 * backup slave from the current_arp_slave and make it the candidate 2700 * for becoming the curr_active_slave 2701 */ 2702 if (!slave) { 2703 if (!bond->current_arp_slave) { 2704 bond->current_arp_slave = bond->first_slave; 2705 } 2706 2707 if (bond->current_arp_slave) { 2708 bond_set_slave_inactive_flags(bond->current_arp_slave); 2709 2710 /* search for next candidate */ 2711 bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { 2712 if (IS_UP(slave->dev)) { 2713 slave->link = BOND_LINK_BACK; 2714 bond_set_slave_active_flags(slave); 2715 bond_arp_send_all(bond, slave); 2716 slave->jiffies = jiffies; 2717 bond->current_arp_slave = slave; 2718 break; 2719 } 2720 2721 /* if the link state is up at this point, we 2722 * mark it down - this can happen if we have 2723 * simultaneous link failures and 2724 * reselect_active_interface doesn't make this 2725 * one the current slave so it is still marked 2726 * up when it is actually down 2727 */ 2728 if (slave->link == BOND_LINK_UP) { 2729 slave->link = BOND_LINK_DOWN; 2730 if (slave->link_failure_count < UINT_MAX) { 2731 slave->link_failure_count++; 2732 } 2733 2734 bond_set_slave_inactive_flags(slave); 2735 2736 printk(KERN_INFO DRV_NAME 2737 ": %s: backup interface %s is " 2738 "now down.\n", 2739 bond_dev->name, 2740 slave->dev->name); 2741 } 2742 } 2743 } 2744 } 2745 2746 re_arm: 2747 if (bond->params.arp_interval) { 2748 mod_timer(&bond->arp_timer, jiffies + delta_in_ticks); 2749 } 2750 out: 2751 read_unlock(&bond->lock); 2752 } 2753 2754 /*------------------------------ proc/seq_file-------------------------------*/ 2755 2756 #ifdef CONFIG_PROC_FS 2757 2758 #define SEQ_START_TOKEN ((void 
*)1) 2759 2760 static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) 2761 { 2762 struct bonding *bond = seq->private; 2763 loff_t off = 0; 2764 struct slave *slave; 2765 int i; 2766 2767 /* make sure the bond won't be taken away */ 2768 read_lock(&dev_base_lock); 2769 read_lock_bh(&bond->lock); 2770 2771 if (*pos == 0) { 2772 return SEQ_START_TOKEN; 2773 } 2774 2775 bond_for_each_slave(bond, slave, i) { 2776 if (++off == *pos) { 2777 return slave; 2778 } 2779 } 2780 2781 return NULL; 2782 } 2783 2784 static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2785 { 2786 struct bonding *bond = seq->private; 2787 struct slave *slave = v; 2788 2789 ++*pos; 2790 if (v == SEQ_START_TOKEN) { 2791 return bond->first_slave; 2792 } 2793 2794 slave = slave->next; 2795 2796 return (slave == bond->first_slave) ? NULL : slave; 2797 } 2798 2799 static void bond_info_seq_stop(struct seq_file *seq, void *v) 2800 { 2801 struct bonding *bond = seq->private; 2802 2803 read_unlock_bh(&bond->lock); 2804 read_unlock(&dev_base_lock); 2805 } 2806 2807 static void bond_info_show_master(struct seq_file *seq) 2808 { 2809 struct bonding *bond = seq->private; 2810 struct slave *curr; 2811 int i; 2812 u32 target; 2813 2814 read_lock(&bond->curr_slave_lock); 2815 curr = bond->curr_active_slave; 2816 read_unlock(&bond->curr_slave_lock); 2817 2818 seq_printf(seq, "Bonding Mode: %s\n", 2819 bond_mode_name(bond->params.mode)); 2820 2821 if (bond->params.mode == BOND_MODE_XOR || 2822 bond->params.mode == BOND_MODE_8023AD) { 2823 seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", 2824 xmit_hashtype_tbl[bond->params.xmit_policy].modename, 2825 bond->params.xmit_policy); 2826 } 2827 2828 if (USES_PRIMARY(bond->params.mode)) { 2829 seq_printf(seq, "Primary Slave: %s\n", 2830 (bond->primary_slave) ? 2831 bond->primary_slave->dev->name : "None"); 2832 2833 seq_printf(seq, "Currently Active Slave: %s\n", 2834 (curr) ? curr->dev->name : "None"); 2835 } 2836 2837 seq_printf(seq, "MII Status: %s\n", (curr) ? "up" : "down"); 2838 seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); 2839 seq_printf(seq, "Up Delay (ms): %d\n", 2840 bond->params.updelay * bond->params.miimon); 2841 seq_printf(seq, "Down Delay (ms): %d\n", 2842 bond->params.downdelay * bond->params.miimon); 2843 2844 2845 /* ARP information */ 2846 if(bond->params.arp_interval > 0) { 2847 int printed=0; 2848 seq_printf(seq, "ARP Polling Interval (ms): %d\n", 2849 bond->params.arp_interval); 2850 2851 seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); 2852 2853 for(i = 0; (i < BOND_MAX_ARP_TARGETS) ;i++) { 2854 if (!bond->params.arp_targets[i]) 2855 continue; 2856 if (printed) 2857 seq_printf(seq, ","); 2858 target = ntohl(bond->params.arp_targets[i]); 2859 seq_printf(seq, " %d.%d.%d.%d", HIPQUAD(target)); 2860 printed = 1; 2861 } 2862 seq_printf(seq, "\n"); 2863 } 2864 2865 if (bond->params.mode == BOND_MODE_8023AD) { 2866 struct ad_info ad_info; 2867 2868 seq_puts(seq, "\n802.3ad info\n"); 2869 seq_printf(seq, "LACP rate: %s\n", 2870 (bond->params.lacp_fast) ? 
"fast" : "slow"); 2871 2872 if (bond_3ad_get_active_agg_info(bond, &ad_info)) { 2873 seq_printf(seq, "bond %s has no active aggregator\n", 2874 bond->dev->name); 2875 } else { 2876 seq_printf(seq, "Active Aggregator Info:\n"); 2877 2878 seq_printf(seq, "\tAggregator ID: %d\n", 2879 ad_info.aggregator_id); 2880 seq_printf(seq, "\tNumber of ports: %d\n", 2881 ad_info.ports); 2882 seq_printf(seq, "\tActor Key: %d\n", 2883 ad_info.actor_key); 2884 seq_printf(seq, "\tPartner Key: %d\n", 2885 ad_info.partner_key); 2886 seq_printf(seq, "\tPartner Mac Address: %02x:%02x:%02x:%02x:%02x:%02x\n", 2887 ad_info.partner_system[0], 2888 ad_info.partner_system[1], 2889 ad_info.partner_system[2], 2890 ad_info.partner_system[3], 2891 ad_info.partner_system[4], 2892 ad_info.partner_system[5]); 2893 } 2894 } 2895 } 2896 2897 static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) 2898 { 2899 struct bonding *bond = seq->private; 2900 2901 seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); 2902 seq_printf(seq, "MII Status: %s\n", 2903 (slave->link == BOND_LINK_UP) ? "up" : "down"); 2904 seq_printf(seq, "Link Failure Count: %d\n", 2905 slave->link_failure_count); 2906 2907 seq_printf(seq, 2908 "Permanent HW addr: %02x:%02x:%02x:%02x:%02x:%02x\n", 2909 slave->perm_hwaddr[0], slave->perm_hwaddr[1], 2910 slave->perm_hwaddr[2], slave->perm_hwaddr[3], 2911 slave->perm_hwaddr[4], slave->perm_hwaddr[5]); 2912 2913 if (bond->params.mode == BOND_MODE_8023AD) { 2914 const struct aggregator *agg 2915 = SLAVE_AD_INFO(slave).port.aggregator; 2916 2917 if (agg) { 2918 seq_printf(seq, "Aggregator ID: %d\n", 2919 agg->aggregator_identifier); 2920 } else { 2921 seq_puts(seq, "Aggregator ID: N/A\n"); 2922 } 2923 } 2924 } 2925 2926 static int bond_info_seq_show(struct seq_file *seq, void *v) 2927 { 2928 if (v == SEQ_START_TOKEN) { 2929 seq_printf(seq, "%s\n", version); 2930 bond_info_show_master(seq); 2931 } else { 2932 bond_info_show_slave(seq, v); 2933 } 2934 2935 return 0; 2936 } 2937 2938 static struct seq_operations bond_info_seq_ops = { 2939 .start = bond_info_seq_start, 2940 .next = bond_info_seq_next, 2941 .stop = bond_info_seq_stop, 2942 .show = bond_info_seq_show, 2943 }; 2944 2945 static int bond_info_open(struct inode *inode, struct file *file) 2946 { 2947 struct seq_file *seq; 2948 struct proc_dir_entry *proc; 2949 int res; 2950 2951 res = seq_open(file, &bond_info_seq_ops); 2952 if (!res) { 2953 /* recover the pointer buried in proc_dir_entry data */ 2954 seq = file->private_data; 2955 proc = PDE(inode); 2956 seq->private = proc->data; 2957 } 2958 2959 return res; 2960 } 2961 2962 static struct file_operations bond_info_fops = { 2963 .owner = THIS_MODULE, 2964 .open = bond_info_open, 2965 .read = seq_read, 2966 .llseek = seq_lseek, 2967 .release = seq_release, 2968 }; 2969 2970 static int bond_create_proc_entry(struct bonding *bond) 2971 { 2972 struct net_device *bond_dev = bond->dev; 2973 2974 if (bond_proc_dir) { 2975 bond->proc_entry = create_proc_entry(bond_dev->name, 2976 S_IRUGO, 2977 bond_proc_dir); 2978 if (bond->proc_entry == NULL) { 2979 printk(KERN_WARNING DRV_NAME 2980 ": Warning: Cannot create /proc/net/%s/%s\n", 2981 DRV_NAME, bond_dev->name); 2982 } else { 2983 bond->proc_entry->data = bond; 2984 bond->proc_entry->proc_fops = &bond_info_fops; 2985 bond->proc_entry->owner = THIS_MODULE; 2986 memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); 2987 } 2988 } 2989 2990 return 0; 2991 } 2992 2993 static void bond_remove_proc_entry(struct bonding *bond) 2994 { 2995 if 
(bond_proc_dir && bond->proc_entry) { 2996 remove_proc_entry(bond->proc_file_name, bond_proc_dir); 2997 memset(bond->proc_file_name, 0, IFNAMSIZ); 2998 bond->proc_entry = NULL; 2999 } 3000 } 3001 3002 /* Create the bonding directory under /proc/net, if doesn't exist yet. 3003 * Caller must hold rtnl_lock. 3004 */ 3005 static void bond_create_proc_dir(void) 3006 { 3007 int len = strlen(DRV_NAME); 3008 3009 for (bond_proc_dir = proc_net->subdir; bond_proc_dir; 3010 bond_proc_dir = bond_proc_dir->next) { 3011 if ((bond_proc_dir->namelen == len) && 3012 !memcmp(bond_proc_dir->name, DRV_NAME, len)) { 3013 break; 3014 } 3015 } 3016 3017 if (!bond_proc_dir) { 3018 bond_proc_dir = proc_mkdir(DRV_NAME, proc_net); 3019 if (bond_proc_dir) { 3020 bond_proc_dir->owner = THIS_MODULE; 3021 } else { 3022 printk(KERN_WARNING DRV_NAME 3023 ": Warning: cannot create /proc/net/%s\n", 3024 DRV_NAME); 3025 } 3026 } 3027 } 3028 3029 /* Destroy the bonding directory under /proc/net, if empty. 3030 * Caller must hold rtnl_lock. 3031 */ 3032 static void bond_destroy_proc_dir(void) 3033 { 3034 struct proc_dir_entry *de; 3035 3036 if (!bond_proc_dir) { 3037 return; 3038 } 3039 3040 /* verify that the /proc dir is empty */ 3041 for (de = bond_proc_dir->subdir; de; de = de->next) { 3042 /* ignore . and .. */ 3043 if (*(de->name) != '.') { 3044 break; 3045 } 3046 } 3047 3048 if (de) { 3049 if (bond_proc_dir->owner == THIS_MODULE) { 3050 bond_proc_dir->owner = NULL; 3051 } 3052 } else { 3053 remove_proc_entry(DRV_NAME, proc_net); 3054 bond_proc_dir = NULL; 3055 } 3056 } 3057 #endif /* CONFIG_PROC_FS */ 3058 3059 /*-------------------------- netdev event handling --------------------------*/ 3060 3061 /* 3062 * Change device name 3063 */ 3064 static int bond_event_changename(struct bonding *bond) 3065 { 3066 #ifdef CONFIG_PROC_FS 3067 bond_remove_proc_entry(bond); 3068 bond_create_proc_entry(bond); 3069 #endif 3070 down_write(&(bonding_rwsem)); 3071 bond_destroy_sysfs_entry(bond); 3072 bond_create_sysfs_entry(bond); 3073 up_write(&(bonding_rwsem)); 3074 return NOTIFY_DONE; 3075 } 3076 3077 static int bond_master_netdev_event(unsigned long event, struct net_device *bond_dev) 3078 { 3079 struct bonding *event_bond = bond_dev->priv; 3080 3081 switch (event) { 3082 case NETDEV_CHANGENAME: 3083 return bond_event_changename(event_bond); 3084 case NETDEV_UNREGISTER: 3085 /* 3086 * TODO: remove a bond from the list? 3087 */ 3088 break; 3089 default: 3090 break; 3091 } 3092 3093 return NOTIFY_DONE; 3094 } 3095 3096 static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) 3097 { 3098 struct net_device *bond_dev = slave_dev->master; 3099 struct bonding *bond = bond_dev->priv; 3100 3101 switch (event) { 3102 case NETDEV_UNREGISTER: 3103 if (bond_dev) { 3104 bond_release(bond_dev, slave_dev); 3105 } 3106 break; 3107 case NETDEV_CHANGE: 3108 /* 3109 * TODO: is this what we get if somebody 3110 * sets up a hierarchical bond, then rmmod's 3111 * one of the slave bonding devices? 3112 */ 3113 break; 3114 case NETDEV_DOWN: 3115 /* 3116 * ... Or is it this? 3117 */ 3118 break; 3119 case NETDEV_CHANGEMTU: 3120 /* 3121 * TODO: Should slaves be allowed to 3122 * independently alter their MTU? For 3123 * an active-backup bond, slaves need 3124 * not be the same type of device, so 3125 * MTUs may vary. For other modes, 3126 * slaves arguably should have the 3127 * same MTUs. To do this, we'd need to 3128 * take over the slave's change_mtu 3129 * function for the duration of their 3130 * servitude. 
3131 */ 3132 break; 3133 case NETDEV_CHANGENAME: 3134 /* 3135 * TODO: handle changing the primary's name 3136 */ 3137 break; 3138 case NETDEV_FEAT_CHANGE: 3139 bond_compute_features(bond); 3140 break; 3141 default: 3142 break; 3143 } 3144 3145 return NOTIFY_DONE; 3146 } 3147 3148 /* 3149 * bond_netdev_event: handle netdev notifier chain events. 3150 * 3151 * This function receives events for the netdev chain. The caller (an 3152 * ioctl handler calling notifier_call_chain) holds the necessary 3153 * locks for us to safely manipulate the slave devices (RTNL lock, 3154 * dev_probe_lock). 3155 */ 3156 static int bond_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 3157 { 3158 struct net_device *event_dev = (struct net_device *)ptr; 3159 3160 dprintk("event_dev: %s, event: %lx\n", 3161 (event_dev ? event_dev->name : "None"), 3162 event); 3163 3164 if (event_dev->flags & IFF_MASTER) { 3165 dprintk("IFF_MASTER\n"); 3166 return bond_master_netdev_event(event, event_dev); 3167 } 3168 3169 if (event_dev->flags & IFF_SLAVE) { 3170 dprintk("IFF_SLAVE\n"); 3171 return bond_slave_netdev_event(event, event_dev); 3172 } 3173 3174 return NOTIFY_DONE; 3175 } 3176 3177 /* 3178 * bond_inetaddr_event: handle inetaddr notifier chain events. 3179 * 3180 * We keep track of device IPs primarily to use as source addresses in 3181 * ARP monitor probes (rather than spewing out broadcasts all the time). 3182 * 3183 * We track one IP for the main device (if it has one), plus one per VLAN. 3184 */ 3185 static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 3186 { 3187 struct in_ifaddr *ifa = ptr; 3188 struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; 3189 struct bonding *bond, *bond_next; 3190 struct vlan_entry *vlan, *vlan_next; 3191 3192 list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) { 3193 if (bond->dev == event_dev) { 3194 switch (event) { 3195 case NETDEV_UP: 3196 bond->master_ip = ifa->ifa_local; 3197 return NOTIFY_OK; 3198 case NETDEV_DOWN: 3199 bond->master_ip = bond_glean_dev_ip(bond->dev); 3200 return NOTIFY_OK; 3201 default: 3202 return NOTIFY_DONE; 3203 } 3204 } 3205 3206 if (list_empty(&bond->vlan_list)) 3207 continue; 3208 3209 list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, 3210 vlan_list) { 3211 vlan_dev = bond->vlgrp->vlan_devices[vlan->vlan_id]; 3212 if (vlan_dev == event_dev) { 3213 switch (event) { 3214 case NETDEV_UP: 3215 vlan->vlan_ip = ifa->ifa_local; 3216 return NOTIFY_OK; 3217 case NETDEV_DOWN: 3218 vlan->vlan_ip = 3219 bond_glean_dev_ip(vlan_dev); 3220 return NOTIFY_OK; 3221 default: 3222 return NOTIFY_DONE; 3223 } 3224 } 3225 } 3226 } 3227 return NOTIFY_DONE; 3228 } 3229 3230 static struct notifier_block bond_netdev_notifier = { 3231 .notifier_call = bond_netdev_event, 3232 }; 3233 3234 static struct notifier_block bond_inetaddr_notifier = { 3235 .notifier_call = bond_inetaddr_event, 3236 }; 3237 3238 /*-------------------------- Packet type handling ---------------------------*/ 3239 3240 /* register to receive lacpdus on a bond */ 3241 static void bond_register_lacpdu(struct bonding *bond) 3242 { 3243 struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); 3244 3245 /* initialize packet type */ 3246 pk_type->type = PKT_TYPE_LACPDU; 3247 pk_type->dev = bond->dev; 3248 pk_type->func = bond_3ad_lacpdu_recv; 3249 3250 dev_add_pack(pk_type); 3251 } 3252 3253 /* unregister to receive lacpdus on a bond */ 3254 static void bond_unregister_lacpdu(struct bonding *bond) 3255 { 3256 
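	/* tears down the LACPDU packet handler installed by bond_register_lacpdu() */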
dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); 3257 } 3258 3259 /*---------------------------- Hashing Policies -----------------------------*/ 3260 3261 /* 3262 * Hash for the output device based upon layer 3 and layer 4 data. If 3263 * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is 3264 * altogether not IP, mimic bond_xmit_hash_policy_l2() 3265 */ 3266 static int bond_xmit_hash_policy_l34(struct sk_buff *skb, 3267 struct net_device *bond_dev, int count) 3268 { 3269 struct ethhdr *data = (struct ethhdr *)skb->data; 3270 struct iphdr *iph = skb->nh.iph; 3271 u16 *layer4hdr = (u16 *)((u32 *)iph + iph->ihl); 3272 int layer4_xor = 0; 3273 3274 if (skb->protocol == __constant_htons(ETH_P_IP)) { 3275 if (!(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) && 3276 (iph->protocol == IPPROTO_TCP || 3277 iph->protocol == IPPROTO_UDP)) { 3278 layer4_xor = htons((*layer4hdr ^ *(layer4hdr + 1))); 3279 } 3280 return (layer4_xor ^ 3281 ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; 3282 3283 } 3284 3285 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3286 } 3287 3288 /* 3289 * Hash for the output device based upon layer 2 data 3290 */ 3291 static int bond_xmit_hash_policy_l2(struct sk_buff *skb, 3292 struct net_device *bond_dev, int count) 3293 { 3294 struct ethhdr *data = (struct ethhdr *)skb->data; 3295 3296 return (data->h_dest[5] ^ bond_dev->dev_addr[5]) % count; 3297 } 3298 3299 /*-------------------------- Device entry points ----------------------------*/ 3300 3301 static int bond_open(struct net_device *bond_dev) 3302 { 3303 struct bonding *bond = bond_dev->priv; 3304 struct timer_list *mii_timer = &bond->mii_timer; 3305 struct timer_list *arp_timer = &bond->arp_timer; 3306 3307 bond->kill_timers = 0; 3308 3309 if ((bond->params.mode == BOND_MODE_TLB) || 3310 (bond->params.mode == BOND_MODE_ALB)) { 3311 struct timer_list *alb_timer = &(BOND_ALB_INFO(bond).alb_timer); 3312 3313 /* bond_alb_initialize must be called before the timer 3314 * is started. 3315 */ 3316 if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) { 3317 /* something went wrong - fail the open operation */ 3318 return -1; 3319 } 3320 3321 init_timer(alb_timer); 3322 alb_timer->expires = jiffies + 1; 3323 alb_timer->data = (unsigned long)bond; 3324 alb_timer->function = (void *)&bond_alb_monitor; 3325 add_timer(alb_timer); 3326 } 3327 3328 if (bond->params.miimon) { /* link check interval, in milliseconds. */ 3329 init_timer(mii_timer); 3330 mii_timer->expires = jiffies + 1; 3331 mii_timer->data = (unsigned long)bond_dev; 3332 mii_timer->function = (void *)&bond_mii_monitor; 3333 add_timer(mii_timer); 3334 } 3335 3336 if (bond->params.arp_interval) { /* arp interval, in milliseconds.
*/ 3337 init_timer(arp_timer); 3338 arp_timer->expires = jiffies + 1; 3339 arp_timer->data = (unsigned long)bond_dev; 3340 if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { 3341 arp_timer->function = (void *)&bond_activebackup_arp_mon; 3342 } else { 3343 arp_timer->function = (void *)&bond_loadbalance_arp_mon; 3344 } 3345 add_timer(arp_timer); 3346 } 3347 3348 if (bond->params.mode == BOND_MODE_8023AD) { 3349 struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer); 3350 init_timer(ad_timer); 3351 ad_timer->expires = jiffies + 1; 3352 ad_timer->data = (unsigned long)bond; 3353 ad_timer->function = (void *)&bond_3ad_state_machine_handler; 3354 add_timer(ad_timer); 3355 3356 /* register to receive LACPDUs */ 3357 bond_register_lacpdu(bond); 3358 } 3359 3360 return 0; 3361 } 3362 3363 static int bond_close(struct net_device *bond_dev) 3364 { 3365 struct bonding *bond = bond_dev->priv; 3366 3367 if (bond->params.mode == BOND_MODE_8023AD) { 3368 /* Unregister the receive of LACPDUs */ 3369 bond_unregister_lacpdu(bond); 3370 } 3371 3372 write_lock_bh(&bond->lock); 3373 3374 bond_mc_list_destroy(bond); 3375 3376 /* signal timers not to re-arm */ 3377 bond->kill_timers = 1; 3378 3379 write_unlock_bh(&bond->lock); 3380 3381 /* del_timer_sync must run without holding the bond->lock 3382 * because a running timer might be trying to hold it too 3383 */ 3384 3385 if (bond->params.miimon) { /* link check interval, in milliseconds. */ 3386 del_timer_sync(&bond->mii_timer); 3387 } 3388 3389 if (bond->params.arp_interval) { /* arp interval, in milliseconds. */ 3390 del_timer_sync(&bond->arp_timer); 3391 } 3392 3393 switch (bond->params.mode) { 3394 case BOND_MODE_8023AD: 3395 del_timer_sync(&(BOND_AD_INFO(bond).ad_timer)); 3396 break; 3397 case BOND_MODE_TLB: 3398 case BOND_MODE_ALB: 3399 del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer)); 3400 break; 3401 default: 3402 break; 3403 } 3404 3405 /* Release the bonded slaves */ 3406 bond_release_all(bond_dev); 3407 3408 if ((bond->params.mode == BOND_MODE_TLB) || 3409 (bond->params.mode == BOND_MODE_ALB)) { 3410 /* Must be called only after all 3411 * slaves have been released 3412 */ 3413 bond_alb_deinitialize(bond); 3414 } 3415 3416 return 0; 3417 } 3418 3419 static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) 3420 { 3421 struct bonding *bond = bond_dev->priv; 3422 struct net_device_stats *stats = &(bond->stats), *sstats; 3423 struct slave *slave; 3424 int i; 3425 3426 memset(stats, 0, sizeof(struct net_device_stats)); 3427 3428 read_lock_bh(&bond->lock); 3429 3430 bond_for_each_slave(bond, slave, i) { 3431 sstats = slave->dev->get_stats(slave->dev); 3432 3433 stats->rx_packets += sstats->rx_packets; 3434 stats->rx_bytes += sstats->rx_bytes; 3435 stats->rx_errors += sstats->rx_errors; 3436 stats->rx_dropped += sstats->rx_dropped; 3437 3438 stats->tx_packets += sstats->tx_packets; 3439 stats->tx_bytes += sstats->tx_bytes; 3440 stats->tx_errors += sstats->tx_errors; 3441 stats->tx_dropped += sstats->tx_dropped; 3442 3443 stats->multicast += sstats->multicast; 3444 stats->collisions += sstats->collisions; 3445 3446 stats->rx_length_errors += sstats->rx_length_errors; 3447 stats->rx_over_errors += sstats->rx_over_errors; 3448 stats->rx_crc_errors += sstats->rx_crc_errors; 3449 stats->rx_frame_errors += sstats->rx_frame_errors; 3450 stats->rx_fifo_errors += sstats->rx_fifo_errors; 3451 stats->rx_missed_errors += sstats->rx_missed_errors; 3452 3453 stats->tx_aborted_errors += sstats->tx_aborted_errors; 3454 stats->tx_carrier_errors += 
sstats->tx_carrier_errors; 3455 stats->tx_fifo_errors += sstats->tx_fifo_errors; 3456 stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors; 3457 stats->tx_window_errors += sstats->tx_window_errors; 3458 } 3459 3460 read_unlock_bh(&bond->lock); 3461 3462 return stats; 3463 } 3464 3465 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) 3466 { 3467 struct net_device *slave_dev = NULL; 3468 struct ifbond k_binfo; 3469 struct ifbond __user *u_binfo = NULL; 3470 struct ifslave k_sinfo; 3471 struct ifslave __user *u_sinfo = NULL; 3472 struct mii_ioctl_data *mii = NULL; 3473 int res = 0; 3474 3475 dprintk("bond_ioctl: master=%s, cmd=%d\n", 3476 bond_dev->name, cmd); 3477 3478 switch (cmd) { 3479 case SIOCGMIIPHY: 3480 mii = if_mii(ifr); 3481 if (!mii) { 3482 return -EINVAL; 3483 } 3484 mii->phy_id = 0; 3485 /* Fall Through */ 3486 case SIOCGMIIREG: 3487 /* 3488 * We do this again just in case we were called by SIOCGMIIREG 3489 * instead of SIOCGMIIPHY. 3490 */ 3491 mii = if_mii(ifr); 3492 if (!mii) { 3493 return -EINVAL; 3494 } 3495 3496 if (mii->reg_num == 1) { 3497 struct bonding *bond = bond_dev->priv; 3498 mii->val_out = 0; 3499 read_lock_bh(&bond->lock); 3500 read_lock(&bond->curr_slave_lock); 3501 if (bond->curr_active_slave) { 3502 mii->val_out = BMSR_LSTATUS; 3503 } 3504 read_unlock(&bond->curr_slave_lock); 3505 read_unlock_bh(&bond->lock); 3506 } 3507 3508 return 0; 3509 case BOND_INFO_QUERY_OLD: 3510 case SIOCBONDINFOQUERY: 3511 u_binfo = (struct ifbond __user *)ifr->ifr_data; 3512 3513 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { 3514 return -EFAULT; 3515 } 3516 3517 res = bond_info_query(bond_dev, &k_binfo); 3518 if (res == 0) { 3519 if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { 3520 return -EFAULT; 3521 } 3522 } 3523 3524 return res; 3525 case BOND_SLAVE_INFO_QUERY_OLD: 3526 case SIOCBONDSLAVEINFOQUERY: 3527 u_sinfo = (struct ifslave __user *)ifr->ifr_data; 3528 3529 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { 3530 return -EFAULT; 3531 } 3532 3533 res = bond_slave_info_query(bond_dev, &k_sinfo); 3534 if (res == 0) { 3535 if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { 3536 return -EFAULT; 3537 } 3538 } 3539 3540 return res; 3541 default: 3542 /* Go on */ 3543 break; 3544 } 3545 3546 if (!capable(CAP_NET_ADMIN)) { 3547 return -EPERM; 3548 } 3549 3550 down_write(&(bonding_rwsem)); 3551 slave_dev = dev_get_by_name(ifr->ifr_slave); 3552 3553 dprintk("slave_dev=%p: \n", slave_dev); 3554 3555 if (!slave_dev) { 3556 res = -ENODEV; 3557 } else { 3558 dprintk("slave_dev->name=%s: \n", slave_dev->name); 3559 switch (cmd) { 3560 case BOND_ENSLAVE_OLD: 3561 case SIOCBONDENSLAVE: 3562 res = bond_enslave(bond_dev, slave_dev); 3563 break; 3564 case BOND_RELEASE_OLD: 3565 case SIOCBONDRELEASE: 3566 res = bond_release(bond_dev, slave_dev); 3567 break; 3568 case BOND_SETHWADDR_OLD: 3569 case SIOCBONDSETHWADDR: 3570 res = bond_sethwaddr(bond_dev, slave_dev); 3571 break; 3572 case BOND_CHANGE_ACTIVE_OLD: 3573 case SIOCBONDCHANGEACTIVE: 3574 res = bond_ioctl_change_active(bond_dev, slave_dev); 3575 break; 3576 default: 3577 res = -EOPNOTSUPP; 3578 } 3579 3580 dev_put(slave_dev); 3581 } 3582 3583 up_write(&(bonding_rwsem)); 3584 return res; 3585 } 3586 3587 static void bond_set_multicast_list(struct net_device *bond_dev) 3588 { 3589 struct bonding *bond = bond_dev->priv; 3590 struct dev_mc_list *dmi; 3591 3592 write_lock_bh(&bond->lock); 3593 3594 /* 3595 * Do promisc before checking multicast_mode 3596 */ 3597 if 
((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) { 3598 bond_set_promiscuity(bond, 1); 3599 } 3600 3601 if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) { 3602 bond_set_promiscuity(bond, -1); 3603 } 3604 3605 /* set allmulti flag to slaves */ 3606 if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) { 3607 bond_set_allmulti(bond, 1); 3608 } 3609 3610 if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) { 3611 bond_set_allmulti(bond, -1); 3612 } 3613 3614 bond->flags = bond_dev->flags; 3615 3616 /* looking for addresses to add to slaves' mc list */ 3617 for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { 3618 if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) { 3619 bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3620 } 3621 } 3622 3623 /* looking for addresses to delete from slaves' list */ 3624 for (dmi = bond->mc_list; dmi; dmi = dmi->next) { 3625 if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) { 3626 bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); 3627 } 3628 } 3629 3630 /* save master's multicast list */ 3631 bond_mc_list_destroy(bond); 3632 bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC); 3633 3634 write_unlock_bh(&bond->lock); 3635 } 3636 3637 /* 3638 * Change the MTU of all of a master's slaves to match the master 3639 */ 3640 static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) 3641 { 3642 struct bonding *bond = bond_dev->priv; 3643 struct slave *slave, *stop_at; 3644 int res = 0; 3645 int i; 3646 3647 dprintk("bond=%p, name=%s, new_mtu=%d\n", bond, 3648 (bond_dev ? bond_dev->name : "None"), new_mtu); 3649 3650 /* Can't hold bond->lock with bh disabled here since 3651 * some base drivers panic. On the other hand we can't 3652 * hold bond->lock without bh disabled because we'll 3653 * deadlock. The only solution is to rely on the fact 3654 * that we're under rtnl_lock here, and the slaves 3655 * list won't change. This doesn't solve the problem 3656 * of setting the slave's MTU while it is 3657 * transmitting, but the assumption is that the base 3658 * driver can handle that. 3659 * 3660 * TODO: figure out a way to safely iterate the slaves 3661 * list, but without holding a lock around the actual 3662 * call to the base driver. 3663 */ 3664 3665 bond_for_each_slave(bond, slave, i) { 3666 dprintk("s %p s->p %p c_m %p\n", slave, 3667 slave->prev, slave->dev->change_mtu); 3668 3669 res = dev_set_mtu(slave->dev, new_mtu); 3670 3671 if (res) { 3672 /* If we failed to set the slave's mtu to the new value 3673 * we must abort the operation even in ACTIVE_BACKUP 3674 * mode, because if we allow the backup slaves to have 3675 * different mtu values than the active slave we'll 3676 * need to change their mtu when doing a failover. That 3677 * means changing their mtu from timer context, which 3678 * is probably not a good idea. 
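 * Instead we stop here and let the unwind loop below restore the
 * bond's current MTU on every slave that was already changed.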
3679 */ 3680 dprintk("err %d %s\n", res, slave->dev->name); 3681 goto unwind; 3682 } 3683 } 3684 3685 bond_dev->mtu = new_mtu; 3686 3687 return 0; 3688 3689 unwind: 3690 /* unwind from head to the slave that failed */ 3691 stop_at = slave; 3692 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 3693 int tmp_res; 3694 3695 tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); 3696 if (tmp_res) { 3697 dprintk("unwind err %d dev %s\n", tmp_res, 3698 slave->dev->name); 3699 } 3700 } 3701 3702 return res; 3703 } 3704 3705 /* 3706 * Change HW address 3707 * 3708 * Note that many devices must be down to change the HW address, and 3709 * downing the master releases all slaves. We can make bonds full of 3710 * bonding devices to test this, however. 3711 */ 3712 static int bond_set_mac_address(struct net_device *bond_dev, void *addr) 3713 { 3714 struct bonding *bond = bond_dev->priv; 3715 struct sockaddr *sa = addr, tmp_sa; 3716 struct slave *slave, *stop_at; 3717 int res = 0; 3718 int i; 3719 3720 dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); 3721 3722 if (!is_valid_ether_addr(sa->sa_data)) { 3723 return -EADDRNOTAVAIL; 3724 } 3725 3726 /* Can't hold bond->lock with bh disabled here since 3727 * some base drivers panic. On the other hand we can't 3728 * hold bond->lock without bh disabled because we'll 3729 * deadlock. The only solution is to rely on the fact 3730 * that we're under rtnl_lock here, and the slaves 3731 * list won't change. This doesn't solve the problem 3732 * of setting the slave's hw address while it is 3733 * transmitting, but the assumption is that the base 3734 * driver can handle that. 3735 * 3736 * TODO: figure out a way to safely iterate the slaves 3737 * list, but without holding a lock around the actual 3738 * call to the base driver. 3739 */ 3740 3741 bond_for_each_slave(bond, slave, i) { 3742 dprintk("slave %p %s\n", slave, slave->dev->name); 3743 3744 if (slave->dev->set_mac_address == NULL) { 3745 res = -EOPNOTSUPP; 3746 dprintk("EOPNOTSUPP %s\n", slave->dev->name); 3747 goto unwind; 3748 } 3749 3750 res = dev_set_mac_address(slave->dev, addr); 3751 if (res) { 3752 /* TODO: consider downing the slave 3753 * and retry ? 3754 * User should expect communications 3755 * breakage anyway until ARP finish 3756 * updating, so... 
3757 */ 3758 dprintk("err %d %s\n", res, slave->dev->name); 3759 goto unwind; 3760 } 3761 } 3762 3763 /* success */ 3764 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 3765 return 0; 3766 3767 unwind: 3768 memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len); 3769 tmp_sa.sa_family = bond_dev->type; 3770 3771 /* unwind from head to the slave that failed */ 3772 stop_at = slave; 3773 bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { 3774 int tmp_res; 3775 3776 tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); 3777 if (tmp_res) { 3778 dprintk("unwind err %d dev %s\n", tmp_res, 3779 slave->dev->name); 3780 } 3781 } 3782 3783 return res; 3784 } 3785 3786 static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) 3787 { 3788 struct bonding *bond = bond_dev->priv; 3789 struct slave *slave, *start_at; 3790 int i; 3791 int res = 1; 3792 3793 read_lock(&bond->lock); 3794 3795 if (!BOND_IS_OK(bond)) { 3796 goto out; 3797 } 3798 3799 read_lock(&bond->curr_slave_lock); 3800 slave = start_at = bond->curr_active_slave; 3801 read_unlock(&bond->curr_slave_lock); 3802 3803 if (!slave) { 3804 goto out; 3805 } 3806 3807 bond_for_each_slave_from(bond, slave, i, start_at) { 3808 if (IS_UP(slave->dev) && 3809 (slave->link == BOND_LINK_UP) && 3810 (slave->state == BOND_STATE_ACTIVE)) { 3811 res = bond_dev_queue_xmit(bond, skb, slave->dev); 3812 3813 write_lock(&bond->curr_slave_lock); 3814 bond->curr_active_slave = slave->next; 3815 write_unlock(&bond->curr_slave_lock); 3816 3817 break; 3818 } 3819 } 3820 3821 3822 out: 3823 if (res) { 3824 /* no suitable interface, frame not sent */ 3825 dev_kfree_skb(skb); 3826 } 3827 read_unlock(&bond->lock); 3828 return 0; 3829 } 3830 3831 static void bond_activebackup_xmit_copy(struct sk_buff *skb, 3832 struct bonding *bond, 3833 struct slave *slave) 3834 { 3835 struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); 3836 struct ethhdr *eth_data; 3837 u8 *hwaddr; 3838 int res; 3839 3840 if (!skb2) { 3841 printk(KERN_ERR DRV_NAME ": Error: " 3842 "bond_activebackup_xmit_copy(): skb_copy() failed\n"); 3843 return; 3844 } 3845 3846 skb2->mac.raw = (unsigned char *)skb2->data; 3847 eth_data = eth_hdr(skb2); 3848 3849 /* Pick an appropriate source MAC address 3850 * -- use slave's perm MAC addr, unless used by bond 3851 * -- otherwise, borrow active slave's perm MAC addr 3852 * since that will not be used 3853 */ 3854 hwaddr = slave->perm_hwaddr; 3855 if (!memcmp(eth_data->h_source, hwaddr, ETH_ALEN)) 3856 hwaddr = bond->curr_active_slave->perm_hwaddr; 3857 3858 /* Set source MAC address appropriately */ 3859 memcpy(eth_data->h_source, hwaddr, ETH_ALEN); 3860 3861 res = bond_dev_queue_xmit(bond, skb2, slave->dev); 3862 if (res) 3863 dev_kfree_skb(skb2); 3864 3865 return; 3866 } 3867 3868 /* 3869 * in active-backup mode, we know that bond->curr_active_slave is always valid if 3870 * the bond has a usable interface. 
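 * IGMP frames are additionally copied to every other up slave (via
 * bond_activebackup_xmit_copy() above) so that snooping switches
 * relearn the multicast path quickly after a failover.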
3871 */ 3872 static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) 3873 { 3874 struct bonding *bond = bond_dev->priv; 3875 int res = 1; 3876 3877 read_lock(&bond->lock); 3878 read_lock(&bond->curr_slave_lock); 3879 3880 if (!BOND_IS_OK(bond)) { 3881 goto out; 3882 } 3883 3884 if (!bond->curr_active_slave) 3885 goto out; 3886 3887 /* Xmit IGMP frames on all slaves to ensure rapid fail-over 3888 for multicast traffic on snooping switches */ 3889 if (skb->protocol == __constant_htons(ETH_P_IP) && 3890 skb->nh.iph->protocol == IPPROTO_IGMP) { 3891 struct slave *slave, *active_slave; 3892 int i; 3893 3894 active_slave = bond->curr_active_slave; 3895 bond_for_each_slave_from_to(bond, slave, i, active_slave->next, 3896 active_slave->prev) 3897 if (IS_UP(slave->dev) && 3898 (slave->link == BOND_LINK_UP)) 3899 bond_activebackup_xmit_copy(skb, bond, slave); 3900 } 3901 3902 res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev); 3903 3904 out: 3905 if (res) { 3906 /* no suitable interface, frame not sent */ 3907 dev_kfree_skb(skb); 3908 } 3909 read_unlock(&bond->curr_slave_lock); 3910 read_unlock(&bond->lock); 3911 return 0; 3912 } 3913 3914 /* 3915 * In bond_xmit_xor(), we determine the output device by using a pre- 3916 * determined xmit_hash_policy(). If the selected device is not enabled, 3917 * find the next active slave. 3918 */ 3919 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) 3920 { 3921 struct bonding *bond = bond_dev->priv; 3922 struct slave *slave, *start_at; 3923 int slave_no; 3924 int i; 3925 int res = 1; 3926 3927 read_lock(&bond->lock); 3928 3929 if (!BOND_IS_OK(bond)) { 3930 goto out; 3931 } 3932 3933 slave_no = bond->xmit_hash_policy(skb, bond_dev, bond->slave_cnt); 3934 3935 bond_for_each_slave(bond, slave, i) { 3936 slave_no--; 3937 if (slave_no < 0) { 3938 break; 3939 } 3940 } 3941 3942 start_at = slave; 3943 3944 bond_for_each_slave_from(bond, slave, i, start_at) { 3945 if (IS_UP(slave->dev) && 3946 (slave->link == BOND_LINK_UP) && 3947 (slave->state == BOND_STATE_ACTIVE)) { 3948 res = bond_dev_queue_xmit(bond, skb, slave->dev); 3949 break; 3950 } 3951 } 3952 3953 out: 3954 if (res) { 3955 /* no suitable interface, frame not sent */ 3956 dev_kfree_skb(skb); 3957 } 3958 read_unlock(&bond->lock); 3959 return 0; 3960 } 3961 3962 /* 3963 * in broadcast mode, we send everything to all usable interfaces.
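 * every suitable slave except the last one found gets a clone of the
 * skb; the original skb is sent on that last slave so no extra copy
 * is needed for it.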

/*
 * in broadcast mode, we send everything to all usable interfaces.
 */
static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
{
	struct bonding *bond = bond_dev->priv;
	struct slave *slave, *start_at;
	struct net_device *tx_dev = NULL;
	int i;
	int res = 1;

	read_lock(&bond->lock);

	if (!BOND_IS_OK(bond)) {
		goto out;
	}

	read_lock(&bond->curr_slave_lock);
	start_at = bond->curr_active_slave;
	read_unlock(&bond->curr_slave_lock);

	if (!start_at) {
		goto out;
	}

	bond_for_each_slave_from(bond, slave, i, start_at) {
		if (IS_UP(slave->dev) &&
		    (slave->link == BOND_LINK_UP) &&
		    (slave->state == BOND_STATE_ACTIVE)) {
			if (tx_dev) {
				/* a previous candidate exists: send a clone to
				 * it and keep the original skb for the last
				 * usable slave */
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (!skb2) {
					printk(KERN_ERR DRV_NAME
					       ": %s: Error: bond_xmit_broadcast(): "
					       "skb_clone() failed\n",
					       bond_dev->name);
					continue;
				}

				res = bond_dev_queue_xmit(bond, skb2, tx_dev);
				if (res) {
					dev_kfree_skb(skb2);
					continue;
				}
			}
			tx_dev = slave->dev;
		}
	}

	if (tx_dev) {
		res = bond_dev_queue_xmit(bond, skb, tx_dev);
	}

out:
	if (res) {
		/* no suitable interface, frame not sent */
		dev_kfree_skb(skb);
	}
	/* frame sent to all suitable interfaces */
	read_unlock(&bond->lock);
	return 0;
}

/*------------------------- Device initialization ---------------------------*/

/*
 * set bond mode specific net device operations
 */
void bond_set_mode_ops(struct bonding *bond, int mode)
{
	struct net_device *bond_dev = bond->dev;

	switch (mode) {
	case BOND_MODE_ROUNDROBIN:
		bond_dev->hard_start_xmit = bond_xmit_roundrobin;
		break;
	case BOND_MODE_ACTIVEBACKUP:
		bond_dev->hard_start_xmit = bond_xmit_activebackup;
		break;
	case BOND_MODE_XOR:
		bond_dev->hard_start_xmit = bond_xmit_xor;
		if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
			bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
		else
			bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
		break;
	case BOND_MODE_BROADCAST:
		bond_dev->hard_start_xmit = bond_xmit_broadcast;
		break;
	case BOND_MODE_8023AD:
		bond_dev->hard_start_xmit = bond_3ad_xmit_xor;
		if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
			bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
		else
			bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
		break;
	case BOND_MODE_TLB:
	case BOND_MODE_ALB:
		bond_dev->hard_start_xmit = bond_alb_xmit;
		bond_dev->set_mac_address = bond_alb_set_mac_address;
		break;
	default:
		/* Should never happen, mode already checked */
		printk(KERN_ERR DRV_NAME
		       ": %s: Error: Unknown bonding mode %d\n",
		       bond_dev->name,
		       mode);
		break;
	}
}

static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
				     struct ethtool_drvinfo *drvinfo)
{
	strncpy(drvinfo->driver, DRV_NAME, 32);
	strncpy(drvinfo->version, DRV_VERSION, 32);
	snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION);
}

static struct ethtool_ops bond_ethtool_ops = {
	.get_tx_csum	= ethtool_op_get_tx_csum,
	.get_sg		= ethtool_op_get_sg,
	.get_drvinfo	= bond_ethtool_get_drvinfo,
};
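
/*
 * Rough overview of how a bond device is brought up by the code below:
 * bond_create() allocates the net_device with alloc_netdev(), bond_init()
 * fills in the entry points and the mode specific transmit routine,
 * register_netdevice() makes the device visible, and the sysfs entry is
 * created only after the rtnl lock has been dropped.  bond_deinit() and
 * bond_free_all() undo this at teardown.
 */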

/*
 * Does not allocate but creates a /proc entry.
 * Allowed to fail.
 */
static int bond_init(struct net_device *bond_dev, struct bond_params *params)
{
	struct bonding *bond = bond_dev->priv;

	dprintk("Begin bond_init for %s\n", bond_dev->name);

	/* initialize rwlocks */
	rwlock_init(&bond->lock);
	rwlock_init(&bond->curr_slave_lock);

	bond->params = *params; /* copy params struct */

	/* Initialize pointers */
	bond->first_slave = NULL;
	bond->curr_active_slave = NULL;
	bond->current_arp_slave = NULL;
	bond->primary_slave = NULL;
	bond->dev = bond_dev;
	INIT_LIST_HEAD(&bond->vlan_list);

	/* Initialize the device entry points */
	bond_dev->open = bond_open;
	bond_dev->stop = bond_close;
	bond_dev->get_stats = bond_get_stats;
	bond_dev->do_ioctl = bond_do_ioctl;
	bond_dev->ethtool_ops = &bond_ethtool_ops;
	bond_dev->set_multicast_list = bond_set_multicast_list;
	bond_dev->change_mtu = bond_change_mtu;
	bond_dev->set_mac_address = bond_set_mac_address;

	bond_set_mode_ops(bond, bond->params.mode);

	bond_dev->destructor = free_netdev;

	/* Initialize the device options */
	bond_dev->tx_queue_len = 0;
	bond_dev->flags |= IFF_MASTER|IFF_MULTICAST;

	/* At first, we block adding VLANs. That's the only way to
	 * prevent problems that occur when adding VLANs over an
	 * empty bond. The block will be removed once non-challenged
	 * slaves are enslaved.
	 */
	bond_dev->features |= NETIF_F_VLAN_CHALLENGED;

	/* don't acquire bond device's xmit_lock when
	 * transmitting */
	bond_dev->features |= NETIF_F_LLTX;

	/* By default, we declare the bond to be fully
	 * VLAN hardware accelerated capable. Special
	 * care is taken in the various xmit functions
	 * when there are slaves that are not hw accel
	 * capable
	 */
	bond_dev->vlan_rx_register = bond_vlan_rx_register;
	bond_dev->vlan_rx_add_vid = bond_vlan_rx_add_vid;
	bond_dev->vlan_rx_kill_vid = bond_vlan_rx_kill_vid;
	bond_dev->features |= (NETIF_F_HW_VLAN_TX |
			       NETIF_F_HW_VLAN_RX |
			       NETIF_F_HW_VLAN_FILTER);

#ifdef CONFIG_PROC_FS
	bond_create_proc_entry(bond);
#endif

	list_add_tail(&bond->bond_list, &bond_dev_list);

	return 0;
}

/* De-initialize device specific data.
 * Caller must hold rtnl_lock.
 */
void bond_deinit(struct net_device *bond_dev)
{
	struct bonding *bond = bond_dev->priv;

	list_del(&bond->bond_list);

#ifdef CONFIG_PROC_FS
	bond_remove_proc_entry(bond);
#endif
}

/* Unregister and free all bond devices.
 * Caller must hold rtnl_lock.
 */
static void bond_free_all(void)
{
	struct bonding *bond, *nxt;

	list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
		struct net_device *bond_dev = bond->dev;

		unregister_netdevice(bond_dev);
		bond_deinit(bond_dev);
	}

#ifdef CONFIG_PROC_FS
	bond_destroy_proc_dir();
#endif
}

/*------------------------- Module initialization ---------------------------*/
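
/*
 * For example (illustrative values): with bond_mode_tbl from above,
 * bond_parse_parm("802.3ad", bond_mode_tbl) and
 * bond_parse_parm("4", bond_mode_tbl) should both return BOND_MODE_8023AD,
 * while an unrecognized string returns -1.
 */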

/*
 * Convert string input module parms.  Accept either the
 * number of the mode or its string name.
 */
int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl)
{
	int i;

	for (i = 0; tbl[i].modename; i++) {
		if ((isdigit(*mode_arg) &&
		     tbl[i].mode == simple_strtol(mode_arg, NULL, 0)) ||
		    (strncmp(mode_arg, tbl[i].modename,
			     strlen(tbl[i].modename)) == 0)) {
			return tbl[i].mode;
		}
	}

	return -1;
}

static int bond_check_params(struct bond_params *params)
{
	/*
	 * Convert string parameters.
	 */
	if (mode) {
		bond_mode = bond_parse_parm(mode, bond_mode_tbl);
		if (bond_mode == -1) {
			printk(KERN_ERR DRV_NAME
			       ": Error: Invalid bonding mode \"%s\"\n",
			       mode == NULL ? "NULL" : mode);
			return -EINVAL;
		}
	}

	if (xmit_hash_policy) {
		if ((bond_mode != BOND_MODE_XOR) &&
		    (bond_mode != BOND_MODE_8023AD)) {
			printk(KERN_INFO DRV_NAME
			       ": xmit_hash_policy param is irrelevant in mode %s\n",
			       bond_mode_name(bond_mode));
		} else {
			xmit_hashtype = bond_parse_parm(xmit_hash_policy,
							xmit_hashtype_tbl);
			if (xmit_hashtype == -1) {
				printk(KERN_ERR DRV_NAME
				       ": Error: Invalid xmit_hash_policy \"%s\"\n",
				       xmit_hash_policy == NULL ? "NULL" :
				       xmit_hash_policy);
				return -EINVAL;
			}
		}
	}

	if (lacp_rate) {
		if (bond_mode != BOND_MODE_8023AD) {
			printk(KERN_INFO DRV_NAME
			       ": lacp_rate param is irrelevant in mode %s\n",
			       bond_mode_name(bond_mode));
		} else {
			lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl);
			if (lacp_fast == -1) {
				printk(KERN_ERR DRV_NAME
				       ": Error: Invalid lacp rate \"%s\"\n",
				       lacp_rate == NULL ? "NULL" : lacp_rate);
				return -EINVAL;
			}
		}
	}

	if (max_bonds < 1 || max_bonds > INT_MAX) {
		printk(KERN_WARNING DRV_NAME
		       ": Warning: max_bonds (%d) not in range %d-%d, so it "
		       "was reset to BOND_DEFAULT_MAX_BONDS (%d)\n",
		       max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS);
		max_bonds = BOND_DEFAULT_MAX_BONDS;
	}

	if (miimon < 0) {
		printk(KERN_WARNING DRV_NAME
		       ": Warning: miimon module parameter (%d), "
		       "not in range 0-%d, so it was reset to %d\n",
		       miimon, INT_MAX, BOND_LINK_MON_INTERV);
		miimon = BOND_LINK_MON_INTERV;
	}

	if (updelay < 0) {
		printk(KERN_WARNING DRV_NAME
		       ": Warning: updelay module parameter (%d), "
		       "not in range 0-%d, so it was reset to 0\n",
		       updelay, INT_MAX);
		updelay = 0;
	}

	if (downdelay < 0) {
		printk(KERN_WARNING DRV_NAME
		       ": Warning: downdelay module parameter (%d), "
		       "not in range 0-%d, so it was reset to 0\n",
		       downdelay, INT_MAX);
		downdelay = 0;
	}

	if ((use_carrier != 0) && (use_carrier != 1)) {
		printk(KERN_WARNING DRV_NAME
		       ": Warning: use_carrier module parameter (%d), "
		       "not of valid value (0/1), so it was set to 1\n",
		       use_carrier);
		use_carrier = 1;
	}
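
	/*
	 * 802.3ad and TLB/ALB need a working link monitor to learn link
	 * state, speed and duplex of the slaves; if the user left miimon
	 * unset, the checks below force it to 100 ms and warn about it.
	 */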
4325 ": Warning: miimon must be specified, " 4326 "otherwise bonding will not detect link " 4327 "failure and link speed which are essential " 4328 "for TLB/ALB load balancing\n"); 4329 printk(KERN_WARNING "Forcing miimon to 100msec\n"); 4330 miimon = 100; 4331 } 4332 } 4333 4334 if (bond_mode == BOND_MODE_ALB) { 4335 printk(KERN_NOTICE DRV_NAME 4336 ": In ALB mode you might experience client " 4337 "disconnections upon reconnection of a link if the " 4338 "bonding module updelay parameter (%d msec) is " 4339 "incompatible with the forwarding delay time of the " 4340 "switch\n", 4341 updelay); 4342 } 4343 4344 if (!miimon) { 4345 if (updelay || downdelay) { 4346 /* just warn the user the up/down delay will have 4347 * no effect since miimon is zero... 4348 */ 4349 printk(KERN_WARNING DRV_NAME 4350 ": Warning: miimon module parameter not set " 4351 "and updelay (%d) or downdelay (%d) module " 4352 "parameter is set; updelay and downdelay have " 4353 "no effect unless miimon is set\n", 4354 updelay, downdelay); 4355 } 4356 } else { 4357 /* don't allow arp monitoring */ 4358 if (arp_interval) { 4359 printk(KERN_WARNING DRV_NAME 4360 ": Warning: miimon (%d) and arp_interval (%d) " 4361 "can't be used simultaneously, disabling ARP " 4362 "monitoring\n", 4363 miimon, arp_interval); 4364 arp_interval = 0; 4365 } 4366 4367 if ((updelay % miimon) != 0) { 4368 printk(KERN_WARNING DRV_NAME 4369 ": Warning: updelay (%d) is not a multiple " 4370 "of miimon (%d), updelay rounded to %d ms\n", 4371 updelay, miimon, (updelay / miimon) * miimon); 4372 } 4373 4374 updelay /= miimon; 4375 4376 if ((downdelay % miimon) != 0) { 4377 printk(KERN_WARNING DRV_NAME 4378 ": Warning: downdelay (%d) is not a multiple " 4379 "of miimon (%d), downdelay rounded to %d ms\n", 4380 downdelay, miimon, 4381 (downdelay / miimon) * miimon); 4382 } 4383 4384 downdelay /= miimon; 4385 } 4386 4387 if (arp_interval < 0) { 4388 printk(KERN_WARNING DRV_NAME 4389 ": Warning: arp_interval module parameter (%d) " 4390 ", not in range 0-%d, so it was reset to %d\n", 4391 arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); 4392 arp_interval = BOND_LINK_ARP_INTERV; 4393 } 4394 4395 for (arp_ip_count = 0; 4396 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count]; 4397 arp_ip_count++) { 4398 /* not complete check, but should be good enough to 4399 catch mistakes */ 4400 if (!isdigit(arp_ip_target[arp_ip_count][0])) { 4401 printk(KERN_WARNING DRV_NAME 4402 ": Warning: bad arp_ip_target module parameter " 4403 "(%s), ARP monitoring will not be performed\n", 4404 arp_ip_target[arp_ip_count]); 4405 arp_interval = 0; 4406 } else { 4407 u32 ip = in_aton(arp_ip_target[arp_ip_count]); 4408 arp_target[arp_ip_count] = ip; 4409 } 4410 } 4411 4412 if (arp_interval && !arp_ip_count) { 4413 /* don't allow arping if no arp_ip_target given... 

	if (arp_interval && !arp_ip_count) {
		/* don't allow arping if no arp_ip_target given... */
		printk(KERN_WARNING DRV_NAME
		       ": Warning: arp_interval module parameter (%d) "
		       "specified without providing an arp_ip_target "
		       "parameter, arp_interval was reset to 0\n",
		       arp_interval);
		arp_interval = 0;
	}

	if (miimon) {
		printk(KERN_INFO DRV_NAME
		       ": MII link monitoring set to %d ms\n",
		       miimon);
	} else if (arp_interval) {
		int i;

		printk(KERN_INFO DRV_NAME
		       ": ARP monitoring set to %d ms with %d target(s):",
		       arp_interval, arp_ip_count);

		for (i = 0; i < arp_ip_count; i++)
			printk(" %s", arp_ip_target[i]);

		printk("\n");
	} else {
		/* miimon and arp_interval not set, we need one so things
		 * work as expected, see bonding.txt for details
		 */
		printk(KERN_WARNING DRV_NAME
		       ": Warning: either miimon or arp_interval and "
		       "arp_ip_target module parameters must be specified, "
		       "otherwise bonding will not detect link failures! see "
		       "bonding.txt for details.\n");
	}

	if (primary && !USES_PRIMARY(bond_mode)) {
		/* currently, using a primary only makes sense
		 * in active backup, TLB or ALB modes
		 */
		printk(KERN_WARNING DRV_NAME
		       ": Warning: %s primary device specified but has no "
		       "effect in %s mode\n",
		       primary, bond_mode_name(bond_mode));
		primary = NULL;
	}

	/* fill params struct with the proper values */
	params->mode = bond_mode;
	params->xmit_policy = xmit_hashtype;
	params->miimon = miimon;
	params->arp_interval = arp_interval;
	params->updelay = updelay;
	params->downdelay = downdelay;
	params->use_carrier = use_carrier;
	params->lacp_fast = lacp_fast;
	params->primary[0] = 0;

	if (primary) {
		strncpy(params->primary, primary, IFNAMSIZ);
		params->primary[IFNAMSIZ - 1] = 0;
	}

	memcpy(params->arp_targets, arp_target, sizeof(arp_target));

	return 0;
}

/* Create a new bond based on the specified name and bonding parameters.
 * Caller must NOT hold rtnl_lock; we need to release it here before we
 * set up our sysfs entries.
 */
int bond_create(char *name, struct bond_params *params, struct bonding **newbond)
{
	struct net_device *bond_dev;
	int res;

	rtnl_lock();
	bond_dev = alloc_netdev(sizeof(struct bonding), name, ether_setup);
	if (!bond_dev) {
		printk(KERN_ERR DRV_NAME
		       ": %s: eek! can't alloc netdev!\n",
		       name);
		res = -ENOMEM;
		goto out_rtnl;
	}

	/* bond_init() must be called after dev_alloc_name() (for the
	 * /proc files), but before register_netdevice(), because we
	 * need to set function pointers.
	 */
	res = bond_init(bond_dev, params);
	if (res < 0) {
		goto out_netdev;
	}

	SET_MODULE_OWNER(bond_dev);

	res = register_netdevice(bond_dev);
	if (res < 0) {
		goto out_bond;
	}
	if (newbond)
		*newbond = bond_dev->priv;

	rtnl_unlock(); /* allows sysfs registration of net device */
	res = bond_create_sysfs_entry(bond_dev->priv);
	goto done;
out_bond:
	bond_deinit(bond_dev);
out_netdev:
	free_netdev(bond_dev);
out_rtnl:
	rtnl_unlock();
done:
	return res;
}
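
/*
 * Example invocation (sample values only):
 *
 *     modprobe bonding mode=active-backup miimon=100 updelay=200 \
 *              downdelay=200 primary=eth0
 *
 * passes through bond_check_params() above, and bonding_init() below then
 * creates max_bonds devices named bond0, bond1, ...
 */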

static int __init bonding_init(void)
{
	int i;
	int res;
	char new_bond_name[8]; /* Enough room for 999 bonds at init. */

	printk(KERN_INFO "%s", version);

	res = bond_check_params(&bonding_defaults);
	if (res) {
		goto out;
	}

#ifdef CONFIG_PROC_FS
	bond_create_proc_dir();
#endif
	for (i = 0; i < max_bonds; i++) {
		sprintf(new_bond_name, "bond%d", i);
		res = bond_create(new_bond_name, &bonding_defaults, NULL);
		if (res)
			goto err;
	}

	res = bond_create_sysfs();
	if (res)
		goto err;

	register_netdevice_notifier(&bond_netdev_notifier);
	register_inetaddr_notifier(&bond_inetaddr_notifier);

	goto out;
err:
	rtnl_lock();
	bond_free_all();
	bond_destroy_sysfs();
	rtnl_unlock();
out:
	return res;
}

static void __exit bonding_exit(void)
{
	unregister_netdevice_notifier(&bond_netdev_notifier);
	unregister_inetaddr_notifier(&bond_inetaddr_notifier);

	rtnl_lock();
	bond_free_all();
	bond_destroy_sysfs();
	rtnl_unlock();
}

module_init(bonding_init);
module_exit(bonding_exit);
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);
MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");
MODULE_SUPPORTED_DEVICE("most ethernet devices");

/*
 * Local variables:
 * c-indent-level: 8
 * c-basic-offset: 8
 * tab-width: 8
 * End:
 */