1 /* 2 * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License as published by the 6 * Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * for more details. 13 * 14 * You should have received a copy of the GNU General Public License along 15 * with this program; if not, see <http://www.gnu.org/licenses/>. 16 * 17 * The full GNU General Public License is included in this distribution in the 18 * file called LICENSE. 19 * 20 */ 21 22 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 23 24 #include <linux/skbuff.h> 25 #include <linux/netdevice.h> 26 #include <linux/etherdevice.h> 27 #include <linux/pkt_sched.h> 28 #include <linux/spinlock.h> 29 #include <linux/slab.h> 30 #include <linux/timer.h> 31 #include <linux/ip.h> 32 #include <linux/ipv6.h> 33 #include <linux/if_arp.h> 34 #include <linux/if_ether.h> 35 #include <linux/if_bonding.h> 36 #include <linux/if_vlan.h> 37 #include <linux/in.h> 38 #include <net/ipx.h> 39 #include <net/arp.h> 40 #include <net/ipv6.h> 41 #include <asm/byteorder.h> 42 #include "bonding.h" 43 #include "bond_alb.h" 44 45 46 47 #ifndef __long_aligned 48 #define __long_aligned __attribute__((aligned((sizeof(long))))) 49 #endif 50 static const u8 mac_bcast[ETH_ALEN] __long_aligned = { 51 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 52 }; 53 static const u8 mac_v6_allmcast[ETH_ALEN] __long_aligned = { 54 0x33, 0x33, 0x00, 0x00, 0x00, 0x01 55 }; 56 static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC; 57 58 #pragma pack(1) 59 struct learning_pkt { 60 u8 mac_dst[ETH_ALEN]; 61 u8 mac_src[ETH_ALEN]; 62 __be16 type; 63 u8 padding[ETH_ZLEN - ETH_HLEN]; 64 }; 65 66 struct arp_pkt { 67 __be16 hw_addr_space; 68 __be16 prot_addr_space; 69 u8 hw_addr_len; 70 u8 prot_addr_len; 71 __be16 op_code; 72 u8 mac_src[ETH_ALEN]; /* sender hardware address */ 73 __be32 ip_src; /* sender IP address */ 74 u8 mac_dst[ETH_ALEN]; /* target hardware address */ 75 __be32 ip_dst; /* target IP address */ 76 }; 77 #pragma pack() 78 79 static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) 80 { 81 return (struct arp_pkt *)skb_network_header(skb); 82 } 83 84 /* Forward declaration */ 85 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], 86 bool strict_match); 87 static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp); 88 static void rlb_src_unlink(struct bonding *bond, u32 index); 89 static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, 90 u32 ip_dst_hash); 91 92 static inline u8 _simple_hash(const u8 *hash_start, int hash_size) 93 { 94 int i; 95 u8 hash = 0; 96 97 for (i = 0; i < hash_size; i++) 98 hash ^= hash_start[i]; 99 100 return hash; 101 } 102 103 /*********************** tlb specific functions ***************************/ 104 105 static inline void _lock_tx_hashtbl_bh(struct bonding *bond) 106 { 107 spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); 108 } 109 110 static inline void _unlock_tx_hashtbl_bh(struct bonding *bond) 111 { 112 spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); 113 } 114 115 static inline void _lock_tx_hashtbl(struct bonding *bond) 116 { 117 spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); 118 } 119 120 static inline void _unlock_tx_hashtbl(struct bonding *bond) 121 { 122 spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); 123 } 124 125 /* Caller must hold tx_hashtbl lock */ 126 static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) 127 { 128 if (save_load) { 129 entry->load_history = 1 + entry->tx_bytes / 130 BOND_TLB_REBALANCE_INTERVAL; 131 entry->tx_bytes = 0; 132 } 133 134 entry->tx_slave = NULL; 135 entry->next = TLB_NULL_INDEX; 136 entry->prev = TLB_NULL_INDEX; 137 } 138 139 static inline void tlb_init_slave(struct slave *slave) 140 { 141 SLAVE_TLB_INFO(slave).load = 0; 142 SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; 143 } 144 145 /* Caller must hold bond lock for read, BH disabled */ 146 static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, 147 int save_load) 148 { 149 struct tlb_client_info *tx_hash_table; 150 u32 index; 151 152 /* clear slave from tx_hashtbl */ 153 tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; 154 155 /* skip this if we've already freed the tx hash table */ 156 if (tx_hash_table) { 157 index = SLAVE_TLB_INFO(slave).head; 158 while (index != TLB_NULL_INDEX) { 159 u32 next_index = tx_hash_table[index].next; 160 tlb_init_table_entry(&tx_hash_table[index], save_load); 161 index = next_index; 162 } 163 } 164 165 tlb_init_slave(slave); 166 } 167 168 /* Caller must hold bond lock for read */ 169 static void tlb_clear_slave(struct bonding *bond, struct slave *slave, 170 int save_load) 171 { 172 _lock_tx_hashtbl_bh(bond); 173 __tlb_clear_slave(bond, slave, save_load); 174 _unlock_tx_hashtbl_bh(bond); 175 } 176 177 /* Must be called before starting the monitor timer */ 178 static int tlb_initialize(struct bonding *bond) 179 { 180 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 181 int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info); 182 struct tlb_client_info *new_hashtbl; 183 int i; 184 185 new_hashtbl = kzalloc(size, GFP_KERNEL); 186 if (!new_hashtbl) 187 return -1; 188 189 _lock_tx_hashtbl_bh(bond); 190 191 bond_info->tx_hashtbl = new_hashtbl; 192 193 for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) 194 tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0); 195 196 _unlock_tx_hashtbl_bh(bond); 197 198 return 0; 199 } 200 201 /* Must be called only after all slaves have been released */ 202 static void tlb_deinitialize(struct bonding *bond) 203 { 204 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 205 206 _lock_tx_hashtbl_bh(bond); 207 208 kfree(bond_info->tx_hashtbl); 209 bond_info->tx_hashtbl = NULL; 210 211 _unlock_tx_hashtbl_bh(bond); 212 } 213 214 static long long compute_gap(struct slave *slave) 215 { 216 return (s64) (slave->speed << 20) - /* Convert to Megabit per sec */ 217 (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ 218 } 219 220 /* Caller must hold bond lock for read */ 221 static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) 222 { 223 struct slave *slave, *least_loaded; 224 struct list_head *iter; 225 long long max_gap; 226 227 least_loaded = NULL; 228 max_gap = LLONG_MIN; 229 230 /* Find the slave with the largest gap */ 231 bond_for_each_slave_rcu(bond, slave, iter) { 232 if (bond_slave_can_tx(slave)) { 233 long long gap = compute_gap(slave); 234 235 if (max_gap < gap) { 236 least_loaded = slave; 237 max_gap = gap; 238 } 239 } 240 } 241 242 return least_loaded; 243 } 244 245 static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index, 246 u32 skb_len) 247 { 248 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 249 struct tlb_client_info *hash_table; 250 struct slave *assigned_slave; 251 252 hash_table = bond_info->tx_hashtbl; 253 assigned_slave = hash_table[hash_index].tx_slave; 254 if (!assigned_slave) { 255 assigned_slave = tlb_get_least_loaded_slave(bond); 256 257 if (assigned_slave) { 258 struct tlb_slave_info *slave_info = 259 &(SLAVE_TLB_INFO(assigned_slave)); 260 u32 next_index = slave_info->head; 261 262 hash_table[hash_index].tx_slave = assigned_slave; 263 hash_table[hash_index].next = next_index; 264 hash_table[hash_index].prev = TLB_NULL_INDEX; 265 266 if (next_index != TLB_NULL_INDEX) 267 hash_table[next_index].prev = hash_index; 268 269 slave_info->head = hash_index; 270 slave_info->load += 271 hash_table[hash_index].load_history; 272 } 273 } 274 275 if (assigned_slave) 276 hash_table[hash_index].tx_bytes += skb_len; 277 278 return assigned_slave; 279 } 280 281 /* Caller must hold bond lock for read */ 282 static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, 283 u32 skb_len) 284 { 285 struct slave *tx_slave; 286 /* 287 * We don't need to disable softirq here, becase 288 * tlb_choose_channel() is only called by bond_alb_xmit() 289 * which already has softirq disabled. 290 */ 291 _lock_tx_hashtbl(bond); 292 tx_slave = __tlb_choose_channel(bond, hash_index, skb_len); 293 _unlock_tx_hashtbl(bond); 294 return tx_slave; 295 } 296 297 /*********************** rlb specific functions ***************************/ 298 static inline void _lock_rx_hashtbl_bh(struct bonding *bond) 299 { 300 spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 301 } 302 303 static inline void _unlock_rx_hashtbl_bh(struct bonding *bond) 304 { 305 spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 306 } 307 308 static inline void _lock_rx_hashtbl(struct bonding *bond) 309 { 310 spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 311 } 312 313 static inline void _unlock_rx_hashtbl(struct bonding *bond) 314 { 315 spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 316 } 317 318 /* when an ARP REPLY is received from a client update its info 319 * in the rx_hashtbl 320 */ 321 static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) 322 { 323 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 324 struct rlb_client_info *client_info; 325 u32 hash_index; 326 327 _lock_rx_hashtbl_bh(bond); 328 329 hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); 330 client_info = &(bond_info->rx_hashtbl[hash_index]); 331 332 if ((client_info->assigned) && 333 (client_info->ip_src == arp->ip_dst) && 334 (client_info->ip_dst == arp->ip_src) && 335 (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) { 336 /* update the clients MAC address */ 337 ether_addr_copy(client_info->mac_dst, arp->mac_src); 338 client_info->ntt = 1; 339 bond_info->rx_ntt = 1; 340 } 341 342 _unlock_rx_hashtbl_bh(bond); 343 } 344 345 static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, 346 struct slave *slave) 347 { 348 struct arp_pkt *arp, _arp; 349 350 if (skb->protocol != cpu_to_be16(ETH_P_ARP)) 351 goto out; 352 353 arp = skb_header_pointer(skb, 0, sizeof(_arp), &_arp); 354 if (!arp) 355 goto out; 356 357 /* We received an ARP from arp->ip_src. 358 * We might have used this IP address previously (on the bonding host 359 * itself or on a system that is bridged together with the bond). 360 * However, if arp->mac_src is different than what is stored in 361 * rx_hashtbl, some other host is now using the IP and we must prevent 362 * sending out client updates with this IP address and the old MAC 363 * address. 364 * Clean up all hash table entries that have this address as ip_src but 365 * have a different mac_src. 366 */ 367 rlb_purge_src_ip(bond, arp); 368 369 if (arp->op_code == htons(ARPOP_REPLY)) { 370 /* update rx hash table for this ARP */ 371 rlb_update_entry_from_arp(bond, arp); 372 pr_debug("Server received an ARP Reply from client\n"); 373 } 374 out: 375 return RX_HANDLER_ANOTHER; 376 } 377 378 /* Caller must hold bond lock for read */ 379 static struct slave *rlb_next_rx_slave(struct bonding *bond) 380 { 381 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 382 struct slave *before = NULL, *rx_slave = NULL, *slave; 383 struct list_head *iter; 384 bool found = false; 385 386 bond_for_each_slave(bond, slave, iter) { 387 if (!bond_slave_can_tx(slave)) 388 continue; 389 if (!found) { 390 if (!before || before->speed < slave->speed) 391 before = slave; 392 } else { 393 if (!rx_slave || rx_slave->speed < slave->speed) 394 rx_slave = slave; 395 } 396 if (slave == bond_info->rx_slave) 397 found = true; 398 } 399 /* we didn't find anything after the current or we have something 400 * better before and up to the current slave 401 */ 402 if (!rx_slave || (before && rx_slave->speed < before->speed)) 403 rx_slave = before; 404 405 if (rx_slave) 406 bond_info->rx_slave = rx_slave; 407 408 return rx_slave; 409 } 410 411 /* Caller must hold rcu_read_lock() for read */ 412 static struct slave *__rlb_next_rx_slave(struct bonding *bond) 413 { 414 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 415 struct slave *before = NULL, *rx_slave = NULL, *slave; 416 struct list_head *iter; 417 bool found = false; 418 419 bond_for_each_slave_rcu(bond, slave, iter) { 420 if (!bond_slave_can_tx(slave)) 421 continue; 422 if (!found) { 423 if (!before || before->speed < slave->speed) 424 before = slave; 425 } else { 426 if (!rx_slave || rx_slave->speed < slave->speed) 427 rx_slave = slave; 428 } 429 if (slave == bond_info->rx_slave) 430 found = true; 431 } 432 /* we didn't find anything after the current or we have something 433 * better before and up to the current slave 434 */ 435 if (!rx_slave || (before && rx_slave->speed < before->speed)) 436 rx_slave = before; 437 438 if (rx_slave) 439 bond_info->rx_slave = rx_slave; 440 441 return rx_slave; 442 } 443 444 /* teach the switch the mac of a disabled slave 445 * on the primary for fault tolerance 446 * 447 * Caller must hold bond->curr_slave_lock for write or bond lock for write 448 */ 449 static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) 450 { 451 if (!bond->curr_active_slave) 452 return; 453 454 if (!bond->alb_info.primary_is_promisc) { 455 if (!dev_set_promiscuity(bond->curr_active_slave->dev, 1)) 456 bond->alb_info.primary_is_promisc = 1; 457 else 458 bond->alb_info.primary_is_promisc = 0; 459 } 460 461 bond->alb_info.rlb_promisc_timeout_counter = 0; 462 463 alb_send_learning_packets(bond->curr_active_slave, addr, true); 464 } 465 466 /* slave being removed should not be active at this point 467 * 468 * Caller must hold rtnl. 469 */ 470 static void rlb_clear_slave(struct bonding *bond, struct slave *slave) 471 { 472 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 473 struct rlb_client_info *rx_hash_table; 474 u32 index, next_index; 475 476 /* clear slave from rx_hashtbl */ 477 _lock_rx_hashtbl_bh(bond); 478 479 rx_hash_table = bond_info->rx_hashtbl; 480 index = bond_info->rx_hashtbl_used_head; 481 for (; index != RLB_NULL_INDEX; index = next_index) { 482 next_index = rx_hash_table[index].used_next; 483 if (rx_hash_table[index].slave == slave) { 484 struct slave *assigned_slave = rlb_next_rx_slave(bond); 485 486 if (assigned_slave) { 487 rx_hash_table[index].slave = assigned_slave; 488 if (!ether_addr_equal_64bits(rx_hash_table[index].mac_dst, 489 mac_bcast)) { 490 bond_info->rx_hashtbl[index].ntt = 1; 491 bond_info->rx_ntt = 1; 492 /* A slave has been removed from the 493 * table because it is either disabled 494 * or being released. We must retry the 495 * update to avoid clients from not 496 * being updated & disconnecting when 497 * there is stress 498 */ 499 bond_info->rlb_update_retry_counter = 500 RLB_UPDATE_RETRY; 501 } 502 } else { /* there is no active slave */ 503 rx_hash_table[index].slave = NULL; 504 } 505 } 506 } 507 508 _unlock_rx_hashtbl_bh(bond); 509 510 write_lock_bh(&bond->curr_slave_lock); 511 512 if (slave != bond->curr_active_slave) 513 rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); 514 515 write_unlock_bh(&bond->curr_slave_lock); 516 } 517 518 static void rlb_update_client(struct rlb_client_info *client_info) 519 { 520 int i; 521 522 if (!client_info->slave) 523 return; 524 525 for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { 526 struct sk_buff *skb; 527 528 skb = arp_create(ARPOP_REPLY, ETH_P_ARP, 529 client_info->ip_dst, 530 client_info->slave->dev, 531 client_info->ip_src, 532 client_info->mac_dst, 533 client_info->slave->dev->dev_addr, 534 client_info->mac_dst); 535 if (!skb) { 536 pr_err("%s: Error: failed to create an ARP packet\n", 537 client_info->slave->bond->dev->name); 538 continue; 539 } 540 541 skb->dev = client_info->slave->dev; 542 543 if (client_info->vlan_id) { 544 skb = vlan_put_tag(skb, htons(ETH_P_8021Q), client_info->vlan_id); 545 if (!skb) { 546 pr_err("%s: Error: failed to insert VLAN tag\n", 547 client_info->slave->bond->dev->name); 548 continue; 549 } 550 } 551 552 arp_xmit(skb); 553 } 554 } 555 556 /* sends ARP REPLIES that update the clients that need updating */ 557 static void rlb_update_rx_clients(struct bonding *bond) 558 { 559 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 560 struct rlb_client_info *client_info; 561 u32 hash_index; 562 563 _lock_rx_hashtbl_bh(bond); 564 565 hash_index = bond_info->rx_hashtbl_used_head; 566 for (; hash_index != RLB_NULL_INDEX; 567 hash_index = client_info->used_next) { 568 client_info = &(bond_info->rx_hashtbl[hash_index]); 569 if (client_info->ntt) { 570 rlb_update_client(client_info); 571 if (bond_info->rlb_update_retry_counter == 0) 572 client_info->ntt = 0; 573 } 574 } 575 576 /* do not update the entries again until this counter is zero so that 577 * not to confuse the clients. 578 */ 579 bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; 580 581 _unlock_rx_hashtbl_bh(bond); 582 } 583 584 /* The slave was assigned a new mac address - update the clients */ 585 static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave) 586 { 587 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 588 struct rlb_client_info *client_info; 589 int ntt = 0; 590 u32 hash_index; 591 592 _lock_rx_hashtbl_bh(bond); 593 594 hash_index = bond_info->rx_hashtbl_used_head; 595 for (; hash_index != RLB_NULL_INDEX; 596 hash_index = client_info->used_next) { 597 client_info = &(bond_info->rx_hashtbl[hash_index]); 598 599 if ((client_info->slave == slave) && 600 !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) { 601 client_info->ntt = 1; 602 ntt = 1; 603 } 604 } 605 606 /* update the team's flag only after the whole iteration */ 607 if (ntt) { 608 bond_info->rx_ntt = 1; 609 /* fasten the change */ 610 bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; 611 } 612 613 _unlock_rx_hashtbl_bh(bond); 614 } 615 616 /* mark all clients using src_ip to be updated */ 617 static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) 618 { 619 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 620 struct rlb_client_info *client_info; 621 u32 hash_index; 622 623 _lock_rx_hashtbl(bond); 624 625 hash_index = bond_info->rx_hashtbl_used_head; 626 for (; hash_index != RLB_NULL_INDEX; 627 hash_index = client_info->used_next) { 628 client_info = &(bond_info->rx_hashtbl[hash_index]); 629 630 if (!client_info->slave) { 631 pr_err("%s: Error: found a client with no channel in the client's hash table\n", 632 bond->dev->name); 633 continue; 634 } 635 /*update all clients using this src_ip, that are not assigned 636 * to the team's address (curr_active_slave) and have a known 637 * unicast mac address. 638 */ 639 if ((client_info->ip_src == src_ip) && 640 !ether_addr_equal_64bits(client_info->slave->dev->dev_addr, 641 bond->dev->dev_addr) && 642 !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) { 643 client_info->ntt = 1; 644 bond_info->rx_ntt = 1; 645 } 646 } 647 648 _unlock_rx_hashtbl(bond); 649 } 650 651 /* Caller must hold both bond and ptr locks for read */ 652 static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) 653 { 654 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 655 struct arp_pkt *arp = arp_pkt(skb); 656 struct slave *assigned_slave, *curr_active_slave; 657 struct rlb_client_info *client_info; 658 u32 hash_index = 0; 659 660 _lock_rx_hashtbl(bond); 661 662 curr_active_slave = rcu_dereference(bond->curr_active_slave); 663 664 hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst)); 665 client_info = &(bond_info->rx_hashtbl[hash_index]); 666 667 if (client_info->assigned) { 668 if ((client_info->ip_src == arp->ip_src) && 669 (client_info->ip_dst == arp->ip_dst)) { 670 /* the entry is already assigned to this client */ 671 if (!ether_addr_equal_64bits(arp->mac_dst, mac_bcast)) { 672 /* update mac address from arp */ 673 ether_addr_copy(client_info->mac_dst, arp->mac_dst); 674 } 675 ether_addr_copy(client_info->mac_src, arp->mac_src); 676 677 assigned_slave = client_info->slave; 678 if (assigned_slave) { 679 _unlock_rx_hashtbl(bond); 680 return assigned_slave; 681 } 682 } else { 683 /* the entry is already assigned to some other client, 684 * move the old client to primary (curr_active_slave) so 685 * that the new client can be assigned to this entry. 686 */ 687 if (bond->curr_active_slave && 688 client_info->slave != curr_active_slave) { 689 client_info->slave = curr_active_slave; 690 rlb_update_client(client_info); 691 } 692 } 693 } 694 /* assign a new slave */ 695 assigned_slave = __rlb_next_rx_slave(bond); 696 697 if (assigned_slave) { 698 if (!(client_info->assigned && 699 client_info->ip_src == arp->ip_src)) { 700 /* ip_src is going to be updated, 701 * fix the src hash list 702 */ 703 u32 hash_src = _simple_hash((u8 *)&arp->ip_src, 704 sizeof(arp->ip_src)); 705 rlb_src_unlink(bond, hash_index); 706 rlb_src_link(bond, hash_src, hash_index); 707 } 708 709 client_info->ip_src = arp->ip_src; 710 client_info->ip_dst = arp->ip_dst; 711 /* arp->mac_dst is broadcast for arp reqeusts. 712 * will be updated with clients actual unicast mac address 713 * upon receiving an arp reply. 714 */ 715 ether_addr_copy(client_info->mac_dst, arp->mac_dst); 716 ether_addr_copy(client_info->mac_src, arp->mac_src); 717 client_info->slave = assigned_slave; 718 719 if (!ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) { 720 client_info->ntt = 1; 721 bond->alb_info.rx_ntt = 1; 722 } else { 723 client_info->ntt = 0; 724 } 725 726 if (vlan_get_tag(skb, &client_info->vlan_id)) 727 client_info->vlan_id = 0; 728 729 if (!client_info->assigned) { 730 u32 prev_tbl_head = bond_info->rx_hashtbl_used_head; 731 bond_info->rx_hashtbl_used_head = hash_index; 732 client_info->used_next = prev_tbl_head; 733 if (prev_tbl_head != RLB_NULL_INDEX) { 734 bond_info->rx_hashtbl[prev_tbl_head].used_prev = 735 hash_index; 736 } 737 client_info->assigned = 1; 738 } 739 } 740 741 _unlock_rx_hashtbl(bond); 742 743 return assigned_slave; 744 } 745 746 /* chooses (and returns) transmit channel for arp reply 747 * does not choose channel for other arp types since they are 748 * sent on the curr_active_slave 749 */ 750 static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) 751 { 752 struct arp_pkt *arp = arp_pkt(skb); 753 struct slave *tx_slave = NULL; 754 755 /* Don't modify or load balance ARPs that do not originate locally 756 * (e.g.,arrive via a bridge). 757 */ 758 if (!bond_slave_has_mac_rx(bond, arp->mac_src)) 759 return NULL; 760 761 if (arp->op_code == htons(ARPOP_REPLY)) { 762 /* the arp must be sent on the selected 763 * rx channel 764 */ 765 tx_slave = rlb_choose_channel(skb, bond); 766 if (tx_slave) 767 ether_addr_copy(arp->mac_src, tx_slave->dev->dev_addr); 768 pr_debug("Server sent ARP Reply packet\n"); 769 } else if (arp->op_code == htons(ARPOP_REQUEST)) { 770 /* Create an entry in the rx_hashtbl for this client as a 771 * place holder. 772 * When the arp reply is received the entry will be updated 773 * with the correct unicast address of the client. 774 */ 775 rlb_choose_channel(skb, bond); 776 777 /* The ARP reply packets must be delayed so that 778 * they can cancel out the influence of the ARP request. 779 */ 780 bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY; 781 782 /* arp requests are broadcast and are sent on the primary 783 * the arp request will collapse all clients on the subnet to 784 * the primary slave. We must register these clients to be 785 * updated with their assigned mac. 786 */ 787 rlb_req_update_subnet_clients(bond, arp->ip_src); 788 pr_debug("Server sent ARP Request packet\n"); 789 } 790 791 return tx_slave; 792 } 793 794 /* Caller must hold bond lock for read */ 795 static void rlb_rebalance(struct bonding *bond) 796 { 797 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 798 struct slave *assigned_slave; 799 struct rlb_client_info *client_info; 800 int ntt; 801 u32 hash_index; 802 803 _lock_rx_hashtbl_bh(bond); 804 805 ntt = 0; 806 hash_index = bond_info->rx_hashtbl_used_head; 807 for (; hash_index != RLB_NULL_INDEX; 808 hash_index = client_info->used_next) { 809 client_info = &(bond_info->rx_hashtbl[hash_index]); 810 assigned_slave = __rlb_next_rx_slave(bond); 811 if (assigned_slave && (client_info->slave != assigned_slave)) { 812 client_info->slave = assigned_slave; 813 client_info->ntt = 1; 814 ntt = 1; 815 } 816 } 817 818 /* update the team's flag only after the whole iteration */ 819 if (ntt) 820 bond_info->rx_ntt = 1; 821 _unlock_rx_hashtbl_bh(bond); 822 } 823 824 /* Caller must hold rx_hashtbl lock */ 825 static void rlb_init_table_entry_dst(struct rlb_client_info *entry) 826 { 827 entry->used_next = RLB_NULL_INDEX; 828 entry->used_prev = RLB_NULL_INDEX; 829 entry->assigned = 0; 830 entry->slave = NULL; 831 entry->vlan_id = 0; 832 } 833 static void rlb_init_table_entry_src(struct rlb_client_info *entry) 834 { 835 entry->src_first = RLB_NULL_INDEX; 836 entry->src_prev = RLB_NULL_INDEX; 837 entry->src_next = RLB_NULL_INDEX; 838 } 839 840 static void rlb_init_table_entry(struct rlb_client_info *entry) 841 { 842 memset(entry, 0, sizeof(struct rlb_client_info)); 843 rlb_init_table_entry_dst(entry); 844 rlb_init_table_entry_src(entry); 845 } 846 847 static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index) 848 { 849 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 850 u32 next_index = bond_info->rx_hashtbl[index].used_next; 851 u32 prev_index = bond_info->rx_hashtbl[index].used_prev; 852 853 if (index == bond_info->rx_hashtbl_used_head) 854 bond_info->rx_hashtbl_used_head = next_index; 855 if (prev_index != RLB_NULL_INDEX) 856 bond_info->rx_hashtbl[prev_index].used_next = next_index; 857 if (next_index != RLB_NULL_INDEX) 858 bond_info->rx_hashtbl[next_index].used_prev = prev_index; 859 } 860 861 /* unlink a rlb hash table entry from the src list */ 862 static void rlb_src_unlink(struct bonding *bond, u32 index) 863 { 864 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 865 u32 next_index = bond_info->rx_hashtbl[index].src_next; 866 u32 prev_index = bond_info->rx_hashtbl[index].src_prev; 867 868 bond_info->rx_hashtbl[index].src_next = RLB_NULL_INDEX; 869 bond_info->rx_hashtbl[index].src_prev = RLB_NULL_INDEX; 870 871 if (next_index != RLB_NULL_INDEX) 872 bond_info->rx_hashtbl[next_index].src_prev = prev_index; 873 874 if (prev_index == RLB_NULL_INDEX) 875 return; 876 877 /* is prev_index pointing to the head of this list? */ 878 if (bond_info->rx_hashtbl[prev_index].src_first == index) 879 bond_info->rx_hashtbl[prev_index].src_first = next_index; 880 else 881 bond_info->rx_hashtbl[prev_index].src_next = next_index; 882 883 } 884 885 static void rlb_delete_table_entry(struct bonding *bond, u32 index) 886 { 887 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 888 struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); 889 890 rlb_delete_table_entry_dst(bond, index); 891 rlb_init_table_entry_dst(entry); 892 893 rlb_src_unlink(bond, index); 894 } 895 896 /* add the rx_hashtbl[ip_dst_hash] entry to the list 897 * of entries with identical ip_src_hash 898 */ 899 static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash) 900 { 901 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 902 u32 next; 903 904 bond_info->rx_hashtbl[ip_dst_hash].src_prev = ip_src_hash; 905 next = bond_info->rx_hashtbl[ip_src_hash].src_first; 906 bond_info->rx_hashtbl[ip_dst_hash].src_next = next; 907 if (next != RLB_NULL_INDEX) 908 bond_info->rx_hashtbl[next].src_prev = ip_dst_hash; 909 bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash; 910 } 911 912 /* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does 913 * not match arp->mac_src */ 914 static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp) 915 { 916 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 917 u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); 918 u32 index; 919 920 _lock_rx_hashtbl_bh(bond); 921 922 index = bond_info->rx_hashtbl[ip_src_hash].src_first; 923 while (index != RLB_NULL_INDEX) { 924 struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); 925 u32 next_index = entry->src_next; 926 if (entry->ip_src == arp->ip_src && 927 !ether_addr_equal_64bits(arp->mac_src, entry->mac_src)) 928 rlb_delete_table_entry(bond, index); 929 index = next_index; 930 } 931 _unlock_rx_hashtbl_bh(bond); 932 } 933 934 static int rlb_initialize(struct bonding *bond) 935 { 936 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 937 struct rlb_client_info *new_hashtbl; 938 int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info); 939 int i; 940 941 new_hashtbl = kmalloc(size, GFP_KERNEL); 942 if (!new_hashtbl) 943 return -1; 944 945 _lock_rx_hashtbl_bh(bond); 946 947 bond_info->rx_hashtbl = new_hashtbl; 948 949 bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; 950 951 for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) 952 rlb_init_table_entry(bond_info->rx_hashtbl + i); 953 954 _unlock_rx_hashtbl_bh(bond); 955 956 /* register to receive ARPs */ 957 bond->recv_probe = rlb_arp_recv; 958 959 return 0; 960 } 961 962 static void rlb_deinitialize(struct bonding *bond) 963 { 964 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 965 966 _lock_rx_hashtbl_bh(bond); 967 968 kfree(bond_info->rx_hashtbl); 969 bond_info->rx_hashtbl = NULL; 970 bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; 971 972 _unlock_rx_hashtbl_bh(bond); 973 } 974 975 static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) 976 { 977 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 978 u32 curr_index; 979 980 _lock_rx_hashtbl_bh(bond); 981 982 curr_index = bond_info->rx_hashtbl_used_head; 983 while (curr_index != RLB_NULL_INDEX) { 984 struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]); 985 u32 next_index = bond_info->rx_hashtbl[curr_index].used_next; 986 987 if (curr->vlan_id == vlan_id) 988 rlb_delete_table_entry(bond, curr_index); 989 990 curr_index = next_index; 991 } 992 993 _unlock_rx_hashtbl_bh(bond); 994 } 995 996 /*********************** tlb/rlb shared functions *********************/ 997 998 static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], 999 __be16 vlan_proto, u16 vid) 1000 { 1001 struct learning_pkt pkt; 1002 struct sk_buff *skb; 1003 int size = sizeof(struct learning_pkt); 1004 char *data; 1005 1006 memset(&pkt, 0, size); 1007 ether_addr_copy(pkt.mac_dst, mac_addr); 1008 ether_addr_copy(pkt.mac_src, mac_addr); 1009 pkt.type = cpu_to_be16(ETH_P_LOOPBACK); 1010 1011 skb = dev_alloc_skb(size); 1012 if (!skb) 1013 return; 1014 1015 data = skb_put(skb, size); 1016 memcpy(data, &pkt, size); 1017 1018 skb_reset_mac_header(skb); 1019 skb->network_header = skb->mac_header + ETH_HLEN; 1020 skb->protocol = pkt.type; 1021 skb->priority = TC_PRIO_CONTROL; 1022 skb->dev = slave->dev; 1023 1024 if (vid) { 1025 skb = vlan_put_tag(skb, vlan_proto, vid); 1026 if (!skb) { 1027 pr_err("%s: Error: failed to insert VLAN tag\n", 1028 slave->bond->dev->name); 1029 return; 1030 } 1031 } 1032 1033 dev_queue_xmit(skb); 1034 } 1035 1036 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], 1037 bool strict_match) 1038 { 1039 struct bonding *bond = bond_get_bond_by_slave(slave); 1040 struct net_device *upper; 1041 struct list_head *iter; 1042 struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP]; 1043 1044 /* send untagged */ 1045 alb_send_lp_vid(slave, mac_addr, 0, 0); 1046 1047 /* loop through all devices and see if we need to send a packet 1048 * for that device. 1049 */ 1050 rcu_read_lock(); 1051 netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { 1052 if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { 1053 if (strict_match && 1054 ether_addr_equal_64bits(mac_addr, 1055 upper->dev_addr)) { 1056 alb_send_lp_vid(slave, mac_addr, 1057 vlan_dev_vlan_proto(upper), 1058 vlan_dev_vlan_id(upper)); 1059 } else if (!strict_match) { 1060 alb_send_lp_vid(slave, upper->dev_addr, 1061 vlan_dev_vlan_proto(upper), 1062 vlan_dev_vlan_id(upper)); 1063 } 1064 } 1065 1066 /* If this is a macvlan device, then only send updates 1067 * when strict_match is turned off. 1068 */ 1069 if (netif_is_macvlan(upper) && !strict_match) { 1070 memset(tags, 0, sizeof(tags)); 1071 bond_verify_device_path(bond->dev, upper, tags); 1072 alb_send_lp_vid(slave, upper->dev_addr, 1073 tags[0].vlan_proto, tags[0].vlan_id); 1074 } 1075 } 1076 rcu_read_unlock(); 1077 } 1078 1079 static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[]) 1080 { 1081 struct net_device *dev = slave->dev; 1082 struct sockaddr s_addr; 1083 1084 if (BOND_MODE(slave->bond) == BOND_MODE_TLB) { 1085 memcpy(dev->dev_addr, addr, dev->addr_len); 1086 return 0; 1087 } 1088 1089 /* for rlb each slave must have a unique hw mac addresses so that */ 1090 /* each slave will receive packets destined to a different mac */ 1091 memcpy(s_addr.sa_data, addr, dev->addr_len); 1092 s_addr.sa_family = dev->type; 1093 if (dev_set_mac_address(dev, &s_addr)) { 1094 pr_err("%s: Error: dev_set_mac_address of dev %s failed!\n" 1095 "ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n", 1096 slave->bond->dev->name, dev->name); 1097 return -EOPNOTSUPP; 1098 } 1099 return 0; 1100 } 1101 1102 /* 1103 * Swap MAC addresses between two slaves. 1104 * 1105 * Called with RTNL held, and no other locks. 1106 * 1107 */ 1108 1109 static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2) 1110 { 1111 u8 tmp_mac_addr[ETH_ALEN]; 1112 1113 ether_addr_copy(tmp_mac_addr, slave1->dev->dev_addr); 1114 alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr); 1115 alb_set_slave_mac_addr(slave2, tmp_mac_addr); 1116 1117 } 1118 1119 /* 1120 * Send learning packets after MAC address swap. 1121 * 1122 * Called with RTNL and no other locks 1123 */ 1124 static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, 1125 struct slave *slave2) 1126 { 1127 int slaves_state_differ = (bond_slave_can_tx(slave1) != bond_slave_can_tx(slave2)); 1128 struct slave *disabled_slave = NULL; 1129 1130 ASSERT_RTNL(); 1131 1132 /* fasten the change in the switch */ 1133 if (bond_slave_can_tx(slave1)) { 1134 alb_send_learning_packets(slave1, slave1->dev->dev_addr, false); 1135 if (bond->alb_info.rlb_enabled) { 1136 /* inform the clients that the mac address 1137 * has changed 1138 */ 1139 rlb_req_update_slave_clients(bond, slave1); 1140 } 1141 } else { 1142 disabled_slave = slave1; 1143 } 1144 1145 if (bond_slave_can_tx(slave2)) { 1146 alb_send_learning_packets(slave2, slave2->dev->dev_addr, false); 1147 if (bond->alb_info.rlb_enabled) { 1148 /* inform the clients that the mac address 1149 * has changed 1150 */ 1151 rlb_req_update_slave_clients(bond, slave2); 1152 } 1153 } else { 1154 disabled_slave = slave2; 1155 } 1156 1157 if (bond->alb_info.rlb_enabled && slaves_state_differ) { 1158 /* A disabled slave was assigned an active mac addr */ 1159 rlb_teach_disabled_mac_on_primary(bond, 1160 disabled_slave->dev->dev_addr); 1161 } 1162 } 1163 1164 /** 1165 * alb_change_hw_addr_on_detach 1166 * @bond: bonding we're working on 1167 * @slave: the slave that was just detached 1168 * 1169 * We assume that @slave was already detached from the slave list. 1170 * 1171 * If @slave's permanent hw address is different both from its current 1172 * address and from @bond's address, then somewhere in the bond there's 1173 * a slave that has @slave's permanet address as its current address. 1174 * We'll make sure that that slave no longer uses @slave's permanent address. 1175 * 1176 * Caller must hold RTNL and no other locks 1177 */ 1178 static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave) 1179 { 1180 int perm_curr_diff; 1181 int perm_bond_diff; 1182 struct slave *found_slave; 1183 1184 perm_curr_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, 1185 slave->dev->dev_addr); 1186 perm_bond_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, 1187 bond->dev->dev_addr); 1188 1189 if (perm_curr_diff && perm_bond_diff) { 1190 found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr); 1191 1192 if (found_slave) { 1193 /* locking: needs RTNL and nothing else */ 1194 alb_swap_mac_addr(slave, found_slave); 1195 alb_fasten_mac_swap(bond, slave, found_slave); 1196 } 1197 } 1198 } 1199 1200 /** 1201 * alb_handle_addr_collision_on_attach 1202 * @bond: bonding we're working on 1203 * @slave: the slave that was just attached 1204 * 1205 * checks uniqueness of slave's mac address and handles the case the 1206 * new slave uses the bonds mac address. 1207 * 1208 * If the permanent hw address of @slave is @bond's hw address, we need to 1209 * find a different hw address to give @slave, that isn't in use by any other 1210 * slave in the bond. This address must be, of course, one of the permanent 1211 * addresses of the other slaves. 1212 * 1213 * We go over the slave list, and for each slave there we compare its 1214 * permanent hw address with the current address of all the other slaves. 1215 * If no match was found, then we've found a slave with a permanent address 1216 * that isn't used by any other slave in the bond, so we can assign it to 1217 * @slave. 1218 * 1219 * assumption: this function is called before @slave is attached to the 1220 * bond slave list. 1221 */ 1222 static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave) 1223 { 1224 struct slave *has_bond_addr = bond->curr_active_slave; 1225 struct slave *tmp_slave1, *free_mac_slave = NULL; 1226 struct list_head *iter; 1227 1228 if (!bond_has_slaves(bond)) { 1229 /* this is the first slave */ 1230 return 0; 1231 } 1232 1233 /* if slave's mac address differs from bond's mac address 1234 * check uniqueness of slave's mac address against the other 1235 * slaves in the bond. 1236 */ 1237 if (!ether_addr_equal_64bits(slave->perm_hwaddr, bond->dev->dev_addr)) { 1238 if (!bond_slave_has_mac(bond, slave->dev->dev_addr)) 1239 return 0; 1240 1241 /* Try setting slave mac to bond address and fall-through 1242 to code handling that situation below... */ 1243 alb_set_slave_mac_addr(slave, bond->dev->dev_addr); 1244 } 1245 1246 /* The slave's address is equal to the address of the bond. 1247 * Search for a spare address in the bond for this slave. 1248 */ 1249 bond_for_each_slave(bond, tmp_slave1, iter) { 1250 if (!bond_slave_has_mac(bond, tmp_slave1->perm_hwaddr)) { 1251 /* no slave has tmp_slave1's perm addr 1252 * as its curr addr 1253 */ 1254 free_mac_slave = tmp_slave1; 1255 break; 1256 } 1257 1258 if (!has_bond_addr) { 1259 if (ether_addr_equal_64bits(tmp_slave1->dev->dev_addr, 1260 bond->dev->dev_addr)) { 1261 1262 has_bond_addr = tmp_slave1; 1263 } 1264 } 1265 } 1266 1267 if (free_mac_slave) { 1268 alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr); 1269 1270 pr_warn("%s: Warning: the hw address of slave %s is in use by the bond; giving it the hw address of %s\n", 1271 bond->dev->name, slave->dev->name, 1272 free_mac_slave->dev->name); 1273 1274 } else if (has_bond_addr) { 1275 pr_err("%s: Error: the hw address of slave %s is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n", 1276 bond->dev->name, slave->dev->name); 1277 return -EFAULT; 1278 } 1279 1280 return 0; 1281 } 1282 1283 /** 1284 * alb_set_mac_address 1285 * @bond: 1286 * @addr: 1287 * 1288 * In TLB mode all slaves are configured to the bond's hw address, but set 1289 * their dev_addr field to different addresses (based on their permanent hw 1290 * addresses). 1291 * 1292 * For each slave, this function sets the interface to the new address and then 1293 * changes its dev_addr field to its previous value. 1294 * 1295 * Unwinding assumes bond's mac address has not yet changed. 1296 */ 1297 static int alb_set_mac_address(struct bonding *bond, void *addr) 1298 { 1299 struct slave *slave, *rollback_slave; 1300 struct list_head *iter; 1301 struct sockaddr sa; 1302 char tmp_addr[ETH_ALEN]; 1303 int res; 1304 1305 if (bond->alb_info.rlb_enabled) 1306 return 0; 1307 1308 bond_for_each_slave(bond, slave, iter) { 1309 /* save net_device's current hw address */ 1310 ether_addr_copy(tmp_addr, slave->dev->dev_addr); 1311 1312 res = dev_set_mac_address(slave->dev, addr); 1313 1314 /* restore net_device's hw address */ 1315 ether_addr_copy(slave->dev->dev_addr, tmp_addr); 1316 1317 if (res) 1318 goto unwind; 1319 } 1320 1321 return 0; 1322 1323 unwind: 1324 memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); 1325 sa.sa_family = bond->dev->type; 1326 1327 /* unwind from head to the slave that failed */ 1328 bond_for_each_slave(bond, rollback_slave, iter) { 1329 if (rollback_slave == slave) 1330 break; 1331 ether_addr_copy(tmp_addr, rollback_slave->dev->dev_addr); 1332 dev_set_mac_address(rollback_slave->dev, &sa); 1333 ether_addr_copy(rollback_slave->dev->dev_addr, tmp_addr); 1334 } 1335 1336 return res; 1337 } 1338 1339 /************************ exported alb funcions ************************/ 1340 1341 int bond_alb_initialize(struct bonding *bond, int rlb_enabled) 1342 { 1343 int res; 1344 1345 res = tlb_initialize(bond); 1346 if (res) 1347 return res; 1348 1349 if (rlb_enabled) { 1350 bond->alb_info.rlb_enabled = 1; 1351 /* initialize rlb */ 1352 res = rlb_initialize(bond); 1353 if (res) { 1354 tlb_deinitialize(bond); 1355 return res; 1356 } 1357 } else { 1358 bond->alb_info.rlb_enabled = 0; 1359 } 1360 1361 return 0; 1362 } 1363 1364 void bond_alb_deinitialize(struct bonding *bond) 1365 { 1366 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1367 1368 tlb_deinitialize(bond); 1369 1370 if (bond_info->rlb_enabled) 1371 rlb_deinitialize(bond); 1372 } 1373 1374 static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, 1375 struct slave *tx_slave) 1376 { 1377 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1378 struct ethhdr *eth_data = eth_hdr(skb); 1379 1380 if (!tx_slave) { 1381 /* unbalanced or unassigned, send through primary */ 1382 tx_slave = rcu_dereference(bond->curr_active_slave); 1383 if (bond->params.tlb_dynamic_lb) 1384 bond_info->unbalanced_load += skb->len; 1385 } 1386 1387 if (tx_slave && bond_slave_can_tx(tx_slave)) { 1388 if (tx_slave != rcu_dereference(bond->curr_active_slave)) { 1389 ether_addr_copy(eth_data->h_source, 1390 tx_slave->dev->dev_addr); 1391 } 1392 1393 bond_dev_queue_xmit(bond, skb, tx_slave->dev); 1394 goto out; 1395 } 1396 1397 if (tx_slave && bond->params.tlb_dynamic_lb) { 1398 _lock_tx_hashtbl(bond); 1399 __tlb_clear_slave(bond, tx_slave, 0); 1400 _unlock_tx_hashtbl(bond); 1401 } 1402 1403 /* no suitable interface, frame not sent */ 1404 dev_kfree_skb_any(skb); 1405 out: 1406 return NETDEV_TX_OK; 1407 } 1408 1409 int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) 1410 { 1411 struct bonding *bond = netdev_priv(bond_dev); 1412 struct ethhdr *eth_data; 1413 struct slave *tx_slave = NULL; 1414 u32 hash_index; 1415 1416 skb_reset_mac_header(skb); 1417 eth_data = eth_hdr(skb); 1418 1419 /* Do not TX balance any multicast or broadcast */ 1420 if (!is_multicast_ether_addr(eth_data->h_dest)) { 1421 switch (skb->protocol) { 1422 case htons(ETH_P_IP): 1423 case htons(ETH_P_IPX): 1424 /* In case of IPX, it will falback to L2 hash */ 1425 case htons(ETH_P_IPV6): 1426 hash_index = bond_xmit_hash(bond, skb); 1427 if (bond->params.tlb_dynamic_lb) { 1428 tx_slave = tlb_choose_channel(bond, 1429 hash_index & 0xFF, 1430 skb->len); 1431 } else { 1432 struct list_head *iter; 1433 int idx = hash_index % bond->slave_cnt; 1434 1435 bond_for_each_slave_rcu(bond, tx_slave, iter) 1436 if (--idx < 0) 1437 break; 1438 } 1439 break; 1440 } 1441 } 1442 return bond_do_alb_xmit(skb, bond, tx_slave); 1443 } 1444 1445 int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) 1446 { 1447 struct bonding *bond = netdev_priv(bond_dev); 1448 struct ethhdr *eth_data; 1449 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1450 struct slave *tx_slave = NULL; 1451 static const __be32 ip_bcast = htonl(0xffffffff); 1452 int hash_size = 0; 1453 bool do_tx_balance = true; 1454 u32 hash_index = 0; 1455 const u8 *hash_start = NULL; 1456 struct ipv6hdr *ip6hdr; 1457 1458 skb_reset_mac_header(skb); 1459 eth_data = eth_hdr(skb); 1460 1461 switch (ntohs(skb->protocol)) { 1462 case ETH_P_IP: { 1463 const struct iphdr *iph = ip_hdr(skb); 1464 1465 if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast) || 1466 (iph->daddr == ip_bcast) || 1467 (iph->protocol == IPPROTO_IGMP)) { 1468 do_tx_balance = false; 1469 break; 1470 } 1471 hash_start = (char *)&(iph->daddr); 1472 hash_size = sizeof(iph->daddr); 1473 } 1474 break; 1475 case ETH_P_IPV6: 1476 /* IPv6 doesn't really use broadcast mac address, but leave 1477 * that here just in case. 1478 */ 1479 if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast)) { 1480 do_tx_balance = false; 1481 break; 1482 } 1483 1484 /* IPv6 uses all-nodes multicast as an equivalent to 1485 * broadcasts in IPv4. 1486 */ 1487 if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) { 1488 do_tx_balance = false; 1489 break; 1490 } 1491 1492 /* Additianally, DAD probes should not be tx-balanced as that 1493 * will lead to false positives for duplicate addresses and 1494 * prevent address configuration from working. 1495 */ 1496 ip6hdr = ipv6_hdr(skb); 1497 if (ipv6_addr_any(&ip6hdr->saddr)) { 1498 do_tx_balance = false; 1499 break; 1500 } 1501 1502 hash_start = (char *)&(ipv6_hdr(skb)->daddr); 1503 hash_size = sizeof(ipv6_hdr(skb)->daddr); 1504 break; 1505 case ETH_P_IPX: 1506 if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { 1507 /* something is wrong with this packet */ 1508 do_tx_balance = false; 1509 break; 1510 } 1511 1512 if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { 1513 /* The only protocol worth balancing in 1514 * this family since it has an "ARP" like 1515 * mechanism 1516 */ 1517 do_tx_balance = false; 1518 break; 1519 } 1520 1521 hash_start = (char *)eth_data->h_dest; 1522 hash_size = ETH_ALEN; 1523 break; 1524 case ETH_P_ARP: 1525 do_tx_balance = false; 1526 if (bond_info->rlb_enabled) 1527 tx_slave = rlb_arp_xmit(skb, bond); 1528 break; 1529 default: 1530 do_tx_balance = false; 1531 break; 1532 } 1533 1534 if (do_tx_balance) { 1535 hash_index = _simple_hash(hash_start, hash_size); 1536 tx_slave = tlb_choose_channel(bond, hash_index, skb->len); 1537 } 1538 1539 return bond_do_alb_xmit(skb, bond, tx_slave); 1540 } 1541 1542 void bond_alb_monitor(struct work_struct *work) 1543 { 1544 struct bonding *bond = container_of(work, struct bonding, 1545 alb_work.work); 1546 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1547 struct list_head *iter; 1548 struct slave *slave; 1549 1550 if (!bond_has_slaves(bond)) { 1551 bond_info->tx_rebalance_counter = 0; 1552 bond_info->lp_counter = 0; 1553 goto re_arm; 1554 } 1555 1556 rcu_read_lock(); 1557 1558 bond_info->tx_rebalance_counter++; 1559 bond_info->lp_counter++; 1560 1561 /* send learning packets */ 1562 if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { 1563 bool strict_match; 1564 1565 /* change of curr_active_slave involves swapping of mac addresses. 1566 * in order to avoid this swapping from happening while 1567 * sending the learning packets, the curr_slave_lock must be held for 1568 * read. 1569 */ 1570 read_lock(&bond->curr_slave_lock); 1571 1572 bond_for_each_slave_rcu(bond, slave, iter) { 1573 /* If updating current_active, use all currently 1574 * user mac addreses (!strict_match). Otherwise, only 1575 * use mac of the slave device. 1576 * In RLB mode, we always use strict matches. 1577 */ 1578 strict_match = (slave != bond->curr_active_slave || 1579 bond_info->rlb_enabled); 1580 alb_send_learning_packets(slave, slave->dev->dev_addr, 1581 strict_match); 1582 } 1583 1584 read_unlock(&bond->curr_slave_lock); 1585 1586 bond_info->lp_counter = 0; 1587 } 1588 1589 /* rebalance tx traffic */ 1590 if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { 1591 1592 read_lock(&bond->curr_slave_lock); 1593 1594 bond_for_each_slave_rcu(bond, slave, iter) { 1595 tlb_clear_slave(bond, slave, 1); 1596 if (slave == bond->curr_active_slave) { 1597 SLAVE_TLB_INFO(slave).load = 1598 bond_info->unbalanced_load / 1599 BOND_TLB_REBALANCE_INTERVAL; 1600 bond_info->unbalanced_load = 0; 1601 } 1602 } 1603 1604 read_unlock(&bond->curr_slave_lock); 1605 1606 bond_info->tx_rebalance_counter = 0; 1607 } 1608 1609 /* handle rlb stuff */ 1610 if (bond_info->rlb_enabled) { 1611 if (bond_info->primary_is_promisc && 1612 (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { 1613 1614 /* 1615 * dev_set_promiscuity requires rtnl and 1616 * nothing else. Avoid race with bond_close. 1617 */ 1618 rcu_read_unlock(); 1619 if (!rtnl_trylock()) 1620 goto re_arm; 1621 1622 bond_info->rlb_promisc_timeout_counter = 0; 1623 1624 /* If the primary was set to promiscuous mode 1625 * because a slave was disabled then 1626 * it can now leave promiscuous mode. 1627 */ 1628 dev_set_promiscuity(bond->curr_active_slave->dev, -1); 1629 bond_info->primary_is_promisc = 0; 1630 1631 rtnl_unlock(); 1632 rcu_read_lock(); 1633 } 1634 1635 if (bond_info->rlb_rebalance) { 1636 bond_info->rlb_rebalance = 0; 1637 rlb_rebalance(bond); 1638 } 1639 1640 /* check if clients need updating */ 1641 if (bond_info->rx_ntt) { 1642 if (bond_info->rlb_update_delay_counter) { 1643 --bond_info->rlb_update_delay_counter; 1644 } else { 1645 rlb_update_rx_clients(bond); 1646 if (bond_info->rlb_update_retry_counter) 1647 --bond_info->rlb_update_retry_counter; 1648 else 1649 bond_info->rx_ntt = 0; 1650 } 1651 } 1652 } 1653 rcu_read_unlock(); 1654 re_arm: 1655 queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); 1656 } 1657 1658 /* assumption: called before the slave is attached to the bond 1659 * and not locked by the bond lock 1660 */ 1661 int bond_alb_init_slave(struct bonding *bond, struct slave *slave) 1662 { 1663 int res; 1664 1665 res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr); 1666 if (res) 1667 return res; 1668 1669 res = alb_handle_addr_collision_on_attach(bond, slave); 1670 if (res) 1671 return res; 1672 1673 tlb_init_slave(slave); 1674 1675 /* order a rebalance ASAP */ 1676 bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; 1677 1678 if (bond->alb_info.rlb_enabled) 1679 bond->alb_info.rlb_rebalance = 1; 1680 1681 return 0; 1682 } 1683 1684 /* 1685 * Remove slave from tlb and rlb hash tables, and fix up MAC addresses 1686 * if necessary. 1687 * 1688 * Caller must hold RTNL and no other locks 1689 */ 1690 void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) 1691 { 1692 if (bond_has_slaves(bond)) 1693 alb_change_hw_addr_on_detach(bond, slave); 1694 1695 tlb_clear_slave(bond, slave, 0); 1696 1697 if (bond->alb_info.rlb_enabled) { 1698 bond->alb_info.rx_slave = NULL; 1699 rlb_clear_slave(bond, slave); 1700 } 1701 } 1702 1703 /* Caller must hold bond lock for read */ 1704 void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link) 1705 { 1706 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1707 1708 if (link == BOND_LINK_DOWN) { 1709 tlb_clear_slave(bond, slave, 0); 1710 if (bond->alb_info.rlb_enabled) 1711 rlb_clear_slave(bond, slave); 1712 } else if (link == BOND_LINK_UP) { 1713 /* order a rebalance ASAP */ 1714 bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; 1715 if (bond->alb_info.rlb_enabled) { 1716 bond->alb_info.rlb_rebalance = 1; 1717 /* If the updelay module parameter is smaller than the 1718 * forwarding delay of the switch the rebalance will 1719 * not work because the rebalance arp replies will 1720 * not be forwarded to the clients.. 1721 */ 1722 } 1723 } 1724 } 1725 1726 /** 1727 * bond_alb_handle_active_change - assign new curr_active_slave 1728 * @bond: our bonding struct 1729 * @new_slave: new slave to assign 1730 * 1731 * Set the bond->curr_active_slave to @new_slave and handle 1732 * mac address swapping and promiscuity changes as needed. 1733 * 1734 * If new_slave is NULL, caller must hold curr_slave_lock or 1735 * bond->lock for write. 1736 * 1737 * If new_slave is not NULL, caller must hold RTNL, curr_slave_lock 1738 * for write. Processing here may sleep, so no other locks may be held. 1739 */ 1740 void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave) 1741 __releases(&bond->curr_slave_lock) 1742 __acquires(&bond->curr_slave_lock) 1743 { 1744 struct slave *swap_slave; 1745 1746 if (bond->curr_active_slave == new_slave) 1747 return; 1748 1749 if (bond->curr_active_slave && bond->alb_info.primary_is_promisc) { 1750 dev_set_promiscuity(bond->curr_active_slave->dev, -1); 1751 bond->alb_info.primary_is_promisc = 0; 1752 bond->alb_info.rlb_promisc_timeout_counter = 0; 1753 } 1754 1755 swap_slave = bond->curr_active_slave; 1756 rcu_assign_pointer(bond->curr_active_slave, new_slave); 1757 1758 if (!new_slave || !bond_has_slaves(bond)) 1759 return; 1760 1761 /* set the new curr_active_slave to the bonds mac address 1762 * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave 1763 */ 1764 if (!swap_slave) 1765 swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr); 1766 1767 /* 1768 * Arrange for swap_slave and new_slave to temporarily be 1769 * ignored so we can mess with their MAC addresses without 1770 * fear of interference from transmit activity. 1771 */ 1772 if (swap_slave) 1773 tlb_clear_slave(bond, swap_slave, 1); 1774 tlb_clear_slave(bond, new_slave, 1); 1775 1776 write_unlock_bh(&bond->curr_slave_lock); 1777 1778 ASSERT_RTNL(); 1779 1780 /* in TLB mode, the slave might flip down/up with the old dev_addr, 1781 * and thus filter bond->dev_addr's packets, so force bond's mac 1782 */ 1783 if (BOND_MODE(bond) == BOND_MODE_TLB) { 1784 struct sockaddr sa; 1785 u8 tmp_addr[ETH_ALEN]; 1786 1787 ether_addr_copy(tmp_addr, new_slave->dev->dev_addr); 1788 1789 memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); 1790 sa.sa_family = bond->dev->type; 1791 /* we don't care if it can't change its mac, best effort */ 1792 dev_set_mac_address(new_slave->dev, &sa); 1793 1794 ether_addr_copy(new_slave->dev->dev_addr, tmp_addr); 1795 } 1796 1797 /* curr_active_slave must be set before calling alb_swap_mac_addr */ 1798 if (swap_slave) { 1799 /* swap mac address */ 1800 alb_swap_mac_addr(swap_slave, new_slave); 1801 alb_fasten_mac_swap(bond, swap_slave, new_slave); 1802 } else { 1803 /* set the new_slave to the bond mac address */ 1804 alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr); 1805 alb_send_learning_packets(new_slave, bond->dev->dev_addr, 1806 false); 1807 } 1808 1809 write_lock_bh(&bond->curr_slave_lock); 1810 } 1811 1812 /* 1813 * Called with RTNL 1814 */ 1815 int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) 1816 __acquires(&bond->lock) 1817 __releases(&bond->lock) 1818 { 1819 struct bonding *bond = netdev_priv(bond_dev); 1820 struct sockaddr *sa = addr; 1821 struct slave *swap_slave; 1822 int res; 1823 1824 if (!is_valid_ether_addr(sa->sa_data)) 1825 return -EADDRNOTAVAIL; 1826 1827 res = alb_set_mac_address(bond, addr); 1828 if (res) 1829 return res; 1830 1831 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 1832 1833 /* If there is no curr_active_slave there is nothing else to do. 1834 * Otherwise we'll need to pass the new address to it and handle 1835 * duplications. 1836 */ 1837 if (!bond->curr_active_slave) 1838 return 0; 1839 1840 swap_slave = bond_slave_has_mac(bond, bond_dev->dev_addr); 1841 1842 if (swap_slave) { 1843 alb_swap_mac_addr(swap_slave, bond->curr_active_slave); 1844 alb_fasten_mac_swap(bond, swap_slave, bond->curr_active_slave); 1845 } else { 1846 alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr); 1847 1848 read_lock(&bond->lock); 1849 alb_send_learning_packets(bond->curr_active_slave, 1850 bond_dev->dev_addr, false); 1851 if (bond->alb_info.rlb_enabled) { 1852 /* inform clients mac address has changed */ 1853 rlb_req_update_slave_clients(bond, bond->curr_active_slave); 1854 } 1855 read_unlock(&bond->lock); 1856 } 1857 1858 return 0; 1859 } 1860 1861 void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id) 1862 { 1863 if (bond->alb_info.rlb_enabled) 1864 rlb_clear_vlan(bond, vlan_id); 1865 } 1866 1867