1 /* 2 * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License as published by the 6 * Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * for more details. 13 * 14 * You should have received a copy of the GNU General Public License along 15 * with this program; if not, see <http://www.gnu.org/licenses/>. 16 * 17 * The full GNU General Public License is included in this distribution in the 18 * file called LICENSE. 19 * 20 */ 21 22 #include <linux/skbuff.h> 23 #include <linux/netdevice.h> 24 #include <linux/etherdevice.h> 25 #include <linux/pkt_sched.h> 26 #include <linux/spinlock.h> 27 #include <linux/slab.h> 28 #include <linux/timer.h> 29 #include <linux/ip.h> 30 #include <linux/ipv6.h> 31 #include <linux/if_arp.h> 32 #include <linux/if_ether.h> 33 #include <linux/if_bonding.h> 34 #include <linux/if_vlan.h> 35 #include <linux/in.h> 36 #include <net/ipx.h> 37 #include <net/arp.h> 38 #include <net/ipv6.h> 39 #include <asm/byteorder.h> 40 #include "bonding.h" 41 #include "bond_alb.h" 42 43 44 45 #ifndef __long_aligned 46 #define __long_aligned __attribute__((aligned((sizeof(long))))) 47 #endif 48 static const u8 mac_bcast[ETH_ALEN] __long_aligned = { 49 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 50 }; 51 static const u8 mac_v6_allmcast[ETH_ALEN] __long_aligned = { 52 0x33, 0x33, 0x00, 0x00, 0x00, 0x01 53 }; 54 static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC; 55 56 #pragma pack(1) 57 struct learning_pkt { 58 u8 mac_dst[ETH_ALEN]; 59 u8 mac_src[ETH_ALEN]; 60 __be16 type; 61 u8 padding[ETH_ZLEN - ETH_HLEN]; 62 }; 63 64 struct arp_pkt { 65 __be16 hw_addr_space; 66 __be16 prot_addr_space; 67 u8 hw_addr_len; 68 u8 prot_addr_len; 69 __be16 op_code; 70 u8 mac_src[ETH_ALEN]; /* sender hardware address */ 71 __be32 ip_src; /* sender IP address */ 72 u8 mac_dst[ETH_ALEN]; /* target hardware address */ 73 __be32 ip_dst; /* target IP address */ 74 }; 75 #pragma pack() 76 77 static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) 78 { 79 return (struct arp_pkt *)skb_network_header(skb); 80 } 81 82 /* Forward declaration */ 83 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], 84 bool strict_match); 85 static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp); 86 static void rlb_src_unlink(struct bonding *bond, u32 index); 87 static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, 88 u32 ip_dst_hash); 89 90 static inline u8 _simple_hash(const u8 *hash_start, int hash_size) 91 { 92 int i; 93 u8 hash = 0; 94 95 for (i = 0; i < hash_size; i++) 96 hash ^= hash_start[i]; 97 98 return hash; 99 } 100 101 /*********************** tlb specific functions ***************************/ 102 103 static inline void _lock_tx_hashtbl_bh(struct bonding *bond) 104 { 105 spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); 106 } 107 108 static inline void _unlock_tx_hashtbl_bh(struct bonding *bond) 109 { 110 spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); 111 } 112 113 static inline void _lock_tx_hashtbl(struct bonding *bond) 114 { 115 spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); 116 } 117 118 static inline void _unlock_tx_hashtbl(struct bonding *bond) 119 { 120 spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); 121 } 122 123 /* Caller must hold tx_hashtbl lock */ 124 static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) 125 { 126 if (save_load) { 127 entry->load_history = 1 + entry->tx_bytes / 128 BOND_TLB_REBALANCE_INTERVAL; 129 entry->tx_bytes = 0; 130 } 131 132 entry->tx_slave = NULL; 133 entry->next = TLB_NULL_INDEX; 134 entry->prev = TLB_NULL_INDEX; 135 } 136 137 static inline void tlb_init_slave(struct slave *slave) 138 { 139 SLAVE_TLB_INFO(slave).load = 0; 140 SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; 141 } 142 143 /* Caller must hold bond lock for read, BH disabled */ 144 static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, 145 int save_load) 146 { 147 struct tlb_client_info *tx_hash_table; 148 u32 index; 149 150 /* clear slave from tx_hashtbl */ 151 tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; 152 153 /* skip this if we've already freed the tx hash table */ 154 if (tx_hash_table) { 155 index = SLAVE_TLB_INFO(slave).head; 156 while (index != TLB_NULL_INDEX) { 157 u32 next_index = tx_hash_table[index].next; 158 tlb_init_table_entry(&tx_hash_table[index], save_load); 159 index = next_index; 160 } 161 } 162 163 tlb_init_slave(slave); 164 } 165 166 /* Caller must hold bond lock for read */ 167 static void tlb_clear_slave(struct bonding *bond, struct slave *slave, 168 int save_load) 169 { 170 _lock_tx_hashtbl_bh(bond); 171 __tlb_clear_slave(bond, slave, save_load); 172 _unlock_tx_hashtbl_bh(bond); 173 } 174 175 /* Must be called before starting the monitor timer */ 176 static int tlb_initialize(struct bonding *bond) 177 { 178 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 179 int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info); 180 struct tlb_client_info *new_hashtbl; 181 int i; 182 183 new_hashtbl = kzalloc(size, GFP_KERNEL); 184 if (!new_hashtbl) 185 return -1; 186 187 _lock_tx_hashtbl_bh(bond); 188 189 bond_info->tx_hashtbl = new_hashtbl; 190 191 for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) 192 tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0); 193 194 _unlock_tx_hashtbl_bh(bond); 195 196 return 0; 197 } 198 199 /* Must be called only after all slaves have been released */ 200 static void tlb_deinitialize(struct bonding *bond) 201 { 202 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 203 struct tlb_up_slave *arr; 204 205 _lock_tx_hashtbl_bh(bond); 206 207 kfree(bond_info->tx_hashtbl); 208 bond_info->tx_hashtbl = NULL; 209 210 _unlock_tx_hashtbl_bh(bond); 211 212 arr = rtnl_dereference(bond_info->slave_arr); 213 if (arr) 214 kfree_rcu(arr, rcu); 215 } 216 217 static long long compute_gap(struct slave *slave) 218 { 219 return (s64) (slave->speed << 20) - /* Convert to Megabit per sec */ 220 (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ 221 } 222 223 /* Caller must hold bond lock for read */ 224 static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) 225 { 226 struct slave *slave, *least_loaded; 227 struct list_head *iter; 228 long long max_gap; 229 230 least_loaded = NULL; 231 max_gap = LLONG_MIN; 232 233 /* Find the slave with the largest gap */ 234 bond_for_each_slave_rcu(bond, slave, iter) { 235 if (bond_slave_can_tx(slave)) { 236 long long gap = compute_gap(slave); 237 238 if (max_gap < gap) { 239 least_loaded = slave; 240 max_gap = gap; 241 } 242 } 243 } 244 245 return least_loaded; 246 } 247 248 static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index, 249 u32 skb_len) 250 { 251 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 252 struct tlb_client_info *hash_table; 253 struct slave *assigned_slave; 254 255 hash_table = bond_info->tx_hashtbl; 256 assigned_slave = hash_table[hash_index].tx_slave; 257 if (!assigned_slave) { 258 assigned_slave = tlb_get_least_loaded_slave(bond); 259 260 if (assigned_slave) { 261 struct tlb_slave_info *slave_info = 262 &(SLAVE_TLB_INFO(assigned_slave)); 263 u32 next_index = slave_info->head; 264 265 hash_table[hash_index].tx_slave = assigned_slave; 266 hash_table[hash_index].next = next_index; 267 hash_table[hash_index].prev = TLB_NULL_INDEX; 268 269 if (next_index != TLB_NULL_INDEX) 270 hash_table[next_index].prev = hash_index; 271 272 slave_info->head = hash_index; 273 slave_info->load += 274 hash_table[hash_index].load_history; 275 } 276 } 277 278 if (assigned_slave) 279 hash_table[hash_index].tx_bytes += skb_len; 280 281 return assigned_slave; 282 } 283 284 /* Caller must hold bond lock for read */ 285 static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, 286 u32 skb_len) 287 { 288 struct slave *tx_slave; 289 /* 290 * We don't need to disable softirq here, becase 291 * tlb_choose_channel() is only called by bond_alb_xmit() 292 * which already has softirq disabled. 293 */ 294 _lock_tx_hashtbl(bond); 295 tx_slave = __tlb_choose_channel(bond, hash_index, skb_len); 296 _unlock_tx_hashtbl(bond); 297 return tx_slave; 298 } 299 300 /*********************** rlb specific functions ***************************/ 301 static inline void _lock_rx_hashtbl_bh(struct bonding *bond) 302 { 303 spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 304 } 305 306 static inline void _unlock_rx_hashtbl_bh(struct bonding *bond) 307 { 308 spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 309 } 310 311 static inline void _lock_rx_hashtbl(struct bonding *bond) 312 { 313 spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 314 } 315 316 static inline void _unlock_rx_hashtbl(struct bonding *bond) 317 { 318 spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); 319 } 320 321 /* when an ARP REPLY is received from a client update its info 322 * in the rx_hashtbl 323 */ 324 static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) 325 { 326 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 327 struct rlb_client_info *client_info; 328 u32 hash_index; 329 330 _lock_rx_hashtbl_bh(bond); 331 332 hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); 333 client_info = &(bond_info->rx_hashtbl[hash_index]); 334 335 if ((client_info->assigned) && 336 (client_info->ip_src == arp->ip_dst) && 337 (client_info->ip_dst == arp->ip_src) && 338 (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) { 339 /* update the clients MAC address */ 340 ether_addr_copy(client_info->mac_dst, arp->mac_src); 341 client_info->ntt = 1; 342 bond_info->rx_ntt = 1; 343 } 344 345 _unlock_rx_hashtbl_bh(bond); 346 } 347 348 static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, 349 struct slave *slave) 350 { 351 struct arp_pkt *arp, _arp; 352 353 if (skb->protocol != cpu_to_be16(ETH_P_ARP)) 354 goto out; 355 356 arp = skb_header_pointer(skb, 0, sizeof(_arp), &_arp); 357 if (!arp) 358 goto out; 359 360 /* We received an ARP from arp->ip_src. 361 * We might have used this IP address previously (on the bonding host 362 * itself or on a system that is bridged together with the bond). 363 * However, if arp->mac_src is different than what is stored in 364 * rx_hashtbl, some other host is now using the IP and we must prevent 365 * sending out client updates with this IP address and the old MAC 366 * address. 367 * Clean up all hash table entries that have this address as ip_src but 368 * have a different mac_src. 369 */ 370 rlb_purge_src_ip(bond, arp); 371 372 if (arp->op_code == htons(ARPOP_REPLY)) { 373 /* update rx hash table for this ARP */ 374 rlb_update_entry_from_arp(bond, arp); 375 netdev_dbg(bond->dev, "Server received an ARP Reply from client\n"); 376 } 377 out: 378 return RX_HANDLER_ANOTHER; 379 } 380 381 /* Caller must hold bond lock for read */ 382 static struct slave *rlb_next_rx_slave(struct bonding *bond) 383 { 384 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 385 struct slave *before = NULL, *rx_slave = NULL, *slave; 386 struct list_head *iter; 387 bool found = false; 388 389 bond_for_each_slave(bond, slave, iter) { 390 if (!bond_slave_can_tx(slave)) 391 continue; 392 if (!found) { 393 if (!before || before->speed < slave->speed) 394 before = slave; 395 } else { 396 if (!rx_slave || rx_slave->speed < slave->speed) 397 rx_slave = slave; 398 } 399 if (slave == bond_info->rx_slave) 400 found = true; 401 } 402 /* we didn't find anything after the current or we have something 403 * better before and up to the current slave 404 */ 405 if (!rx_slave || (before && rx_slave->speed < before->speed)) 406 rx_slave = before; 407 408 if (rx_slave) 409 bond_info->rx_slave = rx_slave; 410 411 return rx_slave; 412 } 413 414 /* Caller must hold rcu_read_lock() for read */ 415 static struct slave *__rlb_next_rx_slave(struct bonding *bond) 416 { 417 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 418 struct slave *before = NULL, *rx_slave = NULL, *slave; 419 struct list_head *iter; 420 bool found = false; 421 422 bond_for_each_slave_rcu(bond, slave, iter) { 423 if (!bond_slave_can_tx(slave)) 424 continue; 425 if (!found) { 426 if (!before || before->speed < slave->speed) 427 before = slave; 428 } else { 429 if (!rx_slave || rx_slave->speed < slave->speed) 430 rx_slave = slave; 431 } 432 if (slave == bond_info->rx_slave) 433 found = true; 434 } 435 /* we didn't find anything after the current or we have something 436 * better before and up to the current slave 437 */ 438 if (!rx_slave || (before && rx_slave->speed < before->speed)) 439 rx_slave = before; 440 441 if (rx_slave) 442 bond_info->rx_slave = rx_slave; 443 444 return rx_slave; 445 } 446 447 /* teach the switch the mac of a disabled slave 448 * on the primary for fault tolerance 449 * 450 * Caller must hold bond->curr_slave_lock for write or bond lock for write 451 */ 452 static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) 453 { 454 struct slave *curr_active = bond_deref_active_protected(bond); 455 456 if (!curr_active) 457 return; 458 459 if (!bond->alb_info.primary_is_promisc) { 460 if (!dev_set_promiscuity(curr_active->dev, 1)) 461 bond->alb_info.primary_is_promisc = 1; 462 else 463 bond->alb_info.primary_is_promisc = 0; 464 } 465 466 bond->alb_info.rlb_promisc_timeout_counter = 0; 467 468 alb_send_learning_packets(curr_active, addr, true); 469 } 470 471 /* slave being removed should not be active at this point 472 * 473 * Caller must hold rtnl. 474 */ 475 static void rlb_clear_slave(struct bonding *bond, struct slave *slave) 476 { 477 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 478 struct rlb_client_info *rx_hash_table; 479 u32 index, next_index; 480 481 /* clear slave from rx_hashtbl */ 482 _lock_rx_hashtbl_bh(bond); 483 484 rx_hash_table = bond_info->rx_hashtbl; 485 index = bond_info->rx_hashtbl_used_head; 486 for (; index != RLB_NULL_INDEX; index = next_index) { 487 next_index = rx_hash_table[index].used_next; 488 if (rx_hash_table[index].slave == slave) { 489 struct slave *assigned_slave = rlb_next_rx_slave(bond); 490 491 if (assigned_slave) { 492 rx_hash_table[index].slave = assigned_slave; 493 if (!ether_addr_equal_64bits(rx_hash_table[index].mac_dst, 494 mac_bcast)) { 495 bond_info->rx_hashtbl[index].ntt = 1; 496 bond_info->rx_ntt = 1; 497 /* A slave has been removed from the 498 * table because it is either disabled 499 * or being released. We must retry the 500 * update to avoid clients from not 501 * being updated & disconnecting when 502 * there is stress 503 */ 504 bond_info->rlb_update_retry_counter = 505 RLB_UPDATE_RETRY; 506 } 507 } else { /* there is no active slave */ 508 rx_hash_table[index].slave = NULL; 509 } 510 } 511 } 512 513 _unlock_rx_hashtbl_bh(bond); 514 515 write_lock_bh(&bond->curr_slave_lock); 516 517 if (slave != bond_deref_active_protected(bond)) 518 rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); 519 520 write_unlock_bh(&bond->curr_slave_lock); 521 } 522 523 static void rlb_update_client(struct rlb_client_info *client_info) 524 { 525 int i; 526 527 if (!client_info->slave) 528 return; 529 530 for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { 531 struct sk_buff *skb; 532 533 skb = arp_create(ARPOP_REPLY, ETH_P_ARP, 534 client_info->ip_dst, 535 client_info->slave->dev, 536 client_info->ip_src, 537 client_info->mac_dst, 538 client_info->slave->dev->dev_addr, 539 client_info->mac_dst); 540 if (!skb) { 541 netdev_err(client_info->slave->bond->dev, 542 "failed to create an ARP packet\n"); 543 continue; 544 } 545 546 skb->dev = client_info->slave->dev; 547 548 if (client_info->vlan_id) { 549 skb = vlan_put_tag(skb, htons(ETH_P_8021Q), client_info->vlan_id); 550 if (!skb) { 551 netdev_err(client_info->slave->bond->dev, 552 "failed to insert VLAN tag\n"); 553 continue; 554 } 555 } 556 557 arp_xmit(skb); 558 } 559 } 560 561 /* sends ARP REPLIES that update the clients that need updating */ 562 static void rlb_update_rx_clients(struct bonding *bond) 563 { 564 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 565 struct rlb_client_info *client_info; 566 u32 hash_index; 567 568 _lock_rx_hashtbl_bh(bond); 569 570 hash_index = bond_info->rx_hashtbl_used_head; 571 for (; hash_index != RLB_NULL_INDEX; 572 hash_index = client_info->used_next) { 573 client_info = &(bond_info->rx_hashtbl[hash_index]); 574 if (client_info->ntt) { 575 rlb_update_client(client_info); 576 if (bond_info->rlb_update_retry_counter == 0) 577 client_info->ntt = 0; 578 } 579 } 580 581 /* do not update the entries again until this counter is zero so that 582 * not to confuse the clients. 583 */ 584 bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; 585 586 _unlock_rx_hashtbl_bh(bond); 587 } 588 589 /* The slave was assigned a new mac address - update the clients */ 590 static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave) 591 { 592 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 593 struct rlb_client_info *client_info; 594 int ntt = 0; 595 u32 hash_index; 596 597 _lock_rx_hashtbl_bh(bond); 598 599 hash_index = bond_info->rx_hashtbl_used_head; 600 for (; hash_index != RLB_NULL_INDEX; 601 hash_index = client_info->used_next) { 602 client_info = &(bond_info->rx_hashtbl[hash_index]); 603 604 if ((client_info->slave == slave) && 605 !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) { 606 client_info->ntt = 1; 607 ntt = 1; 608 } 609 } 610 611 /* update the team's flag only after the whole iteration */ 612 if (ntt) { 613 bond_info->rx_ntt = 1; 614 /* fasten the change */ 615 bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; 616 } 617 618 _unlock_rx_hashtbl_bh(bond); 619 } 620 621 /* mark all clients using src_ip to be updated */ 622 static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) 623 { 624 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 625 struct rlb_client_info *client_info; 626 u32 hash_index; 627 628 _lock_rx_hashtbl(bond); 629 630 hash_index = bond_info->rx_hashtbl_used_head; 631 for (; hash_index != RLB_NULL_INDEX; 632 hash_index = client_info->used_next) { 633 client_info = &(bond_info->rx_hashtbl[hash_index]); 634 635 if (!client_info->slave) { 636 netdev_err(bond->dev, "found a client with no channel in the client's hash table\n"); 637 continue; 638 } 639 /*update all clients using this src_ip, that are not assigned 640 * to the team's address (curr_active_slave) and have a known 641 * unicast mac address. 642 */ 643 if ((client_info->ip_src == src_ip) && 644 !ether_addr_equal_64bits(client_info->slave->dev->dev_addr, 645 bond->dev->dev_addr) && 646 !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) { 647 client_info->ntt = 1; 648 bond_info->rx_ntt = 1; 649 } 650 } 651 652 _unlock_rx_hashtbl(bond); 653 } 654 655 /* Caller must hold both bond and ptr locks for read */ 656 static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) 657 { 658 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 659 struct arp_pkt *arp = arp_pkt(skb); 660 struct slave *assigned_slave, *curr_active_slave; 661 struct rlb_client_info *client_info; 662 u32 hash_index = 0; 663 664 _lock_rx_hashtbl(bond); 665 666 curr_active_slave = rcu_dereference(bond->curr_active_slave); 667 668 hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst)); 669 client_info = &(bond_info->rx_hashtbl[hash_index]); 670 671 if (client_info->assigned) { 672 if ((client_info->ip_src == arp->ip_src) && 673 (client_info->ip_dst == arp->ip_dst)) { 674 /* the entry is already assigned to this client */ 675 if (!ether_addr_equal_64bits(arp->mac_dst, mac_bcast)) { 676 /* update mac address from arp */ 677 ether_addr_copy(client_info->mac_dst, arp->mac_dst); 678 } 679 ether_addr_copy(client_info->mac_src, arp->mac_src); 680 681 assigned_slave = client_info->slave; 682 if (assigned_slave) { 683 _unlock_rx_hashtbl(bond); 684 return assigned_slave; 685 } 686 } else { 687 /* the entry is already assigned to some other client, 688 * move the old client to primary (curr_active_slave) so 689 * that the new client can be assigned to this entry. 690 */ 691 if (curr_active_slave && 692 client_info->slave != curr_active_slave) { 693 client_info->slave = curr_active_slave; 694 rlb_update_client(client_info); 695 } 696 } 697 } 698 /* assign a new slave */ 699 assigned_slave = __rlb_next_rx_slave(bond); 700 701 if (assigned_slave) { 702 if (!(client_info->assigned && 703 client_info->ip_src == arp->ip_src)) { 704 /* ip_src is going to be updated, 705 * fix the src hash list 706 */ 707 u32 hash_src = _simple_hash((u8 *)&arp->ip_src, 708 sizeof(arp->ip_src)); 709 rlb_src_unlink(bond, hash_index); 710 rlb_src_link(bond, hash_src, hash_index); 711 } 712 713 client_info->ip_src = arp->ip_src; 714 client_info->ip_dst = arp->ip_dst; 715 /* arp->mac_dst is broadcast for arp reqeusts. 716 * will be updated with clients actual unicast mac address 717 * upon receiving an arp reply. 718 */ 719 ether_addr_copy(client_info->mac_dst, arp->mac_dst); 720 ether_addr_copy(client_info->mac_src, arp->mac_src); 721 client_info->slave = assigned_slave; 722 723 if (!ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) { 724 client_info->ntt = 1; 725 bond->alb_info.rx_ntt = 1; 726 } else { 727 client_info->ntt = 0; 728 } 729 730 if (vlan_get_tag(skb, &client_info->vlan_id)) 731 client_info->vlan_id = 0; 732 733 if (!client_info->assigned) { 734 u32 prev_tbl_head = bond_info->rx_hashtbl_used_head; 735 bond_info->rx_hashtbl_used_head = hash_index; 736 client_info->used_next = prev_tbl_head; 737 if (prev_tbl_head != RLB_NULL_INDEX) { 738 bond_info->rx_hashtbl[prev_tbl_head].used_prev = 739 hash_index; 740 } 741 client_info->assigned = 1; 742 } 743 } 744 745 _unlock_rx_hashtbl(bond); 746 747 return assigned_slave; 748 } 749 750 /* chooses (and returns) transmit channel for arp reply 751 * does not choose channel for other arp types since they are 752 * sent on the curr_active_slave 753 */ 754 static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) 755 { 756 struct arp_pkt *arp = arp_pkt(skb); 757 struct slave *tx_slave = NULL; 758 759 /* Don't modify or load balance ARPs that do not originate locally 760 * (e.g.,arrive via a bridge). 761 */ 762 if (!bond_slave_has_mac_rx(bond, arp->mac_src)) 763 return NULL; 764 765 if (arp->op_code == htons(ARPOP_REPLY)) { 766 /* the arp must be sent on the selected 767 * rx channel 768 */ 769 tx_slave = rlb_choose_channel(skb, bond); 770 if (tx_slave) 771 ether_addr_copy(arp->mac_src, tx_slave->dev->dev_addr); 772 netdev_dbg(bond->dev, "Server sent ARP Reply packet\n"); 773 } else if (arp->op_code == htons(ARPOP_REQUEST)) { 774 /* Create an entry in the rx_hashtbl for this client as a 775 * place holder. 776 * When the arp reply is received the entry will be updated 777 * with the correct unicast address of the client. 778 */ 779 rlb_choose_channel(skb, bond); 780 781 /* The ARP reply packets must be delayed so that 782 * they can cancel out the influence of the ARP request. 783 */ 784 bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY; 785 786 /* arp requests are broadcast and are sent on the primary 787 * the arp request will collapse all clients on the subnet to 788 * the primary slave. We must register these clients to be 789 * updated with their assigned mac. 790 */ 791 rlb_req_update_subnet_clients(bond, arp->ip_src); 792 netdev_dbg(bond->dev, "Server sent ARP Request packet\n"); 793 } 794 795 return tx_slave; 796 } 797 798 /* Caller must hold bond lock for read */ 799 static void rlb_rebalance(struct bonding *bond) 800 { 801 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 802 struct slave *assigned_slave; 803 struct rlb_client_info *client_info; 804 int ntt; 805 u32 hash_index; 806 807 _lock_rx_hashtbl_bh(bond); 808 809 ntt = 0; 810 hash_index = bond_info->rx_hashtbl_used_head; 811 for (; hash_index != RLB_NULL_INDEX; 812 hash_index = client_info->used_next) { 813 client_info = &(bond_info->rx_hashtbl[hash_index]); 814 assigned_slave = __rlb_next_rx_slave(bond); 815 if (assigned_slave && (client_info->slave != assigned_slave)) { 816 client_info->slave = assigned_slave; 817 client_info->ntt = 1; 818 ntt = 1; 819 } 820 } 821 822 /* update the team's flag only after the whole iteration */ 823 if (ntt) 824 bond_info->rx_ntt = 1; 825 _unlock_rx_hashtbl_bh(bond); 826 } 827 828 /* Caller must hold rx_hashtbl lock */ 829 static void rlb_init_table_entry_dst(struct rlb_client_info *entry) 830 { 831 entry->used_next = RLB_NULL_INDEX; 832 entry->used_prev = RLB_NULL_INDEX; 833 entry->assigned = 0; 834 entry->slave = NULL; 835 entry->vlan_id = 0; 836 } 837 static void rlb_init_table_entry_src(struct rlb_client_info *entry) 838 { 839 entry->src_first = RLB_NULL_INDEX; 840 entry->src_prev = RLB_NULL_INDEX; 841 entry->src_next = RLB_NULL_INDEX; 842 } 843 844 static void rlb_init_table_entry(struct rlb_client_info *entry) 845 { 846 memset(entry, 0, sizeof(struct rlb_client_info)); 847 rlb_init_table_entry_dst(entry); 848 rlb_init_table_entry_src(entry); 849 } 850 851 static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index) 852 { 853 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 854 u32 next_index = bond_info->rx_hashtbl[index].used_next; 855 u32 prev_index = bond_info->rx_hashtbl[index].used_prev; 856 857 if (index == bond_info->rx_hashtbl_used_head) 858 bond_info->rx_hashtbl_used_head = next_index; 859 if (prev_index != RLB_NULL_INDEX) 860 bond_info->rx_hashtbl[prev_index].used_next = next_index; 861 if (next_index != RLB_NULL_INDEX) 862 bond_info->rx_hashtbl[next_index].used_prev = prev_index; 863 } 864 865 /* unlink a rlb hash table entry from the src list */ 866 static void rlb_src_unlink(struct bonding *bond, u32 index) 867 { 868 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 869 u32 next_index = bond_info->rx_hashtbl[index].src_next; 870 u32 prev_index = bond_info->rx_hashtbl[index].src_prev; 871 872 bond_info->rx_hashtbl[index].src_next = RLB_NULL_INDEX; 873 bond_info->rx_hashtbl[index].src_prev = RLB_NULL_INDEX; 874 875 if (next_index != RLB_NULL_INDEX) 876 bond_info->rx_hashtbl[next_index].src_prev = prev_index; 877 878 if (prev_index == RLB_NULL_INDEX) 879 return; 880 881 /* is prev_index pointing to the head of this list? */ 882 if (bond_info->rx_hashtbl[prev_index].src_first == index) 883 bond_info->rx_hashtbl[prev_index].src_first = next_index; 884 else 885 bond_info->rx_hashtbl[prev_index].src_next = next_index; 886 887 } 888 889 static void rlb_delete_table_entry(struct bonding *bond, u32 index) 890 { 891 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 892 struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); 893 894 rlb_delete_table_entry_dst(bond, index); 895 rlb_init_table_entry_dst(entry); 896 897 rlb_src_unlink(bond, index); 898 } 899 900 /* add the rx_hashtbl[ip_dst_hash] entry to the list 901 * of entries with identical ip_src_hash 902 */ 903 static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash) 904 { 905 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 906 u32 next; 907 908 bond_info->rx_hashtbl[ip_dst_hash].src_prev = ip_src_hash; 909 next = bond_info->rx_hashtbl[ip_src_hash].src_first; 910 bond_info->rx_hashtbl[ip_dst_hash].src_next = next; 911 if (next != RLB_NULL_INDEX) 912 bond_info->rx_hashtbl[next].src_prev = ip_dst_hash; 913 bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash; 914 } 915 916 /* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does 917 * not match arp->mac_src */ 918 static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp) 919 { 920 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 921 u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); 922 u32 index; 923 924 _lock_rx_hashtbl_bh(bond); 925 926 index = bond_info->rx_hashtbl[ip_src_hash].src_first; 927 while (index != RLB_NULL_INDEX) { 928 struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); 929 u32 next_index = entry->src_next; 930 if (entry->ip_src == arp->ip_src && 931 !ether_addr_equal_64bits(arp->mac_src, entry->mac_src)) 932 rlb_delete_table_entry(bond, index); 933 index = next_index; 934 } 935 _unlock_rx_hashtbl_bh(bond); 936 } 937 938 static int rlb_initialize(struct bonding *bond) 939 { 940 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 941 struct rlb_client_info *new_hashtbl; 942 int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info); 943 int i; 944 945 new_hashtbl = kmalloc(size, GFP_KERNEL); 946 if (!new_hashtbl) 947 return -1; 948 949 _lock_rx_hashtbl_bh(bond); 950 951 bond_info->rx_hashtbl = new_hashtbl; 952 953 bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; 954 955 for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) 956 rlb_init_table_entry(bond_info->rx_hashtbl + i); 957 958 _unlock_rx_hashtbl_bh(bond); 959 960 /* register to receive ARPs */ 961 bond->recv_probe = rlb_arp_recv; 962 963 return 0; 964 } 965 966 static void rlb_deinitialize(struct bonding *bond) 967 { 968 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 969 970 _lock_rx_hashtbl_bh(bond); 971 972 kfree(bond_info->rx_hashtbl); 973 bond_info->rx_hashtbl = NULL; 974 bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; 975 976 _unlock_rx_hashtbl_bh(bond); 977 } 978 979 static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) 980 { 981 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 982 u32 curr_index; 983 984 _lock_rx_hashtbl_bh(bond); 985 986 curr_index = bond_info->rx_hashtbl_used_head; 987 while (curr_index != RLB_NULL_INDEX) { 988 struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]); 989 u32 next_index = bond_info->rx_hashtbl[curr_index].used_next; 990 991 if (curr->vlan_id == vlan_id) 992 rlb_delete_table_entry(bond, curr_index); 993 994 curr_index = next_index; 995 } 996 997 _unlock_rx_hashtbl_bh(bond); 998 } 999 1000 /*********************** tlb/rlb shared functions *********************/ 1001 1002 static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], 1003 __be16 vlan_proto, u16 vid) 1004 { 1005 struct learning_pkt pkt; 1006 struct sk_buff *skb; 1007 int size = sizeof(struct learning_pkt); 1008 char *data; 1009 1010 memset(&pkt, 0, size); 1011 ether_addr_copy(pkt.mac_dst, mac_addr); 1012 ether_addr_copy(pkt.mac_src, mac_addr); 1013 pkt.type = cpu_to_be16(ETH_P_LOOPBACK); 1014 1015 skb = dev_alloc_skb(size); 1016 if (!skb) 1017 return; 1018 1019 data = skb_put(skb, size); 1020 memcpy(data, &pkt, size); 1021 1022 skb_reset_mac_header(skb); 1023 skb->network_header = skb->mac_header + ETH_HLEN; 1024 skb->protocol = pkt.type; 1025 skb->priority = TC_PRIO_CONTROL; 1026 skb->dev = slave->dev; 1027 1028 if (vid) { 1029 skb = vlan_put_tag(skb, vlan_proto, vid); 1030 if (!skb) { 1031 netdev_err(slave->bond->dev, "failed to insert VLAN tag\n"); 1032 return; 1033 } 1034 } 1035 1036 dev_queue_xmit(skb); 1037 } 1038 1039 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], 1040 bool strict_match) 1041 { 1042 struct bonding *bond = bond_get_bond_by_slave(slave); 1043 struct net_device *upper; 1044 struct list_head *iter; 1045 struct bond_vlan_tag *tags; 1046 1047 /* send untagged */ 1048 alb_send_lp_vid(slave, mac_addr, 0, 0); 1049 1050 /* loop through all devices and see if we need to send a packet 1051 * for that device. 1052 */ 1053 rcu_read_lock(); 1054 netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { 1055 if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { 1056 if (strict_match && 1057 ether_addr_equal_64bits(mac_addr, 1058 upper->dev_addr)) { 1059 alb_send_lp_vid(slave, mac_addr, 1060 vlan_dev_vlan_proto(upper), 1061 vlan_dev_vlan_id(upper)); 1062 } else if (!strict_match) { 1063 alb_send_lp_vid(slave, upper->dev_addr, 1064 vlan_dev_vlan_proto(upper), 1065 vlan_dev_vlan_id(upper)); 1066 } 1067 } 1068 1069 /* If this is a macvlan device, then only send updates 1070 * when strict_match is turned off. 1071 */ 1072 if (netif_is_macvlan(upper) && !strict_match) { 1073 tags = bond_verify_device_path(bond->dev, upper, 0); 1074 if (IS_ERR_OR_NULL(tags)) 1075 BUG(); 1076 alb_send_lp_vid(slave, upper->dev_addr, 1077 tags[0].vlan_proto, tags[0].vlan_id); 1078 kfree(tags); 1079 } 1080 } 1081 rcu_read_unlock(); 1082 } 1083 1084 static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[]) 1085 { 1086 struct net_device *dev = slave->dev; 1087 struct sockaddr s_addr; 1088 1089 if (BOND_MODE(slave->bond) == BOND_MODE_TLB) { 1090 memcpy(dev->dev_addr, addr, dev->addr_len); 1091 return 0; 1092 } 1093 1094 /* for rlb each slave must have a unique hw mac addresses so that */ 1095 /* each slave will receive packets destined to a different mac */ 1096 memcpy(s_addr.sa_data, addr, dev->addr_len); 1097 s_addr.sa_family = dev->type; 1098 if (dev_set_mac_address(dev, &s_addr)) { 1099 netdev_err(slave->bond->dev, "dev_set_mac_address of dev %s failed! ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n", 1100 dev->name); 1101 return -EOPNOTSUPP; 1102 } 1103 return 0; 1104 } 1105 1106 /* 1107 * Swap MAC addresses between two slaves. 1108 * 1109 * Called with RTNL held, and no other locks. 1110 * 1111 */ 1112 1113 static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2) 1114 { 1115 u8 tmp_mac_addr[ETH_ALEN]; 1116 1117 ether_addr_copy(tmp_mac_addr, slave1->dev->dev_addr); 1118 alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr); 1119 alb_set_slave_mac_addr(slave2, tmp_mac_addr); 1120 1121 } 1122 1123 /* 1124 * Send learning packets after MAC address swap. 1125 * 1126 * Called with RTNL and no other locks 1127 */ 1128 static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, 1129 struct slave *slave2) 1130 { 1131 int slaves_state_differ = (bond_slave_can_tx(slave1) != bond_slave_can_tx(slave2)); 1132 struct slave *disabled_slave = NULL; 1133 1134 ASSERT_RTNL(); 1135 1136 /* fasten the change in the switch */ 1137 if (bond_slave_can_tx(slave1)) { 1138 alb_send_learning_packets(slave1, slave1->dev->dev_addr, false); 1139 if (bond->alb_info.rlb_enabled) { 1140 /* inform the clients that the mac address 1141 * has changed 1142 */ 1143 rlb_req_update_slave_clients(bond, slave1); 1144 } 1145 } else { 1146 disabled_slave = slave1; 1147 } 1148 1149 if (bond_slave_can_tx(slave2)) { 1150 alb_send_learning_packets(slave2, slave2->dev->dev_addr, false); 1151 if (bond->alb_info.rlb_enabled) { 1152 /* inform the clients that the mac address 1153 * has changed 1154 */ 1155 rlb_req_update_slave_clients(bond, slave2); 1156 } 1157 } else { 1158 disabled_slave = slave2; 1159 } 1160 1161 if (bond->alb_info.rlb_enabled && slaves_state_differ) { 1162 /* A disabled slave was assigned an active mac addr */ 1163 rlb_teach_disabled_mac_on_primary(bond, 1164 disabled_slave->dev->dev_addr); 1165 } 1166 } 1167 1168 /** 1169 * alb_change_hw_addr_on_detach 1170 * @bond: bonding we're working on 1171 * @slave: the slave that was just detached 1172 * 1173 * We assume that @slave was already detached from the slave list. 1174 * 1175 * If @slave's permanent hw address is different both from its current 1176 * address and from @bond's address, then somewhere in the bond there's 1177 * a slave that has @slave's permanet address as its current address. 1178 * We'll make sure that that slave no longer uses @slave's permanent address. 1179 * 1180 * Caller must hold RTNL and no other locks 1181 */ 1182 static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave) 1183 { 1184 int perm_curr_diff; 1185 int perm_bond_diff; 1186 struct slave *found_slave; 1187 1188 perm_curr_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, 1189 slave->dev->dev_addr); 1190 perm_bond_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, 1191 bond->dev->dev_addr); 1192 1193 if (perm_curr_diff && perm_bond_diff) { 1194 found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr); 1195 1196 if (found_slave) { 1197 /* locking: needs RTNL and nothing else */ 1198 alb_swap_mac_addr(slave, found_slave); 1199 alb_fasten_mac_swap(bond, slave, found_slave); 1200 } 1201 } 1202 } 1203 1204 /** 1205 * alb_handle_addr_collision_on_attach 1206 * @bond: bonding we're working on 1207 * @slave: the slave that was just attached 1208 * 1209 * checks uniqueness of slave's mac address and handles the case the 1210 * new slave uses the bonds mac address. 1211 * 1212 * If the permanent hw address of @slave is @bond's hw address, we need to 1213 * find a different hw address to give @slave, that isn't in use by any other 1214 * slave in the bond. This address must be, of course, one of the permanent 1215 * addresses of the other slaves. 1216 * 1217 * We go over the slave list, and for each slave there we compare its 1218 * permanent hw address with the current address of all the other slaves. 1219 * If no match was found, then we've found a slave with a permanent address 1220 * that isn't used by any other slave in the bond, so we can assign it to 1221 * @slave. 1222 * 1223 * assumption: this function is called before @slave is attached to the 1224 * bond slave list. 1225 */ 1226 static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave) 1227 { 1228 struct slave *has_bond_addr = rcu_access_pointer(bond->curr_active_slave); 1229 struct slave *tmp_slave1, *free_mac_slave = NULL; 1230 struct list_head *iter; 1231 1232 if (!bond_has_slaves(bond)) { 1233 /* this is the first slave */ 1234 return 0; 1235 } 1236 1237 /* if slave's mac address differs from bond's mac address 1238 * check uniqueness of slave's mac address against the other 1239 * slaves in the bond. 1240 */ 1241 if (!ether_addr_equal_64bits(slave->perm_hwaddr, bond->dev->dev_addr)) { 1242 if (!bond_slave_has_mac(bond, slave->dev->dev_addr)) 1243 return 0; 1244 1245 /* Try setting slave mac to bond address and fall-through 1246 to code handling that situation below... */ 1247 alb_set_slave_mac_addr(slave, bond->dev->dev_addr); 1248 } 1249 1250 /* The slave's address is equal to the address of the bond. 1251 * Search for a spare address in the bond for this slave. 1252 */ 1253 bond_for_each_slave(bond, tmp_slave1, iter) { 1254 if (!bond_slave_has_mac(bond, tmp_slave1->perm_hwaddr)) { 1255 /* no slave has tmp_slave1's perm addr 1256 * as its curr addr 1257 */ 1258 free_mac_slave = tmp_slave1; 1259 break; 1260 } 1261 1262 if (!has_bond_addr) { 1263 if (ether_addr_equal_64bits(tmp_slave1->dev->dev_addr, 1264 bond->dev->dev_addr)) { 1265 1266 has_bond_addr = tmp_slave1; 1267 } 1268 } 1269 } 1270 1271 if (free_mac_slave) { 1272 alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr); 1273 1274 netdev_warn(bond->dev, "the hw address of slave %s is in use by the bond; giving it the hw address of %s\n", 1275 slave->dev->name, free_mac_slave->dev->name); 1276 1277 } else if (has_bond_addr) { 1278 netdev_err(bond->dev, "the hw address of slave %s is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n", 1279 slave->dev->name); 1280 return -EFAULT; 1281 } 1282 1283 return 0; 1284 } 1285 1286 /** 1287 * alb_set_mac_address 1288 * @bond: 1289 * @addr: 1290 * 1291 * In TLB mode all slaves are configured to the bond's hw address, but set 1292 * their dev_addr field to different addresses (based on their permanent hw 1293 * addresses). 1294 * 1295 * For each slave, this function sets the interface to the new address and then 1296 * changes its dev_addr field to its previous value. 1297 * 1298 * Unwinding assumes bond's mac address has not yet changed. 1299 */ 1300 static int alb_set_mac_address(struct bonding *bond, void *addr) 1301 { 1302 struct slave *slave, *rollback_slave; 1303 struct list_head *iter; 1304 struct sockaddr sa; 1305 char tmp_addr[ETH_ALEN]; 1306 int res; 1307 1308 if (bond->alb_info.rlb_enabled) 1309 return 0; 1310 1311 bond_for_each_slave(bond, slave, iter) { 1312 /* save net_device's current hw address */ 1313 ether_addr_copy(tmp_addr, slave->dev->dev_addr); 1314 1315 res = dev_set_mac_address(slave->dev, addr); 1316 1317 /* restore net_device's hw address */ 1318 ether_addr_copy(slave->dev->dev_addr, tmp_addr); 1319 1320 if (res) 1321 goto unwind; 1322 } 1323 1324 return 0; 1325 1326 unwind: 1327 memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); 1328 sa.sa_family = bond->dev->type; 1329 1330 /* unwind from head to the slave that failed */ 1331 bond_for_each_slave(bond, rollback_slave, iter) { 1332 if (rollback_slave == slave) 1333 break; 1334 ether_addr_copy(tmp_addr, rollback_slave->dev->dev_addr); 1335 dev_set_mac_address(rollback_slave->dev, &sa); 1336 ether_addr_copy(rollback_slave->dev->dev_addr, tmp_addr); 1337 } 1338 1339 return res; 1340 } 1341 1342 /************************ exported alb funcions ************************/ 1343 1344 int bond_alb_initialize(struct bonding *bond, int rlb_enabled) 1345 { 1346 int res; 1347 1348 res = tlb_initialize(bond); 1349 if (res) 1350 return res; 1351 1352 if (rlb_enabled) { 1353 bond->alb_info.rlb_enabled = 1; 1354 /* initialize rlb */ 1355 res = rlb_initialize(bond); 1356 if (res) { 1357 tlb_deinitialize(bond); 1358 return res; 1359 } 1360 } else { 1361 bond->alb_info.rlb_enabled = 0; 1362 } 1363 1364 return 0; 1365 } 1366 1367 void bond_alb_deinitialize(struct bonding *bond) 1368 { 1369 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1370 1371 tlb_deinitialize(bond); 1372 1373 if (bond_info->rlb_enabled) 1374 rlb_deinitialize(bond); 1375 } 1376 1377 static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, 1378 struct slave *tx_slave) 1379 { 1380 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1381 struct ethhdr *eth_data = eth_hdr(skb); 1382 1383 if (!tx_slave) { 1384 /* unbalanced or unassigned, send through primary */ 1385 tx_slave = rcu_dereference(bond->curr_active_slave); 1386 if (bond->params.tlb_dynamic_lb) 1387 bond_info->unbalanced_load += skb->len; 1388 } 1389 1390 if (tx_slave && bond_slave_can_tx(tx_slave)) { 1391 if (tx_slave != rcu_dereference(bond->curr_active_slave)) { 1392 ether_addr_copy(eth_data->h_source, 1393 tx_slave->dev->dev_addr); 1394 } 1395 1396 bond_dev_queue_xmit(bond, skb, tx_slave->dev); 1397 goto out; 1398 } 1399 1400 if (tx_slave && bond->params.tlb_dynamic_lb) { 1401 _lock_tx_hashtbl(bond); 1402 __tlb_clear_slave(bond, tx_slave, 0); 1403 _unlock_tx_hashtbl(bond); 1404 } 1405 1406 /* no suitable interface, frame not sent */ 1407 dev_kfree_skb_any(skb); 1408 out: 1409 return NETDEV_TX_OK; 1410 } 1411 1412 static int bond_tlb_update_slave_arr(struct bonding *bond, 1413 struct slave *skipslave) 1414 { 1415 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1416 struct slave *tx_slave; 1417 struct list_head *iter; 1418 struct tlb_up_slave *new_arr, *old_arr; 1419 1420 new_arr = kzalloc(offsetof(struct tlb_up_slave, arr[bond->slave_cnt]), 1421 GFP_ATOMIC); 1422 if (!new_arr) 1423 return -ENOMEM; 1424 1425 bond_for_each_slave(bond, tx_slave, iter) { 1426 if (!bond_slave_can_tx(tx_slave)) 1427 continue; 1428 if (skipslave == tx_slave) 1429 continue; 1430 new_arr->arr[new_arr->count++] = tx_slave; 1431 } 1432 1433 old_arr = rtnl_dereference(bond_info->slave_arr); 1434 rcu_assign_pointer(bond_info->slave_arr, new_arr); 1435 if (old_arr) 1436 kfree_rcu(old_arr, rcu); 1437 1438 return 0; 1439 } 1440 1441 int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) 1442 { 1443 struct bonding *bond = netdev_priv(bond_dev); 1444 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1445 struct ethhdr *eth_data; 1446 struct slave *tx_slave = NULL; 1447 u32 hash_index; 1448 1449 skb_reset_mac_header(skb); 1450 eth_data = eth_hdr(skb); 1451 1452 /* Do not TX balance any multicast or broadcast */ 1453 if (!is_multicast_ether_addr(eth_data->h_dest)) { 1454 switch (skb->protocol) { 1455 case htons(ETH_P_IP): 1456 case htons(ETH_P_IPX): 1457 /* In case of IPX, it will falback to L2 hash */ 1458 case htons(ETH_P_IPV6): 1459 hash_index = bond_xmit_hash(bond, skb); 1460 if (bond->params.tlb_dynamic_lb) { 1461 tx_slave = tlb_choose_channel(bond, 1462 hash_index & 0xFF, 1463 skb->len); 1464 } else { 1465 struct tlb_up_slave *slaves; 1466 1467 slaves = rcu_dereference(bond_info->slave_arr); 1468 if (slaves && slaves->count) 1469 tx_slave = slaves->arr[hash_index % 1470 slaves->count]; 1471 } 1472 break; 1473 } 1474 } 1475 return bond_do_alb_xmit(skb, bond, tx_slave); 1476 } 1477 1478 int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) 1479 { 1480 struct bonding *bond = netdev_priv(bond_dev); 1481 struct ethhdr *eth_data; 1482 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1483 struct slave *tx_slave = NULL; 1484 static const __be32 ip_bcast = htonl(0xffffffff); 1485 int hash_size = 0; 1486 bool do_tx_balance = true; 1487 u32 hash_index = 0; 1488 const u8 *hash_start = NULL; 1489 struct ipv6hdr *ip6hdr; 1490 1491 skb_reset_mac_header(skb); 1492 eth_data = eth_hdr(skb); 1493 1494 switch (ntohs(skb->protocol)) { 1495 case ETH_P_IP: { 1496 const struct iphdr *iph = ip_hdr(skb); 1497 1498 if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast) || 1499 (iph->daddr == ip_bcast) || 1500 (iph->protocol == IPPROTO_IGMP)) { 1501 do_tx_balance = false; 1502 break; 1503 } 1504 hash_start = (char *)&(iph->daddr); 1505 hash_size = sizeof(iph->daddr); 1506 } 1507 break; 1508 case ETH_P_IPV6: 1509 /* IPv6 doesn't really use broadcast mac address, but leave 1510 * that here just in case. 1511 */ 1512 if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast)) { 1513 do_tx_balance = false; 1514 break; 1515 } 1516 1517 /* IPv6 uses all-nodes multicast as an equivalent to 1518 * broadcasts in IPv4. 1519 */ 1520 if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) { 1521 do_tx_balance = false; 1522 break; 1523 } 1524 1525 /* Additianally, DAD probes should not be tx-balanced as that 1526 * will lead to false positives for duplicate addresses and 1527 * prevent address configuration from working. 1528 */ 1529 ip6hdr = ipv6_hdr(skb); 1530 if (ipv6_addr_any(&ip6hdr->saddr)) { 1531 do_tx_balance = false; 1532 break; 1533 } 1534 1535 hash_start = (char *)&(ipv6_hdr(skb)->daddr); 1536 hash_size = sizeof(ipv6_hdr(skb)->daddr); 1537 break; 1538 case ETH_P_IPX: 1539 if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { 1540 /* something is wrong with this packet */ 1541 do_tx_balance = false; 1542 break; 1543 } 1544 1545 if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { 1546 /* The only protocol worth balancing in 1547 * this family since it has an "ARP" like 1548 * mechanism 1549 */ 1550 do_tx_balance = false; 1551 break; 1552 } 1553 1554 hash_start = (char *)eth_data->h_dest; 1555 hash_size = ETH_ALEN; 1556 break; 1557 case ETH_P_ARP: 1558 do_tx_balance = false; 1559 if (bond_info->rlb_enabled) 1560 tx_slave = rlb_arp_xmit(skb, bond); 1561 break; 1562 default: 1563 do_tx_balance = false; 1564 break; 1565 } 1566 1567 if (do_tx_balance) { 1568 hash_index = _simple_hash(hash_start, hash_size); 1569 tx_slave = tlb_choose_channel(bond, hash_index, skb->len); 1570 } 1571 1572 return bond_do_alb_xmit(skb, bond, tx_slave); 1573 } 1574 1575 void bond_alb_monitor(struct work_struct *work) 1576 { 1577 struct bonding *bond = container_of(work, struct bonding, 1578 alb_work.work); 1579 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1580 struct list_head *iter; 1581 struct slave *slave; 1582 1583 if (!bond_has_slaves(bond)) { 1584 bond_info->tx_rebalance_counter = 0; 1585 bond_info->lp_counter = 0; 1586 goto re_arm; 1587 } 1588 1589 rcu_read_lock(); 1590 1591 bond_info->tx_rebalance_counter++; 1592 bond_info->lp_counter++; 1593 1594 /* send learning packets */ 1595 if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { 1596 bool strict_match; 1597 1598 /* change of curr_active_slave involves swapping of mac addresses. 1599 * in order to avoid this swapping from happening while 1600 * sending the learning packets, the curr_slave_lock must be held for 1601 * read. 1602 */ 1603 read_lock(&bond->curr_slave_lock); 1604 1605 bond_for_each_slave_rcu(bond, slave, iter) { 1606 /* If updating current_active, use all currently 1607 * user mac addreses (!strict_match). Otherwise, only 1608 * use mac of the slave device. 1609 * In RLB mode, we always use strict matches. 1610 */ 1611 strict_match = (slave != rcu_access_pointer(bond->curr_active_slave) || 1612 bond_info->rlb_enabled); 1613 alb_send_learning_packets(slave, slave->dev->dev_addr, 1614 strict_match); 1615 } 1616 1617 read_unlock(&bond->curr_slave_lock); 1618 1619 bond_info->lp_counter = 0; 1620 } 1621 1622 /* rebalance tx traffic */ 1623 if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { 1624 1625 read_lock(&bond->curr_slave_lock); 1626 1627 bond_for_each_slave_rcu(bond, slave, iter) { 1628 tlb_clear_slave(bond, slave, 1); 1629 if (slave == rcu_access_pointer(bond->curr_active_slave)) { 1630 SLAVE_TLB_INFO(slave).load = 1631 bond_info->unbalanced_load / 1632 BOND_TLB_REBALANCE_INTERVAL; 1633 bond_info->unbalanced_load = 0; 1634 } 1635 } 1636 1637 read_unlock(&bond->curr_slave_lock); 1638 1639 bond_info->tx_rebalance_counter = 0; 1640 } 1641 1642 /* handle rlb stuff */ 1643 if (bond_info->rlb_enabled) { 1644 if (bond_info->primary_is_promisc && 1645 (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { 1646 1647 /* 1648 * dev_set_promiscuity requires rtnl and 1649 * nothing else. Avoid race with bond_close. 1650 */ 1651 rcu_read_unlock(); 1652 if (!rtnl_trylock()) 1653 goto re_arm; 1654 1655 bond_info->rlb_promisc_timeout_counter = 0; 1656 1657 /* If the primary was set to promiscuous mode 1658 * because a slave was disabled then 1659 * it can now leave promiscuous mode. 1660 */ 1661 dev_set_promiscuity(rtnl_dereference(bond->curr_active_slave)->dev, 1662 -1); 1663 bond_info->primary_is_promisc = 0; 1664 1665 rtnl_unlock(); 1666 rcu_read_lock(); 1667 } 1668 1669 if (bond_info->rlb_rebalance) { 1670 bond_info->rlb_rebalance = 0; 1671 rlb_rebalance(bond); 1672 } 1673 1674 /* check if clients need updating */ 1675 if (bond_info->rx_ntt) { 1676 if (bond_info->rlb_update_delay_counter) { 1677 --bond_info->rlb_update_delay_counter; 1678 } else { 1679 rlb_update_rx_clients(bond); 1680 if (bond_info->rlb_update_retry_counter) 1681 --bond_info->rlb_update_retry_counter; 1682 else 1683 bond_info->rx_ntt = 0; 1684 } 1685 } 1686 } 1687 rcu_read_unlock(); 1688 re_arm: 1689 queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); 1690 } 1691 1692 /* assumption: called before the slave is attached to the bond 1693 * and not locked by the bond lock 1694 */ 1695 int bond_alb_init_slave(struct bonding *bond, struct slave *slave) 1696 { 1697 int res; 1698 1699 res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr); 1700 if (res) 1701 return res; 1702 1703 res = alb_handle_addr_collision_on_attach(bond, slave); 1704 if (res) 1705 return res; 1706 1707 tlb_init_slave(slave); 1708 1709 /* order a rebalance ASAP */ 1710 bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; 1711 1712 if (bond->alb_info.rlb_enabled) 1713 bond->alb_info.rlb_rebalance = 1; 1714 1715 return 0; 1716 } 1717 1718 /* 1719 * Remove slave from tlb and rlb hash tables, and fix up MAC addresses 1720 * if necessary. 1721 * 1722 * Caller must hold RTNL and no other locks 1723 */ 1724 void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) 1725 { 1726 if (bond_has_slaves(bond)) 1727 alb_change_hw_addr_on_detach(bond, slave); 1728 1729 tlb_clear_slave(bond, slave, 0); 1730 1731 if (bond->alb_info.rlb_enabled) { 1732 bond->alb_info.rx_slave = NULL; 1733 rlb_clear_slave(bond, slave); 1734 } 1735 1736 if (bond_is_nondyn_tlb(bond)) 1737 if (bond_tlb_update_slave_arr(bond, slave)) 1738 pr_err("Failed to build slave-array for TLB mode.\n"); 1739 1740 } 1741 1742 /* Caller must hold bond lock for read */ 1743 void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link) 1744 { 1745 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1746 1747 if (link == BOND_LINK_DOWN) { 1748 tlb_clear_slave(bond, slave, 0); 1749 if (bond->alb_info.rlb_enabled) 1750 rlb_clear_slave(bond, slave); 1751 } else if (link == BOND_LINK_UP) { 1752 /* order a rebalance ASAP */ 1753 bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; 1754 if (bond->alb_info.rlb_enabled) { 1755 bond->alb_info.rlb_rebalance = 1; 1756 /* If the updelay module parameter is smaller than the 1757 * forwarding delay of the switch the rebalance will 1758 * not work because the rebalance arp replies will 1759 * not be forwarded to the clients.. 1760 */ 1761 } 1762 } 1763 1764 if (bond_is_nondyn_tlb(bond)) { 1765 if (bond_tlb_update_slave_arr(bond, NULL)) 1766 pr_err("Failed to build slave-array for TLB mode.\n"); 1767 } 1768 } 1769 1770 /** 1771 * bond_alb_handle_active_change - assign new curr_active_slave 1772 * @bond: our bonding struct 1773 * @new_slave: new slave to assign 1774 * 1775 * Set the bond->curr_active_slave to @new_slave and handle 1776 * mac address swapping and promiscuity changes as needed. 1777 * 1778 * If new_slave is NULL, caller must hold curr_slave_lock or 1779 * bond->lock for write. 1780 * 1781 * If new_slave is not NULL, caller must hold RTNL, curr_slave_lock 1782 * for write. Processing here may sleep, so no other locks may be held. 1783 */ 1784 void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave) 1785 __releases(&bond->curr_slave_lock) 1786 __acquires(&bond->curr_slave_lock) 1787 { 1788 struct slave *swap_slave; 1789 struct slave *curr_active; 1790 1791 curr_active = rcu_dereference_protected(bond->curr_active_slave, 1792 !new_slave || 1793 lockdep_is_held(&bond->curr_slave_lock)); 1794 if (curr_active == new_slave) 1795 return; 1796 1797 if (curr_active && bond->alb_info.primary_is_promisc) { 1798 dev_set_promiscuity(curr_active->dev, -1); 1799 bond->alb_info.primary_is_promisc = 0; 1800 bond->alb_info.rlb_promisc_timeout_counter = 0; 1801 } 1802 1803 swap_slave = curr_active; 1804 rcu_assign_pointer(bond->curr_active_slave, new_slave); 1805 1806 if (!new_slave || !bond_has_slaves(bond)) 1807 return; 1808 1809 /* set the new curr_active_slave to the bonds mac address 1810 * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave 1811 */ 1812 if (!swap_slave) 1813 swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr); 1814 1815 /* 1816 * Arrange for swap_slave and new_slave to temporarily be 1817 * ignored so we can mess with their MAC addresses without 1818 * fear of interference from transmit activity. 1819 */ 1820 if (swap_slave) 1821 tlb_clear_slave(bond, swap_slave, 1); 1822 tlb_clear_slave(bond, new_slave, 1); 1823 1824 write_unlock_bh(&bond->curr_slave_lock); 1825 1826 ASSERT_RTNL(); 1827 1828 /* in TLB mode, the slave might flip down/up with the old dev_addr, 1829 * and thus filter bond->dev_addr's packets, so force bond's mac 1830 */ 1831 if (BOND_MODE(bond) == BOND_MODE_TLB) { 1832 struct sockaddr sa; 1833 u8 tmp_addr[ETH_ALEN]; 1834 1835 ether_addr_copy(tmp_addr, new_slave->dev->dev_addr); 1836 1837 memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); 1838 sa.sa_family = bond->dev->type; 1839 /* we don't care if it can't change its mac, best effort */ 1840 dev_set_mac_address(new_slave->dev, &sa); 1841 1842 ether_addr_copy(new_slave->dev->dev_addr, tmp_addr); 1843 } 1844 1845 /* curr_active_slave must be set before calling alb_swap_mac_addr */ 1846 if (swap_slave) { 1847 /* swap mac address */ 1848 alb_swap_mac_addr(swap_slave, new_slave); 1849 alb_fasten_mac_swap(bond, swap_slave, new_slave); 1850 } else { 1851 /* set the new_slave to the bond mac address */ 1852 alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr); 1853 alb_send_learning_packets(new_slave, bond->dev->dev_addr, 1854 false); 1855 } 1856 1857 write_lock_bh(&bond->curr_slave_lock); 1858 } 1859 1860 /* 1861 * Called with RTNL 1862 */ 1863 int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) 1864 __acquires(&bond->lock) 1865 __releases(&bond->lock) 1866 { 1867 struct bonding *bond = netdev_priv(bond_dev); 1868 struct sockaddr *sa = addr; 1869 struct slave *curr_active; 1870 struct slave *swap_slave; 1871 int res; 1872 1873 if (!is_valid_ether_addr(sa->sa_data)) 1874 return -EADDRNOTAVAIL; 1875 1876 res = alb_set_mac_address(bond, addr); 1877 if (res) 1878 return res; 1879 1880 memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); 1881 1882 /* If there is no curr_active_slave there is nothing else to do. 1883 * Otherwise we'll need to pass the new address to it and handle 1884 * duplications. 1885 */ 1886 curr_active = rtnl_dereference(bond->curr_active_slave); 1887 if (!curr_active) 1888 return 0; 1889 1890 swap_slave = bond_slave_has_mac(bond, bond_dev->dev_addr); 1891 1892 if (swap_slave) { 1893 alb_swap_mac_addr(swap_slave, curr_active); 1894 alb_fasten_mac_swap(bond, swap_slave, curr_active); 1895 } else { 1896 alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr); 1897 1898 read_lock(&bond->lock); 1899 alb_send_learning_packets(curr_active, 1900 bond_dev->dev_addr, false); 1901 if (bond->alb_info.rlb_enabled) { 1902 /* inform clients mac address has changed */ 1903 rlb_req_update_slave_clients(bond, curr_active); 1904 } 1905 read_unlock(&bond->lock); 1906 } 1907 1908 return 0; 1909 } 1910 1911 void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id) 1912 { 1913 if (bond->alb_info.rlb_enabled) 1914 rlb_clear_vlan(bond, vlan_id); 1915 } 1916 1917