1 /* 2 * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License as published by the 6 * Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 11 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * for more details. 13 * 14 * You should have received a copy of the GNU General Public License along 15 * with this program; if not, see <http://www.gnu.org/licenses/>. 16 * 17 * The full GNU General Public License is included in this distribution in the 18 * file called LICENSE. 19 * 20 */ 21 22 #include <linux/skbuff.h> 23 #include <linux/netdevice.h> 24 #include <linux/etherdevice.h> 25 #include <linux/pkt_sched.h> 26 #include <linux/spinlock.h> 27 #include <linux/slab.h> 28 #include <linux/timer.h> 29 #include <linux/ip.h> 30 #include <linux/ipv6.h> 31 #include <linux/if_arp.h> 32 #include <linux/if_ether.h> 33 #include <linux/if_bonding.h> 34 #include <linux/if_vlan.h> 35 #include <linux/in.h> 36 #include <net/ipx.h> 37 #include <net/arp.h> 38 #include <net/ipv6.h> 39 #include <asm/byteorder.h> 40 #include <net/bonding.h> 41 #include <net/bond_alb.h> 42 43 static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = { 44 0x33, 0x33, 0x00, 0x00, 0x00, 0x01 45 }; 46 static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC; 47 48 #pragma pack(1) 49 struct learning_pkt { 50 u8 mac_dst[ETH_ALEN]; 51 u8 mac_src[ETH_ALEN]; 52 __be16 type; 53 u8 padding[ETH_ZLEN - ETH_HLEN]; 54 }; 55 56 struct arp_pkt { 57 __be16 hw_addr_space; 58 __be16 prot_addr_space; 59 u8 hw_addr_len; 60 u8 prot_addr_len; 61 __be16 op_code; 62 u8 mac_src[ETH_ALEN]; /* sender hardware address */ 63 __be32 ip_src; /* sender IP address */ 64 u8 mac_dst[ETH_ALEN]; /* target hardware address */ 65 __be32 ip_dst; /* target IP address */ 66 }; 67 #pragma pack() 68 69 static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) 70 { 71 return (struct arp_pkt *)skb_network_header(skb); 72 } 73 74 /* Forward declaration */ 75 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], 76 bool strict_match); 77 static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp); 78 static void rlb_src_unlink(struct bonding *bond, u32 index); 79 static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, 80 u32 ip_dst_hash); 81 82 static inline u8 _simple_hash(const u8 *hash_start, int hash_size) 83 { 84 int i; 85 u8 hash = 0; 86 87 for (i = 0; i < hash_size; i++) 88 hash ^= hash_start[i]; 89 90 return hash; 91 } 92 93 /*********************** tlb specific functions ***************************/ 94 95 static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) 96 { 97 if (save_load) { 98 entry->load_history = 1 + entry->tx_bytes / 99 BOND_TLB_REBALANCE_INTERVAL; 100 entry->tx_bytes = 0; 101 } 102 103 entry->tx_slave = NULL; 104 entry->next = TLB_NULL_INDEX; 105 entry->prev = TLB_NULL_INDEX; 106 } 107 108 static inline void tlb_init_slave(struct slave *slave) 109 { 110 SLAVE_TLB_INFO(slave).load = 0; 111 SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; 112 } 113 114 static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, 115 int save_load) 116 { 117 struct tlb_client_info *tx_hash_table; 118 u32 index; 119 120 /* clear slave from tx_hashtbl */ 121 tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; 122 123 /* skip this if we've already freed the tx hash table */ 124 if (tx_hash_table) { 125 index = SLAVE_TLB_INFO(slave).head; 126 while (index != TLB_NULL_INDEX) { 127 u32 next_index = tx_hash_table[index].next; 128 tlb_init_table_entry(&tx_hash_table[index], save_load); 129 index = next_index; 130 } 131 } 132 133 tlb_init_slave(slave); 134 } 135 136 static void tlb_clear_slave(struct bonding *bond, struct slave *slave, 137 int save_load) 138 { 139 spin_lock_bh(&bond->mode_lock); 140 __tlb_clear_slave(bond, slave, save_load); 141 spin_unlock_bh(&bond->mode_lock); 142 } 143 144 /* Must be called before starting the monitor timer */ 145 static int tlb_initialize(struct bonding *bond) 146 { 147 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 148 int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info); 149 struct tlb_client_info *new_hashtbl; 150 int i; 151 152 new_hashtbl = kzalloc(size, GFP_KERNEL); 153 if (!new_hashtbl) 154 return -ENOMEM; 155 156 spin_lock_bh(&bond->mode_lock); 157 158 bond_info->tx_hashtbl = new_hashtbl; 159 160 for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) 161 tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0); 162 163 spin_unlock_bh(&bond->mode_lock); 164 165 return 0; 166 } 167 168 /* Must be called only after all slaves have been released */ 169 static void tlb_deinitialize(struct bonding *bond) 170 { 171 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 172 173 spin_lock_bh(&bond->mode_lock); 174 175 kfree(bond_info->tx_hashtbl); 176 bond_info->tx_hashtbl = NULL; 177 178 spin_unlock_bh(&bond->mode_lock); 179 } 180 181 static long long compute_gap(struct slave *slave) 182 { 183 return (s64) (slave->speed << 20) - /* Convert to Megabit per sec */ 184 (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ 185 } 186 187 static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) 188 { 189 struct slave *slave, *least_loaded; 190 struct list_head *iter; 191 long long max_gap; 192 193 least_loaded = NULL; 194 max_gap = LLONG_MIN; 195 196 /* Find the slave with the largest gap */ 197 bond_for_each_slave_rcu(bond, slave, iter) { 198 if (bond_slave_can_tx(slave)) { 199 long long gap = compute_gap(slave); 200 201 if (max_gap < gap) { 202 least_loaded = slave; 203 max_gap = gap; 204 } 205 } 206 } 207 208 return least_loaded; 209 } 210 211 static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index, 212 u32 skb_len) 213 { 214 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 215 struct tlb_client_info *hash_table; 216 struct slave *assigned_slave; 217 218 hash_table = bond_info->tx_hashtbl; 219 assigned_slave = hash_table[hash_index].tx_slave; 220 if (!assigned_slave) { 221 assigned_slave = tlb_get_least_loaded_slave(bond); 222 223 if (assigned_slave) { 224 struct tlb_slave_info *slave_info = 225 &(SLAVE_TLB_INFO(assigned_slave)); 226 u32 next_index = slave_info->head; 227 228 hash_table[hash_index].tx_slave = assigned_slave; 229 hash_table[hash_index].next = next_index; 230 hash_table[hash_index].prev = TLB_NULL_INDEX; 231 232 if (next_index != TLB_NULL_INDEX) 233 hash_table[next_index].prev = hash_index; 234 235 slave_info->head = hash_index; 236 slave_info->load += 237 hash_table[hash_index].load_history; 238 } 239 } 240 241 if (assigned_slave) 242 hash_table[hash_index].tx_bytes += skb_len; 243 244 return assigned_slave; 245 } 246 247 static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, 248 u32 skb_len) 249 { 250 struct slave *tx_slave; 251 252 /* We don't need to disable softirq here, becase 253 * tlb_choose_channel() is only called by bond_alb_xmit() 254 * which already has softirq disabled. 255 */ 256 spin_lock(&bond->mode_lock); 257 tx_slave = __tlb_choose_channel(bond, hash_index, skb_len); 258 spin_unlock(&bond->mode_lock); 259 260 return tx_slave; 261 } 262 263 /*********************** rlb specific functions ***************************/ 264 265 /* when an ARP REPLY is received from a client update its info 266 * in the rx_hashtbl 267 */ 268 static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) 269 { 270 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 271 struct rlb_client_info *client_info; 272 u32 hash_index; 273 274 spin_lock_bh(&bond->mode_lock); 275 276 hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); 277 client_info = &(bond_info->rx_hashtbl[hash_index]); 278 279 if ((client_info->assigned) && 280 (client_info->ip_src == arp->ip_dst) && 281 (client_info->ip_dst == arp->ip_src) && 282 (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) { 283 /* update the clients MAC address */ 284 ether_addr_copy(client_info->mac_dst, arp->mac_src); 285 client_info->ntt = 1; 286 bond_info->rx_ntt = 1; 287 } 288 289 spin_unlock_bh(&bond->mode_lock); 290 } 291 292 static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, 293 struct slave *slave) 294 { 295 struct arp_pkt *arp, _arp; 296 297 if (skb->protocol != cpu_to_be16(ETH_P_ARP)) 298 goto out; 299 300 arp = skb_header_pointer(skb, 0, sizeof(_arp), &_arp); 301 if (!arp) 302 goto out; 303 304 /* We received an ARP from arp->ip_src. 305 * We might have used this IP address previously (on the bonding host 306 * itself or on a system that is bridged together with the bond). 307 * However, if arp->mac_src is different than what is stored in 308 * rx_hashtbl, some other host is now using the IP and we must prevent 309 * sending out client updates with this IP address and the old MAC 310 * address. 311 * Clean up all hash table entries that have this address as ip_src but 312 * have a different mac_src. 313 */ 314 rlb_purge_src_ip(bond, arp); 315 316 if (arp->op_code == htons(ARPOP_REPLY)) { 317 /* update rx hash table for this ARP */ 318 rlb_update_entry_from_arp(bond, arp); 319 netdev_dbg(bond->dev, "Server received an ARP Reply from client\n"); 320 } 321 out: 322 return RX_HANDLER_ANOTHER; 323 } 324 325 /* Caller must hold rcu_read_lock() */ 326 static struct slave *__rlb_next_rx_slave(struct bonding *bond) 327 { 328 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 329 struct slave *before = NULL, *rx_slave = NULL, *slave; 330 struct list_head *iter; 331 bool found = false; 332 333 bond_for_each_slave_rcu(bond, slave, iter) { 334 if (!bond_slave_can_tx(slave)) 335 continue; 336 if (!found) { 337 if (!before || before->speed < slave->speed) 338 before = slave; 339 } else { 340 if (!rx_slave || rx_slave->speed < slave->speed) 341 rx_slave = slave; 342 } 343 if (slave == bond_info->rx_slave) 344 found = true; 345 } 346 /* we didn't find anything after the current or we have something 347 * better before and up to the current slave 348 */ 349 if (!rx_slave || (before && rx_slave->speed < before->speed)) 350 rx_slave = before; 351 352 if (rx_slave) 353 bond_info->rx_slave = rx_slave; 354 355 return rx_slave; 356 } 357 358 /* Caller must hold RTNL, rcu_read_lock is obtained only to silence checkers */ 359 static struct slave *rlb_next_rx_slave(struct bonding *bond) 360 { 361 struct slave *rx_slave; 362 363 ASSERT_RTNL(); 364 365 rcu_read_lock(); 366 rx_slave = __rlb_next_rx_slave(bond); 367 rcu_read_unlock(); 368 369 return rx_slave; 370 } 371 372 /* teach the switch the mac of a disabled slave 373 * on the primary for fault tolerance 374 * 375 * Caller must hold RTNL 376 */ 377 static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) 378 { 379 struct slave *curr_active = rtnl_dereference(bond->curr_active_slave); 380 381 if (!curr_active) 382 return; 383 384 if (!bond->alb_info.primary_is_promisc) { 385 if (!dev_set_promiscuity(curr_active->dev, 1)) 386 bond->alb_info.primary_is_promisc = 1; 387 else 388 bond->alb_info.primary_is_promisc = 0; 389 } 390 391 bond->alb_info.rlb_promisc_timeout_counter = 0; 392 393 alb_send_learning_packets(curr_active, addr, true); 394 } 395 396 /* slave being removed should not be active at this point 397 * 398 * Caller must hold rtnl. 399 */ 400 static void rlb_clear_slave(struct bonding *bond, struct slave *slave) 401 { 402 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 403 struct rlb_client_info *rx_hash_table; 404 u32 index, next_index; 405 406 /* clear slave from rx_hashtbl */ 407 spin_lock_bh(&bond->mode_lock); 408 409 rx_hash_table = bond_info->rx_hashtbl; 410 index = bond_info->rx_hashtbl_used_head; 411 for (; index != RLB_NULL_INDEX; index = next_index) { 412 next_index = rx_hash_table[index].used_next; 413 if (rx_hash_table[index].slave == slave) { 414 struct slave *assigned_slave = rlb_next_rx_slave(bond); 415 416 if (assigned_slave) { 417 rx_hash_table[index].slave = assigned_slave; 418 if (is_valid_ether_addr(rx_hash_table[index].mac_dst)) { 419 bond_info->rx_hashtbl[index].ntt = 1; 420 bond_info->rx_ntt = 1; 421 /* A slave has been removed from the 422 * table because it is either disabled 423 * or being released. We must retry the 424 * update to avoid clients from not 425 * being updated & disconnecting when 426 * there is stress 427 */ 428 bond_info->rlb_update_retry_counter = 429 RLB_UPDATE_RETRY; 430 } 431 } else { /* there is no active slave */ 432 rx_hash_table[index].slave = NULL; 433 } 434 } 435 } 436 437 spin_unlock_bh(&bond->mode_lock); 438 439 if (slave != rtnl_dereference(bond->curr_active_slave)) 440 rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); 441 } 442 443 static void rlb_update_client(struct rlb_client_info *client_info) 444 { 445 int i; 446 447 if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst)) 448 return; 449 450 for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { 451 struct sk_buff *skb; 452 453 skb = arp_create(ARPOP_REPLY, ETH_P_ARP, 454 client_info->ip_dst, 455 client_info->slave->dev, 456 client_info->ip_src, 457 client_info->mac_dst, 458 client_info->slave->dev->dev_addr, 459 client_info->mac_dst); 460 if (!skb) { 461 netdev_err(client_info->slave->bond->dev, 462 "failed to create an ARP packet\n"); 463 continue; 464 } 465 466 skb->dev = client_info->slave->dev; 467 468 if (client_info->vlan_id) { 469 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), 470 client_info->vlan_id); 471 } 472 473 arp_xmit(skb); 474 } 475 } 476 477 /* sends ARP REPLIES that update the clients that need updating */ 478 static void rlb_update_rx_clients(struct bonding *bond) 479 { 480 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 481 struct rlb_client_info *client_info; 482 u32 hash_index; 483 484 spin_lock_bh(&bond->mode_lock); 485 486 hash_index = bond_info->rx_hashtbl_used_head; 487 for (; hash_index != RLB_NULL_INDEX; 488 hash_index = client_info->used_next) { 489 client_info = &(bond_info->rx_hashtbl[hash_index]); 490 if (client_info->ntt) { 491 rlb_update_client(client_info); 492 if (bond_info->rlb_update_retry_counter == 0) 493 client_info->ntt = 0; 494 } 495 } 496 497 /* do not update the entries again until this counter is zero so that 498 * not to confuse the clients. 499 */ 500 bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; 501 502 spin_unlock_bh(&bond->mode_lock); 503 } 504 505 /* The slave was assigned a new mac address - update the clients */ 506 static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave) 507 { 508 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 509 struct rlb_client_info *client_info; 510 int ntt = 0; 511 u32 hash_index; 512 513 spin_lock_bh(&bond->mode_lock); 514 515 hash_index = bond_info->rx_hashtbl_used_head; 516 for (; hash_index != RLB_NULL_INDEX; 517 hash_index = client_info->used_next) { 518 client_info = &(bond_info->rx_hashtbl[hash_index]); 519 520 if ((client_info->slave == slave) && 521 is_valid_ether_addr(client_info->mac_dst)) { 522 client_info->ntt = 1; 523 ntt = 1; 524 } 525 } 526 527 /* update the team's flag only after the whole iteration */ 528 if (ntt) { 529 bond_info->rx_ntt = 1; 530 /* fasten the change */ 531 bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; 532 } 533 534 spin_unlock_bh(&bond->mode_lock); 535 } 536 537 /* mark all clients using src_ip to be updated */ 538 static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) 539 { 540 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 541 struct rlb_client_info *client_info; 542 u32 hash_index; 543 544 spin_lock(&bond->mode_lock); 545 546 hash_index = bond_info->rx_hashtbl_used_head; 547 for (; hash_index != RLB_NULL_INDEX; 548 hash_index = client_info->used_next) { 549 client_info = &(bond_info->rx_hashtbl[hash_index]); 550 551 if (!client_info->slave) { 552 netdev_err(bond->dev, "found a client with no channel in the client's hash table\n"); 553 continue; 554 } 555 /* update all clients using this src_ip, that are not assigned 556 * to the team's address (curr_active_slave) and have a known 557 * unicast mac address. 558 */ 559 if ((client_info->ip_src == src_ip) && 560 !ether_addr_equal_64bits(client_info->slave->dev->dev_addr, 561 bond->dev->dev_addr) && 562 is_valid_ether_addr(client_info->mac_dst)) { 563 client_info->ntt = 1; 564 bond_info->rx_ntt = 1; 565 } 566 } 567 568 spin_unlock(&bond->mode_lock); 569 } 570 571 static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) 572 { 573 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 574 struct arp_pkt *arp = arp_pkt(skb); 575 struct slave *assigned_slave, *curr_active_slave; 576 struct rlb_client_info *client_info; 577 u32 hash_index = 0; 578 579 spin_lock(&bond->mode_lock); 580 581 curr_active_slave = rcu_dereference(bond->curr_active_slave); 582 583 hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst)); 584 client_info = &(bond_info->rx_hashtbl[hash_index]); 585 586 if (client_info->assigned) { 587 if ((client_info->ip_src == arp->ip_src) && 588 (client_info->ip_dst == arp->ip_dst)) { 589 /* the entry is already assigned to this client */ 590 if (!is_broadcast_ether_addr(arp->mac_dst)) { 591 /* update mac address from arp */ 592 ether_addr_copy(client_info->mac_dst, arp->mac_dst); 593 } 594 ether_addr_copy(client_info->mac_src, arp->mac_src); 595 596 assigned_slave = client_info->slave; 597 if (assigned_slave) { 598 spin_unlock(&bond->mode_lock); 599 return assigned_slave; 600 } 601 } else { 602 /* the entry is already assigned to some other client, 603 * move the old client to primary (curr_active_slave) so 604 * that the new client can be assigned to this entry. 605 */ 606 if (curr_active_slave && 607 client_info->slave != curr_active_slave) { 608 client_info->slave = curr_active_slave; 609 rlb_update_client(client_info); 610 } 611 } 612 } 613 /* assign a new slave */ 614 assigned_slave = __rlb_next_rx_slave(bond); 615 616 if (assigned_slave) { 617 if (!(client_info->assigned && 618 client_info->ip_src == arp->ip_src)) { 619 /* ip_src is going to be updated, 620 * fix the src hash list 621 */ 622 u32 hash_src = _simple_hash((u8 *)&arp->ip_src, 623 sizeof(arp->ip_src)); 624 rlb_src_unlink(bond, hash_index); 625 rlb_src_link(bond, hash_src, hash_index); 626 } 627 628 client_info->ip_src = arp->ip_src; 629 client_info->ip_dst = arp->ip_dst; 630 /* arp->mac_dst is broadcast for arp reqeusts. 631 * will be updated with clients actual unicast mac address 632 * upon receiving an arp reply. 633 */ 634 ether_addr_copy(client_info->mac_dst, arp->mac_dst); 635 ether_addr_copy(client_info->mac_src, arp->mac_src); 636 client_info->slave = assigned_slave; 637 638 if (is_valid_ether_addr(client_info->mac_dst)) { 639 client_info->ntt = 1; 640 bond->alb_info.rx_ntt = 1; 641 } else { 642 client_info->ntt = 0; 643 } 644 645 if (vlan_get_tag(skb, &client_info->vlan_id)) 646 client_info->vlan_id = 0; 647 648 if (!client_info->assigned) { 649 u32 prev_tbl_head = bond_info->rx_hashtbl_used_head; 650 bond_info->rx_hashtbl_used_head = hash_index; 651 client_info->used_next = prev_tbl_head; 652 if (prev_tbl_head != RLB_NULL_INDEX) { 653 bond_info->rx_hashtbl[prev_tbl_head].used_prev = 654 hash_index; 655 } 656 client_info->assigned = 1; 657 } 658 } 659 660 spin_unlock(&bond->mode_lock); 661 662 return assigned_slave; 663 } 664 665 /* chooses (and returns) transmit channel for arp reply 666 * does not choose channel for other arp types since they are 667 * sent on the curr_active_slave 668 */ 669 static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) 670 { 671 struct arp_pkt *arp = arp_pkt(skb); 672 struct slave *tx_slave = NULL; 673 674 /* Don't modify or load balance ARPs that do not originate locally 675 * (e.g.,arrive via a bridge). 676 */ 677 if (!bond_slave_has_mac_rx(bond, arp->mac_src)) 678 return NULL; 679 680 if (arp->op_code == htons(ARPOP_REPLY)) { 681 /* the arp must be sent on the selected rx channel */ 682 tx_slave = rlb_choose_channel(skb, bond); 683 if (tx_slave) 684 bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr, 685 tx_slave->dev->addr_len); 686 netdev_dbg(bond->dev, "Server sent ARP Reply packet\n"); 687 } else if (arp->op_code == htons(ARPOP_REQUEST)) { 688 /* Create an entry in the rx_hashtbl for this client as a 689 * place holder. 690 * When the arp reply is received the entry will be updated 691 * with the correct unicast address of the client. 692 */ 693 rlb_choose_channel(skb, bond); 694 695 /* The ARP reply packets must be delayed so that 696 * they can cancel out the influence of the ARP request. 697 */ 698 bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY; 699 700 /* arp requests are broadcast and are sent on the primary 701 * the arp request will collapse all clients on the subnet to 702 * the primary slave. We must register these clients to be 703 * updated with their assigned mac. 704 */ 705 rlb_req_update_subnet_clients(bond, arp->ip_src); 706 netdev_dbg(bond->dev, "Server sent ARP Request packet\n"); 707 } 708 709 return tx_slave; 710 } 711 712 static void rlb_rebalance(struct bonding *bond) 713 { 714 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 715 struct slave *assigned_slave; 716 struct rlb_client_info *client_info; 717 int ntt; 718 u32 hash_index; 719 720 spin_lock_bh(&bond->mode_lock); 721 722 ntt = 0; 723 hash_index = bond_info->rx_hashtbl_used_head; 724 for (; hash_index != RLB_NULL_INDEX; 725 hash_index = client_info->used_next) { 726 client_info = &(bond_info->rx_hashtbl[hash_index]); 727 assigned_slave = __rlb_next_rx_slave(bond); 728 if (assigned_slave && (client_info->slave != assigned_slave)) { 729 client_info->slave = assigned_slave; 730 if (!is_zero_ether_addr(client_info->mac_dst)) { 731 client_info->ntt = 1; 732 ntt = 1; 733 } 734 } 735 } 736 737 /* update the team's flag only after the whole iteration */ 738 if (ntt) 739 bond_info->rx_ntt = 1; 740 spin_unlock_bh(&bond->mode_lock); 741 } 742 743 /* Caller must hold mode_lock */ 744 static void rlb_init_table_entry_dst(struct rlb_client_info *entry) 745 { 746 entry->used_next = RLB_NULL_INDEX; 747 entry->used_prev = RLB_NULL_INDEX; 748 entry->assigned = 0; 749 entry->slave = NULL; 750 entry->vlan_id = 0; 751 } 752 static void rlb_init_table_entry_src(struct rlb_client_info *entry) 753 { 754 entry->src_first = RLB_NULL_INDEX; 755 entry->src_prev = RLB_NULL_INDEX; 756 entry->src_next = RLB_NULL_INDEX; 757 } 758 759 static void rlb_init_table_entry(struct rlb_client_info *entry) 760 { 761 memset(entry, 0, sizeof(struct rlb_client_info)); 762 rlb_init_table_entry_dst(entry); 763 rlb_init_table_entry_src(entry); 764 } 765 766 static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index) 767 { 768 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 769 u32 next_index = bond_info->rx_hashtbl[index].used_next; 770 u32 prev_index = bond_info->rx_hashtbl[index].used_prev; 771 772 if (index == bond_info->rx_hashtbl_used_head) 773 bond_info->rx_hashtbl_used_head = next_index; 774 if (prev_index != RLB_NULL_INDEX) 775 bond_info->rx_hashtbl[prev_index].used_next = next_index; 776 if (next_index != RLB_NULL_INDEX) 777 bond_info->rx_hashtbl[next_index].used_prev = prev_index; 778 } 779 780 /* unlink a rlb hash table entry from the src list */ 781 static void rlb_src_unlink(struct bonding *bond, u32 index) 782 { 783 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 784 u32 next_index = bond_info->rx_hashtbl[index].src_next; 785 u32 prev_index = bond_info->rx_hashtbl[index].src_prev; 786 787 bond_info->rx_hashtbl[index].src_next = RLB_NULL_INDEX; 788 bond_info->rx_hashtbl[index].src_prev = RLB_NULL_INDEX; 789 790 if (next_index != RLB_NULL_INDEX) 791 bond_info->rx_hashtbl[next_index].src_prev = prev_index; 792 793 if (prev_index == RLB_NULL_INDEX) 794 return; 795 796 /* is prev_index pointing to the head of this list? */ 797 if (bond_info->rx_hashtbl[prev_index].src_first == index) 798 bond_info->rx_hashtbl[prev_index].src_first = next_index; 799 else 800 bond_info->rx_hashtbl[prev_index].src_next = next_index; 801 802 } 803 804 static void rlb_delete_table_entry(struct bonding *bond, u32 index) 805 { 806 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 807 struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); 808 809 rlb_delete_table_entry_dst(bond, index); 810 rlb_init_table_entry_dst(entry); 811 812 rlb_src_unlink(bond, index); 813 } 814 815 /* add the rx_hashtbl[ip_dst_hash] entry to the list 816 * of entries with identical ip_src_hash 817 */ 818 static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash) 819 { 820 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 821 u32 next; 822 823 bond_info->rx_hashtbl[ip_dst_hash].src_prev = ip_src_hash; 824 next = bond_info->rx_hashtbl[ip_src_hash].src_first; 825 bond_info->rx_hashtbl[ip_dst_hash].src_next = next; 826 if (next != RLB_NULL_INDEX) 827 bond_info->rx_hashtbl[next].src_prev = ip_dst_hash; 828 bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash; 829 } 830 831 /* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does 832 * not match arp->mac_src 833 */ 834 static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp) 835 { 836 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 837 u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); 838 u32 index; 839 840 spin_lock_bh(&bond->mode_lock); 841 842 index = bond_info->rx_hashtbl[ip_src_hash].src_first; 843 while (index != RLB_NULL_INDEX) { 844 struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); 845 u32 next_index = entry->src_next; 846 if (entry->ip_src == arp->ip_src && 847 !ether_addr_equal_64bits(arp->mac_src, entry->mac_src)) 848 rlb_delete_table_entry(bond, index); 849 index = next_index; 850 } 851 spin_unlock_bh(&bond->mode_lock); 852 } 853 854 static int rlb_initialize(struct bonding *bond) 855 { 856 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 857 struct rlb_client_info *new_hashtbl; 858 int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info); 859 int i; 860 861 new_hashtbl = kmalloc(size, GFP_KERNEL); 862 if (!new_hashtbl) 863 return -1; 864 865 spin_lock_bh(&bond->mode_lock); 866 867 bond_info->rx_hashtbl = new_hashtbl; 868 869 bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; 870 871 for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) 872 rlb_init_table_entry(bond_info->rx_hashtbl + i); 873 874 spin_unlock_bh(&bond->mode_lock); 875 876 /* register to receive ARPs */ 877 bond->recv_probe = rlb_arp_recv; 878 879 return 0; 880 } 881 882 static void rlb_deinitialize(struct bonding *bond) 883 { 884 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 885 886 spin_lock_bh(&bond->mode_lock); 887 888 kfree(bond_info->rx_hashtbl); 889 bond_info->rx_hashtbl = NULL; 890 bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; 891 892 spin_unlock_bh(&bond->mode_lock); 893 } 894 895 static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) 896 { 897 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 898 u32 curr_index; 899 900 spin_lock_bh(&bond->mode_lock); 901 902 curr_index = bond_info->rx_hashtbl_used_head; 903 while (curr_index != RLB_NULL_INDEX) { 904 struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]); 905 u32 next_index = bond_info->rx_hashtbl[curr_index].used_next; 906 907 if (curr->vlan_id == vlan_id) 908 rlb_delete_table_entry(bond, curr_index); 909 910 curr_index = next_index; 911 } 912 913 spin_unlock_bh(&bond->mode_lock); 914 } 915 916 /*********************** tlb/rlb shared functions *********************/ 917 918 static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], 919 __be16 vlan_proto, u16 vid) 920 { 921 struct learning_pkt pkt; 922 struct sk_buff *skb; 923 int size = sizeof(struct learning_pkt); 924 925 memset(&pkt, 0, size); 926 ether_addr_copy(pkt.mac_dst, mac_addr); 927 ether_addr_copy(pkt.mac_src, mac_addr); 928 pkt.type = cpu_to_be16(ETH_P_LOOPBACK); 929 930 skb = dev_alloc_skb(size); 931 if (!skb) 932 return; 933 934 skb_put_data(skb, &pkt, size); 935 936 skb_reset_mac_header(skb); 937 skb->network_header = skb->mac_header + ETH_HLEN; 938 skb->protocol = pkt.type; 939 skb->priority = TC_PRIO_CONTROL; 940 skb->dev = slave->dev; 941 942 netdev_dbg(slave->bond->dev, 943 "Send learning packet: dev %s mac %pM vlan %d\n", 944 slave->dev->name, mac_addr, vid); 945 946 if (vid) 947 __vlan_hwaccel_put_tag(skb, vlan_proto, vid); 948 949 dev_queue_xmit(skb); 950 } 951 952 struct alb_walk_data { 953 struct bonding *bond; 954 struct slave *slave; 955 u8 *mac_addr; 956 bool strict_match; 957 }; 958 959 static int alb_upper_dev_walk(struct net_device *upper, void *_data) 960 { 961 struct alb_walk_data *data = _data; 962 bool strict_match = data->strict_match; 963 struct bonding *bond = data->bond; 964 struct slave *slave = data->slave; 965 u8 *mac_addr = data->mac_addr; 966 struct bond_vlan_tag *tags; 967 968 if (is_vlan_dev(upper) && 969 bond->nest_level == vlan_get_encap_level(upper) - 1) { 970 if (upper->addr_assign_type == NET_ADDR_STOLEN) { 971 alb_send_lp_vid(slave, mac_addr, 972 vlan_dev_vlan_proto(upper), 973 vlan_dev_vlan_id(upper)); 974 } else { 975 alb_send_lp_vid(slave, upper->dev_addr, 976 vlan_dev_vlan_proto(upper), 977 vlan_dev_vlan_id(upper)); 978 } 979 } 980 981 /* If this is a macvlan device, then only send updates 982 * when strict_match is turned off. 983 */ 984 if (netif_is_macvlan(upper) && !strict_match) { 985 tags = bond_verify_device_path(bond->dev, upper, 0); 986 if (IS_ERR_OR_NULL(tags)) 987 BUG(); 988 alb_send_lp_vid(slave, upper->dev_addr, 989 tags[0].vlan_proto, tags[0].vlan_id); 990 kfree(tags); 991 } 992 993 return 0; 994 } 995 996 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], 997 bool strict_match) 998 { 999 struct bonding *bond = bond_get_bond_by_slave(slave); 1000 struct alb_walk_data data = { 1001 .strict_match = strict_match, 1002 .mac_addr = mac_addr, 1003 .slave = slave, 1004 .bond = bond, 1005 }; 1006 1007 /* send untagged */ 1008 alb_send_lp_vid(slave, mac_addr, 0, 0); 1009 1010 /* loop through all devices and see if we need to send a packet 1011 * for that device. 1012 */ 1013 rcu_read_lock(); 1014 netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &data); 1015 rcu_read_unlock(); 1016 } 1017 1018 static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[], 1019 unsigned int len) 1020 { 1021 struct net_device *dev = slave->dev; 1022 struct sockaddr_storage ss; 1023 1024 if (BOND_MODE(slave->bond) == BOND_MODE_TLB) { 1025 memcpy(dev->dev_addr, addr, len); 1026 return 0; 1027 } 1028 1029 /* for rlb each slave must have a unique hw mac addresses so that 1030 * each slave will receive packets destined to a different mac 1031 */ 1032 memcpy(ss.__data, addr, len); 1033 ss.ss_family = dev->type; 1034 if (dev_set_mac_address(dev, (struct sockaddr *)&ss)) { 1035 netdev_err(slave->bond->dev, "dev_set_mac_address of dev %s failed! ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n", 1036 dev->name); 1037 return -EOPNOTSUPP; 1038 } 1039 return 0; 1040 } 1041 1042 /* Swap MAC addresses between two slaves. 1043 * 1044 * Called with RTNL held, and no other locks. 1045 */ 1046 static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2) 1047 { 1048 u8 tmp_mac_addr[MAX_ADDR_LEN]; 1049 1050 bond_hw_addr_copy(tmp_mac_addr, slave1->dev->dev_addr, 1051 slave1->dev->addr_len); 1052 alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, 1053 slave2->dev->addr_len); 1054 alb_set_slave_mac_addr(slave2, tmp_mac_addr, 1055 slave1->dev->addr_len); 1056 1057 } 1058 1059 /* Send learning packets after MAC address swap. 1060 * 1061 * Called with RTNL and no other locks 1062 */ 1063 static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, 1064 struct slave *slave2) 1065 { 1066 int slaves_state_differ = (bond_slave_can_tx(slave1) != bond_slave_can_tx(slave2)); 1067 struct slave *disabled_slave = NULL; 1068 1069 ASSERT_RTNL(); 1070 1071 /* fasten the change in the switch */ 1072 if (bond_slave_can_tx(slave1)) { 1073 alb_send_learning_packets(slave1, slave1->dev->dev_addr, false); 1074 if (bond->alb_info.rlb_enabled) { 1075 /* inform the clients that the mac address 1076 * has changed 1077 */ 1078 rlb_req_update_slave_clients(bond, slave1); 1079 } 1080 } else { 1081 disabled_slave = slave1; 1082 } 1083 1084 if (bond_slave_can_tx(slave2)) { 1085 alb_send_learning_packets(slave2, slave2->dev->dev_addr, false); 1086 if (bond->alb_info.rlb_enabled) { 1087 /* inform the clients that the mac address 1088 * has changed 1089 */ 1090 rlb_req_update_slave_clients(bond, slave2); 1091 } 1092 } else { 1093 disabled_slave = slave2; 1094 } 1095 1096 if (bond->alb_info.rlb_enabled && slaves_state_differ) { 1097 /* A disabled slave was assigned an active mac addr */ 1098 rlb_teach_disabled_mac_on_primary(bond, 1099 disabled_slave->dev->dev_addr); 1100 } 1101 } 1102 1103 /** 1104 * alb_change_hw_addr_on_detach 1105 * @bond: bonding we're working on 1106 * @slave: the slave that was just detached 1107 * 1108 * We assume that @slave was already detached from the slave list. 1109 * 1110 * If @slave's permanent hw address is different both from its current 1111 * address and from @bond's address, then somewhere in the bond there's 1112 * a slave that has @slave's permanet address as its current address. 1113 * We'll make sure that that slave no longer uses @slave's permanent address. 1114 * 1115 * Caller must hold RTNL and no other locks 1116 */ 1117 static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave) 1118 { 1119 int perm_curr_diff; 1120 int perm_bond_diff; 1121 struct slave *found_slave; 1122 1123 perm_curr_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, 1124 slave->dev->dev_addr); 1125 perm_bond_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, 1126 bond->dev->dev_addr); 1127 1128 if (perm_curr_diff && perm_bond_diff) { 1129 found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr); 1130 1131 if (found_slave) { 1132 alb_swap_mac_addr(slave, found_slave); 1133 alb_fasten_mac_swap(bond, slave, found_slave); 1134 } 1135 } 1136 } 1137 1138 /** 1139 * alb_handle_addr_collision_on_attach 1140 * @bond: bonding we're working on 1141 * @slave: the slave that was just attached 1142 * 1143 * checks uniqueness of slave's mac address and handles the case the 1144 * new slave uses the bonds mac address. 1145 * 1146 * If the permanent hw address of @slave is @bond's hw address, we need to 1147 * find a different hw address to give @slave, that isn't in use by any other 1148 * slave in the bond. This address must be, of course, one of the permanent 1149 * addresses of the other slaves. 1150 * 1151 * We go over the slave list, and for each slave there we compare its 1152 * permanent hw address with the current address of all the other slaves. 1153 * If no match was found, then we've found a slave with a permanent address 1154 * that isn't used by any other slave in the bond, so we can assign it to 1155 * @slave. 1156 * 1157 * assumption: this function is called before @slave is attached to the 1158 * bond slave list. 1159 */ 1160 static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave) 1161 { 1162 struct slave *has_bond_addr = rcu_access_pointer(bond->curr_active_slave); 1163 struct slave *tmp_slave1, *free_mac_slave = NULL; 1164 struct list_head *iter; 1165 1166 if (!bond_has_slaves(bond)) { 1167 /* this is the first slave */ 1168 return 0; 1169 } 1170 1171 /* if slave's mac address differs from bond's mac address 1172 * check uniqueness of slave's mac address against the other 1173 * slaves in the bond. 1174 */ 1175 if (!ether_addr_equal_64bits(slave->perm_hwaddr, bond->dev->dev_addr)) { 1176 if (!bond_slave_has_mac(bond, slave->dev->dev_addr)) 1177 return 0; 1178 1179 /* Try setting slave mac to bond address and fall-through 1180 * to code handling that situation below... 1181 */ 1182 alb_set_slave_mac_addr(slave, bond->dev->dev_addr, 1183 bond->dev->addr_len); 1184 } 1185 1186 /* The slave's address is equal to the address of the bond. 1187 * Search for a spare address in the bond for this slave. 1188 */ 1189 bond_for_each_slave(bond, tmp_slave1, iter) { 1190 if (!bond_slave_has_mac(bond, tmp_slave1->perm_hwaddr)) { 1191 /* no slave has tmp_slave1's perm addr 1192 * as its curr addr 1193 */ 1194 free_mac_slave = tmp_slave1; 1195 break; 1196 } 1197 1198 if (!has_bond_addr) { 1199 if (ether_addr_equal_64bits(tmp_slave1->dev->dev_addr, 1200 bond->dev->dev_addr)) { 1201 1202 has_bond_addr = tmp_slave1; 1203 } 1204 } 1205 } 1206 1207 if (free_mac_slave) { 1208 alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr, 1209 free_mac_slave->dev->addr_len); 1210 1211 netdev_warn(bond->dev, "the hw address of slave %s is in use by the bond; giving it the hw address of %s\n", 1212 slave->dev->name, free_mac_slave->dev->name); 1213 1214 } else if (has_bond_addr) { 1215 netdev_err(bond->dev, "the hw address of slave %s is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n", 1216 slave->dev->name); 1217 return -EFAULT; 1218 } 1219 1220 return 0; 1221 } 1222 1223 /** 1224 * alb_set_mac_address 1225 * @bond: 1226 * @addr: 1227 * 1228 * In TLB mode all slaves are configured to the bond's hw address, but set 1229 * their dev_addr field to different addresses (based on their permanent hw 1230 * addresses). 1231 * 1232 * For each slave, this function sets the interface to the new address and then 1233 * changes its dev_addr field to its previous value. 1234 * 1235 * Unwinding assumes bond's mac address has not yet changed. 1236 */ 1237 static int alb_set_mac_address(struct bonding *bond, void *addr) 1238 { 1239 struct slave *slave, *rollback_slave; 1240 struct list_head *iter; 1241 struct sockaddr_storage ss; 1242 char tmp_addr[MAX_ADDR_LEN]; 1243 int res; 1244 1245 if (bond->alb_info.rlb_enabled) 1246 return 0; 1247 1248 bond_for_each_slave(bond, slave, iter) { 1249 /* save net_device's current hw address */ 1250 bond_hw_addr_copy(tmp_addr, slave->dev->dev_addr, 1251 slave->dev->addr_len); 1252 1253 res = dev_set_mac_address(slave->dev, addr); 1254 1255 /* restore net_device's hw address */ 1256 bond_hw_addr_copy(slave->dev->dev_addr, tmp_addr, 1257 slave->dev->addr_len); 1258 1259 if (res) 1260 goto unwind; 1261 } 1262 1263 return 0; 1264 1265 unwind: 1266 memcpy(ss.__data, bond->dev->dev_addr, bond->dev->addr_len); 1267 ss.ss_family = bond->dev->type; 1268 1269 /* unwind from head to the slave that failed */ 1270 bond_for_each_slave(bond, rollback_slave, iter) { 1271 if (rollback_slave == slave) 1272 break; 1273 bond_hw_addr_copy(tmp_addr, rollback_slave->dev->dev_addr, 1274 rollback_slave->dev->addr_len); 1275 dev_set_mac_address(rollback_slave->dev, 1276 (struct sockaddr *)&ss); 1277 bond_hw_addr_copy(rollback_slave->dev->dev_addr, tmp_addr, 1278 rollback_slave->dev->addr_len); 1279 } 1280 1281 return res; 1282 } 1283 1284 /************************ exported alb funcions ************************/ 1285 1286 int bond_alb_initialize(struct bonding *bond, int rlb_enabled) 1287 { 1288 int res; 1289 1290 res = tlb_initialize(bond); 1291 if (res) 1292 return res; 1293 1294 if (rlb_enabled) { 1295 bond->alb_info.rlb_enabled = 1; 1296 res = rlb_initialize(bond); 1297 if (res) { 1298 tlb_deinitialize(bond); 1299 return res; 1300 } 1301 } else { 1302 bond->alb_info.rlb_enabled = 0; 1303 } 1304 1305 return 0; 1306 } 1307 1308 void bond_alb_deinitialize(struct bonding *bond) 1309 { 1310 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1311 1312 tlb_deinitialize(bond); 1313 1314 if (bond_info->rlb_enabled) 1315 rlb_deinitialize(bond); 1316 } 1317 1318 static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, 1319 struct slave *tx_slave) 1320 { 1321 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1322 struct ethhdr *eth_data = eth_hdr(skb); 1323 1324 if (!tx_slave) { 1325 /* unbalanced or unassigned, send through primary */ 1326 tx_slave = rcu_dereference(bond->curr_active_slave); 1327 if (bond->params.tlb_dynamic_lb) 1328 bond_info->unbalanced_load += skb->len; 1329 } 1330 1331 if (tx_slave && bond_slave_can_tx(tx_slave)) { 1332 if (tx_slave != rcu_access_pointer(bond->curr_active_slave)) { 1333 ether_addr_copy(eth_data->h_source, 1334 tx_slave->dev->dev_addr); 1335 } 1336 1337 bond_dev_queue_xmit(bond, skb, tx_slave->dev); 1338 goto out; 1339 } 1340 1341 if (tx_slave && bond->params.tlb_dynamic_lb) { 1342 spin_lock(&bond->mode_lock); 1343 __tlb_clear_slave(bond, tx_slave, 0); 1344 spin_unlock(&bond->mode_lock); 1345 } 1346 1347 /* no suitable interface, frame not sent */ 1348 bond_tx_drop(bond->dev, skb); 1349 out: 1350 return NETDEV_TX_OK; 1351 } 1352 1353 netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) 1354 { 1355 struct bonding *bond = netdev_priv(bond_dev); 1356 struct ethhdr *eth_data; 1357 struct slave *tx_slave = NULL; 1358 u32 hash_index; 1359 1360 skb_reset_mac_header(skb); 1361 eth_data = eth_hdr(skb); 1362 1363 /* Do not TX balance any multicast or broadcast */ 1364 if (!is_multicast_ether_addr(eth_data->h_dest)) { 1365 switch (skb->protocol) { 1366 case htons(ETH_P_IP): 1367 case htons(ETH_P_IPX): 1368 /* In case of IPX, it will falback to L2 hash */ 1369 case htons(ETH_P_IPV6): 1370 hash_index = bond_xmit_hash(bond, skb); 1371 if (bond->params.tlb_dynamic_lb) { 1372 tx_slave = tlb_choose_channel(bond, 1373 hash_index & 0xFF, 1374 skb->len); 1375 } else { 1376 struct bond_up_slave *slaves; 1377 unsigned int count; 1378 1379 slaves = rcu_dereference(bond->slave_arr); 1380 count = slaves ? READ_ONCE(slaves->count) : 0; 1381 if (likely(count)) 1382 tx_slave = slaves->arr[hash_index % 1383 count]; 1384 } 1385 break; 1386 } 1387 } 1388 return bond_do_alb_xmit(skb, bond, tx_slave); 1389 } 1390 1391 netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) 1392 { 1393 struct bonding *bond = netdev_priv(bond_dev); 1394 struct ethhdr *eth_data; 1395 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1396 struct slave *tx_slave = NULL; 1397 static const __be32 ip_bcast = htonl(0xffffffff); 1398 int hash_size = 0; 1399 bool do_tx_balance = true; 1400 u32 hash_index = 0; 1401 const u8 *hash_start = NULL; 1402 struct ipv6hdr *ip6hdr; 1403 1404 skb_reset_mac_header(skb); 1405 eth_data = eth_hdr(skb); 1406 1407 switch (ntohs(skb->protocol)) { 1408 case ETH_P_IP: { 1409 const struct iphdr *iph = ip_hdr(skb); 1410 1411 if (is_broadcast_ether_addr(eth_data->h_dest) || 1412 iph->daddr == ip_bcast || 1413 iph->protocol == IPPROTO_IGMP) { 1414 do_tx_balance = false; 1415 break; 1416 } 1417 hash_start = (char *)&(iph->daddr); 1418 hash_size = sizeof(iph->daddr); 1419 } 1420 break; 1421 case ETH_P_IPV6: 1422 /* IPv6 doesn't really use broadcast mac address, but leave 1423 * that here just in case. 1424 */ 1425 if (is_broadcast_ether_addr(eth_data->h_dest)) { 1426 do_tx_balance = false; 1427 break; 1428 } 1429 1430 /* IPv6 uses all-nodes multicast as an equivalent to 1431 * broadcasts in IPv4. 1432 */ 1433 if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) { 1434 do_tx_balance = false; 1435 break; 1436 } 1437 1438 /* Additianally, DAD probes should not be tx-balanced as that 1439 * will lead to false positives for duplicate addresses and 1440 * prevent address configuration from working. 1441 */ 1442 ip6hdr = ipv6_hdr(skb); 1443 if (ipv6_addr_any(&ip6hdr->saddr)) { 1444 do_tx_balance = false; 1445 break; 1446 } 1447 1448 hash_start = (char *)&(ipv6_hdr(skb)->daddr); 1449 hash_size = sizeof(ipv6_hdr(skb)->daddr); 1450 break; 1451 case ETH_P_IPX: 1452 if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { 1453 /* something is wrong with this packet */ 1454 do_tx_balance = false; 1455 break; 1456 } 1457 1458 if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { 1459 /* The only protocol worth balancing in 1460 * this family since it has an "ARP" like 1461 * mechanism 1462 */ 1463 do_tx_balance = false; 1464 break; 1465 } 1466 1467 hash_start = (char *)eth_data->h_dest; 1468 hash_size = ETH_ALEN; 1469 break; 1470 case ETH_P_ARP: 1471 do_tx_balance = false; 1472 if (bond_info->rlb_enabled) 1473 tx_slave = rlb_arp_xmit(skb, bond); 1474 break; 1475 default: 1476 do_tx_balance = false; 1477 break; 1478 } 1479 1480 if (do_tx_balance) { 1481 if (bond->params.tlb_dynamic_lb) { 1482 hash_index = _simple_hash(hash_start, hash_size); 1483 tx_slave = tlb_choose_channel(bond, hash_index, skb->len); 1484 } else { 1485 /* 1486 * do_tx_balance means we are free to select the tx_slave 1487 * So we do exactly what tlb would do for hash selection 1488 */ 1489 1490 struct bond_up_slave *slaves; 1491 unsigned int count; 1492 1493 slaves = rcu_dereference(bond->slave_arr); 1494 count = slaves ? READ_ONCE(slaves->count) : 0; 1495 if (likely(count)) 1496 tx_slave = slaves->arr[bond_xmit_hash(bond, skb) % 1497 count]; 1498 } 1499 } 1500 1501 return bond_do_alb_xmit(skb, bond, tx_slave); 1502 } 1503 1504 void bond_alb_monitor(struct work_struct *work) 1505 { 1506 struct bonding *bond = container_of(work, struct bonding, 1507 alb_work.work); 1508 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1509 struct list_head *iter; 1510 struct slave *slave; 1511 1512 if (!bond_has_slaves(bond)) { 1513 bond_info->tx_rebalance_counter = 0; 1514 bond_info->lp_counter = 0; 1515 goto re_arm; 1516 } 1517 1518 rcu_read_lock(); 1519 1520 bond_info->tx_rebalance_counter++; 1521 bond_info->lp_counter++; 1522 1523 /* send learning packets */ 1524 if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { 1525 bool strict_match; 1526 1527 bond_for_each_slave_rcu(bond, slave, iter) { 1528 /* If updating current_active, use all currently 1529 * user mac addreses (!strict_match). Otherwise, only 1530 * use mac of the slave device. 1531 * In RLB mode, we always use strict matches. 1532 */ 1533 strict_match = (slave != rcu_access_pointer(bond->curr_active_slave) || 1534 bond_info->rlb_enabled); 1535 alb_send_learning_packets(slave, slave->dev->dev_addr, 1536 strict_match); 1537 } 1538 bond_info->lp_counter = 0; 1539 } 1540 1541 /* rebalance tx traffic */ 1542 if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { 1543 bond_for_each_slave_rcu(bond, slave, iter) { 1544 tlb_clear_slave(bond, slave, 1); 1545 if (slave == rcu_access_pointer(bond->curr_active_slave)) { 1546 SLAVE_TLB_INFO(slave).load = 1547 bond_info->unbalanced_load / 1548 BOND_TLB_REBALANCE_INTERVAL; 1549 bond_info->unbalanced_load = 0; 1550 } 1551 } 1552 bond_info->tx_rebalance_counter = 0; 1553 } 1554 1555 if (bond_info->rlb_enabled) { 1556 if (bond_info->primary_is_promisc && 1557 (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { 1558 1559 /* dev_set_promiscuity requires rtnl and 1560 * nothing else. Avoid race with bond_close. 1561 */ 1562 rcu_read_unlock(); 1563 if (!rtnl_trylock()) 1564 goto re_arm; 1565 1566 bond_info->rlb_promisc_timeout_counter = 0; 1567 1568 /* If the primary was set to promiscuous mode 1569 * because a slave was disabled then 1570 * it can now leave promiscuous mode. 1571 */ 1572 dev_set_promiscuity(rtnl_dereference(bond->curr_active_slave)->dev, 1573 -1); 1574 bond_info->primary_is_promisc = 0; 1575 1576 rtnl_unlock(); 1577 rcu_read_lock(); 1578 } 1579 1580 if (bond_info->rlb_rebalance) { 1581 bond_info->rlb_rebalance = 0; 1582 rlb_rebalance(bond); 1583 } 1584 1585 /* check if clients need updating */ 1586 if (bond_info->rx_ntt) { 1587 if (bond_info->rlb_update_delay_counter) { 1588 --bond_info->rlb_update_delay_counter; 1589 } else { 1590 rlb_update_rx_clients(bond); 1591 if (bond_info->rlb_update_retry_counter) 1592 --bond_info->rlb_update_retry_counter; 1593 else 1594 bond_info->rx_ntt = 0; 1595 } 1596 } 1597 } 1598 rcu_read_unlock(); 1599 re_arm: 1600 queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); 1601 } 1602 1603 /* assumption: called before the slave is attached to the bond 1604 * and not locked by the bond lock 1605 */ 1606 int bond_alb_init_slave(struct bonding *bond, struct slave *slave) 1607 { 1608 int res; 1609 1610 res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr, 1611 slave->dev->addr_len); 1612 if (res) 1613 return res; 1614 1615 res = alb_handle_addr_collision_on_attach(bond, slave); 1616 if (res) 1617 return res; 1618 1619 tlb_init_slave(slave); 1620 1621 /* order a rebalance ASAP */ 1622 bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; 1623 1624 if (bond->alb_info.rlb_enabled) 1625 bond->alb_info.rlb_rebalance = 1; 1626 1627 return 0; 1628 } 1629 1630 /* Remove slave from tlb and rlb hash tables, and fix up MAC addresses 1631 * if necessary. 1632 * 1633 * Caller must hold RTNL and no other locks 1634 */ 1635 void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) 1636 { 1637 if (bond_has_slaves(bond)) 1638 alb_change_hw_addr_on_detach(bond, slave); 1639 1640 tlb_clear_slave(bond, slave, 0); 1641 1642 if (bond->alb_info.rlb_enabled) { 1643 bond->alb_info.rx_slave = NULL; 1644 rlb_clear_slave(bond, slave); 1645 } 1646 1647 } 1648 1649 void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link) 1650 { 1651 struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); 1652 1653 if (link == BOND_LINK_DOWN) { 1654 tlb_clear_slave(bond, slave, 0); 1655 if (bond->alb_info.rlb_enabled) 1656 rlb_clear_slave(bond, slave); 1657 } else if (link == BOND_LINK_UP) { 1658 /* order a rebalance ASAP */ 1659 bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; 1660 if (bond->alb_info.rlb_enabled) { 1661 bond->alb_info.rlb_rebalance = 1; 1662 /* If the updelay module parameter is smaller than the 1663 * forwarding delay of the switch the rebalance will 1664 * not work because the rebalance arp replies will 1665 * not be forwarded to the clients.. 1666 */ 1667 } 1668 } 1669 1670 if (bond_is_nondyn_tlb(bond)) { 1671 if (bond_update_slave_arr(bond, NULL)) 1672 pr_err("Failed to build slave-array for TLB mode.\n"); 1673 } 1674 } 1675 1676 /** 1677 * bond_alb_handle_active_change - assign new curr_active_slave 1678 * @bond: our bonding struct 1679 * @new_slave: new slave to assign 1680 * 1681 * Set the bond->curr_active_slave to @new_slave and handle 1682 * mac address swapping and promiscuity changes as needed. 1683 * 1684 * Caller must hold RTNL 1685 */ 1686 void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave) 1687 { 1688 struct slave *swap_slave; 1689 struct slave *curr_active; 1690 1691 curr_active = rtnl_dereference(bond->curr_active_slave); 1692 if (curr_active == new_slave) 1693 return; 1694 1695 if (curr_active && bond->alb_info.primary_is_promisc) { 1696 dev_set_promiscuity(curr_active->dev, -1); 1697 bond->alb_info.primary_is_promisc = 0; 1698 bond->alb_info.rlb_promisc_timeout_counter = 0; 1699 } 1700 1701 swap_slave = curr_active; 1702 rcu_assign_pointer(bond->curr_active_slave, new_slave); 1703 1704 if (!new_slave || !bond_has_slaves(bond)) 1705 return; 1706 1707 /* set the new curr_active_slave to the bonds mac address 1708 * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave 1709 */ 1710 if (!swap_slave) 1711 swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr); 1712 1713 /* Arrange for swap_slave and new_slave to temporarily be 1714 * ignored so we can mess with their MAC addresses without 1715 * fear of interference from transmit activity. 1716 */ 1717 if (swap_slave) 1718 tlb_clear_slave(bond, swap_slave, 1); 1719 tlb_clear_slave(bond, new_slave, 1); 1720 1721 /* in TLB mode, the slave might flip down/up with the old dev_addr, 1722 * and thus filter bond->dev_addr's packets, so force bond's mac 1723 */ 1724 if (BOND_MODE(bond) == BOND_MODE_TLB) { 1725 struct sockaddr_storage ss; 1726 u8 tmp_addr[MAX_ADDR_LEN]; 1727 1728 bond_hw_addr_copy(tmp_addr, new_slave->dev->dev_addr, 1729 new_slave->dev->addr_len); 1730 1731 bond_hw_addr_copy(ss.__data, bond->dev->dev_addr, 1732 bond->dev->addr_len); 1733 ss.ss_family = bond->dev->type; 1734 /* we don't care if it can't change its mac, best effort */ 1735 dev_set_mac_address(new_slave->dev, (struct sockaddr *)&ss); 1736 1737 bond_hw_addr_copy(new_slave->dev->dev_addr, tmp_addr, 1738 new_slave->dev->addr_len); 1739 } 1740 1741 /* curr_active_slave must be set before calling alb_swap_mac_addr */ 1742 if (swap_slave) { 1743 /* swap mac address */ 1744 alb_swap_mac_addr(swap_slave, new_slave); 1745 alb_fasten_mac_swap(bond, swap_slave, new_slave); 1746 } else { 1747 /* set the new_slave to the bond mac address */ 1748 alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr, 1749 bond->dev->addr_len); 1750 alb_send_learning_packets(new_slave, bond->dev->dev_addr, 1751 false); 1752 } 1753 } 1754 1755 /* Called with RTNL */ 1756 int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) 1757 { 1758 struct bonding *bond = netdev_priv(bond_dev); 1759 struct sockaddr_storage *ss = addr; 1760 struct slave *curr_active; 1761 struct slave *swap_slave; 1762 int res; 1763 1764 if (!is_valid_ether_addr(ss->__data)) 1765 return -EADDRNOTAVAIL; 1766 1767 res = alb_set_mac_address(bond, addr); 1768 if (res) 1769 return res; 1770 1771 bond_hw_addr_copy(bond_dev->dev_addr, ss->__data, bond_dev->addr_len); 1772 1773 /* If there is no curr_active_slave there is nothing else to do. 1774 * Otherwise we'll need to pass the new address to it and handle 1775 * duplications. 1776 */ 1777 curr_active = rtnl_dereference(bond->curr_active_slave); 1778 if (!curr_active) 1779 return 0; 1780 1781 swap_slave = bond_slave_has_mac(bond, bond_dev->dev_addr); 1782 1783 if (swap_slave) { 1784 alb_swap_mac_addr(swap_slave, curr_active); 1785 alb_fasten_mac_swap(bond, swap_slave, curr_active); 1786 } else { 1787 alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr, 1788 bond_dev->addr_len); 1789 1790 alb_send_learning_packets(curr_active, 1791 bond_dev->dev_addr, false); 1792 if (bond->alb_info.rlb_enabled) { 1793 /* inform clients mac address has changed */ 1794 rlb_req_update_slave_clients(bond, curr_active); 1795 } 1796 } 1797 1798 return 0; 1799 } 1800 1801 void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id) 1802 { 1803 if (bond->alb_info.rlb_enabled) 1804 rlb_clear_vlan(bond, vlan_id); 1805 } 1806 1807