// SPDX-License-Identifier: GPL-1.0+
/*
 * originally based on the dummy device.
 *
 * Copyright 1999, Thomas Davis, tadavis@lbl.gov.
 * Based on dummy.c, and eql.c devices.
 *
 * bonding.c: an Ethernet Bonding driver
 *
 * This is useful to talk to Cisco EtherChannel compatible equipment:
 *	Cisco 5500
 *	Sun Trunking (Solaris)
 *	Alteon AceDirector Trunks
 *	Linux Bonding
 *	and probably many L2 switches ...
 *
 * How it works:
 *    ifconfig bond0 ipaddress netmask up
 *	will set up a network device, with an ip address.  No mac address
 *	will be assigned at this time.  The hw mac address will come from
 *	the first slave bonded to the channel.  All slaves will then use
 *	this hw mac address.
 *
 *    ifconfig bond0 down
 *	will release all slaves, marking them as down.
 *
 *    ifenslave bond0 eth0
 *	will attach eth0 to bond0 as a slave.  eth0 hw mac address will either
 *	a: be used as initial mac address
 *	b: if a hw mac address already is there, eth0's hw mac address
 *	   will then be set from bond0.
 */
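/* Note: the ifconfig/ifenslave commands above reflect the historical
 * interface.  On current systems the rough iproute2 equivalents are:
 *
 *    ip link add bond0 type bond
 *    ip link set eth0 down
 *    ip link set eth0 master bond0
 *    ip link set bond0 up
 */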
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/ptrace.h>
#include <linux/ioport.h>
#include <linux/in.h>
#include <net/ip.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/socket.h>
#include <linux/ctype.h>
#include <linux/inet.h>
#include <linux/bitops.h>
#include <linux/io.h>
#include <asm/dma.h>
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/smp.h>
#include <linux/if_ether.h>
#include <net/arp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/if_bonding.h>
#include <linux/phy.h>
#include <linux/jiffies.h>
#include <linux/preempt.h>
#include <net/route.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/pkt_sched.h>
#include <linux/rculist.h>
#include <net/flow_dissector.h>
#include <net/xfrm.h>
#include <net/bonding.h>
#include <net/bond_3ad.h>
#include <net/bond_alb.h>
#if IS_ENABLED(CONFIG_TLS_DEVICE)
#include <net/tls.h>
#endif
#include <net/ip6_route.h>
#include <net/xdp.h>

#include "bonding_priv.h"

/*---------------------------- Module parameters ----------------------------*/

/* monitor all links that often (in milliseconds). <=0 disables monitoring */

static int max_bonds = BOND_DEFAULT_MAX_BONDS;
static int tx_queues = BOND_DEFAULT_TX_QUEUES;
static int num_peer_notif = 1;
static int miimon;
static int updelay;
static int downdelay;
static int use_carrier = 1;
static char *mode;
static char *primary;
static char *primary_reselect;
static char *lacp_rate;
static int min_links;
static char *ad_select;
static char *xmit_hash_policy;
static int arp_interval;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS];
static char *arp_validate;
static char *arp_all_targets;
static char *fail_over_mac;
static int all_slaves_active;
static struct bond_params bonding_defaults;
static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
static int packets_per_slave = 1;
static int lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL;

module_param(max_bonds, int, 0);
MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
module_param(tx_queues, int, 0);
MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)");
module_param_named(num_grat_arp, num_peer_notif, int, 0644);
MODULE_PARM_DESC(num_grat_arp, "Number of peer notifications to send on "
			       "failover event (alias of num_unsol_na)");
module_param_named(num_unsol_na, num_peer_notif, int, 0644);
MODULE_PARM_DESC(num_unsol_na, "Number of peer notifications to send on "
			       "failover event (alias of num_grat_arp)");
module_param(miimon, int, 0);
MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
module_param(updelay, int, 0);
MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
module_param(downdelay, int, 0);
MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
			    "in milliseconds");
module_param(use_carrier, int, 0);
MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
			      "0 for off, 1 for on (default)");
module_param(mode, charp, 0);
MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, "
		       "1 for active-backup, 2 for balance-xor, "
		       "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "
		       "6 for balance-alb");
module_param(primary, charp, 0);
MODULE_PARM_DESC(primary, "Primary network device to use");
module_param(primary_reselect, charp, 0);
MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "
				   "once it comes up; "
				   "0 for always (default), "
				   "1 for only if speed of primary is "
				   "better, "
				   "2 for only on active slave "
				   "failure");
module_param(lacp_rate, charp, 0);
MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; "
			    "0 for slow, 1 for fast");
module_param(ad_select, charp, 0);
MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; "
			    "0 for stable (default), 1 for bandwidth, "
			    "2 for count");
module_param(min_links, int, 0);
MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on carrier");

module_param(xmit_hash_policy, charp, 0);
MODULE_PARM_DESC(xmit_hash_policy, "balance-alb, balance-tlb, balance-xor, 802.3ad hashing method; "
				   "0 for layer 2 (default), 1 for layer 3+4, "
				   "2 for layer 2+3, 3 for encap layer 2+3, "
				   "4 for encap layer 3+4, 5 for vlan+srcmac");
module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
module_param(arp_validate, charp, 0);
MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes; "
			       "0 for none (default), 1 for active, "
			       "2 for backup, 3 for all");
module_param(arp_all_targets, charp, 0);
MODULE_PARM_DESC(arp_all_targets, "fail on any/all arp targets timeout; 0 for any (default), 1 for all");
module_param(fail_over_mac, charp, 0);
MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to "
				"the same MAC; 0 for none (default), "
				"1 for active, 2 for follow");
module_param(all_slaves_active, int, 0);
MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface "
				    "by setting active flag for all slaves; "
				    "0 for never (default), 1 for always.");
module_param(resend_igmp, int, 0);
MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on "
			      "link failure");
module_param(packets_per_slave, int, 0);
MODULE_PARM_DESC(packets_per_slave, "Packets to send per slave in balance-rr "
				    "mode; 0 for a random slave, 1 packet per "
				    "slave (default), >1 packets per slave.");
module_param(lp_interval, uint, 0);
MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where "
			      "the bonding driver sends learning packets to "
			      "each slave's peer switch. The default is 1.");
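/* Example (illustrative values): load the driver in 802.3ad mode with MII
 * monitoring every 100 ms:
 *
 *    modprobe bonding mode=4 miimon=100 max_bonds=1
 */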
/*----------------------------- Global variables ----------------------------*/

#ifdef CONFIG_NET_POLL_CONTROLLER
atomic_t netpoll_block_tx = ATOMIC_INIT(0);
#endif

unsigned int bond_net_id __read_mostly;
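/* Flow keys that the xmit-hash policies may dissect.  This array backs
 * flow_keys_bonding below; the skb_flow_dissector_init() call that wires
 * the two together happens at module init, outside this excerpt.
 */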
static const struct flow_dissector_key flow_keys_bonding_keys[] = {
	{
		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
		.offset = offsetof(struct flow_keys, control),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_BASIC,
		.offset = offsetof(struct flow_keys, basic),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v4addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
		.offset = offsetof(struct flow_keys, addrs.v6addrs),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_TIPC,
		.offset = offsetof(struct flow_keys, addrs.tipckey),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_PORTS,
		.offset = offsetof(struct flow_keys, ports),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_ICMP,
		.offset = offsetof(struct flow_keys, icmp),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_VLAN,
		.offset = offsetof(struct flow_keys, vlan),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
		.offset = offsetof(struct flow_keys, tags),
	},
	{
		.key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
		.offset = offsetof(struct flow_keys, keyid),
	},
};

static struct flow_dissector flow_keys_bonding __read_mostly;

/*-------------------------- Forward declarations ---------------------------*/

static int bond_init(struct net_device *bond_dev);
static void bond_uninit(struct net_device *bond_dev);
static void bond_get_stats(struct net_device *bond_dev,
			   struct rtnl_link_stats64 *stats);
static void bond_slave_arr_handler(struct work_struct *work);
static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
				  int mod);
static void bond_netdev_notify_work(struct work_struct *work);

/*---------------------------- General routines -----------------------------*/

const char *bond_mode_name(int mode)
{
	static const char *names[] = {
		[BOND_MODE_ROUNDROBIN] = "load balancing (round-robin)",
		[BOND_MODE_ACTIVEBACKUP] = "fault-tolerance (active-backup)",
		[BOND_MODE_XOR] = "load balancing (xor)",
		[BOND_MODE_BROADCAST] = "fault-tolerance (broadcast)",
		[BOND_MODE_8023AD] = "IEEE 802.3ad Dynamic link aggregation",
		[BOND_MODE_TLB] = "transmit load balancing",
		[BOND_MODE_ALB] = "adaptive load balancing",
	};

	if (mode < BOND_MODE_ROUNDROBIN || mode > BOND_MODE_ALB)
		return "unknown";

	return names[mode];
}

/**
 * bond_dev_queue_xmit - Prepare skb for xmit.
 *
 * @bond: bond device that got this skb for tx.
 * @skb: hw accel VLAN tagged skb to transmit
 * @slave_dev: slave that is supposed to xmit this skbuff
 */
netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
				struct net_device *slave_dev)
{
	skb->dev = slave_dev;

	BUILD_BUG_ON(sizeof(skb->queue_mapping) !=
		     sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping));
	skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping);

	if (unlikely(netpoll_tx_running(bond->dev)))
		return bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);

	return dev_queue_xmit(skb);
}

static bool bond_sk_check(struct bonding *bond)
{
	switch (BOND_MODE(bond)) {
	case BOND_MODE_8023AD:
	case BOND_MODE_XOR:
		if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
			return true;
		fallthrough;
	default:
		return false;
	}
}

static bool bond_xdp_check(struct bonding *bond)
{
	switch (BOND_MODE(bond)) {
	case BOND_MODE_ROUNDROBIN:
	case BOND_MODE_ACTIVEBACKUP:
		return true;
	case BOND_MODE_8023AD:
	case BOND_MODE_XOR:
		/* vlan+srcmac is not supported with XDP as in most cases the
		 * 802.1q payload is not in the packet due to hardware offload.
		 */
		if (bond->params.xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC)
			return true;
		fallthrough;
	default:
		return false;
	}
}

/*---------------------------------- VLAN -----------------------------------*/

/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
 * we don't protect the slave list iteration with a lock because:
 * a. This operation is performed in IOCTL context,
 * b. The operation is protected by the RTNL semaphore in the 8021q code,
 * c. Holding a lock with BH disabled while directly calling a base driver
 *    entry point is generally a BAD idea.
 *
 * The design of synchronization/protection for this operation in the 8021q
 * module is good for one or more VLAN devices over a single physical device
 * and cannot be extended for a teaming solution like bonding, so there is a
 * potential race condition here where a net device from the vlan group might
 * be referenced (either by a base driver or the 8021q code) while it is being
 * removed from the system.  However, it turns out we're not making matters
 * worse, and if it works for regular VLAN usage it will work here too.
 */
/**
 * bond_vlan_rx_add_vid - Propagates adding an id to slaves
 * @bond_dev: bonding net device that got called
 * @proto: network protocol ID
 * @vid: vlan id being added
 */
static int bond_vlan_rx_add_vid(struct net_device *bond_dev,
				__be16 proto, u16 vid)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct slave *slave, *rollback_slave;
	struct list_head *iter;
	int res;

	bond_for_each_slave(bond, slave, iter) {
		res = vlan_vid_add(slave->dev, proto, vid);
		if (res)
			goto unwind;
	}

	return 0;

unwind:
	/* unwind to the slave that failed */
	bond_for_each_slave(bond, rollback_slave, iter) {
		if (rollback_slave == slave)
			break;

		vlan_vid_del(rollback_slave->dev, proto, vid);
	}

	return res;
}

/**
 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves
 * @bond_dev: bonding net device that got called
 * @proto: network protocol ID
 * @vid: vlan id being removed
 */
static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
				 __be16 proto, u16 vid)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct list_head *iter;
	struct slave *slave;

	bond_for_each_slave(bond, slave, iter)
		vlan_vid_del(slave->dev, proto, vid);

	if (bond_is_lb(bond))
		bond_alb_clear_vlan(bond, vid);

	return 0;
}

/*---------------------------------- XFRM -----------------------------------*/

#ifdef CONFIG_XFRM_OFFLOAD
/**
 * bond_ipsec_add_sa - program device with a security association
 * @xs: pointer to transformer state struct
 * @extack: extack point to fill failure reason
 **/
static int bond_ipsec_add_sa(struct xfrm_state *xs,
			     struct netlink_ext_ack *extack)
{
	struct net_device *bond_dev = xs->xso.dev;
	struct bond_ipsec *ipsec;
	struct bonding *bond;
	struct slave *slave;
	int err;

	if (!bond_dev)
		return -EINVAL;

	rcu_read_lock();
	bond = netdev_priv(bond_dev);
	slave = rcu_dereference(bond->curr_active_slave);
	if (!slave) {
		rcu_read_unlock();
		return -ENODEV;
	}

	if (!slave->dev->xfrmdev_ops ||
	    !slave->dev->xfrmdev_ops->xdo_dev_state_add ||
	    netif_is_bond_master(slave->dev)) {
		NL_SET_ERR_MSG_MOD(extack, "Slave does not support ipsec offload");
		rcu_read_unlock();
		return -EINVAL;
	}

	ipsec = kmalloc(sizeof(*ipsec), GFP_ATOMIC);
	if (!ipsec) {
		rcu_read_unlock();
		return -ENOMEM;
	}
	xs->xso.real_dev = slave->dev;

	err = slave->dev->xfrmdev_ops->xdo_dev_state_add(xs, extack);
	if (!err) {
		ipsec->xs = xs;
		INIT_LIST_HEAD(&ipsec->list);
		spin_lock_bh(&bond->ipsec_lock);
		list_add(&ipsec->list, &bond->ipsec_list);
		spin_unlock_bh(&bond->ipsec_lock);
	} else {
		kfree(ipsec);
	}
	rcu_read_unlock();
	return err;
}
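/* Re-offload every SA on the list to the (new) active slave; called from
 * bond_change_active_slave() after a failover so that xfrm state follows
 * the active device.
 */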
static void bond_ipsec_add_sa_all(struct bonding *bond)
{
	struct net_device *bond_dev = bond->dev;
	struct bond_ipsec *ipsec;
	struct slave *slave;

	rcu_read_lock();
	slave = rcu_dereference(bond->curr_active_slave);
	if (!slave)
		goto out;

	if (!slave->dev->xfrmdev_ops ||
	    !slave->dev->xfrmdev_ops->xdo_dev_state_add ||
	    netif_is_bond_master(slave->dev)) {
		spin_lock_bh(&bond->ipsec_lock);
		if (!list_empty(&bond->ipsec_list))
			slave_warn(bond_dev, slave->dev,
				   "%s: no slave xdo_dev_state_add\n",
				   __func__);
		spin_unlock_bh(&bond->ipsec_lock);
		goto out;
	}

	spin_lock_bh(&bond->ipsec_lock);
	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
		ipsec->xs->xso.real_dev = slave->dev;
		if (slave->dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) {
			slave_warn(bond_dev, slave->dev, "%s: failed to add SA\n", __func__);
			ipsec->xs->xso.real_dev = NULL;
		}
	}
	spin_unlock_bh(&bond->ipsec_lock);
out:
	rcu_read_unlock();
}

/**
 * bond_ipsec_del_sa - clear out this specific SA
 * @xs: pointer to transformer state struct
 **/
static void bond_ipsec_del_sa(struct xfrm_state *xs)
{
	struct net_device *bond_dev = xs->xso.dev;
	struct bond_ipsec *ipsec;
	struct bonding *bond;
	struct slave *slave;

	if (!bond_dev)
		return;

	rcu_read_lock();
	bond = netdev_priv(bond_dev);
	slave = rcu_dereference(bond->curr_active_slave);

	if (!slave)
		goto out;

	if (!xs->xso.real_dev)
		goto out;

	WARN_ON(xs->xso.real_dev != slave->dev);

	if (!slave->dev->xfrmdev_ops ||
	    !slave->dev->xfrmdev_ops->xdo_dev_state_delete ||
	    netif_is_bond_master(slave->dev)) {
		slave_warn(bond_dev, slave->dev, "%s: no slave xdo_dev_state_delete\n", __func__);
		goto out;
	}

	slave->dev->xfrmdev_ops->xdo_dev_state_delete(xs);
out:
	spin_lock_bh(&bond->ipsec_lock);
	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
		if (ipsec->xs == xs) {
			list_del(&ipsec->list);
			kfree(ipsec);
			break;
		}
	}
	spin_unlock_bh(&bond->ipsec_lock);
	rcu_read_unlock();
}
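/* Tear the offloaded SAs down on the current active slave; the companion
 * of bond_ipsec_add_sa_all(), called before the active slave changes.
 */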
static void bond_ipsec_del_sa_all(struct bonding *bond)
{
	struct net_device *bond_dev = bond->dev;
	struct bond_ipsec *ipsec;
	struct slave *slave;

	rcu_read_lock();
	slave = rcu_dereference(bond->curr_active_slave);
	if (!slave) {
		rcu_read_unlock();
		return;
	}

	spin_lock_bh(&bond->ipsec_lock);
	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
		if (!ipsec->xs->xso.real_dev)
			continue;

		if (!slave->dev->xfrmdev_ops ||
		    !slave->dev->xfrmdev_ops->xdo_dev_state_delete ||
		    netif_is_bond_master(slave->dev)) {
			slave_warn(bond_dev, slave->dev,
				   "%s: no slave xdo_dev_state_delete\n",
				   __func__);
		} else {
			slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs);
		}
		ipsec->xs->xso.real_dev = NULL;
	}
	spin_unlock_bh(&bond->ipsec_lock);
	rcu_read_unlock();
}

/**
 * bond_ipsec_offload_ok - can this packet use the xfrm hw offload
 * @skb: current data packet
 * @xs: pointer to transformer state struct
 **/
static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
{
	struct net_device *bond_dev = xs->xso.dev;
	struct net_device *real_dev;
	struct slave *curr_active;
	struct bonding *bond;
	int err;

	bond = netdev_priv(bond_dev);
	rcu_read_lock();
	curr_active = rcu_dereference(bond->curr_active_slave);
	real_dev = curr_active->dev;

	if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
		err = false;
		goto out;
	}

	if (!xs->xso.real_dev) {
		err = false;
		goto out;
	}

	if (!real_dev->xfrmdev_ops ||
	    !real_dev->xfrmdev_ops->xdo_dev_offload_ok ||
	    netif_is_bond_master(real_dev)) {
		err = false;
		goto out;
	}

	err = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs);
out:
	rcu_read_unlock();
	return err;
}

static const struct xfrmdev_ops bond_xfrmdev_ops = {
	.xdo_dev_state_add = bond_ipsec_add_sa,
	.xdo_dev_state_delete = bond_ipsec_del_sa,
	.xdo_dev_offload_ok = bond_ipsec_offload_ok,
};
#endif /* CONFIG_XFRM_OFFLOAD */

/*------------------------------- Link status -------------------------------*/

/* Set the carrier state for the master according to the state of its
 * slaves.  If any slaves are up, the master is up.  In 802.3ad mode,
 * do special 802.3ad magic.
 *
 * Returns zero if carrier state does not change, nonzero if it does.
 */
int bond_set_carrier(struct bonding *bond)
{
	struct list_head *iter;
	struct slave *slave;

	if (!bond_has_slaves(bond))
		goto down;

	if (BOND_MODE(bond) == BOND_MODE_8023AD)
		return bond_3ad_set_carrier(bond);

	bond_for_each_slave(bond, slave, iter) {
		if (slave->link == BOND_LINK_UP) {
			if (!netif_carrier_ok(bond->dev)) {
				netif_carrier_on(bond->dev);
				return 1;
			}
			return 0;
		}
	}

down:
	if (netif_carrier_ok(bond->dev)) {
		netif_carrier_off(bond->dev);
		return 1;
	}
	return 0;
}

/* Get link speed and duplex from the slave's base driver using ethtool.
 * If for some reason the call fails or the values are invalid, set speed
 * and duplex to SPEED_UNKNOWN/DUPLEX_UNKNOWN and return.  Return 1 if
 * speed or duplex settings are UNKNOWN; 0 otherwise.
 */
static int bond_update_speed_duplex(struct slave *slave)
{
	struct net_device *slave_dev = slave->dev;
	struct ethtool_link_ksettings ecmd;
	int res;

	slave->speed = SPEED_UNKNOWN;
	slave->duplex = DUPLEX_UNKNOWN;

	res = __ethtool_get_link_ksettings(slave_dev, &ecmd);
	if (res < 0)
		return 1;
	if (ecmd.base.speed == 0 || ecmd.base.speed == ((__u32)-1))
		return 1;
	switch (ecmd.base.duplex) {
	case DUPLEX_FULL:
	case DUPLEX_HALF:
		break;
	default:
		return 1;
	}

	slave->speed = ecmd.base.speed;
	slave->duplex = ecmd.base.duplex;

	return 0;
}

const char *bond_slave_link_status(s8 link)
{
	switch (link) {
	case BOND_LINK_UP:
		return "up";
	case BOND_LINK_FAIL:
		return "going down";
	case BOND_LINK_DOWN:
		return "down";
	case BOND_LINK_BACK:
		return "going back";
	default:
		return "unknown";
	}
}

/* if <dev> supports MII link status reporting, check its link status.
 *
 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
 * depending upon the setting of the use_carrier parameter.
 *
 * Return either BMSR_LSTATUS, meaning that the link is up (or we
 * can't tell and just pretend it is), or 0, meaning that the link is
 * down.
 *
 * If reporting is non-zero, instead of faking link up, return -1 if
 * both ETHTOOL and MII ioctls fail (meaning the device does not
 * support them).  If use_carrier is set, return whatever it says.
 * It'd be nice if there was a good way to tell if a driver supports
 * netif_carrier, but there really isn't.
 */
static int bond_check_dev_link(struct bonding *bond,
			       struct net_device *slave_dev, int reporting)
{
	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
	int (*ioctl)(struct net_device *, struct ifreq *, int);
	struct ifreq ifr;
	struct mii_ioctl_data *mii;

	if (!reporting && !netif_running(slave_dev))
		return 0;

	if (bond->params.use_carrier)
		return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;

	/* Try to get link status using Ethtool first. */
	if (slave_dev->ethtool_ops->get_link)
		return slave_dev->ethtool_ops->get_link(slave_dev) ?
			BMSR_LSTATUS : 0;

	/* Ethtool can't be used, fall back to MII ioctls. */
	ioctl = slave_ops->ndo_eth_ioctl;
	if (ioctl) {
		/* TODO: set pointer to correct ioctl on a per team member
		 *       basis to make this more efficient.  That is, once
		 *       we determine the correct ioctl, we will always
		 *       call it and not the others for that team
		 *       member.
		 */

		/* We cannot assume that SIOCGMIIPHY will also read a
		 * register; not all network drivers (e.g., e100)
		 * support that.
		 */

		/* Yes, the mii is overlaid on the ifreq.ifr_ifru */
		strscpy_pad(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
		mii = if_mii(&ifr);
		if (ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
			mii->reg_num = MII_BMSR;
			if (ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0)
				return mii->val_out & BMSR_LSTATUS;
		}
	}

	/* If reporting, report that either there's no ndo_eth_ioctl,
	 * or both SIOCGMIIREG and get_link failed (meaning that we
	 * cannot report link status).  If not reporting, pretend
	 * we're ok.
	 */
	return reporting ? -1 : BMSR_LSTATUS;
}
/*----------------------------- Multicast list ------------------------------*/

/* Push the promiscuity flag down to appropriate slaves */
static int bond_set_promiscuity(struct bonding *bond, int inc)
{
	struct list_head *iter;
	int err = 0;

	if (bond_uses_primary(bond)) {
		struct slave *curr_active = rtnl_dereference(bond->curr_active_slave);

		if (curr_active)
			err = dev_set_promiscuity(curr_active->dev, inc);
	} else {
		struct slave *slave;

		bond_for_each_slave(bond, slave, iter) {
			err = dev_set_promiscuity(slave->dev, inc);
			if (err)
				return err;
		}
	}
	return err;
}

/* Push the allmulti flag down to all slaves */
static int bond_set_allmulti(struct bonding *bond, int inc)
{
	struct list_head *iter;
	int err = 0;

	if (bond_uses_primary(bond)) {
		struct slave *curr_active = rtnl_dereference(bond->curr_active_slave);

		if (curr_active)
			err = dev_set_allmulti(curr_active->dev, inc);
	} else {
		struct slave *slave;

		bond_for_each_slave(bond, slave, iter) {
			err = dev_set_allmulti(slave->dev, inc);
			if (err)
				return err;
		}
	}
	return err;
}

/* Retrieve the list of registered multicast addresses for the bonding
 * device and retransmit an IGMP JOIN request to the current active
 * slave.
 */
static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
{
	struct bonding *bond = container_of(work, struct bonding,
					    mcast_work.work);

	if (!rtnl_trylock()) {
		queue_delayed_work(bond->wq, &bond->mcast_work, 1);
		return;
	}
	call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev);

	if (bond->igmp_retrans > 1) {
		bond->igmp_retrans--;
		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
	}
	rtnl_unlock();
}

/* Flush bond's hardware addresses from slave */
static void bond_hw_addr_flush(struct net_device *bond_dev,
			       struct net_device *slave_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);

	dev_uc_unsync(slave_dev, bond_dev);
	dev_mc_unsync(slave_dev, bond_dev);

	if (BOND_MODE(bond) == BOND_MODE_8023AD)
		dev_mc_del(slave_dev, lacpdu_mcast_addr);
}

/*--------------------------- Active slave change ---------------------------*/

/* Update the hardware address list and promisc/allmulti for the new and
 * old active slaves (if any).  Modes that are not using primary keep all
 * slaves up to date at all times; only the modes that use primary need to
 * call this function to swap these settings during a failover.
 */
static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
			      struct slave *old_active)
{
	if (old_active) {
		if (bond->dev->flags & IFF_PROMISC)
			dev_set_promiscuity(old_active->dev, -1);

		if (bond->dev->flags & IFF_ALLMULTI)
			dev_set_allmulti(old_active->dev, -1);

		if (bond->dev->flags & IFF_UP)
			bond_hw_addr_flush(bond->dev, old_active->dev);
	}

	if (new_active) {
		/* FIXME: Signal errors upstream. */
		if (bond->dev->flags & IFF_PROMISC)
			dev_set_promiscuity(new_active->dev, 1);

		if (bond->dev->flags & IFF_ALLMULTI)
			dev_set_allmulti(new_active->dev, 1);

		if (bond->dev->flags & IFF_UP) {
			netif_addr_lock_bh(bond->dev);
			dev_uc_sync(new_active->dev, bond->dev);
			dev_mc_sync(new_active->dev, bond->dev);
			netif_addr_unlock_bh(bond->dev);
		}
	}
}

/**
 * bond_set_dev_addr - clone slave's address to bond
 * @bond_dev: bond net device
 * @slave_dev: slave net device
 *
 * Should be called with RTNL held.
 */
static int bond_set_dev_addr(struct net_device *bond_dev,
			     struct net_device *slave_dev)
{
	int err;

	slave_dbg(bond_dev, slave_dev, "bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n",
		  bond_dev, slave_dev, slave_dev->addr_len);
	err = dev_pre_changeaddr_notify(bond_dev, slave_dev->dev_addr, NULL);
	if (err)
		return err;

	__dev_addr_set(bond_dev, slave_dev->dev_addr, slave_dev->addr_len);
	bond_dev->addr_assign_type = NET_ADDR_STOLEN;
	call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev);
	return 0;
}
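/* Find the slave, other than @new_active, that currently carries the
 * bond's MAC address; fail_over_mac=follow uses it as the swap partner
 * when rotating MAC addresses during a failover.
 */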
static struct slave *bond_get_old_active(struct bonding *bond,
					 struct slave *new_active)
{
	struct slave *slave;
	struct list_head *iter;

	bond_for_each_slave(bond, slave, iter) {
		if (slave == new_active)
			continue;

		if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr))
			return slave;
	}

	return NULL;
}

/* bond_do_fail_over_mac
 *
 * Perform special MAC address swapping for fail_over_mac settings
 *
 * Called with RTNL
 */
static void bond_do_fail_over_mac(struct bonding *bond,
				  struct slave *new_active,
				  struct slave *old_active)
{
	u8 tmp_mac[MAX_ADDR_LEN];
	struct sockaddr_storage ss;
	int rv;

	switch (bond->params.fail_over_mac) {
	case BOND_FOM_ACTIVE:
		if (new_active) {
			rv = bond_set_dev_addr(bond->dev, new_active->dev);
			if (rv)
				slave_err(bond->dev, new_active->dev, "Error %d setting bond MAC from slave\n",
					  -rv);
		}
		break;
	case BOND_FOM_FOLLOW:
		/* if new_active && old_active, swap them
		 * if just old_active, do nothing (going to no active slave)
		 * if just new_active, set new_active to bond's MAC
		 */
		if (!new_active)
			return;

		if (!old_active)
			old_active = bond_get_old_active(bond, new_active);

		if (old_active) {
			bond_hw_addr_copy(tmp_mac, new_active->dev->dev_addr,
					  new_active->dev->addr_len);
			bond_hw_addr_copy(ss.__data,
					  old_active->dev->dev_addr,
					  old_active->dev->addr_len);
			ss.ss_family = new_active->dev->type;
		} else {
			bond_hw_addr_copy(ss.__data, bond->dev->dev_addr,
					  bond->dev->addr_len);
			ss.ss_family = bond->dev->type;
		}

		rv = dev_set_mac_address(new_active->dev,
					 (struct sockaddr *)&ss, NULL);
		if (rv) {
			slave_err(bond->dev, new_active->dev, "Error %d setting MAC of new active slave\n",
				  -rv);
			goto out;
		}

		if (!old_active)
			goto out;

		bond_hw_addr_copy(ss.__data, tmp_mac,
				  new_active->dev->addr_len);
		ss.ss_family = old_active->dev->type;

		rv = dev_set_mac_address(old_active->dev,
					 (struct sockaddr *)&ss, NULL);
		if (rv)
			slave_err(bond->dev, old_active->dev, "Error %d setting MAC of old active slave\n",
				  -rv);
out:
		break;
	default:
		netdev_err(bond->dev, "bond_do_fail_over_mac impossible: bad policy %d\n",
			   bond->params.fail_over_mac);
		break;
	}
}

/**
 * bond_choose_primary_or_current - select the primary or high priority slave
 * @bond: our bonding struct
 *
 * - Check if there is a primary link. If the primary link was set and is up,
 *   go on and do link reselection.
 *
 * - If primary link is not set or down, find the highest priority link.
 *   If the highest priority link is not current slave, set it as primary
 *   link and do link reselection.
 */
static struct slave *bond_choose_primary_or_current(struct bonding *bond)
{
	struct slave *prim = rtnl_dereference(bond->primary_slave);
	struct slave *curr = rtnl_dereference(bond->curr_active_slave);
	struct slave *slave, *hprio = NULL;
	struct list_head *iter;

	if (!prim || prim->link != BOND_LINK_UP) {
		bond_for_each_slave(bond, slave, iter) {
			if (slave->link == BOND_LINK_UP) {
				hprio = hprio ?: slave;
				if (slave->prio > hprio->prio)
					hprio = slave;
			}
		}

		if (hprio && hprio != curr) {
			prim = hprio;
			goto link_reselect;
		}

		if (!curr || curr->link != BOND_LINK_UP)
			return NULL;
		return curr;
	}

	if (bond->force_primary) {
		bond->force_primary = false;
		return prim;
	}

link_reselect:
	if (!curr || curr->link != BOND_LINK_UP)
		return prim;

	/* At this point, prim and curr are both up */
	switch (bond->params.primary_reselect) {
	case BOND_PRI_RESELECT_ALWAYS:
		return prim;
	case BOND_PRI_RESELECT_BETTER:
		if (prim->speed < curr->speed)
			return curr;
		if (prim->speed == curr->speed && prim->duplex <= curr->duplex)
			return curr;
		return prim;
	case BOND_PRI_RESELECT_FAILURE:
		return curr;
	default:
		netdev_err(bond->dev, "impossible primary_reselect %d\n",
			   bond->params.primary_reselect);
		return curr;
	}
}

/**
 * bond_find_best_slave - select the best available slave to be the active one
 * @bond: our bonding struct
 */
static struct slave *bond_find_best_slave(struct bonding *bond)
{
	struct slave *slave, *bestslave = NULL;
	struct list_head *iter;
	int mintime = bond->params.updelay;

	slave = bond_choose_primary_or_current(bond);
	if (slave)
		return slave;

	bond_for_each_slave(bond, slave, iter) {
		if (slave->link == BOND_LINK_UP)
			return slave;
		if (slave->link == BOND_LINK_BACK && bond_slave_is_up(slave) &&
		    slave->delay < mintime) {
			mintime = slave->delay;
			bestslave = slave;
		}
	}

	return bestslave;
}
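/* Peer notification bookkeeping: bond_change_active_slave() below loads
 * send_peer_notif with num_peer_notif * peer_notif_delay, and the counter
 * is decremented as notifications go out (the periodic decrement lives
 * outside this excerpt).  The modulo test in bond_should_notify_peers()
 * therefore lets a notification through only once every peer_notif_delay
 * intervals.
 */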
/* must be called in RCU critical section or with RTNL held */
static bool bond_should_notify_peers(struct bonding *bond)
{
	struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave);

	if (!slave || !bond->send_peer_notif ||
	    bond->send_peer_notif %
	    max(1, bond->params.peer_notif_delay) != 0 ||
	    !netif_carrier_ok(bond->dev) ||
	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
		return false;

	netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n",
		   slave ? slave->dev->name : "NULL");

	return true;
}

/**
 * bond_change_active_slave - change the active slave into the specified one
 * @bond: our bonding struct
 * @new_active: the new slave to make the active one
 *
 * Set the new slave to the bond's settings and unset them on the old
 * curr_active_slave.
 * Setting include flags, mc-list, promiscuity, allmulti, etc.
 *
 * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP,
 * because it is apparently the best available slave we have, even though its
 * updelay hasn't timed out yet.
 *
 * Caller must hold RTNL.
 */
void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
{
	struct slave *old_active;

	ASSERT_RTNL();

	old_active = rtnl_dereference(bond->curr_active_slave);

	if (old_active == new_active)
		return;

#ifdef CONFIG_XFRM_OFFLOAD
	bond_ipsec_del_sa_all(bond);
#endif /* CONFIG_XFRM_OFFLOAD */

	if (new_active) {
		new_active->last_link_up = jiffies;

		if (new_active->link == BOND_LINK_BACK) {
			if (bond_uses_primary(bond)) {
				slave_info(bond->dev, new_active->dev, "making interface the new active one %d ms earlier\n",
					   (bond->params.updelay - new_active->delay) * bond->params.miimon);
			}

			new_active->delay = 0;
			bond_set_slave_link_state(new_active, BOND_LINK_UP,
						  BOND_SLAVE_NOTIFY_NOW);

			if (BOND_MODE(bond) == BOND_MODE_8023AD)
				bond_3ad_handle_link_change(new_active, BOND_LINK_UP);

			if (bond_is_lb(bond))
				bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
		} else {
			if (bond_uses_primary(bond))
				slave_info(bond->dev, new_active->dev, "making interface the new active one\n");
		}
	}

	if (bond_uses_primary(bond))
		bond_hw_addr_swap(bond, new_active, old_active);

	if (bond_is_lb(bond)) {
		bond_alb_handle_active_change(bond, new_active);
		if (old_active)
			bond_set_slave_inactive_flags(old_active,
						      BOND_SLAVE_NOTIFY_NOW);
		if (new_active)
			bond_set_slave_active_flags(new_active,
						    BOND_SLAVE_NOTIFY_NOW);
	} else {
		rcu_assign_pointer(bond->curr_active_slave, new_active);
	}

	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {
		if (old_active)
			bond_set_slave_inactive_flags(old_active,
						      BOND_SLAVE_NOTIFY_NOW);

		if (new_active) {
			bool should_notify_peers = false;

			bond_set_slave_active_flags(new_active,
						    BOND_SLAVE_NOTIFY_NOW);

			if (bond->params.fail_over_mac)
				bond_do_fail_over_mac(bond, new_active,
						      old_active);

			if (netif_running(bond->dev)) {
				bond->send_peer_notif =
					bond->params.num_peer_notif *
					max(1, bond->params.peer_notif_delay);
				should_notify_peers =
					bond_should_notify_peers(bond);
			}

			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
			if (should_notify_peers) {
				bond->send_peer_notif--;
				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
							 bond->dev);
			}
		}
	}

#ifdef CONFIG_XFRM_OFFLOAD
	bond_ipsec_add_sa_all(bond);
#endif /* CONFIG_XFRM_OFFLOAD */

	/* resend IGMP joins since active slave has changed or
	 * all were sent on curr_active_slave.
	 * resend only if bond is brought up with the affected
	 * bonding modes and the retransmission is enabled
	 */
	if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) &&
	    ((bond_uses_primary(bond) && new_active) ||
	     BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) {
		bond->igmp_retrans = bond->params.resend_igmp;
		queue_delayed_work(bond->wq, &bond->mcast_work, 1);
	}
}

/**
 * bond_select_active_slave - select a new active slave, if needed
 * @bond: our bonding struct
 *
 * This function should be called when one of the following occurs:
 * - The old curr_active_slave has been released or lost its link.
 * - The primary_slave has got its link back.
 * - A slave has got its link back and there's no old curr_active_slave.
 *
 * Caller must hold RTNL.
 */
void bond_select_active_slave(struct bonding *bond)
{
	struct slave *best_slave;
	int rv;

	ASSERT_RTNL();

	best_slave = bond_find_best_slave(bond);
	if (best_slave != rtnl_dereference(bond->curr_active_slave)) {
		bond_change_active_slave(bond, best_slave);
		rv = bond_set_carrier(bond);
		if (!rv)
			return;

		if (netif_carrier_ok(bond->dev))
			netdev_info(bond->dev, "active interface up!\n");
		else
			netdev_info(bond->dev, "now running without any active interface!\n");
	}
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static inline int slave_enable_netpoll(struct slave *slave)
{
	struct netpoll *np;
	int err = 0;

	np = kzalloc(sizeof(*np), GFP_KERNEL);
	err = -ENOMEM;
	if (!np)
		goto out;

	err = __netpoll_setup(np, slave->dev);
	if (err) {
		kfree(np);
		goto out;
	}
	slave->np = np;
out:
	return err;
}
static inline void slave_disable_netpoll(struct slave *slave)
{
	struct netpoll *np = slave->np;

	if (!np)
		return;

	slave->np = NULL;

	__netpoll_free(np);
}

static void bond_poll_controller(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct slave *slave = NULL;
	struct list_head *iter;
	struct ad_info ad_info;

	if (BOND_MODE(bond) == BOND_MODE_8023AD)
		if (bond_3ad_get_active_agg_info(bond, &ad_info))
			return;

	bond_for_each_slave_rcu(bond, slave, iter) {
		if (!bond_slave_is_up(slave))
			continue;

		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
			struct aggregator *agg =
				SLAVE_AD_INFO(slave)->port.aggregator;

			if (agg &&
			    agg->aggregator_identifier != ad_info.aggregator_id)
				continue;
		}

		netpoll_poll_dev(slave->dev);
	}
}

static void bond_netpoll_cleanup(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct list_head *iter;
	struct slave *slave;

	bond_for_each_slave(bond, slave, iter)
		if (bond_slave_is_up(slave))
			slave_disable_netpoll(slave);
}

static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
{
	struct bonding *bond = netdev_priv(dev);
	struct list_head *iter;
	struct slave *slave;
	int err = 0;

	bond_for_each_slave(bond, slave, iter) {
		err = slave_enable_netpoll(slave);
		if (err) {
			bond_netpoll_cleanup(dev);
			break;
		}
	}
	return err;
}
#else
static inline int slave_enable_netpoll(struct slave *slave)
{
	return 0;
}
static inline void slave_disable_netpoll(struct slave *slave)
{
}
static void bond_netpoll_cleanup(struct net_device *bond_dev)
{
}
#endif

/*---------------------------------- IOCTL ----------------------------------*/
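/* Feature computation below relies on netdev_increment_features():
 * roughly, features in NETIF_F_ONE_FOR_ALL are advertised if at least one
 * slave has them, everything else only if all slaves agree
 * (NETIF_F_ALL_FOR_ALL).  See netdev_features.h for the exact split; the
 * summary here is informational.
 */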
static netdev_features_t bond_fix_features(struct net_device *dev,
					   netdev_features_t features)
{
	struct bonding *bond = netdev_priv(dev);
	struct list_head *iter;
	netdev_features_t mask;
	struct slave *slave;

	mask = features;

	features &= ~NETIF_F_ONE_FOR_ALL;
	features |= NETIF_F_ALL_FOR_ALL;

	bond_for_each_slave(bond, slave, iter) {
		features = netdev_increment_features(features,
						     slave->dev->features,
						     mask);
	}
	features = netdev_add_tso_features(features, mask);

	return features;
}

#define BOND_VLAN_FEATURES	(NETIF_F_HW_CSUM | NETIF_F_SG | \
				 NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | \
				 NETIF_F_HIGHDMA | NETIF_F_LRO)

#define BOND_ENC_FEATURES	(NETIF_F_HW_CSUM | NETIF_F_SG | \
				 NETIF_F_RXCSUM | NETIF_F_GSO_SOFTWARE)

#define BOND_MPLS_FEATURES	(NETIF_F_HW_CSUM | NETIF_F_SG | \
				 NETIF_F_GSO_SOFTWARE)


static void bond_compute_features(struct bonding *bond)
{
	unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
					IFF_XMIT_DST_RELEASE_PERM;
	netdev_features_t vlan_features = BOND_VLAN_FEATURES;
	netdev_features_t enc_features = BOND_ENC_FEATURES;
#ifdef CONFIG_XFRM_OFFLOAD
	netdev_features_t xfrm_features = BOND_XFRM_FEATURES;
#endif /* CONFIG_XFRM_OFFLOAD */
	netdev_features_t mpls_features = BOND_MPLS_FEATURES;
	struct net_device *bond_dev = bond->dev;
	struct list_head *iter;
	struct slave *slave;
	unsigned short max_hard_header_len = ETH_HLEN;
	unsigned int tso_max_size = TSO_MAX_SIZE;
	u16 tso_max_segs = TSO_MAX_SEGS;

	if (!bond_has_slaves(bond))
		goto done;
	vlan_features &= NETIF_F_ALL_FOR_ALL;
	mpls_features &= NETIF_F_ALL_FOR_ALL;

	bond_for_each_slave(bond, slave, iter) {
		vlan_features = netdev_increment_features(vlan_features,
			slave->dev->vlan_features, BOND_VLAN_FEATURES);

		enc_features = netdev_increment_features(enc_features,
							 slave->dev->hw_enc_features,
							 BOND_ENC_FEATURES);

#ifdef CONFIG_XFRM_OFFLOAD
		xfrm_features = netdev_increment_features(xfrm_features,
							  slave->dev->hw_enc_features,
							  BOND_XFRM_FEATURES);
#endif /* CONFIG_XFRM_OFFLOAD */

		mpls_features = netdev_increment_features(mpls_features,
							  slave->dev->mpls_features,
							  BOND_MPLS_FEATURES);

		dst_release_flag &= slave->dev->priv_flags;
		if (slave->dev->hard_header_len > max_hard_header_len)
			max_hard_header_len = slave->dev->hard_header_len;

		tso_max_size = min(tso_max_size, slave->dev->tso_max_size);
		tso_max_segs = min(tso_max_segs, slave->dev->tso_max_segs);
	}
	bond_dev->hard_header_len = max_hard_header_len;

done:
	bond_dev->vlan_features = vlan_features;
	bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
				    NETIF_F_HW_VLAN_CTAG_TX |
				    NETIF_F_HW_VLAN_STAG_TX;
#ifdef CONFIG_XFRM_OFFLOAD
	bond_dev->hw_enc_features |= xfrm_features;
#endif /* CONFIG_XFRM_OFFLOAD */
	bond_dev->mpls_features = mpls_features;
	netif_set_tso_max_segs(bond_dev, tso_max_segs);
	netif_set_tso_max_size(bond_dev, tso_max_size);

	bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
	if ((bond_dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) &&
	    dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
		bond_dev->priv_flags |= IFF_XMIT_DST_RELEASE;

	netdev_change_features(bond_dev);
}
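/* Take over link-layer parameters from a non-Ethernet slave (e.g.
 * ARPHRD_INFINIBAND): header ops, device type, header lengths, address
 * length, broadcast address and, for point-to-point slaves, the
 * IFF_POINTOPOINT/IFF_NOARP flags.  Called only for the first slave, from
 * bond_enslave(), when the slave's type is not ARPHRD_ETHER.
 */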
static void bond_setup_by_slave(struct net_device *bond_dev,
				struct net_device *slave_dev)
{
	bool was_up = !!(bond_dev->flags & IFF_UP);

	dev_close(bond_dev);

	bond_dev->header_ops = slave_dev->header_ops;

	bond_dev->type = slave_dev->type;
	bond_dev->hard_header_len = slave_dev->hard_header_len;
	bond_dev->needed_headroom = slave_dev->needed_headroom;
	bond_dev->addr_len = slave_dev->addr_len;

	memcpy(bond_dev->broadcast, slave_dev->broadcast,
	       slave_dev->addr_len);

	if (slave_dev->flags & IFF_POINTOPOINT) {
		bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
		bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
	}
	if (was_up)
		dev_open(bond_dev, NULL);
}

/* On bonding slaves other than the currently active slave, suppress
 * duplicates except for alb non-mcast/bcast.
 */
static bool bond_should_deliver_exact_match(struct sk_buff *skb,
					    struct slave *slave,
					    struct bonding *bond)
{
	if (bond_is_slave_inactive(slave)) {
		if (BOND_MODE(bond) == BOND_MODE_ALB &&
		    skb->pkt_type != PACKET_BROADCAST &&
		    skb->pkt_type != PACKET_MULTICAST)
			return false;
		return true;
	}
	return false;
}
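/* bond_handle_frame() is the receive-side entry point: it runs for every
 * packet that arrives on an enslaved device (the bonding driver registers
 * it per slave with netdev_rx_handler_register() during enslavement,
 * outside this excerpt) and decides whether the skb is consumed, passed
 * up as-is, or re-presented as arriving on the bond itself.
 */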
static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct slave *slave;
	struct bonding *bond;
	int (*recv_probe)(const struct sk_buff *, struct bonding *,
			  struct slave *);
	int ret = RX_HANDLER_ANOTHER;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb))
		return RX_HANDLER_CONSUMED;

	*pskb = skb;

	slave = bond_slave_get_rcu(skb->dev);
	bond = slave->bond;

	recv_probe = READ_ONCE(bond->recv_probe);
	if (recv_probe) {
		ret = recv_probe(skb, bond, slave);
		if (ret == RX_HANDLER_CONSUMED) {
			consume_skb(skb);
			return ret;
		}
	}

	/*
	 * For packets determined by bond_should_deliver_exact_match() call to
	 * be suppressed we want to make an exception for link-local packets.
	 * This is necessary for e.g. LLDP daemons to be able to monitor
	 * inactive slave links without being forced to bind to them
	 * explicitly.
	 *
	 * At the same time, packets that are passed to the bonding master
	 * (including link-local ones) can have their originating interface
	 * determined via PACKET_ORIGDEV socket option.
	 */
	if (bond_should_deliver_exact_match(skb, slave, bond)) {
		if (is_link_local_ether_addr(eth_hdr(skb)->h_dest))
			return RX_HANDLER_PASS;
		return RX_HANDLER_EXACT;
	}

	skb->dev = bond->dev;

	if (BOND_MODE(bond) == BOND_MODE_ALB &&
	    netif_is_bridge_port(bond->dev) &&
	    skb->pkt_type == PACKET_HOST) {

		if (unlikely(skb_cow_head(skb,
					  skb->data - skb_mac_header(skb)))) {
			kfree_skb(skb);
			return RX_HANDLER_CONSUMED;
		}
		bond_hw_addr_copy(eth_hdr(skb)->h_dest, bond->dev->dev_addr,
				  bond->dev->addr_len);
	}

	return ret;
}

static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond)
{
	switch (BOND_MODE(bond)) {
	case BOND_MODE_ROUNDROBIN:
		return NETDEV_LAG_TX_TYPE_ROUNDROBIN;
	case BOND_MODE_ACTIVEBACKUP:
		return NETDEV_LAG_TX_TYPE_ACTIVEBACKUP;
	case BOND_MODE_BROADCAST:
		return NETDEV_LAG_TX_TYPE_BROADCAST;
	case BOND_MODE_XOR:
	case BOND_MODE_8023AD:
		return NETDEV_LAG_TX_TYPE_HASH;
	default:
		return NETDEV_LAG_TX_TYPE_UNKNOWN;
	}
}

static enum netdev_lag_hash bond_lag_hash_type(struct bonding *bond,
					       enum netdev_lag_tx_type type)
{
	if (type != NETDEV_LAG_TX_TYPE_HASH)
		return NETDEV_LAG_HASH_NONE;

	switch (bond->params.xmit_policy) {
	case BOND_XMIT_POLICY_LAYER2:
		return NETDEV_LAG_HASH_L2;
	case BOND_XMIT_POLICY_LAYER34:
		return NETDEV_LAG_HASH_L34;
	case BOND_XMIT_POLICY_LAYER23:
		return NETDEV_LAG_HASH_L23;
	case BOND_XMIT_POLICY_ENCAP23:
		return NETDEV_LAG_HASH_E23;
	case BOND_XMIT_POLICY_ENCAP34:
		return NETDEV_LAG_HASH_E34;
	case BOND_XMIT_POLICY_VLAN_SRCMAC:
		return NETDEV_LAG_HASH_VLAN_SRCMAC;
	default:
		return NETDEV_LAG_HASH_UNKNOWN;
	}
}

static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave,
				      struct netlink_ext_ack *extack)
{
	struct netdev_lag_upper_info lag_upper_info;
	enum netdev_lag_tx_type type;
	int err;

	type = bond_lag_tx_type(bond);
	lag_upper_info.tx_type = type;
	lag_upper_info.hash_type = bond_lag_hash_type(bond, type);

	err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
					   &lag_upper_info, extack);
	if (err)
		return err;

	slave->dev->flags |= IFF_SLAVE;
	return 0;
}

static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave)
{
	netdev_upper_dev_unlink(slave->dev, bond->dev);
	slave->dev->flags &= ~IFF_SLAVE;
}

static void slave_kobj_release(struct kobject *kobj)
{
	struct slave *slave = to_slave(kobj);
	struct bonding *bond = bond_get_bond_by_slave(slave);

	cancel_delayed_work_sync(&slave->notify_work);
	if (BOND_MODE(bond) == BOND_MODE_8023AD)
		kfree(SLAVE_AD_INFO(slave));

	kfree(slave);
}

static struct kobj_type slave_ktype = {
	.release = slave_kobj_release,
#ifdef CONFIG_SYSFS
	.sysfs_ops = &slave_sysfs_ops,
#endif
};

static int bond_kobj_init(struct slave *slave)
{
	int err;

	err = kobject_init_and_add(&slave->kobj, &slave_ktype,
				   &(slave->dev->dev.kobj), "bonding_slave");
	if (err)
		kobject_put(&slave->kobj);

	return err;
}
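/* Allocate and initialize a struct slave.  The embedded kobject owns the
 * lifetime: on failure paths and on release the final kobject_put() ends
 * up in slave_kobj_release() above, which also frees the 802.3ad per-port
 * data allocated here.
 */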
static struct slave *bond_alloc_slave(struct bonding *bond,
				      struct net_device *slave_dev)
{
	struct slave *slave = NULL;

	slave = kzalloc(sizeof(*slave), GFP_KERNEL);
	if (!slave)
		return NULL;

	slave->bond = bond;
	slave->dev = slave_dev;
	INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work);

	if (bond_kobj_init(slave))
		return NULL;

	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
		SLAVE_AD_INFO(slave) = kzalloc(sizeof(struct ad_slave_info),
					       GFP_KERNEL);
		if (!SLAVE_AD_INFO(slave)) {
			kobject_put(&slave->kobj);
			return NULL;
		}
	}

	return slave;
}

static void bond_fill_ifbond(struct bonding *bond, struct ifbond *info)
{
	info->bond_mode = BOND_MODE(bond);
	info->miimon = bond->params.miimon;
	info->num_slaves = bond->slave_cnt;
}

static void bond_fill_ifslave(struct slave *slave, struct ifslave *info)
{
	strcpy(info->slave_name, slave->dev->name);
	info->link = slave->link;
	info->state = bond_slave_state(slave);
	info->link_failure_count = slave->link_failure_count;
}

static void bond_netdev_notify_work(struct work_struct *_work)
{
	struct slave *slave = container_of(_work, struct slave,
					   notify_work.work);

	if (rtnl_trylock()) {
		struct netdev_bonding_info binfo;

		bond_fill_ifslave(slave, &binfo.slave);
		bond_fill_ifbond(slave->bond, &binfo.master);
		netdev_bonding_info_change(slave->dev, &binfo);
		rtnl_unlock();
	} else {
		queue_delayed_work(slave->bond->wq, &slave->notify_work, 1);
	}
}

void bond_queue_slave_event(struct slave *slave)
{
	queue_delayed_work(slave->bond->wq, &slave->notify_work, 0);
}

void bond_lower_state_changed(struct slave *slave)
{
	struct netdev_lag_lower_state_info info;

	info.link_up = slave->link == BOND_LINK_UP ||
		       slave->link == BOND_LINK_FAIL;
	info.tx_enabled = bond_is_active_slave(slave);
	netdev_lower_state_changed(slave->dev, &info);
}

#define BOND_NL_ERR(bond_dev, extack, errmsg) do {		\
	if (extack)						\
		NL_SET_ERR_MSG(extack, errmsg);			\
	else							\
		netdev_err(bond_dev, "Error: %s\n", errmsg);	\
} while (0)

#define SLAVE_NL_ERR(bond_dev, slave_dev, extack, errmsg) do {		\
	if (extack)							\
		NL_SET_ERR_MSG(extack, errmsg);				\
	else								\
		slave_err(bond_dev, slave_dev, "Error: %s\n", errmsg);	\
} while (0)

/* The bonding driver uses ether_setup() to convert a master bond device
 * to ARPHRD_ETHER, which resets the target netdevice's flags so we always
 * have to restore the IFF_MASTER flag, and only restore IFF_SLAVE and IFF_UP
 * if they were set
 */
static void bond_ether_setup(struct net_device *bond_dev)
{
	unsigned int flags = bond_dev->flags & (IFF_SLAVE | IFF_UP);

	ether_setup(bond_dev);
	bond_dev->flags |= IFF_MASTER | flags;
	bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
}

void bond_xdp_set_features(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	xdp_features_t val = NETDEV_XDP_ACT_MASK;
	struct list_head *iter;
	struct slave *slave;

	ASSERT_RTNL();

	if (!bond_xdp_check(bond) || !bond_has_slaves(bond)) {
		xdp_clear_features_flag(bond_dev);
		return;
	}

	bond_for_each_slave(bond, slave, iter)
		val &= slave->dev->xdp_features;

	val &= ~NETDEV_XDP_ACT_XSK_ZEROCOPY;

	xdp_set_features_flag(bond_dev, val);
}

/* enslave device <slave> to bond device <master> */
int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
		 struct netlink_ext_ack *extack)
{
	struct bonding *bond = netdev_priv(bond_dev);
	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
	struct slave *new_slave = NULL, *prev_slave;
	struct sockaddr_storage ss;
	int link_reporting;
	int res = 0, i;

	if (slave_dev->flags & IFF_MASTER &&
	    !netif_is_bond_master(slave_dev)) {
		BOND_NL_ERR(bond_dev, extack,
			    "Device type (master device) cannot be enslaved");
		return -EPERM;
	}

	if (!bond->params.use_carrier &&
	    slave_dev->ethtool_ops->get_link == NULL &&
	    slave_ops->ndo_eth_ioctl == NULL) {
		slave_warn(bond_dev, slave_dev, "no link monitoring support\n");
	}

	/* already in-use? */
	if (netdev_is_rx_handler_busy(slave_dev)) {
		SLAVE_NL_ERR(bond_dev, slave_dev, extack,
			     "Device is in use and cannot be enslaved");
		return -EBUSY;
	}

	if (bond_dev == slave_dev) {
		BOND_NL_ERR(bond_dev, extack, "Cannot enslave bond to itself.");
		return -EPERM;
	}

	/* vlan challenged mutual exclusion */
	/* no need to lock since we're protected by rtnl_lock */
	if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
		slave_dbg(bond_dev, slave_dev, "is NETIF_F_VLAN_CHALLENGED\n");
		if (vlan_uses_dev(bond_dev)) {
			SLAVE_NL_ERR(bond_dev, slave_dev, extack,
				     "Can not enslave VLAN challenged device to VLAN enabled bond");
			return -EPERM;
		} else {
			slave_warn(bond_dev, slave_dev, "enslaved VLAN challenged slave. Adding VLANs will be blocked as long as it is part of bond.\n");
		}
	} else {
		slave_dbg(bond_dev, slave_dev, "is !NETIF_F_VLAN_CHALLENGED\n");
	}

	if (slave_dev->features & NETIF_F_HW_ESP)
		slave_dbg(bond_dev, slave_dev, "is esp-hw-offload capable\n");

	/* Old ifenslave binaries are no longer supported.  These can
	 * be identified with moderate accuracy by the state of the slave:
	 * the current ifenslave will set the interface down prior to
	 * enslaving it; the old ifenslave will not.
	 */
	if (slave_dev->flags & IFF_UP) {
		SLAVE_NL_ERR(bond_dev, slave_dev, extack,
			     "Device can not be enslaved while up");
		return -EPERM;
	}

	/* set bonding device ether type by slave - bonding netdevices are
	 * created with ether_setup, so when the slave type is not ARPHRD_ETHER
	 * there is a need to override some of the type dependent attribs/funcs.
1892 *
1893 * bond ether type mutual exclusion - don't allow slaves of dissimilar
1894 * ether type (e.g. ARPHRD_ETHER and ARPHRD_INFINIBAND) to share the same bond
1895 */
1896 if (!bond_has_slaves(bond)) {
1897 if (bond_dev->type != slave_dev->type) {
1898 slave_dbg(bond_dev, slave_dev, "change device type from %d to %d\n",
1899 bond_dev->type, slave_dev->type);
1900
1901 res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE,
1902 bond_dev);
1903 res = notifier_to_errno(res);
1904 if (res) {
1905 slave_err(bond_dev, slave_dev, "refused to change device type\n");
1906 return -EBUSY;
1907 }
1908
1909 /* Flush unicast and multicast addresses */
1910 dev_uc_flush(bond_dev);
1911 dev_mc_flush(bond_dev);
1912
1913 if (slave_dev->type != ARPHRD_ETHER)
1914 bond_setup_by_slave(bond_dev, slave_dev);
1915 else
1916 bond_ether_setup(bond_dev);
1917
1918 call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
1919 bond_dev);
1920 }
1921 } else if (bond_dev->type != slave_dev->type) {
1922 SLAVE_NL_ERR(bond_dev, slave_dev, extack,
1923 "Device type is different from other slaves");
1924 return -EINVAL;
1925 }
1926
1927 if (slave_dev->type == ARPHRD_INFINIBAND &&
1928 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
1929 SLAVE_NL_ERR(bond_dev, slave_dev, extack,
1930 "Only active-backup mode is supported for infiniband slaves");
1931 res = -EOPNOTSUPP;
1932 goto err_undo_flags;
1933 }
1934
1935 if (!slave_ops->ndo_set_mac_address ||
1936 slave_dev->type == ARPHRD_INFINIBAND) {
1937 slave_warn(bond_dev, slave_dev, "The slave device specified does not support setting the MAC address\n");
1938 if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
1939 bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
1940 if (!bond_has_slaves(bond)) {
1941 bond->params.fail_over_mac = BOND_FOM_ACTIVE;
1942 slave_warn(bond_dev, slave_dev, "Setting fail_over_mac to active for active-backup mode\n");
1943 } else {
1944 SLAVE_NL_ERR(bond_dev, slave_dev, extack,
1945 "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
1946 res = -EOPNOTSUPP;
1947 goto err_undo_flags;
1948 }
1949 }
1950 }
1951
1952 call_netdevice_notifiers(NETDEV_JOIN, slave_dev);
1953
1954 /* If this is the first slave, then we need to set the master's hardware
1955 * address to be the same as the slave's.
1956 */
1957 if (!bond_has_slaves(bond) &&
1958 bond->dev->addr_assign_type == NET_ADDR_RANDOM) {
1959 res = bond_set_dev_addr(bond->dev, slave_dev);
1960 if (res)
1961 goto err_undo_flags;
1962 }
1963
1964 new_slave = bond_alloc_slave(bond, slave_dev);
1965 if (!new_slave) {
1966 res = -ENOMEM;
1967 goto err_undo_flags;
1968 }
1969
1970 /* Set the new_slave's queue_id to be zero. Queue ID mapping
1971 * is set via sysfs or module option if desired.
1972 */
1973 new_slave->queue_id = 0;
1974
1975 /* Save slave's original mtu and then set it to match the bond */
1976 new_slave->original_mtu = slave_dev->mtu;
1977 res = dev_set_mtu(slave_dev, bond->dev->mtu);
1978 if (res) {
1979 slave_err(bond_dev, slave_dev, "Error %d calling dev_set_mtu\n", res);
1980 goto err_free;
1981 }
1982
1983 /* Save slave's original ("permanent") mac address for modes
1984 * that need it, and for restoring it upon release, and then
1985 * set it to the master's address
1986 */
1987 bond_hw_addr_copy(new_slave->perm_hwaddr, slave_dev->dev_addr,
1988 slave_dev->addr_len);
1989
1990 if (!bond->params.fail_over_mac ||
1991 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
1992 /* Set slave to master's mac address.
The application already 1993 * set the master's mac address to that of the first slave 1994 */ 1995 memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len); 1996 ss.ss_family = slave_dev->type; 1997 res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, 1998 extack); 1999 if (res) { 2000 slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res); 2001 goto err_restore_mtu; 2002 } 2003 } 2004 2005 /* set no_addrconf flag before open to prevent IPv6 addrconf */ 2006 slave_dev->priv_flags |= IFF_NO_ADDRCONF; 2007 2008 /* open the slave since the application closed it */ 2009 res = dev_open(slave_dev, extack); 2010 if (res) { 2011 slave_err(bond_dev, slave_dev, "Opening slave failed\n"); 2012 goto err_restore_mac; 2013 } 2014 2015 slave_dev->priv_flags |= IFF_BONDING; 2016 /* initialize slave stats */ 2017 dev_get_stats(new_slave->dev, &new_slave->slave_stats); 2018 2019 if (bond_is_lb(bond)) { 2020 /* bond_alb_init_slave() must be called before all other stages since 2021 * it might fail and we do not want to have to undo everything 2022 */ 2023 res = bond_alb_init_slave(bond, new_slave); 2024 if (res) 2025 goto err_close; 2026 } 2027 2028 res = vlan_vids_add_by_dev(slave_dev, bond_dev); 2029 if (res) { 2030 slave_err(bond_dev, slave_dev, "Couldn't add bond vlan ids\n"); 2031 goto err_close; 2032 } 2033 2034 prev_slave = bond_last_slave(bond); 2035 2036 new_slave->delay = 0; 2037 new_slave->link_failure_count = 0; 2038 2039 if (bond_update_speed_duplex(new_slave) && 2040 bond_needs_speed_duplex(bond)) 2041 new_slave->link = BOND_LINK_DOWN; 2042 2043 new_slave->last_rx = jiffies - 2044 (msecs_to_jiffies(bond->params.arp_interval) + 1); 2045 for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) 2046 new_slave->target_last_arp_rx[i] = new_slave->last_rx; 2047 2048 new_slave->last_tx = new_slave->last_rx; 2049 2050 if (bond->params.miimon && !bond->params.use_carrier) { 2051 link_reporting = bond_check_dev_link(bond, slave_dev, 1); 2052 2053 if ((link_reporting == -1) && !bond->params.arp_interval) { 2054 /* miimon is set but a bonded network driver 2055 * does not support ETHTOOL/MII and 2056 * arp_interval is not set. Note: if 2057 * use_carrier is enabled, we will never go 2058 * here (because netif_carrier is always 2059 * supported); thus, we don't need to change 2060 * the messages for netif_carrier. 2061 */ 2062 slave_warn(bond_dev, slave_dev, "MII and ETHTOOL support not available for slave, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! 
see bonding.txt for details\n");
2063 } else if (link_reporting == -1) {
2064 /* unable to get link status using mii/ethtool */
2065 slave_warn(bond_dev, slave_dev, "can't get link status from slave; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n");
2066 }
2067 }
2068
2069 /* check for initial state */
2070 new_slave->link = BOND_LINK_NOCHANGE;
2071 if (bond->params.miimon) {
2072 if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
2073 if (bond->params.updelay) {
2074 bond_set_slave_link_state(new_slave,
2075 BOND_LINK_BACK,
2076 BOND_SLAVE_NOTIFY_NOW);
2077 new_slave->delay = bond->params.updelay;
2078 } else {
2079 bond_set_slave_link_state(new_slave,
2080 BOND_LINK_UP,
2081 BOND_SLAVE_NOTIFY_NOW);
2082 }
2083 } else {
2084 bond_set_slave_link_state(new_slave, BOND_LINK_DOWN,
2085 BOND_SLAVE_NOTIFY_NOW);
2086 }
2087 } else if (bond->params.arp_interval) {
2088 bond_set_slave_link_state(new_slave,
2089 (netif_carrier_ok(slave_dev) ?
2090 BOND_LINK_UP : BOND_LINK_DOWN),
2091 BOND_SLAVE_NOTIFY_NOW);
2092 } else {
2093 bond_set_slave_link_state(new_slave, BOND_LINK_UP,
2094 BOND_SLAVE_NOTIFY_NOW);
2095 }
2096
2097 if (new_slave->link != BOND_LINK_DOWN)
2098 new_slave->last_link_up = jiffies;
2099 slave_dbg(bond_dev, slave_dev, "Initial state of slave is BOND_LINK_%s\n",
2100 new_slave->link == BOND_LINK_DOWN ? "DOWN" :
2101 (new_slave->link == BOND_LINK_UP ? "UP" : "BACK"));
2102
2103 if (bond_uses_primary(bond) && bond->params.primary[0]) {
2104 /* if there is a primary slave, remember it */
2105 if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
2106 rcu_assign_pointer(bond->primary_slave, new_slave);
2107 bond->force_primary = true;
2108 }
2109 }
2110
2111 switch (BOND_MODE(bond)) {
2112 case BOND_MODE_ACTIVEBACKUP:
2113 bond_set_slave_inactive_flags(new_slave,
2114 BOND_SLAVE_NOTIFY_NOW);
2115 break;
2116 case BOND_MODE_8023AD:
2117 /* in 802.3ad mode, the internal mechanism
2118 * will activate the slaves in the selected
2119 * aggregator
2120 */
2121 bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);
2122 /* if this is the first slave */
2123 if (!prev_slave) {
2124 SLAVE_AD_INFO(new_slave)->id = 1;
2125 /* Initialize AD with the number of times that the AD timer is called in 1 second;
2126 * this can be called only after the mac address of the bond is set
2127 */
2128 bond_3ad_initialize(bond);
2129 } else {
2130 SLAVE_AD_INFO(new_slave)->id =
2131 SLAVE_AD_INFO(prev_slave)->id + 1;
2132 }
2133
2134 bond_3ad_bind_slave(new_slave);
2135 break;
2136 case BOND_MODE_TLB:
2137 case BOND_MODE_ALB:
2138 bond_set_active_slave(new_slave);
2139 bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);
2140 break;
2141 default:
2142 slave_dbg(bond_dev, slave_dev, "This slave is always active in trunk mode\n");
2143
2144 /* always active in trunk mode */
2145 bond_set_active_slave(new_slave);
2146
2147 /* In trunking mode there is little meaning to curr_active_slave
2148 * anyway (it holds no special properties of the bond device),
2149 * so we can change it without calling change_active_interface()
2150 */
2151 if (!rcu_access_pointer(bond->curr_active_slave) &&
2152 new_slave->link == BOND_LINK_UP)
2153 rcu_assign_pointer(bond->curr_active_slave, new_slave);
2154
2155 break;
2156 } /* switch(bond_mode) */
2157
2158 #ifdef CONFIG_NET_POLL_CONTROLLER
2159 if (bond->dev->npinfo) {
2160 if (slave_enable_netpoll(new_slave)) {
2161 slave_info(bond_dev, slave_dev,
"master_dev is using netpoll, but new slave device does not support netpoll\n"); 2162 res = -EBUSY; 2163 goto err_detach; 2164 } 2165 } 2166 #endif 2167 2168 if (!(bond_dev->features & NETIF_F_LRO)) 2169 dev_disable_lro(slave_dev); 2170 2171 res = netdev_rx_handler_register(slave_dev, bond_handle_frame, 2172 new_slave); 2173 if (res) { 2174 slave_dbg(bond_dev, slave_dev, "Error %d calling netdev_rx_handler_register\n", res); 2175 goto err_detach; 2176 } 2177 2178 res = bond_master_upper_dev_link(bond, new_slave, extack); 2179 if (res) { 2180 slave_dbg(bond_dev, slave_dev, "Error %d calling bond_master_upper_dev_link\n", res); 2181 goto err_unregister; 2182 } 2183 2184 bond_lower_state_changed(new_slave); 2185 2186 res = bond_sysfs_slave_add(new_slave); 2187 if (res) { 2188 slave_dbg(bond_dev, slave_dev, "Error %d calling bond_sysfs_slave_add\n", res); 2189 goto err_upper_unlink; 2190 } 2191 2192 /* If the mode uses primary, then the following is handled by 2193 * bond_change_active_slave(). 2194 */ 2195 if (!bond_uses_primary(bond)) { 2196 /* set promiscuity level to new slave */ 2197 if (bond_dev->flags & IFF_PROMISC) { 2198 res = dev_set_promiscuity(slave_dev, 1); 2199 if (res) 2200 goto err_sysfs_del; 2201 } 2202 2203 /* set allmulti level to new slave */ 2204 if (bond_dev->flags & IFF_ALLMULTI) { 2205 res = dev_set_allmulti(slave_dev, 1); 2206 if (res) { 2207 if (bond_dev->flags & IFF_PROMISC) 2208 dev_set_promiscuity(slave_dev, -1); 2209 goto err_sysfs_del; 2210 } 2211 } 2212 2213 if (bond_dev->flags & IFF_UP) { 2214 netif_addr_lock_bh(bond_dev); 2215 dev_mc_sync_multiple(slave_dev, bond_dev); 2216 dev_uc_sync_multiple(slave_dev, bond_dev); 2217 netif_addr_unlock_bh(bond_dev); 2218 2219 if (BOND_MODE(bond) == BOND_MODE_8023AD) 2220 dev_mc_add(slave_dev, lacpdu_mcast_addr); 2221 } 2222 } 2223 2224 bond->slave_cnt++; 2225 bond_compute_features(bond); 2226 bond_set_carrier(bond); 2227 2228 if (bond_uses_primary(bond)) { 2229 block_netpoll_tx(); 2230 bond_select_active_slave(bond); 2231 unblock_netpoll_tx(); 2232 } 2233 2234 if (bond_mode_can_use_xmit_hash(bond)) 2235 bond_update_slave_arr(bond, NULL); 2236 2237 2238 if (!slave_dev->netdev_ops->ndo_bpf || 2239 !slave_dev->netdev_ops->ndo_xdp_xmit) { 2240 if (bond->xdp_prog) { 2241 SLAVE_NL_ERR(bond_dev, slave_dev, extack, 2242 "Slave does not support XDP"); 2243 res = -EOPNOTSUPP; 2244 goto err_sysfs_del; 2245 } 2246 } else if (bond->xdp_prog) { 2247 struct netdev_bpf xdp = { 2248 .command = XDP_SETUP_PROG, 2249 .flags = 0, 2250 .prog = bond->xdp_prog, 2251 .extack = extack, 2252 }; 2253 2254 if (dev_xdp_prog_count(slave_dev) > 0) { 2255 SLAVE_NL_ERR(bond_dev, slave_dev, extack, 2256 "Slave has XDP program loaded, please unload before enslaving"); 2257 res = -EOPNOTSUPP; 2258 goto err_sysfs_del; 2259 } 2260 2261 res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); 2262 if (res < 0) { 2263 /* ndo_bpf() sets extack error message */ 2264 slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res); 2265 goto err_sysfs_del; 2266 } 2267 if (bond->xdp_prog) 2268 bpf_prog_inc(bond->xdp_prog); 2269 } 2270 2271 bond_xdp_set_features(bond_dev); 2272 2273 slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n", 2274 bond_is_active_slave(new_slave) ? "an active" : "a backup", 2275 new_slave->link != BOND_LINK_DOWN ? 
"an up" : "a down"); 2276 2277 /* enslave is successful */ 2278 bond_queue_slave_event(new_slave); 2279 return 0; 2280 2281 /* Undo stages on error */ 2282 err_sysfs_del: 2283 bond_sysfs_slave_del(new_slave); 2284 2285 err_upper_unlink: 2286 bond_upper_dev_unlink(bond, new_slave); 2287 2288 err_unregister: 2289 netdev_rx_handler_unregister(slave_dev); 2290 2291 err_detach: 2292 vlan_vids_del_by_dev(slave_dev, bond_dev); 2293 if (rcu_access_pointer(bond->primary_slave) == new_slave) 2294 RCU_INIT_POINTER(bond->primary_slave, NULL); 2295 if (rcu_access_pointer(bond->curr_active_slave) == new_slave) { 2296 block_netpoll_tx(); 2297 bond_change_active_slave(bond, NULL); 2298 bond_select_active_slave(bond); 2299 unblock_netpoll_tx(); 2300 } 2301 /* either primary_slave or curr_active_slave might've changed */ 2302 synchronize_rcu(); 2303 slave_disable_netpoll(new_slave); 2304 2305 err_close: 2306 if (!netif_is_bond_master(slave_dev)) 2307 slave_dev->priv_flags &= ~IFF_BONDING; 2308 dev_close(slave_dev); 2309 2310 err_restore_mac: 2311 slave_dev->priv_flags &= ~IFF_NO_ADDRCONF; 2312 if (!bond->params.fail_over_mac || 2313 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { 2314 /* XXX TODO - fom follow mode needs to change master's 2315 * MAC if this slave's MAC is in use by the bond, or at 2316 * least print a warning. 2317 */ 2318 bond_hw_addr_copy(ss.__data, new_slave->perm_hwaddr, 2319 new_slave->dev->addr_len); 2320 ss.ss_family = slave_dev->type; 2321 dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL); 2322 } 2323 2324 err_restore_mtu: 2325 dev_set_mtu(slave_dev, new_slave->original_mtu); 2326 2327 err_free: 2328 kobject_put(&new_slave->kobj); 2329 2330 err_undo_flags: 2331 /* Enslave of first slave has failed and we need to fix master's mac */ 2332 if (!bond_has_slaves(bond)) { 2333 if (ether_addr_equal_64bits(bond_dev->dev_addr, 2334 slave_dev->dev_addr)) 2335 eth_hw_addr_random(bond_dev); 2336 if (bond_dev->type != ARPHRD_ETHER) { 2337 dev_close(bond_dev); 2338 bond_ether_setup(bond_dev); 2339 } 2340 } 2341 2342 return res; 2343 } 2344 2345 /* Try to release the slave device <slave> from the bond device <master> 2346 * It is legal to access curr_active_slave without a lock because all the function 2347 * is RTNL-locked. If "all" is true it means that the function is being called 2348 * while destroying a bond interface and all slaves are being released. 2349 * 2350 * The rules for slave state should be: 2351 * for Active/Backup: 2352 * Active stays on all backups go down 2353 * for Bonded connections: 2354 * The first up interface should be left on and all others downed. 
2355 */ 2356 static int __bond_release_one(struct net_device *bond_dev, 2357 struct net_device *slave_dev, 2358 bool all, bool unregister) 2359 { 2360 struct bonding *bond = netdev_priv(bond_dev); 2361 struct slave *slave, *oldcurrent; 2362 struct sockaddr_storage ss; 2363 int old_flags = bond_dev->flags; 2364 netdev_features_t old_features = bond_dev->features; 2365 2366 /* slave is not a slave or master is not master of this slave */ 2367 if (!(slave_dev->flags & IFF_SLAVE) || 2368 !netdev_has_upper_dev(slave_dev, bond_dev)) { 2369 slave_dbg(bond_dev, slave_dev, "cannot release slave\n"); 2370 return -EINVAL; 2371 } 2372 2373 block_netpoll_tx(); 2374 2375 slave = bond_get_slave_by_dev(bond, slave_dev); 2376 if (!slave) { 2377 /* not a slave of this bond */ 2378 slave_info(bond_dev, slave_dev, "interface not enslaved\n"); 2379 unblock_netpoll_tx(); 2380 return -EINVAL; 2381 } 2382 2383 bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); 2384 2385 bond_sysfs_slave_del(slave); 2386 2387 /* recompute stats just before removing the slave */ 2388 bond_get_stats(bond->dev, &bond->bond_stats); 2389 2390 if (bond->xdp_prog) { 2391 struct netdev_bpf xdp = { 2392 .command = XDP_SETUP_PROG, 2393 .flags = 0, 2394 .prog = NULL, 2395 .extack = NULL, 2396 }; 2397 if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp)) 2398 slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n"); 2399 } 2400 2401 /* unregister rx_handler early so bond_handle_frame wouldn't be called 2402 * for this slave anymore. 2403 */ 2404 netdev_rx_handler_unregister(slave_dev); 2405 2406 if (BOND_MODE(bond) == BOND_MODE_8023AD) 2407 bond_3ad_unbind_slave(slave); 2408 2409 bond_upper_dev_unlink(bond, slave); 2410 2411 if (bond_mode_can_use_xmit_hash(bond)) 2412 bond_update_slave_arr(bond, slave); 2413 2414 slave_info(bond_dev, slave_dev, "Releasing %s interface\n", 2415 bond_is_active_slave(slave) ? "active" : "backup"); 2416 2417 oldcurrent = rcu_access_pointer(bond->curr_active_slave); 2418 2419 RCU_INIT_POINTER(bond->current_arp_slave, NULL); 2420 2421 if (!all && (!bond->params.fail_over_mac || 2422 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) { 2423 if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) && 2424 bond_has_slaves(bond)) 2425 slave_warn(bond_dev, slave_dev, "the permanent HWaddr of slave - %pM - is still in use by bond - set the HWaddr of slave to a different address to avoid conflicts\n", 2426 slave->perm_hwaddr); 2427 } 2428 2429 if (rtnl_dereference(bond->primary_slave) == slave) 2430 RCU_INIT_POINTER(bond->primary_slave, NULL); 2431 2432 if (oldcurrent == slave) 2433 bond_change_active_slave(bond, NULL); 2434 2435 if (bond_is_lb(bond)) { 2436 /* Must be called only after the slave has been 2437 * detached from the list and the curr_active_slave 2438 * has been cleared (if our_slave == old_current), 2439 * but before a new active slave is selected. 2440 */ 2441 bond_alb_deinit_slave(bond, slave); 2442 } 2443 2444 if (all) { 2445 RCU_INIT_POINTER(bond->curr_active_slave, NULL); 2446 } else if (oldcurrent == slave) { 2447 /* Note that we hold RTNL over this sequence, so there 2448 * is no concern that another slave add/remove event 2449 * will interfere. 
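 * The departing slave was already removed from the slave list by
 * bond_upper_dev_unlink() above, so bond_select_active_slave() cannot
 * pick it again.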
2450 */ 2451 bond_select_active_slave(bond); 2452 } 2453 2454 bond_set_carrier(bond); 2455 if (!bond_has_slaves(bond)) 2456 eth_hw_addr_random(bond_dev); 2457 2458 unblock_netpoll_tx(); 2459 synchronize_rcu(); 2460 bond->slave_cnt--; 2461 2462 if (!bond_has_slaves(bond)) { 2463 call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); 2464 call_netdevice_notifiers(NETDEV_RELEASE, bond->dev); 2465 } 2466 2467 bond_compute_features(bond); 2468 if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) && 2469 (old_features & NETIF_F_VLAN_CHALLENGED)) 2470 slave_info(bond_dev, slave_dev, "last VLAN challenged slave left bond - VLAN blocking is removed\n"); 2471 2472 vlan_vids_del_by_dev(slave_dev, bond_dev); 2473 2474 /* If the mode uses primary, then this case was handled above by 2475 * bond_change_active_slave(..., NULL) 2476 */ 2477 if (!bond_uses_primary(bond)) { 2478 /* unset promiscuity level from slave 2479 * NOTE: The NETDEV_CHANGEADDR call above may change the value 2480 * of the IFF_PROMISC flag in the bond_dev, but we need the 2481 * value of that flag before that change, as that was the value 2482 * when this slave was attached, so we cache at the start of the 2483 * function and use it here. Same goes for ALLMULTI below 2484 */ 2485 if (old_flags & IFF_PROMISC) 2486 dev_set_promiscuity(slave_dev, -1); 2487 2488 /* unset allmulti level from slave */ 2489 if (old_flags & IFF_ALLMULTI) 2490 dev_set_allmulti(slave_dev, -1); 2491 2492 if (old_flags & IFF_UP) 2493 bond_hw_addr_flush(bond_dev, slave_dev); 2494 } 2495 2496 slave_disable_netpoll(slave); 2497 2498 /* close slave before restoring its mac address */ 2499 dev_close(slave_dev); 2500 2501 slave_dev->priv_flags &= ~IFF_NO_ADDRCONF; 2502 2503 if (bond->params.fail_over_mac != BOND_FOM_ACTIVE || 2504 BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { 2505 /* restore original ("permanent") mac address */ 2506 bond_hw_addr_copy(ss.__data, slave->perm_hwaddr, 2507 slave->dev->addr_len); 2508 ss.ss_family = slave_dev->type; 2509 dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, NULL); 2510 } 2511 2512 if (unregister) 2513 __dev_set_mtu(slave_dev, slave->original_mtu); 2514 else 2515 dev_set_mtu(slave_dev, slave->original_mtu); 2516 2517 if (!netif_is_bond_master(slave_dev)) 2518 slave_dev->priv_flags &= ~IFF_BONDING; 2519 2520 bond_xdp_set_features(bond_dev); 2521 kobject_put(&slave->kobj); 2522 2523 return 0; 2524 } 2525 2526 /* A wrapper used because of ndo_del_link */ 2527 int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) 2528 { 2529 return __bond_release_one(bond_dev, slave_dev, false, false); 2530 } 2531 2532 /* First release a slave and then destroy the bond if no more slaves are left. 2533 * Must be under rtnl_lock when this function is called. 
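 *
 * Sketch of the expected calling context (illustrative only, not a
 * real call site from this file):
 *
 *   ASSERT_RTNL();
 *   ret = bond_release_and_destroy(bond_dev, slave_dev);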
2534 */ 2535 static int bond_release_and_destroy(struct net_device *bond_dev, 2536 struct net_device *slave_dev) 2537 { 2538 struct bonding *bond = netdev_priv(bond_dev); 2539 int ret; 2540 2541 ret = __bond_release_one(bond_dev, slave_dev, false, true); 2542 if (ret == 0 && !bond_has_slaves(bond) && 2543 bond_dev->reg_state != NETREG_UNREGISTERING) { 2544 bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; 2545 netdev_info(bond_dev, "Destroying bond\n"); 2546 bond_remove_proc_entry(bond); 2547 unregister_netdevice(bond_dev); 2548 } 2549 return ret; 2550 } 2551 2552 static void bond_info_query(struct net_device *bond_dev, struct ifbond *info) 2553 { 2554 struct bonding *bond = netdev_priv(bond_dev); 2555 2556 bond_fill_ifbond(bond, info); 2557 } 2558 2559 static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) 2560 { 2561 struct bonding *bond = netdev_priv(bond_dev); 2562 struct list_head *iter; 2563 int i = 0, res = -ENODEV; 2564 struct slave *slave; 2565 2566 bond_for_each_slave(bond, slave, iter) { 2567 if (i++ == (int)info->slave_id) { 2568 res = 0; 2569 bond_fill_ifslave(slave, info); 2570 break; 2571 } 2572 } 2573 2574 return res; 2575 } 2576 2577 /*-------------------------------- Monitoring -------------------------------*/ 2578 2579 /* called with rcu_read_lock() */ 2580 static int bond_miimon_inspect(struct bonding *bond) 2581 { 2582 bool ignore_updelay = false; 2583 int link_state, commit = 0; 2584 struct list_head *iter; 2585 struct slave *slave; 2586 2587 if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) { 2588 ignore_updelay = !rcu_dereference(bond->curr_active_slave); 2589 } else { 2590 struct bond_up_slave *usable_slaves; 2591 2592 usable_slaves = rcu_dereference(bond->usable_slaves); 2593 2594 if (usable_slaves && usable_slaves->count == 0) 2595 ignore_updelay = true; 2596 } 2597 2598 bond_for_each_slave_rcu(bond, slave, iter) { 2599 bond_propose_link_state(slave, BOND_LINK_NOCHANGE); 2600 2601 link_state = bond_check_dev_link(bond, slave->dev, 0); 2602 2603 switch (slave->link) { 2604 case BOND_LINK_UP: 2605 if (link_state) 2606 continue; 2607 2608 bond_propose_link_state(slave, BOND_LINK_FAIL); 2609 commit++; 2610 slave->delay = bond->params.downdelay; 2611 if (slave->delay) { 2612 slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n", 2613 (BOND_MODE(bond) == 2614 BOND_MODE_ACTIVEBACKUP) ? 2615 (bond_is_active_slave(slave) ? 2616 "active " : "backup ") : "", 2617 bond->params.downdelay * bond->params.miimon); 2618 } 2619 fallthrough; 2620 case BOND_LINK_FAIL: 2621 if (link_state) { 2622 /* recovered before downdelay expired */ 2623 bond_propose_link_state(slave, BOND_LINK_UP); 2624 slave->last_link_up = jiffies; 2625 slave_info(bond->dev, slave->dev, "link status up again after %d ms\n", 2626 (bond->params.downdelay - slave->delay) * 2627 bond->params.miimon); 2628 commit++; 2629 continue; 2630 } 2631 2632 if (slave->delay <= 0) { 2633 bond_propose_link_state(slave, BOND_LINK_DOWN); 2634 commit++; 2635 continue; 2636 } 2637 2638 slave->delay--; 2639 break; 2640 2641 case BOND_LINK_DOWN: 2642 if (!link_state) 2643 continue; 2644 2645 bond_propose_link_state(slave, BOND_LINK_BACK); 2646 commit++; 2647 slave->delay = bond->params.updelay; 2648 2649 if (slave->delay) { 2650 slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n", 2651 ignore_updelay ? 
0 : 2652 bond->params.updelay * 2653 bond->params.miimon); 2654 } 2655 fallthrough; 2656 case BOND_LINK_BACK: 2657 if (!link_state) { 2658 bond_propose_link_state(slave, BOND_LINK_DOWN); 2659 slave_info(bond->dev, slave->dev, "link status down again after %d ms\n", 2660 (bond->params.updelay - slave->delay) * 2661 bond->params.miimon); 2662 commit++; 2663 continue; 2664 } 2665 2666 if (ignore_updelay) 2667 slave->delay = 0; 2668 2669 if (slave->delay <= 0) { 2670 bond_propose_link_state(slave, BOND_LINK_UP); 2671 commit++; 2672 ignore_updelay = false; 2673 continue; 2674 } 2675 2676 slave->delay--; 2677 break; 2678 } 2679 } 2680 2681 return commit; 2682 } 2683 2684 static void bond_miimon_link_change(struct bonding *bond, 2685 struct slave *slave, 2686 char link) 2687 { 2688 switch (BOND_MODE(bond)) { 2689 case BOND_MODE_8023AD: 2690 bond_3ad_handle_link_change(slave, link); 2691 break; 2692 case BOND_MODE_TLB: 2693 case BOND_MODE_ALB: 2694 bond_alb_handle_link_change(bond, slave, link); 2695 break; 2696 case BOND_MODE_XOR: 2697 bond_update_slave_arr(bond, NULL); 2698 break; 2699 } 2700 } 2701 2702 static void bond_miimon_commit(struct bonding *bond) 2703 { 2704 struct slave *slave, *primary, *active; 2705 bool do_failover = false; 2706 struct list_head *iter; 2707 2708 ASSERT_RTNL(); 2709 2710 bond_for_each_slave(bond, slave, iter) { 2711 switch (slave->link_new_state) { 2712 case BOND_LINK_NOCHANGE: 2713 /* For 802.3ad mode, check current slave speed and 2714 * duplex again in case its port was disabled after 2715 * invalid speed/duplex reporting but recovered before 2716 * link monitoring could make a decision on the actual 2717 * link status 2718 */ 2719 if (BOND_MODE(bond) == BOND_MODE_8023AD && 2720 slave->link == BOND_LINK_UP) 2721 bond_3ad_adapter_speed_duplex_changed(slave); 2722 continue; 2723 2724 case BOND_LINK_UP: 2725 if (bond_update_speed_duplex(slave) && 2726 bond_needs_speed_duplex(bond)) { 2727 slave->link = BOND_LINK_DOWN; 2728 if (net_ratelimit()) 2729 slave_warn(bond->dev, slave->dev, 2730 "failed to get link speed/duplex\n"); 2731 continue; 2732 } 2733 bond_set_slave_link_state(slave, BOND_LINK_UP, 2734 BOND_SLAVE_NOTIFY_NOW); 2735 slave->last_link_up = jiffies; 2736 2737 primary = rtnl_dereference(bond->primary_slave); 2738 if (BOND_MODE(bond) == BOND_MODE_8023AD) { 2739 /* prevent it from being the active one */ 2740 bond_set_backup_slave(slave); 2741 } else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { 2742 /* make it immediately active */ 2743 bond_set_active_slave(slave); 2744 } 2745 2746 slave_info(bond->dev, slave->dev, "link status definitely up, %u Mbps %s duplex\n", 2747 slave->speed == SPEED_UNKNOWN ? 0 : slave->speed, 2748 slave->duplex ? 
"full" : "half"); 2749 2750 bond_miimon_link_change(bond, slave, BOND_LINK_UP); 2751 2752 active = rtnl_dereference(bond->curr_active_slave); 2753 if (!active || slave == primary || slave->prio > active->prio) 2754 do_failover = true; 2755 2756 continue; 2757 2758 case BOND_LINK_DOWN: 2759 if (slave->link_failure_count < UINT_MAX) 2760 slave->link_failure_count++; 2761 2762 bond_set_slave_link_state(slave, BOND_LINK_DOWN, 2763 BOND_SLAVE_NOTIFY_NOW); 2764 2765 if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || 2766 BOND_MODE(bond) == BOND_MODE_8023AD) 2767 bond_set_slave_inactive_flags(slave, 2768 BOND_SLAVE_NOTIFY_NOW); 2769 2770 slave_info(bond->dev, slave->dev, "link status definitely down, disabling slave\n"); 2771 2772 bond_miimon_link_change(bond, slave, BOND_LINK_DOWN); 2773 2774 if (slave == rcu_access_pointer(bond->curr_active_slave)) 2775 do_failover = true; 2776 2777 continue; 2778 2779 default: 2780 slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n", 2781 slave->link_new_state); 2782 bond_propose_link_state(slave, BOND_LINK_NOCHANGE); 2783 2784 continue; 2785 } 2786 } 2787 2788 if (do_failover) { 2789 block_netpoll_tx(); 2790 bond_select_active_slave(bond); 2791 unblock_netpoll_tx(); 2792 } 2793 2794 bond_set_carrier(bond); 2795 } 2796 2797 /* bond_mii_monitor 2798 * 2799 * Really a wrapper that splits the mii monitor into two phases: an 2800 * inspection, then (if inspection indicates something needs to be done) 2801 * an acquisition of appropriate locks followed by a commit phase to 2802 * implement whatever link state changes are indicated. 2803 */ 2804 static void bond_mii_monitor(struct work_struct *work) 2805 { 2806 struct bonding *bond = container_of(work, struct bonding, 2807 mii_work.work); 2808 bool should_notify_peers = false; 2809 bool commit; 2810 unsigned long delay; 2811 struct slave *slave; 2812 struct list_head *iter; 2813 2814 delay = msecs_to_jiffies(bond->params.miimon); 2815 2816 if (!bond_has_slaves(bond)) 2817 goto re_arm; 2818 2819 rcu_read_lock(); 2820 should_notify_peers = bond_should_notify_peers(bond); 2821 commit = !!bond_miimon_inspect(bond); 2822 if (bond->send_peer_notif) { 2823 rcu_read_unlock(); 2824 if (rtnl_trylock()) { 2825 bond->send_peer_notif--; 2826 rtnl_unlock(); 2827 } 2828 } else { 2829 rcu_read_unlock(); 2830 } 2831 2832 if (commit) { 2833 /* Race avoidance with bond_close cancel of workqueue */ 2834 if (!rtnl_trylock()) { 2835 delay = 1; 2836 should_notify_peers = false; 2837 goto re_arm; 2838 } 2839 2840 bond_for_each_slave(bond, slave, iter) { 2841 bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER); 2842 } 2843 bond_miimon_commit(bond); 2844 2845 rtnl_unlock(); /* might sleep, hold no other locks */ 2846 } 2847 2848 re_arm: 2849 if (bond->params.miimon) 2850 queue_delayed_work(bond->wq, &bond->mii_work, delay); 2851 2852 if (should_notify_peers) { 2853 if (!rtnl_trylock()) 2854 return; 2855 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); 2856 rtnl_unlock(); 2857 } 2858 } 2859 2860 static int bond_upper_dev_walk(struct net_device *upper, 2861 struct netdev_nested_priv *priv) 2862 { 2863 __be32 ip = *(__be32 *)priv->data; 2864 2865 return ip == bond_confirm_addr(upper, 0, ip); 2866 } 2867 2868 static bool bond_has_this_ip(struct bonding *bond, __be32 ip) 2869 { 2870 struct netdev_nested_priv priv = { 2871 .data = (void *)&ip, 2872 }; 2873 bool ret = false; 2874 2875 if (ip == bond_confirm_addr(bond->dev, 0, ip)) 2876 return true; 2877 2878 rcu_read_lock(); 2879 if (netdev_walk_all_upper_dev_rcu(bond->dev, 
bond_upper_dev_walk, &priv)) 2880 ret = true; 2881 rcu_read_unlock(); 2882 2883 return ret; 2884 } 2885 2886 #define BOND_VLAN_PROTO_NONE cpu_to_be16(0xffff) 2887 2888 static bool bond_handle_vlan(struct slave *slave, struct bond_vlan_tag *tags, 2889 struct sk_buff *skb) 2890 { 2891 struct net_device *bond_dev = slave->bond->dev; 2892 struct net_device *slave_dev = slave->dev; 2893 struct bond_vlan_tag *outer_tag = tags; 2894 2895 if (!tags || tags->vlan_proto == BOND_VLAN_PROTO_NONE) 2896 return true; 2897 2898 tags++; 2899 2900 /* Go through all the tags backwards and add them to the packet */ 2901 while (tags->vlan_proto != BOND_VLAN_PROTO_NONE) { 2902 if (!tags->vlan_id) { 2903 tags++; 2904 continue; 2905 } 2906 2907 slave_dbg(bond_dev, slave_dev, "inner tag: proto %X vid %X\n", 2908 ntohs(outer_tag->vlan_proto), tags->vlan_id); 2909 skb = vlan_insert_tag_set_proto(skb, tags->vlan_proto, 2910 tags->vlan_id); 2911 if (!skb) { 2912 net_err_ratelimited("failed to insert inner VLAN tag\n"); 2913 return false; 2914 } 2915 2916 tags++; 2917 } 2918 /* Set the outer tag */ 2919 if (outer_tag->vlan_id) { 2920 slave_dbg(bond_dev, slave_dev, "outer tag: proto %X vid %X\n", 2921 ntohs(outer_tag->vlan_proto), outer_tag->vlan_id); 2922 __vlan_hwaccel_put_tag(skb, outer_tag->vlan_proto, 2923 outer_tag->vlan_id); 2924 } 2925 2926 return true; 2927 } 2928 2929 /* We go to the (large) trouble of VLAN tagging ARP frames because 2930 * switches in VLAN mode (especially if ports are configured as 2931 * "native" to a VLAN) might not pass non-tagged frames. 2932 */ 2933 static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip, 2934 __be32 src_ip, struct bond_vlan_tag *tags) 2935 { 2936 struct net_device *bond_dev = slave->bond->dev; 2937 struct net_device *slave_dev = slave->dev; 2938 struct sk_buff *skb; 2939 2940 slave_dbg(bond_dev, slave_dev, "arp %d on slave: dst %pI4 src %pI4\n", 2941 arp_op, &dest_ip, &src_ip); 2942 2943 skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip, 2944 NULL, slave_dev->dev_addr, NULL); 2945 2946 if (!skb) { 2947 net_err_ratelimited("ARP packet allocation failed\n"); 2948 return; 2949 } 2950 2951 if (bond_handle_vlan(slave, tags, skb)) { 2952 slave_update_last_tx(slave); 2953 arp_xmit(skb); 2954 } 2955 2956 return; 2957 } 2958 2959 /* Validate the device path between the @start_dev and the @end_dev. 2960 * The path is valid if the @end_dev is reachable through device 2961 * stacking. 2962 * When the path is validated, collect any vlan information in the 2963 * path. 
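 *
 * Hypothetical example (device names invented for illustration): for a
 * stack bond0 <- vlan100 <- vlan200 where vlan200 is @end_dev, the
 * returned array is { vlan100's tag, vlan200's tag, terminator }, with
 * tags[0] later used by bond_handle_vlan() as the outer tag and the
 * terminator marked BOND_VLAN_PROTO_NONE.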
2964 */
2965 struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
2966 struct net_device *end_dev,
2967 int level)
2968 {
2969 struct bond_vlan_tag *tags;
2970 struct net_device *upper;
2971 struct list_head *iter;
2972
2973 if (start_dev == end_dev) {
2974 tags = kcalloc(level + 1, sizeof(*tags), GFP_ATOMIC);
2975 if (!tags)
2976 return ERR_PTR(-ENOMEM);
2977 tags[level].vlan_proto = BOND_VLAN_PROTO_NONE;
2978 return tags;
2979 }
2980
2981 netdev_for_each_upper_dev_rcu(start_dev, upper, iter) {
2982 tags = bond_verify_device_path(upper, end_dev, level + 1);
2983 if (IS_ERR_OR_NULL(tags)) {
2984 if (IS_ERR(tags))
2985 return tags;
2986 continue;
2987 }
2988 if (is_vlan_dev(upper)) {
2989 tags[level].vlan_proto = vlan_dev_vlan_proto(upper);
2990 tags[level].vlan_id = vlan_dev_vlan_id(upper);
2991 }
2992
2993 return tags;
2994 }
2995
2996 return NULL;
2997 }
2998
2999 static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
3000 {
3001 struct rtable *rt;
3002 struct bond_vlan_tag *tags;
3003 __be32 *targets = bond->params.arp_targets, addr;
3004 int i;
3005
3006 for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) {
3007 slave_dbg(bond->dev, slave->dev, "%s: target %pI4\n",
3008 __func__, &targets[i]);
3009 tags = NULL;
3010
3011 /* Find out through which dev the packet should go */
3012 rt = ip_route_output(dev_net(bond->dev), targets[i], 0,
3013 RTO_ONLINK, 0);
3014 if (IS_ERR(rt)) {
3015 /* there's no route to target - try to send arp
3016 * probe to generate any traffic (arp_validate=0)
3017 */
3018 if (bond->params.arp_validate)
3019 pr_warn_once("%s: no route to arp_ip_target %pI4 and arp_validate is set\n",
3020 bond->dev->name,
3021 &targets[i]);
3022 bond_arp_send(slave, ARPOP_REQUEST, targets[i],
3023 0, tags);
3024 continue;
3025 }
3026
3027 /* bond device itself */
3028 if (rt->dst.dev == bond->dev)
3029 goto found;
3030
3031 rcu_read_lock();
3032 tags = bond_verify_device_path(bond->dev, rt->dst.dev, 0);
3033 rcu_read_unlock();
3034
3035 if (!IS_ERR_OR_NULL(tags))
3036 goto found;
3037
3038 /* Not our device - skip */
3039 slave_dbg(bond->dev, slave->dev, "no path to arp_ip_target %pI4 via rt.dev %s\n",
3040 &targets[i], rt->dst.dev ?
rt->dst.dev->name : "NULL"); 3041 3042 ip_rt_put(rt); 3043 continue; 3044 3045 found: 3046 addr = bond_confirm_addr(rt->dst.dev, targets[i], 0); 3047 ip_rt_put(rt); 3048 bond_arp_send(slave, ARPOP_REQUEST, targets[i], addr, tags); 3049 kfree(tags); 3050 } 3051 } 3052 3053 static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip) 3054 { 3055 int i; 3056 3057 if (!sip || !bond_has_this_ip(bond, tip)) { 3058 slave_dbg(bond->dev, slave->dev, "%s: sip %pI4 tip %pI4 not found\n", 3059 __func__, &sip, &tip); 3060 return; 3061 } 3062 3063 i = bond_get_targets_ip(bond->params.arp_targets, sip); 3064 if (i == -1) { 3065 slave_dbg(bond->dev, slave->dev, "%s: sip %pI4 not found in targets\n", 3066 __func__, &sip); 3067 return; 3068 } 3069 slave->last_rx = jiffies; 3070 slave->target_last_arp_rx[i] = jiffies; 3071 } 3072 3073 static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, 3074 struct slave *slave) 3075 { 3076 struct arphdr *arp = (struct arphdr *)skb->data; 3077 struct slave *curr_active_slave, *curr_arp_slave; 3078 unsigned char *arp_ptr; 3079 __be32 sip, tip; 3080 unsigned int alen; 3081 3082 alen = arp_hdr_len(bond->dev); 3083 3084 if (alen > skb_headlen(skb)) { 3085 arp = kmalloc(alen, GFP_ATOMIC); 3086 if (!arp) 3087 goto out_unlock; 3088 if (skb_copy_bits(skb, 0, arp, alen) < 0) 3089 goto out_unlock; 3090 } 3091 3092 if (arp->ar_hln != bond->dev->addr_len || 3093 skb->pkt_type == PACKET_OTHERHOST || 3094 skb->pkt_type == PACKET_LOOPBACK || 3095 arp->ar_hrd != htons(ARPHRD_ETHER) || 3096 arp->ar_pro != htons(ETH_P_IP) || 3097 arp->ar_pln != 4) 3098 goto out_unlock; 3099 3100 arp_ptr = (unsigned char *)(arp + 1); 3101 arp_ptr += bond->dev->addr_len; 3102 memcpy(&sip, arp_ptr, 4); 3103 arp_ptr += 4 + bond->dev->addr_len; 3104 memcpy(&tip, arp_ptr, 4); 3105 3106 slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI4 tip %pI4\n", 3107 __func__, slave->dev->name, bond_slave_state(slave), 3108 bond->params.arp_validate, slave_do_arp_validate(bond, slave), 3109 &sip, &tip); 3110 3111 curr_active_slave = rcu_dereference(bond->curr_active_slave); 3112 curr_arp_slave = rcu_dereference(bond->current_arp_slave); 3113 3114 /* We 'trust' the received ARP enough to validate it if: 3115 * 3116 * (a) the slave receiving the ARP is active (which includes the 3117 * current ARP slave, if any), or 3118 * 3119 * (b) the receiving slave isn't active, but there is a currently 3120 * active slave and it received valid arp reply(s) after it became 3121 * the currently active slave, or 3122 * 3123 * (c) there is an ARP slave that sent an ARP during the prior ARP 3124 * interval, and we receive an ARP reply on any slave. We accept 3125 * these because switch FDB update delays may deliver the ARP 3126 * reply to a slave other than the sender of the ARP request. 3127 * 3128 * Note: for (b), backup slaves are receiving the broadcast ARP 3129 * request, not a reply. This request passes from the sending 3130 * slave through the L2 switch(es) to the receiving slave. Since 3131 * this is checking the request, sip/tip are swapped for 3132 * validation. 3133 * 3134 * This is done to avoid endless looping when we can't reach the 3135 * arp_ip_target and fool ourselves with our own arp requests. 
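 *
 * Worked example for case (b), with invented addresses: the active
 * slave broadcasts "who-has 10.0.0.1 tell 10.0.0.2"; a backup slave
 * receiving that request sees sip = 10.0.0.2 and tip = 10.0.0.1, so
 * bond_validate_arp() below is called with tip/sip swapped relative
 * to case (a).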
3136 */
3137 if (bond_is_active_slave(slave))
3138 bond_validate_arp(bond, slave, sip, tip);
3139 else if (curr_active_slave &&
3140 time_after(slave_last_rx(bond, curr_active_slave),
3141 curr_active_slave->last_link_up))
3142 bond_validate_arp(bond, slave, tip, sip);
3143 else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
3144 bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1))
3145 bond_validate_arp(bond, slave, sip, tip);
3146
3147 out_unlock:
3148 if (arp != (struct arphdr *)skb->data)
3149 kfree(arp);
3150 return RX_HANDLER_ANOTHER;
3151 }
3152
3153 #if IS_ENABLED(CONFIG_IPV6)
3154 static void bond_ns_send(struct slave *slave, const struct in6_addr *daddr,
3155 const struct in6_addr *saddr, struct bond_vlan_tag *tags)
3156 {
3157 struct net_device *bond_dev = slave->bond->dev;
3158 struct net_device *slave_dev = slave->dev;
3159 struct in6_addr mcaddr;
3160 struct sk_buff *skb;
3161
3162 slave_dbg(bond_dev, slave_dev, "NS on slave: dst %pI6c src %pI6c\n",
3163 daddr, saddr);
3164
3165 skb = ndisc_ns_create(slave_dev, daddr, saddr, 0);
3166 if (!skb) {
3167 net_err_ratelimited("NS packet allocation failed\n");
3168 return;
3169 }
3170
3171 addrconf_addr_solict_mult(daddr, &mcaddr);
3172 if (bond_handle_vlan(slave, tags, skb)) {
3173 slave_update_last_tx(slave);
3174 ndisc_send_skb(skb, &mcaddr, saddr);
3175 }
3176 }
3177
3178 static void bond_ns_send_all(struct bonding *bond, struct slave *slave)
3179 {
3180 struct in6_addr *targets = bond->params.ns_targets;
3181 struct bond_vlan_tag *tags;
3182 struct dst_entry *dst;
3183 struct in6_addr saddr;
3184 struct flowi6 fl6;
3185 int i;
3186
3187 for (i = 0; i < BOND_MAX_NS_TARGETS && !ipv6_addr_any(&targets[i]); i++) {
3188 slave_dbg(bond->dev, slave->dev, "%s: target %pI6c\n",
3189 __func__, &targets[i]);
3190 tags = NULL;
3191
3192 /* Find out through which dev the packet should go */
3193 memset(&fl6, 0, sizeof(struct flowi6));
3194 fl6.daddr = targets[i];
3195 fl6.flowi6_oif = bond->dev->ifindex;
3196
3197 dst = ip6_route_output(dev_net(bond->dev), NULL, &fl6);
3198 if (dst->error) {
3199 dst_release(dst);
3200 /* there's no route to target - try to send an NS
3201 * probe to generate any traffic (arp_validate=0)
3202 */
3203 if (bond->params.arp_validate)
3204 pr_warn_once("%s: no route to ns_ip6_target %pI6c and arp_validate is set\n",
3205 bond->dev->name,
3206 &targets[i]);
3207 bond_ns_send(slave, &targets[i], &in6addr_any, tags);
3208 continue;
3209 }
3210
3211 /* bond device itself */
3212 if (dst->dev == bond->dev)
3213 goto found;
3214
3215 rcu_read_lock();
3216 tags = bond_verify_device_path(bond->dev, dst->dev, 0);
3217 rcu_read_unlock();
3218
3219 if (!IS_ERR_OR_NULL(tags))
3220 goto found;
3221
3222 /* Not our device - skip */
3223 slave_dbg(bond->dev, slave->dev, "no path to ns_ip6_target %pI6c via dst->dev %s\n",
3224 &targets[i], dst->dev ?
dst->dev->name : "NULL");
3225
3226 dst_release(dst);
3227 continue;
3228
3229 found:
3230 if (!ipv6_dev_get_saddr(dev_net(dst->dev), dst->dev, &targets[i], 0, &saddr))
3231 bond_ns_send(slave, &targets[i], &saddr, tags);
3232 else
3233 bond_ns_send(slave, &targets[i], &in6addr_any, tags);
3234
3235 dst_release(dst);
3236 kfree(tags);
3237 }
3238 }
3239
3240 static int bond_confirm_addr6(struct net_device *dev,
3241 struct netdev_nested_priv *priv)
3242 {
3243 struct in6_addr *addr = (struct in6_addr *)priv->data;
3244
3245 return ipv6_chk_addr(dev_net(dev), addr, dev, 0);
3246 }
3247
3248 static bool bond_has_this_ip6(struct bonding *bond, struct in6_addr *addr)
3249 {
3250 struct netdev_nested_priv priv = {
3251 .data = addr,
3252 };
3253 int ret = false;
3254
3255 if (bond_confirm_addr6(bond->dev, &priv))
3256 return true;
3257
3258 rcu_read_lock();
3259 if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_confirm_addr6, &priv))
3260 ret = true;
3261 rcu_read_unlock();
3262
3263 return ret;
3264 }
3265
3266 static void bond_validate_na(struct bonding *bond, struct slave *slave,
3267 struct in6_addr *saddr, struct in6_addr *daddr)
3268 {
3269 int i;
3270
3271 /* Ignore NAs that:
3272 * 1. Source address is the unspecified address.
3273 * 2. Dest address is neither the all-nodes multicast address nor
3274 * exists on the bond interface.
3275 */
3276 if (ipv6_addr_any(saddr) ||
3277 (!ipv6_addr_equal(daddr, &in6addr_linklocal_allnodes) &&
3278 !bond_has_this_ip6(bond, daddr))) {
3279 slave_dbg(bond->dev, slave->dev, "%s: sip %pI6c tip %pI6c not found\n",
3280 __func__, saddr, daddr);
3281 return;
3282 }
3283
3284 i = bond_get_targets_ip6(bond->params.ns_targets, saddr);
3285 if (i == -1) {
3286 slave_dbg(bond->dev, slave->dev, "%s: sip %pI6c not found in targets\n",
3287 __func__, saddr);
3288 return;
3289 }
3290 slave->last_rx = jiffies;
3291 slave->target_last_arp_rx[i] = jiffies;
3292 }
3293
3294 static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
3295 struct slave *slave)
3296 {
3297 struct slave *curr_active_slave, *curr_arp_slave;
3298 struct in6_addr *saddr, *daddr;
3299 struct {
3300 struct ipv6hdr ip6;
3301 struct icmp6hdr icmp6;
3302 } *combined, _combined;
3303
3304 if (skb->pkt_type == PACKET_OTHERHOST ||
3305 skb->pkt_type == PACKET_LOOPBACK)
3306 goto out;
3307
3308 combined = skb_header_pointer(skb, 0, sizeof(_combined), &_combined);
3309 if (!combined || combined->ip6.nexthdr != NEXTHDR_ICMP ||
3310 (combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION &&
3311 combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT))
3312 goto out;
3313
3314 saddr = &combined->ip6.saddr;
3315 daddr = &combined->ip6.daddr;
3316
3317 slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI6c tip %pI6c\n",
3318 __func__, slave->dev->name, bond_slave_state(slave),
3319 bond->params.arp_validate, slave_do_arp_validate(bond, slave),
3320 saddr, daddr);
3321
3322 curr_active_slave = rcu_dereference(bond->curr_active_slave);
3323 curr_arp_slave = rcu_dereference(bond->current_arp_slave);
3324
3325 /* We 'trust' the received ARP enough to validate it if:
3326 * see bond_arp_rcv().
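 * The same three cases apply here, with the NS/NA saddr/daddr pair
 * playing the roles of ARP's sip/tip (note the daddr/saddr swap in
 * case (b) below).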
3327 */ 3328 if (bond_is_active_slave(slave)) 3329 bond_validate_na(bond, slave, saddr, daddr); 3330 else if (curr_active_slave && 3331 time_after(slave_last_rx(bond, curr_active_slave), 3332 curr_active_slave->last_link_up)) 3333 bond_validate_na(bond, slave, daddr, saddr); 3334 else if (curr_arp_slave && 3335 bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1)) 3336 bond_validate_na(bond, slave, saddr, daddr); 3337 3338 out: 3339 return RX_HANDLER_ANOTHER; 3340 } 3341 #endif 3342 3343 int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, 3344 struct slave *slave) 3345 { 3346 #if IS_ENABLED(CONFIG_IPV6) 3347 bool is_ipv6 = skb->protocol == __cpu_to_be16(ETH_P_IPV6); 3348 #endif 3349 bool is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); 3350 3351 slave_dbg(bond->dev, slave->dev, "%s: skb->dev %s\n", 3352 __func__, skb->dev->name); 3353 3354 /* Use arp validate logic for both ARP and NS */ 3355 if (!slave_do_arp_validate(bond, slave)) { 3356 if ((slave_do_arp_validate_only(bond) && is_arp) || 3357 #if IS_ENABLED(CONFIG_IPV6) 3358 (slave_do_arp_validate_only(bond) && is_ipv6) || 3359 #endif 3360 !slave_do_arp_validate_only(bond)) 3361 slave->last_rx = jiffies; 3362 return RX_HANDLER_ANOTHER; 3363 } else if (is_arp) { 3364 return bond_arp_rcv(skb, bond, slave); 3365 #if IS_ENABLED(CONFIG_IPV6) 3366 } else if (is_ipv6) { 3367 return bond_na_rcv(skb, bond, slave); 3368 #endif 3369 } else { 3370 return RX_HANDLER_ANOTHER; 3371 } 3372 } 3373 3374 static void bond_send_validate(struct bonding *bond, struct slave *slave) 3375 { 3376 bond_arp_send_all(bond, slave); 3377 #if IS_ENABLED(CONFIG_IPV6) 3378 bond_ns_send_all(bond, slave); 3379 #endif 3380 } 3381 3382 /* function to verify if we're in the arp_interval timeslice, returns true if 3383 * (last_act - arp_interval) <= jiffies <= (last_act + mod * arp_interval + 3384 * arp_interval/2) . the arp_interval/2 is needed for really fast networks. 3385 */ 3386 static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, 3387 int mod) 3388 { 3389 int delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); 3390 3391 return time_in_range(jiffies, 3392 last_act - delta_in_ticks, 3393 last_act + mod * delta_in_ticks + delta_in_ticks/2); 3394 } 3395 3396 /* This function is called regularly to monitor each slave's link 3397 * ensuring that traffic is being sent and received when arp monitoring 3398 * is used in load-balancing mode. if the adapter has been dormant, then an 3399 * arp is transmitted to generate traffic. see activebackup_arp_monitor for 3400 * arp monitoring in active backup mode. 3401 */ 3402 static void bond_loadbalance_arp_mon(struct bonding *bond) 3403 { 3404 struct slave *slave, *oldcurrent; 3405 struct list_head *iter; 3406 int do_failover = 0, slave_state_changed = 0; 3407 3408 if (!bond_has_slaves(bond)) 3409 goto re_arm; 3410 3411 rcu_read_lock(); 3412 3413 oldcurrent = rcu_dereference(bond->curr_active_slave); 3414 /* see if any of the previous devices are up now (i.e. they have 3415 * xmt and rcv traffic). the curr_active_slave does not come into 3416 * the picture unless it is null. also, slave->last_link_up is not 3417 * needed here because we send an arp on each slave and give a slave 3418 * as long as it needs to get the tx/rx within the delta. 3419 * TODO: what about up/down delay in arp mode? 
it wasn't here before 3420 * so it can wait 3421 */ 3422 bond_for_each_slave_rcu(bond, slave, iter) { 3423 unsigned long last_tx = slave_last_tx(slave); 3424 3425 bond_propose_link_state(slave, BOND_LINK_NOCHANGE); 3426 3427 if (slave->link != BOND_LINK_UP) { 3428 if (bond_time_in_interval(bond, last_tx, 1) && 3429 bond_time_in_interval(bond, slave->last_rx, 1)) { 3430 3431 bond_propose_link_state(slave, BOND_LINK_UP); 3432 slave_state_changed = 1; 3433 3434 /* primary_slave has no meaning in round-robin 3435 * mode. the window of a slave being up and 3436 * curr_active_slave being null after enslaving 3437 * is closed. 3438 */ 3439 if (!oldcurrent) { 3440 slave_info(bond->dev, slave->dev, "link status definitely up\n"); 3441 do_failover = 1; 3442 } else { 3443 slave_info(bond->dev, slave->dev, "interface is now up\n"); 3444 } 3445 } 3446 } else { 3447 /* slave->link == BOND_LINK_UP */ 3448 3449 /* not all switches will respond to an arp request 3450 * when the source ip is 0, so don't take the link down 3451 * if we don't know our ip yet 3452 */ 3453 if (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) || 3454 !bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) { 3455 3456 bond_propose_link_state(slave, BOND_LINK_DOWN); 3457 slave_state_changed = 1; 3458 3459 if (slave->link_failure_count < UINT_MAX) 3460 slave->link_failure_count++; 3461 3462 slave_info(bond->dev, slave->dev, "interface is now down\n"); 3463 3464 if (slave == oldcurrent) 3465 do_failover = 1; 3466 } 3467 } 3468 3469 /* note: if switch is in round-robin mode, all links 3470 * must tx arp to ensure all links rx an arp - otherwise 3471 * links may oscillate or not come up at all; if switch is 3472 * in something like xor mode, there is nothing we can 3473 * do - all replies will be rx'ed on same link causing slaves 3474 * to be unstable during low/no traffic periods 3475 */ 3476 if (bond_slave_is_up(slave)) 3477 bond_send_validate(bond, slave); 3478 } 3479 3480 rcu_read_unlock(); 3481 3482 if (do_failover || slave_state_changed) { 3483 if (!rtnl_trylock()) 3484 goto re_arm; 3485 3486 bond_for_each_slave(bond, slave, iter) { 3487 if (slave->link_new_state != BOND_LINK_NOCHANGE) 3488 slave->link = slave->link_new_state; 3489 } 3490 3491 if (slave_state_changed) { 3492 bond_slave_state_change(bond); 3493 if (BOND_MODE(bond) == BOND_MODE_XOR) 3494 bond_update_slave_arr(bond, NULL); 3495 } 3496 if (do_failover) { 3497 block_netpoll_tx(); 3498 bond_select_active_slave(bond); 3499 unblock_netpoll_tx(); 3500 } 3501 rtnl_unlock(); 3502 } 3503 3504 re_arm: 3505 if (bond->params.arp_interval) 3506 queue_delayed_work(bond->wq, &bond->arp_work, 3507 msecs_to_jiffies(bond->params.arp_interval)); 3508 } 3509 3510 /* Called to inspect slaves for active-backup mode ARP monitor link state 3511 * changes. Sets proposed link state in slaves to specify what action 3512 * should take place for the slave. Returns 0 if no changes are found, >0 3513 * if changes to link states must be committed. 3514 * 3515 * Called with rcu_read_lock held. 
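 *
 * Worked timing example (illustrative, assuming arp_interval = 100 ms):
 * bond_time_in_interval(bond, last_rx, 1) accepts a last_rx anywhere
 * from 150 ms in the past to 100 ms in the future of jiffies, i.e. one
 * monitor cycle plus half a cycle of slack.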
3516 */
3517 static int bond_ab_arp_inspect(struct bonding *bond)
3518 {
3519 unsigned long last_tx, last_rx;
3520 struct list_head *iter;
3521 struct slave *slave;
3522 int commit = 0;
3523
3524 bond_for_each_slave_rcu(bond, slave, iter) {
3525 bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
3526 last_rx = slave_last_rx(bond, slave);
3527
3528 if (slave->link != BOND_LINK_UP) {
3529 if (bond_time_in_interval(bond, last_rx, 1)) {
3530 bond_propose_link_state(slave, BOND_LINK_UP);
3531 commit++;
3532 } else if (slave->link == BOND_LINK_BACK) {
3533 bond_propose_link_state(slave, BOND_LINK_FAIL);
3534 commit++;
3535 }
3536 continue;
3537 }
3538
3539 /* Give slaves 2*delta after being enslaved or made
3540 * active. This avoids bouncing, as the last receive
3541 * times need a full ARP monitor cycle to be updated.
3542 */
3543 if (bond_time_in_interval(bond, slave->last_link_up, 2))
3544 continue;
3545
3546 /* Backup slave is down if:
3547 * - No current_arp_slave AND
3548 * - more than (missed_max+1)*delta since last receive AND
3549 * - the bond has an IP address
3550 *
3551 * Note: a non-null current_arp_slave indicates
3552 * the curr_active_slave went down and we are
3553 * searching for a new one; under this condition
3554 * we only take the curr_active_slave down - this
3555 * gives each slave a chance to tx/rx traffic
3556 * before being taken out
3557 */
3558 if (!bond_is_active_slave(slave) &&
3559 !rcu_access_pointer(bond->current_arp_slave) &&
3560 !bond_time_in_interval(bond, last_rx, bond->params.missed_max + 1)) {
3561 bond_propose_link_state(slave, BOND_LINK_DOWN);
3562 commit++;
3563 }
3564
3565 /* Active slave is down if:
3566 * - more than missed_max*delta since transmitting OR
3567 * - (more than missed_max*delta since receive AND
3568 * the bond has an IP address)
3569 */
3570 last_tx = slave_last_tx(slave);
3571 if (bond_is_active_slave(slave) &&
3572 (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
3573 !bond_time_in_interval(bond, last_rx, bond->params.missed_max))) {
3574 bond_propose_link_state(slave, BOND_LINK_DOWN);
3575 commit++;
3576 }
3577 }
3578
3579 return commit;
3580 }
3581
3582 /* Called to commit link state changes noted by the inspection step of
3583 * the active-backup mode ARP monitor.
3584 *
3585 * Called with RTNL held.
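 *
 * Only the per-slave link_new_state values proposed by
 * bond_ab_arp_inspect() are applied here; any failover is deferred to
 * a single bond_select_active_slave() call at the end, mirroring the
 * inspect/commit split used by the miimon monitor.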
3586 */ 3587 static void bond_ab_arp_commit(struct bonding *bond) 3588 { 3589 bool do_failover = false; 3590 struct list_head *iter; 3591 unsigned long last_tx; 3592 struct slave *slave; 3593 3594 bond_for_each_slave(bond, slave, iter) { 3595 switch (slave->link_new_state) { 3596 case BOND_LINK_NOCHANGE: 3597 continue; 3598 3599 case BOND_LINK_UP: 3600 last_tx = slave_last_tx(slave); 3601 if (rtnl_dereference(bond->curr_active_slave) != slave || 3602 (!rtnl_dereference(bond->curr_active_slave) && 3603 bond_time_in_interval(bond, last_tx, 1))) { 3604 struct slave *current_arp_slave; 3605 3606 current_arp_slave = rtnl_dereference(bond->current_arp_slave); 3607 bond_set_slave_link_state(slave, BOND_LINK_UP, 3608 BOND_SLAVE_NOTIFY_NOW); 3609 if (current_arp_slave) { 3610 bond_set_slave_inactive_flags( 3611 current_arp_slave, 3612 BOND_SLAVE_NOTIFY_NOW); 3613 RCU_INIT_POINTER(bond->current_arp_slave, NULL); 3614 } 3615 3616 slave_info(bond->dev, slave->dev, "link status definitely up\n"); 3617 3618 if (!rtnl_dereference(bond->curr_active_slave) || 3619 slave == rtnl_dereference(bond->primary_slave) || 3620 slave->prio > rtnl_dereference(bond->curr_active_slave)->prio) 3621 do_failover = true; 3622 3623 } 3624 3625 continue; 3626 3627 case BOND_LINK_DOWN: 3628 if (slave->link_failure_count < UINT_MAX) 3629 slave->link_failure_count++; 3630 3631 bond_set_slave_link_state(slave, BOND_LINK_DOWN, 3632 BOND_SLAVE_NOTIFY_NOW); 3633 bond_set_slave_inactive_flags(slave, 3634 BOND_SLAVE_NOTIFY_NOW); 3635 3636 slave_info(bond->dev, slave->dev, "link status definitely down, disabling slave\n"); 3637 3638 if (slave == rtnl_dereference(bond->curr_active_slave)) { 3639 RCU_INIT_POINTER(bond->current_arp_slave, NULL); 3640 do_failover = true; 3641 } 3642 3643 continue; 3644 3645 case BOND_LINK_FAIL: 3646 bond_set_slave_link_state(slave, BOND_LINK_FAIL, 3647 BOND_SLAVE_NOTIFY_NOW); 3648 bond_set_slave_inactive_flags(slave, 3649 BOND_SLAVE_NOTIFY_NOW); 3650 3651 /* A slave has just been enslaved and has become 3652 * the current active slave. 3653 */ 3654 if (rtnl_dereference(bond->curr_active_slave)) 3655 RCU_INIT_POINTER(bond->current_arp_slave, NULL); 3656 continue; 3657 3658 default: 3659 slave_err(bond->dev, slave->dev, 3660 "impossible: link_new_state %d on slave\n", 3661 slave->link_new_state); 3662 continue; 3663 } 3664 } 3665 3666 if (do_failover) { 3667 block_netpoll_tx(); 3668 bond_select_active_slave(bond); 3669 unblock_netpoll_tx(); 3670 } 3671 3672 bond_set_carrier(bond); 3673 } 3674 3675 /* Send ARP probes for active-backup mode ARP monitor. 3676 * 3677 * Called with rcu_read_lock held. 
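 * Returns BOND_SLAVE_NOTIFY_NOW when some slave state or link change
 * still has to be propagated under RTNL, BOND_SLAVE_NOTIFY_LATER
 * otherwise; see the should_notify_rtnl handling in
 * bond_activebackup_arp_mon().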
3678 */ 3679 static bool bond_ab_arp_probe(struct bonding *bond) 3680 { 3681 struct slave *slave, *before = NULL, *new_slave = NULL, 3682 *curr_arp_slave = rcu_dereference(bond->current_arp_slave), 3683 *curr_active_slave = rcu_dereference(bond->curr_active_slave); 3684 struct list_head *iter; 3685 bool found = false; 3686 bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; 3687 3688 if (curr_arp_slave && curr_active_slave) 3689 netdev_info(bond->dev, "PROBE: c_arp %s && cas %s BAD\n", 3690 curr_arp_slave->dev->name, 3691 curr_active_slave->dev->name); 3692 3693 if (curr_active_slave) { 3694 bond_send_validate(bond, curr_active_slave); 3695 return should_notify_rtnl; 3696 } 3697 3698 /* if we don't have a curr_active_slave, search for the next available 3699 * backup slave from the current_arp_slave and make it the candidate 3700 * for becoming the curr_active_slave 3701 */ 3702 3703 if (!curr_arp_slave) { 3704 curr_arp_slave = bond_first_slave_rcu(bond); 3705 if (!curr_arp_slave) 3706 return should_notify_rtnl; 3707 } 3708 3709 bond_for_each_slave_rcu(bond, slave, iter) { 3710 if (!found && !before && bond_slave_is_up(slave)) 3711 before = slave; 3712 3713 if (found && !new_slave && bond_slave_is_up(slave)) 3714 new_slave = slave; 3715 /* if the link state is up at this point, we 3716 * mark it down - this can happen if we have 3717 * simultaneous link failures and 3718 * reselect_active_interface doesn't make this 3719 * one the current slave so it is still marked 3720 * up when it is actually down 3721 */ 3722 if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { 3723 bond_set_slave_link_state(slave, BOND_LINK_DOWN, 3724 BOND_SLAVE_NOTIFY_LATER); 3725 if (slave->link_failure_count < UINT_MAX) 3726 slave->link_failure_count++; 3727 3728 bond_set_slave_inactive_flags(slave, 3729 BOND_SLAVE_NOTIFY_LATER); 3730 3731 slave_info(bond->dev, slave->dev, "backup interface is now down\n"); 3732 } 3733 if (slave == curr_arp_slave) 3734 found = true; 3735 } 3736 3737 if (!new_slave && before) 3738 new_slave = before; 3739 3740 if (!new_slave) 3741 goto check_state; 3742 3743 bond_set_slave_link_state(new_slave, BOND_LINK_BACK, 3744 BOND_SLAVE_NOTIFY_LATER); 3745 bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); 3746 bond_send_validate(bond, new_slave); 3747 new_slave->last_link_up = jiffies; 3748 rcu_assign_pointer(bond->current_arp_slave, new_slave); 3749 3750 check_state: 3751 bond_for_each_slave_rcu(bond, slave, iter) { 3752 if (slave->should_notify || slave->should_notify_link) { 3753 should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; 3754 break; 3755 } 3756 } 3757 return should_notify_rtnl; 3758 } 3759 3760 static void bond_activebackup_arp_mon(struct bonding *bond) 3761 { 3762 bool should_notify_peers = false; 3763 bool should_notify_rtnl = false; 3764 int delta_in_ticks; 3765 3766 delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); 3767 3768 if (!bond_has_slaves(bond)) 3769 goto re_arm; 3770 3771 rcu_read_lock(); 3772 3773 should_notify_peers = bond_should_notify_peers(bond); 3774 3775 if (bond_ab_arp_inspect(bond)) { 3776 rcu_read_unlock(); 3777 3778 /* Race avoidance with bond_close flush of workqueue */ 3779 if (!rtnl_trylock()) { 3780 delta_in_ticks = 1; 3781 should_notify_peers = false; 3782 goto re_arm; 3783 } 3784 3785 bond_ab_arp_commit(bond); 3786 3787 rtnl_unlock(); 3788 rcu_read_lock(); 3789 } 3790 3791 should_notify_rtnl = bond_ab_arp_probe(bond); 3792 rcu_read_unlock(); 3793 3794 re_arm: 3795 if (bond->params.arp_interval) 3796 
queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); 3797 3798 if (should_notify_peers || should_notify_rtnl) { 3799 if (!rtnl_trylock()) 3800 return; 3801 3802 if (should_notify_peers) { 3803 bond->send_peer_notif--; 3804 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, 3805 bond->dev); 3806 } 3807 if (should_notify_rtnl) { 3808 bond_slave_state_notify(bond); 3809 bond_slave_link_notify(bond); 3810 } 3811 3812 rtnl_unlock(); 3813 } 3814 } 3815 3816 static void bond_arp_monitor(struct work_struct *work) 3817 { 3818 struct bonding *bond = container_of(work, struct bonding, 3819 arp_work.work); 3820 3821 if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) 3822 bond_activebackup_arp_mon(bond); 3823 else 3824 bond_loadbalance_arp_mon(bond); 3825 } 3826 3827 /*-------------------------- netdev event handling --------------------------*/ 3828 3829 /* Change device name */ 3830 static int bond_event_changename(struct bonding *bond) 3831 { 3832 bond_remove_proc_entry(bond); 3833 bond_create_proc_entry(bond); 3834 3835 bond_debug_reregister(bond); 3836 3837 return NOTIFY_DONE; 3838 } 3839 3840 static int bond_master_netdev_event(unsigned long event, 3841 struct net_device *bond_dev) 3842 { 3843 struct bonding *event_bond = netdev_priv(bond_dev); 3844 3845 netdev_dbg(bond_dev, "%s called\n", __func__); 3846 3847 switch (event) { 3848 case NETDEV_CHANGENAME: 3849 return bond_event_changename(event_bond); 3850 case NETDEV_UNREGISTER: 3851 bond_remove_proc_entry(event_bond); 3852 #ifdef CONFIG_XFRM_OFFLOAD 3853 xfrm_dev_state_flush(dev_net(bond_dev), bond_dev, true); 3854 #endif /* CONFIG_XFRM_OFFLOAD */ 3855 break; 3856 case NETDEV_REGISTER: 3857 bond_create_proc_entry(event_bond); 3858 break; 3859 default: 3860 break; 3861 } 3862 3863 return NOTIFY_DONE; 3864 } 3865 3866 static int bond_slave_netdev_event(unsigned long event, 3867 struct net_device *slave_dev) 3868 { 3869 struct slave *slave = bond_slave_get_rtnl(slave_dev), *primary; 3870 struct bonding *bond; 3871 struct net_device *bond_dev; 3872 3873 /* A netdev event can be generated while enslaving a device 3874 * before netdev_rx_handler_register is called in which case 3875 * slave will be NULL 3876 */ 3877 if (!slave) { 3878 netdev_dbg(slave_dev, "%s called on NULL slave\n", __func__); 3879 return NOTIFY_DONE; 3880 } 3881 3882 bond_dev = slave->bond->dev; 3883 bond = slave->bond; 3884 primary = rtnl_dereference(bond->primary_slave); 3885 3886 slave_dbg(bond_dev, slave_dev, "%s called\n", __func__); 3887 3888 switch (event) { 3889 case NETDEV_UNREGISTER: 3890 if (bond_dev->type != ARPHRD_ETHER) 3891 bond_release_and_destroy(bond_dev, slave_dev); 3892 else 3893 __bond_release_one(bond_dev, slave_dev, false, true); 3894 break; 3895 case NETDEV_UP: 3896 case NETDEV_CHANGE: 3897 /* For 802.3ad mode only: 3898 * Getting invalid Speed/Duplex values here will put slave 3899 * in weird state. Mark it as link-fail if the link was 3900 * previously up or link-down if it hasn't yet come up, and 3901 * let link-monitoring (miimon) set it right when correct 3902 * speeds/duplex are available. 3903 */ 3904 if (bond_update_speed_duplex(slave) && 3905 BOND_MODE(bond) == BOND_MODE_8023AD) { 3906 if (slave->last_link_up) 3907 slave->link = BOND_LINK_FAIL; 3908 else 3909 slave->link = BOND_LINK_DOWN; 3910 } 3911 3912 if (BOND_MODE(bond) == BOND_MODE_8023AD) 3913 bond_3ad_adapter_speed_duplex_changed(slave); 3914 fallthrough; 3915 case NETDEV_DOWN: 3916 /* Refresh slave-array if applicable! 
3917 * If the setup does not use miimon or arpmon (mode-specific!),
3918 * then these events will not cause the slave-array to be
3919 * refreshed. This will cause xmit to use a slave that is not
3920 * usable. Avoid such a situation by refreshing the array at these
3921 * events. If these (miimon/arpmon) parameters are configured
3922 * then the array gets refreshed twice and that should be fine!
3923 */
3924 if (bond_mode_can_use_xmit_hash(bond))
3925 bond_update_slave_arr(bond, NULL);
3926 break;
3927 case NETDEV_CHANGEMTU:
3928 /* TODO: Should slaves be allowed to
3929 * independently alter their MTU? For
3930 * an active-backup bond, slaves need
3931 * not be the same type of device, so
3932 * MTUs may vary. For other modes,
3933 * slaves arguably should have the
3934 * same MTUs. To do this, we'd need to
3935 * take over the slave's change_mtu
3936 * function for the duration of their
3937 * servitude.
3938 */
3939 break;
3940 case NETDEV_CHANGENAME:
3941 /* we don't care if we don't have primary set */
3942 if (!bond_uses_primary(bond) ||
3943 !bond->params.primary[0])
3944 break;
3945
3946 if (slave == primary) {
3947 /* slave's name changed - it's no longer primary */
3948 RCU_INIT_POINTER(bond->primary_slave, NULL);
3949 } else if (!strcmp(slave_dev->name, bond->params.primary)) {
3950 /* we have a new primary slave */
3951 rcu_assign_pointer(bond->primary_slave, slave);
3952 } else { /* we didn't change primary - exit */
3953 break;
3954 }
3955
3956 netdev_info(bond->dev, "Primary slave changed to %s, reselecting active slave\n",
3957 primary ? slave_dev->name : "none");
3958
3959 block_netpoll_tx();
3960 bond_select_active_slave(bond);
3961 unblock_netpoll_tx();
3962 break;
3963 case NETDEV_FEAT_CHANGE:
3964 if (!bond->notifier_ctx) {
3965 bond->notifier_ctx = true;
3966 bond_compute_features(bond);
3967 bond->notifier_ctx = false;
3968 }
3969 break;
3970 case NETDEV_RESEND_IGMP:
3971 /* Propagate to master device */
3972 call_netdevice_notifiers(event, slave->bond->dev);
3973 break;
3974 case NETDEV_XDP_FEAT_CHANGE:
3975 bond_xdp_set_features(bond_dev);
3976 break;
3977 default:
3978 break;
3979 }
3980
3981 return NOTIFY_DONE;
3982 }
3983
3984 /* bond_netdev_event: handle netdev notifier chain events.
3985 *
3986 * This function receives events for the netdev chain. The caller (an
3987 * ioctl handler calling blocking_notifier_call_chain) holds the necessary
3988 * locks for us to safely manipulate the slave devices (RTNL lock,
3989 * dev_probe_lock).
3990 */
3991 static int bond_netdev_event(struct notifier_block *this,
3992 unsigned long event, void *ptr)
3993 {
3994 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
3995
3996 netdev_dbg(event_dev, "%s received %s\n",
3997 __func__, netdev_cmd_to_name(event));
3998
3999 if (!(event_dev->priv_flags & IFF_BONDING))
4000 return NOTIFY_DONE;
4001
4002 if (event_dev->flags & IFF_MASTER) {
4003 int ret;
4004
4005 ret = bond_master_netdev_event(event, event_dev);
4006 if (ret != NOTIFY_DONE)
4007 return ret;
4008 }
4009
4010 if (event_dev->flags & IFF_SLAVE)
4011 return bond_slave_netdev_event(event, event_dev);
4012
4013 return NOTIFY_DONE;
4014 }
4015
4016 static struct notifier_block bond_netdev_notifier = {
4017 .notifier_call = bond_netdev_event,
4018 };
4019
4020 /*---------------------------- Hashing Policies -----------------------------*/
4021
4022 /* Helper to access data in a packet, with or without a backing skb.
4023 * If skb is given the data is linearized if necessary via pskb_may_pull.
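 *
 * Illustrative use (mirrors bond_eth_hash() below):
 *
 *	data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
 *	if (data)
 *		ep = (struct ethhdr *)(data + mhoff);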
4024 */
4025 static inline const void *bond_pull_data(struct sk_buff *skb,
4026 const void *data, int hlen, int n)
4027 {
4028 if (likely(n <= hlen))
4029 return data;
4030 else if (skb && likely(pskb_may_pull(skb, n)))
4031 return skb->data;
4032
4033 return NULL;
4034 }
4035
4036 /* L2 hash helper */
4037 static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
4038 {
4039 struct ethhdr *ep;
4040
4041 data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
4042 if (!data)
4043 return 0;
4044
4045 ep = (struct ethhdr *)(data + mhoff);
4046 return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
4047 }
4048
4049 static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data,
4050 int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
4051 {
4052 const struct ipv6hdr *iph6;
4053 const struct iphdr *iph;
4054
4055 if (l2_proto == htons(ETH_P_IP)) {
4056 data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
4057 if (!data)
4058 return false;
4059
4060 iph = (const struct iphdr *)(data + *nhoff);
4061 iph_to_flow_copy_v4addrs(fk, iph);
4062 *nhoff += iph->ihl << 2;
4063 if (!ip_is_fragment(iph))
4064 *ip_proto = iph->protocol;
4065 } else if (l2_proto == htons(ETH_P_IPV6)) {
4066 data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
4067 if (!data)
4068 return false;
4069
4070 iph6 = (const struct ipv6hdr *)(data + *nhoff);
4071 iph_to_flow_copy_v6addrs(fk, iph6);
4072 *nhoff += sizeof(*iph6);
4073 *ip_proto = iph6->nexthdr;
4074 } else {
4075 return false;
4076 }
4077
4078 if (l34 && *ip_proto >= 0)
4079 fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
4080
4081 return true;
4082 }
4083
4084 static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
4085 {
4086 u32 srcmac_vendor = 0, srcmac_dev = 0;
4087 struct ethhdr *mac_hdr;
4088 u16 vlan = 0;
4089 int i;
4090
4091 data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
4092 if (!data)
4093 return 0;
4094 mac_hdr = (struct ethhdr *)(data + mhoff);
4095
4096 for (i = 0; i < 3; i++)
4097 srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];
4098
4099 for (i = 3; i < ETH_ALEN; i++)
4100 srcmac_dev = (srcmac_dev << 8) | mac_hdr->h_source[i];
4101
4102 if (skb && skb_vlan_tag_present(skb))
4103 vlan = skb_vlan_tag_get(skb);
4104
4105 return vlan ^ srcmac_vendor ^ srcmac_dev;
4106 }
4107
4108 /* Extract the appropriate headers based on bond's xmit policy */
4109 static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data,
4110 __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
4111 {
4112 bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
4113 int ip_proto = -1;
4114
4115 switch (bond->params.xmit_policy) {
4116 case BOND_XMIT_POLICY_ENCAP23:
4117 case BOND_XMIT_POLICY_ENCAP34:
4118 memset(fk, 0, sizeof(*fk));
4119 return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
4120 fk, data, l2_proto, nhoff, hlen, 0);
4121 default:
4122 break;
4123 }
4124
4125 fk->ports.ports = 0;
4126 memset(&fk->icmp, 0, sizeof(fk->icmp));
4127 if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
4128 return false;
4129
4130 /* ICMP error packets contain at least 8 bytes of the header
4131 * of the packet which generated the error. Use this information
4132 * to correlate ICMP error packets within the same flow which
4133 * generated the error.
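 *
 * Worked example (illustrative): an ICMPv4 "port unreachable" embeds
 * the offending IP header plus the first 8 bytes of its UDP payload,
 * so the second bond_flow_ip() pass below recovers the inner addresses
 * and ports, and the error hashes to the same slave as the original flow.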
4134 */ 4135 if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { 4136 skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen); 4137 if (ip_proto == IPPROTO_ICMP) { 4138 if (!icmp_is_err(fk->icmp.type)) 4139 return true; 4140 4141 nhoff += sizeof(struct icmphdr); 4142 } else if (ip_proto == IPPROTO_ICMPV6) { 4143 if (!icmpv6_is_err(fk->icmp.type)) 4144 return true; 4145 4146 nhoff += sizeof(struct icmp6hdr); 4147 } 4148 return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34); 4149 } 4150 4151 return true; 4152 } 4153 4154 static u32 bond_ip_hash(u32 hash, struct flow_keys *flow, int xmit_policy) 4155 { 4156 hash ^= (__force u32)flow_get_u32_dst(flow) ^ 4157 (__force u32)flow_get_u32_src(flow); 4158 hash ^= (hash >> 16); 4159 hash ^= (hash >> 8); 4160 4161 /* discard lowest hash bit to deal with the common even ports pattern */ 4162 if (xmit_policy == BOND_XMIT_POLICY_LAYER34 || 4163 xmit_policy == BOND_XMIT_POLICY_ENCAP34) 4164 return hash >> 1; 4165 4166 return hash; 4167 } 4168 4169 /* Generate hash based on xmit policy. If @skb is given it is used to linearize 4170 * the data as required, but this function can be used without it if the data is 4171 * known to be linear (e.g. with xdp_buff). 4172 */ 4173 static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data, 4174 __be16 l2_proto, int mhoff, int nhoff, int hlen) 4175 { 4176 struct flow_keys flow; 4177 u32 hash; 4178 4179 if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC) 4180 return bond_vlan_srcmac_hash(skb, data, mhoff, hlen); 4181 4182 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || 4183 !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow)) 4184 return bond_eth_hash(skb, data, mhoff, hlen); 4185 4186 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || 4187 bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) { 4188 hash = bond_eth_hash(skb, data, mhoff, hlen); 4189 } else { 4190 if (flow.icmp.id) 4191 memcpy(&hash, &flow.icmp, sizeof(hash)); 4192 else 4193 memcpy(&hash, &flow.ports.ports, sizeof(hash)); 4194 } 4195 4196 return bond_ip_hash(hash, &flow, bond->params.xmit_policy); 4197 } 4198 4199 /** 4200 * bond_xmit_hash - generate a hash value based on the xmit policy 4201 * @bond: bonding device 4202 * @skb: buffer to use for headers 4203 * 4204 * This function will extract the necessary headers from the skb buffer and use 4205 * them to generate a hash based on the xmit_policy set in the bonding device 4206 */ 4207 u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) 4208 { 4209 if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 && 4210 skb->l4_hash) 4211 return skb->hash; 4212 4213 return __bond_xmit_hash(bond, skb, skb->data, skb->protocol, 4214 0, skb_network_offset(skb), 4215 skb_headlen(skb)); 4216 } 4217 4218 /** 4219 * bond_xmit_hash_xdp - generate a hash value based on the xmit policy 4220 * @bond: bonding device 4221 * @xdp: buffer to use for headers 4222 * 4223 * The XDP variant of bond_xmit_hash. 
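 *
 * Usage sketch (illustrative, mirrors the XDP transmit path below):
 *
 *	hash = bond_xmit_hash_xdp(bond, xdp);
 *	slave = slaves->arr[hash % slaves->count];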
4224 */ 4225 static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp) 4226 { 4227 struct ethhdr *eth; 4228 4229 if (xdp->data + sizeof(struct ethhdr) > xdp->data_end) 4230 return 0; 4231 4232 eth = (struct ethhdr *)xdp->data; 4233 4234 return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0, 4235 sizeof(struct ethhdr), xdp->data_end - xdp->data); 4236 } 4237 4238 /*-------------------------- Device entry points ----------------------------*/ 4239 4240 void bond_work_init_all(struct bonding *bond) 4241 { 4242 INIT_DELAYED_WORK(&bond->mcast_work, 4243 bond_resend_igmp_join_requests_delayed); 4244 INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); 4245 INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); 4246 INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor); 4247 INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); 4248 INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler); 4249 } 4250 4251 static void bond_work_cancel_all(struct bonding *bond) 4252 { 4253 cancel_delayed_work_sync(&bond->mii_work); 4254 cancel_delayed_work_sync(&bond->arp_work); 4255 cancel_delayed_work_sync(&bond->alb_work); 4256 cancel_delayed_work_sync(&bond->ad_work); 4257 cancel_delayed_work_sync(&bond->mcast_work); 4258 cancel_delayed_work_sync(&bond->slave_arr_work); 4259 } 4260 4261 static int bond_open(struct net_device *bond_dev) 4262 { 4263 struct bonding *bond = netdev_priv(bond_dev); 4264 struct list_head *iter; 4265 struct slave *slave; 4266 4267 if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN && !bond->rr_tx_counter) { 4268 bond->rr_tx_counter = alloc_percpu(u32); 4269 if (!bond->rr_tx_counter) 4270 return -ENOMEM; 4271 } 4272 4273 /* reset slave->backup and slave->inactive */ 4274 if (bond_has_slaves(bond)) { 4275 bond_for_each_slave(bond, slave, iter) { 4276 if (bond_uses_primary(bond) && 4277 slave != rcu_access_pointer(bond->curr_active_slave)) { 4278 bond_set_slave_inactive_flags(slave, 4279 BOND_SLAVE_NOTIFY_NOW); 4280 } else if (BOND_MODE(bond) != BOND_MODE_8023AD) { 4281 bond_set_slave_active_flags(slave, 4282 BOND_SLAVE_NOTIFY_NOW); 4283 } 4284 } 4285 } 4286 4287 if (bond_is_lb(bond)) { 4288 /* bond_alb_initialize must be called before the timer 4289 * is started. 4290 */ 4291 if (bond_alb_initialize(bond, (BOND_MODE(bond) == BOND_MODE_ALB))) 4292 return -ENOMEM; 4293 if (bond->params.tlb_dynamic_lb || BOND_MODE(bond) == BOND_MODE_ALB) 4294 queue_delayed_work(bond->wq, &bond->alb_work, 0); 4295 } 4296 4297 if (bond->params.miimon) /* link check interval, in milliseconds. */ 4298 queue_delayed_work(bond->wq, &bond->mii_work, 0); 4299 4300 if (bond->params.arp_interval) { /* arp interval, in milliseconds. 
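 * (illustrative: e.g. arp_interval=1000 with an arp_ip_target set
 * queues the ARP monitor immediately and re-arms it about once
 * per second)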
*/
4301 queue_delayed_work(bond->wq, &bond->arp_work, 0);
4302 bond->recv_probe = bond_rcv_validate;
4303 }
4304
4305 if (BOND_MODE(bond) == BOND_MODE_8023AD) {
4306 queue_delayed_work(bond->wq, &bond->ad_work, 0);
4307 /* register to receive LACPDUs */
4308 bond->recv_probe = bond_3ad_lacpdu_recv;
4309 bond_3ad_initiate_agg_selection(bond, 1);
4310
4311 bond_for_each_slave(bond, slave, iter)
4312 dev_mc_add(slave->dev, lacpdu_mcast_addr);
4313 }
4314
4315 if (bond_mode_can_use_xmit_hash(bond))
4316 bond_update_slave_arr(bond, NULL);
4317
4318 return 0;
4319 }
4320
4321 static int bond_close(struct net_device *bond_dev)
4322 {
4323 struct bonding *bond = netdev_priv(bond_dev);
4324 struct slave *slave;
4325
4326 bond_work_cancel_all(bond);
4327 bond->send_peer_notif = 0;
4328 if (bond_is_lb(bond))
4329 bond_alb_deinitialize(bond);
4330 bond->recv_probe = NULL;
4331
4332 if (bond_uses_primary(bond)) {
4333 rcu_read_lock();
4334 slave = rcu_dereference(bond->curr_active_slave);
4335 if (slave)
4336 bond_hw_addr_flush(bond_dev, slave->dev);
4337 rcu_read_unlock();
4338 } else {
4339 struct list_head *iter;
4340
4341 bond_for_each_slave(bond, slave, iter)
4342 bond_hw_addr_flush(bond_dev, slave->dev);
4343 }
4344
4345 return 0;
4346 }
4347
4348 /* fold stats, assuming all rtnl_link_stats64 fields are u64, but
4349 * some drivers may provide 32bit values only.
4350 */
4351 static void bond_fold_stats(struct rtnl_link_stats64 *_res,
4352 const struct rtnl_link_stats64 *_new,
4353 const struct rtnl_link_stats64 *_old)
4354 {
4355 const u64 *new = (const u64 *)_new;
4356 const u64 *old = (const u64 *)_old;
4357 u64 *res = (u64 *)_res;
4358 int i;
4359
4360 for (i = 0; i < sizeof(*_res) / sizeof(u64); i++) {
4361 u64 nv = new[i];
4362 u64 ov = old[i];
4363 s64 delta = nv - ov;
4364
4365 /* detects if this particular field is 32bit only */
4366 if (((nv | ov) >> 32) == 0)
4367 delta = (s64)(s32)((u32)nv - (u32)ov);
4368
4369 /* filter anomalies, some drivers reset their stats
4370 * at down/up events.
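 *
 * Worked example (illustrative): after a reset nv == 0 while ov > 0,
 * so delta is negative and dropped. A 32bit counter wrapping from
 * ov == 0xffffffff to nv == 0x00000001 has both upper halves zero,
 * so delta was recomputed above as (s32)((u32)nv - (u32)ov) == 2 and
 * is accumulated normally.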
4371 */ 4372 if (delta > 0) 4373 res[i] += delta; 4374 } 4375 } 4376 4377 #ifdef CONFIG_LOCKDEP 4378 static int bond_get_lowest_level_rcu(struct net_device *dev) 4379 { 4380 struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; 4381 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; 4382 int cur = 0, max = 0; 4383 4384 now = dev; 4385 iter = &dev->adj_list.lower; 4386 4387 while (1) { 4388 next = NULL; 4389 while (1) { 4390 ldev = netdev_next_lower_dev_rcu(now, &iter); 4391 if (!ldev) 4392 break; 4393 4394 next = ldev; 4395 niter = &ldev->adj_list.lower; 4396 dev_stack[cur] = now; 4397 iter_stack[cur++] = iter; 4398 if (max <= cur) 4399 max = cur; 4400 break; 4401 } 4402 4403 if (!next) { 4404 if (!cur) 4405 return max; 4406 next = dev_stack[--cur]; 4407 niter = iter_stack[cur]; 4408 } 4409 4410 now = next; 4411 iter = niter; 4412 } 4413 4414 return max; 4415 } 4416 #endif 4417 4418 static void bond_get_stats(struct net_device *bond_dev, 4419 struct rtnl_link_stats64 *stats) 4420 { 4421 struct bonding *bond = netdev_priv(bond_dev); 4422 struct rtnl_link_stats64 temp; 4423 struct list_head *iter; 4424 struct slave *slave; 4425 int nest_level = 0; 4426 4427 4428 rcu_read_lock(); 4429 #ifdef CONFIG_LOCKDEP 4430 nest_level = bond_get_lowest_level_rcu(bond_dev); 4431 #endif 4432 4433 spin_lock_nested(&bond->stats_lock, nest_level); 4434 memcpy(stats, &bond->bond_stats, sizeof(*stats)); 4435 4436 bond_for_each_slave_rcu(bond, slave, iter) { 4437 const struct rtnl_link_stats64 *new = 4438 dev_get_stats(slave->dev, &temp); 4439 4440 bond_fold_stats(stats, new, &slave->slave_stats); 4441 4442 /* save off the slave stats for the next run */ 4443 memcpy(&slave->slave_stats, new, sizeof(*new)); 4444 } 4445 4446 memcpy(&bond->bond_stats, stats, sizeof(*stats)); 4447 spin_unlock(&bond->stats_lock); 4448 rcu_read_unlock(); 4449 } 4450 4451 static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) 4452 { 4453 struct bonding *bond = netdev_priv(bond_dev); 4454 struct mii_ioctl_data *mii = NULL; 4455 4456 netdev_dbg(bond_dev, "bond_eth_ioctl: cmd=%d\n", cmd); 4457 4458 switch (cmd) { 4459 case SIOCGMIIPHY: 4460 mii = if_mii(ifr); 4461 if (!mii) 4462 return -EINVAL; 4463 4464 mii->phy_id = 0; 4465 fallthrough; 4466 case SIOCGMIIREG: 4467 /* We do this again just in case we were called by SIOCGMIIREG 4468 * instead of SIOCGMIIPHY. 
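 *
 * Illustrative: user space such as mii-tool typically issues
 * SIOCGMIIPHY and then SIOCGMIIREG for reg 1 (MII_BMSR), treating a
 * set BMSR_LSTATUS in val_out as link up.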
4469 */ 4470 mii = if_mii(ifr); 4471 if (!mii) 4472 return -EINVAL; 4473 4474 if (mii->reg_num == 1) { 4475 mii->val_out = 0; 4476 if (netif_carrier_ok(bond->dev)) 4477 mii->val_out = BMSR_LSTATUS; 4478 } 4479 4480 break; 4481 default: 4482 return -EOPNOTSUPP; 4483 } 4484 4485 return 0; 4486 } 4487 4488 static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) 4489 { 4490 struct bonding *bond = netdev_priv(bond_dev); 4491 struct net_device *slave_dev = NULL; 4492 struct ifbond k_binfo; 4493 struct ifbond __user *u_binfo = NULL; 4494 struct ifslave k_sinfo; 4495 struct ifslave __user *u_sinfo = NULL; 4496 struct bond_opt_value newval; 4497 struct net *net; 4498 int res = 0; 4499 4500 netdev_dbg(bond_dev, "bond_ioctl: cmd=%d\n", cmd); 4501 4502 switch (cmd) { 4503 case SIOCBONDINFOQUERY: 4504 u_binfo = (struct ifbond __user *)ifr->ifr_data; 4505 4506 if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) 4507 return -EFAULT; 4508 4509 bond_info_query(bond_dev, &k_binfo); 4510 if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) 4511 return -EFAULT; 4512 4513 return 0; 4514 case SIOCBONDSLAVEINFOQUERY: 4515 u_sinfo = (struct ifslave __user *)ifr->ifr_data; 4516 4517 if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) 4518 return -EFAULT; 4519 4520 res = bond_slave_info_query(bond_dev, &k_sinfo); 4521 if (res == 0 && 4522 copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) 4523 return -EFAULT; 4524 4525 return res; 4526 default: 4527 break; 4528 } 4529 4530 net = dev_net(bond_dev); 4531 4532 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 4533 return -EPERM; 4534 4535 slave_dev = __dev_get_by_name(net, ifr->ifr_slave); 4536 4537 slave_dbg(bond_dev, slave_dev, "slave_dev=%p:\n", slave_dev); 4538 4539 if (!slave_dev) 4540 return -ENODEV; 4541 4542 switch (cmd) { 4543 case SIOCBONDENSLAVE: 4544 res = bond_enslave(bond_dev, slave_dev, NULL); 4545 break; 4546 case SIOCBONDRELEASE: 4547 res = bond_release(bond_dev, slave_dev); 4548 break; 4549 case SIOCBONDSETHWADDR: 4550 res = bond_set_dev_addr(bond_dev, slave_dev); 4551 break; 4552 case SIOCBONDCHANGEACTIVE: 4553 bond_opt_initstr(&newval, slave_dev->name); 4554 res = __bond_opt_set_notify(bond, BOND_OPT_ACTIVE_SLAVE, 4555 &newval); 4556 break; 4557 default: 4558 res = -EOPNOTSUPP; 4559 } 4560 4561 return res; 4562 } 4563 4564 static int bond_siocdevprivate(struct net_device *bond_dev, struct ifreq *ifr, 4565 void __user *data, int cmd) 4566 { 4567 struct ifreq ifrdata = { .ifr_data = data }; 4568 4569 switch (cmd) { 4570 case BOND_INFO_QUERY_OLD: 4571 return bond_do_ioctl(bond_dev, &ifrdata, SIOCBONDINFOQUERY); 4572 case BOND_SLAVE_INFO_QUERY_OLD: 4573 return bond_do_ioctl(bond_dev, &ifrdata, SIOCBONDSLAVEINFOQUERY); 4574 case BOND_ENSLAVE_OLD: 4575 return bond_do_ioctl(bond_dev, ifr, SIOCBONDENSLAVE); 4576 case BOND_RELEASE_OLD: 4577 return bond_do_ioctl(bond_dev, ifr, SIOCBONDRELEASE); 4578 case BOND_SETHWADDR_OLD: 4579 return bond_do_ioctl(bond_dev, ifr, SIOCBONDSETHWADDR); 4580 case BOND_CHANGE_ACTIVE_OLD: 4581 return bond_do_ioctl(bond_dev, ifr, SIOCBONDCHANGEACTIVE); 4582 } 4583 4584 return -EOPNOTSUPP; 4585 } 4586 4587 static void bond_change_rx_flags(struct net_device *bond_dev, int change) 4588 { 4589 struct bonding *bond = netdev_priv(bond_dev); 4590 4591 if (change & IFF_PROMISC) 4592 bond_set_promiscuity(bond, 4593 bond_dev->flags & IFF_PROMISC ? 1 : -1); 4594 4595 if (change & IFF_ALLMULTI) 4596 bond_set_allmulti(bond, 4597 bond_dev->flags & IFF_ALLMULTI ? 
1 : -1);
4598 }
4599
4600 static void bond_set_rx_mode(struct net_device *bond_dev)
4601 {
4602 struct bonding *bond = netdev_priv(bond_dev);
4603 struct list_head *iter;
4604 struct slave *slave;
4605
4606 rcu_read_lock();
4607 if (bond_uses_primary(bond)) {
4608 slave = rcu_dereference(bond->curr_active_slave);
4609 if (slave) {
4610 dev_uc_sync(slave->dev, bond_dev);
4611 dev_mc_sync(slave->dev, bond_dev);
4612 }
4613 } else {
4614 bond_for_each_slave_rcu(bond, slave, iter) {
4615 dev_uc_sync_multiple(slave->dev, bond_dev);
4616 dev_mc_sync_multiple(slave->dev, bond_dev);
4617 }
4618 }
4619 rcu_read_unlock();
4620 }
4621
4622 static int bond_neigh_init(struct neighbour *n)
4623 {
4624 struct bonding *bond = netdev_priv(n->dev);
4625 const struct net_device_ops *slave_ops;
4626 struct neigh_parms parms;
4627 struct slave *slave;
4628 int ret = 0;
4629
4630 rcu_read_lock();
4631 slave = bond_first_slave_rcu(bond);
4632 if (!slave)
4633 goto out;
4634 slave_ops = slave->dev->netdev_ops;
4635 if (!slave_ops->ndo_neigh_setup)
4636 goto out;
4637
4638 /* TODO: find another way [1] to implement this.
4639 * Passing a zeroed structure is fragile,
4640 * but at least we do not pass garbage.
4641 *
4642 * [1] One way would be that ndo_neigh_setup() never touches
4643 * struct neigh_parms, but propagates the new neigh_setup()
4644 * back to ___neigh_create() / neigh_parms_alloc()
4645 */
4646 memset(&parms, 0, sizeof(parms));
4647 ret = slave_ops->ndo_neigh_setup(slave->dev, &parms);
4648
4649 if (ret)
4650 goto out;
4651
4652 if (parms.neigh_setup)
4653 ret = parms.neigh_setup(n);
4654 out:
4655 rcu_read_unlock();
4656 return ret;
4657 }
4658
4659 /* The bonding ndo_neigh_setup is called at init time before any
4660 * slave exists. So we must declare a proxy setup function which will
4661 * be used at run time to resolve the actual slave neigh param setup.
4662 *
4663 * It's also called by master devices (such as vlans) to set up their
4664 * underlying devices. In that case - do nothing, we're already set up from
4665 * our init.
4666 */
4667 static int bond_neigh_setup(struct net_device *dev,
4668 struct neigh_parms *parms)
4669 {
4670 /* modify only our neigh_parms */
4671 if (parms->dev == dev)
4672 parms->neigh_setup = bond_neigh_init;
4673
4674 return 0;
4675 }
4676
4677 /* Change the MTU of all of a master's slaves to match the master */
4678 static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
4679 {
4680 struct bonding *bond = netdev_priv(bond_dev);
4681 struct slave *slave, *rollback_slave;
4682 struct list_head *iter;
4683 int res = 0;
4684
4685 netdev_dbg(bond_dev, "bond=%p, new_mtu=%d\n", bond, new_mtu);
4686
4687 bond_for_each_slave(bond, slave, iter) {
4688 slave_dbg(bond_dev, slave->dev, "s %p c_m %p\n",
4689 slave, slave->dev->netdev_ops->ndo_change_mtu);
4690
4691 res = dev_set_mtu(slave->dev, new_mtu);
4692
4693 if (res) {
4694 /* If we failed to set the slave's mtu to the new value
4695 * we must abort the operation even in ACTIVE_BACKUP
4696 * mode, because if we allow the backup slaves to have
4697 * different mtu values than the active slave we'll
4698 * need to change their mtu when doing a failover. That
4699 * means changing their mtu from timer context, which
4700 * is probably not a good idea.
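 *
 * Example (illustrative): "ip link set bond0 mtu 9000" lands here and
 * walks every slave via dev_set_mtu(); on the first failure the code
 * below rolls the already-changed slaves back to the old bond MTU.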
4701 */ 4702 slave_dbg(bond_dev, slave->dev, "err %d setting mtu to %d\n", 4703 res, new_mtu); 4704 goto unwind; 4705 } 4706 } 4707 4708 bond_dev->mtu = new_mtu; 4709 4710 return 0; 4711 4712 unwind: 4713 /* unwind from head to the slave that failed */ 4714 bond_for_each_slave(bond, rollback_slave, iter) { 4715 int tmp_res; 4716 4717 if (rollback_slave == slave) 4718 break; 4719 4720 tmp_res = dev_set_mtu(rollback_slave->dev, bond_dev->mtu); 4721 if (tmp_res) 4722 slave_dbg(bond_dev, rollback_slave->dev, "unwind err %d\n", 4723 tmp_res); 4724 } 4725 4726 return res; 4727 } 4728 4729 /* Change HW address 4730 * 4731 * Note that many devices must be down to change the HW address, and 4732 * downing the master releases all slaves. We can make bonds full of 4733 * bonding devices to test this, however. 4734 */ 4735 static int bond_set_mac_address(struct net_device *bond_dev, void *addr) 4736 { 4737 struct bonding *bond = netdev_priv(bond_dev); 4738 struct slave *slave, *rollback_slave; 4739 struct sockaddr_storage *ss = addr, tmp_ss; 4740 struct list_head *iter; 4741 int res = 0; 4742 4743 if (BOND_MODE(bond) == BOND_MODE_ALB) 4744 return bond_alb_set_mac_address(bond_dev, addr); 4745 4746 4747 netdev_dbg(bond_dev, "%s: bond=%p\n", __func__, bond); 4748 4749 /* If fail_over_mac is enabled, do nothing and return success. 4750 * Returning an error causes ifenslave to fail. 4751 */ 4752 if (bond->params.fail_over_mac && 4753 BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) 4754 return 0; 4755 4756 if (!is_valid_ether_addr(ss->__data)) 4757 return -EADDRNOTAVAIL; 4758 4759 bond_for_each_slave(bond, slave, iter) { 4760 slave_dbg(bond_dev, slave->dev, "%s: slave=%p\n", 4761 __func__, slave); 4762 res = dev_set_mac_address(slave->dev, addr, NULL); 4763 if (res) { 4764 /* TODO: consider downing the slave 4765 * and retry ? 4766 * User should expect communications 4767 * breakage anyway until ARP finish 4768 * updating, so... 4769 */ 4770 slave_dbg(bond_dev, slave->dev, "%s: err %d\n", 4771 __func__, res); 4772 goto unwind; 4773 } 4774 } 4775 4776 /* success */ 4777 dev_addr_set(bond_dev, ss->__data); 4778 return 0; 4779 4780 unwind: 4781 memcpy(tmp_ss.__data, bond_dev->dev_addr, bond_dev->addr_len); 4782 tmp_ss.ss_family = bond_dev->type; 4783 4784 /* unwind from head to the slave that failed */ 4785 bond_for_each_slave(bond, rollback_slave, iter) { 4786 int tmp_res; 4787 4788 if (rollback_slave == slave) 4789 break; 4790 4791 tmp_res = dev_set_mac_address(rollback_slave->dev, 4792 (struct sockaddr *)&tmp_ss, NULL); 4793 if (tmp_res) { 4794 slave_dbg(bond_dev, rollback_slave->dev, "%s: unwind err %d\n", 4795 __func__, tmp_res); 4796 } 4797 } 4798 4799 return res; 4800 } 4801 4802 /** 4803 * bond_get_slave_by_id - get xmit slave with slave_id 4804 * @bond: bonding device that is transmitting 4805 * @slave_id: slave id up to slave_cnt-1 through which to transmit 4806 * 4807 * This function tries to get slave with slave_id but in case 4808 * it fails, it tries to find the first available slave for transmission. 
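 *
 * Example (illustrative): with slaves [eth0, eth1, eth2] and slave_id 1,
 * the first loop tries eth1 then eth2; if neither can tx, the second
 * loop falls back to eth0.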
4809 */ 4810 static struct slave *bond_get_slave_by_id(struct bonding *bond, 4811 int slave_id) 4812 { 4813 struct list_head *iter; 4814 struct slave *slave; 4815 int i = slave_id; 4816 4817 /* Here we start from the slave with slave_id */ 4818 bond_for_each_slave_rcu(bond, slave, iter) { 4819 if (--i < 0) { 4820 if (bond_slave_can_tx(slave)) 4821 return slave; 4822 } 4823 } 4824 4825 /* Here we start from the first slave up to slave_id */ 4826 i = slave_id; 4827 bond_for_each_slave_rcu(bond, slave, iter) { 4828 if (--i < 0) 4829 break; 4830 if (bond_slave_can_tx(slave)) 4831 return slave; 4832 } 4833 /* no slave that can tx has been found */ 4834 return NULL; 4835 } 4836 4837 /** 4838 * bond_rr_gen_slave_id - generate slave id based on packets_per_slave 4839 * @bond: bonding device to use 4840 * 4841 * Based on the value of the bonding device's packets_per_slave parameter 4842 * this function generates a slave id, which is usually used as the next 4843 * slave to transmit through. 4844 */ 4845 static u32 bond_rr_gen_slave_id(struct bonding *bond) 4846 { 4847 u32 slave_id; 4848 struct reciprocal_value reciprocal_packets_per_slave; 4849 int packets_per_slave = bond->params.packets_per_slave; 4850 4851 switch (packets_per_slave) { 4852 case 0: 4853 slave_id = get_random_u32(); 4854 break; 4855 case 1: 4856 slave_id = this_cpu_inc_return(*bond->rr_tx_counter); 4857 break; 4858 default: 4859 reciprocal_packets_per_slave = 4860 bond->params.reciprocal_packets_per_slave; 4861 slave_id = this_cpu_inc_return(*bond->rr_tx_counter); 4862 slave_id = reciprocal_divide(slave_id, 4863 reciprocal_packets_per_slave); 4864 break; 4865 } 4866 4867 return slave_id; 4868 } 4869 4870 static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond, 4871 struct sk_buff *skb) 4872 { 4873 struct slave *slave; 4874 int slave_cnt; 4875 u32 slave_id; 4876 4877 /* Start with the curr_active_slave that joined the bond as the 4878 * default for sending IGMP traffic. For failover purposes one 4879 * needs to maintain some consistency for the interface that will 4880 * send the join/membership reports. The curr_active_slave found 4881 * will send all of this type of traffic. 
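 *
 * Illustrative consequence: IGMP packets always leave through a single
 * interface (curr_active_slave, else the first slave that can tx),
 * while all other traffic is spread round-robin below.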
4882 */ 4883 if (skb->protocol == htons(ETH_P_IP)) { 4884 int noff = skb_network_offset(skb); 4885 struct iphdr *iph; 4886 4887 if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) 4888 goto non_igmp; 4889 4890 iph = ip_hdr(skb); 4891 if (iph->protocol == IPPROTO_IGMP) { 4892 slave = rcu_dereference(bond->curr_active_slave); 4893 if (slave) 4894 return slave; 4895 return bond_get_slave_by_id(bond, 0); 4896 } 4897 } 4898 4899 non_igmp: 4900 slave_cnt = READ_ONCE(bond->slave_cnt); 4901 if (likely(slave_cnt)) { 4902 slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; 4903 return bond_get_slave_by_id(bond, slave_id); 4904 } 4905 return NULL; 4906 } 4907 4908 static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond, 4909 struct xdp_buff *xdp) 4910 { 4911 struct slave *slave; 4912 int slave_cnt; 4913 u32 slave_id; 4914 const struct ethhdr *eth; 4915 void *data = xdp->data; 4916 4917 if (data + sizeof(struct ethhdr) > xdp->data_end) 4918 goto non_igmp; 4919 4920 eth = (struct ethhdr *)data; 4921 data += sizeof(struct ethhdr); 4922 4923 /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */ 4924 if (eth->h_proto == htons(ETH_P_IP)) { 4925 const struct iphdr *iph; 4926 4927 if (data + sizeof(struct iphdr) > xdp->data_end) 4928 goto non_igmp; 4929 4930 iph = (struct iphdr *)data; 4931 4932 if (iph->protocol == IPPROTO_IGMP) { 4933 slave = rcu_dereference(bond->curr_active_slave); 4934 if (slave) 4935 return slave; 4936 return bond_get_slave_by_id(bond, 0); 4937 } 4938 } 4939 4940 non_igmp: 4941 slave_cnt = READ_ONCE(bond->slave_cnt); 4942 if (likely(slave_cnt)) { 4943 slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; 4944 return bond_get_slave_by_id(bond, slave_id); 4945 } 4946 return NULL; 4947 } 4948 4949 static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, 4950 struct net_device *bond_dev) 4951 { 4952 struct bonding *bond = netdev_priv(bond_dev); 4953 struct slave *slave; 4954 4955 slave = bond_xmit_roundrobin_slave_get(bond, skb); 4956 if (likely(slave)) 4957 return bond_dev_queue_xmit(bond, skb, slave->dev); 4958 4959 return bond_tx_drop(bond_dev, skb); 4960 } 4961 4962 static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond) 4963 { 4964 return rcu_dereference(bond->curr_active_slave); 4965 } 4966 4967 /* In active-backup mode, we know that bond->curr_active_slave is always valid if 4968 * the bond has a usable interface. 4969 */ 4970 static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb, 4971 struct net_device *bond_dev) 4972 { 4973 struct bonding *bond = netdev_priv(bond_dev); 4974 struct slave *slave; 4975 4976 slave = bond_xmit_activebackup_slave_get(bond); 4977 if (slave) 4978 return bond_dev_queue_xmit(bond, skb, slave->dev); 4979 4980 return bond_tx_drop(bond_dev, skb); 4981 } 4982 4983 /* Use this to update slave_array when (a) it's not appropriate to update 4984 * slave_array right away (note that update_slave_array() may sleep) 4985 * and / or (b) RTNL is not held. 4986 */ 4987 void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay) 4988 { 4989 queue_delayed_work(bond->wq, &bond->slave_arr_work, delay); 4990 } 4991 4992 /* Slave array work handler. 
Holds only RTNL */
4993 static void bond_slave_arr_handler(struct work_struct *work)
4994 {
4995 struct bonding *bond = container_of(work, struct bonding,
4996 slave_arr_work.work);
4997 int ret;
4998
4999 if (!rtnl_trylock())
5000 goto err;
5001
5002 ret = bond_update_slave_arr(bond, NULL);
5003 rtnl_unlock();
5004 if (ret) {
5005 pr_warn_ratelimited("Failed to update slave array from WT\n");
5006 goto err;
5007 }
5008 return;
5009
5010 err:
5011 bond_slave_arr_work_rearm(bond, 1);
5012 }
5013
5014 static void bond_skip_slave(struct bond_up_slave *slaves,
5015 struct slave *skipslave)
5016 {
5017 int idx;
5018
5019 /* Rare situation where caller has asked to skip a specific
5020 * slave but allocation failed (most likely!). BTW this is
5021 * only possible when the call is initiated from
5022 * __bond_release_one(). In this situation, overwrite the
5023 * skipslave entry in the array with the last entry from the
5024 * array to avoid a situation where the xmit path may choose
5025 * this to-be-skipped slave to send a packet out.
5026 */
5027 for (idx = 0; slaves && idx < slaves->count; idx++) {
5028 if (skipslave == slaves->arr[idx]) {
5029 slaves->arr[idx] =
5030 slaves->arr[slaves->count - 1];
5031 slaves->count--;
5032 break;
5033 }
5034 }
5035 }
5036
5037 static void bond_set_slave_arr(struct bonding *bond,
5038 struct bond_up_slave *usable_slaves,
5039 struct bond_up_slave *all_slaves)
5040 {
5041 struct bond_up_slave *usable, *all;
5042
5043 usable = rtnl_dereference(bond->usable_slaves);
5044 rcu_assign_pointer(bond->usable_slaves, usable_slaves);
5045 kfree_rcu(usable, rcu);
5046
5047 all = rtnl_dereference(bond->all_slaves);
5048 rcu_assign_pointer(bond->all_slaves, all_slaves);
5049 kfree_rcu(all, rcu);
5050 }
5051
5052 static void bond_reset_slave_arr(struct bonding *bond)
5053 {
5054 bond_set_slave_arr(bond, NULL, NULL);
5055 }
5056
5057 /* Build the usable slaves array in control path for modes that use xmit-hash
5058 * to determine the slave interface -
5059 * (a) BOND_MODE_8023AD
5060 * (b) BOND_MODE_XOR
5061 * (c) (BOND_MODE_TLB || BOND_MODE_ALB) && tlb_dynamic_lb == 0
5062 *
5063 * The caller is expected to hold RTNL only and NO other lock!
5064 */
5065 int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
5066 {
5067 struct bond_up_slave *usable_slaves = NULL, *all_slaves = NULL;
5068 struct slave *slave;
5069 struct list_head *iter;
5070 int agg_id = 0;
5071 int ret = 0;
5072
5073 might_sleep();
5074
5075 usable_slaves = kzalloc(struct_size(usable_slaves, arr,
5076 bond->slave_cnt), GFP_KERNEL);
5077 all_slaves = kzalloc(struct_size(all_slaves, arr,
5078 bond->slave_cnt), GFP_KERNEL);
5079 if (!usable_slaves || !all_slaves) {
5080 ret = -ENOMEM;
5081 goto out;
5082 }
5083 if (BOND_MODE(bond) == BOND_MODE_8023AD) {
5084 struct ad_info ad_info;
5085
5086 spin_lock_bh(&bond->mode_lock);
5087 if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
5088 spin_unlock_bh(&bond->mode_lock);
5089 pr_debug("bond_3ad_get_active_agg_info failed\n");
5090 /* No active aggregator means it's not safe to use
5091 * the previous array.
5092 */ 5093 bond_reset_slave_arr(bond); 5094 goto out; 5095 } 5096 spin_unlock_bh(&bond->mode_lock); 5097 agg_id = ad_info.aggregator_id; 5098 } 5099 bond_for_each_slave(bond, slave, iter) { 5100 if (skipslave == slave) 5101 continue; 5102 5103 all_slaves->arr[all_slaves->count++] = slave; 5104 if (BOND_MODE(bond) == BOND_MODE_8023AD) { 5105 struct aggregator *agg; 5106 5107 agg = SLAVE_AD_INFO(slave)->port.aggregator; 5108 if (!agg || agg->aggregator_identifier != agg_id) 5109 continue; 5110 } 5111 if (!bond_slave_can_tx(slave)) 5112 continue; 5113 5114 slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n", 5115 usable_slaves->count); 5116 5117 usable_slaves->arr[usable_slaves->count++] = slave; 5118 } 5119 5120 bond_set_slave_arr(bond, usable_slaves, all_slaves); 5121 return ret; 5122 out: 5123 if (ret != 0 && skipslave) { 5124 bond_skip_slave(rtnl_dereference(bond->all_slaves), 5125 skipslave); 5126 bond_skip_slave(rtnl_dereference(bond->usable_slaves), 5127 skipslave); 5128 } 5129 kfree_rcu(all_slaves, rcu); 5130 kfree_rcu(usable_slaves, rcu); 5131 5132 return ret; 5133 } 5134 5135 static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond, 5136 struct sk_buff *skb, 5137 struct bond_up_slave *slaves) 5138 { 5139 struct slave *slave; 5140 unsigned int count; 5141 u32 hash; 5142 5143 hash = bond_xmit_hash(bond, skb); 5144 count = slaves ? READ_ONCE(slaves->count) : 0; 5145 if (unlikely(!count)) 5146 return NULL; 5147 5148 slave = slaves->arr[hash % count]; 5149 return slave; 5150 } 5151 5152 static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond, 5153 struct xdp_buff *xdp) 5154 { 5155 struct bond_up_slave *slaves; 5156 unsigned int count; 5157 u32 hash; 5158 5159 hash = bond_xmit_hash_xdp(bond, xdp); 5160 slaves = rcu_dereference(bond->usable_slaves); 5161 count = slaves ? READ_ONCE(slaves->count) : 0; 5162 if (unlikely(!count)) 5163 return NULL; 5164 5165 return slaves->arr[hash % count]; 5166 } 5167 5168 /* Use this Xmit function for 3AD as well as XOR modes. The current 5169 * usable slave array is formed in the control path. The xmit function 5170 * just calculates hash and sends the packet out. 5171 */ 5172 static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb, 5173 struct net_device *dev) 5174 { 5175 struct bonding *bond = netdev_priv(dev); 5176 struct bond_up_slave *slaves; 5177 struct slave *slave; 5178 5179 slaves = rcu_dereference(bond->usable_slaves); 5180 slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); 5181 if (likely(slave)) 5182 return bond_dev_queue_xmit(bond, skb, slave->dev); 5183 5184 return bond_tx_drop(dev, skb); 5185 } 5186 5187 /* in broadcast mode, we send everything to all usable interfaces. 
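 *
 * Sketch (illustrative): with three up slaves the skb is cloned for
 * the first two and the original is consumed by the last one, so the
 * happy path needs no extra copy and no explicit kfree.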
*/ 5188 static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, 5189 struct net_device *bond_dev) 5190 { 5191 struct bonding *bond = netdev_priv(bond_dev); 5192 struct slave *slave = NULL; 5193 struct list_head *iter; 5194 bool xmit_suc = false; 5195 bool skb_used = false; 5196 5197 bond_for_each_slave_rcu(bond, slave, iter) { 5198 struct sk_buff *skb2; 5199 5200 if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) 5201 continue; 5202 5203 if (bond_is_last_slave(bond, slave)) { 5204 skb2 = skb; 5205 skb_used = true; 5206 } else { 5207 skb2 = skb_clone(skb, GFP_ATOMIC); 5208 if (!skb2) { 5209 net_err_ratelimited("%s: Error: %s: skb_clone() failed\n", 5210 bond_dev->name, __func__); 5211 continue; 5212 } 5213 } 5214 5215 if (bond_dev_queue_xmit(bond, skb2, slave->dev) == NETDEV_TX_OK) 5216 xmit_suc = true; 5217 } 5218 5219 if (!skb_used) 5220 dev_kfree_skb_any(skb); 5221 5222 if (xmit_suc) 5223 return NETDEV_TX_OK; 5224 5225 dev_core_stats_tx_dropped_inc(bond_dev); 5226 return NET_XMIT_DROP; 5227 } 5228 5229 /*------------------------- Device initialization ---------------------------*/ 5230 5231 /* Lookup the slave that corresponds to a qid */ 5232 static inline int bond_slave_override(struct bonding *bond, 5233 struct sk_buff *skb) 5234 { 5235 struct slave *slave = NULL; 5236 struct list_head *iter; 5237 5238 if (!skb_rx_queue_recorded(skb)) 5239 return 1; 5240 5241 /* Find out if any slaves have the same mapping as this skb. */ 5242 bond_for_each_slave_rcu(bond, slave, iter) { 5243 if (slave->queue_id == skb_get_queue_mapping(skb)) { 5244 if (bond_slave_is_up(slave) && 5245 slave->link == BOND_LINK_UP) { 5246 bond_dev_queue_xmit(bond, skb, slave->dev); 5247 return 0; 5248 } 5249 /* If the slave isn't UP, use default transmit policy. */ 5250 break; 5251 } 5252 } 5253 5254 return 1; 5255 } 5256 5257 5258 static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, 5259 struct net_device *sb_dev) 5260 { 5261 /* This helper function exists to help dev_pick_tx get the correct 5262 * destination queue. Using a helper function skips a call to 5263 * skb_tx_hash and will put the skbs in the queue we expect on their 5264 * way down to the bonding driver. 5265 */ 5266 u16 txq = skb_rx_queue_recorded(skb) ? 
skb_get_rx_queue(skb) : 0; 5267 5268 /* Save the original txq to restore before passing to the driver */ 5269 qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb_get_queue_mapping(skb); 5270 5271 if (unlikely(txq >= dev->real_num_tx_queues)) { 5272 do { 5273 txq -= dev->real_num_tx_queues; 5274 } while (txq >= dev->real_num_tx_queues); 5275 } 5276 return txq; 5277 } 5278 5279 static struct net_device *bond_xmit_get_slave(struct net_device *master_dev, 5280 struct sk_buff *skb, 5281 bool all_slaves) 5282 { 5283 struct bonding *bond = netdev_priv(master_dev); 5284 struct bond_up_slave *slaves; 5285 struct slave *slave = NULL; 5286 5287 switch (BOND_MODE(bond)) { 5288 case BOND_MODE_ROUNDROBIN: 5289 slave = bond_xmit_roundrobin_slave_get(bond, skb); 5290 break; 5291 case BOND_MODE_ACTIVEBACKUP: 5292 slave = bond_xmit_activebackup_slave_get(bond); 5293 break; 5294 case BOND_MODE_8023AD: 5295 case BOND_MODE_XOR: 5296 if (all_slaves) 5297 slaves = rcu_dereference(bond->all_slaves); 5298 else 5299 slaves = rcu_dereference(bond->usable_slaves); 5300 slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); 5301 break; 5302 case BOND_MODE_BROADCAST: 5303 break; 5304 case BOND_MODE_ALB: 5305 slave = bond_xmit_alb_slave_get(bond, skb); 5306 break; 5307 case BOND_MODE_TLB: 5308 slave = bond_xmit_tlb_slave_get(bond, skb); 5309 break; 5310 default: 5311 /* Should never happen, mode already checked */ 5312 WARN_ONCE(true, "Unknown bonding mode"); 5313 break; 5314 } 5315 5316 if (slave) 5317 return slave->dev; 5318 return NULL; 5319 } 5320 5321 static void bond_sk_to_flow(struct sock *sk, struct flow_keys *flow) 5322 { 5323 switch (sk->sk_family) { 5324 #if IS_ENABLED(CONFIG_IPV6) 5325 case AF_INET6: 5326 if (ipv6_only_sock(sk) || 5327 ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) { 5328 flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 5329 flow->addrs.v6addrs.src = inet6_sk(sk)->saddr; 5330 flow->addrs.v6addrs.dst = sk->sk_v6_daddr; 5331 break; 5332 } 5333 fallthrough; 5334 #endif 5335 default: /* AF_INET */ 5336 flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 5337 flow->addrs.v4addrs.src = inet_sk(sk)->inet_rcv_saddr; 5338 flow->addrs.v4addrs.dst = inet_sk(sk)->inet_daddr; 5339 break; 5340 } 5341 5342 flow->ports.src = inet_sk(sk)->inet_sport; 5343 flow->ports.dst = inet_sk(sk)->inet_dport; 5344 } 5345 5346 /** 5347 * bond_sk_hash_l34 - generate a hash value based on the socket's L3 and L4 fields 5348 * @sk: socket to use for headers 5349 * 5350 * This function will extract the necessary field from the socket and use 5351 * them to generate a hash based on the LAYER34 xmit_policy. 5352 * Assumes that sk is a TCP or UDP socket. 5353 */ 5354 static u32 bond_sk_hash_l34(struct sock *sk) 5355 { 5356 struct flow_keys flow; 5357 u32 hash; 5358 5359 bond_sk_to_flow(sk, &flow); 5360 5361 /* L4 */ 5362 memcpy(&hash, &flow.ports.ports, sizeof(hash)); 5363 /* L3 */ 5364 return bond_ip_hash(hash, &flow, BOND_XMIT_POLICY_LAYER34); 5365 } 5366 5367 static struct net_device *__bond_sk_get_lower_dev(struct bonding *bond, 5368 struct sock *sk) 5369 { 5370 struct bond_up_slave *slaves; 5371 struct slave *slave; 5372 unsigned int count; 5373 u32 hash; 5374 5375 slaves = rcu_dereference(bond->usable_slaves); 5376 count = slaves ? 
READ_ONCE(slaves->count) : 0; 5377 if (unlikely(!count)) 5378 return NULL; 5379 5380 hash = bond_sk_hash_l34(sk); 5381 slave = slaves->arr[hash % count]; 5382 5383 return slave->dev; 5384 } 5385 5386 static struct net_device *bond_sk_get_lower_dev(struct net_device *dev, 5387 struct sock *sk) 5388 { 5389 struct bonding *bond = netdev_priv(dev); 5390 struct net_device *lower = NULL; 5391 5392 rcu_read_lock(); 5393 if (bond_sk_check(bond)) 5394 lower = __bond_sk_get_lower_dev(bond, sk); 5395 rcu_read_unlock(); 5396 5397 return lower; 5398 } 5399 5400 #if IS_ENABLED(CONFIG_TLS_DEVICE) 5401 static netdev_tx_t bond_tls_device_xmit(struct bonding *bond, struct sk_buff *skb, 5402 struct net_device *dev) 5403 { 5404 struct net_device *tls_netdev = rcu_dereference(tls_get_ctx(skb->sk)->netdev); 5405 5406 /* tls_netdev might become NULL, even if tls_is_skb_tx_device_offloaded 5407 * was true, if tls_device_down is running in parallel, but it's OK, 5408 * because bond_get_slave_by_dev has a NULL check. 5409 */ 5410 if (likely(bond_get_slave_by_dev(bond, tls_netdev))) 5411 return bond_dev_queue_xmit(bond, skb, tls_netdev); 5412 return bond_tx_drop(dev, skb); 5413 } 5414 #endif 5415 5416 static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev) 5417 { 5418 struct bonding *bond = netdev_priv(dev); 5419 5420 if (bond_should_override_tx_queue(bond) && 5421 !bond_slave_override(bond, skb)) 5422 return NETDEV_TX_OK; 5423 5424 #if IS_ENABLED(CONFIG_TLS_DEVICE) 5425 if (tls_is_skb_tx_device_offloaded(skb)) 5426 return bond_tls_device_xmit(bond, skb, dev); 5427 #endif 5428 5429 switch (BOND_MODE(bond)) { 5430 case BOND_MODE_ROUNDROBIN: 5431 return bond_xmit_roundrobin(skb, dev); 5432 case BOND_MODE_ACTIVEBACKUP: 5433 return bond_xmit_activebackup(skb, dev); 5434 case BOND_MODE_8023AD: 5435 case BOND_MODE_XOR: 5436 return bond_3ad_xor_xmit(skb, dev); 5437 case BOND_MODE_BROADCAST: 5438 return bond_xmit_broadcast(skb, dev); 5439 case BOND_MODE_ALB: 5440 return bond_alb_xmit(skb, dev); 5441 case BOND_MODE_TLB: 5442 return bond_tlb_xmit(skb, dev); 5443 default: 5444 /* Should never happen, mode already checked */ 5445 netdev_err(dev, "Unknown bonding mode %d\n", BOND_MODE(bond)); 5446 WARN_ON_ONCE(1); 5447 return bond_tx_drop(dev, skb); 5448 } 5449 } 5450 5451 static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) 5452 { 5453 struct bonding *bond = netdev_priv(dev); 5454 netdev_tx_t ret = NETDEV_TX_OK; 5455 5456 /* If we risk deadlock from transmitting this in the 5457 * netpoll path, tell netpoll to queue the frame for later tx 5458 */ 5459 if (unlikely(is_netpoll_tx_blocked(dev))) 5460 return NETDEV_TX_BUSY; 5461 5462 rcu_read_lock(); 5463 if (bond_has_slaves(bond)) 5464 ret = __bond_start_xmit(skb, dev); 5465 else 5466 ret = bond_tx_drop(dev, skb); 5467 rcu_read_unlock(); 5468 5469 return ret; 5470 } 5471 5472 static struct net_device * 5473 bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp) 5474 { 5475 struct bonding *bond = netdev_priv(bond_dev); 5476 struct slave *slave; 5477 5478 /* Caller needs to hold rcu_read_lock() */ 5479 5480 switch (BOND_MODE(bond)) { 5481 case BOND_MODE_ROUNDROBIN: 5482 slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp); 5483 break; 5484 5485 case BOND_MODE_ACTIVEBACKUP: 5486 slave = bond_xmit_activebackup_slave_get(bond); 5487 break; 5488 5489 case BOND_MODE_8023AD: 5490 case BOND_MODE_XOR: 5491 slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp); 5492 break; 5493 5494 default: 5495 /* Should never happen. 
Mode guarded by bond_xdp_check() */ 5496 netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond)); 5497 WARN_ON_ONCE(1); 5498 return NULL; 5499 } 5500 5501 if (slave) 5502 return slave->dev; 5503 5504 return NULL; 5505 } 5506 5507 static int bond_xdp_xmit(struct net_device *bond_dev, 5508 int n, struct xdp_frame **frames, u32 flags) 5509 { 5510 int nxmit, err = -ENXIO; 5511 5512 rcu_read_lock(); 5513 5514 for (nxmit = 0; nxmit < n; nxmit++) { 5515 struct xdp_frame *frame = frames[nxmit]; 5516 struct xdp_frame *frames1[] = {frame}; 5517 struct net_device *slave_dev; 5518 struct xdp_buff xdp; 5519 5520 xdp_convert_frame_to_buff(frame, &xdp); 5521 5522 slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp); 5523 if (!slave_dev) { 5524 err = -ENXIO; 5525 break; 5526 } 5527 5528 err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags); 5529 if (err < 1) 5530 break; 5531 } 5532 5533 rcu_read_unlock(); 5534 5535 /* If error happened on the first frame then we can pass the error up, otherwise 5536 * report the number of frames that were xmitted. 5537 */ 5538 if (err < 0) 5539 return (nxmit == 0 ? err : nxmit); 5540 5541 return nxmit; 5542 } 5543 5544 static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog, 5545 struct netlink_ext_ack *extack) 5546 { 5547 struct bonding *bond = netdev_priv(dev); 5548 struct list_head *iter; 5549 struct slave *slave, *rollback_slave; 5550 struct bpf_prog *old_prog; 5551 struct netdev_bpf xdp = { 5552 .command = XDP_SETUP_PROG, 5553 .flags = 0, 5554 .prog = prog, 5555 .extack = extack, 5556 }; 5557 int err; 5558 5559 ASSERT_RTNL(); 5560 5561 if (!bond_xdp_check(bond)) 5562 return -EOPNOTSUPP; 5563 5564 old_prog = bond->xdp_prog; 5565 bond->xdp_prog = prog; 5566 5567 bond_for_each_slave(bond, slave, iter) { 5568 struct net_device *slave_dev = slave->dev; 5569 5570 if (!slave_dev->netdev_ops->ndo_bpf || 5571 !slave_dev->netdev_ops->ndo_xdp_xmit) { 5572 SLAVE_NL_ERR(dev, slave_dev, extack, 5573 "Slave device does not support XDP"); 5574 err = -EOPNOTSUPP; 5575 goto err; 5576 } 5577 5578 if (dev_xdp_prog_count(slave_dev) > 0) { 5579 SLAVE_NL_ERR(dev, slave_dev, extack, 5580 "Slave has XDP program loaded, please unload before enslaving"); 5581 err = -EOPNOTSUPP; 5582 goto err; 5583 } 5584 5585 err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); 5586 if (err < 0) { 5587 /* ndo_bpf() sets extack error message */ 5588 slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err); 5589 goto err; 5590 } 5591 if (prog) 5592 bpf_prog_inc(prog); 5593 } 5594 5595 if (prog) { 5596 static_branch_inc(&bpf_master_redirect_enabled_key); 5597 } else if (old_prog) { 5598 bpf_prog_put(old_prog); 5599 static_branch_dec(&bpf_master_redirect_enabled_key); 5600 } 5601 5602 return 0; 5603 5604 err: 5605 /* unwind the program changes */ 5606 bond->xdp_prog = old_prog; 5607 xdp.prog = old_prog; 5608 xdp.extack = NULL; /* do not overwrite original error */ 5609 5610 bond_for_each_slave(bond, rollback_slave, iter) { 5611 struct net_device *slave_dev = rollback_slave->dev; 5612 int err_unwind; 5613 5614 if (slave == rollback_slave) 5615 break; 5616 5617 err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); 5618 if (err_unwind < 0) 5619 slave_err(dev, slave_dev, 5620 "Error %d when unwinding XDP program change\n", err_unwind); 5621 else if (xdp.prog) 5622 bpf_prog_inc(xdp.prog); 5623 } 5624 return err; 5625 } 5626 5627 static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp) 5628 { 5629 switch (xdp->command) { 5630 case 
XDP_SETUP_PROG: 5631 return bond_xdp_set(dev, xdp->prog, xdp->extack); 5632 default: 5633 return -EINVAL; 5634 } 5635 } 5636 5637 static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed) 5638 { 5639 if (speed == 0 || speed == SPEED_UNKNOWN) 5640 speed = slave->speed; 5641 else 5642 speed = min(speed, slave->speed); 5643 5644 return speed; 5645 } 5646 5647 /* Set the BOND_PHC_INDEX flag to notify user space */ 5648 static int bond_set_phc_index_flag(struct kernel_hwtstamp_config *kernel_cfg) 5649 { 5650 struct ifreq *ifr = kernel_cfg->ifr; 5651 struct hwtstamp_config cfg; 5652 5653 if (kernel_cfg->copied_to_user) { 5654 /* Lower device has a legacy implementation */ 5655 if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) 5656 return -EFAULT; 5657 5658 cfg.flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX; 5659 if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg))) 5660 return -EFAULT; 5661 } else { 5662 kernel_cfg->flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX; 5663 } 5664 5665 return 0; 5666 } 5667 5668 static int bond_hwtstamp_get(struct net_device *dev, 5669 struct kernel_hwtstamp_config *cfg) 5670 { 5671 struct bonding *bond = netdev_priv(dev); 5672 struct net_device *real_dev; 5673 int err; 5674 5675 real_dev = bond_option_active_slave_get_rcu(bond); 5676 if (!real_dev) 5677 return -EOPNOTSUPP; 5678 5679 err = generic_hwtstamp_get_lower(real_dev, cfg); 5680 if (err) 5681 return err; 5682 5683 return bond_set_phc_index_flag(cfg); 5684 } 5685 5686 static int bond_hwtstamp_set(struct net_device *dev, 5687 struct kernel_hwtstamp_config *cfg, 5688 struct netlink_ext_ack *extack) 5689 { 5690 struct bonding *bond = netdev_priv(dev); 5691 struct net_device *real_dev; 5692 int err; 5693 5694 if (!(cfg->flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX)) 5695 return -EOPNOTSUPP; 5696 5697 real_dev = bond_option_active_slave_get_rcu(bond); 5698 if (!real_dev) 5699 return -EOPNOTSUPP; 5700 5701 err = generic_hwtstamp_set_lower(real_dev, cfg, extack); 5702 if (err) 5703 return err; 5704 5705 return bond_set_phc_index_flag(cfg); 5706 } 5707 5708 static int bond_ethtool_get_link_ksettings(struct net_device *bond_dev, 5709 struct ethtool_link_ksettings *cmd) 5710 { 5711 struct bonding *bond = netdev_priv(bond_dev); 5712 struct list_head *iter; 5713 struct slave *slave; 5714 u32 speed = 0; 5715 5716 cmd->base.duplex = DUPLEX_UNKNOWN; 5717 cmd->base.port = PORT_OTHER; 5718 5719 /* Since bond_slave_can_tx returns false for all inactive or down slaves, we 5720 * do not need to check mode. Though link speed might not represent 5721 * the true receive or transmit bandwidth (not all modes are symmetric) 5722 * this is an accurate maximum. 5723 */ 5724 bond_for_each_slave(bond, slave, iter) { 5725 if (bond_slave_can_tx(slave)) { 5726 bond_update_speed_duplex(slave); 5727 if (slave->speed != SPEED_UNKNOWN) { 5728 if (BOND_MODE(bond) == BOND_MODE_BROADCAST) 5729 speed = bond_mode_bcast_speed(slave, 5730 speed); 5731 else 5732 speed += slave->speed; 5733 } 5734 if (cmd->base.duplex == DUPLEX_UNKNOWN && 5735 slave->duplex != DUPLEX_UNKNOWN) 5736 cmd->base.duplex = slave->duplex; 5737 } 5738 } 5739 cmd->base.speed = speed ? 

static int bond_ethtool_get_link_ksettings(struct net_device *bond_dev,
					   struct ethtool_link_ksettings *cmd)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct list_head *iter;
	struct slave *slave;
	u32 speed = 0;

	cmd->base.duplex = DUPLEX_UNKNOWN;
	cmd->base.port = PORT_OTHER;

	/* Since bond_slave_can_tx() returns false for all inactive or down
	 * slaves, we do not need to check the mode. Though link speed might
	 * not represent the true receive or transmit bandwidth (not all modes
	 * are symmetric), this is an accurate maximum.
	 */
	bond_for_each_slave(bond, slave, iter) {
		if (bond_slave_can_tx(slave)) {
			bond_update_speed_duplex(slave);
			if (slave->speed != SPEED_UNKNOWN) {
				if (BOND_MODE(bond) == BOND_MODE_BROADCAST)
					speed = bond_mode_bcast_speed(slave,
								      speed);
				else
					speed += slave->speed;
			}
			if (cmd->base.duplex == DUPLEX_UNKNOWN &&
			    slave->duplex != DUPLEX_UNKNOWN)
				cmd->base.duplex = slave->duplex;
		}
	}
	cmd->base.speed = speed ? : SPEED_UNKNOWN;

	return 0;
}
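
/* Worked example of the aggregation above: two active 10000Mb/s slaves
 * yield 20000Mb/s in the additive modes (e.g. balance-rr or 802.3ad),
 * while in broadcast mode every frame is sent on all slaves, so
 * bond_mode_bcast_speed() reports min(10000, 10000) = 10000Mb/s instead.
 */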

static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
				     struct ethtool_drvinfo *drvinfo)
{
	strscpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d",
		 BOND_ABI_VERSION);
}

static int bond_ethtool_get_ts_info(struct net_device *bond_dev,
				    struct ethtool_ts_info *info)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct ethtool_ts_info ts_info;
	const struct ethtool_ops *ops;
	struct net_device *real_dev;
	bool sw_tx_support = false;
	struct phy_device *phydev;
	struct list_head *iter;
	struct slave *slave;
	int ret = 0;

	rcu_read_lock();
	real_dev = bond_option_active_slave_get_rcu(bond);
	dev_hold(real_dev);
	rcu_read_unlock();

	if (real_dev) {
		ops = real_dev->ethtool_ops;
		phydev = real_dev->phydev;

		if (phy_has_tsinfo(phydev)) {
			ret = phy_ts_info(phydev, info);
			goto out;
		} else if (ops->get_ts_info) {
			ret = ops->get_ts_info(real_dev, info);
			goto out;
		}
	} else {
		/* Check if all slaves support software tx timestamping */
		rcu_read_lock();
		bond_for_each_slave_rcu(bond, slave, iter) {
			ret = -1;
			ops = slave->dev->ethtool_ops;
			phydev = slave->dev->phydev;

			if (phy_has_tsinfo(phydev))
				ret = phy_ts_info(phydev, &ts_info);
			else if (ops->get_ts_info)
				ret = ops->get_ts_info(slave->dev, &ts_info);

			if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_TX_SOFTWARE)) {
				sw_tx_support = true;
				continue;
			}

			sw_tx_support = false;
			break;
		}
		rcu_read_unlock();
	}

	ret = 0;
	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
				SOF_TIMESTAMPING_SOFTWARE;
	if (sw_tx_support)
		info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE;

	info->phc_index = -1;

out:
	dev_put(real_dev);
	return ret;
}

static const struct ethtool_ops bond_ethtool_ops = {
	.get_drvinfo = bond_ethtool_get_drvinfo,
	.get_link = ethtool_op_get_link,
	.get_link_ksettings = bond_ethtool_get_link_ksettings,
	.get_ts_info = bond_ethtool_get_ts_info,
};

static const struct net_device_ops bond_netdev_ops = {
	.ndo_init = bond_init,
	.ndo_uninit = bond_uninit,
	.ndo_open = bond_open,
	.ndo_stop = bond_close,
	.ndo_start_xmit = bond_start_xmit,
	.ndo_select_queue = bond_select_queue,
	.ndo_get_stats64 = bond_get_stats,
	.ndo_eth_ioctl = bond_eth_ioctl,
	.ndo_siocbond = bond_do_ioctl,
	.ndo_siocdevprivate = bond_siocdevprivate,
	.ndo_change_rx_flags = bond_change_rx_flags,
	.ndo_set_rx_mode = bond_set_rx_mode,
	.ndo_change_mtu = bond_change_mtu,
	.ndo_set_mac_address = bond_set_mac_address,
	.ndo_neigh_setup = bond_neigh_setup,
	.ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_netpoll_setup = bond_netpoll_setup,
	.ndo_netpoll_cleanup = bond_netpoll_cleanup,
	.ndo_poll_controller = bond_poll_controller,
#endif
	.ndo_add_slave = bond_enslave,
	.ndo_del_slave = bond_release,
	.ndo_fix_features = bond_fix_features,
	.ndo_features_check = passthru_features_check,
	.ndo_get_xmit_slave = bond_xmit_get_slave,
	.ndo_sk_get_lower_dev = bond_sk_get_lower_dev,
	.ndo_bpf = bond_xdp,
	.ndo_xdp_xmit = bond_xdp_xmit,
	.ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave,
	.ndo_hwtstamp_get = bond_hwtstamp_get,
	.ndo_hwtstamp_set = bond_hwtstamp_set,
};

static const struct device_type bond_type = {
	.name = "bond",
};

static void bond_destructor(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);

	if (bond->wq)
		destroy_workqueue(bond->wq);

	free_percpu(bond->rr_tx_counter);
}

void bond_setup(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);

	spin_lock_init(&bond->mode_lock);
	bond->params = bonding_defaults;

	/* Initialize pointers */
	bond->dev = bond_dev;

	/* Initialize the device entry points */
	ether_setup(bond_dev);
	bond_dev->max_mtu = ETH_MAX_MTU;
	bond_dev->netdev_ops = &bond_netdev_ops;
	bond_dev->ethtool_ops = &bond_ethtool_ops;

	bond_dev->needs_free_netdev = true;
	bond_dev->priv_destructor = bond_destructor;

	SET_NETDEV_DEVTYPE(bond_dev, &bond_type);

	/* Initialize the device options */
	bond_dev->flags |= IFF_MASTER;
	bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT | IFF_NO_QUEUE;
	bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);

#ifdef CONFIG_XFRM_OFFLOAD
	/* set up xfrm device ops (only supported in active-backup right now) */
	bond_dev->xfrmdev_ops = &bond_xfrmdev_ops;
	INIT_LIST_HEAD(&bond->ipsec_list);
	spin_lock_init(&bond->ipsec_lock);
#endif /* CONFIG_XFRM_OFFLOAD */

	/* don't acquire bond device's netif_tx_lock when transmitting */
	bond_dev->features |= NETIF_F_LLTX;

	/* By default, we declare the bond to be fully capable of VLAN
	 * hardware acceleration. Special care is taken in the various
	 * xmit functions when there are slaves that are not hw accel
	 * capable.
	 */

	/* Don't allow bond devices to change network namespaces. */
	bond_dev->features |= NETIF_F_NETNS_LOCAL;

	bond_dev->hw_features = BOND_VLAN_FEATURES |
				NETIF_F_HW_VLAN_CTAG_RX |
				NETIF_F_HW_VLAN_CTAG_FILTER |
				NETIF_F_HW_VLAN_STAG_RX |
				NETIF_F_HW_VLAN_STAG_FILTER;

	bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
	bond_dev->features |= bond_dev->hw_features;
	bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
#ifdef CONFIG_XFRM_OFFLOAD
	bond_dev->hw_features |= BOND_XFRM_FEATURES;
	/* Only enable XFRM features if this is an active-backup config */
	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
		bond_dev->features |= BOND_XFRM_FEATURES;
#endif /* CONFIG_XFRM_OFFLOAD */
}
5940 */ 5941 static void bond_uninit(struct net_device *bond_dev) 5942 { 5943 struct bonding *bond = netdev_priv(bond_dev); 5944 struct list_head *iter; 5945 struct slave *slave; 5946 5947 bond_netpoll_cleanup(bond_dev); 5948 5949 /* Release the bonded slaves */ 5950 bond_for_each_slave(bond, slave, iter) 5951 __bond_release_one(bond_dev, slave->dev, true, true); 5952 netdev_info(bond_dev, "Released all slaves\n"); 5953 5954 bond_set_slave_arr(bond, NULL, NULL); 5955 5956 list_del(&bond->bond_list); 5957 5958 bond_debug_unregister(bond); 5959 } 5960 5961 /*------------------------- Module initialization ---------------------------*/ 5962 5963 static int __init bond_check_params(struct bond_params *params) 5964 { 5965 int arp_validate_value, fail_over_mac_value, primary_reselect_value, i; 5966 struct bond_opt_value newval; 5967 const struct bond_opt_value *valptr; 5968 int arp_all_targets_value = 0; 5969 u16 ad_actor_sys_prio = 0; 5970 u16 ad_user_port_key = 0; 5971 __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 }; 5972 int arp_ip_count; 5973 int bond_mode = BOND_MODE_ROUNDROBIN; 5974 int xmit_hashtype = BOND_XMIT_POLICY_LAYER2; 5975 int lacp_fast = 0; 5976 int tlb_dynamic_lb; 5977 5978 /* Convert string parameters. */ 5979 if (mode) { 5980 bond_opt_initstr(&newval, mode); 5981 valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), &newval); 5982 if (!valptr) { 5983 pr_err("Error: Invalid bonding mode \"%s\"\n", mode); 5984 return -EINVAL; 5985 } 5986 bond_mode = valptr->value; 5987 } 5988 5989 if (xmit_hash_policy) { 5990 if (bond_mode == BOND_MODE_ROUNDROBIN || 5991 bond_mode == BOND_MODE_ACTIVEBACKUP || 5992 bond_mode == BOND_MODE_BROADCAST) { 5993 pr_info("xmit_hash_policy param is irrelevant in mode %s\n", 5994 bond_mode_name(bond_mode)); 5995 } else { 5996 bond_opt_initstr(&newval, xmit_hash_policy); 5997 valptr = bond_opt_parse(bond_opt_get(BOND_OPT_XMIT_HASH), 5998 &newval); 5999 if (!valptr) { 6000 pr_err("Error: Invalid xmit_hash_policy \"%s\"\n", 6001 xmit_hash_policy); 6002 return -EINVAL; 6003 } 6004 xmit_hashtype = valptr->value; 6005 } 6006 } 6007 6008 if (lacp_rate) { 6009 if (bond_mode != BOND_MODE_8023AD) { 6010 pr_info("lacp_rate param is irrelevant in mode %s\n", 6011 bond_mode_name(bond_mode)); 6012 } else { 6013 bond_opt_initstr(&newval, lacp_rate); 6014 valptr = bond_opt_parse(bond_opt_get(BOND_OPT_LACP_RATE), 6015 &newval); 6016 if (!valptr) { 6017 pr_err("Error: Invalid lacp rate \"%s\"\n", 6018 lacp_rate); 6019 return -EINVAL; 6020 } 6021 lacp_fast = valptr->value; 6022 } 6023 } 6024 6025 if (ad_select) { 6026 bond_opt_initstr(&newval, ad_select); 6027 valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_SELECT), 6028 &newval); 6029 if (!valptr) { 6030 pr_err("Error: Invalid ad_select \"%s\"\n", ad_select); 6031 return -EINVAL; 6032 } 6033 params->ad_select = valptr->value; 6034 if (bond_mode != BOND_MODE_8023AD) 6035 pr_warn("ad_select param only affects 802.3ad mode\n"); 6036 } else { 6037 params->ad_select = BOND_AD_STABLE; 6038 } 6039 6040 if (max_bonds < 0) { 6041 pr_warn("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", 6042 max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS); 6043 max_bonds = BOND_DEFAULT_MAX_BONDS; 6044 } 6045 6046 if (miimon < 0) { 6047 pr_warn("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to 0\n", 6048 miimon, INT_MAX); 6049 miimon = 0; 6050 } 6051 6052 if (updelay < 0) { 6053 pr_warn("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", 6054 

	if (xmit_hash_policy) {
		if (bond_mode == BOND_MODE_ROUNDROBIN ||
		    bond_mode == BOND_MODE_ACTIVEBACKUP ||
		    bond_mode == BOND_MODE_BROADCAST) {
			pr_info("xmit_hash_policy param is irrelevant in mode %s\n",
				bond_mode_name(bond_mode));
		} else {
			bond_opt_initstr(&newval, xmit_hash_policy);
			valptr = bond_opt_parse(bond_opt_get(BOND_OPT_XMIT_HASH),
						&newval);
			if (!valptr) {
				pr_err("Error: Invalid xmit_hash_policy \"%s\"\n",
				       xmit_hash_policy);
				return -EINVAL;
			}
			xmit_hashtype = valptr->value;
		}
	}

	if (lacp_rate) {
		if (bond_mode != BOND_MODE_8023AD) {
			pr_info("lacp_rate param is irrelevant in mode %s\n",
				bond_mode_name(bond_mode));
		} else {
			bond_opt_initstr(&newval, lacp_rate);
			valptr = bond_opt_parse(bond_opt_get(BOND_OPT_LACP_RATE),
						&newval);
			if (!valptr) {
				pr_err("Error: Invalid lacp rate \"%s\"\n",
				       lacp_rate);
				return -EINVAL;
			}
			lacp_fast = valptr->value;
		}
	}

	if (ad_select) {
		bond_opt_initstr(&newval, ad_select);
		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_SELECT),
					&newval);
		if (!valptr) {
			pr_err("Error: Invalid ad_select \"%s\"\n", ad_select);
			return -EINVAL;
		}
		params->ad_select = valptr->value;
		if (bond_mode != BOND_MODE_8023AD)
			pr_warn("ad_select param only affects 802.3ad mode\n");
	} else {
		params->ad_select = BOND_AD_STABLE;
	}

	if (max_bonds < 0) {
		pr_warn("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n",
			max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS);
		max_bonds = BOND_DEFAULT_MAX_BONDS;
	}

	if (miimon < 0) {
		pr_warn("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to 0\n",
			miimon, INT_MAX);
		miimon = 0;
	}

	if (updelay < 0) {
		pr_warn("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n",
			updelay, INT_MAX);
		updelay = 0;
	}

	if (downdelay < 0) {
		pr_warn("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n",
			downdelay, INT_MAX);
		downdelay = 0;
	}

	if (use_carrier != 0 && use_carrier != 1) {
		pr_warn("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n",
			use_carrier);
		use_carrier = 1;
	}

	if (num_peer_notif < 0 || num_peer_notif > 255) {
		pr_warn("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255, so it was reset to 1\n",
			num_peer_notif);
		num_peer_notif = 1;
	}

	/* reset values for 802.3ad/TLB/ALB */
	if (!bond_mode_uses_arp(bond_mode)) {
		if (!miimon) {
			pr_warn("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex, which are essential for 802.3ad operation\n");
			pr_warn("Forcing miimon to 100msec\n");
			miimon = BOND_DEFAULT_MIIMON;
		}
	}
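
	/* E.g. "modprobe bonding mode=802.3ad" with no miimon= parameter
	 * ends up monitoring every 100 ms (BOND_DEFAULT_MIIMON), since
	 * 802.3ad, TLB and ALB cannot operate without link, speed and
	 * duplex tracking.
	 */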
6119 */ 6120 pr_warn("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n", 6121 updelay, downdelay); 6122 } 6123 } else { 6124 /* don't allow arp monitoring */ 6125 if (arp_interval) { 6126 pr_warn("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n", 6127 miimon, arp_interval); 6128 arp_interval = 0; 6129 } 6130 6131 if ((updelay % miimon) != 0) { 6132 pr_warn("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", 6133 updelay, miimon, (updelay / miimon) * miimon); 6134 } 6135 6136 updelay /= miimon; 6137 6138 if ((downdelay % miimon) != 0) { 6139 pr_warn("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n", 6140 downdelay, miimon, 6141 (downdelay / miimon) * miimon); 6142 } 6143 6144 downdelay /= miimon; 6145 } 6146 6147 if (arp_interval < 0) { 6148 pr_warn("Warning: arp_interval module parameter (%d), not in range 0-%d, so it was reset to 0\n", 6149 arp_interval, INT_MAX); 6150 arp_interval = 0; 6151 } 6152 6153 for (arp_ip_count = 0, i = 0; 6154 (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) { 6155 __be32 ip; 6156 6157 /* not a complete check, but good enough to catch mistakes */ 6158 if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) || 6159 !bond_is_ip_target_ok(ip)) { 6160 pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", 6161 arp_ip_target[i]); 6162 arp_interval = 0; 6163 } else { 6164 if (bond_get_targets_ip(arp_target, ip) == -1) 6165 arp_target[arp_ip_count++] = ip; 6166 else 6167 pr_warn("Warning: duplicate address %pI4 in arp_ip_target, skipping\n", 6168 &ip); 6169 } 6170 } 6171 6172 if (arp_interval && !arp_ip_count) { 6173 /* don't allow arping if no arp_ip_target given... 

	if (arp_interval < 0) {
		pr_warn("Warning: arp_interval module parameter (%d), not in range 0-%d, so it was reset to 0\n",
			arp_interval, INT_MAX);
		arp_interval = 0;
	}

	for (arp_ip_count = 0, i = 0;
	     (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) {
		__be32 ip;

		/* not a complete check, but good enough to catch mistakes */
		if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) ||
		    !bond_is_ip_target_ok(ip)) {
			pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n",
				arp_ip_target[i]);
			arp_interval = 0;
		} else {
			if (bond_get_targets_ip(arp_target, ip) == -1)
				arp_target[arp_ip_count++] = ip;
			else
				pr_warn("Warning: duplicate address %pI4 in arp_ip_target, skipping\n",
					&ip);
		}
	}

	if (arp_interval && !arp_ip_count) {
		/* don't allow arping if no arp_ip_target given... */
		pr_warn("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n",
			arp_interval);
		arp_interval = 0;
	}

	if (arp_validate) {
		if (!arp_interval) {
			pr_err("arp_validate requires arp_interval\n");
			return -EINVAL;
		}

		bond_opt_initstr(&newval, arp_validate);
		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_VALIDATE),
					&newval);
		if (!valptr) {
			pr_err("Error: invalid arp_validate \"%s\"\n",
			       arp_validate);
			return -EINVAL;
		}
		arp_validate_value = valptr->value;
	} else {
		arp_validate_value = 0;
	}

	if (arp_all_targets) {
		bond_opt_initstr(&newval, arp_all_targets);
		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS),
					&newval);
		if (!valptr) {
			pr_err("Error: invalid arp_all_targets_value \"%s\"\n",
			       arp_all_targets);
			arp_all_targets_value = 0;
		} else {
			arp_all_targets_value = valptr->value;
		}
	}

	if (miimon) {
		pr_info("MII link monitoring set to %d ms\n", miimon);
	} else if (arp_interval) {
		valptr = bond_opt_get_val(BOND_OPT_ARP_VALIDATE,
					  arp_validate_value);
		pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):",
			arp_interval, valptr->string, arp_ip_count);

		for (i = 0; i < arp_ip_count; i++)
			pr_cont(" %s", arp_ip_target[i]);

		pr_cont("\n");
	} else if (max_bonds) {
		/* miimon and arp_interval not set, we need one so things
		 * work as expected, see bonding.txt for details
		 */
		pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! See bonding.txt for details\n");
	}
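
	/* An ARP-monitoring configuration that satisfies the checks above
	 * (illustrative addresses):
	 *
	 *	modprobe bonding mode=active-backup arp_interval=1000 \
	 *		arp_ip_target=10.0.0.1,10.0.0.2
	 *
	 * which logs "ARP monitoring set to 1000 ms, validate none, with 2
	 * target(s): 10.0.0.1 10.0.0.2".
	 */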

	if (primary && !bond_mode_uses_primary(bond_mode)) {
		/* currently, using a primary only makes sense
		 * in active-backup, TLB or ALB modes
		 */
		pr_warn("Warning: %s primary device specified but has no effect in %s mode\n",
			primary, bond_mode_name(bond_mode));
		primary = NULL;
	}

	if (primary && primary_reselect) {
		bond_opt_initstr(&newval, primary_reselect);
		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_PRIMARY_RESELECT),
					&newval);
		if (!valptr) {
			pr_err("Error: Invalid primary_reselect \"%s\"\n",
			       primary_reselect);
			return -EINVAL;
		}
		primary_reselect_value = valptr->value;
	} else {
		primary_reselect_value = BOND_PRI_RESELECT_ALWAYS;
	}

	if (fail_over_mac) {
		bond_opt_initstr(&newval, fail_over_mac);
		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_FAIL_OVER_MAC),
					&newval);
		if (!valptr) {
			pr_err("Error: invalid fail_over_mac \"%s\"\n",
			       fail_over_mac);
			return -EINVAL;
		}
		fail_over_mac_value = valptr->value;
		if (bond_mode != BOND_MODE_ACTIVEBACKUP)
			pr_warn("Warning: fail_over_mac only affects active-backup mode\n");
	} else {
		fail_over_mac_value = BOND_FOM_NONE;
	}

	bond_opt_initstr(&newval, "default");
	valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_ACTOR_SYS_PRIO),
				&newval);
	if (!valptr) {
		pr_err("Error: No ad_actor_sys_prio default value\n");
		return -EINVAL;
	}
	ad_actor_sys_prio = valptr->value;

	valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_USER_PORT_KEY),
				&newval);
	if (!valptr) {
		pr_err("Error: No ad_user_port_key default value\n");
		return -EINVAL;
	}
	ad_user_port_key = valptr->value;

	bond_opt_initstr(&newval, "default");
	valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB), &newval);
	if (!valptr) {
		pr_err("Error: No tlb_dynamic_lb default value\n");
		return -EINVAL;
	}
	tlb_dynamic_lb = valptr->value;

	if (lp_interval == 0) {
		pr_warn("Warning: lp_interval must be between 1 and %d, so it was reset to %d\n",
			INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL);
		lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL;
	}

	/* fill params struct with the proper values */
	params->mode = bond_mode;
	params->xmit_policy = xmit_hashtype;
	params->miimon = miimon;
	params->num_peer_notif = num_peer_notif;
	params->arp_interval = arp_interval;
	params->arp_validate = arp_validate_value;
	params->arp_all_targets = arp_all_targets_value;
	params->missed_max = 2;
	params->updelay = updelay;
	params->downdelay = downdelay;
	params->peer_notif_delay = 0;
	params->use_carrier = use_carrier;
	params->lacp_active = 1;
	params->lacp_fast = lacp_fast;
	params->primary[0] = 0;
	params->primary_reselect = primary_reselect_value;
	params->fail_over_mac = fail_over_mac_value;
	params->tx_queues = tx_queues;
	params->all_slaves_active = all_slaves_active;
	params->resend_igmp = resend_igmp;
	params->min_links = min_links;
	params->lp_interval = lp_interval;
	params->packets_per_slave = packets_per_slave;
	params->tlb_dynamic_lb = tlb_dynamic_lb;
	params->ad_actor_sys_prio = ad_actor_sys_prio;
	eth_zero_addr(params->ad_actor_system);
	params->ad_user_port_key = ad_user_port_key;
	if (packets_per_slave > 0) {
		params->reciprocal_packets_per_slave =
			reciprocal_value(packets_per_slave);
	} else {
		/* reciprocal_packets_per_slave is unused if
		 * packets_per_slave is 0 or 1, just initialize it
		 */
		params->reciprocal_packets_per_slave =
			(struct reciprocal_value) { 0 };
	}
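
	/* reciprocal_value() precomputes the magic constants that let the
	 * balance-rr hot path turn "counter / packets_per_slave" into a
	 * reciprocal_divide() (a multiply and shift) instead of a
	 * per-packet division. E.g. with packets_per_slave=3, each slave
	 * transmits three consecutive packets before the rotation moves on.
	 */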

	if (primary)
		strscpy_pad(params->primary, primary, sizeof(params->primary));

	memcpy(params->arp_targets, arp_target, sizeof(arp_target));
#if IS_ENABLED(CONFIG_IPV6)
	memset(params->ns_targets, 0, sizeof(struct in6_addr) * BOND_MAX_NS_TARGETS);
#endif

	return 0;
}

/* Called from registration process */
static int bond_init(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);

	netdev_dbg(bond_dev, "Begin bond_init\n");

	bond->wq = alloc_ordered_workqueue(bond_dev->name, WQ_MEM_RECLAIM);
	if (!bond->wq)
		return -ENOMEM;

	bond->notifier_ctx = false;

	spin_lock_init(&bond->stats_lock);
	netdev_lockdep_set_classes(bond_dev);

	list_add_tail(&bond->bond_list, &bn->dev_list);

	bond_prepare_sysfs_group(bond);

	bond_debug_register(bond);

	/* Ensure valid dev_addr */
	if (is_zero_ether_addr(bond_dev->dev_addr) &&
	    bond_dev->addr_assign_type == NET_ADDR_PERM)
		eth_hw_addr_random(bond_dev);

	return 0;
}

unsigned int bond_get_num_tx_queues(void)
{
	return tx_queues;
}
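
/* Besides the module-load loop in bonding_init(), bond_create() backs
 * the legacy sysfs interface, e.g.
 *
 *	echo +bond1 > /sys/class/net/bonding_masters
 *
 * while "ip link add bond1 type bond" takes the rtnl_link_ops path;
 * either way the device is allocated with bond_setup() and tx_queues
 * transmit queues.
 */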

/* Create a new bond based on the specified name and bonding parameters.
 * If name is NULL, obtain a suitable "bond%d" name for us.
 * Caller must NOT hold rtnl_lock; we need to release it here before we
 * set up our sysfs entries.
 */
int bond_create(struct net *net, const char *name)
{
	struct net_device *bond_dev;
	struct bonding *bond;
	int res = -ENOMEM;

	rtnl_lock();

	bond_dev = alloc_netdev_mq(sizeof(struct bonding),
				   name ? name : "bond%d", NET_NAME_UNKNOWN,
				   bond_setup, tx_queues);
	if (!bond_dev)
		goto out;

	bond = netdev_priv(bond_dev);
	dev_net_set(bond_dev, net);
	bond_dev->rtnl_link_ops = &bond_link_ops;

	res = register_netdevice(bond_dev);
	if (res < 0) {
		free_netdev(bond_dev);
		goto out;
	}

	netif_carrier_off(bond_dev);

	bond_work_init_all(bond);

out:
	rtnl_unlock();
	return res;
}

static int __net_init bond_net_init(struct net *net)
{
	struct bond_net *bn = net_generic(net, bond_net_id);

	bn->net = net;
	INIT_LIST_HEAD(&bn->dev_list);

	bond_create_proc_dir(bn);
	bond_create_sysfs(bn);

	return 0;
}

static void __net_exit bond_net_exit_batch(struct list_head *net_list)
{
	struct bond_net *bn;
	struct net *net;
	LIST_HEAD(list);

	list_for_each_entry(net, net_list, exit_list) {
		bn = net_generic(net, bond_net_id);
		bond_destroy_sysfs(bn);
	}

	/* Kill off any bonds created after unregistering bond rtnl ops */
	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		struct bonding *bond, *tmp_bond;

		bn = net_generic(net, bond_net_id);
		list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
			unregister_netdevice_queue(bond->dev, &list);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();

	list_for_each_entry(net, net_list, exit_list) {
		bn = net_generic(net, bond_net_id);
		bond_destroy_proc_dir(bn);
	}
}

static struct pernet_operations bond_net_ops = {
	.init = bond_net_init,
	.exit_batch = bond_net_exit_batch,
	.id = &bond_net_id,
	.size = sizeof(struct bond_net),
};

static int __init bonding_init(void)
{
	int i;
	int res;

	res = bond_check_params(&bonding_defaults);
	if (res)
		goto out;

	bond_create_debugfs();

	res = register_pernet_subsys(&bond_net_ops);
	if (res)
		goto err_net_ops;

	res = bond_netlink_init();
	if (res)
		goto err_link;

	for (i = 0; i < max_bonds; i++) {
		res = bond_create(&init_net, NULL);
		if (res)
			goto err;
	}

	skb_flow_dissector_init(&flow_keys_bonding,
				flow_keys_bonding_keys,
				ARRAY_SIZE(flow_keys_bonding_keys));

	register_netdevice_notifier(&bond_netdev_notifier);
out:
	return res;
err:
	bond_netlink_fini();
err_link:
	unregister_pernet_subsys(&bond_net_ops);
err_net_ops:
	bond_destroy_debugfs();
	goto out;
}

static void __exit bonding_exit(void)
{
	unregister_netdevice_notifier(&bond_netdev_notifier);

	bond_netlink_fini();
	unregister_pernet_subsys(&bond_net_ops);

	bond_destroy_debugfs();

#ifdef CONFIG_NET_POLL_CONTROLLER
	/* Make sure we don't have an imbalance on our netpoll blocking */
	WARN_ON(atomic_read(&netpoll_block_tx));
#endif
}

module_init(bonding_init);
module_exit(bonding_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");