1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */ 3 4 #include <linux/netdevice.h> 5 #include <linux/list.h> 6 #include <net/lag.h> 7 8 #include "mlx5_core.h" 9 #include "eswitch.h" 10 #include "esw/acl/ofld.h" 11 #include "en_rep.h" 12 13 struct mlx5e_rep_bond { 14 struct notifier_block nb; 15 struct netdev_net_notifier nn; 16 struct list_head metadata_list; 17 }; 18 19 struct mlx5e_rep_bond_slave_entry { 20 struct list_head list; 21 struct net_device *netdev; 22 }; 23 24 struct mlx5e_rep_bond_metadata { 25 struct list_head list; /* link to global list of rep_bond_metadata */ 26 struct mlx5_eswitch *esw; 27 /* private of uplink holding rep bond metadata list */ 28 struct net_device *lag_dev; 29 u32 metadata_reg_c_0; 30 31 struct list_head slaves_list; /* slaves list */ 32 int slaves; 33 }; 34 35 static struct mlx5e_rep_bond_metadata * 36 mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv, 37 const struct net_device *lag_dev) 38 { 39 struct mlx5e_rep_bond_metadata *found = NULL; 40 struct mlx5e_rep_bond_metadata *cur; 41 42 list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) { 43 if (cur->lag_dev == lag_dev) { 44 found = cur; 45 break; 46 } 47 } 48 49 return found; 50 } 51 52 static struct mlx5e_rep_bond_slave_entry * 53 mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata, 54 const struct net_device *netdev) 55 { 56 struct mlx5e_rep_bond_slave_entry *found = NULL; 57 struct mlx5e_rep_bond_slave_entry *cur; 58 59 list_for_each_entry(cur, &mdata->slaves_list, list) { 60 if (cur->netdev == netdev) { 61 found = cur; 62 break; 63 } 64 } 65 66 return found; 67 } 68 69 static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata) 70 { 71 netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n", 72 mdata->metadata_reg_c_0); 73 list_del(&mdata->list); 74 mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0); 75 WARN_ON(!list_empty(&mdata->slaves_list)); 76 kfree(mdata); 77 } 78 79 /* This must be called under rtnl_lock */ 80 int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, 81 struct net_device *lag_dev) 82 { 83 struct mlx5e_rep_bond_slave_entry *s_entry; 84 struct mlx5e_rep_bond_metadata *mdata; 85 struct mlx5e_rep_priv *rpriv; 86 struct mlx5e_priv *priv; 87 int err; 88 89 ASSERT_RTNL(); 90 91 rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 92 mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev); 93 if (!mdata) { 94 /* First netdev becomes slave, no metadata presents the lag_dev. Create one */ 95 mdata = kzalloc(sizeof(*mdata), GFP_KERNEL); 96 if (!mdata) 97 return -ENOMEM; 98 99 mdata->lag_dev = lag_dev; 100 mdata->esw = esw; 101 INIT_LIST_HEAD(&mdata->slaves_list); 102 mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw); 103 if (!mdata->metadata_reg_c_0) { 104 kfree(mdata); 105 return -ENOSPC; 106 } 107 list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list); 108 109 netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n", 110 mdata->metadata_reg_c_0); 111 } 112 113 s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL); 114 if (!s_entry) { 115 err = -ENOMEM; 116 goto entry_alloc_err; 117 } 118 119 s_entry->netdev = netdev; 120 priv = netdev_priv(netdev); 121 rpriv = priv->ppriv; 122 123 err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 124 mdata->metadata_reg_c_0); 125 if (err) 126 goto ingress_err; 127 128 mdata->slaves++; 129 list_add_tail(&s_entry->list, &mdata->slaves_list); 130 netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n", 131 rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0); 132 133 return 0; 134 135 ingress_err: 136 kfree(s_entry); 137 entry_alloc_err: 138 if (!mdata->slaves) 139 mlx5e_rep_bond_metadata_release(mdata); 140 return err; 141 } 142 143 /* This must be called under rtnl_lock */ 144 void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, 145 const struct net_device *netdev, 146 const struct net_device *lag_dev) 147 { 148 struct mlx5e_rep_bond_slave_entry *s_entry; 149 struct mlx5e_rep_bond_metadata *mdata; 150 struct mlx5e_rep_priv *rpriv; 151 struct mlx5e_priv *priv; 152 153 ASSERT_RTNL(); 154 155 rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); 156 mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev); 157 if (!mdata) 158 return; 159 160 s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev); 161 if (!s_entry) 162 return; 163 164 priv = netdev_priv(netdev); 165 rpriv = priv->ppriv; 166 167 /* Reset bond_metadata to zero first then reset all ingress/egress 168 * acls and rx rules of unslave representor's vport 169 */ 170 mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0); 171 mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport); 172 mlx5e_rep_bond_update(priv, false); 173 174 list_del(&s_entry->list); 175 176 netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n", 177 rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0); 178 179 if (--mdata->slaves == 0) 180 mlx5e_rep_bond_metadata_release(mdata); 181 kfree(s_entry); 182 } 183 184 static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) 185 { 186 struct mlx5e_rep_priv *rpriv; 187 struct mlx5e_priv *priv; 188 189 /* A given netdev is not a representor or not a slave of LAG configuration */ 190 if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev)) 191 return false; 192 193 priv = netdev_priv(netdev); 194 rpriv = priv->ppriv; 195 196 /* Egress acl forward to vport is supported only non-uplink representor */ 197 return rpriv->rep->vport != MLX5_VPORT_UPLINK; 198 } 199 200 static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr) 201 { 202 struct netdev_notifier_changelowerstate_info *info; 203 struct netdev_lag_lower_state_info *lag_info; 204 struct mlx5e_rep_priv *rpriv; 205 struct net_device *lag_dev; 206 struct mlx5e_priv *priv; 207 struct list_head *iter; 208 struct net_device *dev; 209 u16 acl_vport_num; 210 u16 fwd_vport_num; 211 int err; 212 213 if (!mlx5e_rep_is_lag_netdev(netdev)) 214 return; 215 216 info = ptr; 217 lag_info = info->lower_state_info; 218 /* This is not an event of a representor becoming active slave */ 219 if (!lag_info->tx_enabled) 220 return; 221 222 priv = netdev_priv(netdev); 223 rpriv = priv->ppriv; 224 fwd_vport_num = rpriv->rep->vport; 225 lag_dev = netdev_master_upper_dev_get(netdev); 226 if (!lag_dev) 227 return; 228 229 netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n", 230 lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev)); 231 232 /* Point everyone's egress acl to the vport of the active representor */ 233 netdev_for_each_lower_dev(lag_dev, dev, iter) { 234 priv = netdev_priv(dev); 235 rpriv = priv->ppriv; 236 acl_vport_num = rpriv->rep->vport; 237 if (acl_vport_num != fwd_vport_num) { 238 /* Only single rx_rule for unique bond_metadata should be 239 * present, delete it if it's saved as passive vport's 240 * rx_rule with destination as passive vport's root_ft 241 */ 242 mlx5e_rep_bond_update(priv, true); 243 err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch, 244 fwd_vport_num, 245 acl_vport_num); 246 if (err) 247 netdev_warn(dev, 248 "configure slave vport(%d) egress fwd, err(%d)", 249 acl_vport_num, err); 250 } 251 } 252 253 /* Insert new rx_rule for unique bond_metadata, save it as active vport's 254 * rx_rule with new destination as active vport's root_ft 255 */ 256 err = mlx5e_rep_bond_update(netdev_priv(netdev), false); 257 if (err) 258 netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)", 259 fwd_vport_num, err); 260 } 261 262 static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr) 263 { 264 struct netdev_notifier_changeupper_info *info = ptr; 265 struct mlx5e_rep_priv *rpriv; 266 struct net_device *lag_dev; 267 struct mlx5e_priv *priv; 268 269 if (!mlx5e_rep_is_lag_netdev(netdev)) 270 return; 271 272 priv = netdev_priv(netdev); 273 rpriv = priv->ppriv; 274 lag_dev = info->upper_dev; 275 276 netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n", 277 info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name); 278 279 if (info->linking) 280 mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev); 281 else 282 mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev); 283 } 284 285 /* Bond device of representors and netdev events are used here in specific way 286 * to support eswitch vports bonding and to perform failover of eswitch vport 287 * by modifying the vport's egress acl of lower dev representors. Thus this 288 * also change the traditional behavior of lower dev under bond device. 289 * All non-representor netdevs or representors of other vendors as lower dev 290 * of bond device are not supported. 291 */ 292 static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb, 293 unsigned long event, void *ptr) 294 { 295 struct net_device *netdev = netdev_notifier_info_to_dev(ptr); 296 297 switch (event) { 298 case NETDEV_CHANGELOWERSTATE: 299 mlx5e_rep_changelowerstate_event(netdev, ptr); 300 break; 301 case NETDEV_CHANGEUPPER: 302 mlx5e_rep_changeupper_event(netdev, ptr); 303 break; 304 } 305 return NOTIFY_DONE; 306 } 307 308 /* If HW support eswitch vports bonding, register a specific notifier to 309 * handle it when two or more representors are bonded 310 */ 311 int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv) 312 { 313 struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; 314 struct net_device *netdev = rpriv->netdev; 315 struct mlx5e_priv *priv; 316 int ret = 0; 317 318 priv = netdev_priv(netdev); 319 if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch)) 320 goto out; 321 322 uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL); 323 if (!uplink_priv->bond) { 324 ret = -ENOMEM; 325 goto out; 326 } 327 328 INIT_LIST_HEAD(&uplink_priv->bond->metadata_list); 329 uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent; 330 ret = register_netdevice_notifier_dev_net(netdev, 331 &uplink_priv->bond->nb, 332 &uplink_priv->bond->nn); 333 if (ret) { 334 netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret); 335 kvfree(uplink_priv->bond); 336 uplink_priv->bond = NULL; 337 } 338 339 out: 340 return ret; 341 } 342 343 void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv) 344 { 345 struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); 346 347 if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) || 348 !rpriv->uplink_priv.bond) 349 return; 350 351 unregister_netdevice_notifier_dev_net(rpriv->netdev, 352 &rpriv->uplink_priv.bond->nb, 353 &rpriv->uplink_priv.bond->nn); 354 kvfree(rpriv->uplink_priv.bond); 355 } 356