// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */

#include <linux/netdevice.h>
#include <linux/list.h>
#include <net/lag.h>

#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "en_rep.h"

/* Per-uplink bonding state: the netdev notifier registered to observe LAG
 * events and the list of per-bond metadata entries (one per lag_dev that
 * currently has enslaved representors).
 */
struct mlx5e_rep_bond {
	struct notifier_block nb;
	struct netdev_net_notifier nn;
	struct list_head metadata_list;
};

/* One enslaved representor netdev, linked into its bond's slaves_list */
struct mlx5e_rep_bond_slave_entry {
	struct list_head list;
	struct net_device *netdev;
};

/* State shared by all representor slaves of a single bond device: the
 * metadata value programmed into reg_c_0 for this bond and the list/count
 * of its slaves. Created when the first representor is enslaved and
 * released when the last one leaves.
 */
struct mlx5e_rep_bond_metadata {
	struct list_head list; /* link to global list of rep_bond_metadata */
	struct mlx5_eswitch *esw;
	/* private of uplink holding rep bond metadata list */
	struct net_device *lag_dev;
	u32 metadata_reg_c_0;

	struct list_head slaves_list; /* slaves list */
	int slaves;
};

/* Return the metadata entry tracking @lag_dev, or NULL if none exists */
static struct mlx5e_rep_bond_metadata *
mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
			       const struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_metadata *found = NULL;
	struct mlx5e_rep_bond_metadata *cur;

	list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
		if (cur->lag_dev == lag_dev) {
			found = cur;
			break;
		}
	}

	return found;
}

/* Return the slave entry of @mdata matching @netdev, or NULL if not found */
static struct mlx5e_rep_bond_slave_entry *
mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
				  const struct net_device *netdev)
{
	struct mlx5e_rep_bond_slave_entry *found = NULL;
	struct mlx5e_rep_bond_slave_entry *cur;

	list_for_each_entry(cur, &mdata->slaves_list, list) {
		if (cur->netdev == netdev) {
			found = cur;
			break;
		}
	}

	return found;
}

/* Unlink @mdata from the global metadata list, return its reg_c_0 value to
 * the eswitch metadata allocator and free it. Callers only invoke this once
 * the last slave is gone; a non-empty slaves_list here is a bug (WARN_ON).
 */
static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
{
	netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
		   mdata->metadata_reg_c_0);
	list_del(&mdata->list);
	mlx5_esw_match_metadata_free(mdata->esw,
				     mdata->metadata_reg_c_0);
	WARN_ON(!list_empty(&mdata->slaves_list));
	kfree(mdata);
}

/* Enslave representor @netdev to bond @lag_dev: allocate (or reuse) the
 * bond's shared reg_c_0 metadata, point the vport's ingress ACL at it and
 * record the slave. Returns 0 on success or a negative errno; on failure a
 * metadata entry that was created here and gained no slave is released.
 *
 * This must be called under rtnl_lock
 */
int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
			   struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;
	int err;

	ASSERT_RTNL();

	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata) {
		/* First netdev becomes slave, no metadata presents the lag_dev. Create one */
		mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
		if (!mdata)
			return -ENOMEM;

		mdata->lag_dev = lag_dev;
		mdata->esw = esw;
		INIT_LIST_HEAD(&mdata->slaves_list);
		mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
		if (!mdata->metadata_reg_c_0) {
			kfree(mdata);
			return -ENOSPC;
		}
		list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);

		netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
			   mdata->metadata_reg_c_0);
	}

	s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
	if (!s_entry) {
		err = -ENOMEM;
		goto entry_alloc_err;
	}

	s_entry->netdev = netdev;
	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	/* Program the slave vport's ingress ACL to set the bond's shared
	 * metadata in reg_c_0.
	 */
	err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
						     mdata->metadata_reg_c_0);
	if (err)
		goto ingress_err;

	mdata->slaves++;
	list_add_tail(&s_entry->list, &mdata->slaves_list);
	netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	return 0;

ingress_err:
	kfree(s_entry);
entry_alloc_err:
	/* Release the metadata entry if it was created above and never
	 * gained a slave.
	 */
	if (!mdata->slaves)
		mlx5e_rep_bond_metadata_release(mdata);
	return err;
}

/* Undo mlx5e_rep_bond_enslave() for @netdev: restore the vport's default
 * ingress/egress ACLs and rx rules, drop the slave entry, and release the
 * bond metadata when the last slave leaves. Silently returns if @netdev was
 * not enslaved to @lag_dev.
 *
 * This must be called under rtnl_lock
 */
void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
			    const struct net_device *netdev,
			    const struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;

	ASSERT_RTNL();

	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata)
		return;

	s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
	if (!s_entry)
		return;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	/* Reset bond_metadata to zero first then reset all ingress/egress
	 * acls and rx rules of unslave representor's vport
	 */
	mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
	mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
	mlx5e_rep_bond_update(priv, false);

	list_del(&s_entry->list);

	netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	if (--mdata->slaves == 0)
		mlx5e_rep_bond_metadata_release(mdata);
	kfree(s_entry);
}

/* True only for a non-uplink mlx5 representor that is currently a LAG port */
static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
{
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;

	/* A given netdev is not a representor or not a slave of LAG configuration */
	if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
		return false;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	/* Egress acl forward to vport is supported only for non-uplink representors */
	return rpriv->rep->vport != MLX5_VPORT_UPLINK;
}

/* NETDEV_CHANGELOWERSTATE handler: when @netdev becomes the tx-enabled
 * (active) slave of its bond, repoint every other slave's egress ACL to
 * forward to the active vport and move the bond's shared rx rule to the
 * active vport's root_ft.
 */
static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
{
	struct netdev_notifier_changelowerstate_info *info;
	struct netdev_lag_lower_state_info *lag_info;
	struct mlx5e_rep_priv *rpriv;
	struct net_device *lag_dev;
	struct mlx5e_priv *priv;
	struct list_head *iter;
	struct net_device *dev;
	u16 acl_vport_num;
	u16 fwd_vport_num;
	int err;

	if (!mlx5e_rep_is_lag_netdev(netdev))
		return;

	info = ptr;
	lag_info = info->lower_state_info;
	/* This is not an event of a representor becoming active slave */
	if (!lag_info->tx_enabled)
		return;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;
	fwd_vport_num = rpriv->rep->vport;
	lag_dev = netdev_master_upper_dev_get(netdev);

	netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
		   lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));

	/* Point everyone's egress acl to the vport of the active representor */
	netdev_for_each_lower_dev(lag_dev, dev, iter) {
		priv = netdev_priv(dev);
		rpriv = priv->ppriv;
		acl_vport_num = rpriv->rep->vport;
		if (acl_vport_num != fwd_vport_num) {
			/* Only single rx_rule for unique bond_metadata should be
			 * present, delete it if it's saved as passive vport's
			 * rx_rule with destination as passive vport's root_ft
			 */
			mlx5e_rep_bond_update(priv, true);
			err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
							     fwd_vport_num,
							     acl_vport_num);
			if (err)
				netdev_warn(dev,
					    "configure slave vport(%d) egress fwd, err(%d)",
					    acl_vport_num, err);
		}
	}

	/* Insert new rx_rule for unique bond_metadata, save it as active vport's
	 * rx_rule with new destination as active vport's root_ft
	 */
	err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
	if (err)
		netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
			    fwd_vport_num, err);
}

/* NETDEV_CHANGEUPPER handler: enslave the representor to, or unslave it
 * from, the upper (bond) device depending on info->linking.
 */
static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
{
	struct netdev_notifier_changeupper_info *info = ptr;
	struct mlx5e_rep_priv *rpriv;
	struct net_device *lag_dev;
	struct mlx5e_priv *priv;

	if (!mlx5e_rep_is_lag_netdev(netdev))
		return;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;
	lag_dev = info->upper_dev;

	netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
		   info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);

	if (info->linking)
		mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
	else
		mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
}

/* Bond device of representors and netdev events are used here in specific way
 * to support eswitch vports bonding and to perform failover of eswitch vport
 * by modifying the vport's egress acl of lower dev representors. Thus this
 * also changes the traditional behavior of lower dev under bond device.
 * All non-representor netdevs or representors of other vendors as lower dev
 * of bond device are not supported.
 */
static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
				       unsigned long event, void *ptr)
{
	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);

	switch (event) {
	case NETDEV_CHANGELOWERSTATE:
		mlx5e_rep_changelowerstate_event(netdev, ptr);
		break;
	case NETDEV_CHANGEUPPER:
		mlx5e_rep_changeupper_event(netdev, ptr);
		break;
	}
	return NOTIFY_DONE;
}

/* If HW support eswitch vports bonding, register a specific notifier to
 * handle it when two or more representors are bonded
 */
int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv;
	int ret = 0;

	priv = netdev_priv(netdev);
	/* Nothing to do (and uplink_priv->bond stays NULL) when egress
	 * forward-to-vport is not supported by the device.
	 */
	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
		goto out;

	uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
	if (!uplink_priv->bond) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
	uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
	ret = register_netdevice_notifier_dev_net(netdev,
						  &uplink_priv->bond->nb,
						  &uplink_priv->bond->nn);
	if (ret) {
		netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
		kvfree(uplink_priv->bond);
		uplink_priv->bond = NULL;
	}

out:
	return ret;
}

/* Undo mlx5e_rep_bond_init(): unregister the notifier and free the bond
 * state. Safe to call when init was skipped (bond is NULL) or unsupported.
 */
void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
	    !rpriv->uplink_priv.bond)
		return;

	unregister_netdevice_notifier_dev_net(rpriv->netdev,
					      &rpriv->uplink_priv.bond->nb,
					      &rpriv->uplink_priv.bond->nn);
	kvfree(rpriv->uplink_priv.bond);
}