// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */

#include <linux/netdevice.h>
#include <linux/list.h>
#include <net/lag.h>

#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "en_rep.h"

/* Per-uplink state for eswitch-vport bonding support: one netdev event
 * notifier (registered against the uplink netdev's namespace in
 * mlx5e_rep_bond_init()) plus the list of rep_bond_metadata entries,
 * one per bond/LAG upper device that currently enslaves VF representors
 * of this eswitch.
 */
struct mlx5e_rep_bond {
	struct notifier_block nb;
	struct netdev_net_notifier nn;
	struct list_head metadata_list;
};

/* One enslaved representor netdev, linked on rep_bond_metadata::slaves_list */
struct mlx5e_rep_bond_slave_entry {
	struct list_head list;
	struct net_device *netdev;
};

/* Ties a bond/LAG upper device to the reg_c_0 metadata value shared by
 * all of its enslaved representors. Created on first enslave, released
 * when the last slave leaves (slaves drops to 0).
 */
struct mlx5e_rep_bond_metadata {
	struct list_head list; /* link to global list of rep_bond_metadata */
	struct mlx5_eswitch *esw;
	 /* private of uplink holding rep bond metadata list */
	struct net_device *lag_dev;
	u32 metadata_reg_c_0; /* shared match metadata allocated from esw */

	struct list_head slaves_list; /* slaves list */
	int slaves; /* number of entries on slaves_list */
};

/* Find the rep_bond_metadata describing @lag_dev, or NULL if none exists */
static struct mlx5e_rep_bond_metadata *
mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
			       const struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_metadata *found = NULL;
	struct mlx5e_rep_bond_metadata *cur;

	list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
		if (cur->lag_dev == lag_dev) {
			found = cur;
			break;
		}
	}

	return found;
}

/* Find the slave entry of @netdev inside @mdata, or NULL if not enslaved */
static struct mlx5e_rep_bond_slave_entry *
mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
				  const struct net_device *netdev)
{
	struct mlx5e_rep_bond_slave_entry *found = NULL;
	struct mlx5e_rep_bond_slave_entry *cur;

	list_for_each_entry(cur, &mdata->slaves_list, list) {
		if (cur->netdev == netdev) {
			found = cur;
			break;
		}
	}

	return found;
}

/* Unlink @mdata from the global metadata list, return its reg_c_0 value
 * to the eswitch allocator and free it. Callers must only invoke this
 * when no slaves remain (WARN otherwise).
 */
static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
{
	netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
		   mdata->metadata_reg_c_0);
	list_del(&mdata->list);
	mlx5_esw_match_metadata_free(mdata->esw,
				     mdata->metadata_reg_c_0);
	WARN_ON(!list_empty(&mdata->slaves_list));
	kfree(mdata);
}

/* Enslave representor @netdev under bond device @lag_dev: look up (or
 * create, for the first slave) the per-bond metadata and point the
 * vport's ingress ACL at the shared reg_c_0 value.
 *
 * Returns 0 on success or a negative errno; on failure a metadata
 * created by this call (slaves still 0) is released again.
 *
 * This must be called under rtnl_lock
 */
int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
			   struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;
	int err;

	ASSERT_RTNL();

	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata) {
		/* First netdev becomes slave, no metadata presents the lag_dev. Create one */
		mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
		if (!mdata)
			return -ENOMEM;

		mdata->lag_dev = lag_dev;
		mdata->esw = esw;
		INIT_LIST_HEAD(&mdata->slaves_list);
		mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
		if (!mdata->metadata_reg_c_0) {
			/* 0 means the eswitch metadata space is exhausted */
			kfree(mdata);
			return -ENOSPC;
		}
		list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);

		netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
			   mdata->metadata_reg_c_0);
	}

	s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
	if (!s_entry) {
		err = -ENOMEM;
		goto entry_alloc_err;
	}

	s_entry->netdev = netdev;
	/* From here on rpriv is the slave representor's private, not the
	 * uplink's looked up above.
	 */
	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
						     mdata->metadata_reg_c_0);
	if (err)
		goto ingress_err;

	mdata->slaves++;
	list_add_tail(&s_entry->list, &mdata->slaves_list);
	netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	return 0;

ingress_err:
	kfree(s_entry);
entry_alloc_err:
	/* If this call created the metadata (no slave was ever added),
	 * tear it down again so nothing dangles for this lag_dev.
	 */
	if (!mdata->slaves)
		mlx5e_rep_bond_metadata_release(mdata);
	return err;
}

/* Undo mlx5e_rep_bond_enslave() for @netdev: restore the vport's default
 * ingress metadata / egress ACL, drop its rx rule state, and release the
 * per-bond metadata when the last slave leaves. Silently returns if the
 * (lag_dev, netdev) pair is not currently tracked.
 *
 * This must be called under rtnl_lock
 */
void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
			    const struct net_device *netdev,
			    const struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;

	ASSERT_RTNL();

	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata)
		return;

	s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
	if (!s_entry)
		return;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	/* Reset bond_metadata to zero first then reset all ingress/egress
	 * acls and rx rules of unslave representor's vport
	 */
	mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
	mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
	mlx5e_rep_bond_update(priv, false);

	list_del(&s_entry->list);

	netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	if (--mdata->slaves == 0)
		mlx5e_rep_bond_metadata_release(mdata);
	kfree(s_entry);
}

/* True only for mlx5e VF representor netdevs that are LAG ports; gates
 * the notifier below so netdev_priv() is never used on foreign devices.
 */
static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
{
	return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
}

/* NETDEV_CHANGELOWERSTATE handler: when @netdev becomes the tx-enabled
 * (active) slave of its bond, repoint every other slave's egress ACL to
 * forward to the active vport and move the shared rx rule to it.
 */
static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
{
	struct netdev_notifier_changelowerstate_info *info;
	struct netdev_lag_lower_state_info *lag_info;
	struct mlx5e_rep_priv *rpriv;
	struct net_device *lag_dev;
	struct mlx5e_priv *priv;
	struct list_head *iter;
	struct net_device *dev;
	u16 acl_vport_num;
	u16 fwd_vport_num;
	int err;

	info = ptr;
	/* NOTE(review): lower_state_info is dereferenced unchecked here —
	 * assumed always set for CHANGELOWERSTATE on a lag port; confirm
	 * against the bonding/team drivers.
	 */
	lag_info = info->lower_state_info;
	/* This is not an event of a representor becoming active slave */
	if (!lag_info->tx_enabled)
		return;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;
	fwd_vport_num = rpriv->rep->vport;
	lag_dev = netdev_master_upper_dev_get(netdev);
	if (!lag_dev)
		return;

	netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
		   lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));

	/* Point everyone's egress acl to the vport of the active representor */
	netdev_for_each_lower_dev(lag_dev, dev, iter) {
		/* NOTE(review): every lower dev is treated as an mlx5e rep
		 * (netdev_priv/ppriv used directly); mixed bonds are declared
		 * unsupported in the comment above the notifier — verify no
		 * foreign lower dev can reach here.
		 */
		priv = netdev_priv(dev);
		rpriv = priv->ppriv;
		acl_vport_num = rpriv->rep->vport;
		if (acl_vport_num != fwd_vport_num) {
			/* Only single rx_rule for unique bond_metadata should be
			 * present, delete it if it's saved as passive vport's
			 * rx_rule with destination as passive vport's root_ft
			 */
			mlx5e_rep_bond_update(priv, true);
			err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
							     fwd_vport_num,
							     acl_vport_num);
			if (err)
				netdev_warn(dev,
					    "configure slave vport(%d) egress fwd, err(%d)",
					    acl_vport_num, err);
		}
	}

	/* Insert new rx_rule for unique bond_metadata, save it as active vport's
	 * rx_rule with new destination as active vport's root_ft
	 */
	err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
	if (err)
		netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
			    fwd_vport_num, err);
}

/* NETDEV_CHANGEUPPER handler: enslave or unslave @netdev to/from the
 * bond upper device depending on info->linking. Errors from enslave are
 * intentionally not propagated — notifiers return void here.
 */
static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
{
	struct netdev_notifier_changeupper_info *info = ptr;
	struct mlx5e_rep_priv *rpriv;
	struct net_device *lag_dev;
	struct mlx5e_priv *priv;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;
	lag_dev = info->upper_dev;

	netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
		   info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);

	if (info->linking)
		mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
	else
		mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
}

/* Bond device of representors and netdev events are used here in specific way
 * to support eswitch vports bonding and to perform failover of eswitch vport
 * by modifying the vport's egress acl of lower dev representors. Thus this
 * also change the traditional behavior of lower dev under bond device.
 * All non-representor netdevs or representors of other vendors as lower dev
 * of bond device are not supported.
 */
static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
				       unsigned long event, void *ptr)
{
	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_rep_bond *bond;
	struct mlx5e_priv *priv;

	/* Must be checked before netdev_priv() below — filters out
	 * non-mlx5e and non-LAG-port devices.
	 */
	if (!mlx5e_rep_is_lag_netdev(netdev))
		return NOTIFY_DONE;

	bond = container_of(nb, struct mlx5e_rep_bond, nb);
	priv = netdev_priv(netdev);
	rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
	/* Verify VF representor is on the same device of the bond handling the netevent. */
	if (rpriv->uplink_priv.bond != bond)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_CHANGELOWERSTATE:
		mlx5e_rep_changelowerstate_event(netdev, ptr);
		break;
	case NETDEV_CHANGEUPPER:
		mlx5e_rep_changeupper_event(netdev, ptr);
		break;
	}
	return NOTIFY_DONE;
}

/* If HW support eswitch vports bonding, register a specific notifier to
 * handle it when two or more representors are bonded
 *
 * Returns 0 (also when the capability is absent and nothing is set up)
 * or a negative errno from notifier registration / allocation.
 */
int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv;
	int ret = 0;

	priv = netdev_priv(netdev);
	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
		goto out;

	uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
	if (!uplink_priv->bond) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
	uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
	ret = register_netdevice_notifier_dev_net(netdev,
						  &uplink_priv->bond->nb,
						  &uplink_priv->bond->nn);
	if (ret) {
		netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
		kvfree(uplink_priv->bond);
		uplink_priv->bond = NULL;
	}

out:
	return ret;
}

/* Counterpart of mlx5e_rep_bond_init(): unregister the notifier and free
 * the per-uplink bond state. No-op when init never allocated it.
 */
void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
	    !rpriv->uplink_priv.bond)
		return;

	unregister_netdevice_notifier_dev_net(rpriv->netdev,
					      &rpriv->uplink_priv.bond->nb,
					      &rpriv->uplink_priv.bond->nn);
	kvfree(rpriv->uplink_priv.bond);
}