1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
3 
4 #include <linux/netdevice.h>
5 #include <linux/list.h>
6 #include <net/lag.h>
7 
8 #include "mlx5_core.h"
9 #include "eswitch.h"
10 #include "esw/acl/ofld.h"
11 #include "en_rep.h"
12 
/* Per-uplink state for eswitch representor bonding: the netdev notifier
 * plus the list of per-LAG-device metadata entries it manages.
 */
struct mlx5e_rep_bond {
	struct notifier_block nb; /* dispatches CHANGEUPPER/CHANGELOWERSTATE (mlx5e_rep_esw_bond_netevent) */
	struct netdev_net_notifier nn; /* tracks the per-netdev notifier registration across netns moves */
	struct list_head metadata_list; /* list of struct mlx5e_rep_bond_metadata, one per LAG device */
};
18 
/* One enslaved representor netdev under a bond; linked into
 * mlx5e_rep_bond_metadata::slaves_list.
 */
struct mlx5e_rep_bond_slave_entry {
	struct list_head list; /* link in mlx5e_rep_bond_metadata::slaves_list */
	struct net_device *netdev; /* the slave representor netdev (not refcounted here) */
};
23 
/* Per-LAG-device bonding metadata: ties a bond/team upper device to the
 * shared reg_c_0 match value programmed into its slaves' ingress ACLs.
 */
struct mlx5e_rep_bond_metadata {
	struct list_head list; /* link to global list of rep_bond_metadata */
	struct mlx5_eswitch *esw;
	 /* private of uplink holding rep bond metadata list */
	struct net_device *lag_dev;
	u32 metadata_reg_c_0; /* allocated from mlx5_esw_match_metadata_alloc(); 0 means allocation failed */

	struct list_head slaves_list; /* slaves list */
	int slaves; /* number of entries on slaves_list; entry freed when it drops to 0 */
};
34 
35 static struct mlx5e_rep_bond_metadata *
36 mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
37 			       const struct net_device *lag_dev)
38 {
39 	struct mlx5e_rep_bond_metadata *found = NULL;
40 	struct mlx5e_rep_bond_metadata *cur;
41 
42 	list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
43 		if (cur->lag_dev == lag_dev) {
44 			found = cur;
45 			break;
46 		}
47 	}
48 
49 	return found;
50 }
51 
52 static struct mlx5e_rep_bond_slave_entry *
53 mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
54 				  const struct net_device *netdev)
55 {
56 	struct mlx5e_rep_bond_slave_entry *found = NULL;
57 	struct mlx5e_rep_bond_slave_entry *cur;
58 
59 	list_for_each_entry(cur, &mdata->slaves_list, list) {
60 		if (cur->netdev == netdev) {
61 			found = cur;
62 			break;
63 		}
64 	}
65 
66 	return found;
67 }
68 
/* Unlink @mdata from the per-uplink metadata list, return its reg_c_0
 * match value to the eswitch allocator and free the entry. All slave
 * entries must already have been removed (WARNs otherwise).
 */
static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
{
	netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
		   mdata->metadata_reg_c_0);
	list_del(&mdata->list);
	mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0);
	WARN_ON(!list_empty(&mdata->slaves_list));
	kfree(mdata);
}
78 
/* This must be called under rtnl_lock.
 *
 * Record @netdev (a representor) as a slave of @lag_dev: look up (or
 * create) the per-LAG metadata carrying the shared reg_c_0 value and
 * program that value into the slave vport's ingress ACL.
 *
 * Returns 0 on success, -ENOMEM/-ENOSPC on allocation failure, or the
 * error from the ingress ACL update. A metadata entry created here is
 * released again on any failure (the !mdata->slaves check below).
 */
int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
			   struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;
	int err;

	ASSERT_RTNL();

	/* rpriv first points at the *uplink* rep private, only to reach the
	 * per-uplink bond metadata list; it is re-pointed to the slave's own
	 * rep private further down.
	 */
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata) {
		/* First netdev becomes slave, no metadata presents the lag_dev. Create one */
		mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
		if (!mdata)
			return -ENOMEM;

		mdata->lag_dev = lag_dev;
		mdata->esw = esw;
		INIT_LIST_HEAD(&mdata->slaves_list);
		/* 0 from the allocator means no free metadata value left */
		mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
		if (!mdata->metadata_reg_c_0) {
			kfree(mdata);
			return -ENOSPC;
		}
		list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);

		netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
			   mdata->metadata_reg_c_0);
	}

	s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
	if (!s_entry) {
		err = -ENOMEM;
		goto entry_alloc_err;
	}

	s_entry->netdev = netdev;
	/* Switch rpriv to the slave representor's private data */
	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	/* Tag all traffic of this slave vport with the bond's shared metadata */
	err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
						     mdata->metadata_reg_c_0);
	if (err)
		goto ingress_err;

	mdata->slaves++;
	list_add_tail(&s_entry->list, &mdata->slaves_list);
	netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	return 0;

ingress_err:
	kfree(s_entry);
entry_alloc_err:
	/* Only drop the metadata if no previously-enslaved rep still uses it,
	 * i.e. it was created by this (failed) call.
	 */
	if (!mdata->slaves)
		mlx5e_rep_bond_metadata_release(mdata);
	return err;
}
142 
/* This must be called under rtnl_lock.
 *
 * Undo mlx5e_rep_bond_enslave() for @netdev: clear the bond metadata
 * from the vport's ingress ACL, unbond its egress ACL, drop its rx rule
 * and remove the slave entry. The last slave also releases the per-LAG
 * metadata. Silently returns if @netdev was never enslaved to @lag_dev.
 */
void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
			    const struct net_device *netdev,
			    const struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;

	ASSERT_RTNL();

	/* rpriv here is the uplink rep private (metadata list holder) ... */
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata)
		return;

	s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
	if (!s_entry)
		return;

	/* ... and from here on it is the slave representor's private */
	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	/* Reset bond_metadata to zero first then reset all ingress/egress
	 * acls and rx rules of unslave representor's vport
	 */
	mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
	mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
	mlx5e_rep_bond_update(priv, false);

	list_del(&s_entry->list);

	netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	/* Last slave going away also releases the shared metadata */
	if (--mdata->slaves == 0)
		mlx5e_rep_bond_metadata_release(mdata);
	kfree(s_entry);
}
183 
184 static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
185 {
186 	struct mlx5e_rep_priv *rpriv;
187 	struct mlx5e_priv *priv;
188 
189 	/* A given netdev is not a representor or not a slave of LAG configuration */
190 	if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
191 		return false;
192 
193 	priv = netdev_priv(netdev);
194 	rpriv = priv->ppriv;
195 
196 	/* Egress acl forward to vport is supported only non-uplink representor */
197 	return rpriv->rep->vport != MLX5_VPORT_UPLINK;
198 }
199 
/* NETDEV_CHANGELOWERSTATE handler: when a bonded representor becomes the
 * tx-enabled (active) slave, repoint every passive slave's egress ACL to
 * forward to the active vport, then install the active vport's rx rule.
 */
static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
{
	struct netdev_notifier_changelowerstate_info *info;
	struct netdev_lag_lower_state_info *lag_info;
	struct mlx5e_rep_priv *rpriv;
	struct net_device *lag_dev;
	struct mlx5e_priv *priv;
	struct list_head *iter;
	struct net_device *dev;
	u16 acl_vport_num;
	u16 fwd_vport_num;
	int err;

	if (!mlx5e_rep_is_lag_netdev(netdev))
		return;

	info = ptr;
	/* NOTE(review): lower_state_info is dereferenced without a NULL
	 * check — assumes bond/team always attach lag state info to
	 * CHANGELOWERSTATE for their ports; confirm against the LAG drivers.
	 */
	lag_info = info->lower_state_info;
	/* This is not an event of a representor becoming active slave */
	if (!lag_info->tx_enabled)
		return;

	/* fwd_vport_num is the newly-active slave's vport */
	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;
	fwd_vport_num = rpriv->rep->vport;
	lag_dev = netdev_master_upper_dev_get(netdev);
	if (!lag_dev)
		return;

	netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
		   lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));

	/* Point everyone's egress acl to the vport of the active representor.
	 * priv/rpriv are reused here for each lower dev in turn.
	 */
	netdev_for_each_lower_dev(lag_dev, dev, iter) {
		priv = netdev_priv(dev);
		rpriv = priv->ppriv;
		acl_vport_num = rpriv->rep->vport;
		if (acl_vport_num != fwd_vport_num) {
			/* Only single rx_rule for unique bond_metadata should be
			 * present, delete it if it's saved as passive vport's
			 * rx_rule with destination as passive vport's root_ft
			 */
			mlx5e_rep_bond_update(priv, true);
			err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
							     fwd_vport_num,
							     acl_vport_num);
			if (err)
				netdev_warn(dev,
					    "configure slave vport(%d) egress fwd, err(%d)",
					    acl_vport_num, err);
		}
	}

	/* Insert new rx_rule for unique bond_metadata, save it as active vport's
	 * rx_rule with new destination as active vport's root_ft
	 */
	err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
	if (err)
		netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
			    fwd_vport_num, err);
}
261 
262 static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
263 {
264 	struct netdev_notifier_changeupper_info *info = ptr;
265 	struct mlx5e_rep_priv *rpriv;
266 	struct net_device *lag_dev;
267 	struct mlx5e_priv *priv;
268 
269 	if (!mlx5e_rep_is_lag_netdev(netdev))
270 		return;
271 
272 	priv = netdev_priv(netdev);
273 	rpriv = priv->ppriv;
274 	lag_dev = info->upper_dev;
275 
276 	netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
277 		   info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);
278 
279 	if (info->linking)
280 		mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
281 	else
282 		mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
283 }
284 
285 /* Bond device of representors and netdev events are used here in specific way
286  * to support eswitch vports bonding and to perform failover of eswitch vport
287  * by modifying the vport's egress acl of lower dev representors. Thus this
288  * also change the traditional behavior of lower dev under bond device.
289  * All non-representor netdevs or representors of other vendors as lower dev
290  * of bond device are not supported.
291  */
292 static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
293 				       unsigned long event, void *ptr)
294 {
295 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
296 
297 	switch (event) {
298 	case NETDEV_CHANGELOWERSTATE:
299 		mlx5e_rep_changelowerstate_event(netdev, ptr);
300 		break;
301 	case NETDEV_CHANGEUPPER:
302 		mlx5e_rep_changeupper_event(netdev, ptr);
303 		break;
304 	}
305 	return NOTIFY_DONE;
306 }
307 
308 /* If HW support eswitch vports bonding, register a specific notifier to
309  * handle it when two or more representors are bonded
310  */
311 int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
312 {
313 	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
314 	struct net_device *netdev = rpriv->netdev;
315 	struct mlx5e_priv *priv;
316 	int ret = 0;
317 
318 	priv = netdev_priv(netdev);
319 	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
320 		goto out;
321 
322 	uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
323 	if (!uplink_priv->bond) {
324 		ret = -ENOMEM;
325 		goto out;
326 	}
327 
328 	INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
329 	uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
330 	ret = register_netdevice_notifier_dev_net(netdev,
331 						  &uplink_priv->bond->nb,
332 						  &uplink_priv->bond->nn);
333 	if (ret) {
334 		netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
335 		kvfree(uplink_priv->bond);
336 		uplink_priv->bond = NULL;
337 	}
338 
339 out:
340 	return ret;
341 }
342 
343 void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
344 {
345 	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
346 
347 	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
348 	    !rpriv->uplink_priv.bond)
349 		return;
350 
351 	unregister_netdevice_notifier_dev_net(rpriv->netdev,
352 					      &rpriv->uplink_priv.bond->nb,
353 					      &rpriv->uplink_priv.bond->nn);
354 	kvfree(rpriv->uplink_priv.bond);
355 }
356