// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
}

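/* Multipath LAG is allowed only when both PF devices are ready, LAG is
 * either inactive or already running in multipath mode, and the eswitch
 * multipath prerequisites hold for both devices.
 */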
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

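/* Report whether this device is currently bonded in multipath LAG mode. */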
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}

/**
 * mlx5_lag_set_port_affinity - set the port affinity of a multipath LAG
 * @ldev: lag device
 * @port: affinity to apply:
 *        MLX5_LAG_NORMAL_AFFINITY (0) - distribute traffic over both ports.
 *        MLX5_LAG_P1_AFFINITY (1) - steer all traffic to port 1.
 *        MLX5_LAG_P2_AFFINITY (2) - steer all traffic to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker;

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

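	/* Notify only the PF devices that remain enabled for TX about the
	 * affinity change; the event carries no payload.
	 */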
	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

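/* Flush callback passed to register_fib_notifier(); drains any FIB event
 * work still queued on the multipath workqueue.
 */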
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

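/* Deferred FIB event: a copy of the notifier info is kept here so the event
 * can be handled later from the multipath workqueue.
 */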
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

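/* Handle an IPv4 route add/replace/del event. A route with two nexthops,
 * one on each port of the HCA, activates multipath LAG and is tracked in
 * mp->mfi; a single-nexthop route pins the affinity to the port carrying
 * that nexthop; a delete stops tracking the route.
 */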
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
				     unsigned long event,
				     struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop tracking */
		if (mp->mfi == fi)
			mp->mfi = NULL;
		return;
	}

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			if (i < 0)
				i = MLX5_LAG_NORMAL_AFFINITY;
			else
				++i;

			mlx5_lag_set_port_affinity(ldev, i);
		}
		return;
	}

	if (nhs != 2)
		return;

	/* Verify next hops are ports of the same HCA */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload requires two ports of the same HCA\n");
		return;
	}

	/* First time we see a multipath route */
	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mp->mfi = fi;
}

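/* Handle a nexthop add/del event for the currently tracked multipath route:
 * a removed nexthop shifts all traffic to the peer port, and restoring the
 * second nexthop returns to normal (dual-port) affinity.
 */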
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check the nh event is related to the route */
	if (!mp->mfi || mp->mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

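/* Work handler: processes one deferred FIB event under RTNL and releases the
 * fib_info reference taken by the notifier callback.
 */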
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 fib_work->fen_info.fi);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev, fib_work->event, fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

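/* Allocate and initialize a work item for a deferred FIB event. The FIB
 * notifier may be invoked in atomic context, hence GFP_ATOMIC.
 */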
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

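/* FIB notifier callback. May run in atomic context, so it only filters the
 * event, copies the notifier info, takes a reference on the fib_info and
 * defers the real handling to the multipath workqueue.
 */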
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct net_device *fib_dev;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh)
			return NOTIFY_DONE;
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
			return NOTIFY_DONE;
		}
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.mfi = NULL;
}

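/* Create the multipath workqueue and register the FIB notifier. Registration
 * is done only once; subsequent calls just reset the tracked route.
 */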
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* always clear mfi, as it might become stale when a route delete event
	 * has been missed
	 */
	mp->mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

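/* Unregister the FIB notifier and destroy the workqueue; a no-op if init
 * never registered the notifier.
 */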
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->mfi = NULL;
}