// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}

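/* Multipath offload is allowed only when the lag is ready, the lag is
 * either inactive or already running in multipath mode, and the eswitch
 * prerequisites are met on both PFs.
 */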
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = mlx5_lag_dev(dev);

	return ldev && __mlx5_lag_is_multipath(ldev);
}

/**
 * mlx5_lag_set_port_affinity - program tx port affinity for a multipath lag
 * @ldev: lag device
 * @port: affinity to apply:
 *        0 - normal affinity, tx enabled on both ports.
 *        1 - tx affinity to port 1 only.
 *        2 - tx affinity to port 2 only.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker = {};

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

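/* Flush the multipath workqueue. Passed to register_fib_notifier() so the
 * fib notifier core can synchronize with our deferred event handling.
 */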
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
	mp->fib.mfi = fi;
	mp->fib.priority = fi->fib_priority;
	mp->fib.dst = dst;
	mp->fib.dst_len = dst_len;
}

struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

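/* Return the next nexthop device of @fi (starting after @current_dev, or
 * from the first nexthop when @current_dev is NULL) that belongs to one of
 * the lag ports, or NULL if there is no such device.
 */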
static struct net_device*
mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev,
			  struct fib_info *fi,
			  struct net_device *current_dev)
{
	struct net_device *fib_dev;
	int i, ldev_idx, nhs;

	nhs = fib_info_num_path(fi);
	i = 0;
	if (current_dev) {
		for (; i < nhs; i++) {
			fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
			if (fib_dev == current_dev) {
				i++;
				break;
			}
		}
	}
	for (; i < nhs; i++) {
		fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
		ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev);
		if (ldev_idx >= 0)
			return ldev->pf[ldev_idx].netdev;
	}

	return NULL;
}

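/* Track a single route and program port affinity from it: stop tracking on
 * delete; on add/replace pin affinity to the only lag port used by the
 * route, or activate multipath lag and use normal affinity when the route
 * spans both ports.
 */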
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
				     struct fib_entry_notifier_info *fen_info)
{
	struct net_device *nh_dev0, *nh_dev1;
	struct fib_info *fi = fen_info->fi;
	struct lag_mp *mp = &ldev->lag_mp;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop track */
		if (mp->fib.mfi == fi)
			mp->fib.mfi = NULL;
		return;
	}

	/* Handle multipath entry with lower priority value */
	if (mp->fib.mfi && mp->fib.mfi != fi &&
	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
	    fi->fib_priority >= mp->fib.priority)
		return;

	nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
	nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);

	/* Handle add/replace event */
	if (!nh_dev0) {
		if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len)
			mp->fib.mfi = NULL;
		return;
	}

	if (nh_dev0 == nh_dev1) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload doesn't support routes with multiple nexthops of the same device");
		return;
	}

	if (!nh_dev1) {
		if (__mlx5_lag_is_active(ldev)) {
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0);

			i++;
			mlx5_lag_set_port_affinity(ldev, i);
			mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
		}

		return;
	}

	/* First time we see multipath route */
	if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}

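/* A nexthop of the tracked route changed: shift affinity to the peer port
 * when a nexthop on one of the lag ports is removed, and restore normal
 * affinity once the route has two nexthops again.
 */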
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check the nh event is related to the route */
	if (!mp->fib.mfi || mp->fib.mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

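/* Work handler: process a deferred fib event under rtnl and drop the
 * fib_info reference taken in the notifier callback.
 */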
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

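/* Allocate and initialize a work item for a deferred fib event. Called from
 * the atomic notifier path, hence GFP_ATOMIC.
 */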
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

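/* fib notifier callback: ignore events that are not relevant for multipath
 * offload and defer the rest to the workqueue, holding a reference on the
 * fib_info until the work has run.
 */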
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh)
			return NOTIFY_DONE;

		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.fib.mfi = NULL;
}

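/* Create the workqueue used to defer fib event handling out of atomic
 * context and register the fib notifier. Safe to call again after a
 * successful init; a repeated call only clears the stale route pointer.
 */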
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* always clear mfi, as it might become stale when a route delete event
	 * has been missed
	 */
	mp->fib.mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

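/* Unregister the fib notifier and destroy the workqueue if multipath was
 * initialized.
 */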
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->fib.mfi = NULL;
}