// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}

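/* Multipath LAG can be used only when both ports are probed, LAG is not
 * already active in another mode, and both eswitches meet the multipath
 * prerequisites.
 */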
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

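/* Report whether the LAG that @dev belongs to is running in multipath mode. */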
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}

/**
 * mlx5_lag_set_port_affinity - set the TX port affinity of the bond
 * @ldev: lag device
 * @port: affinity to apply:
 *     0 - normal affinity, both ports carry traffic.
 *     1 - affinity to port 1 only.
 *     2 - affinity to port 2 only.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker = {};

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

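/* FIB notifier flush callback: wait for all queued FIB events to be
 * processed on the multipath workqueue.
 */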
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

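/* Remember the route currently tracked for multipath offload. */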
static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
	mp->fib.mfi = fi;
	mp->fib.priority = fi->fib_priority;
	mp->fib.dst = dst;
	mp->fib.dst_len = dst_len;
}

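/* The FIB notifier may run in atomic context, so the event data is copied
 * into this work item and processed later on mp->wq under RTNL.
 */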
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

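/* Scan the route's nexthops, starting after @current_dev (or from the first
 * nexthop when @current_dev is NULL), and return the first nexthop device
 * that belongs to this LAG, or NULL if none is found.
 */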
static struct net_device*
mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev,
			  struct fib_info *fi,
			  struct net_device *current_dev)
{
	struct net_device *fib_dev;
	int i, ldev_idx, nhs;

	nhs = fib_info_num_path(fi);
	i = 0;
	if (current_dev) {
		for (; i < nhs; i++) {
			fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
			if (fib_dev == current_dev) {
				i++;
				break;
			}
		}
	}
	for (; i < nhs; i++) {
		fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
		ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev);
		if (ldev_idx >= 0)
			return ldev->pf[ldev_idx].netdev;
	}

	return NULL;
}

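/* Handle an IPv4 route add/replace/del event: start tracking a route whose
 * nexthops span both LAG ports, steer traffic to a single port when only one
 * nexthop belongs to the LAG, and stop tracking on route deletion.
 */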
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
				     struct fib_entry_notifier_info *fen_info)
{
	struct net_device *nh_dev0, *nh_dev1;
	struct fib_info *fi = fen_info->fi;
	struct lag_mp *mp = &ldev->lag_mp;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop tracking the route */
		if (mp->fib.mfi == fi)
			mp->fib.mfi = NULL;
		return;
	}

	/* Ignore a different prefix whose priority is not better (lower)
	 * than the one already being tracked.
	 */
	if (mp->fib.mfi && mp->fib.mfi != fi &&
	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
	    fi->fib_priority >= mp->fib.priority)
		return;

	nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
	nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);

	/* Handle add/replace event */
	if (!nh_dev0) {
		if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len)
			mp->fib.mfi = NULL;
		return;
	}

	if (nh_dev0 == nh_dev1) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload doesn't support routes with multiple nexthops of the same device");
		return;
	}

	if (!nh_dev1) {
		if (__mlx5_lag_is_active(ldev)) {
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0);

			i++;
			mlx5_lag_set_port_affinity(ldev, i);
			mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
		}

		return;
	}

	/* First time we see multipath route */
	if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}

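/* Handle a nexthop add/del event for the tracked route: fall back to the
 * remaining port when a nexthop is removed, and restore normal affinity once
 * the route has two nexthops again.
 */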
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check that the nh event is related to the tracked route */
	if (!mp->fib.mfi || mp->fib.mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

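/* Deferred handler for FIB events, run from the multipath workqueue. Takes
 * RTNL to protect the LAG state and drops the fib_info reference taken by
 * the notifier.
 */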
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev, fib_work->event, fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

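/* Allocate and initialize a deferred FIB work item. Called from the FIB
 * notifier, which may run in atomic context, hence GFP_ATOMIC.
 */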
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

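/* FIB notifier callback. May be called in atomic context: validate the
 * event, copy the notifier info, take a reference on the fib_info and defer
 * the real handling to the workqueue.
 */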
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh)
			return NOTIFY_DONE;

		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take a reference on fib_info to prevent it from being
		 * freed while the work is queued. It is released in
		 * mlx5_lag_fib_update().
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.fib.mfi = NULL;
}

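/* Set up multipath support for this LAG: create the event workqueue and
 * register the FIB notifier. A no-op if the notifier is already registered.
 */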
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* always clear mfi, as it might become stale when a route delete event
	 * has been missed
	 */
	mp->fib.mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

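/* Tear down multipath support: unregister the FIB notifier and destroy the
 * workqueue. Safe to call when multipath was never initialized.
 */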
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->fib.mfi = NULL;
}