1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3
4 #include <linux/netdevice.h>
5 #include <net/nexthop.h>
6 #include "lag/lag.h"
7 #include "lag/mp.h"
8 #include "mlx5_core.h"
9 #include "eswitch.h"
10 #include "lib/events.h"
11
/* Return true when this LAG instance is currently in multipath mode. */
static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}
16
17 #define MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS 2
mlx5_lag_multipath_check_prereq(struct mlx5_lag * ldev)18 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
19 {
20 if (!mlx5_lag_is_ready(ldev))
21 return false;
22
23 if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
24 return false;
25
26 if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS)
27 return false;
28
29 return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
30 ldev->pf[MLX5_LAG_P2].dev);
31 }
32
mlx5_lag_is_multipath(struct mlx5_core_dev * dev)33 bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
34 {
35 struct mlx5_lag *ldev = mlx5_lag_dev(dev);
36
37 return ldev && __mlx5_lag_is_multipath(ldev);
38 }
39
40 /**
41 * mlx5_lag_set_port_affinity
42 *
43 * @ldev: lag device
44 * @port:
45 * 0 - set normal affinity.
46 * 1 - set affinity to port 1.
47 * 2 - set affinity to port 2.
48 *
49 **/
mlx5_lag_set_port_affinity(struct mlx5_lag * ldev,enum mlx5_lag_port_affinity port)50 static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
51 enum mlx5_lag_port_affinity port)
52 {
53 struct lag_tracker tracker = {};
54
55 if (!__mlx5_lag_is_multipath(ldev))
56 return;
57
58 switch (port) {
59 case MLX5_LAG_NORMAL_AFFINITY:
60 tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
61 tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
62 tracker.netdev_state[MLX5_LAG_P1].link_up = true;
63 tracker.netdev_state[MLX5_LAG_P2].link_up = true;
64 break;
65 case MLX5_LAG_P1_AFFINITY:
66 tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
67 tracker.netdev_state[MLX5_LAG_P1].link_up = true;
68 tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
69 tracker.netdev_state[MLX5_LAG_P2].link_up = false;
70 break;
71 case MLX5_LAG_P2_AFFINITY:
72 tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
73 tracker.netdev_state[MLX5_LAG_P1].link_up = false;
74 tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
75 tracker.netdev_state[MLX5_LAG_P2].link_up = true;
76 break;
77 default:
78 mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
79 "Invalid affinity port %d", port);
80 return;
81 }
82
83 if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
84 mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
85 MLX5_DEV_EVENT_PORT_AFFINITY,
86 (void *)0);
87
88 if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
89 mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
90 MLX5_DEV_EVENT_PORT_AFFINITY,
91 (void *)0);
92
93 mlx5_modify_lag(ldev, &tracker);
94 }
95
mlx5_lag_fib_event_flush(struct notifier_block * nb)96 static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
97 {
98 struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
99
100 flush_workqueue(mp->wq);
101 }
102
/* Record @fi as the route (with its destination and priority) that the
 * multipath LAG is now tracking.
 */
static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
	mp->fib.dst = dst;
	mp->fib.dst_len = dst_len;
	mp->fib.priority = fi->fib_priority;
	mp->fib.mfi = fi;
}
110
/* A FIB event deferred from atomic notifier context; processed by
 * mlx5_lag_fib_update() on the lag_mp workqueue.
 */
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;	/* FIB_EVENT_* value that was received */
	union {
		/* payload: entry info for ENTRY_* events, nexthop info
		 * for NH_* events
		 */
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};
120
121 static struct net_device*
mlx5_lag_get_next_fib_dev(struct mlx5_lag * ldev,struct fib_info * fi,struct net_device * current_dev)122 mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev,
123 struct fib_info *fi,
124 struct net_device *current_dev)
125 {
126 struct net_device *fib_dev;
127 int i, ldev_idx, nhs;
128
129 nhs = fib_info_num_path(fi);
130 i = 0;
131 if (current_dev) {
132 for (; i < nhs; i++) {
133 fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
134 if (fib_dev == current_dev) {
135 i++;
136 break;
137 }
138 }
139 }
140 for (; i < nhs; i++) {
141 fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
142 ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev);
143 if (ldev_idx >= 0)
144 return ldev->pf[ldev_idx].netdev;
145 }
146
147 return NULL;
148 }
149
mlx5_lag_fib_route_event(struct mlx5_lag * ldev,unsigned long event,struct fib_entry_notifier_info * fen_info)150 static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
151 struct fib_entry_notifier_info *fen_info)
152 {
153 struct net_device *nh_dev0, *nh_dev1;
154 struct fib_info *fi = fen_info->fi;
155 struct lag_mp *mp = &ldev->lag_mp;
156
157 /* Handle delete event */
158 if (event == FIB_EVENT_ENTRY_DEL) {
159 /* stop track */
160 if (mp->fib.mfi == fi)
161 mp->fib.mfi = NULL;
162 return;
163 }
164
165 /* Handle multipath entry with lower priority value */
166 if (mp->fib.mfi && mp->fib.mfi != fi &&
167 (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
168 fi->fib_priority >= mp->fib.priority)
169 return;
170
171 nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
172 nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);
173
174 /* Handle add/replace event */
175 if (!nh_dev0) {
176 if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len)
177 mp->fib.mfi = NULL;
178 return;
179 }
180
181 if (nh_dev0 == nh_dev1) {
182 mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
183 "Multipath offload doesn't support routes with multiple nexthops of the same device");
184 return;
185 }
186
187 if (!nh_dev1) {
188 if (__mlx5_lag_is_active(ldev)) {
189 int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0);
190
191 i++;
192 mlx5_lag_set_port_affinity(ldev, i);
193 mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
194 }
195
196 return;
197 }
198
199 /* First time we see multipath route */
200 if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
201 struct lag_tracker tracker;
202
203 tracker = ldev->tracker;
204 mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
205 }
206
207 mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
208 mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
209 }
210
mlx5_lag_fib_nexthop_event(struct mlx5_lag * ldev,unsigned long event,struct fib_nh * fib_nh,struct fib_info * fi)211 static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
212 unsigned long event,
213 struct fib_nh *fib_nh,
214 struct fib_info *fi)
215 {
216 struct lag_mp *mp = &ldev->lag_mp;
217
218 /* Check the nh event is related to the route */
219 if (!mp->fib.mfi || mp->fib.mfi != fi)
220 return;
221
222 /* nh added/removed */
223 if (event == FIB_EVENT_NH_DEL) {
224 int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);
225
226 if (i >= 0) {
227 i = (i + 1) % 2 + 1; /* peer port */
228 mlx5_lag_set_port_affinity(ldev, i);
229 }
230 } else if (event == FIB_EVENT_NH_ADD &&
231 fib_info_num_path(fi) == 2) {
232 mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
233 }
234 }
235
mlx5_lag_fib_update(struct work_struct * work)236 static void mlx5_lag_fib_update(struct work_struct *work)
237 {
238 struct mlx5_fib_event_work *fib_work =
239 container_of(work, struct mlx5_fib_event_work, work);
240 struct mlx5_lag *ldev = fib_work->ldev;
241 struct fib_nh *fib_nh;
242
243 /* Protect internal structures from changes */
244 rtnl_lock();
245 switch (fib_work->event) {
246 case FIB_EVENT_ENTRY_REPLACE:
247 case FIB_EVENT_ENTRY_DEL:
248 mlx5_lag_fib_route_event(ldev, fib_work->event,
249 &fib_work->fen_info);
250 fib_info_put(fib_work->fen_info.fi);
251 break;
252 case FIB_EVENT_NH_ADD:
253 case FIB_EVENT_NH_DEL:
254 fib_nh = fib_work->fnh_info.fib_nh;
255 mlx5_lag_fib_nexthop_event(ldev,
256 fib_work->event,
257 fib_work->fnh_info.fib_nh,
258 fib_nh->nh_parent);
259 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
260 break;
261 }
262
263 rtnl_unlock();
264 kfree(fib_work);
265 }
266
267 static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag * ldev,unsigned long event)268 mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
269 {
270 struct mlx5_fib_event_work *fib_work;
271
272 fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
273 if (WARN_ON(!fib_work))
274 return NULL;
275
276 INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
277 fib_work->ldev = ldev;
278 fib_work->event = event;
279
280 return fib_work;
281 }
282
mlx5_lag_fib_event(struct notifier_block * nb,unsigned long event,void * ptr)283 static int mlx5_lag_fib_event(struct notifier_block *nb,
284 unsigned long event,
285 void *ptr)
286 {
287 struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
288 struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
289 struct fib_notifier_info *info = ptr;
290 struct mlx5_fib_event_work *fib_work;
291 struct fib_entry_notifier_info *fen_info;
292 struct fib_nh_notifier_info *fnh_info;
293 struct fib_info *fi;
294
295 if (info->family != AF_INET)
296 return NOTIFY_DONE;
297
298 if (!mlx5_lag_multipath_check_prereq(ldev))
299 return NOTIFY_DONE;
300
301 switch (event) {
302 case FIB_EVENT_ENTRY_REPLACE:
303 case FIB_EVENT_ENTRY_DEL:
304 fen_info = container_of(info, struct fib_entry_notifier_info,
305 info);
306 fi = fen_info->fi;
307 if (fi->nh)
308 return NOTIFY_DONE;
309
310 fib_work = mlx5_lag_init_fib_work(ldev, event);
311 if (!fib_work)
312 return NOTIFY_DONE;
313 fib_work->fen_info = *fen_info;
314 /* Take reference on fib_info to prevent it from being
315 * freed while work is queued. Release it afterwards.
316 */
317 fib_info_hold(fib_work->fen_info.fi);
318 break;
319 case FIB_EVENT_NH_ADD:
320 case FIB_EVENT_NH_DEL:
321 fnh_info = container_of(info, struct fib_nh_notifier_info,
322 info);
323 fib_work = mlx5_lag_init_fib_work(ldev, event);
324 if (!fib_work)
325 return NOTIFY_DONE;
326 fib_work->fnh_info = *fnh_info;
327 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
328 break;
329 default:
330 return NOTIFY_DONE;
331 }
332
333 queue_work(mp->wq, &fib_work->work);
334
335 return NOTIFY_DONE;
336 }
337
/* Forget the tracked fib_info on LAG reset paths. */
void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.fib.mfi = NULL;
}
345
mlx5_lag_mp_init(struct mlx5_lag * ldev)346 int mlx5_lag_mp_init(struct mlx5_lag *ldev)
347 {
348 struct lag_mp *mp = &ldev->lag_mp;
349 int err;
350
351 /* always clear mfi, as it might become stale when a route delete event
352 * has been missed
353 */
354 mp->fib.mfi = NULL;
355
356 if (mp->fib_nb.notifier_call)
357 return 0;
358
359 mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
360 if (!mp->wq)
361 return -ENOMEM;
362
363 mp->fib_nb.notifier_call = mlx5_lag_fib_event;
364 err = register_fib_notifier(&init_net, &mp->fib_nb,
365 mlx5_lag_fib_event_flush, NULL);
366 if (err) {
367 destroy_workqueue(mp->wq);
368 mp->fib_nb.notifier_call = NULL;
369 }
370
371 return err;
372 }
373
mlx5_lag_mp_cleanup(struct mlx5_lag * ldev)374 void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
375 {
376 struct lag_mp *mp = &ldev->lag_mp;
377
378 if (!mp->fib_nb.notifier_call)
379 return;
380
381 unregister_fib_notifier(&init_net, &mp->fib_nb);
382 destroy_workqueue(mp->wq);
383 mp->fib_nb.notifier_call = NULL;
384 mp->fib.mfi = NULL;
385 }
386