// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}

static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = mlx5_lag_dev(dev);

	return ldev && __mlx5_lag_is_multipath(ldev);
}

/**
 * mlx5_lag_set_port_affinity - set the LAG TX port affinity
 * @ldev: lag device
 * @port:
 *        0 - set normal affinity.
 *        1 - set affinity to port 1.
 *        2 - set affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker = {};

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
	mp->fib.mfi = fi;
	mp->fib.priority = fi->fib_priority;
	mp->fib.dst = dst;
	mp->fib.dst_len = dst_len;
}

struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

/* Return the next nexthop device of @fi that belongs to one of the LAG
 * ports, scanning from the start when @current_dev is NULL or from the
 * nexthop following @current_dev otherwise.
 */
static struct net_device *
mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev,
			  struct fib_info *fi,
			  struct net_device *current_dev)
{
	struct net_device *fib_dev;
	int i, ldev_idx, nhs;

	nhs = fib_info_num_path(fi);
	i = 0;
	if (current_dev) {
		for (; i < nhs; i++) {
			fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
			if (fib_dev == current_dev) {
				i++;
				break;
			}
		}
	}
	for (; i < nhs; i++) {
		fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
		ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev);
		if (ldev_idx >= 0)
			return ldev->pf[ldev_idx].netdev;
	}

	return NULL;
}

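/* Route events drive the LAG state below: a route whose only usable
 * nexthop sits on one LAG port pins TX affinity to that port, while a
 * route with one nexthop on each port runs with normal (both-port)
 * affinity. Only a single route (mp->fib) is tracked at a time. For
 * illustration only (addresses and netdev names are placeholders, not
 * taken from this driver), a multipath route such as
 *
 *   ip route replace 10.0.0.0/24 \
 *           nexthop via 10.1.1.1 dev <pf0> nexthop via 10.2.2.1 dev <pf1>
 *
 * is the kind of FIB entry mlx5_lag_fib_route_event() reacts to.
 */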
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
				     struct fib_entry_notifier_info *fen_info)
{
	struct net_device *nh_dev0, *nh_dev1;
	struct fib_info *fi = fen_info->fi;
	struct lag_mp *mp = &ldev->lag_mp;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop tracking */
		if (mp->fib.mfi == fi)
			mp->fib.mfi = NULL;
		return;
	}

	/* Only consider a different route if its priority value is better
	 * (lower) than the tracked one
	 */
	if (mp->fib.mfi && mp->fib.mfi != fi &&
	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
	    fi->fib_priority >= mp->fib.priority)
		return;

	nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
	nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);

	/* Handle add/replace event */
	if (!nh_dev0) {
		if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len)
			mp->fib.mfi = NULL;
		return;
	}

	if (nh_dev0 == nh_dev1) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload doesn't support routes with multiple nexthops of the same device");
		return;
	}

	if (!nh_dev1) {
		if (__mlx5_lag_is_active(ldev)) {
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0);

			i++; /* netdev index is 0-based, affinity ports are 1-based */
			mlx5_lag_set_port_affinity(ldev, i);
			mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
		}

		return;
	}

	/* First time we see multipath route */
	if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}

static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check the nh event is related to the tracked route */
	if (!mp->fib.mfi || mp->fib.mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev, fib_work->event, fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

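/* FIB notifier callbacks may run in atomic context, so each event is
 * copied into a mlx5_fib_event_work allocated with GFP_ATOMIC and
 * handled later on the ordered "mlx5_lag_mp" workqueue under the RTNL
 * lock; the fib_info reference taken at enqueue time is dropped in
 * mlx5_lag_fib_update().
 */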
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		/* Routes using nexthop objects are not offloaded */
		if (fi->nh)
			return NOTIFY_DONE;

		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.fib.mfi = NULL;
}

int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* Always clear mfi, as it might become stale when a route delete
	 * event has been missed.
	 */
	mp->fib.mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->fib.mfi = NULL;
}