// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
}

static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}

/**
 * mlx5_lag_set_port_affinity - set tx port affinity for a multipath LAG
 * @ldev: lag device
 * @port: affinity to set:
 *        0 - normal affinity (spread over both ports),
 *        1 - affinity to port 1,
 *        2 - affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker;

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};
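
/* mlx5_lag_fib_route_event() below tracks IPv4 routes whose nexthop
 * devices are the two ports of this HCA. For example, a two-nexthop
 * route such as the following (interface names and addresses are
 * hypothetical) is eligible for multipath offload:
 *
 *   ip route add 10.0.0.0/24 \
 *           nexthop via 192.168.1.1 dev enp8s0f0 \
 *           nexthop via 192.168.2.1 dev enp8s0f1
 *
 * The first such route seen activates multipath LAG; a single-nexthop
 * route on one of the ports pins tx affinity to that port, and a delete
 * event stops tracking the route.
 */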

static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
				     unsigned long event,
				     struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* Stop tracking the route */
		if (mp->mfi == fi)
			mp->mfi = NULL;
		return;
	}

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			if (i < 0)
				i = MLX5_LAG_NORMAL_AFFINITY;
			else
				++i;

			mlx5_lag_set_port_affinity(ldev, i);
		}
		return;
	}

	if (nhs != 2)
		return;

	/* Verify that the next hops are ports of the same HCA */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload requires two ports of the same HCA\n");
		return;
	}

	/* First time we see a multipath route */
	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mp->mfi = fi;
}

static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check that the nexthop event is related to the tracked route */
	if (!mp->mfi || mp->mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 fib_work->fen_info.fi);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev, fib_work->event,
					   fib_nh, fib_nh->nh_parent);
		fib_info_put(fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}
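
/* Main FIB notifier callback. It may run in a context that cannot
 * sleep (note the GFP_ATOMIC allocation above), so it only filters the
 * event, snapshots the notifier info and takes a reference on the
 * fib_info, then defers the actual processing to mp->wq, where
 * mlx5_lag_fib_update() can take rtnl_lock.
 */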

static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct net_device *fib_dev;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh)
			return NOTIFY_DONE;
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev)
			return NOTIFY_DONE;
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take a reference on the fib_info to prevent it from being
		 * freed while the work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.mfi = NULL;
}

int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* Always clear mfi, as it might become stale when a route delete
	 * event has been missed.
	 */
	mp->mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->mfi = NULL;
}