// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */

#include <linux/refcount.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/arp.h>
#include "neigh.h"
#include "tc.h"
#include "en_rep.h"
#include "fs_core.h"
#include "diag/en_rep_tracepoint.h"

/* DELAY_PROBE_TIME of the IPv6 neighbour table, or ~0UL ("never the minimum")
 * when IPv6 is disabled or the nd table is unavailable.
 */
static unsigned long mlx5e_rep_ipv6_interval(void)
{
	if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
		return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);

	return ~0UL;
}

/* Seed neigh_update.min_interval with the smaller of the IPv4/IPv6
 * DELAY_PROBE_TIME defaults and propagate it to the HW flow-counter
 * sampling interval.
 */
static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
{
	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
	unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);

	rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
	mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
}

/* (Re-)arm the delayed neigh stats work to run after min_interval. */
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;

	mlx5_fc_queue_stats_work(priv->mdev,
				 &neigh_update->neigh_stats_work,
				 neigh_update->min_interval);
}

/* Take a reference on @nhe unless it is already being destroyed (refcnt 0).
 * Returns true on success.
 */
static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
{
	return refcount_inc_not_zero(&nhe->refcnt);
}

static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);

/* Drop a reference on @nhe; the last put unlinks it from the hashtable/list
 * and frees it after an RCU grace period (readers may still hold it under
 * rcu_read_lock).
 */
void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
{
	if (refcount_dec_and_test(&nhe->refcnt)) {
		mlx5e_rep_neigh_entry_remove(nhe);
		kfree_rcu(nhe, rcu);
	}
}

/* Iterator over the representor's neigh list: returns the entry after @nhe
 * (or the first entry when @nhe is NULL) with a reference held, skipping
 * entries whose refcount already dropped to zero. The reference on @nhe,
 * if any, is released. Returns NULL at the end of the list.
 */
static struct mlx5e_neigh_hash_entry *
mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
		   struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh_hash_entry *next = NULL;

	rcu_read_lock();

	for (next = nhe ?
		     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					   &nhe->neigh_list,
					   struct mlx5e_neigh_hash_entry,
					   neigh_list) :
		     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
					    struct mlx5e_neigh_hash_entry,
					    neigh_list);
	     next;
	     next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					  &next->neigh_list,
					  struct mlx5e_neigh_hash_entry,
					  neigh_list))
		if (mlx5e_rep_neigh_entry_hold(next))
			break;

	rcu_read_unlock();

	if (nhe)
		mlx5e_rep_neigh_entry_release(nhe);

	return next;
}

/* Periodic work: walk all tracked neigh entries and refresh their "used"
 * state from HW counters, then re-queue itself while entries remain.
 */
static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
{
	struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
						    neigh_update.neigh_stats_work.work);
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;

	rtnl_lock();
	if (!list_empty(&rpriv->neigh_update.neigh_list))
		mlx5e_rep_queue_neigh_stats_work(priv);

	while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
		mlx5e_tc_update_neigh_used_value(nhe);

	rtnl_unlock();
}

/* Deferred context for a single NETEVENT_NEIGH_UPDATE event; holds a
 * reference on both the neighbour and the matching hash entry until the
 * work has run.
 */
struct neigh_update_work {
	struct work_struct work;
	struct neighbour *n;
	struct mlx5e_neigh_hash_entry *nhe;
};

/* Drop the references taken in mlx5e_alloc_neigh_update_work() and free
 * the work item.
 */
static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
{
	neigh_release(update_work->n);
	mlx5e_rep_neigh_entry_release(update_work->nhe);
	kfree(update_work);
}

/* Work handler for a neighbour update: snapshot the neighbour state and,
 * if it still belongs to the device we track, update all offloaded flows
 * that encap through it.
 */
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
	struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
							     work);
	struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
	struct neighbour *n = update_work->n;
	struct mlx5e_encap_entry *e = NULL;
	bool neigh_connected, same_dev;
	unsigned char ha[ETH_ALEN];
	u8 nud_state, dead;

	rtnl_lock();

	/* If these parameters are changed after we release the lock,
	 * we'll receive another event letting us know about it.
	 * We use this lock to avoid inconsistency between the neigh validity
	 * and its hw address.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
	read_unlock_bh(&n->lock);

	neigh_connected = (nud_state & NUD_VALID) && !dead;

	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);

	/* The entry may have been migrated to another device since the event
	 * was queued; in that case this event is stale and must be ignored.
	 */
	if (!same_dev)
		goto out;

	/* mlx5e_get_next_init_encap() releases previous encap before returning
	 * the next one.
	 */
	while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
		mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);

out:
	rtnl_unlock();
	mlx5e_release_neigh_update_work(update_work);
}

/* Allocate and initialize a neigh_update_work for neighbour @n, called from
 * atomic (notifier) context — hence GFP_ATOMIC and the RCU-only lookup.
 * Returns NULL if allocation fails or no hash entry tracks this neighbour.
 */
static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
							       struct neighbour *n)
{
	struct neigh_update_work *update_work;
	struct mlx5e_neigh_hash_entry *nhe;
	struct mlx5e_neigh m_neigh = {};

	update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
	if (WARN_ON(!update_work))
		return NULL;

	m_neigh.family = n->ops->family;
	memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);

	/* Obtain reference to nhe as last step in order not to release it in
	 * atomic context.
	 */
	rcu_read_lock();
	nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
	rcu_read_unlock();
	if (!nhe) {
		kfree(update_work);
		return NULL;
	}

	INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
	neigh_hold(n);
	update_work->n = n;
	update_work->nhe = nhe;

	return update_work;
}

/* Netevent notifier: defers NEIGH_UPDATE events for tracked ARP/ND
 * neighbours to the driver workqueue, and shrinks the flow-counter
 * sampling interval when a tracked device's DELAY_PROBE_TIME decreases.
 * Runs in atomic context.
 */
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
				    unsigned long event, void *ptr)
{
	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
						    neigh_update.netevent_nb);
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;
	struct neigh_update_work *update_work;
	struct neigh_parms *p;
	struct neighbour *n;
	bool found = false;

	switch (event) {
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
#if IS_ENABLED(CONFIG_IPV6)
		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
		if (n->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		update_work = mlx5e_alloc_neigh_update_work(priv, n);
		if (!update_work)
			return NOTIFY_DONE;

		queue_work(priv->wq, &update_work->work);
		break;

	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We check the device is present since we don't care about
		 * changes in the default table, we only care about changes
		 * done per device delay prob time parameter.
		 */
#if IS_ENABLED(CONFIG_IPV6)
		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
		if (!p->dev || p->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		/* Only react if some tracked entry lives on the device whose
		 * parameters changed.
		 */
		rcu_read_lock();
		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
					neigh_list) {
			if (p->dev == READ_ONCE(nhe->neigh_dev)) {
				found = true;
				break;
			}
		}
		rcu_read_unlock();
		if (!found)
			return NOTIFY_DONE;

		/* min_interval only ever shrinks here; it is re-seeded in
		 * mlx5e_rep_neigh_update_init_interval() on init.
		 */
		neigh_update->min_interval = min_t(unsigned long,
						   NEIGH_VAR(p, DELAY_PROBE_TIME),
						   neigh_update->min_interval);
		mlx5_fc_update_sampling_interval(priv->mdev,
						 neigh_update->min_interval);
		break;
	}
	return NOTIFY_DONE;
}

/* Hashtable keyed by struct mlx5e_neigh (family + destination IP). */
static const struct rhashtable_params mlx5e_neigh_ht_params = {
	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
	.key_len = sizeof(struct mlx5e_neigh),
	.automatic_shrinking = true,
};

/* Initialize per-representor neighbour tracking: hashtable, list, locks,
 * stats work, sampling interval and the netevent notifier. On failure the
 * notifier_call is left NULL so mlx5e_rep_neigh_cleanup() becomes a no-op.
 * Returns 0 on success or a negative errno.
 */
int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	int err;

	err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
	if (err)
		goto out_err;

	INIT_LIST_HEAD(&neigh_update->neigh_list);
	mutex_init(&neigh_update->encap_lock);
	INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
			  mlx5e_rep_neigh_stats_work);
	mlx5e_rep_neigh_update_init_interval(rpriv);

	neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event;
	err = register_netevent_notifier(&neigh_update->netevent_nb);
	if (err)
		goto out_notifier;
	return 0;

out_notifier:
	neigh_update->netevent_nb.notifier_call = NULL;
	rhashtable_destroy(&neigh_update->neigh_ht);
out_err:
	netdev_warn(rpriv->netdev,
		    "Failed to initialize neighbours handling for vport %d\n",
		    rpriv->rep->vport);
	return err;
}

/* Tear down neighbour tracking in reverse order of init: unregister the
 * notifier first so no new work is queued, then drain pending work before
 * destroying the locks and hashtable. Safe to call if init failed
 * (notifier_call is NULL in that case).
 */
void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	if (!rpriv->neigh_update.netevent_nb.notifier_call)
		return;

	unregister_netevent_notifier(&neigh_update->netevent_nb);

	flush_workqueue(priv->wq); /* flush neigh update works */

	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);

	mutex_destroy(&neigh_update->encap_lock);
	rhashtable_destroy(&neigh_update->neigh_ht);
}

/* Insert @nhe into the hashtable and the RCU list used by the stats walk.
 * Returns 0 on success or the rhashtable insertion error.
 */
static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
					struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	int err;

	err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
				     &nhe->rhash_node,
				     mlx5e_neigh_ht_params);
	if (err)
		return err;

	list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);

	return err;
}

/* Unlink @nhe from the list and hashtable under encap_lock; called from the
 * final refcount put (actual freeing is RCU-deferred by the caller).
 */
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;

	mutex_lock(&rpriv->neigh_update.encap_lock);

	list_del_rcu(&nhe->neigh_list);

	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
			       &nhe->rhash_node,
			       mlx5e_neigh_ht_params);
	mutex_unlock(&rpriv->neigh_update.encap_lock);
}

/* This function must only be called under the representor's encap_lock or
 * inside rcu read lock section.
 *
 * Looks up the entry keyed by @m_neigh and returns it with a reference
 * held, or NULL if absent or already going away.
 */
struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
			     struct mlx5e_neigh *m_neigh)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_neigh_hash_entry *nhe;

	nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
				     mlx5e_neigh_ht_params);
	return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
}

/* Allocate a new hash entry for @m_neigh on @neigh_dev, with an initial
 * reference owned by the caller, and publish it via insert. On insertion
 * failure the entry is freed and the error returned; *nhe is then stale
 * and must not be used.
 */
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
				 struct mlx5e_neigh *m_neigh,
				 struct net_device *neigh_dev,
				 struct mlx5e_neigh_hash_entry **nhe)
{
	int err;

	*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
	if (!*nhe)
		return -ENOMEM;

	(*nhe)->priv = priv;
	memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
	spin_lock_init(&(*nhe)->encap_list_lock);
	INIT_LIST_HEAD(&(*nhe)->encap_list);
	refcount_set(&(*nhe)->refcnt, 1);
	/* Pairs with READ_ONCE in the netevent handler and update work. */
	WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);

	err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
	if (err)
		goto out_free;
	return 0;

out_free:
	kfree(*nhe);
	return err;
}