// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */

#include <linux/refcount.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/rwlock.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/arp.h>
#include "neigh.h"
#include "tc.h"
#include "en_rep.h"
#include "fs_core.h"
#include "diag/en_rep_tracepoint.h"

/* Return the IPv6 neighbour table's DELAY_PROBE_TIME, or ~0UL (i.e. "no
 * constraint") when IPv6 is disabled or its neigh table is unavailable.
 */
static unsigned long mlx5e_rep_ipv6_interval(void)
{
	if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
		return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);

	return ~0UL;
}

/* Initialize neigh_update.min_interval to the smaller of the IPv4 and IPv6
 * DELAY_PROBE_TIME values and propagate it as the flow-counter sampling
 * interval, so counters are sampled at least as often as the stack probes
 * neighbours.
 */
static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
{
	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
	unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);

	rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
	mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
}

/* Schedule the delayed neigh stats work after min_interval, using the
 * flow-counter helper so scheduling stays aligned with counter sampling.
 */
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;

	mlx5_fc_queue_stats_work(priv->mdev,
				 &neigh_update->neigh_stats_work,
				 neigh_update->min_interval);
}

/* Take a reference on @nhe unless it is already being destroyed (refcount
 * has hit zero). Returns true on success.
 */
static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
{
	return refcount_inc_not_zero(&nhe->refcnt);
}

static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);

/* Drop a reference on @nhe; the last put unlinks it from the hashtable/list
 * and frees it after an RCU grace period (lookups walk these under RCU).
 */
void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
{
	if (refcount_dec_and_test(&nhe->refcnt)) {
		mlx5e_rep_neigh_entry_remove(nhe);
		kfree_rcu(nhe, rcu);
	}
}

/* Iterator over the representor's neigh hash entry list.
 *
 * Returns the next entry after @nhe (or the first entry when @nhe is NULL)
 * on which a reference could be taken, skipping entries whose refcount
 * already dropped to zero; returns NULL at the end of the list. The
 * reference held on the passed-in @nhe is released before returning, so
 * callers can simply loop until NULL.
 */
static struct mlx5e_neigh_hash_entry *
mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
		   struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh_hash_entry *next = NULL;

	rcu_read_lock();

	for (next = nhe ?
		     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					   &nhe->neigh_list,
					   struct mlx5e_neigh_hash_entry,
					   neigh_list) :
		     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
					    struct mlx5e_neigh_hash_entry,
					    neigh_list);
	     next;
	     next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					  &next->neigh_list,
					  struct mlx5e_neigh_hash_entry,
					  neigh_list))
		if (mlx5e_rep_neigh_entry_hold(next))
			break;

	rcu_read_unlock();

	/* Release the caller's reference on the previous entry now that we
	 * have moved past it (or finished the walk).
	 */
	if (nhe)
		mlx5e_rep_neigh_entry_release(nhe);

	return next;
}

/* Periodic work: refresh the "used" state of every tracked neigh entry and,
 * as long as at least one entry exists, re-arm the work for the next
 * interval. Runs under RTNL as required by mlx5e_tc_update_neigh_used_value().
 */
static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
{
	struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
						    neigh_update.neigh_stats_work.work);
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;

	rtnl_lock();
	if (!list_empty(&rpriv->neigh_update.neigh_list))
		mlx5e_rep_queue_neigh_stats_work(priv);

	/* mlx5e_get_next_nhe() takes a reference on each returned entry and
	 * releases the previous one, so no explicit put is needed here.
	 */
	while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
		mlx5e_tc_update_neigh_used_value(nhe);

	rtnl_unlock();
}

/* Work handler for a neighbour state change: snapshot the neigh's hw address
 * and validity, then update the offloaded flows of every encap entry hanging
 * off this hash entry. Consumes the references taken by
 * mlx5e_rep_queue_neigh_update_work() on both the nhe and the neighbour.
 */
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
	struct mlx5e_neigh_hash_entry *nhe =
		container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
	struct neighbour *n = nhe->n;
	struct mlx5e_encap_entry *e;
	unsigned char ha[ETH_ALEN];
	struct mlx5e_priv *priv;
	bool neigh_connected;
	u8 nud_state, dead;

	rtnl_lock();

	/* If these parameters are changed after we release the lock,
	 * we'll receive another event letting us know about it.
	 * We use this lock to avoid inconsistency between the neigh validity
	 * and its hw address.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	neigh_connected = (nud_state & NUD_VALID) && !dead;

	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);

	/* NOTE(review): encap_list is walked here without taking
	 * nhe->encap_list_lock; entries appear to be kept alive only by
	 * mlx5e_encap_take() succeeding - confirm this cannot race with
	 * concurrent encap entry removal.
	 */
	list_for_each_entry(e, &nhe->encap_list, encap_list) {
		if (!mlx5e_encap_take(e))
			continue;

		priv = netdev_priv(e->out_dev);
		mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
		mlx5e_encap_put(priv, e);
	}
	mlx5e_rep_neigh_entry_release(nhe);
	rtnl_unlock();
	neigh_release(n);
}

/* Queue mlx5e_rep_neigh_update() for @nhe/@n from the netevent notifier.
 * Takes a neighbour reference; on failure to queue (work already pending),
 * drops both the nhe and neighbour references since the handler won't run
 * for this invocation.
 */
static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
					      struct mlx5e_neigh_hash_entry *nhe,
					      struct neighbour *n)
{
	/* Take a reference to ensure the neighbour and mlx5 encap
	 * entry won't be destructed until we drop the reference in
	 * delayed work.
	 */
	neigh_hold(n);

	/* This assignment is valid as long as the neigh reference
	 * is taken
	 */
	nhe->n = n;

	if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
		mlx5e_rep_neigh_entry_release(nhe);
		neigh_release(n);
	}
}

/* Netevent notifier callback.
 *
 * NETEVENT_NEIGH_UPDATE: if the neighbour belongs to the ARP (or, with IPv6,
 * ND) table and we track it, queue a neigh update work item (the lookup takes
 * the nhe reference consumed by the work / error path).
 *
 * NETEVENT_DELAY_PROBE_TIME_UPDATE: if the changed per-device parameters
 * belong to a device we track a neighbour on, shrink min_interval accordingly
 * and update the flow-counter sampling interval.
 *
 * Always returns NOTIFY_DONE.
 */
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
				    unsigned long event, void *ptr)
{
	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
						    neigh_update.netevent_nb);
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;
	struct mlx5e_neigh m_neigh = {};
	struct neigh_parms *p;
	struct neighbour *n;
	bool found = false;

	switch (event) {
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
#if IS_ENABLED(CONFIG_IPV6)
		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
		if (n->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		/* Build the hashtable key from the neighbour's identity. */
		m_neigh.dev = n->dev;
		m_neigh.family = n->ops->family;
		memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);

		rcu_read_lock();
		nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
		rcu_read_unlock();
		if (!nhe)
			return NOTIFY_DONE;

		mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
		break;

	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We check the device is present since we don't care about
		 * changes in the default table, we only care about changes
		 * done to the per-device delay probe time parameter.
		 */
#if IS_ENABLED(CONFIG_IPV6)
		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
		if (!p->dev || p->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		rcu_read_lock();
		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
					neigh_list) {
			if (p->dev == nhe->m_neigh.dev) {
				found = true;
				break;
			}
		}
		rcu_read_unlock();
		if (!found)
			return NOTIFY_DONE;

		/* min_interval only ever shrinks here; it is re-derived from
		 * scratch in mlx5e_rep_neigh_update_init_interval() on init.
		 */
		neigh_update->min_interval = min_t(unsigned long,
						   NEIGH_VAR(p, DELAY_PROBE_TIME),
						   neigh_update->min_interval);
		mlx5_fc_update_sampling_interval(priv->mdev,
						 neigh_update->min_interval);
		break;
	}
	return NOTIFY_DONE;
}

/* Hashtable of mlx5e_neigh_hash_entry keyed by the full mlx5e_neigh struct
 * (device, family, destination IP).
 */
static const struct rhashtable_params mlx5e_neigh_ht_params = {
	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
	.key_len = sizeof(struct mlx5e_neigh),
	.automatic_shrinking = true,
};

/* Set up the representor's neigh update machinery: hashtable, entry list,
 * encap lock, stats work, sampling interval and the netevent notifier.
 * Returns 0 on success or a negative errno; on notifier registration failure
 * the hashtable is torn down again.
 */
int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	int err;

	err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
	if (err)
		return err;

	INIT_LIST_HEAD(&neigh_update->neigh_list);
	mutex_init(&neigh_update->encap_lock);
	INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
			  mlx5e_rep_neigh_stats_work);
	mlx5e_rep_neigh_update_init_interval(rpriv);

	rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
	err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
	if (err)
		goto out_err;
	return 0;

out_err:
	rhashtable_destroy(&neigh_update->neigh_ht);
	return err;
}

/* Tear down in reverse order of mlx5e_rep_neigh_init(): stop new events,
 * drain in-flight update works, cancel the stats work, then free the
 * synchronization objects and hashtable.
 */
void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	unregister_netevent_notifier(&neigh_update->netevent_nb);

	flush_workqueue(priv->wq); /* flush neigh update works */

	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);

	mutex_destroy(&neigh_update->encap_lock);
	rhashtable_destroy(&neigh_update->neigh_ht);
}

/* Insert @nhe into the hashtable and, on success, link it into the RCU list
 * walked by the stats work and the DELAY_PROBE_TIME handler.
 */
static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
					struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	int err;

	err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
				     &nhe->rhash_node,
				     mlx5e_neigh_ht_params);
	if (err)
		return err;

	list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);

	return err;
}

/* Unlink @nhe from the list and hashtable under encap_lock; the memory is
 * freed by the caller (mlx5e_rep_neigh_entry_release) via kfree_rcu.
 */
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;

	mutex_lock(&rpriv->neigh_update.encap_lock);

	list_del_rcu(&nhe->neigh_list);

	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
			       &nhe->rhash_node,
			       mlx5e_neigh_ht_params);
	mutex_unlock(&rpriv->neigh_update.encap_lock);
}

/* This function must only be called under the representor's encap_lock or
 * inside an RCU read-side section.
 *
 * Returns the matching entry with a reference taken (caller must release it
 * via mlx5e_rep_neigh_entry_release), or NULL if no live entry matches.
 */
struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
			     struct mlx5e_neigh *m_neigh)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_neigh_hash_entry *nhe;

	nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
				     mlx5e_neigh_ht_params);
	return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
}

/* Allocate and register a neigh hash entry for encap entry @e's neighbour.
 * On success *nhe holds the new entry with refcount 1 and returns 0; on
 * failure the allocation is freed and a negative errno is returned.
 */
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
				 struct mlx5e_encap_entry *e,
				 struct mlx5e_neigh_hash_entry **nhe)
{
	int err;

	*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
	if (!*nhe)
		return -ENOMEM;

	(*nhe)->priv = priv;
	memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
	INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
	spin_lock_init(&(*nhe)->encap_list_lock);
	INIT_LIST_HEAD(&(*nhe)->encap_list);
	refcount_set(&(*nhe)->refcnt, 1);

	err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
	if (err)
		goto out_free;
	return 0;

out_free:
	kfree(*nhe);
	return err;
}