1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies. */
3 
4 #include <linux/refcount.h>
5 #include <linux/list.h>
6 #include <linux/rculist.h>
7 #include <linux/rtnetlink.h>
8 #include <linux/workqueue.h>
9 #include <linux/spinlock.h>
10 #include <linux/notifier.h>
11 #include <net/netevent.h>
12 #include <net/arp.h>
13 #include "neigh.h"
14 #include "tc.h"
15 #include "en_rep.h"
16 #include "fs_core.h"
17 #include "diag/en_rep_tracepoint.h"
18 
mlx5e_rep_ipv6_interval(void)19 static unsigned long mlx5e_rep_ipv6_interval(void)
20 {
21 	if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
22 		return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
23 
24 	return ~0UL;
25 }
26 
mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv * rpriv)27 static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
28 {
29 	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
30 	unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
31 	struct net_device *netdev = rpriv->netdev;
32 	struct mlx5e_priv *priv = netdev_priv(netdev);
33 
34 	rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
35 	mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
36 }
37 
mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv * priv)38 void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
39 {
40 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
41 	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
42 
43 	mlx5_fc_queue_stats_work(priv->mdev,
44 				 &neigh_update->neigh_stats_work,
45 				 neigh_update->min_interval);
46 }
47 
mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry * nhe)48 static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
49 {
50 	return refcount_inc_not_zero(&nhe->refcnt);
51 }
52 
53 static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
54 
mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry * nhe)55 void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
56 {
57 	if (refcount_dec_and_test(&nhe->refcnt)) {
58 		mlx5e_rep_neigh_entry_remove(nhe);
59 		kfree_rcu(nhe, rcu);
60 	}
61 }
62 
63 static struct mlx5e_neigh_hash_entry *
mlx5e_get_next_nhe(struct mlx5e_rep_priv * rpriv,struct mlx5e_neigh_hash_entry * nhe)64 mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
65 		   struct mlx5e_neigh_hash_entry *nhe)
66 {
67 	struct mlx5e_neigh_hash_entry *next = NULL;
68 
69 	rcu_read_lock();
70 
71 	for (next = nhe ?
72 		     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
73 					   &nhe->neigh_list,
74 					   struct mlx5e_neigh_hash_entry,
75 					   neigh_list) :
76 		     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
77 					    struct mlx5e_neigh_hash_entry,
78 					    neigh_list);
79 	     next;
80 	     next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
81 					  &next->neigh_list,
82 					  struct mlx5e_neigh_hash_entry,
83 					  neigh_list))
84 		if (mlx5e_rep_neigh_entry_hold(next))
85 			break;
86 
87 	rcu_read_unlock();
88 
89 	if (nhe)
90 		mlx5e_rep_neigh_entry_release(nhe);
91 
92 	return next;
93 }
94 
mlx5e_rep_neigh_stats_work(struct work_struct * work)95 static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
96 {
97 	struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
98 						    neigh_update.neigh_stats_work.work);
99 	struct net_device *netdev = rpriv->netdev;
100 	struct mlx5e_priv *priv = netdev_priv(netdev);
101 	struct mlx5e_neigh_hash_entry *nhe = NULL;
102 
103 	rtnl_lock();
104 	if (!list_empty(&rpriv->neigh_update.neigh_list))
105 		mlx5e_rep_queue_neigh_stats_work(priv);
106 
107 	while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
108 		mlx5e_tc_update_neigh_used_value(nhe);
109 
110 	rtnl_unlock();
111 }
112 
113 struct neigh_update_work {
114 	struct work_struct work;
115 	struct neighbour *n;
116 	struct mlx5e_neigh_hash_entry *nhe;
117 };
118 
mlx5e_release_neigh_update_work(struct neigh_update_work * update_work)119 static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
120 {
121 	neigh_release(update_work->n);
122 	mlx5e_rep_neigh_entry_release(update_work->nhe);
123 	kfree(update_work);
124 }
125 
mlx5e_rep_neigh_update(struct work_struct * work)126 static void mlx5e_rep_neigh_update(struct work_struct *work)
127 {
128 	struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
129 							     work);
130 	struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
131 	struct neighbour *n = update_work->n;
132 	struct mlx5e_encap_entry *e = NULL;
133 	bool neigh_connected, same_dev;
134 	unsigned char ha[ETH_ALEN];
135 	u8 nud_state, dead;
136 
137 	rtnl_lock();
138 
139 	/* If these parameters are changed after we release the lock,
140 	 * we'll receive another event letting us know about it.
141 	 * We use this lock to avoid inconsistency between the neigh validity
142 	 * and it's hw address.
143 	 */
144 	read_lock_bh(&n->lock);
145 	memcpy(ha, n->ha, ETH_ALEN);
146 	nud_state = n->nud_state;
147 	dead = n->dead;
148 	same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
149 	read_unlock_bh(&n->lock);
150 
151 	neigh_connected = (nud_state & NUD_VALID) && !dead;
152 
153 	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
154 
155 	if (!same_dev)
156 		goto out;
157 
158 	/* mlx5e_get_next_init_encap() releases previous encap before returning
159 	 * the next one.
160 	 */
161 	while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
162 		mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
163 
164 out:
165 	rtnl_unlock();
166 	mlx5e_release_neigh_update_work(update_work);
167 }
168 
mlx5e_alloc_neigh_update_work(struct mlx5e_priv * priv,struct neighbour * n)169 static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
170 							       struct neighbour *n)
171 {
172 	struct neigh_update_work *update_work;
173 	struct mlx5e_neigh_hash_entry *nhe;
174 	struct mlx5e_neigh m_neigh = {};
175 
176 	update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
177 	if (WARN_ON(!update_work))
178 		return NULL;
179 
180 	m_neigh.family = n->ops->family;
181 	memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
182 
183 	/* Obtain reference to nhe as last step in order not to release it in
184 	 * atomic context.
185 	 */
186 	rcu_read_lock();
187 	nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
188 	rcu_read_unlock();
189 	if (!nhe) {
190 		kfree(update_work);
191 		return NULL;
192 	}
193 
194 	INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
195 	neigh_hold(n);
196 	update_work->n = n;
197 	update_work->nhe = nhe;
198 
199 	return update_work;
200 }
201 
mlx5e_rep_netevent_event(struct notifier_block * nb,unsigned long event,void * ptr)202 static int mlx5e_rep_netevent_event(struct notifier_block *nb,
203 				    unsigned long event, void *ptr)
204 {
205 	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
206 						    neigh_update.netevent_nb);
207 	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
208 	struct net_device *netdev = rpriv->netdev;
209 	struct mlx5e_priv *priv = netdev_priv(netdev);
210 	struct mlx5e_neigh_hash_entry *nhe = NULL;
211 	struct neigh_update_work *update_work;
212 	struct neigh_parms *p;
213 	struct neighbour *n;
214 	bool found = false;
215 
216 	switch (event) {
217 	case NETEVENT_NEIGH_UPDATE:
218 		n = ptr;
219 #if IS_ENABLED(CONFIG_IPV6)
220 		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
221 #else
222 		if (n->tbl != &arp_tbl)
223 #endif
224 			return NOTIFY_DONE;
225 
226 		update_work = mlx5e_alloc_neigh_update_work(priv, n);
227 		if (!update_work)
228 			return NOTIFY_DONE;
229 
230 		queue_work(priv->wq, &update_work->work);
231 		break;
232 
233 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
234 		p = ptr;
235 
236 		/* We check the device is present since we don't care about
237 		 * changes in the default table, we only care about changes
238 		 * done per device delay prob time parameter.
239 		 */
240 #if IS_ENABLED(CONFIG_IPV6)
241 		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
242 #else
243 		if (!p->dev || p->tbl != &arp_tbl)
244 #endif
245 			return NOTIFY_DONE;
246 
247 		rcu_read_lock();
248 		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
249 					neigh_list) {
250 			if (p->dev == READ_ONCE(nhe->neigh_dev)) {
251 				found = true;
252 				break;
253 			}
254 		}
255 		rcu_read_unlock();
256 		if (!found)
257 			return NOTIFY_DONE;
258 
259 		neigh_update->min_interval = min_t(unsigned long,
260 						   NEIGH_VAR(p, DELAY_PROBE_TIME),
261 						   neigh_update->min_interval);
262 		mlx5_fc_update_sampling_interval(priv->mdev,
263 						 neigh_update->min_interval);
264 		break;
265 	}
266 	return NOTIFY_DONE;
267 }
268 
269 static const struct rhashtable_params mlx5e_neigh_ht_params = {
270 	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
271 	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
272 	.key_len = sizeof(struct mlx5e_neigh),
273 	.automatic_shrinking = true,
274 };
275 
mlx5e_rep_neigh_init(struct mlx5e_rep_priv * rpriv)276 int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
277 {
278 	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
279 	int err;
280 
281 	err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
282 	if (err)
283 		goto out_err;
284 
285 	INIT_LIST_HEAD(&neigh_update->neigh_list);
286 	mutex_init(&neigh_update->encap_lock);
287 	INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
288 			  mlx5e_rep_neigh_stats_work);
289 	mlx5e_rep_neigh_update_init_interval(rpriv);
290 
291 	neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event;
292 	err = register_netevent_notifier(&neigh_update->netevent_nb);
293 	if (err)
294 		goto out_notifier;
295 	return 0;
296 
297 out_notifier:
298 	neigh_update->netevent_nb.notifier_call = NULL;
299 	rhashtable_destroy(&neigh_update->neigh_ht);
300 out_err:
301 	netdev_warn(rpriv->netdev,
302 		    "Failed to initialize neighbours handling for vport %d\n",
303 		    rpriv->rep->vport);
304 	return err;
305 }
306 
mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv * rpriv)307 void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
308 {
309 	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
310 	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
311 
312 	if (!rpriv->neigh_update.netevent_nb.notifier_call)
313 		return;
314 
315 	unregister_netevent_notifier(&neigh_update->netevent_nb);
316 
317 	flush_workqueue(priv->wq); /* flush neigh update works */
318 
319 	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
320 
321 	mutex_destroy(&neigh_update->encap_lock);
322 	rhashtable_destroy(&neigh_update->neigh_ht);
323 }
324 
mlx5e_rep_neigh_entry_insert(struct mlx5e_priv * priv,struct mlx5e_neigh_hash_entry * nhe)325 static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
326 					struct mlx5e_neigh_hash_entry *nhe)
327 {
328 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
329 	int err;
330 
331 	err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
332 				     &nhe->rhash_node,
333 				     mlx5e_neigh_ht_params);
334 	if (err)
335 		return err;
336 
337 	list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
338 
339 	return err;
340 }
341 
mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry * nhe)342 static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
343 {
344 	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
345 
346 	mutex_lock(&rpriv->neigh_update.encap_lock);
347 
348 	list_del_rcu(&nhe->neigh_list);
349 
350 	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
351 			       &nhe->rhash_node,
352 			       mlx5e_neigh_ht_params);
353 	mutex_unlock(&rpriv->neigh_update.encap_lock);
354 }
355 
356 /* This function must only be called under the representor's encap_lock or
357  * inside rcu read lock section.
358  */
359 struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv * priv,struct mlx5e_neigh * m_neigh)360 mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
361 			     struct mlx5e_neigh *m_neigh)
362 {
363 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
364 	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
365 	struct mlx5e_neigh_hash_entry *nhe;
366 
367 	nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
368 				     mlx5e_neigh_ht_params);
369 	return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
370 }
371 
mlx5e_rep_neigh_entry_create(struct mlx5e_priv * priv,struct mlx5e_neigh * m_neigh,struct net_device * neigh_dev,struct mlx5e_neigh_hash_entry ** nhe)372 int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
373 				 struct mlx5e_neigh *m_neigh,
374 				 struct net_device *neigh_dev,
375 				 struct mlx5e_neigh_hash_entry **nhe)
376 {
377 	int err;
378 
379 	*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
380 	if (!*nhe)
381 		return -ENOMEM;
382 
383 	(*nhe)->priv = priv;
384 	memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
385 	spin_lock_init(&(*nhe)->encap_list_lock);
386 	INIT_LIST_HEAD(&(*nhe)->encap_list);
387 	refcount_set(&(*nhe)->refcnt, 1);
388 	WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);
389 
390 	err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
391 	if (err)
392 		goto out_free;
393 	return 0;
394 
395 out_free:
396 	kfree(*nhe);
397 	return err;
398 }
399