// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */

#include <linux/refcount.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/arp.h>
#include "neigh.h"
#include "tc.h"
#include "en_rep.h"
#include "fs_core.h"
#include "diag/en_rep_tracepoint.h"

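/* The flow counter sampling interval is derived from the smallest
 * DELAY_PROBE_TIME among the neighbour tables we track: arp_tbl for IPv4
 * and, when available, the IPv6 neighbour discovery table. ~0UL means
 * "no IPv6 constraint".
 */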
static unsigned long mlx5e_rep_ipv6_interval(void)
{
	if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
		return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);

	return ~0UL;
}

static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
{
	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
	unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);

	rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
	mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
}

void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;

	mlx5_fc_queue_stats_work(priv->mdev,
				 &neigh_update->neigh_stats_work,
				 neigh_update->min_interval);
}

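/* nhe lifetime: a hold succeeds only while the refcount is non-zero, so RCU
 * readers can safely attempt to take a reference on entries that are
 * concurrently being released. The final release unlinks the entry and
 * frees it after a grace period via kfree_rcu().
 */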
static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
{
	return refcount_inc_not_zero(&nhe->refcnt);
}

static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);

void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
{
	if (refcount_dec_and_test(&nhe->refcnt)) {
		mlx5e_rep_neigh_entry_remove(nhe);
		kfree_rcu(nhe, rcu);
	}
}

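/* Advance the iteration over neigh_list: return the first entry after @nhe
 * (or the head of the list when @nhe is NULL) whose reference could be
 * taken, and drop the reference previously held on @nhe. This lets callers
 * walk the list without staying inside an RCU read-side section across the
 * loop body.
 */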
static struct mlx5e_neigh_hash_entry *
mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
		   struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh_hash_entry *next = NULL;

	rcu_read_lock();

	for (next = nhe ?
		     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					   &nhe->neigh_list,
					   struct mlx5e_neigh_hash_entry,
					   neigh_list) :
		     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
					    struct mlx5e_neigh_hash_entry,
					    neigh_list);
	     next;
	     next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					  &next->neigh_list,
					  struct mlx5e_neigh_hash_entry,
					  neigh_list))
		if (mlx5e_rep_neigh_entry_hold(next))
			break;

	rcu_read_unlock();

	if (nhe)
		mlx5e_rep_neigh_entry_release(nhe);

	return next;
}

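/* Periodic stats work: runs under RTNL, re-arms itself while tracked
 * entries exist, then updates the 'used' value of each entry from its flow
 * counters so neighbours referenced by offloaded flows are kept alive.
 */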
static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
{
	struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
						    neigh_update.neigh_stats_work.work);
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;

	rtnl_lock();
	if (!list_empty(&rpriv->neigh_update.neigh_list))
		mlx5e_rep_queue_neigh_stats_work(priv);

	while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
		mlx5e_tc_update_neigh_used_value(nhe);

	rtnl_unlock();
}

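/* Per-event update work: snapshot the neighbour's state under its lock,
 * then let each encap entry attached to this nhe update its offloaded flows
 * according to whether the neighbour is connected (valid and not dead).
 */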
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
	struct mlx5e_neigh_hash_entry *nhe =
		container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
	struct neighbour *n = nhe->n;
	struct mlx5e_encap_entry *e;
	unsigned char ha[ETH_ALEN];
	struct mlx5e_priv *priv;
	bool neigh_connected;
	u8 nud_state, dead;

	rtnl_lock();

	/* If these parameters are changed after we release the lock,
	 * we'll receive another event letting us know about it.
	 * We use this lock to avoid inconsistency between the neigh validity
	 * and its hw address.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	neigh_connected = (nud_state & NUD_VALID) && !dead;

	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);

	list_for_each_entry(e, &nhe->encap_list, encap_list) {
		if (!mlx5e_encap_take(e))
			continue;

		priv = netdev_priv(e->out_dev);
		mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
		mlx5e_encap_put(priv, e);
	}
	mlx5e_rep_neigh_entry_release(nhe);
	rtnl_unlock();
	neigh_release(n);
}

static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
					      struct mlx5e_neigh_hash_entry *nhe,
					      struct neighbour *n)
{
	/* Take a reference to ensure the neighbour and mlx5 encap
	 * entry won't be destructed until we drop the reference in
	 * delayed work.
	 */
	neigh_hold(n);

	/* This assignment is valid as long as the neigh reference
	 * is taken.
	 */
	nhe->n = n;

	if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
		mlx5e_rep_neigh_entry_release(nhe);
		neigh_release(n);
	}
}

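/* Netevent callback. NETEVENT_NEIGH_UPDATE schedules update work for
 * neighbours that have a matching nhe; NETEVENT_DELAY_PROBE_TIME_UPDATE
 * lowers the flow counter sampling interval when a tracked device's
 * DELAY_PROBE_TIME drops below the current minimum.
 */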
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
				    unsigned long event, void *ptr)
{
	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
						    neigh_update.netevent_nb);
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;
	struct mlx5e_neigh m_neigh = {};
	struct neigh_parms *p;
	struct neighbour *n;
	bool found = false;

	switch (event) {
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
#if IS_ENABLED(CONFIG_IPV6)
		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
		if (n->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		m_neigh.dev = n->dev;
		m_neigh.family = n->ops->family;
		memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);

		rcu_read_lock();
		nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
		rcu_read_unlock();
		if (!nhe)
			return NOTIFY_DONE;

		mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
		break;

	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We check that the device is present since we don't care
		 * about changes in the default table; we only care about
		 * changes done on the per-device delay probe time parameter.
		 */
#if IS_ENABLED(CONFIG_IPV6)
		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
		if (!p->dev || p->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		rcu_read_lock();
		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
					neigh_list) {
			if (p->dev == nhe->m_neigh.dev) {
				found = true;
				break;
			}
		}
		rcu_read_unlock();
		if (!found)
			return NOTIFY_DONE;

		neigh_update->min_interval = min_t(unsigned long,
						   NEIGH_VAR(p, DELAY_PROBE_TIME),
						   neigh_update->min_interval);
		mlx5_fc_update_sampling_interval(priv->mdev,
						 neigh_update->min_interval);
		break;
	}
	return NOTIFY_DONE;
}

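/* nhe's are hashed by the full struct mlx5e_neigh key (device, address
 * family and destination IP), so all encap entries that resolve to the same
 * neighbour share a single nhe.
 */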
static const struct rhashtable_params mlx5e_neigh_ht_params = {
	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
	.key_len = sizeof(struct mlx5e_neigh),
	.automatic_shrinking = true,
};

int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	int err;

	err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
	if (err)
		return err;

	INIT_LIST_HEAD(&neigh_update->neigh_list);
	mutex_init(&neigh_update->encap_lock);
	INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
			  mlx5e_rep_neigh_stats_work);
	mlx5e_rep_neigh_update_init_interval(rpriv);

	rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
	err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
	if (err)
		goto out_err;
	return 0;

out_err:
	rhashtable_destroy(&neigh_update->neigh_ht);
	return err;
}

void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	unregister_netevent_notifier(&neigh_update->netevent_nb);

	flush_workqueue(priv->wq); /* flush neigh update works */

	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);

	mutex_destroy(&neigh_update->encap_lock);
	rhashtable_destroy(&neigh_update->neigh_ht);
}

static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
					struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	int err;

	err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
				     &nhe->rhash_node,
				     mlx5e_neigh_ht_params);
	if (err)
		return err;

	list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);

	return err;
}

static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;

	mutex_lock(&rpriv->neigh_update.encap_lock);

	list_del_rcu(&nhe->neigh_list);

	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
			       &nhe->rhash_node,
			       mlx5e_neigh_ht_params);
	mutex_unlock(&rpriv->neigh_update.encap_lock);
}

/* This function must only be called under the representor's encap_lock or
 * within an RCU read-side critical section.
 */
struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
			     struct mlx5e_neigh *m_neigh)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_neigh_hash_entry *nhe;

	nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
				     mlx5e_neigh_ht_params);
	return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
}

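/* Allocate an nhe for @e's neighbour and publish it in the hash table and
 * neigh_list. The initial reference is owned by the caller.
 */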
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
				 struct mlx5e_encap_entry *e,
				 struct mlx5e_neigh_hash_entry **nhe)
{
	int err;

	*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
	if (!*nhe)
		return -ENOMEM;

	(*nhe)->priv = priv;
	memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
	INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
	spin_lock_init(&(*nhe)->encap_list_lock);
	INIT_LIST_HEAD(&(*nhe)->encap_list);
	refcount_set(&(*nhe)->refcnt, 1);

	err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
	if (err)
		goto out_free;
	return 0;

out_free:
	kfree(*nhe);
	return err;
}