xref: /openbmc/linux/drivers/net/ethernet/sfc/tc_encap_actions.c (revision d5a05299306227d73b0febba9cecedf88931c507)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3  * Driver for Solarflare network controllers and boards
4  * Copyright 2023, Advanced Micro Devices, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation, incorporated herein by reference.
9  */
10 
11 #include "tc_encap_actions.h"
12 #include "tc.h"
13 #include "mae.h"
14 #include <net/vxlan.h>
15 #include <net/geneve.h>
16 #include <net/netevent.h>
17 #include <net/arp.h>
18 
19 static const struct rhashtable_params efx_neigh_ht_params = {
20 	.key_len	= offsetof(struct efx_neigh_binder, ha),
21 	.key_offset	= 0,
22 	.head_offset	= offsetof(struct efx_neigh_binder, linkage),
23 };
24 
25 static const struct rhashtable_params efx_tc_encap_ht_params = {
26 	.key_len	= offsetofend(struct efx_tc_encap_action, key),
27 	.key_offset	= 0,
28 	.head_offset	= offsetof(struct efx_tc_encap_action, linkage),
29 };
30 
31 static void efx_tc_encap_free(void *ptr, void *__unused)
32 {
33 	struct efx_tc_encap_action *enc = ptr;
34 
35 	WARN_ON(refcount_read(&enc->ref));
36 	kfree(enc);
37 }
38 
39 static void efx_neigh_free(void *ptr, void *__unused)
40 {
41 	struct efx_neigh_binder *neigh = ptr;
42 
43 	WARN_ON(refcount_read(&neigh->ref));
44 	WARN_ON(!list_empty(&neigh->users));
45 	put_net_track(neigh->net, &neigh->ns_tracker);
46 	netdev_put(neigh->egdev, &neigh->dev_tracker);
47 	kfree(neigh);
48 }
49 
50 int efx_tc_init_encap_actions(struct efx_nic *efx)
51 {
52 	int rc;
53 
54 	rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params);
55 	if (rc < 0)
56 		goto fail_neigh_ht;
57 	rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params);
58 	if (rc < 0)
59 		goto fail_encap_ht;
60 	return 0;
61 fail_encap_ht:
62 	rhashtable_destroy(&efx->tc->neigh_ht);
63 fail_neigh_ht:
64 	return rc;
65 }
66 
67 /* Only call this in init failure teardown.
68  * Normal exit should fini instead as there may be entries in the table.
69  */
70 void efx_tc_destroy_encap_actions(struct efx_nic *efx)
71 {
72 	rhashtable_destroy(&efx->tc->encap_ht);
73 	rhashtable_destroy(&efx->tc->neigh_ht);
74 }
75 
76 void efx_tc_fini_encap_actions(struct efx_nic *efx)
77 {
78 	rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL);
79 	rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL);
80 }
81 
82 static void efx_neigh_update(struct work_struct *work);
83 
84 static int efx_bind_neigh(struct efx_nic *efx,
85 			  struct efx_tc_encap_action *encap, struct net *net,
86 			  struct netlink_ext_ack *extack)
87 {
88 	struct efx_neigh_binder *neigh, *old;
89 	struct flowi6 flow6 = {};
90 	struct flowi4 flow4 = {};
91 	int rc;
92 
93 	/* GCC stupidly thinks that only values explicitly listed in the enum
94 	 * definition can _possibly_ be sensible case values, so without this
95 	 * cast it complains about the IPv6 versions.
96 	 */
97 	switch ((int)encap->type) {
98 	case EFX_ENCAP_TYPE_VXLAN:
99 	case EFX_ENCAP_TYPE_GENEVE:
100 		flow4.flowi4_proto = IPPROTO_UDP;
101 		flow4.fl4_dport = encap->key.tp_dst;
102 		flow4.flowi4_tos = encap->key.tos;
103 		flow4.daddr = encap->key.u.ipv4.dst;
104 		flow4.saddr = encap->key.u.ipv4.src;
105 		break;
106 	case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
107 	case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
108 		flow6.flowi6_proto = IPPROTO_UDP;
109 		flow6.fl6_dport = encap->key.tp_dst;
110 		flow6.flowlabel = ip6_make_flowinfo(encap->key.tos,
111 						    encap->key.label);
112 		flow6.daddr = encap->key.u.ipv6.dst;
113 		flow6.saddr = encap->key.u.ipv6.src;
114 		break;
115 	default:
116 		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d",
117 				       (int)encap->type);
118 		return -EOPNOTSUPP;
119 	}
120 
121 	neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT);
122 	if (!neigh)
123 		return -ENOMEM;
124 	neigh->net = get_net_track(net, &neigh->ns_tracker, GFP_KERNEL_ACCOUNT);
125 	neigh->dst_ip = flow4.daddr;
126 	neigh->dst_ip6 = flow6.daddr;
127 
128 	old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht,
129 						&neigh->linkage,
130 						efx_neigh_ht_params);
131 	if (old) {
132 		/* don't need our new entry */
133 		put_net_track(neigh->net, &neigh->ns_tracker);
134 		kfree(neigh);
135 		if (!refcount_inc_not_zero(&old->ref))
136 			return -EAGAIN;
137 		/* existing entry found, ref taken */
138 		neigh = old;
139 	} else {
140 		/* New entry.  We need to initiate a lookup */
141 		struct neighbour *n;
142 		struct rtable *rt;
143 
144 		if (encap->type & EFX_ENCAP_FLAG_IPV6) {
145 #if IS_ENABLED(CONFIG_IPV6)
146 			struct dst_entry *dst;
147 
148 			dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6,
149 							      NULL);
150 			rc = PTR_ERR_OR_ZERO(dst);
151 			if (rc) {
152 				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap");
153 				goto out_free;
154 			}
155 			neigh->egdev = dst->dev;
156 			netdev_hold(neigh->egdev, &neigh->dev_tracker,
157 				    GFP_KERNEL_ACCOUNT);
158 			neigh->ttl = ip6_dst_hoplimit(dst);
159 			n = dst_neigh_lookup(dst, &flow6.daddr);
160 			dst_release(dst);
161 #else
162 			/* We shouldn't ever get here, because if IPv6 isn't
163 			 * enabled how did someone create an IPv6 tunnel_key?
164 			 */
165 			rc = -EOPNOTSUPP;
166 			NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)");
167 			goto out_free;
168 #endif
169 		} else {
170 			rt = ip_route_output_key(net, &flow4);
171 			if (IS_ERR_OR_NULL(rt)) {
172 				rc = PTR_ERR_OR_ZERO(rt);
173 				if (!rc)
174 					rc = -EIO;
175 				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap");
176 				goto out_free;
177 			}
178 			neigh->egdev = rt->dst.dev;
179 			netdev_hold(neigh->egdev, &neigh->dev_tracker,
180 				    GFP_KERNEL_ACCOUNT);
181 			neigh->ttl = ip4_dst_hoplimit(&rt->dst);
182 			n = dst_neigh_lookup(&rt->dst, &flow4.daddr);
183 			ip_rt_put(rt);
184 		}
185 		if (!n) {
186 			rc = -ENETUNREACH;
187 			NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap");
188 			netdev_put(neigh->egdev, &neigh->dev_tracker);
189 			goto out_free;
190 		}
191 		refcount_set(&neigh->ref, 1);
192 		INIT_LIST_HEAD(&neigh->users);
193 		read_lock_bh(&n->lock);
194 		ether_addr_copy(neigh->ha, n->ha);
195 		neigh->n_valid = n->nud_state & NUD_VALID;
196 		read_unlock_bh(&n->lock);
197 		rwlock_init(&neigh->lock);
198 		INIT_WORK(&neigh->work, efx_neigh_update);
199 		neigh->efx = efx;
200 		neigh->used = jiffies;
201 		if (!neigh->n_valid)
202 			/* Prod ARP to find us a neighbour */
203 			neigh_event_send(n, NULL);
204 		neigh_release(n);
205 	}
206 	/* Add us to this neigh */
207 	encap->neigh = neigh;
208 	list_add_tail(&encap->list, &neigh->users);
209 	return 0;
210 
211 out_free:
212 	/* cleanup common to several error paths */
213 	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
214 			       efx_neigh_ht_params);
215 	synchronize_rcu();
216 	put_net_track(net, &neigh->ns_tracker);
217 	kfree(neigh);
218 	return rc;
219 }
220 
221 static void efx_free_neigh(struct efx_neigh_binder *neigh)
222 {
223 	struct efx_nic *efx = neigh->efx;
224 
225 	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
226 			       efx_neigh_ht_params);
227 	synchronize_rcu();
228 	netdev_put(neigh->egdev, &neigh->dev_tracker);
229 	put_net_track(neigh->net, &neigh->ns_tracker);
230 	kfree(neigh);
231 }
232 
233 static void efx_release_neigh(struct efx_nic *efx,
234 			      struct efx_tc_encap_action *encap)
235 {
236 	struct efx_neigh_binder *neigh = encap->neigh;
237 
238 	if (!neigh)
239 		return;
240 	list_del(&encap->list);
241 	encap->neigh = NULL;
242 	if (!refcount_dec_and_test(&neigh->ref))
243 		return; /* still in use */
244 	efx_free_neigh(neigh);
245 }
246 
247 static void efx_gen_tun_header_eth(struct efx_tc_encap_action *encap, u16 proto)
248 {
249 	struct efx_neigh_binder *neigh = encap->neigh;
250 	struct ethhdr *eth;
251 
252 	encap->encap_hdr_len = sizeof(*eth);
253 	eth = (struct ethhdr *)encap->encap_hdr;
254 
255 	if (encap->neigh->n_valid)
256 		ether_addr_copy(eth->h_dest, neigh->ha);
257 	else
258 		eth_zero_addr(eth->h_dest);
259 	ether_addr_copy(eth->h_source, neigh->egdev->dev_addr);
260 	eth->h_proto = htons(proto);
261 }
262 
263 static void efx_gen_tun_header_ipv4(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
264 {
265 	struct efx_neigh_binder *neigh = encap->neigh;
266 	struct ip_tunnel_key *key = &encap->key;
267 	struct iphdr *ip;
268 
269 	ip = (struct iphdr *)(encap->encap_hdr + encap->encap_hdr_len);
270 	encap->encap_hdr_len += sizeof(*ip);
271 
272 	ip->daddr = key->u.ipv4.dst;
273 	ip->saddr = key->u.ipv4.src;
274 	ip->ttl = neigh->ttl;
275 	ip->protocol = ipproto;
276 	ip->version = 0x4;
277 	ip->ihl = 0x5;
278 	ip->tot_len = cpu_to_be16(ip->ihl * 4 + len);
279 	ip_send_check(ip);
280 }
281 
#ifdef CONFIG_IPV6
/* Append an outer IPv6 header to the encap header being built and advance
 * encap->encap_hdr_len past it.
 *
 * @encap: action whose header buffer is being filled
 * @ipproto: value for the Next Header field
 * @len: payload length in bytes (everything after the IPv6 header)
 *
 * Traffic class and flow label come from the tunnel key's tos and label;
 * the hop limit comes from the bound neighbour.
 */
static void efx_gen_tun_header_ipv6(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
{
	struct ipv6hdr *ip6h = (struct ipv6hdr *)(encap->encap_hdr + encap->encap_hdr_len);
	const struct ip_tunnel_key *key = &encap->key;

	encap->encap_hdr_len += sizeof(*ip6h);

	/* Must come before the explicit version assignment below */
	ip6_flow_hdr(ip6h, key->tos, key->label);
	ip6h->version = 0x6;
	ip6h->nexthdr = ipproto;
	ip6h->hop_limit = encap->neigh->ttl;
	ip6h->payload_len = cpu_to_be16(len);
	ip6h->saddr = key->u.ipv6.src;
	ip6h->daddr = key->u.ipv6.dst;
}
#endif
301 
302 static void efx_gen_tun_header_udp(struct efx_tc_encap_action *encap, u8 len)
303 {
304 	struct ip_tunnel_key *key = &encap->key;
305 	struct udphdr *udp;
306 
307 	udp = (struct udphdr *)(encap->encap_hdr + encap->encap_hdr_len);
308 	encap->encap_hdr_len += sizeof(*udp);
309 
310 	udp->dest = key->tp_dst;
311 	udp->len = cpu_to_be16(sizeof(*udp) + len);
312 }
313 
314 static void efx_gen_tun_header_vxlan(struct efx_tc_encap_action *encap)
315 {
316 	struct ip_tunnel_key *key = &encap->key;
317 	struct vxlanhdr *vxlan;
318 
319 	vxlan = (struct vxlanhdr *)(encap->encap_hdr + encap->encap_hdr_len);
320 	encap->encap_hdr_len += sizeof(*vxlan);
321 
322 	vxlan->vx_flags = VXLAN_HF_VNI;
323 	vxlan->vx_vni = vxlan_vni_field(tunnel_id_to_key32(key->tun_id));
324 }
325 
326 static void efx_gen_tun_header_geneve(struct efx_tc_encap_action *encap)
327 {
328 	struct ip_tunnel_key *key = &encap->key;
329 	struct genevehdr *geneve;
330 	u32 vni;
331 
332 	geneve = (struct genevehdr *)(encap->encap_hdr + encap->encap_hdr_len);
333 	encap->encap_hdr_len += sizeof(*geneve);
334 
335 	geneve->proto_type = htons(ETH_P_TEB);
336 	/* convert tun_id to host-endian so we can use host arithmetic to
337 	 * extract individual bytes.
338 	 */
339 	vni = ntohl(tunnel_id_to_key32(key->tun_id));
340 	geneve->vni[0] = vni >> 16;
341 	geneve->vni[1] = vni >> 8;
342 	geneve->vni[2] = vni;
343 }
344 
345 #define vxlan_header_l4_len	(sizeof(struct udphdr) + sizeof(struct vxlanhdr))
346 #define vxlan4_header_len	(sizeof(struct ethhdr) + sizeof(struct iphdr) + vxlan_header_l4_len)
347 static void efx_gen_vxlan_header_ipv4(struct efx_tc_encap_action *encap)
348 {
349 	BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan4_header_len);
350 	efx_gen_tun_header_eth(encap, ETH_P_IP);
351 	efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, vxlan_header_l4_len);
352 	efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
353 	efx_gen_tun_header_vxlan(encap);
354 }
355 
356 #define geneve_header_l4_len	(sizeof(struct udphdr) + sizeof(struct genevehdr))
357 #define geneve4_header_len	(sizeof(struct ethhdr) + sizeof(struct iphdr) + geneve_header_l4_len)
358 static void efx_gen_geneve_header_ipv4(struct efx_tc_encap_action *encap)
359 {
360 	BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve4_header_len);
361 	efx_gen_tun_header_eth(encap, ETH_P_IP);
362 	efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, geneve_header_l4_len);
363 	efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
364 	efx_gen_tun_header_geneve(encap);
365 }
366 
#ifdef CONFIG_IPV6
/* Complete Ethernet + IPv6 + UDP + VXLAN header size */
#define vxlan6_header_len	\
	(sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + vxlan_header_l4_len)

/* Build the full VXLAN-over-IPv6 encap header in encap->encap_hdr, layer
 * by layer.  The build fails at compile time if the buffer is too small.
 */
static void efx_gen_vxlan_header_ipv6(struct efx_tc_encap_action *encap)
{
	BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan6_header_len);

	efx_gen_tun_header_eth(encap, ETH_P_IPV6);
	efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, vxlan_header_l4_len);
	efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
	efx_gen_tun_header_vxlan(encap);
}

/* Complete Ethernet + IPv6 + UDP + Geneve header size */
#define geneve6_header_len	\
	(sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + geneve_header_l4_len)

/* Build the full Geneve-over-IPv6 encap header in encap->encap_hdr, layer
 * by layer.  The build fails at compile time if the buffer is too small.
 */
static void efx_gen_geneve_header_ipv6(struct efx_tc_encap_action *encap)
{
	BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve6_header_len);

	efx_gen_tun_header_eth(encap, ETH_P_IPV6);
	efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, geneve_header_l4_len);
	efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
	efx_gen_tun_header_geneve(encap);
}
#endif
388 
389 static void efx_gen_encap_header(struct efx_nic *efx,
390 				 struct efx_tc_encap_action *encap)
391 {
392 	encap->n_valid = encap->neigh->n_valid;
393 
394 	/* GCC stupidly thinks that only values explicitly listed in the enum
395 	 * definition can _possibly_ be sensible case values, so without this
396 	 * cast it complains about the IPv6 versions.
397 	 */
398 	switch ((int)encap->type) {
399 	case EFX_ENCAP_TYPE_VXLAN:
400 		efx_gen_vxlan_header_ipv4(encap);
401 		break;
402 	case EFX_ENCAP_TYPE_GENEVE:
403 		efx_gen_geneve_header_ipv4(encap);
404 		break;
405 #ifdef CONFIG_IPV6
406 	case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
407 		efx_gen_vxlan_header_ipv6(encap);
408 		break;
409 	case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
410 		efx_gen_geneve_header_ipv6(encap);
411 		break;
412 #endif
413 	default:
414 		/* unhandled encap type, can't happen */
415 		if (net_ratelimit())
416 			netif_err(efx, drv, efx->net_dev,
417 				  "Bogus encap type %d, can't generate\n",
418 				  encap->type);
419 
420 		/* Use fallback action. */
421 		encap->n_valid = false;
422 		break;
423 	}
424 }
425 
426 static void efx_tc_update_encap(struct efx_nic *efx,
427 				struct efx_tc_encap_action *encap)
428 {
429 	struct efx_tc_action_set_list *acts, *fallback;
430 	struct efx_tc_flow_rule *rule;
431 	struct efx_tc_action_set *act;
432 	int rc;
433 
434 	if (encap->n_valid) {
435 		/* Make sure no rules are using this encap while we change it */
436 		list_for_each_entry(act, &encap->users, encap_user) {
437 			acts = act->user;
438 			if (WARN_ON(!acts)) /* can't happen */
439 				continue;
440 			rule = container_of(acts, struct efx_tc_flow_rule, acts);
441 			if (rule->fallback)
442 				fallback = rule->fallback;
443 			else /* fallback fallback: deliver to PF */
444 				fallback = &efx->tc->facts.pf;
445 			rc = efx_mae_update_rule(efx, fallback->fw_id,
446 						 rule->fw_id);
447 			if (rc)
448 				netif_err(efx, drv, efx->net_dev,
449 					  "Failed to update (f) rule %08x rc %d\n",
450 					  rule->fw_id, rc);
451 			else
452 				netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n",
453 					  rule->fw_id);
454 		}
455 	}
456 
457 	/* Make sure we don't leak arbitrary bytes on the wire;
458 	 * set an all-0s ethernet header.  A successful call to
459 	 * efx_gen_encap_header() will overwrite this.
460 	 */
461 	memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr));
462 	encap->encap_hdr_len = ETH_HLEN;
463 
464 	if (encap->neigh) {
465 		read_lock_bh(&encap->neigh->lock);
466 		efx_gen_encap_header(efx, encap);
467 		read_unlock_bh(&encap->neigh->lock);
468 	} else {
469 		encap->n_valid = false;
470 	}
471 
472 	rc = efx_mae_update_encap_md(efx, encap);
473 	if (rc) {
474 		netif_err(efx, drv, efx->net_dev,
475 			  "Failed to update encap hdr %08x rc %d\n",
476 			  encap->fw_id, rc);
477 		return;
478 	}
479 	netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n",
480 		  encap->fw_id);
481 	if (!encap->n_valid)
482 		return;
483 	/* Update rule users: use the action if they are now ready */
484 	list_for_each_entry(act, &encap->users, encap_user) {
485 		acts = act->user;
486 		if (WARN_ON(!acts)) /* can't happen */
487 			continue;
488 		rule = container_of(acts, struct efx_tc_flow_rule, acts);
489 		if (!efx_tc_check_ready(efx, rule))
490 			continue;
491 		rc = efx_mae_update_rule(efx, acts->fw_id, rule->fw_id);
492 		if (rc)
493 			netif_err(efx, drv, efx->net_dev,
494 				  "Failed to update rule %08x rc %d\n",
495 				  rule->fw_id, rc);
496 		else
497 			netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n",
498 				  rule->fw_id);
499 	}
500 }
501 
502 static void efx_neigh_update(struct work_struct *work)
503 {
504 	struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work);
505 	struct efx_tc_encap_action *encap;
506 	struct efx_nic *efx = neigh->efx;
507 
508 	mutex_lock(&efx->tc->mutex);
509 	list_for_each_entry(encap, &neigh->users, list)
510 		efx_tc_update_encap(neigh->efx, encap);
511 	/* release ref taken in efx_neigh_event() */
512 	if (refcount_dec_and_test(&neigh->ref))
513 		efx_free_neigh(neigh);
514 	mutex_unlock(&efx->tc->mutex);
515 }
516 
517 static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n)
518 {
519 	struct efx_neigh_binder keys = {NULL}, *neigh;
520 	bool n_valid, ipv6 = false;
521 	char ha[ETH_ALEN];
522 	size_t keysize;
523 
524 	if (WARN_ON(!efx->tc))
525 		return NOTIFY_DONE;
526 
527 	if (n->tbl == &arp_tbl) {
528 		keysize = sizeof(keys.dst_ip);
529 #if IS_ENABLED(CONFIG_IPV6)
530 	} else if (n->tbl == ipv6_stub->nd_tbl) {
531 		ipv6 = true;
532 		keysize = sizeof(keys.dst_ip6);
533 #endif
534 	} else {
535 		return NOTIFY_DONE;
536 	}
537 	if (!n->parms) {
538 		netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n");
539 		return NOTIFY_DONE;
540 	}
541 	keys.net = read_pnet(&n->parms->net);
542 	if (n->tbl->key_len != keysize) {
543 		netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n",
544 			   n->tbl->key_len);
545 		return NOTIFY_DONE;
546 	}
547 	read_lock_bh(&n->lock); /* Get a consistent view */
548 	memcpy(ha, n->ha, ETH_ALEN);
549 	n_valid = (n->nud_state & NUD_VALID) && !n->dead;
550 	read_unlock_bh(&n->lock);
551 	if (ipv6)
552 		memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len);
553 	else
554 		memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len);
555 	rcu_read_lock();
556 	neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys,
557 				       efx_neigh_ht_params);
558 	if (!neigh || neigh->dying)
559 		/* We're not interested in this neighbour */
560 		goto done;
561 	write_lock_bh(&neigh->lock);
562 	if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) {
563 		write_unlock_bh(&neigh->lock);
564 		/* Nothing has changed; no work to do */
565 		goto done;
566 	}
567 	neigh->n_valid = n_valid;
568 	memcpy(neigh->ha, ha, ETH_ALEN);
569 	write_unlock_bh(&neigh->lock);
570 	if (refcount_inc_not_zero(&neigh->ref)) {
571 		rcu_read_unlock();
572 		if (!schedule_work(&neigh->work))
573 			/* failed to schedule, release the ref we just took */
574 			if (refcount_dec_and_test(&neigh->ref))
575 				efx_free_neigh(neigh);
576 	} else {
577 done:
578 		rcu_read_unlock();
579 	}
580 	return NOTIFY_DONE;
581 }
582 
583 bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
584 {
585 	struct efx_tc_action_set *act;
586 
587 	/* Encap actions can only be offloaded if they have valid
588 	 * neighbour info for the outer Ethernet header.
589 	 */
590 	list_for_each_entry(act, &rule->acts.list, list)
591 		if (act->encap_md && !act->encap_md->n_valid)
592 			return false;
593 	return true;
594 }
595 
596 struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
597 			struct efx_nic *efx, const struct ip_tunnel_info *info,
598 			struct net_device *egdev, struct netlink_ext_ack *extack)
599 {
600 	enum efx_encap_type type = efx_tc_indr_netdev_type(egdev);
601 	struct efx_tc_encap_action *encap, *old;
602 	struct efx_rep *to_efv;
603 	s64 rc;
604 
605 	if (type == EFX_ENCAP_TYPE_NONE) {
606 		/* dest is not an encap device */
607 		NL_SET_ERR_MSG_MOD(extack, "Not a (supported) tunnel device but tunnel_key is set");
608 		return ERR_PTR(-EOPNOTSUPP);
609 	}
610 	rc = efx_mae_check_encap_type_supported(efx, type);
611 	if (rc < 0) {
612 		NL_SET_ERR_MSG_MOD(extack, "Firmware reports no support for this tunnel type");
613 		return ERR_PTR(rc);
614 	}
615 	/* No support yet for Geneve options */
616 	if (info->options_len) {
617 		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel options");
618 		return ERR_PTR(-EOPNOTSUPP);
619 	}
620 	switch (info->mode) {
621 	case IP_TUNNEL_INFO_TX:
622 		break;
623 	case IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6:
624 		type |= EFX_ENCAP_FLAG_IPV6;
625 		break;
626 	default:
627 		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported tunnel mode %u",
628 				       info->mode);
629 		return ERR_PTR(-EOPNOTSUPP);
630 	}
631 	encap = kzalloc(sizeof(*encap), GFP_KERNEL_ACCOUNT);
632 	if (!encap)
633 		return ERR_PTR(-ENOMEM);
634 	encap->type = type;
635 	encap->key = info->key;
636 	INIT_LIST_HEAD(&encap->users);
637 	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht,
638 						&encap->linkage,
639 						efx_tc_encap_ht_params);
640 	if (old) {
641 		/* don't need our new entry */
642 		kfree(encap);
643 		if (!refcount_inc_not_zero(&old->ref))
644 			return ERR_PTR(-EAGAIN);
645 		/* existing entry found, ref taken */
646 		return old;
647 	}
648 
649 	rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack);
650 	if (rc < 0)
651 		goto out_remove;
652 	to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev);
653 	if (IS_ERR(to_efv)) {
654 		/* neigh->egdev isn't ours */
655 		NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch");
656 		rc = PTR_ERR(to_efv);
657 		goto out_release;
658 	}
659 	rc = efx_tc_flower_external_mport(efx, to_efv);
660 	if (rc < 0) {
661 		NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port");
662 		goto out_release;
663 	}
664 	encap->dest_mport = rc;
665 	read_lock_bh(&encap->neigh->lock);
666 	efx_gen_encap_header(efx, encap);
667 	read_unlock_bh(&encap->neigh->lock);
668 
669 	rc = efx_mae_allocate_encap_md(efx, encap);
670 	if (rc < 0) {
671 		NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw");
672 		goto out_release;
673 	}
674 
675 	/* ref and return */
676 	refcount_set(&encap->ref, 1);
677 	return encap;
678 out_release:
679 	efx_release_neigh(efx, encap);
680 out_remove:
681 	rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
682 			       efx_tc_encap_ht_params);
683 	kfree(encap);
684 	return ERR_PTR(rc);
685 }
686 
687 void efx_tc_flower_release_encap_md(struct efx_nic *efx,
688 				    struct efx_tc_encap_action *encap)
689 {
690 	if (!refcount_dec_and_test(&encap->ref))
691 		return; /* still in use */
692 	efx_release_neigh(efx, encap);
693 	rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
694 			       efx_tc_encap_ht_params);
695 	efx_mae_free_encap_md(efx, encap);
696 	kfree(encap);
697 }
698 
699 static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh)
700 {
701 	struct efx_tc_encap_action *encap, *next;
702 
703 	list_for_each_entry_safe(encap, next, &neigh->users, list) {
704 		/* Should cause neigh usage count to fall to zero, freeing it */
705 		efx_release_neigh(efx, encap);
706 		/* The encap has lost its neigh, so it's now unready */
707 		efx_tc_update_encap(efx, encap);
708 	}
709 }
710 
711 void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev)
712 {
713 	struct efx_neigh_binder *neigh;
714 	struct rhashtable_iter walk;
715 
716 	mutex_lock(&efx->tc->mutex);
717 	rhashtable_walk_enter(&efx->tc->neigh_ht, &walk);
718 	rhashtable_walk_start(&walk);
719 	while ((neigh = rhashtable_walk_next(&walk)) != NULL) {
720 		if (IS_ERR(neigh))
721 			continue;
722 		if (neigh->egdev != net_dev)
723 			continue;
724 		neigh->dying = true;
725 		rhashtable_walk_stop(&walk);
726 		synchronize_rcu(); /* Make sure any updates see dying flag */
727 		efx_tc_remove_neigh_users(efx, neigh); /* might sleep */
728 		rhashtable_walk_start(&walk);
729 	}
730 	rhashtable_walk_stop(&walk);
731 	rhashtable_walk_exit(&walk);
732 	mutex_unlock(&efx->tc->mutex);
733 }
734 
735 int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
736 			  void *ptr)
737 {
738 	if (efx->type->is_vf)
739 		return NOTIFY_DONE;
740 
741 	switch (event) {
742 	case NETEVENT_NEIGH_UPDATE:
743 		return efx_neigh_event(efx, ptr);
744 	default:
745 		return NOTIFY_DONE;
746 	}
747 }
748