xref: /openbmc/linux/drivers/net/ethernet/sfc/tc.c (revision 7288dd2f)
// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2019 Solarflare Communications Inc.
 * Copyright 2020-2022 Xilinx Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <net/pkt_cls.h>
#include <net/vxlan.h>
#include <net/geneve.h>
#include <net/tc_act/tc_ct.h>
#include "tc.h"
#include "tc_bindings.h"
#include "tc_encap_actions.h"
#include "tc_conntrack.h"
#include "mae.h"
#include "ef100_rep.h"
#include "efx.h"

enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
{
	if (netif_is_vxlan(net_dev))
		return EFX_ENCAP_TYPE_VXLAN;
	if (netif_is_geneve(net_dev))
		return EFX_ENCAP_TYPE_GENEVE;

	return EFX_ENCAP_TYPE_NONE;
}

#define EFX_EFV_PF	NULL
/* Look up the representor information (efv) for a device.
 * May return NULL for the PF (us), or an error pointer for a device that
 * isn't supported as a TC offload endpoint
 */
struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
					 struct net_device *dev)
{
	struct efx_rep *efv;

	if (!dev)
		return ERR_PTR(-EOPNOTSUPP);
	/* Is it us (the PF)? */
	if (dev == efx->net_dev)
		return EFX_EFV_PF;
	/* Is it an efx vfrep at all? */
	if (dev->netdev_ops != &efx_ef100_rep_netdev_ops)
		return ERR_PTR(-EOPNOTSUPP);
	/* Is it ours?  We don't support TC rules that include another
	 * EF100's netdevices (not even on another port of the same NIC).
	 */
	efv = netdev_priv(dev);
	if (efv->parent != efx)
		return ERR_PTR(-EOPNOTSUPP);
	return efv;
}

/* Convert a driver-internal vport ID into an internal device (PF or VF) */
static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv)
{
	u32 mport;

	if (IS_ERR(efv))
		return PTR_ERR(efv);
	if (!efv) /* device is PF (us) */
		efx_mae_mport_uplink(efx, &mport);
	else /* device is repr */
		efx_mae_mport_mport(efx, efv->mport, &mport);
	return mport;
}

/* Convert a driver-internal vport ID into an external device (wire or VF) */
s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
{
	u32 mport;

	if (IS_ERR(efv))
		return PTR_ERR(efv);
	if (!efv) /* device is PF (us) */
		efx_mae_mport_wire(efx, &mport);
	else /* device is repr */
		efx_mae_mport_mport(efx, efv->mport, &mport);
	return mport;
}

static const struct rhashtable_params efx_tc_encap_match_ht_params = {
	.key_len	= offsetof(struct efx_tc_encap_match, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_encap_match, linkage),
};
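
/* NB: here (and for efx_tc_recirc_ht_params below), a key_len of
 * offsetof(..., linkage) with key_offset 0 makes the hash key the entire
 * struct contents preceding the linkage member, so every field that
 * identifies an entry must be declared before it.
 */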

static const struct rhashtable_params efx_tc_match_action_ht_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct efx_tc_flow_rule, cookie),
	.head_offset	= offsetof(struct efx_tc_flow_rule, linkage),
};

static const struct rhashtable_params efx_tc_lhs_rule_ht_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct efx_tc_lhs_rule, cookie),
	.head_offset	= offsetof(struct efx_tc_lhs_rule, linkage),
};

static const struct rhashtable_params efx_tc_recirc_ht_params = {
	.key_len	= offsetof(struct efx_tc_recirc_id, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_recirc_id, linkage),
};

static void efx_tc_free_action_set(struct efx_nic *efx,
				   struct efx_tc_action_set *act, bool in_hw)
{
	/* Failure paths that call this on the 'cursor' action pass in_hw=false,
	 * because if the alloc had succeeded we'd have put it in acts.list and
	 * it would no longer be in act.
	 */
	if (in_hw) {
		efx_mae_free_action_set(efx, act->fw_id);
		/* in_hw is true iff we are on an acts.list; make sure to
		 * remove ourselves from that list before we are freed.
		 */
		list_del(&act->list);
	}
	if (act->count) {
		spin_lock_bh(&act->count->cnt->lock);
		if (!list_empty(&act->count_user))
			list_del(&act->count_user);
		spin_unlock_bh(&act->count->cnt->lock);
		efx_tc_flower_put_counter_index(efx, act->count);
	}
	if (act->encap_md) {
		list_del(&act->encap_user);
		efx_tc_flower_release_encap_md(efx, act->encap_md);
	}
	kfree(act);
}

static void efx_tc_free_action_set_list(struct efx_nic *efx,
					struct efx_tc_action_set_list *acts,
					bool in_hw)
{
	struct efx_tc_action_set *act, *next;

	/* Failure paths set in_hw=false, because usually the acts didn't get
	 * to efx_mae_alloc_action_set_list(); if they did, the failure tree
	 * has a separate efx_mae_free_action_set_list() before calling us.
	 */
	if (in_hw)
		efx_mae_free_action_set_list(efx, acts);
	/* Any act that's on the list will be in_hw even if the list isn't */
	list_for_each_entry_safe(act, next, &acts->list, list)
		efx_tc_free_action_set(efx, act, true);
	/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
}

/* Boilerplate for the simple 'copy a field' cases */
#define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) {		\
	struct flow_match_##_type fm;					\
									\
	flow_rule_match_##_tcget(rule, &fm);				\
	match->value._field = fm.key->_tcfield;				\
	match->mask._field = fm.mask->_tcfield;				\
}
#define MAP_KEY_AND_MASK(_name, _type, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(_name, _type, _type, _tcfield, _field)
#define MAP_ENC_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(ENC_##_name, _type, _tcget, _tcfield, _field)
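
/* As an illustration, MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto)
 * expands to:
 *
 *	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
 *		struct flow_match_basic fm;
 *
 *		flow_rule_match_basic(rule, &fm);
 *		match->value.eth_proto = fm.key->n_proto;
 *		match->mask.eth_proto = fm.mask->n_proto;
 *	}
 */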

static int efx_tc_flower_parse_match(struct efx_nic *efx,
				     struct flow_rule *rule,
				     struct efx_tc_match *match,
				     struct netlink_ext_ack *extack)
{
	struct flow_dissector *dissector = rule->match.dissector;
	unsigned char ipv = 0;

	/* Owing to internal TC infelicities, the IPV6_ADDRS key might be set
	 * even on IPv4 filters; so rather than relying on dissector->used_keys
	 * we check the addr_type in the CONTROL key.  If we don't find it (or
	 * it's masked, which should never happen), we treat both IPV4_ADDRS
	 * and IPV6_ADDRS as absent.
	 */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_control(rule, &fm);
		if (IS_ALL_ONES(fm.mask->addr_type))
			switch (fm.key->addr_type) {
			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
				ipv = 4;
				break;
			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
				ipv = 6;
				break;
			default:
				break;
			}

		if (fm.mask->flags & FLOW_DIS_IS_FRAGMENT) {
			match->value.ip_frag = fm.key->flags & FLOW_DIS_IS_FRAGMENT;
			match->mask.ip_frag = true;
		}
		if (fm.mask->flags & FLOW_DIS_FIRST_FRAG) {
			match->value.ip_firstfrag = fm.key->flags & FLOW_DIS_FIRST_FRAG;
			match->mask.ip_firstfrag = true;
		}
		if (fm.mask->flags & ~(FLOW_DIS_IS_FRAGMENT | FLOW_DIS_FIRST_FRAG)) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on control.flags %#x",
					       fm.mask->flags);
			return -EOPNOTSUPP;
		}
	}
	if (dissector->used_keys &
	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_CT) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IP))) {
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#llx",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}

	MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto);
	/* Make sure we're IP if any L3/L4 keys used. */
	if (!IS_ALL_ONES(match->mask.eth_proto) ||
	    !(match->value.eth_proto == htons(ETH_P_IP) ||
	      match->value.eth_proto == htons(ETH_P_IPV6)))
		if (dissector->used_keys &
		    (BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_IP) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "L3/L4 flower keys %#llx require protocol ipv[46]",
					       dissector->used_keys);
			return -EINVAL;
		}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_vlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[0] = fm.key->vlan_tpid;
			match->mask.vlan_proto[0] = fm.mask->vlan_tpid;
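			/* Build the 802.1Q TCI: PCP in the top three bits,
			 * VID in the low twelve (the DEI bit, bit 12, is not
			 * matchable here).
			 */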
			match->value.vlan_tci[0] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[0] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_cvlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[1] = fm.key->vlan_tpid;
			match->mask.vlan_proto[1] = fm.mask->vlan_tpid;
			match->value.vlan_tci[1] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[1] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_match_eth_addrs fm;

		flow_rule_match_eth_addrs(rule, &fm);
		ether_addr_copy(match->value.eth_saddr, fm.key->src);
		ether_addr_copy(match->value.eth_daddr, fm.key->dst);
		ether_addr_copy(match->mask.eth_saddr, fm.mask->src);
		ether_addr_copy(match->mask.eth_daddr, fm.mask->dst);
	}

	MAP_KEY_AND_MASK(BASIC, basic, ip_proto, ip_proto);
	/* Make sure we're TCP/UDP if any L4 keys used. */
	if ((match->value.ip_proto != IPPROTO_UDP &&
	     match->value.ip_proto != IPPROTO_TCP) || !IS_ALL_ONES(match->mask.ip_proto))
		if (dissector->used_keys &
		    (BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "L4 flower keys %#llx require ipproto udp or tcp",
					       dissector->used_keys);
			return -EINVAL;
		}
	MAP_KEY_AND_MASK(IP, ip, tos, ip_tos);
	MAP_KEY_AND_MASK(IP, ip, ttl, ip_ttl);
	if (ipv == 4) {
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, src, src_ip);
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, dst, dst_ip);
	}
#ifdef CONFIG_IPV6
	else if (ipv == 6) {
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, src, src_ip6);
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, dst, dst_ip6);
	}
#endif
	MAP_KEY_AND_MASK(PORTS, ports, src, l4_sport);
	MAP_KEY_AND_MASK(PORTS, ports, dst, l4_dport);
	MAP_KEY_AND_MASK(TCP, tcp, flags, tcp_flags);
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_enc_control(rule, &fm);
		if (fm.mask->flags) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on enc_control.flags %#x",
					       fm.mask->flags);
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(fm.mask->addr_type)) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported enc addr_type mask %u (key %u)",
					       fm.mask->addr_type,
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		switch (fm.key->addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     src, enc_src_ip);
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     dst, enc_dst_ip);
			break;
#ifdef CONFIG_IPV6
		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     src, enc_src_ip6);
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     dst, enc_dst_ip6);
			break;
#endif
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported enc addr_type %u (supported are IPv4, IPv6)",
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, tos, enc_ip_tos);
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, ttl, enc_ip_ttl);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, src, enc_sport);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, dst, enc_dport);
		MAP_ENC_KEY_AND_MASK(KEYID, enc_keyid, enc_keyid, keyid, enc_keyid);
	} else if (dissector->used_keys &
		   (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "Flower enc keys require enc_control (keys: %#llx)",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) {
		struct flow_match_ct fm;

		flow_rule_match_ct(rule, &fm);
		match->value.ct_state_trk = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
		match->mask.ct_state_trk = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
		match->value.ct_state_est = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
		match->mask.ct_state_est = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
		if (fm.mask->ct_state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					  TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported ct_state match %#x",
					       fm.mask->ct_state);
			return -EOPNOTSUPP;
		}
		match->value.ct_mark = fm.key->ct_mark;
		match->mask.ct_mark = fm.mask->ct_mark;
		match->value.ct_zone = fm.key->ct_zone;
		match->mask.ct_zone = fm.mask->ct_zone;

		if (memchr_inv(fm.mask->ct_labels, 0, sizeof(fm.mask->ct_labels))) {
			NL_SET_ERR_MSG_MOD(extack, "Matching on ct_label not supported");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
					      struct efx_tc_encap_match *encap)
{
	int rc;

	if (!refcount_dec_and_test(&encap->ref))
		return; /* still in use */

	if (encap->type == EFX_TC_EM_DIRECT) {
		rc = efx_mae_unregister_encap_match(efx, encap);
		if (rc)
			/* Display message but carry on and remove entry from our
			 * SW tables, because there's not much we can do about it.
			 */
			netif_err(efx, drv, efx->net_dev,
				  "Failed to release encap match %#x, rc %d\n",
				  encap->fw_id, rc);
	}
	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
			       efx_tc_encap_match_ht_params);
	if (encap->pseudo)
		efx_tc_flower_release_encap_match(efx, encap->pseudo);
	kfree(encap);
}

static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
					    struct efx_tc_match *match,
					    enum efx_encap_type type,
					    enum efx_tc_em_pseudo_type em_type,
					    u8 child_ip_tos_mask,
					    __be16 child_udp_sport_mask,
					    struct netlink_ext_ack *extack)
{
	struct efx_tc_encap_match *encap, *old, *pseudo = NULL;
	bool ipv6 = false;
	int rc;

	/* We require that the socket-defining fields (IP addrs and UDP dest
	 * port) are present and exact-match.  Other fields may only be used
	 * if the field-set (and any masks) are the same for all encap
	 * matches on the same <sip,dip,dport> tuple; this is enforced by
	 * pseudo encap matches.
	 */
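	/* Illustrative example: two decap flowers on the same <sip,dip,dport>
	 * tuple matching enc_tos 0x10/0xff and 0x20/0xff respectively can
	 * share one PSEUDO_MASK parent (their child ToS masks are equal),
	 * whereas adding a third flower with no enc_tos match at all would
	 * conflict, since its field-set differs.
	 */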
	if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
		if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on dst IP address");
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(match->mask.enc_src_ip)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on src IP address");
			return -EOPNOTSUPP;
		}
#ifdef CONFIG_IPV6
		if (!ipv6_addr_any(&match->mask.enc_dst_ip6) ||
		    !ipv6_addr_any(&match->mask.enc_src_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match on both IPv4 and IPv6, don't understand");
			return -EOPNOTSUPP;
		}
	} else {
		ipv6 = true;
		if (!efx_ipv6_addr_all_ones(&match->mask.enc_dst_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on dst IP address");
			return -EOPNOTSUPP;
		}
		if (!efx_ipv6_addr_all_ones(&match->mask.enc_src_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on src IP address");
			return -EOPNOTSUPP;
		}
#endif
	}
	if (!IS_ALL_ONES(match->mask.enc_dport)) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
		return -EOPNOTSUPP;
	}
	if (match->mask.enc_sport || match->mask.enc_ip_tos) {
		struct efx_tc_match pmatch = *match;

		if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */
			NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler");
			return -EOPNOTSUPP;
		}
		pmatch.value.enc_ip_tos = 0;
		pmatch.mask.enc_ip_tos = 0;
		pmatch.value.enc_sport = 0;
		pmatch.mask.enc_sport = 0;
		rc = efx_tc_flower_record_encap_match(efx, &pmatch, type,
						      EFX_TC_EM_PSEUDO_MASK,
						      match->mask.enc_ip_tos,
						      match->mask.enc_sport,
						      extack);
		if (rc)
			return rc;
		pseudo = pmatch.encap;
	}
	if (match->mask.enc_ip_ttl) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
		rc = -EOPNOTSUPP;
		goto fail_pseudo;
	}

	rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos,
					    match->mask.enc_sport, extack);
	if (rc)
		goto fail_pseudo;

	encap = kzalloc(sizeof(*encap), GFP_USER);
	if (!encap) {
		rc = -ENOMEM;
		goto fail_pseudo;
	}
	encap->src_ip = match->value.enc_src_ip;
	encap->dst_ip = match->value.enc_dst_ip;
#ifdef CONFIG_IPV6
	encap->src_ip6 = match->value.enc_src_ip6;
	encap->dst_ip6 = match->value.enc_dst_ip6;
#endif
	encap->udp_dport = match->value.enc_dport;
	encap->tun_type = type;
	encap->ip_tos = match->value.enc_ip_tos;
	encap->ip_tos_mask = match->mask.enc_ip_tos;
	encap->child_ip_tos_mask = child_ip_tos_mask;
	encap->udp_sport = match->value.enc_sport;
	encap->udp_sport_mask = match->mask.enc_sport;
	encap->child_udp_sport_mask = child_udp_sport_mask;
	encap->type = em_type;
	encap->pseudo = pseudo;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
						&encap->linkage,
						efx_tc_encap_match_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(encap);
		if (pseudo) /* don't need our new pseudo either */
			efx_tc_flower_release_encap_match(efx, pseudo);
		/* check old and new em_types are compatible */
		switch (old->type) {
		case EFX_TC_EM_DIRECT:
			/* old EM is in hardware, so mustn't overlap with a
			 * pseudo, but may be shared with another direct EM
			 */
			if (em_type == EFX_TC_EM_DIRECT)
				break;
			NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry");
			return -EEXIST;
		case EFX_TC_EM_PSEUDO_MASK:
			/* old EM is protecting a ToS- or src port-qualified
			 * filter, so may only be shared with another pseudo
			 * for the same ToS and src port masks.
			 */
			if (em_type != EFX_TC_EM_PSEUDO_MASK) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "%s encap match conflicts with existing pseudo(MASK) entry",
						       em_type ? "Pseudo" : "Direct");
				return -EEXIST;
			}
			if (child_ip_tos_mask != old->child_ip_tos_mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Pseudo encap match for TOS mask %#04x conflicts with existing pseudo(MASK) entry for TOS mask %#04x",
						       child_ip_tos_mask,
						       old->child_ip_tos_mask);
				return -EEXIST;
			}
			if (child_udp_sport_mask != old->child_udp_sport_mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Pseudo encap match for UDP src port mask %#x conflicts with existing pseudo(MASK) entry for mask %#x",
						       child_udp_sport_mask,
						       old->child_udp_sport_mask);
				return -EEXIST;
			}
			break;
		default: /* Unrecognised pseudo-type.  Just say no */
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "%s encap match conflicts with existing pseudo(%d) entry",
					       em_type ? "Pseudo" : "Direct",
					       old->type);
			return -EEXIST;
		}
		/* check old and new tun_types are compatible */
		if (old->tun_type != type) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Egress encap match with conflicting tun_type %u != %u",
					       old->tun_type, type);
			return -EEXIST;
		}
		if (!refcount_inc_not_zero(&old->ref))
			return -EAGAIN;
		/* existing entry found */
		encap = old;
	} else {
		if (em_type == EFX_TC_EM_DIRECT) {
			rc = efx_mae_register_encap_match(efx, encap);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
				goto fail;
			}
		}
		refcount_set(&encap->ref, 1);
	}
	match->encap = encap;
	return 0;
fail:
	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
			       efx_tc_encap_match_ht_params);
	kfree(encap);
fail_pseudo:
	if (pseudo)
		efx_tc_flower_release_encap_match(efx, pseudo);
	return rc;
}

static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx,
						     u32 chain_index,
						     struct net_device *net_dev)
{
	struct efx_tc_recirc_id *rid, *old;
	int rc;

	rid = kzalloc(sizeof(*rid), GFP_USER);
	if (!rid)
		return ERR_PTR(-ENOMEM);
	rid->chain_index = chain_index;
	/* We don't take a reference here, because it's implied - if there's
	 * a rule on the net_dev that's been offloaded to us, then the net_dev
	 * can't go away until the rule has been deoffloaded.
	 */
	rid->net_dev = net_dev;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->recirc_ht,
						&rid->linkage,
						efx_tc_recirc_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(rid);
		if (!refcount_inc_not_zero(&old->ref))
			return ERR_PTR(-EAGAIN);
		/* existing entry found */
		rid = old;
	} else {
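		/* Allocate a hardware recirc ID.  ID 0 is reserved to mean
		 * "chain 0, no recirculation", so the allocation range
		 * starts at 1.
		 */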
		rc = ida_alloc_range(&efx->tc->recirc_ida, 1, U8_MAX, GFP_USER);
		if (rc < 0) {
			rhashtable_remove_fast(&efx->tc->recirc_ht,
					       &rid->linkage,
					       efx_tc_recirc_ht_params);
			kfree(rid);
			return ERR_PTR(rc);
		}
		rid->fw_id = rc;
		refcount_set(&rid->ref, 1);
	}
	return rid;
}

static void efx_tc_put_recirc_id(struct efx_nic *efx, struct efx_tc_recirc_id *rid)
{
	if (!refcount_dec_and_test(&rid->ref))
		return; /* still in use */
	rhashtable_remove_fast(&efx->tc->recirc_ht, &rid->linkage,
			       efx_tc_recirc_ht_params);
	ida_free(&efx->tc->recirc_ida, rid->fw_id);
	kfree(rid);
}

static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
{
	efx_mae_delete_rule(efx, rule->fw_id);

	/* Release entries in subsidiary tables */
	efx_tc_free_action_set_list(efx, &rule->acts, true);
	if (rule->match.rid)
		efx_tc_put_recirc_id(efx, rule->match.rid);
	if (rule->match.encap)
		efx_tc_flower_release_encap_match(efx, rule->match.encap);
	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
}

static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
{
	switch (typ) {
	case EFX_ENCAP_TYPE_NONE:
		return "none";
	case EFX_ENCAP_TYPE_VXLAN:
		return "vxlan";
	case EFX_ENCAP_TYPE_GENEVE:
		return "geneve";
	default:
		pr_warn_once("Unknown efx_encap_type %d encountered\n", typ);
		return "unknown";
	}
}

/* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
enum efx_tc_action_order {
	EFX_TC_AO_DECAP,
	EFX_TC_AO_VLAN_POP,
	EFX_TC_AO_VLAN_PUSH,
	EFX_TC_AO_COUNT,
	EFX_TC_AO_ENCAP,
	EFX_TC_AO_DELIVER
};
/* Determine whether we can add @new action without violating order */
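/* The deliberate fallthroughs below mean each case also runs the checks for
 * every action later in the fixed order: since an action set applies its
 * actions in that order, @new cannot be added once any later-ordered action
 * is already present in @act.
 */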
static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
					  enum efx_tc_action_order new)
{
	switch (new) {
	case EFX_TC_AO_DECAP:
		if (act->decap)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_POP:
		if (act->vlan_pop >= 2)
			return false;
		/* If we've already pushed a VLAN, we can't then pop it;
		 * the hardware would instead try to pop an existing VLAN
		 * before pushing the new one.
		 */
		if (act->vlan_push)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_PUSH:
		if (act->vlan_push >= 2)
			return false;
		fallthrough;
	case EFX_TC_AO_COUNT:
		if (act->count)
			return false;
		fallthrough;
	case EFX_TC_AO_ENCAP:
		if (act->encap_md)
			return false;
		fallthrough;
	case EFX_TC_AO_DELIVER:
		return !act->deliver;
	default:
		/* Bad caller.  Whatever they wanted to do, say they can't. */
		WARN_ON_ONCE(1);
		return false;
	}
}

/**
 * DOC: TC conntrack sequences
 *
 * The MAE hardware can handle at most two rounds of action rule matching,
 * consequently we support conntrack through the notion of a "left-hand side
 * rule".  This is a rule which typically contains only the actions "ct" and
 * "goto chain N", and corresponds to one or more "right-hand side rules" in
 * chain N, which typically match on +trk+est, and may perform ct(nat) actions.
 * RHS rules go in the Action Rule table as normal but with a nonzero recirc_id
 * (the hardware equivalent of chain_index), while LHS rules may go in either
 * the Action Rule or the Outer Rule table, the latter being preferred for
 * performance reasons, and set both DO_CT and a recirc_id in their response.
 *
 * Besides the RHS rules, there are often also similar rules matching on
 * +trk+new which perform the ct(commit) action.  These are not offloaded.
 */
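
/* As an illustrative example (not exhaustive), such a pair of rules might
 * be installed with:
 *
 *	tc filter add dev $PF ingress chain 0 protocol ip flower \
 *		ct_state -trk action ct zone 1 pipe action goto chain 1
 *	tc filter add dev $PF ingress chain 1 protocol ip flower \
 *		ct_state +trk+est action mirred egress redirect dev $VFREP
 *
 * The first (LHS) rule performs the conntrack lookup and recirculates, and
 * the second (RHS) rule matches on the lookup's result in chain 1.
 */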

static bool efx_tc_rule_is_lhs_rule(struct flow_rule *fr,
				    struct efx_tc_match *match)
{
	const struct flow_action_entry *fa;
	int i;

	flow_action_for_each(i, fa, &fr->action) {
		switch (fa->id) {
		case FLOW_ACTION_GOTO:
			return true;
		case FLOW_ACTION_CT:
			/* If rule is -trk, or doesn't mention trk at all, then
			 * a CT action implies a conntrack lookup (hence it's an
			 * LHS rule).  If rule is +trk, then a CT action could
			 * just be ct(nat) or even ct(commit) (though the latter
			 * can't be offloaded).
			 */
			if (!match->mask.ct_state_trk || !match->value.ct_state_trk)
				return true;
			break;
		default:
			break;
		}
	}
	return false;
}

static int efx_tc_flower_handle_lhs_actions(struct efx_nic *efx,
					    struct flow_cls_offload *tc,
					    struct flow_rule *fr,
					    struct net_device *net_dev,
					    struct efx_tc_lhs_rule *rule)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_action *act = &rule->lhs_act;
	const struct flow_action_entry *fa;
	bool pipe = true;
	int i;

	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_ct_zone *ct_zone;
		struct efx_tc_recirc_id *rid;

		if (!pipe) {
			/* more actions after a non-pipe action */
			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
			return -EINVAL;
		}
		switch (fa->id) {
		case FLOW_ACTION_GOTO:
			if (!fa->chain_index) {
				NL_SET_ERR_MSG_MOD(extack, "Can't goto chain 0, no looping in hw");
				return -EOPNOTSUPP;
			}
			rid = efx_tc_get_recirc_id(efx, fa->chain_index,
						   net_dev);
			if (IS_ERR(rid)) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to allocate a hardware recirculation ID for this chain_index");
				return PTR_ERR(rid);
			}
			act->rid = rid;
			if (fa->hw_stats) {
				struct efx_tc_counter_index *cnt;

				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
					NL_SET_ERR_MSG_FMT_MOD(extack,
							       "hw_stats_type %u not supported (only 'delayed')",
							       fa->hw_stats);
					return -EOPNOTSUPP;
				}
				cnt = efx_tc_flower_get_counter_index(efx, tc->cookie,
								      EFX_TC_COUNTER_TYPE_OR);
				if (IS_ERR(cnt)) {
					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
					return PTR_ERR(cnt);
				}
				WARN_ON(act->count); /* can't happen */
				act->count = cnt;
			}
			pipe = false;
			break;
		case FLOW_ACTION_CT:
			if (act->zone) {
				NL_SET_ERR_MSG_MOD(extack, "Can't offload multiple ct actions");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & (TCA_CT_ACT_COMMIT |
					     TCA_CT_ACT_FORCE)) {
				NL_SET_ERR_MSG_MOD(extack, "Can't offload ct commit/force");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & TCA_CT_ACT_CLEAR) {
				NL_SET_ERR_MSG_MOD(extack, "Can't clear ct in LHS rule");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & (TCA_CT_ACT_NAT |
					     TCA_CT_ACT_NAT_SRC |
					     TCA_CT_ACT_NAT_DST)) {
				NL_SET_ERR_MSG_MOD(extack, "Can't perform NAT in LHS rule - packet isn't conntracked yet");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action) {
				NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled ct.action %u for LHS rule",
						       fa->ct.action);
				return -EOPNOTSUPP;
			}
			ct_zone = efx_tc_ct_register_zone(efx, fa->ct.zone,
							  fa->ct.flow_table);
			if (IS_ERR(ct_zone)) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to register for CT updates");
				return PTR_ERR(ct_zone);
			}
			act->zone = ct_zone;
			break;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u for LHS rule",
					       fa->id);
			return -EOPNOTSUPP;
		}
	}

	if (pipe) {
		NL_SET_ERR_MSG_MOD(extack, "Missing goto chain in LHS rule");
		return -EOPNOTSUPP;
	}
	return 0;
}

static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx,
					      struct efx_tc_lhs_action *act)
{
	if (act->rid)
		efx_tc_put_recirc_id(efx, act->rid);
	if (act->zone)
		efx_tc_ct_unregister_zone(efx, act->zone);
	if (act->count)
		efx_tc_flower_put_counter_index(efx, act->count);
}

static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
					 struct net_device *net_dev,
					 struct flow_cls_offload *tc)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
	struct efx_tc_action_set *act = NULL;
	bool found = false, uplinked = false;
	const struct flow_action_entry *fa;
	struct efx_tc_match match;
	struct efx_rep *to_efv;
	s64 rc;
	int i;

	/* Parse match */
	memset(&match, 0, sizeof(match));
	rc = efx_tc_flower_parse_match(efx, fr, &match, NULL);
	if (rc)
		return rc;
	/* The rule as given to us doesn't specify a source netdevice.
	 * But, determining whether packets from a VF should match it is
	 * complicated, so leave those to the software slowpath: qualify
	 * the filter with source m-port == wire.
	 */
	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
		return rc;
	}
	match.value.ingress_port = rc;
	match.mask.ingress_port = ~0;

	if (tc->common.chain_index) {
		struct efx_tc_recirc_id *rid;

		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index, net_dev);
		if (IS_ERR(rid)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Failed to allocate a hardware recirculation ID for chain_index %u",
					       tc->common.chain_index);
			return PTR_ERR(rid);
		}
		match.rid = rid;
		match.value.recirc_id = rid->fw_id;
	}
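	/* As in the non-foreign path: chain_index 0 always maps to recirc_id
	 * 0, which match.rid == NULL and match.value.recirc_id == 0 (from the
	 * memset() above) already encode, so the mask can be set
	 * unconditionally.
	 */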
	match.mask.recirc_id = 0xff;

	/* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
	 */
	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
	    match.mask.ct_state_est && match.value.ct_state_est)
		match.mask.ct_state_trk = 0;
	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
	 * match +trk-est (CT_HIT=0) despite being on an established connection.
	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
	 * still hit the software path.
	 */
	if (match.mask.ct_state_est && !match.value.ct_state_est) {
		if (match.value.tcp_syn_fin_rst) {
			/* Can't offload this combination */
			rc = -EOPNOTSUPP;
			goto release;
		}
		match.mask.tcp_syn_fin_rst = true;
	}

	flow_action_for_each(i, fa, &fr->action) {
		switch (fa->id) {
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED: /* mirred means mirror here */
			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			if (IS_ERR(to_efv))
				continue;
			found = true;
			break;
		default:
			break;
		}
	}
	if (!found) { /* We don't care. */
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring foreign filter that doesn't egdev us\n");
		rc = -EOPNOTSUPP;
		goto release;
	}

	rc = efx_mae_match_check_caps(efx, &match.mask, NULL);
	if (rc)
		goto release;

	if (efx_tc_match_is_encap(&match.mask)) {
		enum efx_encap_type type;

		type = efx_tc_indr_netdev_type(net_dev);
		if (type == EFX_ENCAP_TYPE_NONE) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match on unsupported tunnel device");
			rc = -EOPNOTSUPP;
			goto release;
		}

		rc = efx_mae_check_encap_type_supported(efx, type);
		if (rc) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Firmware reports no support for %s encap match",
					       efx_tc_encap_type_name(type));
			goto release;
		}

		rc = efx_tc_flower_record_encap_match(efx, &match, type,
						      EFX_TC_EM_DIRECT, 0, 0,
						      extack);
		if (rc)
			goto release;
	} else {
		/* This is not a tunnel decap rule, ignore it */
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring foreign filter without encap match\n");
		rc = -EOPNOTSUPP;
		goto release;
	}

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule) {
		rc = -ENOMEM;
		goto release;
	}
	INIT_LIST_HEAD(&rule->acts.list);
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
						&rule->linkage,
						efx_tc_match_action_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring already-offloaded rule (cookie %lx)\n",
			  tc->cookie);
		rc = -EEXIST;
		goto release;
	}

	act = kzalloc(sizeof(*act), GFP_USER);
	if (!act) {
		rc = -ENOMEM;
		goto release;
	}

	/* Parse actions.  For foreign rules we only support decap & redirect.
	 * See corresponding code in efx_tc_flower_replace() for theory of
	 * operation & how 'act' cursor is used.
	 */
	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_action_set save;

		switch (fa->id) {
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED:
			/* See corresponding code in efx_tc_flower_replace() for
			 * long explanations of what's going on here.
			 */
			save = *act;
			if (fa->hw_stats) {
				struct efx_tc_counter_index *ctr;

				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
					NL_SET_ERR_MSG_FMT_MOD(extack,
							       "hw_stats_type %u not supported (only 'delayed')",
							       fa->hw_stats);
					rc = -EOPNOTSUPP;
					goto release;
				}
				if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
					rc = -EOPNOTSUPP;
					goto release;
				}

				ctr = efx_tc_flower_get_counter_index(efx,
								      tc->cookie,
								      EFX_TC_COUNTER_TYPE_AR);
				if (IS_ERR(ctr)) {
					rc = PTR_ERR(ctr);
					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
					goto release;
				}
				act->count = ctr;
				INIT_LIST_HEAD(&act->count_user);
			}

			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
				/* can't happen */
				rc = -EOPNOTSUPP;
				NL_SET_ERR_MSG_MOD(extack,
						   "Deliver action violates action order (can't happen)");
				goto release;
			}
			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			/* PF implies egdev is us, in which case we really
			 * want to deliver to the uplink (because this is an
			 * ingress filter).  If we don't recognise the egdev
			 * at all, then we'd better trap so SW can handle it.
			 */
			if (IS_ERR(to_efv))
				to_efv = EFX_EFV_PF;
			if (to_efv == EFX_EFV_PF) {
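				/* Deliver at most one copy to the uplink,
				 * however many egdevs resolve to it.
				 */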
				if (uplinked)
					break;
				uplinked = true;
			}
			rc = efx_tc_flower_internal_mport(efx, to_efv);
			if (rc < 0) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
				goto release;
			}
			act->dest_mport = rc;
			act->deliver = 1;
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Failed to write action set to hw (mirred)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL;
			if (fa->id == FLOW_ACTION_REDIRECT)
				break; /* end of the line */
			/* Mirror, so continue on with saved act */
			act = kzalloc(sizeof(*act), GFP_USER);
			if (!act) {
				rc = -ENOMEM;
				goto release;
			}
			*act = save;
			break;
		case FLOW_ACTION_TUNNEL_DECAP:
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
				rc = -EINVAL;
				NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
				goto release;
			}
			act->decap = 1;
			/* If we previously delivered/trapped to uplink, now
			 * that we've decapped we'll want another copy if we
			 * try to deliver/trap to uplink again.
			 */
			uplinked = false;
			break;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
					       fa->id);
			rc = -EOPNOTSUPP;
			goto release;
		}
	}

	if (act) {
		if (!uplinked) {
			/* Not shot/redirected, so deliver to default dest (which is
			 * the uplink, as this is an ingress filter)
			 */
			efx_mae_mport_uplink(efx, &act->dest_mport);
			act->deliver = 1;
		}
		rc = efx_mae_alloc_action_set(efx, act);
		if (rc) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
			goto release;
		}
		list_add_tail(&act->list, &rule->acts.list);
		act = NULL; /* Prevent double-free in error path */
	}

	rule->match = match;

	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed foreign filter (cookie %lx)\n",
		  tc->cookie);

	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
		goto release;
	}
	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
				 rule->acts.fw_id, &rule->fw_id);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release_acts;
	}
	return 0;

release_acts:
	efx_mae_free_action_set_list(efx, &rule->acts);
release:
	/* We failed to insert the rule, so free up any entries we created in
	 * subsidiary tables.
	 */
	if (match.rid)
		efx_tc_put_recirc_id(efx, match.rid);
	if (act)
		efx_tc_free_action_set(efx, act, false);
	if (rule) {
		if (!old)
			rhashtable_remove_fast(&efx->tc->match_action_ht,
					       &rule->linkage,
					       efx_tc_match_action_ht_params);
		efx_tc_free_action_set_list(efx, &rule->acts, false);
	}
	kfree(rule);
	if (match.encap)
		efx_tc_flower_release_encap_match(efx, match.encap);
	return rc;
}

static int efx_tc_flower_replace_lhs(struct efx_nic *efx,
				     struct flow_cls_offload *tc,
				     struct flow_rule *fr,
				     struct efx_tc_match *match,
				     struct efx_rep *efv,
				     struct net_device *net_dev)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_rule *rule, *old;
	int rc;

	if (tc->common.chain_index) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0");
		return -EOPNOTSUPP;
	}

	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
		return -EOPNOTSUPP;
	}
	/* LHS rules are always -trk, so we don't need to match on that */
	match->mask.ct_state_trk = 0;
	match->value.ct_state_trk = 0;

	rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack);
	if (rc)
		return rc;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule)
		return -ENOMEM;
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
						&rule->linkage,
						efx_tc_lhs_rule_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		rc = -EEXIST;
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		goto release;
	}

	/* Parse actions */
	/* See note in efx_tc_flower_replace() regarding passed net_dev
	 * (used for efx_tc_get_recirc_id()).
	 */
	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, efx->net_dev, rule);
	if (rc)
		goto release;

	rule->match = *match;

	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release;
	}
	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed lhs rule (cookie %lx)\n",
		  tc->cookie);
	return 0;

release:
	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
	if (!old)
		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
				       efx_tc_lhs_rule_ht_params);
	kfree(rule);
	return rc;
}

static int efx_tc_flower_replace(struct efx_nic *efx,
				 struct net_device *net_dev,
				 struct flow_cls_offload *tc,
				 struct efx_rep *efv)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct netlink_ext_ack *extack = tc->common.extack;
	const struct ip_tunnel_info *encap_info = NULL;
	struct efx_tc_flow_rule *rule = NULL, *old;
	struct efx_tc_action_set *act = NULL;
	const struct flow_action_entry *fa;
	struct efx_rep *from_efv, *to_efv;
	struct efx_tc_match match;
	u32 acts_id;
	s64 rc;
	int i;

	if (!tc_can_offload_extack(efx->net_dev, extack))
		return -EOPNOTSUPP;
	if (WARN_ON(!efx->tc))
		return -ENETDOWN;
	if (WARN_ON(!efx->tc->up))
		return -ENETDOWN;

	from_efv = efx_tc_flower_lookup_efv(efx, net_dev);
	if (IS_ERR(from_efv)) {
		/* Not from our PF or representors, so probably a tunnel dev */
		return efx_tc_flower_replace_foreign(efx, net_dev, tc);
	}

	if (efv != from_efv) {
		/* can't happen */
		NL_SET_ERR_MSG_FMT_MOD(extack, "for %s efv is %snull but from_efv is %snull (can't happen)",
				       netdev_name(net_dev), efv ? "non-" : "",
				       from_efv ? "non-" : "");
		return -EINVAL;
	}

	/* Parse match */
	memset(&match, 0, sizeof(match));
	rc = efx_tc_flower_external_mport(efx, from_efv);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port");
		return rc;
	}
	match.value.ingress_port = rc;
	match.mask.ingress_port = ~0;
	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
	if (rc)
		return rc;
	if (efx_tc_match_is_encap(&match.mask)) {
		NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported");
		return -EOPNOTSUPP;
	}

	if (efx_tc_rule_is_lhs_rule(fr, &match))
		return efx_tc_flower_replace_lhs(efx, tc, fr, &match, efv,
						 net_dev);

	/* chain_index 0 is always recirc_id 0 (and does not appear in recirc_ht).
	 * Conveniently, match.rid == NULL and match.value.recirc_id == 0 owing
	 * to the initial memset(), so we don't need to do anything in that case.
	 */
	if (tc->common.chain_index) {
		struct efx_tc_recirc_id *rid;

		/* Note regarding passed net_dev:
		 * VFreps and PF can share chain namespace, as they have
		 * distinct ingress_mports.  So we don't need to burn an
		 * extra recirc_id if both use the same chain_index.
		 * (Strictly speaking, we could give each VFrep its own
		 * recirc_id namespace that doesn't take IDs away from the
		 * PF, but that would require a bunch of additional IDAs -
		 * one for each representor - and that's not likely to be
		 * the main cause of recirc_id exhaustion anyway.)
		 */
		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index,
					   efx->net_dev);
		if (IS_ERR(rid)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Failed to allocate a hardware recirculation ID for chain_index %u",
					       tc->common.chain_index);
			return PTR_ERR(rid);
		}
		match.rid = rid;
		match.value.recirc_id = rid->fw_id;
	}
	match.mask.recirc_id = 0xff;

	/* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
	 */
	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
	    match.mask.ct_state_est && match.value.ct_state_est)
		match.mask.ct_state_trk = 0;
	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
	 * match +trk-est (CT_HIT=0) despite being on an established connection.
	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
	 * still hit the software path.
	 */
	if (match.mask.ct_state_est && !match.value.ct_state_est) {
		if (match.value.tcp_syn_fin_rst) {
			/* Can't offload this combination */
			rc = -EOPNOTSUPP;
			goto release;
		}
		match.mask.tcp_syn_fin_rst = true;
	}

	rc = efx_mae_match_check_caps(efx, &match.mask, extack);
	if (rc)
		goto release;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule) {
		rc = -ENOMEM;
		goto release;
	}
	INIT_LIST_HEAD(&rule->acts.list);
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
						&rule->linkage,
						efx_tc_match_action_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		rc = -EEXIST;
		goto release;
	}

	/* Parse actions */
	act = kzalloc(sizeof(*act), GFP_USER);
	if (!act) {
		rc = -ENOMEM;
		goto release;
	}

	/**
	 * DOC: TC action translation
	 *
	 * Actions in TC are sequential and cumulative, with delivery actions
	 * potentially anywhere in the order.  The EF100 MAE, however, takes
	 * an 'action set list' consisting of 'action sets', each of which is
	 * applied to the _original_ packet, and consists of a set of optional
	 * actions in a fixed order with delivery at the end.
	 * To translate between these two models, we maintain a 'cursor', @act,
	 * which describes the cumulative effect of all the packet-mutating
	 * actions encountered so far; on handling a delivery (mirred or drop)
	 * action, once the action-set has been inserted into hardware, we
	 * append @act to the action-set list (@rule->acts); if this is a pipe
	 * action (mirred mirror) we then allocate a new @act with a copy of
	 * the cursor state _before_ the delivery action, otherwise we set @act
	 * to %NULL.
	 * This ensures that every allocated action-set is either attached to
	 * @rule->acts or pointed to by @act (and never both), and that only
	 * those action-sets in @rule->acts exist in hardware.  Consequently,
	 * in the failure path, @act only needs to be freed in memory, whereas
	 * for @rule->acts we remove each action-set from hardware before
	 * freeing it (efx_tc_free_action_set_list()), even if the action-set
	 * list itself is not in hardware.
	 */
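	/* Worked example (illustrative): the TC action list
	 *	vlan pop; mirred mirror dev A; vlan push id 5; mirred redirect dev B
	 * becomes two action sets, both applied to the original packet:
	 *	{ vlan_pop, deliver A } and { vlan_pop, vlan_push 5, deliver B }.
	 */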
	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_action_set save;
		u16 tci;

		if (!act) {
			/* more actions after a non-pipe action */
			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
			rc = -EINVAL;
			goto release;
		}

		if ((fa->id == FLOW_ACTION_REDIRECT ||
		     fa->id == FLOW_ACTION_MIRRED ||
		     fa->id == FLOW_ACTION_DROP) && fa->hw_stats) {
			struct efx_tc_counter_index *ctr;

			/* Currently the only actions that want stats are
			 * mirred and gact (ok, shot, trap, goto-chain), which
			 * means we want stats just before delivery.  Also,
			 * note that tunnel_key set shouldn't change the length
			 * — it's only the subsequent mirred that does that,
			 * and the stats are taken _before_ the mirred action
			 * happens.
			 */
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
				/* All supported actions that count either steal
				 * (gact shot, mirred redirect) or clone act
				 * (mirred mirror), so we should never get two
				 * count actions on one action_set.
				 */
				NL_SET_ERR_MSG_MOD(extack, "Count-action conflict (can't happen)");
				rc = -EOPNOTSUPP;
				goto release;
			}

			if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
				NL_SET_ERR_MSG_FMT_MOD(extack, "hw_stats_type %u not supported (only 'delayed')",
						       fa->hw_stats);
				rc = -EOPNOTSUPP;
				goto release;
			}

			ctr = efx_tc_flower_get_counter_index(efx, tc->cookie,
							      EFX_TC_COUNTER_TYPE_AR);
			if (IS_ERR(ctr)) {
				rc = PTR_ERR(ctr);
				NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
				goto release;
			}
			act->count = ctr;
			INIT_LIST_HEAD(&act->count_user);
		}

		switch (fa->id) {
		case FLOW_ACTION_DROP:
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (drop)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL; /* end of the line */
			break;
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED:
			save = *act;

			if (encap_info) {
				struct efx_tc_encap_action *encap;

				if (!efx_tc_flower_action_order_ok(act,
								   EFX_TC_AO_ENCAP)) {
					rc = -EOPNOTSUPP;
					NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order");
					goto release;
				}
				encap = efx_tc_flower_create_encap_md(
						efx, encap_info, fa->dev, extack);
				if (IS_ERR_OR_NULL(encap)) {
					rc = PTR_ERR(encap);
					if (!rc)
						rc = -EIO; /* arbitrary */
					goto release;
				}
				act->encap_md = encap;
				list_add_tail(&act->encap_user, &encap->users);
				act->dest_mport = encap->dest_mport;
				act->deliver = 1;
				if (act->count && !WARN_ON(!act->count->cnt)) {
					/* This counter is used by an encap
					 * action, which needs a reference back
					 * so it can prod neighbour resolution
					 * whenever traffic is seen.
					 */
1546 					list_add_tail(&act->count_user,
1547 						      &act->count->cnt->users);
1548 					spin_unlock_bh(&act->count->cnt->lock);
1549 				}
1550 				rc = efx_mae_alloc_action_set(efx, act);
1551 				if (rc) {
1552 					NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)");
1553 					goto release;
1554 				}
1555 				list_add_tail(&act->list, &rule->acts.list);
1556 				act->user = &rule->acts;
1557 				act = NULL;
1558 				if (fa->id == FLOW_ACTION_REDIRECT)
1559 					break; /* end of the line */
1560 				/* Mirror, so continue on with saved act */
1561 				save.count = NULL;
1562 				act = kzalloc(sizeof(*act), GFP_USER);
1563 				if (!act) {
1564 					rc = -ENOMEM;
1565 					goto release;
1566 				}
1567 				*act = save;
1568 				break;
1569 			}
1570 
1571 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
1572 				/* can't happen */
1573 				rc = -EOPNOTSUPP;
1574 				NL_SET_ERR_MSG_MOD(extack, "Deliver action violates action order (can't happen)");
1575 				goto release;
1576 			}
1577 
1578 			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
1579 			if (IS_ERR(to_efv)) {
1580 				NL_SET_ERR_MSG_MOD(extack, "Mirred egress device not on switch");
1581 				rc = PTR_ERR(to_efv);
1582 				goto release;
1583 			}
1584 			rc = efx_tc_flower_external_mport(efx, to_efv);
1585 			if (rc < 0) {
1586 				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
1587 				goto release;
1588 			}
1589 			act->dest_mport = rc;
1590 			act->deliver = 1;
1591 			rc = efx_mae_alloc_action_set(efx, act);
1592 			if (rc) {
1593 				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (mirred)");
1594 				goto release;
1595 			}
1596 			list_add_tail(&act->list, &rule->acts.list);
1597 			act = NULL;
1598 			if (fa->id == FLOW_ACTION_REDIRECT)
1599 				break; /* end of the line */
1600 			/* Mirror, so continue on with saved act */
1601 			save.count = NULL;
1602 			act = kzalloc(sizeof(*act), GFP_USER);
1603 			if (!act) {
1604 				rc = -ENOMEM;
1605 				goto release;
1606 			}
1607 			*act = save;
1608 			break;
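		/* For illustration (hypothetical devices, not taken from this
		 * driver): a rule with
		 *   action mirred egress mirror dev $REP1 \
		 *   action mirred egress redirect dev $REP2
		 * passes through the MIRRED handling twice, committing one
		 * action set that delivers to $REP1 and then, via 'save',
		 * building a second that delivers to $REP2.
		 */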
1609 		case FLOW_ACTION_VLAN_POP:
1610 			if (act->vlan_push) {
1611 				act->vlan_push--;
1612 			} else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) {
1613 				act->vlan_pop++;
1614 			} else {
1615 				NL_SET_ERR_MSG_MOD(extack,
1616 						   "More than two VLAN pops, or action order violated");
1617 				rc = -EINVAL;
1618 				goto release;
1619 			}
1620 			break;
1621 		case FLOW_ACTION_VLAN_PUSH:
1622 			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) {
1623 				rc = -EINVAL;
1624 				NL_SET_ERR_MSG_MOD(extack,
1625 						   "More than two VLAN pushes, or action order violated");
1626 				goto release;
1627 			}
1628 			tci = fa->vlan.vid & VLAN_VID_MASK;
1629 			tci |= fa->vlan.prio << VLAN_PRIO_SHIFT;
1630 			act->vlan_tci[act->vlan_push] = cpu_to_be16(tci);
1631 			act->vlan_proto[act->vlan_push] = fa->vlan.proto;
1632 			act->vlan_push++;
1633 			break;
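		/* Worked example of the TCI packing above, assuming VID 100
		 * and priority 3: tci = 0x0064 | (3 << VLAN_PRIO_SHIFT)
		 * = 0x0064 | 0x6000 = 0x6064, stored big-endian in vlan_tci.
		 */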
1634 		case FLOW_ACTION_TUNNEL_ENCAP:
1635 			if (encap_info) {
1636 				/* Can't specify encap multiple times.
1637 				 * If you want to overwrite an existing
1638 				 * encap_info, use an intervening
1639 				 * FLOW_ACTION_TUNNEL_DECAP to clear it.
1640 				 */
1641 				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set");
1642 				rc = -EINVAL;
1643 				goto release;
1644 			}
1645 			if (!fa->tunnel) {
1646 				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key");
1647 				rc = -EOPNOTSUPP;
1648 				goto release;
1649 			}
1650 			encap_info = fa->tunnel;
1651 			break;
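		/* A hypothetical rule reaching here might use something like
		 *   action tunnel_key set id 1000 src_ip ... dst_ip ... \
		 *          dst_port 4789
		 * ahead of a mirred redirect to a VXLAN device; the
		 * encap_info recorded here is consumed by the REDIRECT/MIRRED
		 * handling above when it builds the encap metadata.
		 */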
1652 		case FLOW_ACTION_TUNNEL_DECAP:
1653 			if (encap_info) {
1654 				encap_info = NULL;
1655 				break;
1656 			}
1657 			/* Since we don't support enc_key matches on ingress
1658 			 * (and if we did there'd be no tunnel-device to give
1659 			 * us a type), we can't offload a decap that's not
1660 			 * just undoing a previous encap action.
1661 			 */
1662 			NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device");
1663 			rc = -EOPNOTSUPP;
1664 			goto release;
1665 		default:
1666 			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
1667 					       fa->id);
1668 			rc = -EOPNOTSUPP;
1669 			goto release;
1670 		}
1671 	}
1672 
1673 	if (act) {
1674 		/* Not shot/redirected, so deliver to default dest */
1675 		if (from_efv == EFX_EFV_PF)
1676 			/* Rule applies to traffic from the wire,
1677 			 * and default dest is thus the PF
1678 			 */
1679 			efx_mae_mport_uplink(efx, &act->dest_mport);
1680 		else
1681 			/* Representor, so rule applies to traffic from
1682 			 * representee, and default dest is thus the rep.
1683 			 * All reps use the same mport for delivery
1684 			 */
1685 			efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
1686 					    &act->dest_mport);
1687 		act->deliver = 1;
1688 		rc = efx_mae_alloc_action_set(efx, act);
1689 		if (rc) {
1690 			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
1691 			goto release;
1692 		}
1693 		list_add_tail(&act->list, &rule->acts.list);
1694 		act = NULL; /* Prevent double-free in error path */
1695 	}
1696 
1697 	netif_dbg(efx, drv, efx->net_dev,
1698 		  "Successfully parsed filter (cookie %lx)\n",
1699 		  tc->cookie);
1700 
1701 	rule->match = match;
1702 
1703 	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
1704 	if (rc) {
1705 		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
1706 		goto release;
1707 	}
1708 	if (from_efv == EFX_EFV_PF)
1709 		/* PF netdev, so rule applies to traffic from wire */
1710 		rule->fallback = &efx->tc->facts.pf;
1711 	else
1712 		/* repdev, so rule applies to traffic from representee */
1713 		rule->fallback = &efx->tc->facts.reps;
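	/* If the rule's actions aren't ready (e.g. an encap action is still
	 * waiting for neighbour resolution), insert it pointing at the
	 * fallback action set for now; it should be repointed at the real
	 * action set once the encap metadata becomes available.
	 */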
1714 	if (!efx_tc_check_ready(efx, rule)) {
1715 		netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n");
1716 		acts_id = rule->fallback->fw_id;
1717 	} else {
1718 		netif_dbg(efx, drv, efx->net_dev, "ready for hw\n");
1719 		acts_id = rule->acts.fw_id;
1720 	}
1721 	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
1722 				 acts_id, &rule->fw_id);
1723 	if (rc) {
1724 		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
1725 		goto release_acts;
1726 	}
1727 	return 0;
1728 
1729 release_acts:
1730 	efx_mae_free_action_set_list(efx, &rule->acts);
1731 release:
1732 	/* We failed to insert the rule, so free up any entries we created in
1733 	 * subsidiary tables.
1734 	 */
1735 	if (match.rid)
1736 		efx_tc_put_recirc_id(efx, match.rid);
1737 	if (act)
1738 		efx_tc_free_action_set(efx, act, false);
1739 	if (rule) {
1740 		if (!old)
1741 			rhashtable_remove_fast(&efx->tc->match_action_ht,
1742 					       &rule->linkage,
1743 					       efx_tc_match_action_ht_params);
1744 		efx_tc_free_action_set_list(efx, &rule->acts, false);
1745 	}
1746 	kfree(rule);
1747 	return rc;
1748 }
1749 
1750 static int efx_tc_flower_destroy(struct efx_nic *efx,
1751 				 struct net_device *net_dev,
1752 				 struct flow_cls_offload *tc)
1753 {
1754 	struct netlink_ext_ack *extack = tc->common.extack;
1755 	struct efx_tc_lhs_rule *lhs_rule;
1756 	struct efx_tc_flow_rule *rule;
1757 
1758 	lhs_rule = rhashtable_lookup_fast(&efx->tc->lhs_rule_ht, &tc->cookie,
1759 					  efx_tc_lhs_rule_ht_params);
1760 	if (lhs_rule) {
1761 		/* Remove it from HW */
1762 		efx_mae_remove_lhs_rule(efx, lhs_rule);
1763 		/* Delete it from SW */
1764 		efx_tc_flower_release_lhs_actions(efx, &lhs_rule->lhs_act);
1765 		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &lhs_rule->linkage,
1766 				       efx_tc_lhs_rule_ht_params);
1767 		if (lhs_rule->match.encap)
1768 			efx_tc_flower_release_encap_match(efx, lhs_rule->match.encap);
1769 		netif_dbg(efx, drv, efx->net_dev, "Removed (lhs) filter %lx\n",
1770 			  lhs_rule->cookie);
1771 		kfree(lhs_rule);
1772 		return 0;
1773 	}
1774 
1775 	rule = rhashtable_lookup_fast(&efx->tc->match_action_ht, &tc->cookie,
1776 				      efx_tc_match_action_ht_params);
1777 	if (!rule) {
1778 		/* Only log a message if we're the ingress device.  Otherwise
1779 		 * it's a foreign filter and we might just not have been
1780 		 * interested (e.g. we might not have been the egress device
1781 		 * either).
1782 		 */
1783 		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
1784 			netif_warn(efx, drv, efx->net_dev,
1785 				   "Filter %lx not found to remove\n", tc->cookie);
1786 		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
1787 		return -ENOENT;
1788 	}
1789 
1790 	/* Remove it from HW */
1791 	efx_tc_delete_rule(efx, rule);
1792 	/* Delete it from SW */
1793 	rhashtable_remove_fast(&efx->tc->match_action_ht, &rule->linkage,
1794 			       efx_tc_match_action_ht_params);
1795 	netif_dbg(efx, drv, efx->net_dev, "Removed filter %lx\n", rule->cookie);
1796 	kfree(rule);
1797 	return 0;
1798 }
1799 
1800 static int efx_tc_flower_stats(struct efx_nic *efx, struct net_device *net_dev,
1801 			       struct flow_cls_offload *tc)
1802 {
1803 	struct netlink_ext_ack *extack = tc->common.extack;
1804 	struct efx_tc_counter_index *ctr;
1805 	struct efx_tc_counter *cnt;
1806 	u64 packets, bytes;
1807 
1808 	ctr = efx_tc_flower_find_counter_index(efx, tc->cookie);
1809 	if (!ctr) {
1810 		/* See comment in efx_tc_flower_destroy() */
1811 		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
1812 			if (net_ratelimit())
1813 				netif_warn(efx, drv, efx->net_dev,
1814 					   "Filter %lx not found for stats\n",
1815 					   tc->cookie);
1816 		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
1817 		return -ENOENT;
1818 	}
1819 	if (WARN_ON(!ctr->cnt)) /* can't happen */
1820 		return -EIO;
1821 	cnt = ctr->cnt;
1822 
1823 	spin_lock_bh(&cnt->lock);
1824 	/* Report only new pkts/bytes since last time TC asked */
1825 	packets = cnt->packets;
1826 	bytes = cnt->bytes;
1827 	flow_stats_update(&tc->stats, bytes - cnt->old_bytes,
1828 			  packets - cnt->old_packets, 0, cnt->touched,
1829 			  FLOW_ACTION_HW_STATS_DELAYED);
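	/* e.g. if the counter last read (packets 100, bytes 9000) and now
	 * reads (150, 13500), TC is handed the increment (50, 4500); the TC
	 * core accumulates these deltas itself.
	 */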
1830 	cnt->old_packets = packets;
1831 	cnt->old_bytes = bytes;
1832 	spin_unlock_bh(&cnt->lock);
1833 	return 0;
1834 }
1835 
1836 int efx_tc_flower(struct efx_nic *efx, struct net_device *net_dev,
1837 		  struct flow_cls_offload *tc, struct efx_rep *efv)
1838 {
1839 	int rc;
1840 
1841 	if (!efx->tc)
1842 		return -EOPNOTSUPP;
1843 
1844 	mutex_lock(&efx->tc->mutex);
1845 	switch (tc->command) {
1846 	case FLOW_CLS_REPLACE:
1847 		rc = efx_tc_flower_replace(efx, net_dev, tc, efv);
1848 		break;
1849 	case FLOW_CLS_DESTROY:
1850 		rc = efx_tc_flower_destroy(efx, net_dev, tc);
1851 		break;
1852 	case FLOW_CLS_STATS:
1853 		rc = efx_tc_flower_stats(efx, net_dev, tc);
1854 		break;
1855 	default:
1856 		rc = -EOPNOTSUPP;
1857 		break;
1858 	}
1859 	mutex_unlock(&efx->tc->mutex);
1860 	return rc;
1861 }
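/* Note: efx_tc_flower() is the single entry point for FLOW_CLS_* commands;
 * it is expected to be reached via the flow block callbacks bound in
 * tc_bindings.c and, for foreign (e.g. tunnel) netdevs, via
 * efx_tc_indr_setup_cb.  Every path takes tc->mutex, so replace, destroy
 * and stats are serialised against each other.
 */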
1862 
1863 static int efx_tc_configure_default_rule(struct efx_nic *efx, u32 ing_port,
1864 					 u32 eg_port, struct efx_tc_flow_rule *rule)
1865 {
1866 	struct efx_tc_action_set_list *acts = &rule->acts;
1867 	struct efx_tc_match *match = &rule->match;
1868 	struct efx_tc_action_set *act;
1869 	int rc;
1870 
1871 	match->value.ingress_port = ing_port;
1872 	match->mask.ingress_port = ~0;
1873 	act = kzalloc(sizeof(*act), GFP_KERNEL);
1874 	if (!act)
1875 		return -ENOMEM;
1876 	act->deliver = 1;
1877 	act->dest_mport = eg_port;
1878 	rc = efx_mae_alloc_action_set(efx, act);
1879 	if (rc)
1880 		goto fail1;
1881 	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
1882 	list_add_tail(&act->list, &acts->list);
1883 	rc = efx_mae_alloc_action_set_list(efx, acts);
1884 	if (rc)
1885 		goto fail2;
1886 	rc = efx_mae_insert_rule(efx, match, EFX_TC_PRIO_DFLT,
1887 				 acts->fw_id, &rule->fw_id);
1888 	if (rc)
1889 		goto fail3;
1890 	return 0;
1891 fail3:
1892 	efx_mae_free_action_set_list(efx, acts);
1893 fail2:
1894 	list_del(&act->list);
1895 	efx_mae_free_action_set(efx, act->fw_id);
1896 fail1:
1897 	kfree(act);
1898 	return rc;
1899 }
1900 
1901 static int efx_tc_configure_default_rule_pf(struct efx_nic *efx)
1902 {
1903 	struct efx_tc_flow_rule *rule = &efx->tc->dflt.pf;
1904 	u32 ing_port, eg_port;
1905 
1906 	efx_mae_mport_uplink(efx, &ing_port);
1907 	efx_mae_mport_wire(efx, &eg_port);
1908 	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
1909 }
1910 
1911 static int efx_tc_configure_default_rule_wire(struct efx_nic *efx)
1912 {
1913 	struct efx_tc_flow_rule *rule = &efx->tc->dflt.wire;
1914 	u32 ing_port, eg_port;
1915 
1916 	efx_mae_mport_wire(efx, &ing_port);
1917 	efx_mae_mport_uplink(efx, &eg_port);
1918 	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
1919 }
1920 
1921 int efx_tc_configure_default_rule_rep(struct efx_rep *efv)
1922 {
1923 	struct efx_tc_flow_rule *rule = &efv->dflt;
1924 	struct efx_nic *efx = efv->parent;
1925 	u32 ing_port, eg_port;
1926 
1927 	efx_mae_mport_mport(efx, efv->mport, &ing_port);
1928 	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
1929 	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
1930 }
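/* Together, the three helpers above install the default switching behaviour
 * at EFX_TC_PRIO_DFLT: traffic from the PF goes to the wire, traffic from
 * the wire goes to the PF, and traffic from each representee goes to the
 * shared representor m-port, where the filters inserted by
 * efx_tc_insert_rep_filters() pick it up.
 */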
1931 
1932 void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
1933 				     struct efx_tc_flow_rule *rule)
1934 {
1935 	if (rule->fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL)
1936 		efx_tc_delete_rule(efx, rule);
1937 	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
1938 }
1939 
1940 static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port,
1941 					  struct efx_tc_action_set_list *acts)
1942 {
1943 	struct efx_tc_action_set *act;
1944 	int rc;
1945 
1946 	act = kzalloc(sizeof(*act), GFP_KERNEL);
1947 	if (!act)
1948 		return -ENOMEM;
1949 	act->deliver = 1;
1950 	act->dest_mport = eg_port;
1951 	rc = efx_mae_alloc_action_set(efx, act);
1952 	if (rc)
1953 		goto fail1;
1954 	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
1955 	list_add_tail(&act->list, &acts->list);
1956 	rc = efx_mae_alloc_action_set_list(efx, acts);
1957 	if (rc)
1958 		goto fail2;
1959 	return 0;
1960 fail2:
1961 	list_del(&act->list);
1962 	efx_mae_free_action_set(efx, act->fw_id);
1963 fail1:
1964 	kfree(act);
1965 	return rc;
1966 }
1967 
1968 static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx)
1969 {
1970 	struct efx_tc_action_set_list *acts = &efx->tc->facts.pf;
1971 	u32 eg_port;
1972 
1973 	efx_mae_mport_uplink(efx, &eg_port);
1974 	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
1975 }
1976 
1977 static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx)
1978 {
1979 	struct efx_tc_action_set_list *acts = &efx->tc->facts.reps;
1980 	u32 eg_port;
1981 
1982 	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
1983 	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
1984 }
1985 
1986 static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx,
1987 					     struct efx_tc_action_set_list *acts)
1988 {
1989 	efx_tc_free_action_set_list(efx, acts, true);
1990 }
1991 
1992 static int efx_tc_configure_rep_mport(struct efx_nic *efx)
1993 {
1994 	u32 rep_mport_label;
1995 	int rc;
1996 
1997 	rc = efx_mae_allocate_mport(efx, &efx->tc->reps_mport_id, &rep_mport_label);
1998 	if (rc)
1999 		return rc;
2000 	pci_dbg(efx->pci_dev, "created rep mport 0x%08x (0x%04x)\n",
2001 		efx->tc->reps_mport_id, rep_mport_label);
2002 	/* Use mport *selector* as vport ID */
2003 	efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
2004 			    &efx->tc->reps_mport_vport_id);
2005 	return 0;
2006 }
2007 
2008 static void efx_tc_deconfigure_rep_mport(struct efx_nic *efx)
2009 {
2010 	efx_mae_free_mport(efx, efx->tc->reps_mport_id);
2011 	efx->tc->reps_mport_id = MAE_MPORT_SELECTOR_NULL;
2012 }
2013 
2014 int efx_tc_insert_rep_filters(struct efx_nic *efx)
2015 {
2016 	struct efx_filter_spec promisc, allmulti;
2017 	int rc;
2018 
2019 	if (efx->type->is_vf)
2020 		return 0;
2021 	if (!efx->tc)
2022 		return 0;
2023 	efx_filter_init_rx(&promisc, EFX_FILTER_PRI_REQUIRED, 0, 0);
2024 	efx_filter_set_uc_def(&promisc);
2025 	efx_filter_set_vport_id(&promisc, efx->tc->reps_mport_vport_id);
2026 	rc = efx_filter_insert_filter(efx, &promisc, false);
2027 	if (rc < 0)
2028 		return rc;
2029 	efx->tc->reps_filter_uc = rc;
2030 	efx_filter_init_rx(&allmulti, EFX_FILTER_PRI_REQUIRED, 0, 0);
2031 	efx_filter_set_mc_def(&allmulti);
2032 	efx_filter_set_vport_id(&allmulti, efx->tc->reps_mport_vport_id);
2033 	rc = efx_filter_insert_filter(efx, &allmulti, false);
2034 	if (rc < 0)
2035 		return rc;
2036 	efx->tc->reps_filter_mc = rc;
2037 	return 0;
2038 }
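/* The unicast-default (promisc) and multicast-default (allmulti) filters
 * above are scoped to the representors' vport, so everything the default
 * rules deliver to the rep m-port is caught and handed to the PF's RX path
 * for demux to the individual representors.
 */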
2039 
2040 void efx_tc_remove_rep_filters(struct efx_nic *efx)
2041 {
2042 	if (efx->type->is_vf)
2043 		return;
2044 	if (!efx->tc)
2045 		return;
2046 	if (efx->tc->reps_filter_mc >= 0)
2047 		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_mc);
2048 	efx->tc->reps_filter_mc = -1;
2049 	if (efx->tc->reps_filter_uc >= 0)
2050 		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_uc);
2051 	efx->tc->reps_filter_uc = -1;
2052 }
2053 
2054 int efx_init_tc(struct efx_nic *efx)
2055 {
2056 	int rc;
2057 
2058 	rc = efx_mae_get_caps(efx, efx->tc->caps);
2059 	if (rc)
2060 		return rc;
2061 	if (efx->tc->caps->match_field_count > MAE_NUM_FIELDS)
2062 		/* Firmware supports some match fields the driver doesn't know
2063 		 * about.  Not fatal, unless any of those fields are required
2064 		 * (MAE_FIELD_SUPPORTED_MATCH_ALWAYS), which we cannot detect.
2065 		 */
2066 		netif_warn(efx, probe, efx->net_dev,
2067 			   "FW reports %u match fields, more than the driver knows\n",
2068 			   efx->tc->caps->match_field_count);
2069 	if (efx->tc->caps->action_prios < EFX_TC_PRIO__NUM) {
2070 		netif_err(efx, probe, efx->net_dev,
2071 			  "Too few action prios supported (have %u, need %u)\n",
2072 			  efx->tc->caps->action_prios, EFX_TC_PRIO__NUM);
2073 		return -EIO;
2074 	}
2075 	rc = efx_tc_configure_default_rule_pf(efx);
2076 	if (rc)
2077 		return rc;
2078 	rc = efx_tc_configure_default_rule_wire(efx);
2079 	if (rc)
2080 		return rc;
2081 	rc = efx_tc_configure_rep_mport(efx);
2082 	if (rc)
2083 		return rc;
2084 	rc = efx_tc_configure_fallback_acts_pf(efx);
2085 	if (rc)
2086 		return rc;
2087 	rc = efx_tc_configure_fallback_acts_reps(efx);
2088 	if (rc)
2089 		return rc;
2090 	rc = efx_mae_get_tables(efx);
2091 	if (rc)
2092 		return rc;
2093 	rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
2094 	if (rc)
2095 		goto out_free;
2096 	efx->tc->up = true;
2097 	return 0;
2098 out_free:
2099 	efx_mae_free_tables(efx);
2100 	return rc;
2101 }
2102 
2103 void efx_fini_tc(struct efx_nic *efx)
2104 {
2105 	/* We can get called even if efx_init_struct_tc() failed */
2106 	if (!efx->tc)
2107 		return;
2108 	if (efx->tc->up)
2109 		flow_indr_dev_unregister(efx_tc_indr_setup_cb, efx, efx_tc_block_unbind);
2110 	efx_tc_deconfigure_rep_mport(efx);
2111 	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf);
2112 	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire);
2113 	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf);
2114 	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps);
2115 	efx->tc->up = false;
2116 	efx_mae_free_tables(efx);
2117 }
2118 
2119 /* At teardown time, all TC filter rules (and thus all resources they created)
2120  * should already have been removed.  If we find any in our hashtables, make a
2121  * cursory attempt to clean up the software side.
2122  */
2123 static void efx_tc_encap_match_free(void *ptr, void *__unused)
2124 {
2125 	struct efx_tc_encap_match *encap = ptr;
2126 
2127 	WARN_ON(refcount_read(&encap->ref));
2128 	kfree(encap);
2129 }
2130 
2131 static void efx_tc_recirc_free(void *ptr, void *arg)
2132 {
2133 	struct efx_tc_recirc_id *rid = ptr;
2134 	struct efx_nic *efx = arg;
2135 
2136 	WARN_ON(refcount_read(&rid->ref));
2137 	ida_free(&efx->tc->recirc_ida, rid->fw_id);
2138 	kfree(rid);
2139 }
2140 
2141 static void efx_tc_lhs_free(void *ptr, void *arg)
2142 {
2143 	struct efx_tc_lhs_rule *rule = ptr;
2144 	struct efx_nic *efx = arg;
2145 
2146 	netif_err(efx, drv, efx->net_dev,
2147 		  "tc lhs_rule %lx still present at teardown, removing\n",
2148 		  rule->cookie);
2149 
2150 	if (rule->lhs_act.zone)
2151 		efx_tc_ct_unregister_zone(efx, rule->lhs_act.zone);
2152 	if (rule->lhs_act.count)
2153 		efx_tc_flower_put_counter_index(efx, rule->lhs_act.count);
2154 	efx_mae_remove_lhs_rule(efx, rule);
2155 
2156 	kfree(rule);
2157 }
2158 
2159 static void efx_tc_flow_free(void *ptr, void *arg)
2160 {
2161 	struct efx_tc_flow_rule *rule = ptr;
2162 	struct efx_nic *efx = arg;
2163 
2164 	netif_err(efx, drv, efx->net_dev,
2165 		  "tc rule %lx still present at teardown, removing\n",
2166 		  rule->cookie);
2167 
2168 	/* Also releases entries in subsidiary tables */
2169 	efx_tc_delete_rule(efx, rule);
2170 
2171 	kfree(rule);
2172 }
2173 
2174 int efx_init_struct_tc(struct efx_nic *efx)
2175 {
2176 	int rc;
2177 
2178 	if (efx->type->is_vf)
2179 		return 0;
2180 
2181 	efx->tc = kzalloc(sizeof(*efx->tc), GFP_KERNEL);
2182 	if (!efx->tc)
2183 		return -ENOMEM;
2184 	efx->tc->caps = kzalloc(sizeof(struct mae_caps), GFP_KERNEL);
2185 	if (!efx->tc->caps) {
2186 		rc = -ENOMEM;
2187 		goto fail_alloc_caps;
2188 	}
2189 	INIT_LIST_HEAD(&efx->tc->block_list);
2190 
2191 	mutex_init(&efx->tc->mutex);
2192 	init_waitqueue_head(&efx->tc->flush_wq);
2193 	rc = efx_tc_init_encap_actions(efx);
2194 	if (rc < 0)
2195 		goto fail_encap_actions;
2196 	rc = efx_tc_init_counters(efx);
2197 	if (rc < 0)
2198 		goto fail_counters;
2199 	rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params);
2200 	if (rc < 0)
2201 		goto fail_encap_match_ht;
2202 	rc = rhashtable_init(&efx->tc->match_action_ht, &efx_tc_match_action_ht_params);
2203 	if (rc < 0)
2204 		goto fail_match_action_ht;
2205 	rc = rhashtable_init(&efx->tc->lhs_rule_ht, &efx_tc_lhs_rule_ht_params);
2206 	if (rc < 0)
2207 		goto fail_lhs_rule_ht;
2208 	rc = efx_tc_init_conntrack(efx);
2209 	if (rc < 0)
2210 		goto fail_conntrack;
2211 	rc = rhashtable_init(&efx->tc->recirc_ht, &efx_tc_recirc_ht_params);
2212 	if (rc < 0)
2213 		goto fail_recirc_ht;
2214 	ida_init(&efx->tc->recirc_ida);
2215 	efx->tc->reps_filter_uc = -1;
2216 	efx->tc->reps_filter_mc = -1;
2217 	INIT_LIST_HEAD(&efx->tc->dflt.pf.acts.list);
2218 	efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
2219 	INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list);
2220 	efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
2221 	INIT_LIST_HEAD(&efx->tc->facts.pf.list);
2222 	efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
2223 	INIT_LIST_HEAD(&efx->tc->facts.reps.list);
2224 	efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
2225 	efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type;
2226 	return 0;
2227 fail_recirc_ht:
2228 	efx_tc_destroy_conntrack(efx);
2229 fail_conntrack:
2230 	rhashtable_destroy(&efx->tc->lhs_rule_ht);
2231 fail_lhs_rule_ht:
2232 	rhashtable_destroy(&efx->tc->match_action_ht);
2233 fail_match_action_ht:
2234 	rhashtable_destroy(&efx->tc->encap_match_ht);
2235 fail_encap_match_ht:
2236 	efx_tc_destroy_counters(efx);
2237 fail_counters:
2238 	efx_tc_destroy_encap_actions(efx);
2239 fail_encap_actions:
2240 	mutex_destroy(&efx->tc->mutex);
2241 	kfree(efx->tc->caps);
2242 fail_alloc_caps:
2243 	kfree(efx->tc);
2244 	efx->tc = NULL;
2245 	return rc;
2246 }
2247 
2248 void efx_fini_struct_tc(struct efx_nic *efx)
2249 {
2250 	if (!efx->tc)
2251 		return;
2252 
2253 	mutex_lock(&efx->tc->mutex);
2254 	EFX_WARN_ON_PARANOID(efx->tc->dflt.pf.fw_id !=
2255 			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
2256 	EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id !=
2257 			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
2258 	EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id !=
2259 			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
2260 	EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id !=
2261 			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
2262 	rhashtable_free_and_destroy(&efx->tc->lhs_rule_ht, efx_tc_lhs_free, efx);
2263 	rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free,
2264 				    efx);
2265 	rhashtable_free_and_destroy(&efx->tc->encap_match_ht,
2266 				    efx_tc_encap_match_free, NULL);
2267 	efx_tc_fini_conntrack(efx);
2268 	rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx);
2269 	WARN_ON(!ida_is_empty(&efx->tc->recirc_ida));
2270 	ida_destroy(&efx->tc->recirc_ida);
2271 	efx_tc_fini_counters(efx);
2272 	efx_tc_fini_encap_actions(efx);
2273 	mutex_unlock(&efx->tc->mutex);
2274 	mutex_destroy(&efx->tc->mutex);
2275 	kfree(efx->tc->caps);
2276 	kfree(efx->tc);
2277 	efx->tc = NULL;
2278 }
2279