// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2021 Corigine, Inc. */

#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_ct.h>

#include "conntrack.h"
#include "../nfp_port.h"

const struct rhashtable_params nfp_tc_ct_merge_params = {
	.head_offset		= offsetof(struct nfp_fl_ct_tc_merge,
					   hash_node),
	.key_len		= sizeof(unsigned long) * 2,
	.key_offset		= offsetof(struct nfp_fl_ct_tc_merge, cookie),
	.automatic_shrinking	= true,
};

const struct rhashtable_params nfp_nft_ct_merge_params = {
	.head_offset		= offsetof(struct nfp_fl_nft_tc_merge,
					   hash_node),
	.key_len		= sizeof(unsigned long) * 3,
	.key_offset		= offsetof(struct nfp_fl_nft_tc_merge, cookie),
	.automatic_shrinking	= true,
};

static struct flow_action_entry *get_flow_act(struct flow_rule *rule,
					      enum flow_action_id act_id);

/**
 * get_hashentry() - Wrapper around hashtable lookup.
 * @ht:		hashtable where entry could be found
 * @key:	key to lookup
 * @params:	hashtable params
 * @size:	size of entry to allocate if not in table
 *
 * Returns an entry from a hashtable. If the entry does not exist
 * yet, allocate memory for it and return the new entry.
 */
static void *get_hashentry(struct rhashtable *ht, void *key,
			   const struct rhashtable_params params, size_t size)
{
	void *result;

	result = rhashtable_lookup_fast(ht, key, params);

	if (result)
		return result;

	result = kzalloc(size, GFP_KERNEL);
	if (!result)
		return ERR_PTR(-ENOMEM);

	return result;
}

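/* A flow is a pre_ct flow when its conntrack action is a plain ct or
 * ct nat action, i.e. it sends packets to conntrack without carrying
 * any other ct verb such as commit or clear.
 */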
bool is_pre_ct_flow(struct flow_cls_offload *flow)
{
	struct flow_action_entry *act;
	int i;

	flow_action_for_each(i, act, &flow->rule->action) {
		if (act->id == FLOW_ACTION_CT) {
			/* The pre_ct rule only has the ct or ct nat action;
			 * it cannot contain other ct actions, e.g. ct commit
			 * and so on.
			 */
			if (!act->ct.action || act->ct.action == TCA_CT_ACT_NAT)
				return true;
			else
				return false;
		}
	}

	return false;
}

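/* A flow is a post_ct flow when it matches on an established ct state,
 * or, in the ct NAT case, when it sits on a non-zero chain and carries
 * a ct clear action instead of a ct state match.
 */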
bool is_post_ct_flow(struct flow_cls_offload *flow)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
	struct flow_dissector *dissector = rule->match.dissector;
	struct flow_action_entry *act;
	bool exist_ct_clear = false;
	struct flow_match_ct ct;
	int i;

	/* A post ct entry cannot contain any ct action except ct_clear. */
	flow_action_for_each(i, act, &flow->rule->action) {
		if (act->id == FLOW_ACTION_CT) {
			/* Ignore the ct clear action. */
			if (act->ct.action == TCA_CT_ACT_CLEAR) {
				exist_ct_clear = true;
				continue;
			}

			return false;
		}
	}

	if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) {
		flow_rule_match_ct(rule, &ct);
		if (ct.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)
			return true;
	} else {
		/* When doing NAT with ct, the post ct entry ignores the ct
		 * status and matches on the NAT fields (sip/dip) instead. In
		 * this situation the flow chain index is non-zero and the
		 * rule contains a ct clear action.
		 */
		if (flow->common.chain_index && exist_ct_clear)
			return true;
	}

	return false;
}

/**
 * get_mangled_key() - Mangle the key if mangle act exists
 * @rule:	rule that carries the actions
 * @buf:	pointer to key to be mangled
 * @offset:	used to adjust mangled offset in L2/L3/L4 header
 * @key_sz:	key size
 * @htype:	mangling type
 *
 * Returns buf where the mangled key is stored.
 */
static void *get_mangled_key(struct flow_rule *rule, void *buf,
			     u32 offset, size_t key_sz,
			     enum flow_action_mangle_base htype)
{
	struct flow_action_entry *act;
	u32 *val = (u32 *)buf;
	u32 off, msk, key;
	int i;

	flow_action_for_each(i, act, &rule->action) {
		if (act->id == FLOW_ACTION_MANGLE &&
		    act->mangle.htype == htype) {
			off = act->mangle.offset - offset;
			msk = act->mangle.mask;
			key = act->mangle.val;

			/* Mangling is supposed to be u32 aligned */
			if (off % 4 || off >= key_sz)
				continue;

			val[off >> 2] &= msk;
			val[off >> 2] |= key;
		}
	}

	return buf;
}

/* Only tos and ttl are involved in the flow_match_ip structure, which
 * doesn't conform to the layout of the ip/ipv6 header definitions. So
 * they need particular processing here: fill them into the ip/ipv6
 * header, so that mangling actions can work directly.
 */
#define NFP_IPV4_TOS_MASK	GENMASK(23, 16)
#define NFP_IPV4_TTL_MASK	GENMASK(31, 24)
#define NFP_IPV6_TCLASS_MASK	GENMASK(27, 20)
#define NFP_IPV6_HLIMIT_MASK	GENMASK(7, 0)
static void *get_mangled_tos_ttl(struct flow_rule *rule, void *buf,
				 bool is_v6)
{
	struct flow_match_ip match;
	/* IPv4's ttl field is in the third dword. */
	__be32 ip_hdr[3];
	u32 tmp, hdr_len;

	flow_rule_match_ip(rule, &match);

	if (is_v6) {
		tmp = FIELD_PREP(NFP_IPV6_TCLASS_MASK, match.key->tos);
		ip_hdr[0] = cpu_to_be32(tmp);
		tmp = FIELD_PREP(NFP_IPV6_HLIMIT_MASK, match.key->ttl);
		ip_hdr[1] = cpu_to_be32(tmp);
		hdr_len = 2 * sizeof(__be32);
	} else {
		tmp = FIELD_PREP(NFP_IPV4_TOS_MASK, match.key->tos);
		ip_hdr[0] = cpu_to_be32(tmp);
		tmp = FIELD_PREP(NFP_IPV4_TTL_MASK, match.key->ttl);
		ip_hdr[2] = cpu_to_be32(tmp);
		hdr_len = 3 * sizeof(__be32);
	}

	get_mangled_key(rule, ip_hdr, 0, hdr_len,
			is_v6 ? FLOW_ACT_MANGLE_HDR_TYPE_IP6 :
				FLOW_ACT_MANGLE_HDR_TYPE_IP4);

	match.key = buf;

	if (is_v6) {
		tmp = be32_to_cpu(ip_hdr[0]);
		match.key->tos = FIELD_GET(NFP_IPV6_TCLASS_MASK, tmp);
		tmp = be32_to_cpu(ip_hdr[1]);
		match.key->ttl = FIELD_GET(NFP_IPV6_HLIMIT_MASK, tmp);
	} else {
		tmp = be32_to_cpu(ip_hdr[0]);
		match.key->tos = FIELD_GET(NFP_IPV4_TOS_MASK, tmp);
		tmp = be32_to_cpu(ip_hdr[2]);
		match.key->ttl = FIELD_GET(NFP_IPV4_TTL_MASK, tmp);
	}

	return buf;
}

/* Note entry1 and entry2 are not swappable. Only skip the ip and
 * tport merge checks between pre_ct and post_ct when pre_ct does NAT.
 */
static bool nfp_ct_merge_check_cannot_skip(struct nfp_fl_ct_flow_entry *entry1,
					   struct nfp_fl_ct_flow_entry *entry2)
{
	/* Only pre_ct can have the NFP_FL_ACTION_DO_NAT flag. */
	if ((entry1->flags & NFP_FL_ACTION_DO_NAT) &&
	    entry2->type == CT_TYPE_POST_CT)
		return false;

	return true;
}

/* Note entry1 and entry2 are not swappable: entry1 should be the
 * former flow, whose mangle actions need to be taken into account if
 * they exist, and entry2 should be the latter flow, whose actions we
 * don't care about.
 */
static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1,
			      struct nfp_fl_ct_flow_entry *entry2)
{
	unsigned int ovlp_keys = entry1->rule->match.dissector->used_keys &
				 entry2->rule->match.dissector->used_keys;
	bool out, is_v6 = false;
	u8 ip_proto = 0;
	/* Temporary buffer for mangling keys; 64 is enough to cover the max
	 * struct size of a key in the various fields that may be mangled.
	 * Supported fields to mangle:
	 * mac_src/mac_dst (struct flow_match_eth_addrs, 12B)
	 * nw_tos/nw_ttl (struct flow_match_ip, 2B)
	 * nw_src/nw_dst (struct flow_match_ipv4/6_addrs, 32B)
	 * tp_src/tp_dst (struct flow_match_ports, 4B)
	 */
	char buf[64];

	if (entry1->netdev && entry2->netdev &&
	    entry1->netdev != entry2->netdev)
		return -EINVAL;

	/* Check the overlapped fields one by one; the unmasked parts
	 * should not conflict with each other.
	 */
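	/* E.g. if entry1 matches dst IP 10.0.0.1/32 and entry2 matches
	 * dst IP 10.0.0.2/32, the bits covered by both masks differ, so
	 * no packet can hit both flows and the merge is rejected.
	 */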
	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match1, match2;

		flow_rule_match_control(entry1->rule, &match1);
		flow_rule_match_control(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match1, match2;

		flow_rule_match_basic(entry1->rule, &match1);
		flow_rule_match_basic(entry2->rule, &match2);

		/* The n_proto field is a must in ct-related flows;
		 * it should be either ipv4 or ipv6.
		 */
		is_v6 = match1.key->n_proto == htons(ETH_P_IPV6);
		/* The ip_proto field is a must when the port fields are matched. */
		ip_proto = match1.key->ip_proto;

		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	/* If the pre ct entry does NAT, the NAT ip exists in the nft entry;
	 * the merge check is done when the nft and post ct entries are
	 * merged, so skip this ip merge check here.
	 */
	if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS)) &&
	    nfp_ct_merge_check_cannot_skip(entry1, entry2)) {
		struct flow_match_ipv4_addrs match1, match2;

		flow_rule_match_ipv4_addrs(entry1->rule, &match1);
		flow_rule_match_ipv4_addrs(entry2->rule, &match2);

		memcpy(buf, match1.key, sizeof(*match1.key));
		match1.key = get_mangled_key(entry1->rule, buf,
					     offsetof(struct iphdr, saddr),
					     sizeof(*match1.key),
					     FLOW_ACT_MANGLE_HDR_TYPE_IP4);

		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	/* If the pre ct entry does NAT, the NAT ip exists in the nft entry;
	 * the merge check is done when the nft and post ct entries are
	 * merged, so skip this ip merge check here.
	 */
	if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS)) &&
	    nfp_ct_merge_check_cannot_skip(entry1, entry2)) {
		struct flow_match_ipv6_addrs match1, match2;

		flow_rule_match_ipv6_addrs(entry1->rule, &match1);
		flow_rule_match_ipv6_addrs(entry2->rule, &match2);

		memcpy(buf, match1.key, sizeof(*match1.key));
		match1.key = get_mangled_key(entry1->rule, buf,
					     offsetof(struct ipv6hdr, saddr),
					     sizeof(*match1.key),
					     FLOW_ACT_MANGLE_HDR_TYPE_IP6);

		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	/* If the pre ct entry does NAT, the NAT tport exists in the nft
	 * entry; the merge check is done when the nft and post ct entries
	 * are merged, so skip this tport merge check here.
	 */
	if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_PORTS)) &&
	    nfp_ct_merge_check_cannot_skip(entry1, entry2)) {
		enum flow_action_mangle_base htype = FLOW_ACT_MANGLE_UNSPEC;
		struct flow_match_ports match1, match2;

		flow_rule_match_ports(entry1->rule, &match1);
		flow_rule_match_ports(entry2->rule, &match2);

		if (ip_proto == IPPROTO_UDP)
			htype = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		else if (ip_proto == IPPROTO_TCP)
			htype = FLOW_ACT_MANGLE_HDR_TYPE_TCP;

		memcpy(buf, match1.key, sizeof(*match1.key));
		match1.key = get_mangled_key(entry1->rule, buf, 0,
					     sizeof(*match1.key), htype);

		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_match_eth_addrs match1, match2;

		flow_rule_match_eth_addrs(entry1->rule, &match1);
		flow_rule_match_eth_addrs(entry2->rule, &match2);

		memcpy(buf, match1.key, sizeof(*match1.key));
		match1.key = get_mangled_key(entry1->rule, buf, 0,
					     sizeof(*match1.key),
					     FLOW_ACT_MANGLE_HDR_TYPE_ETH);

		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_match_vlan match1, match2;

		flow_rule_match_vlan(entry1->rule, &match1);
		flow_rule_match_vlan(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_MPLS)) {
		struct flow_match_mpls match1, match2;

		flow_rule_match_mpls(entry1->rule, &match1);
		flow_rule_match_mpls(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match1, match2;

		flow_rule_match_tcp(entry1->rule, &match1);
		flow_rule_match_tcp(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IP)) {
		struct flow_match_ip match1, match2;

		flow_rule_match_ip(entry1->rule, &match1);
		flow_rule_match_ip(entry2->rule, &match2);

		match1.key = get_mangled_tos_ttl(entry1->rule, buf, is_v6);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_KEYID)) {
		struct flow_match_enc_keyid match1, match2;

		flow_rule_match_enc_keyid(entry1->rule, &match1);
		flow_rule_match_enc_keyid(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
		struct flow_match_ipv4_addrs match1, match2;

		flow_rule_match_enc_ipv4_addrs(entry1->rule, &match1);
		flow_rule_match_enc_ipv4_addrs(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
		struct flow_match_ipv6_addrs match1, match2;

		flow_rule_match_enc_ipv6_addrs(entry1->rule, &match1);
		flow_rule_match_enc_ipv6_addrs(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_match_control match1, match2;

		flow_rule_match_enc_control(entry1->rule, &match1);
		flow_rule_match_enc_control(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_IP)) {
		struct flow_match_ip match1, match2;

		flow_rule_match_enc_ip(entry1->rule, &match1);
		flow_rule_match_enc_ip(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_ENC_OPTS)) {
		struct flow_match_enc_opts match1, match2;

		flow_rule_match_enc_opts(entry1->rule, &match1);
		flow_rule_match_enc_opts(entry2->rule, &match2);
		COMPARE_UNMASKED_FIELDS(match1, match2, &out);
		if (out)
			goto check_failed;
	}

	return 0;

check_failed:
	return -EINVAL;
}

static int nfp_ct_check_vlan_merge(struct flow_action_entry *a_in,
				   struct flow_rule *rule)
{
	struct flow_match_vlan match;

	if (unlikely(flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)))
		return -EOPNOTSUPP;

	/* post_ct does not match on the VLAN key, so they can be merged. */
	if (likely(!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)))
		return 0;

	switch (a_in->id) {
	/* pre_ct pops the vlan, so post_ct cannot match on the VLAN key;
	 * they cannot be merged.
	 */
	case FLOW_ACTION_VLAN_POP:
		return -EOPNOTSUPP;

	case FLOW_ACTION_VLAN_PUSH:
	case FLOW_ACTION_VLAN_MANGLE:
		flow_rule_match_vlan(rule, &match);
		/* Different vlan id, cannot be merged. */
		if ((match.key->vlan_id & match.mask->vlan_id) ^
		    (a_in->vlan.vid & match.mask->vlan_id))
			return -EOPNOTSUPP;

		/* Different tpid, cannot be merged. */
		if ((match.key->vlan_tpid & match.mask->vlan_tpid) ^
		    (a_in->vlan.proto & match.mask->vlan_tpid))
			return -EOPNOTSUPP;

		/* Different priority, cannot be merged. */
		if ((match.key->vlan_priority & match.mask->vlan_priority) ^
		    (a_in->vlan.prio & match.mask->vlan_priority))
			return -EOPNOTSUPP;

		break;
	default:
		return -EOPNOTSUPP;
	}

	return 0;
}

static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry,
				  struct nfp_fl_ct_flow_entry *post_ct_entry,
				  struct nfp_fl_ct_flow_entry *nft_entry)
{
	struct flow_action_entry *act;
	int i, err;

	/* Check for pre_ct->action conflicts */
	flow_action_for_each(i, act, &pre_ct_entry->rule->action) {
		switch (act->id) {
		case FLOW_ACTION_VLAN_PUSH:
		case FLOW_ACTION_VLAN_POP:
		case FLOW_ACTION_VLAN_MANGLE:
			err = nfp_ct_check_vlan_merge(act, post_ct_entry->rule);
			if (err)
				return err;
			break;
		case FLOW_ACTION_MPLS_PUSH:
		case FLOW_ACTION_MPLS_POP:
		case FLOW_ACTION_MPLS_MANGLE:
			return -EOPNOTSUPP;
		default:
			break;
		}
	}

	/* Check for nft->action conflicts */
	flow_action_for_each(i, act, &nft_entry->rule->action) {
		switch (act->id) {
		case FLOW_ACTION_VLAN_PUSH:
		case FLOW_ACTION_VLAN_POP:
		case FLOW_ACTION_VLAN_MANGLE:
		case FLOW_ACTION_MPLS_PUSH:
		case FLOW_ACTION_MPLS_POP:
		case FLOW_ACTION_MPLS_MANGLE:
			return -EOPNOTSUPP;
		default:
			break;
		}
	}
	return 0;
}

static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry,
			     struct nfp_fl_ct_flow_entry *nft_entry)
{
	struct flow_dissector *dissector = post_ct_entry->rule->match.dissector;
	struct flow_action_entry *ct_met;
	struct flow_match_ct ct;
	int i;

	ct_met = get_flow_act(nft_entry->rule, FLOW_ACTION_CT_METADATA);
	if (ct_met && (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT))) {
		u32 *act_lbl;

		act_lbl = ct_met->ct_metadata.labels;
		flow_rule_match_ct(post_ct_entry->rule, &ct);
		for (i = 0; i < 4; i++) {
			if ((ct.key->ct_labels[i] & ct.mask->ct_labels[i]) ^
			    (act_lbl[i] & ct.mask->ct_labels[i]))
				return -EINVAL;
		}

		if ((ct.key->ct_mark & ct.mask->ct_mark) ^
		    (ct_met->ct_metadata.mark & ct.mask->ct_mark))
			return -EINVAL;

		return 0;
	} else {
		/* A post_ct entry with a ct clear action will not match the
		 * ct status when the nft entry does NAT.
		 */
		if (nft_entry->flags & NFP_FL_ACTION_DO_MANGLE)
			return 0;
	}

	return -EINVAL;
}

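/* Assign each layer present in the merged key its byte offset within
 * the flattened key and accumulate the total key size. E.g. a flow
 * with only the META_TCI, PORT and IPV4 layers gets
 * map[FLOW_PAY_META_TCI] = 0, with map[FLOW_PAY_INPORT] and
 * map[FLOW_PAY_IPV4] at the running offsets, and the sum of the three
 * struct sizes is returned.
 */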
static int
nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map)
{
	int key_size;

	/* This field must always be present */
	key_size = sizeof(struct nfp_flower_meta_tci);
	map[FLOW_PAY_META_TCI] = 0;

	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_EXT_META) {
		map[FLOW_PAY_EXT_META] = key_size;
		key_size += sizeof(struct nfp_flower_ext_meta);
	}
	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_PORT) {
		map[FLOW_PAY_INPORT] = key_size;
		key_size += sizeof(struct nfp_flower_in_port);
	}
	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_MAC) {
		map[FLOW_PAY_MAC_MPLS] = key_size;
		key_size += sizeof(struct nfp_flower_mac_mpls);
	}
	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_TP) {
		map[FLOW_PAY_L4] = key_size;
		key_size += sizeof(struct nfp_flower_tp_ports);
	}
	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV4) {
		map[FLOW_PAY_IPV4] = key_size;
		key_size += sizeof(struct nfp_flower_ipv4);
	}
	if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV6) {
		map[FLOW_PAY_IPV6] = key_size;
		key_size += sizeof(struct nfp_flower_ipv6);
	}

	if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) {
		map[FLOW_PAY_QINQ] = key_size;
		key_size += sizeof(struct nfp_flower_vlan);
	}

	if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
		map[FLOW_PAY_GRE] = key_size;
		if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6)
			key_size += sizeof(struct nfp_flower_ipv6_gre_tun);
		else
			key_size += sizeof(struct nfp_flower_ipv4_gre_tun);
	}

	if ((in_key_ls.key_layer & NFP_FLOWER_LAYER_VXLAN) ||
	    (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE)) {
		map[FLOW_PAY_UDP_TUN] = key_size;
		if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6)
			key_size += sizeof(struct nfp_flower_ipv6_udp_tun);
		else
			key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
	}

	if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
		map[FLOW_PAY_GENEVE_OPT] = key_size;
		key_size += sizeof(struct nfp_flower_geneve_options);
	}

	return key_size;
}

/* Get the csum flag according to the ip proto and the mangle action. */
static void nfp_fl_get_csum_flag(struct flow_action_entry *a_in, u8 ip_proto, u32 *csum)
{
	if (a_in->id != FLOW_ACTION_MANGLE)
		return;

	switch (a_in->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		*csum |= TCA_CSUM_UPDATE_FLAG_IPV4HDR;
		if (ip_proto == IPPROTO_TCP)
			*csum |= TCA_CSUM_UPDATE_FLAG_TCP;
		else if (ip_proto == IPPROTO_UDP)
			*csum |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
		*csum |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		*csum |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	default:
		break;
	}
}

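/* Flatten the pre_ct, nft and post_ct action lists into a single rule
 * and compile it for the firmware. CT bookkeeping actions are dropped,
 * nft mangle actions inherit the post_ct stats type, and a checksum
 * action is appended when NAT mangling requires one.
 */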
static int nfp_fl_merge_actions_offload(struct flow_rule **rules,
					struct nfp_flower_priv *priv,
					struct net_device *netdev,
					struct nfp_fl_payload *flow_pay)
{
	enum flow_action_hw_stats tmp_stats = FLOW_ACTION_HW_STATS_DONT_CARE;
	struct flow_action_entry *a_in;
	int i, j, num_actions, id;
	struct flow_rule *a_rule;
	int err = 0, offset = 0;

	num_actions = rules[CT_TYPE_PRE_CT]->action.num_entries +
		      rules[CT_TYPE_NFT]->action.num_entries +
		      rules[CT_TYPE_POST_CT]->action.num_entries;

	/* Add one action to make sure there is enough room to add a
	 * checksum action when doing NAT.
	 */
	a_rule = flow_rule_alloc(num_actions + 1);
	if (!a_rule)
		return -ENOMEM;

	/* Actions need a BASIC dissector. */
	a_rule->match = rules[CT_TYPE_PRE_CT]->match;
	/* A post_ct entry has at least one action. */
	if (rules[CT_TYPE_POST_CT]->action.num_entries != 0)
		tmp_stats = rules[CT_TYPE_POST_CT]->action.entries[0].hw_stats;

	/* Copy actions */
	for (j = 0; j < _CT_TYPE_MAX; j++) {
		u32 csum_updated = 0;
		u8 ip_proto = 0;

		if (flow_rule_match_key(rules[j], FLOW_DISSECTOR_KEY_BASIC)) {
			struct flow_match_basic match;

			/* ip_proto is the only field that is needed in the
			 * later compile_action, to set the correct checksum
			 * flags. It doesn't really matter which input rule's
			 * ip_proto field we take, as the earlier merge checks
			 * would have made sure that they don't conflict. We do
			 * not know which of the subflows would have the
			 * ip_proto filled in, so we need to iterate through
			 * the subflows and assign the proper subflow to a_rule.
			 */
			flow_rule_match_basic(rules[j], &match);
			if (match.mask->ip_proto) {
				a_rule->match = rules[j]->match;
				ip_proto = match.key->ip_proto;
			}
		}

		for (i = 0; i < rules[j]->action.num_entries; i++) {
			a_in = &rules[j]->action.entries[i];
			id = a_in->id;

			/* Ignore CT related actions as these would already have
			 * been taken care of by previous checks, and we do not send
			 * any CT actions to the firmware.
			 */
			switch (id) {
			case FLOW_ACTION_CT:
			case FLOW_ACTION_GOTO:
			case FLOW_ACTION_CT_METADATA:
				continue;
			default:
				/* The nft entry is generated by tc ct, whose
				 * mangle actions do not care about stats;
				 * inherit the post_ct entry stats to satisfy
				 * the flow_action_hw_stats_check.
				 */
				if (j == CT_TYPE_NFT) {
					if (a_in->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE)
						a_in->hw_stats = tmp_stats;
					nfp_fl_get_csum_flag(a_in, ip_proto, &csum_updated);
				}
				memcpy(&a_rule->action.entries[offset++],
				       a_in, sizeof(struct flow_action_entry));
				break;
			}
		}
		/* The nft entry has mangle actions but no checksum action when
		 * doing NAT; hardware will automatically fix the IPv4 and
		 * TCP/UDP checksums, so add a csum action to satisfy the csum
		 * action check.
		 */
		if (csum_updated) {
			struct flow_action_entry *csum_action;

			csum_action = &a_rule->action.entries[offset++];
			csum_action->id = FLOW_ACTION_CSUM;
			csum_action->csum_flags = csum_updated;
			csum_action->hw_stats = tmp_stats;
		}
	}

	/* Some actions would have been ignored, so update the num_entries field */
	a_rule->action.num_entries = offset;
	err = nfp_flower_compile_action(priv->app, a_rule, netdev, flow_pay, NULL);
	kfree(a_rule);

	return err;
}

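/* Build and offload the flow resulting from a full pre_ct/nft/post_ct
 * merge: take the union of the three rules' key layers, compile each
 * layer's masked match data into a single key/mask pair, merge the
 * action lists and send the resulting flow to the firmware.
 */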
static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
{
	enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
	struct nfp_fl_ct_zone_entry *zt = m_entry->zt;
	struct nfp_fl_key_ls key_layer, tmp_layer;
	struct nfp_flower_priv *priv = zt->priv;
	u16 key_map[_FLOW_PAY_LAYERS_MAX];
	struct nfp_fl_payload *flow_pay;

	struct flow_rule *rules[_CT_TYPE_MAX];
	u8 *key, *msk, *kdata, *mdata;
	struct nfp_port *port = NULL;
	struct net_device *netdev;
	bool qinq_sup;
	u32 port_id;
	u16 offset;
	int i, err;

	netdev = m_entry->netdev;
	qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ);

	rules[CT_TYPE_PRE_CT] = m_entry->tc_m_parent->pre_ct_parent->rule;
	rules[CT_TYPE_NFT] = m_entry->nft_parent->rule;
	rules[CT_TYPE_POST_CT] = m_entry->tc_m_parent->post_ct_parent->rule;

	memset(&key_layer, 0, sizeof(struct nfp_fl_key_ls));
	memset(&key_map, 0, sizeof(key_map));

	/* Calculate the resultant key layer and size for offload */
	for (i = 0; i < _CT_TYPE_MAX; i++) {
		err = nfp_flower_calculate_key_layers(priv->app,
						      m_entry->netdev,
						      &tmp_layer, rules[i],
						      &tun_type, NULL);
		if (err)
			return err;

		key_layer.key_layer |= tmp_layer.key_layer;
		key_layer.key_layer_two |= tmp_layer.key_layer_two;
	}
	key_layer.key_size = nfp_fl_calc_key_layers_sz(key_layer, key_map);

	flow_pay = nfp_flower_allocate_new(&key_layer);
	if (!flow_pay)
		return -ENOMEM;

	memset(flow_pay->unmasked_data, 0, key_layer.key_size);
	memset(flow_pay->mask_data, 0, key_layer.key_size);

	kdata = flow_pay->unmasked_data;
	mdata = flow_pay->mask_data;

	offset = key_map[FLOW_PAY_META_TCI];
	key = kdata + offset;
	msk = mdata + offset;
	nfp_flower_compile_meta((struct nfp_flower_meta_tci *)key,
				(struct nfp_flower_meta_tci *)msk,
				key_layer.key_layer);

	if (NFP_FLOWER_LAYER_EXT_META & key_layer.key_layer) {
		offset = key_map[FLOW_PAY_EXT_META];
		key = kdata + offset;
		msk = mdata + offset;
		nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)key,
					    key_layer.key_layer_two);
		nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)msk,
					    key_layer.key_layer_two);
	}

	/* Using in_port from the -trk rule. The tc merge checks should already
	 * be checking that the ingress netdevs are the same.
	 */
	port_id = nfp_flower_get_port_id_from_netdev(priv->app, netdev);
	offset = key_map[FLOW_PAY_INPORT];
	key = kdata + offset;
	msk = mdata + offset;
	err = nfp_flower_compile_port((struct nfp_flower_in_port *)key,
				      port_id, false, tun_type, NULL);
	if (err)
		goto ct_offload_err;
	err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
				      port_id, true, tun_type, NULL);
	if (err)
		goto ct_offload_err;

	/* The following part works on the assumption that previous checks have
	 * already filtered out flows that have different values for the
	 * different layers. Here we iterate through all three rules and merge
	 * their respective masked values (cared bits); the basic method is:
	 * final_key = (r1_key & r1_mask) | (r2_key & r2_mask) | (r3_key & r3_mask)
	 * final_mask = r1_mask | r2_mask | r3_mask
	 * If none of the rules contains a match that is also fine; that simply
	 * means that the layer is not present.
	 */
	if (!qinq_sup) {
		for (i = 0; i < _CT_TYPE_MAX; i++) {
			offset = key_map[FLOW_PAY_META_TCI];
			key = kdata + offset;
			msk = mdata + offset;
			nfp_flower_compile_tci((struct nfp_flower_meta_tci *)key,
					       (struct nfp_flower_meta_tci *)msk,
					       rules[i]);
		}
	}

	if (NFP_FLOWER_LAYER_MAC & key_layer.key_layer) {
		offset = key_map[FLOW_PAY_MAC_MPLS];
		key = kdata + offset;
		msk = mdata + offset;
		for (i = 0; i < _CT_TYPE_MAX; i++) {
			nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)key,
					       (struct nfp_flower_mac_mpls *)msk,
					       rules[i]);
			err = nfp_flower_compile_mpls((struct nfp_flower_mac_mpls *)key,
						      (struct nfp_flower_mac_mpls *)msk,
						      rules[i], NULL);
			if (err)
				goto ct_offload_err;
		}
	}

	if (NFP_FLOWER_LAYER_IPV4 & key_layer.key_layer) {
		offset = key_map[FLOW_PAY_IPV4];
		key = kdata + offset;
		msk = mdata + offset;
		for (i = 0; i < _CT_TYPE_MAX; i++) {
			nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)key,
						(struct nfp_flower_ipv4 *)msk,
						rules[i]);
		}
	}

	if (NFP_FLOWER_LAYER_IPV6 & key_layer.key_layer) {
		offset = key_map[FLOW_PAY_IPV6];
		key = kdata + offset;
		msk = mdata + offset;
		for (i = 0; i < _CT_TYPE_MAX; i++) {
			nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)key,
						(struct nfp_flower_ipv6 *)msk,
						rules[i]);
		}
	}

	if (NFP_FLOWER_LAYER_TP & key_layer.key_layer) {
		offset = key_map[FLOW_PAY_L4];
		key = kdata + offset;
		msk = mdata + offset;
		for (i = 0; i < _CT_TYPE_MAX; i++) {
			nfp_flower_compile_tport((struct nfp_flower_tp_ports *)key,
						 (struct nfp_flower_tp_ports *)msk,
						 rules[i]);
		}
	}

	if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) {
		offset = key_map[FLOW_PAY_QINQ];
		key = kdata + offset;
		msk = mdata + offset;
		for (i = 0; i < _CT_TYPE_MAX; i++) {
			nfp_flower_compile_vlan((struct nfp_flower_vlan *)key,
						(struct nfp_flower_vlan *)msk,
						rules[i]);
		}
	}

	if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
		offset = key_map[FLOW_PAY_GRE];
		key = kdata + offset;
		msk = mdata + offset;
		if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
			struct nfp_flower_ipv6_gre_tun *gre_match;
			struct nfp_ipv6_addr_entry *entry;
			struct in6_addr *dst;

			for (i = 0; i < _CT_TYPE_MAX; i++) {
				nfp_flower_compile_ipv6_gre_tun((void *)key,
								(void *)msk, rules[i]);
			}
			gre_match = (struct nfp_flower_ipv6_gre_tun *)key;
			dst = &gre_match->ipv6.dst;

			entry = nfp_tunnel_add_ipv6_off(priv->app, dst);
			if (!entry) {
				err = -ENOMEM;
				goto ct_offload_err;
			}

			flow_pay->nfp_tun_ipv6 = entry;
		} else {
			__be32 dst;

			for (i = 0; i < _CT_TYPE_MAX; i++) {
				nfp_flower_compile_ipv4_gre_tun((void *)key,
								(void *)msk, rules[i]);
			}
			dst = ((struct nfp_flower_ipv4_gre_tun *)key)->ipv4.dst;

			/* Store the tunnel destination in the rule data.
			 * This must be present and be an exact match.
			 */
			flow_pay->nfp_tun_ipv4_addr = dst;
			nfp_tunnel_add_ipv4_off(priv->app, dst);
		}
	}

	if (key_layer.key_layer & NFP_FLOWER_LAYER_VXLAN ||
	    key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
		offset = key_map[FLOW_PAY_UDP_TUN];
		key = kdata + offset;
		msk = mdata + offset;
		if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
			struct nfp_flower_ipv6_udp_tun *udp_match;
			struct nfp_ipv6_addr_entry *entry;
			struct in6_addr *dst;

			for (i = 0; i < _CT_TYPE_MAX; i++) {
				nfp_flower_compile_ipv6_udp_tun((void *)key,
								(void *)msk, rules[i]);
			}
			udp_match = (struct nfp_flower_ipv6_udp_tun *)key;
			dst = &udp_match->ipv6.dst;

			entry = nfp_tunnel_add_ipv6_off(priv->app, dst);
			if (!entry) {
				err = -ENOMEM;
				goto ct_offload_err;
			}

			flow_pay->nfp_tun_ipv6 = entry;
		} else {
			__be32 dst;

			for (i = 0; i < _CT_TYPE_MAX; i++) {
				nfp_flower_compile_ipv4_udp_tun((void *)key,
								(void *)msk, rules[i]);
			}
			dst = ((struct nfp_flower_ipv4_udp_tun *)key)->ipv4.dst;

			/* Store the tunnel destination in the rule data.
			 * This must be present and be an exact match.
			 */
			flow_pay->nfp_tun_ipv4_addr = dst;
			nfp_tunnel_add_ipv4_off(priv->app, dst);
		}

		if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
			offset = key_map[FLOW_PAY_GENEVE_OPT];
			key = kdata + offset;
			msk = mdata + offset;
			for (i = 0; i < _CT_TYPE_MAX; i++)
				nfp_flower_compile_geneve_opt(key, msk, rules[i]);
		}
	}

	/* Merge actions into flow_pay */
	err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay);
	if (err)
		goto ct_offload_err;

	/* Use the pointer address as the cookie, but set the last bit to 1.
	 * This is to avoid the 'is_merge_flow' check from detecting this as
	 * an already merged flow. This works since address alignment means
	 * that the last bit for pointer addresses will be 0.
	 */
	flow_pay->tc_flower_cookie = ((unsigned long)flow_pay) | 0x1;
	err = nfp_compile_flow_metadata(priv->app, flow_pay->tc_flower_cookie,
					flow_pay, netdev, NULL);
	if (err)
		goto ct_offload_err;

	if (nfp_netdev_is_nfp_repr(netdev))
		port = nfp_port_from_netdev(netdev);

	err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node,
				     nfp_flower_table_params);
	if (err)
		goto ct_release_offload_meta_err;

	err = nfp_flower_xmit_flow(priv->app, flow_pay,
				   NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
	if (err)
		goto ct_remove_rhash_err;

	m_entry->tc_flower_cookie = flow_pay->tc_flower_cookie;
	m_entry->flow_pay = flow_pay;

	if (port)
		port->tc_offload_cnt++;

	return err;

ct_remove_rhash_err:
	WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
					    &flow_pay->fl_node,
					    nfp_flower_table_params));
ct_release_offload_meta_err:
	nfp_modify_flow_metadata(priv->app, flow_pay);
ct_offload_err:
	if (flow_pay->nfp_tun_ipv4_addr)
		nfp_tunnel_del_ipv4_off(priv->app, flow_pay->nfp_tun_ipv4_addr);
	if (flow_pay->nfp_tun_ipv6)
		nfp_tunnel_put_ipv6_off(priv->app, flow_pay->nfp_tun_ipv6);
	kfree(flow_pay->action_data);
	kfree(flow_pay->mask_data);
	kfree(flow_pay->unmasked_data);
	kfree(flow_pay);
	return err;
}

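/* Tear down a previously offloaded merged flow: release its metadata
 * and tunnel references, send a delete message to the firmware if the
 * flow made it into hardware, and free the payload.
 */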
static int nfp_fl_ct_del_offload(struct nfp_app *app, unsigned long cookie,
				 struct net_device *netdev)
{
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_payload *flow_pay;
	struct nfp_port *port = NULL;
	int err = 0;

	if (nfp_netdev_is_nfp_repr(netdev))
		port = nfp_port_from_netdev(netdev);

	flow_pay = nfp_flower_search_fl_table(app, cookie, netdev);
	if (!flow_pay)
		return -ENOENT;

	err = nfp_modify_flow_metadata(app, flow_pay);
	if (err)
		goto err_free_merge_flow;

	if (flow_pay->nfp_tun_ipv4_addr)
		nfp_tunnel_del_ipv4_off(app, flow_pay->nfp_tun_ipv4_addr);

	if (flow_pay->nfp_tun_ipv6)
		nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6);

	if (!flow_pay->in_hw) {
		err = 0;
		goto err_free_merge_flow;
	}

	err = nfp_flower_xmit_flow(app, flow_pay,
				   NFP_FLOWER_CMSG_TYPE_FLOW_DEL);

err_free_merge_flow:
	nfp_flower_del_linked_merge_flows(app, flow_pay);
	if (port)
		port->tc_offload_cnt--;
	kfree(flow_pay->action_data);
	kfree(flow_pay->mask_data);
	kfree(flow_pay->unmasked_data);
	WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
					    &flow_pay->fl_node,
					    nfp_flower_table_params));
	kfree_rcu(flow_pay, rcu);
	return err;
}

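/* Merge an nft flow into an existing pre_ct/post_ct tc merge entry.
 * The result is keyed by the two tc cookies plus the nft cookie, so a
 * repeated merge of the same triplet is detected and skipped.
 */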
static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt,
			       struct nfp_fl_ct_flow_entry *nft_entry,
			       struct nfp_fl_ct_tc_merge *tc_m_entry)
{
	struct nfp_fl_ct_flow_entry *post_ct_entry, *pre_ct_entry;
	struct nfp_fl_nft_tc_merge *nft_m_entry;
	unsigned long new_cookie[3];
	int err;

	pre_ct_entry = tc_m_entry->pre_ct_parent;
	post_ct_entry = tc_m_entry->post_ct_parent;

	err = nfp_ct_merge_act_check(pre_ct_entry, post_ct_entry, nft_entry);
	if (err)
		return err;

	/* Check that the two tc flows are also compatible with
	 * the nft entry. No need to check the pre_ct and post_ct
	 * entries as that was already done during pre_merge.
	 * The nft entry does not have a chain populated, so
	 * skip this check.
	 */
	err = nfp_ct_merge_check(pre_ct_entry, nft_entry);
	if (err)
		return err;
	err = nfp_ct_merge_check(nft_entry, post_ct_entry);
	if (err)
		return err;
	err = nfp_ct_check_meta(post_ct_entry, nft_entry);
	if (err)
		return err;

	/* Combine tc_merge and nft cookies for this cookie. */
	new_cookie[0] = tc_m_entry->cookie[0];
	new_cookie[1] = tc_m_entry->cookie[1];
	new_cookie[2] = nft_entry->cookie;
	nft_m_entry = get_hashentry(&zt->nft_merge_tb,
				    &new_cookie,
				    nfp_nft_ct_merge_params,
				    sizeof(*nft_m_entry));

	if (IS_ERR(nft_m_entry))
		return PTR_ERR(nft_m_entry);

	/* nft_m_entry already present, not merging again */
	if (!memcmp(&new_cookie, nft_m_entry->cookie, sizeof(new_cookie)))
		return 0;

	memcpy(&nft_m_entry->cookie, &new_cookie, sizeof(new_cookie));
	nft_m_entry->zt = zt;
	nft_m_entry->tc_m_parent = tc_m_entry;
	nft_m_entry->nft_parent = nft_entry;
	nft_m_entry->tc_flower_cookie = 0;
	/* Copy the netdev from the pre_ct entry. When the tc_m_entry was
	 * created it only combined them if the netdevs were the same, so we
	 * can use either of them.
	 */
	nft_m_entry->netdev = pre_ct_entry->netdev;

	/* Add this entry to the tc_m_list and nft_flow lists */
	list_add(&nft_m_entry->tc_merge_list, &tc_m_entry->children);
	list_add(&nft_m_entry->nft_flow_list, &nft_entry->children);

	/* Generate offload structure and send to nfp */
	err = nfp_fl_ct_add_offload(nft_m_entry);
	if (err)
		goto err_nft_ct_offload;

	err = rhashtable_insert_fast(&zt->nft_merge_tb, &nft_m_entry->hash_node,
				     nfp_nft_ct_merge_params);
	if (err)
		goto err_nft_ct_merge_insert;

	zt->nft_merge_count++;

	return err;

err_nft_ct_merge_insert:
	nfp_fl_ct_del_offload(zt->priv->app, nft_m_entry->tc_flower_cookie,
			      nft_m_entry->netdev);
err_nft_ct_offload:
	list_del(&nft_m_entry->tc_merge_list);
	list_del(&nft_m_entry->nft_flow_list);
	kfree(nft_m_entry);
	return err;
}

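/* Merge a pre_ct and a post_ct flow entry of one zone. On success the
 * new tc merge entry is also merged against all nft flows already
 * known to the zone.
 */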
static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt,
			      struct nfp_fl_ct_flow_entry *ct_entry1,
			      struct nfp_fl_ct_flow_entry *ct_entry2)
{
	struct nfp_fl_ct_flow_entry *post_ct_entry, *pre_ct_entry;
	struct nfp_fl_ct_flow_entry *nft_entry, *nft_tmp;
	struct nfp_fl_ct_tc_merge *m_entry;
	unsigned long new_cookie[2];
	int err;

	if (ct_entry1->type == CT_TYPE_PRE_CT) {
		pre_ct_entry = ct_entry1;
		post_ct_entry = ct_entry2;
	} else {
		post_ct_entry = ct_entry1;
		pre_ct_entry = ct_entry2;
	}

	/* Checks that the chain_index of the filter matches the
	 * chain_index of the GOTO action.
	 */
	if (post_ct_entry->chain_index != pre_ct_entry->chain_index)
		return -EINVAL;

	err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry);
	if (err)
		return err;

	new_cookie[0] = pre_ct_entry->cookie;
	new_cookie[1] = post_ct_entry->cookie;
	m_entry = get_hashentry(&zt->tc_merge_tb, &new_cookie,
				nfp_tc_ct_merge_params, sizeof(*m_entry));
	if (IS_ERR(m_entry))
		return PTR_ERR(m_entry);

	/* m_entry already present, not merging again */
	if (!memcmp(&new_cookie, m_entry->cookie, sizeof(new_cookie)))
		return 0;

	memcpy(&m_entry->cookie, &new_cookie, sizeof(new_cookie));
	m_entry->zt = zt;
	m_entry->post_ct_parent = post_ct_entry;
	m_entry->pre_ct_parent = pre_ct_entry;

	/* Add this entry to the pre_ct and post_ct lists */
	list_add(&m_entry->post_ct_list, &post_ct_entry->children);
	list_add(&m_entry->pre_ct_list, &pre_ct_entry->children);
	INIT_LIST_HEAD(&m_entry->children);

	err = rhashtable_insert_fast(&zt->tc_merge_tb, &m_entry->hash_node,
				     nfp_tc_ct_merge_params);
	if (err)
		goto err_ct_tc_merge_insert;
	zt->tc_merge_count++;

	/* Merge with existing nft flows */
	list_for_each_entry_safe(nft_entry, nft_tmp, &zt->nft_flows_list,
				 list_node) {
		nfp_ct_do_nft_merge(zt, nft_entry, m_entry);
	}

	return 0;

err_ct_tc_merge_insert:
	list_del(&m_entry->post_ct_list);
	list_del(&m_entry->pre_ct_list);
	kfree(m_entry);
	return err;
}

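/* Look up or create the zone table entry for a ct zone. The wildcarded
 * zone is a single special entry kept in priv->ct_zone_wc rather than
 * in the zone hashtable.
 */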
static struct
nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv,
					 u16 zone, bool wildcarded)
{
	struct nfp_fl_ct_zone_entry *zt;
	int err;

	if (wildcarded && priv->ct_zone_wc)
		return priv->ct_zone_wc;

	if (!wildcarded) {
		zt = get_hashentry(&priv->ct_zone_table, &zone,
				   nfp_zone_table_params, sizeof(*zt));

		/* If priv is set this is an existing entry, just return it */
		if (IS_ERR(zt) || zt->priv)
			return zt;
	} else {
		zt = kzalloc(sizeof(*zt), GFP_KERNEL);
		if (!zt)
			return ERR_PTR(-ENOMEM);
	}

	zt->zone = zone;
	zt->priv = priv;
	zt->nft = NULL;

	/* init the various hash tables and lists */
	INIT_LIST_HEAD(&zt->pre_ct_list);
	INIT_LIST_HEAD(&zt->post_ct_list);
	INIT_LIST_HEAD(&zt->nft_flows_list);

	err = rhashtable_init(&zt->tc_merge_tb, &nfp_tc_ct_merge_params);
	if (err)
		goto err_tc_merge_tb_init;

	err = rhashtable_init(&zt->nft_merge_tb, &nfp_nft_ct_merge_params);
	if (err)
		goto err_nft_merge_tb_init;

	if (wildcarded) {
		priv->ct_zone_wc = zt;
	} else {
		err = rhashtable_insert_fast(&priv->ct_zone_table,
					     &zt->hash_node,
					     nfp_zone_table_params);
		if (err)
			goto err_zone_insert;
	}

	return zt;

err_zone_insert:
	rhashtable_destroy(&zt->nft_merge_tb);
err_nft_merge_tb_init:
	rhashtable_destroy(&zt->tc_merge_tb);
err_tc_merge_tb_init:
	kfree(zt);
	return ERR_PTR(err);
}

static struct net_device *get_netdev_from_rule(struct flow_rule *rule)
{
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
		struct flow_match_meta match;

		flow_rule_match_meta(rule, &match);
		if (match.key->ingress_ifindex & match.mask->ingress_ifindex)
			return __dev_get_by_index(&init_net,
						  match.key->ingress_ifindex);
	}

	return NULL;
}

static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_action)
{
	if (mangle_action->id != FLOW_ACTION_MANGLE)
		return;

	switch (mangle_action->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		mangle_action->mangle.val = (__force u32)cpu_to_be32(mangle_action->mangle.val);
		mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask);
		return;

	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val);
		mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask);
		return;

	default:
		return;
	}
}

static int nfp_nft_ct_set_flow_flag(struct flow_action_entry *act,
				    struct nfp_fl_ct_flow_entry *entry)
{
	switch (act->id) {
	case FLOW_ACTION_CT:
		if (act->ct.action == TCA_CT_ACT_NAT)
			entry->flags |= NFP_FL_ACTION_DO_NAT;
		break;

	case FLOW_ACTION_MANGLE:
		entry->flags |= NFP_FL_ACTION_DO_MANGLE;
		break;

	default:
		break;
	}

	return 0;
}

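/* Allocate and fill a ct flow entry from a flow_cls_offload request,
 * and map its cookie to the entry in the ct map table. For nft flows
 * the match data is deep-copied since the nft objects do not outlive
 * the callback, while tc flows can keep references.
 */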
static struct
nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt,
					 struct net_device *netdev,
					 struct flow_cls_offload *flow,
					 bool is_nft, struct netlink_ext_ack *extack)
{
	struct nf_flow_match *nft_match = NULL;
	struct nfp_fl_ct_flow_entry *entry;
	struct nfp_fl_ct_map_entry *map;
	struct flow_action_entry *act;
	int err, i;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return ERR_PTR(-ENOMEM);

	entry->rule = flow_rule_alloc(flow->rule->action.num_entries);
	if (!entry->rule) {
		err = -ENOMEM;
		goto err_pre_ct_rule;
	}

	/* nft flows get destroyed after the callback returns, so a full
	 * copy is needed instead of just a reference.
	 */
	if (is_nft) {
		nft_match = kzalloc(sizeof(*nft_match), GFP_KERNEL);
		if (!nft_match) {
			err = -ENOMEM;
			goto err_pre_ct_act;
		}
		memcpy(&nft_match->dissector, flow->rule->match.dissector,
		       sizeof(nft_match->dissector));
		memcpy(&nft_match->mask, flow->rule->match.mask,
		       sizeof(nft_match->mask));
		memcpy(&nft_match->key, flow->rule->match.key,
		       sizeof(nft_match->key));
		entry->rule->match.dissector = &nft_match->dissector;
		entry->rule->match.mask = &nft_match->mask;
		entry->rule->match.key = &nft_match->key;

		if (!netdev)
			netdev = get_netdev_from_rule(entry->rule);
	} else {
		entry->rule->match.dissector = flow->rule->match.dissector;
		entry->rule->match.mask = flow->rule->match.mask;
		entry->rule->match.key = flow->rule->match.key;
	}

	entry->zt = zt;
	entry->netdev = netdev;
	entry->cookie = flow->cookie;
	entry->chain_index = flow->common.chain_index;
	entry->tun_offset = NFP_FL_CT_NO_TUN;

	/* Copy over the action data. Unfortunately we do not get a handle to
	 * the original tcf_action data, and the flow objects get destroyed,
	 * so we cannot just save a pointer to this either; the data has to
	 * be copied over.
	 */
	entry->rule->action.num_entries = flow->rule->action.num_entries;
	flow_action_for_each(i, act, &flow->rule->action) {
		struct flow_action_entry *new_act;

		new_act = &entry->rule->action.entries[i];
		memcpy(new_act, act, sizeof(struct flow_action_entry));
		/* nft entry mangle fields are in host byte order; translate
		 * them to network byte order.
		 */
		if (is_nft)
			nfp_nft_ct_translate_mangle_action(new_act);

		nfp_nft_ct_set_flow_flag(new_act, entry);
		/* Tunnel encap is a special case: the tunnel info needs to
		 * be allocated and copied.
		 */
		if (act->id == FLOW_ACTION_TUNNEL_ENCAP) {
			struct ip_tunnel_info *tun = act->tunnel;
			size_t tun_size = sizeof(*tun) + tun->options_len;

			new_act->tunnel = kmemdup(tun, tun_size, GFP_ATOMIC);
			if (!new_act->tunnel) {
				err = -ENOMEM;
				goto err_pre_ct_tun_cp;
			}
			entry->tun_offset = i;
		}
	}

	INIT_LIST_HEAD(&entry->children);

	/* Now add a ct map entry to flower-priv */
	map = get_hashentry(&zt->priv->ct_map_table, &flow->cookie,
			    nfp_ct_map_params, sizeof(*map));
	if (IS_ERR(map)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload error: ct map entry creation failed");
		err = -ENOMEM;
		goto err_ct_flow_insert;
	}
	map->cookie = flow->cookie;
	map->ct_entry = entry;
	err = rhashtable_insert_fast(&zt->priv->ct_map_table,
				     &map->hash_node,
				     nfp_ct_map_params);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload error: ct map entry table add failed");
		goto err_map_insert;
	}

	return entry;

err_map_insert:
	kfree(map);
err_ct_flow_insert:
	if (entry->tun_offset != NFP_FL_CT_NO_TUN)
		kfree(entry->rule->action.entries[entry->tun_offset].tunnel);
err_pre_ct_tun_cp:
	kfree(nft_match);
err_pre_ct_act:
	kfree(entry->rule);
err_pre_ct_rule:
	kfree(entry);
	return ERR_PTR(err);
}

static void cleanup_nft_merge_entry(struct nfp_fl_nft_tc_merge *m_entry)
{
	struct nfp_fl_ct_zone_entry *zt;
	int err;

	zt = m_entry->zt;

	/* Flow is in HW, need to delete */
	if (m_entry->tc_flower_cookie) {
		err = nfp_fl_ct_del_offload(zt->priv->app, m_entry->tc_flower_cookie,
					    m_entry->netdev);
		if (err)
			return;
	}

	WARN_ON_ONCE(rhashtable_remove_fast(&zt->nft_merge_tb,
					    &m_entry->hash_node,
					    nfp_nft_ct_merge_params));
	zt->nft_merge_count--;
	list_del(&m_entry->tc_merge_list);
	list_del(&m_entry->nft_flow_list);

	kfree(m_entry);
}

static void nfp_free_nft_merge_children(void *entry, bool is_nft_flow)
{
	struct nfp_fl_nft_tc_merge *m_entry, *tmp;

	/* These merge entries are part of two lists: one is a list of
	 * nft_entries and the other is a list of tc_merge structures.
	 * Iterate through the relevant list and clean up the entries.
	 */

	if (is_nft_flow) {
		/* Need to iterate through list of nft_flow entries */
		struct nfp_fl_ct_flow_entry *ct_entry = entry;

		list_for_each_entry_safe(m_entry, tmp, &ct_entry->children,
					 nft_flow_list) {
			cleanup_nft_merge_entry(m_entry);
		}
	} else {
		/* Need to iterate through list of tc_merged_flow entries */
		struct nfp_fl_ct_tc_merge *ct_entry = entry;

		list_for_each_entry_safe(m_entry, tmp, &ct_entry->children,
					 tc_merge_list) {
			cleanup_nft_merge_entry(m_entry);
		}
	}
}

static void nfp_del_tc_merge_entry(struct nfp_fl_ct_tc_merge *m_ent)
{
	struct nfp_fl_ct_zone_entry *zt;
	int err;

	zt = m_ent->zt;
	err = rhashtable_remove_fast(&zt->tc_merge_tb,
				     &m_ent->hash_node,
				     nfp_tc_ct_merge_params);
	if (err)
		pr_warn("WARNING: could not remove merge_entry from hashtable\n");
	zt->tc_merge_count--;
	list_del(&m_ent->post_ct_list);
	list_del(&m_ent->pre_ct_list);

	if (!list_empty(&m_ent->children))
		nfp_free_nft_merge_children(m_ent, false);
	kfree(m_ent);
}

static void nfp_free_tc_merge_children(struct nfp_fl_ct_flow_entry *entry)
{
	struct nfp_fl_ct_tc_merge *m_ent, *tmp;

	switch (entry->type) {
	case CT_TYPE_PRE_CT:
		list_for_each_entry_safe(m_ent, tmp, &entry->children, pre_ct_list) {
			nfp_del_tc_merge_entry(m_ent);
		}
		break;
	case CT_TYPE_POST_CT:
		list_for_each_entry_safe(m_ent, tmp, &entry->children, post_ct_list) {
			nfp_del_tc_merge_entry(m_ent);
		}
		break;
	default:
		break;
	}
}

void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry)
{
	list_del(&entry->list_node);

	if (!list_empty(&entry->children)) {
		if (entry->type == CT_TYPE_NFT)
			nfp_free_nft_merge_children(entry, true);
		else
			nfp_free_tc_merge_children(entry);
	}

	if (entry->tun_offset != NFP_FL_CT_NO_TUN)
		kfree(entry->rule->action.entries[entry->tun_offset].tunnel);

	if (entry->type == CT_TYPE_NFT) {
		struct nf_flow_match *nft_match;

		nft_match = container_of(entry->rule->match.dissector,
					 struct nf_flow_match, dissector);
		kfree(nft_match);
	}

	kfree(entry->rule);
	kfree(entry);
}

static struct flow_action_entry *get_flow_act(struct flow_rule *rule,
					      enum flow_action_id act_id)
{
	struct flow_action_entry *act = NULL;
	int i;

	flow_action_for_each(i, act, &rule->action) {
		if (act->id == act_id)
			return act;
	}
	return NULL;
}

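/* Try to merge ct_entry1 with every entry of the opposite type
 * (pre_ct vs post_ct) found in the source zone, recording any
 * resulting merges in the destination zone.
 */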
static void
nfp_ct_merge_tc_entries(struct nfp_fl_ct_flow_entry *ct_entry1,
			struct nfp_fl_ct_zone_entry *zt_src,
			struct nfp_fl_ct_zone_entry *zt_dst)
{
	struct nfp_fl_ct_flow_entry *ct_entry2, *ct_tmp;
	struct list_head *ct_list;

	if (ct_entry1->type == CT_TYPE_PRE_CT)
		ct_list = &zt_src->post_ct_list;
	else if (ct_entry1->type == CT_TYPE_POST_CT)
		ct_list = &zt_src->pre_ct_list;
	else
		return;

	list_for_each_entry_safe(ct_entry2, ct_tmp, ct_list,
				 list_node) {
		nfp_ct_do_tc_merge(zt_dst, ct_entry2, ct_entry1);
	}
}

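/* Walk all tc merge entries of a zone and try to merge the new nft
 * entry into each of them.
 */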
static void
nfp_ct_merge_nft_with_tc(struct nfp_fl_ct_flow_entry *nft_entry,
			 struct nfp_fl_ct_zone_entry *zt)
{
	struct nfp_fl_ct_tc_merge *tc_merge_entry;
	struct rhashtable_iter iter;

	rhashtable_walk_enter(&zt->tc_merge_tb, &iter);
	rhashtable_walk_start(&iter);
	while ((tc_merge_entry = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(tc_merge_entry))
			continue;
		rhashtable_walk_stop(&iter);
		nfp_ct_do_nft_merge(zt, nft_entry, tc_merge_entry);
		rhashtable_walk_start(&iter);
	}
	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);
}

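/* Handle a tc flow marked as pre_ct: validate its ct and goto actions,
 * attach it to the matching zone table (registering the nft callback
 * on first use), then attempt merges with existing post_ct entries,
 * including those of the wildcarded zone.
 */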
int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
			    struct net_device *netdev,
			    struct flow_cls_offload *flow,
			    struct netlink_ext_ack *extack)
{
	struct flow_action_entry *ct_act, *ct_goto;
	struct nfp_fl_ct_flow_entry *ct_entry;
	struct nfp_fl_ct_zone_entry *zt;
	int err;

	ct_act = get_flow_act(flow->rule, FLOW_ACTION_CT);
	if (!ct_act) {
		NL_SET_ERR_MSG_MOD(extack,
				   "unsupported offload: Conntrack action empty in conntrack offload");
		return -EOPNOTSUPP;
	}

	ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO);
	if (!ct_goto) {
		NL_SET_ERR_MSG_MOD(extack,
				   "unsupported offload: Conntrack requires ACTION_GOTO");
		return -EOPNOTSUPP;
	}

	zt = get_nfp_zone_entry(priv, ct_act->ct.zone, false);
	if (IS_ERR(zt)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload error: Could not create zone table entry");
		return PTR_ERR(zt);
	}

	if (!zt->nft) {
		/* First use of this zone: save the nft flowtable and
		 * register for its offload callbacks.
		 */
		zt->nft = ct_act->ct.flow_table;
		err = nf_flow_table_offload_add_cb(zt->nft, nfp_fl_ct_handle_nft_flow, zt);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack,
					   "offload error: Could not register nft_callback");
			return err;
		}
	}

	/* Add entry to pre_ct_list */
	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, false, extack);
	if (IS_ERR(ct_entry))
		return PTR_ERR(ct_entry);
	ct_entry->type = CT_TYPE_PRE_CT;
	ct_entry->chain_index = ct_goto->chain_index;
	list_add(&ct_entry->list_node, &zt->pre_ct_list);
	zt->pre_ct_count++;

	nfp_ct_merge_tc_entries(ct_entry, zt, zt);

	/* Need to check and merge with tables in the wc_zone as well */
	if (priv->ct_zone_wc)
		nfp_ct_merge_tc_entries(ct_entry, priv->ct_zone_wc, zt);

	return 0;
}

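/**
 * nfp_fl_ct_handle_post_ct() - Handle a tc flow matching on conntrack state.
 * @priv:	pointer to the flower app private structure
 * @netdev:	netdev the flow was added on
 * @flow:	the flow_cls_offload replace request
 * @extack:	extack for error messages
 *
 * Only an exactly matched or fully wildcarded ct_zone is supported. The
 * flow is stored on the matching zone's post_ct list and merges are
 * attempted against pre_ct entries, across all zone tables when the
 * ct_zone is wildcarded.
 *
 * Returns 0 on success, or a negative error code.
 */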
int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
			     struct net_device *netdev,
			     struct flow_cls_offload *flow,
			     struct netlink_ext_ack *extack)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
	struct nfp_fl_ct_flow_entry *ct_entry;
	struct nfp_fl_ct_zone_entry *zt;
	bool wildcarded = false;
	struct flow_match_ct ct;

	flow_rule_match_ct(rule, &ct);
	if (!ct.mask->ct_zone) {
		wildcarded = true;
	} else if (ct.mask->ct_zone != U16_MAX) {
		NL_SET_ERR_MSG_MOD(extack,
				   "unsupported offload: partially wildcarded ct_zone is not supported");
		return -EOPNOTSUPP;
	}

	zt = get_nfp_zone_entry(priv, ct.key->ct_zone, wildcarded);
	if (IS_ERR(zt)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload error: Could not create zone table entry");
		return PTR_ERR(zt);
	}

	/* Add entry to post_ct_list */
	ct_entry = nfp_fl_ct_add_flow(zt, netdev, flow, false, extack);
	if (IS_ERR(ct_entry))
		return PTR_ERR(ct_entry);

	ct_entry->type = CT_TYPE_POST_CT;
	ct_entry->chain_index = flow->common.chain_index;
	list_add(&ct_entry->list_node, &zt->post_ct_list);
	zt->post_ct_count++;

	if (wildcarded) {
		/* The ct_zone is wildcarded, so iterate through all zone
		 * tables and merge with the pre_ct entries of each.
		 */
		struct nfp_fl_ct_zone_entry *zone_table;
		struct rhashtable_iter iter;

		rhashtable_walk_enter(&priv->ct_zone_table, &iter);
		rhashtable_walk_start(&iter);
		while ((zone_table = rhashtable_walk_next(&iter)) != NULL) {
			if (IS_ERR(zone_table))
				continue;
			rhashtable_walk_stop(&iter);
			nfp_ct_merge_tc_entries(ct_entry, zone_table, zone_table);
			rhashtable_walk_start(&iter);
		}
		rhashtable_walk_stop(&iter);
		rhashtable_walk_exit(&iter);
	} else {
		nfp_ct_merge_tc_entries(ct_entry, zt, zt);
	}

	return 0;
}

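/* Accumulate the hw stats of one offloaded nft_merge flow into
 * @m_pkts/@m_bytes/@m_used, update the cached stats of the parent
 * entries on the other side of the merge, then reset the stats read
 * from the nfp for this flow.
 */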
static void
nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge,
		    enum ct_entry_type type, u64 *m_pkts,
		    u64 *m_bytes, u64 *m_used)
{
	struct nfp_flower_priv *priv = nft_merge->zt->priv;
	struct nfp_fl_payload *nfp_flow;
	u32 ctx_id;

	nfp_flow = nft_merge->flow_pay;
	if (!nfp_flow)
		return;

	ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
	*m_pkts += priv->stats[ctx_id].pkts;
	*m_bytes += priv->stats[ctx_id].bytes;
	*m_used = max_t(u64, *m_used, priv->stats[ctx_id].used);

	/* If the request is for a sub_flow that is part of a tunnel merged
	 * flow, then update stats from the tunnel merged flows first.
	 */
	if (!list_empty(&nfp_flow->linked_flows))
		nfp_flower_update_merge_stats(priv->app, nfp_flow);

	if (type != CT_TYPE_NFT) {
		/* Update nft cached stats */
		flow_stats_update(&nft_merge->nft_parent->stats,
				  priv->stats[ctx_id].bytes,
				  priv->stats[ctx_id].pkts,
				  0, priv->stats[ctx_id].used,
				  FLOW_ACTION_HW_STATS_DELAYED);
	} else {
		/* Update pre_ct cached stats */
		flow_stats_update(&nft_merge->tc_m_parent->pre_ct_parent->stats,
				  priv->stats[ctx_id].bytes,
				  priv->stats[ctx_id].pkts,
				  0, priv->stats[ctx_id].used,
				  FLOW_ACTION_HW_STATS_DELAYED);
		/* Update post_ct cached stats */
		flow_stats_update(&nft_merge->tc_m_parent->post_ct_parent->stats,
				  priv->stats[ctx_id].bytes,
				  priv->stats[ctx_id].pkts,
				  0, priv->stats[ctx_id].used,
				  FLOW_ACTION_HW_STATS_DELAYED);
	}
	/* Reset stats cached from the nfp */
	priv->stats[ctx_id].pkts = 0;
	priv->stats[ctx_id].bytes = 0;
}

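/**
 * nfp_fl_ct_stats() - Handle a stats request for a conntrack flow entry.
 * @flow:	the flow_cls_offload stats request
 * @ct_map_ent:	map entry of the flow the request refers to
 *
 * Walks all merged flows this entry contributes to, accumulates their
 * hardware stats, updates the cached stats of the related parent entries
 * and reports the total through @flow before clearing the entry's cache.
 */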
int nfp_fl_ct_stats(struct flow_cls_offload *flow,
		    struct nfp_fl_ct_map_entry *ct_map_ent)
{
	struct nfp_fl_ct_flow_entry *ct_entry = ct_map_ent->ct_entry;
	struct nfp_fl_nft_tc_merge *nft_merge, *nft_m_tmp;
	struct nfp_fl_ct_tc_merge *tc_merge, *tc_m_tmp;

	u64 pkts = 0, bytes = 0, used = 0;
	u64 m_pkts, m_bytes, m_used;

	spin_lock_bh(&ct_entry->zt->priv->stats_lock);

	if (ct_entry->type == CT_TYPE_PRE_CT) {
		/* Iterate tc_merge entries associated with this flow */
		list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children,
					 pre_ct_list) {
			m_pkts = 0;
			m_bytes = 0;
			m_used = 0;
			/* Iterate nft_merge entries associated with this tc_merge flow */
			list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children,
						 tc_merge_list) {
				nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_PRE_CT,
						    &m_pkts, &m_bytes, &m_used);
			}
			pkts += m_pkts;
			bytes += m_bytes;
			used = max_t(u64, used, m_used);
			/* Update post_ct partner */
			flow_stats_update(&tc_merge->post_ct_parent->stats,
					  m_bytes, m_pkts, 0, m_used,
					  FLOW_ACTION_HW_STATS_DELAYED);
		}
	} else if (ct_entry->type == CT_TYPE_POST_CT) {
		/* Iterate tc_merge entries associated with this flow */
		list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children,
					 post_ct_list) {
			m_pkts = 0;
			m_bytes = 0;
			m_used = 0;
			/* Iterate nft_merge entries associated with this tc_merge flow */
			list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children,
						 tc_merge_list) {
				nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_POST_CT,
						    &m_pkts, &m_bytes, &m_used);
			}
			pkts += m_pkts;
			bytes += m_bytes;
			used = max_t(u64, used, m_used);
			/* Update pre_ct partner */
			flow_stats_update(&tc_merge->pre_ct_parent->stats,
					  m_bytes, m_pkts, 0, m_used,
					  FLOW_ACTION_HW_STATS_DELAYED);
		}
	} else {
		/* Iterate nft_merge entries associated with this nft flow */
		list_for_each_entry_safe(nft_merge, nft_m_tmp, &ct_entry->children,
					 nft_flow_list) {
			nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_NFT,
					    &pkts, &bytes, &used);
		}
	}

	/* Add stats from this request to stats potentially cached by
	 * previous requests.
	 */
	flow_stats_update(&ct_entry->stats, bytes, pkts, 0, used,
			  FLOW_ACTION_HW_STATS_DELAYED);
	/* Finally update the flow stats from the original stats request */
	flow_stats_update(&flow->stats, ct_entry->stats.bytes,
			  ct_entry->stats.pkts, 0,
			  ct_entry->stats.lastused,
			  FLOW_ACTION_HW_STATS_DELAYED);
	/* Stats have been synced to the original flow, so the cache can
	 * now be cleared.
	 */
	ct_entry->stats.pkts = 0;
	ct_entry->stats.bytes = 0;
	spin_unlock_bh(&ct_entry->zt->priv->stats_lock);

	return 0;
}

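/* Handle FLOW_CLS_REPLACE/DESTROY/STATS requests from netfilter for
 * flows belonging to zone table @zt. Must be called with the rtnl lock
 * held.
 */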
static int
nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow)
{
	struct nfp_fl_ct_map_entry *ct_map_ent;
	struct nfp_fl_ct_flow_entry *ct_entry;
	struct netlink_ext_ack *extack;

	ASSERT_RTNL();

	extack = flow->common.extack;
	switch (flow->command) {
	case FLOW_CLS_REPLACE:
		/* Netfilter can request offload multiple times for the same
		 * flow - protect against adding duplicates.
		 */
		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie,
						    nfp_ct_map_params);
		if (!ct_map_ent) {
			ct_entry = nfp_fl_ct_add_flow(zt, NULL, flow, true, extack);
			if (IS_ERR(ct_entry))
				return PTR_ERR(ct_entry);
			ct_entry->type = CT_TYPE_NFT;
			list_add(&ct_entry->list_node, &zt->nft_flows_list);
			zt->nft_flows_count++;
			nfp_ct_merge_nft_with_tc(ct_entry, zt);
		}
		return 0;
	case FLOW_CLS_DESTROY:
		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie,
						    nfp_ct_map_params);
		return nfp_fl_ct_del_flow(ct_map_ent);
	case FLOW_CLS_STATS:
		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie,
						    nfp_ct_map_params);
		if (ct_map_ent)
			return nfp_fl_ct_stats(flow, ct_map_ent);
		break;
	default:
		break;
	}
	return -EINVAL;
}

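/* Offload callback registered with the nft flowtable in
 * nfp_fl_ct_handle_pre_ct(). Takes the rtnl lock and dispatches
 * TC_SETUP_CLSFLOWER requests to nfp_fl_ct_offload_nft_flow().
 */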
int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data, void *cb_priv)
{
	struct flow_cls_offload *flow = type_data;
	struct nfp_fl_ct_zone_entry *zt = cb_priv;
	int err = -EOPNOTSUPP;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		rtnl_lock();
		err = nfp_fl_ct_offload_nft_flow(zt, flow);
		rtnl_unlock();
		break;
	default:
		return -EOPNOTSUPP;
	}
	return err;
}

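/* Remove all nft flow entries tracked in @zt, used once the last pre_ct
 * entry of the zone has been deleted.
 */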
static void
nfp_fl_ct_clean_nft_entries(struct nfp_fl_ct_zone_entry *zt)
{
	struct nfp_fl_ct_flow_entry *nft_entry, *ct_tmp;
	struct nfp_fl_ct_map_entry *ct_map_ent;

	list_for_each_entry_safe(nft_entry, ct_tmp, &zt->nft_flows_list,
				 list_node) {
		ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table,
						    &nft_entry->cookie,
						    nfp_ct_map_params);
		nfp_fl_ct_del_flow(ct_map_ent);
	}
}

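/**
 * nfp_fl_ct_del_flow() - Delete a conntrack flow entry via its map entry.
 * @ct_map_ent:	map entry of the flow to delete
 *
 * Removes the entry from the map table, updates the zone counters and
 * frees the flow entry together with its map entry. Once the last pre_ct
 * entry of a zone is deleted, the zone's nft flows are cleaned up as well.
 *
 * Returns 0 on success, or -ENOENT if @ct_map_ent is NULL.
 */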
int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent)
{
	struct nfp_fl_ct_flow_entry *ct_entry;
	struct nfp_fl_ct_zone_entry *zt;
	struct rhashtable *m_table;

	if (!ct_map_ent)
		return -ENOENT;

	ct_entry = ct_map_ent->ct_entry;
	zt = ct_entry->zt;
	m_table = &zt->priv->ct_map_table;

	switch (ct_entry->type) {
	case CT_TYPE_PRE_CT:
		zt->pre_ct_count--;
		rhashtable_remove_fast(m_table, &ct_map_ent->hash_node,
				       nfp_ct_map_params);
		nfp_fl_ct_clean_flow_entry(ct_entry);
		kfree(ct_map_ent);

		/* The zone's nft table is only referenced while pre_ct
		 * flows exist, so drop it and its flows with the last one.
		 */
		if (!zt->pre_ct_count) {
			zt->nft = NULL;
			nfp_fl_ct_clean_nft_entries(zt);
		}
		break;
	case CT_TYPE_POST_CT:
		zt->post_ct_count--;
		rhashtable_remove_fast(m_table, &ct_map_ent->hash_node,
				       nfp_ct_map_params);
		nfp_fl_ct_clean_flow_entry(ct_entry);
		kfree(ct_map_ent);
		break;
	case CT_TYPE_NFT:
		zt->nft_flows_count--;
		rhashtable_remove_fast(m_table, &ct_map_ent->hash_node,
				       nfp_ct_map_params);
		nfp_fl_ct_clean_flow_entry(ct_entry);
		kfree(ct_map_ent);
		break;
	default:
		break;
	}

	return 0;
}