1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <net/netfilter/nf_conntrack.h>
5 #include <net/netfilter/nf_conntrack_core.h>
6 #include <net/netfilter/nf_conntrack_zones.h>
7 #include <net/netfilter/nf_conntrack_labels.h>
8 #include <net/netfilter/nf_conntrack_helper.h>
9 #include <net/netfilter/nf_conntrack_acct.h>
10 #include <uapi/linux/tc_act/tc_pedit.h>
11 #include <net/tc_act/tc_ct.h>
12 #include <net/flow_offload.h>
13 #include <net/netfilter/nf_flow_table.h>
14 #include <linux/workqueue.h>
15 #include <linux/refcount.h>
16 #include <linux/xarray.h>
17 
18 #include "lib/fs_chains.h"
19 #include "en/tc_ct.h"
20 #include "en/mod_hdr.h"
21 #include "en/mapping.h"
22 #include "en.h"
23 #include "en_tc.h"
24 #include "en_rep.h"
25 
26 #define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen)
27 #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
28 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
29 #define MLX5_CT_STATE_TRK_BIT BIT(2)
30 #define MLX5_CT_STATE_NAT_BIT BIT(3)
31 #define MLX5_CT_STATE_REPLY_BIT BIT(4)
32 #define MLX5_CT_STATE_RELATED_BIT BIT(5)
33 #define MLX5_CT_STATE_INVALID_BIT BIT(6)
34 
35 #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen)
36 #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
37 #define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX
38 
39 #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
40 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
41 
42 #define ct_dbg(fmt, args...)\
43 	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
44 
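/* Per flow-namespace (FDB or NIC rx) connection tracking offload context:
 * holds the ct, ct_nat and post_ct flow tables, the per-zone and per-tuple
 * hash tables, and the zone/labels mapping contexts shared by all zones.
 */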
45 struct mlx5_tc_ct_priv {
46 	struct mlx5_core_dev *dev;
47 	const struct net_device *netdev;
48 	struct mod_hdr_tbl *mod_hdr_tbl;
49 	struct idr fte_ids;
50 	struct xarray tuple_ids;
51 	struct rhashtable zone_ht;
52 	struct rhashtable ct_tuples_ht;
53 	struct rhashtable ct_tuples_nat_ht;
54 	struct mlx5_flow_table *ct;
55 	struct mlx5_flow_table *ct_nat;
56 	struct mlx5_flow_table *post_ct;
57 	struct mutex control_lock; /* guards parallel adds/dels */
58 	struct mapping_ctx *zone_mapping;
59 	struct mapping_ctx *labels_mapping;
60 	enum mlx5_flow_namespace_type ns_type;
61 	struct mlx5_fs_chains *chains;
62 	spinlock_t ht_lock; /* protects ft entries */
63 };
64 
65 struct mlx5_ct_flow {
66 	struct mlx5_flow_attr *pre_ct_attr;
67 	struct mlx5_flow_attr *post_ct_attr;
68 	struct mlx5_flow_handle *pre_ct_rule;
69 	struct mlx5_flow_handle *post_ct_rule;
70 	struct mlx5_ct_ft *ft;
71 	u32 fte_id;
72 	u32 chain_mapping;
73 };
74 
75 struct mlx5_ct_zone_rule {
76 	struct mlx5_flow_handle *rule;
77 	struct mlx5e_mod_hdr_handle *mh;
78 	struct mlx5_flow_attr *attr;
79 	bool nat;
80 };
81 
82 struct mlx5_tc_ct_pre {
83 	struct mlx5_flow_table *ft;
84 	struct mlx5_flow_group *flow_grp;
85 	struct mlx5_flow_group *miss_grp;
86 	struct mlx5_flow_handle *flow_rule;
87 	struct mlx5_flow_handle *miss_rule;
88 	struct mlx5_modify_hdr *modify_hdr;
89 };
90 
91 struct mlx5_ct_ft {
92 	struct rhash_head node;
93 	u16 zone;
94 	u32 zone_restore_id;
95 	refcount_t refcount;
96 	struct nf_flowtable *nf_ft;
97 	struct mlx5_tc_ct_priv *ct_priv;
98 	struct rhashtable ct_entries_ht;
99 	struct mlx5_tc_ct_pre pre_ct;
100 	struct mlx5_tc_ct_pre pre_ct_nat;
101 };
102 
103 struct mlx5_ct_tuple {
104 	u16 addr_type;
105 	__be16 n_proto;
106 	u8 ip_proto;
107 	struct {
108 		union {
109 			__be32 src_v4;
110 			struct in6_addr src_v6;
111 		};
112 		union {
113 			__be32 dst_v4;
114 			struct in6_addr dst_v6;
115 		};
116 	} ip;
117 	struct {
118 		__be16 src;
119 		__be16 dst;
120 	} port;
121 
122 	u16 zone;
123 };
124 
125 struct mlx5_ct_counter {
126 	struct mlx5_fc *counter;
127 	refcount_t refcount;
128 	bool is_shared;
129 };
130 
131 enum {
132 	MLX5_CT_ENTRY_FLAG_VALID,
133 };
134 
135 struct mlx5_ct_entry {
136 	struct rhash_head node;
137 	struct rhash_head tuple_node;
138 	struct rhash_head tuple_nat_node;
139 	struct mlx5_ct_counter *counter;
140 	unsigned long cookie;
141 	unsigned long restore_cookie;
142 	struct mlx5_ct_tuple tuple;
143 	struct mlx5_ct_tuple tuple_nat;
144 	struct mlx5_ct_zone_rule zone_rules[2];
145 
146 	struct mlx5_tc_ct_priv *ct_priv;
147 	struct work_struct work;
148 
149 	refcount_t refcnt;
150 	unsigned long flags;
151 };
152 
153 static void
154 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
155 				 struct mlx5_flow_attr *attr,
156 				 struct mlx5e_mod_hdr_handle *mh);
157 
158 static const struct rhashtable_params cts_ht_params = {
159 	.head_offset = offsetof(struct mlx5_ct_entry, node),
160 	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
161 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
162 	.automatic_shrinking = true,
163 	.min_size = 16 * 1024,
164 };
165 
166 static const struct rhashtable_params zone_params = {
167 	.head_offset = offsetof(struct mlx5_ct_ft, node),
168 	.key_offset = offsetof(struct mlx5_ct_ft, zone),
169 	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
170 	.automatic_shrinking = true,
171 };
172 
173 static const struct rhashtable_params tuples_ht_params = {
174 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
175 	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
176 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
177 	.automatic_shrinking = true,
178 	.min_size = 16 * 1024,
179 };
180 
181 static const struct rhashtable_params tuples_nat_ht_params = {
182 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
183 	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
184 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
185 	.automatic_shrinking = true,
186 	.min_size = 16 * 1024,
187 };
188 
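/* An entry is hashed into ct_tuples_nat_ht only when its NAT tuple differs
 * from the original tuple, so a linked tuple_nat_node implies the entry
 * does NAT.
 */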
189 static bool
190 mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
191 {
192 	return !!(entry->tuple_nat_node.next);
193 }
194 
195 static int
196 mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
197 		       u32 *labels, u32 *id)
198 {
199 	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
200 		*id = 0;
201 		return 0;
202 	}
203 
204 	if (mapping_add(ct_priv->labels_mapping, labels, id))
205 		return -EOPNOTSUPP;
206 
207 	return 0;
208 }
209 
210 static void
211 mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
212 {
213 	if (id)
214 		mapping_remove(ct_priv->labels_mapping, id);
215 }
216 
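/* Extract the offloadable 5-tuple (addresses, L4 ports, protocols) from the
 * conntrack flow_rule match. Only TCP/UDP over IPv4/IPv6 is supported;
 * anything else returns -EOPNOTSUPP.
 */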
217 static int
218 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
219 {
220 	struct flow_match_control control;
221 	struct flow_match_basic basic;
222 
223 	flow_rule_match_basic(rule, &basic);
224 	flow_rule_match_control(rule, &control);
225 
226 	tuple->n_proto = basic.key->n_proto;
227 	tuple->ip_proto = basic.key->ip_proto;
228 	tuple->addr_type = control.key->addr_type;
229 
230 	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
231 		struct flow_match_ipv4_addrs match;
232 
233 		flow_rule_match_ipv4_addrs(rule, &match);
234 		tuple->ip.src_v4 = match.key->src;
235 		tuple->ip.dst_v4 = match.key->dst;
236 	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
237 		struct flow_match_ipv6_addrs match;
238 
239 		flow_rule_match_ipv6_addrs(rule, &match);
240 		tuple->ip.src_v6 = match.key->src;
241 		tuple->ip.dst_v6 = match.key->dst;
242 	} else {
243 		return -EOPNOTSUPP;
244 	}
245 
246 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
247 		struct flow_match_ports match;
248 
249 		flow_rule_match_ports(rule, &match);
250 		switch (tuple->ip_proto) {
251 		case IPPROTO_TCP:
252 		case IPPROTO_UDP:
253 			tuple->port.src = match.key->src;
254 			tuple->port.dst = match.key->dst;
255 			break;
256 		default:
257 			return -EOPNOTSUPP;
258 		}
259 	} else {
260 		return -EOPNOTSUPP;
261 	}
262 
263 	return 0;
264 }
265 
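/* Derive the post-NAT tuple by applying the rule's mangle actions on top of
 * the original tuple (the caller copies the original tuple into tuple_nat
 * first). Mangling anything other than IPv4/IPv6 addresses or TCP/UDP ports
 * is not supported.
 */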
266 static int
267 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
268 			     struct flow_rule *rule)
269 {
270 	struct flow_action *flow_action = &rule->action;
271 	struct flow_action_entry *act;
272 	u32 offset, val, ip6_offset;
273 	int i;
274 
275 	flow_action_for_each(i, act, flow_action) {
276 		if (act->id != FLOW_ACTION_MANGLE)
277 			continue;
278 
279 		offset = act->mangle.offset;
280 		val = act->mangle.val;
281 		switch (act->mangle.htype) {
282 		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
283 			if (offset == offsetof(struct iphdr, saddr))
284 				tuple->ip.src_v4 = cpu_to_be32(val);
285 			else if (offset == offsetof(struct iphdr, daddr))
286 				tuple->ip.dst_v4 = cpu_to_be32(val);
287 			else
288 				return -EOPNOTSUPP;
289 			break;
290 
291 		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
292 			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
293 			ip6_offset /= 4;
294 			if (ip6_offset < 4)
295 				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
296 			else if (ip6_offset < 8)
297 				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
298 			else
299 				return -EOPNOTSUPP;
300 			break;
301 
302 		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
303 			if (offset == offsetof(struct tcphdr, source))
304 				tuple->port.src = cpu_to_be16(val);
305 			else if (offset == offsetof(struct tcphdr, dest))
306 				tuple->port.dst = cpu_to_be16(val);
307 			else
308 				return -EOPNOTSUPP;
309 			break;
310 
311 		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
312 			if (offset == offsetof(struct udphdr, source))
313 				tuple->port.src = cpu_to_be16(val);
314 			else if (offset == offsetof(struct udphdr, dest))
315 				tuple->port.dst = cpu_to_be16(val);
316 			else
317 				return -EOPNOTSUPP;
318 			break;
319 
320 		default:
321 			return -EOPNOTSUPP;
322 		}
323 	}
324 
325 	return 0;
326 }
327 
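/* Translate the conntrack tuple match of @rule into outer header match
 * criteria and values of the mlx5 flow spec: ethertype, ip_protocol,
 * addresses, L4 ports and TCP flags.
 */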
328 static int
329 mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
330 			   struct flow_rule *rule)
331 {
332 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
333 				       outer_headers);
334 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
335 				       outer_headers);
336 	u16 addr_type = 0;
337 	u8 ip_proto = 0;
338 
339 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
340 		struct flow_match_basic match;
341 
342 		flow_rule_match_basic(rule, &match);
343 
344 		mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
345 				       headers_v);
346 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
347 			 match.mask->ip_proto);
348 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
349 			 match.key->ip_proto);
350 
351 		ip_proto = match.key->ip_proto;
352 	}
353 
354 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
355 		struct flow_match_control match;
356 
357 		flow_rule_match_control(rule, &match);
358 		addr_type = match.key->addr_type;
359 	}
360 
361 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
362 		struct flow_match_ipv4_addrs match;
363 
364 		flow_rule_match_ipv4_addrs(rule, &match);
365 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
366 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
367 		       &match.mask->src, sizeof(match.mask->src));
368 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
369 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
370 		       &match.key->src, sizeof(match.key->src));
371 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
372 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
373 		       &match.mask->dst, sizeof(match.mask->dst));
374 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
375 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
376 		       &match.key->dst, sizeof(match.key->dst));
377 	}
378 
379 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
380 		struct flow_match_ipv6_addrs match;
381 
382 		flow_rule_match_ipv6_addrs(rule, &match);
383 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
384 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
385 		       &match.mask->src, sizeof(match.mask->src));
386 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
387 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
388 		       &match.key->src, sizeof(match.key->src));
389 
390 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
391 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
392 		       &match.mask->dst, sizeof(match.mask->dst));
393 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
394 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
395 		       &match.key->dst, sizeof(match.key->dst));
396 	}
397 
398 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
399 		struct flow_match_ports match;
400 
401 		flow_rule_match_ports(rule, &match);
402 		switch (ip_proto) {
403 		case IPPROTO_TCP:
404 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
405 				 tcp_sport, ntohs(match.mask->src));
406 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
407 				 tcp_sport, ntohs(match.key->src));
408 
409 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
410 				 tcp_dport, ntohs(match.mask->dst));
411 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
412 				 tcp_dport, ntohs(match.key->dst));
413 			break;
414 
415 		case IPPROTO_UDP:
416 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
417 				 udp_sport, ntohs(match.mask->src));
418 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
419 				 udp_sport, ntohs(match.key->src));
420 
421 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
422 				 udp_dport, ntohs(match.mask->dst));
423 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
424 				 udp_dport, ntohs(match.key->dst));
425 			break;
426 		default:
427 			break;
428 		}
429 	}
430 
431 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
432 		struct flow_match_tcp match;
433 
434 		flow_rule_match_tcp(rule, &match);
435 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
436 			 ntohs(match.mask->flags));
437 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
438 			 ntohs(match.key->flags));
439 	}
440 
441 	return 0;
442 }
443 
444 static void
445 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
446 {
447 	if (entry->counter->is_shared &&
448 	    !refcount_dec_and_test(&entry->counter->refcount))
449 		return;
450 
451 	mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
452 	kfree(entry->counter);
453 }
454 
455 static void
456 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
457 			  struct mlx5_ct_entry *entry,
458 			  bool nat)
459 {
460 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
461 	struct mlx5_flow_attr *attr = zone_rule->attr;
462 
463 	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
464 
465 	mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
466 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
467 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
468 	kfree(attr);
469 }
470 
471 static void
472 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
473 			   struct mlx5_ct_entry *entry)
474 {
475 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
476 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
477 }
478 
479 static struct flow_action_entry *
480 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
481 {
482 	struct flow_action *flow_action = &flow_rule->action;
483 	struct flow_action_entry *act;
484 	int i;
485 
486 	flow_action_for_each(i, act, flow_action) {
487 		if (act->id == FLOW_ACTION_CT_METADATA)
488 			return act;
489 	}
490 
491 	return NULL;
492 }
493 
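/* Append modify-header actions that write the conntrack metadata of an
 * entry (ct state bits, mark, mapped labels id and zone restore id) into
 * the corresponding registers.
 */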
494 static int
495 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
496 			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
497 			       u8 ct_state,
498 			       u32 mark,
499 			       u32 labels_id,
500 			       u8 zone_restore_id)
501 {
502 	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
503 	struct mlx5_core_dev *dev = ct_priv->dev;
504 	int err;
505 
506 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
507 					CTSTATE_TO_REG, ct_state);
508 	if (err)
509 		return err;
510 
511 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
512 					MARK_TO_REG, mark);
513 	if (err)
514 		return err;
515 
516 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
517 					LABELS_TO_REG, labels_id);
518 	if (err)
519 		return err;
520 
521 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
522 					ZONE_RESTORE_TO_REG, zone_restore_id);
523 	if (err)
524 		return err;
525 
526 	/* Make another copy of the zone restore id in reg_b for
527 	 * NIC rx flows, since we don't copy reg_c1 to reg_b upon
528 	 * miss.
529 	 */
530 	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
531 		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
532 						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
533 		if (err)
534 			return err;
535 	}
536 	return 0;
537 }
538 
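/* Convert one pedit mangle action into a single MLX5_ACTION_TYPE_SET
 * modify-header action on the corresponding packet field: an IPv4 address,
 * one 32-bit word of an IPv6 address, or a TCP/UDP port.
 */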
539 static int
540 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
541 				   char *modact)
542 {
543 	u32 offset = act->mangle.offset, field;
544 
545 	switch (act->mangle.htype) {
546 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
547 		MLX5_SET(set_action_in, modact, length, 0);
548 		if (offset == offsetof(struct iphdr, saddr))
549 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
550 		else if (offset == offsetof(struct iphdr, daddr))
551 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
552 		else
553 			return -EOPNOTSUPP;
554 		break;
555 
556 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
557 		MLX5_SET(set_action_in, modact, length, 0);
558 		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
559 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
560 		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
561 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
562 		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
563 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
564 		else if (offset == offsetof(struct ipv6hdr, saddr))
565 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
566 		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
567 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
568 		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
569 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
570 		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
571 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
572 		else if (offset == offsetof(struct ipv6hdr, daddr))
573 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
574 		else
575 			return -EOPNOTSUPP;
576 		break;
577 
578 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
579 		MLX5_SET(set_action_in, modact, length, 16);
580 		if (offset == offsetof(struct tcphdr, source))
581 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
582 		else if (offset == offsetof(struct tcphdr, dest))
583 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
584 		else
585 			return -EOPNOTSUPP;
586 		break;
587 
588 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
589 		MLX5_SET(set_action_in, modact, length, 16);
590 		if (offset == offsetof(struct udphdr, source))
591 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
592 		else if (offset == offsetof(struct udphdr, dest))
593 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
594 		else
595 			return -EOPNOTSUPP;
596 		break;
597 
598 	default:
599 		return -EOPNOTSUPP;
600 	}
601 
602 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
603 	MLX5_SET(set_action_in, modact, offset, 0);
604 	MLX5_SET(set_action_in, modact, field, field);
605 	MLX5_SET(set_action_in, modact, data, act->mangle.val);
606 
607 	return 0;
608 }
609 
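/* Build the NAT rewrite for an entry: walk the flow_rule actions and append
 * one set action per mangle to @mod_acts. The CT metadata action is handled
 * by the caller; any other action type fails the offload.
 */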
610 static int
611 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
612 			    struct flow_rule *flow_rule,
613 			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
614 {
615 	struct flow_action *flow_action = &flow_rule->action;
616 	struct mlx5_core_dev *mdev = ct_priv->dev;
617 	struct flow_action_entry *act;
618 	size_t action_size;
619 	char *modact;
620 	int err, i;
621 
622 	action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
623 
624 	flow_action_for_each(i, act, flow_action) {
625 		switch (act->id) {
626 		case FLOW_ACTION_MANGLE: {
627 			err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
628 						    mod_acts);
629 			if (err)
630 				return err;
631 
632 			modact = mod_acts->actions +
633 				 mod_acts->num_actions * action_size;
634 
635 			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
636 			if (err)
637 				return err;
638 
639 			mod_acts->num_actions++;
640 		}
641 		break;
642 
643 		case FLOW_ACTION_CT_METADATA:
644 			/* Handled earlier */
645 			continue;
646 		default:
647 			return -EOPNOTSUPP;
648 		}
649 	}
650 
651 	return 0;
652 }
653 
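/* Build the modify header of a ct entry rule: map the conntrack labels, add
 * the NAT rewrite when needed and set the metadata registers. NAT headers
 * are allocated directly, while non-NAT headers go through the mod_hdr
 * table so that identical ones are shared.
 */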
654 static int
655 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
656 				struct mlx5_flow_attr *attr,
657 				struct flow_rule *flow_rule,
658 				struct mlx5e_mod_hdr_handle **mh,
659 				u8 zone_restore_id, bool nat)
660 {
661 	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
662 	struct flow_action_entry *meta;
663 	u16 ct_state = 0;
664 	int err;
665 
666 	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
667 	if (!meta)
668 		return -EOPNOTSUPP;
669 
670 	err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
671 				     &attr->ct_attr.ct_labels_id);
672 	if (err)
673 		return -EOPNOTSUPP;
674 	if (nat) {
675 		err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
676 						  &mod_acts);
677 		if (err)
678 			goto err_mapping;
679 
680 		ct_state |= MLX5_CT_STATE_NAT_BIT;
681 	}
682 
683 	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
684 	ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
685 	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
686 					     ct_state,
687 					     meta->ct_metadata.mark,
688 					     attr->ct_attr.ct_labels_id,
689 					     zone_restore_id);
690 	if (err)
691 		goto err_mapping;
692 
693 	if (nat) {
694 		attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
695 							    mod_acts.num_actions,
696 							    mod_acts.actions);
697 		if (IS_ERR(attr->modify_hdr)) {
698 			err = PTR_ERR(attr->modify_hdr);
699 			goto err_mapping;
700 		}
701 
702 		*mh = NULL;
703 	} else {
704 		*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
705 					   ct_priv->mod_hdr_tbl,
706 					   ct_priv->ns_type,
707 					   &mod_acts);
708 		if (IS_ERR(*mh)) {
709 			err = PTR_ERR(*mh);
710 			goto err_mapping;
711 		}
712 		attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
713 	}
714 
715 	dealloc_mod_hdr_actions(&mod_acts);
716 	return 0;
717 
718 err_mapping:
719 	dealloc_mod_hdr_actions(&mod_acts);
720 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
721 	return err;
722 }
723 
724 static void
725 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
726 				 struct mlx5_flow_attr *attr,
727 				 struct mlx5e_mod_hdr_handle *mh)
728 {
729 	if (mh)
730 		mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
731 	else
732 		mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
733 }
734 
735 static int
736 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
737 			  struct flow_rule *flow_rule,
738 			  struct mlx5_ct_entry *entry,
739 			  bool nat, u8 zone_restore_id)
740 {
741 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
742 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
743 	struct mlx5_flow_spec *spec = NULL;
744 	struct mlx5_flow_attr *attr;
745 	int err;
746 
747 	zone_rule->nat = nat;
748 
749 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
750 	if (!spec)
751 		return -ENOMEM;
752 
753 	attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
754 	if (!attr) {
755 		err = -ENOMEM;
756 		goto err_attr;
757 	}
758 
759 	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
760 					      &zone_rule->mh,
761 					      zone_restore_id, nat);
762 	if (err) {
763 		ct_dbg("Failed to create ct entry mod hdr");
764 		goto err_mod_hdr;
765 	}
766 
767 	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
768 		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
769 		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
770 	attr->dest_chain = 0;
771 	attr->dest_ft = ct_priv->post_ct;
772 	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
773 	attr->outer_match_level = MLX5_MATCH_L4;
774 	attr->counter = entry->counter->counter;
775 	attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
776 	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
777 		attr->esw_attr->in_mdev = priv->mdev;
778 
779 	mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
780 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
781 
782 	zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
783 	if (IS_ERR(zone_rule->rule)) {
784 		err = PTR_ERR(zone_rule->rule);
785 		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
786 		goto err_rule;
787 	}
788 
789 	zone_rule->attr = attr;
790 
791 	kvfree(spec);
792 	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
793 
794 	return 0;
795 
796 err_rule:
797 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
798 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
799 err_mod_hdr:
800 	kfree(attr);
801 err_attr:
802 	kvfree(spec);
803 	return err;
804 }
805 
806 static bool
807 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
808 {
809 	return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
810 }
811 
812 static struct mlx5_ct_entry *
813 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
814 {
815 	struct mlx5_ct_entry *entry;
816 
817 	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
818 				       tuples_ht_params);
819 	if (entry && mlx5_tc_ct_entry_valid(entry) &&
820 	    refcount_inc_not_zero(&entry->refcnt)) {
821 		return entry;
822 	} else if (!entry) {
823 		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
824 					       tuple, tuples_nat_ht_params);
825 		if (entry && mlx5_tc_ct_entry_valid(entry) &&
826 		    refcount_inc_not_zero(&entry->refcnt))
827 			return entry;
828 	}
829 
830 	return entry ? ERR_PTR(-EINVAL) : NULL;
831 }
832 
833 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
834 {
835 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
836 
837 	rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
838 			       &entry->tuple_nat_node,
839 			       tuples_nat_ht_params);
840 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
841 			       tuples_ht_params);
842 }
843 
844 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
845 {
846 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
847 
848 	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
849 
850 	spin_lock_bh(&ct_priv->ht_lock);
851 	mlx5_tc_ct_entry_remove_from_tuples(entry);
852 	spin_unlock_bh(&ct_priv->ht_lock);
853 
854 	mlx5_tc_ct_counter_put(ct_priv, entry);
855 	kfree(entry);
856 }
857 
858 static void
859 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
860 {
861 	if (!refcount_dec_and_test(&entry->refcnt))
862 		return;
863 
864 	mlx5_tc_ct_entry_del(entry);
865 }
866 
867 static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
868 {
869 	struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
870 
871 	mlx5_tc_ct_entry_del(entry);
872 }
873 
874 static void
875 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
876 {
877 	struct mlx5e_priv *priv;
878 
879 	if (!refcount_dec_and_test(&entry->refcnt))
880 		return;
881 
882 	priv = netdev_priv(entry->ct_priv->netdev);
883 	INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
884 	queue_work(priv->wq, &entry->work);
885 }
886 
887 static struct mlx5_ct_counter *
888 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
889 {
890 	struct mlx5_ct_counter *counter;
891 	int ret;
892 
893 	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
894 	if (!counter)
895 		return ERR_PTR(-ENOMEM);
896 
897 	counter->is_shared = false;
898 	counter->counter = mlx5_fc_create(ct_priv->dev, true);
899 	if (IS_ERR(counter->counter)) {
900 		ct_dbg("Failed to create counter for ct entry");
901 		ret = PTR_ERR(counter->counter);
902 		kfree(counter);
903 		return ERR_PTR(ret);
904 	}
905 
906 	return counter;
907 }
908 
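/* Reuse the counter of the reverse-direction entry, if one exists, so both
 * directions of a connection share a single HW counter; otherwise create a
 * new counter and mark it shared. Used when conntrack accounting is
 * disabled (see mlx5_tc_ct_entry_add_rules()).
 */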
909 static struct mlx5_ct_counter *
910 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
911 			      struct mlx5_ct_entry *entry)
912 {
913 	struct mlx5_ct_tuple rev_tuple = entry->tuple;
914 	struct mlx5_ct_counter *shared_counter;
915 	struct mlx5_ct_entry *rev_entry;
916 	__be16 tmp_port;
917 
918 	/* get the reversed tuple */
919 	tmp_port = rev_tuple.port.src;
920 	rev_tuple.port.src = rev_tuple.port.dst;
921 	rev_tuple.port.dst = tmp_port;
922 
923 	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
924 		__be32 tmp_addr = rev_tuple.ip.src_v4;
925 
926 		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
927 		rev_tuple.ip.dst_v4 = tmp_addr;
928 	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
929 		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
930 
931 		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
932 		rev_tuple.ip.dst_v6 = tmp_addr;
933 	} else {
934 		return ERR_PTR(-EOPNOTSUPP);
935 	}
936 
937 	/* Use the same counter as the reverse direction */
938 	spin_lock_bh(&ct_priv->ht_lock);
939 	rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
940 
941 	if (IS_ERR(rev_entry)) {
942 		spin_unlock_bh(&ct_priv->ht_lock);
943 		goto create_counter;
944 	}
945 
946 	if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
947 		ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
948 		shared_counter = rev_entry->counter;
949 		spin_unlock_bh(&ct_priv->ht_lock);
950 
951 		mlx5_tc_ct_entry_put(rev_entry);
952 		return shared_counter;
953 	}
954 
955 	spin_unlock_bh(&ct_priv->ht_lock);
956 
957 create_counter:
958 
959 	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
960 	if (IS_ERR(shared_counter))
961 		return shared_counter;
962 
963 	shared_counter->is_shared = true;
964 	refcount_set(&shared_counter->refcount, 1);
965 	return shared_counter;
966 }
967 
968 static int
969 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
970 			   struct flow_rule *flow_rule,
971 			   struct mlx5_ct_entry *entry,
972 			   u8 zone_restore_id)
973 {
974 	int err;
975 
976 	if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
977 		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
978 	else
979 		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
980 
981 	if (IS_ERR(entry->counter)) {
982 		err = PTR_ERR(entry->counter);
983 		return err;
984 	}
985 
986 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
987 					zone_restore_id);
988 	if (err)
989 		goto err_orig;
990 
991 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
992 					zone_restore_id);
993 	if (err)
994 		goto err_nat;
995 
996 	return 0;
997 
998 err_nat:
999 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
1000 err_orig:
1001 	mlx5_tc_ct_counter_put(ct_priv, entry);
1002 	return err;
1003 }
1004 
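/* FLOW_CLS_REPLACE from the nf flowtable: allocate a ct entry, parse its
 * original and NAT tuples, insert it into the cookie and tuple hash tables
 * (the NAT table only if the tuples differ), offload the plain and NAT
 * rules and finally mark the entry valid.
 */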
1005 static int
1006 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
1007 				  struct flow_cls_offload *flow)
1008 {
1009 	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
1010 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1011 	struct flow_action_entry *meta_action;
1012 	unsigned long cookie = flow->cookie;
1013 	struct mlx5_ct_entry *entry;
1014 	int err;
1015 
1016 	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
1017 	if (!meta_action)
1018 		return -EOPNOTSUPP;
1019 
1020 	spin_lock_bh(&ct_priv->ht_lock);
1021 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1022 	if (entry && refcount_inc_not_zero(&entry->refcnt)) {
1023 		spin_unlock_bh(&ct_priv->ht_lock);
1024 		mlx5_tc_ct_entry_put(entry);
1025 		return -EEXIST;
1026 	}
1027 	spin_unlock_bh(&ct_priv->ht_lock);
1028 
1029 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1030 	if (!entry)
1031 		return -ENOMEM;
1032 
1033 	entry->tuple.zone = ft->zone;
1034 	entry->cookie = flow->cookie;
1035 	entry->restore_cookie = meta_action->ct_metadata.cookie;
1036 	refcount_set(&entry->refcnt, 2);
1037 	entry->ct_priv = ct_priv;
1038 
1039 	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
1040 	if (err)
1041 		goto err_set;
1042 
1043 	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
1044 	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
1045 	if (err)
1046 		goto err_set;
1047 
1048 	spin_lock_bh(&ct_priv->ht_lock);
1049 
1050 	err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
1051 					    cts_ht_params);
1052 	if (err)
1053 		goto err_entries;
1054 
1055 	err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
1056 					    &entry->tuple_node,
1057 					    tuples_ht_params);
1058 	if (err)
1059 		goto err_tuple;
1060 
1061 	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
1062 		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
1063 						    &entry->tuple_nat_node,
1064 						    tuples_nat_ht_params);
1065 		if (err)
1066 			goto err_tuple_nat;
1067 	}
1068 	spin_unlock_bh(&ct_priv->ht_lock);
1069 
1070 	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
1071 					 ft->zone_restore_id);
1072 	if (err)
1073 		goto err_rules;
1074 
1075 	set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
1076 	mlx5_tc_ct_entry_put(entry); /* this function reference */
1077 
1078 	return 0;
1079 
1080 err_rules:
1081 	spin_lock_bh(&ct_priv->ht_lock);
1082 	if (mlx5_tc_ct_entry_has_nat(entry))
1083 		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
1084 				       &entry->tuple_nat_node, tuples_nat_ht_params);
1085 err_tuple_nat:
1086 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
1087 			       &entry->tuple_node,
1088 			       tuples_ht_params);
1089 err_tuple:
1090 	rhashtable_remove_fast(&ft->ct_entries_ht,
1091 			       &entry->node,
1092 			       cts_ht_params);
1093 err_entries:
1094 	spin_unlock_bh(&ct_priv->ht_lock);
1095 err_set:
1096 	kfree(entry);
1097 	if (err != -EEXIST)
1098 		netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
1099 	return err;
1100 }
1101 
1102 static int
1103 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
1104 				  struct flow_cls_offload *flow)
1105 {
1106 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1107 	unsigned long cookie = flow->cookie;
1108 	struct mlx5_ct_entry *entry;
1109 
1110 	spin_lock_bh(&ct_priv->ht_lock);
1111 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1112 	if (!entry) {
1113 		spin_unlock_bh(&ct_priv->ht_lock);
1114 		return -ENOENT;
1115 	}
1116 
1117 	if (!mlx5_tc_ct_entry_valid(entry)) {
1118 		spin_unlock_bh(&ct_priv->ht_lock);
1119 		return -EINVAL;
1120 	}
1121 
1122 	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
1123 	mlx5_tc_ct_entry_remove_from_tuples(entry);
1124 	spin_unlock_bh(&ct_priv->ht_lock);
1125 
1126 	mlx5_tc_ct_entry_put(entry);
1127 
1128 	return 0;
1129 }
1130 
1131 static int
1132 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
1133 				    struct flow_cls_offload *f)
1134 {
1135 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1136 	unsigned long cookie = f->cookie;
1137 	struct mlx5_ct_entry *entry;
1138 	u64 lastuse, packets, bytes;
1139 
1140 	spin_lock_bh(&ct_priv->ht_lock);
1141 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1142 	if (!entry) {
1143 		spin_unlock_bh(&ct_priv->ht_lock);
1144 		return -ENOENT;
1145 	}
1146 
1147 	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
1148 		spin_unlock_bh(&ct_priv->ht_lock);
1149 		return -EINVAL;
1150 	}
1151 
1152 	spin_unlock_bh(&ct_priv->ht_lock);
1153 
1154 	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
1155 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
1156 			  FLOW_ACTION_HW_STATS_DELAYED);
1157 
1158 	mlx5_tc_ct_entry_put(entry);
1159 	return 0;
1160 }
1161 
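/* Callback registered on the netfilter flowtable; dispatches add, delete
 * and stats requests for offloaded conntrack entries of this zone's ft.
 */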
1162 static int
1163 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
1164 			      void *cb_priv)
1165 {
1166 	struct flow_cls_offload *f = type_data;
1167 	struct mlx5_ct_ft *ft = cb_priv;
1168 
1169 	if (type != TC_SETUP_CLSFLOWER)
1170 		return -EOPNOTSUPP;
1171 
1172 	switch (f->command) {
1173 	case FLOW_CLS_REPLACE:
1174 		return mlx5_tc_ct_block_flow_offload_add(ft, f);
1175 	case FLOW_CLS_DESTROY:
1176 		return mlx5_tc_ct_block_flow_offload_del(ft, f);
1177 	case FLOW_CLS_STATS:
1178 		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
1179 	default:
1180 		break;
1181 	}
1182 
1183 	return -EOPNOTSUPP;
1184 }
1185 
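/* Dissect an skb into a ct tuple that can be looked up in the tuple hash
 * tables. Only TCP/UDP over IPv4/IPv6 is recognized.
 */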
1186 static bool
1187 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
1188 			u16 zone)
1189 {
1190 	struct flow_keys flow_keys;
1191 
1192 	skb_reset_network_header(skb);
1193 	skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
1194 
1195 	tuple->zone = zone;
1196 
1197 	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
1198 	    flow_keys.basic.ip_proto != IPPROTO_UDP)
1199 		return false;
1200 
1201 	tuple->port.src = flow_keys.ports.src;
1202 	tuple->port.dst = flow_keys.ports.dst;
1203 	tuple->n_proto = flow_keys.basic.n_proto;
1204 	tuple->ip_proto = flow_keys.basic.ip_proto;
1205 
1206 	switch (flow_keys.basic.n_proto) {
1207 	case htons(ETH_P_IP):
1208 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1209 		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
1210 		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
1211 		break;
1212 
1213 	case htons(ETH_P_IPV6):
1214 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1215 		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
1216 		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
1217 		break;
1218 	default:
1219 		goto out;
1220 	}
1221 
1222 	return true;
1223 
1224 out:
1225 	return false;
1226 }
1227 
1228 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
1229 {
1230 	u32 ctstate = 0, ctstate_mask = 0;
1231 
1232 	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
1233 					&ctstate, &ctstate_mask);
1234 
1235 	if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
1236 		return -EOPNOTSUPP;
1237 
1238 	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
1239 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1240 				    ctstate, ctstate_mask);
1241 
1242 	return 0;
1243 }
1244 
1245 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
1246 {
1247 	if (!priv || !ct_attr->ct_labels_id)
1248 		return;
1249 
1250 	mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
1251 }
1252 
1253 int
1254 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
1255 		     struct mlx5_flow_spec *spec,
1256 		     struct flow_cls_offload *f,
1257 		     struct mlx5_ct_attr *ct_attr,
1258 		     struct netlink_ext_ack *extack)
1259 {
1260 	bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
1261 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1262 	struct flow_dissector_key_ct *mask, *key;
1263 	u32 ctstate = 0, ctstate_mask = 0;
1264 	u16 ct_state_on, ct_state_off;
1265 	u16 ct_state, ct_state_mask;
1266 	struct flow_match_ct match;
1267 	u32 ct_labels[4];
1268 
1269 	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
1270 		return 0;
1271 
1272 	if (!priv) {
1273 		NL_SET_ERR_MSG_MOD(extack,
1274 				   "offload of ct matching isn't available");
1275 		return -EOPNOTSUPP;
1276 	}
1277 
1278 	flow_rule_match_ct(rule, &match);
1279 
1280 	key = match.key;
1281 	mask = match.mask;
1282 
1283 	ct_state = key->ct_state;
1284 	ct_state_mask = mask->ct_state;
1285 
1286 	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
1287 			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
1288 			      TCA_FLOWER_KEY_CT_FLAGS_NEW |
1289 			      TCA_FLOWER_KEY_CT_FLAGS_REPLY |
1290 			      TCA_FLOWER_KEY_CT_FLAGS_RELATED |
1291 			      TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
1292 		NL_SET_ERR_MSG_MOD(extack,
1293 				   "only ct_state trk, est, new and rpl are supported for offload");
1294 		return -EOPNOTSUPP;
1295 	}
1296 
1297 	ct_state_on = ct_state & ct_state_mask;
1298 	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
1299 	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1300 	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1301 	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1302 	rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1303 	rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1304 	inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1305 	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1306 	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1307 	unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1308 	unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1309 	uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1310 
1311 	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
1312 	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1313 	ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
1314 	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
1315 	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1316 	ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
1317 	ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
1318 	ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
1319 
1320 	if (rel) {
1321 		NL_SET_ERR_MSG_MOD(extack,
1322 				   "matching on ct_state +rel isn't supported");
1323 		return -EOPNOTSUPP;
1324 	}
1325 
1326 	if (inv) {
1327 		NL_SET_ERR_MSG_MOD(extack,
1328 				   "matching on ct_state +inv isn't supported");
1329 		return -EOPNOTSUPP;
1330 	}
1331 
1332 	if (new) {
1333 		NL_SET_ERR_MSG_MOD(extack,
1334 				   "matching on ct_state +new isn't supported");
1335 		return -EOPNOTSUPP;
1336 	}
1337 
1338 	if (mask->ct_zone)
1339 		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1340 					    key->ct_zone, MLX5_CT_ZONE_MASK);
1341 	if (ctstate_mask)
1342 		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1343 					    ctstate, ctstate_mask);
1344 	if (mask->ct_mark)
1345 		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
1346 					    key->ct_mark, mask->ct_mark);
1347 	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
1348 	    mask->ct_labels[3]) {
1349 		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
1350 		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
1351 		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
1352 		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
1353 		if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
1354 			return -EOPNOTSUPP;
1355 		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
1356 					    MLX5_CT_LABELS_MASK);
1357 	}
1358 
1359 	return 0;
1360 }
1361 
1362 int
1363 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
1364 			struct mlx5_flow_attr *attr,
1365 			const struct flow_action_entry *act,
1366 			struct netlink_ext_ack *extack)
1367 {
1368 	if (!priv) {
1369 		NL_SET_ERR_MSG_MOD(extack,
1370 				   "offload of ct action isn't available");
1371 		return -EOPNOTSUPP;
1372 	}
1373 
1374 	attr->ct_attr.zone = act->ct.zone;
1375 	attr->ct_attr.ct_action = act->ct.action;
1376 	attr->ct_attr.nf_ft = act->ct.flow_table;
1377 
1378 	return 0;
1379 }
1380 
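/* Add the two rules of a pre_ct table: a "flow" rule that matches packets
 * already marked +trk (and +nat for the NAT table) in this zone and forwards
 * them directly to post_ct, and a miss rule that forwards everything else to
 * the ct (or ct_nat) table. Both rules rewrite the zone register first.
 */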
1381 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1382 				  struct mlx5_tc_ct_pre *pre_ct,
1383 				  bool nat)
1384 {
1385 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1386 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1387 	struct mlx5_core_dev *dev = ct_priv->dev;
1388 	struct mlx5_flow_table *ft = pre_ct->ft;
1389 	struct mlx5_flow_destination dest = {};
1390 	struct mlx5_flow_act flow_act = {};
1391 	struct mlx5_modify_hdr *mod_hdr;
1392 	struct mlx5_flow_handle *rule;
1393 	struct mlx5_flow_spec *spec;
1394 	u32 ctstate;
1395 	u16 zone;
1396 	int err;
1397 
1398 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1399 	if (!spec)
1400 		return -ENOMEM;
1401 
1402 	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1403 	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1404 					ZONE_TO_REG, zone);
1405 	if (err) {
1406 		ct_dbg("Failed to set zone register mapping");
1407 		goto err_mapping;
1408 	}
1409 
1410 	mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1411 					   pre_mod_acts.num_actions,
1412 					   pre_mod_acts.actions);
1413 
1414 	if (IS_ERR(mod_hdr)) {
1415 		err = PTR_ERR(mod_hdr);
1416 		ct_dbg("Failed to create pre ct mod hdr");
1417 		goto err_mapping;
1418 	}
1419 	pre_ct->modify_hdr = mod_hdr;
1420 
1421 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1422 			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1423 	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1424 	flow_act.modify_hdr = mod_hdr;
1425 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1426 
1427 	/* add flow rule */
1428 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1429 				    zone, MLX5_CT_ZONE_MASK);
1430 	ctstate = MLX5_CT_STATE_TRK_BIT;
1431 	if (nat)
1432 		ctstate |= MLX5_CT_STATE_NAT_BIT;
1433 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1434 
1435 	dest.ft = ct_priv->post_ct;
1436 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1437 	if (IS_ERR(rule)) {
1438 		err = PTR_ERR(rule);
1439 		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1440 		goto err_flow_rule;
1441 	}
1442 	pre_ct->flow_rule = rule;
1443 
1444 	/* add miss rule */
1445 	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1446 	rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
1447 	if (IS_ERR(rule)) {
1448 		err = PTR_ERR(rule);
1449 		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1450 		goto err_miss_rule;
1451 	}
1452 	pre_ct->miss_rule = rule;
1453 
1454 	dealloc_mod_hdr_actions(&pre_mod_acts);
1455 	kvfree(spec);
1456 	return 0;
1457 
1458 err_miss_rule:
1459 	mlx5_del_flow_rules(pre_ct->flow_rule);
1460 err_flow_rule:
1461 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1462 err_mapping:
1463 	dealloc_mod_hdr_actions(&pre_mod_acts);
1464 	kvfree(spec);
1465 	return err;
1466 }
1467 
1468 static void
1469 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1470 		       struct mlx5_tc_ct_pre *pre_ct)
1471 {
1472 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1473 	struct mlx5_core_dev *dev = ct_priv->dev;
1474 
1475 	mlx5_del_flow_rules(pre_ct->flow_rule);
1476 	mlx5_del_flow_rules(pre_ct->miss_rule);
1477 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1478 }
1479 
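/* Create one pre_ct flow table: an unmanaged two-FTE table with a match
 * group on metadata reg_c_2 (zone bits plus the trk/nat ct state bits) for
 * the flow rule and a second match-all group for the miss rule, then add
 * the two rules.
 */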
1480 static int
1481 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1482 			struct mlx5_tc_ct_pre *pre_ct,
1483 			bool nat)
1484 {
1485 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1486 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1487 	struct mlx5_core_dev *dev = ct_priv->dev;
1488 	struct mlx5_flow_table_attr ft_attr = {};
1489 	struct mlx5_flow_namespace *ns;
1490 	struct mlx5_flow_table *ft;
1491 	struct mlx5_flow_group *g;
1492 	u32 metadata_reg_c_2_mask;
1493 	u32 *flow_group_in;
1494 	void *misc;
1495 	int err;
1496 
1497 	ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1498 	if (!ns) {
1499 		err = -EOPNOTSUPP;
1500 		ct_dbg("Failed to get flow namespace");
1501 		return err;
1502 	}
1503 
1504 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1505 	if (!flow_group_in)
1506 		return -ENOMEM;
1507 
1508 	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1509 	ft_attr.prio =  ct_priv->ns_type ==  MLX5_FLOW_NAMESPACE_FDB ?
1510 			FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1511 	ft_attr.max_fte = 2;
1512 	ft_attr.level = 1;
1513 	ft = mlx5_create_flow_table(ns, &ft_attr);
1514 	if (IS_ERR(ft)) {
1515 		err = PTR_ERR(ft);
1516 		ct_dbg("Failed to create pre ct table");
1517 		goto out_free;
1518 	}
1519 	pre_ct->ft = ft;
1520 
1521 	/* create flow group */
1522 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1523 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1524 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1525 		 MLX5_MATCH_MISC_PARAMETERS_2);
1526 
1527 	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1528 			    match_criteria.misc_parameters_2);
1529 
1530 	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1531 	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1532 	if (nat)
1533 		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1534 
1535 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1536 		 metadata_reg_c_2_mask);
1537 
1538 	g = mlx5_create_flow_group(ft, flow_group_in);
1539 	if (IS_ERR(g)) {
1540 		err = PTR_ERR(g);
1541 		ct_dbg("Failed to create pre ct group");
1542 		goto err_flow_grp;
1543 	}
1544 	pre_ct->flow_grp = g;
1545 
1546 	/* create miss group */
1547 	memset(flow_group_in, 0, inlen);
1548 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1549 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1550 	g = mlx5_create_flow_group(ft, flow_group_in);
1551 	if (IS_ERR(g)) {
1552 		err = PTR_ERR(g);
1553 		ct_dbg("Failed to create pre ct miss group");
1554 		goto err_miss_grp;
1555 	}
1556 	pre_ct->miss_grp = g;
1557 
1558 	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1559 	if (err)
1560 		goto err_add_rules;
1561 
1562 	kvfree(flow_group_in);
1563 	return 0;
1564 
1565 err_add_rules:
1566 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1567 err_miss_grp:
1568 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1569 err_flow_grp:
1570 	mlx5_destroy_flow_table(ft);
1571 out_free:
1572 	kvfree(flow_group_in);
1573 	return err;
1574 }
1575 
1576 static void
1577 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1578 		       struct mlx5_tc_ct_pre *pre_ct)
1579 {
1580 	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1581 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1582 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1583 	mlx5_destroy_flow_table(pre_ct->ft);
1584 }
1585 
1586 static int
1587 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1588 {
1589 	int err;
1590 
1591 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1592 	if (err)
1593 		return err;
1594 
1595 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1596 	if (err)
1597 		goto err_pre_ct_nat;
1598 
1599 	return 0;
1600 
1601 err_pre_ct_nat:
1602 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1603 	return err;
1604 }
1605 
1606 static void
1607 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1608 {
1609 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1610 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1611 }
1612 
1613 /* To avoid a false lock dependency warning, give ct_entries_ht a lock class
1614  * different from that of the other hash tables in use: when the last flow of a
1615  * group is deleted and the group itself is then deleted, we get into
1616  * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash; that
1617  * takes its own ht->mutex, which is a different lock than the ht->mutex here.
1618  */
1619 static struct lock_class_key ct_entries_ht_lock_key;
1620 
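/* Get (or create) the per-zone ct ft: allocate a zone restore id, create the
 * pre_ct/pre_ct_nat tables and register the flow offload callback on the
 * netfilter flowtable so that established connections of this zone are
 * offloaded through it.
 */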
1621 static struct mlx5_ct_ft *
1622 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1623 		     struct nf_flowtable *nf_ft)
1624 {
1625 	struct mlx5_ct_ft *ft;
1626 	int err;
1627 
1628 	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1629 	if (ft) {
1630 		refcount_inc(&ft->refcount);
1631 		return ft;
1632 	}
1633 
1634 	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1635 	if (!ft)
1636 		return ERR_PTR(-ENOMEM);
1637 
1638 	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1639 	if (err)
1640 		goto err_mapping;
1641 
1642 	ft->zone = zone;
1643 	ft->nf_ft = nf_ft;
1644 	ft->ct_priv = ct_priv;
1645 	refcount_set(&ft->refcount, 1);
1646 
1647 	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1648 	if (err)
1649 		goto err_alloc_pre_ct;
1650 
1651 	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1652 	if (err)
1653 		goto err_init;
1654 
1655 	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
1656 
1657 	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1658 				     zone_params);
1659 	if (err)
1660 		goto err_insert;
1661 
1662 	err = nf_flow_table_offload_add_cb(ft->nf_ft,
1663 					   mlx5_tc_ct_block_flow_offload, ft);
1664 	if (err)
1665 		goto err_add_cb;
1666 
1667 	return ft;
1668 
1669 err_add_cb:
1670 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1671 err_insert:
1672 	rhashtable_destroy(&ft->ct_entries_ht);
1673 err_init:
1674 	mlx5_tc_ct_free_pre_ct_tables(ft);
1675 err_alloc_pre_ct:
1676 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1677 err_mapping:
1678 	kfree(ft);
1679 	return ERR_PTR(err);
1680 }
1681 
1682 static void
1683 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1684 {
1685 	struct mlx5_ct_entry *entry = ptr;
1686 
1687 	mlx5_tc_ct_entry_put(entry);
1688 }
1689 
1690 static void
1691 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1692 {
1693 	if (!refcount_dec_and_test(&ft->refcount))
1694 		return;
1695 
1696 	nf_flow_table_offload_del_cb(ft->nf_ft,
1697 				     mlx5_tc_ct_block_flow_offload, ft);
1698 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1699 	rhashtable_free_and_destroy(&ft->ct_entries_ht,
1700 				    mlx5_tc_ct_flush_ft_entry,
1701 				    ct_priv);
1702 	mlx5_tc_ct_free_pre_ct_tables(ft);
1703 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1704 	kfree(ft);
1705 }
1706 
1707 /* We translate the tc filter with CT action to the following HW model:
1708  *
1709  * +---------------------+
1710  * + ft prio (tc chain)  +
1711  * + original match      +
1712  * +---------------------+
1713  *      | set chain miss mapping
1714  *      | set fte_id
1715  *      | set tunnel_id
1716  *      | do decap
1717  *      v
1718  * +---------------------+
1719  * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
1720  * + zone+nat match      +---------------->+ post_ct (see below) +
1721  * +---------------------+  set zone       +---------------------+
1722  *      | set zone
1723  *      v
1724  * +--------------------+
1725  * + CT (nat or no nat) +
1726  * + tuple + zone match +
1727  * +--------------------+
1728  *      | set mark
1729  *      | set labels_id
1730  *      | set established
1731  *      | set zone_restore
1732  *      | do nat (if needed)
1733  *      v
1734  * +--------------+
1735  * + post_ct      + original filter actions
1736  * + fte_id match +------------------------>
1737  * +--------------+
1738  */
1739 static struct mlx5_flow_handle *
1740 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1741 			  struct mlx5e_tc_flow *flow,
1742 			  struct mlx5_flow_spec *orig_spec,
1743 			  struct mlx5_flow_attr *attr)
1744 {
1745 	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1746 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1747 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1748 	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1749 	struct mlx5_flow_spec *post_ct_spec = NULL;
1750 	struct mlx5_flow_attr *pre_ct_attr;
1751 	struct mlx5_modify_hdr *mod_hdr;
1752 	struct mlx5_flow_handle *rule;
1753 	struct mlx5_ct_flow *ct_flow;
1754 	int chain_mapping = 0, err;
1755 	struct mlx5_ct_ft *ft;
1756 	u32 fte_id = 1;
1757 
1758 	post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
1759 	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1760 	if (!post_ct_spec || !ct_flow) {
1761 		kvfree(post_ct_spec);
1762 		kfree(ct_flow);
1763 		return ERR_PTR(-ENOMEM);
1764 	}
1765 
1766 	/* Register for CT established events */
1767 	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1768 				  attr->ct_attr.nf_ft);
1769 	if (IS_ERR(ft)) {
1770 		err = PTR_ERR(ft);
1771 		ct_dbg("Failed to register to ft callback");
1772 		goto err_ft;
1773 	}
1774 	ct_flow->ft = ft;
1775 
1776 	err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
1777 			    MLX5_FTE_ID_MAX, GFP_KERNEL);
1778 	if (err) {
1779 		netdev_warn(priv->netdev,
1780 			    "Failed to allocate fte id, err: %d\n", err);
1781 		goto err_idr;
1782 	}
1783 	ct_flow->fte_id = fte_id;
1784 
1785 	/* Base flow attributes of both rules on original rule attribute */
1786 	ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1787 	if (!ct_flow->pre_ct_attr) {
1788 		err = -ENOMEM;
1789 		goto err_alloc_pre;
1790 	}
1791 
1792 	ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1793 	if (!ct_flow->post_ct_attr) {
1794 		err = -ENOMEM;
1795 		goto err_alloc_post;
1796 	}
1797 
1798 	pre_ct_attr = ct_flow->pre_ct_attr;
1799 	memcpy(pre_ct_attr, attr, attr_sz);
1800 	memcpy(ct_flow->post_ct_attr, attr, attr_sz);
1801 
1802 	/* Modify the original rule's action to fwd and modify, leave decap */
1803 	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
1804 	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1805 			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1806 
1807 	/* Write the chain miss tag for a miss in the ct table, as we
1808 	 * don't go through all prios of this chain the way normal tc
1809 	 * rules do on a miss.
1810 	 */
	err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
					    &chain_mapping);
	if (err) {
		ct_dbg("Failed to get chain register mapping for chain");
		goto err_get_chain;
	}
	ct_flow->chain_mapping = chain_mapping;

	err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
					CHAIN_TO_REG, chain_mapping);
	if (err) {
		ct_dbg("Failed to set chain register mapping");
		goto err_mapping;
	}

	err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
					FTEID_TO_REG, fte_id);
	if (err) {
		ct_dbg("Failed to set fte_id register mapping");
		goto err_mapping;
	}
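
	/* Note: the two register writes above pair with matches elsewhere:
	 * the chain mapping is used to restore the chain on a ct table miss,
	 * and FTEID_TO_REG is matched back by the post_ct rule built below
	 * with mlx5e_tc_match_to_reg_match().
	 */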

	/* If the original flow does decap, we do it before going into the
	 * ct table, so add a rewrite for the tunnel match_id.
	 */
	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
	    attr->chain == 0) {
		u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);

		err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
						ct_priv->ns_type,
						TUNNEL_TO_REG,
						tun_id);
		if (err) {
			ct_dbg("Failed to set tunnel register mapping");
			goto err_mapping;
		}
	}

	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
					   pre_mod_acts.num_actions,
					   pre_mod_acts.actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct_attr->modify_hdr = mod_hdr;

	/* The post ct rule matches on fte_id and executes the original
	 * rule's tc actions.
	 */
	mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
				    fte_id, MLX5_FTE_ID_MASK);

	/* Put post_ct rule on post_ct flow table */
	ct_flow->post_ct_attr->chain = 0;
	ct_flow->post_ct_attr->prio = 0;
	ct_flow->post_ct_attr->ft = ct_priv->post_ct;

	/* Splits were handled before CT */
	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
		ct_flow->post_ct_attr->esw_attr->split_count = 0;

	ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
	ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
	ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
	rule = mlx5_tc_rule_insert(priv, post_ct_spec,
				   ct_flow->post_ct_attr);
	ct_flow->post_ct_rule = rule;
	if (IS_ERR(ct_flow->post_ct_rule)) {
		err = PTR_ERR(ct_flow->post_ct_rule);
		ct_dbg("Failed to add post ct rule");
		goto err_insert_post_ct;
	}

	/* Change the original rule to point to the ct table */
	pre_ct_attr->dest_chain = 0;
	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
	ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
						   pre_ct_attr);
	if (IS_ERR(ct_flow->pre_ct_rule)) {
		err = PTR_ERR(ct_flow->pre_ct_rule);
		ct_dbg("Failed to add pre ct rule");
		goto err_insert_orig;
	}

	attr->ct_attr.ct_flow = ct_flow;
	dealloc_mod_hdr_actions(&pre_mod_acts);
	kvfree(post_ct_spec);

	return rule;

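/* Error unwind: release everything created above, in reverse order */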
err_insert_orig:
	mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
			    ct_flow->post_ct_attr);
err_insert_post_ct:
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
	dealloc_mod_hdr_actions(&pre_mod_acts);
	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
	kfree(ct_flow->post_ct_attr);
err_alloc_post:
	kfree(ct_flow->pre_ct_attr);
err_alloc_pre:
	idr_remove(&ct_priv->fte_ids, fte_id);
err_idr:
	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
	kvfree(post_ct_spec);
	kfree(ct_flow);
	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
	return ERR_PTR(err);
}

static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
				struct mlx5_flow_spec *orig_spec,
				struct mlx5_flow_attr *attr,
				struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
	struct mlx5_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_ct_flow *ct_flow;
	int err;

	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!ct_flow)
		return ERR_PTR(-ENOMEM);

	/* Base the esw attributes on the original rule's attribute */
	pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
	if (!pre_ct_attr) {
		err = -ENOMEM;
		goto err_attr;
	}

	memcpy(pre_ct_attr, attr, attr_sz);

	err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
	if (err) {
		ct_dbg("Failed to set register for ct clear");
		goto err_set_registers;
	}

	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
					   mod_acts->num_actions,
					   mod_acts->actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create ct clear mod hdr");
		goto err_set_registers;
	}

	pre_ct_attr->modify_hdr = mod_hdr;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add ct clear rule");
		goto err_insert;
	}

	attr->ct_attr.ct_flow = ct_flow;
	ct_flow->pre_ct_attr = pre_ct_attr;
	ct_flow->pre_ct_rule = rule;
	return rule;

err_insert:
	mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
err_set_registers:
	netdev_warn(priv->netdev,
		    "Failed to offload ct clear flow, err %d\n", err);
	kfree(pre_ct_attr);
err_attr:
	kfree(ct_flow);

	return ERR_PTR(err);
}

struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
			struct mlx5e_tc_flow *flow,
			struct mlx5_flow_spec *spec,
			struct mlx5_flow_attr *attr,
			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
	bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
	struct mlx5_flow_handle *rule;

	if (!priv)
		return ERR_PTR(-EOPNOTSUPP);

	mutex_lock(&priv->control_lock);

	if (clear_action)
		rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
	else
		rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
	mutex_unlock(&priv->control_lock);

	return rule;
}
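
/* Typical pairing with mlx5_tc_ct_delete_flow() below (illustrative sketch;
 * the surrounding caller code is hypothetical):
 *
 *	rule = mlx5_tc_ct_flow_offload(ct_priv, flow, spec, attr, mod_acts);
 *	if (IS_ERR(rule))
 *		return PTR_ERR(rule);
 *	...
 *	mlx5_tc_ct_delete_flow(ct_priv, flow, attr);
 */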

static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct mlx5e_tc_flow *flow,
			 struct mlx5_ct_flow *ct_flow)
{
	struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);

	mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
			    pre_ct_attr);
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);

	if (ct_flow->post_ct_rule) {
		mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
				    ct_flow->post_ct_attr);
		mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
		idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
		mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
	}

	kfree(ct_flow->pre_ct_attr);
	kfree(ct_flow->post_ct_attr);
	kfree(ct_flow);
}

void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct mlx5_flow_attr *attr)
{
	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;

	/* We are called on error to clean up anything created during
	 * parsing, but there is nothing to clean up for now.
	 */
	if (!ct_flow)
		return;

	mutex_lock(&priv->control_lock);
	__mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
	mutex_unlock(&priv->control_lock);
}

static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
				  const char **err_msg)
{
	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
		*err_msg = "firmware level support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
		/* The VLAN workaround should be avoided for multi-chain
		 * rules. This is just a sanity check, as the pop VLAN
		 * action should be supported by any FW that supports
		 * ignore_flow_level.
		 */

		*err_msg = "firmware vlan actions support is missing";
		return -EOPNOTSUPP;
	}

	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
				    fdb_modify_header_fwd_to_table)) {
		/* CT always writes to registers, which are mod header
		 * actions. Therefore, mod header and goto are required.
		 */

		*err_msg = "firmware fwd and modify support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
		*err_msg = "register loopback isn't supported";
		return -EOPNOTSUPP;
	}

	return 0;
}

static int
mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
				  const char **err_msg)
{
	if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
		*err_msg = "firmware level support is missing";
		return -EOPNOTSUPP;
	}

	return 0;
}

static int
mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
			      enum mlx5_flow_namespace_type ns_type,
			      const char **err_msg)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	/* cannot restore chain ID on HW miss */

	*err_msg = "tc skb extension missing";
	return -EOPNOTSUPP;
#endif
	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
		return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
	else
		return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
}

#define INIT_ERR_PREFIX "tc ct offload init failed"

struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
		struct mod_hdr_tbl *mod_hdr,
		enum mlx5_flow_namespace_type ns_type)
{
	struct mlx5_tc_ct_priv *ct_priv;
	struct mlx5_core_dev *dev;
	const char *msg;
	int err;

	dev = priv->mdev;
	err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
	if (err) {
		mlx5_core_warn(dev,
			       "tc ct offload not supported, %s\n",
			       msg);
		goto err_support;
	}

	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
	if (!ct_priv)
		goto err_alloc;

	ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
	if (IS_ERR(ct_priv->zone_mapping)) {
		err = PTR_ERR(ct_priv->zone_mapping);
		goto err_mapping_zone;
	}

	ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
	if (IS_ERR(ct_priv->labels_mapping)) {
		err = PTR_ERR(ct_priv->labels_mapping);
		goto err_mapping_labels;
	}
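
	/* Both mappings compress values too wide for a register into small
	 * ids: the u16 zone into a zone restore id, and the 128-bit
	 * (4 * u32) conntrack labels into an id that fits
	 * MLX5_CT_LABELS_MASK.
	 */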

	spin_lock_init(&ct_priv->ht_lock);
	ct_priv->ns_type = ns_type;
	ct_priv->chains = chains;
	ct_priv->netdev = priv->netdev;
	ct_priv->dev = priv->mdev;
	ct_priv->mod_hdr_tbl = mod_hdr;
	ct_priv->ct = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct)) {
		err = PTR_ERR(ct_priv->ct);
		mlx5_core_warn(dev,
			       "%s, failed to create ct table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_tbl;
	}

	ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct_nat)) {
		err = PTR_ERR(ct_priv->ct_nat);
		mlx5_core_warn(dev,
			       "%s, failed to create ct nat table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_nat_tbl;
	}

	ct_priv->post_ct = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->post_ct)) {
		err = PTR_ERR(ct_priv->post_ct);
		mlx5_core_warn(dev,
			       "%s, failed to create post ct table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_post_ct_tbl;
	}

	idr_init(&ct_priv->fte_ids);
	mutex_init(&ct_priv->control_lock);
	rhashtable_init(&ct_priv->zone_ht, &zone_params);
	rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
	rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);

	return ct_priv;

err_post_ct_tbl:
	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
err_ct_nat_tbl:
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
err_ct_tbl:
	mapping_destroy(ct_priv->labels_mapping);
err_mapping_labels:
	mapping_destroy(ct_priv->zone_mapping);
err_mapping_zone:
	kfree(ct_priv);
err_alloc:
err_support:

	return NULL;
}
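
/* Illustrative init/clean pairing (sketch; the caller context and variable
 * names are hypothetical):
 *
 *	ct_priv = mlx5_tc_ct_init(priv, chains, mod_hdr_tbl,
 *				  MLX5_FLOW_NAMESPACE_FDB);
 *	if (!ct_priv)
 *		... continue without CT offload ...
 *	...
 *	mlx5_tc_ct_clean(ct_priv);
 */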

void
mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_fs_chains *chains;

	if (!ct_priv)
		return;

	chains = ct_priv->chains;

	mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
	mapping_destroy(ct_priv->zone_mapping);
	mapping_destroy(ct_priv->labels_mapping);

	rhashtable_destroy(&ct_priv->ct_tuples_ht);
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
	rhashtable_destroy(&ct_priv->zone_ht);
	mutex_destroy(&ct_priv->control_lock);
	idr_destroy(&ct_priv->fte_ids);
	kfree(ct_priv);
}

bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct sk_buff *skb, u8 zone_restore_id)
{
	struct mlx5_ct_tuple tuple = {};
	struct mlx5_ct_entry *entry;
	u16 zone;

	if (!ct_priv || !zone_restore_id)
		return true;

	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
		return false;

	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
		return false;

	spin_lock(&ct_priv->ht_lock);

	entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
	if (!entry) {
		spin_unlock(&ct_priv->ht_lock);
		return false;
	}

	if (IS_ERR(entry)) {
		spin_unlock(&ct_priv->ht_lock);
		return false;
	}
	spin_unlock(&ct_priv->ht_lock);

	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
	__mlx5_tc_ct_entry_put(entry);

	return true;
}
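
/* Illustrative use on the miss/restore path (sketch; the surrounding handler
 * is hypothetical). zone_restore_id is recovered from packet metadata by the
 * caller; a false return means the skb could not be restored:
 *
 *	if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id))
 *		... drop the skb or fall back to software conntrack ...
 */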