1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <net/netfilter/nf_conntrack.h>
5 #include <net/netfilter/nf_conntrack_core.h>
6 #include <net/netfilter/nf_conntrack_zones.h>
7 #include <net/netfilter/nf_conntrack_labels.h>
8 #include <net/netfilter/nf_conntrack_helper.h>
9 #include <net/netfilter/nf_conntrack_acct.h>
10 #include <uapi/linux/tc_act/tc_pedit.h>
11 #include <net/tc_act/tc_ct.h>
12 #include <net/flow_offload.h>
13 #include <net/netfilter/nf_flow_table.h>
14 #include <linux/workqueue.h>
15 #include <linux/refcount.h>
16 #include <linux/xarray.h>
17 #include <linux/if_macvlan.h>
18 #include <linux/debugfs.h>
19 
20 #include "lib/fs_chains.h"
21 #include "en/tc_ct.h"
22 #include "en/tc/ct_fs.h"
23 #include "en/tc_priv.h"
24 #include "en/mod_hdr.h"
25 #include "en/mapping.h"
26 #include "en/tc/post_act.h"
27 #include "en.h"
28 #include "en_tc.h"
29 #include "en_rep.h"
30 #include "fs_core.h"
31 
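/* Conntrack state bits as encoded in the ct_state metadata register
 * (CTSTATE_TO_REG); used both for matching and for restoring the state
 * to the skb after a miss.
 */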
32 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
33 #define MLX5_CT_STATE_TRK_BIT BIT(2)
34 #define MLX5_CT_STATE_NAT_BIT BIT(3)
35 #define MLX5_CT_STATE_REPLY_BIT BIT(4)
36 #define MLX5_CT_STATE_RELATED_BIT BIT(5)
37 #define MLX5_CT_STATE_INVALID_BIT BIT(6)
38 
39 #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
40 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
41 
/* Statically allocate modify actions for
 * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
 * More actions are allocated dynamically if needed (e.g. for ipv6 snat + dnat).
 */
46 #define MLX5_CT_MIN_MOD_ACTS 10
47 
48 #define ct_dbg(fmt, args...)\
49 	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
50 
51 struct mlx5_tc_ct_debugfs {
52 	struct {
53 		atomic_t offloaded;
54 		atomic_t rx_dropped;
55 	} stats;
56 
57 	struct dentry *root;
58 };
59 
60 struct mlx5_tc_ct_priv {
61 	struct mlx5_core_dev *dev;
62 	const struct net_device *netdev;
63 	struct mod_hdr_tbl *mod_hdr_tbl;
64 	struct xarray tuple_ids;
65 	struct rhashtable zone_ht;
66 	struct rhashtable ct_tuples_ht;
67 	struct rhashtable ct_tuples_nat_ht;
68 	struct mlx5_flow_table *ct;
69 	struct mlx5_flow_table *ct_nat;
70 	struct mlx5e_post_act *post_act;
71 	struct mutex control_lock; /* guards parallel adds/dels */
72 	struct mapping_ctx *zone_mapping;
73 	struct mapping_ctx *labels_mapping;
74 	enum mlx5_flow_namespace_type ns_type;
75 	struct mlx5_fs_chains *chains;
76 	struct mlx5_ct_fs *fs;
77 	struct mlx5_ct_fs_ops *fs_ops;
78 	spinlock_t ht_lock; /* protects ft entries */
79 
80 	struct mlx5_tc_ct_debugfs debugfs;
81 };
82 
83 struct mlx5_ct_flow {
84 	struct mlx5_flow_attr *pre_ct_attr;
85 	struct mlx5_flow_handle *pre_ct_rule;
86 	struct mlx5_ct_ft *ft;
87 	u32 chain_mapping;
88 };
89 
90 struct mlx5_ct_zone_rule {
91 	struct mlx5_ct_fs_rule *rule;
92 	struct mlx5e_mod_hdr_handle *mh;
93 	struct mlx5_flow_attr *attr;
94 	bool nat;
95 };
96 
97 struct mlx5_tc_ct_pre {
98 	struct mlx5_flow_table *ft;
99 	struct mlx5_flow_group *flow_grp;
100 	struct mlx5_flow_group *miss_grp;
101 	struct mlx5_flow_handle *flow_rule;
102 	struct mlx5_flow_handle *miss_rule;
103 	struct mlx5_modify_hdr *modify_hdr;
104 };
105 
106 struct mlx5_ct_ft {
107 	struct rhash_head node;
108 	u16 zone;
109 	u32 zone_restore_id;
110 	refcount_t refcount;
111 	struct nf_flowtable *nf_ft;
112 	struct mlx5_tc_ct_priv *ct_priv;
113 	struct rhashtable ct_entries_ht;
114 	struct mlx5_tc_ct_pre pre_ct;
115 	struct mlx5_tc_ct_pre pre_ct_nat;
116 };
117 
118 struct mlx5_ct_tuple {
119 	u16 addr_type;
120 	__be16 n_proto;
121 	u8 ip_proto;
122 	struct {
123 		union {
124 			__be32 src_v4;
125 			struct in6_addr src_v6;
126 		};
127 		union {
128 			__be32 dst_v4;
129 			struct in6_addr dst_v6;
130 		};
131 	} ip;
132 	struct {
133 		__be16 src;
134 		__be16 dst;
135 	} port;
136 
137 	u16 zone;
138 };
139 
140 struct mlx5_ct_counter {
141 	struct mlx5_fc *counter;
142 	refcount_t refcount;
143 	bool is_shared;
144 };
145 
146 enum {
147 	MLX5_CT_ENTRY_FLAG_VALID,
148 };
149 
150 struct mlx5_ct_entry {
151 	struct rhash_head node;
152 	struct rhash_head tuple_node;
153 	struct rhash_head tuple_nat_node;
154 	struct mlx5_ct_counter *counter;
155 	unsigned long cookie;
156 	unsigned long restore_cookie;
157 	struct mlx5_ct_tuple tuple;
158 	struct mlx5_ct_tuple tuple_nat;
159 	struct mlx5_ct_zone_rule zone_rules[2];
160 
161 	struct mlx5_tc_ct_priv *ct_priv;
162 	struct work_struct work;
163 
164 	refcount_t refcnt;
165 	unsigned long flags;
166 };
167 
168 static void
169 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
170 				 struct mlx5_flow_attr *attr,
171 				 struct mlx5e_mod_hdr_handle *mh);
172 
173 static const struct rhashtable_params cts_ht_params = {
174 	.head_offset = offsetof(struct mlx5_ct_entry, node),
175 	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
176 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
177 	.automatic_shrinking = true,
178 	.min_size = 16 * 1024,
179 };
180 
181 static const struct rhashtable_params zone_params = {
182 	.head_offset = offsetof(struct mlx5_ct_ft, node),
183 	.key_offset = offsetof(struct mlx5_ct_ft, zone),
184 	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
185 	.automatic_shrinking = true,
186 };
187 
188 static const struct rhashtable_params tuples_ht_params = {
189 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
190 	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
191 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
192 	.automatic_shrinking = true,
193 	.min_size = 16 * 1024,
194 };
195 
196 static const struct rhashtable_params tuples_nat_ht_params = {
197 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
198 	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
199 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
200 	.automatic_shrinking = true,
201 	.min_size = 16 * 1024,
202 };
203 
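/* An entry is hashed into ct_tuples_nat_ht only when its NAT tuple differs
 * from the original tuple; a linked rhash node has a non-NULL next pointer.
 */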
204 static bool
205 mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
206 {
207 	return !!(entry->tuple_nat_node.next);
208 }
209 
210 static int
211 mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
212 		       u32 *labels, u32 *id)
213 {
214 	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
215 		*id = 0;
216 		return 0;
217 	}
218 
219 	if (mapping_add(ct_priv->labels_mapping, labels, id))
220 		return -EOPNOTSUPP;
221 
222 	return 0;
223 }
224 
225 static void
226 mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
227 {
228 	if (id)
229 		mapping_remove(ct_priv->labels_mapping, id);
230 }
231 
232 static int
233 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
234 {
235 	struct flow_match_control control;
236 	struct flow_match_basic basic;
237 
238 	flow_rule_match_basic(rule, &basic);
239 	flow_rule_match_control(rule, &control);
240 
241 	tuple->n_proto = basic.key->n_proto;
242 	tuple->ip_proto = basic.key->ip_proto;
243 	tuple->addr_type = control.key->addr_type;
244 
245 	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
246 		struct flow_match_ipv4_addrs match;
247 
248 		flow_rule_match_ipv4_addrs(rule, &match);
249 		tuple->ip.src_v4 = match.key->src;
250 		tuple->ip.dst_v4 = match.key->dst;
251 	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
252 		struct flow_match_ipv6_addrs match;
253 
254 		flow_rule_match_ipv6_addrs(rule, &match);
255 		tuple->ip.src_v6 = match.key->src;
256 		tuple->ip.dst_v6 = match.key->dst;
257 	} else {
258 		return -EOPNOTSUPP;
259 	}
260 
261 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
262 		struct flow_match_ports match;
263 
264 		flow_rule_match_ports(rule, &match);
265 		switch (tuple->ip_proto) {
266 		case IPPROTO_TCP:
267 		case IPPROTO_UDP:
268 			tuple->port.src = match.key->src;
269 			tuple->port.dst = match.key->dst;
270 			break;
271 		default:
272 			return -EOPNOTSUPP;
273 		}
274 	} else {
275 		if (tuple->ip_proto != IPPROTO_GRE)
276 			return -EOPNOTSUPP;
277 	}
278 
279 	return 0;
280 }
281 
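/* Derive the post-NAT tuple by applying the rule's mangle actions on top
 * of the original tuple.
 */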
282 static int
283 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
284 			     struct flow_rule *rule)
285 {
286 	struct flow_action *flow_action = &rule->action;
287 	struct flow_action_entry *act;
288 	u32 offset, val, ip6_offset;
289 	int i;
290 
291 	flow_action_for_each(i, act, flow_action) {
292 		if (act->id != FLOW_ACTION_MANGLE)
293 			continue;
294 
295 		offset = act->mangle.offset;
296 		val = act->mangle.val;
297 		switch (act->mangle.htype) {
298 		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
299 			if (offset == offsetof(struct iphdr, saddr))
300 				tuple->ip.src_v4 = cpu_to_be32(val);
301 			else if (offset == offsetof(struct iphdr, daddr))
302 				tuple->ip.dst_v4 = cpu_to_be32(val);
303 			else
304 				return -EOPNOTSUPP;
305 			break;
306 
307 		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
308 			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
309 			ip6_offset /= 4;
310 			if (ip6_offset < 4)
311 				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
312 			else if (ip6_offset < 8)
313 				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
314 			else
315 				return -EOPNOTSUPP;
316 			break;
317 
318 		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
319 			if (offset == offsetof(struct tcphdr, source))
320 				tuple->port.src = cpu_to_be16(val);
321 			else if (offset == offsetof(struct tcphdr, dest))
322 				tuple->port.dst = cpu_to_be16(val);
323 			else
324 				return -EOPNOTSUPP;
325 			break;
326 
327 		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
328 			if (offset == offsetof(struct udphdr, source))
329 				tuple->port.src = cpu_to_be16(val);
330 			else if (offset == offsetof(struct udphdr, dest))
331 				tuple->port.dst = cpu_to_be16(val);
332 			else
333 				return -EOPNOTSUPP;
334 			break;
335 
336 		default:
337 			return -EOPNOTSUPP;
338 		}
339 	}
340 
341 	return 0;
342 }
343 
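/* Resolve the conntrack entry's ingress device to a steering flow_source
 * hint, walking vlan/macvlan upper devices down to the real device.
 */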
344 static int
345 mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
346 				 struct net_device *ndev)
347 {
348 	struct mlx5e_priv *other_priv = netdev_priv(ndev);
349 	struct mlx5_core_dev *mdev = ct_priv->dev;
350 	bool vf_rep, uplink_rep;
351 
352 	vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
353 	uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
354 
355 	if (vf_rep)
356 		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
357 	if (uplink_rep)
358 		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
359 	if (is_vlan_dev(ndev))
360 		return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
361 	if (netif_is_macvlan(ndev))
362 		return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
363 	if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
364 		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
365 
366 	return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
367 }
368 
369 static int
370 mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
371 			   struct mlx5_flow_spec *spec,
372 			   struct flow_rule *rule)
373 {
374 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
375 				       outer_headers);
376 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
377 				       outer_headers);
378 	u16 addr_type = 0;
379 	u8 ip_proto = 0;
380 
381 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
382 		struct flow_match_basic match;
383 
384 		flow_rule_match_basic(rule, &match);
385 
386 		mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
387 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
388 			 match.mask->ip_proto);
389 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
390 			 match.key->ip_proto);
391 
392 		ip_proto = match.key->ip_proto;
393 	}
394 
395 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
396 		struct flow_match_control match;
397 
398 		flow_rule_match_control(rule, &match);
399 		addr_type = match.key->addr_type;
400 	}
401 
402 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
403 		struct flow_match_ipv4_addrs match;
404 
405 		flow_rule_match_ipv4_addrs(rule, &match);
406 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
407 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
408 		       &match.mask->src, sizeof(match.mask->src));
409 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
410 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
411 		       &match.key->src, sizeof(match.key->src));
412 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
413 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
414 		       &match.mask->dst, sizeof(match.mask->dst));
415 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
416 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
417 		       &match.key->dst, sizeof(match.key->dst));
418 	}
419 
420 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
421 		struct flow_match_ipv6_addrs match;
422 
423 		flow_rule_match_ipv6_addrs(rule, &match);
424 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
425 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
426 		       &match.mask->src, sizeof(match.mask->src));
427 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
428 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
429 		       &match.key->src, sizeof(match.key->src));
430 
431 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
432 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
433 		       &match.mask->dst, sizeof(match.mask->dst));
434 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
435 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
436 		       &match.key->dst, sizeof(match.key->dst));
437 	}
438 
439 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
440 		struct flow_match_ports match;
441 
442 		flow_rule_match_ports(rule, &match);
443 		switch (ip_proto) {
444 		case IPPROTO_TCP:
445 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
446 				 tcp_sport, ntohs(match.mask->src));
447 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
448 				 tcp_sport, ntohs(match.key->src));
449 
450 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
451 				 tcp_dport, ntohs(match.mask->dst));
452 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
453 				 tcp_dport, ntohs(match.key->dst));
454 			break;
455 
456 		case IPPROTO_UDP:
457 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
458 				 udp_sport, ntohs(match.mask->src));
459 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
460 				 udp_sport, ntohs(match.key->src));
461 
462 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
463 				 udp_dport, ntohs(match.mask->dst));
464 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
465 				 udp_dport, ntohs(match.key->dst));
466 			break;
467 		default:
468 			break;
469 		}
470 	}
471 
472 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
473 		struct flow_match_tcp match;
474 
475 		flow_rule_match_tcp(rule, &match);
476 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
477 			 ntohs(match.mask->flags));
478 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
479 			 ntohs(match.key->flags));
480 	}
481 
482 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
483 		struct flow_match_meta match;
484 
485 		flow_rule_match_meta(rule, &match);
486 
487 		if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
488 			struct net_device *dev;
489 
490 			dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
491 			if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
492 				spec->flow_context.flow_source =
493 					mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
494 
495 			dev_put(dev);
496 		}
497 	}
498 
499 	return 0;
500 }
501 
502 static void
503 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
504 {
505 	if (entry->counter->is_shared &&
506 	    !refcount_dec_and_test(&entry->counter->refcount))
507 		return;
508 
509 	mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
510 	kfree(entry->counter);
511 }
512 
513 static void
514 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
515 			  struct mlx5_ct_entry *entry,
516 			  bool nat)
517 {
518 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
519 	struct mlx5_flow_attr *attr = zone_rule->attr;
520 
521 	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
522 
523 	ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
524 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
525 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
526 	kfree(attr);
527 }
528 
529 static void
530 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
531 			   struct mlx5_ct_entry *entry)
532 {
533 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
534 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
535 
536 	atomic_dec(&ct_priv->debugfs.stats.offloaded);
537 }
538 
539 static struct flow_action_entry *
540 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
541 {
542 	struct flow_action *flow_action = &flow_rule->action;
543 	struct flow_action_entry *act;
544 	int i;
545 
546 	flow_action_for_each(i, act, flow_action) {
547 		if (act->id == FLOW_ACTION_CT_METADATA)
548 			return act;
549 	}
550 
551 	return NULL;
552 }
553 
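/* Write the conntrack metadata (ct_state, mark, labels mapping id and zone
 * restore id) into the metadata registers so it can be matched on later and
 * restored to the skb after a miss.
 */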
554 static int
555 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
556 			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
557 			       u8 ct_state,
558 			       u32 mark,
559 			       u32 labels_id,
560 			       u8 zone_restore_id)
561 {
562 	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
563 	struct mlx5_core_dev *dev = ct_priv->dev;
564 	int err;
565 
566 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
567 					CTSTATE_TO_REG, ct_state);
568 	if (err)
569 		return err;
570 
571 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
572 					MARK_TO_REG, mark);
573 	if (err)
574 		return err;
575 
576 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
577 					LABELS_TO_REG, labels_id);
578 	if (err)
579 		return err;
580 
581 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
582 					ZONE_RESTORE_TO_REG, zone_restore_id);
583 	if (err)
584 		return err;
585 
586 	/* Make another copy of zone id in reg_b for
587 	 * NIC rx flows since we don't copy reg_c1 to
588 	 * reg_b upon miss.
589 	 */
590 	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
591 		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
592 						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
593 		if (err)
594 			return err;
595 	}
596 	return 0;
597 }
598 
599 int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
600 				 struct mlx5e_tc_mod_hdr_acts *mod_acts)
601 {
	return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
603 }
604 
605 static int
606 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
607 				   char *modact)
608 {
609 	u32 offset = act->mangle.offset, field;
610 
611 	switch (act->mangle.htype) {
612 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
613 		MLX5_SET(set_action_in, modact, length, 0);
614 		if (offset == offsetof(struct iphdr, saddr))
615 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
616 		else if (offset == offsetof(struct iphdr, daddr))
617 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
618 		else
619 			return -EOPNOTSUPP;
620 		break;
621 
622 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
623 		MLX5_SET(set_action_in, modact, length, 0);
624 		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
625 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
626 		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
627 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
628 		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
629 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
630 		else if (offset == offsetof(struct ipv6hdr, saddr))
631 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
632 		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
633 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
634 		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
635 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
636 		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
637 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
638 		else if (offset == offsetof(struct ipv6hdr, daddr))
639 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
640 		else
641 			return -EOPNOTSUPP;
642 		break;
643 
644 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
645 		MLX5_SET(set_action_in, modact, length, 16);
646 		if (offset == offsetof(struct tcphdr, source))
647 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
648 		else if (offset == offsetof(struct tcphdr, dest))
649 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
650 		else
651 			return -EOPNOTSUPP;
652 		break;
653 
654 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
655 		MLX5_SET(set_action_in, modact, length, 16);
656 		if (offset == offsetof(struct udphdr, source))
657 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
658 		else if (offset == offsetof(struct udphdr, dest))
659 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
660 		else
661 			return -EOPNOTSUPP;
662 		break;
663 
664 	default:
665 		return -EOPNOTSUPP;
666 	}
667 
668 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
669 	MLX5_SET(set_action_in, modact, offset, 0);
670 	MLX5_SET(set_action_in, modact, field, field);
671 	MLX5_SET(set_action_in, modact, data, act->mangle.val);
672 
673 	return 0;
674 }
675 
676 static int
677 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
678 			    struct flow_rule *flow_rule,
679 			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
680 {
681 	struct flow_action *flow_action = &flow_rule->action;
682 	struct mlx5_core_dev *mdev = ct_priv->dev;
683 	struct flow_action_entry *act;
684 	char *modact;
685 	int err, i;
686 
687 	flow_action_for_each(i, act, flow_action) {
688 		switch (act->id) {
689 		case FLOW_ACTION_MANGLE: {
690 			modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
691 			if (IS_ERR(modact))
692 				return PTR_ERR(modact);
693 
694 			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
695 			if (err)
696 				return err;
697 
698 			mod_acts->num_actions++;
699 		}
700 		break;
701 
702 		case FLOW_ACTION_CT_METADATA:
703 			/* Handled earlier */
704 			continue;
705 		default:
706 			return -EOPNOTSUPP;
707 		}
708 	}
709 
710 	return 0;
711 }
712 
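/* Build the modify-header for a ct entry rule: restore the ct metadata
 * registers and, for entries in the NAT table, rewrite the packet headers
 * according to the conntrack NAT mangle actions.
 */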
713 static int
714 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
715 				struct mlx5_flow_attr *attr,
716 				struct flow_rule *flow_rule,
717 				struct mlx5e_mod_hdr_handle **mh,
718 				u8 zone_restore_id, bool nat_table, bool has_nat)
719 {
720 	DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
721 	DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
722 	struct flow_action_entry *meta;
723 	u16 ct_state = 0;
724 	int err;
725 
726 	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
727 	if (!meta)
728 		return -EOPNOTSUPP;
729 
730 	err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
731 				     &attr->ct_attr.ct_labels_id);
732 	if (err)
733 		return -EOPNOTSUPP;
734 	if (nat_table) {
735 		if (has_nat) {
736 			err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
737 			if (err)
738 				goto err_mapping;
739 		}
740 
741 		ct_state |= MLX5_CT_STATE_NAT_BIT;
742 	}
743 
744 	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
745 	ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
746 	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
747 					     ct_state,
748 					     meta->ct_metadata.mark,
749 					     attr->ct_attr.ct_labels_id,
750 					     zone_restore_id);
751 	if (err)
752 		goto err_mapping;
753 
754 	if (nat_table && has_nat) {
755 		attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
756 							    mod_acts.num_actions,
757 							    mod_acts.actions);
758 		if (IS_ERR(attr->modify_hdr)) {
759 			err = PTR_ERR(attr->modify_hdr);
760 			goto err_mapping;
761 		}
762 
763 		*mh = NULL;
764 	} else {
765 		*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
766 					   ct_priv->mod_hdr_tbl,
767 					   ct_priv->ns_type,
768 					   &mod_acts);
769 		if (IS_ERR(*mh)) {
770 			err = PTR_ERR(*mh);
771 			goto err_mapping;
772 		}
773 		attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
774 	}
775 
776 	mlx5e_mod_hdr_dealloc(&mod_acts);
777 	return 0;
778 
779 err_mapping:
780 	mlx5e_mod_hdr_dealloc(&mod_acts);
781 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
782 	return err;
783 }
784 
785 static void
786 mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
787 				 struct mlx5_flow_attr *attr,
788 				 struct mlx5e_mod_hdr_handle *mh)
789 {
790 	if (mh)
791 		mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
792 	else
793 		mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
794 }
795 
796 static int
797 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
798 			  struct flow_rule *flow_rule,
799 			  struct mlx5_ct_entry *entry,
800 			  bool nat, u8 zone_restore_id)
801 {
802 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
803 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
804 	struct mlx5_flow_spec *spec = NULL;
805 	struct mlx5_flow_attr *attr;
806 	int err;
807 
808 	zone_rule->nat = nat;
809 
810 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
811 	if (!spec)
812 		return -ENOMEM;
813 
814 	attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
815 	if (!attr) {
816 		err = -ENOMEM;
817 		goto err_attr;
818 	}
819 
820 	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
821 					      &zone_rule->mh,
822 					      zone_restore_id,
823 					      nat,
824 					      mlx5_tc_ct_entry_has_nat(entry));
825 	if (err) {
826 		ct_dbg("Failed to create ct entry mod hdr");
827 		goto err_mod_hdr;
828 	}
829 
830 	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
831 		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
832 		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
833 	attr->dest_chain = 0;
834 	attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
835 	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
836 	if (entry->tuple.ip_proto == IPPROTO_TCP ||
837 	    entry->tuple.ip_proto == IPPROTO_UDP)
838 		attr->outer_match_level = MLX5_MATCH_L4;
839 	else
840 		attr->outer_match_level = MLX5_MATCH_L3;
841 	attr->counter = entry->counter->counter;
842 	attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
843 	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
844 		attr->esw_attr->in_mdev = priv->mdev;
845 
846 	mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
847 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
848 
849 	zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
850 	if (IS_ERR(zone_rule->rule)) {
851 		err = PTR_ERR(zone_rule->rule);
852 		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
853 		goto err_rule;
854 	}
855 
856 	zone_rule->attr = attr;
857 
858 	kvfree(spec);
859 	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
860 
861 	return 0;
862 
863 err_rule:
864 	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
865 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
866 err_mod_hdr:
867 	kfree(attr);
868 err_attr:
869 	kvfree(spec);
870 	return err;
871 }
872 
873 static bool
874 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
875 {
876 	return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
877 }
878 
879 static struct mlx5_ct_entry *
880 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
881 {
882 	struct mlx5_ct_entry *entry;
883 
884 	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
885 				       tuples_ht_params);
886 	if (entry && mlx5_tc_ct_entry_valid(entry) &&
887 	    refcount_inc_not_zero(&entry->refcnt)) {
888 		return entry;
889 	} else if (!entry) {
890 		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
891 					       tuple, tuples_nat_ht_params);
892 		if (entry && mlx5_tc_ct_entry_valid(entry) &&
893 		    refcount_inc_not_zero(&entry->refcnt))
894 			return entry;
895 	}
896 
897 	return entry ? ERR_PTR(-EINVAL) : NULL;
898 }
899 
900 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
901 {
902 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
903 
904 	rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
905 			       &entry->tuple_nat_node,
906 			       tuples_nat_ht_params);
907 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
908 			       tuples_ht_params);
909 }
910 
911 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
912 {
913 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
914 
915 	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
916 
917 	spin_lock_bh(&ct_priv->ht_lock);
918 	mlx5_tc_ct_entry_remove_from_tuples(entry);
919 	spin_unlock_bh(&ct_priv->ht_lock);
920 
921 	mlx5_tc_ct_counter_put(ct_priv, entry);
922 	kfree(entry);
923 }
924 
925 static void
926 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
927 {
928 	if (!refcount_dec_and_test(&entry->refcnt))
929 		return;
930 
931 	mlx5_tc_ct_entry_del(entry);
932 }
933 
934 static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
935 {
936 	struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
937 
938 	mlx5_tc_ct_entry_del(entry);
939 }
940 
941 static void
942 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
943 {
944 	struct mlx5e_priv *priv;
945 
946 	if (!refcount_dec_and_test(&entry->refcnt))
947 		return;
948 
949 	priv = netdev_priv(entry->ct_priv->netdev);
950 	INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
951 	queue_work(priv->wq, &entry->work);
952 }
953 
954 static struct mlx5_ct_counter *
955 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
956 {
957 	struct mlx5_ct_counter *counter;
958 	int ret;
959 
960 	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
961 	if (!counter)
962 		return ERR_PTR(-ENOMEM);
963 
964 	counter->is_shared = false;
965 	counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
966 	if (IS_ERR(counter->counter)) {
967 		ct_dbg("Failed to create counter for ct entry");
968 		ret = PTR_ERR(counter->counter);
969 		kfree(counter);
970 		return ERR_PTR(ret);
971 	}
972 
973 	return counter;
974 }
975 
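/* When conntrack accounting is disabled, both directions of a connection
 * can share one flow counter: look up the entry of the reversed tuple and
 * reuse its counter, otherwise create a new shared counter.
 */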
976 static struct mlx5_ct_counter *
977 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
978 			      struct mlx5_ct_entry *entry)
979 {
980 	struct mlx5_ct_tuple rev_tuple = entry->tuple;
981 	struct mlx5_ct_counter *shared_counter;
982 	struct mlx5_ct_entry *rev_entry;
983 
984 	/* get the reversed tuple */
985 	swap(rev_tuple.port.src, rev_tuple.port.dst);
986 
987 	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
988 		__be32 tmp_addr = rev_tuple.ip.src_v4;
989 
990 		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
991 		rev_tuple.ip.dst_v4 = tmp_addr;
992 	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
993 		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
994 
995 		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
996 		rev_tuple.ip.dst_v6 = tmp_addr;
997 	} else {
998 		return ERR_PTR(-EOPNOTSUPP);
999 	}
1000 
1001 	/* Use the same counter as the reverse direction */
1002 	spin_lock_bh(&ct_priv->ht_lock);
1003 	rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
1004 
1005 	if (IS_ERR(rev_entry)) {
1006 		spin_unlock_bh(&ct_priv->ht_lock);
1007 		goto create_counter;
1008 	}
1009 
1010 	if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
1011 		ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
1012 		shared_counter = rev_entry->counter;
1013 		spin_unlock_bh(&ct_priv->ht_lock);
1014 
1015 		mlx5_tc_ct_entry_put(rev_entry);
1016 		return shared_counter;
1017 	}
1018 
1019 	spin_unlock_bh(&ct_priv->ht_lock);
1020 
1021 create_counter:
1022 
1023 	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
1024 	if (IS_ERR(shared_counter))
1025 		return shared_counter;
1026 
1027 	shared_counter->is_shared = true;
1028 	refcount_set(&shared_counter->refcount, 1);
1029 	return shared_counter;
1030 }
1031 
1032 static int
1033 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
1034 			   struct flow_rule *flow_rule,
1035 			   struct mlx5_ct_entry *entry,
1036 			   u8 zone_restore_id)
1037 {
1038 	int err;
1039 
1040 	if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
1041 		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
1042 	else
1043 		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
1044 
1045 	if (IS_ERR(entry->counter)) {
1046 		err = PTR_ERR(entry->counter);
1047 		return err;
1048 	}
1049 
1050 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
1051 					zone_restore_id);
1052 	if (err)
1053 		goto err_orig;
1054 
1055 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
1056 					zone_restore_id);
1057 	if (err)
1058 		goto err_nat;
1059 
1060 	atomic_inc(&ct_priv->debugfs.stats.offloaded);
1061 	return 0;
1062 
1063 err_nat:
1064 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
1065 err_orig:
1066 	mlx5_tc_ct_counter_put(ct_priv, entry);
1067 	return err;
1068 }
1069 
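/* FLOW_CLS_REPLACE callback from the nf flowtable: translate an offloaded
 * conntrack entry into tuple hashtable entries and a pair of hardware rules,
 * one in the ct table and one in the ct_nat table.
 */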
1070 static int
1071 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
1072 				  struct flow_cls_offload *flow)
1073 {
1074 	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
1075 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1076 	struct flow_action_entry *meta_action;
1077 	unsigned long cookie = flow->cookie;
1078 	struct mlx5_ct_entry *entry;
1079 	int err;
1080 
1081 	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
1082 	if (!meta_action)
1083 		return -EOPNOTSUPP;
1084 
1085 	spin_lock_bh(&ct_priv->ht_lock);
1086 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1087 	if (entry && refcount_inc_not_zero(&entry->refcnt)) {
1088 		spin_unlock_bh(&ct_priv->ht_lock);
1089 		mlx5_tc_ct_entry_put(entry);
1090 		return -EEXIST;
1091 	}
1092 	spin_unlock_bh(&ct_priv->ht_lock);
1093 
1094 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1095 	if (!entry)
1096 		return -ENOMEM;
1097 
1098 	entry->tuple.zone = ft->zone;
1099 	entry->cookie = flow->cookie;
1100 	entry->restore_cookie = meta_action->ct_metadata.cookie;
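	/* One reference for the hashtables, one for this function; the
	 * latter is released once the entry is fully set up.
	 */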
1101 	refcount_set(&entry->refcnt, 2);
1102 	entry->ct_priv = ct_priv;
1103 
1104 	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
1105 	if (err)
1106 		goto err_set;
1107 
1108 	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
1109 	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
1110 	if (err)
1111 		goto err_set;
1112 
1113 	spin_lock_bh(&ct_priv->ht_lock);
1114 
1115 	err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
1116 					    cts_ht_params);
1117 	if (err)
1118 		goto err_entries;
1119 
1120 	err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
1121 					    &entry->tuple_node,
1122 					    tuples_ht_params);
1123 	if (err)
1124 		goto err_tuple;
1125 
1126 	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
1127 		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
1128 						    &entry->tuple_nat_node,
1129 						    tuples_nat_ht_params);
1130 		if (err)
1131 			goto err_tuple_nat;
1132 	}
1133 	spin_unlock_bh(&ct_priv->ht_lock);
1134 
1135 	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
1136 					 ft->zone_restore_id);
1137 	if (err)
1138 		goto err_rules;
1139 
1140 	set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
1141 	mlx5_tc_ct_entry_put(entry); /* this function reference */
1142 
1143 	return 0;
1144 
1145 err_rules:
1146 	spin_lock_bh(&ct_priv->ht_lock);
1147 	if (mlx5_tc_ct_entry_has_nat(entry))
1148 		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
1149 				       &entry->tuple_nat_node, tuples_nat_ht_params);
1150 err_tuple_nat:
1151 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
1152 			       &entry->tuple_node,
1153 			       tuples_ht_params);
1154 err_tuple:
1155 	rhashtable_remove_fast(&ft->ct_entries_ht,
1156 			       &entry->node,
1157 			       cts_ht_params);
1158 err_entries:
1159 	spin_unlock_bh(&ct_priv->ht_lock);
1160 err_set:
1161 	kfree(entry);
1162 	if (err != -EEXIST)
1163 		netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
1164 	return err;
1165 }
1166 
1167 static int
1168 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
1169 				  struct flow_cls_offload *flow)
1170 {
1171 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1172 	unsigned long cookie = flow->cookie;
1173 	struct mlx5_ct_entry *entry;
1174 
1175 	spin_lock_bh(&ct_priv->ht_lock);
1176 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1177 	if (!entry) {
1178 		spin_unlock_bh(&ct_priv->ht_lock);
1179 		return -ENOENT;
1180 	}
1181 
1182 	if (!mlx5_tc_ct_entry_valid(entry)) {
1183 		spin_unlock_bh(&ct_priv->ht_lock);
1184 		return -EINVAL;
1185 	}
1186 
1187 	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
1188 	spin_unlock_bh(&ct_priv->ht_lock);
1189 
1190 	mlx5_tc_ct_entry_put(entry);
1191 
1192 	return 0;
1193 }
1194 
1195 static int
1196 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
1197 				    struct flow_cls_offload *f)
1198 {
1199 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1200 	unsigned long cookie = f->cookie;
1201 	struct mlx5_ct_entry *entry;
1202 	u64 lastuse, packets, bytes;
1203 
1204 	spin_lock_bh(&ct_priv->ht_lock);
1205 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1206 	if (!entry) {
1207 		spin_unlock_bh(&ct_priv->ht_lock);
1208 		return -ENOENT;
1209 	}
1210 
1211 	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
1212 		spin_unlock_bh(&ct_priv->ht_lock);
1213 		return -EINVAL;
1214 	}
1215 
1216 	spin_unlock_bh(&ct_priv->ht_lock);
1217 
1218 	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
1219 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
1220 			  FLOW_ACTION_HW_STATS_DELAYED);
1221 
1222 	mlx5_tc_ct_entry_put(entry);
1223 	return 0;
1224 }
1225 
1226 static int
1227 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
1228 			      void *cb_priv)
1229 {
1230 	struct flow_cls_offload *f = type_data;
1231 	struct mlx5_ct_ft *ft = cb_priv;
1232 
1233 	if (type != TC_SETUP_CLSFLOWER)
1234 		return -EOPNOTSUPP;
1235 
1236 	switch (f->command) {
1237 	case FLOW_CLS_REPLACE:
1238 		return mlx5_tc_ct_block_flow_offload_add(ft, f);
1239 	case FLOW_CLS_DESTROY:
1240 		return mlx5_tc_ct_block_flow_offload_del(ft, f);
1241 	case FLOW_CLS_STATS:
1242 		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
1243 	default:
1244 		break;
1245 	}
1246 
1247 	return -EOPNOTSUPP;
1248 }
1249 
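/* Dissect an skb into a ct tuple so the matching conntrack entry can be
 * looked up, e.g. when restoring ct state after a hardware miss.
 */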
1250 static bool
1251 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
1252 			u16 zone)
1253 {
1254 	struct flow_keys flow_keys;
1255 
1256 	skb_reset_network_header(skb);
1257 	skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
1258 
1259 	tuple->zone = zone;
1260 
1261 	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
1262 	    flow_keys.basic.ip_proto != IPPROTO_UDP &&
1263 	    flow_keys.basic.ip_proto != IPPROTO_GRE)
1264 		return false;
1265 
1266 	if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
1267 	    flow_keys.basic.ip_proto == IPPROTO_UDP) {
1268 		tuple->port.src = flow_keys.ports.src;
1269 		tuple->port.dst = flow_keys.ports.dst;
1270 	}
1271 	tuple->n_proto = flow_keys.basic.n_proto;
1272 	tuple->ip_proto = flow_keys.basic.ip_proto;
1273 
1274 	switch (flow_keys.basic.n_proto) {
1275 	case htons(ETH_P_IP):
1276 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1277 		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
1278 		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
1279 		break;
1280 
1281 	case htons(ETH_P_IPV6):
1282 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1283 		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
1284 		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
1285 		break;
1286 	default:
1287 		goto out;
1288 	}
1289 
1290 	return true;
1291 
1292 out:
1293 	return false;
1294 }
1295 
1296 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
1297 {
1298 	u32 ctstate = 0, ctstate_mask = 0;
1299 
1300 	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
1301 					&ctstate, &ctstate_mask);
1302 
1303 	if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
1304 		return -EOPNOTSUPP;
1305 
1306 	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
1307 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1308 				    ctstate, ctstate_mask);
1309 
1310 	return 0;
1311 }
1312 
1313 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
1314 {
1315 	if (!priv || !ct_attr->ct_labels_id)
1316 		return;
1317 
1318 	mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
1319 }
1320 
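/* Translate a flower ct_state/ct_zone/ct_mark/ct_labels match into matches
 * on the corresponding metadata registers.
 */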
1321 int
1322 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
1323 		     struct mlx5_flow_spec *spec,
1324 		     struct flow_cls_offload *f,
1325 		     struct mlx5_ct_attr *ct_attr,
1326 		     struct netlink_ext_ack *extack)
1327 {
1328 	bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
1329 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1330 	struct flow_dissector_key_ct *mask, *key;
1331 	u32 ctstate = 0, ctstate_mask = 0;
1332 	u16 ct_state_on, ct_state_off;
1333 	u16 ct_state, ct_state_mask;
1334 	struct flow_match_ct match;
1335 	u32 ct_labels[4];
1336 
1337 	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
1338 		return 0;
1339 
1340 	if (!priv) {
1341 		NL_SET_ERR_MSG_MOD(extack,
1342 				   "offload of ct matching isn't available");
1343 		return -EOPNOTSUPP;
1344 	}
1345 
1346 	flow_rule_match_ct(rule, &match);
1347 
1348 	key = match.key;
1349 	mask = match.mask;
1350 
1351 	ct_state = key->ct_state;
1352 	ct_state_mask = mask->ct_state;
1353 
1354 	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
1355 			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
1356 			      TCA_FLOWER_KEY_CT_FLAGS_NEW |
1357 			      TCA_FLOWER_KEY_CT_FLAGS_REPLY |
1358 			      TCA_FLOWER_KEY_CT_FLAGS_RELATED |
1359 			      TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
1360 		NL_SET_ERR_MSG_MOD(extack,
1361 				   "only ct_state trk, est, new and rpl are supported for offload");
1362 		return -EOPNOTSUPP;
1363 	}
1364 
1365 	ct_state_on = ct_state & ct_state_mask;
1366 	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
1367 	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1368 	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1369 	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1370 	rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1371 	rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1372 	inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1373 	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1374 	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1375 	unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1376 	unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
1377 	uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
1378 
1379 	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
1380 	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1381 	ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
1382 	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
1383 	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1384 	ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
1385 	ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
1386 	ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
1387 
1388 	if (rel) {
1389 		NL_SET_ERR_MSG_MOD(extack,
1390 				   "matching on ct_state +rel isn't supported");
1391 		return -EOPNOTSUPP;
1392 	}
1393 
1394 	if (inv) {
1395 		NL_SET_ERR_MSG_MOD(extack,
1396 				   "matching on ct_state +inv isn't supported");
1397 		return -EOPNOTSUPP;
1398 	}
1399 
1400 	if (new) {
1401 		NL_SET_ERR_MSG_MOD(extack,
1402 				   "matching on ct_state +new isn't supported");
1403 		return -EOPNOTSUPP;
1404 	}
1405 
1406 	if (mask->ct_zone)
1407 		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1408 					    key->ct_zone, MLX5_CT_ZONE_MASK);
1409 	if (ctstate_mask)
1410 		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1411 					    ctstate, ctstate_mask);
1412 	if (mask->ct_mark)
1413 		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
1414 					    key->ct_mark, mask->ct_mark);
1415 	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
1416 	    mask->ct_labels[3]) {
1417 		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
1418 		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
1419 		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
1420 		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
1421 		if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
1422 			return -EOPNOTSUPP;
1423 		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
1424 					    MLX5_CT_LABELS_MASK);
1425 	}
1426 
1427 	return 0;
1428 }
1429 
1430 int
1431 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
1432 			struct mlx5_flow_attr *attr,
1433 			struct mlx5e_tc_mod_hdr_acts *mod_acts,
1434 			const struct flow_action_entry *act,
1435 			struct netlink_ext_ack *extack)
1436 {
1437 	if (!priv) {
1438 		NL_SET_ERR_MSG_MOD(extack,
1439 				   "offload of ct action isn't available");
1440 		return -EOPNOTSUPP;
1441 	}
1442 
1443 	attr->ct_attr.zone = act->ct.zone;
1444 	attr->ct_attr.ct_action = act->ct.action;
1445 	attr->ct_attr.nf_ft = act->ct.flow_table;
1446 
1447 	return 0;
1448 }
1449 
1450 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1451 				  struct mlx5_tc_ct_pre *pre_ct,
1452 				  bool nat)
1453 {
1454 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1455 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1456 	struct mlx5_core_dev *dev = ct_priv->dev;
1457 	struct mlx5_flow_table *ft = pre_ct->ft;
1458 	struct mlx5_flow_destination dest = {};
1459 	struct mlx5_flow_act flow_act = {};
1460 	struct mlx5_modify_hdr *mod_hdr;
1461 	struct mlx5_flow_handle *rule;
1462 	struct mlx5_flow_spec *spec;
1463 	u32 ctstate;
1464 	u16 zone;
1465 	int err;
1466 
1467 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1468 	if (!spec)
1469 		return -ENOMEM;
1470 
1471 	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1472 	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1473 					ZONE_TO_REG, zone);
1474 	if (err) {
1475 		ct_dbg("Failed to set zone register mapping");
1476 		goto err_mapping;
1477 	}
1478 
1479 	mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1480 					   pre_mod_acts.num_actions,
1481 					   pre_mod_acts.actions);
1482 
1483 	if (IS_ERR(mod_hdr)) {
1484 		err = PTR_ERR(mod_hdr);
1485 		ct_dbg("Failed to create pre ct mod hdr");
1486 		goto err_mapping;
1487 	}
1488 	pre_ct->modify_hdr = mod_hdr;
1489 
1490 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1491 			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1492 	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1493 	flow_act.modify_hdr = mod_hdr;
1494 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1495 
1496 	/* add flow rule */
1497 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1498 				    zone, MLX5_CT_ZONE_MASK);
1499 	ctstate = MLX5_CT_STATE_TRK_BIT;
1500 	if (nat)
1501 		ctstate |= MLX5_CT_STATE_NAT_BIT;
1502 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1503 
1504 	dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
1505 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1506 	if (IS_ERR(rule)) {
1507 		err = PTR_ERR(rule);
1508 		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1509 		goto err_flow_rule;
1510 	}
1511 	pre_ct->flow_rule = rule;
1512 
1513 	/* add miss rule */
1514 	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1515 	rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
1516 	if (IS_ERR(rule)) {
1517 		err = PTR_ERR(rule);
1518 		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1519 		goto err_miss_rule;
1520 	}
1521 	pre_ct->miss_rule = rule;
1522 
1523 	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
1524 	kvfree(spec);
1525 	return 0;
1526 
1527 err_miss_rule:
1528 	mlx5_del_flow_rules(pre_ct->flow_rule);
1529 err_flow_rule:
1530 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1531 err_mapping:
1532 	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
1533 	kvfree(spec);
1534 	return err;
1535 }
1536 
1537 static void
1538 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1539 		       struct mlx5_tc_ct_pre *pre_ct)
1540 {
1541 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1542 	struct mlx5_core_dev *dev = ct_priv->dev;
1543 
1544 	mlx5_del_flow_rules(pre_ct->flow_rule);
1545 	mlx5_del_flow_rules(pre_ct->miss_rule);
1546 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1547 }
1548 
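/* Create a per-zone pre_ct table. Both rules write the zone to its register;
 * packets already tracked in this zone (+trk, and +nat for the NAT table)
 * are forwarded straight to post_act, everything else misses to the ct table.
 */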
1549 static int
1550 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1551 			struct mlx5_tc_ct_pre *pre_ct,
1552 			bool nat)
1553 {
1554 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1555 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1556 	struct mlx5_core_dev *dev = ct_priv->dev;
1557 	struct mlx5_flow_table_attr ft_attr = {};
1558 	struct mlx5_flow_namespace *ns;
1559 	struct mlx5_flow_table *ft;
1560 	struct mlx5_flow_group *g;
1561 	u32 metadata_reg_c_2_mask;
1562 	u32 *flow_group_in;
1563 	void *misc;
1564 	int err;
1565 
1566 	ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1567 	if (!ns) {
1568 		err = -EOPNOTSUPP;
1569 		ct_dbg("Failed to get flow namespace");
1570 		return err;
1571 	}
1572 
1573 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1574 	if (!flow_group_in)
1575 		return -ENOMEM;
1576 
1577 	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
1579 			FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1580 	ft_attr.max_fte = 2;
1581 	ft_attr.level = 1;
1582 	ft = mlx5_create_flow_table(ns, &ft_attr);
1583 	if (IS_ERR(ft)) {
1584 		err = PTR_ERR(ft);
1585 		ct_dbg("Failed to create pre ct table");
1586 		goto out_free;
1587 	}
1588 	pre_ct->ft = ft;
1589 
1590 	/* create flow group */
1591 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1592 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1593 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1594 		 MLX5_MATCH_MISC_PARAMETERS_2);
1595 
1596 	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1597 			    match_criteria.misc_parameters_2);
1598 
1599 	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1600 	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1601 	if (nat)
1602 		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1603 
1604 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1605 		 metadata_reg_c_2_mask);
1606 
1607 	g = mlx5_create_flow_group(ft, flow_group_in);
1608 	if (IS_ERR(g)) {
1609 		err = PTR_ERR(g);
1610 		ct_dbg("Failed to create pre ct group");
1611 		goto err_flow_grp;
1612 	}
1613 	pre_ct->flow_grp = g;
1614 
1615 	/* create miss group */
1616 	memset(flow_group_in, 0, inlen);
1617 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1618 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1619 	g = mlx5_create_flow_group(ft, flow_group_in);
1620 	if (IS_ERR(g)) {
1621 		err = PTR_ERR(g);
1622 		ct_dbg("Failed to create pre ct miss group");
1623 		goto err_miss_grp;
1624 	}
1625 	pre_ct->miss_grp = g;
1626 
1627 	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1628 	if (err)
1629 		goto err_add_rules;
1630 
1631 	kvfree(flow_group_in);
1632 	return 0;
1633 
1634 err_add_rules:
1635 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1636 err_miss_grp:
1637 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1638 err_flow_grp:
1639 	mlx5_destroy_flow_table(ft);
1640 out_free:
1641 	kvfree(flow_group_in);
1642 	return err;
1643 }
1644 
1645 static void
1646 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1647 		       struct mlx5_tc_ct_pre *pre_ct)
1648 {
1649 	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1650 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1651 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1652 	mlx5_destroy_flow_table(pre_ct->ft);
1653 }
1654 
1655 static int
1656 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1657 {
1658 	int err;
1659 
1660 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1661 	if (err)
1662 		return err;
1663 
1664 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1665 	if (err)
1666 		goto err_pre_ct_nat;
1667 
1668 	return 0;
1669 
1670 err_pre_ct_nat:
1671 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1672 	return err;
1673 }
1674 
1675 static void
1676 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1677 {
1678 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1679 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1680 }
1681 
/* Set a separate lock class for ct_entries_ht to avoid a false lock
 * dependency warning: when the last flow of a group is deleted and the
 * group itself is then deleted, del_sw_flow_group() calls
 * rhashtable_destroy() on fg->ftes_hash, which takes that table's
 * ht->mutex, a different mutex than the ht->mutex used here.
 */
1688 static struct lock_class_key ct_entries_ht_lock_key;
1689 
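/* Get (or create) the per-zone ct flow table object and register it as a
 * flow offload callback on the nf flowtable, so conntrack entries of this
 * zone are offloaded through mlx5_tc_ct_block_flow_offload().
 */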
1690 static struct mlx5_ct_ft *
1691 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1692 		     struct nf_flowtable *nf_ft)
1693 {
1694 	struct mlx5_ct_ft *ft;
1695 	int err;
1696 
1697 	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1698 	if (ft) {
1699 		refcount_inc(&ft->refcount);
1700 		return ft;
1701 	}
1702 
1703 	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1704 	if (!ft)
1705 		return ERR_PTR(-ENOMEM);
1706 
1707 	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1708 	if (err)
1709 		goto err_mapping;
1710 
1711 	ft->zone = zone;
1712 	ft->nf_ft = nf_ft;
1713 	ft->ct_priv = ct_priv;
1714 	refcount_set(&ft->refcount, 1);
1715 
1716 	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1717 	if (err)
1718 		goto err_alloc_pre_ct;
1719 
1720 	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1721 	if (err)
1722 		goto err_init;
1723 
1724 	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
1725 
1726 	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1727 				     zone_params);
1728 	if (err)
1729 		goto err_insert;
1730 
1731 	err = nf_flow_table_offload_add_cb(ft->nf_ft,
1732 					   mlx5_tc_ct_block_flow_offload, ft);
1733 	if (err)
1734 		goto err_add_cb;
1735 
1736 	return ft;
1737 
1738 err_add_cb:
1739 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1740 err_insert:
1741 	rhashtable_destroy(&ft->ct_entries_ht);
1742 err_init:
1743 	mlx5_tc_ct_free_pre_ct_tables(ft);
1744 err_alloc_pre_ct:
1745 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1746 err_mapping:
1747 	kfree(ft);
1748 	return ERR_PTR(err);
1749 }
1750 
1751 static void
1752 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1753 {
1754 	struct mlx5_ct_entry *entry = ptr;
1755 
1756 	mlx5_tc_ct_entry_put(entry);
1757 }
1758 
1759 static void
1760 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1761 {
1762 	struct mlx5e_priv *priv;
1763 
1764 	if (!refcount_dec_and_test(&ft->refcount))
1765 		return;
1766 
1767 	nf_flow_table_offload_del_cb(ft->nf_ft,
1768 				     mlx5_tc_ct_block_flow_offload, ft);
1769 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1770 	rhashtable_free_and_destroy(&ft->ct_entries_ht,
1771 				    mlx5_tc_ct_flush_ft_entry,
1772 				    ct_priv);
1773 	priv = netdev_priv(ct_priv->netdev);
1774 	flush_workqueue(priv->wq);
1775 	mlx5_tc_ct_free_pre_ct_tables(ft);
1776 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1777 	kfree(ft);
1778 }
1779 
1780 /* We translate the tc filter with CT action to the following HW model:
1781  *
1782  * +---------------------+
1783  * + ft prio (tc chain)  +
1784  * + original match      +
1785  * +---------------------+
1786  *      | set chain miss mapping
1787  *      | set fte_id
1788  *      | set tunnel_id
1789  *      | do decap
1790  *      v
1791  * +---------------------+
1792  * + pre_ct/pre_ct_nat   +  if matches     +-------------------------+
1793  * + zone+nat match      +---------------->+ post_act (see below)    +
1794  * +---------------------+  set zone       +-------------------------+
1795  *      | set zone
1796  *      v
1797  * +--------------------+
1798  * + CT (nat or no nat) +
1799  * + tuple + zone match +
1800  * +--------------------+
1801  *      | set mark
1802  *      | set labels_id
1803  *      | set established
 *      | set zone_restore
1805  *      | do nat (if needed)
1806  *      v
1807  * +--------------+
1808  * + post_act     + original filter actions
1809  * + fte_id match +------------------------>
1810  * +--------------+
1811  */
1812 static struct mlx5_flow_handle *
1813 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1814 			  struct mlx5_flow_spec *orig_spec,
1815 			  struct mlx5_flow_attr *attr)
1816 {
1817 	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1818 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1819 	struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
1820 	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1821 	struct mlx5_flow_attr *pre_ct_attr;
1822 	struct mlx5_modify_hdr *mod_hdr;
1823 	struct mlx5_ct_flow *ct_flow;
1824 	int chain_mapping = 0, err;
1825 	struct mlx5_ct_ft *ft;
1826 
1827 	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!ct_flow)
		return ERR_PTR(-ENOMEM);
1831 
1832 	/* Register for CT established events */
1833 	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1834 				  attr->ct_attr.nf_ft);
1835 	if (IS_ERR(ft)) {
1836 		err = PTR_ERR(ft);
1837 		ct_dbg("Failed to register to ft callback");
1838 		goto err_ft;
1839 	}
1840 	ct_flow->ft = ft;
1841 
1842 	/* Base flow attributes of both rules on original rule attribute */
1843 	ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1844 	if (!ct_flow->pre_ct_attr) {
1845 		err = -ENOMEM;
1846 		goto err_alloc_pre;
1847 	}
1848 
1849 	pre_ct_attr = ct_flow->pre_ct_attr;
1850 	memcpy(pre_ct_attr, attr, attr_sz);
1851 	pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;
1852 
1853 	/* Modify the original rule's action to fwd and modify, leave decap */
1854 	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
1855 	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1856 			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1857 
	/* Write the chain miss tag so a miss in the ct table restores the
	 * chain, since such packets don't go through all prios of this
	 * chain the way a normal tc rule miss does.
	 */
1862 	err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
1863 					    &chain_mapping);
1864 	if (err) {
1865 		ct_dbg("Failed to get chain register mapping for chain");
1866 		goto err_get_chain;
1867 	}
1868 	ct_flow->chain_mapping = chain_mapping;
1869 
1870 	err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
1871 					CHAIN_TO_REG, chain_mapping);
1872 	if (err) {
1873 		ct_dbg("Failed to set chain register mapping");
1874 		goto err_mapping;
1875 	}
1876 
	/* If the original flow decaps, the decap is done before jumping
	 * into the ct table, so rewrite the tunnel mapping id into its
	 * register here.
	 */
1880 	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
1881 	    attr->chain == 0) {
1882 		err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
1883 						ct_priv->ns_type,
1884 						TUNNEL_TO_REG,
1885 						attr->tunnel_id);
1886 		if (err) {
1887 			ct_dbg("Failed to set tunnel register mapping");
1888 			goto err_mapping;
1889 		}
1890 	}
1891 
1892 	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1893 					   pre_mod_acts->num_actions,
1894 					   pre_mod_acts->actions);
1895 	if (IS_ERR(mod_hdr)) {
1896 		err = PTR_ERR(mod_hdr);
1897 		ct_dbg("Failed to create pre ct mod hdr");
1898 		goto err_mapping;
1899 	}
1900 	pre_ct_attr->modify_hdr = mod_hdr;
1901 
	/* Point the original rule at the zone's pre_ct table
	 * (pre_ct_nat if NAT was requested).
	 */
1903 	pre_ct_attr->dest_chain = 0;
1904 	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1905 	ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
1906 						   pre_ct_attr);
1907 	if (IS_ERR(ct_flow->pre_ct_rule)) {
1908 		err = PTR_ERR(ct_flow->pre_ct_rule);
1909 		ct_dbg("Failed to add pre ct rule");
1910 		goto err_insert_orig;
1911 	}
1912 
1913 	attr->ct_attr.ct_flow = ct_flow;
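	/* The modify header object allocated above holds its own copy of the
	 * actions, so the parse-time staging array can be freed here.
	 */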
1914 	mlx5e_mod_hdr_dealloc(pre_mod_acts);
1915 
1916 	return ct_flow->pre_ct_rule;
1917 
1918 err_insert_orig:
1919 	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1920 err_mapping:
1921 	mlx5e_mod_hdr_dealloc(pre_mod_acts);
1922 	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1923 err_get_chain:
1924 	kfree(ct_flow->pre_ct_attr);
1925 err_alloc_pre:
1926 	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1927 err_ft:
1928 	kfree(ct_flow);
1929 	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1930 	return ERR_PTR(err);
1931 }
1932 
1933 struct mlx5_flow_handle *
1934 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
1935 			struct mlx5_flow_spec *spec,
1936 			struct mlx5_flow_attr *attr,
1937 			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
1938 {
1939 	struct mlx5_flow_handle *rule;
1940 
1941 	if (!priv)
1942 		return ERR_PTR(-EOPNOTSUPP);
1943 
1944 	mutex_lock(&priv->control_lock);
1945 	rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
1946 	mutex_unlock(&priv->control_lock);
1947 
1948 	return rule;
1949 }
1950 
1951 static void
1952 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
1953 			 struct mlx5_ct_flow *ct_flow,
1954 			 struct mlx5_flow_attr *attr)
1955 {
1956 	struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
1957 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1958 
1959 	mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
1960 	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1961 
1962 	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1963 	mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
1964 
1965 	kfree(ct_flow->pre_ct_attr);
1966 	kfree(ct_flow);
1967 }
1968 
1969 void
1970 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
1971 		       struct mlx5_flow_attr *attr)
1972 {
1973 	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
1974 
	/* We may also be called from the error path of filter parsing,
	 * before anything was offloaded; there is nothing to clean up then.
	 */
1978 	if (!ct_flow)
1979 		return;
1980 
1981 	mutex_lock(&priv->control_lock);
1982 	__mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
1983 	mutex_unlock(&priv->control_lock);
1984 }
1985 
1986 static int
1987 mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
1988 {
1989 	struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
1990 	struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
1991 	int err;
1992 
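	/* Default to the DMFS steering provider; switch to SMFS when FDB
	 * steering runs in software-managed (SMFS) mode.
	 */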
1993 	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
1994 	    ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
1995 		ct_dbg("Using SMFS ct flow steering provider");
1996 		fs_ops = mlx5_ct_fs_smfs_ops_get();
1997 	}
1998 
1999 	ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
2000 	if (!ct_priv->fs)
2001 		return -ENOMEM;
2002 
2003 	ct_priv->fs->netdev = ct_priv->netdev;
2004 	ct_priv->fs->dev = ct_priv->dev;
2005 	ct_priv->fs_ops = fs_ops;
2006 
2007 	err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
2008 	if (err)
2009 		goto err_init;
2010 
2011 	return 0;
2012 
2013 err_init:
2014 	kfree(ct_priv->fs);
2015 	return err;
2016 }
2017 
2018 static int
2019 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
2020 				  const char **err_msg)
2021 {
2022 	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
		/* The vlan workaround must be avoided for multi-chain rules.
		 * This is only a sanity check, as the pop vlan action should
		 * be supported by any FW that supports ignore_flow_level.
		 */
2027 
2028 		*err_msg = "firmware vlan actions support is missing";
2029 		return -EOPNOTSUPP;
2030 	}
2031 
2032 	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
2033 				    fdb_modify_header_fwd_to_table)) {
		/* CT always writes to registers, which requires modify header
		 * actions. Therefore, both modify header and forward-to-table
		 * (goto) support are required.
		 */
2037 
2038 		*err_msg = "firmware fwd and modify support is missing";
2039 		return -EOPNOTSUPP;
2040 	}
2041 
2042 	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2043 		*err_msg = "register loopback isn't supported";
2044 		return -EOPNOTSUPP;
2045 	}
2046 
2047 	return 0;
2048 }
2049 
2050 static int
2051 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
2052 			      enum mlx5_flow_namespace_type ns_type,
2053 			      struct mlx5e_post_act *post_act)
2054 {
2055 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2056 	const char *err_msg = NULL;
2057 	int err = 0;
2058 
2059 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
2060 	/* cannot restore chain ID on HW miss */
2061 
2062 	err_msg = "tc skb extension missing";
2063 	err = -EOPNOTSUPP;
2064 	goto out_err;
2065 #endif
2066 	if (IS_ERR_OR_NULL(post_act)) {
		/* ignore_flow_level isn't supported by default for VFs, so
		 * post_act won't be available. Skip the error message in
		 * that case.
		 */
2070 		if (priv->mdev->coredev_type != MLX5_COREDEV_VF)
2071 			err_msg = "post action is missing";
2072 		err = -EOPNOTSUPP;
2073 		goto out_err;
2074 	}
2075 
2076 	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
2077 		err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);
2078 
2079 out_err:
2080 	if (err && err_msg)
2081 		netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
2082 	return err;
2083 }
2084 
2085 static void
2086 mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
2087 {
2088 	bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB;
2089 	struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
2090 	char dirname[16] = {};
2091 
	if (snprintf(dirname, sizeof(dirname), "ct_%s", is_fdb ? "fdb" : "nic") >= sizeof(dirname))
		return;
2094 
2095 	ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev));
2096 	debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
2097 				&ct_dbgfs->stats.offloaded);
2098 	debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
2099 				&ct_dbgfs->stats.rx_dropped);
2100 }
2101 
2102 static void
2103 mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
2104 {
2105 	debugfs_remove_recursive(ct_priv->debugfs.root);
2106 }
2107 
2108 #define INIT_ERR_PREFIX "tc ct offload init failed"
2109 
2110 struct mlx5_tc_ct_priv *
2111 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
2112 		struct mod_hdr_tbl *mod_hdr,
2113 		enum mlx5_flow_namespace_type ns_type,
2114 		struct mlx5e_post_act *post_act)
2115 {
2116 	struct mlx5_tc_ct_priv *ct_priv;
2117 	struct mlx5_core_dev *dev;
2118 	u64 mapping_id;
2119 	int err;
2120 
2121 	dev = priv->mdev;
2122 	err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
2123 	if (err)
2124 		goto err_support;
2125 
2126 	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
2127 	if (!ct_priv)
2128 		goto err_alloc;
2129 
2130 	mapping_id = mlx5_query_nic_system_image_guid(dev);
2131 
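	/* Keying the mappings by the system image GUID lets devices that
	 * share it (e.g. both eswitch ports) share zone and label ids.
	 */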
2132 	ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
2133 						      sizeof(u16), 0, true);
2134 	if (IS_ERR(ct_priv->zone_mapping)) {
2135 		err = PTR_ERR(ct_priv->zone_mapping);
2136 		goto err_mapping_zone;
2137 	}
2138 
2139 	ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
2140 							sizeof(u32) * 4, 0, true);
2141 	if (IS_ERR(ct_priv->labels_mapping)) {
2142 		err = PTR_ERR(ct_priv->labels_mapping);
2143 		goto err_mapping_labels;
2144 	}
2145 
2146 	spin_lock_init(&ct_priv->ht_lock);
2147 	ct_priv->ns_type = ns_type;
2148 	ct_priv->chains = chains;
2149 	ct_priv->netdev = priv->netdev;
2150 	ct_priv->dev = priv->mdev;
2151 	ct_priv->mod_hdr_tbl = mod_hdr;
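	/* ct and ct_nat are global tables shared by all zones; tuple rules
	 * from every zone are installed there and distinguished by the zone
	 * match.
	 */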
2152 	ct_priv->ct = mlx5_chains_create_global_table(chains);
2153 	if (IS_ERR(ct_priv->ct)) {
2154 		err = PTR_ERR(ct_priv->ct);
2155 		mlx5_core_warn(dev,
2156 			       "%s, failed to create ct table err: %d\n",
2157 			       INIT_ERR_PREFIX, err);
2158 		goto err_ct_tbl;
2159 	}
2160 
2161 	ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
2162 	if (IS_ERR(ct_priv->ct_nat)) {
2163 		err = PTR_ERR(ct_priv->ct_nat);
2164 		mlx5_core_warn(dev,
2165 			       "%s, failed to create ct nat table err: %d\n",
2166 			       INIT_ERR_PREFIX, err);
2167 		goto err_ct_nat_tbl;
2168 	}
2169 
2170 	ct_priv->post_act = post_act;
2171 	mutex_init(&ct_priv->control_lock);
2172 	if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
2173 		goto err_ct_zone_ht;
2174 	if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params))
2175 		goto err_ct_tuples_ht;
2176 	if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
2177 		goto err_ct_tuples_nat_ht;
2178 
2179 	err = mlx5_tc_ct_fs_init(ct_priv);
2180 	if (err)
2181 		goto err_init_fs;
2182 
2183 	mlx5_ct_tc_create_dbgfs(ct_priv);
2184 	return ct_priv;
2185 
2186 err_init_fs:
2187 	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2188 err_ct_tuples_nat_ht:
2189 	rhashtable_destroy(&ct_priv->ct_tuples_ht);
2190 err_ct_tuples_ht:
2191 	rhashtable_destroy(&ct_priv->zone_ht);
2192 err_ct_zone_ht:
2193 	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2194 err_ct_nat_tbl:
2195 	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2196 err_ct_tbl:
2197 	mapping_destroy(ct_priv->labels_mapping);
2198 err_mapping_labels:
2199 	mapping_destroy(ct_priv->zone_mapping);
2200 err_mapping_zone:
2201 	kfree(ct_priv);
2202 err_alloc:
2203 err_support:
2204 
2205 	return NULL;
2206 }
2207 
2208 void
2209 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
2210 {
2211 	struct mlx5_fs_chains *chains;
2212 
2213 	if (!ct_priv)
2214 		return;
2215 
2216 	mlx5_ct_tc_remove_dbgfs(ct_priv);
2217 	chains = ct_priv->chains;
2218 
2219 	ct_priv->fs_ops->destroy(ct_priv->fs);
2220 	kfree(ct_priv->fs);
2221 
2222 	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2223 	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2224 	mapping_destroy(ct_priv->zone_mapping);
2225 	mapping_destroy(ct_priv->labels_mapping);
2226 
2227 	rhashtable_destroy(&ct_priv->ct_tuples_ht);
2228 	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2229 	rhashtable_destroy(&ct_priv->zone_ht);
2230 	mutex_destroy(&ct_priv->control_lock);
2231 	kfree(ct_priv);
2232 }
2233 
2234 bool
2235 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2236 			 struct sk_buff *skb, u8 zone_restore_id)
2237 {
2238 	struct mlx5_ct_tuple tuple = {};
2239 	struct mlx5_ct_entry *entry;
2240 	u16 zone;
2241 
2242 	if (!ct_priv || !zone_restore_id)
2243 		return true;
2244 
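	/* Map the zone restore register value back to the zone, rebuild the
	 * tuple from the skb, and look up the offloaded entry so conntrack
	 * state can be restored to the skb.
	 */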
2245 	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2246 		goto out_inc_drop;
2247 
2248 	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2249 		goto out_inc_drop;
2250 
2251 	spin_lock(&ct_priv->ht_lock);
2252 
2253 	entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
	if (IS_ERR_OR_NULL(entry)) {
		spin_unlock(&ct_priv->ht_lock);
		goto out_inc_drop;
	}
2263 	spin_unlock(&ct_priv->ht_lock);
2264 
2265 	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
2266 	__mlx5_tc_ct_entry_put(entry);
2267 
2268 	return true;
2269 
2270 out_inc_drop:
2271 	atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
2272 	return false;
2273 }
2274