1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <net/netfilter/nf_conntrack.h>
5 #include <net/netfilter/nf_conntrack_core.h>
6 #include <net/netfilter/nf_conntrack_zones.h>
7 #include <net/netfilter/nf_conntrack_labels.h>
8 #include <net/netfilter/nf_conntrack_helper.h>
9 #include <net/netfilter/nf_conntrack_acct.h>
10 #include <uapi/linux/tc_act/tc_pedit.h>
11 #include <net/tc_act/tc_ct.h>
12 #include <net/flow_offload.h>
13 #include <net/netfilter/nf_flow_table.h>
14 #include <linux/workqueue.h>
15 #include <linux/refcount.h>
16 #include <linux/xarray.h>
17 
18 #include "lib/fs_chains.h"
19 #include "en/tc_ct.h"
20 #include "en/mod_hdr.h"
21 #include "en/mapping.h"
22 #include "en.h"
23 #include "en_tc.h"
24 #include "en_rep.h"
25 
26 #define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
27 #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
28 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
29 #define MLX5_CT_STATE_TRK_BIT BIT(2)
30 #define MLX5_CT_STATE_NAT_BIT BIT(3)
31 #define MLX5_CT_STATE_REPLY_BIT BIT(4)
32 
33 #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
34 #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
35 #define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX
36 
37 #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
38 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
39 
40 #define ct_dbg(fmt, args...)\
41 	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
42 
43 struct mlx5_tc_ct_priv {
44 	struct mlx5_core_dev *dev;
45 	const struct net_device *netdev;
46 	struct mod_hdr_tbl *mod_hdr_tbl;
47 	struct idr fte_ids;
48 	struct xarray tuple_ids;
49 	struct rhashtable zone_ht;
50 	struct rhashtable ct_tuples_ht;
51 	struct rhashtable ct_tuples_nat_ht;
52 	struct mlx5_flow_table *ct;
53 	struct mlx5_flow_table *ct_nat;
54 	struct mlx5_flow_table *post_ct;
55 	struct mutex control_lock; /* guards parallel adds/dels */
56 	struct mapping_ctx *zone_mapping;
57 	struct mapping_ctx *labels_mapping;
58 	enum mlx5_flow_namespace_type ns_type;
59 	struct mlx5_fs_chains *chains;
60 	spinlock_t ht_lock; /* protects ft entries */
61 };
62 
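/* Bookkeeping for one offloaded tc flow with a ct action: the pre_ct
 * rule that steers packets into the per-zone pre_ct table and the
 * post_ct rule that matches the allocated fte_id and performs the
 * original tc actions.
 */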
63 struct mlx5_ct_flow {
64 	struct mlx5_flow_attr *pre_ct_attr;
65 	struct mlx5_flow_attr *post_ct_attr;
66 	struct mlx5_flow_handle *pre_ct_rule;
67 	struct mlx5_flow_handle *post_ct_rule;
68 	struct mlx5_ct_ft *ft;
69 	u32 fte_id;
70 	u32 chain_mapping;
71 };
72 
73 struct mlx5_ct_zone_rule {
74 	struct mlx5_flow_handle *rule;
75 	struct mlx5e_mod_hdr_handle *mh;
76 	struct mlx5_flow_attr *attr;
77 	bool nat;
78 };
79 
80 struct mlx5_tc_ct_pre {
81 	struct mlx5_flow_table *ft;
82 	struct mlx5_flow_group *flow_grp;
83 	struct mlx5_flow_group *miss_grp;
84 	struct mlx5_flow_handle *flow_rule;
85 	struct mlx5_flow_handle *miss_rule;
86 	struct mlx5_modify_hdr *modify_hdr;
87 };
88 
89 struct mlx5_ct_ft {
90 	struct rhash_head node;
91 	u16 zone;
92 	u32 zone_restore_id;
93 	refcount_t refcount;
94 	struct nf_flowtable *nf_ft;
95 	struct mlx5_tc_ct_priv *ct_priv;
96 	struct rhashtable ct_entries_ht;
97 	struct mlx5_tc_ct_pre pre_ct;
98 	struct mlx5_tc_ct_pre pre_ct_nat;
99 };
100 
101 struct mlx5_ct_tuple {
102 	u16 addr_type;
103 	__be16 n_proto;
104 	u8 ip_proto;
105 	struct {
106 		union {
107 			__be32 src_v4;
108 			struct in6_addr src_v6;
109 		};
110 		union {
111 			__be32 dst_v4;
112 			struct in6_addr dst_v6;
113 		};
114 	} ip;
115 	struct {
116 		__be16 src;
117 		__be16 dst;
118 	} port;
119 
120 	u16 zone;
121 };
122 
123 struct mlx5_ct_counter {
124 	struct mlx5_fc *counter;
125 	refcount_t refcount;
126 	bool is_shared;
127 };
128 
129 enum {
130 	MLX5_CT_ENTRY_FLAG_VALID,
131 };
132 
133 struct mlx5_ct_entry {
134 	struct rhash_head node;
135 	struct rhash_head tuple_node;
136 	struct rhash_head tuple_nat_node;
137 	struct mlx5_ct_counter *counter;
138 	unsigned long cookie;
139 	unsigned long restore_cookie;
140 	struct mlx5_ct_tuple tuple;
141 	struct mlx5_ct_tuple tuple_nat;
142 	struct mlx5_ct_zone_rule zone_rules[2];
143 
144 	struct mlx5_tc_ct_priv *ct_priv;
145 	struct work_struct work;
146 
147 	refcount_t refcnt;
148 	unsigned long flags;
149 };
150 
151 static const struct rhashtable_params cts_ht_params = {
152 	.head_offset = offsetof(struct mlx5_ct_entry, node),
153 	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
154 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
155 	.automatic_shrinking = true,
156 	.min_size = 16 * 1024,
157 };
158 
159 static const struct rhashtable_params zone_params = {
160 	.head_offset = offsetof(struct mlx5_ct_ft, node),
161 	.key_offset = offsetof(struct mlx5_ct_ft, zone),
162 	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
163 	.automatic_shrinking = true,
164 };
165 
166 static const struct rhashtable_params tuples_ht_params = {
167 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
168 	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
169 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
170 	.automatic_shrinking = true,
171 	.min_size = 16 * 1024,
172 };
173 
174 static const struct rhashtable_params tuples_nat_ht_params = {
175 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
176 	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
177 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
178 	.automatic_shrinking = true,
179 	.min_size = 16 * 1024,
180 };
181 
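/* An entry is hashed into the NAT tuples table only when its NAT tuple
 * differs from the original tuple, so a linked tuple_nat_node implies
 * the entry does NAT.
 */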
182 static bool
183 mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
184 {
185 	return !!(entry->tuple_nat_node.next);
186 }
187 
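/* Extract the connection tuple (addresses, ports, protocols) from a
 * conntrack flow_rule. Only TCP/UDP over IPv4/IPv6 is supported;
 * anything else returns -EOPNOTSUPP and is not offloaded.
 */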
188 static int
189 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
190 {
191 	struct flow_match_control control;
192 	struct flow_match_basic basic;
193 
194 	flow_rule_match_basic(rule, &basic);
195 	flow_rule_match_control(rule, &control);
196 
197 	tuple->n_proto = basic.key->n_proto;
198 	tuple->ip_proto = basic.key->ip_proto;
199 	tuple->addr_type = control.key->addr_type;
200 
201 	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
202 		struct flow_match_ipv4_addrs match;
203 
204 		flow_rule_match_ipv4_addrs(rule, &match);
205 		tuple->ip.src_v4 = match.key->src;
206 		tuple->ip.dst_v4 = match.key->dst;
207 	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
208 		struct flow_match_ipv6_addrs match;
209 
210 		flow_rule_match_ipv6_addrs(rule, &match);
211 		tuple->ip.src_v6 = match.key->src;
212 		tuple->ip.dst_v6 = match.key->dst;
213 	} else {
214 		return -EOPNOTSUPP;
215 	}
216 
217 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
218 		struct flow_match_ports match;
219 
220 		flow_rule_match_ports(rule, &match);
221 		switch (tuple->ip_proto) {
222 		case IPPROTO_TCP:
223 		case IPPROTO_UDP:
224 			tuple->port.src = match.key->src;
225 			tuple->port.dst = match.key->dst;
226 			break;
227 		default:
228 			return -EOPNOTSUPP;
229 		}
230 	} else {
231 		return -EOPNOTSUPP;
232 	}
233 
234 	return 0;
235 }
236 
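/* Apply the rule's mangle (NAT) actions on top of a copy of the
 * original tuple to produce the post-NAT tuple used as the key in the
 * NAT tuples hashtable.
 */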
237 static int
238 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
239 			     struct flow_rule *rule)
240 {
241 	struct flow_action *flow_action = &rule->action;
242 	struct flow_action_entry *act;
243 	u32 offset, val, ip6_offset;
244 	int i;
245 
246 	flow_action_for_each(i, act, flow_action) {
247 		if (act->id != FLOW_ACTION_MANGLE)
248 			continue;
249 
250 		offset = act->mangle.offset;
251 		val = act->mangle.val;
252 		switch (act->mangle.htype) {
253 		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
254 			if (offset == offsetof(struct iphdr, saddr))
255 				tuple->ip.src_v4 = cpu_to_be32(val);
256 			else if (offset == offsetof(struct iphdr, daddr))
257 				tuple->ip.dst_v4 = cpu_to_be32(val);
258 			else
259 				return -EOPNOTSUPP;
260 			break;
261 
262 		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
263 			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
264 			ip6_offset /= 4;
265 			if (ip6_offset < 4)
266 				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
267 			else if (ip6_offset < 8)
268 				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
269 			else
270 				return -EOPNOTSUPP;
271 			break;
272 
273 		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
274 			if (offset == offsetof(struct tcphdr, source))
275 				tuple->port.src = cpu_to_be16(val);
276 			else if (offset == offsetof(struct tcphdr, dest))
277 				tuple->port.dst = cpu_to_be16(val);
278 			else
279 				return -EOPNOTSUPP;
280 			break;
281 
282 		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
283 			if (offset == offsetof(struct udphdr, source))
284 				tuple->port.src = cpu_to_be16(val);
285 			else if (offset == offsetof(struct udphdr, dest))
286 				tuple->port.dst = cpu_to_be16(val);
287 			else
288 				return -EOPNOTSUPP;
289 			break;
290 
291 		default:
292 			return -EOPNOTSUPP;
293 		}
294 	}
295 
296 	return 0;
297 }
298 
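/* Translate the conntrack tuple from the flow_rule into an mlx5 match
 * spec on the outer headers: ethertype, ip_proto, IPv4/IPv6 addresses,
 * TCP/UDP ports and TCP flags.
 */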
299 static int
300 mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
301 			   struct flow_rule *rule)
302 {
303 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
304 				       outer_headers);
305 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
306 				       outer_headers);
307 	u16 addr_type = 0;
308 	u8 ip_proto = 0;
309 
310 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
311 		struct flow_match_basic match;
312 
313 		flow_rule_match_basic(rule, &match);
314 
315 		mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
316 				       headers_v);
317 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
318 			 match.mask->ip_proto);
319 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
320 			 match.key->ip_proto);
321 
322 		ip_proto = match.key->ip_proto;
323 	}
324 
325 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
326 		struct flow_match_control match;
327 
328 		flow_rule_match_control(rule, &match);
329 		addr_type = match.key->addr_type;
330 	}
331 
332 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
333 		struct flow_match_ipv4_addrs match;
334 
335 		flow_rule_match_ipv4_addrs(rule, &match);
336 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
337 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
338 		       &match.mask->src, sizeof(match.mask->src));
339 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
340 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
341 		       &match.key->src, sizeof(match.key->src));
342 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
343 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
344 		       &match.mask->dst, sizeof(match.mask->dst));
345 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
346 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
347 		       &match.key->dst, sizeof(match.key->dst));
348 	}
349 
350 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
351 		struct flow_match_ipv6_addrs match;
352 
353 		flow_rule_match_ipv6_addrs(rule, &match);
354 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
355 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
356 		       &match.mask->src, sizeof(match.mask->src));
357 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
358 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
359 		       &match.key->src, sizeof(match.key->src));
360 
361 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
362 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
363 		       &match.mask->dst, sizeof(match.mask->dst));
364 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
365 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
366 		       &match.key->dst, sizeof(match.key->dst));
367 	}
368 
369 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
370 		struct flow_match_ports match;
371 
372 		flow_rule_match_ports(rule, &match);
373 		switch (ip_proto) {
374 		case IPPROTO_TCP:
375 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
376 				 tcp_sport, ntohs(match.mask->src));
377 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
378 				 tcp_sport, ntohs(match.key->src));
379 
380 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
381 				 tcp_dport, ntohs(match.mask->dst));
382 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
383 				 tcp_dport, ntohs(match.key->dst));
384 			break;
385 
386 		case IPPROTO_UDP:
387 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
388 				 udp_sport, ntohs(match.mask->src));
389 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
390 				 udp_sport, ntohs(match.key->src));
391 
392 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
393 				 udp_dport, ntohs(match.mask->dst));
394 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
395 				 udp_dport, ntohs(match.key->dst));
396 			break;
397 		default:
398 			break;
399 		}
400 	}
401 
402 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
403 		struct flow_match_tcp match;
404 
405 		flow_rule_match_tcp(rule, &match);
406 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
407 			 ntohs(match.mask->flags));
408 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
409 			 ntohs(match.key->flags));
410 	}
411 
412 	return 0;
413 }
414 
415 static void
416 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
417 {
418 	if (entry->counter->is_shared &&
419 	    !refcount_dec_and_test(&entry->counter->refcount))
420 		return;
421 
422 	mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
423 	kfree(entry->counter);
424 }
425 
426 static void
427 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
428 			  struct mlx5_ct_entry *entry,
429 			  bool nat)
430 {
431 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
432 	struct mlx5_flow_attr *attr = zone_rule->attr;
433 
434 	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
435 
436 	mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
437 	mlx5e_mod_hdr_detach(ct_priv->dev,
438 			     ct_priv->mod_hdr_tbl, zone_rule->mh);
439 	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
440 	kfree(attr);
441 }
442 
443 static void
444 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
445 			   struct mlx5_ct_entry *entry)
446 {
447 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
448 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
449 }
450 
451 static struct flow_action_entry *
452 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
453 {
454 	struct flow_action *flow_action = &flow_rule->action;
455 	struct flow_action_entry *act;
456 	int i;
457 
458 	flow_action_for_each(i, act, flow_action) {
459 		if (act->id == FLOW_ACTION_CT_METADATA)
460 			return act;
461 	}
462 
463 	return NULL;
464 }
465 
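/* Add modify-header actions that load the CT metadata registers:
 * ct_state bits, mark, the labels mapping id and the zone restore id
 * (duplicated into reg_b for NIC RX flows).
 */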
466 static int
467 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
468 			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
469 			       u8 ct_state,
470 			       u32 mark,
471 			       u32 labels_id,
472 			       u8 zone_restore_id)
473 {
474 	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
475 	struct mlx5_core_dev *dev = ct_priv->dev;
476 	int err;
477 
478 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
479 					CTSTATE_TO_REG, ct_state);
480 	if (err)
481 		return err;
482 
483 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
484 					MARK_TO_REG, mark);
485 	if (err)
486 		return err;
487 
488 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
489 					LABELS_TO_REG, labels_id);
490 	if (err)
491 		return err;
492 
493 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
494 					ZONE_RESTORE_TO_REG, zone_restore_id);
495 	if (err)
496 		return err;
497 
498 	/* Make another copy of the zone restore id in reg_b for
499 	 * NIC RX flows, since reg_c1 is not copied to reg_b
500 	 * upon miss.
501 	 */
502 	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
503 		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
504 						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
505 		if (err)
506 			return err;
507 	}
508 	return 0;
509 }
510 
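/* Convert a single flower mangle action into an mlx5 set_action_in
 * modify-header action. Only full rewrites of IPv4/IPv6 addresses and
 * TCP/UDP ports (the fields conntrack NAT rewrites) are supported.
 */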
511 static int
512 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
513 				   char *modact)
514 {
515 	u32 offset = act->mangle.offset, field;
516 
517 	switch (act->mangle.htype) {
518 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
519 		MLX5_SET(set_action_in, modact, length, 0);
520 		if (offset == offsetof(struct iphdr, saddr))
521 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
522 		else if (offset == offsetof(struct iphdr, daddr))
523 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
524 		else
525 			return -EOPNOTSUPP;
526 		break;
527 
528 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
529 		MLX5_SET(set_action_in, modact, length, 0);
530 		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
531 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
532 		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
533 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
534 		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
535 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
536 		else if (offset == offsetof(struct ipv6hdr, saddr))
537 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
538 		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
539 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
540 		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
541 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
542 		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
543 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
544 		else if (offset == offsetof(struct ipv6hdr, daddr))
545 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
546 		else
547 			return -EOPNOTSUPP;
548 		break;
549 
550 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
551 		MLX5_SET(set_action_in, modact, length, 16);
552 		if (offset == offsetof(struct tcphdr, source))
553 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
554 		else if (offset == offsetof(struct tcphdr, dest))
555 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
556 		else
557 			return -EOPNOTSUPP;
558 		break;
559 
560 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
561 		MLX5_SET(set_action_in, modact, length, 16);
562 		if (offset == offsetof(struct udphdr, source))
563 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
564 		else if (offset == offsetof(struct udphdr, dest))
565 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
566 		else
567 			return -EOPNOTSUPP;
568 		break;
569 
570 	default:
571 		return -EOPNOTSUPP;
572 	}
573 
574 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
575 	MLX5_SET(set_action_in, modact, offset, 0);
576 	MLX5_SET(set_action_in, modact, field, field);
577 	MLX5_SET(set_action_in, modact, data, act->mangle.val);
578 
579 	return 0;
580 }
581 
582 static int
583 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
584 			    struct flow_rule *flow_rule,
585 			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
586 {
587 	struct flow_action *flow_action = &flow_rule->action;
588 	struct mlx5_core_dev *mdev = ct_priv->dev;
589 	struct flow_action_entry *act;
590 	size_t action_size;
591 	char *modact;
592 	int err, i;
593 
594 	action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
595 
596 	flow_action_for_each(i, act, flow_action) {
597 		switch (act->id) {
598 		case FLOW_ACTION_MANGLE: {
599 			err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
600 						    mod_acts);
601 			if (err)
602 				return err;
603 
604 			modact = mod_acts->actions +
605 				 mod_acts->num_actions * action_size;
606 
607 			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
608 			if (err)
609 				return err;
610 
611 			mod_acts->num_actions++;
612 		}
613 		break;
614 
615 		case FLOW_ACTION_CT_METADATA:
616 			/* Handled earlier */
617 			continue;
618 		default:
619 			return -EOPNOTSUPP;
620 		}
621 	}
622 
623 	return 0;
624 }
625 
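/* Build the modify-header for a ct entry rule: map the ct labels to an
 * id, add the NAT rewrites when requested, and set the CT metadata
 * registers (trk/established/nat/reply state, mark, labels id and zone
 * restore id).
 */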
626 static int
627 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
628 				struct mlx5_flow_attr *attr,
629 				struct flow_rule *flow_rule,
630 				struct mlx5e_mod_hdr_handle **mh,
631 				u8 zone_restore_id, bool nat)
632 {
633 	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
634 	struct flow_action_entry *meta;
635 	u16 ct_state = 0;
636 	int err;
637 
638 	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
639 	if (!meta)
640 		return -EOPNOTSUPP;
641 
642 	err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels,
643 			  &attr->ct_attr.ct_labels_id);
644 	if (err)
645 		return -EOPNOTSUPP;
646 	if (nat) {
647 		err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
648 						  &mod_acts);
649 		if (err)
650 			goto err_mapping;
651 
652 		ct_state |= MLX5_CT_STATE_NAT_BIT;
653 	}
654 
655 	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
656 	ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
657 	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
658 					     ct_state,
659 					     meta->ct_metadata.mark,
660 					     attr->ct_attr.ct_labels_id,
661 					     zone_restore_id);
662 	if (err)
663 		goto err_mapping;
664 
665 	*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
666 				   ct_priv->mod_hdr_tbl,
667 				   ct_priv->ns_type,
668 				   &mod_acts);
669 	if (IS_ERR(*mh)) {
670 		err = PTR_ERR(*mh);
671 		goto err_mapping;
672 	}
673 	attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
674 
675 	dealloc_mod_hdr_actions(&mod_acts);
676 	return 0;
677 
678 err_mapping:
679 	dealloc_mod_hdr_actions(&mod_acts);
680 	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
681 	return err;
682 }
683 
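/* Offload one direction of a ct entry: insert a rule into the ct (or
 * ct_nat) table that matches the tuple and zone, applies the CT
 * metadata (and NAT) rewrites, counts the traffic and forwards to the
 * post_ct table.
 */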
684 static int
685 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
686 			  struct flow_rule *flow_rule,
687 			  struct mlx5_ct_entry *entry,
688 			  bool nat, u8 zone_restore_id)
689 {
690 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
691 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
692 	struct mlx5_flow_spec *spec = NULL;
693 	struct mlx5_flow_attr *attr;
694 	int err;
695 
696 	zone_rule->nat = nat;
697 
698 	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
699 	if (!spec)
700 		return -ENOMEM;
701 
702 	attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
703 	if (!attr) {
704 		err = -ENOMEM;
705 		goto err_attr;
706 	}
707 
708 	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
709 					      &zone_rule->mh,
710 					      zone_restore_id, nat);
711 	if (err) {
712 		ct_dbg("Failed to create ct entry mod hdr");
713 		goto err_mod_hdr;
714 	}
715 
716 	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
717 		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
718 		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
719 	attr->dest_chain = 0;
720 	attr->dest_ft = ct_priv->post_ct;
721 	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
722 	attr->outer_match_level = MLX5_MATCH_L4;
723 	attr->counter = entry->counter->counter;
724 	attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
725 	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
726 		attr->esw_attr->in_mdev = priv->mdev;
727 
728 	mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
729 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
730 
731 	zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
732 	if (IS_ERR(zone_rule->rule)) {
733 		err = PTR_ERR(zone_rule->rule);
734 		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
735 		goto err_rule;
736 	}
737 
738 	zone_rule->attr = attr;
739 
740 	kfree(spec);
741 	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
742 
743 	return 0;
744 
745 err_rule:
746 	mlx5e_mod_hdr_detach(ct_priv->dev,
747 			     ct_priv->mod_hdr_tbl, zone_rule->mh);
748 	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
749 err_mod_hdr:
750 	kfree(attr);
751 err_attr:
752 	kfree(spec);
753 	return err;
754 }
755 
756 static bool
757 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
758 {
759 	return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
760 }
761 
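/* Look up a valid ct entry by tuple, first in the original tuples
 * table and then in the NAT tuples table, taking a reference on
 * success. Returns ERR_PTR(-EINVAL) if a matching entry exists but is
 * not usable, or NULL when there is no match.
 */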
762 static struct mlx5_ct_entry *
763 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
764 {
765 	struct mlx5_ct_entry *entry;
766 
767 	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
768 				       tuples_ht_params);
769 	if (entry && mlx5_tc_ct_entry_valid(entry) &&
770 	    refcount_inc_not_zero(&entry->refcnt)) {
771 		return entry;
772 	} else if (!entry) {
773 		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
774 					       tuple, tuples_nat_ht_params);
775 		if (entry && mlx5_tc_ct_entry_valid(entry) &&
776 		    refcount_inc_not_zero(&entry->refcnt))
777 			return entry;
778 	}
779 
780 	return entry ? ERR_PTR(-EINVAL) : NULL;
781 }
782 
783 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
784 {
785 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
786 
787 	rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
788 			       &entry->tuple_nat_node,
789 			       tuples_nat_ht_params);
790 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
791 			       tuples_ht_params);
792 }
793 
794 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
795 {
796 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
797 
798 	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
799 
800 	spin_lock_bh(&ct_priv->ht_lock);
801 	mlx5_tc_ct_entry_remove_from_tuples(entry);
802 	spin_unlock_bh(&ct_priv->ht_lock);
803 
804 	mlx5_tc_ct_counter_put(ct_priv, entry);
805 	kfree(entry);
806 }
807 
808 static void
809 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
810 {
811 	if (!refcount_dec_and_test(&entry->refcnt))
812 		return;
813 
814 	mlx5_tc_ct_entry_del(entry);
815 }
816 
817 static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
818 {
819 	struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
820 
821 	mlx5_tc_ct_entry_del(entry);
822 }
823 
824 static void
825 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
826 {
827 	struct mlx5e_priv *priv;
828 
829 	if (!refcount_dec_and_test(&entry->refcnt))
830 		return;
831 
832 	priv = netdev_priv(entry->ct_priv->netdev);
833 	INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
834 	queue_work(priv->wq, &entry->work);
835 }
836 
837 static struct mlx5_ct_counter *
838 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
839 {
840 	struct mlx5_ct_counter *counter;
841 	int ret;
842 
843 	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
844 	if (!counter)
845 		return ERR_PTR(-ENOMEM);
846 
847 	counter->is_shared = false;
848 	counter->counter = mlx5_fc_create(ct_priv->dev, true);
849 	if (IS_ERR(counter->counter)) {
850 		ct_dbg("Failed to create counter for ct entry");
851 		ret = PTR_ERR(counter->counter);
852 		kfree(counter);
853 		return ERR_PTR(ret);
854 	}
855 
856 	return counter;
857 }
858 
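/* Both directions of a connection can share one hardware counter when
 * per-flow conntrack accounting is off: reuse the counter of the
 * reverse-tuple entry if one exists, otherwise create a new shared
 * counter.
 */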
859 static struct mlx5_ct_counter *
860 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
861 			      struct mlx5_ct_entry *entry)
862 {
863 	struct mlx5_ct_tuple rev_tuple = entry->tuple;
864 	struct mlx5_ct_counter *shared_counter;
865 	struct mlx5_ct_entry *rev_entry;
866 	__be16 tmp_port;
867 
868 	/* Build the reverse-direction tuple */
869 	tmp_port = rev_tuple.port.src;
870 	rev_tuple.port.src = rev_tuple.port.dst;
871 	rev_tuple.port.dst = tmp_port;
872 
873 	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
874 		__be32 tmp_addr = rev_tuple.ip.src_v4;
875 
876 		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
877 		rev_tuple.ip.dst_v4 = tmp_addr;
878 	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
879 		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
880 
881 		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
882 		rev_tuple.ip.dst_v6 = tmp_addr;
883 	} else {
884 		return ERR_PTR(-EOPNOTSUPP);
885 	}
886 
887 	/* Use the same counter as the reverse direction */
888 	spin_lock_bh(&ct_priv->ht_lock);
889 	rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
890 
891 	if (IS_ERR(rev_entry)) {
892 		spin_unlock_bh(&ct_priv->ht_lock);
893 		goto create_counter;
894 	}
895 
896 	if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
897 		ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry);
898 		shared_counter = rev_entry->counter;
899 		spin_unlock_bh(&ct_priv->ht_lock);
900 
901 		mlx5_tc_ct_entry_put(rev_entry);
902 		return shared_counter;
903 	}
904 
905 	spin_unlock_bh(&ct_priv->ht_lock);
906 
907 create_counter:
908 
909 	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
910 	if (IS_ERR(shared_counter))
911 		return shared_counter;
912 
913 	shared_counter->is_shared = true;
914 	refcount_set(&shared_counter->refcount, 1);
915 	return shared_counter;
916 }
917 
918 static int
919 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
920 			   struct flow_rule *flow_rule,
921 			   struct mlx5_ct_entry *entry,
922 			   u8 zone_restore_id)
923 {
924 	int err;
925 
926 	if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
927 		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
928 	else
929 		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
930 
931 	if (IS_ERR(entry->counter)) {
932 		err = PTR_ERR(entry->counter);
933 		return err;
934 	}
935 
936 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
937 					zone_restore_id);
938 	if (err)
939 		goto err_orig;
940 
941 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
942 					zone_restore_id);
943 	if (err)
944 		goto err_nat;
945 
946 	return 0;
947 
948 err_nat:
949 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
950 err_orig:
951 	mlx5_tc_ct_counter_put(ct_priv, entry);
952 	return err;
953 }
954 
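/* FLOW_CLS_REPLACE callback from the nf flowtable: allocate a ct
 * entry, hash it by cookie and by tuple (and NAT tuple when it
 * differs), then install the original and NAT direction rules in
 * hardware.
 */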
955 static int
956 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
957 				  struct flow_cls_offload *flow)
958 {
959 	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
960 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
961 	struct flow_action_entry *meta_action;
962 	unsigned long cookie = flow->cookie;
963 	struct mlx5_ct_entry *entry;
964 	int err;
965 
966 	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
967 	if (!meta_action)
968 		return -EOPNOTSUPP;
969 
970 	spin_lock_bh(&ct_priv->ht_lock);
971 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
972 	if (entry && refcount_inc_not_zero(&entry->refcnt)) {
973 		spin_unlock_bh(&ct_priv->ht_lock);
974 		mlx5_tc_ct_entry_put(entry);
975 		return -EEXIST;
976 	}
977 	spin_unlock_bh(&ct_priv->ht_lock);
978 
979 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
980 	if (!entry)
981 		return -ENOMEM;
982 
983 	entry->tuple.zone = ft->zone;
984 	entry->cookie = flow->cookie;
985 	entry->restore_cookie = meta_action->ct_metadata.cookie;
986 	refcount_set(&entry->refcnt, 2);
987 	entry->ct_priv = ct_priv;
988 
989 	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
990 	if (err)
991 		goto err_set;
992 
993 	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
994 	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
995 	if (err)
996 		goto err_set;
997 
998 	spin_lock_bh(&ct_priv->ht_lock);
999 
1000 	err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
1001 					    cts_ht_params);
1002 	if (err)
1003 		goto err_entries;
1004 
1005 	err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
1006 					    &entry->tuple_node,
1007 					    tuples_ht_params);
1008 	if (err)
1009 		goto err_tuple;
1010 
1011 	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
1012 		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
1013 						    &entry->tuple_nat_node,
1014 						    tuples_nat_ht_params);
1015 		if (err)
1016 			goto err_tuple_nat;
1017 	}
1018 	spin_unlock_bh(&ct_priv->ht_lock);
1019 
1020 	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
1021 					 ft->zone_restore_id);
1022 	if (err)
1023 		goto err_rules;
1024 
1025 	set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
1026 	mlx5_tc_ct_entry_put(entry); /* drop this function's reference */
1027 
1028 	return 0;
1029 
1030 err_rules:
1031 	spin_lock_bh(&ct_priv->ht_lock);
1032 	if (mlx5_tc_ct_entry_has_nat(entry))
1033 		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
1034 				       &entry->tuple_nat_node, tuples_nat_ht_params);
1035 err_tuple_nat:
1036 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
1037 			       &entry->tuple_node,
1038 			       tuples_ht_params);
1039 err_tuple:
1040 	rhashtable_remove_fast(&ft->ct_entries_ht,
1041 			       &entry->node,
1042 			       cts_ht_params);
1043 err_entries:
1044 	spin_unlock_bh(&ct_priv->ht_lock);
1045 err_set:
1046 	kfree(entry);
1047 	if (err != -EEXIST)
1048 		netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
1049 	return err;
1050 }
1051 
1052 static int
1053 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
1054 				  struct flow_cls_offload *flow)
1055 {
1056 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1057 	unsigned long cookie = flow->cookie;
1058 	struct mlx5_ct_entry *entry;
1059 
1060 	spin_lock_bh(&ct_priv->ht_lock);
1061 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1062 	if (!entry) {
1063 		spin_unlock_bh(&ct_priv->ht_lock);
1064 		return -ENOENT;
1065 	}
1066 
1067 	if (!mlx5_tc_ct_entry_valid(entry)) {
1068 		spin_unlock_bh(&ct_priv->ht_lock);
1069 		return -EINVAL;
1070 	}
1071 
1072 	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
1073 	mlx5_tc_ct_entry_remove_from_tuples(entry);
1074 	spin_unlock_bh(&ct_priv->ht_lock);
1075 
1076 	mlx5_tc_ct_entry_put(entry);
1077 
1078 	return 0;
1079 }
1080 
1081 static int
1082 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
1083 				    struct flow_cls_offload *f)
1084 {
1085 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1086 	unsigned long cookie = f->cookie;
1087 	struct mlx5_ct_entry *entry;
1088 	u64 lastuse, packets, bytes;
1089 
1090 	spin_lock_bh(&ct_priv->ht_lock);
1091 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1092 	if (!entry) {
1093 		spin_unlock_bh(&ct_priv->ht_lock);
1094 		return -ENOENT;
1095 	}
1096 
1097 	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
1098 		spin_unlock_bh(&ct_priv->ht_lock);
1099 		return -EINVAL;
1100 	}
1101 
1102 	spin_unlock_bh(&ct_priv->ht_lock);
1103 
1104 	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
1105 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
1106 			  FLOW_ACTION_HW_STATS_DELAYED);
1107 
1108 	mlx5_tc_ct_entry_put(entry);
1109 	return 0;
1110 }
1111 
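/* Callback registered on the zone's nf flowtable; dispatches conntrack
 * offload add/del/stats requests to the handlers above.
 */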
1112 static int
1113 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
1114 			      void *cb_priv)
1115 {
1116 	struct flow_cls_offload *f = type_data;
1117 	struct mlx5_ct_ft *ft = cb_priv;
1118 
1119 	if (type != TC_SETUP_CLSFLOWER)
1120 		return -EOPNOTSUPP;
1121 
1122 	switch (f->command) {
1123 	case FLOW_CLS_REPLACE:
1124 		return mlx5_tc_ct_block_flow_offload_add(ft, f);
1125 	case FLOW_CLS_DESTROY:
1126 		return mlx5_tc_ct_block_flow_offload_del(ft, f);
1127 	case FLOW_CLS_STATS:
1128 		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
1129 	default:
1130 		break;
1131 	}
1132 
1133 	return -EOPNOTSUPP;
1134 }
1135 
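/* Dissect an skb into a ct tuple (TCP/UDP over IPv4/IPv6 only).
 * Returns false when the packet cannot be expressed as a supported
 * tuple.
 */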
1136 static bool
1137 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
1138 			u16 zone)
1139 {
1140 	struct flow_keys flow_keys;
1141 
1142 	skb_reset_network_header(skb);
1143 	skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
1144 
1145 	tuple->zone = zone;
1146 
1147 	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
1148 	    flow_keys.basic.ip_proto != IPPROTO_UDP)
1149 		return false;
1150 
1151 	tuple->port.src = flow_keys.ports.src;
1152 	tuple->port.dst = flow_keys.ports.dst;
1153 	tuple->n_proto = flow_keys.basic.n_proto;
1154 	tuple->ip_proto = flow_keys.basic.ip_proto;
1155 
1156 	switch (flow_keys.basic.n_proto) {
1157 	case htons(ETH_P_IP):
1158 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1159 		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
1160 		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
1161 		break;
1162 
1163 	case htons(ETH_P_IPV6):
1164 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1165 		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
1166 		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
1167 		break;
1168 	default:
1169 		goto out;
1170 	}
1171 
1172 	return true;
1173 
1174 out:
1175 	return false;
1176 }
1177 
1178 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
1179 {
1180 	u32 ctstate = 0, ctstate_mask = 0;
1181 
1182 	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
1183 					&ctstate, &ctstate_mask);
1184 	if (ctstate_mask)
1185 		return -EOPNOTSUPP;
1186 
1187 	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
1188 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1189 				    ctstate, ctstate_mask);
1190 
1191 	return 0;
1192 }
1193 
1194 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
1195 {
1196 	if (!priv || !ct_attr->ct_labels_id)
1197 		return;
1198 
1199 	mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id);
1200 }
1201 
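/* Translate a flower ct_state/ct_zone/ct_mark/ct_labels match into
 * matches on the CT metadata registers. Only the trk, est and rpl
 * ct_state bits are supported; matching on +new is rejected.
 */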
1202 int
1203 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
1204 		     struct mlx5_flow_spec *spec,
1205 		     struct flow_cls_offload *f,
1206 		     struct mlx5_ct_attr *ct_attr,
1207 		     struct netlink_ext_ack *extack)
1208 {
1209 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1210 	bool trk, est, untrk, unest, new, rpl, unrpl;
1211 	struct flow_dissector_key_ct *mask, *key;
1212 	u32 ctstate = 0, ctstate_mask = 0;
1213 	u16 ct_state_on, ct_state_off;
1214 	u16 ct_state, ct_state_mask;
1215 	struct flow_match_ct match;
1216 	u32 ct_labels[4];
1217 
1218 	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
1219 		return 0;
1220 
1221 	if (!priv) {
1222 		NL_SET_ERR_MSG_MOD(extack,
1223 				   "offload of ct matching isn't available");
1224 		return -EOPNOTSUPP;
1225 	}
1226 
1227 	flow_rule_match_ct(rule, &match);
1228 
1229 	key = match.key;
1230 	mask = match.mask;
1231 
1232 	ct_state = key->ct_state;
1233 	ct_state_mask = mask->ct_state;
1234 
1235 	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
1236 			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
1237 			      TCA_FLOWER_KEY_CT_FLAGS_NEW |
1238 			      TCA_FLOWER_KEY_CT_FLAGS_REPLY)) {
1239 		NL_SET_ERR_MSG_MOD(extack,
1240 				   "only ct_state trk, est, new and rpl are supported for offload");
1241 		return -EOPNOTSUPP;
1242 	}
1243 
1244 	ct_state_on = ct_state & ct_state_mask;
1245 	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
1246 	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1247 	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1248 	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1249 	rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1250 	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1251 	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1252 	unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
1253 
1254 	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
1255 	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1256 	ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
1257 	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
1258 	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1259 	ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
1260 
1261 	if (new) {
1262 		NL_SET_ERR_MSG_MOD(extack,
1263 				   "matching on ct_state +new isn't supported");
1264 		return -EOPNOTSUPP;
1265 	}
1266 
1267 	if (mask->ct_zone)
1268 		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1269 					    key->ct_zone, MLX5_CT_ZONE_MASK);
1270 	if (ctstate_mask)
1271 		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1272 					    ctstate, ctstate_mask);
1273 	if (mask->ct_mark)
1274 		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
1275 					    key->ct_mark, mask->ct_mark);
1276 	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
1277 	    mask->ct_labels[3]) {
1278 		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
1279 		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
1280 		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
1281 		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
1282 		if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
1283 			return -EOPNOTSUPP;
1284 		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
1285 					    MLX5_CT_LABELS_MASK);
1286 	}
1287 
1288 	return 0;
1289 }
1290 
1291 int
1292 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
1293 			struct mlx5_flow_attr *attr,
1294 			const struct flow_action_entry *act,
1295 			struct netlink_ext_ack *extack)
1296 {
1297 	if (!priv) {
1298 		NL_SET_ERR_MSG_MOD(extack,
1299 				   "offload of ct action isn't available");
1300 		return -EOPNOTSUPP;
1301 	}
1302 
1303 	attr->ct_attr.zone = act->ct.zone;
1304 	attr->ct_attr.ct_action = act->ct.action;
1305 	attr->ct_attr.nf_ft = act->ct.flow_table;
1306 
1307 	return 0;
1308 }
1309 
1310 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1311 				  struct mlx5_tc_ct_pre *pre_ct,
1312 				  bool nat)
1313 {
1314 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1315 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1316 	struct mlx5_core_dev *dev = ct_priv->dev;
1317 	struct mlx5_flow_table *ft = pre_ct->ft;
1318 	struct mlx5_flow_destination dest = {};
1319 	struct mlx5_flow_act flow_act = {};
1320 	struct mlx5_modify_hdr *mod_hdr;
1321 	struct mlx5_flow_handle *rule;
1322 	struct mlx5_flow_spec *spec;
1323 	u32 ctstate;
1324 	u16 zone;
1325 	int err;
1326 
1327 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1328 	if (!spec)
1329 		return -ENOMEM;
1330 
1331 	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1332 	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1333 					ZONE_TO_REG, zone);
1334 	if (err) {
1335 		ct_dbg("Failed to set zone register mapping");
1336 		goto err_mapping;
1337 	}
1338 
1339 	mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1340 					   pre_mod_acts.num_actions,
1341 					   pre_mod_acts.actions);
1342 
1343 	if (IS_ERR(mod_hdr)) {
1344 		err = PTR_ERR(mod_hdr);
1345 		ct_dbg("Failed to create pre ct mod hdr");
1346 		goto err_mapping;
1347 	}
1348 	pre_ct->modify_hdr = mod_hdr;
1349 
1350 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1351 			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1352 	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1353 	flow_act.modify_hdr = mod_hdr;
1354 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1355 
1356 	/* add flow rule */
1357 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1358 				    zone, MLX5_CT_ZONE_MASK);
1359 	ctstate = MLX5_CT_STATE_TRK_BIT;
1360 	if (nat)
1361 		ctstate |= MLX5_CT_STATE_NAT_BIT;
1362 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1363 
1364 	dest.ft = ct_priv->post_ct;
1365 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1366 	if (IS_ERR(rule)) {
1367 		err = PTR_ERR(rule);
1368 		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1369 		goto err_flow_rule;
1370 	}
1371 	pre_ct->flow_rule = rule;
1372 
1373 	/* add miss rule */
1374 	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1375 	rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
1376 	if (IS_ERR(rule)) {
1377 		err = PTR_ERR(rule);
1378 		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1379 		goto err_miss_rule;
1380 	}
1381 	pre_ct->miss_rule = rule;
1382 
1383 	dealloc_mod_hdr_actions(&pre_mod_acts);
1384 	kvfree(spec);
1385 	return 0;
1386 
1387 err_miss_rule:
1388 	mlx5_del_flow_rules(pre_ct->flow_rule);
1389 err_flow_rule:
1390 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1391 err_mapping:
1392 	dealloc_mod_hdr_actions(&pre_mod_acts);
1393 	kvfree(spec);
1394 	return err;
1395 }
1396 
1397 static void
1398 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1399 		       struct mlx5_tc_ct_pre *pre_ct)
1400 {
1401 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1402 	struct mlx5_core_dev *dev = ct_priv->dev;
1403 
1404 	mlx5_del_flow_rules(pre_ct->flow_rule);
1405 	mlx5_del_flow_rules(pre_ct->miss_rule);
1406 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1407 }
1408 
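/* Create a per-zone pre_ct (or pre_ct_nat) table with two groups: a
 * rule that sends already-tracked packets (+trk, and +nat for the NAT
 * table) straight to post_ct, and a miss rule that sends everything
 * else to the ct (or ct_nat) table.
 */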
1409 static int
1410 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1411 			struct mlx5_tc_ct_pre *pre_ct,
1412 			bool nat)
1413 {
1414 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1415 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1416 	struct mlx5_core_dev *dev = ct_priv->dev;
1417 	struct mlx5_flow_table_attr ft_attr = {};
1418 	struct mlx5_flow_namespace *ns;
1419 	struct mlx5_flow_table *ft;
1420 	struct mlx5_flow_group *g;
1421 	u32 metadata_reg_c_2_mask;
1422 	u32 *flow_group_in;
1423 	void *misc;
1424 	int err;
1425 
1426 	ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1427 	if (!ns) {
1428 		err = -EOPNOTSUPP;
1429 		ct_dbg("Failed to get flow namespace");
1430 		return err;
1431 	}
1432 
1433 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1434 	if (!flow_group_in)
1435 		return -ENOMEM;
1436 
1437 	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1438 	ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
1439 			FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1440 	ft_attr.max_fte = 2;
1441 	ft_attr.level = 1;
1442 	ft = mlx5_create_flow_table(ns, &ft_attr);
1443 	if (IS_ERR(ft)) {
1444 		err = PTR_ERR(ft);
1445 		ct_dbg("Failed to create pre ct table");
1446 		goto out_free;
1447 	}
1448 	pre_ct->ft = ft;
1449 
1450 	/* create flow group */
1451 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1452 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1453 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1454 		 MLX5_MATCH_MISC_PARAMETERS_2);
1455 
1456 	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1457 			    match_criteria.misc_parameters_2);
1458 
1459 	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1460 	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1461 	if (nat)
1462 		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1463 
1464 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1465 		 metadata_reg_c_2_mask);
1466 
1467 	g = mlx5_create_flow_group(ft, flow_group_in);
1468 	if (IS_ERR(g)) {
1469 		err = PTR_ERR(g);
1470 		ct_dbg("Failed to create pre ct group");
1471 		goto err_flow_grp;
1472 	}
1473 	pre_ct->flow_grp = g;
1474 
1475 	/* create miss group */
1476 	memset(flow_group_in, 0, inlen);
1477 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1478 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1479 	g = mlx5_create_flow_group(ft, flow_group_in);
1480 	if (IS_ERR(g)) {
1481 		err = PTR_ERR(g);
1482 		ct_dbg("Failed to create pre ct miss group");
1483 		goto err_miss_grp;
1484 	}
1485 	pre_ct->miss_grp = g;
1486 
1487 	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1488 	if (err)
1489 		goto err_add_rules;
1490 
1491 	kvfree(flow_group_in);
1492 	return 0;
1493 
1494 err_add_rules:
1495 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1496 err_miss_grp:
1497 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1498 err_flow_grp:
1499 	mlx5_destroy_flow_table(ft);
1500 out_free:
1501 	kvfree(flow_group_in);
1502 	return err;
1503 }
1504 
1505 static void
1506 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1507 		       struct mlx5_tc_ct_pre *pre_ct)
1508 {
1509 	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1510 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1511 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1512 	mlx5_destroy_flow_table(pre_ct->ft);
1513 }
1514 
1515 static int
1516 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1517 {
1518 	int err;
1519 
1520 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1521 	if (err)
1522 		return err;
1523 
1524 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1525 	if (err)
1526 		goto err_pre_ct_nat;
1527 
1528 	return 0;
1529 
1530 err_pre_ct_nat:
1531 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1532 	return err;
1533 }
1534 
1535 static void
1536 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1537 {
1538 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1539 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1540 }
1541 
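/* Get or create the per-zone ct state: allocate a zone restore id,
 * build the pre_ct/pre_ct_nat tables and register the flow offload
 * callback on the conntrack nf flowtable.
 */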
1542 static struct mlx5_ct_ft *
1543 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1544 		     struct nf_flowtable *nf_ft)
1545 {
1546 	struct mlx5_ct_ft *ft;
1547 	int err;
1548 
1549 	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1550 	if (ft) {
1551 		refcount_inc(&ft->refcount);
1552 		return ft;
1553 	}
1554 
1555 	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1556 	if (!ft)
1557 		return ERR_PTR(-ENOMEM);
1558 
1559 	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1560 	if (err)
1561 		goto err_mapping;
1562 
1563 	ft->zone = zone;
1564 	ft->nf_ft = nf_ft;
1565 	ft->ct_priv = ct_priv;
1566 	refcount_set(&ft->refcount, 1);
1567 
1568 	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1569 	if (err)
1570 		goto err_alloc_pre_ct;
1571 
1572 	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1573 	if (err)
1574 		goto err_init;
1575 
1576 	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1577 				     zone_params);
1578 	if (err)
1579 		goto err_insert;
1580 
1581 	err = nf_flow_table_offload_add_cb(ft->nf_ft,
1582 					   mlx5_tc_ct_block_flow_offload, ft);
1583 	if (err)
1584 		goto err_add_cb;
1585 
1586 	return ft;
1587 
1588 err_add_cb:
1589 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1590 err_insert:
1591 	rhashtable_destroy(&ft->ct_entries_ht);
1592 err_init:
1593 	mlx5_tc_ct_free_pre_ct_tables(ft);
1594 err_alloc_pre_ct:
1595 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1596 err_mapping:
1597 	kfree(ft);
1598 	return ERR_PTR(err);
1599 }
1600 
1601 static void
1602 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1603 {
1604 	struct mlx5_ct_entry *entry = ptr;
1605 
1606 	mlx5_tc_ct_entry_put(entry);
1607 }
1608 
1609 static void
1610 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1611 {
1612 	if (!refcount_dec_and_test(&ft->refcount))
1613 		return;
1614 
1615 	nf_flow_table_offload_del_cb(ft->nf_ft,
1616 				     mlx5_tc_ct_block_flow_offload, ft);
1617 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1618 	rhashtable_free_and_destroy(&ft->ct_entries_ht,
1619 				    mlx5_tc_ct_flush_ft_entry,
1620 				    ct_priv);
1621 	mlx5_tc_ct_free_pre_ct_tables(ft);
1622 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1623 	kfree(ft);
1624 }
1625 
1626 /* We translate the tc filter with CT action to the following HW model:
1627  *
1628  * +---------------------+
1629  * + ft prio (tc chain)  +
1630  * + original match      +
1631  * +---------------------+
1632  *      | set chain miss mapping
1633  *      | set fte_id
1634  *      | set tunnel_id
1635  *      | do decap
1636  *      v
1637  * +---------------------+
1638  * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
1639  * + zone+nat match      +---------------->+ post_ct (see below) +
1640  * +---------------------+  set zone       +---------------------+
1641  *      | set zone
1642  *      v
1643  * +--------------------+
1644  * + CT (nat or no nat) +
1645  * + tuple + zone match +
1646  * +--------------------+
1647  *      | set mark
1648  *      | set labels_id
1649  *      | set established
1650  *      | set zone_restore
1651  *      | do nat (if needed)
1652  *      v
1653  * +--------------+
1654  * + post_ct      + original filter actions
1655  * + fte_id match +------------------------>
1656  * +--------------+
1657  */
1658 static struct mlx5_flow_handle *
1659 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1660 			  struct mlx5e_tc_flow *flow,
1661 			  struct mlx5_flow_spec *orig_spec,
1662 			  struct mlx5_flow_attr *attr)
1663 {
1664 	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1665 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1666 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1667 	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1668 	struct mlx5_flow_spec *post_ct_spec = NULL;
1669 	struct mlx5_flow_attr *pre_ct_attr;
1670 	struct mlx5_modify_hdr *mod_hdr;
1671 	struct mlx5_flow_handle *rule;
1672 	struct mlx5_ct_flow *ct_flow;
1673 	int chain_mapping = 0, err;
1674 	struct mlx5_ct_ft *ft;
1675 	u32 fte_id = 1;
1676 
1677 	post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
1678 	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1679 	if (!post_ct_spec || !ct_flow) {
1680 		kfree(post_ct_spec);
1681 		kfree(ct_flow);
1682 		return ERR_PTR(-ENOMEM);
1683 	}
1684 
1685 	/* Register for CT established events */
1686 	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1687 				  attr->ct_attr.nf_ft);
1688 	if (IS_ERR(ft)) {
1689 		err = PTR_ERR(ft);
1690 		ct_dbg("Failed to register to ft callback");
1691 		goto err_ft;
1692 	}
1693 	ct_flow->ft = ft;
1694 
1695 	err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
1696 			    MLX5_FTE_ID_MAX, GFP_KERNEL);
1697 	if (err) {
1698 		netdev_warn(priv->netdev,
1699 			    "Failed to allocate fte id, err: %d\n", err);
1700 		goto err_idr;
1701 	}
1702 	ct_flow->fte_id = fte_id;
1703 
1704 	/* Base flow attributes of both rules on the original rule's attributes */
1705 	ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1706 	if (!ct_flow->pre_ct_attr) {
1707 		err = -ENOMEM;
1708 		goto err_alloc_pre;
1709 	}
1710 
1711 	ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1712 	if (!ct_flow->post_ct_attr) {
1713 		err = -ENOMEM;
1714 		goto err_alloc_post;
1715 	}
1716 
1717 	pre_ct_attr = ct_flow->pre_ct_attr;
1718 	memcpy(pre_ct_attr, attr, attr_sz);
1719 	memcpy(ct_flow->post_ct_attr, attr, attr_sz);
1720 
1721 	/* Modify the original rule's actions to fwd and modify header, keeping decap */
1722 	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
1723 	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1724 			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1725 
1726 	/* Write the chain miss tag for misses in the ct table, as we
1727 	 * don't go through all prios of this chain the way normal tc
1728 	 * rule misses do.
1729 	 */
1730 	err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
1731 					    &chain_mapping);
1732 	if (err) {
1733 		ct_dbg("Failed to get chain register mapping for chain");
1734 		goto err_get_chain;
1735 	}
1736 	ct_flow->chain_mapping = chain_mapping;
1737 
1738 	err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1739 					CHAIN_TO_REG, chain_mapping);
1740 	if (err) {
1741 		ct_dbg("Failed to set chain register mapping");
1742 		goto err_mapping;
1743 	}
1744 
1745 	err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1746 					FTEID_TO_REG, fte_id);
1747 	if (err) {
1748 		ct_dbg("Failed to set fte_id register mapping");
1749 		goto err_mapping;
1750 	}
1751 
1752 	/* If the original flow is decap, decap is done before entering the ct
1753 	 * table, so add a rewrite of the tunnel match_id.
1754 	 */
1755 	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
1756 	    attr->chain == 0) {
1757 		u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
1758 
1759 		err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
1760 						ct_priv->ns_type,
1761 						TUNNEL_TO_REG,
1762 						tun_id);
1763 		if (err) {
1764 			ct_dbg("Failed to set tunnel register mapping");
1765 			goto err_mapping;
1766 		}
1767 	}
1768 
1769 	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1770 					   pre_mod_acts.num_actions,
1771 					   pre_mod_acts.actions);
1772 	if (IS_ERR(mod_hdr)) {
1773 		err = PTR_ERR(mod_hdr);
1774 		ct_dbg("Failed to create pre ct mod hdr");
1775 		goto err_mapping;
1776 	}
1777 	pre_ct_attr->modify_hdr = mod_hdr;
1778 
1779 	/* The post ct rule matches on the fte_id and executes the original
1780 	 * tc rule's actions.
1781 	 */
1782 	mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
1783 				    fte_id, MLX5_FTE_ID_MASK);
1784 
1785 	/* Put post_ct rule on post_ct flow table */
1786 	ct_flow->post_ct_attr->chain = 0;
1787 	ct_flow->post_ct_attr->prio = 0;
1788 	ct_flow->post_ct_attr->ft = ct_priv->post_ct;
1789 
1790 	ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
1791 	ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
1792 	ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
1793 	rule = mlx5_tc_rule_insert(priv, post_ct_spec,
1794 				   ct_flow->post_ct_attr);
1795 	ct_flow->post_ct_rule = rule;
1796 	if (IS_ERR(ct_flow->post_ct_rule)) {
1797 		err = PTR_ERR(ct_flow->post_ct_rule);
1798 		ct_dbg("Failed to add post ct rule");
1799 		goto err_insert_post_ct;
1800 	}
1801 
1802 	/* Change the original rule to point to the ct table */
1803 	pre_ct_attr->dest_chain = 0;
1804 	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1805 	ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
1806 						   pre_ct_attr);
1807 	if (IS_ERR(ct_flow->pre_ct_rule)) {
1808 		err = PTR_ERR(ct_flow->pre_ct_rule);
1809 		ct_dbg("Failed to add pre ct rule");
1810 		goto err_insert_orig;
1811 	}
1812 
1813 	attr->ct_attr.ct_flow = ct_flow;
1814 	dealloc_mod_hdr_actions(&pre_mod_acts);
1815 	kfree(post_ct_spec);
1816 
1817 	return rule;
1818 
1819 err_insert_orig:
1820 	mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1821 			    ct_flow->post_ct_attr);
1822 err_insert_post_ct:
1823 	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1824 err_mapping:
1825 	dealloc_mod_hdr_actions(&pre_mod_acts);
1826 	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1827 err_get_chain:
1828 	kfree(ct_flow->post_ct_attr);
1829 err_alloc_post:
1830 	kfree(ct_flow->pre_ct_attr);
1831 err_alloc_pre:
1832 	idr_remove(&ct_priv->fte_ids, fte_id);
1833 err_idr:
1834 	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1835 err_ft:
1836 	kfree(post_ct_spec);
1837 	kfree(ct_flow);
1838 	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1839 	return ERR_PTR(err);
1840 }
1841 
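/* Offload a rule carrying a ct(clear) action. No pre/post ct split is
 * needed here: the ct related registers are simply zeroed through a
 * modify header action attached to the original rule.
 */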
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
				struct mlx5_flow_spec *orig_spec,
				struct mlx5_flow_attr *attr,
				struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
	struct mlx5_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_ct_flow *ct_flow;
	int err;

	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!ct_flow)
		return ERR_PTR(-ENOMEM);

	/* Base the flow attributes on the original rule's attributes */
	pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
	if (!pre_ct_attr) {
		err = -ENOMEM;
		goto err_attr;
	}

	memcpy(pre_ct_attr, attr, attr_sz);

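	/* Zero the ct state and related registers for ct(clear) */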
	err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
	if (err) {
		ct_dbg("Failed to set register for ct clear");
		goto err_set_registers;
	}

	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
					   mod_acts->num_actions,
					   mod_acts->actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create ct clear mod hdr");
		goto err_set_registers;
	}

	pre_ct_attr->modify_hdr = mod_hdr;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add ct clear rule");
		goto err_insert;
	}

	attr->ct_attr.ct_flow = ct_flow;
	ct_flow->pre_ct_attr = pre_ct_attr;
	ct_flow->pre_ct_rule = rule;
	return rule;

err_insert:
	mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
err_set_registers:
	netdev_warn(priv->netdev,
		    "Failed to offload ct clear flow, err %d\n", err);
	kfree(pre_ct_attr);
err_attr:
	kfree(ct_flow);

	return ERR_PTR(err);
}

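/* Entry point for offloading a flow with a ct action. Dispatches to the
 * ct(clear) path or to the full pre/post ct split, under the control
 * lock that serializes flow offload add and delete.
 */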
struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
			struct mlx5e_tc_flow *flow,
			struct mlx5_flow_spec *spec,
			struct mlx5_flow_attr *attr,
			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
	bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
	struct mlx5_flow_handle *rule;

	if (!priv)
		return ERR_PTR(-EOPNOTSUPP);

	mutex_lock(&priv->control_lock);

	if (clear_action)
		rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
	else
		rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
	mutex_unlock(&priv->control_lock);

	return rule;
}

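/* Tear down both halves of an offloaded ct flow. The post ct rule, and
 * the fte_id and chain mapping it uses, only exist for the non-clear
 * case, hence the post_ct_rule check.
 */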
static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct mlx5e_tc_flow *flow,
			 struct mlx5_ct_flow *ct_flow)
{
	struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);

	mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
			    pre_ct_attr);
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);

	if (ct_flow->post_ct_rule) {
		mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
				    ct_flow->post_ct_attr);
		mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
		idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
		mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
	}

	kfree(ct_flow->pre_ct_attr);
	kfree(ct_flow->post_ct_attr);
	kfree(ct_flow);
}

void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct mlx5_flow_attr *attr)
{
	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;

	/* We may also be called on a parse error, before the flow was
	 * offloaded; in that case there is nothing to clean up.
	 */
	if (!ct_flow)
		return;

	mutex_lock(&priv->control_lock);
	__mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
	mutex_unlock(&priv->control_lock);
}

static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
				  const char **err_msg)
{
	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
		*err_msg = "firmware ignore_flow_level support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
		/* The vlan workaround must be avoided for multi chain rules.
		 * This is just a sanity check, as pop vlan action support is
		 * expected from any FW that supports ignore_flow_level.
		 */

		*err_msg = "firmware vlan actions support is missing";
		return -EOPNOTSUPP;
	}

	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
				    fdb_modify_header_fwd_to_table)) {
		/* CT always writes to registers using modify header actions.
		 * Therefore, both modify header and forward to table support
		 * are required.
		 */

		*err_msg = "firmware fwd and modify support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
		*err_msg = "register loopback isn't supported";
		return -EOPNOTSUPP;
	}

	return 0;
}

static int
mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
				  const char **err_msg)
{
	if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
		*err_msg = "firmware ignore_flow_level support is missing";
		return -EOPNOTSUPP;
	}

	return 0;
}

static int
mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
			      enum mlx5_flow_namespace_type ns_type,
			      const char **err_msg)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	/* cannot restore chain ID on HW miss */

	*err_msg = "tc skb extension missing";
	return -EOPNOTSUPP;
#endif
	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
		return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
	else
		return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
}

#define INIT_ERR_PREFIX "tc ct offload init failed"

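/* Allocate and set up the ct offload context: the zone and labels
 * mappings, the ct, ct nat and post ct global tables and the zone and
 * tuple hashtables. Returns NULL (not an ERR_PTR) on any failure.
 */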
struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
		struct mod_hdr_tbl *mod_hdr,
		enum mlx5_flow_namespace_type ns_type)
{
	struct mlx5_tc_ct_priv *ct_priv;
	struct mlx5_core_dev *dev;
	const char *msg;
	int err;

	dev = priv->mdev;
	err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
	if (err) {
		mlx5_core_warn(dev,
			       "tc ct offload not supported, %s\n",
			       msg);
		goto err_support;
	}

	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
	if (!ct_priv)
		goto err_alloc;

	ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
	if (IS_ERR(ct_priv->zone_mapping)) {
		err = PTR_ERR(ct_priv->zone_mapping);
		goto err_mapping_zone;
	}

	ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
	if (IS_ERR(ct_priv->labels_mapping)) {
		err = PTR_ERR(ct_priv->labels_mapping);
		goto err_mapping_labels;
	}

	spin_lock_init(&ct_priv->ht_lock);
	ct_priv->ns_type = ns_type;
	ct_priv->chains = chains;
	ct_priv->netdev = priv->netdev;
	ct_priv->dev = priv->mdev;
	ct_priv->mod_hdr_tbl = mod_hdr;
	ct_priv->ct = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct)) {
		err = PTR_ERR(ct_priv->ct);
		mlx5_core_warn(dev,
			       "%s, failed to create ct table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_tbl;
	}

	ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct_nat)) {
		err = PTR_ERR(ct_priv->ct_nat);
		mlx5_core_warn(dev,
			       "%s, failed to create ct nat table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_nat_tbl;
	}

	ct_priv->post_ct = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->post_ct)) {
		err = PTR_ERR(ct_priv->post_ct);
		mlx5_core_warn(dev,
			       "%s, failed to create post ct table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_post_ct_tbl;
	}

	idr_init(&ct_priv->fte_ids);
	mutex_init(&ct_priv->control_lock);
	rhashtable_init(&ct_priv->zone_ht, &zone_params);
	rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
	rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);

	return ct_priv;

err_post_ct_tbl:
	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
err_ct_nat_tbl:
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
err_ct_tbl:
	mapping_destroy(ct_priv->labels_mapping);
err_mapping_labels:
	mapping_destroy(ct_priv->zone_mapping);
err_mapping_zone:
	kfree(ct_priv);
err_alloc:
err_support:

	return NULL;
}

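/* Release everything created by mlx5_tc_ct_init(). Safe to call with a
 * NULL ct_priv when ct offload was never initialized.
 */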
void
mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_fs_chains *chains;

	if (!ct_priv)
		return;

	chains = ct_priv->chains;

	mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
	mapping_destroy(ct_priv->zone_mapping);
	mapping_destroy(ct_priv->labels_mapping);

	rhashtable_destroy(&ct_priv->ct_tuples_ht);
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
	rhashtable_destroy(&ct_priv->zone_ht);
	mutex_destroy(&ct_priv->control_lock);
	idr_destroy(&ct_priv->fte_ids);
	kfree(ct_priv);
}

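/* Restore the conntrack state of a packet sent to software after it
 * went through the ct tables in hardware: map the zone restore id back
 * to a zone, rebuild the tuple from the skb, look up the offloaded
 * entry and replay its restore cookie into the skb.
 */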
bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct sk_buff *skb, u8 zone_restore_id)
{
	struct mlx5_ct_tuple tuple = {};
	struct mlx5_ct_entry *entry;
	u16 zone;

	if (!ct_priv || !zone_restore_id)
		return true;

	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
		return false;

	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
		return false;

	spin_lock(&ct_priv->ht_lock);

	entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
	if (IS_ERR_OR_NULL(entry)) {
		spin_unlock(&ct_priv->ht_lock);
		return false;
	}
	spin_unlock(&ct_priv->ht_lock);

	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
	__mlx5_tc_ct_entry_put(entry);

	return true;
}