// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>

#include "esw/chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"

#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)

#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX

#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)

#define ct_dbg(fmt, args...)\
	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

struct mlx5_tc_ct_priv {
	struct mlx5_eswitch *esw;
	const struct net_device *netdev;
	struct idr fte_ids;
	struct xarray tuple_ids;
	struct rhashtable zone_ht;
	struct rhashtable ct_tuples_ht;
	struct rhashtable ct_tuples_nat_ht;
	struct mlx5_flow_table *ct;
	struct mlx5_flow_table *ct_nat;
	struct mlx5_flow_table *post_ct;
	struct mutex control_lock; /* guards parallel adds/dels */
	struct mapping_ctx *zone_mapping;
	struct mapping_ctx *labels_mapping;
};

struct mlx5_ct_flow {
	struct mlx5_esw_flow_attr pre_ct_attr;
	struct mlx5_esw_flow_attr post_ct_attr;
	struct mlx5_flow_handle *pre_ct_rule;
	struct mlx5_flow_handle *post_ct_rule;
	struct mlx5_ct_ft *ft;
	u32 fte_id;
	u32 chain_mapping;
};

struct mlx5_ct_zone_rule {
	struct mlx5_flow_handle *rule;
	struct mlx5e_mod_hdr_handle *mh;
	struct mlx5_esw_flow_attr attr;
	bool nat;
};

struct mlx5_tc_ct_pre {
	struct mlx5_flow_table *fdb;
	struct mlx5_flow_group *flow_grp;
	struct mlx5_flow_group *miss_grp;
	struct mlx5_flow_handle *flow_rule;
	struct mlx5_flow_handle *miss_rule;
	struct mlx5_modify_hdr *modify_hdr;
};

struct mlx5_ct_ft {
	struct rhash_head node;
	u16 zone;
	u32 zone_restore_id;
	refcount_t refcount;
	struct nf_flowtable *nf_ft;
	struct mlx5_tc_ct_priv *ct_priv;
	struct rhashtable ct_entries_ht;
	struct mlx5_tc_ct_pre pre_ct;
	struct mlx5_tc_ct_pre pre_ct_nat;
};

struct mlx5_ct_tuple {
	u16 addr_type;
	__be16 n_proto;
	u8 ip_proto;
	struct {
		union {
			__be32 src_v4;
			struct in6_addr src_v6;
		};
		union {
			__be32 dst_v4;
			struct in6_addr dst_v6;
		};
	} ip;
	struct {
		__be16 src;
		__be16 dst;
	} port;

	u16 zone;
};

struct mlx5_ct_entry {
	struct rhash_head node;
	struct rhash_head tuple_node;
	struct rhash_head tuple_nat_node;
	struct mlx5_fc *counter;
	unsigned long cookie;
	unsigned long restore_cookie;
	struct mlx5_ct_tuple tuple;
	struct mlx5_ct_tuple tuple_nat;
	struct mlx5_ct_zone_rule zone_rules[2];
};

static const struct rhashtable_params cts_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, node),
	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
	.head_offset = offsetof(struct mlx5_ct_ft, node),
	.key_offset = offsetof(struct mlx5_ct_ft, zone),
	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
	.automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

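/* CT offload state is global per eswitch and lives on the uplink
 * representor's private data; look it up through the uplink rep.
 */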
static struct mlx5_tc_ct_priv *
mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	return uplink_priv->ct_priv;
}

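/* Extract the 5-tuple (addresses, L4 ports, protocols) from a conntrack
 * flow_rule's match keys. Only TCP/UDP over IPv4/IPv6 is offloadable;
 * anything else returns -EOPNOTSUPP and the entry stays in software.
 */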
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
	struct flow_match_control control;
	struct flow_match_basic basic;

	flow_rule_match_basic(rule, &basic);
	flow_rule_match_control(rule, &control);

	tuple->n_proto = basic.key->n_proto;
	tuple->ip_proto = basic.key->ip_proto;
	tuple->addr_type = control.key->addr_type;

	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		tuple->ip.src_v4 = match.key->src;
		tuple->ip.dst_v4 = match.key->dst;
	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		tuple->ip.src_v6 = match.key->src;
		tuple->ip.dst_v6 = match.key->dst;
	} else {
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (tuple->ip_proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
			tuple->port.src = match.key->src;
			tuple->port.dst = match.key->dst;
			break;
		default:
			return -EOPNOTSUPP;
		}
	} else {
		return -EOPNOTSUPP;
	}

	return 0;
}

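/* Derive the post-NAT tuple by replaying the rule's mangle actions on
 * top of a copy of the original tuple made by the caller; fields not
 * rewritten by NAT keep their pre-NAT values.
 */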
static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
			     struct flow_rule *rule)
{
	struct flow_action *flow_action = &rule->action;
	struct flow_action_entry *act;
	u32 offset, val, ip6_offset;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id != FLOW_ACTION_MANGLE)
			continue;

		offset = act->mangle.offset;
		val = act->mangle.val;
		switch (act->mangle.htype) {
		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
			if (offset == offsetof(struct iphdr, saddr))
				tuple->ip.src_v4 = cpu_to_be32(val);
			else if (offset == offsetof(struct iphdr, daddr))
				tuple->ip.dst_v4 = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
			ip6_offset /= 4;
			if (ip6_offset < 4)
				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
			else if (ip6_offset < 8)
				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
			if (offset == offsetof(struct tcphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct tcphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
			if (offset == offsetof(struct udphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct udphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

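/* Translate the flow_rule match keys (ethertype, IP protocol, IPv4/IPv6
 * addresses, L4 ports, TCP flags) into the device's fte_match_param
 * outer headers layout.
 */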
static int
mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
			   struct flow_rule *rule)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	u16 addr_type = 0;
	u8 ip_proto = 0;

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);

		mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
				       headers_v);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 match.mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 match.key->ip_proto);

		ip_proto = match.key->ip_proto;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match;

		flow_rule_match_control(rule, &match);
		addr_type = match.key->addr_type;
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.key->src, sizeof(match.key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.key->src, sizeof(match.key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(match.key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(match.key->dst));
			break;
		default:
			break;
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match;

		flow_rule_match_tcp(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(match.mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(match.key->flags));
	}

	return 0;
}

static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
			  struct mlx5_ct_entry *entry,
			  bool nat)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
	struct mlx5_eswitch *esw = ct_priv->esw;

	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);

	mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr);
	mlx5e_mod_hdr_detach(ct_priv->esw->dev,
			     &esw->offloads.mod_hdr, zone_rule->mh);
	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
}

static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct mlx5_ct_entry *entry)
{
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);

	mlx5_fc_destroy(ct_priv->esw->dev, entry->counter);
}

static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct flow_action_entry *act;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id == FLOW_ACTION_CT_METADATA)
			return act;
	}

	return NULL;
}

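/* Append modify-header actions that store the CT verdict in metadata
 * registers: the ct_state bits, the mark, a mapping id standing in for
 * the 128-bit ct labels, and the zone restore id used to recover SW
 * conntrack state after a HW miss.
 */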
static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
			       u8 ct_state,
			       u32 mark,
			       u32 labels_id,
			       u8 zone_restore_id)
{
	struct mlx5_eswitch *esw = ct_priv->esw;
	int err;

	err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
					CTSTATE_TO_REG, ct_state);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
					MARK_TO_REG, mark);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
					LABELS_TO_REG, labels_id);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
					ZONE_RESTORE_TO_REG, zone_restore_id);
	if (err)
		return err;

	return 0;
}

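/* Convert a single pedit/mangle action into one mlx5 set_action_in
 * modify-header entry. IPv6 addresses are rewritten 32 bits at a time,
 * hence the per-dword field selection below.
 */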
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
				   char *modact)
{
	u32 offset = act->mangle.offset, field;

	switch (act->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct iphdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
		else if (offset == offsetof(struct iphdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct tcphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
		else if (offset == offsetof(struct tcphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct udphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
		else if (offset == offsetof(struct udphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	default:
		return -EOPNOTSUPP;
	}

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, offset, 0);
	MLX5_SET(set_action_in, modact, field, field);
	MLX5_SET(set_action_in, modact, data, act->mangle.val);

	return 0;
}

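/* Build the NAT rewrite for a conntrack entry: each mangle action of
 * the flow_rule becomes one modify-header action appended to mod_acts.
 */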
static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
			    struct flow_rule *flow_rule,
			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct mlx5_core_dev *mdev = ct_priv->esw->dev;
	struct flow_action_entry *act;
	size_t action_size;
	char *modact;
	int err, i;

	action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_MANGLE: {
			err = alloc_mod_hdr_actions(mdev,
						    MLX5_FLOW_NAMESPACE_FDB,
						    mod_acts);
			if (err)
				return err;

			modact = mod_acts->actions +
				 mod_acts->num_actions * action_size;

			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
			if (err)
				return err;

			mod_acts->num_actions++;
		}
		break;

		case FLOW_ACTION_CT_METADATA:
			/* Handled earlier */
			continue;
		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				struct mlx5_esw_flow_attr *attr,
				struct flow_rule *flow_rule,
				struct mlx5e_mod_hdr_handle **mh,
				u8 zone_restore_id, bool nat)
{
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct flow_action_entry *meta;
	u16 ct_state = 0;
	int err;

	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta)
		return -EOPNOTSUPP;

	err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels,
			  &attr->ct_attr.ct_labels_id);
	if (err)
		return -EOPNOTSUPP;
	if (nat) {
		err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
						  &mod_acts);
		if (err)
			goto err_mapping;

		ct_state |= MLX5_CT_STATE_NAT_BIT;
	}

	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
					     ct_state,
					     meta->ct_metadata.mark,
					     attr->ct_attr.ct_labels_id,
					     zone_restore_id);
	if (err)
		goto err_mapping;

	*mh = mlx5e_mod_hdr_attach(ct_priv->esw->dev,
				   &ct_priv->esw->offloads.mod_hdr,
				   MLX5_FLOW_NAMESPACE_FDB,
				   &mod_acts);
	if (IS_ERR(*mh)) {
		err = PTR_ERR(*mh);
		goto err_mapping;
	}
	attr->modify_hdr = mlx5e_mod_hdr_get(*mh);

	dealloc_mod_hdr_actions(&mod_acts);
	return 0;

err_mapping:
	dealloc_mod_hdr_actions(&mod_acts);
	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
	return err;
}

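/* Offload one direction of a conntrack entry into the ct (or ct_nat)
 * table: match on the tuple and zone register, apply the CT registers
 * and NAT rewrite via a modify header, count, and forward to post_ct.
 */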
static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
			  struct flow_rule *flow_rule,
			  struct mlx5_ct_entry *entry,
			  bool nat, u8 zone_restore_id)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
	struct mlx5_eswitch *esw = ct_priv->esw;
	struct mlx5_flow_spec *spec = NULL;
	int err;

	zone_rule->nat = nat;

	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
					      &zone_rule->mh,
					      zone_restore_id, nat);
	if (err) {
		ct_dbg("Failed to create ct entry mod hdr");
		goto err_mod_hdr;
	}

	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
	attr->dest_chain = 0;
	attr->dest_ft = ct_priv->post_ct;
	attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct;
	attr->outer_match_level = MLX5_MATCH_L4;
	attr->counter = entry->counter;
	attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;

	mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
				    entry->tuple.zone & MLX5_CT_ZONE_MASK,
				    MLX5_CT_ZONE_MASK);

	zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
	if (IS_ERR(zone_rule->rule)) {
		err = PTR_ERR(zone_rule->rule);
		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
		goto err_rule;
	}

	kfree(spec);
	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

	return 0;

err_rule:
	mlx5e_mod_hdr_detach(ct_priv->esw->dev,
			     &esw->offloads.mod_hdr, zone_rule->mh);
	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
err_mod_hdr:
	kfree(spec);
	return err;
}

static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct flow_rule *flow_rule,
			   struct mlx5_ct_entry *entry,
			   u8 zone_restore_id)
{
	struct mlx5_eswitch *esw = ct_priv->esw;
	int err;

	entry->counter = mlx5_fc_create(esw->dev, true);
	if (IS_ERR(entry->counter)) {
		err = PTR_ERR(entry->counter);
		ct_dbg("Failed to create counter for ct entry");
		return err;
	}

	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
					zone_restore_id);
	if (err)
		goto err_orig;

	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
					zone_restore_id);
	if (err)
		goto err_nat;

	return 0;

err_nat:
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
	mlx5_fc_destroy(esw->dev, entry->counter);
	return err;
}

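/* FLOW_CLS_REPLACE handler for the nf flowtable block: allocate a ct
 * entry for the connection, index it by cookie and by pre/post-NAT
 * tuple, and install the matching HW rules.
 */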
static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	struct flow_action_entry *meta_action;
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;
	int err;

	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta_action)
		return -EOPNOTSUPP;

	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
				       cts_ht_params);
	if (entry)
		return 0;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->tuple.zone = ft->zone;
	entry->cookie = flow->cookie;
	entry->restore_cookie = meta_action->ct_metadata.cookie;

	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
	if (err)
		goto err_set;

	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
	if (err)
		goto err_set;

	err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht,
				     &entry->tuple_node,
				     tuples_ht_params);
	if (err)
		goto err_tuple;

	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
		err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht,
					     &entry->tuple_nat_node,
					     tuples_nat_ht_params);
		if (err)
			goto err_tuple_nat;
	}

	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
					 ft->zone_restore_id);
	if (err)
		goto err_rules;

	err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
				     cts_ht_params);
	if (err)
		goto err_insert;

	return 0;

err_insert:
	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
err_rules:
	/* The nat node was only inserted if the tuples differ */
	if (entry->tuple_nat_node.next)
		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
				       &entry->tuple_nat_node,
				       tuples_nat_ht_params);
err_tuple_nat:
	if (entry->tuple_node.next)
		rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
				       &entry->tuple_node,
				       tuples_ht_params);
err_tuple:
err_set:
	kfree(entry);
	netdev_warn(ct_priv->netdev,
		    "Failed to offload ct entry, err: %d\n", err);
	return err;
}

static void
mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
			struct mlx5_ct_entry *entry)
{
	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
	if (entry->tuple_nat_node.next)
		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
				       &entry->tuple_nat_node,
				       tuples_nat_ht_params);
	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
			       tuples_ht_params);
}

static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;

	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
				       cts_ht_params);
	if (!entry)
		return -ENOENT;

	mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry);
	WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
				       &entry->node,
				       cts_ht_params));
	kfree(entry);

	return 0;
}

static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
				    struct flow_cls_offload *f)
{
	unsigned long cookie = f->cookie;
	struct mlx5_ct_entry *entry;
	u64 lastuse, packets, bytes;

	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
				       cts_ht_params);
	if (!entry)
		return -ENOENT;

	mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse);
	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
			  FLOW_ACTION_HW_STATS_DELAYED);

	return 0;
}

static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
			      void *cb_priv)
{
	struct flow_cls_offload *f = type_data;
	struct mlx5_ct_ft *ft = cb_priv;

	if (type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (f->command) {
	case FLOW_CLS_REPLACE:
		return mlx5_tc_ct_block_flow_offload_add(ft, f);
	case FLOW_CLS_DESTROY:
		return mlx5_tc_ct_block_flow_offload_del(ft, f);
	case FLOW_CLS_STATS:
		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
	default:
		break;
	}

	return -EOPNOTSUPP;
}

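/* Rebuild a CT tuple from an skb that missed in HW, so the originating
 * conntrack entry can be looked up and its metadata restored.
 */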
static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
			u16 zone)
{
	struct flow_keys flow_keys;

	skb_reset_network_header(skb);
	skb_flow_dissect_flow_keys(skb, &flow_keys, 0);

	tuple->zone = zone;

	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
	    flow_keys.basic.ip_proto != IPPROTO_UDP)
		return false;

	tuple->port.src = flow_keys.ports.src;
	tuple->port.dst = flow_keys.ports.dst;
	tuple->n_proto = flow_keys.basic.n_proto;
	tuple->ip_proto = flow_keys.basic.ip_proto;

	switch (flow_keys.basic.n_proto) {
	case htons(ETH_P_IP):
		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
		break;

	case htons(ETH_P_IPV6):
		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
		break;
	default:
		goto out;
	}

	return true;

out:
	return false;
}

int
mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv,
			    struct mlx5_flow_spec *spec)
{
	u32 ctstate = 0, ctstate_mask = 0;

	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
					&ctstate, &ctstate_mask);
	if (ctstate_mask)
		return -EOPNOTSUPP;

	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
				    ctstate, ctstate_mask);

	return 0;
}

void mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);

	if (!ct_priv || !ct_attr->ct_labels_id)
		return;

	mapping_remove(ct_priv->labels_mapping, ct_attr->ct_labels_id);
}

int
mlx5_tc_ct_match_add(struct mlx5e_priv *priv,
		     struct mlx5_flow_spec *spec,
		     struct flow_cls_offload *f,
		     struct mlx5_ct_attr *ct_attr,
		     struct netlink_ext_ack *extack)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_dissector_key_ct *mask, *key;
	bool trk, est, untrk, unest, new;
	u32 ctstate = 0, ctstate_mask = 0;
	u16 ct_state_on, ct_state_off;
	u16 ct_state, ct_state_mask;
	struct flow_match_ct match;
	u32 ct_labels[4];

	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
		return 0;

	if (!ct_priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct matching isn't available");
		return -EOPNOTSUPP;
	}

	flow_rule_match_ct(rule, &match);

	key = match.key;
	mask = match.mask;

	ct_state = key->ct_state;
	ct_state_mask = mask->ct_state;

	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
			      TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only ct_state trk, est and new are supported for offload");
		return -EOPNOTSUPP;
	}

	ct_state_on = ct_state & ct_state_mask;
	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;

	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;

	if (new) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +new isn't supported");
		return -EOPNOTSUPP;
	}

	if (mask->ct_zone)
		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
					    key->ct_zone, MLX5_CT_ZONE_MASK);
	if (ctstate_mask)
		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
					    ctstate, ctstate_mask);
	if (mask->ct_mark)
		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
					    key->ct_mark, mask->ct_mark);
	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
	    mask->ct_labels[3]) {
		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
		if (mapping_add(ct_priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
			return -EOPNOTSUPP;
		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
					    MLX5_CT_LABELS_MASK);
	}

	return 0;
}

int
mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
			struct mlx5_esw_flow_attr *attr,
			const struct flow_action_entry *act,
			struct netlink_ext_ack *extack)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);

	if (!ct_priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct action isn't available");
		return -EOPNOTSUPP;
	}

	attr->ct_attr.zone = act->ct.zone;
	attr->ct_attr.ct_action = act->ct.action;
	attr->ct_attr.nf_ft = act->ct.flow_table;

	return 0;
}

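/* Populate a pre_ct table with its two rules: packets already tracked
 * in this zone (+trk, and +nat for the NAT variant) skip straight to
 * post_ct, while everything else misses into the ct/ct_nat tuple table.
 */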
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
				  struct mlx5_tc_ct_pre *pre_ct,
				  bool nat)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
	struct mlx5_core_dev *dev = ct_priv->esw->dev;
	struct mlx5_flow_table *fdb = pre_ct->fdb;
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	u32 ctstate;
	u16 zone;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone);
	if (err) {
		ct_dbg("Failed to set zone register mapping");
		goto err_mapping;
	}

	mod_hdr = mlx5_modify_header_alloc(dev,
					   MLX5_FLOW_NAMESPACE_FDB,
					   pre_mod_acts.num_actions,
					   pre_mod_acts.actions);

	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct->modify_hdr = mod_hdr;

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
	flow_act.modify_hdr = mod_hdr;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

	/* add flow rule */
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
				    zone, MLX5_CT_ZONE_MASK);
	ctstate = MLX5_CT_STATE_TRK_BIT;
	if (nat)
		ctstate |= MLX5_CT_STATE_NAT_BIT;
	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);

	dest.ft = ct_priv->post_ct;
	rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
		goto err_flow_rule;
	}
	pre_ct->flow_rule = rule;

	/* add miss rule */
	memset(spec, 0, sizeof(*spec));
	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
		goto err_miss_rule;
	}
	pre_ct->miss_rule = rule;

	dealloc_mod_hdr_actions(&pre_mod_acts);
	kvfree(spec);
	return 0;

err_miss_rule:
	mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
	dealloc_mod_hdr_actions(&pre_mod_acts);
	kvfree(spec);
	return err;
}

static void
tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
		       struct mlx5_tc_ct_pre *pre_ct)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->esw->dev;

	mlx5_del_flow_rules(pre_ct->flow_rule);
	mlx5_del_flow_rules(pre_ct->miss_rule);
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
}

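/* Create a per-zone pre_ct table: an unmanaged two-entry FDB table with
 * one group matching the zone and CT state bits in metadata register C2
 * and a second catch-all miss group.
 */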
static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
			struct mlx5_tc_ct_pre *pre_ct,
			bool nat)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->esw->dev;
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *g;
	u32 metadata_reg_c_2_mask;
	u32 *flow_group_in;
	void *misc;
	int err;

	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!ns) {
		err = -EOPNOTSUPP;
		ct_dbg("Failed to get FDB flow namespace");
		return err;
	}

	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
	if (!flow_group_in)
		return -ENOMEM;

	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.prio = FDB_TC_OFFLOAD;
	ft_attr.max_fte = 2;
	ft_attr.level = 1;
	ft = mlx5_create_flow_table(ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to create pre ct table");
		goto out_free;
	}
	pre_ct->fdb = ft;

	/* create flow group */
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
		 MLX5_MATCH_MISC_PARAMETERS_2);

	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
			    match_criteria.misc_parameters_2);

	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
	if (nat)
		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);

	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
		 metadata_reg_c_2_mask);

	g = mlx5_create_flow_group(ft, flow_group_in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		ct_dbg("Failed to create pre ct group");
		goto err_flow_grp;
	}
	pre_ct->flow_grp = g;

	/* create miss group */
	memset(flow_group_in, 0, inlen);
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
	g = mlx5_create_flow_group(ft, flow_group_in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		ct_dbg("Failed to create pre ct miss group");
		goto err_miss_grp;
	}
	pre_ct->miss_grp = g;

	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
	if (err)
		goto err_add_rules;

	kvfree(flow_group_in);
	return 0;

err_add_rules:
	mlx5_destroy_flow_group(pre_ct->miss_grp);
err_miss_grp:
	mlx5_destroy_flow_group(pre_ct->flow_grp);
err_flow_grp:
	mlx5_destroy_flow_table(ft);
out_free:
	kvfree(flow_group_in);
	return err;
}

static void
mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
		       struct mlx5_tc_ct_pre *pre_ct)
{
	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
	mlx5_destroy_flow_group(pre_ct->miss_grp);
	mlx5_destroy_flow_group(pre_ct->flow_grp);
	mlx5_destroy_flow_table(pre_ct->fdb);
}

static int
mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
{
	int err;

	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
	if (err)
		return err;

	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
	if (err)
		goto err_pre_ct_nat;

	return 0;

err_pre_ct_nat:
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
	return err;
}

static void
mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
{
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
}

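/* Get (or create) the per-zone ct flow table. On first use for a zone
 * this allocates the pre_ct tables, adds a zone restore mapping, and
 * registers the flow offload callback with the conntrack nf_flowtable.
 */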
static struct mlx5_ct_ft *
mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
		     struct nf_flowtable *nf_ft)
{
	struct mlx5_ct_ft *ft;
	int err;

	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
	if (ft) {
		refcount_inc(&ft->refcount);
		return ft;
	}

	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
	if (err)
		goto err_mapping;

	ft->zone = zone;
	ft->nf_ft = nf_ft;
	ft->ct_priv = ct_priv;
	refcount_set(&ft->refcount, 1);

	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
	if (err)
		goto err_alloc_pre_ct;

	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
	if (err)
		goto err_init;

	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
				     zone_params);
	if (err)
		goto err_insert;

	err = nf_flow_table_offload_add_cb(ft->nf_ft,
					   mlx5_tc_ct_block_flow_offload, ft);
	if (err)
		goto err_add_cb;

	return ft;

err_add_cb:
	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
err_insert:
	rhashtable_destroy(&ft->ct_entries_ht);
err_init:
	mlx5_tc_ct_free_pre_ct_tables(ft);
err_alloc_pre_ct:
	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
err_mapping:
	kfree(ft);
	return ERR_PTR(err);
}

static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{
	struct mlx5_tc_ct_priv *ct_priv = arg;
	struct mlx5_ct_entry *entry = ptr;

	mlx5_tc_ct_del_ft_entry(ct_priv, entry);
	kfree(entry);
}

static void
mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
{
	if (!refcount_dec_and_test(&ft->refcount))
		return;

	nf_flow_table_offload_del_cb(ft->nf_ft,
				     mlx5_tc_ct_block_flow_offload, ft);
	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
	rhashtable_free_and_destroy(&ft->ct_entries_ht,
				    mlx5_tc_ct_flush_ft_entry,
				    ct_priv);
	mlx5_tc_ct_free_pre_ct_tables(ft);
	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
	kfree(ft);
}

/* We translate the tc filter with CT action to the following HW model:
 *
 * +---------------------+
 * + fdb prio (tc chain) +
 * + original match      +
 * +---------------------+
 *      | set chain miss mapping
 *      | set fte_id
 *      | set tunnel_id
 *      | do decap
 *      v
 * +---------------------+
 * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
 * + zone+nat match      +---------------->+ post_ct (see below) +
 * +---------------------+  set zone       +---------------------+
 *      | set zone
 *      v
 * +--------------------+
 * + CT (nat or no nat) +
 * + tuple + zone match +
 * +--------------------+
 *      | set mark
 *      | set labels_id
 *      | set established
 *      | set zone_restore
 *      | do nat (if needed)
 *      v
 * +--------------+
 * + post_ct      + original filter actions
 * + fte_id match +------------------------>
 * +--------------+
 */
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
			  struct mlx5e_tc_flow *flow,
			  struct mlx5_flow_spec *orig_spec,
			  struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
	struct mlx5_flow_spec *post_ct_spec = NULL;
	struct mlx5_eswitch *esw = ct_priv->esw;
	struct mlx5_esw_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_ct_flow *ct_flow;
	int chain_mapping = 0, err;
	struct mlx5_ct_ft *ft;
	u32 fte_id = 1;

	post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!post_ct_spec || !ct_flow) {
		kfree(post_ct_spec);
		kfree(ct_flow);
		return ERR_PTR(-ENOMEM);
	}

	/* Register for CT established events */
	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
				  attr->ct_attr.nf_ft);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to register ft callback");
		goto err_ft;
	}
	ct_flow->ft = ft;

	err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
			    MLX5_FTE_ID_MAX, GFP_KERNEL);
	if (err) {
		netdev_warn(priv->netdev,
			    "Failed to allocate fte id, err: %d\n", err);
		goto err_idr;
	}
	ct_flow->fte_id = fte_id;

	/* Base esw attributes of both rules on the original rule attribute */
	pre_ct_attr = &ct_flow->pre_ct_attr;
	memcpy(pre_ct_attr, attr, sizeof(*attr));
	memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr));

	/* Modify the original rule's action to fwd and modify, leave decap */
	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	/* Write chain miss tag for a miss in the ct table, as we
	 * don't go through all prios of this chain the way normal tc
	 * rule misses do.
	 */
	err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain,
						&chain_mapping);
	if (err) {
		ct_dbg("Failed to get chain register mapping for chain");
		goto err_get_chain;
	}
	ct_flow->chain_mapping = chain_mapping;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
					CHAIN_TO_REG, chain_mapping);
	if (err) {
		ct_dbg("Failed to set chain register mapping");
		goto err_mapping;
	}

	err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
					FTEID_TO_REG, fte_id);
	if (err) {
		ct_dbg("Failed to set fte_id register mapping");
		goto err_mapping;
	}

	/* If the original flow is decap, we do it before going into the ct
	 * table, so add a rewrite for the tunnel match_id.
	 */
	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
	    attr->chain == 0) {
		u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);

		err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
						TUNNEL_TO_REG,
						tun_id);
		if (err) {
			ct_dbg("Failed to set tunnel register mapping");
			goto err_mapping;
		}
	}

	mod_hdr = mlx5_modify_header_alloc(esw->dev,
					   MLX5_FLOW_NAMESPACE_FDB,
					   pre_mod_acts.num_actions,
					   pre_mod_acts.actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct_attr->modify_hdr = mod_hdr;

	/* The post ct rule matches on fte_id and executes the original
	 * rule's tc actions
	 */
	mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
				    fte_id, MLX5_FTE_ID_MASK);

	/* Put post_ct rule on post_ct fdb */
	ct_flow->post_ct_attr.chain = 0;
	ct_flow->post_ct_attr.prio = 0;
	ct_flow->post_ct_attr.fdb = ct_priv->post_ct;

	ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE;
	ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE;
	ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
	rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec,
					       &ct_flow->post_ct_attr);
	ct_flow->post_ct_rule = rule;
	if (IS_ERR(ct_flow->post_ct_rule)) {
		err = PTR_ERR(ct_flow->post_ct_rule);
		ct_dbg("Failed to add post ct rule");
		goto err_insert_post_ct;
	}

	/* Change the original rule to point to the ct table */
	pre_ct_attr->dest_chain = 0;
	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb;
	ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw,
							       orig_spec,
							       pre_ct_attr);
	if (IS_ERR(ct_flow->pre_ct_rule)) {
		err = PTR_ERR(ct_flow->pre_ct_rule);
		ct_dbg("Failed to add pre ct rule");
		goto err_insert_orig;
	}

	attr->ct_attr.ct_flow = ct_flow;
	dealloc_mod_hdr_actions(&pre_mod_acts);
	kfree(post_ct_spec);

	return rule;

err_insert_orig:
	mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule,
					&ct_flow->post_ct_attr);
err_insert_post_ct:
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
	dealloc_mod_hdr_actions(&pre_mod_acts);
	mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
err_get_chain:
	idr_remove(&ct_priv->fte_ids, fte_id);
err_idr:
	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
	kfree(post_ct_spec);
	kfree(ct_flow);
	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
	return ERR_PTR(err);
}

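/* ct clear doesn't need the ct tables: it is offloaded as a single rule
 * that zeroes the CT metadata registers and otherwise keeps the original
 * filter's actions.
 */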
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv,
				struct mlx5_flow_spec *orig_spec,
				struct mlx5_esw_flow_attr *attr,
				struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	struct mlx5_eswitch *esw = ct_priv->esw;
	struct mlx5_esw_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_ct_flow *ct_flow;
	int err;

	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!ct_flow)
		return ERR_PTR(-ENOMEM);

	/* Base esw attributes on the original rule attribute */
	pre_ct_attr = &ct_flow->pre_ct_attr;
	memcpy(pre_ct_attr, attr, sizeof(*attr));

	err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
	if (err) {
		ct_dbg("Failed to set register for ct clear");
		goto err_set_registers;
	}

	mod_hdr = mlx5_modify_header_alloc(esw->dev,
					   MLX5_FLOW_NAMESPACE_FDB,
					   mod_acts->num_actions,
					   mod_acts->actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create ct clear mod hdr");
		goto err_set_registers;
	}

	dealloc_mod_hdr_actions(mod_acts);
	pre_ct_attr->modify_hdr = mod_hdr;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add ct clear rule");
		goto err_insert;
	}

	attr->ct_attr.ct_flow = ct_flow;
	ct_flow->pre_ct_rule = rule;
	return rule;

err_insert:
	mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
err_set_registers:
	netdev_warn(priv->netdev,
		    "Failed to offload ct clear flow, err %d\n", err);
	kfree(ct_flow);
	return ERR_PTR(err);
}

struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow,
			struct mlx5_flow_spec *spec,
			struct mlx5_esw_flow_attr *attr,
			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
	bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	struct mlx5_flow_handle *rule;

	if (!ct_priv)
		return ERR_PTR(-EOPNOTSUPP);

	mutex_lock(&ct_priv->control_lock);

	if (clear_action)
		rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
	else
		rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
	mutex_unlock(&ct_priv->control_lock);

	return rule;
}

static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct mlx5_ct_flow *ct_flow)
{
	struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr;
	struct mlx5_eswitch *esw = ct_priv->esw;

	mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule,
					pre_ct_attr);
	mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr);

	if (ct_flow->post_ct_rule) {
		mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule,
						&ct_flow->post_ct_attr);
		mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
		idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
		mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
	}

	kfree(ct_flow);
}

void
mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow,
		       struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;

	/* We may also be called on parse-error paths, before any flow was
	 * offloaded; in that case there is nothing to delete.
	 */
	if (!ct_flow)
		return;

	mutex_lock(&ct_priv->control_lock);
	__mlx5_tc_ct_delete_flow(ct_priv, ct_flow);
	mutex_unlock(&ct_priv->control_lock);
}

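/* CT offload depends on a few kernel/device capabilities: tc skb
 * extensions for chain restore on miss, ignore_flow_level, forwarding
 * to another table from a modify header action, and reg_c1 loopback.
 */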
static int
mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw,
			      const char **err_msg)
{
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	/* cannot restore chain ID on HW miss */

	*err_msg = "tc skb extension missing";
	return -EOPNOTSUPP;
#endif

	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
		*err_msg = "firmware level support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
		/* The vlan workaround should be avoided for multi-chain
		 * rules. This is just a sanity check, as pop vlan actions
		 * should be supported by any FW that supports
		 * ignore_flow_level.
		 */

		*err_msg = "firmware vlan actions support is missing";
		return -EOPNOTSUPP;
	}

	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
				    fdb_modify_header_fwd_to_table)) {
		/* CT always writes to registers, which are mod header
		 * actions. Therefore, mod header and goto are both required.
		 */

		*err_msg = "firmware fwd and modify support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
		*err_msg = "register loopback isn't supported";
		return -EOPNOTSUPP;
	}

	return 0;
}

static void
mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err)
{
	if (msg)
		netdev_warn(rpriv->netdev,
			    "tc ct offload not supported, %s, err: %d\n",
			    msg, err);
	else
		netdev_warn(rpriv->netdev,
			    "tc ct offload not supported, err: %d\n",
			    err);
}

int
mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv)
{
	struct mlx5_tc_ct_priv *ct_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;
	struct mlx5e_priv *priv;
	const char *msg;
	int err;

	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
	priv = netdev_priv(rpriv->netdev);
	esw = priv->mdev->priv.eswitch;

	err = mlx5_tc_ct_init_check_support(esw, &msg);
	if (err) {
		mlx5_tc_ct_init_err(rpriv, msg, err);
		goto err_support;
	}

	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
	if (!ct_priv) {
		mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM);
		goto err_alloc;
	}

	ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
	if (IS_ERR(ct_priv->zone_mapping)) {
		err = PTR_ERR(ct_priv->zone_mapping);
		goto err_mapping_zone;
	}

	ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
	if (IS_ERR(ct_priv->labels_mapping)) {
		err = PTR_ERR(ct_priv->labels_mapping);
		goto err_mapping_labels;
	}

	ct_priv->esw = esw;
	ct_priv->netdev = rpriv->netdev;
	ct_priv->ct = mlx5_esw_chains_create_global_table(esw);
	if (IS_ERR(ct_priv->ct)) {
		err = PTR_ERR(ct_priv->ct);
		mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err);
		goto err_ct_tbl;
	}

	ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw);
	if (IS_ERR(ct_priv->ct_nat)) {
		err = PTR_ERR(ct_priv->ct_nat);
		mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table",
				    err);
		goto err_ct_nat_tbl;
	}

	ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw);
	if (IS_ERR(ct_priv->post_ct)) {
		err = PTR_ERR(ct_priv->post_ct);
		mlx5_tc_ct_init_err(rpriv, "failed to create post ct table",
				    err);
		goto err_post_ct_tbl;
	}

	idr_init(&ct_priv->fte_ids);
	mutex_init(&ct_priv->control_lock);
	rhashtable_init(&ct_priv->zone_ht, &zone_params);
	rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
	rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);

	/* Done, set ct_priv so callers know CT offload is initialized */
	uplink_priv->ct_priv = ct_priv;

	return 0;

err_post_ct_tbl:
	mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat);
err_ct_nat_tbl:
	mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct);
err_ct_tbl:
	mapping_destroy(ct_priv->labels_mapping);
err_mapping_labels:
	mapping_destroy(ct_priv->zone_mapping);
err_mapping_zone:
	kfree(ct_priv);
err_alloc:
err_support:

	return 0;
}

void
mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv)
{
	struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;

	if (!ct_priv)
		return;

	mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct);
	mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat);
	mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct);
	mapping_destroy(ct_priv->zone_mapping);
	mapping_destroy(ct_priv->labels_mapping);

	rhashtable_destroy(&ct_priv->ct_tuples_ht);
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
	rhashtable_destroy(&ct_priv->zone_ht);
	mutex_destroy(&ct_priv->control_lock);
	idr_destroy(&ct_priv->fte_ids);
	kfree(ct_priv);

	uplink_priv->ct_priv = NULL;
}

bool
mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv,
			 struct sk_buff *skb, u8 zone_restore_id)
{
	struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;
	struct mlx5_ct_tuple tuple = {};
	struct mlx5_ct_entry *entry;
	u16 zone;

	if (!ct_priv || !zone_restore_id)
		return true;

	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
		return false;

	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
		return false;

	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple,
				       tuples_ht_params);
	if (!entry)
		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
					       &tuple, tuples_nat_ht_params);
	if (!entry)
		return false;

	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
	return true;
}