// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>

#include "esw/chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"

#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
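
/* The ct state bits below share a metadata register with the zone id:
 * the zone occupies the low 16 bits and the state bits are matched
 * above bit 16 (see the pre_ct group mask built in
 * mlx5_tc_ct_alloc_pre_ct()).
 */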
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)

#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX

#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)

#define ct_dbg(fmt, args...)\
	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

struct mlx5_tc_ct_priv {
	struct mlx5_eswitch *esw;
	const struct net_device *netdev;
	struct idr fte_ids;
	struct xarray tuple_ids;
	struct rhashtable zone_ht;
	struct rhashtable ct_tuples_ht;
	struct rhashtable ct_tuples_nat_ht;
	struct mlx5_flow_table *ct;
	struct mlx5_flow_table *ct_nat;
	struct mlx5_flow_table *post_ct;
	struct mutex control_lock; /* guards parallel adds/dels */
	struct mapping_ctx *zone_mapping;
	struct mapping_ctx *labels_mapping;
};

struct mlx5_ct_flow {
	struct mlx5_esw_flow_attr pre_ct_attr;
	struct mlx5_esw_flow_attr post_ct_attr;
	struct mlx5_flow_handle *pre_ct_rule;
	struct mlx5_flow_handle *post_ct_rule;
	struct mlx5_ct_ft *ft;
	u32 fte_id;
	u32 chain_mapping;
};

struct mlx5_ct_zone_rule {
	struct mlx5_flow_handle *rule;
	struct mlx5e_mod_hdr_handle *mh;
	struct mlx5_esw_flow_attr attr;
	bool nat;
};

struct mlx5_tc_ct_pre {
	struct mlx5_flow_table *fdb;
	struct mlx5_flow_group *flow_grp;
	struct mlx5_flow_group *miss_grp;
	struct mlx5_flow_handle *flow_rule;
	struct mlx5_flow_handle *miss_rule;
	struct mlx5_modify_hdr *modify_hdr;
};

struct mlx5_ct_ft {
	struct rhash_head node;
	u16 zone;
	u32 zone_restore_id;
	refcount_t refcount;
	struct nf_flowtable *nf_ft;
	struct mlx5_tc_ct_priv *ct_priv;
	struct rhashtable ct_entries_ht;
	struct mlx5_tc_ct_pre pre_ct;
	struct mlx5_tc_ct_pre pre_ct_nat;
};

struct mlx5_ct_tuple {
	u16 addr_type;
	__be16 n_proto;
	u8 ip_proto;
	struct {
		union {
			__be32 src_v4;
			struct in6_addr src_v6;
		};
		union {
			__be32 dst_v4;
			struct in6_addr dst_v6;
		};
	} ip;
	struct {
		__be16 src;
		__be16 dst;
	} port;

	u16 zone;
};

struct mlx5_ct_entry {
	struct rhash_head node;
	struct rhash_head tuple_node;
	struct rhash_head tuple_nat_node;
	struct mlx5_fc *counter;
	unsigned long cookie;
	unsigned long restore_cookie;
	struct mlx5_ct_tuple tuple;
	struct mlx5_ct_tuple tuple_nat;
	struct mlx5_ct_zone_rule zone_rules[2];
};

static const struct rhashtable_params cts_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, node),
	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
	.head_offset = offsetof(struct mlx5_ct_ft, node),
	.key_offset = offsetof(struct mlx5_ct_ft, zone),
	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
	.automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static struct mlx5_tc_ct_priv *
mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	return uplink_priv->ct_priv;
}

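/* Extract the 5-tuple (addresses, protocol and ports) a conntrack
 * flow_rule matches on. Only IPv4/IPv6 over TCP/UDP is offloadable;
 * anything else is rejected with -EOPNOTSUPP.
 */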
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
	struct flow_match_control control;
	struct flow_match_basic basic;

	flow_rule_match_basic(rule, &basic);
	flow_rule_match_control(rule, &control);

	tuple->n_proto = basic.key->n_proto;
	tuple->ip_proto = basic.key->ip_proto;
	tuple->addr_type = control.key->addr_type;

	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		tuple->ip.src_v4 = match.key->src;
		tuple->ip.dst_v4 = match.key->dst;
	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		tuple->ip.src_v6 = match.key->src;
		tuple->ip.dst_v6 = match.key->dst;
	} else {
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (tuple->ip_proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
			tuple->port.src = match.key->src;
			tuple->port.dst = match.key->dst;
			break;
		default:
			return -EOPNOTSUPP;
		}
	} else {
		return -EOPNOTSUPP;
	}

	return 0;
}

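/* Derive the post-NAT tuple by replaying the rule's mangle actions on
 * top of a copy of the pre-NAT tuple. Unsupported mangle targets fail
 * the translation with -EOPNOTSUPP.
 */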
static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
			     struct flow_rule *rule)
{
	struct flow_action *flow_action = &rule->action;
	struct flow_action_entry *act;
	u32 offset, val, ip6_offset;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id != FLOW_ACTION_MANGLE)
			continue;

		offset = act->mangle.offset;
		val = act->mangle.val;
		switch (act->mangle.htype) {
		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
			if (offset == offsetof(struct iphdr, saddr))
				tuple->ip.src_v4 = cpu_to_be32(val);
			else if (offset == offsetof(struct iphdr, daddr))
				tuple->ip.dst_v4 = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
			ip6_offset /= 4;
			if (ip6_offset < 4)
				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
			else if (ip6_offset < 8)
				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
			if (offset == offsetof(struct tcphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct tcphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
			if (offset == offsetof(struct udphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct udphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

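/* Convert the software flow_rule match (ethertype, ip protocol,
 * addresses, ports, tcp flags) into the device's fte_match_param
 * layout on the outer headers.
 */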
static int
mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
			   struct flow_rule *rule)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	u16 addr_type = 0;
	u8 ip_proto = 0;

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);

		mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
				       headers_v);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 match.mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 match.key->ip_proto);

		ip_proto = match.key->ip_proto;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match;

		flow_rule_match_control(rule, &match);
		addr_type = match.key->addr_type;
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.key->src, sizeof(match.key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.key->src, sizeof(match.key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(match.key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(match.key->dst));
			break;
		default:
			break;
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match;

		flow_rule_match_tcp(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(match.mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(match.key->flags));
	}

	return 0;
}

static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
			  struct mlx5_ct_entry *entry,
			  bool nat)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
	struct mlx5_eswitch *esw = ct_priv->esw;

	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);

	mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr);
	mlx5e_mod_hdr_detach(ct_priv->esw->dev,
			     &esw->offloads.mod_hdr, zone_rule->mh);
	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
}

static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct mlx5_ct_entry *entry)
{
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);

	mlx5_fc_destroy(ct_priv->esw->dev, entry->counter);
}

static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct flow_action_entry *act;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id == FLOW_ACTION_CT_METADATA)
			return act;
	}

	return NULL;
}

static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
			       u8 ct_state,
			       u32 mark,
			       u32 labels_id,
			       u8 zone_restore_id)
{
	struct mlx5_eswitch *esw = ct_priv->esw;
	int err;

	err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
					CTSTATE_TO_REG, ct_state);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
					MARK_TO_REG, mark);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
					LABELS_TO_REG, labels_id);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
					ZONE_RESTORE_TO_REG, zone_restore_id);
	if (err)
		return err;

	return 0;
}

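/* Translate a single pedit mangle action into one device set_action_in
 * modify-header action. IPv6 addresses are rewritten as four 32-bit
 * words, so the offset selects which word of saddr/daddr is set.
 */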
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
				   char *modact)
{
	u32 offset = act->mangle.offset, field;

	switch (act->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct iphdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
		else if (offset == offsetof(struct iphdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct tcphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
		else if (offset == offsetof(struct tcphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct udphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
		else if (offset == offsetof(struct udphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	default:
		return -EOPNOTSUPP;
	}

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, offset, 0);
	MLX5_SET(set_action_in, modact, field, field);
	MLX5_SET(set_action_in, modact, data, act->mangle.val);

	return 0;
}

static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
			    struct flow_rule *flow_rule,
			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct mlx5_core_dev *mdev = ct_priv->esw->dev;
	struct flow_action_entry *act;
	size_t action_size;
	char *modact;
	int err, i;

	action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_MANGLE: {
			err = alloc_mod_hdr_actions(mdev,
						    MLX5_FLOW_NAMESPACE_FDB,
						    mod_acts);
			if (err)
				return err;

			modact = mod_acts->actions +
				 mod_acts->num_actions * action_size;

			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
			if (err)
				return err;

			mod_acts->num_actions++;
		}
		break;

		case FLOW_ACTION_CT_METADATA:
			/* Handled earlier */
			continue;
		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

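/* Build the modify-header used by a ct entry rule: map the ct labels to
 * an id, add the NAT rewrites when requested, and set the ct metadata
 * registers (state/mark/labels/zone restore) for the post_ct match and
 * for restoring the skb after a later hardware miss.
 */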
static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				struct mlx5_esw_flow_attr *attr,
				struct flow_rule *flow_rule,
				struct mlx5e_mod_hdr_handle **mh,
				u8 zone_restore_id, bool nat)
{
	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
	struct flow_action_entry *meta;
	u16 ct_state = 0;
	int err;

	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta)
		return -EOPNOTSUPP;

	err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels,
			  &attr->ct_attr.ct_labels_id);
	if (err)
		return -EOPNOTSUPP;

	if (nat) {
		err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
						  &mod_acts);
		if (err)
			goto err_mapping;

		ct_state |= MLX5_CT_STATE_NAT_BIT;
	}

	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
					     ct_state,
					     meta->ct_metadata.mark,
					     attr->ct_attr.ct_labels_id,
					     zone_restore_id);
	if (err)
		goto err_mapping;

	*mh = mlx5e_mod_hdr_attach(ct_priv->esw->dev,
				   &ct_priv->esw->offloads.mod_hdr,
				   MLX5_FLOW_NAMESPACE_FDB,
				   &mod_acts);
	if (IS_ERR(*mh)) {
		err = PTR_ERR(*mh);
		goto err_mapping;
	}
	attr->modify_hdr = mlx5e_mod_hdr_get(*mh);

	dealloc_mod_hdr_actions(&mod_acts);
	return 0;

err_mapping:
	dealloc_mod_hdr_actions(&mod_acts);
	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
	return err;
}

static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
			  struct flow_rule *flow_rule,
			  struct mlx5_ct_entry *entry,
			  bool nat, u8 zone_restore_id)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
	struct mlx5_eswitch *esw = ct_priv->esw;
	struct mlx5_flow_spec *spec = NULL;
	int err;

	zone_rule->nat = nat;

	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
					      &zone_rule->mh,
					      zone_restore_id, nat);
	if (err) {
		ct_dbg("Failed to create ct entry mod hdr");
		goto err_mod_hdr;
	}

	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
	attr->dest_chain = 0;
	attr->dest_ft = ct_priv->post_ct;
	attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct;
	attr->outer_match_level = MLX5_MATCH_L4;
	attr->counter = entry->counter;
	attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;

	mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
				    entry->tuple.zone & MLX5_CT_ZONE_MASK,
				    MLX5_CT_ZONE_MASK);

	zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
	if (IS_ERR(zone_rule->rule)) {
		err = PTR_ERR(zone_rule->rule);
		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
		goto err_rule;
	}

	kfree(spec);
	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

	return 0;

err_rule:
	mlx5e_mod_hdr_detach(ct_priv->esw->dev,
			     &esw->offloads.mod_hdr, zone_rule->mh);
err_mod_hdr:
	kfree(spec);
	return err;
}

static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct flow_rule *flow_rule,
			   struct mlx5_ct_entry *entry,
			   u8 zone_restore_id)
{
	struct mlx5_eswitch *esw = ct_priv->esw;
	int err;

	entry->counter = mlx5_fc_create(esw->dev, true);
	if (IS_ERR(entry->counter)) {
		err = PTR_ERR(entry->counter);
		ct_dbg("Failed to create counter for ct entry");
		return err;
	}

	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
					zone_restore_id);
	if (err)
		goto err_orig;

	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
					zone_restore_id);
	if (err)
		goto err_nat;

	return 0;

err_nat:
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
	mlx5_fc_destroy(esw->dev, entry->counter);
	return err;
}

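/* FLOW_CLS_REPLACE callback from the nf flow table: allocate a ct entry
 * for the offloaded conntrack connection, index it by cookie and by its
 * pre/post NAT tuples (for skb restore), and install the paired
 * CT/CT-NAT rules in hardware.
 */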
static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	struct flow_action_entry *meta_action;
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;
	int err;

	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta_action)
		return -EOPNOTSUPP;

	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
				       cts_ht_params);
	if (entry)
		return 0;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->tuple.zone = ft->zone;
	entry->cookie = flow->cookie;
	entry->restore_cookie = meta_action->ct_metadata.cookie;

	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
	if (err)
		goto err_set;

	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
	if (err)
		goto err_set;

	err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht,
				     &entry->tuple_node,
				     tuples_ht_params);
	if (err)
		goto err_tuple;

	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
		err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht,
					     &entry->tuple_nat_node,
					     tuples_nat_ht_params);
		if (err)
			goto err_tuple_nat;
	}

	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
					 ft->zone_restore_id);
	if (err)
		goto err_rules;

	err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
				     cts_ht_params);
	if (err)
		goto err_insert;

	return 0;

err_insert:
	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
err_rules:
	if (entry->tuple_nat_node.next)
		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
				       &entry->tuple_nat_node,
				       tuples_nat_ht_params);
err_tuple_nat:
	if (entry->tuple_node.next)
		rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
				       &entry->tuple_node,
				       tuples_ht_params);
err_tuple:
err_set:
	kfree(entry);
	netdev_warn(ct_priv->netdev,
		    "Failed to offload ct entry, err: %d\n", err);
	return err;
}

static void
mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
			struct mlx5_ct_entry *entry)
{
	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
	if (entry->tuple_nat_node.next)
		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
				       &entry->tuple_nat_node,
				       tuples_nat_ht_params);
	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
			       tuples_ht_params);
}

static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;

	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
				       cts_ht_params);
	if (!entry)
		return -ENOENT;

	mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry);
	WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
				       &entry->node,
				       cts_ht_params));
	kfree(entry);

	return 0;
}

static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
				    struct flow_cls_offload *f)
{
	unsigned long cookie = f->cookie;
	struct mlx5_ct_entry *entry;
	u64 lastuse, packets, bytes;

	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
				       cts_ht_params);
	if (!entry)
		return -ENOENT;

	mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse);
	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
			  FLOW_ACTION_HW_STATS_DELAYED);

	return 0;
}

static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
			      void *cb_priv)
{
	struct flow_cls_offload *f = type_data;
	struct mlx5_ct_ft *ft = cb_priv;

	if (type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (f->command) {
	case FLOW_CLS_REPLACE:
		return mlx5_tc_ct_block_flow_offload_add(ft, f);
	case FLOW_CLS_DESTROY:
		return mlx5_tc_ct_block_flow_offload_del(ft, f);
	case FLOW_CLS_STATS:
		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
	default:
		break;
	}

	return -EOPNOTSUPP;
}

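/* Dissect an skb that missed in hardware into a ct tuple so the
 * matching offloaded entry (and its restore cookie) can be looked up.
 */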
static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
			u16 zone)
{
	struct flow_keys flow_keys;

	skb_reset_network_header(skb);
	skb_flow_dissect_flow_keys(skb, &flow_keys, 0);

	tuple->zone = zone;

	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
	    flow_keys.basic.ip_proto != IPPROTO_UDP)
		return false;

	tuple->port.src = flow_keys.ports.src;
	tuple->port.dst = flow_keys.ports.dst;
	tuple->n_proto = flow_keys.basic.n_proto;
	tuple->ip_proto = flow_keys.basic.ip_proto;

	switch (flow_keys.basic.n_proto) {
	case htons(ETH_P_IP):
		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
		break;

	case htons(ETH_P_IPV6):
		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
		break;
	default:
		goto out;
	}

	return true;

out:
	return false;
}

int
mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv,
			    struct mlx5_flow_spec *spec)
{
	u32 ctstate = 0, ctstate_mask = 0;

	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
					&ctstate, &ctstate_mask);
	if (ctstate_mask)
		return -EOPNOTSUPP;

	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
				    ctstate, ctstate_mask);

	return 0;
}

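/* Parse the flower ct_state/zone/mark/labels match into register
 * matches. ct_state arrives as key+mask, so a bit may be required set
 * (+trk/+est), required clear (-trk/-est), or left as don't care.
 */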
int
mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
		       struct mlx5_flow_spec *spec,
		       struct flow_cls_offload *f,
		       struct mlx5_ct_attr *ct_attr,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_dissector_key_ct *mask, *key;
	bool trk, est, untrk, unest, new;
	u32 ctstate = 0, ctstate_mask = 0;
	u16 ct_state_on, ct_state_off;
	u16 ct_state, ct_state_mask;
	struct flow_match_ct match;
	u32 ct_labels[4];

	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
		return 0;

	if (!ct_priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct matching isn't available");
		return -EOPNOTSUPP;
	}

	flow_rule_match_ct(rule, &match);

	key = match.key;
	mask = match.mask;

	ct_state = key->ct_state;
	ct_state_mask = mask->ct_state;

	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
			      TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only ct_state trk, est and new are supported for offload");
		return -EOPNOTSUPP;
	}

	ct_state_on = ct_state & ct_state_mask;
	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;

	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;

	if (new) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +new isn't supported");
		return -EOPNOTSUPP;
	}

	if (mask->ct_zone)
		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
					    key->ct_zone, MLX5_CT_ZONE_MASK);
	if (ctstate_mask)
		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
					    ctstate, ctstate_mask);
	if (mask->ct_mark)
		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
					    key->ct_mark, mask->ct_mark);
	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
	    mask->ct_labels[3]) {
		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
		if (mapping_add(ct_priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
			return -EOPNOTSUPP;
		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
					    MLX5_CT_LABELS_MASK);
	}

	return 0;
}

int
mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
			struct mlx5_esw_flow_attr *attr,
			const struct flow_action_entry *act,
			struct netlink_ext_ack *extack)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);

	if (!ct_priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct action isn't available");
		return -EOPNOTSUPP;
	}

	attr->ct_attr.zone = act->ct.zone;
	attr->ct_attr.ct_action = act->ct.action;
	attr->ct_attr.nf_ft = act->ct.flow_table;

	return 0;
}

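/* Populate a pre_ct table with its two rules: already-tracked traffic
 * of this zone (and NAT-ness) skips straight to post_ct, everything
 * else misses into the CT (or CT-NAT) table for tuple matching.
 */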
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
				  struct mlx5_tc_ct_pre *pre_ct,
				  bool nat)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
	struct mlx5_core_dev *dev = ct_priv->esw->dev;
	struct mlx5_flow_table *fdb = pre_ct->fdb;
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	u32 ctstate;
	u16 zone;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone);
	if (err) {
		ct_dbg("Failed to set zone register mapping");
		goto err_mapping;
	}

	mod_hdr = mlx5_modify_header_alloc(dev,
					   MLX5_FLOW_NAMESPACE_FDB,
					   pre_mod_acts.num_actions,
					   pre_mod_acts.actions);

	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct->modify_hdr = mod_hdr;

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
	flow_act.modify_hdr = mod_hdr;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

	/* add flow rule */
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
				    zone, MLX5_CT_ZONE_MASK);
	ctstate = MLX5_CT_STATE_TRK_BIT;
	if (nat)
		ctstate |= MLX5_CT_STATE_NAT_BIT;
	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);

	dest.ft = ct_priv->post_ct;
	rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
		goto err_flow_rule;
	}
	pre_ct->flow_rule = rule;

	/* add miss rule */
	memset(spec, 0, sizeof(*spec));
	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
		goto err_miss_rule;
	}
	pre_ct->miss_rule = rule;

	dealloc_mod_hdr_actions(&pre_mod_acts);
	kvfree(spec);
	return 0;

err_miss_rule:
	mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
	dealloc_mod_hdr_actions(&pre_mod_acts);
	kvfree(spec);
	return err;
}

static void
tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
		       struct mlx5_tc_ct_pre *pre_ct)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->esw->dev;

	mlx5_del_flow_rules(pre_ct->flow_rule);
	mlx5_del_flow_rules(pre_ct->miss_rule);
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
}

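/* Create a per-zone pre_ct table: two FTEs in two groups, one matching
 * on reg_c_2 (zone + ct state) and one catch-all for the miss rule.
 */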
static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
			struct mlx5_tc_ct_pre *pre_ct,
			bool nat)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->esw->dev;
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *g;
	u32 metadata_reg_c_2_mask;
	u32 *flow_group_in;
	void *misc;
	int err;

	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!ns) {
		err = -EOPNOTSUPP;
		ct_dbg("Failed to get FDB flow namespace");
		return err;
	}

	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
	if (!flow_group_in)
		return -ENOMEM;

	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.prio = FDB_TC_OFFLOAD;
	ft_attr.max_fte = 2;
	ft_attr.level = 1;
	ft = mlx5_create_flow_table(ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to create pre ct table");
		goto out_free;
	}
	pre_ct->fdb = ft;

	/* create flow group */
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
		 MLX5_MATCH_MISC_PARAMETERS_2);

	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
			    match_criteria.misc_parameters_2);

	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
	if (nat)
		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);

	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
		 metadata_reg_c_2_mask);

	g = mlx5_create_flow_group(ft, flow_group_in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		ct_dbg("Failed to create pre ct group");
		goto err_flow_grp;
	}
	pre_ct->flow_grp = g;

	/* create miss group */
	memset(flow_group_in, 0, inlen);
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
	g = mlx5_create_flow_group(ft, flow_group_in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		ct_dbg("Failed to create pre ct miss group");
		goto err_miss_grp;
	}
	pre_ct->miss_grp = g;

	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
	if (err)
		goto err_add_rules;

	kvfree(flow_group_in);
	return 0;

err_add_rules:
	mlx5_destroy_flow_group(pre_ct->miss_grp);
err_miss_grp:
	mlx5_destroy_flow_group(pre_ct->flow_grp);
err_flow_grp:
	mlx5_destroy_flow_table(ft);
out_free:
	kvfree(flow_group_in);
	return err;
}

static void
mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
		       struct mlx5_tc_ct_pre *pre_ct)
{
	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
	mlx5_destroy_flow_group(pre_ct->miss_grp);
	mlx5_destroy_flow_group(pre_ct->flow_grp);
	mlx5_destroy_flow_table(pre_ct->fdb);
}

static int
mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
{
	int err;

	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
	if (err)
		return err;

	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
	if (err)
		goto err_pre_ct_nat;

	return 0;

err_pre_ct_nat:
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
	return err;
}

static void
mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
{
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
}

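/* Get (or create) the refcounted per-zone context: allocate a zone
 * restore id, build the pre_ct/pre_ct_nat tables and register for
 * established-flow events on the nf flow table.
 */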
static struct mlx5_ct_ft *
mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
		     struct nf_flowtable *nf_ft)
{
	struct mlx5_ct_ft *ft;
	int err;

	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
	if (ft) {
		refcount_inc(&ft->refcount);
		return ft;
	}

	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
	if (err)
		goto err_mapping;

	ft->zone = zone;
	ft->nf_ft = nf_ft;
	ft->ct_priv = ct_priv;
	refcount_set(&ft->refcount, 1);

	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
	if (err)
		goto err_alloc_pre_ct;

	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
	if (err)
		goto err_init;

	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
				     zone_params);
	if (err)
		goto err_insert;

	err = nf_flow_table_offload_add_cb(ft->nf_ft,
					   mlx5_tc_ct_block_flow_offload, ft);
	if (err)
		goto err_add_cb;

	return ft;

err_add_cb:
	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
err_insert:
	rhashtable_destroy(&ft->ct_entries_ht);
err_init:
	mlx5_tc_ct_free_pre_ct_tables(ft);
err_alloc_pre_ct:
	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
err_mapping:
	kfree(ft);
	return ERR_PTR(err);
}

static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{
	struct mlx5_tc_ct_priv *ct_priv = arg;
	struct mlx5_ct_entry *entry = ptr;

	mlx5_tc_ct_del_ft_entry(ct_priv, entry);
	kfree(entry);
}

static void
mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
{
	if (!refcount_dec_and_test(&ft->refcount))
		return;

	nf_flow_table_offload_del_cb(ft->nf_ft,
				     mlx5_tc_ct_block_flow_offload, ft);
	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
	rhashtable_free_and_destroy(&ft->ct_entries_ht,
				    mlx5_tc_ct_flush_ft_entry,
				    ct_priv);
	mlx5_tc_ct_free_pre_ct_tables(ft);
	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
	kfree(ft);
}

/* We translate the tc filter with CT action to the following HW model:
 *
 * +---------------------+
 * + fdb prio (tc chain) +
 * + original match      +
 * +---------------------+
 *      | set chain miss mapping
 *      | set fte_id
 *      | set tunnel_id
 *      | do decap
 *      v
 * +---------------------+
 * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
 * + zone+nat match      +---------------->+ post_ct (see below) +
 * +---------------------+  set zone       +---------------------+
 *      | set zone
 *      v
 * +--------------------+
 * + CT (nat or no nat) +
 * + tuple + zone match +
 * +--------------------+
 *      | set mark
 *      | set labels_id
 *      | set established
 *      | set zone_restore
 *      | do nat (if needed)
 *      v
 * +--------------+
 * + post_ct      + original filter actions
 * + fte_id match +------------------------>
 * +--------------+
 */
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
			  struct mlx5e_tc_flow *flow,
			  struct mlx5_flow_spec *orig_spec,
			  struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
	struct mlx5_flow_spec *post_ct_spec = NULL;
	struct mlx5_eswitch *esw = ct_priv->esw;
	struct mlx5_esw_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_ct_flow *ct_flow;
	int chain_mapping = 0, err;
	struct mlx5_ct_ft *ft;
	u32 fte_id = 1;

	post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!post_ct_spec || !ct_flow) {
		kfree(post_ct_spec);
		kfree(ct_flow);
		return ERR_PTR(-ENOMEM);
	}

	/* Register for CT established events */
	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
				  attr->ct_attr.nf_ft);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to register to ft callback");
		goto err_ft;
	}
	ct_flow->ft = ft;

	err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
			    MLX5_FTE_ID_MAX, GFP_KERNEL);
	if (err) {
		netdev_warn(priv->netdev,
			    "Failed to allocate fte id, err: %d\n", err);
		goto err_idr;
	}
	ct_flow->fte_id = fte_id;

	/* Base esw attributes of both rules on original rule attribute */
	pre_ct_attr = &ct_flow->pre_ct_attr;
	memcpy(pre_ct_attr, attr, sizeof(*attr));
	memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr));

	/* Modify the original rule's action to fwd and modify, leave decap */
	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	/* Write the chain miss tag for a miss in the ct table, as we
	 * don't go through all the prios of this chain the way normal
	 * tc rule misses do.
	 */
	err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain,
						&chain_mapping);
	if (err) {
		ct_dbg("Failed to get chain register mapping for chain");
		goto err_get_chain;
	}
	ct_flow->chain_mapping = chain_mapping;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
					CHAIN_TO_REG, chain_mapping);
	if (err) {
		ct_dbg("Failed to set chain register mapping");
		goto err_mapping;
	}

	err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
					FTEID_TO_REG, fte_id);
	if (err) {
		ct_dbg("Failed to set fte_id register mapping");
		goto err_mapping;
	}

	/* If original flow is decap, we do it before going into ct table
	 * so add a rewrite for the tunnel match_id.
	 */
	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
	    attr->chain == 0) {
		u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);

		err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
						TUNNEL_TO_REG,
						tun_id);
		if (err) {
			ct_dbg("Failed to set tunnel register mapping");
			goto err_mapping;
		}
	}

	mod_hdr = mlx5_modify_header_alloc(esw->dev,
					   MLX5_FLOW_NAMESPACE_FDB,
					   pre_mod_acts.num_actions,
					   pre_mod_acts.actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct_attr->modify_hdr = mod_hdr;

	/* Post ct rule matches on fte_id and executes original rule's
	 * tc rule action
	 */
	mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
				    fte_id, MLX5_FTE_ID_MASK);

	/* Put post_ct rule on post_ct fdb */
	ct_flow->post_ct_attr.chain = 0;
	ct_flow->post_ct_attr.prio = 0;
	ct_flow->post_ct_attr.fdb = ct_priv->post_ct;

	ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE;
	ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE;
	ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
	rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec,
					       &ct_flow->post_ct_attr);
	ct_flow->post_ct_rule = rule;
	if (IS_ERR(ct_flow->post_ct_rule)) {
		err = PTR_ERR(ct_flow->post_ct_rule);
		ct_dbg("Failed to add post ct rule");
		goto err_insert_post_ct;
	}

	/* Change original rule point to ct table */
	pre_ct_attr->dest_chain = 0;
	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb;
	ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw,
							       orig_spec,
							       pre_ct_attr);
	if (IS_ERR(ct_flow->pre_ct_rule)) {
		err = PTR_ERR(ct_flow->pre_ct_rule);
		ct_dbg("Failed to add pre ct rule");
		goto err_insert_orig;
	}

	attr->ct_attr.ct_flow = ct_flow;
	dealloc_mod_hdr_actions(&pre_mod_acts);
	kfree(post_ct_spec);

	return rule;

err_insert_orig:
	mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule,
					&ct_flow->post_ct_attr);
err_insert_post_ct:
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
	dealloc_mod_hdr_actions(&pre_mod_acts);
	mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
err_get_chain:
	idr_remove(&ct_priv->fte_ids, fte_id);
err_idr:
	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
	kfree(post_ct_spec);
	kfree(ct_flow);
	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
	return ERR_PTR(err);
}

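/* ct clear doesn't need the pre_ct/post_ct pipeline; it only zeroes the
 * ct metadata registers on the original rule via a modify header.
 */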
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv,
				struct mlx5_flow_spec *orig_spec,
				struct mlx5_esw_flow_attr *attr,
				struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	struct mlx5_eswitch *esw = ct_priv->esw;
	struct mlx5_esw_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_ct_flow *ct_flow;
	int err;

	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!ct_flow)
		return ERR_PTR(-ENOMEM);

	/* Base esw attributes on original rule attribute */
	pre_ct_attr = &ct_flow->pre_ct_attr;
	memcpy(pre_ct_attr, attr, sizeof(*attr));

	err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
	if (err) {
		ct_dbg("Failed to set register for ct clear");
		goto err_set_registers;
	}

	mod_hdr = mlx5_modify_header_alloc(esw->dev,
					   MLX5_FLOW_NAMESPACE_FDB,
					   mod_acts->num_actions,
					   mod_acts->actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create ct clear mod hdr");
		goto err_set_registers;
	}

	dealloc_mod_hdr_actions(mod_acts);
	pre_ct_attr->modify_hdr = mod_hdr;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add ct clear rule");
		goto err_insert;
	}

	attr->ct_attr.ct_flow = ct_flow;
	ct_flow->pre_ct_rule = rule;
	return rule;

err_insert:
	mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
err_set_registers:
	netdev_warn(priv->netdev,
		    "Failed to offload ct clear flow, err %d\n", err);
	kfree(ct_flow);
	return ERR_PTR(err);
}

struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow,
			struct mlx5_flow_spec *spec,
			struct mlx5_esw_flow_attr *attr,
			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
	bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	struct mlx5_flow_handle *rule;

	if (!ct_priv)
		return ERR_PTR(-EOPNOTSUPP);

	mutex_lock(&ct_priv->control_lock);

	if (clear_action)
		rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
	else
		rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
	mutex_unlock(&ct_priv->control_lock);

	return rule;
}

static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct mlx5_ct_flow *ct_flow)
{
	struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr;
	struct mlx5_eswitch *esw = ct_priv->esw;

	mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule,
					pre_ct_attr);
	mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr);

	if (ct_flow->post_ct_rule) {
		mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule,
						&ct_flow->post_ct_attr);
		mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
		idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
		mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
	}

	kfree(ct_flow);
}

void
mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow,
		       struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;

	/* We are also called on error to clean up anything left over
	 * from parsing, but there is nothing to clean up for now.
	 */
	if (!ct_flow)
		return;

	mutex_lock(&ct_priv->control_lock);
	__mlx5_tc_ct_delete_flow(ct_priv, ct_flow);
	mutex_unlock(&ct_priv->control_lock);
}

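/* ct offload depends on restoring the chain on a hardware miss (tc skb
 * extension) and on firmware support for ignore_flow_level, modify
 * header + forward-to-table, and reg_c1 loopback.
 */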
static int
mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw,
			      const char **err_msg)
{
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	/* cannot restore chain ID on HW miss */

	*err_msg = "tc skb extension missing";
	return -EOPNOTSUPP;
#endif

	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
		*err_msg = "firmware level support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
		/* vlan workaround should be avoided for multi chain rules.
		 * This is just a sanity check as pop vlan action should
		 * be supported by any FW that supports ignore_flow_level
		 */

		*err_msg = "firmware vlan actions support is missing";
		return -EOPNOTSUPP;
	}

	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
				    fdb_modify_header_fwd_to_table)) {
		/* CT always writes to registers which are mod header actions.
		 * Therefore, mod header and goto is required
		 */

		*err_msg = "firmware fwd and modify support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
		*err_msg = "register loopback isn't supported";
		return -EOPNOTSUPP;
	}

	return 0;
}

static void
mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err)
{
	if (msg)
		netdev_warn(rpriv->netdev,
			    "tc ct offload not supported, %s, err: %d\n",
			    msg, err);
	else
		netdev_warn(rpriv->netdev,
			    "tc ct offload not supported, err: %d\n",
			    err);
}

int
mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv)
{
	struct mlx5_tc_ct_priv *ct_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;
	struct mlx5e_priv *priv;
	const char *msg;
	int err;

	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
	priv = netdev_priv(rpriv->netdev);
	esw = priv->mdev->priv.eswitch;

	err = mlx5_tc_ct_init_check_support(esw, &msg);
	if (err) {
		mlx5_tc_ct_init_err(rpriv, msg, err);
		goto err_support;
	}

	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
	if (!ct_priv) {
		mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM);
		goto err_alloc;
	}

	ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
	if (IS_ERR(ct_priv->zone_mapping)) {
		err = PTR_ERR(ct_priv->zone_mapping);
		goto err_mapping_zone;
	}

	ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
	if (IS_ERR(ct_priv->labels_mapping)) {
		err = PTR_ERR(ct_priv->labels_mapping);
		goto err_mapping_labels;
	}

	ct_priv->esw = esw;
	ct_priv->netdev = rpriv->netdev;
	ct_priv->ct = mlx5_esw_chains_create_global_table(esw);
	if (IS_ERR(ct_priv->ct)) {
		err = PTR_ERR(ct_priv->ct);
		mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err);
		goto err_ct_tbl;
	}

	ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw);
	if (IS_ERR(ct_priv->ct_nat)) {
		err = PTR_ERR(ct_priv->ct_nat);
		mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table",
				    err);
		goto err_ct_nat_tbl;
	}

	ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw);
	if (IS_ERR(ct_priv->post_ct)) {
		err = PTR_ERR(ct_priv->post_ct);
		mlx5_tc_ct_init_err(rpriv, "failed to create post ct table",
				    err);
		goto err_post_ct_tbl;
	}

	idr_init(&ct_priv->fte_ids);
	mutex_init(&ct_priv->control_lock);
	rhashtable_init(&ct_priv->zone_ht, &zone_params);
	rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
	rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);

	/* Done, set ct_priv to indicate it is initialized */
	uplink_priv->ct_priv = ct_priv;

	return 0;

err_post_ct_tbl:
	mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat);
err_ct_nat_tbl:
	mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct);
err_ct_tbl:
	mapping_destroy(ct_priv->labels_mapping);
err_mapping_labels:
	mapping_destroy(ct_priv->zone_mapping);
err_mapping_zone:
	kfree(ct_priv);
err_alloc:
err_support:
	/* ct offload is optional, so failure here doesn't fail uplink init */
	return 0;
}

void
mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv)
{
	struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;

	if (!ct_priv)
		return;

	mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct);
	mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat);
	mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct);
	mapping_destroy(ct_priv->zone_mapping);
	mapping_destroy(ct_priv->labels_mapping);

	rhashtable_destroy(&ct_priv->ct_tuples_ht);
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
	rhashtable_destroy(&ct_priv->zone_ht);
	mutex_destroy(&ct_priv->control_lock);
	idr_destroy(&ct_priv->fte_ids);
	kfree(ct_priv);

	uplink_priv->ct_priv = NULL;
}

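/* Restore path for packets of offloaded connections that missed in
 * hardware: map the zone restore id back to a zone, look the tuple up
 * in the regular and NAT tuple tables, and restore the conntrack state
 * onto the skb via the saved cookie.
 */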
bool
mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv,
			 struct sk_buff *skb, u8 zone_restore_id)
{
	struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;
	struct mlx5_ct_tuple tuple = {};
	struct mlx5_ct_entry *entry;
	u16 zone;

	if (!ct_priv || !zone_restore_id)
		return true;

	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
		return false;

	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
		return false;

	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple,
				       tuples_ht_params);
	if (!entry)
		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
					       &tuple, tuples_nat_ht_params);
	if (!entry)
		return false;

	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
	return true;
}