xref: /openbmc/linux/drivers/infiniband/hw/mlx5/fs.c (revision aa74c44b)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
4  */
5 
6 #include <rdma/ib_user_verbs.h>
7 #include <rdma/ib_verbs.h>
8 #include <rdma/uverbs_types.h>
9 #include <rdma/uverbs_ioctl.h>
10 #include <rdma/uverbs_std_types.h>
11 #include <rdma/mlx5_user_ioctl_cmds.h>
12 #include <rdma/mlx5_user_ioctl_verbs.h>
13 #include <rdma/ib_hdrs.h>
14 #include <rdma/ib_umem.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/fs.h>
17 #include <linux/mlx5/fs_helpers.h>
18 #include <linux/mlx5/accel.h>
19 #include <linux/mlx5/eswitch.h>
20 #include <net/inet_ecn.h>
21 #include "mlx5_ib.h"
22 #include "counters.h"
23 #include "devx.h"
24 #include "fs.h"
25 
26 #define UVERBS_MODULE_NAME mlx5_ib
27 #include <rdma/uverbs_named_ioctl.h>
28 
/*
 * Bit positions used by get_match_criteria_enable() when it builds the
 * match_criteria_enable bitmask, one per fte_match_param section.
 */
enum {
	MATCH_CRITERIA_ENABLE_OUTER_BIT = 0,	/* outer_headers */
	MATCH_CRITERIA_ENABLE_MISC_BIT = 1,	/* misc_parameters */
	MATCH_CRITERIA_ENABLE_INNER_BIT = 2,	/* inner_headers */
	MATCH_CRITERIA_ENABLE_MISC2_BIT = 3,	/* misc_parameters_2 */
};
35 
/*
 * Evaluates to true when the @headers section of @match_criteria (laid out
 * as fte_match_param) holds no non-zero byte.
 *
 * The expansion is wrapped in parentheses so it composes safely with any
 * surrounding operator, and the last line carries no continuation
 * backslash so the macro ends where it visually ends.
 */
#define HEADER_IS_ZERO(match_criteria, headers)				      \
	(!memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
		     0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))

40 static u8 get_match_criteria_enable(u32 *match_criteria)
41 {
42 	u8 match_criteria_enable;
43 
44 	match_criteria_enable =
45 		(!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
46 		MATCH_CRITERIA_ENABLE_OUTER_BIT;
47 	match_criteria_enable |=
48 		(!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
49 		MATCH_CRITERIA_ENABLE_MISC_BIT;
50 	match_criteria_enable |=
51 		(!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
52 		MATCH_CRITERIA_ENABLE_INNER_BIT;
53 	match_criteria_enable |=
54 		(!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
55 		MATCH_CRITERIA_ENABLE_MISC2_BIT;
56 
57 	return match_criteria_enable;
58 }
59 
60 static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
61 {
62 	u8 entry_mask;
63 	u8 entry_val;
64 	int err = 0;
65 
66 	if (!mask)
67 		goto out;
68 
69 	entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
70 			      ip_protocol);
71 	entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
72 			     ip_protocol);
73 	if (!entry_mask) {
74 		MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
75 		MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
76 		goto out;
77 	}
78 	/* Don't override existing ip protocol */
79 	if (mask != entry_mask || val != entry_val)
80 		err = -EINVAL;
81 out:
82 	return err;
83 }
84 
85 static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
86 			   bool inner)
87 {
88 	if (inner) {
89 		MLX5_SET(fte_match_set_misc,
90 			 misc_c, inner_ipv6_flow_label, mask);
91 		MLX5_SET(fte_match_set_misc,
92 			 misc_v, inner_ipv6_flow_label, val);
93 	} else {
94 		MLX5_SET(fte_match_set_misc,
95 			 misc_c, outer_ipv6_flow_label, mask);
96 		MLX5_SET(fte_match_set_misc,
97 			 misc_v, outer_ipv6_flow_label, val);
98 	}
99 }
100 
101 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
102 {
103 	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
104 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
105 	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
106 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
107 }
108 
109 static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
110 {
111 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
112 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
113 		return -EOPNOTSUPP;
114 
115 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
116 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
117 		return -EOPNOTSUPP;
118 
119 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
120 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
121 		return -EOPNOTSUPP;
122 
123 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
124 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
125 		return -EOPNOTSUPP;
126 
127 	return 0;
128 }
129 
/*
 * Name of the last member, per filter/spec type, that parse_flow_attr()
 * accepts. Used as the @field argument of FIELDS_NOT_SUPPORTED().
 */
#define LAST_ETH_FIELD vlan_tag
#define LAST_IB_FIELD sl
#define LAST_IPV4_FIELD tos
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
#define LAST_FLOW_TAG_FIELD tag_id
#define LAST_DROP_FIELD size
#define LAST_COUNTERS_FIELD counters

/*
 * Scan the bytes of @filter that follow member @field (the last supported
 * field) for a non-zero byte; the result of memchr_inv() is used as a
 * truth value by the callers.
 *
 * @filter is parenthesized wherever it is used as an expression and the
 * whole expansion is parenthesized, so the macro stays correct for any
 * lvalue expression and in any surrounding expression.
 */
#define FIELDS_NOT_SUPPORTED(filter, field)                                    \
	(memchr_inv((void *)&(filter).field + sizeof((filter).field), 0,       \
		    sizeof(filter) - offsetofend(typeof(filter), field)))
144 
145 int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
146 			   bool is_egress,
147 			   struct mlx5_flow_act *action)
148 {
149 
150 	switch (maction->ib_action.type) {
151 	case IB_FLOW_ACTION_ESP:
152 		if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
153 				      MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
154 			return -EINVAL;
155 		/* Currently only AES_GCM keymat is supported by the driver */
156 		action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
157 		action->action |= is_egress ?
158 			MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
159 			MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
160 		return 0;
161 	case IB_FLOW_ACTION_UNSPECIFIED:
162 		if (maction->flow_action_raw.sub_type ==
163 		    MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
164 			if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
165 				return -EINVAL;
166 			action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
167 			action->modify_hdr =
168 				maction->flow_action_raw.modify_hdr;
169 			return 0;
170 		}
171 		if (maction->flow_action_raw.sub_type ==
172 		    MLX5_IB_FLOW_ACTION_DECAP) {
173 			if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
174 				return -EINVAL;
175 			action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
176 			return 0;
177 		}
178 		if (maction->flow_action_raw.sub_type ==
179 		    MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
180 			if (action->action &
181 			    MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
182 				return -EINVAL;
183 			action->action |=
184 				MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
185 			action->pkt_reformat =
186 				maction->flow_action_raw.pkt_reformat;
187 			return 0;
188 		}
189 		fallthrough;
190 	default:
191 		return -EOPNOTSUPP;
192 	}
193 }
194 
195 static int parse_flow_attr(struct mlx5_core_dev *mdev,
196 			   struct mlx5_flow_spec *spec,
197 			   const union ib_flow_spec *ib_spec,
198 			   const struct ib_flow_attr *flow_attr,
199 			   struct mlx5_flow_act *action, u32 prev_type)
200 {
201 	struct mlx5_flow_context *flow_context = &spec->flow_context;
202 	u32 *match_c = spec->match_criteria;
203 	u32 *match_v = spec->match_value;
204 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
205 					   misc_parameters);
206 	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
207 					   misc_parameters);
208 	void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
209 					    misc_parameters_2);
210 	void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
211 					    misc_parameters_2);
212 	void *headers_c;
213 	void *headers_v;
214 	int match_ipv;
215 	int ret;
216 
217 	if (ib_spec->type & IB_FLOW_SPEC_INNER) {
218 		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
219 					 inner_headers);
220 		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
221 					 inner_headers);
222 		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
223 					ft_field_support.inner_ip_version);
224 	} else {
225 		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
226 					 outer_headers);
227 		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
228 					 outer_headers);
229 		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
230 					ft_field_support.outer_ip_version);
231 	}
232 
233 	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
234 	case IB_FLOW_SPEC_ETH:
235 		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
236 			return -EOPNOTSUPP;
237 
238 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
239 					     dmac_47_16),
240 				ib_spec->eth.mask.dst_mac);
241 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
242 					     dmac_47_16),
243 				ib_spec->eth.val.dst_mac);
244 
245 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
246 					     smac_47_16),
247 				ib_spec->eth.mask.src_mac);
248 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
249 					     smac_47_16),
250 				ib_spec->eth.val.src_mac);
251 
252 		if (ib_spec->eth.mask.vlan_tag) {
253 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
254 				 cvlan_tag, 1);
255 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
256 				 cvlan_tag, 1);
257 
258 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
259 				 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
260 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
261 				 first_vid, ntohs(ib_spec->eth.val.vlan_tag));
262 
263 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
264 				 first_cfi,
265 				 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
266 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
267 				 first_cfi,
268 				 ntohs(ib_spec->eth.val.vlan_tag) >> 12);
269 
270 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
271 				 first_prio,
272 				 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
273 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
274 				 first_prio,
275 				 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
276 		}
277 		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
278 			 ethertype, ntohs(ib_spec->eth.mask.ether_type));
279 		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
280 			 ethertype, ntohs(ib_spec->eth.val.ether_type));
281 		break;
282 	case IB_FLOW_SPEC_IPV4:
283 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
284 			return -EOPNOTSUPP;
285 
286 		if (match_ipv) {
287 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
288 				 ip_version, 0xf);
289 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
290 				 ip_version, MLX5_FS_IPV4_VERSION);
291 		} else {
292 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
293 				 ethertype, 0xffff);
294 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
295 				 ethertype, ETH_P_IP);
296 		}
297 
298 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
299 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
300 		       &ib_spec->ipv4.mask.src_ip,
301 		       sizeof(ib_spec->ipv4.mask.src_ip));
302 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
303 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
304 		       &ib_spec->ipv4.val.src_ip,
305 		       sizeof(ib_spec->ipv4.val.src_ip));
306 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
307 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
308 		       &ib_spec->ipv4.mask.dst_ip,
309 		       sizeof(ib_spec->ipv4.mask.dst_ip));
310 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
311 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
312 		       &ib_spec->ipv4.val.dst_ip,
313 		       sizeof(ib_spec->ipv4.val.dst_ip));
314 
315 		set_tos(headers_c, headers_v,
316 			ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
317 
318 		if (set_proto(headers_c, headers_v,
319 			      ib_spec->ipv4.mask.proto,
320 			      ib_spec->ipv4.val.proto))
321 			return -EINVAL;
322 		break;
323 	case IB_FLOW_SPEC_IPV6:
324 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
325 			return -EOPNOTSUPP;
326 
327 		if (match_ipv) {
328 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
329 				 ip_version, 0xf);
330 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
331 				 ip_version, MLX5_FS_IPV6_VERSION);
332 		} else {
333 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
334 				 ethertype, 0xffff);
335 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
336 				 ethertype, ETH_P_IPV6);
337 		}
338 
339 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
340 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
341 		       &ib_spec->ipv6.mask.src_ip,
342 		       sizeof(ib_spec->ipv6.mask.src_ip));
343 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
344 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
345 		       &ib_spec->ipv6.val.src_ip,
346 		       sizeof(ib_spec->ipv6.val.src_ip));
347 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
348 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
349 		       &ib_spec->ipv6.mask.dst_ip,
350 		       sizeof(ib_spec->ipv6.mask.dst_ip));
351 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
352 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
353 		       &ib_spec->ipv6.val.dst_ip,
354 		       sizeof(ib_spec->ipv6.val.dst_ip));
355 
356 		set_tos(headers_c, headers_v,
357 			ib_spec->ipv6.mask.traffic_class,
358 			ib_spec->ipv6.val.traffic_class);
359 
360 		if (set_proto(headers_c, headers_v,
361 			      ib_spec->ipv6.mask.next_hdr,
362 			      ib_spec->ipv6.val.next_hdr))
363 			return -EINVAL;
364 
365 		set_flow_label(misc_params_c, misc_params_v,
366 			       ntohl(ib_spec->ipv6.mask.flow_label),
367 			       ntohl(ib_spec->ipv6.val.flow_label),
368 			       ib_spec->type & IB_FLOW_SPEC_INNER);
369 		break;
370 	case IB_FLOW_SPEC_ESP:
371 		if (ib_spec->esp.mask.seq)
372 			return -EOPNOTSUPP;
373 
374 		MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
375 			 ntohl(ib_spec->esp.mask.spi));
376 		MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
377 			 ntohl(ib_spec->esp.val.spi));
378 		break;
379 	case IB_FLOW_SPEC_TCP:
380 		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
381 					 LAST_TCP_UDP_FIELD))
382 			return -EOPNOTSUPP;
383 
384 		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
385 			return -EINVAL;
386 
387 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
388 			 ntohs(ib_spec->tcp_udp.mask.src_port));
389 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
390 			 ntohs(ib_spec->tcp_udp.val.src_port));
391 
392 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
393 			 ntohs(ib_spec->tcp_udp.mask.dst_port));
394 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
395 			 ntohs(ib_spec->tcp_udp.val.dst_port));
396 		break;
397 	case IB_FLOW_SPEC_UDP:
398 		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
399 					 LAST_TCP_UDP_FIELD))
400 			return -EOPNOTSUPP;
401 
402 		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
403 			return -EINVAL;
404 
405 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
406 			 ntohs(ib_spec->tcp_udp.mask.src_port));
407 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
408 			 ntohs(ib_spec->tcp_udp.val.src_port));
409 
410 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
411 			 ntohs(ib_spec->tcp_udp.mask.dst_port));
412 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
413 			 ntohs(ib_spec->tcp_udp.val.dst_port));
414 		break;
415 	case IB_FLOW_SPEC_GRE:
416 		if (ib_spec->gre.mask.c_ks_res0_ver)
417 			return -EOPNOTSUPP;
418 
419 		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
420 			return -EINVAL;
421 
422 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
423 			 0xff);
424 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
425 			 IPPROTO_GRE);
426 
427 		MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
428 			 ntohs(ib_spec->gre.mask.protocol));
429 		MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
430 			 ntohs(ib_spec->gre.val.protocol));
431 
432 		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
433 				    gre_key.nvgre.hi),
434 		       &ib_spec->gre.mask.key,
435 		       sizeof(ib_spec->gre.mask.key));
436 		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
437 				    gre_key.nvgre.hi),
438 		       &ib_spec->gre.val.key,
439 		       sizeof(ib_spec->gre.val.key));
440 		break;
441 	case IB_FLOW_SPEC_MPLS:
442 		switch (prev_type) {
443 		case IB_FLOW_SPEC_UDP:
444 			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
445 						   ft_field_support.outer_first_mpls_over_udp),
446 						   &ib_spec->mpls.mask.tag))
447 				return -EOPNOTSUPP;
448 
449 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
450 					    outer_first_mpls_over_udp),
451 			       &ib_spec->mpls.val.tag,
452 			       sizeof(ib_spec->mpls.val.tag));
453 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
454 					    outer_first_mpls_over_udp),
455 			       &ib_spec->mpls.mask.tag,
456 			       sizeof(ib_spec->mpls.mask.tag));
457 			break;
458 		case IB_FLOW_SPEC_GRE:
459 			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
460 						   ft_field_support.outer_first_mpls_over_gre),
461 						   &ib_spec->mpls.mask.tag))
462 				return -EOPNOTSUPP;
463 
464 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
465 					    outer_first_mpls_over_gre),
466 			       &ib_spec->mpls.val.tag,
467 			       sizeof(ib_spec->mpls.val.tag));
468 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
469 					    outer_first_mpls_over_gre),
470 			       &ib_spec->mpls.mask.tag,
471 			       sizeof(ib_spec->mpls.mask.tag));
472 			break;
473 		default:
474 			if (ib_spec->type & IB_FLOW_SPEC_INNER) {
475 				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
476 							   ft_field_support.inner_first_mpls),
477 							   &ib_spec->mpls.mask.tag))
478 					return -EOPNOTSUPP;
479 
480 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
481 						    inner_first_mpls),
482 				       &ib_spec->mpls.val.tag,
483 				       sizeof(ib_spec->mpls.val.tag));
484 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
485 						    inner_first_mpls),
486 				       &ib_spec->mpls.mask.tag,
487 				       sizeof(ib_spec->mpls.mask.tag));
488 			} else {
489 				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
490 							   ft_field_support.outer_first_mpls),
491 							   &ib_spec->mpls.mask.tag))
492 					return -EOPNOTSUPP;
493 
494 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
495 						    outer_first_mpls),
496 				       &ib_spec->mpls.val.tag,
497 				       sizeof(ib_spec->mpls.val.tag));
498 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
499 						    outer_first_mpls),
500 				       &ib_spec->mpls.mask.tag,
501 				       sizeof(ib_spec->mpls.mask.tag));
502 			}
503 		}
504 		break;
505 	case IB_FLOW_SPEC_VXLAN_TUNNEL:
506 		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
507 					 LAST_TUNNEL_FIELD))
508 			return -EOPNOTSUPP;
509 
510 		MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
511 			 ntohl(ib_spec->tunnel.mask.tunnel_id));
512 		MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
513 			 ntohl(ib_spec->tunnel.val.tunnel_id));
514 		break;
515 	case IB_FLOW_SPEC_ACTION_TAG:
516 		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
517 					 LAST_FLOW_TAG_FIELD))
518 			return -EOPNOTSUPP;
519 		if (ib_spec->flow_tag.tag_id >= BIT(24))
520 			return -EINVAL;
521 
522 		flow_context->flow_tag = ib_spec->flow_tag.tag_id;
523 		flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
524 		break;
525 	case IB_FLOW_SPEC_ACTION_DROP:
526 		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
527 					 LAST_DROP_FIELD))
528 			return -EOPNOTSUPP;
529 		action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
530 		break;
531 	case IB_FLOW_SPEC_ACTION_HANDLE:
532 		ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
533 			flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
534 		if (ret)
535 			return ret;
536 		break;
537 	case IB_FLOW_SPEC_ACTION_COUNT:
538 		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
539 					 LAST_COUNTERS_FIELD))
540 			return -EOPNOTSUPP;
541 
542 		/* for now support only one counters spec per flow */
543 		if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
544 			return -EINVAL;
545 
546 		action->counters = ib_spec->flow_count.counters;
547 		action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
548 		break;
549 	default:
550 		return -EINVAL;
551 	}
552 
553 	return 0;
554 }
555 
556 /* If a flow could catch both multicast and unicast packets,
557  * it won't fall into the multicast flow steering table and this rule
558  * could steal other multicast packets.
559  */
560 static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
561 {
562 	union ib_flow_spec *flow_spec;
563 
564 	if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
565 	    ib_attr->num_of_specs < 1)
566 		return false;
567 
568 	flow_spec = (union ib_flow_spec *)(ib_attr + 1);
569 	if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
570 		struct ib_flow_spec_ipv4 *ipv4_spec;
571 
572 		ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
573 		if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
574 			return true;
575 
576 		return false;
577 	}
578 
579 	if (flow_spec->type == IB_FLOW_SPEC_ETH) {
580 		struct ib_flow_spec_eth *eth_spec;
581 
582 		eth_spec = (struct ib_flow_spec_eth *)flow_spec;
583 		return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
584 		       is_multicast_ether_addr(eth_spec->val.dst_mac);
585 	}
586 
587 	return false;
588 }
589 
/* Verdict of a spec validity check such as is_valid_esp_aes_gcm(). */
enum valid_spec {
	VALID_SPEC_INVALID = 0,	/* the check applies and the spec fails it */
	VALID_SPEC_VALID = 1,	/* the check applies and the spec passes it */
	VALID_SPEC_NA = 2,	/* the check does not apply to this spec */
};
595 
596 static enum valid_spec
597 is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
598 		     const struct mlx5_flow_spec *spec,
599 		     const struct mlx5_flow_act *flow_act,
600 		     bool egress)
601 {
602 	const u32 *match_c = spec->match_criteria;
603 	bool is_crypto =
604 		(flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
605 				     MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
606 	bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
607 	bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
608 
609 	/*
610 	 * Currently only crypto is supported in egress, when regular egress
611 	 * rules would be supported, always return VALID_SPEC_NA.
612 	 */
613 	if (!is_crypto)
614 		return VALID_SPEC_NA;
615 
616 	return is_crypto && is_ipsec &&
617 		(!egress || (!is_drop &&
618 			     !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
619 		VALID_SPEC_VALID : VALID_SPEC_INVALID;
620 }
621 
622 static bool is_valid_spec(struct mlx5_core_dev *mdev,
623 			  const struct mlx5_flow_spec *spec,
624 			  const struct mlx5_flow_act *flow_act,
625 			  bool egress)
626 {
627 	/* We curretly only support ipsec egress flow */
628 	return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
629 }
630 
631 static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
632 			       const struct ib_flow_attr *flow_attr,
633 			       bool check_inner)
634 {
635 	union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
636 	int match_ipv = check_inner ?
637 			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
638 					ft_field_support.inner_ip_version) :
639 			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
640 					ft_field_support.outer_ip_version);
641 	int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
642 	bool ipv4_spec_valid, ipv6_spec_valid;
643 	unsigned int ip_spec_type = 0;
644 	bool has_ethertype = false;
645 	unsigned int spec_index;
646 	bool mask_valid = true;
647 	u16 eth_type = 0;
648 	bool type_valid;
649 
650 	/* Validate that ethertype is correct */
651 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
652 		if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
653 		    ib_spec->eth.mask.ether_type) {
654 			mask_valid = (ib_spec->eth.mask.ether_type ==
655 				      htons(0xffff));
656 			has_ethertype = true;
657 			eth_type = ntohs(ib_spec->eth.val.ether_type);
658 		} else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
659 			   (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
660 			ip_spec_type = ib_spec->type;
661 		}
662 		ib_spec = (void *)ib_spec + ib_spec->size;
663 	}
664 
665 	type_valid = (!has_ethertype) || (!ip_spec_type);
666 	if (!type_valid && mask_valid) {
667 		ipv4_spec_valid = (eth_type == ETH_P_IP) &&
668 			(ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
669 		ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
670 			(ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
671 
672 		type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
673 			     (((eth_type == ETH_P_MPLS_UC) ||
674 			       (eth_type == ETH_P_MPLS_MC)) && match_ipv);
675 	}
676 
677 	return type_valid;
678 }
679 
680 static bool is_valid_attr(struct mlx5_core_dev *mdev,
681 			  const struct ib_flow_attr *flow_attr)
682 {
683 	return is_valid_ethertype(mdev, flow_attr, false) &&
684 	       is_valid_ethertype(mdev, flow_attr, true);
685 }
686 
687 static void put_flow_table(struct mlx5_ib_dev *dev,
688 			   struct mlx5_ib_flow_prio *prio, bool ft_added)
689 {
690 	prio->refcount -= !!ft_added;
691 	if (!prio->refcount) {
692 		mlx5_destroy_flow_table(prio->flow_table);
693 		prio->flow_table = NULL;
694 	}
695 }
696 
697 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
698 {
699 	struct mlx5_ib_flow_handler *handler = container_of(flow_id,
700 							  struct mlx5_ib_flow_handler,
701 							  ibflow);
702 	struct mlx5_ib_flow_handler *iter, *tmp;
703 	struct mlx5_ib_dev *dev = handler->dev;
704 
705 	mutex_lock(&dev->flow_db->lock);
706 
707 	list_for_each_entry_safe(iter, tmp, &handler->list, list) {
708 		mlx5_del_flow_rules(iter->rule);
709 		put_flow_table(dev, iter->prio, true);
710 		list_del(&iter->list);
711 		kfree(iter);
712 	}
713 
714 	mlx5_del_flow_rules(handler->rule);
715 	put_flow_table(dev, handler->prio, true);
716 	mlx5_ib_counters_clear_description(handler->ibcounters);
717 	mutex_unlock(&dev->flow_db->lock);
718 	if (handler->flow_matcher)
719 		atomic_dec(&handler->flow_matcher->usecnt);
720 	kfree(handler);
721 
722 	return 0;
723 }
724 
/*
 * Map a verbs flow priority to a core priority: each verbs priority takes
 * two consecutive core slots, the even one when @dont_trap is set and the
 * odd one otherwise.
 */
static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
{
	unsigned int core_prio = priority << 1;

	return dont_trap ? core_prio : core_prio + 1;
}
732 
/*
 * Direction of a flow table; get_flow_table() also uses the value as an
 * index into the sniffer priorities.
 */
enum flow_table_type {
	MLX5_IB_FT_RX = 0,
	MLX5_IB_FT_TX = 1,
};

/* Group and entry counts get_flow_table() requests for NORMAL flow tables */
#define MLX5_FS_MAX_TYPES	 6
#define MLX5_FS_MAX_ENTRIES	 BIT(16)
740 
741 static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
742 					   struct mlx5_ib_flow_prio *prio,
743 					   int priority,
744 					   int num_entries, int num_groups,
745 					   u32 flags)
746 {
747 	struct mlx5_flow_table_attr ft_attr = {};
748 	struct mlx5_flow_table *ft;
749 
750 	ft_attr.prio = priority;
751 	ft_attr.max_fte = num_entries;
752 	ft_attr.flags = flags;
753 	ft_attr.autogroup.max_num_groups = num_groups;
754 	ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
755 	if (IS_ERR(ft))
756 		return ERR_CAST(ft);
757 
758 	prio->flow_table = ft;
759 	prio->refcount = 0;
760 	return prio;
761 }
762 
763 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
764 						struct ib_flow_attr *flow_attr,
765 						enum flow_table_type ft_type)
766 {
767 	bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
768 	struct mlx5_flow_namespace *ns = NULL;
769 	enum mlx5_flow_namespace_type fn_type;
770 	struct mlx5_ib_flow_prio *prio;
771 	struct mlx5_flow_table *ft;
772 	int max_table_size;
773 	int num_entries;
774 	int num_groups;
775 	bool esw_encap;
776 	u32 flags = 0;
777 	int priority;
778 
779 	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
780 						       log_max_ft_size));
781 	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
782 		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
783 	switch (flow_attr->type) {
784 	case IB_FLOW_ATTR_NORMAL:
785 		if (flow_is_multicast_only(flow_attr) && !dont_trap)
786 			priority = MLX5_IB_FLOW_MCAST_PRIO;
787 		else
788 			priority = ib_prio_to_core_prio(flow_attr->priority,
789 							dont_trap);
790 		if (ft_type == MLX5_IB_FT_RX) {
791 			fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
792 			prio = &dev->flow_db->prios[priority];
793 			if (!dev->is_rep && !esw_encap &&
794 			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
795 				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
796 			if (!dev->is_rep && !esw_encap &&
797 			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
798 						      reformat_l3_tunnel_to_l2))
799 				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
800 		} else {
801 			max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
802 				dev->mdev, log_max_ft_size));
803 			fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
804 			prio = &dev->flow_db->egress_prios[priority];
805 			if (!dev->is_rep && !esw_encap &&
806 			    MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
807 				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
808 		}
809 		ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
810 		num_entries = MLX5_FS_MAX_ENTRIES;
811 		num_groups = MLX5_FS_MAX_TYPES;
812 		break;
813 	case IB_FLOW_ATTR_ALL_DEFAULT:
814 	case IB_FLOW_ATTR_MC_DEFAULT:
815 		ns = mlx5_get_flow_namespace(dev->mdev,
816 					     MLX5_FLOW_NAMESPACE_LEFTOVERS);
817 		build_leftovers_ft_param(&priority, &num_entries, &num_groups);
818 		prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
819 		break;
820 	case IB_FLOW_ATTR_SNIFFER:
821 		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
822 					allow_sniffer_and_nic_rx_shared_tir))
823 			return ERR_PTR(-EOPNOTSUPP);
824 
825 		ns = mlx5_get_flow_namespace(
826 			dev->mdev, ft_type == MLX5_IB_FT_RX ?
827 					   MLX5_FLOW_NAMESPACE_SNIFFER_RX :
828 					   MLX5_FLOW_NAMESPACE_SNIFFER_TX);
829 
830 		prio = &dev->flow_db->sniffer[ft_type];
831 		priority = 0;
832 		num_entries = 1;
833 		num_groups = 1;
834 		break;
835 	default:
836 		break;
837 	}
838 
839 	if (!ns)
840 		return ERR_PTR(-EOPNOTSUPP);
841 
842 	max_table_size = min_t(int, num_entries, max_table_size);
843 
844 	ft = prio->flow_table;
845 	if (!ft)
846 		return _get_prio(ns, prio, priority, max_table_size, num_groups,
847 				 flags);
848 
849 	return prio;
850 }
851 
/* Flow table priorities of the RDMA RX optional counters */
enum {
	RDMA_RX_ECN_OPCOUNTER_PRIO = 0,
	RDMA_RX_CNP_OPCOUNTER_PRIO = 1,
};

/* Flow table priorities of the RDMA TX optional counters */
enum {
	RDMA_TX_CNP_OPCOUNTER_PRIO = 0,
};
860 
861 static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
862 			      struct mlx5_flow_spec *spec)
863 {
864 	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
865 					ft_field_support.source_vhca_port) ||
866 	    !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
867 					ft_field_support.source_vhca_port))
868 		return -EOPNOTSUPP;
869 
870 	MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
871 			 misc_parameters.source_vhca_port);
872 	MLX5_SET(fte_match_param, &spec->match_value,
873 		 misc_parameters.source_vhca_port, port_num);
874 
875 	return 0;
876 }
877 
878 static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
879 			   struct mlx5_flow_spec *spec, int ipv)
880 {
881 	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
882 					ft_field_support.outer_ip_version))
883 		return -EOPNOTSUPP;
884 
885 	if (mlx5_core_mp_enabled(dev->mdev) &&
886 	    set_vhca_port_spec(dev, port_num, spec))
887 		return -EOPNOTSUPP;
888 
889 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
890 			 outer_headers.ip_ecn);
891 	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
892 		 INET_ECN_CE);
893 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
894 			 outer_headers.ip_version);
895 	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
896 		 ipv);
897 
898 	spec->match_criteria_enable =
899 		get_match_criteria_enable(spec->match_criteria);
900 
901 	return 0;
902 }
903 
904 static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
905 			struct mlx5_flow_spec *spec)
906 {
907 	if (mlx5_core_mp_enabled(dev->mdev) &&
908 	    set_vhca_port_spec(dev, port_num, spec))
909 		return -EOPNOTSUPP;
910 
911 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
912 			 misc_parameters.bth_opcode);
913 	MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
914 		 IB_BTH_OPCODE_CNP);
915 
916 	spec->match_criteria_enable =
917 		get_match_criteria_enable(spec->match_criteria);
918 
919 	return 0;
920 }
921 
922 int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
923 			 struct mlx5_ib_op_fc *opfc,
924 			 enum mlx5_ib_optional_counter_type type)
925 {
926 	enum mlx5_flow_namespace_type fn_type;
927 	int priority, i, err, spec_num;
928 	struct mlx5_flow_act flow_act = {};
929 	struct mlx5_flow_destination dst;
930 	struct mlx5_flow_namespace *ns;
931 	struct mlx5_ib_flow_prio *prio;
932 	struct mlx5_flow_spec *spec;
933 
934 	spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
935 	if (!spec)
936 		return -ENOMEM;
937 
938 	switch (type) {
939 	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
940 		if (set_ecn_ce_spec(dev, port_num, &spec[0],
941 				    MLX5_FS_IPV4_VERSION) ||
942 		    set_ecn_ce_spec(dev, port_num, &spec[1],
943 				    MLX5_FS_IPV6_VERSION)) {
944 			err = -EOPNOTSUPP;
945 			goto free;
946 		}
947 		spec_num = 2;
948 		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
949 		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
950 		break;
951 
952 	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
953 		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
954 					ft_field_support_2_nic_receive_rdma.bth_opcode) ||
955 		    set_cnp_spec(dev, port_num, &spec[0])) {
956 			err = -EOPNOTSUPP;
957 			goto free;
958 		}
959 		spec_num = 1;
960 		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
961 		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
962 		break;
963 
964 	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
965 		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
966 					ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
967 		    set_cnp_spec(dev, port_num, &spec[0])) {
968 			err = -EOPNOTSUPP;
969 			goto free;
970 		}
971 		spec_num = 1;
972 		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
973 		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
974 		break;
975 
976 	default:
977 		err = -EOPNOTSUPP;
978 		goto free;
979 	}
980 
981 	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
982 	if (!ns) {
983 		err = -EOPNOTSUPP;
984 		goto free;
985 	}
986 
987 	prio = &dev->flow_db->opfcs[type];
988 	if (!prio->flow_table) {
989 		prio = _get_prio(ns, prio, priority,
990 				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
991 		if (IS_ERR(prio)) {
992 			err = PTR_ERR(prio);
993 			goto free;
994 		}
995 	}
996 
997 	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
998 	dst.counter_id = mlx5_fc_id(opfc->fc);
999 
1000 	flow_act.action =
1001 		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1002 
1003 	for (i = 0; i < spec_num; i++) {
1004 		opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
1005 						    &flow_act, &dst, 1);
1006 		if (IS_ERR(opfc->rule[i])) {
1007 			err = PTR_ERR(opfc->rule[i]);
1008 			goto del_rules;
1009 		}
1010 	}
1011 	prio->refcount += spec_num;
1012 	kfree(spec);
1013 
1014 	return 0;
1015 
1016 del_rules:
1017 	for (i -= 1; i >= 0; i--)
1018 		mlx5_del_flow_rules(opfc->rule[i]);
1019 	put_flow_table(dev, prio, false);
1020 free:
1021 	kfree(spec);
1022 	return err;
1023 }
1024 
1025 void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
1026 			     struct mlx5_ib_op_fc *opfc,
1027 			     enum mlx5_ib_optional_counter_type type)
1028 {
1029 	int i;
1030 
1031 	for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
1032 		mlx5_del_flow_rules(opfc->rule[i]);
1033 		put_flow_table(dev, &dev->flow_db->opfcs[type], true);
1034 	}
1035 }
1036 
1037 static void set_underlay_qp(struct mlx5_ib_dev *dev,
1038 			    struct mlx5_flow_spec *spec,
1039 			    u32 underlay_qpn)
1040 {
1041 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
1042 					   spec->match_criteria,
1043 					   misc_parameters);
1044 	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1045 					   misc_parameters);
1046 
1047 	if (underlay_qpn &&
1048 	    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
1049 				      ft_field_support.bth_dst_qp)) {
1050 		MLX5_SET(fte_match_set_misc,
1051 			 misc_params_v, bth_dst_qp, underlay_qpn);
1052 		MLX5_SET(fte_match_set_misc,
1053 			 misc_params_c, bth_dst_qp, 0xffffff);
1054 	}
1055 }
1056 
1057 static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
1058 					 struct mlx5_flow_spec *spec,
1059 					 struct mlx5_eswitch_rep *rep)
1060 {
1061 	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
1062 	void *misc;
1063 
1064 	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1065 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1066 				    misc_parameters_2);
1067 
1068 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1069 			 mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
1070 								   rep->vport));
1071 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1072 				    misc_parameters_2);
1073 
1074 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1075 			 mlx5_eswitch_get_vport_metadata_mask());
1076 	} else {
1077 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1078 				    misc_parameters);
1079 
1080 		MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
1081 
1082 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1083 				    misc_parameters);
1084 
1085 		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
1086 	}
1087 }
1088 
1089 static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
1090 						      struct mlx5_ib_flow_prio *ft_prio,
1091 						      const struct ib_flow_attr *flow_attr,
1092 						      struct mlx5_flow_destination *dst,
1093 						      u32 underlay_qpn,
1094 						      struct mlx5_ib_create_flow *ucmd)
1095 {
1096 	struct mlx5_flow_table	*ft = ft_prio->flow_table;
1097 	struct mlx5_ib_flow_handler *handler;
1098 	struct mlx5_flow_act flow_act = {};
1099 	struct mlx5_flow_spec *spec;
1100 	struct mlx5_flow_destination dest_arr[2] = {};
1101 	struct mlx5_flow_destination *rule_dst = dest_arr;
1102 	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
1103 	unsigned int spec_index;
1104 	u32 prev_type = 0;
1105 	int err = 0;
1106 	int dest_num = 0;
1107 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1108 
1109 	if (!is_valid_attr(dev->mdev, flow_attr))
1110 		return ERR_PTR(-EINVAL);
1111 
1112 	if (dev->is_rep && is_egress)
1113 		return ERR_PTR(-EINVAL);
1114 
1115 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1116 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1117 	if (!handler || !spec) {
1118 		err = -ENOMEM;
1119 		goto free;
1120 	}
1121 
1122 	INIT_LIST_HEAD(&handler->list);
1123 
1124 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
1125 		err = parse_flow_attr(dev->mdev, spec,
1126 				      ib_flow, flow_attr, &flow_act,
1127 				      prev_type);
1128 		if (err < 0)
1129 			goto free;
1130 
1131 		prev_type = ((union ib_flow_spec *)ib_flow)->type;
1132 		ib_flow += ((union ib_flow_spec *)ib_flow)->size;
1133 	}
1134 
1135 	if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
1136 		memcpy(&dest_arr[0], dst, sizeof(*dst));
1137 		dest_num++;
1138 	}
1139 
1140 	if (!flow_is_multicast_only(flow_attr))
1141 		set_underlay_qp(dev, spec, underlay_qpn);
1142 
1143 	if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
1144 		struct mlx5_eswitch_rep *rep;
1145 
1146 		rep = dev->port[flow_attr->port - 1].rep;
1147 		if (!rep) {
1148 			err = -EINVAL;
1149 			goto free;
1150 		}
1151 
1152 		mlx5_ib_set_rule_source_port(dev, spec, rep);
1153 	}
1154 
1155 	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
1156 
1157 	if (is_egress &&
1158 	    !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
1159 		err = -EINVAL;
1160 		goto free;
1161 	}
1162 
1163 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1164 		struct mlx5_ib_mcounters *mcounters;
1165 
1166 		err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
1167 		if (err)
1168 			goto free;
1169 
1170 		mcounters = to_mcounters(flow_act.counters);
1171 		handler->ibcounters = flow_act.counters;
1172 		dest_arr[dest_num].type =
1173 			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1174 		dest_arr[dest_num].counter_id =
1175 			mlx5_fc_id(mcounters->hw_cntrs_hndl);
1176 		dest_num++;
1177 	}
1178 
1179 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
1180 		if (!dest_num)
1181 			rule_dst = NULL;
1182 	} else {
1183 		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1184 			flow_act.action |=
1185 				MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
1186 		if (is_egress)
1187 			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1188 		else if (dest_num)
1189 			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1190 	}
1191 
1192 	if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
1193 	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1194 	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1195 		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
1196 			     spec->flow_context.flow_tag, flow_attr->type);
1197 		err = -EINVAL;
1198 		goto free;
1199 	}
1200 	handler->rule = mlx5_add_flow_rules(ft, spec,
1201 					    &flow_act,
1202 					    rule_dst, dest_num);
1203 
1204 	if (IS_ERR(handler->rule)) {
1205 		err = PTR_ERR(handler->rule);
1206 		goto free;
1207 	}
1208 
1209 	ft_prio->refcount++;
1210 	handler->prio = ft_prio;
1211 	handler->dev = dev;
1212 
1213 	ft_prio->flow_table = ft;
1214 free:
1215 	if (err && handler) {
1216 		mlx5_ib_counters_clear_description(handler->ibcounters);
1217 		kfree(handler);
1218 	}
1219 	kvfree(spec);
1220 	return err ? ERR_PTR(err) : handler;
1221 }
1222 
1223 static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
1224 						     struct mlx5_ib_flow_prio *ft_prio,
1225 						     const struct ib_flow_attr *flow_attr,
1226 						     struct mlx5_flow_destination *dst)
1227 {
1228 	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
1229 }
1230 
/* Indices of the multicast / unicast entries of the leftovers template table. */
enum {
	LEFTOVERS_MC = 0,
	LEFTOVERS_UC = 1,
};
1235 
1236 static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
1237 							  struct mlx5_ib_flow_prio *ft_prio,
1238 							  struct ib_flow_attr *flow_attr,
1239 							  struct mlx5_flow_destination *dst)
1240 {
1241 	struct mlx5_ib_flow_handler *handler_ucast = NULL;
1242 	struct mlx5_ib_flow_handler *handler = NULL;
1243 
1244 	static struct {
1245 		struct ib_flow_attr	flow_attr;
1246 		struct ib_flow_spec_eth eth_flow;
1247 	} leftovers_specs[] = {
1248 		[LEFTOVERS_MC] = {
1249 			.flow_attr = {
1250 				.num_of_specs = 1,
1251 				.size = sizeof(leftovers_specs[0])
1252 			},
1253 			.eth_flow = {
1254 				.type = IB_FLOW_SPEC_ETH,
1255 				.size = sizeof(struct ib_flow_spec_eth),
1256 				.mask = {.dst_mac = {0x1} },
1257 				.val =  {.dst_mac = {0x1} }
1258 			}
1259 		},
1260 		[LEFTOVERS_UC] = {
1261 			.flow_attr = {
1262 				.num_of_specs = 1,
1263 				.size = sizeof(leftovers_specs[0])
1264 			},
1265 			.eth_flow = {
1266 				.type = IB_FLOW_SPEC_ETH,
1267 				.size = sizeof(struct ib_flow_spec_eth),
1268 				.mask = {.dst_mac = {0x1} },
1269 				.val = {.dst_mac = {} }
1270 			}
1271 		}
1272 	};
1273 
1274 	handler = create_flow_rule(dev, ft_prio,
1275 				   &leftovers_specs[LEFTOVERS_MC].flow_attr,
1276 				   dst);
1277 	if (!IS_ERR(handler) &&
1278 	    flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
1279 		handler_ucast = create_flow_rule(dev, ft_prio,
1280 						 &leftovers_specs[LEFTOVERS_UC].flow_attr,
1281 						 dst);
1282 		if (IS_ERR(handler_ucast)) {
1283 			mlx5_del_flow_rules(handler->rule);
1284 			ft_prio->refcount--;
1285 			kfree(handler);
1286 			handler = handler_ucast;
1287 		} else {
1288 			list_add(&handler_ucast->list, &handler->list);
1289 		}
1290 	}
1291 
1292 	return handler;
1293 }
1294 
1295 static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
1296 							struct mlx5_ib_flow_prio *ft_rx,
1297 							struct mlx5_ib_flow_prio *ft_tx,
1298 							struct mlx5_flow_destination *dst)
1299 {
1300 	struct mlx5_ib_flow_handler *handler_rx;
1301 	struct mlx5_ib_flow_handler *handler_tx;
1302 	int err;
1303 	static const struct ib_flow_attr flow_attr  = {
1304 		.num_of_specs = 0,
1305 		.type = IB_FLOW_ATTR_SNIFFER,
1306 		.size = sizeof(flow_attr)
1307 	};
1308 
1309 	handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
1310 	if (IS_ERR(handler_rx)) {
1311 		err = PTR_ERR(handler_rx);
1312 		goto err;
1313 	}
1314 
1315 	handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
1316 	if (IS_ERR(handler_tx)) {
1317 		err = PTR_ERR(handler_tx);
1318 		goto err_tx;
1319 	}
1320 
1321 	list_add(&handler_tx->list, &handler_rx->list);
1322 
1323 	return handler_rx;
1324 
1325 err_tx:
1326 	mlx5_del_flow_rules(handler_rx->rule);
1327 	ft_rx->refcount--;
1328 	kfree(handler_rx);
1329 err:
1330 	return ERR_PTR(err);
1331 }
1332 
1333 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
1334 					   struct ib_flow_attr *flow_attr,
1335 					   struct ib_udata *udata)
1336 {
1337 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
1338 	struct mlx5_ib_qp *mqp = to_mqp(qp);
1339 	struct mlx5_ib_flow_handler *handler = NULL;
1340 	struct mlx5_flow_destination *dst = NULL;
1341 	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
1342 	struct mlx5_ib_flow_prio *ft_prio;
1343 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1344 	struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
1345 	size_t min_ucmd_sz, required_ucmd_sz;
1346 	int err;
1347 	int underlay_qpn;
1348 
1349 	if (udata && udata->inlen) {
1350 		min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
1351 		if (udata->inlen < min_ucmd_sz)
1352 			return ERR_PTR(-EOPNOTSUPP);
1353 
1354 		err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
1355 		if (err)
1356 			return ERR_PTR(err);
1357 
1358 		/* currently supports only one counters data */
1359 		if (ucmd_hdr.ncounters_data > 1)
1360 			return ERR_PTR(-EINVAL);
1361 
1362 		required_ucmd_sz = min_ucmd_sz +
1363 			sizeof(struct mlx5_ib_flow_counters_data) *
1364 			ucmd_hdr.ncounters_data;
1365 		if (udata->inlen > required_ucmd_sz &&
1366 		    !ib_is_udata_cleared(udata, required_ucmd_sz,
1367 					 udata->inlen - required_ucmd_sz))
1368 			return ERR_PTR(-EOPNOTSUPP);
1369 
1370 		ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
1371 		if (!ucmd)
1372 			return ERR_PTR(-ENOMEM);
1373 
1374 		err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
1375 		if (err)
1376 			goto free_ucmd;
1377 	}
1378 
1379 	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
1380 		err = -ENOMEM;
1381 		goto free_ucmd;
1382 	}
1383 
1384 	if (flow_attr->flags &
1385 	    ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
1386 		err = -EINVAL;
1387 		goto free_ucmd;
1388 	}
1389 
1390 	if (is_egress &&
1391 	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1392 	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1393 		err = -EINVAL;
1394 		goto free_ucmd;
1395 	}
1396 
1397 	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1398 	if (!dst) {
1399 		err = -ENOMEM;
1400 		goto free_ucmd;
1401 	}
1402 
1403 	mutex_lock(&dev->flow_db->lock);
1404 
1405 	ft_prio = get_flow_table(dev, flow_attr,
1406 				 is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
1407 	if (IS_ERR(ft_prio)) {
1408 		err = PTR_ERR(ft_prio);
1409 		goto unlock;
1410 	}
1411 	if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
1412 		ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
1413 		if (IS_ERR(ft_prio_tx)) {
1414 			err = PTR_ERR(ft_prio_tx);
1415 			ft_prio_tx = NULL;
1416 			goto destroy_ft;
1417 		}
1418 	}
1419 
1420 	if (is_egress) {
1421 		dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1422 	} else {
1423 		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1424 		if (mqp->is_rss)
1425 			dst->tir_num = mqp->rss_qp.tirn;
1426 		else
1427 			dst->tir_num = mqp->raw_packet_qp.rq.tirn;
1428 	}
1429 
1430 	switch (flow_attr->type) {
1431 	case IB_FLOW_ATTR_NORMAL:
1432 		underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
1433 				       mqp->underlay_qpn :
1434 				       0;
1435 		handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
1436 					    underlay_qpn, ucmd);
1437 		break;
1438 	case IB_FLOW_ATTR_ALL_DEFAULT:
1439 	case IB_FLOW_ATTR_MC_DEFAULT:
1440 		handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
1441 		break;
1442 	case IB_FLOW_ATTR_SNIFFER:
1443 		handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
1444 		break;
1445 	default:
1446 		err = -EINVAL;
1447 		goto destroy_ft;
1448 	}
1449 
1450 	if (IS_ERR(handler)) {
1451 		err = PTR_ERR(handler);
1452 		handler = NULL;
1453 		goto destroy_ft;
1454 	}
1455 
1456 	mutex_unlock(&dev->flow_db->lock);
1457 	kfree(dst);
1458 	kfree(ucmd);
1459 
1460 	return &handler->ibflow;
1461 
1462 destroy_ft:
1463 	put_flow_table(dev, ft_prio, false);
1464 	if (ft_prio_tx)
1465 		put_flow_table(dev, ft_prio_tx, false);
1466 unlock:
1467 	mutex_unlock(&dev->flow_db->lock);
1468 	kfree(dst);
1469 free_ucmd:
1470 	kfree(ucmd);
1471 	return ERR_PTR(err);
1472 }
1473 
1474 static struct mlx5_ib_flow_prio *
1475 _get_flow_table(struct mlx5_ib_dev *dev,
1476 		struct mlx5_ib_flow_matcher *fs_matcher,
1477 		bool mcast)
1478 {
1479 	struct mlx5_flow_namespace *ns = NULL;
1480 	struct mlx5_ib_flow_prio *prio = NULL;
1481 	int max_table_size = 0;
1482 	bool esw_encap;
1483 	u32 flags = 0;
1484 	int priority;
1485 
1486 	if (mcast)
1487 		priority = MLX5_IB_FLOW_MCAST_PRIO;
1488 	else
1489 		priority = ib_prio_to_core_prio(fs_matcher->priority, false);
1490 
1491 	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
1492 		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
1493 	switch (fs_matcher->ns_type) {
1494 	case MLX5_FLOW_NAMESPACE_BYPASS:
1495 		max_table_size = BIT(
1496 			MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
1497 		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
1498 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1499 		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
1500 					      reformat_l3_tunnel_to_l2) &&
1501 		    !esw_encap)
1502 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1503 		break;
1504 	case MLX5_FLOW_NAMESPACE_EGRESS:
1505 		max_table_size = BIT(
1506 			MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
1507 		if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
1508 		    !esw_encap)
1509 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1510 		break;
1511 	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
1512 		max_table_size = BIT(
1513 			MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
1514 		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
1515 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1516 		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
1517 					       reformat_l3_tunnel_to_l2) &&
1518 		    esw_encap)
1519 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1520 		priority = fs_matcher->priority;
1521 		break;
1522 	case MLX5_FLOW_NAMESPACE_RDMA_RX:
1523 		max_table_size = BIT(
1524 			MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
1525 		priority = fs_matcher->priority;
1526 		break;
1527 	case MLX5_FLOW_NAMESPACE_RDMA_TX:
1528 		max_table_size = BIT(
1529 			MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
1530 		priority = fs_matcher->priority;
1531 		break;
1532 	default:
1533 		break;
1534 	}
1535 
1536 	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
1537 
1538 	ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
1539 	if (!ns)
1540 		return ERR_PTR(-EOPNOTSUPP);
1541 
1542 	switch (fs_matcher->ns_type) {
1543 	case MLX5_FLOW_NAMESPACE_BYPASS:
1544 		prio = &dev->flow_db->prios[priority];
1545 		break;
1546 	case MLX5_FLOW_NAMESPACE_EGRESS:
1547 		prio = &dev->flow_db->egress_prios[priority];
1548 		break;
1549 	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
1550 		prio = &dev->flow_db->fdb[priority];
1551 		break;
1552 	case MLX5_FLOW_NAMESPACE_RDMA_RX:
1553 		prio = &dev->flow_db->rdma_rx[priority];
1554 		break;
1555 	case MLX5_FLOW_NAMESPACE_RDMA_TX:
1556 		prio = &dev->flow_db->rdma_tx[priority];
1557 		break;
1558 	default: return ERR_PTR(-EINVAL);
1559 	}
1560 
1561 	if (!prio)
1562 		return ERR_PTR(-EINVAL);
1563 
1564 	if (prio->flow_table)
1565 		return prio;
1566 
1567 	return _get_prio(ns, prio, priority, max_table_size,
1568 			 MLX5_FS_MAX_TYPES, flags);
1569 }
1570 
1571 static struct mlx5_ib_flow_handler *
1572 _create_raw_flow_rule(struct mlx5_ib_dev *dev,
1573 		      struct mlx5_ib_flow_prio *ft_prio,
1574 		      struct mlx5_flow_destination *dst,
1575 		      struct mlx5_ib_flow_matcher  *fs_matcher,
1576 		      struct mlx5_flow_context *flow_context,
1577 		      struct mlx5_flow_act *flow_act,
1578 		      void *cmd_in, int inlen,
1579 		      int dst_num)
1580 {
1581 	struct mlx5_ib_flow_handler *handler;
1582 	struct mlx5_flow_spec *spec;
1583 	struct mlx5_flow_table *ft = ft_prio->flow_table;
1584 	int err = 0;
1585 
1586 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1587 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1588 	if (!handler || !spec) {
1589 		err = -ENOMEM;
1590 		goto free;
1591 	}
1592 
1593 	INIT_LIST_HEAD(&handler->list);
1594 
1595 	memcpy(spec->match_value, cmd_in, inlen);
1596 	memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
1597 	       fs_matcher->mask_len);
1598 	spec->match_criteria_enable = fs_matcher->match_criteria_enable;
1599 	spec->flow_context = *flow_context;
1600 
1601 	handler->rule = mlx5_add_flow_rules(ft, spec,
1602 					    flow_act, dst, dst_num);
1603 
1604 	if (IS_ERR(handler->rule)) {
1605 		err = PTR_ERR(handler->rule);
1606 		goto free;
1607 	}
1608 
1609 	ft_prio->refcount++;
1610 	handler->prio = ft_prio;
1611 	handler->dev = dev;
1612 	ft_prio->flow_table = ft;
1613 
1614 free:
1615 	if (err)
1616 		kfree(handler);
1617 	kvfree(spec);
1618 	return err ? ERR_PTR(err) : handler;
1619 }
1620 
1621 static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
1622 				void *match_v)
1623 {
1624 	void *match_c;
1625 	void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
1626 	void *dmac, *dmac_mask;
1627 	void *ipv4, *ipv4_mask;
1628 
1629 	if (!(fs_matcher->match_criteria_enable &
1630 	      (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
1631 		return false;
1632 
1633 	match_c = fs_matcher->matcher_mask.match_params;
1634 	match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
1635 					   outer_headers);
1636 	match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
1637 					   outer_headers);
1638 
1639 	dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1640 			    dmac_47_16);
1641 	dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1642 				 dmac_47_16);
1643 
1644 	if (is_multicast_ether_addr(dmac) &&
1645 	    is_multicast_ether_addr(dmac_mask))
1646 		return true;
1647 
1648 	ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1649 			    dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1650 
1651 	ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1652 				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1653 
1654 	if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
1655 	    ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
1656 		return true;
1657 
1658 	return false;
1659 }
1660 
1661 static struct mlx5_ib_flow_handler *raw_fs_rule_add(
1662 	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
1663 	struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
1664 	u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
1665 {
1666 	struct mlx5_flow_destination *dst;
1667 	struct mlx5_ib_flow_prio *ft_prio;
1668 	struct mlx5_ib_flow_handler *handler;
1669 	int dst_num = 0;
1670 	bool mcast;
1671 	int err;
1672 
1673 	if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
1674 		return ERR_PTR(-EOPNOTSUPP);
1675 
1676 	if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
1677 		return ERR_PTR(-ENOMEM);
1678 
1679 	dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
1680 	if (!dst)
1681 		return ERR_PTR(-ENOMEM);
1682 
1683 	mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
1684 	mutex_lock(&dev->flow_db->lock);
1685 
1686 	ft_prio = _get_flow_table(dev, fs_matcher, mcast);
1687 	if (IS_ERR(ft_prio)) {
1688 		err = PTR_ERR(ft_prio);
1689 		goto unlock;
1690 	}
1691 
1692 	switch (dest_type) {
1693 	case MLX5_FLOW_DESTINATION_TYPE_TIR:
1694 		dst[dst_num].type = dest_type;
1695 		dst[dst_num++].tir_num = dest_id;
1696 		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1697 		break;
1698 	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
1699 		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
1700 		dst[dst_num++].ft_num = dest_id;
1701 		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1702 		break;
1703 	case MLX5_FLOW_DESTINATION_TYPE_PORT:
1704 		dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1705 		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1706 		break;
1707 	default:
1708 		break;
1709 	}
1710 
1711 	if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1712 		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1713 		dst[dst_num].counter_id = counter_id;
1714 		dst_num++;
1715 	}
1716 
1717 	handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
1718 					fs_matcher, flow_context, flow_act,
1719 					cmd_in, inlen, dst_num);
1720 
1721 	if (IS_ERR(handler)) {
1722 		err = PTR_ERR(handler);
1723 		goto destroy_ft;
1724 	}
1725 
1726 	mutex_unlock(&dev->flow_db->lock);
1727 	atomic_inc(&fs_matcher->usecnt);
1728 	handler->flow_matcher = fs_matcher;
1729 
1730 	kfree(dst);
1731 
1732 	return handler;
1733 
1734 destroy_ft:
1735 	put_flow_table(dev, ft_prio, false);
1736 unlock:
1737 	mutex_unlock(&dev->flow_db->lock);
1738 	kfree(dst);
1739 
1740 	return ERR_PTR(err);
1741 }
1742 
1743 static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
1744 {
1745 	u32 flags = 0;
1746 
1747 	if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
1748 		flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
1749 
1750 	return flags;
1751 }
1752 
1753 #define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED                             \
1754 	MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
1755 static struct ib_flow_action *
1756 mlx5_ib_create_flow_action_esp(struct ib_device *device,
1757 			       const struct ib_flow_action_attrs_esp *attr,
1758 			       struct uverbs_attr_bundle *attrs)
1759 {
1760 	struct mlx5_ib_dev *mdev = to_mdev(device);
1761 	struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
1762 	struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
1763 	struct mlx5_ib_flow_action *action;
1764 	u64 action_flags;
1765 	u64 flags;
1766 	int err = 0;
1767 
1768 	err = uverbs_get_flags64(
1769 		&action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
1770 		((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
1771 	if (err)
1772 		return ERR_PTR(err);
1773 
1774 	flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
1775 
1776 	/* We current only support a subset of the standard features. Only a
1777 	 * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
1778 	 * (with overlap). Full offload mode isn't supported.
1779 	 */
1780 	if (!attr->keymat || attr->replay || attr->encap ||
1781 	    attr->spi || attr->seq || attr->tfc_pad ||
1782 	    attr->hard_limit_pkts ||
1783 	    (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
1784 			     IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
1785 		return ERR_PTR(-EOPNOTSUPP);
1786 
1787 	if (attr->keymat->protocol !=
1788 	    IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
1789 		return ERR_PTR(-EOPNOTSUPP);
1790 
1791 	aes_gcm = &attr->keymat->keymat.aes_gcm;
1792 
1793 	if (aes_gcm->icv_len != 16 ||
1794 	    aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
1795 		return ERR_PTR(-EOPNOTSUPP);
1796 
1797 	action = kmalloc(sizeof(*action), GFP_KERNEL);
1798 	if (!action)
1799 		return ERR_PTR(-ENOMEM);
1800 
1801 	action->esp_aes_gcm.ib_flags = attr->flags;
1802 	memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
1803 	       sizeof(accel_attrs.keymat.aes_gcm.aes_key));
1804 	accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
1805 	memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
1806 	       sizeof(accel_attrs.keymat.aes_gcm.salt));
1807 	memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
1808 	       sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
1809 	accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
1810 	accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
1811 	accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
1812 
1813 	accel_attrs.esn = attr->esn;
1814 	if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
1815 		accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
1816 	if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
1817 		accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
1818 
1819 	if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
1820 		accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
1821 
1822 	action->esp_aes_gcm.ctx =
1823 		mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
1824 	if (IS_ERR(action->esp_aes_gcm.ctx)) {
1825 		err = PTR_ERR(action->esp_aes_gcm.ctx);
1826 		goto err_parse;
1827 	}
1828 
1829 	action->esp_aes_gcm.ib_flags = attr->flags;
1830 
1831 	return &action->ib_action;
1832 
1833 err_parse:
1834 	kfree(action);
1835 	return ERR_PTR(err);
1836 }
1837 
1838 static int
1839 mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
1840 			       const struct ib_flow_action_attrs_esp *attr,
1841 			       struct uverbs_attr_bundle *attrs)
1842 {
1843 	struct mlx5_ib_flow_action *maction = to_mflow_act(action);
1844 	struct mlx5_accel_esp_xfrm_attrs accel_attrs;
1845 	int err = 0;
1846 
1847 	if (attr->keymat || attr->replay || attr->encap ||
1848 	    attr->spi || attr->seq || attr->tfc_pad ||
1849 	    attr->hard_limit_pkts ||
1850 	    (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
1851 			     IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
1852 			     IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
1853 		return -EOPNOTSUPP;
1854 
1855 	/* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
1856 	 * be modified.
1857 	 */
1858 	if (!(maction->esp_aes_gcm.ib_flags &
1859 	      IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
1860 	    attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
1861 			   IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
1862 		return -EINVAL;
1863 
1864 	memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
1865 	       sizeof(accel_attrs));
1866 
1867 	accel_attrs.esn = attr->esn;
1868 	if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
1869 		accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
1870 	else
1871 		accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
1872 
1873 	err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
1874 					 &accel_attrs);
1875 	if (err)
1876 		return err;
1877 
1878 	maction->esp_aes_gcm.ib_flags &=
1879 		~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
1880 	maction->esp_aes_gcm.ib_flags |=
1881 		attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
1882 
1883 	return 0;
1884 }
1885 
1886 static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
1887 {
1888 	switch (maction->flow_action_raw.sub_type) {
1889 	case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
1890 		mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
1891 					   maction->flow_action_raw.modify_hdr);
1892 		break;
1893 	case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
1894 		mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
1895 					     maction->flow_action_raw.pkt_reformat);
1896 		break;
1897 	case MLX5_IB_FLOW_ACTION_DECAP:
1898 		break;
1899 	default:
1900 		break;
1901 	}
1902 }
1903 
1904 static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
1905 {
1906 	struct mlx5_ib_flow_action *maction = to_mflow_act(action);
1907 
1908 	switch (action->type) {
1909 	case IB_FLOW_ACTION_ESP:
1910 		/*
1911 		 * We only support aes_gcm by now, so we implicitly know this is
1912 		 * the underline crypto.
1913 		 */
1914 		mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
1915 		break;
1916 	case IB_FLOW_ACTION_UNSPECIFIED:
1917 		destroy_flow_action_raw(maction);
1918 		break;
1919 	default:
1920 		WARN_ON(true);
1921 		break;
1922 	}
1923 
1924 	kfree(maction);
1925 	return 0;
1926 }
1927 
1928 static int
1929 mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
1930 			     enum mlx5_flow_namespace_type *namespace)
1931 {
1932 	switch (table_type) {
1933 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
1934 		*namespace = MLX5_FLOW_NAMESPACE_BYPASS;
1935 		break;
1936 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
1937 		*namespace = MLX5_FLOW_NAMESPACE_EGRESS;
1938 		break;
1939 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
1940 		*namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
1941 		break;
1942 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
1943 		*namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
1944 		break;
1945 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
1946 		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
1947 		break;
1948 	default:
1949 		return -EINVAL;
1950 	}
1951 
1952 	return 0;
1953 }
1954 
/*
 * Per-value attribute specs for the MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE enum
 * attribute of the flow matcher create method. Only the NORMAL flow type
 * carries a payload (a u16 priority); the remaining types take no data.
 */
static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
	[MLX5_IB_FLOW_TYPE_NORMAL] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		.u.ptr = {
			.len = sizeof(u16), /* data is priority */
			.min_len = sizeof(u16),
		}
	},
	[MLX5_IB_FLOW_TYPE_SNIFFER] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
	[MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
	[MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
};
1976 
1977 static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
1978 {
1979 	struct devx_obj *devx_obj = obj;
1980 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1981 
1982 	switch (opcode) {
1983 	case MLX5_CMD_OP_DESTROY_TIR:
1984 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1985 		*dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
1986 				    obj_id);
1987 		return true;
1988 
1989 	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
1990 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1991 		*dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
1992 				    table_id);
1993 		return true;
1994 	default:
1995 		return false;
1996 	}
1997 }
1998 
/*
 * Validate and resolve the destination of a flow being created.
 *
 * @attrs:      attribute bundle of the CREATE_FLOW method
 * @fs_matcher: matcher the rule is attached to; its ns_type drives which
 *              destination combinations are legal
 * @dest_id:    out, destination id (TIR number or flow table id); written
 *              only when a DEVX or QP destination is resolved
 * @dest_type:  in/out, destination type; written only when a destination is
 *              resolved, so the caller must pre-initialize it because it is
 *              also read by the final TIR check below
 * @qp:         out, destination QP or NULL when the destination is not a QP
 * @flags:      out, validated MLX5_IB_ATTR_CREATE_FLOW_FLAGS_* bits
 *
 * Returns 0 on success or a negative errno.
 */
static int get_dests(struct uverbs_attr_bundle *attrs,
		     struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
		     int *dest_type, struct ib_qp **qp, u32 *flags)
{
	bool dest_devx, dest_qp;
	void *devx_obj;
	int err;

	dest_devx = uverbs_attr_is_valid(attrs,
					 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
	dest_qp = uverbs_attr_is_valid(attrs,
				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);

	*flags = 0;
	err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
				 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
					 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
	if (err)
		return err;

	/* DEFAULT_MISS and DROP are mutually exclusive */
	if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
	    *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
		return -EINVAL;

	/*
	 * In the BYPASS namespace at most one of {DEVX object, QP, flag} may
	 * be given: a DEVX destination excludes a QP and any flag, and a QP
	 * destination excludes any flag.
	 */
	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
		if (dest_devx && (dest_qp || *flags))
			return -EINVAL;
		else if (dest_qp && *flags)
			return -EINVAL;
	}

	/* Allow only DEVX object, drop as dest for FDB */
	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
	    !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
		return -EINVAL;

	/* RDMA_RX requires exactly one of: DEVX object or QP */
	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
		return -EINVAL;

	*qp = NULL;
	if (dest_devx) {
		devx_obj =
			uverbs_attr_get_obj(attrs,
					    MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);

		/* Verify that the given DEVX object is a flow
		 * steering destination.
		 */
		if (!is_flow_dest(devx_obj, dest_id, dest_type))
			return -EINVAL;
		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
		    *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
			return -EINVAL;
	} else if (dest_qp) {
		struct mlx5_ib_qp *mqp;

		*qp = uverbs_attr_get_obj(attrs,
					  MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
		if (IS_ERR(*qp))
			return PTR_ERR(*qp);

		/* Only raw packet QPs may be a flow destination */
		if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
			return -EINVAL;

		/* The rule targets the QP's TIR: the RSS one or the RQ one */
		mqp = to_mqp(*qp);
		if (mqp->is_rss)
			*dest_id = mqp->rss_qp.tirn;
		else
			*dest_id = mqp->raw_packet_qp.rq.tirn;
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
		   !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
		/* No explicit destination on a TX namespace and not dropping */
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
	}

	/* A TIR can never be the destination of an egress (TX) rule */
	if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
	    (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
		return -EINVAL;

	return 0;
}
2087 
2088 static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
2089 {
2090 	struct devx_obj *devx_obj = obj;
2091 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
2092 
2093 	if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
2094 
2095 		if (offset && offset >= devx_obj->flow_counter_bulk_size)
2096 			return false;
2097 
2098 		*counter_id = MLX5_GET(dealloc_flow_counter_in,
2099 				       devx_obj->dinbox,
2100 				       flow_counter_id);
2101 		*counter_id += offset;
2102 		return true;
2103 	}
2104 
2105 	return false;
2106 }
2107 
2108 #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
2109 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
2110 	struct uverbs_attr_bundle *attrs)
2111 {
2112 	struct mlx5_flow_context flow_context = {.flow_tag =
2113 		MLX5_FS_DEFAULT_FLOW_TAG};
2114 	u32 *offset_attr, offset = 0, counter_id = 0;
2115 	int dest_id, dest_type = -1, inlen, len, ret, i;
2116 	struct mlx5_ib_flow_handler *flow_handler;
2117 	struct mlx5_ib_flow_matcher *fs_matcher;
2118 	struct ib_uobject **arr_flow_actions;
2119 	struct ib_uflow_resources *uflow_res;
2120 	struct mlx5_flow_act flow_act = {};
2121 	struct ib_qp *qp = NULL;
2122 	void *devx_obj, *cmd_in;
2123 	struct ib_uobject *uobj;
2124 	struct mlx5_ib_dev *dev;
2125 	u32 flags;
2126 
2127 	if (!capable(CAP_NET_RAW))
2128 		return -EPERM;
2129 
2130 	fs_matcher = uverbs_attr_get_obj(attrs,
2131 					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
2132 	uobj =  uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
2133 	dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2134 
2135 	if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
2136 		return -EINVAL;
2137 
2138 	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
2139 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
2140 
2141 	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
2142 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
2143 
2144 	len = uverbs_attr_get_uobjs_arr(attrs,
2145 		MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
2146 	if (len) {
2147 		devx_obj = arr_flow_actions[0]->object;
2148 
2149 		if (uverbs_attr_is_valid(attrs,
2150 					 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
2151 
2152 			int num_offsets = uverbs_attr_ptr_get_array_size(
2153 				attrs,
2154 				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
2155 				sizeof(u32));
2156 
2157 			if (num_offsets != 1)
2158 				return -EINVAL;
2159 
2160 			offset_attr = uverbs_attr_get_alloced_ptr(
2161 				attrs,
2162 				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
2163 			offset = *offset_attr;
2164 		}
2165 
2166 		if (!is_flow_counter(devx_obj, offset, &counter_id))
2167 			return -EINVAL;
2168 
2169 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
2170 	}
2171 
2172 	cmd_in = uverbs_attr_get_alloced_ptr(
2173 		attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
2174 	inlen = uverbs_attr_get_len(attrs,
2175 				    MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
2176 
2177 	uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
2178 	if (!uflow_res)
2179 		return -ENOMEM;
2180 
2181 	len = uverbs_attr_get_uobjs_arr(attrs,
2182 		MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
2183 	for (i = 0; i < len; i++) {
2184 		struct mlx5_ib_flow_action *maction =
2185 			to_mflow_act(arr_flow_actions[i]->object);
2186 
2187 		ret = parse_flow_flow_action(maction, false, &flow_act);
2188 		if (ret)
2189 			goto err_out;
2190 		flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
2191 				   arr_flow_actions[i]->object);
2192 	}
2193 
2194 	ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
2195 			       MLX5_IB_ATTR_CREATE_FLOW_TAG);
2196 	if (!ret) {
2197 		if (flow_context.flow_tag >= BIT(24)) {
2198 			ret = -EINVAL;
2199 			goto err_out;
2200 		}
2201 		flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
2202 	}
2203 
2204 	flow_handler =
2205 		raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
2206 				counter_id, cmd_in, inlen, dest_id, dest_type);
2207 	if (IS_ERR(flow_handler)) {
2208 		ret = PTR_ERR(flow_handler);
2209 		goto err_out;
2210 	}
2211 
2212 	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
2213 
2214 	return 0;
2215 err_out:
2216 	ib_uverbs_flow_resources_free(uflow_res);
2217 	return ret;
2218 }
2219 
2220 static int flow_matcher_cleanup(struct ib_uobject *uobject,
2221 				enum rdma_remove_reason why,
2222 				struct uverbs_attr_bundle *attrs)
2223 {
2224 	struct mlx5_ib_flow_matcher *obj = uobject->object;
2225 
2226 	if (atomic_read(&obj->usecnt))
2227 		return -EBUSY;
2228 
2229 	kfree(obj);
2230 	return 0;
2231 }
2232 
2233 static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
2234 			      struct mlx5_ib_flow_matcher *obj)
2235 {
2236 	enum mlx5_ib_uapi_flow_table_type ft_type =
2237 		MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
2238 	u32 flags;
2239 	int err;
2240 
2241 	/* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
2242 	 * users should switch to it. We leave this to not break userspace
2243 	 */
2244 	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
2245 	    uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
2246 		return -EINVAL;
2247 
2248 	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
2249 		err = uverbs_get_const(&ft_type, attrs,
2250 				       MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
2251 		if (err)
2252 			return err;
2253 
2254 		err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
2255 		if (err)
2256 			return err;
2257 
2258 		return 0;
2259 	}
2260 
2261 	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
2262 		err = uverbs_get_flags32(&flags, attrs,
2263 					 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2264 					 IB_FLOW_ATTR_FLAGS_EGRESS);
2265 		if (err)
2266 			return err;
2267 
2268 		if (flags) {
2269 			mlx5_ib_ft_type_to_namespace(
2270 				MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
2271 				&obj->ns_type);
2272 			return 0;
2273 		}
2274 	}
2275 
2276 	obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
2277 
2278 	return 0;
2279 }
2280 
2281 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
2282 	struct uverbs_attr_bundle *attrs)
2283 {
2284 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2285 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
2286 	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2287 	struct mlx5_ib_flow_matcher *obj;
2288 	int err;
2289 
2290 	obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
2291 	if (!obj)
2292 		return -ENOMEM;
2293 
2294 	obj->mask_len = uverbs_attr_get_len(
2295 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2296 	err = uverbs_copy_from(&obj->matcher_mask,
2297 			       attrs,
2298 			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2299 	if (err)
2300 		goto end;
2301 
2302 	obj->flow_type = uverbs_attr_get_enum_id(
2303 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2304 
2305 	if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
2306 		err = uverbs_copy_from(&obj->priority,
2307 				       attrs,
2308 				       MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2309 		if (err)
2310 			goto end;
2311 	}
2312 
2313 	err = uverbs_copy_from(&obj->match_criteria_enable,
2314 			       attrs,
2315 			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
2316 	if (err)
2317 		goto end;
2318 
2319 	err = mlx5_ib_matcher_ns(attrs, obj);
2320 	if (err)
2321 		goto end;
2322 
2323 	if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
2324 	    mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
2325 		err = -EINVAL;
2326 		goto end;
2327 	}
2328 
2329 	uobj->object = obj;
2330 	obj->mdev = dev->mdev;
2331 	atomic_set(&obj->usecnt, 0);
2332 	return 0;
2333 
2334 end:
2335 	kfree(obj);
2336 	return err;
2337 }
2338 
2339 static struct ib_flow_action *
2340 mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
2341 			     enum mlx5_ib_uapi_flow_table_type ft_type,
2342 			     u8 num_actions, void *in)
2343 {
2344 	enum mlx5_flow_namespace_type namespace;
2345 	struct mlx5_ib_flow_action *maction;
2346 	int ret;
2347 
2348 	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2349 	if (ret)
2350 		return ERR_PTR(-EINVAL);
2351 
2352 	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2353 	if (!maction)
2354 		return ERR_PTR(-ENOMEM);
2355 
2356 	maction->flow_action_raw.modify_hdr =
2357 		mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
2358 
2359 	if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
2360 		ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
2361 		kfree(maction);
2362 		return ERR_PTR(ret);
2363 	}
2364 	maction->flow_action_raw.sub_type =
2365 		MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
2366 	maction->flow_action_raw.dev = dev;
2367 
2368 	return &maction->ib_action;
2369 }
2370 
2371 static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
2372 {
2373 	return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
2374 					 max_modify_header_actions) ||
2375 	       MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
2376 					 max_modify_header_actions) ||
2377 	       MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
2378 					 max_modify_header_actions);
2379 }
2380 
2381 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
2382 	struct uverbs_attr_bundle *attrs)
2383 {
2384 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2385 		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
2386 	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2387 	enum mlx5_ib_uapi_flow_table_type ft_type;
2388 	struct ib_flow_action *action;
2389 	int num_actions;
2390 	void *in;
2391 	int ret;
2392 
2393 	if (!mlx5_ib_modify_header_supported(mdev))
2394 		return -EOPNOTSUPP;
2395 
2396 	in = uverbs_attr_get_alloced_ptr(attrs,
2397 		MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
2398 
2399 	num_actions = uverbs_attr_ptr_get_array_size(
2400 		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2401 		MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
2402 	if (num_actions < 0)
2403 		return num_actions;
2404 
2405 	ret = uverbs_get_const(&ft_type, attrs,
2406 			       MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
2407 	if (ret)
2408 		return ret;
2409 	action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
2410 	if (IS_ERR(action))
2411 		return PTR_ERR(action);
2412 
2413 	uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
2414 				       IB_FLOW_ACTION_UNSPECIFIED);
2415 
2416 	return 0;
2417 }
2418 
2419 static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
2420 						      u8 packet_reformat_type,
2421 						      u8 ft_type)
2422 {
2423 	switch (packet_reformat_type) {
2424 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2425 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2426 			return MLX5_CAP_FLOWTABLE(ibdev->mdev,
2427 						  encap_general_header);
2428 		break;
2429 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2430 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2431 			return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
2432 				reformat_l2_to_l3_tunnel);
2433 		break;
2434 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2435 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2436 			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
2437 				reformat_l3_tunnel_to_l2);
2438 		break;
2439 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
2440 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2441 			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
2442 		break;
2443 	default:
2444 		break;
2445 	}
2446 
2447 	return false;
2448 }
2449 
2450 static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
2451 {
2452 	switch (dv_prt) {
2453 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2454 		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
2455 		break;
2456 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2457 		*prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
2458 		break;
2459 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2460 		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
2461 		break;
2462 	default:
2463 		return -EINVAL;
2464 	}
2465 
2466 	return 0;
2467 }
2468 
2469 static int mlx5_ib_flow_action_create_packet_reformat_ctx(
2470 	struct mlx5_ib_dev *dev,
2471 	struct mlx5_ib_flow_action *maction,
2472 	u8 ft_type, u8 dv_prt,
2473 	void *in, size_t len)
2474 {
2475 	struct mlx5_pkt_reformat_params reformat_params;
2476 	enum mlx5_flow_namespace_type namespace;
2477 	u8 prm_prt;
2478 	int ret;
2479 
2480 	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2481 	if (ret)
2482 		return ret;
2483 
2484 	ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
2485 	if (ret)
2486 		return ret;
2487 
2488 	memset(&reformat_params, 0, sizeof(reformat_params));
2489 	reformat_params.type = prm_prt;
2490 	reformat_params.size = len;
2491 	reformat_params.data = in;
2492 	maction->flow_action_raw.pkt_reformat =
2493 		mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
2494 					   namespace);
2495 	if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
2496 		ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
2497 		return ret;
2498 	}
2499 
2500 	maction->flow_action_raw.sub_type =
2501 		MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
2502 	maction->flow_action_raw.dev = dev;
2503 
2504 	return 0;
2505 }
2506 
2507 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
2508 	struct uverbs_attr_bundle *attrs)
2509 {
2510 	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
2511 		MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
2512 	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2513 	enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
2514 	enum mlx5_ib_uapi_flow_table_type ft_type;
2515 	struct mlx5_ib_flow_action *maction;
2516 	int ret;
2517 
2518 	ret = uverbs_get_const(&ft_type, attrs,
2519 			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
2520 	if (ret)
2521 		return ret;
2522 
2523 	ret = uverbs_get_const(&dv_prt, attrs,
2524 			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
2525 	if (ret)
2526 		return ret;
2527 
2528 	if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
2529 		return -EOPNOTSUPP;
2530 
2531 	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2532 	if (!maction)
2533 		return -ENOMEM;
2534 
2535 	if (dv_prt ==
2536 	    MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
2537 		maction->flow_action_raw.sub_type =
2538 			MLX5_IB_FLOW_ACTION_DECAP;
2539 		maction->flow_action_raw.dev = mdev;
2540 	} else {
2541 		void *in;
2542 		int len;
2543 
2544 		in = uverbs_attr_get_alloced_ptr(attrs,
2545 			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2546 		if (IS_ERR(in)) {
2547 			ret = PTR_ERR(in);
2548 			goto free_maction;
2549 		}
2550 
2551 		len = uverbs_attr_get_len(attrs,
2552 			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2553 
2554 		ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
2555 			maction, ft_type, dv_prt, in, len);
2556 		if (ret)
2557 			goto free_maction;
2558 	}
2559 
2560 	uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
2561 				       IB_FLOW_ACTION_UNSPECIFIED);
2562 	return 0;
2563 
2564 free_maction:
2565 	kfree(maction);
2566 	return ret;
2567 }
2568 
/*
 * Attribute layout of MLX5_IB_METHOD_CREATE_FLOW:
 *  - FLOW_HANDLE:  new UVERBS_OBJECT_FLOW object (mandatory)
 *  - MATCH_VALUE:  1..sizeof(struct mlx5_ib_match_params) bytes, copied into
 *                  a kernel buffer (mandatory)
 *  - FLOW_MATCHER: flow matcher object, read access (mandatory)
 *  - DEST_QP / DEST_DEVX: destination QP or DEVX object, read access; which
 *                  combinations are legal is enforced by get_dests()
 *  - ARR_FLOW_ACTIONS: 1..MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS flow action
 *                  objects (optional)
 *  - FLOW_TAG:     u32 flow tag (optional)
 *  - ARR_COUNTERS_DEVX: exactly one DEVX counter object (optional)
 *  - ARR_COUNTERS_DEVX_OFFSET: at least one u32, copied into a kernel
 *                  buffer (optional)
 *  - FLOW_FLAGS:   enum mlx5_ib_create_flow_flags bits (optional)
 */
DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_CREATE_FLOW,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
			UVERBS_OBJECT_FLOW,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
			MLX5_IB_OBJECT_FLOW_MATCHER,
			UVERBS_ACCESS_READ,
			UA_MANDATORY),
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
			UVERBS_OBJECT_QP,
			UVERBS_ACCESS_READ),
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
			MLX5_IB_OBJECT_DEVX_OBJ,
			UVERBS_ACCESS_READ),
	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
			     UVERBS_OBJECT_FLOW_ACTION,
			     UVERBS_ACCESS_READ, 1,
			     MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
			     UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
			   UVERBS_ATTR_TYPE(u32),
			   UA_OPTIONAL),
	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
			     MLX5_IB_OBJECT_DEVX_OBJ,
			     UVERBS_ACCESS_READ, 1, 1,
			     UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
			   UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
			   UA_OPTIONAL,
			   UA_ALLOC_AND_COPY),
	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
			     enum mlx5_ib_create_flow_flags,
			     UA_OPTIONAL));
2609 
/*
 * Destroy method for flow objects. Its single mandatory attribute is the
 * flow handle, which reuses the MLX5_IB_ATTR_CREATE_FLOW_HANDLE id with
 * destroy access.
 */
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
	MLX5_IB_METHOD_DESTROY_FLOW,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
			UVERBS_OBJECT_FLOW,
			UVERBS_ACCESS_DESTROY,
			UA_MANDATORY));
2616 
/* mlx5 create/destroy methods added to UVERBS_OBJECT_FLOW */
ADD_UVERBS_METHODS(mlx5_ib_fs,
		   UVERBS_OBJECT_FLOW,
		   &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
		   &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
2621 
/*
 * Attribute layout of MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER:
 *  - HANDLE:      new UVERBS_OBJECT_FLOW_ACTION object (mandatory)
 *  - ACTIONS_PRM: buffer of at least one set_add_copy_action_in_auto sized
 *                 entry, copied into a kernel buffer (mandatory)
 *  - FT_TYPE:     enum mlx5_ib_uapi_flow_table_type constant (mandatory)
 */
DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
			UVERBS_OBJECT_FLOW_ACTION,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
			   UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
				   set_add_copy_action_in_auto)),
			   UA_MANDATORY,
			   UA_ALLOC_AND_COPY),
	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
			     enum mlx5_ib_uapi_flow_table_type,
			     UA_MANDATORY));
2636 
/*
 * Attribute layout of MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT:
 *  - HANDLE:   new UVERBS_OBJECT_FLOW_ACTION object (mandatory)
 *  - DATA_BUF: reformat data, at least one byte, copied into a kernel
 *              buffer; optional here, but the handler requires it for every
 *              type other than L2_TUNNEL_TO_L2
 *  - TYPE:     enum mlx5_ib_uapi_flow_action_packet_reformat_type constant
 *              (mandatory)
 *  - FT_TYPE:  enum mlx5_ib_uapi_flow_table_type constant (mandatory)
 */
DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
			UVERBS_OBJECT_FLOW_ACTION,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
			   UVERBS_ATTR_MIN_SIZE(1),
			   UA_ALLOC_AND_COPY,
			   UA_OPTIONAL),
	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
			     enum mlx5_ib_uapi_flow_action_packet_reformat_type,
			     UA_MANDATORY),
	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
			     enum mlx5_ib_uapi_flow_table_type,
			     UA_MANDATORY));
2653 
/* mlx5 creation methods added to UVERBS_OBJECT_FLOW_ACTION */
ADD_UVERBS_METHODS(
	mlx5_ib_flow_actions,
	UVERBS_OBJECT_FLOW_ACTION,
	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
2659 
/*
 * Attribute layout of MLX5_IB_METHOD_FLOW_MATCHER_CREATE:
 *  - CREATE_HANDLE:  new MLX5_IB_OBJECT_FLOW_MATCHER object (mandatory)
 *  - MATCH_MASK:     1..sizeof(struct mlx5_ib_match_params) bytes (mandatory)
 *  - FLOW_TYPE:      enum attribute described by mlx5_ib_flow_type
 *                    (mandatory)
 *  - MATCH_CRITERIA: u8 match criteria enable bits (mandatory)
 *  - FLOW_FLAGS:     enum ib_flow_flags bits (optional, legacy way to select
 *                    the namespace; see mlx5_ib_matcher_ns())
 *  - FT_TYPE:        enum mlx5_ib_uapi_flow_table_type constant (optional;
 *                    cannot be combined with FLOW_FLAGS)
 */
DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
			MLX5_IB_OBJECT_FLOW_MATCHER,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
		UA_MANDATORY),
	UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
			    mlx5_ib_flow_type,
			    UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
			   UVERBS_ATTR_TYPE(u8),
			   UA_MANDATORY),
	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
			     enum ib_flow_flags,
			     UA_OPTIONAL),
	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
			     enum mlx5_ib_uapi_flow_table_type,
			     UA_OPTIONAL));
2682 
/*
 * Destroy method for flow matcher objects; takes the matcher handle with
 * destroy access as its only (mandatory) attribute.
 */
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
			MLX5_IB_OBJECT_FLOW_MATCHER,
			UVERBS_ACCESS_DESTROY,
			UA_MANDATORY));
2689 
/*
 * The flow matcher uverbs object: declared with flow_matcher_cleanup() as
 * the callback passed to UVERBS_TYPE_ALLOC_IDR, and exposing the create and
 * destroy methods declared above.
 */
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
			    UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
2694 
/*
 * uapi definitions contributed by this file (non-static, so visible to
 * other translation units): the flow matcher object plus the extra methods
 * chained onto the flow and flow action objects. The array ends with an
 * empty entry.
 */
const struct uapi_definition mlx5_ib_flow_defs[] = {
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_FLOW_MATCHER),
	UAPI_DEF_CHAIN_OBJ_TREE(
		UVERBS_OBJECT_FLOW,
		&mlx5_ib_fs),
	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
				&mlx5_ib_flow_actions),
	{},
};
2705 
/* Flow verbs that mlx5_ib_fs_init() installs on every device */
static const struct ib_device_ops flow_ops = {
	.create_flow = mlx5_ib_create_flow,
	.destroy_flow = mlx5_ib_destroy_flow,
	.destroy_flow_action = mlx5_ib_destroy_flow_action,
};
2711 
/*
 * ESP flow action verbs; mlx5_ib_fs_init() installs them only when the
 * device reports MLX5_ACCEL_IPSEC_CAP_DEVICE.
 */
static const struct ib_device_ops flow_ipsec_ops = {
	.create_flow_action_esp = mlx5_ib_create_flow_action_esp,
	.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
};
2716 
2717 int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
2718 {
2719 	dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
2720 
2721 	if (!dev->flow_db)
2722 		return -ENOMEM;
2723 
2724 	mutex_init(&dev->flow_db->lock);
2725 
2726 	ib_set_device_ops(&dev->ib_dev, &flow_ops);
2727 	if (mlx5_accel_ipsec_device_caps(dev->mdev) &
2728 	    MLX5_ACCEL_IPSEC_CAP_DEVICE)
2729 		ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops);
2730 
2731 	return 0;
2732 }
2733