xref: /openbmc/linux/drivers/infiniband/hw/mlx5/fs.c (revision 1ed1f6be)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
4  */
5 
6 #include <rdma/ib_user_verbs.h>
7 #include <rdma/ib_verbs.h>
8 #include <rdma/uverbs_types.h>
9 #include <rdma/uverbs_ioctl.h>
10 #include <rdma/uverbs_std_types.h>
11 #include <rdma/mlx5_user_ioctl_cmds.h>
12 #include <rdma/mlx5_user_ioctl_verbs.h>
13 #include <rdma/ib_hdrs.h>
14 #include <rdma/ib_umem.h>
15 #include <linux/mlx5/driver.h>
16 #include <linux/mlx5/fs.h>
17 #include <linux/mlx5/fs_helpers.h>
18 #include <linux/mlx5/eswitch.h>
19 #include <net/inet_ecn.h>
20 #include "mlx5_ib.h"
21 #include "counters.h"
22 #include "devx.h"
23 #include "fs.h"
24 
25 #define UVERBS_MODULE_NAME mlx5_ib
26 #include <rdma/uverbs_named_ioctl.h>
27 
/*
 * Bit positions used as shift counts when building the
 * match_criteria_enable mask in get_match_criteria_enable().
 */
enum {
	MATCH_CRITERIA_ENABLE_OUTER_BIT = 0,
	MATCH_CRITERIA_ENABLE_MISC_BIT = 1,
	MATCH_CRITERIA_ENABLE_INNER_BIT = 2,
	MATCH_CRITERIA_ENABLE_MISC2_BIT = 3,
};
34 
/*
 * Evaluates to true when every byte of the @headers section of the
 * fte_match_param buffer @match_criteria is zero (memchr_inv() finds no
 * byte different from 0).
 *
 * The expansion is fully parenthesised and does not end in a line
 * continuation, so it cannot absorb whatever follows the definition.
 */
#define HEADER_IS_ZERO(match_criteria, headers)                               \
	(!memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers),   \
		     0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))
38 
39 static u8 get_match_criteria_enable(u32 *match_criteria)
40 {
41 	u8 match_criteria_enable;
42 
43 	match_criteria_enable =
44 		(!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
45 		MATCH_CRITERIA_ENABLE_OUTER_BIT;
46 	match_criteria_enable |=
47 		(!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
48 		MATCH_CRITERIA_ENABLE_MISC_BIT;
49 	match_criteria_enable |=
50 		(!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
51 		MATCH_CRITERIA_ENABLE_INNER_BIT;
52 	match_criteria_enable |=
53 		(!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
54 		MATCH_CRITERIA_ENABLE_MISC2_BIT;
55 
56 	return match_criteria_enable;
57 }
58 
59 static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
60 {
61 	u8 entry_mask;
62 	u8 entry_val;
63 	int err = 0;
64 
65 	if (!mask)
66 		goto out;
67 
68 	entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
69 			      ip_protocol);
70 	entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
71 			     ip_protocol);
72 	if (!entry_mask) {
73 		MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
74 		MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
75 		goto out;
76 	}
77 	/* Don't override existing ip protocol */
78 	if (mask != entry_mask || val != entry_val)
79 		err = -EINVAL;
80 out:
81 	return err;
82 }
83 
84 static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
85 			   bool inner)
86 {
87 	if (inner) {
88 		MLX5_SET(fte_match_set_misc,
89 			 misc_c, inner_ipv6_flow_label, mask);
90 		MLX5_SET(fte_match_set_misc,
91 			 misc_v, inner_ipv6_flow_label, val);
92 	} else {
93 		MLX5_SET(fte_match_set_misc,
94 			 misc_c, outer_ipv6_flow_label, mask);
95 		MLX5_SET(fte_match_set_misc,
96 			 misc_v, outer_ipv6_flow_label, val);
97 	}
98 }
99 
100 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
101 {
102 	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
103 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
104 	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
105 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
106 }
107 
108 static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
109 {
110 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
111 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
112 		return -EOPNOTSUPP;
113 
114 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
115 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
116 		return -EOPNOTSUPP;
117 
118 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
119 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
120 		return -EOPNOTSUPP;
121 
122 	if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
123 	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
124 		return -EOPNOTSUPP;
125 
126 	return 0;
127 }
128 
/*
 * Name of the last field this driver handles in each flow-spec filter.
 * Each one is passed as the @field argument of FIELDS_NOT_SUPPORTED().
 */
#define LAST_ETH_FIELD vlan_tag
#define LAST_IB_FIELD sl
#define LAST_IPV4_FIELD tos
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
#define LAST_FLOW_TAG_FIELD tag_id
#define LAST_DROP_FIELD size
#define LAST_COUNTERS_FIELD counters

/*
 * Field is the last supported field.
 *
 * Scans the bytes of @filter that follow @field and evaluates to non-NULL
 * when any of them is non-zero.  @filter is parenthesised so that any
 * lvalue expression can be passed safely.
 */
#define FIELDS_NOT_SUPPORTED(filter, field)                                    \
	memchr_inv((void *)&(filter).field + sizeof((filter).field), 0,        \
		   sizeof(filter) - offsetofend(typeof(filter), field))
143 
144 int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
145 			   bool is_egress,
146 			   struct mlx5_flow_act *action)
147 {
148 
149 	switch (maction->ib_action.type) {
150 	case IB_FLOW_ACTION_UNSPECIFIED:
151 		if (maction->flow_action_raw.sub_type ==
152 		    MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
153 			if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
154 				return -EINVAL;
155 			action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
156 			action->modify_hdr =
157 				maction->flow_action_raw.modify_hdr;
158 			return 0;
159 		}
160 		if (maction->flow_action_raw.sub_type ==
161 		    MLX5_IB_FLOW_ACTION_DECAP) {
162 			if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
163 				return -EINVAL;
164 			action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
165 			return 0;
166 		}
167 		if (maction->flow_action_raw.sub_type ==
168 		    MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
169 			if (action->action &
170 			    MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
171 				return -EINVAL;
172 			action->action |=
173 				MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
174 			action->pkt_reformat =
175 				maction->flow_action_raw.pkt_reformat;
176 			return 0;
177 		}
178 		fallthrough;
179 	default:
180 		return -EOPNOTSUPP;
181 	}
182 }
183 
184 static int parse_flow_attr(struct mlx5_core_dev *mdev,
185 			   struct mlx5_flow_spec *spec,
186 			   const union ib_flow_spec *ib_spec,
187 			   const struct ib_flow_attr *flow_attr,
188 			   struct mlx5_flow_act *action, u32 prev_type)
189 {
190 	struct mlx5_flow_context *flow_context = &spec->flow_context;
191 	u32 *match_c = spec->match_criteria;
192 	u32 *match_v = spec->match_value;
193 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
194 					   misc_parameters);
195 	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
196 					   misc_parameters);
197 	void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
198 					    misc_parameters_2);
199 	void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
200 					    misc_parameters_2);
201 	void *headers_c;
202 	void *headers_v;
203 	int match_ipv;
204 	int ret;
205 
206 	if (ib_spec->type & IB_FLOW_SPEC_INNER) {
207 		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
208 					 inner_headers);
209 		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
210 					 inner_headers);
211 		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
212 					ft_field_support.inner_ip_version);
213 	} else {
214 		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
215 					 outer_headers);
216 		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
217 					 outer_headers);
218 		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
219 					ft_field_support.outer_ip_version);
220 	}
221 
222 	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
223 	case IB_FLOW_SPEC_ETH:
224 		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
225 			return -EOPNOTSUPP;
226 
227 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
228 					     dmac_47_16),
229 				ib_spec->eth.mask.dst_mac);
230 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
231 					     dmac_47_16),
232 				ib_spec->eth.val.dst_mac);
233 
234 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
235 					     smac_47_16),
236 				ib_spec->eth.mask.src_mac);
237 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
238 					     smac_47_16),
239 				ib_spec->eth.val.src_mac);
240 
241 		if (ib_spec->eth.mask.vlan_tag) {
242 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
243 				 cvlan_tag, 1);
244 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
245 				 cvlan_tag, 1);
246 
247 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
248 				 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
249 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
250 				 first_vid, ntohs(ib_spec->eth.val.vlan_tag));
251 
252 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
253 				 first_cfi,
254 				 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
255 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
256 				 first_cfi,
257 				 ntohs(ib_spec->eth.val.vlan_tag) >> 12);
258 
259 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
260 				 first_prio,
261 				 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
262 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
263 				 first_prio,
264 				 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
265 		}
266 		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
267 			 ethertype, ntohs(ib_spec->eth.mask.ether_type));
268 		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
269 			 ethertype, ntohs(ib_spec->eth.val.ether_type));
270 		break;
271 	case IB_FLOW_SPEC_IPV4:
272 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
273 			return -EOPNOTSUPP;
274 
275 		if (match_ipv) {
276 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
277 				 ip_version, 0xf);
278 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
279 				 ip_version, MLX5_FS_IPV4_VERSION);
280 		} else {
281 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
282 				 ethertype, 0xffff);
283 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
284 				 ethertype, ETH_P_IP);
285 		}
286 
287 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
288 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
289 		       &ib_spec->ipv4.mask.src_ip,
290 		       sizeof(ib_spec->ipv4.mask.src_ip));
291 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
292 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
293 		       &ib_spec->ipv4.val.src_ip,
294 		       sizeof(ib_spec->ipv4.val.src_ip));
295 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
296 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
297 		       &ib_spec->ipv4.mask.dst_ip,
298 		       sizeof(ib_spec->ipv4.mask.dst_ip));
299 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
300 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
301 		       &ib_spec->ipv4.val.dst_ip,
302 		       sizeof(ib_spec->ipv4.val.dst_ip));
303 
304 		set_tos(headers_c, headers_v,
305 			ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
306 
307 		if (set_proto(headers_c, headers_v,
308 			      ib_spec->ipv4.mask.proto,
309 			      ib_spec->ipv4.val.proto))
310 			return -EINVAL;
311 		break;
312 	case IB_FLOW_SPEC_IPV6:
313 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
314 			return -EOPNOTSUPP;
315 
316 		if (match_ipv) {
317 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
318 				 ip_version, 0xf);
319 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
320 				 ip_version, MLX5_FS_IPV6_VERSION);
321 		} else {
322 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
323 				 ethertype, 0xffff);
324 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
325 				 ethertype, ETH_P_IPV6);
326 		}
327 
328 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
329 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
330 		       &ib_spec->ipv6.mask.src_ip,
331 		       sizeof(ib_spec->ipv6.mask.src_ip));
332 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
333 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
334 		       &ib_spec->ipv6.val.src_ip,
335 		       sizeof(ib_spec->ipv6.val.src_ip));
336 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
337 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
338 		       &ib_spec->ipv6.mask.dst_ip,
339 		       sizeof(ib_spec->ipv6.mask.dst_ip));
340 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
341 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
342 		       &ib_spec->ipv6.val.dst_ip,
343 		       sizeof(ib_spec->ipv6.val.dst_ip));
344 
345 		set_tos(headers_c, headers_v,
346 			ib_spec->ipv6.mask.traffic_class,
347 			ib_spec->ipv6.val.traffic_class);
348 
349 		if (set_proto(headers_c, headers_v,
350 			      ib_spec->ipv6.mask.next_hdr,
351 			      ib_spec->ipv6.val.next_hdr))
352 			return -EINVAL;
353 
354 		set_flow_label(misc_params_c, misc_params_v,
355 			       ntohl(ib_spec->ipv6.mask.flow_label),
356 			       ntohl(ib_spec->ipv6.val.flow_label),
357 			       ib_spec->type & IB_FLOW_SPEC_INNER);
358 		break;
359 	case IB_FLOW_SPEC_ESP:
360 		return -EOPNOTSUPP;
361 	case IB_FLOW_SPEC_TCP:
362 		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
363 					 LAST_TCP_UDP_FIELD))
364 			return -EOPNOTSUPP;
365 
366 		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
367 			return -EINVAL;
368 
369 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
370 			 ntohs(ib_spec->tcp_udp.mask.src_port));
371 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
372 			 ntohs(ib_spec->tcp_udp.val.src_port));
373 
374 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
375 			 ntohs(ib_spec->tcp_udp.mask.dst_port));
376 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
377 			 ntohs(ib_spec->tcp_udp.val.dst_port));
378 		break;
379 	case IB_FLOW_SPEC_UDP:
380 		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
381 					 LAST_TCP_UDP_FIELD))
382 			return -EOPNOTSUPP;
383 
384 		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
385 			return -EINVAL;
386 
387 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
388 			 ntohs(ib_spec->tcp_udp.mask.src_port));
389 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
390 			 ntohs(ib_spec->tcp_udp.val.src_port));
391 
392 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
393 			 ntohs(ib_spec->tcp_udp.mask.dst_port));
394 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
395 			 ntohs(ib_spec->tcp_udp.val.dst_port));
396 		break;
397 	case IB_FLOW_SPEC_GRE:
398 		if (ib_spec->gre.mask.c_ks_res0_ver)
399 			return -EOPNOTSUPP;
400 
401 		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
402 			return -EINVAL;
403 
404 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
405 			 0xff);
406 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
407 			 IPPROTO_GRE);
408 
409 		MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
410 			 ntohs(ib_spec->gre.mask.protocol));
411 		MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
412 			 ntohs(ib_spec->gre.val.protocol));
413 
414 		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
415 				    gre_key.nvgre.hi),
416 		       &ib_spec->gre.mask.key,
417 		       sizeof(ib_spec->gre.mask.key));
418 		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
419 				    gre_key.nvgre.hi),
420 		       &ib_spec->gre.val.key,
421 		       sizeof(ib_spec->gre.val.key));
422 		break;
423 	case IB_FLOW_SPEC_MPLS:
424 		switch (prev_type) {
425 		case IB_FLOW_SPEC_UDP:
426 			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
427 						   ft_field_support.outer_first_mpls_over_udp),
428 						   &ib_spec->mpls.mask.tag))
429 				return -EOPNOTSUPP;
430 
431 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
432 					    outer_first_mpls_over_udp),
433 			       &ib_spec->mpls.val.tag,
434 			       sizeof(ib_spec->mpls.val.tag));
435 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
436 					    outer_first_mpls_over_udp),
437 			       &ib_spec->mpls.mask.tag,
438 			       sizeof(ib_spec->mpls.mask.tag));
439 			break;
440 		case IB_FLOW_SPEC_GRE:
441 			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
442 						   ft_field_support.outer_first_mpls_over_gre),
443 						   &ib_spec->mpls.mask.tag))
444 				return -EOPNOTSUPP;
445 
446 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
447 					    outer_first_mpls_over_gre),
448 			       &ib_spec->mpls.val.tag,
449 			       sizeof(ib_spec->mpls.val.tag));
450 			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
451 					    outer_first_mpls_over_gre),
452 			       &ib_spec->mpls.mask.tag,
453 			       sizeof(ib_spec->mpls.mask.tag));
454 			break;
455 		default:
456 			if (ib_spec->type & IB_FLOW_SPEC_INNER) {
457 				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
458 							   ft_field_support.inner_first_mpls),
459 							   &ib_spec->mpls.mask.tag))
460 					return -EOPNOTSUPP;
461 
462 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
463 						    inner_first_mpls),
464 				       &ib_spec->mpls.val.tag,
465 				       sizeof(ib_spec->mpls.val.tag));
466 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
467 						    inner_first_mpls),
468 				       &ib_spec->mpls.mask.tag,
469 				       sizeof(ib_spec->mpls.mask.tag));
470 			} else {
471 				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
472 							   ft_field_support.outer_first_mpls),
473 							   &ib_spec->mpls.mask.tag))
474 					return -EOPNOTSUPP;
475 
476 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
477 						    outer_first_mpls),
478 				       &ib_spec->mpls.val.tag,
479 				       sizeof(ib_spec->mpls.val.tag));
480 				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
481 						    outer_first_mpls),
482 				       &ib_spec->mpls.mask.tag,
483 				       sizeof(ib_spec->mpls.mask.tag));
484 			}
485 		}
486 		break;
487 	case IB_FLOW_SPEC_VXLAN_TUNNEL:
488 		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
489 					 LAST_TUNNEL_FIELD))
490 			return -EOPNOTSUPP;
491 
492 		MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
493 			 ntohl(ib_spec->tunnel.mask.tunnel_id));
494 		MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
495 			 ntohl(ib_spec->tunnel.val.tunnel_id));
496 		break;
497 	case IB_FLOW_SPEC_ACTION_TAG:
498 		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
499 					 LAST_FLOW_TAG_FIELD))
500 			return -EOPNOTSUPP;
501 		if (ib_spec->flow_tag.tag_id >= BIT(24))
502 			return -EINVAL;
503 
504 		flow_context->flow_tag = ib_spec->flow_tag.tag_id;
505 		flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
506 		break;
507 	case IB_FLOW_SPEC_ACTION_DROP:
508 		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
509 					 LAST_DROP_FIELD))
510 			return -EOPNOTSUPP;
511 		action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
512 		break;
513 	case IB_FLOW_SPEC_ACTION_HANDLE:
514 		ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
515 			flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
516 		if (ret)
517 			return ret;
518 		break;
519 	case IB_FLOW_SPEC_ACTION_COUNT:
520 		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
521 					 LAST_COUNTERS_FIELD))
522 			return -EOPNOTSUPP;
523 
524 		/* for now support only one counters spec per flow */
525 		if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
526 			return -EINVAL;
527 
528 		action->counters = ib_spec->flow_count.counters;
529 		action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
530 		break;
531 	default:
532 		return -EINVAL;
533 	}
534 
535 	return 0;
536 }
537 
538 /* If a flow could catch both multicast and unicast packets,
539  * it won't fall into the multicast flow steering table and this rule
540  * could steal other multicast packets.
541  */
542 static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
543 {
544 	union ib_flow_spec *flow_spec;
545 
546 	if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
547 	    ib_attr->num_of_specs < 1)
548 		return false;
549 
550 	flow_spec = (union ib_flow_spec *)(ib_attr + 1);
551 	if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
552 		struct ib_flow_spec_ipv4 *ipv4_spec;
553 
554 		ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
555 		if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
556 			return true;
557 
558 		return false;
559 	}
560 
561 	if (flow_spec->type == IB_FLOW_SPEC_ETH) {
562 		struct ib_flow_spec_eth *eth_spec;
563 
564 		eth_spec = (struct ib_flow_spec_eth *)flow_spec;
565 		return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
566 		       is_multicast_ether_addr(eth_spec->val.dst_mac);
567 	}
568 
569 	return false;
570 }
571 
572 static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
573 			       const struct ib_flow_attr *flow_attr,
574 			       bool check_inner)
575 {
576 	union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
577 	int match_ipv = check_inner ?
578 			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
579 					ft_field_support.inner_ip_version) :
580 			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
581 					ft_field_support.outer_ip_version);
582 	int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
583 	bool ipv4_spec_valid, ipv6_spec_valid;
584 	unsigned int ip_spec_type = 0;
585 	bool has_ethertype = false;
586 	unsigned int spec_index;
587 	bool mask_valid = true;
588 	u16 eth_type = 0;
589 	bool type_valid;
590 
591 	/* Validate that ethertype is correct */
592 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
593 		if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
594 		    ib_spec->eth.mask.ether_type) {
595 			mask_valid = (ib_spec->eth.mask.ether_type ==
596 				      htons(0xffff));
597 			has_ethertype = true;
598 			eth_type = ntohs(ib_spec->eth.val.ether_type);
599 		} else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
600 			   (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
601 			ip_spec_type = ib_spec->type;
602 		}
603 		ib_spec = (void *)ib_spec + ib_spec->size;
604 	}
605 
606 	type_valid = (!has_ethertype) || (!ip_spec_type);
607 	if (!type_valid && mask_valid) {
608 		ipv4_spec_valid = (eth_type == ETH_P_IP) &&
609 			(ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
610 		ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
611 			(ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
612 
613 		type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
614 			     (((eth_type == ETH_P_MPLS_UC) ||
615 			       (eth_type == ETH_P_MPLS_MC)) && match_ipv);
616 	}
617 
618 	return type_valid;
619 }
620 
621 static bool is_valid_attr(struct mlx5_core_dev *mdev,
622 			  const struct ib_flow_attr *flow_attr)
623 {
624 	return is_valid_ethertype(mdev, flow_attr, false) &&
625 	       is_valid_ethertype(mdev, flow_attr, true);
626 }
627 
628 static void put_flow_table(struct mlx5_ib_dev *dev,
629 			   struct mlx5_ib_flow_prio *prio, bool ft_added)
630 {
631 	prio->refcount -= !!ft_added;
632 	if (!prio->refcount) {
633 		mlx5_destroy_flow_table(prio->flow_table);
634 		prio->flow_table = NULL;
635 	}
636 }
637 
638 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
639 {
640 	struct mlx5_ib_flow_handler *handler = container_of(flow_id,
641 							  struct mlx5_ib_flow_handler,
642 							  ibflow);
643 	struct mlx5_ib_flow_handler *iter, *tmp;
644 	struct mlx5_ib_dev *dev = handler->dev;
645 
646 	mutex_lock(&dev->flow_db->lock);
647 
648 	list_for_each_entry_safe(iter, tmp, &handler->list, list) {
649 		mlx5_del_flow_rules(iter->rule);
650 		put_flow_table(dev, iter->prio, true);
651 		list_del(&iter->list);
652 		kfree(iter);
653 	}
654 
655 	mlx5_del_flow_rules(handler->rule);
656 	put_flow_table(dev, handler->prio, true);
657 	mlx5_ib_counters_clear_description(handler->ibcounters);
658 	mutex_unlock(&dev->flow_db->lock);
659 	if (handler->flow_matcher)
660 		atomic_dec(&handler->flow_matcher->usecnt);
661 	kfree(handler);
662 
663 	return 0;
664 }
665 
/*
 * Map a verbs flow priority to a core priority: each verbs priority
 * expands to two consecutive core priorities, the even one for
 * "don't trap" flows and the odd one for all other flows.
 */
static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
{
	unsigned int core_prio = 2 * priority;

	return dont_trap ? core_prio : core_prio + 1;
}
673 
/* Direction of a flow table; also used to index the sniffer prio array. */
enum flow_table_type {
	MLX5_IB_FT_RX = 0,
	MLX5_IB_FT_TX = 1,
};

/* Group and entry counts requested for IB_FLOW_ATTR_NORMAL flow tables */
#define MLX5_FS_MAX_TYPES	 6
#define MLX5_FS_MAX_ENTRIES	 BIT(16)
681 
682 static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
683 {
684 	struct mlx5_ib_dev *dev = to_mdev(device);
685 
686 	return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
687 }
688 
689 static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
690 					   struct mlx5_flow_namespace *ns,
691 					   struct mlx5_ib_flow_prio *prio,
692 					   int priority,
693 					   int num_entries, int num_groups,
694 					   u32 flags)
695 {
696 	struct mlx5_flow_table_attr ft_attr = {};
697 	struct mlx5_flow_table *ft;
698 
699 	if (mlx5_ib_shared_ft_allowed(&dev->ib_dev))
700 		ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
701 	ft_attr.prio = priority;
702 	ft_attr.max_fte = num_entries;
703 	ft_attr.flags = flags;
704 	ft_attr.autogroup.max_num_groups = num_groups;
705 	ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
706 	if (IS_ERR(ft))
707 		return ERR_CAST(ft);
708 
709 	prio->flow_table = ft;
710 	prio->refcount = 0;
711 	return prio;
712 }
713 
714 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
715 						struct ib_flow_attr *flow_attr,
716 						enum flow_table_type ft_type)
717 {
718 	bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
719 	struct mlx5_flow_namespace *ns = NULL;
720 	enum mlx5_flow_namespace_type fn_type;
721 	struct mlx5_ib_flow_prio *prio;
722 	struct mlx5_flow_table *ft;
723 	int max_table_size;
724 	int num_entries;
725 	int num_groups;
726 	bool esw_encap;
727 	u32 flags = 0;
728 	int priority;
729 
730 	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
731 						       log_max_ft_size));
732 	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
733 		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
734 	switch (flow_attr->type) {
735 	case IB_FLOW_ATTR_NORMAL:
736 		if (flow_is_multicast_only(flow_attr) && !dont_trap)
737 			priority = MLX5_IB_FLOW_MCAST_PRIO;
738 		else
739 			priority = ib_prio_to_core_prio(flow_attr->priority,
740 							dont_trap);
741 		if (ft_type == MLX5_IB_FT_RX) {
742 			fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
743 			prio = &dev->flow_db->prios[priority];
744 			if (!dev->is_rep && !esw_encap &&
745 			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
746 				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
747 			if (!dev->is_rep && !esw_encap &&
748 			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
749 						      reformat_l3_tunnel_to_l2))
750 				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
751 		} else {
752 			max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
753 				dev->mdev, log_max_ft_size));
754 			fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
755 			prio = &dev->flow_db->egress_prios[priority];
756 			if (!dev->is_rep && !esw_encap &&
757 			    MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
758 				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
759 		}
760 		ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
761 		num_entries = MLX5_FS_MAX_ENTRIES;
762 		num_groups = MLX5_FS_MAX_TYPES;
763 		break;
764 	case IB_FLOW_ATTR_ALL_DEFAULT:
765 	case IB_FLOW_ATTR_MC_DEFAULT:
766 		ns = mlx5_get_flow_namespace(dev->mdev,
767 					     MLX5_FLOW_NAMESPACE_LEFTOVERS);
768 		build_leftovers_ft_param(&priority, &num_entries, &num_groups);
769 		prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
770 		break;
771 	case IB_FLOW_ATTR_SNIFFER:
772 		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
773 					allow_sniffer_and_nic_rx_shared_tir))
774 			return ERR_PTR(-EOPNOTSUPP);
775 
776 		ns = mlx5_get_flow_namespace(
777 			dev->mdev, ft_type == MLX5_IB_FT_RX ?
778 					   MLX5_FLOW_NAMESPACE_SNIFFER_RX :
779 					   MLX5_FLOW_NAMESPACE_SNIFFER_TX);
780 
781 		prio = &dev->flow_db->sniffer[ft_type];
782 		priority = 0;
783 		num_entries = 1;
784 		num_groups = 1;
785 		break;
786 	default:
787 		break;
788 	}
789 
790 	if (!ns)
791 		return ERR_PTR(-EOPNOTSUPP);
792 
793 	max_table_size = min_t(int, num_entries, max_table_size);
794 
795 	ft = prio->flow_table;
796 	if (!ft)
797 		return _get_prio(dev, ns, prio, priority, max_table_size,
798 				 num_groups, flags);
799 
800 	return prio;
801 }
802 
/* Flow table priorities for the RDMA RX optional counters */
enum {
	RDMA_RX_ECN_OPCOUNTER_PRIO = 0,
	RDMA_RX_CNP_OPCOUNTER_PRIO = 1,
};

/* Flow table priorities for the RDMA TX optional counters */
enum {
	RDMA_TX_CNP_OPCOUNTER_PRIO = 0,
};
811 
812 static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
813 			      struct mlx5_flow_spec *spec)
814 {
815 	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
816 					ft_field_support.source_vhca_port) ||
817 	    !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
818 					ft_field_support.source_vhca_port))
819 		return -EOPNOTSUPP;
820 
821 	MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
822 			 misc_parameters.source_vhca_port);
823 	MLX5_SET(fte_match_param, &spec->match_value,
824 		 misc_parameters.source_vhca_port, port_num);
825 
826 	return 0;
827 }
828 
829 static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
830 			   struct mlx5_flow_spec *spec, int ipv)
831 {
832 	if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
833 					ft_field_support.outer_ip_version))
834 		return -EOPNOTSUPP;
835 
836 	if (mlx5_core_mp_enabled(dev->mdev) &&
837 	    set_vhca_port_spec(dev, port_num, spec))
838 		return -EOPNOTSUPP;
839 
840 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
841 			 outer_headers.ip_ecn);
842 	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
843 		 INET_ECN_CE);
844 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
845 			 outer_headers.ip_version);
846 	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
847 		 ipv);
848 
849 	spec->match_criteria_enable =
850 		get_match_criteria_enable(spec->match_criteria);
851 
852 	return 0;
853 }
854 
855 static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
856 			struct mlx5_flow_spec *spec)
857 {
858 	if (mlx5_core_mp_enabled(dev->mdev) &&
859 	    set_vhca_port_spec(dev, port_num, spec))
860 		return -EOPNOTSUPP;
861 
862 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
863 			 misc_parameters.bth_opcode);
864 	MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
865 		 IB_BTH_OPCODE_CNP);
866 
867 	spec->match_criteria_enable =
868 		get_match_criteria_enable(spec->match_criteria);
869 
870 	return 0;
871 }
872 
873 int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
874 			 struct mlx5_ib_op_fc *opfc,
875 			 enum mlx5_ib_optional_counter_type type)
876 {
877 	enum mlx5_flow_namespace_type fn_type;
878 	int priority, i, err, spec_num;
879 	struct mlx5_flow_act flow_act = {};
880 	struct mlx5_flow_destination dst;
881 	struct mlx5_flow_namespace *ns;
882 	struct mlx5_ib_flow_prio *prio;
883 	struct mlx5_flow_spec *spec;
884 
885 	spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
886 	if (!spec)
887 		return -ENOMEM;
888 
889 	switch (type) {
890 	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
891 		if (set_ecn_ce_spec(dev, port_num, &spec[0],
892 				    MLX5_FS_IPV4_VERSION) ||
893 		    set_ecn_ce_spec(dev, port_num, &spec[1],
894 				    MLX5_FS_IPV6_VERSION)) {
895 			err = -EOPNOTSUPP;
896 			goto free;
897 		}
898 		spec_num = 2;
899 		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
900 		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
901 		break;
902 
903 	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
904 		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
905 					ft_field_support_2_nic_receive_rdma.bth_opcode) ||
906 		    set_cnp_spec(dev, port_num, &spec[0])) {
907 			err = -EOPNOTSUPP;
908 			goto free;
909 		}
910 		spec_num = 1;
911 		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
912 		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
913 		break;
914 
915 	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
916 		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
917 					ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
918 		    set_cnp_spec(dev, port_num, &spec[0])) {
919 			err = -EOPNOTSUPP;
920 			goto free;
921 		}
922 		spec_num = 1;
923 		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
924 		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
925 		break;
926 
927 	default:
928 		err = -EOPNOTSUPP;
929 		goto free;
930 	}
931 
932 	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
933 	if (!ns) {
934 		err = -EOPNOTSUPP;
935 		goto free;
936 	}
937 
938 	prio = &dev->flow_db->opfcs[type];
939 	if (!prio->flow_table) {
940 		prio = _get_prio(dev, ns, prio, priority,
941 				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
942 		if (IS_ERR(prio)) {
943 			err = PTR_ERR(prio);
944 			goto free;
945 		}
946 	}
947 
948 	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
949 	dst.counter_id = mlx5_fc_id(opfc->fc);
950 
951 	flow_act.action =
952 		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
953 
954 	for (i = 0; i < spec_num; i++) {
955 		opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
956 						    &flow_act, &dst, 1);
957 		if (IS_ERR(opfc->rule[i])) {
958 			err = PTR_ERR(opfc->rule[i]);
959 			goto del_rules;
960 		}
961 	}
962 	prio->refcount += spec_num;
963 	kfree(spec);
964 
965 	return 0;
966 
967 del_rules:
968 	for (i -= 1; i >= 0; i--)
969 		mlx5_del_flow_rules(opfc->rule[i]);
970 	put_flow_table(dev, prio, false);
971 free:
972 	kfree(spec);
973 	return err;
974 }
975 
976 void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
977 			     struct mlx5_ib_op_fc *opfc,
978 			     enum mlx5_ib_optional_counter_type type)
979 {
980 	int i;
981 
982 	for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
983 		mlx5_del_flow_rules(opfc->rule[i]);
984 		put_flow_table(dev, &dev->flow_db->opfcs[type], true);
985 	}
986 }
987 
988 static void set_underlay_qp(struct mlx5_ib_dev *dev,
989 			    struct mlx5_flow_spec *spec,
990 			    u32 underlay_qpn)
991 {
992 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
993 					   spec->match_criteria,
994 					   misc_parameters);
995 	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
996 					   misc_parameters);
997 
998 	if (underlay_qpn &&
999 	    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
1000 				      ft_field_support.bth_dst_qp)) {
1001 		MLX5_SET(fte_match_set_misc,
1002 			 misc_params_v, bth_dst_qp, underlay_qpn);
1003 		MLX5_SET(fte_match_set_misc,
1004 			 misc_params_c, bth_dst_qp, 0xffffff);
1005 	}
1006 }
1007 
1008 static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
1009 					 struct mlx5_flow_spec *spec,
1010 					 struct mlx5_eswitch_rep *rep)
1011 {
1012 	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
1013 	void *misc;
1014 
1015 	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
1016 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1017 				    misc_parameters_2);
1018 
1019 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1020 			 mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
1021 								   rep->vport));
1022 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1023 				    misc_parameters_2);
1024 
1025 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
1026 			 mlx5_eswitch_get_vport_metadata_mask());
1027 	} else {
1028 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1029 				    misc_parameters);
1030 
1031 		MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
1032 
1033 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1034 				    misc_parameters);
1035 
1036 		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
1037 	}
1038 }
1039 
1040 static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
1041 						      struct mlx5_ib_flow_prio *ft_prio,
1042 						      const struct ib_flow_attr *flow_attr,
1043 						      struct mlx5_flow_destination *dst,
1044 						      u32 underlay_qpn,
1045 						      struct mlx5_ib_create_flow *ucmd)
1046 {
1047 	struct mlx5_flow_table	*ft = ft_prio->flow_table;
1048 	struct mlx5_ib_flow_handler *handler;
1049 	struct mlx5_flow_act flow_act = {};
1050 	struct mlx5_flow_spec *spec;
1051 	struct mlx5_flow_destination dest_arr[2] = {};
1052 	struct mlx5_flow_destination *rule_dst = dest_arr;
1053 	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
1054 	unsigned int spec_index;
1055 	u32 prev_type = 0;
1056 	int err = 0;
1057 	int dest_num = 0;
1058 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1059 
1060 	if (!is_valid_attr(dev->mdev, flow_attr))
1061 		return ERR_PTR(-EINVAL);
1062 
1063 	if (dev->is_rep && is_egress)
1064 		return ERR_PTR(-EINVAL);
1065 
1066 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1067 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1068 	if (!handler || !spec) {
1069 		err = -ENOMEM;
1070 		goto free;
1071 	}
1072 
1073 	INIT_LIST_HEAD(&handler->list);
1074 
1075 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
1076 		err = parse_flow_attr(dev->mdev, spec,
1077 				      ib_flow, flow_attr, &flow_act,
1078 				      prev_type);
1079 		if (err < 0)
1080 			goto free;
1081 
1082 		prev_type = ((union ib_flow_spec *)ib_flow)->type;
1083 		ib_flow += ((union ib_flow_spec *)ib_flow)->size;
1084 	}
1085 
1086 	if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
1087 		memcpy(&dest_arr[0], dst, sizeof(*dst));
1088 		dest_num++;
1089 	}
1090 
1091 	if (!flow_is_multicast_only(flow_attr))
1092 		set_underlay_qp(dev, spec, underlay_qpn);
1093 
1094 	if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
1095 		struct mlx5_eswitch_rep *rep;
1096 
1097 		rep = dev->port[flow_attr->port - 1].rep;
1098 		if (!rep) {
1099 			err = -EINVAL;
1100 			goto free;
1101 		}
1102 
1103 		mlx5_ib_set_rule_source_port(dev, spec, rep);
1104 	}
1105 
1106 	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
1107 
1108 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1109 		struct mlx5_ib_mcounters *mcounters;
1110 
1111 		err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
1112 		if (err)
1113 			goto free;
1114 
1115 		mcounters = to_mcounters(flow_act.counters);
1116 		handler->ibcounters = flow_act.counters;
1117 		dest_arr[dest_num].type =
1118 			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1119 		dest_arr[dest_num].counter_id =
1120 			mlx5_fc_id(mcounters->hw_cntrs_hndl);
1121 		dest_num++;
1122 	}
1123 
1124 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
1125 		if (!dest_num)
1126 			rule_dst = NULL;
1127 	} else {
1128 		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1129 			flow_act.action |=
1130 				MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
1131 		if (is_egress)
1132 			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1133 		else if (dest_num)
1134 			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1135 	}
1136 
1137 	if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
1138 	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1139 	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1140 		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
1141 			     spec->flow_context.flow_tag, flow_attr->type);
1142 		err = -EINVAL;
1143 		goto free;
1144 	}
1145 	handler->rule = mlx5_add_flow_rules(ft, spec,
1146 					    &flow_act,
1147 					    rule_dst, dest_num);
1148 
1149 	if (IS_ERR(handler->rule)) {
1150 		err = PTR_ERR(handler->rule);
1151 		goto free;
1152 	}
1153 
1154 	ft_prio->refcount++;
1155 	handler->prio = ft_prio;
1156 	handler->dev = dev;
1157 
1158 	ft_prio->flow_table = ft;
1159 free:
1160 	if (err && handler) {
1161 		mlx5_ib_counters_clear_description(handler->ibcounters);
1162 		kfree(handler);
1163 	}
1164 	kvfree(spec);
1165 	return err ? ERR_PTR(err) : handler;
1166 }
1167 
1168 static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
1169 						     struct mlx5_ib_flow_prio *ft_prio,
1170 						     const struct ib_flow_attr *flow_attr,
1171 						     struct mlx5_flow_destination *dst)
1172 {
1173 	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
1174 }
1175 
/* Indices into the leftovers_specs[] table of create_leftovers_rule(). */
enum {
	LEFTOVERS_MC = 0,	/* multicast catch-all entry */
	LEFTOVERS_UC = 1,	/* unicast catch-all entry */
};
1180 
1181 static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
1182 							  struct mlx5_ib_flow_prio *ft_prio,
1183 							  struct ib_flow_attr *flow_attr,
1184 							  struct mlx5_flow_destination *dst)
1185 {
1186 	struct mlx5_ib_flow_handler *handler_ucast = NULL;
1187 	struct mlx5_ib_flow_handler *handler = NULL;
1188 
1189 	static struct {
1190 		struct ib_flow_attr	flow_attr;
1191 		struct ib_flow_spec_eth eth_flow;
1192 	} leftovers_specs[] = {
1193 		[LEFTOVERS_MC] = {
1194 			.flow_attr = {
1195 				.num_of_specs = 1,
1196 				.size = sizeof(leftovers_specs[0])
1197 			},
1198 			.eth_flow = {
1199 				.type = IB_FLOW_SPEC_ETH,
1200 				.size = sizeof(struct ib_flow_spec_eth),
1201 				.mask = {.dst_mac = {0x1} },
1202 				.val =  {.dst_mac = {0x1} }
1203 			}
1204 		},
1205 		[LEFTOVERS_UC] = {
1206 			.flow_attr = {
1207 				.num_of_specs = 1,
1208 				.size = sizeof(leftovers_specs[0])
1209 			},
1210 			.eth_flow = {
1211 				.type = IB_FLOW_SPEC_ETH,
1212 				.size = sizeof(struct ib_flow_spec_eth),
1213 				.mask = {.dst_mac = {0x1} },
1214 				.val = {.dst_mac = {} }
1215 			}
1216 		}
1217 	};
1218 
1219 	handler = create_flow_rule(dev, ft_prio,
1220 				   &leftovers_specs[LEFTOVERS_MC].flow_attr,
1221 				   dst);
1222 	if (!IS_ERR(handler) &&
1223 	    flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
1224 		handler_ucast = create_flow_rule(dev, ft_prio,
1225 						 &leftovers_specs[LEFTOVERS_UC].flow_attr,
1226 						 dst);
1227 		if (IS_ERR(handler_ucast)) {
1228 			mlx5_del_flow_rules(handler->rule);
1229 			ft_prio->refcount--;
1230 			kfree(handler);
1231 			handler = handler_ucast;
1232 		} else {
1233 			list_add(&handler_ucast->list, &handler->list);
1234 		}
1235 	}
1236 
1237 	return handler;
1238 }
1239 
1240 static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
1241 							struct mlx5_ib_flow_prio *ft_rx,
1242 							struct mlx5_ib_flow_prio *ft_tx,
1243 							struct mlx5_flow_destination *dst)
1244 {
1245 	struct mlx5_ib_flow_handler *handler_rx;
1246 	struct mlx5_ib_flow_handler *handler_tx;
1247 	int err;
1248 	static const struct ib_flow_attr flow_attr  = {
1249 		.num_of_specs = 0,
1250 		.type = IB_FLOW_ATTR_SNIFFER,
1251 		.size = sizeof(flow_attr)
1252 	};
1253 
1254 	handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
1255 	if (IS_ERR(handler_rx)) {
1256 		err = PTR_ERR(handler_rx);
1257 		goto err;
1258 	}
1259 
1260 	handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
1261 	if (IS_ERR(handler_tx)) {
1262 		err = PTR_ERR(handler_tx);
1263 		goto err_tx;
1264 	}
1265 
1266 	list_add(&handler_tx->list, &handler_rx->list);
1267 
1268 	return handler_rx;
1269 
1270 err_tx:
1271 	mlx5_del_flow_rules(handler_rx->rule);
1272 	ft_rx->refcount--;
1273 	kfree(handler_rx);
1274 err:
1275 	return ERR_PTR(err);
1276 }
1277 
1278 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
1279 					   struct ib_flow_attr *flow_attr,
1280 					   struct ib_udata *udata)
1281 {
1282 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
1283 	struct mlx5_ib_qp *mqp = to_mqp(qp);
1284 	struct mlx5_ib_flow_handler *handler = NULL;
1285 	struct mlx5_flow_destination *dst = NULL;
1286 	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
1287 	struct mlx5_ib_flow_prio *ft_prio;
1288 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
1289 	struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
1290 	size_t min_ucmd_sz, required_ucmd_sz;
1291 	int err;
1292 	int underlay_qpn;
1293 
1294 	if (udata && udata->inlen) {
1295 		min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
1296 		if (udata->inlen < min_ucmd_sz)
1297 			return ERR_PTR(-EOPNOTSUPP);
1298 
1299 		err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
1300 		if (err)
1301 			return ERR_PTR(err);
1302 
1303 		/* currently supports only one counters data */
1304 		if (ucmd_hdr.ncounters_data > 1)
1305 			return ERR_PTR(-EINVAL);
1306 
1307 		required_ucmd_sz = min_ucmd_sz +
1308 			sizeof(struct mlx5_ib_flow_counters_data) *
1309 			ucmd_hdr.ncounters_data;
1310 		if (udata->inlen > required_ucmd_sz &&
1311 		    !ib_is_udata_cleared(udata, required_ucmd_sz,
1312 					 udata->inlen - required_ucmd_sz))
1313 			return ERR_PTR(-EOPNOTSUPP);
1314 
1315 		ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
1316 		if (!ucmd)
1317 			return ERR_PTR(-ENOMEM);
1318 
1319 		err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
1320 		if (err)
1321 			goto free_ucmd;
1322 	}
1323 
1324 	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
1325 		err = -ENOMEM;
1326 		goto free_ucmd;
1327 	}
1328 
1329 	if (flow_attr->flags &
1330 	    ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
1331 		err = -EINVAL;
1332 		goto free_ucmd;
1333 	}
1334 
1335 	if (is_egress &&
1336 	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1337 	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
1338 		err = -EINVAL;
1339 		goto free_ucmd;
1340 	}
1341 
1342 	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1343 	if (!dst) {
1344 		err = -ENOMEM;
1345 		goto free_ucmd;
1346 	}
1347 
1348 	mutex_lock(&dev->flow_db->lock);
1349 
1350 	ft_prio = get_flow_table(dev, flow_attr,
1351 				 is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
1352 	if (IS_ERR(ft_prio)) {
1353 		err = PTR_ERR(ft_prio);
1354 		goto unlock;
1355 	}
1356 	if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
1357 		ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
1358 		if (IS_ERR(ft_prio_tx)) {
1359 			err = PTR_ERR(ft_prio_tx);
1360 			ft_prio_tx = NULL;
1361 			goto destroy_ft;
1362 		}
1363 	}
1364 
1365 	if (is_egress) {
1366 		dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1367 	} else {
1368 		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1369 		if (mqp->is_rss)
1370 			dst->tir_num = mqp->rss_qp.tirn;
1371 		else
1372 			dst->tir_num = mqp->raw_packet_qp.rq.tirn;
1373 	}
1374 
1375 	switch (flow_attr->type) {
1376 	case IB_FLOW_ATTR_NORMAL:
1377 		underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
1378 				       mqp->underlay_qpn :
1379 				       0;
1380 		handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
1381 					    underlay_qpn, ucmd);
1382 		break;
1383 	case IB_FLOW_ATTR_ALL_DEFAULT:
1384 	case IB_FLOW_ATTR_MC_DEFAULT:
1385 		handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
1386 		break;
1387 	case IB_FLOW_ATTR_SNIFFER:
1388 		handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
1389 		break;
1390 	default:
1391 		err = -EINVAL;
1392 		goto destroy_ft;
1393 	}
1394 
1395 	if (IS_ERR(handler)) {
1396 		err = PTR_ERR(handler);
1397 		handler = NULL;
1398 		goto destroy_ft;
1399 	}
1400 
1401 	mutex_unlock(&dev->flow_db->lock);
1402 	kfree(dst);
1403 	kfree(ucmd);
1404 
1405 	return &handler->ibflow;
1406 
1407 destroy_ft:
1408 	put_flow_table(dev, ft_prio, false);
1409 	if (ft_prio_tx)
1410 		put_flow_table(dev, ft_prio_tx, false);
1411 unlock:
1412 	mutex_unlock(&dev->flow_db->lock);
1413 	kfree(dst);
1414 free_ucmd:
1415 	kfree(ucmd);
1416 	return ERR_PTR(err);
1417 }
1418 
1419 static struct mlx5_ib_flow_prio *
1420 _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
1421 		enum mlx5_flow_namespace_type ns_type,
1422 		bool mcast)
1423 {
1424 	struct mlx5_flow_namespace *ns = NULL;
1425 	struct mlx5_ib_flow_prio *prio = NULL;
1426 	int max_table_size = 0;
1427 	bool esw_encap;
1428 	u32 flags = 0;
1429 	int priority;
1430 
1431 	if (mcast)
1432 		priority = MLX5_IB_FLOW_MCAST_PRIO;
1433 	else
1434 		priority = ib_prio_to_core_prio(user_priority, false);
1435 
1436 	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
1437 		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
1438 	switch (ns_type) {
1439 	case MLX5_FLOW_NAMESPACE_BYPASS:
1440 		max_table_size = BIT(
1441 			MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
1442 		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
1443 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1444 		if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
1445 					      reformat_l3_tunnel_to_l2) &&
1446 		    !esw_encap)
1447 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1448 		break;
1449 	case MLX5_FLOW_NAMESPACE_EGRESS:
1450 		max_table_size = BIT(
1451 			MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
1452 		if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
1453 		    !esw_encap)
1454 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1455 		break;
1456 	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
1457 		max_table_size = BIT(
1458 			MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
1459 		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
1460 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
1461 		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
1462 					       reformat_l3_tunnel_to_l2) &&
1463 		    esw_encap)
1464 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
1465 		priority = user_priority;
1466 		break;
1467 	case MLX5_FLOW_NAMESPACE_RDMA_RX:
1468 		max_table_size = BIT(
1469 			MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
1470 		priority = user_priority;
1471 		break;
1472 	case MLX5_FLOW_NAMESPACE_RDMA_TX:
1473 		max_table_size = BIT(
1474 			MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
1475 		priority = user_priority;
1476 		break;
1477 	default:
1478 		break;
1479 	}
1480 
1481 	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
1482 
1483 	ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
1484 	if (!ns)
1485 		return ERR_PTR(-EOPNOTSUPP);
1486 
1487 	switch (ns_type) {
1488 	case MLX5_FLOW_NAMESPACE_BYPASS:
1489 		prio = &dev->flow_db->prios[priority];
1490 		break;
1491 	case MLX5_FLOW_NAMESPACE_EGRESS:
1492 		prio = &dev->flow_db->egress_prios[priority];
1493 		break;
1494 	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
1495 		prio = &dev->flow_db->fdb[priority];
1496 		break;
1497 	case MLX5_FLOW_NAMESPACE_RDMA_RX:
1498 		prio = &dev->flow_db->rdma_rx[priority];
1499 		break;
1500 	case MLX5_FLOW_NAMESPACE_RDMA_TX:
1501 		prio = &dev->flow_db->rdma_tx[priority];
1502 		break;
1503 	default: return ERR_PTR(-EINVAL);
1504 	}
1505 
1506 	if (!prio)
1507 		return ERR_PTR(-EINVAL);
1508 
1509 	if (prio->flow_table)
1510 		return prio;
1511 
1512 	return _get_prio(dev, ns, prio, priority, max_table_size,
1513 			 MLX5_FS_MAX_TYPES, flags);
1514 }
1515 
1516 static struct mlx5_ib_flow_handler *
1517 _create_raw_flow_rule(struct mlx5_ib_dev *dev,
1518 		      struct mlx5_ib_flow_prio *ft_prio,
1519 		      struct mlx5_flow_destination *dst,
1520 		      struct mlx5_ib_flow_matcher  *fs_matcher,
1521 		      struct mlx5_flow_context *flow_context,
1522 		      struct mlx5_flow_act *flow_act,
1523 		      void *cmd_in, int inlen,
1524 		      int dst_num)
1525 {
1526 	struct mlx5_ib_flow_handler *handler;
1527 	struct mlx5_flow_spec *spec;
1528 	struct mlx5_flow_table *ft = ft_prio->flow_table;
1529 	int err = 0;
1530 
1531 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1532 	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
1533 	if (!handler || !spec) {
1534 		err = -ENOMEM;
1535 		goto free;
1536 	}
1537 
1538 	INIT_LIST_HEAD(&handler->list);
1539 
1540 	memcpy(spec->match_value, cmd_in, inlen);
1541 	memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
1542 	       fs_matcher->mask_len);
1543 	spec->match_criteria_enable = fs_matcher->match_criteria_enable;
1544 	spec->flow_context = *flow_context;
1545 
1546 	handler->rule = mlx5_add_flow_rules(ft, spec,
1547 					    flow_act, dst, dst_num);
1548 
1549 	if (IS_ERR(handler->rule)) {
1550 		err = PTR_ERR(handler->rule);
1551 		goto free;
1552 	}
1553 
1554 	ft_prio->refcount++;
1555 	handler->prio = ft_prio;
1556 	handler->dev = dev;
1557 	ft_prio->flow_table = ft;
1558 
1559 free:
1560 	if (err)
1561 		kfree(handler);
1562 	kvfree(spec);
1563 	return err ? ERR_PTR(err) : handler;
1564 }
1565 
1566 static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
1567 				void *match_v)
1568 {
1569 	void *match_c;
1570 	void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
1571 	void *dmac, *dmac_mask;
1572 	void *ipv4, *ipv4_mask;
1573 
1574 	if (!(fs_matcher->match_criteria_enable &
1575 	      (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
1576 		return false;
1577 
1578 	match_c = fs_matcher->matcher_mask.match_params;
1579 	match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
1580 					   outer_headers);
1581 	match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
1582 					   outer_headers);
1583 
1584 	dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1585 			    dmac_47_16);
1586 	dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1587 				 dmac_47_16);
1588 
1589 	if (is_multicast_ether_addr(dmac) &&
1590 	    is_multicast_ether_addr(dmac_mask))
1591 		return true;
1592 
1593 	ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1594 			    dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1595 
1596 	ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1597 				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1598 
1599 	if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
1600 	    ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
1601 		return true;
1602 
1603 	return false;
1604 }
1605 
1606 static struct mlx5_ib_flow_handler *raw_fs_rule_add(
1607 	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
1608 	struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
1609 	u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
1610 {
1611 	struct mlx5_flow_destination *dst;
1612 	struct mlx5_ib_flow_prio *ft_prio;
1613 	struct mlx5_ib_flow_handler *handler;
1614 	int dst_num = 0;
1615 	bool mcast;
1616 	int err;
1617 
1618 	if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
1619 		return ERR_PTR(-EOPNOTSUPP);
1620 
1621 	if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
1622 		return ERR_PTR(-ENOMEM);
1623 
1624 	dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
1625 	if (!dst)
1626 		return ERR_PTR(-ENOMEM);
1627 
1628 	mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
1629 	mutex_lock(&dev->flow_db->lock);
1630 
1631 	ft_prio = _get_flow_table(dev, fs_matcher->priority,
1632 				  fs_matcher->ns_type, mcast);
1633 	if (IS_ERR(ft_prio)) {
1634 		err = PTR_ERR(ft_prio);
1635 		goto unlock;
1636 	}
1637 
1638 	switch (dest_type) {
1639 	case MLX5_FLOW_DESTINATION_TYPE_TIR:
1640 		dst[dst_num].type = dest_type;
1641 		dst[dst_num++].tir_num = dest_id;
1642 		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1643 		break;
1644 	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
1645 		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
1646 		dst[dst_num++].ft_num = dest_id;
1647 		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1648 		break;
1649 	case MLX5_FLOW_DESTINATION_TYPE_PORT:
1650 		dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1651 		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
1652 		break;
1653 	default:
1654 		break;
1655 	}
1656 
1657 	if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1658 		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1659 		dst[dst_num].counter_id = counter_id;
1660 		dst_num++;
1661 	}
1662 
1663 	handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
1664 					fs_matcher, flow_context, flow_act,
1665 					cmd_in, inlen, dst_num);
1666 
1667 	if (IS_ERR(handler)) {
1668 		err = PTR_ERR(handler);
1669 		goto destroy_ft;
1670 	}
1671 
1672 	mutex_unlock(&dev->flow_db->lock);
1673 	atomic_inc(&fs_matcher->usecnt);
1674 	handler->flow_matcher = fs_matcher;
1675 
1676 	kfree(dst);
1677 
1678 	return handler;
1679 
1680 destroy_ft:
1681 	put_flow_table(dev, ft_prio, false);
1682 unlock:
1683 	mutex_unlock(&dev->flow_db->lock);
1684 	kfree(dst);
1685 
1686 	return ERR_PTR(err);
1687 }
1688 
1689 static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
1690 {
1691 	switch (maction->flow_action_raw.sub_type) {
1692 	case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
1693 		mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
1694 					   maction->flow_action_raw.modify_hdr);
1695 		break;
1696 	case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
1697 		mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
1698 					     maction->flow_action_raw.pkt_reformat);
1699 		break;
1700 	case MLX5_IB_FLOW_ACTION_DECAP:
1701 		break;
1702 	default:
1703 		break;
1704 	}
1705 }
1706 
1707 static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
1708 {
1709 	struct mlx5_ib_flow_action *maction = to_mflow_act(action);
1710 
1711 	switch (action->type) {
1712 	case IB_FLOW_ACTION_UNSPECIFIED:
1713 		destroy_flow_action_raw(maction);
1714 		break;
1715 	default:
1716 		WARN_ON(true);
1717 		break;
1718 	}
1719 
1720 	kfree(maction);
1721 	return 0;
1722 }
1723 
1724 static int
1725 mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
1726 			     enum mlx5_flow_namespace_type *namespace)
1727 {
1728 	switch (table_type) {
1729 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
1730 		*namespace = MLX5_FLOW_NAMESPACE_BYPASS;
1731 		break;
1732 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
1733 		*namespace = MLX5_FLOW_NAMESPACE_EGRESS;
1734 		break;
1735 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
1736 		*namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
1737 		break;
1738 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
1739 		*namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
1740 		break;
1741 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
1742 		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
1743 		break;
1744 	default:
1745 		return -EINVAL;
1746 	}
1747 
1748 	return 0;
1749 }
1750 
1751 static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
1752 	[MLX5_IB_FLOW_TYPE_NORMAL] = {
1753 		.type = UVERBS_ATTR_TYPE_PTR_IN,
1754 		.u.ptr = {
1755 			.len = sizeof(u16), /* data is priority */
1756 			.min_len = sizeof(u16),
1757 		}
1758 	},
1759 	[MLX5_IB_FLOW_TYPE_SNIFFER] = {
1760 		.type = UVERBS_ATTR_TYPE_PTR_IN,
1761 		UVERBS_ATTR_NO_DATA(),
1762 	},
1763 	[MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
1764 		.type = UVERBS_ATTR_TYPE_PTR_IN,
1765 		UVERBS_ATTR_NO_DATA(),
1766 	},
1767 	[MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
1768 		.type = UVERBS_ATTR_TYPE_PTR_IN,
1769 		UVERBS_ATTR_NO_DATA(),
1770 	},
1771 };
1772 
1773 static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
1774 {
1775 	struct devx_obj *devx_obj = obj;
1776 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1777 
1778 	switch (opcode) {
1779 	case MLX5_CMD_OP_DESTROY_TIR:
1780 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1781 		*dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
1782 				    obj_id);
1783 		return true;
1784 
1785 	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
1786 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1787 		*dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
1788 				    table_id);
1789 		return true;
1790 	default:
1791 		return false;
1792 	}
1793 }
1794 
1795 static int get_dests(struct uverbs_attr_bundle *attrs,
1796 		     struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
1797 		     int *dest_type, struct ib_qp **qp, u32 *flags)
1798 {
1799 	bool dest_devx, dest_qp;
1800 	void *devx_obj;
1801 	int err;
1802 
1803 	dest_devx = uverbs_attr_is_valid(attrs,
1804 					 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
1805 	dest_qp = uverbs_attr_is_valid(attrs,
1806 				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
1807 
1808 	*flags = 0;
1809 	err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
1810 				 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
1811 					 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
1812 	if (err)
1813 		return err;
1814 
1815 	/* Both flags are not allowed */
1816 	if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
1817 	    *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
1818 		return -EINVAL;
1819 
1820 	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
1821 		if (dest_devx && (dest_qp || *flags))
1822 			return -EINVAL;
1823 		else if (dest_qp && *flags)
1824 			return -EINVAL;
1825 	}
1826 
1827 	/* Allow only DEVX object, drop as dest for FDB */
1828 	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
1829 	    !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
1830 		return -EINVAL;
1831 
1832 	/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
1833 	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
1834 	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
1835 		return -EINVAL;
1836 
1837 	*qp = NULL;
1838 	if (dest_devx) {
1839 		devx_obj =
1840 			uverbs_attr_get_obj(attrs,
1841 					    MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
1842 
1843 		/* Verify that the given DEVX object is a flow
1844 		 * steering destination.
1845 		 */
1846 		if (!is_flow_dest(devx_obj, dest_id, dest_type))
1847 			return -EINVAL;
1848 		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
1849 		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
1850 		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
1851 		    *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
1852 			return -EINVAL;
1853 	} else if (dest_qp) {
1854 		struct mlx5_ib_qp *mqp;
1855 
1856 		*qp = uverbs_attr_get_obj(attrs,
1857 					  MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
1858 		if (IS_ERR(*qp))
1859 			return PTR_ERR(*qp);
1860 
1861 		if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
1862 			return -EINVAL;
1863 
1864 		mqp = to_mqp(*qp);
1865 		if (mqp->is_rss)
1866 			*dest_id = mqp->rss_qp.tirn;
1867 		else
1868 			*dest_id = mqp->raw_packet_qp.rq.tirn;
1869 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1870 	} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
1871 		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
1872 		   !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
1873 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
1874 	}
1875 
1876 	if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
1877 	    (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
1878 	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
1879 		return -EINVAL;
1880 
1881 	return 0;
1882 }
1883 
1884 static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
1885 {
1886 	struct devx_obj *devx_obj = obj;
1887 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1888 
1889 	if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
1890 
1891 		if (offset && offset >= devx_obj->flow_counter_bulk_size)
1892 			return false;
1893 
1894 		*counter_id = MLX5_GET(dealloc_flow_counter_in,
1895 				       devx_obj->dinbox,
1896 				       flow_counter_id);
1897 		*counter_id += offset;
1898 		return true;
1899 	}
1900 
1901 	return false;
1902 }
1903 
1904 #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
1905 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
1906 	struct uverbs_attr_bundle *attrs)
1907 {
1908 	struct mlx5_flow_context flow_context = {.flow_tag =
1909 		MLX5_FS_DEFAULT_FLOW_TAG};
1910 	u32 *offset_attr, offset = 0, counter_id = 0;
1911 	int dest_id, dest_type = -1, inlen, len, ret, i;
1912 	struct mlx5_ib_flow_handler *flow_handler;
1913 	struct mlx5_ib_flow_matcher *fs_matcher;
1914 	struct ib_uobject **arr_flow_actions;
1915 	struct ib_uflow_resources *uflow_res;
1916 	struct mlx5_flow_act flow_act = {};
1917 	struct ib_qp *qp = NULL;
1918 	void *devx_obj, *cmd_in;
1919 	struct ib_uobject *uobj;
1920 	struct mlx5_ib_dev *dev;
1921 	u32 flags;
1922 
1923 	if (!capable(CAP_NET_RAW))
1924 		return -EPERM;
1925 
1926 	fs_matcher = uverbs_attr_get_obj(attrs,
1927 					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
1928 	uobj =  uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
1929 	dev = mlx5_udata_to_mdev(&attrs->driver_udata);
1930 
1931 	if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
1932 		return -EINVAL;
1933 
1934 	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
1935 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
1936 
1937 	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
1938 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
1939 
1940 	len = uverbs_attr_get_uobjs_arr(attrs,
1941 		MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
1942 	if (len) {
1943 		devx_obj = arr_flow_actions[0]->object;
1944 
1945 		if (uverbs_attr_is_valid(attrs,
1946 					 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
1947 
1948 			int num_offsets = uverbs_attr_ptr_get_array_size(
1949 				attrs,
1950 				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
1951 				sizeof(u32));
1952 
1953 			if (num_offsets != 1)
1954 				return -EINVAL;
1955 
1956 			offset_attr = uverbs_attr_get_alloced_ptr(
1957 				attrs,
1958 				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
1959 			offset = *offset_attr;
1960 		}
1961 
1962 		if (!is_flow_counter(devx_obj, offset, &counter_id))
1963 			return -EINVAL;
1964 
1965 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1966 	}
1967 
1968 	cmd_in = uverbs_attr_get_alloced_ptr(
1969 		attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
1970 	inlen = uverbs_attr_get_len(attrs,
1971 				    MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
1972 
1973 	uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
1974 	if (!uflow_res)
1975 		return -ENOMEM;
1976 
1977 	len = uverbs_attr_get_uobjs_arr(attrs,
1978 		MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
1979 	for (i = 0; i < len; i++) {
1980 		struct mlx5_ib_flow_action *maction =
1981 			to_mflow_act(arr_flow_actions[i]->object);
1982 
1983 		ret = parse_flow_flow_action(maction, false, &flow_act);
1984 		if (ret)
1985 			goto err_out;
1986 		flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
1987 				   arr_flow_actions[i]->object);
1988 	}
1989 
1990 	ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
1991 			       MLX5_IB_ATTR_CREATE_FLOW_TAG);
1992 	if (!ret) {
1993 		if (flow_context.flow_tag >= BIT(24)) {
1994 			ret = -EINVAL;
1995 			goto err_out;
1996 		}
1997 		flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
1998 	}
1999 
2000 	flow_handler =
2001 		raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
2002 				counter_id, cmd_in, inlen, dest_id, dest_type);
2003 	if (IS_ERR(flow_handler)) {
2004 		ret = PTR_ERR(flow_handler);
2005 		goto err_out;
2006 	}
2007 
2008 	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
2009 
2010 	return 0;
2011 err_out:
2012 	ib_uverbs_flow_resources_free(uflow_res);
2013 	return ret;
2014 }
2015 
2016 static int flow_matcher_cleanup(struct ib_uobject *uobject,
2017 				enum rdma_remove_reason why,
2018 				struct uverbs_attr_bundle *attrs)
2019 {
2020 	struct mlx5_ib_flow_matcher *obj = uobject->object;
2021 
2022 	if (atomic_read(&obj->usecnt))
2023 		return -EBUSY;
2024 
2025 	kfree(obj);
2026 	return 0;
2027 }
2028 
2029 static int steering_anchor_cleanup(struct ib_uobject *uobject,
2030 				   enum rdma_remove_reason why,
2031 				   struct uverbs_attr_bundle *attrs)
2032 {
2033 	struct mlx5_ib_steering_anchor *obj = uobject->object;
2034 
2035 	if (atomic_read(&obj->usecnt))
2036 		return -EBUSY;
2037 
2038 	mutex_lock(&obj->dev->flow_db->lock);
2039 	put_flow_table(obj->dev, obj->ft_prio, true);
2040 	mutex_unlock(&obj->dev->flow_db->lock);
2041 
2042 	kfree(obj);
2043 	return 0;
2044 }
2045 
2046 static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
2047 			      struct mlx5_ib_flow_matcher *obj)
2048 {
2049 	enum mlx5_ib_uapi_flow_table_type ft_type =
2050 		MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
2051 	u32 flags;
2052 	int err;
2053 
2054 	/* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
2055 	 * users should switch to it. We leave this to not break userspace
2056 	 */
2057 	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
2058 	    uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
2059 		return -EINVAL;
2060 
2061 	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
2062 		err = uverbs_get_const(&ft_type, attrs,
2063 				       MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
2064 		if (err)
2065 			return err;
2066 
2067 		err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
2068 		if (err)
2069 			return err;
2070 
2071 		return 0;
2072 	}
2073 
2074 	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
2075 		err = uverbs_get_flags32(&flags, attrs,
2076 					 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2077 					 IB_FLOW_ATTR_FLAGS_EGRESS);
2078 		if (err)
2079 			return err;
2080 
2081 		if (flags)
2082 			return mlx5_ib_ft_type_to_namespace(
2083 				MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
2084 				&obj->ns_type);
2085 	}
2086 
2087 	obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
2088 
2089 	return 0;
2090 }
2091 
2092 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
2093 	struct uverbs_attr_bundle *attrs)
2094 {
2095 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2096 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
2097 	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2098 	struct mlx5_ib_flow_matcher *obj;
2099 	int err;
2100 
2101 	obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
2102 	if (!obj)
2103 		return -ENOMEM;
2104 
2105 	obj->mask_len = uverbs_attr_get_len(
2106 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2107 	err = uverbs_copy_from(&obj->matcher_mask,
2108 			       attrs,
2109 			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
2110 	if (err)
2111 		goto end;
2112 
2113 	obj->flow_type = uverbs_attr_get_enum_id(
2114 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2115 
2116 	if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
2117 		err = uverbs_copy_from(&obj->priority,
2118 				       attrs,
2119 				       MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
2120 		if (err)
2121 			goto end;
2122 	}
2123 
2124 	err = uverbs_copy_from(&obj->match_criteria_enable,
2125 			       attrs,
2126 			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
2127 	if (err)
2128 		goto end;
2129 
2130 	err = mlx5_ib_matcher_ns(attrs, obj);
2131 	if (err)
2132 		goto end;
2133 
2134 	if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
2135 	    mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
2136 		err = -EINVAL;
2137 		goto end;
2138 	}
2139 
2140 	uobj->object = obj;
2141 	obj->mdev = dev->mdev;
2142 	atomic_set(&obj->usecnt, 0);
2143 	return 0;
2144 
2145 end:
2146 	kfree(obj);
2147 	return err;
2148 }
2149 
2150 static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
2151 	struct uverbs_attr_bundle *attrs)
2152 {
2153 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2154 		attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE);
2155 	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
2156 	enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type;
2157 	enum mlx5_flow_namespace_type ns_type;
2158 	struct mlx5_ib_steering_anchor *obj;
2159 	struct mlx5_ib_flow_prio *ft_prio;
2160 	u16 priority;
2161 	u32 ft_id;
2162 	int err;
2163 
2164 	if (!capable(CAP_NET_RAW))
2165 		return -EPERM;
2166 
2167 	err = uverbs_get_const(&ib_uapi_ft_type, attrs,
2168 			       MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE);
2169 	if (err)
2170 		return err;
2171 
2172 	err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type);
2173 	if (err)
2174 		return err;
2175 
2176 	err = uverbs_copy_from(&priority, attrs,
2177 			       MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY);
2178 	if (err)
2179 		return err;
2180 
2181 	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
2182 	if (!obj)
2183 		return -ENOMEM;
2184 
2185 	mutex_lock(&dev->flow_db->lock);
2186 	ft_prio = _get_flow_table(dev, priority, ns_type, 0);
2187 	if (IS_ERR(ft_prio)) {
2188 		mutex_unlock(&dev->flow_db->lock);
2189 		err = PTR_ERR(ft_prio);
2190 		goto free_obj;
2191 	}
2192 
2193 	ft_prio->refcount++;
2194 	ft_id = mlx5_flow_table_id(ft_prio->flow_table);
2195 	mutex_unlock(&dev->flow_db->lock);
2196 
2197 	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
2198 			     &ft_id, sizeof(ft_id));
2199 	if (err)
2200 		goto put_flow_table;
2201 
2202 	uobj->object = obj;
2203 	obj->dev = dev;
2204 	obj->ft_prio = ft_prio;
2205 	atomic_set(&obj->usecnt, 0);
2206 
2207 	return 0;
2208 
2209 put_flow_table:
2210 	mutex_lock(&dev->flow_db->lock);
2211 	put_flow_table(dev, ft_prio, true);
2212 	mutex_unlock(&dev->flow_db->lock);
2213 free_obj:
2214 	kfree(obj);
2215 
2216 	return err;
2217 }
2218 
2219 static struct ib_flow_action *
2220 mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
2221 			     enum mlx5_ib_uapi_flow_table_type ft_type,
2222 			     u8 num_actions, void *in)
2223 {
2224 	enum mlx5_flow_namespace_type namespace;
2225 	struct mlx5_ib_flow_action *maction;
2226 	int ret;
2227 
2228 	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2229 	if (ret)
2230 		return ERR_PTR(-EINVAL);
2231 
2232 	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2233 	if (!maction)
2234 		return ERR_PTR(-ENOMEM);
2235 
2236 	maction->flow_action_raw.modify_hdr =
2237 		mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
2238 
2239 	if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
2240 		ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
2241 		kfree(maction);
2242 		return ERR_PTR(ret);
2243 	}
2244 	maction->flow_action_raw.sub_type =
2245 		MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
2246 	maction->flow_action_raw.dev = dev;
2247 
2248 	return &maction->ib_action;
2249 }
2250 
2251 static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
2252 {
2253 	return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
2254 					 max_modify_header_actions) ||
2255 	       MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
2256 					 max_modify_header_actions) ||
2257 	       MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
2258 					 max_modify_header_actions);
2259 }
2260 
2261 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
2262 	struct uverbs_attr_bundle *attrs)
2263 {
2264 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2265 		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
2266 	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2267 	enum mlx5_ib_uapi_flow_table_type ft_type;
2268 	struct ib_flow_action *action;
2269 	int num_actions;
2270 	void *in;
2271 	int ret;
2272 
2273 	if (!mlx5_ib_modify_header_supported(mdev))
2274 		return -EOPNOTSUPP;
2275 
2276 	in = uverbs_attr_get_alloced_ptr(attrs,
2277 		MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
2278 
2279 	num_actions = uverbs_attr_ptr_get_array_size(
2280 		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2281 		MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
2282 	if (num_actions < 0)
2283 		return num_actions;
2284 
2285 	ret = uverbs_get_const(&ft_type, attrs,
2286 			       MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
2287 	if (ret)
2288 		return ret;
2289 	action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
2290 	if (IS_ERR(action))
2291 		return PTR_ERR(action);
2292 
2293 	uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
2294 				       IB_FLOW_ACTION_UNSPECIFIED);
2295 
2296 	return 0;
2297 }
2298 
2299 static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
2300 						      u8 packet_reformat_type,
2301 						      u8 ft_type)
2302 {
2303 	switch (packet_reformat_type) {
2304 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2305 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2306 			return MLX5_CAP_FLOWTABLE(ibdev->mdev,
2307 						  encap_general_header);
2308 		break;
2309 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2310 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2311 			return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
2312 				reformat_l2_to_l3_tunnel);
2313 		break;
2314 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2315 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2316 			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
2317 				reformat_l3_tunnel_to_l2);
2318 		break;
2319 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
2320 		if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2321 			return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
2322 		break;
2323 	default:
2324 		break;
2325 	}
2326 
2327 	return false;
2328 }
2329 
2330 static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
2331 {
2332 	switch (dv_prt) {
2333 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2334 		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
2335 		break;
2336 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2337 		*prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
2338 		break;
2339 	case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2340 		*prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
2341 		break;
2342 	default:
2343 		return -EINVAL;
2344 	}
2345 
2346 	return 0;
2347 }
2348 
2349 static int mlx5_ib_flow_action_create_packet_reformat_ctx(
2350 	struct mlx5_ib_dev *dev,
2351 	struct mlx5_ib_flow_action *maction,
2352 	u8 ft_type, u8 dv_prt,
2353 	void *in, size_t len)
2354 {
2355 	struct mlx5_pkt_reformat_params reformat_params;
2356 	enum mlx5_flow_namespace_type namespace;
2357 	u8 prm_prt;
2358 	int ret;
2359 
2360 	ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2361 	if (ret)
2362 		return ret;
2363 
2364 	ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
2365 	if (ret)
2366 		return ret;
2367 
2368 	memset(&reformat_params, 0, sizeof(reformat_params));
2369 	reformat_params.type = prm_prt;
2370 	reformat_params.size = len;
2371 	reformat_params.data = in;
2372 	maction->flow_action_raw.pkt_reformat =
2373 		mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
2374 					   namespace);
2375 	if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
2376 		ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
2377 		return ret;
2378 	}
2379 
2380 	maction->flow_action_raw.sub_type =
2381 		MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
2382 	maction->flow_action_raw.dev = dev;
2383 
2384 	return 0;
2385 }
2386 
2387 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
2388 	struct uverbs_attr_bundle *attrs)
2389 {
2390 	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
2391 		MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
2392 	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
2393 	enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
2394 	enum mlx5_ib_uapi_flow_table_type ft_type;
2395 	struct mlx5_ib_flow_action *maction;
2396 	int ret;
2397 
2398 	ret = uverbs_get_const(&ft_type, attrs,
2399 			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
2400 	if (ret)
2401 		return ret;
2402 
2403 	ret = uverbs_get_const(&dv_prt, attrs,
2404 			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
2405 	if (ret)
2406 		return ret;
2407 
2408 	if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
2409 		return -EOPNOTSUPP;
2410 
2411 	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2412 	if (!maction)
2413 		return -ENOMEM;
2414 
2415 	if (dv_prt ==
2416 	    MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
2417 		maction->flow_action_raw.sub_type =
2418 			MLX5_IB_FLOW_ACTION_DECAP;
2419 		maction->flow_action_raw.dev = mdev;
2420 	} else {
2421 		void *in;
2422 		int len;
2423 
2424 		in = uverbs_attr_get_alloced_ptr(attrs,
2425 			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2426 		if (IS_ERR(in)) {
2427 			ret = PTR_ERR(in);
2428 			goto free_maction;
2429 		}
2430 
2431 		len = uverbs_attr_get_len(attrs,
2432 			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
2433 
2434 		ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
2435 			maction, ft_type, dv_prt, in, len);
2436 		if (ret)
2437 			goto free_maction;
2438 	}
2439 
2440 	uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
2441 				       IB_FLOW_ACTION_UNSPECIFIED);
2442 	return 0;
2443 
2444 free_maction:
2445 	kfree(maction);
2446 	return ret;
2447 }
2448 
2449 DECLARE_UVERBS_NAMED_METHOD(
2450 	MLX5_IB_METHOD_CREATE_FLOW,
2451 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
2452 			UVERBS_OBJECT_FLOW,
2453 			UVERBS_ACCESS_NEW,
2454 			UA_MANDATORY),
2455 	UVERBS_ATTR_PTR_IN(
2456 		MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
2457 		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
2458 		UA_MANDATORY,
2459 		UA_ALLOC_AND_COPY),
2460 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
2461 			MLX5_IB_OBJECT_FLOW_MATCHER,
2462 			UVERBS_ACCESS_READ,
2463 			UA_MANDATORY),
2464 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
2465 			UVERBS_OBJECT_QP,
2466 			UVERBS_ACCESS_READ),
2467 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
2468 			MLX5_IB_OBJECT_DEVX_OBJ,
2469 			UVERBS_ACCESS_READ),
2470 	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
2471 			     UVERBS_OBJECT_FLOW_ACTION,
2472 			     UVERBS_ACCESS_READ, 1,
2473 			     MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
2474 			     UA_OPTIONAL),
2475 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
2476 			   UVERBS_ATTR_TYPE(u32),
2477 			   UA_OPTIONAL),
2478 	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
2479 			     MLX5_IB_OBJECT_DEVX_OBJ,
2480 			     UVERBS_ACCESS_READ, 1, 1,
2481 			     UA_OPTIONAL),
2482 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
2483 			   UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
2484 			   UA_OPTIONAL,
2485 			   UA_ALLOC_AND_COPY),
2486 	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
2487 			     enum mlx5_ib_create_flow_flags,
2488 			     UA_OPTIONAL));
2489 
2490 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2491 	MLX5_IB_METHOD_DESTROY_FLOW,
2492 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
2493 			UVERBS_OBJECT_FLOW,
2494 			UVERBS_ACCESS_DESTROY,
2495 			UA_MANDATORY));
2496 
2497 ADD_UVERBS_METHODS(mlx5_ib_fs,
2498 		   UVERBS_OBJECT_FLOW,
2499 		   &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
2500 		   &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
2501 
2502 DECLARE_UVERBS_NAMED_METHOD(
2503 	MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
2504 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
2505 			UVERBS_OBJECT_FLOW_ACTION,
2506 			UVERBS_ACCESS_NEW,
2507 			UA_MANDATORY),
2508 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2509 			   UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
2510 				   set_add_copy_action_in_auto)),
2511 			   UA_MANDATORY,
2512 			   UA_ALLOC_AND_COPY),
2513 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
2514 			     enum mlx5_ib_uapi_flow_table_type,
2515 			     UA_MANDATORY));
2516 
2517 DECLARE_UVERBS_NAMED_METHOD(
2518 	MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
2519 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
2520 			UVERBS_OBJECT_FLOW_ACTION,
2521 			UVERBS_ACCESS_NEW,
2522 			UA_MANDATORY),
2523 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
2524 			   UVERBS_ATTR_MIN_SIZE(1),
2525 			   UA_ALLOC_AND_COPY,
2526 			   UA_OPTIONAL),
2527 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
2528 			     enum mlx5_ib_uapi_flow_action_packet_reformat_type,
2529 			     UA_MANDATORY),
2530 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
2531 			     enum mlx5_ib_uapi_flow_table_type,
2532 			     UA_MANDATORY));
2533 
2534 ADD_UVERBS_METHODS(
2535 	mlx5_ib_flow_actions,
2536 	UVERBS_OBJECT_FLOW_ACTION,
2537 	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
2538 	&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
2539 
2540 DECLARE_UVERBS_NAMED_METHOD(
2541 	MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
2542 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
2543 			MLX5_IB_OBJECT_FLOW_MATCHER,
2544 			UVERBS_ACCESS_NEW,
2545 			UA_MANDATORY),
2546 	UVERBS_ATTR_PTR_IN(
2547 		MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
2548 		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
2549 		UA_MANDATORY),
2550 	UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
2551 			    mlx5_ib_flow_type,
2552 			    UA_MANDATORY),
2553 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
2554 			   UVERBS_ATTR_TYPE(u8),
2555 			   UA_MANDATORY),
2556 	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
2557 			     enum ib_flow_flags,
2558 			     UA_OPTIONAL),
2559 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
2560 			     enum mlx5_ib_uapi_flow_table_type,
2561 			     UA_OPTIONAL));
2562 
2563 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2564 	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
2565 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
2566 			MLX5_IB_OBJECT_FLOW_MATCHER,
2567 			UVERBS_ACCESS_DESTROY,
2568 			UA_MANDATORY));
2569 
2570 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
2571 			    UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
2572 			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
2573 			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
2574 
2575 DECLARE_UVERBS_NAMED_METHOD(
2576 	MLX5_IB_METHOD_STEERING_ANCHOR_CREATE,
2577 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE,
2578 			MLX5_IB_OBJECT_STEERING_ANCHOR,
2579 			UVERBS_ACCESS_NEW,
2580 			UA_MANDATORY),
2581 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
2582 			     enum mlx5_ib_uapi_flow_table_type,
2583 			     UA_MANDATORY),
2584 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
2585 			   UVERBS_ATTR_TYPE(u16),
2586 			   UA_MANDATORY),
2587 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
2588 			   UVERBS_ATTR_TYPE(u32),
2589 			   UA_MANDATORY));
2590 
2591 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2592 	MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
2593 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE,
2594 			MLX5_IB_OBJECT_STEERING_ANCHOR,
2595 			UVERBS_ACCESS_DESTROY,
2596 			UA_MANDATORY));
2597 
2598 DECLARE_UVERBS_NAMED_OBJECT(
2599 	MLX5_IB_OBJECT_STEERING_ANCHOR,
2600 	UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup),
2601 	&UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE),
2602 	&UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
2603 
2604 const struct uapi_definition mlx5_ib_flow_defs[] = {
2605 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2606 		MLX5_IB_OBJECT_FLOW_MATCHER),
2607 	UAPI_DEF_CHAIN_OBJ_TREE(
2608 		UVERBS_OBJECT_FLOW,
2609 		&mlx5_ib_fs),
2610 	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
2611 				&mlx5_ib_flow_actions),
2612 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2613 		MLX5_IB_OBJECT_STEERING_ANCHOR,
2614 		UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
2615 	{},
2616 };
2617 
2618 static const struct ib_device_ops flow_ops = {
2619 	.create_flow = mlx5_ib_create_flow,
2620 	.destroy_flow = mlx5_ib_destroy_flow,
2621 	.destroy_flow_action = mlx5_ib_destroy_flow_action,
2622 };
2623 
2624 int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
2625 {
2626 	dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
2627 
2628 	if (!dev->flow_db)
2629 		return -ENOMEM;
2630 
2631 	mutex_init(&dev->flow_db->lock);
2632 
2633 	ib_set_device_ops(&dev->ib_dev, &flow_ops);
2634 	return 0;
2635 }
2636