1 /*
2  * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/mutex.h>
34 #include <linux/mlx5/driver.h>
35 #include <linux/mlx5/vport.h>
36 #include <linux/mlx5/eswitch.h>
37 
38 #include "mlx5_core.h"
39 #include "fs_core.h"
40 #include "fs_cmd.h"
41 #include "fs_ft_pool.h"
42 #include "diag/fs_tracepoint.h"
43 
44 #define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
45 					 sizeof(struct init_tree_node))
46 
47 #define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\
48 		 ...) {.type = FS_TYPE_PRIO,\
49 	.min_ft_level = min_level_val,\
50 	.num_levels = num_levels_val,\
51 	.num_leaf_prios = num_prios_val,\
52 	.caps = caps_val,\
53 	.children = (struct init_tree_node[]) {__VA_ARGS__},\
54 	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
55 }
56 
57 #define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\
58 	ADD_PRIO(num_prios_val, 0, num_levels_val, {},\
59 		 __VA_ARGS__)\
60 
61 #define ADD_NS(def_miss_act, ...) {.type = FS_TYPE_NAMESPACE,	\
62 	.def_miss_action = def_miss_act,\
63 	.children = (struct init_tree_node[]) {__VA_ARGS__},\
64 	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
65 }
66 
67 #define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
68 				   sizeof(long))
69 
70 #define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))
71 
72 #define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
73 			       .caps = (long[]) {__VA_ARGS__} }
74 
75 #define FS_CHAINING_CAPS  FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \
76 					   FS_CAP(flow_table_properties_nic_receive.modify_root), \
77 					   FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
78 					   FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
79 
80 #define FS_CHAINING_CAPS_EGRESS                                                \
81 	FS_REQUIRED_CAPS(                                                      \
82 		FS_CAP(flow_table_properties_nic_transmit.flow_modify_en),     \
83 		FS_CAP(flow_table_properties_nic_transmit.modify_root),        \
84 		FS_CAP(flow_table_properties_nic_transmit                      \
85 			       .identified_miss_table_mode),                   \
86 		FS_CAP(flow_table_properties_nic_transmit.flow_table_modify))
87 
88 #define FS_CHAINING_CAPS_RDMA_TX                                                \
89 	FS_REQUIRED_CAPS(                                                       \
90 		FS_CAP(flow_table_properties_nic_transmit_rdma.flow_modify_en), \
91 		FS_CAP(flow_table_properties_nic_transmit_rdma.modify_root),    \
92 		FS_CAP(flow_table_properties_nic_transmit_rdma                  \
93 			       .identified_miss_table_mode),                    \
94 		FS_CAP(flow_table_properties_nic_transmit_rdma                  \
95 			       .flow_table_modify))
96 
97 #define LEFTOVERS_NUM_LEVELS 1
98 #define LEFTOVERS_NUM_PRIOS 1
99 
100 #define RDMA_RX_COUNTERS_PRIO_NUM_LEVELS 1
101 #define RDMA_TX_COUNTERS_PRIO_NUM_LEVELS 1
102 
103 #define BY_PASS_PRIO_NUM_LEVELS 1
104 #define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
105 			   LEFTOVERS_NUM_PRIOS)
106 
107 #define KERNEL_RX_MACSEC_NUM_PRIOS  1
108 #define KERNEL_RX_MACSEC_NUM_LEVELS 2
109 #define KERNEL_RX_MACSEC_MIN_LEVEL (BY_PASS_MIN_LEVEL + KERNEL_RX_MACSEC_NUM_PRIOS)
110 
111 #define ETHTOOL_PRIO_NUM_LEVELS 1
112 #define ETHTOOL_NUM_PRIOS 11
113 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
114 /* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}} */
115 #define KERNEL_NIC_PRIO_NUM_LEVELS 7
116 #define KERNEL_NIC_NUM_PRIOS 1
117 /* One more level for tc */
118 #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
119 
120 #define KERNEL_NIC_TC_NUM_PRIOS  1
121 #define KERNEL_NIC_TC_NUM_LEVELS 3
122 
123 #define ANCHOR_NUM_LEVELS 1
124 #define ANCHOR_NUM_PRIOS 1
125 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
126 
127 #define OFFLOADS_MAX_FT 2
128 #define OFFLOADS_NUM_PRIOS 2
129 #define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS)
130 
131 #define LAG_PRIO_NUM_LEVELS 1
132 #define LAG_NUM_PRIOS 1
133 #define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + KERNEL_RX_MACSEC_MIN_LEVEL + 1)
134 
135 #define KERNEL_TX_IPSEC_NUM_PRIOS  1
136 #define KERNEL_TX_IPSEC_NUM_LEVELS 1
137 #define KERNEL_TX_IPSEC_MIN_LEVEL        (KERNEL_TX_IPSEC_NUM_LEVELS)
138 
139 #define KERNEL_TX_MACSEC_NUM_PRIOS  1
140 #define KERNEL_TX_MACSEC_NUM_LEVELS 2
141 #define KERNEL_TX_MACSEC_MIN_LEVEL       (KERNEL_TX_IPSEC_MIN_LEVEL + KERNEL_TX_MACSEC_NUM_PRIOS)
142 
143 struct node_caps {
144 	size_t	arr_sz;
145 	long	*caps;
146 };
147 
148 static struct init_tree_node {
149 	enum fs_node_type	type;
150 	struct init_tree_node *children;
151 	int ar_size;
152 	struct node_caps caps;
153 	int min_ft_level;
154 	int num_leaf_prios;
155 	int prio;
156 	int num_levels;
157 	enum mlx5_flow_table_miss_action def_miss_action;
158 } root_fs = {
159 	.type = FS_TYPE_NAMESPACE,
160 	.ar_size = 8,
161 	  .children = (struct init_tree_node[]){
162 		  ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
163 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
164 				  ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
165 						    BY_PASS_PRIO_NUM_LEVELS))),
166 		  ADD_PRIO(0, KERNEL_RX_MACSEC_MIN_LEVEL, 0, FS_CHAINING_CAPS,
167 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
168 				  ADD_MULTIPLE_PRIO(KERNEL_RX_MACSEC_NUM_PRIOS,
169 						    KERNEL_RX_MACSEC_NUM_LEVELS))),
170 		  ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS,
171 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
172 				  ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
173 						    LAG_PRIO_NUM_LEVELS))),
174 		  ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
175 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
176 				  ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS,
177 						    OFFLOADS_MAX_FT))),
178 		  ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, FS_CHAINING_CAPS,
179 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
180 				  ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS,
181 						    ETHTOOL_PRIO_NUM_LEVELS))),
182 		  ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
183 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
184 				  ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS,
185 						    KERNEL_NIC_TC_NUM_LEVELS),
186 				  ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
187 						    KERNEL_NIC_PRIO_NUM_LEVELS))),
188 		  ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
189 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
190 				  ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS,
191 						    LEFTOVERS_NUM_LEVELS))),
192 		  ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
193 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
194 				  ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS,
195 						    ANCHOR_NUM_LEVELS))),
196 	}
197 };
198 
199 static struct init_tree_node egress_root_fs = {
200 	.type = FS_TYPE_NAMESPACE,
201 	.ar_size = 3,
202 	.children = (struct init_tree_node[]) {
203 		ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
204 			 FS_CHAINING_CAPS_EGRESS,
205 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
206 				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
207 						  BY_PASS_PRIO_NUM_LEVELS))),
208 		ADD_PRIO(0, KERNEL_TX_IPSEC_MIN_LEVEL, 0,
209 			 FS_CHAINING_CAPS_EGRESS,
210 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
211 				ADD_MULTIPLE_PRIO(KERNEL_TX_IPSEC_NUM_PRIOS,
212 						  KERNEL_TX_IPSEC_NUM_LEVELS))),
213 		ADD_PRIO(0, KERNEL_TX_MACSEC_MIN_LEVEL, 0,
214 			 FS_CHAINING_CAPS_EGRESS,
215 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
216 				ADD_MULTIPLE_PRIO(KERNEL_TX_MACSEC_NUM_PRIOS,
217 						  KERNEL_TX_MACSEC_NUM_LEVELS))),
218 	}
219 };
220 
221 enum {
222 	RDMA_RX_COUNTERS_PRIO,
223 	RDMA_RX_BYPASS_PRIO,
224 	RDMA_RX_KERNEL_PRIO,
225 };
226 
227 #define RDMA_RX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_REGULAR_PRIOS
228 #define RDMA_RX_KERNEL_MIN_LEVEL (RDMA_RX_BYPASS_MIN_LEVEL + 1)
229 #define RDMA_RX_COUNTERS_MIN_LEVEL (RDMA_RX_KERNEL_MIN_LEVEL + 2)
230 
231 static struct init_tree_node rdma_rx_root_fs = {
232 	.type = FS_TYPE_NAMESPACE,
233 	.ar_size = 3,
234 	.children = (struct init_tree_node[]) {
235 		[RDMA_RX_COUNTERS_PRIO] =
236 		ADD_PRIO(0, RDMA_RX_COUNTERS_MIN_LEVEL, 0,
237 			 FS_CHAINING_CAPS,
238 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
239 				ADD_MULTIPLE_PRIO(MLX5_RDMA_RX_NUM_COUNTERS_PRIOS,
240 						  RDMA_RX_COUNTERS_PRIO_NUM_LEVELS))),
241 		[RDMA_RX_BYPASS_PRIO] =
242 		ADD_PRIO(0, RDMA_RX_BYPASS_MIN_LEVEL, 0,
243 			 FS_CHAINING_CAPS,
244 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
245 				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS,
246 						  BY_PASS_PRIO_NUM_LEVELS))),
247 		[RDMA_RX_KERNEL_PRIO] =
248 		ADD_PRIO(0, RDMA_RX_KERNEL_MIN_LEVEL, 0,
249 			 FS_CHAINING_CAPS,
250 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN,
251 				ADD_MULTIPLE_PRIO(1, 1))),
252 	}
253 };
254 
255 enum {
256 	RDMA_TX_COUNTERS_PRIO,
257 	RDMA_TX_BYPASS_PRIO,
258 };
259 
260 #define RDMA_TX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_PRIOS
261 #define RDMA_TX_COUNTERS_MIN_LEVEL (RDMA_TX_BYPASS_MIN_LEVEL + 1)
262 
263 static struct init_tree_node rdma_tx_root_fs = {
264 	.type = FS_TYPE_NAMESPACE,
265 	.ar_size = 2,
266 	.children = (struct init_tree_node[]) {
267 		[RDMA_TX_COUNTERS_PRIO] =
268 		ADD_PRIO(0, RDMA_TX_COUNTERS_MIN_LEVEL, 0,
269 			 FS_CHAINING_CAPS,
270 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
271 				ADD_MULTIPLE_PRIO(MLX5_RDMA_TX_NUM_COUNTERS_PRIOS,
272 						  RDMA_TX_COUNTERS_PRIO_NUM_LEVELS))),
273 		[RDMA_TX_BYPASS_PRIO] =
274 		ADD_PRIO(0, RDMA_TX_BYPASS_MIN_LEVEL, 0,
275 			 FS_CHAINING_CAPS_RDMA_TX,
276 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
277 				ADD_MULTIPLE_PRIO(RDMA_TX_BYPASS_MIN_LEVEL,
278 						  BY_PASS_PRIO_NUM_LEVELS))),
279 	}
280 };
281 
282 enum fs_i_lock_class {
283 	FS_LOCK_GRANDPARENT,
284 	FS_LOCK_PARENT,
285 	FS_LOCK_CHILD
286 };
287 
288 static const struct rhashtable_params rhash_fte = {
289 	.key_len = sizeof_field(struct fs_fte, val),
290 	.key_offset = offsetof(struct fs_fte, val),
291 	.head_offset = offsetof(struct fs_fte, hash),
292 	.automatic_shrinking = true,
293 	.min_size = 1,
294 };
295 
296 static const struct rhashtable_params rhash_fg = {
297 	.key_len = sizeof_field(struct mlx5_flow_group, mask),
298 	.key_offset = offsetof(struct mlx5_flow_group, mask),
299 	.head_offset = offsetof(struct mlx5_flow_group, hash),
300 	.automatic_shrinking = true,
301 	.min_size = 1,
302 
303 };
304 
305 static void del_hw_flow_table(struct fs_node *node);
306 static void del_hw_flow_group(struct fs_node *node);
307 static void del_hw_fte(struct fs_node *node);
308 static void del_sw_flow_table(struct fs_node *node);
309 static void del_sw_flow_group(struct fs_node *node);
310 static void del_sw_fte(struct fs_node *node);
311 static void del_sw_prio(struct fs_node *node);
312 static void del_sw_ns(struct fs_node *node);
313 /* Delete rule (destination) is special case that
314  * requires to lock the FTE for all the deletion process.
315  */
316 static void del_sw_hw_rule(struct fs_node *node);
317 static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
318 				struct mlx5_flow_destination *d2);
319 static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns);
320 static struct mlx5_flow_rule *
321 find_flow_rule(struct fs_fte *fte,
322 	       struct mlx5_flow_destination *dest);
323 
324 static void tree_init_node(struct fs_node *node,
325 			   void (*del_hw_func)(struct fs_node *),
326 			   void (*del_sw_func)(struct fs_node *))
327 {
328 	refcount_set(&node->refcount, 1);
329 	INIT_LIST_HEAD(&node->list);
330 	INIT_LIST_HEAD(&node->children);
331 	init_rwsem(&node->lock);
332 	node->del_hw_func = del_hw_func;
333 	node->del_sw_func = del_sw_func;
334 	node->active = false;
335 }
336 
337 static void tree_add_node(struct fs_node *node, struct fs_node *parent)
338 {
339 	if (parent)
340 		refcount_inc(&parent->refcount);
341 	node->parent = parent;
342 
343 	/* Parent is the root */
344 	if (!parent)
345 		node->root = node;
346 	else
347 		node->root = parent->root;
348 }
349 
350 static int tree_get_node(struct fs_node *node)
351 {
352 	return refcount_inc_not_zero(&node->refcount);
353 }
354 
355 static void nested_down_read_ref_node(struct fs_node *node,
356 				      enum fs_i_lock_class class)
357 {
358 	if (node) {
359 		down_read_nested(&node->lock, class);
360 		refcount_inc(&node->refcount);
361 	}
362 }
363 
364 static void nested_down_write_ref_node(struct fs_node *node,
365 				       enum fs_i_lock_class class)
366 {
367 	if (node) {
368 		down_write_nested(&node->lock, class);
369 		refcount_inc(&node->refcount);
370 	}
371 }
372 
373 static void down_write_ref_node(struct fs_node *node, bool locked)
374 {
375 	if (node) {
376 		if (!locked)
377 			down_write(&node->lock);
378 		refcount_inc(&node->refcount);
379 	}
380 }
381 
382 static void up_read_ref_node(struct fs_node *node)
383 {
384 	refcount_dec(&node->refcount);
385 	up_read(&node->lock);
386 }
387 
388 static void up_write_ref_node(struct fs_node *node, bool locked)
389 {
390 	refcount_dec(&node->refcount);
391 	if (!locked)
392 		up_write(&node->lock);
393 }
394 
395 static void tree_put_node(struct fs_node *node, bool locked)
396 {
397 	struct fs_node *parent_node = node->parent;
398 
399 	if (refcount_dec_and_test(&node->refcount)) {
400 		if (node->del_hw_func)
401 			node->del_hw_func(node);
402 		if (parent_node) {
403 			down_write_ref_node(parent_node, locked);
404 			list_del_init(&node->list);
405 		}
406 		node->del_sw_func(node);
407 		if (parent_node)
408 			up_write_ref_node(parent_node, locked);
409 		node = NULL;
410 	}
411 	if (!node && parent_node)
412 		tree_put_node(parent_node, locked);
413 }
414 
415 static int tree_remove_node(struct fs_node *node, bool locked)
416 {
417 	if (refcount_read(&node->refcount) > 1) {
418 		refcount_dec(&node->refcount);
419 		return -EEXIST;
420 	}
421 	tree_put_node(node, locked);
422 	return 0;
423 }
424 
425 static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
426 				 unsigned int prio)
427 {
428 	struct fs_prio *iter_prio;
429 
430 	fs_for_each_prio(iter_prio, ns) {
431 		if (iter_prio->prio == prio)
432 			return iter_prio;
433 	}
434 
435 	return NULL;
436 }
437 
438 static bool is_fwd_next_action(u32 action)
439 {
440 	return action & (MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
441 			 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS);
442 }
443 
444 static bool is_fwd_dest_type(enum mlx5_flow_destination_type type)
445 {
446 	return type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM ||
447 		type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE ||
448 		type == MLX5_FLOW_DESTINATION_TYPE_UPLINK ||
449 		type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
450 		type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER ||
451 		type == MLX5_FLOW_DESTINATION_TYPE_TIR;
452 }
453 
454 static bool check_valid_spec(const struct mlx5_flow_spec *spec)
455 {
456 	int i;
457 
458 	for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
459 		if (spec->match_value[i] & ~spec->match_criteria[i]) {
460 			pr_warn("mlx5_core: match_value differs from match_criteria\n");
461 			return false;
462 		}
463 
464 	return true;
465 }
466 
467 struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
468 {
469 	struct fs_node *root;
470 	struct mlx5_flow_namespace *ns;
471 
472 	root = node->root;
473 
474 	if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) {
475 		pr_warn("mlx5: flow steering node is not in tree or garbaged\n");
476 		return NULL;
477 	}
478 
479 	ns = container_of(root, struct mlx5_flow_namespace, node);
480 	return container_of(ns, struct mlx5_flow_root_namespace, ns);
481 }
482 
483 static inline struct mlx5_flow_steering *get_steering(struct fs_node *node)
484 {
485 	struct mlx5_flow_root_namespace *root = find_root(node);
486 
487 	if (root)
488 		return root->dev->priv.steering;
489 	return NULL;
490 }
491 
492 static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
493 {
494 	struct mlx5_flow_root_namespace *root = find_root(node);
495 
496 	if (root)
497 		return root->dev;
498 	return NULL;
499 }
500 
501 static void del_sw_ns(struct fs_node *node)
502 {
503 	kfree(node);
504 }
505 
506 static void del_sw_prio(struct fs_node *node)
507 {
508 	kfree(node);
509 }
510 
511 static void del_hw_flow_table(struct fs_node *node)
512 {
513 	struct mlx5_flow_root_namespace *root;
514 	struct mlx5_flow_table *ft;
515 	struct mlx5_core_dev *dev;
516 	int err;
517 
518 	fs_get_obj(ft, node);
519 	dev = get_dev(&ft->node);
520 	root = find_root(&ft->node);
521 	trace_mlx5_fs_del_ft(ft);
522 
523 	if (node->active) {
524 		err = root->cmds->destroy_flow_table(root, ft);
525 		if (err)
526 			mlx5_core_warn(dev, "flow steering can't destroy ft\n");
527 	}
528 }
529 
530 static void del_sw_flow_table(struct fs_node *node)
531 {
532 	struct mlx5_flow_table *ft;
533 	struct fs_prio *prio;
534 
535 	fs_get_obj(ft, node);
536 
537 	rhltable_destroy(&ft->fgs_hash);
538 	if (ft->node.parent) {
539 		fs_get_obj(prio, ft->node.parent);
540 		prio->num_ft--;
541 	}
542 	kfree(ft);
543 }
544 
545 static void modify_fte(struct fs_fte *fte)
546 {
547 	struct mlx5_flow_root_namespace *root;
548 	struct mlx5_flow_table *ft;
549 	struct mlx5_flow_group *fg;
550 	struct mlx5_core_dev *dev;
551 	int err;
552 
553 	fs_get_obj(fg, fte->node.parent);
554 	fs_get_obj(ft, fg->node.parent);
555 	dev = get_dev(&fte->node);
556 
557 	root = find_root(&ft->node);
558 	err = root->cmds->update_fte(root, ft, fg, fte->modify_mask, fte);
559 	if (err)
560 		mlx5_core_warn(dev,
561 			       "%s can't del rule fg id=%d fte_index=%d\n",
562 			       __func__, fg->id, fte->index);
563 	fte->modify_mask = 0;
564 }
565 
566 static void del_sw_hw_rule(struct fs_node *node)
567 {
568 	struct mlx5_flow_rule *rule;
569 	struct fs_fte *fte;
570 
571 	fs_get_obj(rule, node);
572 	fs_get_obj(fte, rule->node.parent);
573 	trace_mlx5_fs_del_rule(rule);
574 	if (is_fwd_next_action(rule->sw_action)) {
575 		mutex_lock(&rule->dest_attr.ft->lock);
576 		list_del(&rule->next_ft);
577 		mutex_unlock(&rule->dest_attr.ft->lock);
578 	}
579 
580 	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) {
581 		--fte->dests_size;
582 		fte->modify_mask |=
583 			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
584 			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
585 		fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
586 		goto out;
587 	}
588 
589 	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
590 		--fte->dests_size;
591 		fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
592 		fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW;
593 		goto out;
594 	}
595 
596 	if (is_fwd_dest_type(rule->dest_attr.type)) {
597 		--fte->dests_size;
598 		--fte->fwd_dests;
599 
600 		if (!fte->fwd_dests)
601 			fte->action.action &=
602 				~MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
603 		fte->modify_mask |=
604 			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
605 		goto out;
606 	}
607 out:
608 	kfree(rule);
609 }
610 
611 static void del_hw_fte(struct fs_node *node)
612 {
613 	struct mlx5_flow_root_namespace *root;
614 	struct mlx5_flow_table *ft;
615 	struct mlx5_flow_group *fg;
616 	struct mlx5_core_dev *dev;
617 	struct fs_fte *fte;
618 	int err;
619 
620 	fs_get_obj(fte, node);
621 	fs_get_obj(fg, fte->node.parent);
622 	fs_get_obj(ft, fg->node.parent);
623 
624 	trace_mlx5_fs_del_fte(fte);
625 	WARN_ON(fte->dests_size);
626 	dev = get_dev(&ft->node);
627 	root = find_root(&ft->node);
628 	if (node->active) {
629 		err = root->cmds->delete_fte(root, ft, fte);
630 		if (err)
631 			mlx5_core_warn(dev,
632 				       "flow steering can't delete fte in index %d of flow group id %d\n",
633 				       fte->index, fg->id);
634 		node->active = false;
635 	}
636 }
637 
638 static void del_sw_fte(struct fs_node *node)
639 {
640 	struct mlx5_flow_steering *steering = get_steering(node);
641 	struct mlx5_flow_group *fg;
642 	struct fs_fte *fte;
643 	int err;
644 
645 	fs_get_obj(fte, node);
646 	fs_get_obj(fg, fte->node.parent);
647 
648 	err = rhashtable_remove_fast(&fg->ftes_hash,
649 				     &fte->hash,
650 				     rhash_fte);
651 	WARN_ON(err);
652 	ida_free(&fg->fte_allocator, fte->index - fg->start_index);
653 	kmem_cache_free(steering->ftes_cache, fte);
654 }
655 
656 static void del_hw_flow_group(struct fs_node *node)
657 {
658 	struct mlx5_flow_root_namespace *root;
659 	struct mlx5_flow_group *fg;
660 	struct mlx5_flow_table *ft;
661 	struct mlx5_core_dev *dev;
662 
663 	fs_get_obj(fg, node);
664 	fs_get_obj(ft, fg->node.parent);
665 	dev = get_dev(&ft->node);
666 	trace_mlx5_fs_del_fg(fg);
667 
668 	root = find_root(&ft->node);
669 	if (fg->node.active && root->cmds->destroy_flow_group(root, ft, fg))
670 		mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
671 			       fg->id, ft->id);
672 }
673 
674 static void del_sw_flow_group(struct fs_node *node)
675 {
676 	struct mlx5_flow_steering *steering = get_steering(node);
677 	struct mlx5_flow_group *fg;
678 	struct mlx5_flow_table *ft;
679 	int err;
680 
681 	fs_get_obj(fg, node);
682 	fs_get_obj(ft, fg->node.parent);
683 
684 	rhashtable_destroy(&fg->ftes_hash);
685 	ida_destroy(&fg->fte_allocator);
686 	if (ft->autogroup.active &&
687 	    fg->max_ftes == ft->autogroup.group_size &&
688 	    fg->start_index < ft->autogroup.max_fte)
689 		ft->autogroup.num_groups--;
690 	err = rhltable_remove(&ft->fgs_hash,
691 			      &fg->hash,
692 			      rhash_fg);
693 	WARN_ON(err);
694 	kmem_cache_free(steering->fgs_cache, fg);
695 }
696 
697 static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
698 {
699 	int index;
700 	int ret;
701 
702 	index = ida_alloc_max(&fg->fte_allocator, fg->max_ftes - 1, GFP_KERNEL);
703 	if (index < 0)
704 		return index;
705 
706 	fte->index = index + fg->start_index;
707 	ret = rhashtable_insert_fast(&fg->ftes_hash,
708 				     &fte->hash,
709 				     rhash_fte);
710 	if (ret)
711 		goto err_ida_remove;
712 
713 	tree_add_node(&fte->node, &fg->node);
714 	list_add_tail(&fte->node.list, &fg->node.children);
715 	return 0;
716 
717 err_ida_remove:
718 	ida_free(&fg->fte_allocator, index);
719 	return ret;
720 }
721 
722 static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
723 				const struct mlx5_flow_spec *spec,
724 				struct mlx5_flow_act *flow_act)
725 {
726 	struct mlx5_flow_steering *steering = get_steering(&ft->node);
727 	struct fs_fte *fte;
728 
729 	fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL);
730 	if (!fte)
731 		return ERR_PTR(-ENOMEM);
732 
733 	memcpy(fte->val, &spec->match_value, sizeof(fte->val));
734 	fte->node.type =  FS_TYPE_FLOW_ENTRY;
735 	fte->action = *flow_act;
736 	fte->flow_context = spec->flow_context;
737 
738 	tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
739 
740 	return fte;
741 }
742 
743 static void dealloc_flow_group(struct mlx5_flow_steering *steering,
744 			       struct mlx5_flow_group *fg)
745 {
746 	rhashtable_destroy(&fg->ftes_hash);
747 	kmem_cache_free(steering->fgs_cache, fg);
748 }
749 
750 static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
751 						u8 match_criteria_enable,
752 						const void *match_criteria,
753 						int start_index,
754 						int end_index)
755 {
756 	struct mlx5_flow_group *fg;
757 	int ret;
758 
759 	fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL);
760 	if (!fg)
761 		return ERR_PTR(-ENOMEM);
762 
763 	ret = rhashtable_init(&fg->ftes_hash, &rhash_fte);
764 	if (ret) {
765 		kmem_cache_free(steering->fgs_cache, fg);
766 		return ERR_PTR(ret);
767 	}
768 
769 	ida_init(&fg->fte_allocator);
770 	fg->mask.match_criteria_enable = match_criteria_enable;
771 	memcpy(&fg->mask.match_criteria, match_criteria,
772 	       sizeof(fg->mask.match_criteria));
773 	fg->node.type =  FS_TYPE_FLOW_GROUP;
774 	fg->start_index = start_index;
775 	fg->max_ftes = end_index - start_index + 1;
776 
777 	return fg;
778 }
779 
780 static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
781 						       u8 match_criteria_enable,
782 						       const void *match_criteria,
783 						       int start_index,
784 						       int end_index,
785 						       struct list_head *prev)
786 {
787 	struct mlx5_flow_steering *steering = get_steering(&ft->node);
788 	struct mlx5_flow_group *fg;
789 	int ret;
790 
791 	fg = alloc_flow_group(steering, match_criteria_enable, match_criteria,
792 			      start_index, end_index);
793 	if (IS_ERR(fg))
794 		return fg;
795 
796 	/* initialize refcnt, add to parent list */
797 	ret = rhltable_insert(&ft->fgs_hash,
798 			      &fg->hash,
799 			      rhash_fg);
800 	if (ret) {
801 		dealloc_flow_group(steering, fg);
802 		return ERR_PTR(ret);
803 	}
804 
805 	tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group);
806 	tree_add_node(&fg->node, &ft->node);
807 	/* Add node to group list */
808 	list_add(&fg->node.list, prev);
809 	atomic_inc(&ft->node.version);
810 
811 	return fg;
812 }
813 
814 static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport,
815 						enum fs_flow_table_type table_type,
816 						enum fs_flow_table_op_mod op_mod,
817 						u32 flags)
818 {
819 	struct mlx5_flow_table *ft;
820 	int ret;
821 
822 	ft  = kzalloc(sizeof(*ft), GFP_KERNEL);
823 	if (!ft)
824 		return ERR_PTR(-ENOMEM);
825 
826 	ret = rhltable_init(&ft->fgs_hash, &rhash_fg);
827 	if (ret) {
828 		kfree(ft);
829 		return ERR_PTR(ret);
830 	}
831 
832 	ft->level = level;
833 	ft->node.type = FS_TYPE_FLOW_TABLE;
834 	ft->op_mod = op_mod;
835 	ft->type = table_type;
836 	ft->vport = vport;
837 	ft->flags = flags;
838 	INIT_LIST_HEAD(&ft->fwd_rules);
839 	mutex_init(&ft->lock);
840 
841 	return ft;
842 }
843 
844 /* If reverse is false, then we search for the first flow table in the
845  * root sub-tree from start(closest from right), else we search for the
846  * last flow table in the root sub-tree till start(closest from left).
847  */
848 static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node  *root,
849 							 struct list_head *start,
850 							 bool reverse)
851 {
852 #define list_advance_entry(pos, reverse)		\
853 	((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list))
854 
855 #define list_for_each_advance_continue(pos, head, reverse)	\
856 	for (pos = list_advance_entry(pos, reverse);		\
857 	     &pos->list != (head);				\
858 	     pos = list_advance_entry(pos, reverse))
859 
860 	struct fs_node *iter = list_entry(start, struct fs_node, list);
861 	struct mlx5_flow_table *ft = NULL;
862 
863 	if (!root || root->type == FS_TYPE_PRIO_CHAINS)
864 		return NULL;
865 
866 	list_for_each_advance_continue(iter, &root->children, reverse) {
867 		if (iter->type == FS_TYPE_FLOW_TABLE) {
868 			fs_get_obj(ft, iter);
869 			return ft;
870 		}
871 		ft = find_closest_ft_recursive(iter, &iter->children, reverse);
872 		if (ft)
873 			return ft;
874 	}
875 
876 	return ft;
877 }
878 
879 /* If reverse is false then return the first flow table in next priority of
880  * prio in the tree, else return the last flow table in the previous priority
881  * of prio in the tree.
882  */
883 static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
884 {
885 	struct mlx5_flow_table *ft = NULL;
886 	struct fs_node *curr_node;
887 	struct fs_node *parent;
888 
889 	parent = prio->node.parent;
890 	curr_node = &prio->node;
891 	while (!ft && parent) {
892 		ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
893 		curr_node = parent;
894 		parent = curr_node->parent;
895 	}
896 	return ft;
897 }
898 
899 /* Assuming all the tree is locked by mutex chain lock */
900 static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
901 {
902 	return find_closest_ft(prio, false);
903 }
904 
905 /* Assuming all the tree is locked by mutex chain lock */
906 static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
907 {
908 	return find_closest_ft(prio, true);
909 }
910 
911 static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
912 						struct mlx5_flow_act *flow_act)
913 {
914 	struct fs_prio *prio;
915 	bool next_ns;
916 
917 	next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
918 	fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent);
919 
920 	return find_next_chained_ft(prio);
921 }
922 
923 static int connect_fts_in_prio(struct mlx5_core_dev *dev,
924 			       struct fs_prio *prio,
925 			       struct mlx5_flow_table *ft)
926 {
927 	struct mlx5_flow_root_namespace *root = find_root(&prio->node);
928 	struct mlx5_flow_table *iter;
929 	int err;
930 
931 	fs_for_each_ft(iter, prio) {
932 		err = root->cmds->modify_flow_table(root, iter, ft);
933 		if (err) {
934 			mlx5_core_err(dev,
935 				      "Failed to modify flow table id %d, type %d, err %d\n",
936 				      iter->id, iter->type, err);
937 			/* The driver is out of sync with the FW */
938 			return err;
939 		}
940 	}
941 	return 0;
942 }
943 
944 /* Connect flow tables from previous priority of prio to ft */
945 static int connect_prev_fts(struct mlx5_core_dev *dev,
946 			    struct mlx5_flow_table *ft,
947 			    struct fs_prio *prio)
948 {
949 	struct mlx5_flow_table *prev_ft;
950 
951 	prev_ft = find_prev_chained_ft(prio);
952 	if (prev_ft) {
953 		struct fs_prio *prev_prio;
954 
955 		fs_get_obj(prev_prio, prev_ft->node.parent);
956 		return connect_fts_in_prio(dev, prev_prio, ft);
957 	}
958 	return 0;
959 }
960 
961 static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
962 				 *prio)
963 {
964 	struct mlx5_flow_root_namespace *root = find_root(&prio->node);
965 	struct mlx5_ft_underlay_qp *uqp;
966 	int min_level = INT_MAX;
967 	int err = 0;
968 	u32 qpn;
969 
970 	if (root->root_ft)
971 		min_level = root->root_ft->level;
972 
973 	if (ft->level >= min_level)
974 		return 0;
975 
976 	if (list_empty(&root->underlay_qpns)) {
977 		/* Don't set any QPN (zero) in case QPN list is empty */
978 		qpn = 0;
979 		err = root->cmds->update_root_ft(root, ft, qpn, false);
980 	} else {
981 		list_for_each_entry(uqp, &root->underlay_qpns, list) {
982 			qpn = uqp->qpn;
983 			err = root->cmds->update_root_ft(root, ft,
984 							 qpn, false);
985 			if (err)
986 				break;
987 		}
988 	}
989 
990 	if (err)
991 		mlx5_core_warn(root->dev,
992 			       "Update root flow table of id(%u) qpn(%d) failed\n",
993 			       ft->id, qpn);
994 	else
995 		root->root_ft = ft;
996 
997 	return err;
998 }
999 
1000 static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
1001 					 struct mlx5_flow_destination *dest)
1002 {
1003 	struct mlx5_flow_root_namespace *root;
1004 	struct mlx5_flow_table *ft;
1005 	struct mlx5_flow_group *fg;
1006 	struct fs_fte *fte;
1007 	int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
1008 	int err = 0;
1009 
1010 	fs_get_obj(fte, rule->node.parent);
1011 	if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
1012 		return -EINVAL;
1013 	down_write_ref_node(&fte->node, false);
1014 	fs_get_obj(fg, fte->node.parent);
1015 	fs_get_obj(ft, fg->node.parent);
1016 
1017 	memcpy(&rule->dest_attr, dest, sizeof(*dest));
1018 	root = find_root(&ft->node);
1019 	err = root->cmds->update_fte(root, ft, fg,
1020 				     modify_mask, fte);
1021 	up_write_ref_node(&fte->node, false);
1022 
1023 	return err;
1024 }
1025 
1026 int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
1027 				 struct mlx5_flow_destination *new_dest,
1028 				 struct mlx5_flow_destination *old_dest)
1029 {
1030 	int i;
1031 
1032 	if (!old_dest) {
1033 		if (handle->num_rules != 1)
1034 			return -EINVAL;
1035 		return _mlx5_modify_rule_destination(handle->rule[0],
1036 						     new_dest);
1037 	}
1038 
1039 	for (i = 0; i < handle->num_rules; i++) {
1040 		if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr))
1041 			return _mlx5_modify_rule_destination(handle->rule[i],
1042 							     new_dest);
1043 	}
1044 
1045 	return -EINVAL;
1046 }
1047 
1048 /* Modify/set FWD rules that point on old_next_ft to point on new_next_ft  */
1049 static int connect_fwd_rules(struct mlx5_core_dev *dev,
1050 			     struct mlx5_flow_table *new_next_ft,
1051 			     struct mlx5_flow_table *old_next_ft)
1052 {
1053 	struct mlx5_flow_destination dest = {};
1054 	struct mlx5_flow_rule *iter;
1055 	int err = 0;
1056 
1057 	/* new_next_ft and old_next_ft could be NULL only
1058 	 * when we create/destroy the anchor flow table.
1059 	 */
1060 	if (!new_next_ft || !old_next_ft)
1061 		return 0;
1062 
1063 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1064 	dest.ft = new_next_ft;
1065 
1066 	mutex_lock(&old_next_ft->lock);
1067 	list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
1068 	mutex_unlock(&old_next_ft->lock);
1069 	list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
1070 		if ((iter->sw_action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS) &&
1071 		    iter->ft->ns == new_next_ft->ns)
1072 			continue;
1073 
1074 		err = _mlx5_modify_rule_destination(iter, &dest);
1075 		if (err)
1076 			pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
1077 			       new_next_ft->id);
1078 	}
1079 	return 0;
1080 }
1081 
1082 static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
1083 			      struct fs_prio *prio)
1084 {
1085 	struct mlx5_flow_table *next_ft, *first_ft;
1086 	int err = 0;
1087 
1088 	/* Connect_prev_fts and update_root_ft_create are mutually exclusive */
1089 
1090 	first_ft = list_first_entry_or_null(&prio->node.children,
1091 					    struct mlx5_flow_table, node.list);
1092 	if (!first_ft || first_ft->level > ft->level) {
1093 		err = connect_prev_fts(dev, ft, prio);
1094 		if (err)
1095 			return err;
1096 
1097 		next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
1098 		err = connect_fwd_rules(dev, ft, next_ft);
1099 		if (err)
1100 			return err;
1101 	}
1102 
1103 	if (MLX5_CAP_FLOWTABLE(dev,
1104 			       flow_table_properties_nic_receive.modify_root))
1105 		err = update_root_ft_create(ft, prio);
1106 	return err;
1107 }
1108 
1109 static void list_add_flow_table(struct mlx5_flow_table *ft,
1110 				struct fs_prio *prio)
1111 {
1112 	struct list_head *prev = &prio->node.children;
1113 	struct mlx5_flow_table *iter;
1114 
1115 	fs_for_each_ft(iter, prio) {
1116 		if (iter->level > ft->level)
1117 			break;
1118 		prev = &iter->node.list;
1119 	}
1120 	list_add(&ft->node.list, prev);
1121 }
1122 
1123 static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
1124 							struct mlx5_flow_table_attr *ft_attr,
1125 							enum fs_flow_table_op_mod op_mod,
1126 							u16 vport)
1127 {
1128 	struct mlx5_flow_root_namespace *root = find_root(&ns->node);
1129 	bool unmanaged = ft_attr->flags & MLX5_FLOW_TABLE_UNMANAGED;
1130 	struct mlx5_flow_table *next_ft;
1131 	struct fs_prio *fs_prio = NULL;
1132 	struct mlx5_flow_table *ft;
1133 	int err;
1134 
1135 	if (!root) {
1136 		pr_err("mlx5: flow steering failed to find root of namespace\n");
1137 		return ERR_PTR(-ENODEV);
1138 	}
1139 
1140 	mutex_lock(&root->chain_lock);
1141 	fs_prio = find_prio(ns, ft_attr->prio);
1142 	if (!fs_prio) {
1143 		err = -EINVAL;
1144 		goto unlock_root;
1145 	}
1146 	if (!unmanaged) {
1147 		/* The level is related to the
1148 		 * priority level range.
1149 		 */
1150 		if (ft_attr->level >= fs_prio->num_levels) {
1151 			err = -ENOSPC;
1152 			goto unlock_root;
1153 		}
1154 
1155 		ft_attr->level += fs_prio->start_level;
1156 	}
1157 
1158 	/* The level is related to the
1159 	 * priority level range.
1160 	 */
1161 	ft = alloc_flow_table(ft_attr->level,
1162 			      vport,
1163 			      root->table_type,
1164 			      op_mod, ft_attr->flags);
1165 	if (IS_ERR(ft)) {
1166 		err = PTR_ERR(ft);
1167 		goto unlock_root;
1168 	}
1169 
1170 	tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
1171 	next_ft = unmanaged ? ft_attr->next_ft :
1172 			      find_next_chained_ft(fs_prio);
1173 	ft->def_miss_action = ns->def_miss_action;
1174 	ft->ns = ns;
1175 	err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft);
1176 	if (err)
1177 		goto free_ft;
1178 
1179 	if (!unmanaged) {
1180 		err = connect_flow_table(root->dev, ft, fs_prio);
1181 		if (err)
1182 			goto destroy_ft;
1183 	}
1184 
1185 	ft->node.active = true;
1186 	down_write_ref_node(&fs_prio->node, false);
1187 	if (!unmanaged) {
1188 		tree_add_node(&ft->node, &fs_prio->node);
1189 		list_add_flow_table(ft, fs_prio);
1190 	} else {
1191 		ft->node.root = fs_prio->node.root;
1192 	}
1193 	fs_prio->num_ft++;
1194 	up_write_ref_node(&fs_prio->node, false);
1195 	mutex_unlock(&root->chain_lock);
1196 	trace_mlx5_fs_add_ft(ft);
1197 	return ft;
1198 destroy_ft:
1199 	root->cmds->destroy_flow_table(root, ft);
1200 free_ft:
1201 	rhltable_destroy(&ft->fgs_hash);
1202 	kfree(ft);
1203 unlock_root:
1204 	mutex_unlock(&root->chain_lock);
1205 	return ERR_PTR(err);
1206 }
1207 
1208 struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
1209 					       struct mlx5_flow_table_attr *ft_attr)
1210 {
1211 	return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0);
1212 }
1213 EXPORT_SYMBOL(mlx5_create_flow_table);
1214 
1215 u32 mlx5_flow_table_id(struct mlx5_flow_table *ft)
1216 {
1217 	return ft->id;
1218 }
1219 EXPORT_SYMBOL(mlx5_flow_table_id);
1220 
1221 struct mlx5_flow_table *
1222 mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
1223 			     struct mlx5_flow_table_attr *ft_attr, u16 vport)
1224 {
1225 	return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, vport);
1226 }
1227 
1228 struct mlx5_flow_table*
1229 mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
1230 				 int prio, u32 level)
1231 {
1232 	struct mlx5_flow_table_attr ft_attr = {};
1233 
1234 	ft_attr.level = level;
1235 	ft_attr.prio  = prio;
1236 	ft_attr.max_fte = 1;
1237 
1238 	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
1239 }
1240 EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
1241 
1242 #define MAX_FLOW_GROUP_SIZE BIT(24)
1243 struct mlx5_flow_table*
1244 mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
1245 				    struct mlx5_flow_table_attr *ft_attr)
1246 {
1247 	int num_reserved_entries = ft_attr->autogroup.num_reserved_entries;
1248 	int max_num_groups = ft_attr->autogroup.max_num_groups;
1249 	struct mlx5_flow_table *ft;
1250 	int autogroups_max_fte;
1251 
1252 	ft = mlx5_create_flow_table(ns, ft_attr);
1253 	if (IS_ERR(ft))
1254 		return ft;
1255 
1256 	autogroups_max_fte = ft->max_fte - num_reserved_entries;
1257 	if (max_num_groups > autogroups_max_fte)
1258 		goto err_validate;
1259 	if (num_reserved_entries > ft->max_fte)
1260 		goto err_validate;
1261 
1262 	/* Align the number of groups according to the largest group size */
1263 	if (autogroups_max_fte / (max_num_groups + 1) > MAX_FLOW_GROUP_SIZE)
1264 		max_num_groups = (autogroups_max_fte / MAX_FLOW_GROUP_SIZE) - 1;
1265 
1266 	ft->autogroup.active = true;
1267 	ft->autogroup.required_groups = max_num_groups;
1268 	ft->autogroup.max_fte = autogroups_max_fte;
1269 	/* We save place for flow groups in addition to max types */
1270 	ft->autogroup.group_size = autogroups_max_fte / (max_num_groups + 1);
1271 
1272 	return ft;
1273 
1274 err_validate:
1275 	mlx5_destroy_flow_table(ft);
1276 	return ERR_PTR(-ENOSPC);
1277 }
1278 EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
1279 
1280 struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
1281 					       u32 *fg_in)
1282 {
1283 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
1284 	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
1285 					    fg_in, match_criteria);
1286 	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
1287 					    fg_in,
1288 					    match_criteria_enable);
1289 	int start_index = MLX5_GET(create_flow_group_in, fg_in,
1290 				   start_flow_index);
1291 	int end_index = MLX5_GET(create_flow_group_in, fg_in,
1292 				 end_flow_index);
1293 	struct mlx5_flow_group *fg;
1294 	int err;
1295 
1296 	if (ft->autogroup.active && start_index < ft->autogroup.max_fte)
1297 		return ERR_PTR(-EPERM);
1298 
1299 	down_write_ref_node(&ft->node, false);
1300 	fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria,
1301 				     start_index, end_index,
1302 				     ft->node.children.prev);
1303 	up_write_ref_node(&ft->node, false);
1304 	if (IS_ERR(fg))
1305 		return fg;
1306 
1307 	err = root->cmds->create_flow_group(root, ft, fg_in, fg);
1308 	if (err) {
1309 		tree_put_node(&fg->node, false);
1310 		return ERR_PTR(err);
1311 	}
1312 	trace_mlx5_fs_add_fg(fg);
1313 	fg->node.active = true;
1314 
1315 	return fg;
1316 }
1317 EXPORT_SYMBOL(mlx5_create_flow_group);
1318 
1319 static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
1320 {
1321 	struct mlx5_flow_rule *rule;
1322 
1323 	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
1324 	if (!rule)
1325 		return NULL;
1326 
1327 	INIT_LIST_HEAD(&rule->next_ft);
1328 	rule->node.type = FS_TYPE_FLOW_DEST;
1329 	if (dest)
1330 		memcpy(&rule->dest_attr, dest, sizeof(*dest));
1331 	else
1332 		rule->dest_attr.type = MLX5_FLOW_DESTINATION_TYPE_NONE;
1333 
1334 	return rule;
1335 }
1336 
1337 static struct mlx5_flow_handle *alloc_handle(int num_rules)
1338 {
1339 	struct mlx5_flow_handle *handle;
1340 
1341 	handle = kzalloc(struct_size(handle, rule, num_rules), GFP_KERNEL);
1342 	if (!handle)
1343 		return NULL;
1344 
1345 	handle->num_rules = num_rules;
1346 
1347 	return handle;
1348 }
1349 
1350 static void destroy_flow_handle(struct fs_fte *fte,
1351 				struct mlx5_flow_handle *handle,
1352 				struct mlx5_flow_destination *dest,
1353 				int i)
1354 {
1355 	for (; --i >= 0;) {
1356 		if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) {
1357 			fte->dests_size--;
1358 			list_del(&handle->rule[i]->node.list);
1359 			kfree(handle->rule[i]);
1360 		}
1361 	}
1362 	kfree(handle);
1363 }
1364 
1365 static struct mlx5_flow_handle *
1366 create_flow_handle(struct fs_fte *fte,
1367 		   struct mlx5_flow_destination *dest,
1368 		   int dest_num,
1369 		   int *modify_mask,
1370 		   bool *new_rule)
1371 {
1372 	struct mlx5_flow_handle *handle;
1373 	struct mlx5_flow_rule *rule = NULL;
1374 	static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
1375 	static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
1376 	int type;
1377 	int i = 0;
1378 
1379 	handle = alloc_handle((dest_num) ? dest_num : 1);
1380 	if (!handle)
1381 		return ERR_PTR(-ENOMEM);
1382 
1383 	do {
1384 		if (dest) {
1385 			rule = find_flow_rule(fte, dest + i);
1386 			if (rule) {
1387 				refcount_inc(&rule->node.refcount);
1388 				goto rule_found;
1389 			}
1390 		}
1391 
1392 		*new_rule = true;
1393 		rule = alloc_rule(dest + i);
1394 		if (!rule)
1395 			goto free_rules;
1396 
1397 		/* Add dest to dests list- we need flow tables to be in the
1398 		 * end of the list for forward to next prio rules.
1399 		 */
1400 		tree_init_node(&rule->node, NULL, del_sw_hw_rule);
1401 		if (dest &&
1402 		    dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
1403 			list_add(&rule->node.list, &fte->node.children);
1404 		else
1405 			list_add_tail(&rule->node.list, &fte->node.children);
1406 		if (dest) {
1407 			fte->dests_size++;
1408 
1409 			if (is_fwd_dest_type(dest[i].type))
1410 				fte->fwd_dests++;
1411 
1412 			type = dest[i].type ==
1413 				MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1414 			*modify_mask |= type ? count : dst;
1415 		}
1416 rule_found:
1417 		handle->rule[i] = rule;
1418 	} while (++i < dest_num);
1419 
1420 	return handle;
1421 
1422 free_rules:
1423 	destroy_flow_handle(fte, handle, dest, i);
1424 	return ERR_PTR(-ENOMEM);
1425 }
1426 
1427 /* fte should not be deleted while calling this function */
1428 static struct mlx5_flow_handle *
1429 add_rule_fte(struct fs_fte *fte,
1430 	     struct mlx5_flow_group *fg,
1431 	     struct mlx5_flow_destination *dest,
1432 	     int dest_num,
1433 	     bool update_action)
1434 {
1435 	struct mlx5_flow_root_namespace *root;
1436 	struct mlx5_flow_handle *handle;
1437 	struct mlx5_flow_table *ft;
1438 	int modify_mask = 0;
1439 	int err;
1440 	bool new_rule = false;
1441 
1442 	handle = create_flow_handle(fte, dest, dest_num, &modify_mask,
1443 				    &new_rule);
1444 	if (IS_ERR(handle) || !new_rule)
1445 		goto out;
1446 
1447 	if (update_action)
1448 		modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
1449 
1450 	fs_get_obj(ft, fg->node.parent);
1451 	root = find_root(&fg->node);
1452 	if (!(fte->status & FS_FTE_STATUS_EXISTING))
1453 		err = root->cmds->create_fte(root, ft, fg, fte);
1454 	else
1455 		err = root->cmds->update_fte(root, ft, fg, modify_mask, fte);
1456 	if (err)
1457 		goto free_handle;
1458 
1459 	fte->node.active = true;
1460 	fte->status |= FS_FTE_STATUS_EXISTING;
1461 	atomic_inc(&fg->node.version);
1462 
1463 out:
1464 	return handle;
1465 
1466 free_handle:
1467 	destroy_flow_handle(fte, handle, dest, handle->num_rules);
1468 	return ERR_PTR(err);
1469 }
1470 
1471 static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table  *ft,
1472 						     const struct mlx5_flow_spec *spec)
1473 {
1474 	struct list_head *prev = &ft->node.children;
1475 	u32 max_fte = ft->autogroup.max_fte;
1476 	unsigned int candidate_index = 0;
1477 	unsigned int group_size = 0;
1478 	struct mlx5_flow_group *fg;
1479 
1480 	if (!ft->autogroup.active)
1481 		return ERR_PTR(-ENOENT);
1482 
1483 	if (ft->autogroup.num_groups < ft->autogroup.required_groups)
1484 		group_size = ft->autogroup.group_size;
1485 
1486 	/*  max_fte == ft->autogroup.max_types */
1487 	if (group_size == 0)
1488 		group_size = 1;
1489 
1490 	/* sorted by start_index */
1491 	fs_for_each_fg(fg, ft) {
1492 		if (candidate_index + group_size > fg->start_index)
1493 			candidate_index = fg->start_index + fg->max_ftes;
1494 		else
1495 			break;
1496 		prev = &fg->node.list;
1497 	}
1498 
1499 	if (candidate_index + group_size > max_fte)
1500 		return ERR_PTR(-ENOSPC);
1501 
1502 	fg = alloc_insert_flow_group(ft,
1503 				     spec->match_criteria_enable,
1504 				     spec->match_criteria,
1505 				     candidate_index,
1506 				     candidate_index + group_size - 1,
1507 				     prev);
1508 	if (IS_ERR(fg))
1509 		goto out;
1510 
1511 	if (group_size == ft->autogroup.group_size)
1512 		ft->autogroup.num_groups++;
1513 
1514 out:
1515 	return fg;
1516 }
1517 
1518 static int create_auto_flow_group(struct mlx5_flow_table *ft,
1519 				  struct mlx5_flow_group *fg)
1520 {
1521 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
1522 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1523 	void *match_criteria_addr;
1524 	u8 src_esw_owner_mask_on;
1525 	void *misc;
1526 	int err;
1527 	u32 *in;
1528 
1529 	in = kvzalloc(inlen, GFP_KERNEL);
1530 	if (!in)
1531 		return -ENOMEM;
1532 
1533 	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
1534 		 fg->mask.match_criteria_enable);
1535 	MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index);
1536 	MLX5_SET(create_flow_group_in, in, end_flow_index,   fg->start_index +
1537 		 fg->max_ftes - 1);
1538 
1539 	misc = MLX5_ADDR_OF(fte_match_param, fg->mask.match_criteria,
1540 			    misc_parameters);
1541 	src_esw_owner_mask_on = !!MLX5_GET(fte_match_set_misc, misc,
1542 					 source_eswitch_owner_vhca_id);
1543 	MLX5_SET(create_flow_group_in, in,
1544 		 source_eswitch_owner_vhca_id_valid, src_esw_owner_mask_on);
1545 
1546 	match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
1547 					   in, match_criteria);
1548 	memcpy(match_criteria_addr, fg->mask.match_criteria,
1549 	       sizeof(fg->mask.match_criteria));
1550 
1551 	err = root->cmds->create_flow_group(root, ft, in, fg);
1552 	if (!err) {
1553 		fg->node.active = true;
1554 		trace_mlx5_fs_add_fg(fg);
1555 	}
1556 
1557 	kvfree(in);
1558 	return err;
1559 }
1560 
1561 static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
1562 				struct mlx5_flow_destination *d2)
1563 {
1564 	if (d1->type == d2->type) {
1565 		if (((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT ||
1566 		      d1->type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) &&
1567 		     d1->vport.num == d2->vport.num &&
1568 		     d1->vport.flags == d2->vport.flags &&
1569 		     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
1570 		      (d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
1571 		     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
1572 		      (d1->vport.pkt_reformat->id ==
1573 		       d2->vport.pkt_reformat->id) : true)) ||
1574 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1575 		     d1->ft == d2->ft) ||
1576 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
1577 		     d1->tir_num == d2->tir_num) ||
1578 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
1579 		     d1->ft_num == d2->ft_num) ||
1580 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER &&
1581 		     d1->sampler_id == d2->sampler_id))
1582 			return true;
1583 	}
1584 
1585 	return false;
1586 }
1587 
1588 static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
1589 					     struct mlx5_flow_destination *dest)
1590 {
1591 	struct mlx5_flow_rule *rule;
1592 
1593 	list_for_each_entry(rule, &fte->node.children, node.list) {
1594 		if (mlx5_flow_dests_cmp(&rule->dest_attr, dest))
1595 			return rule;
1596 	}
1597 	return NULL;
1598 }
1599 
1600 static bool check_conflicting_actions_vlan(const struct mlx5_fs_vlan *vlan0,
1601 					   const struct mlx5_fs_vlan *vlan1)
1602 {
1603 	return vlan0->ethtype != vlan1->ethtype ||
1604 	       vlan0->vid != vlan1->vid ||
1605 	       vlan0->prio != vlan1->prio;
1606 }
1607 
1608 static bool check_conflicting_actions(const struct mlx5_flow_act *act1,
1609 				      const struct mlx5_flow_act *act2)
1610 {
1611 	u32 action1 = act1->action;
1612 	u32 action2 = act2->action;
1613 	u32 xored_actions;
1614 
1615 	xored_actions = action1 ^ action2;
1616 
1617 	/* if one rule only wants to count, it's ok */
1618 	if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
1619 	    action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT)
1620 		return false;
1621 
1622 	if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP  |
1623 			     MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
1624 			     MLX5_FLOW_CONTEXT_ACTION_DECAP |
1625 			     MLX5_FLOW_CONTEXT_ACTION_MOD_HDR  |
1626 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
1627 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
1628 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 |
1629 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
1630 		return true;
1631 
1632 	if (action1 & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT &&
1633 	    act1->pkt_reformat != act2->pkt_reformat)
1634 		return true;
1635 
1636 	if (action1 & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1637 	    act1->modify_hdr != act2->modify_hdr)
1638 		return true;
1639 
1640 	if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
1641 	    check_conflicting_actions_vlan(&act1->vlan[0], &act2->vlan[0]))
1642 		return true;
1643 
1644 	if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 &&
1645 	    check_conflicting_actions_vlan(&act1->vlan[1], &act2->vlan[1]))
1646 		return true;
1647 
1648 	return false;
1649 }
1650 
1651 static int check_conflicting_ftes(struct fs_fte *fte,
1652 				  const struct mlx5_flow_context *flow_context,
1653 				  const struct mlx5_flow_act *flow_act)
1654 {
1655 	if (check_conflicting_actions(flow_act, &fte->action)) {
1656 		mlx5_core_warn(get_dev(&fte->node),
1657 			       "Found two FTEs with conflicting actions\n");
1658 		return -EEXIST;
1659 	}
1660 
1661 	if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) &&
1662 	    fte->flow_context.flow_tag != flow_context->flow_tag) {
1663 		mlx5_core_warn(get_dev(&fte->node),
1664 			       "FTE flow tag %u already exists with different flow tag %u\n",
1665 			       fte->flow_context.flow_tag,
1666 			       flow_context->flow_tag);
1667 		return -EEXIST;
1668 	}
1669 
1670 	return 0;
1671 }
1672 
1673 static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
1674 					    const struct mlx5_flow_spec *spec,
1675 					    struct mlx5_flow_act *flow_act,
1676 					    struct mlx5_flow_destination *dest,
1677 					    int dest_num,
1678 					    struct fs_fte *fte)
1679 {
1680 	struct mlx5_flow_handle *handle;
1681 	int old_action;
1682 	int i;
1683 	int ret;
1684 
1685 	ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act);
1686 	if (ret)
1687 		return ERR_PTR(ret);
1688 
1689 	old_action = fte->action.action;
1690 	fte->action.action |= flow_act->action;
1691 	handle = add_rule_fte(fte, fg, dest, dest_num,
1692 			      old_action != flow_act->action);
1693 	if (IS_ERR(handle)) {
1694 		fte->action.action = old_action;
1695 		return handle;
1696 	}
1697 	trace_mlx5_fs_set_fte(fte, false);
1698 
1699 	for (i = 0; i < handle->num_rules; i++) {
1700 		if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
1701 			tree_add_node(&handle->rule[i]->node, &fte->node);
1702 			trace_mlx5_fs_add_rule(handle->rule[i]);
1703 		}
1704 	}
1705 	return handle;
1706 }
1707 
1708 static bool counter_is_valid(u32 action)
1709 {
1710 	return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
1711 			  MLX5_FLOW_CONTEXT_ACTION_ALLOW |
1712 			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
1713 }
1714 
1715 static bool dest_is_valid(struct mlx5_flow_destination *dest,
1716 			  struct mlx5_flow_act *flow_act,
1717 			  struct mlx5_flow_table *ft)
1718 {
1719 	bool ignore_level = flow_act->flags & FLOW_ACT_IGNORE_FLOW_LEVEL;
1720 	u32 action = flow_act->action;
1721 
1722 	if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
1723 		return counter_is_valid(action);
1724 
1725 	if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
1726 		return true;
1727 
1728 	if (ignore_level) {
1729 		if (ft->type != FS_FT_FDB &&
1730 		    ft->type != FS_FT_NIC_RX)
1731 			return false;
1732 
1733 		if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1734 		    ft->type != dest->ft->type)
1735 			return false;
1736 	}
1737 
1738 	if (!dest || ((dest->type ==
1739 	    MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) &&
1740 	    (dest->ft->level <= ft->level && !ignore_level)))
1741 		return false;
1742 	return true;
1743 }
1744 
1745 struct match_list {
1746 	struct list_head	list;
1747 	struct mlx5_flow_group *g;
1748 };
1749 
1750 static void free_match_list(struct match_list *head, bool ft_locked)
1751 {
1752 	struct match_list *iter, *match_tmp;
1753 
1754 	list_for_each_entry_safe(iter, match_tmp, &head->list,
1755 				 list) {
1756 		tree_put_node(&iter->g->node, ft_locked);
1757 		list_del(&iter->list);
1758 		kfree(iter);
1759 	}
1760 }
1761 
1762 static int build_match_list(struct match_list *match_head,
1763 			    struct mlx5_flow_table *ft,
1764 			    const struct mlx5_flow_spec *spec,
1765 			    struct mlx5_flow_group *fg,
1766 			    bool ft_locked)
1767 {
1768 	struct rhlist_head *tmp, *list;
1769 	struct mlx5_flow_group *g;
1770 	int err = 0;
1771 
1772 	rcu_read_lock();
1773 	INIT_LIST_HEAD(&match_head->list);
1774 	/* Collect all fgs which has a matching match_criteria */
1775 	list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg);
1776 	/* RCU is atomic, we can't execute FW commands here */
1777 	rhl_for_each_entry_rcu(g, tmp, list, hash) {
1778 		struct match_list *curr_match;
1779 
1780 		if (fg && fg != g)
1781 			continue;
1782 
1783 		if (unlikely(!tree_get_node(&g->node)))
1784 			continue;
1785 
1786 		curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
1787 		if (!curr_match) {
1788 			rcu_read_unlock();
1789 			free_match_list(match_head, ft_locked);
1790 			return -ENOMEM;
1791 		}
1792 		curr_match->g = g;
1793 		list_add_tail(&curr_match->list, &match_head->list);
1794 	}
1795 	rcu_read_unlock();
1796 	return err;
1797 }
1798 
1799 static u64 matched_fgs_get_version(struct list_head *match_head)
1800 {
1801 	struct match_list *iter;
1802 	u64 version = 0;
1803 
1804 	list_for_each_entry(iter, match_head, list)
1805 		version += (u64)atomic_read(&iter->g->node.version);
1806 	return version;
1807 }
1808 
1809 static struct fs_fte *
1810 lookup_fte_locked(struct mlx5_flow_group *g,
1811 		  const u32 *match_value,
1812 		  bool take_write)
1813 {
1814 	struct fs_fte *fte_tmp;
1815 
1816 	if (take_write)
1817 		nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
1818 	else
1819 		nested_down_read_ref_node(&g->node, FS_LOCK_PARENT);
1820 	fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value,
1821 					 rhash_fte);
1822 	if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
1823 		fte_tmp = NULL;
1824 		goto out;
1825 	}
1826 	if (!fte_tmp->node.active) {
1827 		tree_put_node(&fte_tmp->node, false);
1828 		fte_tmp = NULL;
1829 		goto out;
1830 	}
1831 
1832 	nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
1833 out:
1834 	if (take_write)
1835 		up_write_ref_node(&g->node, false);
1836 	else
1837 		up_read_ref_node(&g->node);
1838 	return fte_tmp;
1839 }
1840 
1841 static struct mlx5_flow_handle *
1842 try_add_to_existing_fg(struct mlx5_flow_table *ft,
1843 		       struct list_head *match_head,
1844 		       const struct mlx5_flow_spec *spec,
1845 		       struct mlx5_flow_act *flow_act,
1846 		       struct mlx5_flow_destination *dest,
1847 		       int dest_num,
1848 		       int ft_version)
1849 {
1850 	struct mlx5_flow_steering *steering = get_steering(&ft->node);
1851 	struct mlx5_flow_group *g;
1852 	struct mlx5_flow_handle *rule;
1853 	struct match_list *iter;
1854 	bool take_write = false;
1855 	struct fs_fte *fte;
1856 	u64  version = 0;
1857 	int err;
1858 
1859 	fte = alloc_fte(ft, spec, flow_act);
1860 	if (IS_ERR(fte))
1861 		return  ERR_PTR(-ENOMEM);
1862 
1863 search_again_locked:
1864 	if (flow_act->flags & FLOW_ACT_NO_APPEND)
1865 		goto skip_search;
1866 	version = matched_fgs_get_version(match_head);
1867 	/* Try to find an fte with identical match value and attempt update its
1868 	 * action.
1869 	 */
1870 	list_for_each_entry(iter, match_head, list) {
1871 		struct fs_fte *fte_tmp;
1872 
1873 		g = iter->g;
1874 		fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
1875 		if (!fte_tmp)
1876 			continue;
1877 		rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
1878 		/* No error check needed here, because insert_fte() is not called */
1879 		up_write_ref_node(&fte_tmp->node, false);
1880 		tree_put_node(&fte_tmp->node, false);
1881 		kmem_cache_free(steering->ftes_cache, fte);
1882 		return rule;
1883 	}
1884 
1885 skip_search:
1886 	/* No group with matching fte found, or we skipped the search.
1887 	 * Try to add a new fte to any matching fg.
1888 	 */
1889 
1890 	/* Check the ft version, for case that new flow group
1891 	 * was added while the fgs weren't locked
1892 	 */
1893 	if (atomic_read(&ft->node.version) != ft_version) {
1894 		rule = ERR_PTR(-EAGAIN);
1895 		goto out;
1896 	}
1897 
1898 	/* Check the fgs version. If version have changed it could be that an
1899 	 * FTE with the same match value was added while the fgs weren't
1900 	 * locked.
1901 	 */
1902 	if (!(flow_act->flags & FLOW_ACT_NO_APPEND) &&
1903 	    version != matched_fgs_get_version(match_head)) {
1904 		take_write = true;
1905 		goto search_again_locked;
1906 	}
1907 
1908 	list_for_each_entry(iter, match_head, list) {
1909 		g = iter->g;
1910 
1911 		nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
1912 
1913 		if (!g->node.active) {
1914 			up_write_ref_node(&g->node, false);
1915 			continue;
1916 		}
1917 
1918 		err = insert_fte(g, fte);
1919 		if (err) {
1920 			up_write_ref_node(&g->node, false);
1921 			if (err == -ENOSPC)
1922 				continue;
1923 			kmem_cache_free(steering->ftes_cache, fte);
1924 			return ERR_PTR(err);
1925 		}
1926 
1927 		nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
1928 		up_write_ref_node(&g->node, false);
1929 		rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
1930 		up_write_ref_node(&fte->node, false);
1931 		if (IS_ERR(rule))
1932 			tree_put_node(&fte->node, false);
1933 		return rule;
1934 	}
1935 	rule = ERR_PTR(-ENOENT);
1936 out:
1937 	kmem_cache_free(steering->ftes_cache, fte);
1938 	return rule;
1939 }
1940 
1941 static struct mlx5_flow_handle *
1942 _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
1943 		     const struct mlx5_flow_spec *spec,
1944 		     struct mlx5_flow_act *flow_act,
1945 		     struct mlx5_flow_destination *dest,
1946 		     int dest_num)
1947 
1948 {
1949 	struct mlx5_flow_steering *steering = get_steering(&ft->node);
1950 	struct mlx5_flow_handle *rule;
1951 	struct match_list match_head;
1952 	struct mlx5_flow_group *g;
1953 	bool take_write = false;
1954 	struct fs_fte *fte;
1955 	int version;
1956 	int err;
1957 	int i;
1958 
1959 	if (!check_valid_spec(spec))
1960 		return ERR_PTR(-EINVAL);
1961 
1962 	if (flow_act->fg && ft->autogroup.active)
1963 		return ERR_PTR(-EINVAL);
1964 
1965 	for (i = 0; i < dest_num; i++) {
1966 		if (!dest_is_valid(&dest[i], flow_act, ft))
1967 			return ERR_PTR(-EINVAL);
1968 	}
1969 	nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
1970 search_again_locked:
1971 	version = atomic_read(&ft->node.version);
1972 
1973 	/* Collect all fgs which has a matching match_criteria */
1974 	err = build_match_list(&match_head, ft, spec, flow_act->fg, take_write);
1975 	if (err) {
1976 		if (take_write)
1977 			up_write_ref_node(&ft->node, false);
1978 		else
1979 			up_read_ref_node(&ft->node);
1980 		return ERR_PTR(err);
1981 	}
1982 
1983 	if (!take_write)
1984 		up_read_ref_node(&ft->node);
1985 
1986 	rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest,
1987 				      dest_num, version);
1988 	free_match_list(&match_head, take_write);
1989 	if (!IS_ERR(rule) ||
1990 	    (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
1991 		if (take_write)
1992 			up_write_ref_node(&ft->node, false);
1993 		return rule;
1994 	}
1995 
1996 	if (!take_write) {
1997 		nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
1998 		take_write = true;
1999 	}
2000 
2001 	if (PTR_ERR(rule) == -EAGAIN ||
2002 	    version != atomic_read(&ft->node.version))
2003 		goto search_again_locked;
2004 
2005 	g = alloc_auto_flow_group(ft, spec);
2006 	if (IS_ERR(g)) {
2007 		rule = ERR_CAST(g);
2008 		up_write_ref_node(&ft->node, false);
2009 		return rule;
2010 	}
2011 
2012 	fte = alloc_fte(ft, spec, flow_act);
2013 	if (IS_ERR(fte)) {
2014 		up_write_ref_node(&ft->node, false);
2015 		err = PTR_ERR(fte);
2016 		goto err_alloc_fte;
2017 	}
2018 
2019 	nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
2020 	up_write_ref_node(&ft->node, false);
2021 
2022 	err = create_auto_flow_group(ft, g);
2023 	if (err)
2024 		goto err_release_fg;
2025 
2026 	err = insert_fte(g, fte);
2027 	if (err)
2028 		goto err_release_fg;
2029 
2030 	nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
2031 	up_write_ref_node(&g->node, false);
2032 	rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
2033 	up_write_ref_node(&fte->node, false);
2034 	if (IS_ERR(rule))
2035 		tree_put_node(&fte->node, false);
2036 	tree_put_node(&g->node, false);
2037 	return rule;
2038 
2039 err_release_fg:
2040 	up_write_ref_node(&g->node, false);
2041 	kmem_cache_free(steering->ftes_cache, fte);
2042 err_alloc_fte:
2043 	tree_put_node(&g->node, false);
2044 	return ERR_PTR(err);
2045 }
2046 
2047 static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
2048 {
2049 	return ((ft->type == FS_FT_NIC_RX) &&
2050 		(MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
2051 }
2052 
2053 struct mlx5_flow_handle *
2054 mlx5_add_flow_rules(struct mlx5_flow_table *ft,
2055 		    const struct mlx5_flow_spec *spec,
2056 		    struct mlx5_flow_act *flow_act,
2057 		    struct mlx5_flow_destination *dest,
2058 		    int num_dest)
2059 {
2060 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
2061 	static const struct mlx5_flow_spec zero_spec = {};
2062 	struct mlx5_flow_destination *gen_dest = NULL;
2063 	struct mlx5_flow_table *next_ft = NULL;
2064 	struct mlx5_flow_handle *handle = NULL;
2065 	u32 sw_action = flow_act->action;
2066 	int i;
2067 
2068 	if (!spec)
2069 		spec = &zero_spec;
2070 
2071 	if (!is_fwd_next_action(sw_action))
2072 		return _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
2073 
2074 	if (!fwd_next_prio_supported(ft))
2075 		return ERR_PTR(-EOPNOTSUPP);
2076 
2077 	mutex_lock(&root->chain_lock);
2078 	next_ft = find_next_fwd_ft(ft, flow_act);
2079 	if (!next_ft) {
2080 		handle = ERR_PTR(-EOPNOTSUPP);
2081 		goto unlock;
2082 	}
2083 
2084 	gen_dest = kcalloc(num_dest + 1, sizeof(*dest),
2085 			   GFP_KERNEL);
2086 	if (!gen_dest) {
2087 		handle = ERR_PTR(-ENOMEM);
2088 		goto unlock;
2089 	}
2090 	for (i = 0; i < num_dest; i++)
2091 		gen_dest[i] = dest[i];
2092 	gen_dest[i].type =
2093 		MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
2094 	gen_dest[i].ft = next_ft;
2095 	dest = gen_dest;
2096 	num_dest++;
2097 	flow_act->action &= ~(MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
2098 			      MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS);
2099 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2100 	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
2101 	if (IS_ERR(handle))
2102 		goto unlock;
2103 
2104 	if (list_empty(&handle->rule[num_dest - 1]->next_ft)) {
2105 		mutex_lock(&next_ft->lock);
2106 		list_add(&handle->rule[num_dest - 1]->next_ft,
2107 			 &next_ft->fwd_rules);
2108 		mutex_unlock(&next_ft->lock);
2109 		handle->rule[num_dest - 1]->sw_action = sw_action;
2110 		handle->rule[num_dest - 1]->ft = ft;
2111 	}
2112 unlock:
2113 	mutex_unlock(&root->chain_lock);
2114 	kfree(gen_dest);
2115 	return handle;
2116 }
2117 EXPORT_SYMBOL(mlx5_add_flow_rules);
2118 
2119 void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
2120 {
2121 	struct fs_fte *fte;
2122 	int i;
2123 
2124 	/* In order to consolidate the HW changes we lock the FTE for other
2125 	 * changes, and increase its refcount, in order not to perform the
2126 	 * "del" functions of the FTE. Will handle them here.
2127 	 * The removal of the rules is done under locked FTE.
2128 	 * After removing all the handle's rules, if there are remaining
2129 	 * rules, it means we just need to modify the FTE in FW, and
2130 	 * unlock/decrease the refcount we increased before.
2131 	 * Otherwise, it means the FTE should be deleted. First delete the
2132 	 * FTE in FW. Then, unlock the FTE, and proceed the tree_put_node of
2133 	 * the FTE, which will handle the last decrease of the refcount, as
2134 	 * well as required handling of its parent.
2135 	 */
2136 	fs_get_obj(fte, handle->rule[0]->node.parent);
2137 	down_write_ref_node(&fte->node, false);
2138 	for (i = handle->num_rules - 1; i >= 0; i--)
2139 		tree_remove_node(&handle->rule[i]->node, true);
2140 	if (list_empty(&fte->node.children)) {
2141 		fte->node.del_hw_func(&fte->node);
2142 		/* Avoid double call to del_hw_fte */
2143 		fte->node.del_hw_func = NULL;
2144 		up_write_ref_node(&fte->node, false);
2145 		tree_put_node(&fte->node, false);
2146 	} else if (fte->dests_size) {
2147 		if (fte->modify_mask)
2148 			modify_fte(fte);
2149 		up_write_ref_node(&fte->node, false);
2150 	} else {
2151 		up_write_ref_node(&fte->node, false);
2152 	}
2153 	kfree(handle);
2154 }
2155 EXPORT_SYMBOL(mlx5_del_flow_rules);
2156 
2157 /* Assuming prio->node.children(flow tables) is sorted by level */
2158 static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
2159 {
2160 	struct fs_prio *prio;
2161 
2162 	fs_get_obj(prio, ft->node.parent);
2163 
2164 	if (!list_is_last(&ft->node.list, &prio->node.children))
2165 		return list_next_entry(ft, node.list);
2166 	return find_next_chained_ft(prio);
2167 }
2168 
2169 static int update_root_ft_destroy(struct mlx5_flow_table *ft)
2170 {
2171 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
2172 	struct mlx5_ft_underlay_qp *uqp;
2173 	struct mlx5_flow_table *new_root_ft = NULL;
2174 	int err = 0;
2175 	u32 qpn;
2176 
2177 	if (root->root_ft != ft)
2178 		return 0;
2179 
2180 	new_root_ft = find_next_ft(ft);
2181 	if (!new_root_ft) {
2182 		root->root_ft = NULL;
2183 		return 0;
2184 	}
2185 
2186 	if (list_empty(&root->underlay_qpns)) {
2187 		/* Don't set any QPN (zero) in case QPN list is empty */
2188 		qpn = 0;
2189 		err = root->cmds->update_root_ft(root, new_root_ft,
2190 						 qpn, false);
2191 	} else {
2192 		list_for_each_entry(uqp, &root->underlay_qpns, list) {
2193 			qpn = uqp->qpn;
2194 			err = root->cmds->update_root_ft(root,
2195 							 new_root_ft, qpn,
2196 							 false);
2197 			if (err)
2198 				break;
2199 		}
2200 	}
2201 
2202 	if (err)
2203 		mlx5_core_warn(root->dev,
2204 			       "Update root flow table of id(%u) qpn(%d) failed\n",
2205 			       ft->id, qpn);
2206 	else
2207 		root->root_ft = new_root_ft;
2208 
2209 	return 0;
2210 }
2211 
2212 /* Connect flow table from previous priority to
2213  * the next flow table.
2214  */
2215 static int disconnect_flow_table(struct mlx5_flow_table *ft)
2216 {
2217 	struct mlx5_core_dev *dev = get_dev(&ft->node);
2218 	struct mlx5_flow_table *next_ft;
2219 	struct fs_prio *prio;
2220 	int err = 0;
2221 
2222 	err = update_root_ft_destroy(ft);
2223 	if (err)
2224 		return err;
2225 
2226 	fs_get_obj(prio, ft->node.parent);
2227 	if  (!(list_first_entry(&prio->node.children,
2228 				struct mlx5_flow_table,
2229 				node.list) == ft))
2230 		return 0;
2231 
2232 	next_ft = find_next_ft(ft);
2233 	err = connect_fwd_rules(dev, next_ft, ft);
2234 	if (err)
2235 		return err;
2236 
2237 	err = connect_prev_fts(dev, next_ft, prio);
2238 	if (err)
2239 		mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
2240 			       ft->id);
2241 	return err;
2242 }
2243 
2244 int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
2245 {
2246 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
2247 	int err = 0;
2248 
2249 	mutex_lock(&root->chain_lock);
2250 	if (!(ft->flags & MLX5_FLOW_TABLE_UNMANAGED))
2251 		err = disconnect_flow_table(ft);
2252 	if (err) {
2253 		mutex_unlock(&root->chain_lock);
2254 		return err;
2255 	}
2256 	if (tree_remove_node(&ft->node, false))
2257 		mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
2258 			       ft->id);
2259 	mutex_unlock(&root->chain_lock);
2260 
2261 	return err;
2262 }
2263 EXPORT_SYMBOL(mlx5_destroy_flow_table);
2264 
2265 void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
2266 {
2267 	if (tree_remove_node(&fg->node, false))
2268 		mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
2269 			       fg->id);
2270 }
2271 EXPORT_SYMBOL(mlx5_destroy_flow_group);
2272 
2273 struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev,
2274 						int n)
2275 {
2276 	struct mlx5_flow_steering *steering = dev->priv.steering;
2277 
2278 	if (!steering || !steering->fdb_sub_ns)
2279 		return NULL;
2280 
2281 	return steering->fdb_sub_ns[n];
2282 }
2283 EXPORT_SYMBOL(mlx5_get_fdb_sub_ns);
2284 
2285 static bool is_nic_rx_ns(enum mlx5_flow_namespace_type type)
2286 {
2287 	switch (type) {
2288 	case MLX5_FLOW_NAMESPACE_BYPASS:
2289 	case MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC:
2290 	case MLX5_FLOW_NAMESPACE_LAG:
2291 	case MLX5_FLOW_NAMESPACE_OFFLOADS:
2292 	case MLX5_FLOW_NAMESPACE_ETHTOOL:
2293 	case MLX5_FLOW_NAMESPACE_KERNEL:
2294 	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
2295 	case MLX5_FLOW_NAMESPACE_ANCHOR:
2296 		return true;
2297 	default:
2298 		return false;
2299 	}
2300 }
2301 
2302 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
2303 						    enum mlx5_flow_namespace_type type)
2304 {
2305 	struct mlx5_flow_steering *steering = dev->priv.steering;
2306 	struct mlx5_flow_root_namespace *root_ns;
2307 	int prio = 0;
2308 	struct fs_prio *fs_prio;
2309 	struct mlx5_flow_namespace *ns;
2310 
2311 	if (!steering)
2312 		return NULL;
2313 
2314 	switch (type) {
2315 	case MLX5_FLOW_NAMESPACE_FDB:
2316 		if (steering->fdb_root_ns)
2317 			return &steering->fdb_root_ns->ns;
2318 		return NULL;
2319 	case MLX5_FLOW_NAMESPACE_PORT_SEL:
2320 		if (steering->port_sel_root_ns)
2321 			return &steering->port_sel_root_ns->ns;
2322 		return NULL;
2323 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
2324 		if (steering->sniffer_rx_root_ns)
2325 			return &steering->sniffer_rx_root_ns->ns;
2326 		return NULL;
2327 	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
2328 		if (steering->sniffer_tx_root_ns)
2329 			return &steering->sniffer_tx_root_ns->ns;
2330 		return NULL;
2331 	case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
2332 		root_ns = steering->fdb_root_ns;
2333 		prio =  FDB_BYPASS_PATH;
2334 		break;
2335 	case MLX5_FLOW_NAMESPACE_EGRESS:
2336 	case MLX5_FLOW_NAMESPACE_EGRESS_IPSEC:
2337 	case MLX5_FLOW_NAMESPACE_EGRESS_MACSEC:
2338 		root_ns = steering->egress_root_ns;
2339 		prio = type - MLX5_FLOW_NAMESPACE_EGRESS;
2340 		break;
2341 	case MLX5_FLOW_NAMESPACE_RDMA_RX:
2342 		root_ns = steering->rdma_rx_root_ns;
2343 		prio = RDMA_RX_BYPASS_PRIO;
2344 		break;
2345 	case MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL:
2346 		root_ns = steering->rdma_rx_root_ns;
2347 		prio = RDMA_RX_KERNEL_PRIO;
2348 		break;
2349 	case MLX5_FLOW_NAMESPACE_RDMA_TX:
2350 		root_ns = steering->rdma_tx_root_ns;
2351 		break;
2352 	case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS:
2353 		root_ns = steering->rdma_rx_root_ns;
2354 		prio = RDMA_RX_COUNTERS_PRIO;
2355 		break;
2356 	case MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS:
2357 		root_ns = steering->rdma_tx_root_ns;
2358 		prio = RDMA_TX_COUNTERS_PRIO;
2359 		break;
2360 	default: /* Must be NIC RX */
2361 		WARN_ON(!is_nic_rx_ns(type));
2362 		root_ns = steering->root_ns;
2363 		prio = type;
2364 		break;
2365 	}
2366 
2367 	if (!root_ns)
2368 		return NULL;
2369 
2370 	fs_prio = find_prio(&root_ns->ns, prio);
2371 	if (!fs_prio)
2372 		return NULL;
2373 
2374 	ns = list_first_entry(&fs_prio->node.children,
2375 			      typeof(*ns),
2376 			      node.list);
2377 
2378 	return ns;
2379 }
2380 EXPORT_SYMBOL(mlx5_get_flow_namespace);
2381 
2382 struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
2383 							      enum mlx5_flow_namespace_type type,
2384 							      int vport)
2385 {
2386 	struct mlx5_flow_steering *steering = dev->priv.steering;
2387 
2388 	if (!steering)
2389 		return NULL;
2390 
2391 	switch (type) {
2392 	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
2393 		if (vport >= steering->esw_egress_acl_vports)
2394 			return NULL;
2395 		if (steering->esw_egress_root_ns &&
2396 		    steering->esw_egress_root_ns[vport])
2397 			return &steering->esw_egress_root_ns[vport]->ns;
2398 		else
2399 			return NULL;
2400 	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
2401 		if (vport >= steering->esw_ingress_acl_vports)
2402 			return NULL;
2403 		if (steering->esw_ingress_root_ns &&
2404 		    steering->esw_ingress_root_ns[vport])
2405 			return &steering->esw_ingress_root_ns[vport]->ns;
2406 		else
2407 			return NULL;
2408 	default:
2409 		return NULL;
2410 	}
2411 }
2412 
2413 static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns,
2414 				       unsigned int prio,
2415 				       int num_levels,
2416 				       enum fs_node_type type)
2417 {
2418 	struct fs_prio *fs_prio;
2419 
2420 	fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
2421 	if (!fs_prio)
2422 		return ERR_PTR(-ENOMEM);
2423 
2424 	fs_prio->node.type = type;
2425 	tree_init_node(&fs_prio->node, NULL, del_sw_prio);
2426 	tree_add_node(&fs_prio->node, &ns->node);
2427 	fs_prio->num_levels = num_levels;
2428 	fs_prio->prio = prio;
2429 	list_add_tail(&fs_prio->node.list, &ns->node.children);
2430 
2431 	return fs_prio;
2432 }
2433 
2434 static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns,
2435 					      unsigned int prio,
2436 					      int num_levels)
2437 {
2438 	return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS);
2439 }
2440 
2441 static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
2442 				      unsigned int prio, int num_levels)
2443 {
2444 	return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO);
2445 }
2446 
2447 static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
2448 						     *ns)
2449 {
2450 	ns->node.type = FS_TYPE_NAMESPACE;
2451 
2452 	return ns;
2453 }
2454 
2455 static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
2456 						       int def_miss_act)
2457 {
2458 	struct mlx5_flow_namespace	*ns;
2459 
2460 	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
2461 	if (!ns)
2462 		return ERR_PTR(-ENOMEM);
2463 
2464 	fs_init_namespace(ns);
2465 	ns->def_miss_action = def_miss_act;
2466 	tree_init_node(&ns->node, NULL, del_sw_ns);
2467 	tree_add_node(&ns->node, &prio->node);
2468 	list_add_tail(&ns->node.list, &prio->node.children);
2469 
2470 	return ns;
2471 }
2472 
2473 static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio,
2474 			     struct init_tree_node *prio_metadata)
2475 {
2476 	struct fs_prio *fs_prio;
2477 	int i;
2478 
2479 	for (i = 0; i < prio_metadata->num_leaf_prios; i++) {
2480 		fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels);
2481 		if (IS_ERR(fs_prio))
2482 			return PTR_ERR(fs_prio);
2483 	}
2484 	return 0;
2485 }
2486 
2487 #define FLOW_TABLE_BIT_SZ 1
2488 #define GET_FLOW_TABLE_CAP(dev, offset) \
2489 	((be32_to_cpu(*((__be32 *)(dev->caps.hca[MLX5_CAP_FLOW_TABLE]->cur) +	\
2490 			offset / 32)) >>					\
2491 	  (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
2492 static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
2493 {
2494 	int i;
2495 
2496 	for (i = 0; i < caps->arr_sz; i++) {
2497 		if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
2498 			return false;
2499 	}
2500 	return true;
2501 }
2502 
2503 static int init_root_tree_recursive(struct mlx5_flow_steering *steering,
2504 				    struct init_tree_node *init_node,
2505 				    struct fs_node *fs_parent_node,
2506 				    struct init_tree_node *init_parent_node,
2507 				    int prio)
2508 {
2509 	int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev,
2510 					      flow_table_properties_nic_receive.
2511 					      max_ft_level);
2512 	struct mlx5_flow_namespace *fs_ns;
2513 	struct fs_prio *fs_prio;
2514 	struct fs_node *base;
2515 	int i;
2516 	int err;
2517 
2518 	if (init_node->type == FS_TYPE_PRIO) {
2519 		if ((init_node->min_ft_level > max_ft_level) ||
2520 		    !has_required_caps(steering->dev, &init_node->caps))
2521 			return 0;
2522 
2523 		fs_get_obj(fs_ns, fs_parent_node);
2524 		if (init_node->num_leaf_prios)
2525 			return create_leaf_prios(fs_ns, prio, init_node);
2526 		fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels);
2527 		if (IS_ERR(fs_prio))
2528 			return PTR_ERR(fs_prio);
2529 		base = &fs_prio->node;
2530 	} else if (init_node->type == FS_TYPE_NAMESPACE) {
2531 		fs_get_obj(fs_prio, fs_parent_node);
2532 		fs_ns = fs_create_namespace(fs_prio, init_node->def_miss_action);
2533 		if (IS_ERR(fs_ns))
2534 			return PTR_ERR(fs_ns);
2535 		base = &fs_ns->node;
2536 	} else {
2537 		return -EINVAL;
2538 	}
2539 	prio = 0;
2540 	for (i = 0; i < init_node->ar_size; i++) {
2541 		err = init_root_tree_recursive(steering, &init_node->children[i],
2542 					       base, init_node, prio);
2543 		if (err)
2544 			return err;
2545 		if (init_node->children[i].type == FS_TYPE_PRIO &&
2546 		    init_node->children[i].num_leaf_prios) {
2547 			prio += init_node->children[i].num_leaf_prios;
2548 		}
2549 	}
2550 
2551 	return 0;
2552 }
2553 
2554 static int init_root_tree(struct mlx5_flow_steering *steering,
2555 			  struct init_tree_node *init_node,
2556 			  struct fs_node *fs_parent_node)
2557 {
2558 	int err;
2559 	int i;
2560 
2561 	for (i = 0; i < init_node->ar_size; i++) {
2562 		err = init_root_tree_recursive(steering, &init_node->children[i],
2563 					       fs_parent_node,
2564 					       init_node, i);
2565 		if (err)
2566 			return err;
2567 	}
2568 	return 0;
2569 }
2570 
2571 static void del_sw_root_ns(struct fs_node *node)
2572 {
2573 	struct mlx5_flow_root_namespace *root_ns;
2574 	struct mlx5_flow_namespace *ns;
2575 
2576 	fs_get_obj(ns, node);
2577 	root_ns = container_of(ns, struct mlx5_flow_root_namespace, ns);
2578 	mutex_destroy(&root_ns->chain_lock);
2579 	kfree(node);
2580 }
2581 
2582 static struct mlx5_flow_root_namespace
2583 *create_root_ns(struct mlx5_flow_steering *steering,
2584 		enum fs_flow_table_type table_type)
2585 {
2586 	const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type);
2587 	struct mlx5_flow_root_namespace *root_ns;
2588 	struct mlx5_flow_namespace *ns;
2589 
2590 	/* Create the root namespace */
2591 	root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
2592 	if (!root_ns)
2593 		return NULL;
2594 
2595 	root_ns->dev = steering->dev;
2596 	root_ns->table_type = table_type;
2597 	root_ns->cmds = cmds;
2598 
2599 	INIT_LIST_HEAD(&root_ns->underlay_qpns);
2600 
2601 	ns = &root_ns->ns;
2602 	fs_init_namespace(ns);
2603 	mutex_init(&root_ns->chain_lock);
2604 	tree_init_node(&ns->node, NULL, del_sw_root_ns);
2605 	tree_add_node(&ns->node, NULL);
2606 
2607 	return root_ns;
2608 }
2609 
2610 static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level);
2611 
2612 static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level)
2613 {
2614 	struct fs_prio *prio;
2615 
2616 	fs_for_each_prio(prio, ns) {
2617 		 /* This updates prio start_level and num_levels */
2618 		set_prio_attrs_in_prio(prio, acc_level);
2619 		acc_level += prio->num_levels;
2620 	}
2621 	return acc_level;
2622 }
2623 
2624 static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
2625 {
2626 	struct mlx5_flow_namespace *ns;
2627 	int acc_level_ns = acc_level;
2628 
2629 	prio->start_level = acc_level;
2630 	fs_for_each_ns(ns, prio) {
2631 		/* This updates start_level and num_levels of ns's priority descendants */
2632 		acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
2633 
2634 		/* If this a prio with chains, and we can jump from one chain
2635 		 * (namespace) to another, so we accumulate the levels
2636 		 */
2637 		if (prio->node.type == FS_TYPE_PRIO_CHAINS)
2638 			acc_level = acc_level_ns;
2639 	}
2640 
2641 	if (!prio->num_levels)
2642 		prio->num_levels = acc_level_ns - prio->start_level;
2643 	WARN_ON(prio->num_levels < acc_level_ns - prio->start_level);
2644 }
2645 
2646 static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
2647 {
2648 	struct mlx5_flow_namespace *ns = &root_ns->ns;
2649 	struct fs_prio *prio;
2650 	int start_level = 0;
2651 
2652 	fs_for_each_prio(prio, ns) {
2653 		set_prio_attrs_in_prio(prio, start_level);
2654 		start_level += prio->num_levels;
2655 	}
2656 }
2657 
2658 #define ANCHOR_PRIO 0
2659 #define ANCHOR_SIZE 1
2660 #define ANCHOR_LEVEL 0
2661 static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
2662 {
2663 	struct mlx5_flow_namespace *ns = NULL;
2664 	struct mlx5_flow_table_attr ft_attr = {};
2665 	struct mlx5_flow_table *ft;
2666 
2667 	ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
2668 	if (WARN_ON(!ns))
2669 		return -EINVAL;
2670 
2671 	ft_attr.max_fte = ANCHOR_SIZE;
2672 	ft_attr.level   = ANCHOR_LEVEL;
2673 	ft_attr.prio    = ANCHOR_PRIO;
2674 
2675 	ft = mlx5_create_flow_table(ns, &ft_attr);
2676 	if (IS_ERR(ft)) {
2677 		mlx5_core_err(steering->dev, "Failed to create last anchor flow table");
2678 		return PTR_ERR(ft);
2679 	}
2680 	return 0;
2681 }
2682 
2683 static int init_root_ns(struct mlx5_flow_steering *steering)
2684 {
2685 	int err;
2686 
2687 	steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
2688 	if (!steering->root_ns)
2689 		return -ENOMEM;
2690 
2691 	err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node);
2692 	if (err)
2693 		goto out_err;
2694 
2695 	set_prio_attrs(steering->root_ns);
2696 	err = create_anchor_flow_table(steering);
2697 	if (err)
2698 		goto out_err;
2699 
2700 	return 0;
2701 
2702 out_err:
2703 	cleanup_root_ns(steering->root_ns);
2704 	steering->root_ns = NULL;
2705 	return err;
2706 }
2707 
2708 static void clean_tree(struct fs_node *node)
2709 {
2710 	if (node) {
2711 		struct fs_node *iter;
2712 		struct fs_node *temp;
2713 
2714 		tree_get_node(node);
2715 		list_for_each_entry_safe(iter, temp, &node->children, list)
2716 			clean_tree(iter);
2717 		tree_put_node(node, false);
2718 		tree_remove_node(node, false);
2719 	}
2720 }
2721 
2722 static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
2723 {
2724 	if (!root_ns)
2725 		return;
2726 
2727 	clean_tree(&root_ns->ns.node);
2728 }
2729 
2730 static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
2731 {
2732 	struct fs_prio *prio;
2733 
2734 	steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX);
2735 	if (!steering->sniffer_tx_root_ns)
2736 		return -ENOMEM;
2737 
2738 	/* Create single prio */
2739 	prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
2740 	return PTR_ERR_OR_ZERO(prio);
2741 }
2742 
2743 static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
2744 {
2745 	struct fs_prio *prio;
2746 
2747 	steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX);
2748 	if (!steering->sniffer_rx_root_ns)
2749 		return -ENOMEM;
2750 
2751 	/* Create single prio */
2752 	prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
2753 	return PTR_ERR_OR_ZERO(prio);
2754 }
2755 
2756 #define PORT_SEL_NUM_LEVELS 3
2757 static int init_port_sel_root_ns(struct mlx5_flow_steering *steering)
2758 {
2759 	struct fs_prio *prio;
2760 
2761 	steering->port_sel_root_ns = create_root_ns(steering, FS_FT_PORT_SEL);
2762 	if (!steering->port_sel_root_ns)
2763 		return -ENOMEM;
2764 
2765 	/* Create single prio */
2766 	prio = fs_create_prio(&steering->port_sel_root_ns->ns, 0,
2767 			      PORT_SEL_NUM_LEVELS);
2768 	return PTR_ERR_OR_ZERO(prio);
2769 }
2770 
2771 static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
2772 {
2773 	int err;
2774 
2775 	steering->rdma_rx_root_ns = create_root_ns(steering, FS_FT_RDMA_RX);
2776 	if (!steering->rdma_rx_root_ns)
2777 		return -ENOMEM;
2778 
2779 	err = init_root_tree(steering, &rdma_rx_root_fs,
2780 			     &steering->rdma_rx_root_ns->ns.node);
2781 	if (err)
2782 		goto out_err;
2783 
2784 	set_prio_attrs(steering->rdma_rx_root_ns);
2785 
2786 	return 0;
2787 
2788 out_err:
2789 	cleanup_root_ns(steering->rdma_rx_root_ns);
2790 	steering->rdma_rx_root_ns = NULL;
2791 	return err;
2792 }
2793 
2794 static int init_rdma_tx_root_ns(struct mlx5_flow_steering *steering)
2795 {
2796 	int err;
2797 
2798 	steering->rdma_tx_root_ns = create_root_ns(steering, FS_FT_RDMA_TX);
2799 	if (!steering->rdma_tx_root_ns)
2800 		return -ENOMEM;
2801 
2802 	err = init_root_tree(steering, &rdma_tx_root_fs,
2803 			     &steering->rdma_tx_root_ns->ns.node);
2804 	if (err)
2805 		goto out_err;
2806 
2807 	set_prio_attrs(steering->rdma_tx_root_ns);
2808 
2809 	return 0;
2810 
2811 out_err:
2812 	cleanup_root_ns(steering->rdma_tx_root_ns);
2813 	steering->rdma_tx_root_ns = NULL;
2814 	return err;
2815 }
2816 
2817 /* FT and tc chains are stored in the same array so we can re-use the
2818  * mlx5_get_fdb_sub_ns() and tc api for FT chains.
2819  * When creating a new ns for each chain store it in the first available slot.
2820  * Assume tc chains are created and stored first and only then the FT chain.
2821  */
2822 static void store_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering,
2823 					struct mlx5_flow_namespace *ns)
2824 {
2825 	int chain = 0;
2826 
2827 	while (steering->fdb_sub_ns[chain])
2828 		++chain;
2829 
2830 	steering->fdb_sub_ns[chain] = ns;
2831 }
2832 
2833 static int create_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering,
2834 					struct fs_prio *maj_prio)
2835 {
2836 	struct mlx5_flow_namespace *ns;
2837 	struct fs_prio *min_prio;
2838 	int prio;
2839 
2840 	ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
2841 	if (IS_ERR(ns))
2842 		return PTR_ERR(ns);
2843 
2844 	for (prio = 0; prio < FDB_TC_MAX_PRIO; prio++) {
2845 		min_prio = fs_create_prio(ns, prio, FDB_TC_LEVELS_PER_PRIO);
2846 		if (IS_ERR(min_prio))
2847 			return PTR_ERR(min_prio);
2848 	}
2849 
2850 	store_fdb_sub_ns_prio_chain(steering, ns);
2851 
2852 	return 0;
2853 }
2854 
2855 static int create_fdb_chains(struct mlx5_flow_steering *steering,
2856 			     int fs_prio,
2857 			     int chains)
2858 {
2859 	struct fs_prio *maj_prio;
2860 	int levels;
2861 	int chain;
2862 	int err;
2863 
2864 	levels = FDB_TC_LEVELS_PER_PRIO * FDB_TC_MAX_PRIO * chains;
2865 	maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns,
2866 					  fs_prio,
2867 					  levels);
2868 	if (IS_ERR(maj_prio))
2869 		return PTR_ERR(maj_prio);
2870 
2871 	for (chain = 0; chain < chains; chain++) {
2872 		err = create_fdb_sub_ns_prio_chain(steering, maj_prio);
2873 		if (err)
2874 			return err;
2875 	}
2876 
2877 	return 0;
2878 }
2879 
2880 static int create_fdb_fast_path(struct mlx5_flow_steering *steering)
2881 {
2882 	int err;
2883 
2884 	steering->fdb_sub_ns = kcalloc(FDB_NUM_CHAINS,
2885 				       sizeof(*steering->fdb_sub_ns),
2886 				       GFP_KERNEL);
2887 	if (!steering->fdb_sub_ns)
2888 		return -ENOMEM;
2889 
2890 	err = create_fdb_chains(steering, FDB_TC_OFFLOAD, FDB_TC_MAX_CHAIN + 1);
2891 	if (err)
2892 		return err;
2893 
2894 	err = create_fdb_chains(steering, FDB_FT_OFFLOAD, 1);
2895 	if (err)
2896 		return err;
2897 
2898 	return 0;
2899 }
2900 
2901 static int create_fdb_bypass(struct mlx5_flow_steering *steering)
2902 {
2903 	struct mlx5_flow_namespace *ns;
2904 	struct fs_prio *prio;
2905 	int i;
2906 
2907 	prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, 0);
2908 	if (IS_ERR(prio))
2909 		return PTR_ERR(prio);
2910 
2911 	ns = fs_create_namespace(prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
2912 	if (IS_ERR(ns))
2913 		return PTR_ERR(ns);
2914 
2915 	for (i = 0; i < MLX5_BY_PASS_NUM_REGULAR_PRIOS; i++) {
2916 		prio = fs_create_prio(ns, i, 1);
2917 		if (IS_ERR(prio))
2918 			return PTR_ERR(prio);
2919 	}
2920 	return 0;
2921 }
2922 
2923 static void cleanup_fdb_root_ns(struct mlx5_flow_steering *steering)
2924 {
2925 	cleanup_root_ns(steering->fdb_root_ns);
2926 	steering->fdb_root_ns = NULL;
2927 	kfree(steering->fdb_sub_ns);
2928 	steering->fdb_sub_ns = NULL;
2929 }
2930 
2931 static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
2932 {
2933 	struct fs_prio *maj_prio;
2934 	int err;
2935 
2936 	steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
2937 	if (!steering->fdb_root_ns)
2938 		return -ENOMEM;
2939 
2940 	err = create_fdb_bypass(steering);
2941 	if (err)
2942 		goto out_err;
2943 
2944 	err = create_fdb_fast_path(steering);
2945 	if (err)
2946 		goto out_err;
2947 
2948 	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_TC_MISS, 1);
2949 	if (IS_ERR(maj_prio)) {
2950 		err = PTR_ERR(maj_prio);
2951 		goto out_err;
2952 	}
2953 
2954 	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 3);
2955 	if (IS_ERR(maj_prio)) {
2956 		err = PTR_ERR(maj_prio);
2957 		goto out_err;
2958 	}
2959 
2960 	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1);
2961 	if (IS_ERR(maj_prio)) {
2962 		err = PTR_ERR(maj_prio);
2963 		goto out_err;
2964 	}
2965 
2966 	/* We put this priority last, knowing that nothing will get here
2967 	 * unless explicitly forwarded to. This is possible because the
2968 	 * slow path tables have catch all rules and nothing gets passed
2969 	 * those tables.
2970 	 */
2971 	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_PER_VPORT, 1);
2972 	if (IS_ERR(maj_prio)) {
2973 		err = PTR_ERR(maj_prio);
2974 		goto out_err;
2975 	}
2976 
2977 	set_prio_attrs(steering->fdb_root_ns);
2978 	return 0;
2979 
2980 out_err:
2981 	cleanup_fdb_root_ns(steering);
2982 	return err;
2983 }
2984 
2985 static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
2986 {
2987 	struct fs_prio *prio;
2988 
2989 	steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
2990 	if (!steering->esw_egress_root_ns[vport])
2991 		return -ENOMEM;
2992 
2993 	/* create 1 prio*/
2994 	prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1);
2995 	return PTR_ERR_OR_ZERO(prio);
2996 }
2997 
2998 static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
2999 {
3000 	struct fs_prio *prio;
3001 
3002 	steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
3003 	if (!steering->esw_ingress_root_ns[vport])
3004 		return -ENOMEM;
3005 
3006 	/* create 1 prio*/
3007 	prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1);
3008 	return PTR_ERR_OR_ZERO(prio);
3009 }
3010 
3011 int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports)
3012 {
3013 	struct mlx5_flow_steering *steering = dev->priv.steering;
3014 	int err;
3015 	int i;
3016 
3017 	steering->esw_egress_root_ns =
3018 			kcalloc(total_vports,
3019 				sizeof(*steering->esw_egress_root_ns),
3020 				GFP_KERNEL);
3021 	if (!steering->esw_egress_root_ns)
3022 		return -ENOMEM;
3023 
3024 	for (i = 0; i < total_vports; i++) {
3025 		err = init_egress_acl_root_ns(steering, i);
3026 		if (err)
3027 			goto cleanup_root_ns;
3028 	}
3029 	steering->esw_egress_acl_vports = total_vports;
3030 	return 0;
3031 
3032 cleanup_root_ns:
3033 	for (i--; i >= 0; i--)
3034 		cleanup_root_ns(steering->esw_egress_root_ns[i]);
3035 	kfree(steering->esw_egress_root_ns);
3036 	steering->esw_egress_root_ns = NULL;
3037 	return err;
3038 }
3039 
3040 void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev)
3041 {
3042 	struct mlx5_flow_steering *steering = dev->priv.steering;
3043 	int i;
3044 
3045 	if (!steering->esw_egress_root_ns)
3046 		return;
3047 
3048 	for (i = 0; i < steering->esw_egress_acl_vports; i++)
3049 		cleanup_root_ns(steering->esw_egress_root_ns[i]);
3050 
3051 	kfree(steering->esw_egress_root_ns);
3052 	steering->esw_egress_root_ns = NULL;
3053 }
3054 
3055 int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports)
3056 {
3057 	struct mlx5_flow_steering *steering = dev->priv.steering;
3058 	int err;
3059 	int i;
3060 
3061 	steering->esw_ingress_root_ns =
3062 			kcalloc(total_vports,
3063 				sizeof(*steering->esw_ingress_root_ns),
3064 				GFP_KERNEL);
3065 	if (!steering->esw_ingress_root_ns)
3066 		return -ENOMEM;
3067 
3068 	for (i = 0; i < total_vports; i++) {
3069 		err = init_ingress_acl_root_ns(steering, i);
3070 		if (err)
3071 			goto cleanup_root_ns;
3072 	}
3073 	steering->esw_ingress_acl_vports = total_vports;
3074 	return 0;
3075 
3076 cleanup_root_ns:
3077 	for (i--; i >= 0; i--)
3078 		cleanup_root_ns(steering->esw_ingress_root_ns[i]);
3079 	kfree(steering->esw_ingress_root_ns);
3080 	steering->esw_ingress_root_ns = NULL;
3081 	return err;
3082 }
3083 
3084 void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev)
3085 {
3086 	struct mlx5_flow_steering *steering = dev->priv.steering;
3087 	int i;
3088 
3089 	if (!steering->esw_ingress_root_ns)
3090 		return;
3091 
3092 	for (i = 0; i < steering->esw_ingress_acl_vports; i++)
3093 		cleanup_root_ns(steering->esw_ingress_root_ns[i]);
3094 
3095 	kfree(steering->esw_ingress_root_ns);
3096 	steering->esw_ingress_root_ns = NULL;
3097 }
3098 
3099 u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type)
3100 {
3101 	struct mlx5_flow_root_namespace *root;
3102 	struct mlx5_flow_namespace *ns;
3103 
3104 	ns = mlx5_get_flow_namespace(dev, type);
3105 	if (!ns)
3106 		return 0;
3107 
3108 	root = find_root(&ns->node);
3109 	if (!root)
3110 		return 0;
3111 
3112 	return root->cmds->get_capabilities(root, root->table_type);
3113 }
3114 
3115 static int init_egress_root_ns(struct mlx5_flow_steering *steering)
3116 {
3117 	int err;
3118 
3119 	steering->egress_root_ns = create_root_ns(steering,
3120 						  FS_FT_NIC_TX);
3121 	if (!steering->egress_root_ns)
3122 		return -ENOMEM;
3123 
3124 	err = init_root_tree(steering, &egress_root_fs,
3125 			     &steering->egress_root_ns->ns.node);
3126 	if (err)
3127 		goto cleanup;
3128 	set_prio_attrs(steering->egress_root_ns);
3129 	return 0;
3130 cleanup:
3131 	cleanup_root_ns(steering->egress_root_ns);
3132 	steering->egress_root_ns = NULL;
3133 	return err;
3134 }
3135 
3136 void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
3137 {
3138 	struct mlx5_flow_steering *steering = dev->priv.steering;
3139 
3140 	cleanup_root_ns(steering->root_ns);
3141 	cleanup_fdb_root_ns(steering);
3142 	cleanup_root_ns(steering->port_sel_root_ns);
3143 	cleanup_root_ns(steering->sniffer_rx_root_ns);
3144 	cleanup_root_ns(steering->sniffer_tx_root_ns);
3145 	cleanup_root_ns(steering->rdma_rx_root_ns);
3146 	cleanup_root_ns(steering->rdma_tx_root_ns);
3147 	cleanup_root_ns(steering->egress_root_ns);
3148 }
3149 
3150 int mlx5_fs_core_init(struct mlx5_core_dev *dev)
3151 {
3152 	struct mlx5_flow_steering *steering = dev->priv.steering;
3153 	int err = 0;
3154 
3155 	if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
3156 	      (MLX5_CAP_GEN(dev, nic_flow_table))) ||
3157 	     ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
3158 	      MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) &&
3159 	    MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) {
3160 		err = init_root_ns(steering);
3161 		if (err)
3162 			goto err;
3163 	}
3164 
3165 	if (MLX5_ESWITCH_MANAGER(dev)) {
3166 		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
3167 			err = init_fdb_root_ns(steering);
3168 			if (err)
3169 				goto err;
3170 		}
3171 	}
3172 
3173 	if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
3174 		err = init_sniffer_rx_root_ns(steering);
3175 		if (err)
3176 			goto err;
3177 	}
3178 
3179 	if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) {
3180 		err = init_sniffer_tx_root_ns(steering);
3181 		if (err)
3182 			goto err;
3183 	}
3184 
3185 	if (MLX5_CAP_FLOWTABLE_PORT_SELECTION(dev, ft_support)) {
3186 		err = init_port_sel_root_ns(steering);
3187 		if (err)
3188 			goto err;
3189 	}
3190 
3191 	if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
3192 	    MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) {
3193 		err = init_rdma_rx_root_ns(steering);
3194 		if (err)
3195 			goto err;
3196 	}
3197 
3198 	if (MLX5_CAP_FLOWTABLE_RDMA_TX(dev, ft_support)) {
3199 		err = init_rdma_tx_root_ns(steering);
3200 		if (err)
3201 			goto err;
3202 	}
3203 
3204 	if (MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
3205 		err = init_egress_root_ns(steering);
3206 		if (err)
3207 			goto err;
3208 	}
3209 
3210 	return 0;
3211 
3212 err:
3213 	mlx5_fs_core_cleanup(dev);
3214 	return err;
3215 }
3216 
3217 void mlx5_fs_core_free(struct mlx5_core_dev *dev)
3218 {
3219 	struct mlx5_flow_steering *steering = dev->priv.steering;
3220 
3221 	kmem_cache_destroy(steering->ftes_cache);
3222 	kmem_cache_destroy(steering->fgs_cache);
3223 	kfree(steering);
3224 	mlx5_ft_pool_destroy(dev);
3225 	mlx5_cleanup_fc_stats(dev);
3226 }
3227 
3228 int mlx5_fs_core_alloc(struct mlx5_core_dev *dev)
3229 {
3230 	struct mlx5_flow_steering *steering;
3231 	int err = 0;
3232 
3233 	err = mlx5_init_fc_stats(dev);
3234 	if (err)
3235 		return err;
3236 
3237 	err = mlx5_ft_pool_init(dev);
3238 	if (err)
3239 		goto err;
3240 
3241 	steering = kzalloc(sizeof(*steering), GFP_KERNEL);
3242 	if (!steering) {
3243 		err = -ENOMEM;
3244 		goto err;
3245 	}
3246 
3247 	steering->dev = dev;
3248 	dev->priv.steering = steering;
3249 
3250 	if (mlx5_fs_dr_is_supported(dev))
3251 		steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
3252 	else
3253 		steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
3254 
3255 	steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
3256 						sizeof(struct mlx5_flow_group), 0,
3257 						0, NULL);
3258 	steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
3259 						 0, NULL);
3260 	if (!steering->ftes_cache || !steering->fgs_cache) {
3261 		err = -ENOMEM;
3262 		goto err;
3263 	}
3264 
3265 	return 0;
3266 
3267 err:
3268 	mlx5_fs_core_free(dev);
3269 	return err;
3270 }
3271 
3272 int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
3273 {
3274 	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
3275 	struct mlx5_ft_underlay_qp *new_uqp;
3276 	int err = 0;
3277 
3278 	new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL);
3279 	if (!new_uqp)
3280 		return -ENOMEM;
3281 
3282 	mutex_lock(&root->chain_lock);
3283 
3284 	if (!root->root_ft) {
3285 		err = -EINVAL;
3286 		goto update_ft_fail;
3287 	}
3288 
3289 	err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn,
3290 					 false);
3291 	if (err) {
3292 		mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
3293 			       underlay_qpn, err);
3294 		goto update_ft_fail;
3295 	}
3296 
3297 	new_uqp->qpn = underlay_qpn;
3298 	list_add_tail(&new_uqp->list, &root->underlay_qpns);
3299 
3300 	mutex_unlock(&root->chain_lock);
3301 
3302 	return 0;
3303 
3304 update_ft_fail:
3305 	mutex_unlock(&root->chain_lock);
3306 	kfree(new_uqp);
3307 	return err;
3308 }
3309 EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);
3310 
3311 int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
3312 {
3313 	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
3314 	struct mlx5_ft_underlay_qp *uqp;
3315 	bool found = false;
3316 	int err = 0;
3317 
3318 	mutex_lock(&root->chain_lock);
3319 	list_for_each_entry(uqp, &root->underlay_qpns, list) {
3320 		if (uqp->qpn == underlay_qpn) {
3321 			found = true;
3322 			break;
3323 		}
3324 	}
3325 
3326 	if (!found) {
3327 		mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n",
3328 			       underlay_qpn);
3329 		err = -EINVAL;
3330 		goto out;
3331 	}
3332 
3333 	err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn,
3334 					 true);
3335 	if (err)
3336 		mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
3337 			       underlay_qpn, err);
3338 
3339 	list_del(&uqp->list);
3340 	mutex_unlock(&root->chain_lock);
3341 	kfree(uqp);
3342 
3343 	return 0;
3344 
3345 out:
3346 	mutex_unlock(&root->chain_lock);
3347 	return err;
3348 }
3349 EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
3350 
3351 static struct mlx5_flow_root_namespace
3352 *get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type)
3353 {
3354 	struct mlx5_flow_namespace *ns;
3355 
3356 	if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS ||
3357 	    ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS)
3358 		ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0);
3359 	else
3360 		ns = mlx5_get_flow_namespace(dev, ns_type);
3361 	if (!ns)
3362 		return NULL;
3363 
3364 	return find_root(&ns->node);
3365 }
3366 
3367 struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
3368 						 u8 ns_type, u8 num_actions,
3369 						 void *modify_actions)
3370 {
3371 	struct mlx5_flow_root_namespace *root;
3372 	struct mlx5_modify_hdr *modify_hdr;
3373 	int err;
3374 
3375 	root = get_root_namespace(dev, ns_type);
3376 	if (!root)
3377 		return ERR_PTR(-EOPNOTSUPP);
3378 
3379 	modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL);
3380 	if (!modify_hdr)
3381 		return ERR_PTR(-ENOMEM);
3382 
3383 	modify_hdr->ns_type = ns_type;
3384 	err = root->cmds->modify_header_alloc(root, ns_type, num_actions,
3385 					      modify_actions, modify_hdr);
3386 	if (err) {
3387 		kfree(modify_hdr);
3388 		return ERR_PTR(err);
3389 	}
3390 
3391 	return modify_hdr;
3392 }
3393 EXPORT_SYMBOL(mlx5_modify_header_alloc);
3394 
3395 void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
3396 				struct mlx5_modify_hdr *modify_hdr)
3397 {
3398 	struct mlx5_flow_root_namespace *root;
3399 
3400 	root = get_root_namespace(dev, modify_hdr->ns_type);
3401 	if (WARN_ON(!root))
3402 		return;
3403 	root->cmds->modify_header_dealloc(root, modify_hdr);
3404 	kfree(modify_hdr);
3405 }
3406 EXPORT_SYMBOL(mlx5_modify_header_dealloc);
3407 
3408 struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
3409 						     struct mlx5_pkt_reformat_params *params,
3410 						     enum mlx5_flow_namespace_type ns_type)
3411 {
3412 	struct mlx5_pkt_reformat *pkt_reformat;
3413 	struct mlx5_flow_root_namespace *root;
3414 	int err;
3415 
3416 	root = get_root_namespace(dev, ns_type);
3417 	if (!root)
3418 		return ERR_PTR(-EOPNOTSUPP);
3419 
3420 	pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL);
3421 	if (!pkt_reformat)
3422 		return ERR_PTR(-ENOMEM);
3423 
3424 	pkt_reformat->ns_type = ns_type;
3425 	pkt_reformat->reformat_type = params->type;
3426 	err = root->cmds->packet_reformat_alloc(root, params, ns_type,
3427 						pkt_reformat);
3428 	if (err) {
3429 		kfree(pkt_reformat);
3430 		return ERR_PTR(err);
3431 	}
3432 
3433 	return pkt_reformat;
3434 }
3435 EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
3436 
3437 void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
3438 				  struct mlx5_pkt_reformat *pkt_reformat)
3439 {
3440 	struct mlx5_flow_root_namespace *root;
3441 
3442 	root = get_root_namespace(dev, pkt_reformat->ns_type);
3443 	if (WARN_ON(!root))
3444 		return;
3445 	root->cmds->packet_reformat_dealloc(root, pkt_reformat);
3446 	kfree(pkt_reformat);
3447 }
3448 EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
3449 
3450 int mlx5_get_match_definer_id(struct mlx5_flow_definer *definer)
3451 {
3452 	return definer->id;
3453 }
3454 
3455 struct mlx5_flow_definer *
3456 mlx5_create_match_definer(struct mlx5_core_dev *dev,
3457 			  enum mlx5_flow_namespace_type ns_type, u16 format_id,
3458 			  u32 *match_mask)
3459 {
3460 	struct mlx5_flow_root_namespace *root;
3461 	struct mlx5_flow_definer *definer;
3462 	int id;
3463 
3464 	root = get_root_namespace(dev, ns_type);
3465 	if (!root)
3466 		return ERR_PTR(-EOPNOTSUPP);
3467 
3468 	definer = kzalloc(sizeof(*definer), GFP_KERNEL);
3469 	if (!definer)
3470 		return ERR_PTR(-ENOMEM);
3471 
3472 	definer->ns_type = ns_type;
3473 	id = root->cmds->create_match_definer(root, format_id, match_mask);
3474 	if (id < 0) {
3475 		mlx5_core_warn(root->dev, "Failed to create match definer (%d)\n", id);
3476 		kfree(definer);
3477 		return ERR_PTR(id);
3478 	}
3479 	definer->id = id;
3480 	return definer;
3481 }
3482 
3483 void mlx5_destroy_match_definer(struct mlx5_core_dev *dev,
3484 				struct mlx5_flow_definer *definer)
3485 {
3486 	struct mlx5_flow_root_namespace *root;
3487 
3488 	root = get_root_namespace(dev, definer->ns_type);
3489 	if (WARN_ON(!root))
3490 		return;
3491 
3492 	root->cmds->destroy_match_definer(root, definer->id);
3493 	kfree(definer);
3494 }
3495 
3496 int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
3497 				 struct mlx5_flow_root_namespace *peer_ns)
3498 {
3499 	if (peer_ns && ns->mode != peer_ns->mode) {
3500 		mlx5_core_err(ns->dev,
3501 			      "Can't peer namespace of different steering mode\n");
3502 		return -EINVAL;
3503 	}
3504 
3505 	return ns->cmds->set_peer(ns, peer_ns);
3506 }
3507 
3508 /* This function should be called only at init stage of the namespace.
3509  * It is not safe to call this function while steering operations
3510  * are executed in the namespace.
3511  */
3512 int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
3513 				 enum mlx5_flow_steering_mode mode)
3514 {
3515 	struct mlx5_flow_root_namespace *root;
3516 	const struct mlx5_flow_cmds *cmds;
3517 	int err;
3518 
3519 	root = find_root(&ns->node);
3520 	if (&root->ns != ns)
3521 	/* Can't set cmds to non root namespace */
3522 		return -EINVAL;
3523 
3524 	if (root->table_type != FS_FT_FDB)
3525 		return -EOPNOTSUPP;
3526 
3527 	if (root->mode == mode)
3528 		return 0;
3529 
3530 	if (mode == MLX5_FLOW_STEERING_MODE_SMFS)
3531 		cmds = mlx5_fs_cmd_get_dr_cmds();
3532 	else
3533 		cmds = mlx5_fs_cmd_get_fw_cmds();
3534 	if (!cmds)
3535 		return -EOPNOTSUPP;
3536 
3537 	err = cmds->create_ns(root);
3538 	if (err) {
3539 		mlx5_core_err(root->dev, "Failed to create flow namespace (%d)\n",
3540 			      err);
3541 		return err;
3542 	}
3543 
3544 	root->cmds->destroy_ns(root);
3545 	root->cmds = cmds;
3546 	root->mode = mode;
3547 
3548 	return 0;
3549 }
3550