1 /*
2  * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/mutex.h>
34 #include <linux/mlx5/driver.h>
35 #include <linux/mlx5/vport.h>
36 #include <linux/mlx5/eswitch.h>
37 
38 #include "mlx5_core.h"
39 #include "fs_core.h"
40 #include "fs_cmd.h"
41 #include "diag/fs_tracepoint.h"
42 #include "accel/ipsec.h"
43 #include "fpga/ipsec.h"
44 
45 #define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
46 					 sizeof(struct init_tree_node))
47 
48 #define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\
49 		 ...) {.type = FS_TYPE_PRIO,\
50 	.min_ft_level = min_level_val,\
51 	.num_levels = num_levels_val,\
52 	.num_leaf_prios = num_prios_val,\
53 	.caps = caps_val,\
54 	.children = (struct init_tree_node[]) {__VA_ARGS__},\
55 	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
56 }
57 
58 #define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\
59 	ADD_PRIO(num_prios_val, 0, num_levels_val, {},\
60 		 __VA_ARGS__)\
61 
62 #define ADD_NS(def_miss_act, ...) {.type = FS_TYPE_NAMESPACE,	\
63 	.def_miss_action = def_miss_act,\
64 	.children = (struct init_tree_node[]) {__VA_ARGS__},\
65 	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
66 }
67 
68 #define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
69 				   sizeof(long))
70 
71 #define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))
72 
73 #define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
74 			       .caps = (long[]) {__VA_ARGS__} }
75 
76 #define FS_CHAINING_CAPS  FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \
77 					   FS_CAP(flow_table_properties_nic_receive.modify_root), \
78 					   FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
79 					   FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
80 
81 #define FS_CHAINING_CAPS_EGRESS                                                \
82 	FS_REQUIRED_CAPS(                                                      \
83 		FS_CAP(flow_table_properties_nic_transmit.flow_modify_en),     \
84 		FS_CAP(flow_table_properties_nic_transmit.modify_root),        \
85 		FS_CAP(flow_table_properties_nic_transmit                      \
86 			       .identified_miss_table_mode),                   \
87 		FS_CAP(flow_table_properties_nic_transmit.flow_table_modify))
88 
89 #define FS_CHAINING_CAPS_RDMA_TX                                                \
90 	FS_REQUIRED_CAPS(                                                       \
91 		FS_CAP(flow_table_properties_nic_transmit_rdma.flow_modify_en), \
92 		FS_CAP(flow_table_properties_nic_transmit_rdma.modify_root),    \
93 		FS_CAP(flow_table_properties_nic_transmit_rdma                  \
94 			       .identified_miss_table_mode),                    \
95 		FS_CAP(flow_table_properties_nic_transmit_rdma                  \
96 			       .flow_table_modify))
97 
98 #define LEFTOVERS_NUM_LEVELS 1
99 #define LEFTOVERS_NUM_PRIOS 1
100 
101 #define BY_PASS_PRIO_NUM_LEVELS 1
102 #define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
103 			   LEFTOVERS_NUM_PRIOS)
104 
105 #define ETHTOOL_PRIO_NUM_LEVELS 1
106 #define ETHTOOL_NUM_PRIOS 11
107 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
108 /* Vlan, mac, ttc, inner ttc, {aRFS/accel and esp/esp_err} */
109 #define KERNEL_NIC_PRIO_NUM_LEVELS 6
110 #define KERNEL_NIC_NUM_PRIOS 1
111 /* One more level for tc */
112 #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
113 
114 #define KERNEL_NIC_TC_NUM_PRIOS  1
115 #define KERNEL_NIC_TC_NUM_LEVELS 2
116 
117 #define ANCHOR_NUM_LEVELS 1
118 #define ANCHOR_NUM_PRIOS 1
119 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
120 
121 #define OFFLOADS_MAX_FT 2
122 #define OFFLOADS_NUM_PRIOS 2
123 #define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS)
124 
125 #define LAG_PRIO_NUM_LEVELS 1
126 #define LAG_NUM_PRIOS 1
127 #define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
128 
129 struct node_caps {
130 	size_t	arr_sz;
131 	long	*caps;
132 };
133 
134 static struct init_tree_node {
135 	enum fs_node_type	type;
136 	struct init_tree_node *children;
137 	int ar_size;
138 	struct node_caps caps;
139 	int min_ft_level;
140 	int num_leaf_prios;
141 	int prio;
142 	int num_levels;
143 	enum mlx5_flow_table_miss_action def_miss_action;
144 } root_fs = {
145 	.type = FS_TYPE_NAMESPACE,
146 	.ar_size = 7,
147 	  .children = (struct init_tree_node[]){
148 		  ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
149 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
150 				  ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
151 						    BY_PASS_PRIO_NUM_LEVELS))),
152 		  ADD_PRIO(0, LAG_MIN_LEVEL, 0, FS_CHAINING_CAPS,
153 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
154 				  ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
155 						    LAG_PRIO_NUM_LEVELS))),
156 		  ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
157 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
158 				  ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS,
159 						    OFFLOADS_MAX_FT))),
160 		  ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, FS_CHAINING_CAPS,
161 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
162 				  ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS,
163 						    ETHTOOL_PRIO_NUM_LEVELS))),
164 		  ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
165 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
166 				  ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS,
167 						    KERNEL_NIC_TC_NUM_LEVELS),
168 				  ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
169 						    KERNEL_NIC_PRIO_NUM_LEVELS))),
170 		  ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
171 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
172 				  ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS,
173 						    LEFTOVERS_NUM_LEVELS))),
174 		  ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
175 			   ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
176 				  ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS,
177 						    ANCHOR_NUM_LEVELS))),
178 	}
179 };
180 
181 static struct init_tree_node egress_root_fs = {
182 	.type = FS_TYPE_NAMESPACE,
183 	.ar_size = 1,
184 	.children = (struct init_tree_node[]) {
185 		ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
186 			 FS_CHAINING_CAPS_EGRESS,
187 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
188 				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
189 						  BY_PASS_PRIO_NUM_LEVELS))),
190 	}
191 };
192 
193 #define RDMA_RX_BYPASS_PRIO 0
194 #define RDMA_RX_KERNEL_PRIO 1
195 static struct init_tree_node rdma_rx_root_fs = {
196 	.type = FS_TYPE_NAMESPACE,
197 	.ar_size = 2,
198 	.children = (struct init_tree_node[]) {
199 		[RDMA_RX_BYPASS_PRIO] =
200 		ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS, 0,
201 			 FS_CHAINING_CAPS,
202 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
203 				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS,
204 						  BY_PASS_PRIO_NUM_LEVELS))),
205 		[RDMA_RX_KERNEL_PRIO] =
206 		ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS + 1, 0,
207 			 FS_CHAINING_CAPS,
208 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN,
209 				ADD_MULTIPLE_PRIO(1, 1))),
210 	}
211 };
212 
213 static struct init_tree_node rdma_tx_root_fs = {
214 	.type = FS_TYPE_NAMESPACE,
215 	.ar_size = 1,
216 	.children = (struct init_tree_node[]) {
217 		ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
218 			 FS_CHAINING_CAPS_RDMA_TX,
219 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
220 				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
221 						  BY_PASS_PRIO_NUM_LEVELS))),
222 	}
223 };
224 
225 enum fs_i_lock_class {
226 	FS_LOCK_GRANDPARENT,
227 	FS_LOCK_PARENT,
228 	FS_LOCK_CHILD
229 };
230 
231 static const struct rhashtable_params rhash_fte = {
232 	.key_len = sizeof_field(struct fs_fte, val),
233 	.key_offset = offsetof(struct fs_fte, val),
234 	.head_offset = offsetof(struct fs_fte, hash),
235 	.automatic_shrinking = true,
236 	.min_size = 1,
237 };
238 
239 static const struct rhashtable_params rhash_fg = {
240 	.key_len = sizeof_field(struct mlx5_flow_group, mask),
241 	.key_offset = offsetof(struct mlx5_flow_group, mask),
242 	.head_offset = offsetof(struct mlx5_flow_group, hash),
243 	.automatic_shrinking = true,
244 	.min_size = 1,
245 
246 };
247 
248 static void del_hw_flow_table(struct fs_node *node);
249 static void del_hw_flow_group(struct fs_node *node);
250 static void del_hw_fte(struct fs_node *node);
251 static void del_sw_flow_table(struct fs_node *node);
252 static void del_sw_flow_group(struct fs_node *node);
253 static void del_sw_fte(struct fs_node *node);
254 static void del_sw_prio(struct fs_node *node);
255 static void del_sw_ns(struct fs_node *node);
256 /* Delete rule (destination) is special case that
257  * requires to lock the FTE for all the deletion process.
258  */
259 static void del_sw_hw_rule(struct fs_node *node);
260 static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
261 				struct mlx5_flow_destination *d2);
262 static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns);
263 static struct mlx5_flow_rule *
264 find_flow_rule(struct fs_fte *fte,
265 	       struct mlx5_flow_destination *dest);
266 
267 static void tree_init_node(struct fs_node *node,
268 			   void (*del_hw_func)(struct fs_node *),
269 			   void (*del_sw_func)(struct fs_node *))
270 {
271 	refcount_set(&node->refcount, 1);
272 	INIT_LIST_HEAD(&node->list);
273 	INIT_LIST_HEAD(&node->children);
274 	init_rwsem(&node->lock);
275 	node->del_hw_func = del_hw_func;
276 	node->del_sw_func = del_sw_func;
277 	node->active = false;
278 }
279 
280 static void tree_add_node(struct fs_node *node, struct fs_node *parent)
281 {
282 	if (parent)
283 		refcount_inc(&parent->refcount);
284 	node->parent = parent;
285 
286 	/* Parent is the root */
287 	if (!parent)
288 		node->root = node;
289 	else
290 		node->root = parent->root;
291 }
292 
293 static int tree_get_node(struct fs_node *node)
294 {
295 	return refcount_inc_not_zero(&node->refcount);
296 }
297 
298 static void nested_down_read_ref_node(struct fs_node *node,
299 				      enum fs_i_lock_class class)
300 {
301 	if (node) {
302 		down_read_nested(&node->lock, class);
303 		refcount_inc(&node->refcount);
304 	}
305 }
306 
307 static void nested_down_write_ref_node(struct fs_node *node,
308 				       enum fs_i_lock_class class)
309 {
310 	if (node) {
311 		down_write_nested(&node->lock, class);
312 		refcount_inc(&node->refcount);
313 	}
314 }
315 
316 static void down_write_ref_node(struct fs_node *node, bool locked)
317 {
318 	if (node) {
319 		if (!locked)
320 			down_write(&node->lock);
321 		refcount_inc(&node->refcount);
322 	}
323 }
324 
325 static void up_read_ref_node(struct fs_node *node)
326 {
327 	refcount_dec(&node->refcount);
328 	up_read(&node->lock);
329 }
330 
331 static void up_write_ref_node(struct fs_node *node, bool locked)
332 {
333 	refcount_dec(&node->refcount);
334 	if (!locked)
335 		up_write(&node->lock);
336 }
337 
338 static void tree_put_node(struct fs_node *node, bool locked)
339 {
340 	struct fs_node *parent_node = node->parent;
341 
342 	if (refcount_dec_and_test(&node->refcount)) {
343 		if (node->del_hw_func)
344 			node->del_hw_func(node);
345 		if (parent_node) {
346 			down_write_ref_node(parent_node, locked);
347 			list_del_init(&node->list);
348 		}
349 		node->del_sw_func(node);
350 		if (parent_node)
351 			up_write_ref_node(parent_node, locked);
352 		node = NULL;
353 	}
354 	if (!node && parent_node)
355 		tree_put_node(parent_node, locked);
356 }
357 
358 static int tree_remove_node(struct fs_node *node, bool locked)
359 {
360 	if (refcount_read(&node->refcount) > 1) {
361 		refcount_dec(&node->refcount);
362 		return -EEXIST;
363 	}
364 	tree_put_node(node, locked);
365 	return 0;
366 }
367 
368 static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
369 				 unsigned int prio)
370 {
371 	struct fs_prio *iter_prio;
372 
373 	fs_for_each_prio(iter_prio, ns) {
374 		if (iter_prio->prio == prio)
375 			return iter_prio;
376 	}
377 
378 	return NULL;
379 }
380 
381 static bool is_fwd_next_action(u32 action)
382 {
383 	return action & (MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
384 			 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS);
385 }
386 
387 static bool check_valid_spec(const struct mlx5_flow_spec *spec)
388 {
389 	int i;
390 
391 	for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
392 		if (spec->match_value[i] & ~spec->match_criteria[i]) {
393 			pr_warn("mlx5_core: match_value differs from match_criteria\n");
394 			return false;
395 		}
396 
397 	return true;
398 }
399 
400 static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
401 {
402 	struct fs_node *root;
403 	struct mlx5_flow_namespace *ns;
404 
405 	root = node->root;
406 
407 	if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) {
408 		pr_warn("mlx5: flow steering node is not in tree or garbaged\n");
409 		return NULL;
410 	}
411 
412 	ns = container_of(root, struct mlx5_flow_namespace, node);
413 	return container_of(ns, struct mlx5_flow_root_namespace, ns);
414 }
415 
416 static inline struct mlx5_flow_steering *get_steering(struct fs_node *node)
417 {
418 	struct mlx5_flow_root_namespace *root = find_root(node);
419 
420 	if (root)
421 		return root->dev->priv.steering;
422 	return NULL;
423 }
424 
425 static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
426 {
427 	struct mlx5_flow_root_namespace *root = find_root(node);
428 
429 	if (root)
430 		return root->dev;
431 	return NULL;
432 }
433 
434 static void del_sw_ns(struct fs_node *node)
435 {
436 	kfree(node);
437 }
438 
439 static void del_sw_prio(struct fs_node *node)
440 {
441 	kfree(node);
442 }
443 
444 static void del_hw_flow_table(struct fs_node *node)
445 {
446 	struct mlx5_flow_root_namespace *root;
447 	struct mlx5_flow_table *ft;
448 	struct mlx5_core_dev *dev;
449 	int err;
450 
451 	fs_get_obj(ft, node);
452 	dev = get_dev(&ft->node);
453 	root = find_root(&ft->node);
454 	trace_mlx5_fs_del_ft(ft);
455 
456 	if (node->active) {
457 		err = root->cmds->destroy_flow_table(root, ft);
458 		if (err)
459 			mlx5_core_warn(dev, "flow steering can't destroy ft\n");
460 	}
461 }
462 
463 static void del_sw_flow_table(struct fs_node *node)
464 {
465 	struct mlx5_flow_table *ft;
466 	struct fs_prio *prio;
467 
468 	fs_get_obj(ft, node);
469 
470 	rhltable_destroy(&ft->fgs_hash);
471 	if (ft->node.parent) {
472 		fs_get_obj(prio, ft->node.parent);
473 		prio->num_ft--;
474 	}
475 	kfree(ft);
476 }
477 
478 static void modify_fte(struct fs_fte *fte)
479 {
480 	struct mlx5_flow_root_namespace *root;
481 	struct mlx5_flow_table *ft;
482 	struct mlx5_flow_group *fg;
483 	struct mlx5_core_dev *dev;
484 	int err;
485 
486 	fs_get_obj(fg, fte->node.parent);
487 	fs_get_obj(ft, fg->node.parent);
488 	dev = get_dev(&fte->node);
489 
490 	root = find_root(&ft->node);
491 	err = root->cmds->update_fte(root, ft, fg, fte->modify_mask, fte);
492 	if (err)
493 		mlx5_core_warn(dev,
494 			       "%s can't del rule fg id=%d fte_index=%d\n",
495 			       __func__, fg->id, fte->index);
496 	fte->modify_mask = 0;
497 }
498 
499 static void del_sw_hw_rule(struct fs_node *node)
500 {
501 	struct mlx5_flow_rule *rule;
502 	struct fs_fte *fte;
503 
504 	fs_get_obj(rule, node);
505 	fs_get_obj(fte, rule->node.parent);
506 	trace_mlx5_fs_del_rule(rule);
507 	if (is_fwd_next_action(rule->sw_action)) {
508 		mutex_lock(&rule->dest_attr.ft->lock);
509 		list_del(&rule->next_ft);
510 		mutex_unlock(&rule->dest_attr.ft->lock);
511 	}
512 
513 	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER  &&
514 	    --fte->dests_size) {
515 		fte->modify_mask |=
516 			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
517 			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
518 		fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
519 		goto out;
520 	}
521 
522 	if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
523 	    --fte->dests_size) {
524 		fte->modify_mask |=
525 			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
526 	}
527 out:
528 	kfree(rule);
529 }
530 
531 static void del_hw_fte(struct fs_node *node)
532 {
533 	struct mlx5_flow_root_namespace *root;
534 	struct mlx5_flow_table *ft;
535 	struct mlx5_flow_group *fg;
536 	struct mlx5_core_dev *dev;
537 	struct fs_fte *fte;
538 	int err;
539 
540 	fs_get_obj(fte, node);
541 	fs_get_obj(fg, fte->node.parent);
542 	fs_get_obj(ft, fg->node.parent);
543 
544 	trace_mlx5_fs_del_fte(fte);
545 	dev = get_dev(&ft->node);
546 	root = find_root(&ft->node);
547 	if (node->active) {
548 		err = root->cmds->delete_fte(root, ft, fte);
549 		if (err)
550 			mlx5_core_warn(dev,
551 				       "flow steering can't delete fte in index %d of flow group id %d\n",
552 				       fte->index, fg->id);
553 		node->active = 0;
554 	}
555 }
556 
557 static void del_sw_fte(struct fs_node *node)
558 {
559 	struct mlx5_flow_steering *steering = get_steering(node);
560 	struct mlx5_flow_group *fg;
561 	struct fs_fte *fte;
562 	int err;
563 
564 	fs_get_obj(fte, node);
565 	fs_get_obj(fg, fte->node.parent);
566 
567 	err = rhashtable_remove_fast(&fg->ftes_hash,
568 				     &fte->hash,
569 				     rhash_fte);
570 	WARN_ON(err);
571 	ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
572 	kmem_cache_free(steering->ftes_cache, fte);
573 }
574 
575 static void del_hw_flow_group(struct fs_node *node)
576 {
577 	struct mlx5_flow_root_namespace *root;
578 	struct mlx5_flow_group *fg;
579 	struct mlx5_flow_table *ft;
580 	struct mlx5_core_dev *dev;
581 
582 	fs_get_obj(fg, node);
583 	fs_get_obj(ft, fg->node.parent);
584 	dev = get_dev(&ft->node);
585 	trace_mlx5_fs_del_fg(fg);
586 
587 	root = find_root(&ft->node);
588 	if (fg->node.active && root->cmds->destroy_flow_group(root, ft, fg))
589 		mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
590 			       fg->id, ft->id);
591 }
592 
593 static void del_sw_flow_group(struct fs_node *node)
594 {
595 	struct mlx5_flow_steering *steering = get_steering(node);
596 	struct mlx5_flow_group *fg;
597 	struct mlx5_flow_table *ft;
598 	int err;
599 
600 	fs_get_obj(fg, node);
601 	fs_get_obj(ft, fg->node.parent);
602 
603 	rhashtable_destroy(&fg->ftes_hash);
604 	ida_destroy(&fg->fte_allocator);
605 	if (ft->autogroup.active &&
606 	    fg->max_ftes == ft->autogroup.group_size &&
607 	    fg->start_index < ft->autogroup.max_fte)
608 		ft->autogroup.num_groups--;
609 	err = rhltable_remove(&ft->fgs_hash,
610 			      &fg->hash,
611 			      rhash_fg);
612 	WARN_ON(err);
613 	kmem_cache_free(steering->fgs_cache, fg);
614 }
615 
616 static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
617 {
618 	int index;
619 	int ret;
620 
621 	index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL);
622 	if (index < 0)
623 		return index;
624 
625 	fte->index = index + fg->start_index;
626 	ret = rhashtable_insert_fast(&fg->ftes_hash,
627 				     &fte->hash,
628 				     rhash_fte);
629 	if (ret)
630 		goto err_ida_remove;
631 
632 	tree_add_node(&fte->node, &fg->node);
633 	list_add_tail(&fte->node.list, &fg->node.children);
634 	return 0;
635 
636 err_ida_remove:
637 	ida_simple_remove(&fg->fte_allocator, index);
638 	return ret;
639 }
640 
641 static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
642 				const struct mlx5_flow_spec *spec,
643 				struct mlx5_flow_act *flow_act)
644 {
645 	struct mlx5_flow_steering *steering = get_steering(&ft->node);
646 	struct fs_fte *fte;
647 
648 	fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL);
649 	if (!fte)
650 		return ERR_PTR(-ENOMEM);
651 
652 	memcpy(fte->val, &spec->match_value, sizeof(fte->val));
653 	fte->node.type =  FS_TYPE_FLOW_ENTRY;
654 	fte->action = *flow_act;
655 	fte->flow_context = spec->flow_context;
656 
657 	tree_init_node(&fte->node, NULL, del_sw_fte);
658 
659 	return fte;
660 }
661 
662 static void dealloc_flow_group(struct mlx5_flow_steering *steering,
663 			       struct mlx5_flow_group *fg)
664 {
665 	rhashtable_destroy(&fg->ftes_hash);
666 	kmem_cache_free(steering->fgs_cache, fg);
667 }
668 
669 static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
670 						u8 match_criteria_enable,
671 						const void *match_criteria,
672 						int start_index,
673 						int end_index)
674 {
675 	struct mlx5_flow_group *fg;
676 	int ret;
677 
678 	fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL);
679 	if (!fg)
680 		return ERR_PTR(-ENOMEM);
681 
682 	ret = rhashtable_init(&fg->ftes_hash, &rhash_fte);
683 	if (ret) {
684 		kmem_cache_free(steering->fgs_cache, fg);
685 		return ERR_PTR(ret);
686 	}
687 
688 	ida_init(&fg->fte_allocator);
689 	fg->mask.match_criteria_enable = match_criteria_enable;
690 	memcpy(&fg->mask.match_criteria, match_criteria,
691 	       sizeof(fg->mask.match_criteria));
692 	fg->node.type =  FS_TYPE_FLOW_GROUP;
693 	fg->start_index = start_index;
694 	fg->max_ftes = end_index - start_index + 1;
695 
696 	return fg;
697 }
698 
699 static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
700 						       u8 match_criteria_enable,
701 						       const void *match_criteria,
702 						       int start_index,
703 						       int end_index,
704 						       struct list_head *prev)
705 {
706 	struct mlx5_flow_steering *steering = get_steering(&ft->node);
707 	struct mlx5_flow_group *fg;
708 	int ret;
709 
710 	fg = alloc_flow_group(steering, match_criteria_enable, match_criteria,
711 			      start_index, end_index);
712 	if (IS_ERR(fg))
713 		return fg;
714 
715 	/* initialize refcnt, add to parent list */
716 	ret = rhltable_insert(&ft->fgs_hash,
717 			      &fg->hash,
718 			      rhash_fg);
719 	if (ret) {
720 		dealloc_flow_group(steering, fg);
721 		return ERR_PTR(ret);
722 	}
723 
724 	tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group);
725 	tree_add_node(&fg->node, &ft->node);
726 	/* Add node to group list */
727 	list_add(&fg->node.list, prev);
728 	atomic_inc(&ft->node.version);
729 
730 	return fg;
731 }
732 
733 static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
734 						enum fs_flow_table_type table_type,
735 						enum fs_flow_table_op_mod op_mod,
736 						u32 flags)
737 {
738 	struct mlx5_flow_table *ft;
739 	int ret;
740 
741 	ft  = kzalloc(sizeof(*ft), GFP_KERNEL);
742 	if (!ft)
743 		return ERR_PTR(-ENOMEM);
744 
745 	ret = rhltable_init(&ft->fgs_hash, &rhash_fg);
746 	if (ret) {
747 		kfree(ft);
748 		return ERR_PTR(ret);
749 	}
750 
751 	ft->level = level;
752 	ft->node.type = FS_TYPE_FLOW_TABLE;
753 	ft->op_mod = op_mod;
754 	ft->type = table_type;
755 	ft->vport = vport;
756 	ft->max_fte = max_fte;
757 	ft->flags = flags;
758 	INIT_LIST_HEAD(&ft->fwd_rules);
759 	mutex_init(&ft->lock);
760 
761 	return ft;
762 }
763 
764 /* If reverse is false, then we search for the first flow table in the
765  * root sub-tree from start(closest from right), else we search for the
766  * last flow table in the root sub-tree till start(closest from left).
767  */
768 static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node  *root,
769 							 struct list_head *start,
770 							 bool reverse)
771 {
772 #define list_advance_entry(pos, reverse)		\
773 	((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list))
774 
775 #define list_for_each_advance_continue(pos, head, reverse)	\
776 	for (pos = list_advance_entry(pos, reverse);		\
777 	     &pos->list != (head);				\
778 	     pos = list_advance_entry(pos, reverse))
779 
780 	struct fs_node *iter = list_entry(start, struct fs_node, list);
781 	struct mlx5_flow_table *ft = NULL;
782 
783 	if (!root || root->type == FS_TYPE_PRIO_CHAINS)
784 		return NULL;
785 
786 	list_for_each_advance_continue(iter, &root->children, reverse) {
787 		if (iter->type == FS_TYPE_FLOW_TABLE) {
788 			fs_get_obj(ft, iter);
789 			return ft;
790 		}
791 		ft = find_closest_ft_recursive(iter, &iter->children, reverse);
792 		if (ft)
793 			return ft;
794 	}
795 
796 	return ft;
797 }
798 
799 /* If reverse is false then return the first flow table in next priority of
800  * prio in the tree, else return the last flow table in the previous priority
801  * of prio in the tree.
802  */
803 static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
804 {
805 	struct mlx5_flow_table *ft = NULL;
806 	struct fs_node *curr_node;
807 	struct fs_node *parent;
808 
809 	parent = prio->node.parent;
810 	curr_node = &prio->node;
811 	while (!ft && parent) {
812 		ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
813 		curr_node = parent;
814 		parent = curr_node->parent;
815 	}
816 	return ft;
817 }
818 
819 /* Assuming all the tree is locked by mutex chain lock */
820 static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
821 {
822 	return find_closest_ft(prio, false);
823 }
824 
825 /* Assuming all the tree is locked by mutex chain lock */
826 static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
827 {
828 	return find_closest_ft(prio, true);
829 }
830 
831 static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
832 						struct mlx5_flow_act *flow_act)
833 {
834 	struct fs_prio *prio;
835 	bool next_ns;
836 
837 	next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
838 	fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent);
839 
840 	return find_next_chained_ft(prio);
841 }
842 
843 static int connect_fts_in_prio(struct mlx5_core_dev *dev,
844 			       struct fs_prio *prio,
845 			       struct mlx5_flow_table *ft)
846 {
847 	struct mlx5_flow_root_namespace *root = find_root(&prio->node);
848 	struct mlx5_flow_table *iter;
849 	int err;
850 
851 	fs_for_each_ft(iter, prio) {
852 		err = root->cmds->modify_flow_table(root, iter, ft);
853 		if (err) {
854 			mlx5_core_err(dev,
855 				      "Failed to modify flow table id %d, type %d, err %d\n",
856 				      iter->id, iter->type, err);
857 			/* The driver is out of sync with the FW */
858 			return err;
859 		}
860 	}
861 	return 0;
862 }
863 
864 /* Connect flow tables from previous priority of prio to ft */
865 static int connect_prev_fts(struct mlx5_core_dev *dev,
866 			    struct mlx5_flow_table *ft,
867 			    struct fs_prio *prio)
868 {
869 	struct mlx5_flow_table *prev_ft;
870 
871 	prev_ft = find_prev_chained_ft(prio);
872 	if (prev_ft) {
873 		struct fs_prio *prev_prio;
874 
875 		fs_get_obj(prev_prio, prev_ft->node.parent);
876 		return connect_fts_in_prio(dev, prev_prio, ft);
877 	}
878 	return 0;
879 }
880 
881 static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
882 				 *prio)
883 {
884 	struct mlx5_flow_root_namespace *root = find_root(&prio->node);
885 	struct mlx5_ft_underlay_qp *uqp;
886 	int min_level = INT_MAX;
887 	int err = 0;
888 	u32 qpn;
889 
890 	if (root->root_ft)
891 		min_level = root->root_ft->level;
892 
893 	if (ft->level >= min_level)
894 		return 0;
895 
896 	if (list_empty(&root->underlay_qpns)) {
897 		/* Don't set any QPN (zero) in case QPN list is empty */
898 		qpn = 0;
899 		err = root->cmds->update_root_ft(root, ft, qpn, false);
900 	} else {
901 		list_for_each_entry(uqp, &root->underlay_qpns, list) {
902 			qpn = uqp->qpn;
903 			err = root->cmds->update_root_ft(root, ft,
904 							 qpn, false);
905 			if (err)
906 				break;
907 		}
908 	}
909 
910 	if (err)
911 		mlx5_core_warn(root->dev,
912 			       "Update root flow table of id(%u) qpn(%d) failed\n",
913 			       ft->id, qpn);
914 	else
915 		root->root_ft = ft;
916 
917 	return err;
918 }
919 
920 static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
921 					 struct mlx5_flow_destination *dest)
922 {
923 	struct mlx5_flow_root_namespace *root;
924 	struct mlx5_flow_table *ft;
925 	struct mlx5_flow_group *fg;
926 	struct fs_fte *fte;
927 	int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
928 	int err = 0;
929 
930 	fs_get_obj(fte, rule->node.parent);
931 	if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
932 		return -EINVAL;
933 	down_write_ref_node(&fte->node, false);
934 	fs_get_obj(fg, fte->node.parent);
935 	fs_get_obj(ft, fg->node.parent);
936 
937 	memcpy(&rule->dest_attr, dest, sizeof(*dest));
938 	root = find_root(&ft->node);
939 	err = root->cmds->update_fte(root, ft, fg,
940 				     modify_mask, fte);
941 	up_write_ref_node(&fte->node, false);
942 
943 	return err;
944 }
945 
946 int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
947 				 struct mlx5_flow_destination *new_dest,
948 				 struct mlx5_flow_destination *old_dest)
949 {
950 	int i;
951 
952 	if (!old_dest) {
953 		if (handle->num_rules != 1)
954 			return -EINVAL;
955 		return _mlx5_modify_rule_destination(handle->rule[0],
956 						     new_dest);
957 	}
958 
959 	for (i = 0; i < handle->num_rules; i++) {
960 		if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr))
961 			return _mlx5_modify_rule_destination(handle->rule[i],
962 							     new_dest);
963 	}
964 
965 	return -EINVAL;
966 }
967 
968 /* Modify/set FWD rules that point on old_next_ft to point on new_next_ft  */
969 static int connect_fwd_rules(struct mlx5_core_dev *dev,
970 			     struct mlx5_flow_table *new_next_ft,
971 			     struct mlx5_flow_table *old_next_ft)
972 {
973 	struct mlx5_flow_destination dest = {};
974 	struct mlx5_flow_rule *iter;
975 	int err = 0;
976 
977 	/* new_next_ft and old_next_ft could be NULL only
978 	 * when we create/destroy the anchor flow table.
979 	 */
980 	if (!new_next_ft || !old_next_ft)
981 		return 0;
982 
983 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
984 	dest.ft = new_next_ft;
985 
986 	mutex_lock(&old_next_ft->lock);
987 	list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
988 	mutex_unlock(&old_next_ft->lock);
989 	list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
990 		if ((iter->sw_action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS) &&
991 		    iter->ft->ns == new_next_ft->ns)
992 			continue;
993 
994 		err = _mlx5_modify_rule_destination(iter, &dest);
995 		if (err)
996 			pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
997 			       new_next_ft->id);
998 	}
999 	return 0;
1000 }
1001 
1002 static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
1003 			      struct fs_prio *prio)
1004 {
1005 	struct mlx5_flow_table *next_ft;
1006 	int err = 0;
1007 
1008 	/* Connect_prev_fts and update_root_ft_create are mutually exclusive */
1009 
1010 	if (list_empty(&prio->node.children)) {
1011 		err = connect_prev_fts(dev, ft, prio);
1012 		if (err)
1013 			return err;
1014 
1015 		next_ft = find_next_chained_ft(prio);
1016 		err = connect_fwd_rules(dev, ft, next_ft);
1017 		if (err)
1018 			return err;
1019 	}
1020 
1021 	if (MLX5_CAP_FLOWTABLE(dev,
1022 			       flow_table_properties_nic_receive.modify_root))
1023 		err = update_root_ft_create(ft, prio);
1024 	return err;
1025 }
1026 
1027 static void list_add_flow_table(struct mlx5_flow_table *ft,
1028 				struct fs_prio *prio)
1029 {
1030 	struct list_head *prev = &prio->node.children;
1031 	struct mlx5_flow_table *iter;
1032 
1033 	fs_for_each_ft(iter, prio) {
1034 		if (iter->level > ft->level)
1035 			break;
1036 		prev = &iter->node.list;
1037 	}
1038 	list_add(&ft->node.list, prev);
1039 }
1040 
1041 static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
1042 							struct mlx5_flow_table_attr *ft_attr,
1043 							enum fs_flow_table_op_mod op_mod,
1044 							u16 vport)
1045 {
1046 	struct mlx5_flow_root_namespace *root = find_root(&ns->node);
1047 	bool unmanaged = ft_attr->flags & MLX5_FLOW_TABLE_UNMANAGED;
1048 	struct mlx5_flow_table *next_ft;
1049 	struct fs_prio *fs_prio = NULL;
1050 	struct mlx5_flow_table *ft;
1051 	int log_table_sz;
1052 	int err;
1053 
1054 	if (!root) {
1055 		pr_err("mlx5: flow steering failed to find root of namespace\n");
1056 		return ERR_PTR(-ENODEV);
1057 	}
1058 
1059 	mutex_lock(&root->chain_lock);
1060 	fs_prio = find_prio(ns, ft_attr->prio);
1061 	if (!fs_prio) {
1062 		err = -EINVAL;
1063 		goto unlock_root;
1064 	}
1065 	if (!unmanaged) {
1066 		/* The level is related to the
1067 		 * priority level range.
1068 		 */
1069 		if (ft_attr->level >= fs_prio->num_levels) {
1070 			err = -ENOSPC;
1071 			goto unlock_root;
1072 		}
1073 
1074 		ft_attr->level += fs_prio->start_level;
1075 	}
1076 
1077 	/* The level is related to the
1078 	 * priority level range.
1079 	 */
1080 	ft = alloc_flow_table(ft_attr->level,
1081 			      vport,
1082 			      ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0,
1083 			      root->table_type,
1084 			      op_mod, ft_attr->flags);
1085 	if (IS_ERR(ft)) {
1086 		err = PTR_ERR(ft);
1087 		goto unlock_root;
1088 	}
1089 
1090 	tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
1091 	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
1092 	next_ft = unmanaged ? ft_attr->next_ft :
1093 			      find_next_chained_ft(fs_prio);
1094 	ft->def_miss_action = ns->def_miss_action;
1095 	ft->ns = ns;
1096 	err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft);
1097 	if (err)
1098 		goto free_ft;
1099 
1100 	if (!unmanaged) {
1101 		err = connect_flow_table(root->dev, ft, fs_prio);
1102 		if (err)
1103 			goto destroy_ft;
1104 	}
1105 
1106 	ft->node.active = true;
1107 	down_write_ref_node(&fs_prio->node, false);
1108 	if (!unmanaged) {
1109 		tree_add_node(&ft->node, &fs_prio->node);
1110 		list_add_flow_table(ft, fs_prio);
1111 	} else {
1112 		ft->node.root = fs_prio->node.root;
1113 	}
1114 	fs_prio->num_ft++;
1115 	up_write_ref_node(&fs_prio->node, false);
1116 	mutex_unlock(&root->chain_lock);
1117 	trace_mlx5_fs_add_ft(ft);
1118 	return ft;
1119 destroy_ft:
1120 	root->cmds->destroy_flow_table(root, ft);
1121 free_ft:
1122 	kfree(ft);
1123 unlock_root:
1124 	mutex_unlock(&root->chain_lock);
1125 	return ERR_PTR(err);
1126 }
1127 
1128 struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
1129 					       struct mlx5_flow_table_attr *ft_attr)
1130 {
1131 	return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0);
1132 }
1133 
1134 struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
1135 						     int prio, int max_fte,
1136 						     u32 level, u16 vport)
1137 {
1138 	struct mlx5_flow_table_attr ft_attr = {};
1139 
1140 	ft_attr.max_fte = max_fte;
1141 	ft_attr.level   = level;
1142 	ft_attr.prio    = prio;
1143 
1144 	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport);
1145 }
1146 
1147 struct mlx5_flow_table*
1148 mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
1149 				 int prio, u32 level)
1150 {
1151 	struct mlx5_flow_table_attr ft_attr = {};
1152 
1153 	ft_attr.level = level;
1154 	ft_attr.prio  = prio;
1155 	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
1156 }
1157 EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
1158 
1159 struct mlx5_flow_table*
1160 mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
1161 				    struct mlx5_flow_table_attr *ft_attr)
1162 {
1163 	int num_reserved_entries = ft_attr->autogroup.num_reserved_entries;
1164 	int autogroups_max_fte = ft_attr->max_fte - num_reserved_entries;
1165 	int max_num_groups = ft_attr->autogroup.max_num_groups;
1166 	struct mlx5_flow_table *ft;
1167 
1168 	if (max_num_groups > autogroups_max_fte)
1169 		return ERR_PTR(-EINVAL);
1170 	if (num_reserved_entries > ft_attr->max_fte)
1171 		return ERR_PTR(-EINVAL);
1172 
1173 	ft = mlx5_create_flow_table(ns, ft_attr);
1174 	if (IS_ERR(ft))
1175 		return ft;
1176 
1177 	ft->autogroup.active = true;
1178 	ft->autogroup.required_groups = max_num_groups;
1179 	ft->autogroup.max_fte = autogroups_max_fte;
1180 	/* We save place for flow groups in addition to max types */
1181 	ft->autogroup.group_size = autogroups_max_fte / (max_num_groups + 1);
1182 
1183 	return ft;
1184 }
1185 EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
1186 
1187 struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
1188 					       u32 *fg_in)
1189 {
1190 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
1191 	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
1192 					    fg_in, match_criteria);
1193 	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
1194 					    fg_in,
1195 					    match_criteria_enable);
1196 	int start_index = MLX5_GET(create_flow_group_in, fg_in,
1197 				   start_flow_index);
1198 	int end_index = MLX5_GET(create_flow_group_in, fg_in,
1199 				 end_flow_index);
1200 	struct mlx5_flow_group *fg;
1201 	int err;
1202 
1203 	if (ft->autogroup.active && start_index < ft->autogroup.max_fte)
1204 		return ERR_PTR(-EPERM);
1205 
1206 	down_write_ref_node(&ft->node, false);
1207 	fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria,
1208 				     start_index, end_index,
1209 				     ft->node.children.prev);
1210 	up_write_ref_node(&ft->node, false);
1211 	if (IS_ERR(fg))
1212 		return fg;
1213 
1214 	err = root->cmds->create_flow_group(root, ft, fg_in, fg);
1215 	if (err) {
1216 		tree_put_node(&fg->node, false);
1217 		return ERR_PTR(err);
1218 	}
1219 	trace_mlx5_fs_add_fg(fg);
1220 	fg->node.active = true;
1221 
1222 	return fg;
1223 }
1224 
1225 static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
1226 {
1227 	struct mlx5_flow_rule *rule;
1228 
1229 	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
1230 	if (!rule)
1231 		return NULL;
1232 
1233 	INIT_LIST_HEAD(&rule->next_ft);
1234 	rule->node.type = FS_TYPE_FLOW_DEST;
1235 	if (dest)
1236 		memcpy(&rule->dest_attr, dest, sizeof(*dest));
1237 
1238 	return rule;
1239 }
1240 
1241 static struct mlx5_flow_handle *alloc_handle(int num_rules)
1242 {
1243 	struct mlx5_flow_handle *handle;
1244 
1245 	handle = kzalloc(struct_size(handle, rule, num_rules), GFP_KERNEL);
1246 	if (!handle)
1247 		return NULL;
1248 
1249 	handle->num_rules = num_rules;
1250 
1251 	return handle;
1252 }
1253 
1254 static void destroy_flow_handle(struct fs_fte *fte,
1255 				struct mlx5_flow_handle *handle,
1256 				struct mlx5_flow_destination *dest,
1257 				int i)
1258 {
1259 	for (; --i >= 0;) {
1260 		if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) {
1261 			fte->dests_size--;
1262 			list_del(&handle->rule[i]->node.list);
1263 			kfree(handle->rule[i]);
1264 		}
1265 	}
1266 	kfree(handle);
1267 }
1268 
1269 static struct mlx5_flow_handle *
1270 create_flow_handle(struct fs_fte *fte,
1271 		   struct mlx5_flow_destination *dest,
1272 		   int dest_num,
1273 		   int *modify_mask,
1274 		   bool *new_rule)
1275 {
1276 	struct mlx5_flow_handle *handle;
1277 	struct mlx5_flow_rule *rule = NULL;
1278 	static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
1279 	static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
1280 	int type;
1281 	int i = 0;
1282 
1283 	handle = alloc_handle((dest_num) ? dest_num : 1);
1284 	if (!handle)
1285 		return ERR_PTR(-ENOMEM);
1286 
1287 	do {
1288 		if (dest) {
1289 			rule = find_flow_rule(fte, dest + i);
1290 			if (rule) {
1291 				refcount_inc(&rule->node.refcount);
1292 				goto rule_found;
1293 			}
1294 		}
1295 
1296 		*new_rule = true;
1297 		rule = alloc_rule(dest + i);
1298 		if (!rule)
1299 			goto free_rules;
1300 
1301 		/* Add dest to dests list- we need flow tables to be in the
1302 		 * end of the list for forward to next prio rules.
1303 		 */
1304 		tree_init_node(&rule->node, NULL, del_sw_hw_rule);
1305 		if (dest &&
1306 		    dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
1307 			list_add(&rule->node.list, &fte->node.children);
1308 		else
1309 			list_add_tail(&rule->node.list, &fte->node.children);
1310 		if (dest) {
1311 			fte->dests_size++;
1312 
1313 			type = dest[i].type ==
1314 				MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1315 			*modify_mask |= type ? count : dst;
1316 		}
1317 rule_found:
1318 		handle->rule[i] = rule;
1319 	} while (++i < dest_num);
1320 
1321 	return handle;
1322 
1323 free_rules:
1324 	destroy_flow_handle(fte, handle, dest, i);
1325 	return ERR_PTR(-ENOMEM);
1326 }
1327 
1328 /* fte should not be deleted while calling this function */
1329 static struct mlx5_flow_handle *
1330 add_rule_fte(struct fs_fte *fte,
1331 	     struct mlx5_flow_group *fg,
1332 	     struct mlx5_flow_destination *dest,
1333 	     int dest_num,
1334 	     bool update_action)
1335 {
1336 	struct mlx5_flow_root_namespace *root;
1337 	struct mlx5_flow_handle *handle;
1338 	struct mlx5_flow_table *ft;
1339 	int modify_mask = 0;
1340 	int err;
1341 	bool new_rule = false;
1342 
1343 	handle = create_flow_handle(fte, dest, dest_num, &modify_mask,
1344 				    &new_rule);
1345 	if (IS_ERR(handle) || !new_rule)
1346 		goto out;
1347 
1348 	if (update_action)
1349 		modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
1350 
1351 	fs_get_obj(ft, fg->node.parent);
1352 	root = find_root(&fg->node);
1353 	if (!(fte->status & FS_FTE_STATUS_EXISTING))
1354 		err = root->cmds->create_fte(root, ft, fg, fte);
1355 	else
1356 		err = root->cmds->update_fte(root, ft, fg, modify_mask, fte);
1357 	if (err)
1358 		goto free_handle;
1359 
1360 	fte->node.active = true;
1361 	fte->status |= FS_FTE_STATUS_EXISTING;
1362 	atomic_inc(&fg->node.version);
1363 
1364 out:
1365 	return handle;
1366 
1367 free_handle:
1368 	destroy_flow_handle(fte, handle, dest, handle->num_rules);
1369 	return ERR_PTR(err);
1370 }
1371 
1372 static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table  *ft,
1373 						     const struct mlx5_flow_spec *spec)
1374 {
1375 	struct list_head *prev = &ft->node.children;
1376 	u32 max_fte = ft->autogroup.max_fte;
1377 	unsigned int candidate_index = 0;
1378 	unsigned int group_size = 0;
1379 	struct mlx5_flow_group *fg;
1380 
1381 	if (!ft->autogroup.active)
1382 		return ERR_PTR(-ENOENT);
1383 
1384 	if (ft->autogroup.num_groups < ft->autogroup.required_groups)
1385 		group_size = ft->autogroup.group_size;
1386 
1387 	/*  max_fte == ft->autogroup.max_types */
1388 	if (group_size == 0)
1389 		group_size = 1;
1390 
1391 	/* sorted by start_index */
1392 	fs_for_each_fg(fg, ft) {
1393 		if (candidate_index + group_size > fg->start_index)
1394 			candidate_index = fg->start_index + fg->max_ftes;
1395 		else
1396 			break;
1397 		prev = &fg->node.list;
1398 	}
1399 
1400 	if (candidate_index + group_size > max_fte)
1401 		return ERR_PTR(-ENOSPC);
1402 
1403 	fg = alloc_insert_flow_group(ft,
1404 				     spec->match_criteria_enable,
1405 				     spec->match_criteria,
1406 				     candidate_index,
1407 				     candidate_index + group_size - 1,
1408 				     prev);
1409 	if (IS_ERR(fg))
1410 		goto out;
1411 
1412 	if (group_size == ft->autogroup.group_size)
1413 		ft->autogroup.num_groups++;
1414 
1415 out:
1416 	return fg;
1417 }
1418 
1419 static int create_auto_flow_group(struct mlx5_flow_table *ft,
1420 				  struct mlx5_flow_group *fg)
1421 {
1422 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
1423 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1424 	void *match_criteria_addr;
1425 	u8 src_esw_owner_mask_on;
1426 	void *misc;
1427 	int err;
1428 	u32 *in;
1429 
1430 	in = kvzalloc(inlen, GFP_KERNEL);
1431 	if (!in)
1432 		return -ENOMEM;
1433 
1434 	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
1435 		 fg->mask.match_criteria_enable);
1436 	MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index);
1437 	MLX5_SET(create_flow_group_in, in, end_flow_index,   fg->start_index +
1438 		 fg->max_ftes - 1);
1439 
1440 	misc = MLX5_ADDR_OF(fte_match_param, fg->mask.match_criteria,
1441 			    misc_parameters);
1442 	src_esw_owner_mask_on = !!MLX5_GET(fte_match_set_misc, misc,
1443 					 source_eswitch_owner_vhca_id);
1444 	MLX5_SET(create_flow_group_in, in,
1445 		 source_eswitch_owner_vhca_id_valid, src_esw_owner_mask_on);
1446 
1447 	match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
1448 					   in, match_criteria);
1449 	memcpy(match_criteria_addr, fg->mask.match_criteria,
1450 	       sizeof(fg->mask.match_criteria));
1451 
1452 	err = root->cmds->create_flow_group(root, ft, in, fg);
1453 	if (!err) {
1454 		fg->node.active = true;
1455 		trace_mlx5_fs_add_fg(fg);
1456 	}
1457 
1458 	kvfree(in);
1459 	return err;
1460 }
1461 
1462 static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
1463 				struct mlx5_flow_destination *d2)
1464 {
1465 	if (d1->type == d2->type) {
1466 		if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
1467 		     d1->vport.num == d2->vport.num &&
1468 		     d1->vport.flags == d2->vport.flags &&
1469 		     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
1470 		      (d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
1471 		     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
1472 		      (d1->vport.pkt_reformat->id ==
1473 		       d2->vport.pkt_reformat->id) : true)) ||
1474 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1475 		     d1->ft == d2->ft) ||
1476 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
1477 		     d1->tir_num == d2->tir_num) ||
1478 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
1479 		     d1->ft_num == d2->ft_num))
1480 			return true;
1481 	}
1482 
1483 	return false;
1484 }
1485 
1486 static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
1487 					     struct mlx5_flow_destination *dest)
1488 {
1489 	struct mlx5_flow_rule *rule;
1490 
1491 	list_for_each_entry(rule, &fte->node.children, node.list) {
1492 		if (mlx5_flow_dests_cmp(&rule->dest_attr, dest))
1493 			return rule;
1494 	}
1495 	return NULL;
1496 }
1497 
1498 static bool check_conflicting_actions(u32 action1, u32 action2)
1499 {
1500 	u32 xored_actions = action1 ^ action2;
1501 
1502 	/* if one rule only wants to count, it's ok */
1503 	if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
1504 	    action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT)
1505 		return false;
1506 
1507 	if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP  |
1508 			     MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
1509 			     MLX5_FLOW_CONTEXT_ACTION_DECAP |
1510 			     MLX5_FLOW_CONTEXT_ACTION_MOD_HDR  |
1511 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
1512 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
1513 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 |
1514 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
1515 		return true;
1516 
1517 	return false;
1518 }
1519 
1520 static int check_conflicting_ftes(struct fs_fte *fte,
1521 				  const struct mlx5_flow_context *flow_context,
1522 				  const struct mlx5_flow_act *flow_act)
1523 {
1524 	if (check_conflicting_actions(flow_act->action, fte->action.action)) {
1525 		mlx5_core_warn(get_dev(&fte->node),
1526 			       "Found two FTEs with conflicting actions\n");
1527 		return -EEXIST;
1528 	}
1529 
1530 	if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) &&
1531 	    fte->flow_context.flow_tag != flow_context->flow_tag) {
1532 		mlx5_core_warn(get_dev(&fte->node),
1533 			       "FTE flow tag %u already exists with different flow tag %u\n",
1534 			       fte->flow_context.flow_tag,
1535 			       flow_context->flow_tag);
1536 		return -EEXIST;
1537 	}
1538 
1539 	return 0;
1540 }
1541 
1542 static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
1543 					    const struct mlx5_flow_spec *spec,
1544 					    struct mlx5_flow_act *flow_act,
1545 					    struct mlx5_flow_destination *dest,
1546 					    int dest_num,
1547 					    struct fs_fte *fte)
1548 {
1549 	struct mlx5_flow_handle *handle;
1550 	int old_action;
1551 	int i;
1552 	int ret;
1553 
1554 	ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act);
1555 	if (ret)
1556 		return ERR_PTR(ret);
1557 
1558 	old_action = fte->action.action;
1559 	fte->action.action |= flow_act->action;
1560 	handle = add_rule_fte(fte, fg, dest, dest_num,
1561 			      old_action != flow_act->action);
1562 	if (IS_ERR(handle)) {
1563 		fte->action.action = old_action;
1564 		return handle;
1565 	}
1566 	trace_mlx5_fs_set_fte(fte, false);
1567 
1568 	for (i = 0; i < handle->num_rules; i++) {
1569 		if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
1570 			tree_add_node(&handle->rule[i]->node, &fte->node);
1571 			trace_mlx5_fs_add_rule(handle->rule[i]);
1572 		}
1573 	}
1574 	return handle;
1575 }
1576 
1577 static bool counter_is_valid(u32 action)
1578 {
1579 	return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
1580 			  MLX5_FLOW_CONTEXT_ACTION_ALLOW |
1581 			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
1582 }
1583 
1584 static bool dest_is_valid(struct mlx5_flow_destination *dest,
1585 			  struct mlx5_flow_act *flow_act,
1586 			  struct mlx5_flow_table *ft)
1587 {
1588 	bool ignore_level = flow_act->flags & FLOW_ACT_IGNORE_FLOW_LEVEL;
1589 	u32 action = flow_act->action;
1590 
1591 	if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
1592 		return counter_is_valid(action);
1593 
1594 	if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
1595 		return true;
1596 
1597 	if (ignore_level) {
1598 		if (ft->type != FS_FT_FDB)
1599 			return false;
1600 
1601 		if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1602 		    dest->ft->type != FS_FT_FDB)
1603 			return false;
1604 	}
1605 
1606 	if (!dest || ((dest->type ==
1607 	    MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) &&
1608 	    (dest->ft->level <= ft->level && !ignore_level)))
1609 		return false;
1610 	return true;
1611 }
1612 
1613 struct match_list {
1614 	struct list_head	list;
1615 	struct mlx5_flow_group *g;
1616 };
1617 
1618 static void free_match_list(struct match_list *head, bool ft_locked)
1619 {
1620 	struct match_list *iter, *match_tmp;
1621 
1622 	list_for_each_entry_safe(iter, match_tmp, &head->list,
1623 				 list) {
1624 		tree_put_node(&iter->g->node, ft_locked);
1625 		list_del(&iter->list);
1626 		kfree(iter);
1627 	}
1628 }
1629 
1630 static int build_match_list(struct match_list *match_head,
1631 			    struct mlx5_flow_table *ft,
1632 			    const struct mlx5_flow_spec *spec,
1633 			    bool ft_locked)
1634 {
1635 	struct rhlist_head *tmp, *list;
1636 	struct mlx5_flow_group *g;
1637 	int err = 0;
1638 
1639 	rcu_read_lock();
1640 	INIT_LIST_HEAD(&match_head->list);
1641 	/* Collect all fgs which has a matching match_criteria */
1642 	list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg);
1643 	/* RCU is atomic, we can't execute FW commands here */
1644 	rhl_for_each_entry_rcu(g, tmp, list, hash) {
1645 		struct match_list *curr_match;
1646 
1647 		if (unlikely(!tree_get_node(&g->node)))
1648 			continue;
1649 
1650 		curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
1651 		if (!curr_match) {
1652 			free_match_list(match_head, ft_locked);
1653 			err = -ENOMEM;
1654 			goto out;
1655 		}
1656 		curr_match->g = g;
1657 		list_add_tail(&curr_match->list, &match_head->list);
1658 	}
1659 out:
1660 	rcu_read_unlock();
1661 	return err;
1662 }
1663 
1664 static u64 matched_fgs_get_version(struct list_head *match_head)
1665 {
1666 	struct match_list *iter;
1667 	u64 version = 0;
1668 
1669 	list_for_each_entry(iter, match_head, list)
1670 		version += (u64)atomic_read(&iter->g->node.version);
1671 	return version;
1672 }
1673 
1674 static struct fs_fte *
1675 lookup_fte_locked(struct mlx5_flow_group *g,
1676 		  const u32 *match_value,
1677 		  bool take_write)
1678 {
1679 	struct fs_fte *fte_tmp;
1680 
1681 	if (take_write)
1682 		nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
1683 	else
1684 		nested_down_read_ref_node(&g->node, FS_LOCK_PARENT);
1685 	fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value,
1686 					 rhash_fte);
1687 	if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
1688 		fte_tmp = NULL;
1689 		goto out;
1690 	}
1691 	if (!fte_tmp->node.active) {
1692 		tree_put_node(&fte_tmp->node, false);
1693 		fte_tmp = NULL;
1694 		goto out;
1695 	}
1696 
1697 	nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
1698 out:
1699 	if (take_write)
1700 		up_write_ref_node(&g->node, false);
1701 	else
1702 		up_read_ref_node(&g->node);
1703 	return fte_tmp;
1704 }
1705 
1706 static struct mlx5_flow_handle *
1707 try_add_to_existing_fg(struct mlx5_flow_table *ft,
1708 		       struct list_head *match_head,
1709 		       const struct mlx5_flow_spec *spec,
1710 		       struct mlx5_flow_act *flow_act,
1711 		       struct mlx5_flow_destination *dest,
1712 		       int dest_num,
1713 		       int ft_version)
1714 {
1715 	struct mlx5_flow_steering *steering = get_steering(&ft->node);
1716 	struct mlx5_flow_group *g;
1717 	struct mlx5_flow_handle *rule;
1718 	struct match_list *iter;
1719 	bool take_write = false;
1720 	struct fs_fte *fte;
1721 	u64  version = 0;
1722 	int err;
1723 
1724 	fte = alloc_fte(ft, spec, flow_act);
1725 	if (IS_ERR(fte))
1726 		return  ERR_PTR(-ENOMEM);
1727 
1728 search_again_locked:
1729 	if (flow_act->flags & FLOW_ACT_NO_APPEND)
1730 		goto skip_search;
1731 	version = matched_fgs_get_version(match_head);
1732 	/* Try to find an fte with identical match value and attempt update its
1733 	 * action.
1734 	 */
1735 	list_for_each_entry(iter, match_head, list) {
1736 		struct fs_fte *fte_tmp;
1737 
1738 		g = iter->g;
1739 		fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
1740 		if (!fte_tmp)
1741 			continue;
1742 		rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
1743 		up_write_ref_node(&fte_tmp->node, false);
1744 		tree_put_node(&fte_tmp->node, false);
1745 		kmem_cache_free(steering->ftes_cache, fte);
1746 		return rule;
1747 	}
1748 
1749 skip_search:
1750 	/* No group with matching fte found, or we skipped the search.
1751 	 * Try to add a new fte to any matching fg.
1752 	 */
1753 
1754 	/* Check the ft version, for case that new flow group
1755 	 * was added while the fgs weren't locked
1756 	 */
1757 	if (atomic_read(&ft->node.version) != ft_version) {
1758 		rule = ERR_PTR(-EAGAIN);
1759 		goto out;
1760 	}
1761 
1762 	/* Check the fgs version. If version have changed it could be that an
1763 	 * FTE with the same match value was added while the fgs weren't
1764 	 * locked.
1765 	 */
1766 	if (!(flow_act->flags & FLOW_ACT_NO_APPEND) &&
1767 	    version != matched_fgs_get_version(match_head)) {
1768 		take_write = true;
1769 		goto search_again_locked;
1770 	}
1771 
1772 	list_for_each_entry(iter, match_head, list) {
1773 		g = iter->g;
1774 
1775 		nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
1776 
1777 		if (!g->node.active) {
1778 			up_write_ref_node(&g->node, false);
1779 			continue;
1780 		}
1781 
1782 		err = insert_fte(g, fte);
1783 		if (err) {
1784 			up_write_ref_node(&g->node, false);
1785 			if (err == -ENOSPC)
1786 				continue;
1787 			kmem_cache_free(steering->ftes_cache, fte);
1788 			return ERR_PTR(err);
1789 		}
1790 
1791 		nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
1792 		up_write_ref_node(&g->node, false);
1793 		rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
1794 		up_write_ref_node(&fte->node, false);
1795 		tree_put_node(&fte->node, false);
1796 		return rule;
1797 	}
1798 	rule = ERR_PTR(-ENOENT);
1799 out:
1800 	kmem_cache_free(steering->ftes_cache, fte);
1801 	return rule;
1802 }
1803 
1804 static struct mlx5_flow_handle *
1805 _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
1806 		     const struct mlx5_flow_spec *spec,
1807 		     struct mlx5_flow_act *flow_act,
1808 		     struct mlx5_flow_destination *dest,
1809 		     int dest_num)
1810 
1811 {
1812 	struct mlx5_flow_steering *steering = get_steering(&ft->node);
1813 	struct mlx5_flow_handle *rule;
1814 	struct match_list match_head;
1815 	struct mlx5_flow_group *g;
1816 	bool take_write = false;
1817 	struct fs_fte *fte;
1818 	int version;
1819 	int err;
1820 	int i;
1821 
1822 	if (!check_valid_spec(spec))
1823 		return ERR_PTR(-EINVAL);
1824 
1825 	for (i = 0; i < dest_num; i++) {
1826 		if (!dest_is_valid(&dest[i], flow_act, ft))
1827 			return ERR_PTR(-EINVAL);
1828 	}
1829 	nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
1830 search_again_locked:
1831 	version = atomic_read(&ft->node.version);
1832 
1833 	/* Collect all fgs which has a matching match_criteria */
1834 	err = build_match_list(&match_head, ft, spec, take_write);
1835 	if (err) {
1836 		if (take_write)
1837 			up_write_ref_node(&ft->node, false);
1838 		else
1839 			up_read_ref_node(&ft->node);
1840 		return ERR_PTR(err);
1841 	}
1842 
1843 	if (!take_write)
1844 		up_read_ref_node(&ft->node);
1845 
1846 	rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest,
1847 				      dest_num, version);
1848 	free_match_list(&match_head, take_write);
1849 	if (!IS_ERR(rule) ||
1850 	    (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
1851 		if (take_write)
1852 			up_write_ref_node(&ft->node, false);
1853 		return rule;
1854 	}
1855 
1856 	if (!take_write) {
1857 		nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
1858 		take_write = true;
1859 	}
1860 
1861 	if (PTR_ERR(rule) == -EAGAIN ||
1862 	    version != atomic_read(&ft->node.version))
1863 		goto search_again_locked;
1864 
1865 	g = alloc_auto_flow_group(ft, spec);
1866 	if (IS_ERR(g)) {
1867 		rule = ERR_CAST(g);
1868 		up_write_ref_node(&ft->node, false);
1869 		return rule;
1870 	}
1871 
1872 	fte = alloc_fte(ft, spec, flow_act);
1873 	if (IS_ERR(fte)) {
1874 		up_write_ref_node(&ft->node, false);
1875 		err = PTR_ERR(fte);
1876 		goto err_alloc_fte;
1877 	}
1878 
1879 	nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
1880 	up_write_ref_node(&ft->node, false);
1881 
1882 	err = create_auto_flow_group(ft, g);
1883 	if (err)
1884 		goto err_release_fg;
1885 
1886 	err = insert_fte(g, fte);
1887 	if (err)
1888 		goto err_release_fg;
1889 
1890 	nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
1891 	up_write_ref_node(&g->node, false);
1892 	rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
1893 	up_write_ref_node(&fte->node, false);
1894 	tree_put_node(&fte->node, false);
1895 	tree_put_node(&g->node, false);
1896 	return rule;
1897 
1898 err_release_fg:
1899 	up_write_ref_node(&g->node, false);
1900 	kmem_cache_free(steering->ftes_cache, fte);
1901 err_alloc_fte:
1902 	tree_put_node(&g->node, false);
1903 	return ERR_PTR(err);
1904 }
1905 
1906 static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
1907 {
1908 	return ((ft->type == FS_FT_NIC_RX) &&
1909 		(MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
1910 }
1911 
1912 struct mlx5_flow_handle *
1913 mlx5_add_flow_rules(struct mlx5_flow_table *ft,
1914 		    const struct mlx5_flow_spec *spec,
1915 		    struct mlx5_flow_act *flow_act,
1916 		    struct mlx5_flow_destination *dest,
1917 		    int num_dest)
1918 {
1919 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
1920 	static const struct mlx5_flow_spec zero_spec = {};
1921 	struct mlx5_flow_destination *gen_dest = NULL;
1922 	struct mlx5_flow_table *next_ft = NULL;
1923 	struct mlx5_flow_handle *handle = NULL;
1924 	u32 sw_action = flow_act->action;
1925 	int i;
1926 
1927 	if (!spec)
1928 		spec = &zero_spec;
1929 
1930 	if (!is_fwd_next_action(sw_action))
1931 		return _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
1932 
1933 	if (!fwd_next_prio_supported(ft))
1934 		return ERR_PTR(-EOPNOTSUPP);
1935 
1936 	mutex_lock(&root->chain_lock);
1937 	next_ft = find_next_fwd_ft(ft, flow_act);
1938 	if (!next_ft) {
1939 		handle = ERR_PTR(-EOPNOTSUPP);
1940 		goto unlock;
1941 	}
1942 
1943 	gen_dest = kcalloc(num_dest + 1, sizeof(*dest),
1944 			   GFP_KERNEL);
1945 	if (!gen_dest) {
1946 		handle = ERR_PTR(-ENOMEM);
1947 		goto unlock;
1948 	}
1949 	for (i = 0; i < num_dest; i++)
1950 		gen_dest[i] = dest[i];
1951 	gen_dest[i].type =
1952 		MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1953 	gen_dest[i].ft = next_ft;
1954 	dest = gen_dest;
1955 	num_dest++;
1956 	flow_act->action &= ~(MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
1957 			      MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS);
1958 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1959 	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
1960 	if (IS_ERR(handle))
1961 		goto unlock;
1962 
1963 	if (list_empty(&handle->rule[num_dest - 1]->next_ft)) {
1964 		mutex_lock(&next_ft->lock);
1965 		list_add(&handle->rule[num_dest - 1]->next_ft,
1966 			 &next_ft->fwd_rules);
1967 		mutex_unlock(&next_ft->lock);
1968 		handle->rule[num_dest - 1]->sw_action = sw_action;
1969 		handle->rule[num_dest - 1]->ft = ft;
1970 	}
1971 unlock:
1972 	mutex_unlock(&root->chain_lock);
1973 	kfree(gen_dest);
1974 	return handle;
1975 }
1976 EXPORT_SYMBOL(mlx5_add_flow_rules);
1977 
1978 void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
1979 {
1980 	struct fs_fte *fte;
1981 	int i;
1982 
1983 	/* In order to consolidate the HW changes we lock the FTE for other
1984 	 * changes, and increase its refcount, in order not to perform the
1985 	 * "del" functions of the FTE. Will handle them here.
1986 	 * The removal of the rules is done under locked FTE.
1987 	 * After removing all the handle's rules, if there are remaining
1988 	 * rules, it means we just need to modify the FTE in FW, and
1989 	 * unlock/decrease the refcount we increased before.
1990 	 * Otherwise, it means the FTE should be deleted. First delete the
1991 	 * FTE in FW. Then, unlock the FTE, and proceed the tree_put_node of
1992 	 * the FTE, which will handle the last decrease of the refcount, as
1993 	 * well as required handling of its parent.
1994 	 */
1995 	fs_get_obj(fte, handle->rule[0]->node.parent);
1996 	down_write_ref_node(&fte->node, false);
1997 	for (i = handle->num_rules - 1; i >= 0; i--)
1998 		tree_remove_node(&handle->rule[i]->node, true);
1999 	if (fte->modify_mask && fte->dests_size) {
2000 		modify_fte(fte);
2001 		up_write_ref_node(&fte->node, false);
2002 	} else {
2003 		del_hw_fte(&fte->node);
2004 		up_write(&fte->node.lock);
2005 		tree_put_node(&fte->node, false);
2006 	}
2007 	kfree(handle);
2008 }
2009 EXPORT_SYMBOL(mlx5_del_flow_rules);
2010 
2011 /* Assuming prio->node.children(flow tables) is sorted by level */
2012 static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
2013 {
2014 	struct fs_prio *prio;
2015 
2016 	fs_get_obj(prio, ft->node.parent);
2017 
2018 	if (!list_is_last(&ft->node.list, &prio->node.children))
2019 		return list_next_entry(ft, node.list);
2020 	return find_next_chained_ft(prio);
2021 }
2022 
2023 static int update_root_ft_destroy(struct mlx5_flow_table *ft)
2024 {
2025 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
2026 	struct mlx5_ft_underlay_qp *uqp;
2027 	struct mlx5_flow_table *new_root_ft = NULL;
2028 	int err = 0;
2029 	u32 qpn;
2030 
2031 	if (root->root_ft != ft)
2032 		return 0;
2033 
2034 	new_root_ft = find_next_ft(ft);
2035 	if (!new_root_ft) {
2036 		root->root_ft = NULL;
2037 		return 0;
2038 	}
2039 
2040 	if (list_empty(&root->underlay_qpns)) {
2041 		/* Don't set any QPN (zero) in case QPN list is empty */
2042 		qpn = 0;
2043 		err = root->cmds->update_root_ft(root, new_root_ft,
2044 						 qpn, false);
2045 	} else {
2046 		list_for_each_entry(uqp, &root->underlay_qpns, list) {
2047 			qpn = uqp->qpn;
2048 			err = root->cmds->update_root_ft(root,
2049 							 new_root_ft, qpn,
2050 							 false);
2051 			if (err)
2052 				break;
2053 		}
2054 	}
2055 
2056 	if (err)
2057 		mlx5_core_warn(root->dev,
2058 			       "Update root flow table of id(%u) qpn(%d) failed\n",
2059 			       ft->id, qpn);
2060 	else
2061 		root->root_ft = new_root_ft;
2062 
2063 	return 0;
2064 }
2065 
2066 /* Connect flow table from previous priority to
2067  * the next flow table.
2068  */
2069 static int disconnect_flow_table(struct mlx5_flow_table *ft)
2070 {
2071 	struct mlx5_core_dev *dev = get_dev(&ft->node);
2072 	struct mlx5_flow_table *next_ft;
2073 	struct fs_prio *prio;
2074 	int err = 0;
2075 
2076 	err = update_root_ft_destroy(ft);
2077 	if (err)
2078 		return err;
2079 
2080 	fs_get_obj(prio, ft->node.parent);
2081 	if  (!(list_first_entry(&prio->node.children,
2082 				struct mlx5_flow_table,
2083 				node.list) == ft))
2084 		return 0;
2085 
2086 	next_ft = find_next_chained_ft(prio);
2087 	err = connect_fwd_rules(dev, next_ft, ft);
2088 	if (err)
2089 		return err;
2090 
2091 	err = connect_prev_fts(dev, next_ft, prio);
2092 	if (err)
2093 		mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
2094 			       ft->id);
2095 	return err;
2096 }
2097 
2098 int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
2099 {
2100 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
2101 	int err = 0;
2102 
2103 	mutex_lock(&root->chain_lock);
2104 	if (!(ft->flags & MLX5_FLOW_TABLE_UNMANAGED))
2105 		err = disconnect_flow_table(ft);
2106 	if (err) {
2107 		mutex_unlock(&root->chain_lock);
2108 		return err;
2109 	}
2110 	if (tree_remove_node(&ft->node, false))
2111 		mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
2112 			       ft->id);
2113 	mutex_unlock(&root->chain_lock);
2114 
2115 	return err;
2116 }
2117 EXPORT_SYMBOL(mlx5_destroy_flow_table);
2118 
2119 void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
2120 {
2121 	if (tree_remove_node(&fg->node, false))
2122 		mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
2123 			       fg->id);
2124 }
2125 
2126 struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev,
2127 						int n)
2128 {
2129 	struct mlx5_flow_steering *steering = dev->priv.steering;
2130 
2131 	if (!steering || !steering->fdb_sub_ns)
2132 		return NULL;
2133 
2134 	return steering->fdb_sub_ns[n];
2135 }
2136 EXPORT_SYMBOL(mlx5_get_fdb_sub_ns);
2137 
2138 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
2139 						    enum mlx5_flow_namespace_type type)
2140 {
2141 	struct mlx5_flow_steering *steering = dev->priv.steering;
2142 	struct mlx5_flow_root_namespace *root_ns;
2143 	int prio = 0;
2144 	struct fs_prio *fs_prio;
2145 	struct mlx5_flow_namespace *ns;
2146 
2147 	if (!steering)
2148 		return NULL;
2149 
2150 	switch (type) {
2151 	case MLX5_FLOW_NAMESPACE_FDB:
2152 		if (steering->fdb_root_ns)
2153 			return &steering->fdb_root_ns->ns;
2154 		return NULL;
2155 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
2156 		if (steering->sniffer_rx_root_ns)
2157 			return &steering->sniffer_rx_root_ns->ns;
2158 		return NULL;
2159 	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
2160 		if (steering->sniffer_tx_root_ns)
2161 			return &steering->sniffer_tx_root_ns->ns;
2162 		return NULL;
2163 	default:
2164 		break;
2165 	}
2166 
2167 	if (type == MLX5_FLOW_NAMESPACE_EGRESS) {
2168 		root_ns = steering->egress_root_ns;
2169 	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
2170 		root_ns = steering->rdma_rx_root_ns;
2171 		prio = RDMA_RX_BYPASS_PRIO;
2172 	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL) {
2173 		root_ns = steering->rdma_rx_root_ns;
2174 		prio = RDMA_RX_KERNEL_PRIO;
2175 	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
2176 		root_ns = steering->rdma_tx_root_ns;
2177 	} else { /* Must be NIC RX */
2178 		root_ns = steering->root_ns;
2179 		prio = type;
2180 	}
2181 
2182 	if (!root_ns)
2183 		return NULL;
2184 
2185 	fs_prio = find_prio(&root_ns->ns, prio);
2186 	if (!fs_prio)
2187 		return NULL;
2188 
2189 	ns = list_first_entry(&fs_prio->node.children,
2190 			      typeof(*ns),
2191 			      node.list);
2192 
2193 	return ns;
2194 }
2195 EXPORT_SYMBOL(mlx5_get_flow_namespace);
2196 
2197 struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
2198 							      enum mlx5_flow_namespace_type type,
2199 							      int vport)
2200 {
2201 	struct mlx5_flow_steering *steering = dev->priv.steering;
2202 
2203 	if (!steering || vport >= mlx5_eswitch_get_total_vports(dev))
2204 		return NULL;
2205 
2206 	switch (type) {
2207 	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
2208 		if (steering->esw_egress_root_ns &&
2209 		    steering->esw_egress_root_ns[vport])
2210 			return &steering->esw_egress_root_ns[vport]->ns;
2211 		else
2212 			return NULL;
2213 	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
2214 		if (steering->esw_ingress_root_ns &&
2215 		    steering->esw_ingress_root_ns[vport])
2216 			return &steering->esw_ingress_root_ns[vport]->ns;
2217 		else
2218 			return NULL;
2219 	default:
2220 		return NULL;
2221 	}
2222 }
2223 
2224 static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns,
2225 				       unsigned int prio,
2226 				       int num_levels,
2227 				       enum fs_node_type type)
2228 {
2229 	struct fs_prio *fs_prio;
2230 
2231 	fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
2232 	if (!fs_prio)
2233 		return ERR_PTR(-ENOMEM);
2234 
2235 	fs_prio->node.type = type;
2236 	tree_init_node(&fs_prio->node, NULL, del_sw_prio);
2237 	tree_add_node(&fs_prio->node, &ns->node);
2238 	fs_prio->num_levels = num_levels;
2239 	fs_prio->prio = prio;
2240 	list_add_tail(&fs_prio->node.list, &ns->node.children);
2241 
2242 	return fs_prio;
2243 }
2244 
2245 static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns,
2246 					      unsigned int prio,
2247 					      int num_levels)
2248 {
2249 	return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS);
2250 }
2251 
2252 static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
2253 				      unsigned int prio, int num_levels)
2254 {
2255 	return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO);
2256 }
2257 
2258 static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
2259 						     *ns)
2260 {
2261 	ns->node.type = FS_TYPE_NAMESPACE;
2262 
2263 	return ns;
2264 }
2265 
2266 static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
2267 						       int def_miss_act)
2268 {
2269 	struct mlx5_flow_namespace	*ns;
2270 
2271 	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
2272 	if (!ns)
2273 		return ERR_PTR(-ENOMEM);
2274 
2275 	fs_init_namespace(ns);
2276 	ns->def_miss_action = def_miss_act;
2277 	tree_init_node(&ns->node, NULL, del_sw_ns);
2278 	tree_add_node(&ns->node, &prio->node);
2279 	list_add_tail(&ns->node.list, &prio->node.children);
2280 
2281 	return ns;
2282 }
2283 
2284 static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio,
2285 			     struct init_tree_node *prio_metadata)
2286 {
2287 	struct fs_prio *fs_prio;
2288 	int i;
2289 
2290 	for (i = 0; i < prio_metadata->num_leaf_prios; i++) {
2291 		fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels);
2292 		if (IS_ERR(fs_prio))
2293 			return PTR_ERR(fs_prio);
2294 	}
2295 	return 0;
2296 }
2297 
2298 #define FLOW_TABLE_BIT_SZ 1
2299 #define GET_FLOW_TABLE_CAP(dev, offset) \
2300 	((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) +	\
2301 			offset / 32)) >>					\
2302 	  (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
2303 static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
2304 {
2305 	int i;
2306 
2307 	for (i = 0; i < caps->arr_sz; i++) {
2308 		if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
2309 			return false;
2310 	}
2311 	return true;
2312 }
2313 
2314 static int init_root_tree_recursive(struct mlx5_flow_steering *steering,
2315 				    struct init_tree_node *init_node,
2316 				    struct fs_node *fs_parent_node,
2317 				    struct init_tree_node *init_parent_node,
2318 				    int prio)
2319 {
2320 	int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev,
2321 					      flow_table_properties_nic_receive.
2322 					      max_ft_level);
2323 	struct mlx5_flow_namespace *fs_ns;
2324 	struct fs_prio *fs_prio;
2325 	struct fs_node *base;
2326 	int i;
2327 	int err;
2328 
2329 	if (init_node->type == FS_TYPE_PRIO) {
2330 		if ((init_node->min_ft_level > max_ft_level) ||
2331 		    !has_required_caps(steering->dev, &init_node->caps))
2332 			return 0;
2333 
2334 		fs_get_obj(fs_ns, fs_parent_node);
2335 		if (init_node->num_leaf_prios)
2336 			return create_leaf_prios(fs_ns, prio, init_node);
2337 		fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels);
2338 		if (IS_ERR(fs_prio))
2339 			return PTR_ERR(fs_prio);
2340 		base = &fs_prio->node;
2341 	} else if (init_node->type == FS_TYPE_NAMESPACE) {
2342 		fs_get_obj(fs_prio, fs_parent_node);
2343 		fs_ns = fs_create_namespace(fs_prio, init_node->def_miss_action);
2344 		if (IS_ERR(fs_ns))
2345 			return PTR_ERR(fs_ns);
2346 		base = &fs_ns->node;
2347 	} else {
2348 		return -EINVAL;
2349 	}
2350 	prio = 0;
2351 	for (i = 0; i < init_node->ar_size; i++) {
2352 		err = init_root_tree_recursive(steering, &init_node->children[i],
2353 					       base, init_node, prio);
2354 		if (err)
2355 			return err;
2356 		if (init_node->children[i].type == FS_TYPE_PRIO &&
2357 		    init_node->children[i].num_leaf_prios) {
2358 			prio += init_node->children[i].num_leaf_prios;
2359 		}
2360 	}
2361 
2362 	return 0;
2363 }
2364 
2365 static int init_root_tree(struct mlx5_flow_steering *steering,
2366 			  struct init_tree_node *init_node,
2367 			  struct fs_node *fs_parent_node)
2368 {
2369 	int i;
2370 	struct mlx5_flow_namespace *fs_ns;
2371 	int err;
2372 
2373 	fs_get_obj(fs_ns, fs_parent_node);
2374 	for (i = 0; i < init_node->ar_size; i++) {
2375 		err = init_root_tree_recursive(steering, &init_node->children[i],
2376 					       &fs_ns->node,
2377 					       init_node, i);
2378 		if (err)
2379 			return err;
2380 	}
2381 	return 0;
2382 }
2383 
2384 static void del_sw_root_ns(struct fs_node *node)
2385 {
2386 	struct mlx5_flow_root_namespace *root_ns;
2387 	struct mlx5_flow_namespace *ns;
2388 
2389 	fs_get_obj(ns, node);
2390 	root_ns = container_of(ns, struct mlx5_flow_root_namespace, ns);
2391 	mutex_destroy(&root_ns->chain_lock);
2392 	kfree(node);
2393 }
2394 
2395 static struct mlx5_flow_root_namespace
2396 *create_root_ns(struct mlx5_flow_steering *steering,
2397 		enum fs_flow_table_type table_type)
2398 {
2399 	const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type);
2400 	struct mlx5_flow_root_namespace *root_ns;
2401 	struct mlx5_flow_namespace *ns;
2402 
2403 	if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
2404 	    (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
2405 		cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
2406 
2407 	/* Create the root namespace */
2408 	root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
2409 	if (!root_ns)
2410 		return NULL;
2411 
2412 	root_ns->dev = steering->dev;
2413 	root_ns->table_type = table_type;
2414 	root_ns->cmds = cmds;
2415 
2416 	INIT_LIST_HEAD(&root_ns->underlay_qpns);
2417 
2418 	ns = &root_ns->ns;
2419 	fs_init_namespace(ns);
2420 	mutex_init(&root_ns->chain_lock);
2421 	tree_init_node(&ns->node, NULL, del_sw_root_ns);
2422 	tree_add_node(&ns->node, NULL);
2423 
2424 	return root_ns;
2425 }
2426 
2427 static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level);
2428 
2429 static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level)
2430 {
2431 	struct fs_prio *prio;
2432 
2433 	fs_for_each_prio(prio, ns) {
2434 		 /* This updates prio start_level and num_levels */
2435 		set_prio_attrs_in_prio(prio, acc_level);
2436 		acc_level += prio->num_levels;
2437 	}
2438 	return acc_level;
2439 }
2440 
2441 static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
2442 {
2443 	struct mlx5_flow_namespace *ns;
2444 	int acc_level_ns = acc_level;
2445 
2446 	prio->start_level = acc_level;
2447 	fs_for_each_ns(ns, prio) {
2448 		/* This updates start_level and num_levels of ns's priority descendants */
2449 		acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
2450 
2451 		/* If this a prio with chains, and we can jump from one chain
2452 		 * (namepsace) to another, so we accumulate the levels
2453 		 */
2454 		if (prio->node.type == FS_TYPE_PRIO_CHAINS)
2455 			acc_level = acc_level_ns;
2456 	}
2457 
2458 	if (!prio->num_levels)
2459 		prio->num_levels = acc_level_ns - prio->start_level;
2460 	WARN_ON(prio->num_levels < acc_level_ns - prio->start_level);
2461 }
2462 
2463 static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
2464 {
2465 	struct mlx5_flow_namespace *ns = &root_ns->ns;
2466 	struct fs_prio *prio;
2467 	int start_level = 0;
2468 
2469 	fs_for_each_prio(prio, ns) {
2470 		set_prio_attrs_in_prio(prio, start_level);
2471 		start_level += prio->num_levels;
2472 	}
2473 }
2474 
2475 #define ANCHOR_PRIO 0
2476 #define ANCHOR_SIZE 1
2477 #define ANCHOR_LEVEL 0
2478 static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
2479 {
2480 	struct mlx5_flow_namespace *ns = NULL;
2481 	struct mlx5_flow_table_attr ft_attr = {};
2482 	struct mlx5_flow_table *ft;
2483 
2484 	ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
2485 	if (WARN_ON(!ns))
2486 		return -EINVAL;
2487 
2488 	ft_attr.max_fte = ANCHOR_SIZE;
2489 	ft_attr.level   = ANCHOR_LEVEL;
2490 	ft_attr.prio    = ANCHOR_PRIO;
2491 
2492 	ft = mlx5_create_flow_table(ns, &ft_attr);
2493 	if (IS_ERR(ft)) {
2494 		mlx5_core_err(steering->dev, "Failed to create last anchor flow table");
2495 		return PTR_ERR(ft);
2496 	}
2497 	return 0;
2498 }
2499 
2500 static int init_root_ns(struct mlx5_flow_steering *steering)
2501 {
2502 	int err;
2503 
2504 	steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
2505 	if (!steering->root_ns)
2506 		return -ENOMEM;
2507 
2508 	err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node);
2509 	if (err)
2510 		goto out_err;
2511 
2512 	set_prio_attrs(steering->root_ns);
2513 	err = create_anchor_flow_table(steering);
2514 	if (err)
2515 		goto out_err;
2516 
2517 	return 0;
2518 
2519 out_err:
2520 	cleanup_root_ns(steering->root_ns);
2521 	steering->root_ns = NULL;
2522 	return err;
2523 }
2524 
2525 static void clean_tree(struct fs_node *node)
2526 {
2527 	if (node) {
2528 		struct fs_node *iter;
2529 		struct fs_node *temp;
2530 
2531 		tree_get_node(node);
2532 		list_for_each_entry_safe(iter, temp, &node->children, list)
2533 			clean_tree(iter);
2534 		tree_put_node(node, false);
2535 		tree_remove_node(node, false);
2536 	}
2537 }
2538 
2539 static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
2540 {
2541 	if (!root_ns)
2542 		return;
2543 
2544 	clean_tree(&root_ns->ns.node);
2545 }
2546 
2547 static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev)
2548 {
2549 	struct mlx5_flow_steering *steering = dev->priv.steering;
2550 	int i;
2551 
2552 	if (!steering->esw_egress_root_ns)
2553 		return;
2554 
2555 	for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
2556 		cleanup_root_ns(steering->esw_egress_root_ns[i]);
2557 
2558 	kfree(steering->esw_egress_root_ns);
2559 	steering->esw_egress_root_ns = NULL;
2560 }
2561 
2562 static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
2563 {
2564 	struct mlx5_flow_steering *steering = dev->priv.steering;
2565 	int i;
2566 
2567 	if (!steering->esw_ingress_root_ns)
2568 		return;
2569 
2570 	for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
2571 		cleanup_root_ns(steering->esw_ingress_root_ns[i]);
2572 
2573 	kfree(steering->esw_ingress_root_ns);
2574 	steering->esw_ingress_root_ns = NULL;
2575 }
2576 
2577 void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
2578 {
2579 	struct mlx5_flow_steering *steering = dev->priv.steering;
2580 
2581 	cleanup_root_ns(steering->root_ns);
2582 	cleanup_egress_acls_root_ns(dev);
2583 	cleanup_ingress_acls_root_ns(dev);
2584 	cleanup_root_ns(steering->fdb_root_ns);
2585 	steering->fdb_root_ns = NULL;
2586 	kfree(steering->fdb_sub_ns);
2587 	steering->fdb_sub_ns = NULL;
2588 	cleanup_root_ns(steering->sniffer_rx_root_ns);
2589 	cleanup_root_ns(steering->sniffer_tx_root_ns);
2590 	cleanup_root_ns(steering->rdma_rx_root_ns);
2591 	cleanup_root_ns(steering->rdma_tx_root_ns);
2592 	cleanup_root_ns(steering->egress_root_ns);
2593 	mlx5_cleanup_fc_stats(dev);
2594 	kmem_cache_destroy(steering->ftes_cache);
2595 	kmem_cache_destroy(steering->fgs_cache);
2596 	kfree(steering);
2597 }
2598 
2599 static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
2600 {
2601 	struct fs_prio *prio;
2602 
2603 	steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX);
2604 	if (!steering->sniffer_tx_root_ns)
2605 		return -ENOMEM;
2606 
2607 	/* Create single prio */
2608 	prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
2609 	return PTR_ERR_OR_ZERO(prio);
2610 }
2611 
2612 static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
2613 {
2614 	struct fs_prio *prio;
2615 
2616 	steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX);
2617 	if (!steering->sniffer_rx_root_ns)
2618 		return -ENOMEM;
2619 
2620 	/* Create single prio */
2621 	prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
2622 	return PTR_ERR_OR_ZERO(prio);
2623 }
2624 
2625 static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
2626 {
2627 	int err;
2628 
2629 	steering->rdma_rx_root_ns = create_root_ns(steering, FS_FT_RDMA_RX);
2630 	if (!steering->rdma_rx_root_ns)
2631 		return -ENOMEM;
2632 
2633 	err = init_root_tree(steering, &rdma_rx_root_fs,
2634 			     &steering->rdma_rx_root_ns->ns.node);
2635 	if (err)
2636 		goto out_err;
2637 
2638 	set_prio_attrs(steering->rdma_rx_root_ns);
2639 
2640 	return 0;
2641 
2642 out_err:
2643 	cleanup_root_ns(steering->rdma_rx_root_ns);
2644 	steering->rdma_rx_root_ns = NULL;
2645 	return err;
2646 }
2647 
2648 static int init_rdma_tx_root_ns(struct mlx5_flow_steering *steering)
2649 {
2650 	int err;
2651 
2652 	steering->rdma_tx_root_ns = create_root_ns(steering, FS_FT_RDMA_TX);
2653 	if (!steering->rdma_tx_root_ns)
2654 		return -ENOMEM;
2655 
2656 	err = init_root_tree(steering, &rdma_tx_root_fs,
2657 			     &steering->rdma_tx_root_ns->ns.node);
2658 	if (err)
2659 		goto out_err;
2660 
2661 	set_prio_attrs(steering->rdma_tx_root_ns);
2662 
2663 	return 0;
2664 
2665 out_err:
2666 	cleanup_root_ns(steering->rdma_tx_root_ns);
2667 	steering->rdma_tx_root_ns = NULL;
2668 	return err;
2669 }
2670 
2671 /* FT and tc chains are stored in the same array so we can re-use the
2672  * mlx5_get_fdb_sub_ns() and tc api for FT chains.
2673  * When creating a new ns for each chain store it in the first available slot.
2674  * Assume tc chains are created and stored first and only then the FT chain.
2675  */
2676 static void store_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering,
2677 					struct mlx5_flow_namespace *ns)
2678 {
2679 	int chain = 0;
2680 
2681 	while (steering->fdb_sub_ns[chain])
2682 		++chain;
2683 
2684 	steering->fdb_sub_ns[chain] = ns;
2685 }
2686 
2687 static int create_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering,
2688 					struct fs_prio *maj_prio)
2689 {
2690 	struct mlx5_flow_namespace *ns;
2691 	struct fs_prio *min_prio;
2692 	int prio;
2693 
2694 	ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
2695 	if (IS_ERR(ns))
2696 		return PTR_ERR(ns);
2697 
2698 	for (prio = 0; prio < FDB_TC_MAX_PRIO; prio++) {
2699 		min_prio = fs_create_prio(ns, prio, FDB_TC_LEVELS_PER_PRIO);
2700 		if (IS_ERR(min_prio))
2701 			return PTR_ERR(min_prio);
2702 	}
2703 
2704 	store_fdb_sub_ns_prio_chain(steering, ns);
2705 
2706 	return 0;
2707 }
2708 
2709 static int create_fdb_chains(struct mlx5_flow_steering *steering,
2710 			     int fs_prio,
2711 			     int chains)
2712 {
2713 	struct fs_prio *maj_prio;
2714 	int levels;
2715 	int chain;
2716 	int err;
2717 
2718 	levels = FDB_TC_LEVELS_PER_PRIO * FDB_TC_MAX_PRIO * chains;
2719 	maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns,
2720 					  fs_prio,
2721 					  levels);
2722 	if (IS_ERR(maj_prio))
2723 		return PTR_ERR(maj_prio);
2724 
2725 	for (chain = 0; chain < chains; chain++) {
2726 		err = create_fdb_sub_ns_prio_chain(steering, maj_prio);
2727 		if (err)
2728 			return err;
2729 	}
2730 
2731 	return 0;
2732 }
2733 
2734 static int create_fdb_fast_path(struct mlx5_flow_steering *steering)
2735 {
2736 	int err;
2737 
2738 	steering->fdb_sub_ns = kcalloc(FDB_NUM_CHAINS,
2739 				       sizeof(*steering->fdb_sub_ns),
2740 				       GFP_KERNEL);
2741 	if (!steering->fdb_sub_ns)
2742 		return -ENOMEM;
2743 
2744 	err = create_fdb_chains(steering, FDB_TC_OFFLOAD, FDB_TC_MAX_CHAIN + 1);
2745 	if (err)
2746 		return err;
2747 
2748 	err = create_fdb_chains(steering, FDB_FT_OFFLOAD, 1);
2749 	if (err)
2750 		return err;
2751 
2752 	return 0;
2753 }
2754 
2755 static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
2756 {
2757 	struct fs_prio *maj_prio;
2758 	int err;
2759 
2760 	steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
2761 	if (!steering->fdb_root_ns)
2762 		return -ENOMEM;
2763 
2764 	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH,
2765 				  1);
2766 	if (IS_ERR(maj_prio)) {
2767 		err = PTR_ERR(maj_prio);
2768 		goto out_err;
2769 	}
2770 	err = create_fdb_fast_path(steering);
2771 	if (err)
2772 		goto out_err;
2773 
2774 	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1);
2775 	if (IS_ERR(maj_prio)) {
2776 		err = PTR_ERR(maj_prio);
2777 		goto out_err;
2778 	}
2779 
2780 	/* We put this priority last, knowing that nothing will get here
2781 	 * unless explicitly forwarded to. This is possible because the
2782 	 * slow path tables have catch all rules and nothing gets passed
2783 	 * those tables.
2784 	 */
2785 	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_PER_VPORT, 1);
2786 	if (IS_ERR(maj_prio)) {
2787 		err = PTR_ERR(maj_prio);
2788 		goto out_err;
2789 	}
2790 
2791 	set_prio_attrs(steering->fdb_root_ns);
2792 	return 0;
2793 
2794 out_err:
2795 	cleanup_root_ns(steering->fdb_root_ns);
2796 	kfree(steering->fdb_sub_ns);
2797 	steering->fdb_sub_ns = NULL;
2798 	steering->fdb_root_ns = NULL;
2799 	return err;
2800 }
2801 
2802 static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
2803 {
2804 	struct fs_prio *prio;
2805 
2806 	steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
2807 	if (!steering->esw_egress_root_ns[vport])
2808 		return -ENOMEM;
2809 
2810 	/* create 1 prio*/
2811 	prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1);
2812 	return PTR_ERR_OR_ZERO(prio);
2813 }
2814 
2815 static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
2816 {
2817 	struct fs_prio *prio;
2818 
2819 	steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
2820 	if (!steering->esw_ingress_root_ns[vport])
2821 		return -ENOMEM;
2822 
2823 	/* create 1 prio*/
2824 	prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1);
2825 	return PTR_ERR_OR_ZERO(prio);
2826 }
2827 
2828 static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
2829 {
2830 	struct mlx5_flow_steering *steering = dev->priv.steering;
2831 	int total_vports = mlx5_eswitch_get_total_vports(dev);
2832 	int err;
2833 	int i;
2834 
2835 	steering->esw_egress_root_ns =
2836 			kcalloc(total_vports,
2837 				sizeof(*steering->esw_egress_root_ns),
2838 				GFP_KERNEL);
2839 	if (!steering->esw_egress_root_ns)
2840 		return -ENOMEM;
2841 
2842 	for (i = 0; i < total_vports; i++) {
2843 		err = init_egress_acl_root_ns(steering, i);
2844 		if (err)
2845 			goto cleanup_root_ns;
2846 	}
2847 
2848 	return 0;
2849 
2850 cleanup_root_ns:
2851 	for (i--; i >= 0; i--)
2852 		cleanup_root_ns(steering->esw_egress_root_ns[i]);
2853 	kfree(steering->esw_egress_root_ns);
2854 	steering->esw_egress_root_ns = NULL;
2855 	return err;
2856 }
2857 
2858 static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
2859 {
2860 	struct mlx5_flow_steering *steering = dev->priv.steering;
2861 	int total_vports = mlx5_eswitch_get_total_vports(dev);
2862 	int err;
2863 	int i;
2864 
2865 	steering->esw_ingress_root_ns =
2866 			kcalloc(total_vports,
2867 				sizeof(*steering->esw_ingress_root_ns),
2868 				GFP_KERNEL);
2869 	if (!steering->esw_ingress_root_ns)
2870 		return -ENOMEM;
2871 
2872 	for (i = 0; i < total_vports; i++) {
2873 		err = init_ingress_acl_root_ns(steering, i);
2874 		if (err)
2875 			goto cleanup_root_ns;
2876 	}
2877 
2878 	return 0;
2879 
2880 cleanup_root_ns:
2881 	for (i--; i >= 0; i--)
2882 		cleanup_root_ns(steering->esw_ingress_root_ns[i]);
2883 	kfree(steering->esw_ingress_root_ns);
2884 	steering->esw_ingress_root_ns = NULL;
2885 	return err;
2886 }
2887 
2888 static int init_egress_root_ns(struct mlx5_flow_steering *steering)
2889 {
2890 	int err;
2891 
2892 	steering->egress_root_ns = create_root_ns(steering,
2893 						  FS_FT_NIC_TX);
2894 	if (!steering->egress_root_ns)
2895 		return -ENOMEM;
2896 
2897 	err = init_root_tree(steering, &egress_root_fs,
2898 			     &steering->egress_root_ns->ns.node);
2899 	if (err)
2900 		goto cleanup;
2901 	set_prio_attrs(steering->egress_root_ns);
2902 	return 0;
2903 cleanup:
2904 	cleanup_root_ns(steering->egress_root_ns);
2905 	steering->egress_root_ns = NULL;
2906 	return err;
2907 }
2908 
2909 int mlx5_init_fs(struct mlx5_core_dev *dev)
2910 {
2911 	struct mlx5_flow_steering *steering;
2912 	int err = 0;
2913 
2914 	err = mlx5_init_fc_stats(dev);
2915 	if (err)
2916 		return err;
2917 
2918 	steering = kzalloc(sizeof(*steering), GFP_KERNEL);
2919 	if (!steering)
2920 		return -ENOMEM;
2921 	steering->dev = dev;
2922 	dev->priv.steering = steering;
2923 
2924 	steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
2925 						sizeof(struct mlx5_flow_group), 0,
2926 						0, NULL);
2927 	steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
2928 						 0, NULL);
2929 	if (!steering->ftes_cache || !steering->fgs_cache) {
2930 		err = -ENOMEM;
2931 		goto err;
2932 	}
2933 
2934 	if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
2935 	      (MLX5_CAP_GEN(dev, nic_flow_table))) ||
2936 	     ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
2937 	      MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) &&
2938 	    MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) {
2939 		err = init_root_ns(steering);
2940 		if (err)
2941 			goto err;
2942 	}
2943 
2944 	if (MLX5_ESWITCH_MANAGER(dev)) {
2945 		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
2946 			err = init_fdb_root_ns(steering);
2947 			if (err)
2948 				goto err;
2949 		}
2950 		if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
2951 			err = init_egress_acls_root_ns(dev);
2952 			if (err)
2953 				goto err;
2954 		}
2955 		if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
2956 			err = init_ingress_acls_root_ns(dev);
2957 			if (err)
2958 				goto err;
2959 		}
2960 	}
2961 
2962 	if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
2963 		err = init_sniffer_rx_root_ns(steering);
2964 		if (err)
2965 			goto err;
2966 	}
2967 
2968 	if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) {
2969 		err = init_sniffer_tx_root_ns(steering);
2970 		if (err)
2971 			goto err;
2972 	}
2973 
2974 	if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
2975 	    MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) {
2976 		err = init_rdma_rx_root_ns(steering);
2977 		if (err)
2978 			goto err;
2979 	}
2980 
2981 	if (MLX5_CAP_FLOWTABLE_RDMA_TX(dev, ft_support)) {
2982 		err = init_rdma_tx_root_ns(steering);
2983 		if (err)
2984 			goto err;
2985 	}
2986 
2987 	if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE ||
2988 	    MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
2989 		err = init_egress_root_ns(steering);
2990 		if (err)
2991 			goto err;
2992 	}
2993 
2994 	return 0;
2995 err:
2996 	mlx5_cleanup_fs(dev);
2997 	return err;
2998 }
2999 
3000 int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
3001 {
3002 	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
3003 	struct mlx5_ft_underlay_qp *new_uqp;
3004 	int err = 0;
3005 
3006 	new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL);
3007 	if (!new_uqp)
3008 		return -ENOMEM;
3009 
3010 	mutex_lock(&root->chain_lock);
3011 
3012 	if (!root->root_ft) {
3013 		err = -EINVAL;
3014 		goto update_ft_fail;
3015 	}
3016 
3017 	err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn,
3018 					 false);
3019 	if (err) {
3020 		mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
3021 			       underlay_qpn, err);
3022 		goto update_ft_fail;
3023 	}
3024 
3025 	new_uqp->qpn = underlay_qpn;
3026 	list_add_tail(&new_uqp->list, &root->underlay_qpns);
3027 
3028 	mutex_unlock(&root->chain_lock);
3029 
3030 	return 0;
3031 
3032 update_ft_fail:
3033 	mutex_unlock(&root->chain_lock);
3034 	kfree(new_uqp);
3035 	return err;
3036 }
3037 EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);
3038 
3039 int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
3040 {
3041 	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
3042 	struct mlx5_ft_underlay_qp *uqp;
3043 	bool found = false;
3044 	int err = 0;
3045 
3046 	mutex_lock(&root->chain_lock);
3047 	list_for_each_entry(uqp, &root->underlay_qpns, list) {
3048 		if (uqp->qpn == underlay_qpn) {
3049 			found = true;
3050 			break;
3051 		}
3052 	}
3053 
3054 	if (!found) {
3055 		mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n",
3056 			       underlay_qpn);
3057 		err = -EINVAL;
3058 		goto out;
3059 	}
3060 
3061 	err = root->cmds->update_root_ft(root, root->root_ft, underlay_qpn,
3062 					 true);
3063 	if (err)
3064 		mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
3065 			       underlay_qpn, err);
3066 
3067 	list_del(&uqp->list);
3068 	mutex_unlock(&root->chain_lock);
3069 	kfree(uqp);
3070 
3071 	return 0;
3072 
3073 out:
3074 	mutex_unlock(&root->chain_lock);
3075 	return err;
3076 }
3077 EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
3078 
3079 static struct mlx5_flow_root_namespace
3080 *get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type)
3081 {
3082 	struct mlx5_flow_namespace *ns;
3083 
3084 	if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS ||
3085 	    ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS)
3086 		ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0);
3087 	else
3088 		ns = mlx5_get_flow_namespace(dev, ns_type);
3089 	if (!ns)
3090 		return NULL;
3091 
3092 	return find_root(&ns->node);
3093 }
3094 
3095 struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
3096 						 u8 ns_type, u8 num_actions,
3097 						 void *modify_actions)
3098 {
3099 	struct mlx5_flow_root_namespace *root;
3100 	struct mlx5_modify_hdr *modify_hdr;
3101 	int err;
3102 
3103 	root = get_root_namespace(dev, ns_type);
3104 	if (!root)
3105 		return ERR_PTR(-EOPNOTSUPP);
3106 
3107 	modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL);
3108 	if (!modify_hdr)
3109 		return ERR_PTR(-ENOMEM);
3110 
3111 	modify_hdr->ns_type = ns_type;
3112 	err = root->cmds->modify_header_alloc(root, ns_type, num_actions,
3113 					      modify_actions, modify_hdr);
3114 	if (err) {
3115 		kfree(modify_hdr);
3116 		return ERR_PTR(err);
3117 	}
3118 
3119 	return modify_hdr;
3120 }
3121 EXPORT_SYMBOL(mlx5_modify_header_alloc);
3122 
3123 void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
3124 				struct mlx5_modify_hdr *modify_hdr)
3125 {
3126 	struct mlx5_flow_root_namespace *root;
3127 
3128 	root = get_root_namespace(dev, modify_hdr->ns_type);
3129 	if (WARN_ON(!root))
3130 		return;
3131 	root->cmds->modify_header_dealloc(root, modify_hdr);
3132 	kfree(modify_hdr);
3133 }
3134 EXPORT_SYMBOL(mlx5_modify_header_dealloc);
3135 
3136 struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
3137 						     int reformat_type,
3138 						     size_t size,
3139 						     void *reformat_data,
3140 						     enum mlx5_flow_namespace_type ns_type)
3141 {
3142 	struct mlx5_pkt_reformat *pkt_reformat;
3143 	struct mlx5_flow_root_namespace *root;
3144 	int err;
3145 
3146 	root = get_root_namespace(dev, ns_type);
3147 	if (!root)
3148 		return ERR_PTR(-EOPNOTSUPP);
3149 
3150 	pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL);
3151 	if (!pkt_reformat)
3152 		return ERR_PTR(-ENOMEM);
3153 
3154 	pkt_reformat->ns_type = ns_type;
3155 	pkt_reformat->reformat_type = reformat_type;
3156 	err = root->cmds->packet_reformat_alloc(root, reformat_type, size,
3157 						reformat_data, ns_type,
3158 						pkt_reformat);
3159 	if (err) {
3160 		kfree(pkt_reformat);
3161 		return ERR_PTR(err);
3162 	}
3163 
3164 	return pkt_reformat;
3165 }
3166 EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
3167 
3168 void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
3169 				  struct mlx5_pkt_reformat *pkt_reformat)
3170 {
3171 	struct mlx5_flow_root_namespace *root;
3172 
3173 	root = get_root_namespace(dev, pkt_reformat->ns_type);
3174 	if (WARN_ON(!root))
3175 		return;
3176 	root->cmds->packet_reformat_dealloc(root, pkt_reformat);
3177 	kfree(pkt_reformat);
3178 }
3179 EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
3180 
3181 int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
3182 				 struct mlx5_flow_root_namespace *peer_ns)
3183 {
3184 	if (peer_ns && ns->mode != peer_ns->mode) {
3185 		mlx5_core_err(ns->dev,
3186 			      "Can't peer namespace of different steering mode\n");
3187 		return -EINVAL;
3188 	}
3189 
3190 	return ns->cmds->set_peer(ns, peer_ns);
3191 }
3192 
3193 /* This function should be called only at init stage of the namespace.
3194  * It is not safe to call this function while steering operations
3195  * are executed in the namespace.
3196  */
3197 int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
3198 				 enum mlx5_flow_steering_mode mode)
3199 {
3200 	struct mlx5_flow_root_namespace *root;
3201 	const struct mlx5_flow_cmds *cmds;
3202 	int err;
3203 
3204 	root = find_root(&ns->node);
3205 	if (&root->ns != ns)
3206 	/* Can't set cmds to non root namespace */
3207 		return -EINVAL;
3208 
3209 	if (root->table_type != FS_FT_FDB)
3210 		return -EOPNOTSUPP;
3211 
3212 	if (root->mode == mode)
3213 		return 0;
3214 
3215 	if (mode == MLX5_FLOW_STEERING_MODE_SMFS)
3216 		cmds = mlx5_fs_cmd_get_dr_cmds();
3217 	else
3218 		cmds = mlx5_fs_cmd_get_fw_cmds();
3219 	if (!cmds)
3220 		return -EOPNOTSUPP;
3221 
3222 	err = cmds->create_ns(root);
3223 	if (err) {
3224 		mlx5_core_err(root->dev, "Failed to create flow namespace (%d)\n",
3225 			      err);
3226 		return err;
3227 	}
3228 
3229 	root->cmds->destroy_ns(root);
3230 	root->cmds = cmds;
3231 	root->mode = mode;
3232 
3233 	return 0;
3234 }
3235