/*
 * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/mlx5/driver.h>

#include "mlx5_core.h"
#include "fs_core.h"
#include "fs_cmd.h"

#define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
					 sizeof(struct init_tree_node))

#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\
		 ...) {.type = FS_TYPE_PRIO,\
	.min_ft_level = min_level_val,\
	.num_levels = num_levels_val,\
	.num_leaf_prios = num_prios_val,\
	.caps = caps_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}

#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\
	ADD_PRIO(num_prios_val, 0, num_levels_val, {},\
		 __VA_ARGS__)\

#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}
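
/* The macros above build the static init_tree_node tree declared below at
 * compile time.  As an illustration, ADD_MULTIPLE_PRIO(2, 3, ...) expands to
 * an FS_TYPE_PRIO node with .num_leaf_prios = 2 and .num_levels = 3, while
 * INIT_TREE_NODE_ARRAY_SIZE() derives .ar_size from the size of the
 * compound-literal children array.
 */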

#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
				   sizeof(long))

#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))

#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
			       .caps = (long[]) {__VA_ARGS__} }

#define FS_CHAINING_CAPS  FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \
					   FS_CAP(flow_table_properties_nic_receive.modify_root), \
					   FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
					   FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
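
/* FS_CHAINING_CAPS lists the device capabilities a priority needs before its
 * flow tables may be chained across neighboring priorities: modifying flow
 * entries, updating the root table, and miss-table redirection.  Priorities
 * declared with these caps are silently skipped at init time when the device
 * lacks any of them (see init_root_tree_recursive()).
 */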

#define LEFTOVERS_NUM_LEVELS 1
#define LEFTOVERS_NUM_PRIOS 1

#define BY_PASS_PRIO_NUM_LEVELS 1
#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
			   LEFTOVERS_NUM_PRIOS)

#define ETHTOOL_PRIO_NUM_LEVELS 1
#define ETHTOOL_NUM_PRIOS 11
#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
/* VLAN, MAC, TTC, aRFS */
#define KERNEL_NIC_PRIO_NUM_LEVELS 4
#define KERNEL_NIC_NUM_PRIOS 1
/* One more level for tc */
#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)

#define ANCHOR_NUM_LEVELS 1
#define ANCHOR_NUM_PRIOS 1
#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)

#define OFFLOADS_MAX_FT 1
#define OFFLOADS_NUM_PRIOS 1
#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)

#define LAG_PRIO_NUM_LEVELS 1
#define LAG_NUM_PRIOS 1
#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)

struct node_caps {
	size_t	arr_sz;
	long	*caps;
};

static struct init_tree_node {
	enum fs_node_type	type;
	struct init_tree_node *children;
	int ar_size;
	struct node_caps caps;
	int min_ft_level;
	int num_leaf_prios;
	int prio;
	int num_levels;
} root_fs = {
	.type = FS_TYPE_NAMESPACE,
	.ar_size = 7,
	.children = (struct init_tree_node[]) {
		ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
			 FS_CHAINING_CAPS,
			 ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
						  BY_PASS_PRIO_NUM_LEVELS))),
		ADD_PRIO(0, LAG_MIN_LEVEL, 0,
			 FS_CHAINING_CAPS,
			 ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
						  LAG_PRIO_NUM_LEVELS))),
		ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
			 ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))),
		ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0,
			 FS_CHAINING_CAPS,
			 ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS,
						  ETHTOOL_PRIO_NUM_LEVELS))),
		ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
			 ADD_NS(ADD_MULTIPLE_PRIO(1, 1),
				ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
						  KERNEL_NIC_PRIO_NUM_LEVELS))),
		ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
			 FS_CHAINING_CAPS,
			 ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))),
		ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
			 ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))),
	}
};
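
/* The chained priorities above are consumed in list order: bypass rules
 * match first, then LAG demux, offloads, ethtool steering, the kernel NIC
 * tables (with one leading level reserved for tc), leftovers, and finally
 * the anchor table that terminates the chain.
 */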

enum fs_i_mutex_lock_class {
	FS_MUTEX_GRANDPARENT,
	FS_MUTEX_PARENT,
	FS_MUTEX_CHILD
};

static void del_rule(struct fs_node *node);
static void del_flow_table(struct fs_node *node);
static void del_flow_group(struct fs_node *node);
static void del_fte(struct fs_node *node);
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
				struct mlx5_flow_destination *d2);
static struct mlx5_flow_rule *
find_flow_rule(struct fs_fte *fte,
	       struct mlx5_flow_destination *dest);

static void tree_init_node(struct fs_node *node,
			   unsigned int refcount,
			   void (*remove_func)(struct fs_node *))
{
	atomic_set(&node->refcount, refcount);
	INIT_LIST_HEAD(&node->list);
	INIT_LIST_HEAD(&node->children);
	mutex_init(&node->lock);
	node->remove_func = remove_func;
}

static void tree_add_node(struct fs_node *node, struct fs_node *parent)
{
	if (parent)
		atomic_inc(&parent->refcount);
	node->parent = parent;

	/* A node with no parent is its own root */
	if (!parent)
		node->root = node;
	else
		node->root = parent->root;
}

static void tree_get_node(struct fs_node *node)
{
	atomic_inc(&node->refcount);
}

static void nested_lock_ref_node(struct fs_node *node,
				 enum fs_i_mutex_lock_class class)
{
	if (node) {
		mutex_lock_nested(&node->lock, class);
		atomic_inc(&node->refcount);
	}
}

static void lock_ref_node(struct fs_node *node)
{
	if (node) {
		mutex_lock(&node->lock);
		atomic_inc(&node->refcount);
	}
}

static void unlock_ref_node(struct fs_node *node)
{
	if (node) {
		atomic_dec(&node->refcount);
		mutex_unlock(&node->lock);
	}
}

static void tree_put_node(struct fs_node *node)
{
	struct fs_node *parent_node = node->parent;

	lock_ref_node(parent_node);
	if (atomic_dec_and_test(&node->refcount)) {
		if (parent_node)
			list_del_init(&node->list);
		if (node->remove_func)
			node->remove_func(node);
		kfree(node);
		node = NULL;
	}
	unlock_ref_node(parent_node);
	if (!node && parent_node)
		tree_put_node(parent_node);
}
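
/* Dropping the last reference frees a node and then releases the reference
 * it held on its parent, so an entirely unused branch unwinds bottom-up
 * through the recursive tree_put_node() call above.
 */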

static int tree_remove_node(struct fs_node *node)
{
	if (atomic_read(&node->refcount) > 1) {
		atomic_dec(&node->refcount);
		return -EEXIST;
	}
	tree_put_node(node);
	return 0;
}

static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
				 unsigned int prio)
{
	struct fs_prio *iter_prio;

	fs_for_each_prio(iter_prio, ns) {
		if (iter_prio->prio == prio)
			return iter_prio;
	}

	return NULL;
}

static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size)
{
	unsigned int i;

	for (i = 0; i < size; i++, mask++, val1++, val2++)
		if ((*((u8 *)val1) & (*(u8 *)mask)) !=
		    ((*(u8 *)val2) & (*(u8 *)mask)))
			return false;

	return true;
}
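
/* Example: with a mask byte of 0xf0, the values 0x1a and 0x15 compare equal
 * above (both are 0x10 after masking), while 0x1a and 0x2a do not.
 */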

static bool compare_match_value(struct mlx5_flow_group_mask *mask,
				void *fte_param1, void *fte_param2)
{
	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						fte_param1, outer_headers);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						fte_param2, outer_headers);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					      mask->match_criteria, outer_headers);

		if (!masked_memcmp(fte_mask, fte_match1, fte_match2,
				   MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
			return false;
	}

	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						fte_param1, misc_parameters);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						fte_param2, misc_parameters);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					      mask->match_criteria, misc_parameters);

		if (!masked_memcmp(fte_mask, fte_match1, fte_match2,
				   MLX5_ST_SZ_BYTES(fte_match_set_misc)))
			return false;
	}

	if (mask->match_criteria_enable &
	    1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
		void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
						fte_param1, inner_headers);
		void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
						fte_param2, inner_headers);
		void *fte_mask = MLX5_ADDR_OF(fte_match_param,
					      mask->match_criteria, inner_headers);

		if (!masked_memcmp(fte_mask, fte_match1, fte_match2,
				   MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
			return false;
	}
	return true;
}

static bool compare_match_criteria(u8 match_criteria_enable1,
				   u8 match_criteria_enable2,
				   void *mask1, void *mask2)
{
	return match_criteria_enable1 == match_criteria_enable2 &&
		!memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param));
}

static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
{
	struct fs_node *root;
	struct mlx5_flow_namespace *ns;

	root = node->root;

	if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) {
		pr_warn("mlx5: flow steering node is not in tree or is corrupted\n");
		return NULL;
	}

	ns = container_of(root, struct mlx5_flow_namespace, node);
	return container_of(ns, struct mlx5_flow_root_namespace, ns);
}

static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
{
	struct mlx5_flow_root_namespace *root = find_root(node);

	if (root)
		return root->dev;
	return NULL;
}

static void del_flow_table(struct fs_node *node)
{
	struct mlx5_flow_table *ft;
	struct mlx5_core_dev *dev;
	struct fs_prio *prio;
	int err;

	fs_get_obj(ft, node);
	dev = get_dev(&ft->node);

	err = mlx5_cmd_destroy_flow_table(dev, ft);
	if (err)
		mlx5_core_warn(dev, "flow steering can't destroy ft\n");
	fs_get_obj(prio, ft->node.parent);
	prio->num_ft--;
}

static void del_rule(struct fs_node *node)
{
	struct mlx5_flow_rule *rule;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	struct fs_fte *fte;
	u32	*match_value;
	int modify_mask;
	struct mlx5_core_dev *dev = get_dev(node);
	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
	int err;
	bool update_fte = false;

	match_value = kvzalloc(match_len, GFP_KERNEL);
	if (!match_value)
		return;

	fs_get_obj(rule, node);
	fs_get_obj(fte, rule->node.parent);
	fs_get_obj(fg, fte->node.parent);
	memcpy(match_value, fte->val, sizeof(fte->val));
	fs_get_obj(ft, fg->node.parent);
	list_del(&rule->node.list);
	if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
		mutex_lock(&rule->dest_attr.ft->lock);
		list_del(&rule->next_ft);
		mutex_unlock(&rule->dest_attr.ft->lock);
	}

	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
	    --fte->dests_size) {
		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
		fte->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
		update_fte = true;
		goto out;
	}

	if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
	    --fte->dests_size) {
		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
		update_fte = true;
	}
out:
	if (update_fte && fte->dests_size) {
		err = mlx5_cmd_update_fte(dev, ft, fg->id, modify_mask, fte);
		if (err)
			mlx5_core_warn(dev,
				       "%s can't del rule fg id=%d fte_index=%d\n",
				       __func__, fg->id, fte->index);
	}
	kvfree(match_value);
}

static void del_fte(struct fs_node *node)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	struct mlx5_core_dev *dev;
	struct fs_fte *fte;
	int err;

	fs_get_obj(fte, node);
	fs_get_obj(fg, fte->node.parent);
	fs_get_obj(ft, fg->node.parent);

	dev = get_dev(&ft->node);
	err = mlx5_cmd_delete_fte(dev, ft,
				  fte->index);
	if (err)
		mlx5_core_warn(dev,
			       "flow steering can't delete fte in index %d of flow group id %d\n",
			       fte->index, fg->id);

	fte->status = 0;
	fg->num_ftes--;
}

static void del_flow_group(struct fs_node *node)
{
	struct mlx5_flow_group *fg;
	struct mlx5_flow_table *ft;
	struct mlx5_core_dev *dev;

	fs_get_obj(fg, node);
	fs_get_obj(ft, fg->node.parent);
	dev = get_dev(&ft->node);

	if (ft->autogroup.active)
		ft->autogroup.num_groups--;

	if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
		mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
			       fg->id, ft->id);
}

static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act,
				u32 *match_value,
				unsigned int index)
{
	struct fs_fte *fte;

	fte = kzalloc(sizeof(*fte), GFP_KERNEL);
	if (!fte)
		return ERR_PTR(-ENOMEM);

	memcpy(fte->val, match_value, sizeof(fte->val));
	fte->node.type = FS_TYPE_FLOW_ENTRY;
	fte->flow_tag = flow_act->flow_tag;
	fte->index = index;
	fte->action = flow_act->action;
	fte->encap_id = flow_act->encap_id;
	fte->modify_id = flow_act->modify_id;

	return fte;
}

static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in)
{
	struct mlx5_flow_group *fg;
	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
					    create_fg_in, match_criteria);
	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
					    create_fg_in,
					    match_criteria_enable);
	fg = kzalloc(sizeof(*fg), GFP_KERNEL);
	if (!fg)
		return ERR_PTR(-ENOMEM);

	fg->mask.match_criteria_enable = match_criteria_enable;
	memcpy(&fg->mask.match_criteria, match_criteria,
	       sizeof(fg->mask.match_criteria));
	fg->node.type = FS_TYPE_FLOW_GROUP;
	fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
				   start_flow_index);
	fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in,
				end_flow_index) - fg->start_index + 1;
	return fg;
}

static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
						enum fs_flow_table_type table_type,
						enum fs_flow_table_op_mod op_mod,
						u32 flags)
{
	struct mlx5_flow_table *ft;

	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return NULL;

	ft->level = level;
	ft->node.type = FS_TYPE_FLOW_TABLE;
	ft->op_mod = op_mod;
	ft->type = table_type;
	ft->vport = vport;
	ft->max_fte = max_fte;
	ft->flags = flags;
	INIT_LIST_HEAD(&ft->fwd_rules);
	mutex_init(&ft->lock);

	return ft;
}

/* If reverse is false, we search for the first flow table in the
 * root sub-tree that comes after start (closest from the right);
 * otherwise we search for the last flow table in the sub-tree that
 * comes before start (closest from the left).
 */
static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
							 struct list_head *start,
							 bool reverse)
{
#define list_advance_entry(pos, reverse)		\
	((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list))

#define list_for_each_advance_continue(pos, head, reverse)	\
	for (pos = list_advance_entry(pos, reverse);		\
	     &pos->list != (head);				\
	     pos = list_advance_entry(pos, reverse))

	struct fs_node *iter = list_entry(start, struct fs_node, list);
	struct mlx5_flow_table *ft = NULL;

	if (!root)
		return NULL;

	list_for_each_advance_continue(iter, &root->children, reverse) {
		if (iter->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, iter);
			return ft;
		}
		ft = find_closest_ft_recursive(iter, &iter->children, reverse);
		if (ft)
			return ft;
	}

	return ft;
}

/* If reverse is false, return the first flow table in the next priority
 * after prio in the tree; otherwise return the last flow table in the
 * previous priority before prio.
 */
static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_node *curr_node;
	struct fs_node *parent;

	parent = prio->node.parent;
	curr_node = &prio->node;
	while (!ft && parent) {
		ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
		curr_node = parent;
		parent = curr_node->parent;
	}
	return ft;
}

/* Assuming the whole tree is locked by the chain-lock mutex */
static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
{
	return find_closest_ft(prio, false);
}

/* Assuming the whole tree is locked by the chain-lock mutex */
static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
{
	return find_closest_ft(prio, true);
}

static int connect_fts_in_prio(struct mlx5_core_dev *dev,
			       struct fs_prio *prio,
			       struct mlx5_flow_table *ft)
{
	struct mlx5_flow_table *iter;
	int i = 0;
	int err;

	fs_for_each_ft(iter, prio) {
		i++;
		err = mlx5_cmd_modify_flow_table(dev,
						 iter,
						 ft);
		if (err) {
			mlx5_core_warn(dev, "Failed to modify flow table %d\n",
				       iter->id);
			/* The driver is out of sync with the FW */
			WARN_ON(i > 1);
			return err;
		}
	}
	return 0;
}

/* Connect flow tables from previous priority of prio to ft */
static int connect_prev_fts(struct mlx5_core_dev *dev,
			    struct mlx5_flow_table *ft,
			    struct fs_prio *prio)
{
	struct mlx5_flow_table *prev_ft;

	prev_ft = find_prev_chained_ft(prio);
	if (prev_ft) {
		struct fs_prio *prev_prio;

		fs_get_obj(prev_prio, prev_ft->node.parent);
		return connect_fts_in_prio(dev, prev_prio, ft);
	}
	return 0;
}

static int update_root_ft_create(struct mlx5_flow_table *ft,
				 struct fs_prio *prio)
{
	struct mlx5_flow_root_namespace *root = find_root(&prio->node);
	int min_level = INT_MAX;
	int err;

	if (root->root_ft)
		min_level = root->root_ft->level;

	if (ft->level >= min_level)
		return 0;

	err = mlx5_cmd_update_root_ft(root->dev, ft, root->underlay_qpn);
	if (err)
		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
			       ft->id);
	else
		root->root_ft = ft;

	return err;
}

static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
					 struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	struct fs_fte *fte;
	int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
	int err = 0;

	fs_get_obj(fte, rule->node.parent);
	if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
		return -EINVAL;
	lock_ref_node(&fte->node);
	fs_get_obj(fg, fte->node.parent);
	fs_get_obj(ft, fg->node.parent);

	memcpy(&rule->dest_attr, dest, sizeof(*dest));
	err = mlx5_cmd_update_fte(get_dev(&ft->node),
				  ft, fg->id,
				  modify_mask,
				  fte);
	unlock_ref_node(&fte->node);

	return err;
}

int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
				 struct mlx5_flow_destination *new_dest,
				 struct mlx5_flow_destination *old_dest)
{
	int i;

	if (!old_dest) {
		if (handle->num_rules != 1)
			return -EINVAL;
		return _mlx5_modify_rule_destination(handle->rule[0],
						     new_dest);
	}

	for (i = 0; i < handle->num_rules; i++) {
		if (mlx5_flow_dests_cmp(old_dest, &handle->rule[i]->dest_attr))
			return _mlx5_modify_rule_destination(handle->rule[i],
							     new_dest);
	}

	return -EINVAL;
}

/* Modify/set FWD rules that point to old_next_ft so that they point to new_next_ft */
static int connect_fwd_rules(struct mlx5_core_dev *dev,
			     struct mlx5_flow_table *new_next_ft,
			     struct mlx5_flow_table *old_next_ft)
{
	struct mlx5_flow_destination dest;
	struct mlx5_flow_rule *iter;
	int err = 0;

	/* new_next_ft and old_next_ft could be NULL only
	 * when we create/destroy the anchor flow table.
	 */
	if (!new_next_ft || !old_next_ft)
		return 0;

	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = new_next_ft;

	mutex_lock(&old_next_ft->lock);
	list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
	mutex_unlock(&old_next_ft->lock);
	list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
		err = _mlx5_modify_rule_destination(iter, &dest);
		if (err)
			pr_err("mlx5_core: failed to modify rule to point to flow table %d\n",
			       new_next_ft->id);
	}
	return 0;
}
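
/* Note that connect_fwd_rules() returns 0 even when an individual rule
 * update fails: the splice above has already moved the rules onto the new
 * table, so the remaining updates are still attempted and failures are only
 * logged.
 */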

static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
			      struct fs_prio *prio)
{
	struct mlx5_flow_table *next_ft;
	int err = 0;

	/* Connect_prev_fts and update_root_ft_create are mutually exclusive */

	if (list_empty(&prio->node.children)) {
		err = connect_prev_fts(dev, ft, prio);
		if (err)
			return err;

		next_ft = find_next_chained_ft(prio);
		err = connect_fwd_rules(dev, ft, next_ft);
		if (err)
			return err;
	}

	if (MLX5_CAP_FLOWTABLE(dev,
			       flow_table_properties_nic_receive.modify_root))
		err = update_root_ft_create(ft, prio);
	return err;
}

static void list_add_flow_table(struct mlx5_flow_table *ft,
				struct fs_prio *prio)
{
	struct list_head *prev = &prio->node.children;
	struct mlx5_flow_table *iter;

	fs_for_each_ft(iter, prio) {
		if (iter->level > ft->level)
			break;
		prev = &iter->node.list;
	}
	list_add(&ft->node.list, prev);
}

static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
							struct mlx5_flow_table_attr *ft_attr,
							enum fs_flow_table_op_mod op_mod,
							u16 vport)
{
	struct mlx5_flow_root_namespace *root = find_root(&ns->node);
	struct mlx5_flow_table *next_ft = NULL;
	struct fs_prio *fs_prio = NULL;
	struct mlx5_flow_table *ft;
	int log_table_sz;
	int err;

	if (!root) {
		pr_err("mlx5: flow steering failed to find root of namespace\n");
		return ERR_PTR(-ENODEV);
	}

	mutex_lock(&root->chain_lock);
	fs_prio = find_prio(ns, ft_attr->prio);
	if (!fs_prio) {
		err = -EINVAL;
		goto unlock_root;
	}
	if (ft_attr->level >= fs_prio->num_levels) {
		err = -ENOSPC;
		goto unlock_root;
	}
	/* The requested level is relative to the
	 * priority's level range.
	 */
	ft_attr->level += fs_prio->start_level;
	ft = alloc_flow_table(ft_attr->level,
			      vport,
			      ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0,
			      root->table_type,
			      op_mod, ft_attr->flags);
	if (!ft) {
		err = -ENOMEM;
		goto unlock_root;
	}

	tree_init_node(&ft->node, 1, del_flow_table);
	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
	next_ft = find_next_chained_ft(fs_prio);
	err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
					 ft->level, log_table_sz, next_ft, &ft->id,
					 ft->flags);
	if (err)
		goto free_ft;

	err = connect_flow_table(root->dev, ft, fs_prio);
	if (err)
		goto destroy_ft;
	lock_ref_node(&fs_prio->node);
	tree_add_node(&ft->node, &fs_prio->node);
	list_add_flow_table(ft, fs_prio);
	fs_prio->num_ft++;
	unlock_ref_node(&fs_prio->node);
	mutex_unlock(&root->chain_lock);
	return ft;
destroy_ft:
	mlx5_cmd_destroy_flow_table(root->dev, ft);
free_ft:
	kfree(ft);
unlock_root:
	mutex_unlock(&root->chain_lock);
	return ERR_PTR(err);
}

struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
					       struct mlx5_flow_table_attr *ft_attr)
{
	return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0);
}

struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
						     int prio, int max_fte,
						     u32 level, u16 vport)
{
	struct mlx5_flow_table_attr ft_attr = {};

	ft_attr.max_fte = max_fte;
	ft_attr.level   = level;
	ft_attr.prio    = prio;

	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport);
}

struct mlx5_flow_table*
mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
				 int prio, u32 level)
{
	struct mlx5_flow_table_attr ft_attr = {};

	ft_attr.level = level;
	ft_attr.prio  = prio;
	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
}
EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);

struct mlx5_flow_table*
mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
				    int prio,
				    int num_flow_table_entries,
				    int max_num_groups,
				    u32 level,
				    u32 flags)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_table *ft;

	if (max_num_groups > num_flow_table_entries)
		return ERR_PTR(-EINVAL);

	ft_attr.max_fte = num_flow_table_entries;
	ft_attr.prio    = prio;
	ft_attr.level   = level;
	ft_attr.flags   = flags;

	ft = mlx5_create_flow_table(ns, &ft_attr);
	if (IS_ERR(ft))
		return ft;

	ft->autogroup.active = true;
	ft->autogroup.required_groups = max_num_groups;

	return ft;
}
EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
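
/* A rough usage sketch (illustrative only, error handling elided):
 *
 *	ft = mlx5_create_auto_grouped_flow_table(ns, 0, 1024, 16, 0, 0);
 *	handle = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 *	...
 *	mlx5_del_flow_rules(handle);
 *	mlx5_destroy_flow_table(ft);
 *
 * An auto-grouped table spares the caller from managing flow groups; the
 * matching group is carved out on demand by create_autogroup() below.
 */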

/* Flow table should be locked */
static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft,
							u32 *fg_in,
							struct list_head
							*prev_fg,
							bool is_auto_fg)
{
	struct mlx5_flow_group *fg;
	struct mlx5_core_dev *dev = get_dev(&ft->node);
	int err;

	if (!dev)
		return ERR_PTR(-ENODEV);

	fg = alloc_flow_group(fg_in);
	if (IS_ERR(fg))
		return fg;

	err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
	if (err) {
		kfree(fg);
		return ERR_PTR(err);
	}

	if (ft->autogroup.active)
		ft->autogroup.num_groups++;
	/* Add node to tree */
	tree_init_node(&fg->node, !is_auto_fg, del_flow_group);
	tree_add_node(&fg->node, &ft->node);
	/* Add node to group list */
	list_add(&fg->node.list, prev_fg);

	return fg;
}

struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
					       u32 *fg_in)
{
	struct mlx5_flow_group *fg;

	if (ft->autogroup.active)
		return ERR_PTR(-EPERM);

	lock_ref_node(&ft->node);
	fg = create_flow_group_common(ft, fg_in, ft->node.children.prev, false);
	unlock_ref_node(&ft->node);

	return fg;
}

static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_rule *rule;

	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
	if (!rule)
		return NULL;

	INIT_LIST_HEAD(&rule->next_ft);
	rule->node.type = FS_TYPE_FLOW_DEST;
	if (dest)
		memcpy(&rule->dest_attr, dest, sizeof(*dest));

	return rule;
}

static struct mlx5_flow_handle *alloc_handle(int num_rules)
{
	struct mlx5_flow_handle *handle;

	handle = kzalloc(sizeof(*handle) + sizeof(handle->rule[0]) *
			  num_rules, GFP_KERNEL);
	if (!handle)
		return NULL;

	handle->num_rules = num_rules;

	return handle;
}

static void destroy_flow_handle(struct fs_fte *fte,
				struct mlx5_flow_handle *handle,
				struct mlx5_flow_destination *dest,
				int i)
{
	for (; --i >= 0;) {
		if (atomic_dec_and_test(&handle->rule[i]->node.refcount)) {
			fte->dests_size--;
			list_del(&handle->rule[i]->node.list);
			kfree(handle->rule[i]);
		}
	}
	kfree(handle);
}

static struct mlx5_flow_handle *
create_flow_handle(struct fs_fte *fte,
		   struct mlx5_flow_destination *dest,
		   int dest_num,
		   int *modify_mask,
		   bool *new_rule)
{
	struct mlx5_flow_handle *handle;
	struct mlx5_flow_rule *rule = NULL;
	static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
	static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
	int type;
	int i = 0;

	handle = alloc_handle((dest_num) ? dest_num : 1);
	if (!handle)
		return ERR_PTR(-ENOMEM);

	do {
		if (dest) {
			rule = find_flow_rule(fte, dest + i);
			if (rule) {
				atomic_inc(&rule->node.refcount);
				goto rule_found;
			}
		}

		*new_rule = true;
		rule = alloc_rule(dest + i);
		if (!rule)
			goto free_rules;

		/* Add dest to the dests list - flow tables need to be at
		 * the end of the list so that forward-to-next-prio rules
		 * come last.
		 */
		tree_init_node(&rule->node, 1, del_rule);
		if (dest &&
		    dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
			list_add(&rule->node.list, &fte->node.children);
		else
			list_add_tail(&rule->node.list, &fte->node.children);
		if (dest) {
			fte->dests_size++;

			type = dest[i].type ==
				MLX5_FLOW_DESTINATION_TYPE_COUNTER;
			*modify_mask |= type ? count : dst;
		}
rule_found:
		handle->rule[i] = rule;
	} while (++i < dest_num);

	return handle;

free_rules:
	destroy_flow_handle(fte, handle, dest, i);
	return ERR_PTR(-ENOMEM);
}
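
/* create_flow_handle() deduplicates destinations: when the FTE already has a
 * rule for a requested destination, that rule's refcount is bumped instead
 * of allocating a new one, and *modify_mask only accumulates bits for rules
 * that are actually new.
 */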

/* fte should not be deleted while calling this function */
static struct mlx5_flow_handle *
add_rule_fte(struct fs_fte *fte,
	     struct mlx5_flow_group *fg,
	     struct mlx5_flow_destination *dest,
	     int dest_num,
	     bool update_action)
{
	struct mlx5_flow_handle *handle;
	struct mlx5_flow_table *ft;
	int modify_mask = 0;
	int err;
	bool new_rule = false;

	handle = create_flow_handle(fte, dest, dest_num, &modify_mask,
				    &new_rule);
	if (IS_ERR(handle) || !new_rule)
		goto out;

	if (update_action)
		modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);

	fs_get_obj(ft, fg->node.parent);
	if (!(fte->status & FS_FTE_STATUS_EXISTING))
		err = mlx5_cmd_create_fte(get_dev(&ft->node),
					  ft, fg->id, fte);
	else
		err = mlx5_cmd_update_fte(get_dev(&ft->node),
					  ft, fg->id, modify_mask, fte);
	if (err)
		goto free_handle;

	fte->status |= FS_FTE_STATUS_EXISTING;

out:
	return handle;

free_handle:
	destroy_flow_handle(fte, handle, dest, handle->num_rules);
	return ERR_PTR(err);
}
/* Assumes fg is locked */
static unsigned int get_free_fte_index(struct mlx5_flow_group *fg,
				       struct list_head **prev)
{
	struct fs_fte *fte;
	unsigned int start = fg->start_index;

	if (prev)
		*prev = &fg->node.children;

	/* the list is assumed to be sorted by index */
	fs_for_each_fte(fte, fg) {
		if (fte->index != start)
			return start;
		start++;
		if (prev)
			*prev = &fte->node.list;
	}

	return start;
}
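
/* For example, a group with start_index 0 whose FTEs occupy indices
 * {0, 1, 3} makes the scan above stop at the gap and return 2, with *prev
 * pointing at the entry for index 1.
 */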

/* prev is output, prev->next = new_fte */
static struct fs_fte *create_fte(struct mlx5_flow_group *fg,
				 u32 *match_value,
				 struct mlx5_flow_act *flow_act,
				 struct list_head **prev)
{
	struct fs_fte *fte;
	int index;

	index = get_free_fte_index(fg, prev);
	fte = alloc_fte(flow_act, match_value, index);

	return fte;
}

static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
						u8 match_criteria_enable,
						u32 *match_criteria)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct list_head *prev = &ft->node.children;
	unsigned int candidate_index = 0;
	struct mlx5_flow_group *fg;
	void *match_criteria_addr;
	unsigned int group_size = 0;
	u32 *in;

	if (!ft->autogroup.active)
		return ERR_PTR(-ENOENT);

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return ERR_PTR(-ENOMEM);

	if (ft->autogroup.num_groups < ft->autogroup.required_groups)
		/* We reserve space for one extra flow group on top of the required ones */
		group_size = ft->max_fte / (ft->autogroup.required_groups + 1);

	/* group_size may be 0 if ft->max_fte < required_groups + 1 */
	if (group_size == 0)
		group_size = 1;

	/* sorted by start_index */
	fs_for_each_fg(fg, ft) {
		if (candidate_index + group_size > fg->start_index)
			candidate_index = fg->start_index + fg->max_ftes;
		else
			break;
		prev = &fg->node.list;
	}

	if (candidate_index + group_size > ft->max_fte) {
		fg = ERR_PTR(-ENOSPC);
		goto out;
	}

	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
		 match_criteria_enable);
	MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
	MLX5_SET(create_flow_group_in, in, end_flow_index,   candidate_index +
		 group_size - 1);
	match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
					   in, match_criteria);
	memcpy(match_criteria_addr, match_criteria,
	       MLX5_ST_SZ_BYTES(fte_match_param));

	fg = create_flow_group_common(ft, in, prev, true);
out:
	kvfree(in);
	return fg;
}

static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
				struct mlx5_flow_destination *d2)
{
	if (d1->type == d2->type) {
		if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
		     d1->vport_num == d2->vport_num) ||
		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
		     d1->ft == d2->ft) ||
		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
		     d1->tir_num == d2->tir_num))
			return true;
	}

	return false;
}

static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
					     struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_rule *rule;

	list_for_each_entry(rule, &fte->node.children, node.list) {
		if (mlx5_flow_dests_cmp(&rule->dest_attr, dest))
			return rule;
	}
	return NULL;
}

static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
					    u32 *match_value,
					    struct mlx5_flow_act *flow_act,
					    struct mlx5_flow_destination *dest,
					    int dest_num)
{
	struct mlx5_flow_handle *handle;
	struct mlx5_flow_table *ft;
	struct list_head *prev;
	struct fs_fte *fte;
	int i;

	nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT);
	fs_for_each_fte(fte, fg) {
		nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
		if (compare_match_value(&fg->mask, match_value, &fte->val) &&
		    (flow_act->action & fte->action)) {
			int old_action = fte->action;

			if (fte->flow_tag != flow_act->flow_tag) {
				mlx5_core_warn(get_dev(&fte->node),
					       "FTE flow tag %u already exists with different flow tag %u\n",
					       fte->flow_tag,
					       flow_act->flow_tag);
				handle = ERR_PTR(-EEXIST);
				goto unlock_fte;
			}

			fte->action |= flow_act->action;
			handle = add_rule_fte(fte, fg, dest, dest_num,
					      old_action != flow_act->action);
			if (IS_ERR(handle)) {
				fte->action = old_action;
				goto unlock_fte;
			} else {
				goto add_rules;
			}
		}
		unlock_ref_node(&fte->node);
	}
	fs_get_obj(ft, fg->node.parent);
	if (fg->num_ftes >= fg->max_ftes) {
		handle = ERR_PTR(-ENOSPC);
		goto unlock_fg;
	}

	fte = create_fte(fg, match_value, flow_act, &prev);
	if (IS_ERR(fte)) {
		handle = ERR_CAST(fte);
		goto unlock_fg;
	}
	tree_init_node(&fte->node, 0, del_fte);
	nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
	handle = add_rule_fte(fte, fg, dest, dest_num, false);
	if (IS_ERR(handle)) {
		unlock_ref_node(&fte->node);
		kfree(fte);
		goto unlock_fg;
	}

	fg->num_ftes++;

	tree_add_node(&fte->node, &fg->node);
	list_add(&fte->node.list, prev);
add_rules:
	for (i = 0; i < handle->num_rules; i++) {
		if (atomic_read(&handle->rule[i]->node.refcount) == 1)
			tree_add_node(&handle->rule[i]->node, &fte->node);
	}
unlock_fte:
	unlock_ref_node(&fte->node);
unlock_fg:
	unlock_ref_node(&fg->node);
	return handle;
}

struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle)
{
	struct mlx5_flow_rule *dst;
	struct fs_fte *fte;

	fs_get_obj(fte, handle->rule[0]->node.parent);

	fs_for_each_dst(dst, fte) {
		if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
			return dst->dest_attr.counter;
	}

	return NULL;
}

static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
{
	if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT))
		return !counter;

	if (!counter)
		return false;

	return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
}

static bool dest_is_valid(struct mlx5_flow_destination *dest,
			  u32 action,
			  struct mlx5_flow_table *ft)
{
	if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
		return counter_is_valid(dest->counter, action);

	if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
		return true;

	if (!dest || ((dest->type ==
	    MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) &&
	    (dest->ft->level <= ft->level)))
		return false;
	return true;
}

static struct mlx5_flow_handle *
_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
		     struct mlx5_flow_spec *spec,
		     struct mlx5_flow_act *flow_act,
		     struct mlx5_flow_destination *dest,
		     int dest_num)
{
	struct mlx5_flow_group *g;
	struct mlx5_flow_handle *rule;
	int i;

	for (i = 0; i < dest_num; i++) {
		if (!dest_is_valid(&dest[i], flow_act->action, ft))
			return ERR_PTR(-EINVAL);
	}

	nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
	fs_for_each_fg(g, ft)
		if (compare_match_criteria(g->mask.match_criteria_enable,
					   spec->match_criteria_enable,
					   g->mask.match_criteria,
					   spec->match_criteria)) {
			rule = add_rule_fg(g, spec->match_value,
					   flow_act, dest, dest_num);
			if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC)
				goto unlock;
		}

	g = create_autogroup(ft, spec->match_criteria_enable,
			     spec->match_criteria);
	if (IS_ERR(g)) {
		rule = ERR_CAST(g);
		goto unlock;
	}

	rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num);
	if (IS_ERR(rule)) {
		/* tree_remove_node() assumes refcount > 0, but an autogroup
		 * is created with a refcount of 0, so take a reference
		 * before removing it.
		 */
		unlock_ref_node(&ft->node);
		tree_get_node(&g->node);
		tree_remove_node(&g->node);
		return rule;
	}
unlock:
	unlock_ref_node(&ft->node);
	return rule;
}

static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
{
	return ((ft->type == FS_FT_NIC_RX) &&
		(MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
}
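
/* MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO has no direct hardware equivalent:
 * mlx5_add_flow_rules() below emulates it by resolving the first flow table
 * of the next priority and installing a plain FWD_DEST rule towards it, then
 * keeping the rule on that table's fwd_rules list so connect_fwd_rules() can
 * re-point it when tables are added or removed.
 */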

struct mlx5_flow_handle *
mlx5_add_flow_rules(struct mlx5_flow_table *ft,
		    struct mlx5_flow_spec *spec,
		    struct mlx5_flow_act *flow_act,
		    struct mlx5_flow_destination *dest,
		    int dest_num)
{
	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
	struct mlx5_flow_destination gen_dest;
	struct mlx5_flow_table *next_ft = NULL;
	struct mlx5_flow_handle *handle = NULL;
	u32 sw_action = flow_act->action;
	struct fs_prio *prio;

	fs_get_obj(prio, ft->node.parent);
	if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
		if (!fwd_next_prio_supported(ft))
			return ERR_PTR(-EOPNOTSUPP);
		if (dest)
			return ERR_PTR(-EINVAL);
		mutex_lock(&root->chain_lock);
		next_ft = find_next_chained_ft(prio);
		if (next_ft) {
			gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
			gen_dest.ft = next_ft;
			dest = &gen_dest;
			dest_num = 1;
			flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
		} else {
			mutex_unlock(&root->chain_lock);
			return ERR_PTR(-EOPNOTSUPP);
		}
	}

	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, dest_num);

	if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
		if (!IS_ERR_OR_NULL(handle) &&
		    (list_empty(&handle->rule[0]->next_ft))) {
			mutex_lock(&next_ft->lock);
			list_add(&handle->rule[0]->next_ft,
				 &next_ft->fwd_rules);
			mutex_unlock(&next_ft->lock);
			handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
		}
		mutex_unlock(&root->chain_lock);
	}
	return handle;
}
EXPORT_SYMBOL(mlx5_add_flow_rules);

void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
{
	int i;

	for (i = handle->num_rules - 1; i >= 0; i--)
		tree_remove_node(&handle->rule[i]->node);
	kfree(handle);
}
EXPORT_SYMBOL(mlx5_del_flow_rules);

/* Assuming prio->node.children (the flow tables) is sorted by level */
static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
{
	struct fs_prio *prio;

	fs_get_obj(prio, ft->node.parent);

	if (!list_is_last(&ft->node.list, &prio->node.children))
		return list_next_entry(ft, node.list);
	return find_next_chained_ft(prio);
}

static int update_root_ft_destroy(struct mlx5_flow_table *ft)
{
	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
	struct mlx5_flow_table *new_root_ft = NULL;

	if (root->root_ft != ft)
		return 0;

	new_root_ft = find_next_ft(ft);
	if (new_root_ft) {
		int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
						  root->underlay_qpn);

		if (err) {
			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
				       ft->id);
			return err;
		}
	}
	root->root_ft = new_root_ft;
	return 0;
}

/* Connect flow table from previous priority to
 * the next flow table.
 */
static int disconnect_flow_table(struct mlx5_flow_table *ft)
{
	struct mlx5_core_dev *dev = get_dev(&ft->node);
	struct mlx5_flow_table *next_ft;
	struct fs_prio *prio;
	int err = 0;

	err = update_root_ft_destroy(ft);
	if (err)
		return err;

	fs_get_obj(prio, ft->node.parent);
	if (list_first_entry(&prio->node.children,
			     struct mlx5_flow_table,
			     node.list) != ft)
		return 0;

	next_ft = find_next_chained_ft(prio);
	err = connect_fwd_rules(dev, next_ft, ft);
	if (err)
		return err;

	err = connect_prev_fts(dev, next_ft, prio);
	if (err)
		mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
			       ft->id);
	return err;
}

int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
{
	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
	int err = 0;

	mutex_lock(&root->chain_lock);
	err = disconnect_flow_table(ft);
	if (err) {
		mutex_unlock(&root->chain_lock);
		return err;
	}
	if (tree_remove_node(&ft->node))
		mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
			       ft->id);
	mutex_unlock(&root->chain_lock);

	return err;
}
EXPORT_SYMBOL(mlx5_destroy_flow_table);

void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
{
	if (tree_remove_node(&fg->node))
		mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
			       fg->id);
}

struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
						    enum mlx5_flow_namespace_type type)
{
	struct mlx5_flow_steering *steering = dev->priv.steering;
	struct mlx5_flow_root_namespace *root_ns;
	int prio;
	struct fs_prio *fs_prio;
	struct mlx5_flow_namespace *ns;

	if (!steering)
		return NULL;

	switch (type) {
	case MLX5_FLOW_NAMESPACE_BYPASS:
	case MLX5_FLOW_NAMESPACE_LAG:
	case MLX5_FLOW_NAMESPACE_OFFLOADS:
	case MLX5_FLOW_NAMESPACE_ETHTOOL:
	case MLX5_FLOW_NAMESPACE_KERNEL:
	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
	case MLX5_FLOW_NAMESPACE_ANCHOR:
		prio = type;
		break;
	case MLX5_FLOW_NAMESPACE_FDB:
		if (steering->fdb_root_ns)
			return &steering->fdb_root_ns->ns;
		else
			return NULL;
	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
		if (steering->esw_egress_root_ns)
			return &steering->esw_egress_root_ns->ns;
		else
			return NULL;
	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
		if (steering->esw_ingress_root_ns)
			return &steering->esw_ingress_root_ns->ns;
		else
			return NULL;
	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
		if (steering->sniffer_rx_root_ns)
			return &steering->sniffer_rx_root_ns->ns;
		else
			return NULL;
	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
		if (steering->sniffer_tx_root_ns)
			return &steering->sniffer_tx_root_ns->ns;
		else
			return NULL;
	default:
		return NULL;
	}

	root_ns = steering->root_ns;
	if (!root_ns)
		return NULL;

	fs_prio = find_prio(&root_ns->ns, prio);
	if (!fs_prio)
		return NULL;

	ns = list_first_entry(&fs_prio->node.children,
			      typeof(*ns),
			      node.list);

	return ns;
}
EXPORT_SYMBOL(mlx5_get_flow_namespace);

static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
				      unsigned int prio, int num_levels)
{
	struct fs_prio *fs_prio;

	fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
	if (!fs_prio)
		return ERR_PTR(-ENOMEM);

	fs_prio->node.type = FS_TYPE_PRIO;
	tree_init_node(&fs_prio->node, 1, NULL);
	tree_add_node(&fs_prio->node, &ns->node);
	fs_prio->num_levels = num_levels;
	fs_prio->prio = prio;
	list_add_tail(&fs_prio->node.list, &ns->node.children);

	return fs_prio;
}

static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
						     *ns)
{
	ns->node.type = FS_TYPE_NAMESPACE;

	return ns;
}

static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
{
	struct mlx5_flow_namespace	*ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return ERR_PTR(-ENOMEM);

	fs_init_namespace(ns);
	tree_init_node(&ns->node, 1, NULL);
	tree_add_node(&ns->node, &prio->node);
	list_add_tail(&ns->node.list, &prio->node.children);

	return ns;
}

static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio,
			     struct init_tree_node *prio_metadata)
{
	struct fs_prio *fs_prio;
	int i;

	for (i = 0; i < prio_metadata->num_leaf_prios; i++) {
		fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels);
		if (IS_ERR(fs_prio))
			return PTR_ERR(fs_prio);
	}
	return 0;
}

#define FLOW_TABLE_BIT_SZ 1
#define GET_FLOW_TABLE_CAP(dev, offset) \
	((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) +	\
			offset / 32)) >>					\
	  (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
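/* GET_FLOW_TABLE_CAP() extracts a single capability bit addressed by its bit
 * offset within the flow table capability block, where bit 0 is the MSB of
 * the first big-endian 32-bit word; e.g. offset 33 selects the second
 * highest bit of the second word.
 */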
static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
{
	int i;

	for (i = 0; i < caps->arr_sz; i++) {
		if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
			return false;
	}
	return true;
}

static int init_root_tree_recursive(struct mlx5_flow_steering *steering,
				    struct init_tree_node *init_node,
				    struct fs_node *fs_parent_node,
				    struct init_tree_node *init_parent_node,
				    int prio)
{
	int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev,
					      flow_table_properties_nic_receive.
					      max_ft_level);
	struct mlx5_flow_namespace *fs_ns;
	struct fs_prio *fs_prio;
	struct fs_node *base;
	int i;
	int err;

	if (init_node->type == FS_TYPE_PRIO) {
		if ((init_node->min_ft_level > max_ft_level) ||
		    !has_required_caps(steering->dev, &init_node->caps))
			return 0;

		fs_get_obj(fs_ns, fs_parent_node);
		if (init_node->num_leaf_prios)
			return create_leaf_prios(fs_ns, prio, init_node);
		fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels);
		if (IS_ERR(fs_prio))
			return PTR_ERR(fs_prio);
		base = &fs_prio->node;
	} else if (init_node->type == FS_TYPE_NAMESPACE) {
		fs_get_obj(fs_prio, fs_parent_node);
		fs_ns = fs_create_namespace(fs_prio);
		if (IS_ERR(fs_ns))
			return PTR_ERR(fs_ns);
		base = &fs_ns->node;
	} else {
		return -EINVAL;
	}
	prio = 0;
	for (i = 0; i < init_node->ar_size; i++) {
		err = init_root_tree_recursive(steering, &init_node->children[i],
					       base, init_node, prio);
		if (err)
			return err;
		if (init_node->children[i].type == FS_TYPE_PRIO &&
		    init_node->children[i].num_leaf_prios) {
			prio += init_node->children[i].num_leaf_prios;
		}
	}

	return 0;
}

static int init_root_tree(struct mlx5_flow_steering *steering,
			  struct init_tree_node *init_node,
			  struct fs_node *fs_parent_node)
{
	int i;
	struct mlx5_flow_namespace *fs_ns;
	int err;

	fs_get_obj(fs_ns, fs_parent_node);
	for (i = 0; i < init_node->ar_size; i++) {
		err = init_root_tree_recursive(steering, &init_node->children[i],
					       &fs_ns->node,
					       init_node, i);
		if (err)
			return err;
	}
	return 0;
}

static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering,
						       enum fs_flow_table_type
						       table_type)
{
	struct mlx5_flow_root_namespace *root_ns;
	struct mlx5_flow_namespace *ns;

	/* Create the root namespace */
	root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL);
	if (!root_ns)
		return NULL;

	root_ns->dev = steering->dev;
	root_ns->table_type = table_type;

	ns = &root_ns->ns;
	fs_init_namespace(ns);
	mutex_init(&root_ns->chain_lock);
	tree_init_node(&ns->node, 1, NULL);
	tree_add_node(&ns->node, NULL);

	return root_ns;
}

static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level);

static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level)
{
	struct fs_prio *prio;

	fs_for_each_prio(prio, ns) {
		/* This updates prio start_level and num_levels */
		set_prio_attrs_in_prio(prio, acc_level);
		acc_level += prio->num_levels;
	}
	return acc_level;
}

static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
{
	struct mlx5_flow_namespace *ns;
	int acc_level_ns = acc_level;

	prio->start_level = acc_level;
	fs_for_each_ns(ns, prio)
		/* This updates start_level and num_levels of ns's priority descendants */
		acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
	if (!prio->num_levels)
		prio->num_levels = acc_level_ns - prio->start_level;
	WARN_ON(prio->num_levels < acc_level_ns - prio->start_level);
}

static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
{
	struct mlx5_flow_namespace *ns = &root_ns->ns;
	struct fs_prio *prio;
	int start_level = 0;

	fs_for_each_prio(prio, ns) {
		set_prio_attrs_in_prio(prio, start_level);
		start_level += prio->num_levels;
	}
}
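
/* Example of the level accounting above: a priority reached with
 * acc_level = 5 whose namespace holds child priorities spanning 4 levels in
 * total gets start_level = 5 and, if it declared no num_levels of its own,
 * num_levels = 4, so the next sibling priority starts at level 9.
 */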

#define ANCHOR_PRIO 0
#define ANCHOR_SIZE 1
#define ANCHOR_LEVEL 0
static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
{
	struct mlx5_flow_namespace *ns = NULL;
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_table *ft;

	ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
	if (WARN_ON(!ns))
		return -EINVAL;

	ft_attr.max_fte = ANCHOR_SIZE;
	ft_attr.level   = ANCHOR_LEVEL;
	ft_attr.prio    = ANCHOR_PRIO;

	ft = mlx5_create_flow_table(ns, &ft_attr);
	if (IS_ERR(ft)) {
		mlx5_core_err(steering->dev, "Failed to create last anchor flow table\n");
		return PTR_ERR(ft);
	}
	return 0;
}

static int init_root_ns(struct mlx5_flow_steering *steering)
{
	steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
	if (!steering->root_ns)
		goto cleanup;

	if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node))
		goto cleanup;

	set_prio_attrs(steering->root_ns);

	if (create_anchor_flow_table(steering))
		goto cleanup;

	return 0;

cleanup:
	mlx5_cleanup_fs(steering->dev);
	return -ENOMEM;
}

static void clean_tree(struct fs_node *node)
{
	if (node) {
		struct fs_node *iter;
		struct fs_node *temp;

		list_for_each_entry_safe(iter, temp, &node->children, list)
			clean_tree(iter);
		tree_remove_node(node);
	}
}

static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
{
	if (!root_ns)
		return;

	clean_tree(&root_ns->ns.node);
}

void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
{
	struct mlx5_flow_steering *steering = dev->priv.steering;

	cleanup_root_ns(steering->root_ns);
	cleanup_root_ns(steering->esw_egress_root_ns);
	cleanup_root_ns(steering->esw_ingress_root_ns);
	cleanup_root_ns(steering->fdb_root_ns);
	cleanup_root_ns(steering->sniffer_rx_root_ns);
	cleanup_root_ns(steering->sniffer_tx_root_ns);
	mlx5_cleanup_fc_stats(dev);
	kfree(steering);
}

static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
{
	struct fs_prio *prio;

	steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX);
	if (!steering->sniffer_tx_root_ns)
		return -ENOMEM;

	/* Create single prio */
	prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
	if (IS_ERR(prio)) {
		cleanup_root_ns(steering->sniffer_tx_root_ns);
		return PTR_ERR(prio);
	}
	return 0;
}

static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
{
	struct fs_prio *prio;

	steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX);
	if (!steering->sniffer_rx_root_ns)
		return -ENOMEM;

	/* Create single prio */
	prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
	if (IS_ERR(prio)) {
		cleanup_root_ns(steering->sniffer_rx_root_ns);
		return PTR_ERR(prio);
	}
	return 0;
}

static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
{
	struct fs_prio *prio;

	steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
	if (!steering->fdb_root_ns)
		return -ENOMEM;

	prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 1);
	if (IS_ERR(prio))
		goto out_err;

	prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
	if (IS_ERR(prio))
		goto out_err;

	set_prio_attrs(steering->fdb_root_ns);
	return 0;

out_err:
	cleanup_root_ns(steering->fdb_root_ns);
	steering->fdb_root_ns = NULL;
	return PTR_ERR(prio);
}

static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering)
{
	struct fs_prio *prio;

	steering->esw_ingress_root_ns = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
	if (!steering->esw_ingress_root_ns)
		return -ENOMEM;

	/* create 1 prio */
	prio = fs_create_prio(&steering->esw_ingress_root_ns->ns, 0,
			      MLX5_TOTAL_VPORTS(steering->dev));
	return PTR_ERR_OR_ZERO(prio);
}

static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering)
{
	struct fs_prio *prio;

	steering->esw_egress_root_ns = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
	if (!steering->esw_egress_root_ns)
		return -ENOMEM;

	/* create 1 prio */
	prio = fs_create_prio(&steering->esw_egress_root_ns->ns, 0,
			      MLX5_TOTAL_VPORTS(steering->dev));
	return PTR_ERR_OR_ZERO(prio);
}

int mlx5_init_fs(struct mlx5_core_dev *dev)
{
	struct mlx5_flow_steering *steering;
	int err = 0;

	err = mlx5_init_fc_stats(dev);
	if (err)
		return err;

	steering = kzalloc(sizeof(*steering), GFP_KERNEL);
	if (!steering)
		return -ENOMEM;
	steering->dev = dev;
	dev->priv.steering = steering;

	if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
	      (MLX5_CAP_GEN(dev, nic_flow_table))) ||
	     ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
	      MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) &&
	    MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) {
		err = init_root_ns(steering);
		if (err)
			goto err;
	}

	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
			err = init_fdb_root_ns(steering);
			if (err)
				goto err;
		}
		if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
			err = init_egress_acl_root_ns(steering);
			if (err)
				goto err;
		}
		if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
			err = init_ingress_acl_root_ns(steering);
			if (err)
				goto err;
		}
	}

	if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
		err = init_sniffer_rx_root_ns(steering);
		if (err)
			goto err;
	}

	if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) {
		err = init_sniffer_tx_root_ns(steering);
		if (err)
			goto err;
	}

	return 0;
err:
	mlx5_cleanup_fs(dev);
	return err;
}

int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
{
	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;

	root->underlay_qpn = underlay_qpn;
	return 0;
}
EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);

int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
{
	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;

	root->underlay_qpn = 0;
	return 0;
}
EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);