/*
 * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/mlx5/driver.h>

#include "mlx5_core.h"
#include "fs_core.h"
#include "fs_cmd.h"
#include "diag/fs_tracepoint.h"

#define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
					 sizeof(struct init_tree_node))

#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\
		 ...) {.type = FS_TYPE_PRIO,\
	.min_ft_level = min_level_val,\
	.num_levels = num_levels_val,\
	.num_leaf_prios = num_prios_val,\
	.caps = caps_val,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}

#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\
	ADD_PRIO(num_prios_val, 0, num_levels_val, {},\
		 __VA_ARGS__)\

#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\
	.children = (struct init_tree_node[]) {__VA_ARGS__},\
	.ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
}

#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
				   sizeof(long))

#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))

#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
			       .caps = (long[]) {__VA_ARGS__} }

#define FS_CHAINING_CAPS  FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \
					   FS_CAP(flow_table_properties_nic_receive.modify_root), \
					   FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
					   FS_CAP(flow_table_properties_nic_receive.flow_table_modify))

#define LEFTOVERS_NUM_LEVELS 1
#define LEFTOVERS_NUM_PRIOS 1

#define BY_PASS_PRIO_NUM_LEVELS 1
#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
			   LEFTOVERS_NUM_PRIOS)

#define ETHTOOL_PRIO_NUM_LEVELS 1
#define ETHTOOL_NUM_PRIOS 11
#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
/* Vlan, mac, ttc, inner ttc, aRFS */
#define KERNEL_NIC_PRIO_NUM_LEVELS 5
#define KERNEL_NIC_NUM_PRIOS 1
/* One more level for tc */
#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)

#define ANCHOR_NUM_LEVELS 1
#define ANCHOR_NUM_PRIOS 1
#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)

#define OFFLOADS_MAX_FT 1
#define OFFLOADS_NUM_PRIOS 1
#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)

#define LAG_PRIO_NUM_LEVELS 1
#define LAG_NUM_PRIOS 1
#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)

struct node_caps {
	size_t	arr_sz;
	long	*caps;
};

static struct init_tree_node {
	enum fs_node_type	type;
	struct init_tree_node *children;
	int ar_size;
	struct node_caps caps;
	int min_ft_level;
	int num_leaf_prios;
	int prio;
	int num_levels;
} root_fs = {
	.type = FS_TYPE_NAMESPACE,
	.ar_size = 7,
	.children = (struct init_tree_node[]) {
		ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
			 FS_CHAINING_CAPS,
			 ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
						  BY_PASS_PRIO_NUM_LEVELS))),
		ADD_PRIO(0, LAG_MIN_LEVEL, 0,
			 FS_CHAINING_CAPS,
			 ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
						  LAG_PRIO_NUM_LEVELS))),
		ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
			 ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))),
		ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0,
			 FS_CHAINING_CAPS,
			 ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS,
						  ETHTOOL_PRIO_NUM_LEVELS))),
		ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
			 ADD_NS(ADD_MULTIPLE_PRIO(1, 1),
				ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
						  KERNEL_NIC_PRIO_NUM_LEVELS))),
		ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
			 FS_CHAINING_CAPS,
			 ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))),
		ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
			 ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))),
	}
};

enum fs_i_mutex_lock_class {
	FS_MUTEX_GRANDPARENT,
	FS_MUTEX_PARENT,
	FS_MUTEX_CHILD
};

static const struct rhashtable_params rhash_fte = {
	.key_len = FIELD_SIZEOF(struct fs_fte, val),
	.key_offset = offsetof(struct fs_fte, val),
	.head_offset = offsetof(struct fs_fte, hash),
	.automatic_shrinking = true,
	.min_size = 1,
};

static const struct rhashtable_params rhash_fg = {
	.key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask),
	.key_offset = offsetof(struct mlx5_flow_group, mask),
	.head_offset = offsetof(struct mlx5_flow_group, hash),
	.automatic_shrinking = true,
	.min_size = 1,
};

static void del_rule(struct fs_node *node);
static void del_flow_table(struct fs_node *node);
static void del_flow_group(struct fs_node *node);
static void del_fte(struct fs_node *node);
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
				struct mlx5_flow_destination *d2);
static struct mlx5_flow_rule *
find_flow_rule(struct fs_fte *fte,
	       struct mlx5_flow_destination *dest);

static void tree_init_node(struct fs_node *node,
			   unsigned int refcount,
			   void (*remove_func)(struct fs_node *))
{
	atomic_set(&node->refcount, refcount);
	INIT_LIST_HEAD(&node->list);
	INIT_LIST_HEAD(&node->children);
	mutex_init(&node->lock);
	node->remove_func = remove_func;
}

static void tree_add_node(struct fs_node *node, struct fs_node *parent)
{
	if (parent)
		atomic_inc(&parent->refcount);
	node->parent = parent;

	/* A node with no parent is the root of the tree */
	if (!parent)
		node->root = node;
	else
		node->root = parent->root;
}

static void tree_get_node(struct fs_node *node)
{
	atomic_inc(&node->refcount);
}

static void nested_lock_ref_node(struct fs_node *node,
				 enum fs_i_mutex_lock_class class)
{
	if (node) {
		mutex_lock_nested(&node->lock, class);
		atomic_inc(&node->refcount);
	}
}

static void lock_ref_node(struct fs_node *node)
{
	if (node) {
		mutex_lock(&node->lock);
		atomic_inc(&node->refcount);
	}
}

static void unlock_ref_node(struct fs_node *node)
{
	if (node) {
		atomic_dec(&node->refcount);
		mutex_unlock(&node->lock);
	}
}

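/* Drop a reference on @node. When the last reference is released the
 * node is unlinked from its parent's child list, its remove callback
 * runs and the node is freed; the reference it held on its parent is
 * then dropped too, so releases can cascade up the tree.
 */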
static void tree_put_node(struct fs_node *node)
{
	struct fs_node *parent_node = node->parent;

	lock_ref_node(parent_node);
	if (atomic_dec_and_test(&node->refcount)) {
		if (parent_node)
			list_del_init(&node->list);
		if (node->remove_func)
			node->remove_func(node);
		kfree(node);
		node = NULL;
	}
	unlock_ref_node(parent_node);
	if (!node && parent_node)
		tree_put_node(parent_node);
}

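/* Remove @node only if the caller holds the last reference: returns
 * -EEXIST (after dropping the caller's reference) when the node is
 * still referenced elsewhere, 0 once it has been released.
 */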
static int tree_remove_node(struct fs_node *node)
{
	if (atomic_read(&node->refcount) > 1) {
		atomic_dec(&node->refcount);
		return -EEXIST;
	}
	tree_put_node(node);
	return 0;
}

static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
				 unsigned int prio)
{
	struct fs_prio *iter_prio;

	fs_for_each_prio(iter_prio, ns) {
		if (iter_prio->prio == prio)
			return iter_prio;
	}

	return NULL;
}

static bool check_last_reserved(const u32 *match_criteria)
{
	char *match_criteria_reserved =
		MLX5_ADDR_OF(fte_match_param, match_criteria, MLX5_FTE_MATCH_PARAM_RESERVED);

	return	!match_criteria_reserved[0] &&
		!memcmp(match_criteria_reserved, match_criteria_reserved + 1,
			MLX5_FLD_SZ_BYTES(fte_match_param,
					  MLX5_FTE_MATCH_PARAM_RESERVED) - 1);
}

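/* A criteria field may only carry a mask when its group is set in
 * match_criteria_enable; verify that every disabled group (outer
 * headers, misc parameters, inner headers) is all-zero and that the
 * reserved tail of the match param is clear.
 */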
static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria)
{
	if (match_criteria_enable & ~(
		(1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)   |
		(1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) |
		(1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)))
		return false;

	if (!(match_criteria_enable &
	      1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)) {
		char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
						  match_criteria, outer_headers);

		if (fg_type_mask[0] ||
		    memcmp(fg_type_mask, fg_type_mask + 1,
			   MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1))
			return false;
	}

	if (!(match_criteria_enable &
	      1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS)) {
		char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
						  match_criteria, misc_parameters);

		if (fg_type_mask[0] ||
		    memcmp(fg_type_mask, fg_type_mask + 1,
			   MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
			return false;
	}

	if (!(match_criteria_enable &
	      1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)) {
		char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
						  match_criteria, inner_headers);

		if (fg_type_mask[0] ||
		    memcmp(fg_type_mask, fg_type_mask + 1,
			   MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1))
			return false;
	}

	return check_last_reserved(match_criteria);
}

static bool check_valid_spec(const struct mlx5_flow_spec *spec)
{
	int i;

	if (!check_valid_mask(spec->match_criteria_enable, spec->match_criteria)) {
		pr_warn("mlx5_core: Match criteria given mismatches match_criteria_enable\n");
		return false;
	}

	for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
		if (spec->match_value[i] & ~spec->match_criteria[i]) {
			pr_warn("mlx5_core: match_value differs from match_criteria\n");
			return false;
		}

	return check_last_reserved(spec->match_value);
}

static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
{
	struct fs_node *root;
	struct mlx5_flow_namespace *ns;

	root = node->root;

	if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) {
		pr_warn("mlx5: flow steering node is not in tree or garbaged\n");
		return NULL;
	}

	ns = container_of(root, struct mlx5_flow_namespace, node);
	return container_of(ns, struct mlx5_flow_root_namespace, ns);
}

static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
{
	struct mlx5_flow_root_namespace *root = find_root(node);

	if (root)
		return root->dev;
	return NULL;
}

static void del_flow_table(struct fs_node *node)
{
	struct mlx5_flow_table *ft;
	struct mlx5_core_dev *dev;
	struct fs_prio *prio;
	int err;

	fs_get_obj(ft, node);
	dev = get_dev(&ft->node);

	err = mlx5_cmd_destroy_flow_table(dev, ft);
	if (err)
		mlx5_core_warn(dev, "flow steering can't destroy ft\n");
	ida_destroy(&ft->fte_allocator);
	rhltable_destroy(&ft->fgs_hash);
	fs_get_obj(prio, ft->node.parent);
	prio->num_ft--;
}

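/* Unlink a rule (one destination) from its FTE. When other destinations
 * remain, the FTE is updated in hardware: the COUNT action is dropped
 * when a counter destination goes away, or the destination list is
 * refreshed for a forward destination.
 */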
static void del_rule(struct fs_node *node)
{
	struct mlx5_flow_rule *rule;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	struct fs_fte *fte;
	int modify_mask;
	struct mlx5_core_dev *dev = get_dev(node);
	int err;
	bool update_fte = false;

	fs_get_obj(rule, node);
	fs_get_obj(fte, rule->node.parent);
	fs_get_obj(fg, fte->node.parent);
	fs_get_obj(ft, fg->node.parent);
	trace_mlx5_fs_del_rule(rule);
	list_del(&rule->node.list);
	if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
		mutex_lock(&rule->dest_attr.ft->lock);
		list_del(&rule->next_ft);
		mutex_unlock(&rule->dest_attr.ft->lock);
	}

	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
	    --fte->dests_size) {
		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
		fte->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
		update_fte = true;
		goto out;
	}

	if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
	    --fte->dests_size) {
		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
		update_fte = true;
	}
out:
	if (update_fte && fte->dests_size) {
		err = mlx5_cmd_update_fte(dev, ft, fg->id, modify_mask, fte);
		if (err)
			mlx5_core_warn(dev,
				       "%s can't del rule fg id=%d fte_index=%d\n",
				       __func__, fg->id, fte->index);
	}
}

static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg)
{
	struct mlx5_flow_table *ft;
	int ret;

	ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte);
	WARN_ON(ret);
	fte->status = 0;
	fs_get_obj(ft, fg->node.parent);
	ida_simple_remove(&ft->fte_allocator, fte->index);
}

static void del_fte(struct fs_node *node)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	struct mlx5_core_dev *dev;
	struct fs_fte *fte;
	int err;

	fs_get_obj(fte, node);
	fs_get_obj(fg, fte->node.parent);
	fs_get_obj(ft, fg->node.parent);
	trace_mlx5_fs_del_fte(fte);

	dev = get_dev(&ft->node);
	err = mlx5_cmd_delete_fte(dev, ft,
				  fte->index);
	if (err)
		mlx5_core_warn(dev,
			       "flow steering can't delete fte in index %d of flow group id %d\n",
			       fte->index, fg->id);

	destroy_fte(fte, fg);
}

static void del_flow_group(struct fs_node *node)
{
	struct mlx5_flow_group *fg;
	struct mlx5_flow_table *ft;
	struct mlx5_core_dev *dev;
	int err;

	fs_get_obj(fg, node);
	fs_get_obj(ft, fg->node.parent);
	dev = get_dev(&ft->node);
	trace_mlx5_fs_del_fg(fg);

	if (ft->autogroup.active)
		ft->autogroup.num_groups--;

	rhashtable_destroy(&fg->ftes_hash);
	err = rhltable_remove(&ft->fgs_hash,
			      &fg->hash,
			      rhash_fg);
	WARN_ON(err);
	if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
		mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
			       fg->id, ft->id);
}

static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act,
				u32 *match_value,
				unsigned int index)
{
	struct fs_fte *fte;

	fte = kzalloc(sizeof(*fte), GFP_KERNEL);
	if (!fte)
		return ERR_PTR(-ENOMEM);

	memcpy(fte->val, match_value, sizeof(fte->val));
	fte->node.type = FS_TYPE_FLOW_ENTRY;
	fte->flow_tag = flow_act->flow_tag;
	fte->index = index;
	fte->action = flow_act->action;
	fte->encap_id = flow_act->encap_id;
	fte->modify_id = flow_act->modify_id;

	return fte;
}

static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in)
{
	struct mlx5_flow_group *fg;
	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
					    create_fg_in, match_criteria);
	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
					    create_fg_in,
					    match_criteria_enable);
	int ret;

	fg = kzalloc(sizeof(*fg), GFP_KERNEL);
	if (!fg)
		return ERR_PTR(-ENOMEM);

	ret = rhashtable_init(&fg->ftes_hash, &rhash_fte);
	if (ret) {
		kfree(fg);
		return ERR_PTR(ret);
	}
	fg->mask.match_criteria_enable = match_criteria_enable;
	memcpy(&fg->mask.match_criteria, match_criteria,
	       sizeof(fg->mask.match_criteria));
	fg->node.type = FS_TYPE_FLOW_GROUP;
	fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
				   start_flow_index);
	fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in,
				end_flow_index) - fg->start_index + 1;
	return fg;
}

static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
						enum fs_flow_table_type table_type,
						enum fs_flow_table_op_mod op_mod,
						u32 flags)
{
	struct mlx5_flow_table *ft;
	int ret;

	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	ret = rhltable_init(&ft->fgs_hash, &rhash_fg);
	if (ret) {
		kfree(ft);
		return ERR_PTR(ret);
	}

	ft->level = level;
	ft->node.type = FS_TYPE_FLOW_TABLE;
	ft->op_mod = op_mod;
	ft->type = table_type;
	ft->vport = vport;
	ft->max_fte = max_fte;
	ft->flags = flags;
	INIT_LIST_HEAD(&ft->fwd_rules);
	mutex_init(&ft->lock);
	ida_init(&ft->fte_allocator);

	return ft;
}

/* If reverse is false, we search for the first flow table in the root
 * sub-tree from start (closest from the right); otherwise we search for
 * the last flow table in the root sub-tree up to start (closest from
 * the left).
 */
static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node  *root,
							 struct list_head *start,
							 bool reverse)
{
#define list_advance_entry(pos, reverse)		\
	((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list))

#define list_for_each_advance_continue(pos, head, reverse)	\
	for (pos = list_advance_entry(pos, reverse);		\
	     &pos->list != (head);				\
	     pos = list_advance_entry(pos, reverse))

	struct fs_node *iter = list_entry(start, struct fs_node, list);
	struct mlx5_flow_table *ft = NULL;

	if (!root)
		return NULL;

	list_for_each_advance_continue(iter, &root->children, reverse) {
		if (iter->type == FS_TYPE_FLOW_TABLE) {
			fs_get_obj(ft, iter);
			return ft;
		}
		ft = find_closest_ft_recursive(iter, &iter->children, reverse);
		if (ft)
			return ft;
	}

	return ft;
}

/* If reverse is false, return the first flow table in the next priority
 * after prio in the tree; otherwise return the last flow table in the
 * previous priority before prio in the tree.
 */
static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
{
	struct mlx5_flow_table *ft = NULL;
	struct fs_node *curr_node;
	struct fs_node *parent;

	parent = prio->node.parent;
	curr_node = &prio->node;
	while (!ft && parent) {
		ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
		curr_node = parent;
		parent = curr_node->parent;
	}
	return ft;
}

/* Assumes the entire tree is locked by the chain lock mutex */
static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
{
	return find_closest_ft(prio, false);
}

/* Assumes the entire tree is locked by the chain lock mutex */
static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
{
	return find_closest_ft(prio, true);
}

static int connect_fts_in_prio(struct mlx5_core_dev *dev,
			       struct fs_prio *prio,
			       struct mlx5_flow_table *ft)
{
	struct mlx5_flow_table *iter;
	int i = 0;
	int err;

	fs_for_each_ft(iter, prio) {
		i++;
		err = mlx5_cmd_modify_flow_table(dev,
						 iter,
						 ft);
		if (err) {
			mlx5_core_warn(dev, "Failed to modify flow table %d\n",
				       iter->id);
			/* The driver is out of sync with the FW */
			if (i > 1)
				WARN_ON(true);
			return err;
		}
	}
	return 0;
}

/* Connect flow tables from the previous priority of prio to ft */
static int connect_prev_fts(struct mlx5_core_dev *dev,
			    struct mlx5_flow_table *ft,
			    struct fs_prio *prio)
{
	struct mlx5_flow_table *prev_ft;

	prev_ft = find_prev_chained_ft(prio);
	if (prev_ft) {
		struct fs_prio *prev_prio;

		fs_get_obj(prev_prio, prev_ft->node.parent);
		return connect_fts_in_prio(dev, prev_prio, ft);
	}
	return 0;
}

static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
				 *prio)
{
	struct mlx5_flow_root_namespace *root = find_root(&prio->node);
	int min_level = INT_MAX;
	int err;

	if (root->root_ft)
		min_level = root->root_ft->level;

	if (ft->level >= min_level)
		return 0;

	err = mlx5_cmd_update_root_ft(root->dev, ft, root->underlay_qpn);
	if (err)
		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
			       ft->id);
	else
		root->root_ft = ft;

	return err;
}

static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
					 struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
	struct fs_fte *fte;
	int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
	int err = 0;

	fs_get_obj(fte, rule->node.parent);
	if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
		return -EINVAL;
	lock_ref_node(&fte->node);
	fs_get_obj(fg, fte->node.parent);
	fs_get_obj(ft, fg->node.parent);

	memcpy(&rule->dest_attr, dest, sizeof(*dest));
	err = mlx5_cmd_update_fte(get_dev(&ft->node),
				  ft, fg->id,
				  modify_mask,
				  fte);
	unlock_ref_node(&fte->node);

	return err;
}

int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
				 struct mlx5_flow_destination *new_dest,
				 struct mlx5_flow_destination *old_dest)
{
	int i;

	if (!old_dest) {
		if (handle->num_rules != 1)
			return -EINVAL;
		return _mlx5_modify_rule_destination(handle->rule[0],
						     new_dest);
	}

	for (i = 0; i < handle->num_rules; i++) {
		if (mlx5_flow_dests_cmp(old_dest, &handle->rule[i]->dest_attr))
			return _mlx5_modify_rule_destination(handle->rule[i],
							     new_dest);
	}

	return -EINVAL;
}

/* Re-point FWD rules that forward to old_next_ft so they forward to new_next_ft */
static int connect_fwd_rules(struct mlx5_core_dev *dev,
			     struct mlx5_flow_table *new_next_ft,
			     struct mlx5_flow_table *old_next_ft)
{
	struct mlx5_flow_destination dest;
	struct mlx5_flow_rule *iter;
	int err = 0;

	/* new_next_ft and old_next_ft could be NULL only
	 * when we create/destroy the anchor flow table.
	 */
	if (!new_next_ft || !old_next_ft)
		return 0;

	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = new_next_ft;

	mutex_lock(&old_next_ft->lock);
	list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
	mutex_unlock(&old_next_ft->lock);
	list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
		err = _mlx5_modify_rule_destination(iter, &dest);
		if (err)
			pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
			       new_next_ft->id);
	}
	return 0;
}

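/* Wire a newly created flow table into the steering chain: if it is the
 * first table in its priority, re-point the tables of the previous
 * priority and the forward-to-next-prio rules at it, then promote it to
 * root table if its level is the lowest and the device supports
 * modifying the root.
 */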
static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
			      struct fs_prio *prio)
{
	struct mlx5_flow_table *next_ft;
	int err = 0;

	/* Connect_prev_fts and update_root_ft_create are mutually exclusive */

	if (list_empty(&prio->node.children)) {
		err = connect_prev_fts(dev, ft, prio);
		if (err)
			return err;

		next_ft = find_next_chained_ft(prio);
		err = connect_fwd_rules(dev, ft, next_ft);
		if (err)
			return err;
	}

	if (MLX5_CAP_FLOWTABLE(dev,
			       flow_table_properties_nic_receive.modify_root))
		err = update_root_ft_create(ft, prio);
	return err;
}

static void list_add_flow_table(struct mlx5_flow_table *ft,
				struct fs_prio *prio)
{
	struct list_head *prev = &prio->node.children;
	struct mlx5_flow_table *iter;

	fs_for_each_ft(iter, prio) {
		if (iter->level > ft->level)
			break;
		prev = &iter->node.list;
	}
	list_add(&ft->node.list, prev);
}

static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
							struct mlx5_flow_table_attr *ft_attr,
							enum fs_flow_table_op_mod op_mod,
							u16 vport)
{
	struct mlx5_flow_root_namespace *root = find_root(&ns->node);
	struct mlx5_flow_table *next_ft = NULL;
	struct fs_prio *fs_prio = NULL;
	struct mlx5_flow_table *ft;
	int log_table_sz;
	int err;

	if (!root) {
		pr_err("mlx5: flow steering failed to find root of namespace\n");
		return ERR_PTR(-ENODEV);
	}

	mutex_lock(&root->chain_lock);
	fs_prio = find_prio(ns, ft_attr->prio);
	if (!fs_prio) {
		err = -EINVAL;
		goto unlock_root;
	}
	if (ft_attr->level >= fs_prio->num_levels) {
		err = -ENOSPC;
		goto unlock_root;
	}
	/* The level is related to the
	 * priority level range.
	 */
	ft_attr->level += fs_prio->start_level;
	ft = alloc_flow_table(ft_attr->level,
			      vport,
			      ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0,
			      root->table_type,
			      op_mod, ft_attr->flags);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		goto unlock_root;
	}

	tree_init_node(&ft->node, 1, del_flow_table);
	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
	next_ft = find_next_chained_ft(fs_prio);
	err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
					 ft->level, log_table_sz, next_ft, &ft->id,
					 ft->flags);
	if (err)
		goto free_ft;

	err = connect_flow_table(root->dev, ft, fs_prio);
	if (err)
		goto destroy_ft;
	lock_ref_node(&fs_prio->node);
	tree_add_node(&ft->node, &fs_prio->node);
	list_add_flow_table(ft, fs_prio);
	fs_prio->num_ft++;
	unlock_ref_node(&fs_prio->node);
	mutex_unlock(&root->chain_lock);
	return ft;
destroy_ft:
	mlx5_cmd_destroy_flow_table(root->dev, ft);
free_ft:
	ida_destroy(&ft->fte_allocator);
	kfree(ft);
unlock_root:
	mutex_unlock(&root->chain_lock);
	return ERR_PTR(err);
}

struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
					       struct mlx5_flow_table_attr *ft_attr)
{
	return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0);
}

struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
						     int prio, int max_fte,
						     u32 level, u16 vport)
{
	struct mlx5_flow_table_attr ft_attr = {};

	ft_attr.max_fte = max_fte;
	ft_attr.level   = level;
	ft_attr.prio    = prio;

	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport);
}

struct mlx5_flow_table*
mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
				 int prio, u32 level)
{
	struct mlx5_flow_table_attr ft_attr = {};

	ft_attr.level = level;
	ft_attr.prio  = prio;
	return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
}
EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);

struct mlx5_flow_table*
mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
				    int prio,
				    int num_flow_table_entries,
				    int max_num_groups,
				    u32 level,
				    u32 flags)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_table *ft;

	if (max_num_groups > num_flow_table_entries)
		return ERR_PTR(-EINVAL);

	ft_attr.max_fte = num_flow_table_entries;
	ft_attr.prio    = prio;
	ft_attr.level   = level;
	ft_attr.flags   = flags;

	ft = mlx5_create_flow_table(ns, &ft_attr);
	if (IS_ERR(ft))
		return ft;

	ft->autogroup.active = true;
	ft->autogroup.required_groups = max_num_groups;

	return ft;
}
EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);

/* Flow table should be locked */
static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft,
							u32 *fg_in,
							struct list_head
							*prev_fg,
							bool is_auto_fg)
{
	struct mlx5_flow_group *fg;
	struct mlx5_core_dev *dev = get_dev(&ft->node);
	int err;

	if (!dev)
		return ERR_PTR(-ENODEV);

	fg = alloc_flow_group(fg_in);
	if (IS_ERR(fg))
		return fg;

	err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg);
	if (err)
		goto err_free_fg;

	err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
	if (err)
		goto err_remove_fg;

	if (ft->autogroup.active)
		ft->autogroup.num_groups++;
	/* Add node to tree */
	tree_init_node(&fg->node, !is_auto_fg, del_flow_group);
	tree_add_node(&fg->node, &ft->node);
	/* Add node to group list */
	list_add(&fg->node.list, prev_fg);

	trace_mlx5_fs_add_fg(fg);
	return fg;

err_remove_fg:
	WARN_ON(rhltable_remove(&ft->fgs_hash,
				&fg->hash,
				rhash_fg));
err_free_fg:
	rhashtable_destroy(&fg->ftes_hash);
	kfree(fg);

	return ERR_PTR(err);
}

struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
					       u32 *fg_in)
{
	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
					    fg_in, match_criteria);
	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
					    fg_in,
					    match_criteria_enable);
	struct mlx5_flow_group *fg;

	if (!check_valid_mask(match_criteria_enable, match_criteria))
		return ERR_PTR(-EINVAL);

	if (ft->autogroup.active)
		return ERR_PTR(-EPERM);

	lock_ref_node(&ft->node);
	fg = create_flow_group_common(ft, fg_in, ft->node.children.prev, false);
	unlock_ref_node(&ft->node);

	return fg;
}

static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_rule *rule;

	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
	if (!rule)
		return NULL;

	INIT_LIST_HEAD(&rule->next_ft);
	rule->node.type = FS_TYPE_FLOW_DEST;
	if (dest)
		memcpy(&rule->dest_attr, dest, sizeof(*dest));

	return rule;
}

static struct mlx5_flow_handle *alloc_handle(int num_rules)
{
	struct mlx5_flow_handle *handle;

	handle = kzalloc(sizeof(*handle) + sizeof(handle->rule[0]) *
			  num_rules, GFP_KERNEL);
	if (!handle)
		return NULL;

	handle->num_rules = num_rules;

	return handle;
}

static void destroy_flow_handle(struct fs_fte *fte,
				struct mlx5_flow_handle *handle,
				struct mlx5_flow_destination *dest,
				int i)
{
	for (; --i >= 0;) {
		if (atomic_dec_and_test(&handle->rule[i]->node.refcount)) {
			fte->dests_size--;
			list_del(&handle->rule[i]->node.list);
			kfree(handle->rule[i]);
		}
	}
	kfree(handle);
}

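/* Build a handle with one rule per requested destination. Rules that
 * already point at a given destination are reused with their refcount
 * bumped; new rules are allocated and linked on the FTE, keeping flow
 * table destinations at the tail so forward-to-next-prio rules stay
 * last. *modify_mask accumulates which FTE fields need a HW update.
 */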
static struct mlx5_flow_handle *
create_flow_handle(struct fs_fte *fte,
		   struct mlx5_flow_destination *dest,
		   int dest_num,
		   int *modify_mask,
		   bool *new_rule)
{
	struct mlx5_flow_handle *handle;
	struct mlx5_flow_rule *rule = NULL;
	static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
	static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
	int type;
	int i = 0;

	handle = alloc_handle((dest_num) ? dest_num : 1);
	if (!handle)
		return ERR_PTR(-ENOMEM);

	do {
		if (dest) {
			rule = find_flow_rule(fte, dest + i);
			if (rule) {
				atomic_inc(&rule->node.refcount);
				goto rule_found;
			}
		}

		*new_rule = true;
		rule = alloc_rule(dest + i);
		if (!rule)
			goto free_rules;

		/* Add dest to the dests list - flow table destinations must
		 * be kept at the end of the list for forward-to-next-prio
		 * rules.
		 */
		tree_init_node(&rule->node, 1, del_rule);
		if (dest &&
		    dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
			list_add(&rule->node.list, &fte->node.children);
		else
			list_add_tail(&rule->node.list, &fte->node.children);
		if (dest) {
			fte->dests_size++;

			type = dest[i].type ==
				MLX5_FLOW_DESTINATION_TYPE_COUNTER;
			*modify_mask |= type ? count : dst;
		}
rule_found:
		handle->rule[i] = rule;
	} while (++i < dest_num);

	return handle;

free_rules:
	destroy_flow_handle(fte, handle, dest, i);
	return ERR_PTR(-ENOMEM);
}

/* fte should not be deleted while calling this function */
static struct mlx5_flow_handle *
add_rule_fte(struct fs_fte *fte,
	     struct mlx5_flow_group *fg,
	     struct mlx5_flow_destination *dest,
	     int dest_num,
	     bool update_action)
{
	struct mlx5_flow_handle *handle;
	struct mlx5_flow_table *ft;
	int modify_mask = 0;
	int err;
	bool new_rule = false;

	handle = create_flow_handle(fte, dest, dest_num, &modify_mask,
				    &new_rule);
	if (IS_ERR(handle) || !new_rule)
		goto out;

	if (update_action)
		modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);

	fs_get_obj(ft, fg->node.parent);
	if (!(fte->status & FS_FTE_STATUS_EXISTING))
		err = mlx5_cmd_create_fte(get_dev(&ft->node),
					  ft, fg->id, fte);
	else
		err = mlx5_cmd_update_fte(get_dev(&ft->node),
					  ft, fg->id, modify_mask, fte);
	if (err)
		goto free_handle;

	fte->status |= FS_FTE_STATUS_EXISTING;

out:
	return handle;

free_handle:
	destroy_flow_handle(fte, handle, dest, handle->num_rules);
	return ERR_PTR(err);
}

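/* Allocate an FTE at a free index within @fg's range: the index comes
 * from the table-wide IDA, bounded by [start_index, start_index +
 * max_ftes), and the new FTE is inserted into the group's hash table.
 */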
static struct fs_fte *create_fte(struct mlx5_flow_group *fg,
				 u32 *match_value,
				 struct mlx5_flow_act *flow_act)
{
	struct mlx5_flow_table *ft;
	struct fs_fte *fte;
	int index;
	int ret;

	fs_get_obj(ft, fg->node.parent);
	index = ida_simple_get(&ft->fte_allocator, fg->start_index,
			       fg->start_index + fg->max_ftes,
			       GFP_KERNEL);
	if (index < 0)
		return ERR_PTR(index);

	fte = alloc_fte(flow_act, match_value, index);
	if (IS_ERR(fte)) {
		ret = PTR_ERR(fte);
		goto err_alloc;
	}
	ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte);
	if (ret)
		goto err_hash;

	return fte;

err_hash:
	kfree(fte);
err_alloc:
	ida_simple_remove(&ft->fte_allocator, index);
	return ERR_PTR(ret);
}

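/* For auto-grouped tables: carve a new flow group for the given match
 * criteria. Groups are sized so that required_groups + 1 groups fit in
 * the table, and the new group's index range is placed in the first
 * gap found between the existing groups (kept sorted by start_index).
 */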
static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
						u8 match_criteria_enable,
						u32 *match_criteria)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct list_head *prev = &ft->node.children;
	unsigned int candidate_index = 0;
	struct mlx5_flow_group *fg;
	void *match_criteria_addr;
	unsigned int group_size = 0;
	u32 *in;

	if (!ft->autogroup.active)
		return ERR_PTR(-ENOENT);

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return ERR_PTR(-ENOMEM);

	if (ft->autogroup.num_groups < ft->autogroup.required_groups)
		/* We reserve space for flow groups in addition to the max types */
		group_size = ft->max_fte / (ft->autogroup.required_groups + 1);

	/* ft->max_fte == ft->autogroup.max_types */
	if (group_size == 0)
		group_size = 1;

	/* sorted by start_index */
	fs_for_each_fg(fg, ft) {
		if (candidate_index + group_size > fg->start_index)
			candidate_index = fg->start_index + fg->max_ftes;
		else
			break;
		prev = &fg->node.list;
	}

	if (candidate_index + group_size > ft->max_fte) {
		fg = ERR_PTR(-ENOSPC);
		goto out;
	}

	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
		 match_criteria_enable);
	MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
	MLX5_SET(create_flow_group_in, in, end_flow_index,   candidate_index +
		 group_size - 1);
	match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
					   in, match_criteria);
	memcpy(match_criteria_addr, match_criteria,
	       MLX5_ST_SZ_BYTES(fte_match_param));

	fg = create_flow_group_common(ft, in, prev, true);
out:
	kvfree(in);
	return fg;
}

static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
				struct mlx5_flow_destination *d2)
{
	if (d1->type == d2->type) {
		if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
		     d1->vport_num == d2->vport_num) ||
		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
		     d1->ft == d2->ft) ||
		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
		     d1->tir_num == d2->tir_num))
			return true;
	}

	return false;
}

static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
					     struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_rule *rule;

	list_for_each_entry(rule, &fte->node.children, node.list) {
		if (mlx5_flow_dests_cmp(&rule->dest_attr, dest))
			return rule;
	}
	return NULL;
}

static bool check_conflicting_actions(u32 action1, u32 action2)
{
	u32 xored_actions = action1 ^ action2;

	/* if one rule only wants to count, it's ok */
	if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
	    action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT)
		return false;

	if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP  |
			     MLX5_FLOW_CONTEXT_ACTION_ENCAP |
			     MLX5_FLOW_CONTEXT_ACTION_DECAP))
		return true;

	return false;
}

static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
{
	if (check_conflicting_actions(flow_act->action, fte->action)) {
		mlx5_core_warn(get_dev(&fte->node),
			       "Found two FTEs with conflicting actions\n");
		return -EEXIST;
	}

	if (fte->flow_tag != flow_act->flow_tag) {
		mlx5_core_warn(get_dev(&fte->node),
			       "FTE flow tag %u already exists with different flow tag %u\n",
			       fte->flow_tag,
			       flow_act->flow_tag);
		return -EEXIST;
	}

	return 0;
}

static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
					    u32 *match_value,
					    struct mlx5_flow_act *flow_act,
					    struct mlx5_flow_destination *dest,
					    int dest_num,
					    struct fs_fte *fte)
{
	struct mlx5_flow_handle *handle;
	struct mlx5_flow_table *ft;
	int i;

	if (fte) {
		int old_action;
		int ret;

		nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
		ret = check_conflicting_ftes(fte, flow_act);
		if (ret) {
			handle = ERR_PTR(ret);
			goto unlock_fte;
		}

		old_action = fte->action;
		fte->action |= flow_act->action;
		handle = add_rule_fte(fte, fg, dest, dest_num,
				      old_action != flow_act->action);
		if (IS_ERR(handle)) {
			fte->action = old_action;
			goto unlock_fte;
		} else {
			trace_mlx5_fs_set_fte(fte, false);
			goto add_rules;
		}
	}
	fs_get_obj(ft, fg->node.parent);

	fte = create_fte(fg, match_value, flow_act);
	if (IS_ERR(fte))
		return (void *)fte;
	tree_init_node(&fte->node, 0, del_fte);
	nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
	handle = add_rule_fte(fte, fg, dest, dest_num, false);
	if (IS_ERR(handle)) {
		unlock_ref_node(&fte->node);
		destroy_fte(fte, fg);
		kfree(fte);
		return handle;
	}

	tree_add_node(&fte->node, &fg->node);
	/* fte list isn't sorted */
	list_add_tail(&fte->node.list, &fg->node.children);
	trace_mlx5_fs_set_fte(fte, true);
add_rules:
	for (i = 0; i < handle->num_rules; i++) {
		if (atomic_read(&handle->rule[i]->node.refcount) == 1) {
			tree_add_node(&handle->rule[i]->node, &fte->node);
			trace_mlx5_fs_add_rule(handle->rule[i]);
		}
	}
unlock_fte:
	unlock_ref_node(&fte->node);
	return handle;
}

struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle)
{
	struct mlx5_flow_rule *dst;
	struct fs_fte *fte;

	fs_get_obj(fte, handle->rule[0]->node.parent);

	fs_for_each_dst(dst, fte) {
		if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
			return dst->dest_attr.counter;
	}

	return NULL;
}

static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
{
	if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT))
		return !counter;

	if (!counter)
		return false;

	return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
}

static bool dest_is_valid(struct mlx5_flow_destination *dest,
			  u32 action,
			  struct mlx5_flow_table *ft)
{
	if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
		return counter_is_valid(dest->counter, action);

	if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
		return true;

	if (!dest || ((dest->type ==
	    MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) &&
	    (dest->ft->level <= ft->level)))
		return false;
	return true;
}

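/* Two-pass insertion: collect every flow group whose mask matches the
 * spec, then first try to attach the rule to a group already holding an
 * identical FTE, and only after that try to create a new FTE in any
 * matching group. Returns -ENOENT when no existing group can take the
 * rule, letting the caller fall back to an autogroup.
 */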
static struct mlx5_flow_handle *
try_add_to_existing_fg(struct mlx5_flow_table *ft,
		       struct mlx5_flow_spec *spec,
		       struct mlx5_flow_act *flow_act,
		       struct mlx5_flow_destination *dest,
		       int dest_num)
{
	struct mlx5_flow_group *g;
	struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT);
	struct rhlist_head *tmp, *list;
	struct match_list {
		struct list_head	list;
		struct mlx5_flow_group *g;
	} match_list, *iter;
	LIST_HEAD(match_head);

	rcu_read_lock();
	/* Collect all fgs which have a matching match_criteria */
	list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg);
	rhl_for_each_entry_rcu(g, tmp, list, hash) {
		struct match_list *curr_match;

		if (likely(list_empty(&match_head))) {
			match_list.g = g;
			list_add_tail(&match_list.list, &match_head);
			continue;
		}
		curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);

		if (!curr_match) {
			rcu_read_unlock();
			rule = ERR_PTR(-ENOMEM);
			goto free_list;
		}
		curr_match->g = g;
		list_add_tail(&curr_match->list, &match_head);
	}
	rcu_read_unlock();

	/* Try to find a fg that already contains a matching fte */
	list_for_each_entry(iter, &match_head, list) {
		struct fs_fte *fte;

		g = iter->g;
		nested_lock_ref_node(&g->node, FS_MUTEX_PARENT);
		fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value,
					     rhash_fte);
		if (fte) {
			rule = add_rule_fg(g, spec->match_value,
					   flow_act, dest, dest_num, fte);
			unlock_ref_node(&g->node);
			goto free_list;
		}
		unlock_ref_node(&g->node);
	}

	/* No group with matching fte found. Try to add a new fte to any
	 * matching fg.
	 */
	list_for_each_entry(iter, &match_head, list) {
		g = iter->g;

		nested_lock_ref_node(&g->node, FS_MUTEX_PARENT);
		rule = add_rule_fg(g, spec->match_value,
				   flow_act, dest, dest_num, NULL);
		if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) {
			unlock_ref_node(&g->node);
			goto free_list;
		}
		unlock_ref_node(&g->node);
	}

free_list:
	if (!list_empty(&match_head)) {
		struct match_list *match_tmp;

		/* The most common case is having one FG. Since we want to
		 * optimize this case, we save the first on the stack.
		 * Therefore, no need to free it.
		 */
		list_del(&list_first_entry(&match_head, typeof(*iter), list)->list);
		list_for_each_entry_safe(iter, match_tmp, &match_head, list) {
			list_del(&iter->list);
			kfree(iter);
		}
	}

	return rule;
}

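/* Validate the spec and destinations, try the existing flow groups and
 * fall back to creating an autogroup for the rule when none fits.
 */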
static struct mlx5_flow_handle *
_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
		     struct mlx5_flow_spec *spec,
		     struct mlx5_flow_act *flow_act,
		     struct mlx5_flow_destination *dest,
		     int dest_num)
{
	struct mlx5_flow_group *g;
	struct mlx5_flow_handle *rule;
	int i;

	if (!check_valid_spec(spec))
		return ERR_PTR(-EINVAL);

	for (i = 0; i < dest_num; i++) {
		if (!dest_is_valid(&dest[i], flow_act->action, ft))
			return ERR_PTR(-EINVAL);
	}

	nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
	rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num);
	if (!IS_ERR(rule))
		goto unlock;

	g = create_autogroup(ft, spec->match_criteria_enable,
			     spec->match_criteria);
	if (IS_ERR(g)) {
		rule = (void *)g;
		goto unlock;
	}

	rule = add_rule_fg(g, spec->match_value, flow_act, dest,
			   dest_num, NULL);
	if (IS_ERR(rule)) {
		/* Remove assumes refcount > 0 and autogroup creates a group
		 * with a refcount = 0.
		 */
		unlock_ref_node(&ft->node);
		tree_get_node(&g->node);
		tree_remove_node(&g->node);
		return rule;
	}
unlock:
	unlock_ref_node(&ft->node);
	return rule;
}

static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
{
	return ((ft->type == FS_FT_NIC_RX) &&
		(MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
}

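/* Public entry point for adding rules. FWD_NEXT_PRIO is emulated by
 * resolving the next chained flow table under the chain lock and
 * installing an ordinary forward destination to it; the rule is then
 * tracked on that table's fwd_rules list so it can be re-targeted when
 * tables come and go.
 *
 * A minimal caller-side sketch (tirn and ft stand for values the caller
 * obtained elsewhere; shown for illustration only):
 *
 *	struct mlx5_flow_act flow_act = {
 *		.action   = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 *		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
 *	};
 *	struct mlx5_flow_destination dest = {
 *		.type    = MLX5_FLOW_DESTINATION_TYPE_TIR,
 *		.tir_num = tirn,
 *	};
 *	struct mlx5_flow_spec *spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 *	struct mlx5_flow_handle *handle;
 *
 *	handle = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 *	kvfree(spec);
 */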
struct mlx5_flow_handle *
mlx5_add_flow_rules(struct mlx5_flow_table *ft,
		    struct mlx5_flow_spec *spec,
		    struct mlx5_flow_act *flow_act,
		    struct mlx5_flow_destination *dest,
		    int dest_num)
{
	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
	struct mlx5_flow_destination gen_dest;
	struct mlx5_flow_table *next_ft = NULL;
	struct mlx5_flow_handle *handle = NULL;
	u32 sw_action = flow_act->action;
	struct fs_prio *prio;

	fs_get_obj(prio, ft->node.parent);
	if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
		if (!fwd_next_prio_supported(ft))
			return ERR_PTR(-EOPNOTSUPP);
		if (dest)
			return ERR_PTR(-EINVAL);
		mutex_lock(&root->chain_lock);
		next_ft = find_next_chained_ft(prio);
		if (next_ft) {
			gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
			gen_dest.ft = next_ft;
			dest = &gen_dest;
			dest_num = 1;
			flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
		} else {
			mutex_unlock(&root->chain_lock);
			return ERR_PTR(-EOPNOTSUPP);
		}
	}

	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, dest_num);

	if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
		if (!IS_ERR_OR_NULL(handle) &&
		    (list_empty(&handle->rule[0]->next_ft))) {
			mutex_lock(&next_ft->lock);
			list_add(&handle->rule[0]->next_ft,
				 &next_ft->fwd_rules);
			mutex_unlock(&next_ft->lock);
			handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
		}
		mutex_unlock(&root->chain_lock);
	}
	return handle;
}
EXPORT_SYMBOL(mlx5_add_flow_rules);

void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
{
	int i;

	for (i = handle->num_rules - 1; i >= 0; i--)
		tree_remove_node(&handle->rule[i]->node);
	kfree(handle);
}
EXPORT_SYMBOL(mlx5_del_flow_rules);

/* Assuming prio->node.children (flow tables) is sorted by level */
static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
{
	struct fs_prio *prio;

	fs_get_obj(prio, ft->node.parent);

	if (!list_is_last(&ft->node.list, &prio->node.children))
		return list_next_entry(ft, node.list);
	return find_next_chained_ft(prio);
}

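/* If @ft is the current root flow table, promote the next table in the
 * chain (or none, when @ft was the last one) to be the root in HW.
 */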
1661 static int update_root_ft_destroy(struct mlx5_flow_table *ft)
1662 {
1663 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
1664 	struct mlx5_flow_table *new_root_ft = NULL;
1665 
1666 	if (root->root_ft != ft)
1667 		return 0;
1668 
1669 	new_root_ft = find_next_ft(ft);
1670 	if (new_root_ft) {
1671 		int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
1672 						  root->underlay_qpn);
1673 
1674 		if (err) {
1675 			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
1676 				       ft->id);
1677 			return err;
1678 		}
1679 	}
1680 	root->root_ft = new_root_ft;
1681 	return 0;
1682 }
1683 
1684 /* Connect flow table from previous priority to
1685  * the next flow table.
1686  */
1687 static int disconnect_flow_table(struct mlx5_flow_table *ft)
1688 {
1689 	struct mlx5_core_dev *dev = get_dev(&ft->node);
1690 	struct mlx5_flow_table *next_ft;
1691 	struct fs_prio *prio;
1692 	int err = 0;
1693 
1694 	err = update_root_ft_destroy(ft);
1695 	if (err)
1696 		return err;
1697 
1698 	fs_get_obj(prio, ft->node.parent);
1699 	if  (!(list_first_entry(&prio->node.children,
1700 				struct mlx5_flow_table,
1701 				node.list) == ft))
1702 		return 0;
1703 
1704 	next_ft = find_next_chained_ft(prio);
1705 	err = connect_fwd_rules(dev, next_ft, ft);
1706 	if (err)
1707 		return err;
1708 
1709 	err = connect_prev_fts(dev, next_ft, prio);
1710 	if (err)
1711 		mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
1712 			       ft->id);
1713 	return err;
1714 }
1715 
1716 int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
1717 {
1718 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
1719 	int err = 0;
1720 
1721 	mutex_lock(&root->chain_lock);
1722 	err = disconnect_flow_table(ft);
1723 	if (err) {
1724 		mutex_unlock(&root->chain_lock);
1725 		return err;
1726 	}
1727 	if (tree_remove_node(&ft->node))
1728 		mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
1729 			       ft->id);
1730 	mutex_unlock(&root->chain_lock);
1731 
1732 	return err;
1733 }
1734 EXPORT_SYMBOL(mlx5_destroy_flow_table);
1735 
1736 void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
1737 {
1738 	if (tree_remove_node(&fg->node))
1739 		mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
1740 			       fg->id);
1741 }
1742 
1743 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
1744 						    enum mlx5_flow_namespace_type type)
1745 {
1746 	struct mlx5_flow_steering *steering = dev->priv.steering;
1747 	struct mlx5_flow_root_namespace *root_ns;
1748 	int prio;
1749 	struct fs_prio *fs_prio;
1750 	struct mlx5_flow_namespace *ns;
1751 
1752 	if (!steering)
1753 		return NULL;
1754 
1755 	switch (type) {
1756 	case MLX5_FLOW_NAMESPACE_BYPASS:
1757 	case MLX5_FLOW_NAMESPACE_LAG:
1758 	case MLX5_FLOW_NAMESPACE_OFFLOADS:
1759 	case MLX5_FLOW_NAMESPACE_ETHTOOL:
1760 	case MLX5_FLOW_NAMESPACE_KERNEL:
1761 	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
1762 	case MLX5_FLOW_NAMESPACE_ANCHOR:
1763 		prio = type;
1764 		break;
1765 	case MLX5_FLOW_NAMESPACE_FDB:
1766 		if (steering->fdb_root_ns)
1767 			return &steering->fdb_root_ns->ns;
1768 		else
1769 			return NULL;
1770 	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
1771 		if (steering->esw_egress_root_ns)
1772 			return &steering->esw_egress_root_ns->ns;
1773 		else
1774 			return NULL;
1775 	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
1776 		if (steering->esw_ingress_root_ns)
1777 			return &steering->esw_ingress_root_ns->ns;
1778 		else
1779 			return NULL;
1780 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
1781 		if (steering->sniffer_rx_root_ns)
1782 			return &steering->sniffer_rx_root_ns->ns;
1783 		else
1784 			return NULL;
1785 	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
1786 		if (steering->sniffer_tx_root_ns)
1787 			return &steering->sniffer_tx_root_ns->ns;
1788 		else
1789 			return NULL;
1790 	default:
1791 		return NULL;
1792 	}
1793 
1794 	root_ns = steering->root_ns;
1795 	if (!root_ns)
1796 		return NULL;
1797 
1798 	fs_prio = find_prio(&root_ns->ns, prio);
1799 	if (!fs_prio)
1800 		return NULL;
1801 
1802 	ns = list_first_entry(&fs_prio->node.children,
1803 			      typeof(*ns),
1804 			      node.list);
1805 
1806 	return ns;
1807 }
1808 EXPORT_SYMBOL(mlx5_get_flow_namespace);
1809 
1810 static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
1811 				      unsigned int prio, int num_levels)
1812 {
1813 	struct fs_prio *fs_prio;
1814 
1815 	fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
1816 	if (!fs_prio)
1817 		return ERR_PTR(-ENOMEM);
1818 
1819 	fs_prio->node.type = FS_TYPE_PRIO;
1820 	tree_init_node(&fs_prio->node, 1, NULL);
1821 	tree_add_node(&fs_prio->node, &ns->node);
1822 	fs_prio->num_levels = num_levels;
1823 	fs_prio->prio = prio;
1824 	list_add_tail(&fs_prio->node.list, &ns->node.children);
1825 
1826 	return fs_prio;
1827 }
1828 
1829 static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
1830 						     *ns)
1831 {
1832 	ns->node.type = FS_TYPE_NAMESPACE;
1833 
1834 	return ns;
1835 }
1836 
1837 static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
1838 {
1839 	struct mlx5_flow_namespace	*ns;
1840 
1841 	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
1842 	if (!ns)
1843 		return ERR_PTR(-ENOMEM);
1844 
1845 	fs_init_namespace(ns);
1846 	tree_init_node(&ns->node, 1, NULL);
1847 	tree_add_node(&ns->node, &prio->node);
1848 	list_add_tail(&ns->node.list, &prio->node.children);
1849 
1850 	return ns;
1851 }
1852 
1853 static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio,
1854 			     struct init_tree_node *prio_metadata)
1855 {
1856 	struct fs_prio *fs_prio;
1857 	int i;
1858 
1859 	for (i = 0; i < prio_metadata->num_leaf_prios; i++) {
1860 		fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels);
1861 		if (IS_ERR(fs_prio))
1862 			return PTR_ERR(fs_prio);
1863 	}
1864 	return 0;
1865 }
1866 
1867 #define FLOW_TABLE_BIT_SZ 1
1868 #define GET_FLOW_TABLE_CAP(dev, offset) \
1869 	((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) +	\
1870 			offset / 32)) >>					\
1871 	  (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
1872 static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
1873 {
1874 	int i;
1875 
1876 	for (i = 0; i < caps->arr_sz; i++) {
1877 		if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
1878 			return false;
1879 	}
1880 	return true;
1881 }
1882 
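/*
 * init_root_tree_recursive() materializes the static init_tree_node
 * template as fs_prio/namespace objects under fs_parent_node.  A prio
 * node is skipped silently when the device lacks the required flow table
 * level or capabilities; a node with num_leaf_prios expands into that
 * many consecutive priorities, which is why the child loop advances
 * 'prio' by num_leaf_prios rather than by one.
 */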
1883 static int init_root_tree_recursive(struct mlx5_flow_steering *steering,
1884 				    struct init_tree_node *init_node,
1885 				    struct fs_node *fs_parent_node,
1886 				    struct init_tree_node *init_parent_node,
1887 				    int prio)
1888 {
1889 	int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev,
1890 					      flow_table_properties_nic_receive.
1891 					      max_ft_level);
1892 	struct mlx5_flow_namespace *fs_ns;
1893 	struct fs_prio *fs_prio;
1894 	struct fs_node *base;
1895 	int i;
1896 	int err;
1897 
1898 	if (init_node->type == FS_TYPE_PRIO) {
1899 		if ((init_node->min_ft_level > max_ft_level) ||
1900 		    !has_required_caps(steering->dev, &init_node->caps))
1901 			return 0;
1902 
1903 		fs_get_obj(fs_ns, fs_parent_node);
1904 		if (init_node->num_leaf_prios)
1905 			return create_leaf_prios(fs_ns, prio, init_node);
1906 		fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels);
1907 		if (IS_ERR(fs_prio))
1908 			return PTR_ERR(fs_prio);
1909 		base = &fs_prio->node;
1910 	} else if (init_node->type == FS_TYPE_NAMESPACE) {
1911 		fs_get_obj(fs_prio, fs_parent_node);
1912 		fs_ns = fs_create_namespace(fs_prio);
1913 		if (IS_ERR(fs_ns))
1914 			return PTR_ERR(fs_ns);
1915 		base = &fs_ns->node;
1916 	} else {
1917 		return -EINVAL;
1918 	}
1919 	prio = 0;
1920 	for (i = 0; i < init_node->ar_size; i++) {
1921 		err = init_root_tree_recursive(steering, &init_node->children[i],
1922 					       base, init_node, prio);
1923 		if (err)
1924 			return err;
1925 		if (init_node->children[i].type == FS_TYPE_PRIO &&
1926 		    init_node->children[i].num_leaf_prios) {
1927 			prio += init_node->children[i].num_leaf_prios;
1928 		}
1929 	}
1930 
1931 	return 0;
1932 }
1933 
1934 static int init_root_tree(struct mlx5_flow_steering *steering,
1935 			  struct init_tree_node *init_node,
1936 			  struct fs_node *fs_parent_node)
1937 {
1938 	int i;
1939 	struct mlx5_flow_namespace *fs_ns;
1940 	int err;
1941 
1942 	fs_get_obj(fs_ns, fs_parent_node);
1943 	for (i = 0; i < init_node->ar_size; i++) {
1944 		err = init_root_tree_recursive(steering, &init_node->children[i],
1945 					       &fs_ns->node,
1946 					       init_node, i);
1947 		if (err)
1948 			return err;
1949 	}
1950 	return 0;
1951 }
1952 
1953 static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering,
1954 						       enum fs_flow_table_type
1955 						       table_type)
1956 {
1957 	struct mlx5_flow_root_namespace *root_ns;
1958 	struct mlx5_flow_namespace *ns;
1959 
1960 	/* Create the root namespace */
1961 	root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL);
1962 	if (!root_ns)
1963 		return NULL;
1964 
1965 	root_ns->dev = steering->dev;
1966 	root_ns->table_type = table_type;
1967 
1968 	ns = &root_ns->ns;
1969 	fs_init_namespace(ns);
1970 	mutex_init(&root_ns->chain_lock);
1971 	tree_init_node(&ns->node, 1, NULL);
1972 	tree_add_node(&ns->node, NULL);
1973 
1974 	return root_ns;
1975 }
1976 
1977 static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level);
1978 
1979 static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level)
1980 {
1981 	struct fs_prio *prio;
1982 
1983 	fs_for_each_prio(prio, ns) {
		/* This updates prio start_level and num_levels */
1985 		set_prio_attrs_in_prio(prio, acc_level);
1986 		acc_level += prio->num_levels;
1987 	}
1988 	return acc_level;
1989 }
1990 
1991 static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
1992 {
1993 	struct mlx5_flow_namespace *ns;
1994 	int acc_level_ns = acc_level;
1995 
1996 	prio->start_level = acc_level;
1997 	fs_for_each_ns(ns, prio)
1998 		/* This updates start_level and num_levels of ns's priority descendants */
1999 		acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
2000 	if (!prio->num_levels)
2001 		prio->num_levels = acc_level_ns - prio->start_level;
2002 	WARN_ON(prio->num_levels < acc_level_ns - prio->start_level);
2003 }
2004 
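/*
 * set_prio_attrs() and the two mutually recursive helpers above assign
 * absolute levels across the tree: sibling priorities occupy disjoint
 * [start_level, start_level + num_levels) ranges.  For example, two
 * top-level priorities of 3 and 2 levels span levels 0-2 and 3-4
 * respectively.
 */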
2005 static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
2006 {
2007 	struct mlx5_flow_namespace *ns = &root_ns->ns;
2008 	struct fs_prio *prio;
2009 	int start_level = 0;
2010 
2011 	fs_for_each_prio(prio, ns) {
2012 		set_prio_attrs_in_prio(prio, start_level);
2013 		start_level += prio->num_levels;
2014 	}
2015 }
2016 
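/*
 * The anchor is a minimal, lowest-priority table in the NIC RX root; it
 * gives the preceding tables a valid table to chain their misses to, so
 * packets that match nothing have a defined endpoint.
 */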
2017 #define ANCHOR_PRIO 0
2018 #define ANCHOR_SIZE 1
2019 #define ANCHOR_LEVEL 0
2020 static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
2021 {
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;
2025 
2026 	ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
2027 	if (WARN_ON(!ns))
2028 		return -EINVAL;
2029 
2030 	ft_attr.max_fte = ANCHOR_SIZE;
2031 	ft_attr.level   = ANCHOR_LEVEL;
2032 	ft_attr.prio    = ANCHOR_PRIO;
2033 
2034 	ft = mlx5_create_flow_table(ns, &ft_attr);
2035 	if (IS_ERR(ft)) {
		mlx5_core_err(steering->dev, "Failed to create last anchor flow table\n");
2037 		return PTR_ERR(ft);
2038 	}
2039 	return 0;
2040 }
2041 
static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns);

static int init_root_ns(struct mlx5_flow_steering *steering)
{
	int err;

	steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
	if (!steering->root_ns)
		return -ENOMEM;

	err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node);
	if (err)
		goto out_err;

	set_prio_attrs(steering->root_ns);

	err = create_anchor_flow_table(steering);
	if (err)
		goto out_err;

	return 0;

out_err:
	/* Unwind only what was built here; the caller's error path runs
	 * mlx5_cleanup_fs(), so calling it from both places would free
	 * 'steering' twice and lose the real error code.
	 */
	cleanup_root_ns(steering->root_ns);
	steering->root_ns = NULL;
	return err;
}
2062 
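/* Tear a subtree down depth-first so each node's children are released
 * before the node itself is removed.
 */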
2063 static void clean_tree(struct fs_node *node)
2064 {
2065 	if (node) {
2066 		struct fs_node *iter;
2067 		struct fs_node *temp;
2068 
2069 		list_for_each_entry_safe(iter, temp, &node->children, list)
2070 			clean_tree(iter);
2071 		tree_remove_node(node);
2072 	}
2073 }
2074 
2075 static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
2076 {
2077 	if (!root_ns)
2078 		return;
2079 
2080 	clean_tree(&root_ns->ns.node);
2081 }
2082 
2083 void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
2084 {
2085 	struct mlx5_flow_steering *steering = dev->priv.steering;
2086 
2087 	cleanup_root_ns(steering->root_ns);
2088 	cleanup_root_ns(steering->esw_egress_root_ns);
2089 	cleanup_root_ns(steering->esw_ingress_root_ns);
2090 	cleanup_root_ns(steering->fdb_root_ns);
2091 	cleanup_root_ns(steering->sniffer_rx_root_ns);
2092 	cleanup_root_ns(steering->sniffer_tx_root_ns);
2093 	mlx5_cleanup_fc_stats(dev);
2094 	kfree(steering);
2095 }
2096 
2097 static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
2098 {
2099 	struct fs_prio *prio;
2100 
2101 	steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX);
2102 	if (!steering->sniffer_tx_root_ns)
2103 		return -ENOMEM;
2104 
2105 	/* Create single prio */
2106 	prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
2107 	if (IS_ERR(prio)) {
2108 		cleanup_root_ns(steering->sniffer_tx_root_ns);
2109 		return PTR_ERR(prio);
2110 	}
2111 	return 0;
2112 }
2113 
2114 static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
2115 {
2116 	struct fs_prio *prio;
2117 
2118 	steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX);
2119 	if (!steering->sniffer_rx_root_ns)
2120 		return -ENOMEM;
2121 
2122 	/* Create single prio */
2123 	prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
2124 	if (IS_ERR(prio)) {
2125 		cleanup_root_ns(steering->sniffer_rx_root_ns);
2126 		return PTR_ERR(prio);
2127 	}
2128 	return 0;
2129 }
2130 
2131 static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
2132 {
2133 	struct fs_prio *prio;
2134 
2135 	steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
2136 	if (!steering->fdb_root_ns)
2137 		return -ENOMEM;
2138 
2139 	prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 1);
2140 	if (IS_ERR(prio))
2141 		goto out_err;
2142 
2143 	prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
2144 	if (IS_ERR(prio))
2145 		goto out_err;
2146 
2147 	set_prio_attrs(steering->fdb_root_ns);
2148 	return 0;
2149 
2150 out_err:
2151 	cleanup_root_ns(steering->fdb_root_ns);
2152 	steering->fdb_root_ns = NULL;
2153 	return PTR_ERR(prio);
2154 }
2155 
static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering)
{
	struct fs_prio *prio;

	steering->esw_ingress_root_ns = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
	if (!steering->esw_ingress_root_ns)
		return -ENOMEM;

	/* create 1 prio */
	prio = fs_create_prio(&steering->esw_ingress_root_ns->ns, 0,
			      MLX5_TOTAL_VPORTS(steering->dev));
	return PTR_ERR_OR_ZERO(prio);
}
2169 
static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering)
{
	struct fs_prio *prio;

	steering->esw_egress_root_ns = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
	if (!steering->esw_egress_root_ns)
		return -ENOMEM;

	/* create 1 prio */
	prio = fs_create_prio(&steering->esw_egress_root_ns->ns, 0,
			      MLX5_TOTAL_VPORTS(steering->dev));
	return PTR_ERR_OR_ZERO(prio);
}
2183 
2184 int mlx5_init_fs(struct mlx5_core_dev *dev)
2185 {
2186 	struct mlx5_flow_steering *steering;
	int err;
2188 
2189 	err = mlx5_init_fc_stats(dev);
2190 	if (err)
2191 		return err;
2192 
2193 	steering = kzalloc(sizeof(*steering), GFP_KERNEL);
2194 	if (!steering)
2195 		return -ENOMEM;
2196 	steering->dev = dev;
2197 	dev->priv.steering = steering;
2198 
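	/* The NIC RX root is built for Ethernet ports that expose a NIC
	 * flow table, or for IB ports with enhanced IPoIB offloads; in
	 * both cases NIC RX flow table support must also be reported.
	 */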
2199 	if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
2200 	      (MLX5_CAP_GEN(dev, nic_flow_table))) ||
2201 	     ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
2202 	      MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) &&
2203 	    MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) {
2204 		err = init_root_ns(steering);
2205 		if (err)
2206 			goto err;
2207 	}
2208 
2209 	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
2210 		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
2211 			err = init_fdb_root_ns(steering);
2212 			if (err)
2213 				goto err;
2214 		}
2215 		if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
2216 			err = init_egress_acl_root_ns(steering);
2217 			if (err)
2218 				goto err;
2219 		}
2220 		if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
2221 			err = init_ingress_acl_root_ns(steering);
2222 			if (err)
2223 				goto err;
2224 		}
2225 	}
2226 
2227 	if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
2228 		err = init_sniffer_rx_root_ns(steering);
2229 		if (err)
2230 			goto err;
2231 	}
2232 
2233 	if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) {
2234 		err = init_sniffer_tx_root_ns(steering);
2235 		if (err)
2236 			goto err;
2237 	}
2238 
2239 	return 0;
2240 err:
2241 	mlx5_cleanup_fs(dev);
2242 	return err;
2243 }
2244 
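/*
 * The QPN stored here is picked up by the steering commands that program
 * the root flow table.  Both helpers assume the NIC RX root namespace was
 * initialized (the IPoIB caller only runs when enhanced offloads are
 * supported), hence 'root' is not NULL-checked.
 */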
2245 int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
2246 {
2247 	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
2248 
2249 	root->underlay_qpn = underlay_qpn;
2250 	return 0;
2251 }
2252 EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);
2253 
2254 int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
2255 {
2256 	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
2257 
2258 	root->underlay_qpn = 0;
2259 	return 0;
2260 }
2261 EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
2262