xref: /openbmc/linux/drivers/net/ethernet/intel/ice/ice_sched.c (revision 023e41632e065d49bcbe31b3c4b336217f96a271)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018, Intel Corporation. */
3 
4 #include "ice_sched.h"
5 
6 /**
7  * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
8  * @pi: port information structure
9  * @info: Scheduler element information from firmware
10  *
11  * This function inserts the root node of the scheduling tree topology
12  * to the SW DB.
13  */
14 static enum ice_status
15 ice_sched_add_root_node(struct ice_port_info *pi,
16 			struct ice_aqc_txsched_elem_data *info)
17 {
18 	struct ice_sched_node *root;
19 	struct ice_hw *hw;
20 
21 	if (!pi)
22 		return ICE_ERR_PARAM;
23 
24 	hw = pi->hw;
25 
26 	root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
27 	if (!root)
28 		return ICE_ERR_NO_MEMORY;
29 
30 	/* coverity[suspicious_sizeof] */
31 	root->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[0],
32 				      sizeof(*root), GFP_KERNEL);
33 	if (!root->children) {
34 		devm_kfree(ice_hw_to_dev(hw), root);
35 		return ICE_ERR_NO_MEMORY;
36 	}
37 
38 	memcpy(&root->info, info, sizeof(*info));
39 	pi->root = root;
40 	return 0;
41 }
42 
43 /**
44  * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
45  * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
46  * @teid: node teid to search
47  *
48  * This function searches for a node matching the teid in the scheduling tree
49  * from the SW DB. The search is recursive and is restricted by the number of
50  * layers it has searched through; stopping at the max supported layer.
51  *
52  * This function needs to be called when holding the port_info->sched_lock
53  */
54 struct ice_sched_node *
55 ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
56 {
57 	u16 i;
58 
59 	/* The TEID is same as that of the start_node */
60 	if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
61 		return start_node;
62 
63 	/* The node has no children or is at the max layer */
64 	if (!start_node->num_children ||
65 	    start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
66 	    start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
67 		return NULL;
68 
69 	/* Check if teid matches to any of the children nodes */
70 	for (i = 0; i < start_node->num_children; i++)
71 		if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
72 			return start_node->children[i];
73 
74 	/* Search within each child's sub-tree */
75 	for (i = 0; i < start_node->num_children; i++) {
76 		struct ice_sched_node *tmp;
77 
78 		tmp = ice_sched_find_node_by_teid(start_node->children[i],
79 						  teid);
80 		if (tmp)
81 			return tmp;
82 	}
83 
84 	return NULL;
85 }
86 
87 /**
88  * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
89  * @hw: pointer to the hw struct
90  * @cmd_opc: cmd opcode
91  * @elems_req: number of elements to request
92  * @buf: pointer to buffer
93  * @buf_size: buffer size in bytes
94  * @elems_resp: returns total number of elements response
95  * @cd: pointer to command details structure or NULL
96  *
97  * This function sends a scheduling elements cmd (cmd_opc)
98  */
99 static enum ice_status
100 ice_aqc_send_sched_elem_cmd(struct ice_hw *hw, enum ice_adminq_opc cmd_opc,
101 			    u16 elems_req, void *buf, u16 buf_size,
102 			    u16 *elems_resp, struct ice_sq_cd *cd)
103 {
104 	struct ice_aqc_sched_elem_cmd *cmd;
105 	struct ice_aq_desc desc;
106 	enum ice_status status;
107 
108 	cmd = &desc.params.sched_elem_cmd;
109 	ice_fill_dflt_direct_cmd_desc(&desc, cmd_opc);
110 	cmd->num_elem_req = cpu_to_le16(elems_req);
111 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
112 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
113 	if (!status && elems_resp)
114 		*elems_resp = le16_to_cpu(cmd->num_elem_resp);
115 
116 	return status;
117 }
118 
119 /**
120  * ice_aq_query_sched_elems - query scheduler elements
121  * @hw: pointer to the hw struct
122  * @elems_req: number of elements to query
123  * @buf: pointer to buffer
124  * @buf_size: buffer size in bytes
125  * @elems_ret: returns total number of elements returned
126  * @cd: pointer to command details structure or NULL
127  *
128  * Query scheduling elements (0x0404)
129  */
130 static enum ice_status
131 ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
132 			 struct ice_aqc_get_elem *buf, u16 buf_size,
133 			 u16 *elems_ret, struct ice_sq_cd *cd)
134 {
135 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_get_sched_elems,
136 					   elems_req, (void *)buf, buf_size,
137 					   elems_ret, cd);
138 }
139 
140 /**
141  * ice_sched_query_elem - query element information from hw
142  * @hw: pointer to the hw struct
143  * @node_teid: node teid to be queried
144  * @buf: buffer to element information
145  *
146  * This function queries HW element information
147  */
148 static enum ice_status
149 ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
150 		     struct ice_aqc_get_elem *buf)
151 {
152 	u16 buf_size, num_elem_ret = 0;
153 	enum ice_status status;
154 
155 	buf_size = sizeof(*buf);
156 	memset(buf, 0, buf_size);
157 	buf->generic[0].node_teid = cpu_to_le32(node_teid);
158 	status = ice_aq_query_sched_elems(hw, 1, buf, buf_size, &num_elem_ret,
159 					  NULL);
160 	if (status || num_elem_ret != 1)
161 		ice_debug(hw, ICE_DBG_SCHED, "query element failed\n");
162 	return status;
163 }
164 
165 /**
166  * ice_sched_add_node - Insert the Tx scheduler node in SW DB
167  * @pi: port information structure
168  * @layer: Scheduler layer of the node
169  * @info: Scheduler element information from firmware
170  *
171  * This function inserts a scheduler node to the SW DB.
172  */
173 enum ice_status
174 ice_sched_add_node(struct ice_port_info *pi, u8 layer,
175 		   struct ice_aqc_txsched_elem_data *info)
176 {
177 	struct ice_sched_node *parent;
178 	struct ice_aqc_get_elem elem;
179 	struct ice_sched_node *node;
180 	enum ice_status status;
181 	struct ice_hw *hw;
182 
183 	if (!pi)
184 		return ICE_ERR_PARAM;
185 
186 	hw = pi->hw;
187 
188 	/* A valid parent node should be there */
189 	parent = ice_sched_find_node_by_teid(pi->root,
190 					     le32_to_cpu(info->parent_teid));
191 	if (!parent) {
192 		ice_debug(hw, ICE_DBG_SCHED,
193 			  "Parent Node not found for parent_teid=0x%x\n",
194 			  le32_to_cpu(info->parent_teid));
195 		return ICE_ERR_PARAM;
196 	}
197 
198 	/* query the current node information from FW  before additing it
199 	 * to the SW DB
200 	 */
201 	status = ice_sched_query_elem(hw, le32_to_cpu(info->node_teid), &elem);
202 	if (status)
203 		return status;
204 
205 	node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
206 	if (!node)
207 		return ICE_ERR_NO_MEMORY;
208 	if (hw->max_children[layer]) {
209 		/* coverity[suspicious_sizeof] */
210 		node->children = devm_kcalloc(ice_hw_to_dev(hw),
211 					      hw->max_children[layer],
212 					      sizeof(*node), GFP_KERNEL);
213 		if (!node->children) {
214 			devm_kfree(ice_hw_to_dev(hw), node);
215 			return ICE_ERR_NO_MEMORY;
216 		}
217 	}
218 
219 	node->in_use = true;
220 	node->parent = parent;
221 	node->tx_sched_layer = layer;
222 	parent->children[parent->num_children++] = node;
223 	memcpy(&node->info, &elem.generic[0], sizeof(node->info));
224 	return 0;
225 }
226 
227 /**
228  * ice_aq_delete_sched_elems - delete scheduler elements
229  * @hw: pointer to the hw struct
230  * @grps_req: number of groups to delete
231  * @buf: pointer to buffer
232  * @buf_size: buffer size in bytes
233  * @grps_del: returns total number of elements deleted
234  * @cd: pointer to command details structure or NULL
235  *
236  * Delete scheduling elements (0x040F)
237  */
238 static enum ice_status
239 ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
240 			  struct ice_aqc_delete_elem *buf, u16 buf_size,
241 			  u16 *grps_del, struct ice_sq_cd *cd)
242 {
243 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_delete_sched_elems,
244 					   grps_req, (void *)buf, buf_size,
245 					   grps_del, cd);
246 }
247 
248 /**
249  * ice_sched_remove_elems - remove nodes from hw
250  * @hw: pointer to the hw struct
251  * @parent: pointer to the parent node
252  * @num_nodes: number of nodes
253  * @node_teids: array of node teids to be deleted
254  *
255  * This function remove nodes from hw
256  */
257 static enum ice_status
258 ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
259 		       u16 num_nodes, u32 *node_teids)
260 {
261 	struct ice_aqc_delete_elem *buf;
262 	u16 i, num_groups_removed = 0;
263 	enum ice_status status;
264 	u16 buf_size;
265 
266 	buf_size = sizeof(*buf) + sizeof(u32) * (num_nodes - 1);
267 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
268 	if (!buf)
269 		return ICE_ERR_NO_MEMORY;
270 
271 	buf->hdr.parent_teid = parent->info.node_teid;
272 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
273 	for (i = 0; i < num_nodes; i++)
274 		buf->teid[i] = cpu_to_le32(node_teids[i]);
275 
276 	status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
277 					   &num_groups_removed, NULL);
278 	if (status || num_groups_removed != 1)
279 		ice_debug(hw, ICE_DBG_SCHED, "remove elements failed\n");
280 
281 	devm_kfree(ice_hw_to_dev(hw), buf);
282 	return status;
283 }
284 
285 /**
286  * ice_sched_get_first_node - get the first node of the given layer
287  * @hw: pointer to the hw struct
288  * @parent: pointer the base node of the subtree
289  * @layer: layer number
290  *
291  * This function retrieves the first node of the given layer from the subtree
292  */
293 static struct ice_sched_node *
294 ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent,
295 			 u8 layer)
296 {
297 	u8 i;
298 
299 	if (layer < hw->sw_entry_point_layer)
300 		return NULL;
301 	for (i = 0; i < parent->num_children; i++) {
302 		struct ice_sched_node *node = parent->children[i];
303 
304 		if (node) {
305 			if (node->tx_sched_layer == layer)
306 				return node;
307 			/* this recursion is intentional, and wouldn't
308 			 * go more than 9 calls
309 			 */
310 			return ice_sched_get_first_node(hw, node, layer);
311 		}
312 	}
313 	return NULL;
314 }
315 
316 /**
317  * ice_sched_get_tc_node - get pointer to TC node
318  * @pi: port information structure
319  * @tc: TC number
320  *
321  * This function returns the TC node pointer
322  */
323 struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
324 {
325 	u8 i;
326 
327 	if (!pi)
328 		return NULL;
329 	for (i = 0; i < pi->root->num_children; i++)
330 		if (pi->root->children[i]->tc_num == tc)
331 			return pi->root->children[i];
332 	return NULL;
333 }
334 
335 /**
336  * ice_free_sched_node - Free a Tx scheduler node from SW DB
337  * @pi: port information structure
338  * @node: pointer to the ice_sched_node struct
339  *
340  * This function frees up a node from SW DB as well as from HW
341  *
342  * This function needs to be called with the port_info->sched_lock held
343  */
344 void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
345 {
346 	struct ice_sched_node *parent;
347 	struct ice_hw *hw = pi->hw;
348 	u8 i, j;
349 
350 	/* Free the children before freeing up the parent node
351 	 * The parent array is updated below and that shifts the nodes
352 	 * in the array. So always pick the first child if num children > 0
353 	 */
354 	while (node->num_children)
355 		ice_free_sched_node(pi, node->children[0]);
356 
357 	/* Leaf, TC and root nodes can't be deleted by SW */
358 	if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
359 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
360 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
361 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
362 		u32 teid = le32_to_cpu(node->info.node_teid);
363 		enum ice_status status;
364 
365 		status = ice_sched_remove_elems(hw, node->parent, 1, &teid);
366 		if (status)
367 			ice_debug(hw, ICE_DBG_SCHED,
368 				  "remove element failed %d\n", status);
369 	}
370 	parent = node->parent;
371 	/* root has no parent */
372 	if (parent) {
373 		struct ice_sched_node *p, *tc_node;
374 
375 		/* update the parent */
376 		for (i = 0; i < parent->num_children; i++)
377 			if (parent->children[i] == node) {
378 				for (j = i + 1; j < parent->num_children; j++)
379 					parent->children[j - 1] =
380 						parent->children[j];
381 				parent->num_children--;
382 				break;
383 			}
384 
385 		/* search for previous sibling that points to this node and
386 		 * remove the reference
387 		 */
388 		tc_node = ice_sched_get_tc_node(pi, node->tc_num);
389 		if (!tc_node) {
390 			ice_debug(hw, ICE_DBG_SCHED,
391 				  "Invalid TC number %d\n", node->tc_num);
392 			goto err_exit;
393 		}
394 		p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer);
395 		while (p) {
396 			if (p->sibling == node) {
397 				p->sibling = node->sibling;
398 				break;
399 			}
400 			p = p->sibling;
401 		}
402 	}
403 err_exit:
404 	/* leaf nodes have no children */
405 	if (node->children)
406 		devm_kfree(ice_hw_to_dev(hw), node->children);
407 	devm_kfree(ice_hw_to_dev(hw), node);
408 }
409 
410 /**
411  * ice_aq_get_dflt_topo - gets default scheduler topology
412  * @hw: pointer to the hw struct
413  * @lport: logical port number
414  * @buf: pointer to buffer
415  * @buf_size: buffer size in bytes
416  * @num_branches: returns total number of queue to port branches
417  * @cd: pointer to command details structure or NULL
418  *
419  * Get default scheduler topology (0x400)
420  */
421 static enum ice_status
422 ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
423 		     struct ice_aqc_get_topo_elem *buf, u16 buf_size,
424 		     u8 *num_branches, struct ice_sq_cd *cd)
425 {
426 	struct ice_aqc_get_topo *cmd;
427 	struct ice_aq_desc desc;
428 	enum ice_status status;
429 
430 	cmd = &desc.params.get_topo;
431 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
432 	cmd->port_num = lport;
433 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
434 	if (!status && num_branches)
435 		*num_branches = cmd->num_branches;
436 
437 	return status;
438 }
439 
440 /**
441  * ice_aq_add_sched_elems - adds scheduling element
442  * @hw: pointer to the hw struct
443  * @grps_req: the number of groups that are requested to be added
444  * @buf: pointer to buffer
445  * @buf_size: buffer size in bytes
446  * @grps_added: returns total number of groups added
447  * @cd: pointer to command details structure or NULL
448  *
449  * Add scheduling elements (0x0401)
450  */
451 static enum ice_status
452 ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
453 		       struct ice_aqc_add_elem *buf, u16 buf_size,
454 		       u16 *grps_added, struct ice_sq_cd *cd)
455 {
456 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_add_sched_elems,
457 					   grps_req, (void *)buf, buf_size,
458 					   grps_added, cd);
459 }
460 
461 /**
462  * ice_aq_suspend_sched_elems - suspend scheduler elements
463  * @hw: pointer to the hw struct
464  * @elems_req: number of elements to suspend
465  * @buf: pointer to buffer
466  * @buf_size: buffer size in bytes
467  * @elems_ret: returns total number of elements suspended
468  * @cd: pointer to command details structure or NULL
469  *
470  * Suspend scheduling elements (0x0409)
471  */
472 static enum ice_status
473 ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req,
474 			   struct ice_aqc_suspend_resume_elem *buf,
475 			   u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
476 {
477 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_suspend_sched_elems,
478 					   elems_req, (void *)buf, buf_size,
479 					   elems_ret, cd);
480 }
481 
482 /**
483  * ice_aq_resume_sched_elems - resume scheduler elements
484  * @hw: pointer to the hw struct
485  * @elems_req: number of elements to resume
486  * @buf: pointer to buffer
487  * @buf_size: buffer size in bytes
488  * @elems_ret: returns total number of elements resumed
489  * @cd: pointer to command details structure or NULL
490  *
491  * resume scheduling elements (0x040A)
492  */
493 static enum ice_status
494 ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req,
495 			  struct ice_aqc_suspend_resume_elem *buf,
496 			  u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
497 {
498 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_resume_sched_elems,
499 					   elems_req, (void *)buf, buf_size,
500 					   elems_ret, cd);
501 }
502 
503 /**
504  * ice_aq_query_sched_res - query scheduler resource
505  * @hw: pointer to the hw struct
506  * @buf_size: buffer size in bytes
507  * @buf: pointer to buffer
508  * @cd: pointer to command details structure or NULL
509  *
510  * Query scheduler resource allocation (0x0412)
511  */
512 static enum ice_status
513 ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
514 		       struct ice_aqc_query_txsched_res_resp *buf,
515 		       struct ice_sq_cd *cd)
516 {
517 	struct ice_aq_desc desc;
518 
519 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
520 	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
521 }
522 
523 /**
524  * ice_sched_suspend_resume_elems - suspend or resume hw nodes
525  * @hw: pointer to the hw struct
526  * @num_nodes: number of nodes
527  * @node_teids: array of node teids to be suspended or resumed
528  * @suspend: true means suspend / false means resume
529  *
530  * This function suspends or resumes hw nodes
531  */
532 static enum ice_status
533 ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
534 			       bool suspend)
535 {
536 	struct ice_aqc_suspend_resume_elem *buf;
537 	u16 i, buf_size, num_elem_ret = 0;
538 	enum ice_status status;
539 
540 	buf_size = sizeof(*buf) * num_nodes;
541 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
542 	if (!buf)
543 		return ICE_ERR_NO_MEMORY;
544 
545 	for (i = 0; i < num_nodes; i++)
546 		buf->teid[i] = cpu_to_le32(node_teids[i]);
547 
548 	if (suspend)
549 		status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
550 						    buf_size, &num_elem_ret,
551 						    NULL);
552 	else
553 		status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
554 						   buf_size, &num_elem_ret,
555 						   NULL);
556 	if (status || num_elem_ret != num_nodes)
557 		ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");
558 
559 	devm_kfree(ice_hw_to_dev(hw), buf);
560 	return status;
561 }
562 
563 /**
564  * ice_sched_clear_agg - clears the agg related information
565  * @hw: pointer to the hardware structure
566  *
567  * This function removes agg list and free up agg related memory
568  * previously allocated.
569  */
570 void ice_sched_clear_agg(struct ice_hw *hw)
571 {
572 	struct ice_sched_agg_info *agg_info;
573 	struct ice_sched_agg_info *atmp;
574 
575 	list_for_each_entry_safe(agg_info, atmp, &hw->agg_list, list_entry) {
576 		struct ice_sched_agg_vsi_info *agg_vsi_info;
577 		struct ice_sched_agg_vsi_info *vtmp;
578 
579 		list_for_each_entry_safe(agg_vsi_info, vtmp,
580 					 &agg_info->agg_vsi_list, list_entry) {
581 			list_del(&agg_vsi_info->list_entry);
582 			devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
583 		}
584 		list_del(&agg_info->list_entry);
585 		devm_kfree(ice_hw_to_dev(hw), agg_info);
586 	}
587 }
588 
589 /**
590  * ice_sched_clear_tx_topo - clears the scheduler tree nodes
591  * @pi: port information structure
592  *
593  * This function removes all the nodes from HW as well as from SW DB.
594  */
595 static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
596 {
597 	if (!pi)
598 		return;
599 	if (pi->root) {
600 		ice_free_sched_node(pi, pi->root);
601 		pi->root = NULL;
602 	}
603 }
604 
605 /**
606  * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
607  * @pi: port information structure
608  *
609  * Cleanup scheduling elements from SW DB
610  */
611 void ice_sched_clear_port(struct ice_port_info *pi)
612 {
613 	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
614 		return;
615 
616 	pi->port_state = ICE_SCHED_PORT_STATE_INIT;
617 	mutex_lock(&pi->sched_lock);
618 	ice_sched_clear_tx_topo(pi);
619 	mutex_unlock(&pi->sched_lock);
620 	mutex_destroy(&pi->sched_lock);
621 }
622 
623 /**
624  * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
625  * @hw: pointer to the hw struct
626  *
627  * Cleanup scheduling elements from SW DB for all the ports
628  */
629 void ice_sched_cleanup_all(struct ice_hw *hw)
630 {
631 	if (!hw)
632 		return;
633 
634 	if (hw->layer_info) {
635 		devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
636 		hw->layer_info = NULL;
637 	}
638 
639 	if (hw->port_info)
640 		ice_sched_clear_port(hw->port_info);
641 
642 	hw->num_tx_sched_layers = 0;
643 	hw->num_tx_sched_phys_layers = 0;
644 	hw->flattened_layers = 0;
645 	hw->max_cgds = 0;
646 }
647 
648 /**
649  * ice_sched_add_elems - add nodes to hw and SW DB
650  * @pi: port information structure
651  * @tc_node: pointer to the branch node
652  * @parent: pointer to the parent node
653  * @layer: layer number to add nodes
654  * @num_nodes: number of nodes
655  * @num_nodes_added: pointer to num nodes added
656  * @first_node_teid: if new nodes are added then return the teid of first node
657  *
658  * This function add nodes to hw as well as to SW DB for a given layer
659  */
660 static enum ice_status
661 ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
662 		    struct ice_sched_node *parent, u8 layer, u16 num_nodes,
663 		    u16 *num_nodes_added, u32 *first_node_teid)
664 {
665 	struct ice_sched_node *prev, *new_node;
666 	struct ice_aqc_add_elem *buf;
667 	u16 i, num_groups_added = 0;
668 	enum ice_status status = 0;
669 	struct ice_hw *hw = pi->hw;
670 	u16 buf_size;
671 	u32 teid;
672 
673 	buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1);
674 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
675 	if (!buf)
676 		return ICE_ERR_NO_MEMORY;
677 
678 	buf->hdr.parent_teid = parent->info.node_teid;
679 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
680 	for (i = 0; i < num_nodes; i++) {
681 		buf->generic[i].parent_teid = parent->info.node_teid;
682 		buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
683 		buf->generic[i].data.valid_sections =
684 			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
685 			ICE_AQC_ELEM_VALID_EIR;
686 		buf->generic[i].data.generic = 0;
687 		buf->generic[i].data.cir_bw.bw_profile_idx =
688 			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
689 		buf->generic[i].data.cir_bw.bw_alloc =
690 			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
691 		buf->generic[i].data.eir_bw.bw_profile_idx =
692 			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
693 		buf->generic[i].data.eir_bw.bw_alloc =
694 			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
695 	}
696 
697 	status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
698 					&num_groups_added, NULL);
699 	if (status || num_groups_added != 1) {
700 		ice_debug(hw, ICE_DBG_SCHED, "add elements failed\n");
701 		devm_kfree(ice_hw_to_dev(hw), buf);
702 		return ICE_ERR_CFG;
703 	}
704 
705 	*num_nodes_added = num_nodes;
706 	/* add nodes to the SW DB */
707 	for (i = 0; i < num_nodes; i++) {
708 		status = ice_sched_add_node(pi, layer, &buf->generic[i]);
709 		if (status) {
710 			ice_debug(hw, ICE_DBG_SCHED,
711 				  "add nodes in SW DB failed status =%d\n",
712 				  status);
713 			break;
714 		}
715 
716 		teid = le32_to_cpu(buf->generic[i].node_teid);
717 		new_node = ice_sched_find_node_by_teid(parent, teid);
718 		if (!new_node) {
719 			ice_debug(hw, ICE_DBG_SCHED,
720 				  "Node is missing for teid =%d\n", teid);
721 			break;
722 		}
723 
724 		new_node->sibling = NULL;
725 		new_node->tc_num = tc_node->tc_num;
726 
727 		/* add it to previous node sibling pointer */
728 		/* Note: siblings are not linked across branches */
729 		prev = ice_sched_get_first_node(hw, tc_node, layer);
730 		if (prev && prev != new_node) {
731 			while (prev->sibling)
732 				prev = prev->sibling;
733 			prev->sibling = new_node;
734 		}
735 
736 		if (i == 0)
737 			*first_node_teid = teid;
738 	}
739 
740 	devm_kfree(ice_hw_to_dev(hw), buf);
741 	return status;
742 }
743 
744 /**
745  * ice_sched_add_nodes_to_layer - Add nodes to a given layer
746  * @pi: port information structure
747  * @tc_node: pointer to TC node
748  * @parent: pointer to parent node
749  * @layer: layer number to add nodes
750  * @num_nodes: number of nodes to be added
751  * @first_node_teid: pointer to the first node teid
752  * @num_nodes_added: pointer to number of nodes added
753  *
754  * This function add nodes to a given layer.
755  */
756 static enum ice_status
757 ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
758 			     struct ice_sched_node *tc_node,
759 			     struct ice_sched_node *parent, u8 layer,
760 			     u16 num_nodes, u32 *first_node_teid,
761 			     u16 *num_nodes_added)
762 {
763 	u32 *first_teid_ptr = first_node_teid;
764 	u16 new_num_nodes, max_child_nodes;
765 	enum ice_status status = 0;
766 	struct ice_hw *hw = pi->hw;
767 	u16 num_added = 0;
768 	u32 temp;
769 
770 	*num_nodes_added = 0;
771 
772 	if (!num_nodes)
773 		return status;
774 
775 	if (!parent || layer < hw->sw_entry_point_layer)
776 		return ICE_ERR_PARAM;
777 
778 	/* max children per node per layer */
779 	max_child_nodes = hw->max_children[parent->tx_sched_layer];
780 
781 	/* current number of children + required nodes exceed max children ? */
782 	if ((parent->num_children + num_nodes) > max_child_nodes) {
783 		/* Fail if the parent is a TC node */
784 		if (parent == tc_node)
785 			return ICE_ERR_CFG;
786 
787 		/* utilize all the spaces if the parent is not full */
788 		if (parent->num_children < max_child_nodes) {
789 			new_num_nodes = max_child_nodes - parent->num_children;
790 			/* this recursion is intentional, and wouldn't
791 			 * go more than 2 calls
792 			 */
793 			status = ice_sched_add_nodes_to_layer(pi, tc_node,
794 							      parent, layer,
795 							      new_num_nodes,
796 							      first_node_teid,
797 							      &num_added);
798 			if (status)
799 				return status;
800 
801 			*num_nodes_added += num_added;
802 		}
803 		/* Don't modify the first node teid memory if the first node was
804 		 * added already in the above call. Instead send some temp
805 		 * memory for all other recursive calls.
806 		 */
807 		if (num_added)
808 			first_teid_ptr = &temp;
809 
810 		new_num_nodes = num_nodes - num_added;
811 
812 		/* This parent is full, try the next sibling */
813 		parent = parent->sibling;
814 
815 		/* this recursion is intentional, for 1024 queues
816 		 * per VSI, it goes max of 16 iterations.
817 		 * 1024 / 8 = 128 layer 8 nodes
818 		 * 128 /8 = 16 (add 8 nodes per iteration)
819 		 */
820 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
821 						      layer, new_num_nodes,
822 						      first_teid_ptr,
823 						      &num_added);
824 		*num_nodes_added += num_added;
825 		return status;
826 	}
827 
828 	status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
829 				     num_nodes_added, first_node_teid);
830 	return status;
831 }
832 
833 /**
834  * ice_sched_get_qgrp_layer - get the current queue group layer number
835  * @hw: pointer to the hw struct
836  *
837  * This function returns the current queue group layer number
838  */
839 static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
840 {
841 	/* It's always total layers - 1, the array is 0 relative so -2 */
842 	return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
843 }
844 
845 /**
846  * ice_sched_get_vsi_layer - get the current VSI layer number
847  * @hw: pointer to the hw struct
848  *
849  * This function returns the current VSI layer number
850  */
851 static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
852 {
853 	/* Num Layers       VSI layer
854 	 *     9               6
855 	 *     7               4
856 	 *     5 or less       sw_entry_point_layer
857 	 */
858 	/* calculate the vsi layer based on number of layers. */
859 	if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
860 		u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
861 
862 		if (layer > hw->sw_entry_point_layer)
863 			return layer;
864 	}
865 	return hw->sw_entry_point_layer;
866 }
867 
868 /**
869  * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
870  * @pi: port information structure
871  *
872  * This function removes the leaf node that was created by the FW
873  * during initialization
874  */
875 static void ice_rm_dflt_leaf_node(struct ice_port_info *pi)
876 {
877 	struct ice_sched_node *node;
878 
879 	node = pi->root;
880 	while (node) {
881 		if (!node->num_children)
882 			break;
883 		node = node->children[0];
884 	}
885 	if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
886 		u32 teid = le32_to_cpu(node->info.node_teid);
887 		enum ice_status status;
888 
889 		/* remove the default leaf node */
890 		status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
891 		if (!status)
892 			ice_free_sched_node(pi, node);
893 	}
894 }
895 
896 /**
897  * ice_sched_rm_dflt_nodes - free the default nodes in the tree
898  * @pi: port information structure
899  *
900  * This function frees all the nodes except root and TC that were created by
901  * the FW during initialization
902  */
903 static void ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
904 {
905 	struct ice_sched_node *node;
906 
907 	ice_rm_dflt_leaf_node(pi);
908 
909 	/* remove the default nodes except TC and root nodes */
910 	node = pi->root;
911 	while (node) {
912 		if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
913 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
914 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
915 			ice_free_sched_node(pi, node);
916 			break;
917 		}
918 
919 		if (!node->num_children)
920 			break;
921 		node = node->children[0];
922 	}
923 }
924 
925 /**
926  * ice_sched_init_port - Initialize scheduler by querying information from FW
927  * @pi: port info structure for the tree to cleanup
928  *
929  * This function is the initial call to find the total number of Tx scheduler
930  * resources, default topology created by firmware and storing the information
931  * in SW DB.
932  */
933 enum ice_status ice_sched_init_port(struct ice_port_info *pi)
934 {
935 	struct ice_aqc_get_topo_elem *buf;
936 	enum ice_status status;
937 	struct ice_hw *hw;
938 	u8 num_branches;
939 	u16 num_elems;
940 	u8 i, j;
941 
942 	if (!pi)
943 		return ICE_ERR_PARAM;
944 	hw = pi->hw;
945 
946 	/* Query the Default Topology from FW */
947 	buf = devm_kzalloc(ice_hw_to_dev(hw), ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
948 	if (!buf)
949 		return ICE_ERR_NO_MEMORY;
950 
951 	/* Query default scheduling tree topology */
952 	status = ice_aq_get_dflt_topo(hw, pi->lport, buf, ICE_AQ_MAX_BUF_LEN,
953 				      &num_branches, NULL);
954 	if (status)
955 		goto err_init_port;
956 
957 	/* num_branches should be between 1-8 */
958 	if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
959 		ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
960 			  num_branches);
961 		status = ICE_ERR_PARAM;
962 		goto err_init_port;
963 	}
964 
965 	/* get the number of elements on the default/first branch */
966 	num_elems = le16_to_cpu(buf[0].hdr.num_elems);
967 
968 	/* num_elems should always be between 1-9 */
969 	if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
970 		ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
971 			  num_elems);
972 		status = ICE_ERR_PARAM;
973 		goto err_init_port;
974 	}
975 
976 	/* If the last node is a leaf node then the index of the Q group
977 	 * layer is two less than the number of elements.
978 	 */
979 	if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
980 	    ICE_AQC_ELEM_TYPE_LEAF)
981 		pi->last_node_teid =
982 			le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
983 	else
984 		pi->last_node_teid =
985 			le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);
986 
987 	/* Insert the Tx Sched root node */
988 	status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
989 	if (status)
990 		goto err_init_port;
991 
992 	/* Parse the default tree and cache the information */
993 	for (i = 0; i < num_branches; i++) {
994 		num_elems = le16_to_cpu(buf[i].hdr.num_elems);
995 
996 		/* Skip root element as already inserted */
997 		for (j = 1; j < num_elems; j++) {
998 			/* update the sw entry point */
999 			if (buf[0].generic[j].data.elem_type ==
1000 			    ICE_AQC_ELEM_TYPE_ENTRY_POINT)
1001 				hw->sw_entry_point_layer = j;
1002 
1003 			status = ice_sched_add_node(pi, j, &buf[i].generic[j]);
1004 			if (status)
1005 				goto err_init_port;
1006 		}
1007 	}
1008 
1009 	/* Remove the default nodes. */
1010 	if (pi->root)
1011 		ice_sched_rm_dflt_nodes(pi);
1012 
1013 	/* initialize the port for handling the scheduler tree */
1014 	pi->port_state = ICE_SCHED_PORT_STATE_READY;
1015 	mutex_init(&pi->sched_lock);
1016 
1017 err_init_port:
1018 	if (status && pi->root) {
1019 		ice_free_sched_node(pi, pi->root);
1020 		pi->root = NULL;
1021 	}
1022 
1023 	devm_kfree(ice_hw_to_dev(hw), buf);
1024 	return status;
1025 }
1026 
1027 /**
1028  * ice_sched_query_res_alloc - query the FW for num of logical sched layers
1029  * @hw: pointer to the HW struct
1030  *
1031  * query FW for allocated scheduler resources and store in HW struct
1032  */
1033 enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
1034 {
1035 	struct ice_aqc_query_txsched_res_resp *buf;
1036 	enum ice_status status = 0;
1037 	__le16 max_sibl;
1038 	u8 i;
1039 
1040 	if (hw->layer_info)
1041 		return status;
1042 
1043 	buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
1044 	if (!buf)
1045 		return ICE_ERR_NO_MEMORY;
1046 
1047 	status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
1048 	if (status)
1049 		goto sched_query_out;
1050 
1051 	hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
1052 	hw->num_tx_sched_phys_layers =
1053 		le16_to_cpu(buf->sched_props.phys_levels);
1054 	hw->flattened_layers = buf->sched_props.flattening_bitmap;
1055 	hw->max_cgds = buf->sched_props.max_pf_cgds;
1056 
1057 	/* max sibling group size of current layer refers to the max children
1058 	 * of the below layer node.
1059 	 * layer 1 node max children will be layer 2 max sibling group size
1060 	 * layer 2 node max children will be layer 3 max sibling group size
1061 	 * and so on. This array will be populated from root (index 0) to
1062 	 * qgroup layer 7. Leaf node has no children.
1063 	 */
1064 	for (i = 0; i < hw->num_tx_sched_layers; i++) {
1065 		max_sibl = buf->layer_props[i].max_sibl_grp_sz;
1066 		hw->max_children[i] = le16_to_cpu(max_sibl);
1067 	}
1068 
1069 	hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
1070 				      (hw->num_tx_sched_layers *
1071 				       sizeof(*hw->layer_info)),
1072 				      GFP_KERNEL);
1073 	if (!hw->layer_info) {
1074 		status = ICE_ERR_NO_MEMORY;
1075 		goto sched_query_out;
1076 	}
1077 
1078 sched_query_out:
1079 	devm_kfree(ice_hw_to_dev(hw), buf);
1080 	return status;
1081 }
1082 
1083 /**
1084  * ice_sched_find_node_in_subtree - Find node in part of base node subtree
1085  * @hw: pointer to the hw struct
1086  * @base: pointer to the base node
1087  * @node: pointer to the node to search
1088  *
1089  * This function checks whether a given node is part of the base node
1090  * subtree or not
1091  */
1092 static bool
1093 ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
1094 			       struct ice_sched_node *node)
1095 {
1096 	u8 i;
1097 
1098 	for (i = 0; i < base->num_children; i++) {
1099 		struct ice_sched_node *child = base->children[i];
1100 
1101 		if (node == child)
1102 			return true;
1103 
1104 		if (child->tx_sched_layer > node->tx_sched_layer)
1105 			return false;
1106 
1107 		/* this recursion is intentional, and wouldn't
1108 		 * go more than 8 calls
1109 		 */
1110 		if (ice_sched_find_node_in_subtree(hw, child, node))
1111 			return true;
1112 	}
1113 	return false;
1114 }
1115 
1116 /**
1117  * ice_sched_get_free_qparent - Get a free lan or rdma q group node
1118  * @pi: port information structure
1119  * @vsi_handle: software VSI handle
1120  * @tc: branch number
1121  * @owner: lan or rdma
1122  *
1123  * This function retrieves a free lan or rdma q group node
1124  */
1125 struct ice_sched_node *
1126 ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
1127 			   u8 owner)
1128 {
1129 	struct ice_sched_node *vsi_node, *qgrp_node = NULL;
1130 	struct ice_vsi_ctx *vsi_ctx;
1131 	u16 max_children;
1132 	u8 qgrp_layer;
1133 
1134 	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
1135 	max_children = pi->hw->max_children[qgrp_layer];
1136 
1137 	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
1138 	if (!vsi_ctx)
1139 		return NULL;
1140 	vsi_node = vsi_ctx->sched.vsi_node[tc];
1141 	/* validate invalid VSI id */
1142 	if (!vsi_node)
1143 		goto lan_q_exit;
1144 
1145 	/* get the first q group node from VSI sub-tree */
1146 	qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
1147 	while (qgrp_node) {
1148 		/* make sure the qgroup node is part of the VSI subtree */
1149 		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
1150 			if (qgrp_node->num_children < max_children &&
1151 			    qgrp_node->owner == owner)
1152 				break;
1153 		qgrp_node = qgrp_node->sibling;
1154 	}
1155 
1156 lan_q_exit:
1157 	return qgrp_node;
1158 }
1159 
1160 /**
1161  * ice_sched_get_vsi_node - Get a VSI node based on VSI id
1162  * @hw: pointer to the hw struct
1163  * @tc_node: pointer to the TC node
1164  * @vsi_handle: software VSI handle
1165  *
1166  * This function retrieves a VSI node for a given VSI id from a given
1167  * TC branch
1168  */
1169 static struct ice_sched_node *
1170 ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
1171 		       u16 vsi_handle)
1172 {
1173 	struct ice_sched_node *node;
1174 	u8 vsi_layer;
1175 
1176 	vsi_layer = ice_sched_get_vsi_layer(hw);
1177 	node = ice_sched_get_first_node(hw, tc_node, vsi_layer);
1178 
1179 	/* Check whether it already exists */
1180 	while (node) {
1181 		if (node->vsi_handle == vsi_handle)
1182 			return node;
1183 		node = node->sibling;
1184 	}
1185 
1186 	return node;
1187 }
1188 
1189 /**
1190  * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
1191  * @hw: pointer to the hw struct
1192  * @num_qs: number of queues
1193  * @num_nodes: num nodes array
1194  *
1195  * This function calculates the number of VSI child nodes based on the
1196  * number of queues.
1197  */
1198 static void
1199 ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
1200 {
1201 	u16 num = num_qs;
1202 	u8 i, qgl, vsil;
1203 
1204 	qgl = ice_sched_get_qgrp_layer(hw);
1205 	vsil = ice_sched_get_vsi_layer(hw);
1206 
1207 	/* calculate num nodes from q group to VSI layer */
1208 	for (i = qgl; i > vsil; i--) {
1209 		/* round to the next integer if there is a remainder */
1210 		num = DIV_ROUND_UP(num, hw->max_children[i]);
1211 
1212 		/* need at least one node */
1213 		num_nodes[i] = num ? num : 1;
1214 	}
1215 }
1216 
1217 /**
1218  * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
1219  * @pi: port information structure
1220  * @vsi_handle: software VSI handle
1221  * @tc_node: pointer to the TC node
1222  * @num_nodes: pointer to the num nodes that needs to be added per layer
1223  * @owner: node owner (lan or rdma)
1224  *
1225  * This function adds the VSI child nodes to tree. It gets called for
1226  * lan and rdma separately.
1227  */
1228 static enum ice_status
1229 ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
1230 			      struct ice_sched_node *tc_node, u16 *num_nodes,
1231 			      u8 owner)
1232 {
1233 	struct ice_sched_node *parent, *node;
1234 	struct ice_hw *hw = pi->hw;
1235 	enum ice_status status;
1236 	u32 first_node_teid;
1237 	u16 num_added = 0;
1238 	u8 i, qgl, vsil;
1239 
1240 	qgl = ice_sched_get_qgrp_layer(hw);
1241 	vsil = ice_sched_get_vsi_layer(hw);
1242 	parent = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1243 	for (i = vsil + 1; i <= qgl; i++) {
1244 		if (!parent)
1245 			return ICE_ERR_CFG;
1246 
1247 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
1248 						      num_nodes[i],
1249 						      &first_node_teid,
1250 						      &num_added);
1251 		if (status || num_nodes[i] != num_added)
1252 			return ICE_ERR_CFG;
1253 
1254 		/* The newly added node can be a new parent for the next
1255 		 * layer nodes
1256 		 */
1257 		if (num_added) {
1258 			parent = ice_sched_find_node_by_teid(tc_node,
1259 							     first_node_teid);
1260 			node = parent;
1261 			while (node) {
1262 				node->owner = owner;
1263 				node = node->sibling;
1264 			}
1265 		} else {
1266 			parent = parent->children[0];
1267 		}
1268 	}
1269 
1270 	return 0;
1271 }
1272 
1273 /**
1274  * ice_sched_rm_vsi_child_nodes - remove VSI child nodes from the tree
1275  * @pi: port information structure
1276  * @vsi_node: pointer to the VSI node
1277  * @num_nodes: pointer to the num nodes that needs to be removed per layer
1278  * @owner: node owner (lan or rdma)
1279  *
1280  * This function removes the VSI child nodes from the tree. It gets called for
1281  * lan and rdma separately.
1282  */
1283 static void
1284 ice_sched_rm_vsi_child_nodes(struct ice_port_info *pi,
1285 			     struct ice_sched_node *vsi_node, u16 *num_nodes,
1286 			     u8 owner)
1287 {
1288 	struct ice_sched_node *node, *next;
1289 	u8 i, qgl, vsil;
1290 	u16 num;
1291 
1292 	qgl = ice_sched_get_qgrp_layer(pi->hw);
1293 	vsil = ice_sched_get_vsi_layer(pi->hw);
1294 
1295 	for (i = qgl; i > vsil; i--) {
1296 		num = num_nodes[i];
1297 		node = ice_sched_get_first_node(pi->hw, vsi_node, i);
1298 		while (node && num) {
1299 			next = node->sibling;
1300 			if (node->owner == owner && !node->num_children) {
1301 				ice_free_sched_node(pi, node);
1302 				num--;
1303 			}
1304 			node = next;
1305 		}
1306 	}
1307 }
1308 
1309 /**
1310  * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
1311  * @hw: pointer to the hw struct
1312  * @tc_node: pointer to TC node
1313  * @num_nodes: pointer to num nodes array
1314  *
1315  * This function calculates the number of supported nodes needed to add this
1316  * VSI into Tx tree including the VSI, parent and intermediate nodes in below
1317  * layers
1318  */
1319 static void
1320 ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
1321 				 struct ice_sched_node *tc_node, u16 *num_nodes)
1322 {
1323 	struct ice_sched_node *node;
1324 	u8 vsil;
1325 	int i;
1326 
1327 	vsil = ice_sched_get_vsi_layer(hw);
1328 	for (i = vsil; i >= hw->sw_entry_point_layer; i--)
1329 		/* Add intermediate nodes if TC has no children and
1330 		 * need at least one node for VSI
1331 		 */
1332 		if (!tc_node->num_children || i == vsil) {
1333 			num_nodes[i]++;
1334 		} else {
1335 			/* If intermediate nodes are reached max children
1336 			 * then add a new one.
1337 			 */
1338 			node = ice_sched_get_first_node(hw, tc_node, (u8)i);
1339 			/* scan all the siblings */
1340 			while (node) {
1341 				if (node->num_children < hw->max_children[i])
1342 					break;
1343 				node = node->sibling;
1344 			}
1345 
1346 			/* tree has one intermediate node to add this new VSI.
1347 			 * So no need to calculate supported nodes for below
1348 			 * layers.
1349 			 */
1350 			if (node)
1351 				break;
1352 			/* all the nodes are full, allocate a new one */
1353 			num_nodes[i]++;
1354 		}
1355 }
1356 
1357 /**
1358  * ice_sched_add_vsi_support_nodes - add VSI supported nodes into Tx tree
1359  * @pi: port information structure
1360  * @vsi_handle: software VSI handle
1361  * @tc_node: pointer to TC node
1362  * @num_nodes: pointer to num nodes array
1363  *
1364  * This function adds the VSI supported nodes into Tx tree including the
1365  * VSI, its parent and intermediate nodes in below layers
1366  */
1367 static enum ice_status
1368 ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
1369 				struct ice_sched_node *tc_node, u16 *num_nodes)
1370 {
1371 	struct ice_sched_node *parent = tc_node;
1372 	enum ice_status status;
1373 	u32 first_node_teid;
1374 	u16 num_added = 0;
1375 	u8 i, vsil;
1376 
1377 	if (!pi)
1378 		return ICE_ERR_PARAM;
1379 
1380 	vsil = ice_sched_get_vsi_layer(pi->hw);
1381 	for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
1382 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
1383 						      i, num_nodes[i],
1384 						      &first_node_teid,
1385 						      &num_added);
1386 		if (status || num_nodes[i] != num_added)
1387 			return ICE_ERR_CFG;
1388 
1389 		/* The newly added node can be a new parent for the next
1390 		 * layer nodes
1391 		 */
1392 		if (num_added)
1393 			parent = ice_sched_find_node_by_teid(tc_node,
1394 							     first_node_teid);
1395 		else
1396 			parent = parent->children[0];
1397 
1398 		if (!parent)
1399 			return ICE_ERR_CFG;
1400 
1401 		if (i == vsil)
1402 			parent->vsi_handle = vsi_handle;
1403 	}
1404 
1405 	return 0;
1406 }
1407 
1408 /**
1409  * ice_sched_add_vsi_to_topo - add a new VSI into tree
1410  * @pi: port information structure
1411  * @vsi_handle: software VSI handle
1412  * @tc: TC number
1413  *
1414  * This function adds a new VSI into scheduler tree
1415  */
1416 static enum ice_status
1417 ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
1418 {
1419 	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1420 	struct ice_sched_node *tc_node;
1421 	struct ice_hw *hw = pi->hw;
1422 
1423 	tc_node = ice_sched_get_tc_node(pi, tc);
1424 	if (!tc_node)
1425 		return ICE_ERR_PARAM;
1426 
1427 	/* calculate number of supported nodes needed for this VSI */
1428 	ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
1429 
1430 	/* add vsi supported nodes to tc subtree */
1431 	return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
1432 					       num_nodes);
1433 }
1434 
1435 /**
1436  * ice_sched_update_vsi_child_nodes - update VSI child nodes
1437  * @pi: port information structure
1438  * @vsi_handle: software VSI handle
1439  * @tc: TC number
1440  * @new_numqs: new number of max queues
1441  * @owner: owner of this subtree
1442  *
1443  * This function updates the VSI child nodes based on the number of queues
1444  */
1445 static enum ice_status
1446 ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
1447 				 u8 tc, u16 new_numqs, u8 owner)
1448 {
1449 	u16 prev_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1450 	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1451 	struct ice_sched_node *vsi_node;
1452 	struct ice_sched_node *tc_node;
1453 	struct ice_vsi_ctx *vsi_ctx;
1454 	enum ice_status status = 0;
1455 	struct ice_hw *hw = pi->hw;
1456 	u16 prev_numqs;
1457 	u8 i;
1458 
1459 	tc_node = ice_sched_get_tc_node(pi, tc);
1460 	if (!tc_node)
1461 		return ICE_ERR_CFG;
1462 
1463 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1464 	if (!vsi_node)
1465 		return ICE_ERR_CFG;
1466 
1467 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
1468 	if (!vsi_ctx)
1469 		return ICE_ERR_PARAM;
1470 
1471 	if (owner == ICE_SCHED_NODE_OWNER_LAN)
1472 		prev_numqs = vsi_ctx->sched.max_lanq[tc];
1473 	else
1474 		return ICE_ERR_PARAM;
1475 
1476 	/* num queues are not changed */
1477 	if (prev_numqs == new_numqs)
1478 		return status;
1479 
1480 	/* calculate number of nodes based on prev/new number of qs */
1481 	if (prev_numqs)
1482 		ice_sched_calc_vsi_child_nodes(hw, prev_numqs, prev_num_nodes);
1483 
1484 	if (new_numqs)
1485 		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
1486 
1487 	if (prev_numqs > new_numqs) {
1488 		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
1489 			new_num_nodes[i] = prev_num_nodes[i] - new_num_nodes[i];
1490 
1491 		ice_sched_rm_vsi_child_nodes(pi, vsi_node, new_num_nodes,
1492 					     owner);
1493 	} else {
1494 		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
1495 			new_num_nodes[i] -= prev_num_nodes[i];
1496 
1497 		status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
1498 						       new_num_nodes, owner);
1499 		if (status)
1500 			return status;
1501 	}
1502 
1503 	vsi_ctx->sched.max_lanq[tc] = new_numqs;
1504 
1505 	return status;
1506 }
1507 
1508 /**
1509  * ice_sched_cfg_vsi - configure the new/existing VSI
1510  * @pi: port information structure
1511  * @vsi_handle: software VSI handle
1512  * @tc: TC number
1513  * @maxqs: max number of queues
1514  * @owner: lan or rdma
1515  * @enable: TC enabled or disabled
1516  *
1517  * This function adds/updates VSI nodes based on the number of queues. If TC is
1518  * enabled and VSI is in suspended state then resume the VSI back. If TC is
1519  * disabled then suspend the VSI if it is not already.
1520  */
1521 enum ice_status
1522 ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
1523 		  u8 owner, bool enable)
1524 {
1525 	struct ice_sched_node *vsi_node, *tc_node;
1526 	struct ice_vsi_ctx *vsi_ctx;
1527 	enum ice_status status = 0;
1528 	struct ice_hw *hw = pi->hw;
1529 
1530 	tc_node = ice_sched_get_tc_node(pi, tc);
1531 	if (!tc_node)
1532 		return ICE_ERR_PARAM;
1533 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
1534 	if (!vsi_ctx)
1535 		return ICE_ERR_PARAM;
1536 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1537 
1538 	/* suspend the VSI if tc is not enabled */
1539 	if (!enable) {
1540 		if (vsi_node && vsi_node->in_use) {
1541 			u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1542 
1543 			status = ice_sched_suspend_resume_elems(hw, 1, &teid,
1544 								true);
1545 			if (!status)
1546 				vsi_node->in_use = false;
1547 		}
1548 		return status;
1549 	}
1550 
1551 	/* TC is enabled, if it is a new VSI then add it to the tree */
1552 	if (!vsi_node) {
1553 		status = ice_sched_add_vsi_to_topo(pi, vsi_handle, tc);
1554 		if (status)
1555 			return status;
1556 
1557 		vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1558 		if (!vsi_node)
1559 			return ICE_ERR_CFG;
1560 
1561 		vsi_ctx->sched.vsi_node[tc] = vsi_node;
1562 		vsi_node->in_use = true;
1563 		/* invalidate the max queues whenever VSI gets added first time
1564 		 * into the scheduler tree (boot or after reset). We need to
1565 		 * recreate the child nodes all the time in these cases.
1566 		 */
1567 		vsi_ctx->sched.max_lanq[tc] = 0;
1568 	}
1569 
1570 	/* update the VSI child nodes */
1571 	status = ice_sched_update_vsi_child_nodes(pi, vsi_handle, tc, maxqs,
1572 						  owner);
1573 	if (status)
1574 		return status;
1575 
1576 	/* TC is enabled, resume the VSI if it is in the suspend state */
1577 	if (!vsi_node->in_use) {
1578 		u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1579 
1580 		status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
1581 		if (!status)
1582 			vsi_node->in_use = true;
1583 	}
1584 
1585 	return status;
1586 }
1587 
1588 /**
1589  * ice_sched_rm_agg_vsi_entry - remove agg related VSI info entry
1590  * @pi: port information structure
1591  * @vsi_handle: software VSI handle
1592  *
1593  * This function removes single aggregator VSI info entry from
1594  * aggregator list.
1595  */
1596 static void
1597 ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
1598 {
1599 	struct ice_sched_agg_info *agg_info;
1600 	struct ice_sched_agg_info *atmp;
1601 
1602 	list_for_each_entry_safe(agg_info, atmp, &pi->hw->agg_list,
1603 				 list_entry) {
1604 		struct ice_sched_agg_vsi_info *agg_vsi_info;
1605 		struct ice_sched_agg_vsi_info *vtmp;
1606 
1607 		list_for_each_entry_safe(agg_vsi_info, vtmp,
1608 					 &agg_info->agg_vsi_list, list_entry)
1609 			if (agg_vsi_info->vsi_handle == vsi_handle) {
1610 				list_del(&agg_vsi_info->list_entry);
1611 				devm_kfree(ice_hw_to_dev(pi->hw),
1612 					   agg_vsi_info);
1613 				return;
1614 			}
1615 	}
1616 }
1617 
1618 /**
1619  * ice_sched_is_leaf_node_present - check for a leaf node in the sub-tree
1620  * @node: pointer to the sub-tree node
1621  *
1622  * This function checks for a leaf node presence in a given sub-tree node.
1623  */
1624 static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
1625 {
1626 	u8 i;
1627 
1628 	for (i = 0; i < node->num_children; i++)
1629 		if (ice_sched_is_leaf_node_present(node->children[i]))
1630 			return true;
1631 	/* check for a leaf node */
1632 	return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF);
1633 }
1634 
1635 /**
1636  * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
1637  * @pi: port information structure
1638  * @vsi_handle: software VSI handle
1639  * @owner: LAN or RDMA
1640  *
1641  * This function removes the VSI and its LAN or RDMA children nodes from the
1642  * scheduler tree.
1643  */
1644 static enum ice_status
1645 ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
1646 {
1647 	enum ice_status status = ICE_ERR_PARAM;
1648 	struct ice_vsi_ctx *vsi_ctx;
1649 	u8 i, j = 0;
1650 
1651 	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
1652 		return status;
1653 	mutex_lock(&pi->sched_lock);
1654 	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
1655 	if (!vsi_ctx)
1656 		goto exit_sched_rm_vsi_cfg;
1657 
1658 	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
1659 		struct ice_sched_node *vsi_node, *tc_node;
1660 
1661 		tc_node = ice_sched_get_tc_node(pi, i);
1662 		if (!tc_node)
1663 			continue;
1664 
1665 		vsi_node = ice_sched_get_vsi_node(pi->hw, tc_node, vsi_handle);
1666 		if (!vsi_node)
1667 			continue;
1668 
1669 		if (ice_sched_is_leaf_node_present(vsi_node)) {
1670 			ice_debug(pi->hw, ICE_DBG_SCHED,
1671 				  "VSI has leaf nodes in TC %d\n", i);
1672 			status = ICE_ERR_IN_USE;
1673 			goto exit_sched_rm_vsi_cfg;
1674 		}
1675 		while (j < vsi_node->num_children) {
1676 			if (vsi_node->children[j]->owner == owner) {
1677 				ice_free_sched_node(pi, vsi_node->children[j]);
1678 
1679 				/* reset the counter again since the num
1680 				 * children will be updated after node removal
1681 				 */
1682 				j = 0;
1683 			} else {
1684 				j++;
1685 			}
1686 		}
1687 		/* remove the VSI if it has no children */
1688 		if (!vsi_node->num_children) {
1689 			ice_free_sched_node(pi, vsi_node);
1690 			vsi_ctx->sched.vsi_node[i] = NULL;
1691 
1692 			/* clean up agg related vsi info if any */
1693 			ice_sched_rm_agg_vsi_info(pi, vsi_handle);
1694 		}
1695 		if (owner == ICE_SCHED_NODE_OWNER_LAN)
1696 			vsi_ctx->sched.max_lanq[i] = 0;
1697 	}
1698 	status = 0;
1699 
1700 exit_sched_rm_vsi_cfg:
1701 	mutex_unlock(&pi->sched_lock);
1702 	return status;
1703 }
1704 
1705 /**
1706  * ice_rm_vsi_lan_cfg - remove VSI and its LAN children nodes
1707  * @pi: port information structure
1708  * @vsi_handle: software VSI handle
1709  *
1710  * This function clears the VSI and its LAN children nodes from scheduler tree
1711  * for all TCs.
1712  */
1713 enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
1714 {
1715 	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
1716 }
1717