xref: /openbmc/linux/drivers/net/ethernet/intel/ice/ice_sched.c (revision 8631f940b81bf0da3d375fce166d381fa8c47bb2)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018, Intel Corporation. */
3 
4 #include "ice_sched.h"
5 
6 /**
7  * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
8  * @pi: port information structure
9  * @info: Scheduler element information from firmware
10  *
11  * This function inserts the root node of the scheduling tree topology
12  * to the SW DB.
13  */
14 static enum ice_status
15 ice_sched_add_root_node(struct ice_port_info *pi,
16 			struct ice_aqc_txsched_elem_data *info)
17 {
18 	struct ice_sched_node *root;
19 	struct ice_hw *hw;
20 
21 	if (!pi)
22 		return ICE_ERR_PARAM;
23 
24 	hw = pi->hw;
25 
26 	root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
27 	if (!root)
28 		return ICE_ERR_NO_MEMORY;
29 
30 	/* coverity[suspicious_sizeof] */
31 	root->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[0],
32 				      sizeof(*root), GFP_KERNEL);
33 	if (!root->children) {
34 		devm_kfree(ice_hw_to_dev(hw), root);
35 		return ICE_ERR_NO_MEMORY;
36 	}
37 
38 	memcpy(&root->info, info, sizeof(*info));
39 	pi->root = root;
40 	return 0;
41 }
42 
43 /**
44  * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
45  * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
46  * @teid: node teid to search
47  *
48  * This function searches for a node matching the teid in the scheduling tree
49  * from the SW DB. The search is recursive and is restricted by the number of
50  * layers it has searched through; stopping at the max supported layer.
51  *
52  * This function needs to be called when holding the port_info->sched_lock
53  */
54 struct ice_sched_node *
55 ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
56 {
57 	u16 i;
58 
59 	/* The TEID is same as that of the start_node */
60 	if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
61 		return start_node;
62 
63 	/* The node has no children or is at the max layer */
64 	if (!start_node->num_children ||
65 	    start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
66 	    start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
67 		return NULL;
68 
69 	/* Check if teid matches to any of the children nodes */
70 	for (i = 0; i < start_node->num_children; i++)
71 		if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
72 			return start_node->children[i];
73 
74 	/* Search within each child's sub-tree */
75 	for (i = 0; i < start_node->num_children; i++) {
76 		struct ice_sched_node *tmp;
77 
78 		tmp = ice_sched_find_node_by_teid(start_node->children[i],
79 						  teid);
80 		if (tmp)
81 			return tmp;
82 	}
83 
84 	return NULL;
85 }
86 
87 /**
88  * ice_aq_query_sched_elems - query scheduler elements
89  * @hw: pointer to the hw struct
90  * @elems_req: number of elements to query
91  * @buf: pointer to buffer
92  * @buf_size: buffer size in bytes
93  * @elems_ret: returns total number of elements returned
94  * @cd: pointer to command details structure or NULL
95  *
96  * Query scheduling elements (0x0404)
97  */
98 static enum ice_status
99 ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
100 			 struct ice_aqc_get_elem *buf, u16 buf_size,
101 			 u16 *elems_ret, struct ice_sq_cd *cd)
102 {
103 	struct ice_aqc_get_cfg_elem *cmd;
104 	struct ice_aq_desc desc;
105 	enum ice_status status;
106 
107 	cmd = &desc.params.get_update_elem;
108 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_sched_elems);
109 	cmd->num_elem_req = cpu_to_le16(elems_req);
110 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
111 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
112 	if (!status && elems_ret)
113 		*elems_ret = le16_to_cpu(cmd->num_elem_resp);
114 
115 	return status;
116 }
117 
118 /**
119  * ice_sched_query_elem - query element information from hw
120  * @hw: pointer to the hw struct
121  * @node_teid: node teid to be queried
122  * @buf: buffer to element information
123  *
124  * This function queries HW element information
125  */
126 static enum ice_status
127 ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
128 		     struct ice_aqc_get_elem *buf)
129 {
130 	u16 buf_size, num_elem_ret = 0;
131 	enum ice_status status;
132 
133 	buf_size = sizeof(*buf);
134 	memset(buf, 0, buf_size);
135 	buf->generic[0].node_teid = cpu_to_le32(node_teid);
136 	status = ice_aq_query_sched_elems(hw, 1, buf, buf_size, &num_elem_ret,
137 					  NULL);
138 	if (status || num_elem_ret != 1)
139 		ice_debug(hw, ICE_DBG_SCHED, "query element failed\n");
140 	return status;
141 }
142 
143 /**
144  * ice_sched_add_node - Insert the Tx scheduler node in SW DB
145  * @pi: port information structure
146  * @layer: Scheduler layer of the node
147  * @info: Scheduler element information from firmware
148  *
149  * This function inserts a scheduler node to the SW DB.
150  */
151 enum ice_status
152 ice_sched_add_node(struct ice_port_info *pi, u8 layer,
153 		   struct ice_aqc_txsched_elem_data *info)
154 {
155 	struct ice_sched_node *parent;
156 	struct ice_aqc_get_elem elem;
157 	struct ice_sched_node *node;
158 	enum ice_status status;
159 	struct ice_hw *hw;
160 
161 	if (!pi)
162 		return ICE_ERR_PARAM;
163 
164 	hw = pi->hw;
165 
166 	/* A valid parent node should be there */
167 	parent = ice_sched_find_node_by_teid(pi->root,
168 					     le32_to_cpu(info->parent_teid));
169 	if (!parent) {
170 		ice_debug(hw, ICE_DBG_SCHED,
171 			  "Parent Node not found for parent_teid=0x%x\n",
172 			  le32_to_cpu(info->parent_teid));
173 		return ICE_ERR_PARAM;
174 	}
175 
176 	/* query the current node information from FW  before additing it
177 	 * to the SW DB
178 	 */
179 	status = ice_sched_query_elem(hw, le32_to_cpu(info->node_teid), &elem);
180 	if (status)
181 		return status;
182 
183 	node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
184 	if (!node)
185 		return ICE_ERR_NO_MEMORY;
186 	if (hw->max_children[layer]) {
187 		/* coverity[suspicious_sizeof] */
188 		node->children = devm_kcalloc(ice_hw_to_dev(hw),
189 					      hw->max_children[layer],
190 					      sizeof(*node), GFP_KERNEL);
191 		if (!node->children) {
192 			devm_kfree(ice_hw_to_dev(hw), node);
193 			return ICE_ERR_NO_MEMORY;
194 		}
195 	}
196 
197 	node->in_use = true;
198 	node->parent = parent;
199 	node->tx_sched_layer = layer;
200 	parent->children[parent->num_children++] = node;
201 	memcpy(&node->info, &elem.generic[0], sizeof(node->info));
202 	return 0;
203 }
204 
205 /**
206  * ice_aq_delete_sched_elems - delete scheduler elements
207  * @hw: pointer to the hw struct
208  * @grps_req: number of groups to delete
209  * @buf: pointer to buffer
210  * @buf_size: buffer size in bytes
211  * @grps_del: returns total number of elements deleted
212  * @cd: pointer to command details structure or NULL
213  *
214  * Delete scheduling elements (0x040F)
215  */
216 static enum ice_status
217 ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
218 			  struct ice_aqc_delete_elem *buf, u16 buf_size,
219 			  u16 *grps_del, struct ice_sq_cd *cd)
220 {
221 	struct ice_aqc_add_move_delete_elem *cmd;
222 	struct ice_aq_desc desc;
223 	enum ice_status status;
224 
225 	cmd = &desc.params.add_move_delete_elem;
226 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_delete_sched_elems);
227 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
228 	cmd->num_grps_req = cpu_to_le16(grps_req);
229 
230 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
231 	if (!status && grps_del)
232 		*grps_del = le16_to_cpu(cmd->num_grps_updated);
233 
234 	return status;
235 }
236 
237 /**
238  * ice_sched_remove_elems - remove nodes from hw
239  * @hw: pointer to the hw struct
240  * @parent: pointer to the parent node
241  * @num_nodes: number of nodes
242  * @node_teids: array of node teids to be deleted
243  *
244  * This function remove nodes from hw
245  */
246 static enum ice_status
247 ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
248 		       u16 num_nodes, u32 *node_teids)
249 {
250 	struct ice_aqc_delete_elem *buf;
251 	u16 i, num_groups_removed = 0;
252 	enum ice_status status;
253 	u16 buf_size;
254 
255 	buf_size = sizeof(*buf) + sizeof(u32) * (num_nodes - 1);
256 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
257 	if (!buf)
258 		return ICE_ERR_NO_MEMORY;
259 
260 	buf->hdr.parent_teid = parent->info.node_teid;
261 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
262 	for (i = 0; i < num_nodes; i++)
263 		buf->teid[i] = cpu_to_le32(node_teids[i]);
264 
265 	status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
266 					   &num_groups_removed, NULL);
267 	if (status || num_groups_removed != 1)
268 		ice_debug(hw, ICE_DBG_SCHED, "remove elements failed\n");
269 
270 	devm_kfree(ice_hw_to_dev(hw), buf);
271 	return status;
272 }
273 
274 /**
275  * ice_sched_get_first_node - get the first node of the given layer
276  * @hw: pointer to the hw struct
277  * @parent: pointer the base node of the subtree
278  * @layer: layer number
279  *
280  * This function retrieves the first node of the given layer from the subtree
281  */
282 static struct ice_sched_node *
283 ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent,
284 			 u8 layer)
285 {
286 	u8 i;
287 
288 	if (layer < hw->sw_entry_point_layer)
289 		return NULL;
290 	for (i = 0; i < parent->num_children; i++) {
291 		struct ice_sched_node *node = parent->children[i];
292 
293 		if (node) {
294 			if (node->tx_sched_layer == layer)
295 				return node;
296 			/* this recursion is intentional, and wouldn't
297 			 * go more than 9 calls
298 			 */
299 			return ice_sched_get_first_node(hw, node, layer);
300 		}
301 	}
302 	return NULL;
303 }
304 
305 /**
306  * ice_sched_get_tc_node - get pointer to TC node
307  * @pi: port information structure
308  * @tc: TC number
309  *
310  * This function returns the TC node pointer
311  */
312 struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
313 {
314 	u8 i;
315 
316 	if (!pi)
317 		return NULL;
318 	for (i = 0; i < pi->root->num_children; i++)
319 		if (pi->root->children[i]->tc_num == tc)
320 			return pi->root->children[i];
321 	return NULL;
322 }
323 
324 /**
325  * ice_free_sched_node - Free a Tx scheduler node from SW DB
326  * @pi: port information structure
327  * @node: pointer to the ice_sched_node struct
328  *
329  * This function frees up a node from SW DB as well as from HW
330  *
331  * This function needs to be called with the port_info->sched_lock held
332  */
333 void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
334 {
335 	struct ice_sched_node *parent;
336 	struct ice_hw *hw = pi->hw;
337 	u8 i, j;
338 
339 	/* Free the children before freeing up the parent node
340 	 * The parent array is updated below and that shifts the nodes
341 	 * in the array. So always pick the first child if num children > 0
342 	 */
343 	while (node->num_children)
344 		ice_free_sched_node(pi, node->children[0]);
345 
346 	/* Leaf, TC and root nodes can't be deleted by SW */
347 	if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
348 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
349 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
350 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
351 		u32 teid = le32_to_cpu(node->info.node_teid);
352 		enum ice_status status;
353 
354 		status = ice_sched_remove_elems(hw, node->parent, 1, &teid);
355 		if (status)
356 			ice_debug(hw, ICE_DBG_SCHED,
357 				  "remove element failed %d\n", status);
358 	}
359 	parent = node->parent;
360 	/* root has no parent */
361 	if (parent) {
362 		struct ice_sched_node *p, *tc_node;
363 
364 		/* update the parent */
365 		for (i = 0; i < parent->num_children; i++)
366 			if (parent->children[i] == node) {
367 				for (j = i + 1; j < parent->num_children; j++)
368 					parent->children[j - 1] =
369 						parent->children[j];
370 				parent->num_children--;
371 				break;
372 			}
373 
374 		/* search for previous sibling that points to this node and
375 		 * remove the reference
376 		 */
377 		tc_node = ice_sched_get_tc_node(pi, node->tc_num);
378 		if (!tc_node) {
379 			ice_debug(hw, ICE_DBG_SCHED,
380 				  "Invalid TC number %d\n", node->tc_num);
381 			goto err_exit;
382 		}
383 		p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer);
384 		while (p) {
385 			if (p->sibling == node) {
386 				p->sibling = node->sibling;
387 				break;
388 			}
389 			p = p->sibling;
390 		}
391 	}
392 err_exit:
393 	/* leaf nodes have no children */
394 	if (node->children)
395 		devm_kfree(ice_hw_to_dev(hw), node->children);
396 	devm_kfree(ice_hw_to_dev(hw), node);
397 }
398 
399 /**
400  * ice_aq_get_dflt_topo - gets default scheduler topology
401  * @hw: pointer to the hw struct
402  * @lport: logical port number
403  * @buf: pointer to buffer
404  * @buf_size: buffer size in bytes
405  * @num_branches: returns total number of queue to port branches
406  * @cd: pointer to command details structure or NULL
407  *
408  * Get default scheduler topology (0x400)
409  */
410 static enum ice_status
411 ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
412 		     struct ice_aqc_get_topo_elem *buf, u16 buf_size,
413 		     u8 *num_branches, struct ice_sq_cd *cd)
414 {
415 	struct ice_aqc_get_topo *cmd;
416 	struct ice_aq_desc desc;
417 	enum ice_status status;
418 
419 	cmd = &desc.params.get_topo;
420 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
421 	cmd->port_num = lport;
422 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
423 	if (!status && num_branches)
424 		*num_branches = cmd->num_branches;
425 
426 	return status;
427 }
428 
429 /**
430  * ice_aq_add_sched_elems - adds scheduling element
431  * @hw: pointer to the hw struct
432  * @grps_req: the number of groups that are requested to be added
433  * @buf: pointer to buffer
434  * @buf_size: buffer size in bytes
435  * @grps_added: returns total number of groups added
436  * @cd: pointer to command details structure or NULL
437  *
438  * Add scheduling elements (0x0401)
439  */
440 static enum ice_status
441 ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
442 		       struct ice_aqc_add_elem *buf, u16 buf_size,
443 		       u16 *grps_added, struct ice_sq_cd *cd)
444 {
445 	struct ice_aqc_add_move_delete_elem *cmd;
446 	struct ice_aq_desc desc;
447 	enum ice_status status;
448 
449 	cmd = &desc.params.add_move_delete_elem;
450 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_sched_elems);
451 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
452 
453 	cmd->num_grps_req = cpu_to_le16(grps_req);
454 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
455 	if (!status && grps_added)
456 		*grps_added = le16_to_cpu(cmd->num_grps_updated);
457 
458 	return status;
459 }
460 
461 /**
462  * ice_suspend_resume_elems - suspend/resume scheduler elements
463  * @hw: pointer to the hw struct
464  * @elems_req: number of elements to suspend
465  * @buf: pointer to buffer
466  * @buf_size: buffer size in bytes
467  * @elems_ret: returns total number of elements suspended
468  * @cd: pointer to command details structure or NULL
469  * @cmd_code: command code for suspend or resume
470  *
471  * suspend/resume scheduler elements
472  */
473 static enum ice_status
474 ice_suspend_resume_elems(struct ice_hw *hw, u16 elems_req,
475 			 struct ice_aqc_suspend_resume_elem *buf, u16 buf_size,
476 			 u16 *elems_ret, struct ice_sq_cd *cd,
477 			 enum ice_adminq_opc cmd_code)
478 {
479 	struct ice_aqc_get_cfg_elem *cmd;
480 	struct ice_aq_desc desc;
481 	enum ice_status status;
482 
483 	cmd = &desc.params.get_update_elem;
484 	ice_fill_dflt_direct_cmd_desc(&desc, cmd_code);
485 	cmd->num_elem_req = cpu_to_le16(elems_req);
486 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
487 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
488 	if (!status && elems_ret)
489 		*elems_ret = le16_to_cpu(cmd->num_elem_resp);
490 	return status;
491 }
492 
493 /**
494  * ice_aq_suspend_sched_elems - suspend scheduler elements
495  * @hw: pointer to the hw struct
496  * @elems_req: number of elements to suspend
497  * @buf: pointer to buffer
498  * @buf_size: buffer size in bytes
499  * @elems_ret: returns total number of elements suspended
500  * @cd: pointer to command details structure or NULL
501  *
502  * Suspend scheduling elements (0x0409)
503  */
504 static enum ice_status
505 ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req,
506 			   struct ice_aqc_suspend_resume_elem *buf,
507 			   u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
508 {
509 	return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
510 					cd, ice_aqc_opc_suspend_sched_elems);
511 }
512 
513 /**
514  * ice_aq_resume_sched_elems - resume scheduler elements
515  * @hw: pointer to the hw struct
516  * @elems_req: number of elements to resume
517  * @buf: pointer to buffer
518  * @buf_size: buffer size in bytes
519  * @elems_ret: returns total number of elements resumed
520  * @cd: pointer to command details structure or NULL
521  *
522  * resume scheduling elements (0x040A)
523  */
524 static enum ice_status
525 ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req,
526 			  struct ice_aqc_suspend_resume_elem *buf,
527 			  u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
528 {
529 	return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
530 					cd, ice_aqc_opc_resume_sched_elems);
531 }
532 
533 /**
534  * ice_aq_query_sched_res - query scheduler resource
535  * @hw: pointer to the hw struct
536  * @buf_size: buffer size in bytes
537  * @buf: pointer to buffer
538  * @cd: pointer to command details structure or NULL
539  *
540  * Query scheduler resource allocation (0x0412)
541  */
542 static enum ice_status
543 ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
544 		       struct ice_aqc_query_txsched_res_resp *buf,
545 		       struct ice_sq_cd *cd)
546 {
547 	struct ice_aq_desc desc;
548 
549 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
550 	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
551 }
552 
553 /**
554  * ice_sched_suspend_resume_elems - suspend or resume hw nodes
555  * @hw: pointer to the hw struct
556  * @num_nodes: number of nodes
557  * @node_teids: array of node teids to be suspended or resumed
558  * @suspend: true means suspend / false means resume
559  *
560  * This function suspends or resumes hw nodes
561  */
562 static enum ice_status
563 ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
564 			       bool suspend)
565 {
566 	struct ice_aqc_suspend_resume_elem *buf;
567 	u16 i, buf_size, num_elem_ret = 0;
568 	enum ice_status status;
569 
570 	buf_size = sizeof(*buf) * num_nodes;
571 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
572 	if (!buf)
573 		return ICE_ERR_NO_MEMORY;
574 
575 	for (i = 0; i < num_nodes; i++)
576 		buf->teid[i] = cpu_to_le32(node_teids[i]);
577 
578 	if (suspend)
579 		status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
580 						    buf_size, &num_elem_ret,
581 						    NULL);
582 	else
583 		status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
584 						   buf_size, &num_elem_ret,
585 						   NULL);
586 	if (status || num_elem_ret != num_nodes)
587 		ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");
588 
589 	devm_kfree(ice_hw_to_dev(hw), buf);
590 	return status;
591 }
592 
593 /**
594  * ice_sched_clear_tx_topo - clears the schduler tree nodes
595  * @pi: port information structure
596  *
597  * This function removes all the nodes from HW as well as from SW DB.
598  */
599 static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
600 {
601 	struct ice_sched_agg_info *agg_info;
602 	struct ice_sched_agg_info *atmp;
603 	struct ice_hw *hw;
604 
605 	if (!pi)
606 		return;
607 
608 	hw = pi->hw;
609 
610 	list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) {
611 		struct ice_sched_agg_vsi_info *agg_vsi_info;
612 		struct ice_sched_agg_vsi_info *vtmp;
613 
614 		list_for_each_entry_safe(agg_vsi_info, vtmp,
615 					 &agg_info->agg_vsi_list, list_entry) {
616 			list_del(&agg_vsi_info->list_entry);
617 			devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
618 		}
619 	}
620 
621 	if (pi->root) {
622 		ice_free_sched_node(pi, pi->root);
623 		pi->root = NULL;
624 	}
625 }
626 
627 /**
628  * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
629  * @pi: port information structure
630  *
631  * Cleanup scheduling elements from SW DB
632  */
633 void ice_sched_clear_port(struct ice_port_info *pi)
634 {
635 	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
636 		return;
637 
638 	pi->port_state = ICE_SCHED_PORT_STATE_INIT;
639 	mutex_lock(&pi->sched_lock);
640 	ice_sched_clear_tx_topo(pi);
641 	mutex_unlock(&pi->sched_lock);
642 	mutex_destroy(&pi->sched_lock);
643 }
644 
645 /**
646  * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
647  * @hw: pointer to the hw struct
648  *
649  * Cleanup scheduling elements from SW DB for all the ports
650  */
651 void ice_sched_cleanup_all(struct ice_hw *hw)
652 {
653 	if (!hw)
654 		return;
655 
656 	if (hw->layer_info) {
657 		devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
658 		hw->layer_info = NULL;
659 	}
660 
661 	if (hw->port_info)
662 		ice_sched_clear_port(hw->port_info);
663 
664 	hw->num_tx_sched_layers = 0;
665 	hw->num_tx_sched_phys_layers = 0;
666 	hw->flattened_layers = 0;
667 	hw->max_cgds = 0;
668 }
669 
670 /**
671  * ice_sched_add_elems - add nodes to hw and SW DB
672  * @pi: port information structure
673  * @tc_node: pointer to the branch node
674  * @parent: pointer to the parent node
675  * @layer: layer number to add nodes
676  * @num_nodes: number of nodes
677  * @num_nodes_added: pointer to num nodes added
678  * @first_node_teid: if new nodes are added then return the teid of first node
679  *
680  * This function add nodes to hw as well as to SW DB for a given layer
681  */
682 static enum ice_status
683 ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
684 		    struct ice_sched_node *parent, u8 layer, u16 num_nodes,
685 		    u16 *num_nodes_added, u32 *first_node_teid)
686 {
687 	struct ice_sched_node *prev, *new_node;
688 	struct ice_aqc_add_elem *buf;
689 	u16 i, num_groups_added = 0;
690 	enum ice_status status = 0;
691 	struct ice_hw *hw = pi->hw;
692 	u16 buf_size;
693 	u32 teid;
694 
695 	buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1);
696 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
697 	if (!buf)
698 		return ICE_ERR_NO_MEMORY;
699 
700 	buf->hdr.parent_teid = parent->info.node_teid;
701 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
702 	for (i = 0; i < num_nodes; i++) {
703 		buf->generic[i].parent_teid = parent->info.node_teid;
704 		buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
705 		buf->generic[i].data.valid_sections =
706 			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
707 			ICE_AQC_ELEM_VALID_EIR;
708 		buf->generic[i].data.generic = 0;
709 		buf->generic[i].data.cir_bw.bw_profile_idx =
710 			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
711 		buf->generic[i].data.cir_bw.bw_alloc =
712 			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
713 		buf->generic[i].data.eir_bw.bw_profile_idx =
714 			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
715 		buf->generic[i].data.eir_bw.bw_alloc =
716 			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
717 	}
718 
719 	status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
720 					&num_groups_added, NULL);
721 	if (status || num_groups_added != 1) {
722 		ice_debug(hw, ICE_DBG_SCHED, "add elements failed\n");
723 		devm_kfree(ice_hw_to_dev(hw), buf);
724 		return ICE_ERR_CFG;
725 	}
726 
727 	*num_nodes_added = num_nodes;
728 	/* add nodes to the SW DB */
729 	for (i = 0; i < num_nodes; i++) {
730 		status = ice_sched_add_node(pi, layer, &buf->generic[i]);
731 		if (status) {
732 			ice_debug(hw, ICE_DBG_SCHED,
733 				  "add nodes in SW DB failed status =%d\n",
734 				  status);
735 			break;
736 		}
737 
738 		teid = le32_to_cpu(buf->generic[i].node_teid);
739 		new_node = ice_sched_find_node_by_teid(parent, teid);
740 		if (!new_node) {
741 			ice_debug(hw, ICE_DBG_SCHED,
742 				  "Node is missing for teid =%d\n", teid);
743 			break;
744 		}
745 
746 		new_node->sibling = NULL;
747 		new_node->tc_num = tc_node->tc_num;
748 
749 		/* add it to previous node sibling pointer */
750 		/* Note: siblings are not linked across branches */
751 		prev = ice_sched_get_first_node(hw, tc_node, layer);
752 		if (prev && prev != new_node) {
753 			while (prev->sibling)
754 				prev = prev->sibling;
755 			prev->sibling = new_node;
756 		}
757 
758 		if (i == 0)
759 			*first_node_teid = teid;
760 	}
761 
762 	devm_kfree(ice_hw_to_dev(hw), buf);
763 	return status;
764 }
765 
766 /**
767  * ice_sched_add_nodes_to_layer - Add nodes to a given layer
768  * @pi: port information structure
769  * @tc_node: pointer to TC node
770  * @parent: pointer to parent node
771  * @layer: layer number to add nodes
772  * @num_nodes: number of nodes to be added
773  * @first_node_teid: pointer to the first node teid
774  * @num_nodes_added: pointer to number of nodes added
775  *
776  * This function add nodes to a given layer.
777  */
778 static enum ice_status
779 ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
780 			     struct ice_sched_node *tc_node,
781 			     struct ice_sched_node *parent, u8 layer,
782 			     u16 num_nodes, u32 *first_node_teid,
783 			     u16 *num_nodes_added)
784 {
785 	u32 *first_teid_ptr = first_node_teid;
786 	u16 new_num_nodes, max_child_nodes;
787 	enum ice_status status = 0;
788 	struct ice_hw *hw = pi->hw;
789 	u16 num_added = 0;
790 	u32 temp;
791 
792 	*num_nodes_added = 0;
793 
794 	if (!num_nodes)
795 		return status;
796 
797 	if (!parent || layer < hw->sw_entry_point_layer)
798 		return ICE_ERR_PARAM;
799 
800 	/* max children per node per layer */
801 	max_child_nodes = hw->max_children[parent->tx_sched_layer];
802 
803 	/* current number of children + required nodes exceed max children ? */
804 	if ((parent->num_children + num_nodes) > max_child_nodes) {
805 		/* Fail if the parent is a TC node */
806 		if (parent == tc_node)
807 			return ICE_ERR_CFG;
808 
809 		/* utilize all the spaces if the parent is not full */
810 		if (parent->num_children < max_child_nodes) {
811 			new_num_nodes = max_child_nodes - parent->num_children;
812 			/* this recursion is intentional, and wouldn't
813 			 * go more than 2 calls
814 			 */
815 			status = ice_sched_add_nodes_to_layer(pi, tc_node,
816 							      parent, layer,
817 							      new_num_nodes,
818 							      first_node_teid,
819 							      &num_added);
820 			if (status)
821 				return status;
822 
823 			*num_nodes_added += num_added;
824 		}
825 		/* Don't modify the first node teid memory if the first node was
826 		 * added already in the above call. Instead send some temp
827 		 * memory for all other recursive calls.
828 		 */
829 		if (num_added)
830 			first_teid_ptr = &temp;
831 
832 		new_num_nodes = num_nodes - num_added;
833 
834 		/* This parent is full, try the next sibling */
835 		parent = parent->sibling;
836 
837 		/* this recursion is intentional, for 1024 queues
838 		 * per VSI, it goes max of 16 iterations.
839 		 * 1024 / 8 = 128 layer 8 nodes
840 		 * 128 /8 = 16 (add 8 nodes per iteration)
841 		 */
842 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
843 						      layer, new_num_nodes,
844 						      first_teid_ptr,
845 						      &num_added);
846 		*num_nodes_added += num_added;
847 		return status;
848 	}
849 
850 	status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
851 				     num_nodes_added, first_node_teid);
852 	return status;
853 }
854 
855 /**
856  * ice_sched_get_qgrp_layer - get the current queue group layer number
857  * @hw: pointer to the hw struct
858  *
859  * This function returns the current queue group layer number
860  */
861 static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
862 {
863 	/* It's always total layers - 1, the array is 0 relative so -2 */
864 	return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
865 }
866 
867 /**
868  * ice_sched_get_vsi_layer - get the current VSI layer number
869  * @hw: pointer to the hw struct
870  *
871  * This function returns the current VSI layer number
872  */
873 static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
874 {
875 	/* Num Layers       VSI layer
876 	 *     9               6
877 	 *     7               4
878 	 *     5 or less       sw_entry_point_layer
879 	 */
880 	/* calculate the vsi layer based on number of layers. */
881 	if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
882 		u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
883 
884 		if (layer > hw->sw_entry_point_layer)
885 			return layer;
886 	}
887 	return hw->sw_entry_point_layer;
888 }
889 
890 /**
891  * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
892  * @pi: port information structure
893  *
894  * This function removes the leaf node that was created by the FW
895  * during initialization
896  */
897 static void ice_rm_dflt_leaf_node(struct ice_port_info *pi)
898 {
899 	struct ice_sched_node *node;
900 
901 	node = pi->root;
902 	while (node) {
903 		if (!node->num_children)
904 			break;
905 		node = node->children[0];
906 	}
907 	if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
908 		u32 teid = le32_to_cpu(node->info.node_teid);
909 		enum ice_status status;
910 
911 		/* remove the default leaf node */
912 		status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
913 		if (!status)
914 			ice_free_sched_node(pi, node);
915 	}
916 }
917 
918 /**
919  * ice_sched_rm_dflt_nodes - free the default nodes in the tree
920  * @pi: port information structure
921  *
922  * This function frees all the nodes except root and TC that were created by
923  * the FW during initialization
924  */
925 static void ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
926 {
927 	struct ice_sched_node *node;
928 
929 	ice_rm_dflt_leaf_node(pi);
930 
931 	/* remove the default nodes except TC and root nodes */
932 	node = pi->root;
933 	while (node) {
934 		if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
935 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
936 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
937 			ice_free_sched_node(pi, node);
938 			break;
939 		}
940 
941 		if (!node->num_children)
942 			break;
943 		node = node->children[0];
944 	}
945 }
946 
947 /**
948  * ice_sched_init_port - Initialize scheduler by querying information from FW
949  * @pi: port info structure for the tree to cleanup
950  *
951  * This function is the initial call to find the total number of Tx scheduler
952  * resources, default topology created by firmware and storing the information
953  * in SW DB.
954  */
955 enum ice_status ice_sched_init_port(struct ice_port_info *pi)
956 {
957 	struct ice_aqc_get_topo_elem *buf;
958 	enum ice_status status;
959 	struct ice_hw *hw;
960 	u8 num_branches;
961 	u16 num_elems;
962 	u8 i, j;
963 
964 	if (!pi)
965 		return ICE_ERR_PARAM;
966 	hw = pi->hw;
967 
968 	/* Query the Default Topology from FW */
969 	buf = devm_kzalloc(ice_hw_to_dev(hw), ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
970 	if (!buf)
971 		return ICE_ERR_NO_MEMORY;
972 
973 	/* Query default scheduling tree topology */
974 	status = ice_aq_get_dflt_topo(hw, pi->lport, buf, ICE_AQ_MAX_BUF_LEN,
975 				      &num_branches, NULL);
976 	if (status)
977 		goto err_init_port;
978 
979 	/* num_branches should be between 1-8 */
980 	if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
981 		ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
982 			  num_branches);
983 		status = ICE_ERR_PARAM;
984 		goto err_init_port;
985 	}
986 
987 	/* get the number of elements on the default/first branch */
988 	num_elems = le16_to_cpu(buf[0].hdr.num_elems);
989 
990 	/* num_elems should always be between 1-9 */
991 	if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
992 		ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
993 			  num_elems);
994 		status = ICE_ERR_PARAM;
995 		goto err_init_port;
996 	}
997 
998 	/* If the last node is a leaf node then the index of the Q group
999 	 * layer is two less than the number of elements.
1000 	 */
1001 	if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
1002 	    ICE_AQC_ELEM_TYPE_LEAF)
1003 		pi->last_node_teid =
1004 			le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
1005 	else
1006 		pi->last_node_teid =
1007 			le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);
1008 
1009 	/* Insert the Tx Sched root node */
1010 	status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
1011 	if (status)
1012 		goto err_init_port;
1013 
1014 	/* Parse the default tree and cache the information */
1015 	for (i = 0; i < num_branches; i++) {
1016 		num_elems = le16_to_cpu(buf[i].hdr.num_elems);
1017 
1018 		/* Skip root element as already inserted */
1019 		for (j = 1; j < num_elems; j++) {
1020 			/* update the sw entry point */
1021 			if (buf[0].generic[j].data.elem_type ==
1022 			    ICE_AQC_ELEM_TYPE_ENTRY_POINT)
1023 				hw->sw_entry_point_layer = j;
1024 
1025 			status = ice_sched_add_node(pi, j, &buf[i].generic[j]);
1026 			if (status)
1027 				goto err_init_port;
1028 		}
1029 	}
1030 
1031 	/* Remove the default nodes. */
1032 	if (pi->root)
1033 		ice_sched_rm_dflt_nodes(pi);
1034 
1035 	/* initialize the port for handling the scheduler tree */
1036 	pi->port_state = ICE_SCHED_PORT_STATE_READY;
1037 	mutex_init(&pi->sched_lock);
1038 	INIT_LIST_HEAD(&pi->agg_list);
1039 
1040 err_init_port:
1041 	if (status && pi->root) {
1042 		ice_free_sched_node(pi, pi->root);
1043 		pi->root = NULL;
1044 	}
1045 
1046 	devm_kfree(ice_hw_to_dev(hw), buf);
1047 	return status;
1048 }
1049 
1050 /**
1051  * ice_sched_query_res_alloc - query the FW for num of logical sched layers
1052  * @hw: pointer to the HW struct
1053  *
1054  * query FW for allocated scheduler resources and store in HW struct
1055  */
1056 enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
1057 {
1058 	struct ice_aqc_query_txsched_res_resp *buf;
1059 	enum ice_status status = 0;
1060 	__le16 max_sibl;
1061 	u8 i;
1062 
1063 	if (hw->layer_info)
1064 		return status;
1065 
1066 	buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
1067 	if (!buf)
1068 		return ICE_ERR_NO_MEMORY;
1069 
1070 	status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
1071 	if (status)
1072 		goto sched_query_out;
1073 
1074 	hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
1075 	hw->num_tx_sched_phys_layers =
1076 		le16_to_cpu(buf->sched_props.phys_levels);
1077 	hw->flattened_layers = buf->sched_props.flattening_bitmap;
1078 	hw->max_cgds = buf->sched_props.max_pf_cgds;
1079 
1080 	/* max sibling group size of current layer refers to the max children
1081 	 * of the below layer node.
1082 	 * layer 1 node max children will be layer 2 max sibling group size
1083 	 * layer 2 node max children will be layer 3 max sibling group size
1084 	 * and so on. This array will be populated from root (index 0) to
1085 	 * qgroup layer 7. Leaf node has no children.
1086 	 */
1087 	for (i = 0; i < hw->num_tx_sched_layers; i++) {
1088 		max_sibl = buf->layer_props[i].max_sibl_grp_sz;
1089 		hw->max_children[i] = le16_to_cpu(max_sibl);
1090 	}
1091 
1092 	hw->layer_info = (struct ice_aqc_layer_props *)
1093 			  devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
1094 				       (hw->num_tx_sched_layers *
1095 					sizeof(*hw->layer_info)),
1096 				       GFP_KERNEL);
1097 	if (!hw->layer_info) {
1098 		status = ICE_ERR_NO_MEMORY;
1099 		goto sched_query_out;
1100 	}
1101 
1102 sched_query_out:
1103 	devm_kfree(ice_hw_to_dev(hw), buf);
1104 	return status;
1105 }
1106 
1107 /**
1108  * ice_sched_find_node_in_subtree - Find node in part of base node subtree
1109  * @hw: pointer to the hw struct
1110  * @base: pointer to the base node
1111  * @node: pointer to the node to search
1112  *
1113  * This function checks whether a given node is part of the base node
1114  * subtree or not
1115  */
1116 static bool
1117 ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
1118 			       struct ice_sched_node *node)
1119 {
1120 	u8 i;
1121 
1122 	for (i = 0; i < base->num_children; i++) {
1123 		struct ice_sched_node *child = base->children[i];
1124 
1125 		if (node == child)
1126 			return true;
1127 
1128 		if (child->tx_sched_layer > node->tx_sched_layer)
1129 			return false;
1130 
1131 		/* this recursion is intentional, and wouldn't
1132 		 * go more than 8 calls
1133 		 */
1134 		if (ice_sched_find_node_in_subtree(hw, child, node))
1135 			return true;
1136 	}
1137 	return false;
1138 }
1139 
1140 /**
1141  * ice_sched_get_free_qparent - Get a free lan or rdma q group node
1142  * @pi: port information structure
1143  * @vsi_handle: software VSI handle
1144  * @tc: branch number
1145  * @owner: lan or rdma
1146  *
1147  * This function retrieves a free lan or rdma q group node
1148  */
1149 struct ice_sched_node *
1150 ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
1151 			   u8 owner)
1152 {
1153 	struct ice_sched_node *vsi_node, *qgrp_node = NULL;
1154 	struct ice_vsi_ctx *vsi_ctx;
1155 	u16 max_children;
1156 	u8 qgrp_layer;
1157 
1158 	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
1159 	max_children = pi->hw->max_children[qgrp_layer];
1160 
1161 	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
1162 	if (!vsi_ctx)
1163 		return NULL;
1164 	vsi_node = vsi_ctx->sched.vsi_node[tc];
1165 	/* validate invalid VSI id */
1166 	if (!vsi_node)
1167 		goto lan_q_exit;
1168 
1169 	/* get the first q group node from VSI sub-tree */
1170 	qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
1171 	while (qgrp_node) {
1172 		/* make sure the qgroup node is part of the VSI subtree */
1173 		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
1174 			if (qgrp_node->num_children < max_children &&
1175 			    qgrp_node->owner == owner)
1176 				break;
1177 		qgrp_node = qgrp_node->sibling;
1178 	}
1179 
1180 lan_q_exit:
1181 	return qgrp_node;
1182 }
1183 
1184 /**
1185  * ice_sched_get_vsi_node - Get a VSI node based on VSI id
1186  * @hw: pointer to the hw struct
1187  * @tc_node: pointer to the TC node
1188  * @vsi_handle: software VSI handle
1189  *
1190  * This function retrieves a VSI node for a given VSI id from a given
1191  * TC branch
1192  */
1193 static struct ice_sched_node *
1194 ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
1195 		       u16 vsi_handle)
1196 {
1197 	struct ice_sched_node *node;
1198 	u8 vsi_layer;
1199 
1200 	vsi_layer = ice_sched_get_vsi_layer(hw);
1201 	node = ice_sched_get_first_node(hw, tc_node, vsi_layer);
1202 
1203 	/* Check whether it already exists */
1204 	while (node) {
1205 		if (node->vsi_handle == vsi_handle)
1206 			return node;
1207 		node = node->sibling;
1208 	}
1209 
1210 	return node;
1211 }
1212 
1213 /**
1214  * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
1215  * @hw: pointer to the hw struct
1216  * @num_qs: number of queues
1217  * @num_nodes: num nodes array
1218  *
1219  * This function calculates the number of VSI child nodes based on the
1220  * number of queues.
1221  */
1222 static void
1223 ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
1224 {
1225 	u16 num = num_qs;
1226 	u8 i, qgl, vsil;
1227 
1228 	qgl = ice_sched_get_qgrp_layer(hw);
1229 	vsil = ice_sched_get_vsi_layer(hw);
1230 
1231 	/* calculate num nodes from q group to VSI layer */
1232 	for (i = qgl; i > vsil; i--) {
1233 		/* round to the next integer if there is a remainder */
1234 		num = DIV_ROUND_UP(num, hw->max_children[i]);
1235 
1236 		/* need at least one node */
1237 		num_nodes[i] = num ? num : 1;
1238 	}
1239 }
1240 
1241 /**
1242  * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
1243  * @pi: port information structure
1244  * @vsi_handle: software VSI handle
1245  * @tc_node: pointer to the TC node
1246  * @num_nodes: pointer to the num nodes that needs to be added per layer
1247  * @owner: node owner (lan or rdma)
1248  *
1249  * This function adds the VSI child nodes to tree. It gets called for
1250  * lan and rdma separately.
1251  */
1252 static enum ice_status
1253 ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
1254 			      struct ice_sched_node *tc_node, u16 *num_nodes,
1255 			      u8 owner)
1256 {
1257 	struct ice_sched_node *parent, *node;
1258 	struct ice_hw *hw = pi->hw;
1259 	enum ice_status status;
1260 	u32 first_node_teid;
1261 	u16 num_added = 0;
1262 	u8 i, qgl, vsil;
1263 
1264 	qgl = ice_sched_get_qgrp_layer(hw);
1265 	vsil = ice_sched_get_vsi_layer(hw);
1266 	parent = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1267 	for (i = vsil + 1; i <= qgl; i++) {
1268 		if (!parent)
1269 			return ICE_ERR_CFG;
1270 
1271 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
1272 						      num_nodes[i],
1273 						      &first_node_teid,
1274 						      &num_added);
1275 		if (status || num_nodes[i] != num_added)
1276 			return ICE_ERR_CFG;
1277 
1278 		/* The newly added node can be a new parent for the next
1279 		 * layer nodes
1280 		 */
1281 		if (num_added) {
1282 			parent = ice_sched_find_node_by_teid(tc_node,
1283 							     first_node_teid);
1284 			node = parent;
1285 			while (node) {
1286 				node->owner = owner;
1287 				node = node->sibling;
1288 			}
1289 		} else {
1290 			parent = parent->children[0];
1291 		}
1292 	}
1293 
1294 	return 0;
1295 }
1296 
1297 /**
1298  * ice_sched_rm_vsi_child_nodes - remove VSI child nodes from the tree
1299  * @pi: port information structure
1300  * @vsi_node: pointer to the VSI node
1301  * @num_nodes: pointer to the num nodes that needs to be removed per layer
1302  * @owner: node owner (lan or rdma)
1303  *
1304  * This function removes the VSI child nodes from the tree. It gets called for
1305  * lan and rdma separately.
1306  */
1307 static void
1308 ice_sched_rm_vsi_child_nodes(struct ice_port_info *pi,
1309 			     struct ice_sched_node *vsi_node, u16 *num_nodes,
1310 			     u8 owner)
1311 {
1312 	struct ice_sched_node *node, *next;
1313 	u8 i, qgl, vsil;
1314 	u16 num;
1315 
1316 	qgl = ice_sched_get_qgrp_layer(pi->hw);
1317 	vsil = ice_sched_get_vsi_layer(pi->hw);
1318 
1319 	for (i = qgl; i > vsil; i--) {
1320 		num = num_nodes[i];
1321 		node = ice_sched_get_first_node(pi->hw, vsi_node, i);
1322 		while (node && num) {
1323 			next = node->sibling;
1324 			if (node->owner == owner && !node->num_children) {
1325 				ice_free_sched_node(pi, node);
1326 				num--;
1327 			}
1328 			node = next;
1329 		}
1330 	}
1331 }
1332 
1333 /**
1334  * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
1335  * @hw: pointer to the hw struct
1336  * @tc_node: pointer to TC node
1337  * @num_nodes: pointer to num nodes array
1338  *
1339  * This function calculates the number of supported nodes needed to add this
1340  * VSI into Tx tree including the VSI, parent and intermediate nodes in below
1341  * layers
1342  */
1343 static void
1344 ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
1345 				 struct ice_sched_node *tc_node, u16 *num_nodes)
1346 {
1347 	struct ice_sched_node *node;
1348 	u8 vsil;
1349 	int i;
1350 
1351 	vsil = ice_sched_get_vsi_layer(hw);
1352 	for (i = vsil; i >= hw->sw_entry_point_layer; i--)
1353 		/* Add intermediate nodes if TC has no children and
1354 		 * need at least one node for VSI
1355 		 */
1356 		if (!tc_node->num_children || i == vsil) {
1357 			num_nodes[i]++;
1358 		} else {
1359 			/* If intermediate nodes are reached max children
1360 			 * then add a new one.
1361 			 */
1362 			node = ice_sched_get_first_node(hw, tc_node, (u8)i);
1363 			/* scan all the siblings */
1364 			while (node) {
1365 				if (node->num_children < hw->max_children[i])
1366 					break;
1367 				node = node->sibling;
1368 			}
1369 
1370 			/* all the nodes are full, allocate a new one */
1371 			if (!node)
1372 				num_nodes[i]++;
1373 		}
1374 }
1375 
1376 /**
1377  * ice_sched_add_vsi_support_nodes - add VSI supported nodes into Tx tree
1378  * @pi: port information structure
1379  * @vsi_handle: software VSI handle
1380  * @tc_node: pointer to TC node
1381  * @num_nodes: pointer to num nodes array
1382  *
1383  * This function adds the VSI supported nodes into Tx tree including the
1384  * VSI, its parent and intermediate nodes in below layers
1385  */
1386 static enum ice_status
1387 ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
1388 				struct ice_sched_node *tc_node, u16 *num_nodes)
1389 {
1390 	struct ice_sched_node *parent = tc_node;
1391 	enum ice_status status;
1392 	u32 first_node_teid;
1393 	u16 num_added = 0;
1394 	u8 i, vsil;
1395 
1396 	if (!pi)
1397 		return ICE_ERR_PARAM;
1398 
1399 	vsil = ice_sched_get_vsi_layer(pi->hw);
1400 	for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
1401 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
1402 						      i, num_nodes[i],
1403 						      &first_node_teid,
1404 						      &num_added);
1405 		if (status || num_nodes[i] != num_added)
1406 			return ICE_ERR_CFG;
1407 
1408 		/* The newly added node can be a new parent for the next
1409 		 * layer nodes
1410 		 */
1411 		if (num_added)
1412 			parent = ice_sched_find_node_by_teid(tc_node,
1413 							     first_node_teid);
1414 		else
1415 			parent = parent->children[0];
1416 
1417 		if (!parent)
1418 			return ICE_ERR_CFG;
1419 
1420 		if (i == vsil)
1421 			parent->vsi_handle = vsi_handle;
1422 	}
1423 
1424 	return 0;
1425 }
1426 
1427 /**
1428  * ice_sched_add_vsi_to_topo - add a new VSI into tree
1429  * @pi: port information structure
1430  * @vsi_handle: software VSI handle
1431  * @tc: TC number
1432  *
1433  * This function adds a new VSI into scheduler tree
1434  */
1435 static enum ice_status
1436 ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
1437 {
1438 	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1439 	struct ice_sched_node *tc_node;
1440 	struct ice_hw *hw = pi->hw;
1441 
1442 	tc_node = ice_sched_get_tc_node(pi, tc);
1443 	if (!tc_node)
1444 		return ICE_ERR_PARAM;
1445 
1446 	/* calculate number of supported nodes needed for this VSI */
1447 	ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
1448 
1449 	/* add vsi supported nodes to tc subtree */
1450 	return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
1451 					       num_nodes);
1452 }
1453 
1454 /**
1455  * ice_sched_update_vsi_child_nodes - update VSI child nodes
1456  * @pi: port information structure
1457  * @vsi_handle: software VSI handle
1458  * @tc: TC number
1459  * @new_numqs: new number of max queues
1460  * @owner: owner of this subtree
1461  *
1462  * This function updates the VSI child nodes based on the number of queues
1463  */
1464 static enum ice_status
1465 ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
1466 				 u8 tc, u16 new_numqs, u8 owner)
1467 {
1468 	u16 prev_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1469 	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1470 	struct ice_sched_node *vsi_node;
1471 	struct ice_sched_node *tc_node;
1472 	struct ice_vsi_ctx *vsi_ctx;
1473 	enum ice_status status = 0;
1474 	struct ice_hw *hw = pi->hw;
1475 	u16 prev_numqs;
1476 	u8 i;
1477 
1478 	tc_node = ice_sched_get_tc_node(pi, tc);
1479 	if (!tc_node)
1480 		return ICE_ERR_CFG;
1481 
1482 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1483 	if (!vsi_node)
1484 		return ICE_ERR_CFG;
1485 
1486 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
1487 	if (!vsi_ctx)
1488 		return ICE_ERR_PARAM;
1489 
1490 	if (owner == ICE_SCHED_NODE_OWNER_LAN)
1491 		prev_numqs = vsi_ctx->sched.max_lanq[tc];
1492 	else
1493 		return ICE_ERR_PARAM;
1494 
1495 	/* num queues are not changed */
1496 	if (prev_numqs == new_numqs)
1497 		return status;
1498 
1499 	/* calculate number of nodes based on prev/new number of qs */
1500 	if (prev_numqs)
1501 		ice_sched_calc_vsi_child_nodes(hw, prev_numqs, prev_num_nodes);
1502 
1503 	if (new_numqs)
1504 		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
1505 
1506 	if (prev_numqs > new_numqs) {
1507 		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
1508 			new_num_nodes[i] = prev_num_nodes[i] - new_num_nodes[i];
1509 
1510 		ice_sched_rm_vsi_child_nodes(pi, vsi_node, new_num_nodes,
1511 					     owner);
1512 	} else {
1513 		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
1514 			new_num_nodes[i] -= prev_num_nodes[i];
1515 
1516 		status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
1517 						       new_num_nodes, owner);
1518 		if (status)
1519 			return status;
1520 	}
1521 
1522 	vsi_ctx->sched.max_lanq[tc] = new_numqs;
1523 
1524 	return status;
1525 }
1526 
1527 /**
1528  * ice_sched_cfg_vsi - configure the new/existing VSI
1529  * @pi: port information structure
1530  * @vsi_handle: software VSI handle
1531  * @tc: TC number
1532  * @maxqs: max number of queues
1533  * @owner: lan or rdma
1534  * @enable: TC enabled or disabled
1535  *
1536  * This function adds/updates VSI nodes based on the number of queues. If TC is
1537  * enabled and VSI is in suspended state then resume the VSI back. If TC is
1538  * disabled then suspend the VSI if it is not already.
1539  */
1540 enum ice_status
1541 ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
1542 		  u8 owner, bool enable)
1543 {
1544 	struct ice_sched_node *vsi_node, *tc_node;
1545 	struct ice_vsi_ctx *vsi_ctx;
1546 	enum ice_status status = 0;
1547 	struct ice_hw *hw = pi->hw;
1548 
1549 	tc_node = ice_sched_get_tc_node(pi, tc);
1550 	if (!tc_node)
1551 		return ICE_ERR_PARAM;
1552 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
1553 	if (!vsi_ctx)
1554 		return ICE_ERR_PARAM;
1555 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1556 
1557 	/* suspend the VSI if tc is not enabled */
1558 	if (!enable) {
1559 		if (vsi_node && vsi_node->in_use) {
1560 			u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1561 
1562 			status = ice_sched_suspend_resume_elems(hw, 1, &teid,
1563 								true);
1564 			if (!status)
1565 				vsi_node->in_use = false;
1566 		}
1567 		return status;
1568 	}
1569 
1570 	/* TC is enabled, if it is a new VSI then add it to the tree */
1571 	if (!vsi_node) {
1572 		status = ice_sched_add_vsi_to_topo(pi, vsi_handle, tc);
1573 		if (status)
1574 			return status;
1575 
1576 		vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1577 		if (!vsi_node)
1578 			return ICE_ERR_CFG;
1579 
1580 		vsi_ctx->sched.vsi_node[tc] = vsi_node;
1581 		vsi_node->in_use = true;
1582 		/* invalidate the max queues whenever VSI gets added first time
1583 		 * into the scheduler tree (boot or after reset). We need to
1584 		 * recreate the child nodes all the time in these cases.
1585 		 */
1586 		vsi_ctx->sched.max_lanq[tc] = 0;
1587 	}
1588 
1589 	/* update the VSI child nodes */
1590 	status = ice_sched_update_vsi_child_nodes(pi, vsi_handle, tc, maxqs,
1591 						  owner);
1592 	if (status)
1593 		return status;
1594 
1595 	/* TC is enabled, resume the VSI if it is in the suspend state */
1596 	if (!vsi_node->in_use) {
1597 		u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1598 
1599 		status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
1600 		if (!status)
1601 			vsi_node->in_use = true;
1602 	}
1603 
1604 	return status;
1605 }
1606 
1607 /**
1608  * ice_sched_rm_agg_vsi_entry - remove agg related VSI info entry
1609  * @pi: port information structure
1610  * @vsi_handle: software VSI handle
1611  *
1612  * This function removes single aggregator VSI info entry from
1613  * aggregator list.
1614  */
1615 static void
1616 ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
1617 {
1618 	struct ice_sched_agg_info *agg_info;
1619 	struct ice_sched_agg_info *atmp;
1620 
1621 	list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) {
1622 		struct ice_sched_agg_vsi_info *agg_vsi_info;
1623 		struct ice_sched_agg_vsi_info *vtmp;
1624 
1625 		list_for_each_entry_safe(agg_vsi_info, vtmp,
1626 					 &agg_info->agg_vsi_list, list_entry)
1627 			if (agg_vsi_info->vsi_handle == vsi_handle) {
1628 				list_del(&agg_vsi_info->list_entry);
1629 				devm_kfree(ice_hw_to_dev(pi->hw),
1630 					   agg_vsi_info);
1631 				return;
1632 			}
1633 	}
1634 }
1635 
1636 /**
1637  * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
1638  * @pi: port information structure
1639  * @vsi_handle: software VSI handle
1640  * @owner: LAN or RDMA
1641  *
1642  * This function removes the VSI and its LAN or RDMA children nodes from the
1643  * scheduler tree.
1644  */
1645 static enum ice_status
1646 ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
1647 {
1648 	enum ice_status status = ICE_ERR_PARAM;
1649 	struct ice_vsi_ctx *vsi_ctx;
1650 	u8 i, j = 0;
1651 
1652 	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
1653 		return status;
1654 	mutex_lock(&pi->sched_lock);
1655 	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
1656 	if (!vsi_ctx)
1657 		goto exit_sched_rm_vsi_cfg;
1658 
1659 	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
1660 		struct ice_sched_node *vsi_node, *tc_node;
1661 
1662 		tc_node = ice_sched_get_tc_node(pi, i);
1663 		if (!tc_node)
1664 			continue;
1665 
1666 		vsi_node = ice_sched_get_vsi_node(pi->hw, tc_node, vsi_handle);
1667 		if (!vsi_node)
1668 			continue;
1669 
1670 		while (j < vsi_node->num_children) {
1671 			if (vsi_node->children[j]->owner == owner) {
1672 				ice_free_sched_node(pi, vsi_node->children[j]);
1673 
1674 				/* reset the counter again since the num
1675 				 * children will be updated after node removal
1676 				 */
1677 				j = 0;
1678 			} else {
1679 				j++;
1680 			}
1681 		}
1682 		/* remove the VSI if it has no children */
1683 		if (!vsi_node->num_children) {
1684 			ice_free_sched_node(pi, vsi_node);
1685 			vsi_ctx->sched.vsi_node[i] = NULL;
1686 
1687 			/* clean up agg related vsi info if any */
1688 			ice_sched_rm_agg_vsi_info(pi, vsi_handle);
1689 		}
1690 		if (owner == ICE_SCHED_NODE_OWNER_LAN)
1691 			vsi_ctx->sched.max_lanq[i] = 0;
1692 	}
1693 	status = 0;
1694 
1695 exit_sched_rm_vsi_cfg:
1696 	mutex_unlock(&pi->sched_lock);
1697 	return status;
1698 }
1699 
1700 /**
1701  * ice_rm_vsi_lan_cfg - remove VSI and its LAN children nodes
1702  * @pi: port information structure
1703  * @vsi_handle: software VSI handle
1704  *
1705  * This function clears the VSI and its LAN children nodes from scheduler tree
1706  * for all TCs.
1707  */
1708 enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
1709 {
1710 	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
1711 }
1712