// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, Intel Corporation. */

#include "ice_sched.h"

/**
 * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
 * @pi: port information structure
 * @info: Scheduler element information from firmware
 *
 * This function inserts the root node of the scheduling tree topology
 * into the SW DB.
 */
static enum ice_status
ice_sched_add_root_node(struct ice_port_info *pi,
			struct ice_aqc_txsched_elem_data *info)
{
	struct ice_sched_node *root;
	struct ice_hw *hw;

	if (!pi)
		return ICE_ERR_PARAM;

	hw = pi->hw;

	root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
	if (!root)
		return ICE_ERR_NO_MEMORY;

	/* coverity[suspicious_sizeof] */
	root->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[0],
				      sizeof(*root), GFP_KERNEL);
	if (!root->children) {
		devm_kfree(ice_hw_to_dev(hw), root);
		return ICE_ERR_NO_MEMORY;
	}

	memcpy(&root->info, info, sizeof(*info));
	pi->root = root;
	return 0;
}
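
/* Note on the coverity[suspicious_sizeof] suppressions here and in
 * ice_sched_add_node(): children[] holds node pointers, yet the element
 * size passed to devm_kcalloc() is the full node struct. That
 * over-allocates each slot, which is safe and appears intentional,
 * hence the suppression.
 */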

/**
 * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
 * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
 * @teid: node teid to search
 *
 * This function searches for a node matching the teid in the scheduling tree
 * from the SW DB. The search is recursive and stops once it reaches the max
 * supported layer.
 *
 * This function needs to be called with the port_info->sched_lock held
 */
struct ice_sched_node *
ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
{
	u16 i;

	/* The TEID is the same as that of the start_node */
	if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
		return start_node;

	/* The node has no children or is at the max layer */
	if (!start_node->num_children ||
	    start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
	    start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
		return NULL;

	/* Check if the teid matches any of the children */
	for (i = 0; i < start_node->num_children; i++)
		if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
			return start_node->children[i];

	/* Search within each child's sub-tree */
	for (i = 0; i < start_node->num_children; i++) {
		struct ice_sched_node *tmp;

		tmp = ice_sched_find_node_by_teid(start_node->children[i],
						  teid);
		if (tmp)
			return tmp;
	}

	return NULL;
}
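
/* A minimal usage sketch (hypothetical caller, not part of this file);
 * the lookup must run with pi->sched_lock held so the tree cannot
 * change underneath it:
 *
 *	mutex_lock(&pi->sched_lock);
 *	node = ice_sched_find_node_by_teid(pi->root, teid);
 *	mutex_unlock(&pi->sched_lock);
 *
 * A NULL return means the TEID is not present in this port's tree.
 */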

/**
 * ice_sched_add_node - Insert the Tx scheduler node in SW DB
 * @pi: port information structure
 * @layer: Scheduler layer of the node
 * @info: Scheduler element information from firmware
 *
 * This function inserts a scheduler node into the SW DB.
 */
enum ice_status
ice_sched_add_node(struct ice_port_info *pi, u8 layer,
		   struct ice_aqc_txsched_elem_data *info)
{
	struct ice_sched_node *parent;
	struct ice_sched_node *node;
	struct ice_hw *hw;

	if (!pi)
		return ICE_ERR_PARAM;

	hw = pi->hw;

	/* A valid parent node should be there */
	parent = ice_sched_find_node_by_teid(pi->root,
					     le32_to_cpu(info->parent_teid));
	if (!parent) {
		ice_debug(hw, ICE_DBG_SCHED,
			  "Parent Node not found for parent_teid=0x%x\n",
			  le32_to_cpu(info->parent_teid));
		return ICE_ERR_PARAM;
	}

	node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
	if (!node)
		return ICE_ERR_NO_MEMORY;
	if (hw->max_children[layer]) {
		/* coverity[suspicious_sizeof] */
		node->children = devm_kcalloc(ice_hw_to_dev(hw),
					      hw->max_children[layer],
					      sizeof(*node), GFP_KERNEL);
		if (!node->children) {
			devm_kfree(ice_hw_to_dev(hw), node);
			return ICE_ERR_NO_MEMORY;
		}
	}

	node->in_use = true;
	node->parent = parent;
	node->tx_sched_layer = layer;
	parent->children[parent->num_children++] = node;
	memcpy(&node->info, info, sizeof(*info));
	return 0;
}

/**
 * ice_aq_delete_sched_elems - delete scheduler elements
 * @hw: pointer to the hw struct
 * @grps_req: number of groups to delete
 * @buf: pointer to buffer
 * @buf_size: buffer size in bytes
 * @grps_del: returns total number of elements deleted
 * @cd: pointer to command details structure or NULL
 *
 * Delete scheduling elements (0x040F)
 */
static enum ice_status
ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
			  struct ice_aqc_delete_elem *buf, u16 buf_size,
			  u16 *grps_del, struct ice_sq_cd *cd)
{
	struct ice_aqc_add_move_delete_elem *cmd;
	struct ice_aq_desc desc;
	enum ice_status status;

	cmd = &desc.params.add_move_delete_elem;
	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_delete_sched_elems);
	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
	cmd->num_grps_req = cpu_to_le16(grps_req);

	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
	if (!status && grps_del)
		*grps_del = le16_to_cpu(cmd->num_grps_updated);

	return status;
}
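
/* Every admin-queue wrapper in this file follows the shape shown above:
 * pick the opcode-specific member of desc.params, fill the descriptor
 * with ice_fill_dflt_direct_cmd_desc(), set ICE_AQ_FLAG_RD when the
 * buffer carries data for firmware to read, send with ice_aq_send_cmd(),
 * and on success read the response count back with le16_to_cpu().
 */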

/**
 * ice_sched_remove_elems - remove nodes from hw
 * @hw: pointer to the hw struct
 * @parent: pointer to the parent node
 * @num_nodes: number of nodes
 * @node_teids: array of node teids to be deleted
 *
 * This function removes nodes from hw
 */
static enum ice_status
ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
		       u16 num_nodes, u32 *node_teids)
{
	struct ice_aqc_delete_elem *buf;
	u16 i, num_groups_removed = 0;
	enum ice_status status;
	u16 buf_size;

	buf_size = sizeof(*buf) + sizeof(u32) * (num_nodes - 1);
	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
	if (!buf)
		return ICE_ERR_NO_MEMORY;

	buf->hdr.parent_teid = parent->info.node_teid;
	buf->hdr.num_elems = cpu_to_le16(num_nodes);
	for (i = 0; i < num_nodes; i++)
		buf->teid[i] = cpu_to_le32(node_teids[i]);

	status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
					   &num_groups_removed, NULL);
	if (status || num_groups_removed != 1)
		ice_debug(hw, ICE_DBG_SCHED, "remove elements failed\n");

	devm_kfree(ice_hw_to_dev(hw), buf);
	return status;
}
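
/* Worked example for the buf_size arithmetic above: ice_aqc_delete_elem
 * already provides room for one TEID, so only (num_nodes - 1) extra u32
 * slots are appended. For num_nodes = 4:
 *
 *	buf_size = sizeof(*buf) + sizeof(u32) * 3;
 *
 * which yields the header plus a contiguous 4-entry teid[] array.
 */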

/**
 * ice_sched_get_first_node - get the first node of the given layer
 * @hw: pointer to the hw struct
 * @parent: pointer to the base node of the subtree
 * @layer: layer number
 *
 * This function retrieves the first node of the given layer from the subtree
 */
static struct ice_sched_node *
ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent,
			 u8 layer)
{
	u8 i;

	if (layer < hw->sw_entry_point_layer)
		return NULL;
	for (i = 0; i < parent->num_children; i++) {
		struct ice_sched_node *node = parent->children[i];

		if (node) {
			if (node->tx_sched_layer == layer)
				return node;
			/* this recursion is intentional, and won't
			 * go more than 9 calls deep
			 */
			return ice_sched_get_first_node(hw, node, layer);
		}
	}
	return NULL;
}
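
/* Note: the loop above descends only through the first non-NULL child it
 * finds; since nodes are added left to right, this reaches the leftmost
 * node of the requested layer in at most one recursive call per layer.
 */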

/**
 * ice_sched_get_tc_node - get pointer to TC node
 * @pi: port information structure
 * @tc: TC number
 *
 * This function returns the TC node pointer
 */
struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
{
	u8 i;

	if (!pi)
		return NULL;
	for (i = 0; i < pi->root->num_children; i++)
		if (pi->root->children[i]->tc_num == tc)
			return pi->root->children[i];
	return NULL;
}

/**
 * ice_free_sched_node - Free a Tx scheduler node from SW DB
 * @pi: port information structure
 * @node: pointer to the ice_sched_node struct
 *
 * This function frees up a node from SW DB as well as from HW
 *
 * This function needs to be called with the port_info->sched_lock held
 */
void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
{
	struct ice_sched_node *parent;
	struct ice_hw *hw = pi->hw;
	u8 i, j;

	/* Free the children before freeing up the parent node
	 * The parent array is updated below and that shifts the nodes
	 * in the array. So always pick the first child if num children > 0
	 */
	while (node->num_children)
		ice_free_sched_node(pi, node->children[0]);

	/* Leaf, TC and root nodes can't be deleted by SW */
	if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
		u32 teid = le32_to_cpu(node->info.node_teid);
		enum ice_status status;

		status = ice_sched_remove_elems(hw, node->parent, 1, &teid);
		if (status)
			ice_debug(hw, ICE_DBG_SCHED,
				  "remove element failed %d\n", status);
	}
	parent = node->parent;
	/* root has no parent */
	if (parent) {
		struct ice_sched_node *p, *tc_node;

		/* update the parent */
		for (i = 0; i < parent->num_children; i++)
			if (parent->children[i] == node) {
				for (j = i + 1; j < parent->num_children; j++)
					parent->children[j - 1] =
						parent->children[j];
				parent->num_children--;
				break;
			}

		/* search for previous sibling that points to this node and
		 * remove the reference
		 */
		tc_node = ice_sched_get_tc_node(pi, node->tc_num);
		if (!tc_node) {
			ice_debug(hw, ICE_DBG_SCHED,
				  "Invalid TC number %d\n", node->tc_num);
			goto err_exit;
		}
		p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer);
		while (p) {
			if (p->sibling == node) {
				p->sibling = node->sibling;
				break;
			}
			p = p->sibling;
		}
	}
err_exit:
	/* leaf nodes have no children */
	if (node->children)
		devm_kfree(ice_hw_to_dev(hw), node->children);
	devm_kfree(ice_hw_to_dev(hw), node);
}

/**
 * ice_aq_get_dflt_topo - gets default scheduler topology
 * @hw: pointer to the hw struct
 * @lport: logical port number
 * @buf: pointer to buffer
 * @buf_size: buffer size in bytes
 * @num_branches: returns total number of queue to port branches
 * @cd: pointer to command details structure or NULL
 *
 * Get default scheduler topology (0x0400)
 */
static enum ice_status
ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
		     struct ice_aqc_get_topo_elem *buf, u16 buf_size,
		     u8 *num_branches, struct ice_sq_cd *cd)
{
	struct ice_aqc_get_topo *cmd;
	struct ice_aq_desc desc;
	enum ice_status status;

	cmd = &desc.params.get_topo;
	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
	cmd->port_num = lport;
	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
	if (!status && num_branches)
		*num_branches = cmd->num_branches;

	return status;
}

/**
 * ice_aq_add_sched_elems - adds scheduling elements
 * @hw: pointer to the hw struct
 * @grps_req: the number of groups that are requested to be added
 * @buf: pointer to buffer
 * @buf_size: buffer size in bytes
 * @grps_added: returns total number of groups added
 * @cd: pointer to command details structure or NULL
 *
 * Add scheduling elements (0x0401)
 */
static enum ice_status
ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
		       struct ice_aqc_add_elem *buf, u16 buf_size,
		       u16 *grps_added, struct ice_sq_cd *cd)
{
	struct ice_aqc_add_move_delete_elem *cmd;
	struct ice_aq_desc desc;
	enum ice_status status;

	cmd = &desc.params.add_move_delete_elem;
	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_sched_elems);
	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);

	cmd->num_grps_req = cpu_to_le16(grps_req);
	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
	if (!status && grps_added)
		*grps_added = le16_to_cpu(cmd->num_grps_updated);

	return status;
}

/**
 * ice_suspend_resume_elems - suspend/resume scheduler elements
 * @hw: pointer to the hw struct
 * @elems_req: number of elements to suspend or resume
 * @buf: pointer to buffer
 * @buf_size: buffer size in bytes
 * @elems_ret: returns total number of elements suspended or resumed
 * @cd: pointer to command details structure or NULL
 * @cmd_code: command code for suspend or resume
 *
 * suspend/resume scheduler elements
 */
static enum ice_status
ice_suspend_resume_elems(struct ice_hw *hw, u16 elems_req,
			 struct ice_aqc_suspend_resume_elem *buf, u16 buf_size,
			 u16 *elems_ret, struct ice_sq_cd *cd,
			 enum ice_adminq_opc cmd_code)
{
	struct ice_aqc_get_cfg_elem *cmd;
	struct ice_aq_desc desc;
	enum ice_status status;

	cmd = &desc.params.get_update_elem;
	ice_fill_dflt_direct_cmd_desc(&desc, cmd_code);
	cmd->num_elem_req = cpu_to_le16(elems_req);
	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
	if (!status && elems_ret)
		*elems_ret = le16_to_cpu(cmd->num_elem_resp);
	return status;
}

/**
 * ice_aq_suspend_sched_elems - suspend scheduler elements
 * @hw: pointer to the hw struct
 * @elems_req: number of elements to suspend
 * @buf: pointer to buffer
 * @buf_size: buffer size in bytes
 * @elems_ret: returns total number of elements suspended
 * @cd: pointer to command details structure or NULL
 *
 * Suspend scheduling elements (0x0409)
 */
static enum ice_status
ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req,
			   struct ice_aqc_suspend_resume_elem *buf,
			   u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
{
	return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
					cd, ice_aqc_opc_suspend_sched_elems);
}

/**
 * ice_aq_resume_sched_elems - resume scheduler elements
 * @hw: pointer to the hw struct
 * @elems_req: number of elements to resume
 * @buf: pointer to buffer
 * @buf_size: buffer size in bytes
 * @elems_ret: returns total number of elements resumed
 * @cd: pointer to command details structure or NULL
 *
 * Resume scheduling elements (0x040A)
 */
static enum ice_status
ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req,
			  struct ice_aqc_suspend_resume_elem *buf,
			  u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
{
	return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
					cd, ice_aqc_opc_resume_sched_elems);
}

/**
 * ice_aq_query_sched_res - query scheduler resource
 * @hw: pointer to the hw struct
 * @buf_size: buffer size in bytes
 * @buf: pointer to buffer
 * @cd: pointer to command details structure or NULL
 *
 * Query scheduler resource allocation (0x0412)
 */
static enum ice_status
ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
		       struct ice_aqc_query_txsched_res_resp *buf,
		       struct ice_sq_cd *cd)
{
	struct ice_aq_desc desc;

	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
}

/**
 * ice_sched_suspend_resume_elems - suspend or resume hw nodes
 * @hw: pointer to the hw struct
 * @num_nodes: number of nodes
 * @node_teids: array of node teids to be suspended or resumed
 * @suspend: true means suspend / false means resume
 *
 * This function suspends or resumes hw nodes
 */
static enum ice_status
ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
			       bool suspend)
{
	struct ice_aqc_suspend_resume_elem *buf;
	u16 i, buf_size, num_elem_ret = 0;
	enum ice_status status;

	buf_size = sizeof(*buf) * num_nodes;
	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
	if (!buf)
		return ICE_ERR_NO_MEMORY;

	for (i = 0; i < num_nodes; i++)
		buf->teid[i] = cpu_to_le32(node_teids[i]);

	if (suspend)
		status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
						    buf_size, &num_elem_ret,
						    NULL);
	else
		status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
						   buf_size, &num_elem_ret,
						   NULL);
	if (status || num_elem_ret != num_nodes)
		ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");

	devm_kfree(ice_hw_to_dev(hw), buf);
	return status;
}
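
/* Usage sketch (mirrors what ice_sched_cfg_vsi() below actually does):
 * suspend a single VSI node, then resume it later:
 *
 *	u32 teid = le32_to_cpu(vsi_node->info.node_teid);
 *
 *	status = ice_sched_suspend_resume_elems(hw, 1, &teid, true);
 *	...
 *	status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
 */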

/**
 * ice_sched_clear_tx_topo - clears the scheduler tree nodes
 * @pi: port information structure
 *
 * This function removes all the nodes from HW as well as from SW DB.
 */
static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
{
	struct ice_sched_agg_info *agg_info;
	struct ice_sched_vsi_info *vsi_elem;
	struct ice_sched_agg_info *atmp;
	struct ice_sched_vsi_info *tmp;
	struct ice_hw *hw;

	if (!pi)
		return;

	hw = pi->hw;

	list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) {
		struct ice_sched_agg_vsi_info *agg_vsi_info;
		struct ice_sched_agg_vsi_info *vtmp;

		list_for_each_entry_safe(agg_vsi_info, vtmp,
					 &agg_info->agg_vsi_list, list_entry) {
			list_del(&agg_vsi_info->list_entry);
			devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
		}
	}

	/* remove the VSI list */
	list_for_each_entry_safe(vsi_elem, tmp, &pi->vsi_info_list,
				 list_entry) {
		list_del(&vsi_elem->list_entry);
		devm_kfree(ice_hw_to_dev(hw), vsi_elem);
	}

	if (pi->root) {
		ice_free_sched_node(pi, pi->root);
		pi->root = NULL;
	}
}

/**
 * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
 * @pi: port information structure
 *
 * Cleanup scheduling elements from SW DB
 */
static void ice_sched_clear_port(struct ice_port_info *pi)
{
	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
		return;

	pi->port_state = ICE_SCHED_PORT_STATE_INIT;
	mutex_lock(&pi->sched_lock);
	ice_sched_clear_tx_topo(pi);
	mutex_unlock(&pi->sched_lock);
	mutex_destroy(&pi->sched_lock);
}

/**
 * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
 * @hw: pointer to the hw struct
 *
 * Cleanup scheduling elements from SW DB for all the ports
 */
void ice_sched_cleanup_all(struct ice_hw *hw)
{
	if (!hw)
		return;

	if (hw->layer_info) {
		devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
		hw->layer_info = NULL;
	}

	if (hw->port_info)
		ice_sched_clear_port(hw->port_info);

	hw->num_tx_sched_layers = 0;
	hw->num_tx_sched_phys_layers = 0;
	hw->flattened_layers = 0;
	hw->max_cgds = 0;
}

/**
 * ice_sched_create_vsi_info_entry - create an empty new VSI entry
 * @pi: port information structure
 * @vsi_id: VSI Id
 *
 * This function creates a new VSI entry and adds it to the list
 */
static struct ice_sched_vsi_info *
ice_sched_create_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id)
{
	struct ice_sched_vsi_info *vsi_elem;

	if (!pi)
		return NULL;

	vsi_elem = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*vsi_elem),
				GFP_KERNEL);
	if (!vsi_elem)
		return NULL;

	list_add(&vsi_elem->list_entry, &pi->vsi_info_list);
	vsi_elem->vsi_id = vsi_id;
	return vsi_elem;
}

/**
 * ice_sched_add_elems - add nodes to hw and SW DB
 * @pi: port information structure
 * @tc_node: pointer to the branch node
 * @parent: pointer to the parent node
 * @layer: layer number to add nodes
 * @num_nodes: number of nodes
 * @num_nodes_added: pointer to num nodes added
 * @first_node_teid: if new nodes are added then return the teid of the first node
 *
 * This function adds nodes to hw as well as to SW DB for a given layer
 */
static enum ice_status
ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
		    struct ice_sched_node *parent, u8 layer, u16 num_nodes,
		    u16 *num_nodes_added, u32 *first_node_teid)
{
	struct ice_sched_node *prev, *new_node;
	struct ice_aqc_add_elem *buf;
	u16 i, num_groups_added = 0;
	enum ice_status status = 0;
	struct ice_hw *hw = pi->hw;
	u16 buf_size;
	u32 teid;

	buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1);
	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
	if (!buf)
		return ICE_ERR_NO_MEMORY;

	buf->hdr.parent_teid = parent->info.node_teid;
	buf->hdr.num_elems = cpu_to_le16(num_nodes);
	for (i = 0; i < num_nodes; i++) {
		buf->generic[i].parent_teid = parent->info.node_teid;
		buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
		buf->generic[i].data.valid_sections =
			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
			ICE_AQC_ELEM_VALID_EIR;
		buf->generic[i].data.generic = 0;
		buf->generic[i].data.cir_bw.bw_profile_idx =
			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
		buf->generic[i].data.cir_bw.bw_alloc =
			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
		buf->generic[i].data.eir_bw.bw_profile_idx =
			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
		buf->generic[i].data.eir_bw.bw_alloc =
			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
	}

	status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
					&num_groups_added, NULL);
	if (status || num_groups_added != 1) {
		ice_debug(hw, ICE_DBG_SCHED, "add elements failed\n");
		devm_kfree(ice_hw_to_dev(hw), buf);
		return ICE_ERR_CFG;
	}

	*num_nodes_added = num_nodes;
	/* add nodes to the SW DB */
	for (i = 0; i < num_nodes; i++) {
		status = ice_sched_add_node(pi, layer, &buf->generic[i]);
		if (status) {
			ice_debug(hw, ICE_DBG_SCHED,
				  "add nodes in SW DB failed status =%d\n",
				  status);
			break;
		}

		teid = le32_to_cpu(buf->generic[i].node_teid);
		new_node = ice_sched_find_node_by_teid(parent, teid);
		if (!new_node) {
			ice_debug(hw, ICE_DBG_SCHED,
				  "Node is missing for teid =%d\n", teid);
			break;
		}

		new_node->sibling = NULL;
		new_node->tc_num = tc_node->tc_num;

		/* add it to the previous node's sibling pointer */
		/* Note: siblings are not linked across branches */
		prev = ice_sched_get_first_node(hw, tc_node, layer);
		if (prev && prev != new_node) {
			while (prev->sibling)
				prev = prev->sibling;
			prev->sibling = new_node;
		}

		if (i == 0)
			*first_node_teid = teid;
	}

	devm_kfree(ice_hw_to_dev(hw), buf);
	return status;
}

/**
 * ice_sched_add_nodes_to_layer - Add nodes to a given layer
 * @pi: port information structure
 * @tc_node: pointer to TC node
 * @parent: pointer to parent node
 * @layer: layer number to add nodes
 * @num_nodes: number of nodes to be added
 * @first_node_teid: pointer to the first node teid
 * @num_nodes_added: pointer to number of nodes added
 *
 * This function adds nodes to a given layer.
 */
static enum ice_status
ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
			     struct ice_sched_node *tc_node,
			     struct ice_sched_node *parent, u8 layer,
			     u16 num_nodes, u32 *first_node_teid,
			     u16 *num_nodes_added)
{
	u32 *first_teid_ptr = first_node_teid;
	u16 new_num_nodes, max_child_nodes;
	enum ice_status status = 0;
	struct ice_hw *hw = pi->hw;
	u16 num_added = 0;
	u32 temp;

	*num_nodes_added = 0;

	if (!num_nodes)
		return status;

	if (!parent || layer < hw->sw_entry_point_layer)
		return ICE_ERR_PARAM;

	/* max children per node per layer */
	max_child_nodes = hw->max_children[parent->tx_sched_layer];

	/* do the current children plus the requested nodes exceed the max? */
	if ((parent->num_children + num_nodes) > max_child_nodes) {
		/* Fail if the parent is a TC node */
		if (parent == tc_node)
			return ICE_ERR_CFG;

		/* use up the remaining slots if the parent is not full */
		if (parent->num_children < max_child_nodes) {
			new_num_nodes = max_child_nodes - parent->num_children;
			/* this recursion is intentional, and won't
			 * go more than 2 calls deep
			 */
			status = ice_sched_add_nodes_to_layer(pi, tc_node,
							      parent, layer,
							      new_num_nodes,
							      first_node_teid,
							      &num_added);
			if (status)
				return status;

			*num_nodes_added += num_added;
		}
		/* Don't modify the first node teid memory if the first node
		 * was already added in the above call. Instead, pass temp
		 * memory to all other recursive calls.
		 */
		if (num_added)
			first_teid_ptr = &temp;

		new_num_nodes = num_nodes - num_added;

		/* This parent is full, try the next sibling */
		parent = parent->sibling;

		/* this recursion is intentional: for 1024 queues per
		 * VSI, it takes at most 16 iterations.
		 * 1024 / 8 = 128 layer 8 nodes
		 * 128 / 8 = 16 (add 8 nodes per iteration)
		 */
		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
						      layer, new_num_nodes,
						      first_teid_ptr,
						      &num_added);
		*num_nodes_added += num_added;
		return status;
	}

	status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
				     num_nodes_added, first_node_teid);
	return status;
}

/**
 * ice_sched_get_qgrp_layer - get the current queue group layer number
 * @hw: pointer to the hw struct
 *
 * This function returns the current queue group layer number
 */
static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
{
	/* The queue group is always at total layers - 1; the layer
	 * indices are 0-based, hence the offset of 2
	 */
	return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
}
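
/* Worked example: with the full 9-layer tree the leaves sit at index 8
 * (0-based), so the queue group layer is one above them:
 * 9 - ICE_QGRP_LAYER_OFFSET = 9 - 2 = 7.
 */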

/**
 * ice_sched_get_vsi_layer - get the current VSI layer number
 * @hw: pointer to the hw struct
 *
 * This function returns the current VSI layer number
 */
static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
{
	/* Num Layers       VSI layer
	 *     9               6
	 *     7               4
	 *     5 or less       sw_entry_point_layer
	 */
	/* calculate the VSI layer based on the number of layers. */
	if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
		u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;

		if (layer > hw->sw_entry_point_layer)
			return layer;
	}
	return hw->sw_entry_point_layer;
}

/**
 * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
 * @pi: port information structure
 *
 * This function removes the leaf node that was created by the FW
 * during initialization
 */
static void
ice_rm_dflt_leaf_node(struct ice_port_info *pi)
{
	struct ice_sched_node *node;

	node = pi->root;
	while (node) {
		if (!node->num_children)
			break;
		node = node->children[0];
	}
	if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
		u32 teid = le32_to_cpu(node->info.node_teid);
		enum ice_status status;

		/* remove the default leaf node */
		status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
		if (!status)
			ice_free_sched_node(pi, node);
	}
}

/**
 * ice_sched_rm_dflt_nodes - free the default nodes in the tree
 * @pi: port information structure
 *
 * This function frees all the nodes except root and TC that were created by
 * the FW during initialization
 */
static void
ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
{
	struct ice_sched_node *node;

	ice_rm_dflt_leaf_node(pi);

	/* remove the default nodes except TC and root nodes */
	node = pi->root;
	while (node) {
		if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
			ice_free_sched_node(pi, node);
			break;
		}

		if (!node->num_children)
			break;
		node = node->children[0];
	}
}

/**
 * ice_sched_init_port - Initialize scheduler by querying information from FW
 * @pi: port information structure
 *
 * This function is the initial call to find the total number of Tx scheduler
 * resources and the default topology created by firmware, and to store that
 * information in the SW DB.
 */
enum ice_status ice_sched_init_port(struct ice_port_info *pi)
{
	struct ice_aqc_get_topo_elem *buf;
	enum ice_status status;
	struct ice_hw *hw;
	u8 num_branches;
	u16 num_elems;
	u8 i, j;

	if (!pi)
		return ICE_ERR_PARAM;
	hw = pi->hw;

	/* Query the Default Topology from FW */
	buf = devm_kzalloc(ice_hw_to_dev(hw), ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
	if (!buf)
		return ICE_ERR_NO_MEMORY;

	/* Query default scheduling tree topology */
	status = ice_aq_get_dflt_topo(hw, pi->lport, buf, ICE_AQ_MAX_BUF_LEN,
				      &num_branches, NULL);
	if (status)
		goto err_init_port;

	/* num_branches should be between 1-8 */
	if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
		ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
			  num_branches);
		status = ICE_ERR_PARAM;
		goto err_init_port;
	}

	/* get the number of elements on the default/first branch */
	num_elems = le16_to_cpu(buf[0].hdr.num_elems);

	/* num_elems should always be between 1-9 */
	if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
		ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
			  num_elems);
		status = ICE_ERR_PARAM;
		goto err_init_port;
	}

	/* If the last node is a leaf node then the index of the Q group
	 * layer is two less than the number of elements.
	 */
	if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
	    ICE_AQC_ELEM_TYPE_LEAF)
		pi->last_node_teid =
			le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
	else
		pi->last_node_teid =
			le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);

	/* Insert the Tx Sched root node */
	status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
	if (status)
		goto err_init_port;

	/* Parse the default tree and cache the information */
	for (i = 0; i < num_branches; i++) {
		num_elems = le16_to_cpu(buf[i].hdr.num_elems);

		/* Skip root element as already inserted */
		for (j = 1; j < num_elems; j++) {
			/* update the sw entry point */
			if (buf[0].generic[j].data.elem_type ==
			    ICE_AQC_ELEM_TYPE_ENTRY_POINT)
				hw->sw_entry_point_layer = j;

			status = ice_sched_add_node(pi, j, &buf[i].generic[j]);
			if (status)
				goto err_init_port;
		}
	}

	/* Remove the default nodes. */
	if (pi->root)
		ice_sched_rm_dflt_nodes(pi);

	/* initialize the port for handling the scheduler tree */
	pi->port_state = ICE_SCHED_PORT_STATE_READY;
	mutex_init(&pi->sched_lock);
	INIT_LIST_HEAD(&pi->agg_list);
	INIT_LIST_HEAD(&pi->vsi_info_list);

err_init_port:
	if (status && pi->root) {
		ice_free_sched_node(pi, pi->root);
		pi->root = NULL;
	}

	devm_kfree(ice_hw_to_dev(hw), buf);
	return status;
}

/**
 * ice_sched_query_res_alloc - query the FW for num of logical sched layers
 * @hw: pointer to the HW struct
 *
 * Query FW for allocated scheduler resources and store them in the HW struct
 */
enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
{
	struct ice_aqc_query_txsched_res_resp *buf;
	enum ice_status status = 0;
	__le16 max_sibl;
	u8 i;

	if (hw->layer_info)
		return status;

	buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
	if (!buf)
		return ICE_ERR_NO_MEMORY;

	status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
	if (status)
		goto sched_query_out;

	hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
	hw->num_tx_sched_phys_layers =
		le16_to_cpu(buf->sched_props.phys_levels);
	hw->flattened_layers = buf->sched_props.flattening_bitmap;
	hw->max_cgds = buf->sched_props.max_pf_cgds;

	/* The max sibling group size of the current layer refers to the
	 * max children of the node in the layer below it.
	 * layer 1 node max children will be layer 2 max sibling group size
	 * layer 2 node max children will be layer 3 max sibling group size
	 * and so on. This array will be populated from root (index 0) to
	 * qgroup layer 7. Leaf nodes have no children.
	 */
	for (i = 0; i < hw->num_tx_sched_layers; i++) {
		max_sibl = buf->layer_props[i].max_sibl_grp_sz;
		hw->max_children[i] = le16_to_cpu(max_sibl);
	}

	hw->layer_info = (struct ice_aqc_layer_props *)
			  devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
				       (hw->num_tx_sched_layers *
					sizeof(*hw->layer_info)),
				       GFP_KERNEL);
	if (!hw->layer_info) {
		status = ICE_ERR_NO_MEMORY;
		goto sched_query_out;
	}

sched_query_out:
	devm_kfree(ice_hw_to_dev(hw), buf);
	return status;
}

/**
 * ice_sched_get_vsi_info_entry - Get the VSI entry for the given vsi_id
 * @pi: port information structure
 * @vsi_id: vsi id
 *
 * This function retrieves the VSI entry for the given vsi id
 */
static struct ice_sched_vsi_info *
ice_sched_get_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id)
{
	struct ice_sched_vsi_info *list_elem;

	if (!pi)
		return NULL;

	list_for_each_entry(list_elem, &pi->vsi_info_list, list_entry)
		if (list_elem->vsi_id == vsi_id)
			return list_elem;
	return NULL;
}

/**
 * ice_sched_find_node_in_subtree - Find node in part of base node subtree
 * @hw: pointer to the hw struct
 * @base: pointer to the base node
 * @node: pointer to the node to search
 *
 * This function checks whether a given node is part of the base node
 * subtree or not
 */
static bool
ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
			       struct ice_sched_node *node)
{
	u8 i;

	for (i = 0; i < base->num_children; i++) {
		struct ice_sched_node *child = base->children[i];

		if (node == child)
			return true;

		if (child->tx_sched_layer > node->tx_sched_layer)
			return false;

		/* this recursion is intentional, and won't
		 * go more than 8 calls deep
		 */
		if (ice_sched_find_node_in_subtree(hw, child, node))
			return true;
	}
	return false;
}

/**
 * ice_sched_get_free_qparent - Get a free lan or rdma q group node
 * @pi: port information structure
 * @vsi_id: vsi id
 * @tc: branch number
 * @owner: lan or rdma
 *
 * This function retrieves a free lan or rdma q group node
 */
struct ice_sched_node *
ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc,
			   u8 owner)
{
	struct ice_sched_node *vsi_node, *qgrp_node = NULL;
	struct ice_sched_vsi_info *list_elem;
	u16 max_children;
	u8 qgrp_layer;

	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
	max_children = pi->hw->max_children[qgrp_layer];

	list_elem = ice_sched_get_vsi_info_entry(pi, vsi_id);
	if (!list_elem)
		goto lan_q_exit;

	vsi_node = list_elem->vsi_node[tc];

	/* check for an invalid VSI id */
	if (!vsi_node)
		goto lan_q_exit;

	/* get the first q group node from VSI sub-tree */
	qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
	while (qgrp_node) {
		/* make sure the qgroup node is part of the VSI subtree */
		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
			if (qgrp_node->num_children < max_children &&
			    qgrp_node->owner == owner)
				break;
		qgrp_node = qgrp_node->sibling;
	}

lan_q_exit:
	return qgrp_node;
}

/**
 * ice_sched_get_vsi_node - Get a VSI node based on VSI id
 * @hw: pointer to the hw struct
 * @tc_node: pointer to the TC node
 * @vsi_id: VSI id
 *
 * This function retrieves a VSI node for a given VSI id from a given
 * TC branch
 */
static struct ice_sched_node *
ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
		       u16 vsi_id)
{
	struct ice_sched_node *node;
	u8 vsi_layer;

	vsi_layer = ice_sched_get_vsi_layer(hw);
	node = ice_sched_get_first_node(hw, tc_node, vsi_layer);

	/* Check whether it already exists */
	while (node) {
		if (node->vsi_id == vsi_id)
			return node;
		node = node->sibling;
	}

	return node;
}

/**
 * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
 * @hw: pointer to the hw struct
 * @num_qs: number of queues
 * @num_nodes: num nodes array
 *
 * This function calculates the number of VSI child nodes based on the
 * number of queues.
 */
static void
ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
{
	u16 num = num_qs;
	u8 i, qgl, vsil;

	qgl = ice_sched_get_qgrp_layer(hw);
	vsil = ice_sched_get_vsi_layer(hw);

	/* calculate num nodes from q group to VSI layer */
	for (i = qgl; i > vsil; i--) {
		/* round to the next integer if there is a remainder */
		num = DIV_ROUND_UP(num, hw->max_children[i]);

		/* need at least one node */
		num_nodes[i] = num ? num : 1;
	}
}
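
/* Worked example (9-layer tree, so qgl = 7 and vsil = 6; assuming
 * hw->max_children[7] = 8): for num_qs = 1024 the single loop iteration
 * gives
 *
 *	num_nodes[7] = DIV_ROUND_UP(1024, 8) = 128
 *
 * queue group nodes. The "num ? num : 1" clause guarantees at least one
 * node per layer even for very small queue counts.
 */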

/**
 * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
 * @pi: port information structure
 * @vsi_id: VSI Id
 * @tc_node: pointer to the TC node
 * @num_nodes: pointer to the num nodes that need to be added per layer
 * @owner: node owner (lan or rdma)
 *
 * This function adds the VSI child nodes to the tree. It gets called for
 * lan and rdma separately.
 */
static enum ice_status
ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id,
			      struct ice_sched_node *tc_node, u16 *num_nodes,
			      u8 owner)
{
	struct ice_sched_node *parent, *node;
	struct ice_hw *hw = pi->hw;
	enum ice_status status;
	u32 first_node_teid;
	u16 num_added = 0;
	u8 i, qgl, vsil;

	qgl = ice_sched_get_qgrp_layer(hw);
	vsil = ice_sched_get_vsi_layer(hw);
	parent = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
	for (i = vsil + 1; i <= qgl; i++) {
		if (!parent)
			return ICE_ERR_CFG;

		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
						      num_nodes[i],
						      &first_node_teid,
						      &num_added);
		if (status || num_nodes[i] != num_added)
			return ICE_ERR_CFG;

		/* The newly added node can be a new parent for the next
		 * layer nodes
		 */
		if (num_added) {
			parent = ice_sched_find_node_by_teid(tc_node,
							     first_node_teid);
			node = parent;
			while (node) {
				node->owner = owner;
				node = node->sibling;
			}
		} else {
			parent = parent->children[0];
		}
	}

	return 0;
}

/**
 * ice_sched_rm_vsi_child_nodes - remove VSI child nodes from the tree
 * @pi: port information structure
 * @vsi_node: pointer to the VSI node
 * @num_nodes: pointer to the num nodes that need to be removed per layer
 * @owner: node owner (lan or rdma)
 *
 * This function removes the VSI child nodes from the tree. It gets called for
 * lan and rdma separately.
 */
static void
ice_sched_rm_vsi_child_nodes(struct ice_port_info *pi,
			     struct ice_sched_node *vsi_node, u16 *num_nodes,
			     u8 owner)
{
	struct ice_sched_node *node, *next;
	u8 i, qgl, vsil;
	u16 num;

	qgl = ice_sched_get_qgrp_layer(pi->hw);
	vsil = ice_sched_get_vsi_layer(pi->hw);

	for (i = qgl; i > vsil; i--) {
		num = num_nodes[i];
		node = ice_sched_get_first_node(pi->hw, vsi_node, i);
		while (node && num) {
			next = node->sibling;
			if (node->owner == owner && !node->num_children) {
				ice_free_sched_node(pi, node);
				num--;
			}
			node = next;
		}
	}
}

/**
 * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
 * @hw: pointer to the hw struct
 * @tc_node: pointer to TC node
 * @num_nodes: pointer to num nodes array
 *
 * This function calculates the number of support nodes needed to add this
 * VSI into the Tx tree, including the VSI, its parent and the intermediate
 * nodes in the layers below
 */
static void
ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
				 struct ice_sched_node *tc_node, u16 *num_nodes)
{
	struct ice_sched_node *node;
	u8 vsil;
	int i;

	vsil = ice_sched_get_vsi_layer(hw);
	for (i = vsil; i >= hw->sw_entry_point_layer; i--)
		/* Add intermediate nodes if the TC has no children and
		 * at least one node is needed for the VSI
		 */
		if (!tc_node->num_children || i == vsil) {
			num_nodes[i]++;
		} else {
			/* If the intermediate nodes have reached max
			 * children, then add a new one.
			 */
			node = ice_sched_get_first_node(hw, tc_node, (u8)i);
			/* scan all the siblings */
			while (node) {
				if (node->num_children < hw->max_children[i])
					break;
				node = node->sibling;
			}

			/* all the nodes are full, allocate a new one */
			if (!node)
				num_nodes[i]++;
		}
}

/**
 * ice_sched_add_vsi_support_nodes - add VSI support nodes into the Tx tree
 * @pi: port information structure
 * @vsi_id: VSI Id
 * @tc_node: pointer to TC node
 * @num_nodes: pointer to num nodes array
 *
 * This function adds the VSI support nodes into the Tx tree, including the
 * VSI, its parent and the intermediate nodes in the layers below
 */
static enum ice_status
ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_id,
				struct ice_sched_node *tc_node, u16 *num_nodes)
{
	struct ice_sched_node *parent = tc_node;
	enum ice_status status;
	u32 first_node_teid;
	u16 num_added = 0;
	u8 i, vsil;

	if (!pi)
		return ICE_ERR_PARAM;

	vsil = ice_sched_get_vsi_layer(pi->hw);
	for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
						      i, num_nodes[i],
						      &first_node_teid,
						      &num_added);
		if (status || num_nodes[i] != num_added)
			return ICE_ERR_CFG;

		/* The newly added node can be a new parent for the next
		 * layer nodes
		 */
		if (num_added)
			parent = ice_sched_find_node_by_teid(tc_node,
							     first_node_teid);
		else
			parent = parent->children[0];

		if (!parent)
			return ICE_ERR_CFG;

		if (i == vsil)
			parent->vsi_id = vsi_id;
	}

	return 0;
}

/**
 * ice_sched_add_vsi_to_topo - add a new VSI into the tree
 * @pi: port information structure
 * @vsi_id: VSI Id
 * @tc: TC number
 *
 * This function adds a new VSI into the scheduler tree
 */
static enum ice_status
ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_id, u8 tc)
{
	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
	struct ice_sched_node *tc_node;
	struct ice_hw *hw = pi->hw;

	tc_node = ice_sched_get_tc_node(pi, tc);
	if (!tc_node)
		return ICE_ERR_PARAM;

	/* calculate number of support nodes needed for this VSI */
	ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);

	/* add VSI support nodes to the TC subtree */
	return ice_sched_add_vsi_support_nodes(pi, vsi_id, tc_node, num_nodes);
}

/**
 * ice_sched_update_vsi_child_nodes - update VSI child nodes
 * @pi: port information structure
 * @vsi_id: VSI Id
 * @tc: TC number
 * @new_numqs: new number of max queues
 * @owner: owner of this subtree
 *
 * This function updates the VSI child nodes based on the number of queues
 */
static enum ice_status
ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, u8 tc,
				 u16 new_numqs, u8 owner)
{
	u16 prev_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
	struct ice_sched_node *vsi_node;
	struct ice_sched_node *tc_node;
	struct ice_sched_vsi_info *vsi;
	enum ice_status status = 0;
	struct ice_hw *hw = pi->hw;
	u16 prev_numqs;
	u8 i;

	tc_node = ice_sched_get_tc_node(pi, tc);
	if (!tc_node)
		return ICE_ERR_CFG;

	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
	if (!vsi_node)
		return ICE_ERR_CFG;

	vsi = ice_sched_get_vsi_info_entry(pi, vsi_id);
	if (!vsi)
		return ICE_ERR_CFG;

	if (owner == ICE_SCHED_NODE_OWNER_LAN)
		prev_numqs = vsi->max_lanq[tc];
	else
		return ICE_ERR_PARAM;

	/* the number of queues is unchanged */
	if (prev_numqs == new_numqs)
		return status;

	/* calculate number of nodes based on prev/new number of qs */
	if (prev_numqs)
		ice_sched_calc_vsi_child_nodes(hw, prev_numqs, prev_num_nodes);

	if (new_numqs)
		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);

	if (prev_numqs > new_numqs) {
		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
			new_num_nodes[i] = prev_num_nodes[i] - new_num_nodes[i];

		ice_sched_rm_vsi_child_nodes(pi, vsi_node, new_num_nodes,
					     owner);
	} else {
		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
			new_num_nodes[i] -= prev_num_nodes[i];

		status = ice_sched_add_vsi_child_nodes(pi, vsi_id, tc_node,
						       new_num_nodes, owner);
		if (status)
			return status;
	}

	vsi->max_lanq[tc] = new_numqs;

	return status;
}
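
/* Worked example: growing a VSI from 64 to 128 LAN queues. Both counts
 * are expanded into per-layer node requirements and only the delta is
 * added; with a 9-layer tree and hw->max_children[7] = 8 (assumed):
 *
 *	prev_num_nodes[7] = DIV_ROUND_UP(64, 8)  = 8
 *	new_num_nodes[7]  = DIV_ROUND_UP(128, 8) = 16
 *	delta             = 16 - 8 = 8 new queue group nodes
 *
 * Shrinking takes the other branch and removes the difference instead.
 */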

/**
 * ice_sched_cfg_vsi - configure the new/existing VSI
 * @pi: port information structure
 * @vsi_id: VSI Id
 * @tc: TC number
 * @maxqs: max number of queues
 * @owner: lan or rdma
 * @enable: TC enabled or disabled
 *
 * This function adds/updates VSI nodes based on the number of queues. If TC is
 * enabled and the VSI is in the suspended state, then resume it. If TC is
 * disabled, then suspend the VSI if it is not already suspended.
 */
enum ice_status
ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs,
		  u8 owner, bool enable)
{
	struct ice_sched_node *vsi_node, *tc_node;
	struct ice_sched_vsi_info *vsi;
	enum ice_status status = 0;
	struct ice_hw *hw = pi->hw;

	tc_node = ice_sched_get_tc_node(pi, tc);
	if (!tc_node)
		return ICE_ERR_PARAM;

	vsi = ice_sched_get_vsi_info_entry(pi, vsi_id);
	if (!vsi)
		vsi = ice_sched_create_vsi_info_entry(pi, vsi_id);
	if (!vsi)
		return ICE_ERR_NO_MEMORY;

	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);

	/* suspend the VSI if the TC is not enabled */
	if (!enable) {
		if (vsi_node && vsi_node->in_use) {
			u32 teid = le32_to_cpu(vsi_node->info.node_teid);

			status = ice_sched_suspend_resume_elems(hw, 1, &teid,
								true);
			if (!status)
				vsi_node->in_use = false;
		}
		return status;
	}

	/* TC is enabled, if it is a new VSI then add it to the tree */
	if (!vsi_node) {
		status = ice_sched_add_vsi_to_topo(pi, vsi_id, tc);
		if (status)
			return status;

		vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
		if (!vsi_node)
			return ICE_ERR_CFG;

		vsi->vsi_node[tc] = vsi_node;
		vsi_node->in_use = true;
	}

	/* update the VSI child nodes */
	status = ice_sched_update_vsi_child_nodes(pi, vsi_id, tc, maxqs, owner);
	if (status)
		return status;

	/* TC is enabled, resume the VSI if it is in the suspend state */
	if (!vsi_node->in_use) {
		u32 teid = le32_to_cpu(vsi_node->info.node_teid);

		status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
		if (!status)
			vsi_node->in_use = true;
	}

	return status;
}
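
/* Usage sketch (hypothetical values): enable TC 0 for a VSI with up to
 * 64 LAN queues, then later suspend it by disabling the TC:
 *
 *	status = ice_sched_cfg_vsi(pi, vsi_id, 0, 64,
 *				   ICE_SCHED_NODE_OWNER_LAN, true);
 *	...
 *	status = ice_sched_cfg_vsi(pi, vsi_id, 0, 0,
 *				   ICE_SCHED_NODE_OWNER_LAN, false);
 */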
1598