xref: /openbmc/linux/drivers/net/ethernet/intel/ice/ice_sched.c (revision 5ef12cb4a3a78ffb331c03a795a15eea4ae35155)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018, Intel Corporation. */
3 
4 #include "ice_sched.h"
5 
6 /**
7  * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
8  * @pi: port information structure
9  * @info: Scheduler element information from firmware
10  *
11  * This function inserts the root node of the scheduling tree topology
12  * to the SW DB.
13  */
14 static enum ice_status
15 ice_sched_add_root_node(struct ice_port_info *pi,
16 			struct ice_aqc_txsched_elem_data *info)
17 {
18 	struct ice_sched_node *root;
19 	struct ice_hw *hw;
20 	u16 max_children;
21 
22 	if (!pi)
23 		return ICE_ERR_PARAM;
24 
25 	hw = pi->hw;
26 
27 	root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
28 	if (!root)
29 		return ICE_ERR_NO_MEMORY;
30 
31 	max_children = le16_to_cpu(hw->layer_info[0].max_children);
32 	root->children = devm_kcalloc(ice_hw_to_dev(hw), max_children,
33 				      sizeof(*root), GFP_KERNEL);
34 	if (!root->children) {
35 		devm_kfree(ice_hw_to_dev(hw), root);
36 		return ICE_ERR_NO_MEMORY;
37 	}
38 
39 	memcpy(&root->info, info, sizeof(*info));
40 	pi->root = root;
41 	return 0;
42 }
43 
44 /**
45  * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
46  * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
47  * @teid: node teid to search
48  *
49  * This function searches for a node matching the teid in the scheduling tree
50  * from the SW DB. The search is recursive and is restricted by the number of
51  * layers it has searched through; stopping at the max supported layer.
52  *
53  * This function needs to be called when holding the port_info->sched_lock
54  */
55 struct ice_sched_node *
56 ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
57 {
58 	u16 i;
59 
60 	/* The TEID is same as that of the start_node */
61 	if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
62 		return start_node;
63 
64 	/* The node has no children or is at the max layer */
65 	if (!start_node->num_children ||
66 	    start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
67 	    start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
68 		return NULL;
69 
70 	/* Check if teid matches to any of the children nodes */
71 	for (i = 0; i < start_node->num_children; i++)
72 		if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
73 			return start_node->children[i];
74 
75 	/* Search within each child's sub-tree */
76 	for (i = 0; i < start_node->num_children; i++) {
77 		struct ice_sched_node *tmp;
78 
79 		tmp = ice_sched_find_node_by_teid(start_node->children[i],
80 						  teid);
81 		if (tmp)
82 			return tmp;
83 	}
84 
85 	return NULL;
86 }
87 
88 /**
89  * ice_sched_add_node - Insert the Tx scheduler node in SW DB
90  * @pi: port information structure
91  * @layer: Scheduler layer of the node
92  * @info: Scheduler element information from firmware
93  *
94  * This function inserts a scheduler node to the SW DB.
95  */
96 enum ice_status
97 ice_sched_add_node(struct ice_port_info *pi, u8 layer,
98 		   struct ice_aqc_txsched_elem_data *info)
99 {
100 	struct ice_sched_node *parent;
101 	struct ice_sched_node *node;
102 	struct ice_hw *hw;
103 	u16 max_children;
104 
105 	if (!pi)
106 		return ICE_ERR_PARAM;
107 
108 	hw = pi->hw;
109 
110 	/* A valid parent node should be there */
111 	parent = ice_sched_find_node_by_teid(pi->root,
112 					     le32_to_cpu(info->parent_teid));
113 	if (!parent) {
114 		ice_debug(hw, ICE_DBG_SCHED,
115 			  "Parent Node not found for parent_teid=0x%x\n",
116 			  le32_to_cpu(info->parent_teid));
117 		return ICE_ERR_PARAM;
118 	}
119 
120 	node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
121 	if (!node)
122 		return ICE_ERR_NO_MEMORY;
123 	max_children = le16_to_cpu(hw->layer_info[layer].max_children);
124 	if (max_children) {
125 		node->children = devm_kcalloc(ice_hw_to_dev(hw), max_children,
126 					      sizeof(*node), GFP_KERNEL);
127 		if (!node->children) {
128 			devm_kfree(ice_hw_to_dev(hw), node);
129 			return ICE_ERR_NO_MEMORY;
130 		}
131 	}
132 
133 	node->in_use = true;
134 	node->parent = parent;
135 	node->tx_sched_layer = layer;
136 	parent->children[parent->num_children++] = node;
137 	memcpy(&node->info, info, sizeof(*info));
138 	return 0;
139 }
140 
141 /**
142  * ice_aq_delete_sched_elems - delete scheduler elements
143  * @hw: pointer to the hw struct
144  * @grps_req: number of groups to delete
145  * @buf: pointer to buffer
146  * @buf_size: buffer size in bytes
147  * @grps_del: returns total number of elements deleted
148  * @cd: pointer to command details structure or NULL
149  *
150  * Delete scheduling elements (0x040F)
151  */
152 static enum ice_status
153 ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
154 			  struct ice_aqc_delete_elem *buf, u16 buf_size,
155 			  u16 *grps_del, struct ice_sq_cd *cd)
156 {
157 	struct ice_aqc_add_move_delete_elem *cmd;
158 	struct ice_aq_desc desc;
159 	enum ice_status status;
160 
161 	cmd = &desc.params.add_move_delete_elem;
162 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_delete_sched_elems);
163 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
164 	cmd->num_grps_req = cpu_to_le16(grps_req);
165 
166 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
167 	if (!status && grps_del)
168 		*grps_del = le16_to_cpu(cmd->num_grps_updated);
169 
170 	return status;
171 }
172 
173 /**
174  * ice_sched_remove_elems - remove nodes from hw
175  * @hw: pointer to the hw struct
176  * @parent: pointer to the parent node
177  * @num_nodes: number of nodes
178  * @node_teids: array of node teids to be deleted
179  *
180  * This function remove nodes from hw
181  */
182 static enum ice_status
183 ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
184 		       u16 num_nodes, u32 *node_teids)
185 {
186 	struct ice_aqc_delete_elem *buf;
187 	u16 i, num_groups_removed = 0;
188 	enum ice_status status;
189 	u16 buf_size;
190 
191 	buf_size = sizeof(*buf) + sizeof(u32) * (num_nodes - 1);
192 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
193 	if (!buf)
194 		return ICE_ERR_NO_MEMORY;
195 	buf->hdr.parent_teid = parent->info.node_teid;
196 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
197 	for (i = 0; i < num_nodes; i++)
198 		buf->teid[i] = cpu_to_le32(node_teids[i]);
199 	status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
200 					   &num_groups_removed, NULL);
201 	if (status || num_groups_removed != 1)
202 		ice_debug(hw, ICE_DBG_SCHED, "remove elements failed\n");
203 	devm_kfree(ice_hw_to_dev(hw), buf);
204 	return status;
205 }
206 
207 /**
208  * ice_sched_get_first_node - get the first node of the given layer
209  * @hw: pointer to the hw struct
210  * @parent: pointer the base node of the subtree
211  * @layer: layer number
212  *
213  * This function retrieves the first node of the given layer from the subtree
214  */
215 static struct ice_sched_node *
216 ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent,
217 			 u8 layer)
218 {
219 	u8 i;
220 
221 	if (layer < hw->sw_entry_point_layer)
222 		return NULL;
223 	for (i = 0; i < parent->num_children; i++) {
224 		struct ice_sched_node *node = parent->children[i];
225 
226 		if (node) {
227 			if (node->tx_sched_layer == layer)
228 				return node;
229 			/* this recursion is intentional, and wouldn't
230 			 * go more than 9 calls
231 			 */
232 			return ice_sched_get_first_node(hw, node, layer);
233 		}
234 	}
235 	return NULL;
236 }
237 
238 /**
239  * ice_sched_get_tc_node - get pointer to TC node
240  * @pi: port information structure
241  * @tc: TC number
242  *
243  * This function returns the TC node pointer
244  */
245 struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
246 {
247 	u8 i;
248 
249 	if (!pi)
250 		return NULL;
251 	for (i = 0; i < pi->root->num_children; i++)
252 		if (pi->root->children[i]->tc_num == tc)
253 			return pi->root->children[i];
254 	return NULL;
255 }
256 
257 /**
258  * ice_free_sched_node - Free a Tx scheduler node from SW DB
259  * @pi: port information structure
260  * @node: pointer to the ice_sched_node struct
261  *
262  * This function frees up a node from SW DB as well as from HW
263  *
264  * This function needs to be called with the port_info->sched_lock held
265  */
266 void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
267 {
268 	struct ice_sched_node *parent;
269 	struct ice_hw *hw = pi->hw;
270 	u8 i, j;
271 
272 	/* Free the children before freeing up the parent node
273 	 * The parent array is updated below and that shifts the nodes
274 	 * in the array. So always pick the first child if num children > 0
275 	 */
276 	while (node->num_children)
277 		ice_free_sched_node(pi, node->children[0]);
278 
279 	/* Leaf, TC and root nodes can't be deleted by SW */
280 	if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
281 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
282 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
283 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
284 		u32 teid = le32_to_cpu(node->info.node_teid);
285 		enum ice_status status;
286 
287 		status = ice_sched_remove_elems(hw, node->parent, 1, &teid);
288 		if (status)
289 			ice_debug(hw, ICE_DBG_SCHED,
290 				  "remove element failed %d\n", status);
291 	}
292 	parent = node->parent;
293 	/* root has no parent */
294 	if (parent) {
295 		struct ice_sched_node *p, *tc_node;
296 
297 		/* update the parent */
298 		for (i = 0; i < parent->num_children; i++)
299 			if (parent->children[i] == node) {
300 				for (j = i + 1; j < parent->num_children; j++)
301 					parent->children[j - 1] =
302 						parent->children[j];
303 				parent->num_children--;
304 				break;
305 			}
306 
307 		/* search for previous sibling that points to this node and
308 		 * remove the reference
309 		 */
310 		tc_node = ice_sched_get_tc_node(pi, node->tc_num);
311 		if (!tc_node) {
312 			ice_debug(hw, ICE_DBG_SCHED,
313 				  "Invalid TC number %d\n", node->tc_num);
314 			goto err_exit;
315 		}
316 		p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer);
317 		while (p) {
318 			if (p->sibling == node) {
319 				p->sibling = node->sibling;
320 				break;
321 			}
322 			p = p->sibling;
323 		}
324 	}
325 err_exit:
326 	/* leaf nodes have no children */
327 	if (node->children)
328 		devm_kfree(ice_hw_to_dev(hw), node->children);
329 	devm_kfree(ice_hw_to_dev(hw), node);
330 }
331 
332 /**
333  * ice_aq_get_dflt_topo - gets default scheduler topology
334  * @hw: pointer to the hw struct
335  * @lport: logical port number
336  * @buf: pointer to buffer
337  * @buf_size: buffer size in bytes
338  * @num_branches: returns total number of queue to port branches
339  * @cd: pointer to command details structure or NULL
340  *
341  * Get default scheduler topology (0x400)
342  */
343 static enum ice_status
344 ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
345 		     struct ice_aqc_get_topo_elem *buf, u16 buf_size,
346 		     u8 *num_branches, struct ice_sq_cd *cd)
347 {
348 	struct ice_aqc_get_topo *cmd;
349 	struct ice_aq_desc desc;
350 	enum ice_status status;
351 
352 	cmd = &desc.params.get_topo;
353 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
354 	cmd->port_num = lport;
355 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
356 	if (!status && num_branches)
357 		*num_branches = cmd->num_branches;
358 
359 	return status;
360 }
361 
362 /**
363  * ice_aq_add_sched_elems - adds scheduling element
364  * @hw: pointer to the hw struct
365  * @grps_req: the number of groups that are requested to be added
366  * @buf: pointer to buffer
367  * @buf_size: buffer size in bytes
368  * @grps_added: returns total number of groups added
369  * @cd: pointer to command details structure or NULL
370  *
371  * Add scheduling elements (0x0401)
372  */
373 static enum ice_status
374 ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
375 		       struct ice_aqc_add_elem *buf, u16 buf_size,
376 		       u16 *grps_added, struct ice_sq_cd *cd)
377 {
378 	struct ice_aqc_add_move_delete_elem *cmd;
379 	struct ice_aq_desc desc;
380 	enum ice_status status;
381 
382 	cmd = &desc.params.add_move_delete_elem;
383 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_sched_elems);
384 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
385 
386 	cmd->num_grps_req = cpu_to_le16(grps_req);
387 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
388 	if (!status && grps_added)
389 		*grps_added = le16_to_cpu(cmd->num_grps_updated);
390 
391 	return status;
392 }
393 
394 /**
395  * ice_suspend_resume_elems - suspend/resume scheduler elements
396  * @hw: pointer to the hw struct
397  * @elems_req: number of elements to suspend
398  * @buf: pointer to buffer
399  * @buf_size: buffer size in bytes
400  * @elems_ret: returns total number of elements suspended
401  * @cd: pointer to command details structure or NULL
402  * @cmd_code: command code for suspend or resume
403  *
404  * suspend/resume scheduler elements
405  */
406 static enum ice_status
407 ice_suspend_resume_elems(struct ice_hw *hw, u16 elems_req,
408 			 struct ice_aqc_suspend_resume_elem *buf, u16 buf_size,
409 			 u16 *elems_ret, struct ice_sq_cd *cd,
410 			 enum ice_adminq_opc cmd_code)
411 {
412 	struct ice_aqc_get_cfg_elem *cmd;
413 	struct ice_aq_desc desc;
414 	enum ice_status status;
415 
416 	cmd = &desc.params.get_update_elem;
417 	ice_fill_dflt_direct_cmd_desc(&desc, cmd_code);
418 	cmd->num_elem_req = cpu_to_le16(elems_req);
419 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
420 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
421 	if (!status && elems_ret)
422 		*elems_ret = le16_to_cpu(cmd->num_elem_resp);
423 	return status;
424 }
425 
426 /**
427  * ice_aq_suspend_sched_elems - suspend scheduler elements
428  * @hw: pointer to the hw struct
429  * @elems_req: number of elements to suspend
430  * @buf: pointer to buffer
431  * @buf_size: buffer size in bytes
432  * @elems_ret: returns total number of elements suspended
433  * @cd: pointer to command details structure or NULL
434  *
435  * Suspend scheduling elements (0x0409)
436  */
437 static enum ice_status
438 ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req,
439 			   struct ice_aqc_suspend_resume_elem *buf,
440 			   u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
441 {
442 	return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
443 					cd, ice_aqc_opc_suspend_sched_elems);
444 }
445 
446 /**
447  * ice_aq_resume_sched_elems - resume scheduler elements
448  * @hw: pointer to the hw struct
449  * @elems_req: number of elements to resume
450  * @buf: pointer to buffer
451  * @buf_size: buffer size in bytes
452  * @elems_ret: returns total number of elements resumed
453  * @cd: pointer to command details structure or NULL
454  *
455  * resume scheduling elements (0x040A)
456  */
457 static enum ice_status
458 ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req,
459 			  struct ice_aqc_suspend_resume_elem *buf,
460 			  u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
461 {
462 	return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
463 					cd, ice_aqc_opc_resume_sched_elems);
464 }
465 
466 /**
467  * ice_aq_query_sched_res - query scheduler resource
468  * @hw: pointer to the hw struct
469  * @buf_size: buffer size in bytes
470  * @buf: pointer to buffer
471  * @cd: pointer to command details structure or NULL
472  *
473  * Query scheduler resource allocation (0x0412)
474  */
475 static enum ice_status
476 ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
477 		       struct ice_aqc_query_txsched_res_resp *buf,
478 		       struct ice_sq_cd *cd)
479 {
480 	struct ice_aq_desc desc;
481 
482 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
483 	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
484 }
485 
486 /**
487  * ice_sched_suspend_resume_elems - suspend or resume hw nodes
488  * @hw: pointer to the hw struct
489  * @num_nodes: number of nodes
490  * @node_teids: array of node teids to be suspended or resumed
491  * @suspend: true means suspend / false means resume
492  *
493  * This function suspends or resumes hw nodes
494  */
495 static enum ice_status
496 ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
497 			       bool suspend)
498 {
499 	struct ice_aqc_suspend_resume_elem *buf;
500 	u16 i, buf_size, num_elem_ret = 0;
501 	enum ice_status status;
502 
503 	buf_size = sizeof(*buf) * num_nodes;
504 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
505 	if (!buf)
506 		return ICE_ERR_NO_MEMORY;
507 
508 	for (i = 0; i < num_nodes; i++)
509 		buf->teid[i] = cpu_to_le32(node_teids[i]);
510 
511 	if (suspend)
512 		status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
513 						    buf_size, &num_elem_ret,
514 						    NULL);
515 	else
516 		status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
517 						   buf_size, &num_elem_ret,
518 						   NULL);
519 	if (status || num_elem_ret != num_nodes)
520 		ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");
521 
522 	devm_kfree(ice_hw_to_dev(hw), buf);
523 	return status;
524 }
525 
526 /**
527  * ice_sched_clear_tx_topo - clears the schduler tree nodes
528  * @pi: port information structure
529  *
530  * This function removes all the nodes from HW as well as from SW DB.
531  */
532 static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
533 {
534 	struct ice_sched_agg_info *agg_info;
535 	struct ice_sched_vsi_info *vsi_elem;
536 	struct ice_sched_agg_info *atmp;
537 	struct ice_sched_vsi_info *tmp;
538 	struct ice_hw *hw;
539 
540 	if (!pi)
541 		return;
542 
543 	hw = pi->hw;
544 
545 	list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) {
546 		struct ice_sched_agg_vsi_info *agg_vsi_info;
547 		struct ice_sched_agg_vsi_info *vtmp;
548 
549 		list_for_each_entry_safe(agg_vsi_info, vtmp,
550 					 &agg_info->agg_vsi_list, list_entry) {
551 			list_del(&agg_vsi_info->list_entry);
552 			devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
553 		}
554 	}
555 
556 	/* remove the vsi list */
557 	list_for_each_entry_safe(vsi_elem, tmp, &pi->vsi_info_list,
558 				 list_entry) {
559 		list_del(&vsi_elem->list_entry);
560 		devm_kfree(ice_hw_to_dev(hw), vsi_elem);
561 	}
562 
563 	if (pi->root) {
564 		ice_free_sched_node(pi, pi->root);
565 		pi->root = NULL;
566 	}
567 }
568 
569 /**
570  * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
571  * @pi: port information structure
572  *
573  * Cleanup scheduling elements from SW DB
574  */
575 static void ice_sched_clear_port(struct ice_port_info *pi)
576 {
577 	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
578 		return;
579 
580 	pi->port_state = ICE_SCHED_PORT_STATE_INIT;
581 	mutex_lock(&pi->sched_lock);
582 	ice_sched_clear_tx_topo(pi);
583 	mutex_unlock(&pi->sched_lock);
584 	mutex_destroy(&pi->sched_lock);
585 }
586 
587 /**
588  * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
589  * @hw: pointer to the hw struct
590  *
591  * Cleanup scheduling elements from SW DB for all the ports
592  */
593 void ice_sched_cleanup_all(struct ice_hw *hw)
594 {
595 	if (!hw || !hw->port_info)
596 		return;
597 
598 	if (hw->layer_info)
599 		devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
600 
601 	ice_sched_clear_port(hw->port_info);
602 
603 	hw->num_tx_sched_layers = 0;
604 	hw->num_tx_sched_phys_layers = 0;
605 	hw->flattened_layers = 0;
606 	hw->max_cgds = 0;
607 }
608 
609 /**
610  * ice_sched_create_vsi_info_entry - create an empty new VSI entry
611  * @pi: port information structure
612  * @vsi_id: VSI Id
613  *
614  * This function creates a new VSI entry and adds it to list
615  */
616 static struct ice_sched_vsi_info *
617 ice_sched_create_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id)
618 {
619 	struct ice_sched_vsi_info *vsi_elem;
620 
621 	if (!pi)
622 		return NULL;
623 
624 	vsi_elem = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*vsi_elem),
625 				GFP_KERNEL);
626 	if (!vsi_elem)
627 		return NULL;
628 
629 	list_add(&vsi_elem->list_entry, &pi->vsi_info_list);
630 	vsi_elem->vsi_id = vsi_id;
631 	return vsi_elem;
632 }
633 
634 /**
635  * ice_sched_add_elems - add nodes to hw and SW DB
636  * @pi: port information structure
637  * @tc_node: pointer to the branch node
638  * @parent: pointer to the parent node
639  * @layer: layer number to add nodes
640  * @num_nodes: number of nodes
641  * @num_nodes_added: pointer to num nodes added
642  * @first_node_teid: if new nodes are added then return the teid of first node
643  *
644  * This function add nodes to hw as well as to SW DB for a given layer
645  */
646 static enum ice_status
647 ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
648 		    struct ice_sched_node *parent, u8 layer, u16 num_nodes,
649 		    u16 *num_nodes_added, u32 *first_node_teid)
650 {
651 	struct ice_sched_node *prev, *new_node;
652 	struct ice_aqc_add_elem *buf;
653 	u16 i, num_groups_added = 0;
654 	enum ice_status status = 0;
655 	struct ice_hw *hw = pi->hw;
656 	u16 buf_size;
657 	u32 teid;
658 
659 	buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1);
660 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
661 	if (!buf)
662 		return ICE_ERR_NO_MEMORY;
663 
664 	buf->hdr.parent_teid = parent->info.node_teid;
665 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
666 	for (i = 0; i < num_nodes; i++) {
667 		buf->generic[i].parent_teid = parent->info.node_teid;
668 		buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
669 		buf->generic[i].data.valid_sections =
670 			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
671 			ICE_AQC_ELEM_VALID_EIR;
672 		buf->generic[i].data.generic = 0;
673 		buf->generic[i].data.cir_bw.bw_profile_idx =
674 			ICE_SCHED_DFLT_RL_PROF_ID;
675 		buf->generic[i].data.eir_bw.bw_profile_idx =
676 			ICE_SCHED_DFLT_RL_PROF_ID;
677 	}
678 
679 	status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
680 					&num_groups_added, NULL);
681 	if (status || num_groups_added != 1) {
682 		ice_debug(hw, ICE_DBG_SCHED, "add elements failed\n");
683 		devm_kfree(ice_hw_to_dev(hw), buf);
684 		return ICE_ERR_CFG;
685 	}
686 
687 	*num_nodes_added = num_nodes;
688 	/* add nodes to the SW DB */
689 	for (i = 0; i < num_nodes; i++) {
690 		status = ice_sched_add_node(pi, layer, &buf->generic[i]);
691 		if (status) {
692 			ice_debug(hw, ICE_DBG_SCHED,
693 				  "add nodes in SW DB failed status =%d\n",
694 				  status);
695 			break;
696 		}
697 
698 		teid = le32_to_cpu(buf->generic[i].node_teid);
699 		new_node = ice_sched_find_node_by_teid(parent, teid);
700 
701 		if (!new_node) {
702 			ice_debug(hw, ICE_DBG_SCHED,
703 				  "Node is missing for teid =%d\n", teid);
704 			break;
705 		}
706 
707 		new_node->sibling = NULL;
708 		new_node->tc_num = tc_node->tc_num;
709 
710 		/* add it to previous node sibling pointer */
711 		/* Note: siblings are not linked across branches */
712 		prev = ice_sched_get_first_node(hw, tc_node, layer);
713 
714 		if (prev && prev != new_node) {
715 			while (prev->sibling)
716 				prev = prev->sibling;
717 			prev->sibling = new_node;
718 		}
719 
720 		if (i == 0)
721 			*first_node_teid = teid;
722 	}
723 
724 	devm_kfree(ice_hw_to_dev(hw), buf);
725 	return status;
726 }
727 
728 /**
729  * ice_sched_add_nodes_to_layer - Add nodes to a given layer
730  * @pi: port information structure
731  * @tc_node: pointer to TC node
732  * @parent: pointer to parent node
733  * @layer: layer number to add nodes
734  * @num_nodes: number of nodes to be added
735  * @first_node_teid: pointer to the first node teid
736  * @num_nodes_added: pointer to number of nodes added
737  *
738  * This function add nodes to a given layer.
739  */
740 static enum ice_status
741 ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
742 			     struct ice_sched_node *tc_node,
743 			     struct ice_sched_node *parent, u8 layer,
744 			     u16 num_nodes, u32 *first_node_teid,
745 			     u16 *num_nodes_added)
746 {
747 	u32 *first_teid_ptr = first_node_teid;
748 	u16 new_num_nodes, max_child_nodes;
749 	enum ice_status status = 0;
750 	struct ice_hw *hw = pi->hw;
751 	u16 num_added = 0;
752 	u32 temp;
753 
754 	*num_nodes_added = 0;
755 
756 	if (!num_nodes)
757 		return status;
758 
759 	if (!parent || layer < hw->sw_entry_point_layer)
760 		return ICE_ERR_PARAM;
761 
762 	/* max children per node per layer */
763 	max_child_nodes =
764 	    le16_to_cpu(hw->layer_info[parent->tx_sched_layer].max_children);
765 
766 	/* current number of children + required nodes exceed max children ? */
767 	if ((parent->num_children + num_nodes) > max_child_nodes) {
768 		/* Fail if the parent is a TC node */
769 		if (parent == tc_node)
770 			return ICE_ERR_CFG;
771 
772 		/* utilize all the spaces if the parent is not full */
773 		if (parent->num_children < max_child_nodes) {
774 			new_num_nodes = max_child_nodes - parent->num_children;
775 			/* this recursion is intentional, and wouldn't
776 			 * go more than 2 calls
777 			 */
778 			status = ice_sched_add_nodes_to_layer(pi, tc_node,
779 							      parent, layer,
780 							      new_num_nodes,
781 							      first_node_teid,
782 							      &num_added);
783 			if (status)
784 				return status;
785 
786 			*num_nodes_added += num_added;
787 		}
788 		/* Don't modify the first node teid memory if the first node was
789 		 * added already in the above call. Instead send some temp
790 		 * memory for all other recursive calls.
791 		 */
792 		if (num_added)
793 			first_teid_ptr = &temp;
794 
795 		new_num_nodes = num_nodes - num_added;
796 
797 		/* This parent is full, try the next sibling */
798 		parent = parent->sibling;
799 
800 		/* this recursion is intentional, for 1024 queues
801 		 * per VSI, it goes max of 16 iterations.
802 		 * 1024 / 8 = 128 layer 8 nodes
803 		 * 128 /8 = 16 (add 8 nodes per iteration)
804 		 */
805 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
806 						      layer, new_num_nodes,
807 						      first_teid_ptr,
808 						      &num_added);
809 		*num_nodes_added += num_added;
810 		return status;
811 	}
812 
813 	status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
814 				     num_nodes_added, first_node_teid);
815 	return status;
816 }
817 
818 /**
819  * ice_sched_get_qgrp_layer - get the current queue group layer number
820  * @hw: pointer to the hw struct
821  *
822  * This function returns the current queue group layer number
823  */
824 static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
825 {
826 	/* It's always total layers - 1, the array is 0 relative so -2 */
827 	return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
828 }
829 
830 /**
831  * ice_sched_get_vsi_layer - get the current VSI layer number
832  * @hw: pointer to the hw struct
833  *
834  * This function returns the current VSI layer number
835  */
836 static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
837 {
838 	/* Num Layers       VSI layer
839 	 *     9               6
840 	 *     7               4
841 	 *     5 or less       sw_entry_point_layer
842 	 */
843 	/* calculate the vsi layer based on number of layers. */
844 	if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
845 		u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
846 
847 		if (layer > hw->sw_entry_point_layer)
848 			return layer;
849 	}
850 	return hw->sw_entry_point_layer;
851 }
852 
853 /**
854  * ice_sched_get_num_nodes_per_layer - Get the total number of nodes per layer
855  * @pi: pointer to the port info struct
856  * @layer: layer number
857  *
858  * This function calculates the number of nodes present in the scheduler tree
859  * including all the branches for a given layer
860  */
861 static u16
862 ice_sched_get_num_nodes_per_layer(struct ice_port_info *pi, u8 layer)
863 {
864 	struct ice_hw *hw;
865 	u16 num_nodes = 0;
866 	u8 i;
867 
868 	if (!pi)
869 		return num_nodes;
870 
871 	hw = pi->hw;
872 
873 	/* Calculate the number of nodes for all TCs */
874 	for (i = 0; i < pi->root->num_children; i++) {
875 		struct ice_sched_node *tc_node, *node;
876 
877 		tc_node = pi->root->children[i];
878 
879 		/* Get the first node */
880 		node = ice_sched_get_first_node(hw, tc_node, layer);
881 		if (!node)
882 			continue;
883 
884 		/* count the siblings */
885 		while (node) {
886 			num_nodes++;
887 			node = node->sibling;
888 		}
889 	}
890 
891 	return num_nodes;
892 }
893 
894 /**
895  * ice_sched_val_max_nodes - check max number of nodes reached or not
896  * @pi: port information structure
897  * @new_num_nodes_per_layer: pointer to the new number of nodes array
898  *
899  * This function checks whether the scheduler tree layers have enough space to
900  * add new nodes
901  */
902 static enum ice_status
903 ice_sched_validate_for_max_nodes(struct ice_port_info *pi,
904 				 u16 *new_num_nodes_per_layer)
905 {
906 	struct ice_hw *hw = pi->hw;
907 	u8 i, qg_layer;
908 	u16 num_nodes;
909 
910 	qg_layer = ice_sched_get_qgrp_layer(hw);
911 
912 	/* walk through all the layers from SW entry point to qgroup layer */
913 	for (i = hw->sw_entry_point_layer; i <= qg_layer; i++) {
914 		num_nodes = ice_sched_get_num_nodes_per_layer(pi, i);
915 		if (num_nodes + new_num_nodes_per_layer[i] >
916 		    le16_to_cpu(hw->layer_info[i].max_pf_nodes)) {
917 			ice_debug(hw, ICE_DBG_SCHED,
918 				  "max nodes reached for layer = %d\n", i);
919 			return ICE_ERR_CFG;
920 		}
921 	}
922 	return 0;
923 }
924 
925 /**
926  * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
927  * @pi: port information structure
928  *
929  * This function removes the leaf node that was created by the FW
930  * during initialization
931  */
932 static void
933 ice_rm_dflt_leaf_node(struct ice_port_info *pi)
934 {
935 	struct ice_sched_node *node;
936 
937 	node = pi->root;
938 	while (node) {
939 		if (!node->num_children)
940 			break;
941 		node = node->children[0];
942 	}
943 	if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
944 		u32 teid = le32_to_cpu(node->info.node_teid);
945 		enum ice_status status;
946 
947 		/* remove the default leaf node */
948 		status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
949 		if (!status)
950 			ice_free_sched_node(pi, node);
951 	}
952 }
953 
954 /**
955  * ice_sched_rm_dflt_nodes - free the default nodes in the tree
956  * @pi: port information structure
957  *
958  * This function frees all the nodes except root and TC that were created by
959  * the FW during initialization
960  */
961 static void
962 ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
963 {
964 	struct ice_sched_node *node;
965 
966 	ice_rm_dflt_leaf_node(pi);
967 
968 	/* remove the default nodes except TC and root nodes */
969 	node = pi->root;
970 	while (node) {
971 		if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
972 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
973 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
974 			ice_free_sched_node(pi, node);
975 			break;
976 		}
977 
978 		if (!node->num_children)
979 			break;
980 		node = node->children[0];
981 	}
982 }
983 
984 /**
985  * ice_sched_init_port - Initialize scheduler by querying information from FW
986  * @pi: port info structure for the tree to cleanup
987  *
988  * This function is the initial call to find the total number of Tx scheduler
989  * resources, default topology created by firmware and storing the information
990  * in SW DB.
991  */
992 enum ice_status ice_sched_init_port(struct ice_port_info *pi)
993 {
994 	struct ice_aqc_get_topo_elem *buf;
995 	enum ice_status status;
996 	struct ice_hw *hw;
997 	u8 num_branches;
998 	u16 num_elems;
999 	u8 i, j;
1000 
1001 	if (!pi)
1002 		return ICE_ERR_PARAM;
1003 	hw = pi->hw;
1004 
1005 	/* Query the Default Topology from FW */
1006 	buf = devm_kcalloc(ice_hw_to_dev(hw), ICE_TXSCHED_MAX_BRANCHES,
1007 			   sizeof(*buf), GFP_KERNEL);
1008 	if (!buf)
1009 		return ICE_ERR_NO_MEMORY;
1010 
1011 	/* Query default scheduling tree topology */
1012 	status = ice_aq_get_dflt_topo(hw, pi->lport, buf,
1013 				      sizeof(*buf) * ICE_TXSCHED_MAX_BRANCHES,
1014 				      &num_branches, NULL);
1015 	if (status)
1016 		goto err_init_port;
1017 
1018 	/* num_branches should be between 1-8 */
1019 	if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
1020 		ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
1021 			  num_branches);
1022 		status = ICE_ERR_PARAM;
1023 		goto err_init_port;
1024 	}
1025 
1026 	/* get the number of elements on the default/first branch */
1027 	num_elems = le16_to_cpu(buf[0].hdr.num_elems);
1028 
1029 	/* num_elems should always be between 1-9 */
1030 	if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
1031 		ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
1032 			  num_elems);
1033 		status = ICE_ERR_PARAM;
1034 		goto err_init_port;
1035 	}
1036 
1037 	/* If the last node is a leaf node then the index of the Q group
1038 	 * layer is two less than the number of elements.
1039 	 */
1040 	if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
1041 	    ICE_AQC_ELEM_TYPE_LEAF)
1042 		pi->last_node_teid =
1043 			le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
1044 	else
1045 		pi->last_node_teid =
1046 			le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);
1047 
1048 	/* Insert the Tx Sched root node */
1049 	status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
1050 	if (status)
1051 		goto err_init_port;
1052 
1053 	/* Parse the default tree and cache the information */
1054 	for (i = 0; i < num_branches; i++) {
1055 		num_elems = le16_to_cpu(buf[i].hdr.num_elems);
1056 
1057 		/* Skip root element as already inserted */
1058 		for (j = 1; j < num_elems; j++) {
1059 			/* update the sw entry point */
1060 			if (buf[0].generic[j].data.elem_type ==
1061 			    ICE_AQC_ELEM_TYPE_ENTRY_POINT)
1062 				hw->sw_entry_point_layer = j;
1063 
1064 			status = ice_sched_add_node(pi, j, &buf[i].generic[j]);
1065 			if (status)
1066 				goto err_init_port;
1067 		}
1068 	}
1069 
1070 	/* Remove the default nodes. */
1071 	if (pi->root)
1072 		ice_sched_rm_dflt_nodes(pi);
1073 
1074 	/* initialize the port for handling the scheduler tree */
1075 	pi->port_state = ICE_SCHED_PORT_STATE_READY;
1076 	mutex_init(&pi->sched_lock);
1077 	INIT_LIST_HEAD(&pi->agg_list);
1078 	INIT_LIST_HEAD(&pi->vsi_info_list);
1079 
1080 err_init_port:
1081 	if (status && pi->root) {
1082 		ice_free_sched_node(pi, pi->root);
1083 		pi->root = NULL;
1084 	}
1085 
1086 	devm_kfree(ice_hw_to_dev(hw), buf);
1087 	return status;
1088 }
1089 
1090 /**
1091  * ice_sched_query_res_alloc - query the FW for num of logical sched layers
1092  * @hw: pointer to the HW struct
1093  *
1094  * query FW for allocated scheduler resources and store in HW struct
1095  */
1096 enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
1097 {
1098 	struct ice_aqc_query_txsched_res_resp *buf;
1099 	enum ice_status status = 0;
1100 
1101 	if (hw->layer_info)
1102 		return status;
1103 
1104 	buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
1105 	if (!buf)
1106 		return ICE_ERR_NO_MEMORY;
1107 
1108 	status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
1109 	if (status)
1110 		goto sched_query_out;
1111 
1112 	hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
1113 	hw->num_tx_sched_phys_layers =
1114 		le16_to_cpu(buf->sched_props.phys_levels);
1115 	hw->flattened_layers = buf->sched_props.flattening_bitmap;
1116 	hw->max_cgds = buf->sched_props.max_pf_cgds;
1117 
1118 	 hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
1119 				       (hw->num_tx_sched_layers *
1120 					sizeof(*hw->layer_info)),
1121 				       GFP_KERNEL);
1122 	if (!hw->layer_info) {
1123 		status = ICE_ERR_NO_MEMORY;
1124 		goto sched_query_out;
1125 	}
1126 
1127 sched_query_out:
1128 	devm_kfree(ice_hw_to_dev(hw), buf);
1129 	return status;
1130 }
1131 
1132 /**
1133  * ice_sched_get_vsi_info_entry - Get the vsi entry list for given vsi_id
1134  * @pi: port information structure
1135  * @vsi_id: vsi id
1136  *
1137  * This function retrieves the vsi list for the given vsi id
1138  */
1139 static struct ice_sched_vsi_info *
1140 ice_sched_get_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id)
1141 {
1142 	struct ice_sched_vsi_info *list_elem;
1143 
1144 	if (!pi)
1145 		return NULL;
1146 
1147 	list_for_each_entry(list_elem, &pi->vsi_info_list, list_entry)
1148 		if (list_elem->vsi_id == vsi_id)
1149 			return list_elem;
1150 	return NULL;
1151 }
1152 
1153 /**
1154  * ice_sched_find_node_in_subtree - Find node in part of base node subtree
1155  * @hw: pointer to the hw struct
1156  * @base: pointer to the base node
1157  * @node: pointer to the node to search
1158  *
1159  * This function checks whether a given node is part of the base node
1160  * subtree or not
1161  */
1162 static bool
1163 ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
1164 			       struct ice_sched_node *node)
1165 {
1166 	u8 i;
1167 
1168 	for (i = 0; i < base->num_children; i++) {
1169 		struct ice_sched_node *child = base->children[i];
1170 
1171 		if (node == child)
1172 			return true;
1173 
1174 		if (child->tx_sched_layer > node->tx_sched_layer)
1175 			return false;
1176 
1177 		/* this recursion is intentional, and wouldn't
1178 		 * go more than 8 calls
1179 		 */
1180 		if (ice_sched_find_node_in_subtree(hw, child, node))
1181 			return true;
1182 	}
1183 	return false;
1184 }
1185 
1186 /**
1187  * ice_sched_get_free_qparent - Get a free lan or rdma q group node
1188  * @pi: port information structure
1189  * @vsi_id: vsi id
1190  * @tc: branch number
1191  * @owner: lan or rdma
1192  *
1193  * This function retrieves a free lan or rdma q group node
1194  */
1195 struct ice_sched_node *
1196 ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc,
1197 			   u8 owner)
1198 {
1199 	struct ice_sched_node *vsi_node, *qgrp_node = NULL;
1200 	struct ice_sched_vsi_info *list_elem;
1201 	u16 max_children;
1202 	u8 qgrp_layer;
1203 
1204 	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
1205 	max_children = le16_to_cpu(pi->hw->layer_info[qgrp_layer].max_children);
1206 
1207 	list_elem = ice_sched_get_vsi_info_entry(pi, vsi_id);
1208 	if (!list_elem)
1209 		goto lan_q_exit;
1210 
1211 	vsi_node = list_elem->vsi_node[tc];
1212 
1213 	/* validate invalid VSI id */
1214 	if (!vsi_node)
1215 		goto lan_q_exit;
1216 
1217 	/* get the first q group node from VSI sub-tree */
1218 	qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
1219 	while (qgrp_node) {
1220 		/* make sure the qgroup node is part of the VSI subtree */
1221 		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
1222 			if (qgrp_node->num_children < max_children &&
1223 			    qgrp_node->owner == owner)
1224 				break;
1225 		qgrp_node = qgrp_node->sibling;
1226 	}
1227 
1228 lan_q_exit:
1229 	return qgrp_node;
1230 }
1231 
1232 /**
1233  * ice_sched_get_vsi_node - Get a VSI node based on VSI id
1234  * @hw: pointer to the hw struct
1235  * @tc_node: pointer to the TC node
1236  * @vsi_id: VSI id
1237  *
1238  * This function retrieves a VSI node for a given VSI id from a given
1239  * TC branch
1240  */
1241 static struct ice_sched_node *
1242 ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
1243 		       u16 vsi_id)
1244 {
1245 	struct ice_sched_node *node;
1246 	u8 vsi_layer;
1247 
1248 	vsi_layer = ice_sched_get_vsi_layer(hw);
1249 	node = ice_sched_get_first_node(hw, tc_node, vsi_layer);
1250 
1251 	/* Check whether it already exists */
1252 	while (node) {
1253 		if (node->vsi_id == vsi_id)
1254 			return node;
1255 		node = node->sibling;
1256 	}
1257 
1258 	return node;
1259 }
1260 
1261 /**
1262  * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
1263  * @hw: pointer to the hw struct
1264  * @num_qs: number of queues
1265  * @num_nodes: num nodes array
1266  *
1267  * This function calculates the number of VSI child nodes based on the
1268  * number of queues.
1269  */
1270 static void
1271 ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
1272 {
1273 	u16 num = num_qs;
1274 	u8 i, qgl, vsil;
1275 
1276 	qgl = ice_sched_get_qgrp_layer(hw);
1277 	vsil = ice_sched_get_vsi_layer(hw);
1278 
1279 	/* calculate num nodes from q group to VSI layer */
1280 	for (i = qgl; i > vsil; i--) {
1281 		u16 max_children = le16_to_cpu(hw->layer_info[i].max_children);
1282 
1283 		/* round to the next integer if there is a remainder */
1284 		num = DIV_ROUND_UP(num, max_children);
1285 
1286 		/* need at least one node */
1287 		num_nodes[i] = num ? num : 1;
1288 	}
1289 }
1290 
1291 /**
1292  * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
1293  * @pi: port information structure
1294  * @vsi_id: VSI id
1295  * @tc_node: pointer to the TC node
1296  * @num_nodes: pointer to the num nodes that needs to be added per layer
1297  * @owner: node owner (lan or rdma)
1298  *
1299  * This function adds the VSI child nodes to tree. It gets called for
1300  * lan and rdma separately.
1301  */
1302 static enum ice_status
1303 ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id,
1304 			      struct ice_sched_node *tc_node, u16 *num_nodes,
1305 			      u8 owner)
1306 {
1307 	struct ice_sched_node *parent, *node;
1308 	struct ice_hw *hw = pi->hw;
1309 	enum ice_status status;
1310 	u32 first_node_teid;
1311 	u16 num_added = 0;
1312 	u8 i, qgl, vsil;
1313 
1314 	status = ice_sched_validate_for_max_nodes(pi, num_nodes);
1315 	if (status)
1316 		return status;
1317 
1318 	qgl = ice_sched_get_qgrp_layer(hw);
1319 	vsil = ice_sched_get_vsi_layer(hw);
1320 	parent = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
1321 	for (i = vsil + 1; i <= qgl; i++) {
1322 		if (!parent)
1323 			return ICE_ERR_CFG;
1324 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
1325 						      num_nodes[i],
1326 						      &first_node_teid,
1327 						      &num_added);
1328 		if (status || num_nodes[i] != num_added)
1329 			return ICE_ERR_CFG;
1330 
1331 		/* The newly added node can be a new parent for the next
1332 		 * layer nodes
1333 		 */
1334 		if (num_added) {
1335 			parent = ice_sched_find_node_by_teid(tc_node,
1336 							     first_node_teid);
1337 			node = parent;
1338 			while (node) {
1339 				node->owner = owner;
1340 				node = node->sibling;
1341 			}
1342 		} else {
1343 			parent = parent->children[0];
1344 		}
1345 	}
1346 
1347 	return 0;
1348 }
1349 
1350 /**
1351  * ice_sched_rm_vsi_child_nodes - remove VSI child nodes from the tree
1352  * @pi: port information structure
1353  * @vsi_node: pointer to the VSI node
1354  * @num_nodes: pointer to the num nodes that needs to be removed per layer
1355  * @owner: node owner (lan or rdma)
1356  *
1357  * This function removes the VSI child nodes from the tree. It gets called for
1358  * lan and rdma separately.
1359  */
1360 static void
1361 ice_sched_rm_vsi_child_nodes(struct ice_port_info *pi,
1362 			     struct ice_sched_node *vsi_node, u16 *num_nodes,
1363 			     u8 owner)
1364 {
1365 	struct ice_sched_node *node, *next;
1366 	u8 i, qgl, vsil;
1367 	u16 num;
1368 
1369 	qgl = ice_sched_get_qgrp_layer(pi->hw);
1370 	vsil = ice_sched_get_vsi_layer(pi->hw);
1371 
1372 	for (i = qgl; i > vsil; i--) {
1373 		num = num_nodes[i];
1374 		node = ice_sched_get_first_node(pi->hw, vsi_node, i);
1375 		while (node && num) {
1376 			next = node->sibling;
1377 			if (node->owner == owner && !node->num_children) {
1378 				ice_free_sched_node(pi, node);
1379 				num--;
1380 			}
1381 			node = next;
1382 		}
1383 	}
1384 }
1385 
1386 /**
1387  * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
1388  * @hw: pointer to the hw struct
1389  * @tc_node: pointer to TC node
1390  * @num_nodes: pointer to num nodes array
1391  *
1392  * This function calculates the number of supported nodes needed to add this
1393  * VSI into tx tree including the VSI, parent and intermediate nodes in below
1394  * layers
1395  */
1396 static void
1397 ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
1398 				 struct ice_sched_node *tc_node, u16 *num_nodes)
1399 {
1400 	struct ice_sched_node *node;
1401 	u16 max_child;
1402 	u8 i, vsil;
1403 
1404 	vsil = ice_sched_get_vsi_layer(hw);
1405 	for (i = vsil; i >= hw->sw_entry_point_layer; i--)
1406 		/* Add intermediate nodes if TC has no children and
1407 		 * need at least one node for VSI
1408 		 */
1409 		if (!tc_node->num_children || i == vsil) {
1410 			num_nodes[i]++;
1411 		} else {
1412 			/* If intermediate nodes are reached max children
1413 			 * then add a new one.
1414 			 */
1415 			node = ice_sched_get_first_node(hw, tc_node, i);
1416 			max_child = le16_to_cpu(hw->layer_info[i].max_children);
1417 
1418 			/* scan all the siblings */
1419 			while (node) {
1420 				if (node->num_children < max_child)
1421 					break;
1422 				node = node->sibling;
1423 			}
1424 
1425 			/* all the nodes are full, allocate a new one */
1426 			if (!node)
1427 				num_nodes[i]++;
1428 		}
1429 }
1430 
1431 /**
1432  * ice_sched_add_vsi_support_nodes - add VSI supported nodes into tx tree
1433  * @pi: port information structure
1434  * @vsi_id: VSI Id
1435  * @tc_node: pointer to TC node
1436  * @num_nodes: pointer to num nodes array
1437  *
1438  * This function adds the VSI supported nodes into tx tree including the
1439  * VSI, its parent and intermediate nodes in below layers
1440  */
1441 static enum ice_status
1442 ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_id,
1443 				struct ice_sched_node *tc_node, u16 *num_nodes)
1444 {
1445 	struct ice_sched_node *parent = tc_node;
1446 	enum ice_status status;
1447 	u32 first_node_teid;
1448 	u16 num_added = 0;
1449 	u8 i, vsil;
1450 
1451 	if (!pi)
1452 		return ICE_ERR_PARAM;
1453 
1454 	status = ice_sched_validate_for_max_nodes(pi, num_nodes);
1455 	if (status)
1456 		return status;
1457 
1458 	vsil = ice_sched_get_vsi_layer(pi->hw);
1459 	for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
1460 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
1461 						      i, num_nodes[i],
1462 						      &first_node_teid,
1463 						      &num_added);
1464 		if (status || num_nodes[i] != num_added)
1465 			return ICE_ERR_CFG;
1466 
1467 		/* The newly added node can be a new parent for the next
1468 		 * layer nodes
1469 		 */
1470 		if (num_added)
1471 			parent = ice_sched_find_node_by_teid(tc_node,
1472 							     first_node_teid);
1473 		else
1474 			parent = parent->children[0];
1475 
1476 		if (!parent)
1477 			return ICE_ERR_CFG;
1478 
1479 		if (i == vsil)
1480 			parent->vsi_id = vsi_id;
1481 	}
1482 	return 0;
1483 }
1484 
1485 /**
1486  * ice_sched_add_vsi_to_topo - add a new VSI into tree
1487  * @pi: port information structure
1488  * @vsi_id: VSI Id
1489  * @tc: TC number
1490  *
1491  * This function adds a new VSI into scheduler tree
1492  */
1493 static enum ice_status
1494 ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_id, u8 tc)
1495 {
1496 	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1497 	struct ice_sched_node *tc_node;
1498 	struct ice_hw *hw = pi->hw;
1499 
1500 	tc_node = ice_sched_get_tc_node(pi, tc);
1501 	if (!tc_node)
1502 		return ICE_ERR_PARAM;
1503 
1504 	/* calculate number of supported nodes needed for this VSI */
1505 	ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
1506 
1507 	/* add vsi supported nodes to tc subtree */
1508 	return ice_sched_add_vsi_support_nodes(pi, vsi_id, tc_node, num_nodes);
1509 }
1510 
1511 /**
1512  * ice_sched_update_vsi_child_nodes - update VSI child nodes
1513  * @pi: port information structure
1514  * @vsi_id: VSI Id
1515  * @tc: TC number
1516  * @new_numqs: new number of max queues
1517  * @owner: owner of this subtree
1518  *
1519  * This function updates the VSI child nodes based on the number of queues
1520  */
1521 static enum ice_status
1522 ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, u8 tc,
1523 				 u16 new_numqs, u8 owner)
1524 {
1525 	u16 prev_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1526 	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1527 	struct ice_sched_node *vsi_node;
1528 	struct ice_sched_node *tc_node;
1529 	struct ice_sched_vsi_info *vsi;
1530 	enum ice_status status = 0;
1531 	struct ice_hw *hw = pi->hw;
1532 	u16 prev_numqs;
1533 	u8 i;
1534 
1535 	tc_node = ice_sched_get_tc_node(pi, tc);
1536 	if (!tc_node)
1537 		return ICE_ERR_CFG;
1538 
1539 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
1540 	if (!vsi_node)
1541 		return ICE_ERR_CFG;
1542 
1543 	vsi = ice_sched_get_vsi_info_entry(pi, vsi_id);
1544 	if (!vsi)
1545 		return ICE_ERR_CFG;
1546 
1547 	if (owner == ICE_SCHED_NODE_OWNER_LAN)
1548 		prev_numqs = vsi->max_lanq[tc];
1549 	else
1550 		return ICE_ERR_PARAM;
1551 
1552 	/* num queues are not changed */
1553 	if (prev_numqs == new_numqs)
1554 		return status;
1555 
1556 	/* calculate number of nodes based on prev/new number of qs */
1557 	if (prev_numqs)
1558 		ice_sched_calc_vsi_child_nodes(hw, prev_numqs, prev_num_nodes);
1559 
1560 	if (new_numqs)
1561 		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
1562 
1563 	if (prev_numqs > new_numqs) {
1564 		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
1565 			new_num_nodes[i] = prev_num_nodes[i] - new_num_nodes[i];
1566 
1567 		ice_sched_rm_vsi_child_nodes(pi, vsi_node, new_num_nodes,
1568 					     owner);
1569 	} else {
1570 		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
1571 			new_num_nodes[i] -= prev_num_nodes[i];
1572 
1573 		status = ice_sched_add_vsi_child_nodes(pi, vsi_id, tc_node,
1574 						       new_num_nodes, owner);
1575 		if (status)
1576 			return status;
1577 	}
1578 
1579 	if (owner == ICE_SCHED_NODE_OWNER_LAN)
1580 		vsi->max_lanq[tc] = new_numqs;
1581 
1582 	return status;
1583 }
1584 
1585 /**
1586  * ice_sched_cfg_vsi - configure the new/exisiting VSI
1587  * @pi: port information structure
1588  * @vsi_id: VSI Id
1589  * @tc: TC number
1590  * @maxqs: max number of queues
1591  * @owner: lan or rdma
1592  * @enable: TC enabled or disabled
1593  *
1594  * This function adds/updates VSI nodes based on the number of queues. If TC is
1595  * enabled and VSI is in suspended state then resume the VSI back. If TC is
1596  * disabled then suspend the VSI if it is not already.
1597  */
1598 enum ice_status
1599 ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs,
1600 		  u8 owner, bool enable)
1601 {
1602 	struct ice_sched_node *vsi_node, *tc_node;
1603 	struct ice_sched_vsi_info *vsi;
1604 	enum ice_status status = 0;
1605 	struct ice_hw *hw = pi->hw;
1606 
1607 	tc_node = ice_sched_get_tc_node(pi, tc);
1608 	if (!tc_node)
1609 		return ICE_ERR_PARAM;
1610 
1611 	vsi = ice_sched_get_vsi_info_entry(pi, vsi_id);
1612 	if (!vsi)
1613 		vsi = ice_sched_create_vsi_info_entry(pi, vsi_id);
1614 	if (!vsi)
1615 		return ICE_ERR_NO_MEMORY;
1616 
1617 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
1618 
1619 	/* suspend the VSI if tc is not enabled */
1620 	if (!enable) {
1621 		if (vsi_node && vsi_node->in_use) {
1622 			u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1623 
1624 			status = ice_sched_suspend_resume_elems(hw, 1, &teid,
1625 								true);
1626 			if (!status)
1627 				vsi_node->in_use = false;
1628 		}
1629 		return status;
1630 	}
1631 
1632 	/* TC is enabled, if it is a new VSI then add it to the tree */
1633 	if (!vsi_node) {
1634 		status = ice_sched_add_vsi_to_topo(pi, vsi_id, tc);
1635 		if (status)
1636 			return status;
1637 		vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
1638 		if (!vsi_node)
1639 			return ICE_ERR_CFG;
1640 		vsi->vsi_node[tc] = vsi_node;
1641 		vsi_node->in_use = true;
1642 	}
1643 
1644 	/* update the VSI child nodes */
1645 	status = ice_sched_update_vsi_child_nodes(pi, vsi_id, tc, maxqs, owner);
1646 	if (status)
1647 		return status;
1648 
1649 	/* TC is enabled, resume the VSI if it is in the suspend state */
1650 	if (!vsi_node->in_use) {
1651 		u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1652 
1653 		status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
1654 		if (!status)
1655 			vsi_node->in_use = true;
1656 	}
1657 
1658 	return status;
1659 }
1660