xref: /openbmc/linux/net/sched/sch_htb.c (revision 413d6ed3)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
4  *
5  * Authors:	Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *			HTB support at LARTC mailing list
10  *		Ondrej Kraus, <krauso@barr.cz>
11  *			found missing INIT_QDISC(htb)
12  *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *			helped a lot to locate nasty class stall bug
14  *		Andi Kleen, Jamal Hadi, Bert Hubert
15  *			code review and helpful comments on shaping
16  *		Tomasz Wrona, <tw@eter.tym.pl>
17  *			created test case so that I was able to fix nasty bug
18  *		Wilfried Weissmann
19  *			spotted bug in dequeue code and helped with fix
20  *		Jiri Fojtasek
21  *			fixed requeue routine
22  *		and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40 
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows to assign priority to each class in hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47 
48     Levels:
49     Each class is assigned level. Leaf has ALWAYS level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes has level
51     one less than their parent.
52 */
53 
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011		/* major must be matched with number suplied by TC as version */
56 
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60 
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64 
65 static int htb_rate_est = 0; /* htb classes have a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
68 
69 /* used internaly to keep status of single class */
70 enum htb_cmode {
71 	HTB_CANT_SEND,		/* class can't send and can't borrow */
72 	HTB_MAY_BORROW,		/* class can't send but may borrow */
73 	HTB_CAN_SEND		/* class can send */
74 };
75 
76 struct htb_prio {
77 	union {
78 		struct rb_root	row;
79 		struct rb_root	feed;
80 	};
81 	struct rb_node	*ptr;
82 	/* When class changes from state 1->2 and disconnects from
83 	 * parent's feed then we lost ptr value and start from the
84 	 * first child again. Here we store classid of the
85 	 * last valid ptr (used when ptr is NULL).
86 	 */
87 	u32		last_ptr_id;
88 };
89 
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95 	struct Qdisc_class_common common;
96 	struct psched_ratecfg	rate;
97 	struct psched_ratecfg	ceil;
98 	s64			buffer, cbuffer;/* token bucket depth/rate */
99 	s64			mbuffer;	/* max wait time */
100 	u32			prio;		/* these two are used only by leaves... */
101 	int			quantum;	/* but stored for parent-to-leaf return */
102 
103 	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
104 	struct tcf_block	*block;
105 	int			filter_cnt;
106 
107 	int			level;		/* our level (see above) */
108 	unsigned int		children;
109 	struct htb_class	*parent;	/* parent class */
110 
111 	struct net_rate_estimator __rcu *rate_est;
112 
113 	/*
114 	 * Written often fields
115 	 */
116 	struct gnet_stats_basic_packed bstats;
117 	struct gnet_stats_basic_packed bstats_bias;
118 	struct tc_htb_xstats	xstats;	/* our special stats */
119 
120 	/* token bucket parameters */
121 	s64			tokens, ctokens;/* current number of tokens */
122 	s64			t_c;		/* checkpoint time */
123 
124 	union {
125 		struct htb_class_leaf {
126 			int		deficit[TC_HTB_MAXDEPTH];
127 			struct Qdisc	*q;
128 		} leaf;
129 		struct htb_class_inner {
130 			struct htb_prio clprio[TC_HTB_NUMPRIO];
131 		} inner;
132 	};
133 	s64			pq_key;
134 
135 	int			prio_activity;	/* for which prios are we active */
136 	enum htb_cmode		cmode;		/* current mode of the class */
137 	struct rb_node		pq_node;	/* node for event queue */
138 	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
139 
140 	unsigned int drops ____cacheline_aligned_in_smp;
141 	unsigned int		overlimits;
142 };
143 
144 struct htb_level {
145 	struct rb_root	wait_pq;
146 	struct htb_prio hprio[TC_HTB_NUMPRIO];
147 };
148 
149 struct htb_sched {
150 	struct Qdisc_class_hash clhash;
151 	int			defcls;		/* class where unclassified flows go to */
152 	int			rate2quantum;	/* quant = rate / rate2quantum */
153 
154 	/* filters for qdisc itself */
155 	struct tcf_proto __rcu	*filter_list;
156 	struct tcf_block	*block;
157 
158 #define HTB_WARN_TOOMANYEVENTS	0x1
159 	unsigned int		warned;	/* only one warning */
160 	int			direct_qlen;
161 	struct work_struct	work;
162 
163 	/* non shaped skbs; let them go directly thru */
164 	struct qdisc_skb_head	direct_queue;
165 	u32			direct_pkts;
166 	u32			overlimits;
167 
168 	struct qdisc_watchdog	watchdog;
169 
170 	s64			now;	/* cached dequeue time */
171 
172 	/* time of nearest event per level (row) */
173 	s64			near_ev_cache[TC_HTB_MAXDEPTH];
174 
175 	int			row_mask[TC_HTB_MAXDEPTH];
176 
177 	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
178 
179 	struct Qdisc		**direct_qdiscs;
180 	unsigned int            num_direct_qdiscs;
181 
182 	bool			offload;
183 };
184 
185 /* find class in global hash table using given handle */
186 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
187 {
188 	struct htb_sched *q = qdisc_priv(sch);
189 	struct Qdisc_class_common *clc;
190 
191 	clc = qdisc_class_find(&q->clhash, handle);
192 	if (clc == NULL)
193 		return NULL;
194 	return container_of(clc, struct htb_class, common);
195 }
196 
197 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
198 {
199 	return (unsigned long)htb_find(handle, sch);
200 }
201 /**
202  * htb_classify - classify a packet into class
203  *
204  * It returns NULL if the packet should be dropped or -1 if the packet
205  * should be passed directly thru. In all other cases leaf class is returned.
206  * We allow direct class selection by classid in priority. The we examine
207  * filters in qdisc and in inner nodes (if higher filter points to the inner
208  * node). If we end up with classid MAJOR:0 we enqueue the skb into special
209  * internal fifo (direct). These packets then go directly thru. If we still
210  * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful
211  * then finish and return direct queue.
212  */
213 #define HTB_DIRECT ((struct htb_class *)-1L)
214 
215 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216 				      int *qerr)
217 {
218 	struct htb_sched *q = qdisc_priv(sch);
219 	struct htb_class *cl;
220 	struct tcf_result res;
221 	struct tcf_proto *tcf;
222 	int result;
223 
224 	/* allow to select class by setting skb->priority to valid classid;
225 	 * note that nfmark can be used too by attaching filter fw with no
226 	 * rules in it
227 	 */
228 	if (skb->priority == sch->handle)
229 		return HTB_DIRECT;	/* X:0 (direct flow) selected */
230 	cl = htb_find(skb->priority, sch);
231 	if (cl) {
232 		if (cl->level == 0)
233 			return cl;
234 		/* Start with inner filter chain if a non-leaf class is selected */
235 		tcf = rcu_dereference_bh(cl->filter_list);
236 	} else {
237 		tcf = rcu_dereference_bh(q->filter_list);
238 	}
239 
240 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
241 	while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
242 #ifdef CONFIG_NET_CLS_ACT
243 		switch (result) {
244 		case TC_ACT_QUEUED:
245 		case TC_ACT_STOLEN:
246 		case TC_ACT_TRAP:
247 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
248 			fallthrough;
249 		case TC_ACT_SHOT:
250 			return NULL;
251 		}
252 #endif
253 		cl = (void *)res.class;
254 		if (!cl) {
255 			if (res.classid == sch->handle)
256 				return HTB_DIRECT;	/* X:0 (direct flow) */
257 			cl = htb_find(res.classid, sch);
258 			if (!cl)
259 				break;	/* filter selected invalid classid */
260 		}
261 		if (!cl->level)
262 			return cl;	/* we hit leaf; return it */
263 
264 		/* we have got inner class; apply inner filter chain */
265 		tcf = rcu_dereference_bh(cl->filter_list);
266 	}
267 	/* classification failed; try to use default class */
268 	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
269 	if (!cl || cl->level)
270 		return HTB_DIRECT;	/* bad default .. this is safe bet */
271 	return cl;
272 }
273 
274 /**
275  * htb_add_to_id_tree - adds class to the round robin list
276  *
277  * Routine adds class to the list (actually tree) sorted by classid.
278  * Make sure that class is not already on such list for given prio.
279  */
280 static void htb_add_to_id_tree(struct rb_root *root,
281 			       struct htb_class *cl, int prio)
282 {
283 	struct rb_node **p = &root->rb_node, *parent = NULL;
284 
285 	while (*p) {
286 		struct htb_class *c;
287 		parent = *p;
288 		c = rb_entry(parent, struct htb_class, node[prio]);
289 
290 		if (cl->common.classid > c->common.classid)
291 			p = &parent->rb_right;
292 		else
293 			p = &parent->rb_left;
294 	}
295 	rb_link_node(&cl->node[prio], parent, p);
296 	rb_insert_color(&cl->node[prio], root);
297 }
298 
299 /**
300  * htb_add_to_wait_tree - adds class to the event queue with delay
301  *
302  * The class is added to priority event queue to indicate that class will
303  * change its mode in cl->pq_key microseconds. Make sure that class is not
304  * already in the queue.
305  */
306 static void htb_add_to_wait_tree(struct htb_sched *q,
307 				 struct htb_class *cl, s64 delay)
308 {
309 	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
310 
311 	cl->pq_key = q->now + delay;
312 	if (cl->pq_key == q->now)
313 		cl->pq_key++;
314 
315 	/* update the nearest event cache */
316 	if (q->near_ev_cache[cl->level] > cl->pq_key)
317 		q->near_ev_cache[cl->level] = cl->pq_key;
318 
319 	while (*p) {
320 		struct htb_class *c;
321 		parent = *p;
322 		c = rb_entry(parent, struct htb_class, pq_node);
323 		if (cl->pq_key >= c->pq_key)
324 			p = &parent->rb_right;
325 		else
326 			p = &parent->rb_left;
327 	}
328 	rb_link_node(&cl->pq_node, parent, p);
329 	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
330 }
331 
332 /**
333  * htb_next_rb_node - finds next node in binary tree
334  *
335  * When we are past last key we return NULL.
336  * Average complexity is 2 steps per call.
337  */
338 static inline void htb_next_rb_node(struct rb_node **n)
339 {
340 	*n = rb_next(*n);
341 }
342 
343 /**
344  * htb_add_class_to_row - add class to its row
345  *
346  * The class is added to row at priorities marked in mask.
347  * It does nothing if mask == 0.
348  */
349 static inline void htb_add_class_to_row(struct htb_sched *q,
350 					struct htb_class *cl, int mask)
351 {
352 	q->row_mask[cl->level] |= mask;
353 	while (mask) {
354 		int prio = ffz(~mask);
355 		mask &= ~(1 << prio);
356 		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
357 	}
358 }
359 
360 /* If this triggers, it is a bug in this code, but it need not be fatal */
361 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
362 {
363 	if (RB_EMPTY_NODE(rb)) {
364 		WARN_ON(1);
365 	} else {
366 		rb_erase(rb, root);
367 		RB_CLEAR_NODE(rb);
368 	}
369 }
370 
371 
372 /**
373  * htb_remove_class_from_row - removes class from its row
374  *
375  * The class is removed from row at priorities marked in mask.
376  * It does nothing if mask == 0.
377  */
378 static inline void htb_remove_class_from_row(struct htb_sched *q,
379 						 struct htb_class *cl, int mask)
380 {
381 	int m = 0;
382 	struct htb_level *hlevel = &q->hlevel[cl->level];
383 
384 	while (mask) {
385 		int prio = ffz(~mask);
386 		struct htb_prio *hprio = &hlevel->hprio[prio];
387 
388 		mask &= ~(1 << prio);
389 		if (hprio->ptr == cl->node + prio)
390 			htb_next_rb_node(&hprio->ptr);
391 
392 		htb_safe_rb_erase(cl->node + prio, &hprio->row);
393 		if (!hprio->row.rb_node)
394 			m |= 1 << prio;
395 	}
396 	q->row_mask[cl->level] &= ~m;
397 }
398 
399 /**
400  * htb_activate_prios - creates active classe's feed chain
401  *
402  * The class is connected to ancestors and/or appropriate rows
403  * for priorities it is participating on. cl->cmode must be new
404  * (activated) mode. It does nothing if cl->prio_activity == 0.
405  */
406 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
407 {
408 	struct htb_class *p = cl->parent;
409 	long m, mask = cl->prio_activity;
410 
411 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
412 		m = mask;
413 		while (m) {
414 			int prio = ffz(~m);
415 			m &= ~(1 << prio);
416 
417 			if (p->inner.clprio[prio].feed.rb_node)
418 				/* parent already has its feed in use so that
419 				 * reset bit in mask as parent is already ok
420 				 */
421 				mask &= ~(1 << prio);
422 
423 			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
424 		}
425 		p->prio_activity |= mask;
426 		cl = p;
427 		p = cl->parent;
428 
429 	}
430 	if (cl->cmode == HTB_CAN_SEND && mask)
431 		htb_add_class_to_row(q, cl, mask);
432 }
433 
434 /**
435  * htb_deactivate_prios - remove class from feed chain
436  *
437  * cl->cmode must represent old mode (before deactivation). It does
438  * nothing if cl->prio_activity == 0. Class is removed from all feed
439  * chains and rows.
440  */
441 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
442 {
443 	struct htb_class *p = cl->parent;
444 	long m, mask = cl->prio_activity;
445 
446 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
447 		m = mask;
448 		mask = 0;
449 		while (m) {
450 			int prio = ffz(~m);
451 			m &= ~(1 << prio);
452 
453 			if (p->inner.clprio[prio].ptr == cl->node + prio) {
454 				/* we are removing child which is pointed to from
455 				 * parent feed - forget the pointer but remember
456 				 * classid
457 				 */
458 				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
459 				p->inner.clprio[prio].ptr = NULL;
460 			}
461 
462 			htb_safe_rb_erase(cl->node + prio,
463 					  &p->inner.clprio[prio].feed);
464 
465 			if (!p->inner.clprio[prio].feed.rb_node)
466 				mask |= 1 << prio;
467 		}
468 
469 		p->prio_activity &= ~mask;
470 		cl = p;
471 		p = cl->parent;
472 
473 	}
474 	if (cl->cmode == HTB_CAN_SEND && mask)
475 		htb_remove_class_from_row(q, cl, mask);
476 }
477 
478 static inline s64 htb_lowater(const struct htb_class *cl)
479 {
480 	if (htb_hysteresis)
481 		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
482 	else
483 		return 0;
484 }
485 static inline s64 htb_hiwater(const struct htb_class *cl)
486 {
487 	if (htb_hysteresis)
488 		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
489 	else
490 		return 0;
491 }
492 
493 
494 /**
495  * htb_class_mode - computes and returns current class mode
496  *
497  * It computes cl's mode at time cl->t_c+diff and returns it. If mode
498  * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
499  * from now to time when cl will change its state.
500  * Also it is worth to note that class mode doesn't change simply
501  * at cl->{c,}tokens == 0 but there can rather be hysteresis of
502  * 0 .. -cl->{c,}buffer range. It is meant to limit number of
503  * mode transitions per time unit. The speed gain is about 1/6.
504  */
505 static inline enum htb_cmode
506 htb_class_mode(struct htb_class *cl, s64 *diff)
507 {
508 	s64 toks;
509 
510 	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
511 		*diff = -toks;
512 		return HTB_CANT_SEND;
513 	}
514 
515 	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
516 		return HTB_CAN_SEND;
517 
518 	*diff = -toks;
519 	return HTB_MAY_BORROW;
520 }
521 
522 /**
523  * htb_change_class_mode - changes classe's mode
524  *
525  * This should be the only way how to change classe's mode under normal
526  * cirsumstances. Routine will update feed lists linkage, change mode
527  * and add class to the wait event queue if appropriate. New mode should
528  * be different from old one and cl->pq_key has to be valid if changing
529  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
530  */
531 static void
532 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
533 {
534 	enum htb_cmode new_mode = htb_class_mode(cl, diff);
535 
536 	if (new_mode == cl->cmode)
537 		return;
538 
539 	if (new_mode == HTB_CANT_SEND) {
540 		cl->overlimits++;
541 		q->overlimits++;
542 	}
543 
544 	if (cl->prio_activity) {	/* not necessary: speed optimization */
545 		if (cl->cmode != HTB_CANT_SEND)
546 			htb_deactivate_prios(q, cl);
547 		cl->cmode = new_mode;
548 		if (new_mode != HTB_CANT_SEND)
549 			htb_activate_prios(q, cl);
550 	} else
551 		cl->cmode = new_mode;
552 }
553 
554 /**
555  * htb_activate - inserts leaf cl into appropriate active feeds
556  *
557  * Routine learns (new) priority of leaf and activates feed chain
558  * for the prio. It can be called on already active leaf safely.
559  * It also adds leaf into droplist.
560  */
561 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
562 {
563 	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
564 
565 	if (!cl->prio_activity) {
566 		cl->prio_activity = 1 << cl->prio;
567 		htb_activate_prios(q, cl);
568 	}
569 }
570 
571 /**
572  * htb_deactivate - remove leaf cl from active feeds
573  *
574  * Make sure that leaf is active. In the other words it can't be called
575  * with non-active leaf. It also removes class from the drop list.
576  */
577 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
578 {
579 	WARN_ON(!cl->prio_activity);
580 
581 	htb_deactivate_prios(q, cl);
582 	cl->prio_activity = 0;
583 }
584 
585 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
586 		       struct sk_buff **to_free)
587 {
588 	int ret;
589 	unsigned int len = qdisc_pkt_len(skb);
590 	struct htb_sched *q = qdisc_priv(sch);
591 	struct htb_class *cl = htb_classify(skb, sch, &ret);
592 
593 	if (cl == HTB_DIRECT) {
594 		/* enqueue to helper queue */
595 		if (q->direct_queue.qlen < q->direct_qlen) {
596 			__qdisc_enqueue_tail(skb, &q->direct_queue);
597 			q->direct_pkts++;
598 		} else {
599 			return qdisc_drop(skb, sch, to_free);
600 		}
601 #ifdef CONFIG_NET_CLS_ACT
602 	} else if (!cl) {
603 		if (ret & __NET_XMIT_BYPASS)
604 			qdisc_qstats_drop(sch);
605 		__qdisc_drop(skb, to_free);
606 		return ret;
607 #endif
608 	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
609 					to_free)) != NET_XMIT_SUCCESS) {
610 		if (net_xmit_drop_count(ret)) {
611 			qdisc_qstats_drop(sch);
612 			cl->drops++;
613 		}
614 		return ret;
615 	} else {
616 		htb_activate(q, cl);
617 	}
618 
619 	sch->qstats.backlog += len;
620 	sch->q.qlen++;
621 	return NET_XMIT_SUCCESS;
622 }
623 
624 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
625 {
626 	s64 toks = diff + cl->tokens;
627 
628 	if (toks > cl->buffer)
629 		toks = cl->buffer;
630 	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
631 	if (toks <= -cl->mbuffer)
632 		toks = 1 - cl->mbuffer;
633 
634 	cl->tokens = toks;
635 }
636 
637 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
638 {
639 	s64 toks = diff + cl->ctokens;
640 
641 	if (toks > cl->cbuffer)
642 		toks = cl->cbuffer;
643 	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
644 	if (toks <= -cl->mbuffer)
645 		toks = 1 - cl->mbuffer;
646 
647 	cl->ctokens = toks;
648 }
649 
650 /**
651  * htb_charge_class - charges amount "bytes" to leaf and ancestors
652  *
653  * Routine assumes that packet "bytes" long was dequeued from leaf cl
654  * borrowing from "level". It accounts bytes to ceil leaky bucket for
655  * leaf and all ancestors and to rate bucket for ancestors at levels
656  * "level" and higher. It also handles possible change of mode resulting
657  * from the update. Note that mode can also increase here (MAY_BORROW to
658  * CAN_SEND) because we can use more precise clock that event queue here.
659  * In such case we remove class from event queue first.
660  */
661 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
662 			     int level, struct sk_buff *skb)
663 {
664 	int bytes = qdisc_pkt_len(skb);
665 	enum htb_cmode old_mode;
666 	s64 diff;
667 
668 	while (cl) {
669 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
670 		if (cl->level >= level) {
671 			if (cl->level == level)
672 				cl->xstats.lends++;
673 			htb_accnt_tokens(cl, bytes, diff);
674 		} else {
675 			cl->xstats.borrows++;
676 			cl->tokens += diff;	/* we moved t_c; update tokens */
677 		}
678 		htb_accnt_ctokens(cl, bytes, diff);
679 		cl->t_c = q->now;
680 
681 		old_mode = cl->cmode;
682 		diff = 0;
683 		htb_change_class_mode(q, cl, &diff);
684 		if (old_mode != cl->cmode) {
685 			if (old_mode != HTB_CAN_SEND)
686 				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
687 			if (cl->cmode != HTB_CAN_SEND)
688 				htb_add_to_wait_tree(q, cl, diff);
689 		}
690 
691 		/* update basic stats except for leaves which are already updated */
692 		if (cl->level)
693 			bstats_update(&cl->bstats, skb);
694 
695 		cl = cl->parent;
696 	}
697 }
698 
699 /**
700  * htb_do_events - make mode changes to classes at the level
701  *
702  * Scans event queue for pending events and applies them. Returns time of
703  * next pending event (0 for no event in pq, q->now for too many events).
704  * Note: Applied are events whose have cl->pq_key <= q->now.
705  */
706 static s64 htb_do_events(struct htb_sched *q, const int level,
707 			 unsigned long start)
708 {
709 	/* don't run for longer than 2 jiffies; 2 is used instead of
710 	 * 1 to simplify things when jiffy is going to be incremented
711 	 * too soon
712 	 */
713 	unsigned long stop_at = start + 2;
714 	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
715 
716 	while (time_before(jiffies, stop_at)) {
717 		struct htb_class *cl;
718 		s64 diff;
719 		struct rb_node *p = rb_first(wait_pq);
720 
721 		if (!p)
722 			return 0;
723 
724 		cl = rb_entry(p, struct htb_class, pq_node);
725 		if (cl->pq_key > q->now)
726 			return cl->pq_key;
727 
728 		htb_safe_rb_erase(p, wait_pq);
729 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
730 		htb_change_class_mode(q, cl, &diff);
731 		if (cl->cmode != HTB_CAN_SEND)
732 			htb_add_to_wait_tree(q, cl, diff);
733 	}
734 
735 	/* too much load - let's continue after a break for scheduling */
736 	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
737 		pr_warn("htb: too many events!\n");
738 		q->warned |= HTB_WARN_TOOMANYEVENTS;
739 	}
740 
741 	return q->now;
742 }
743 
744 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
745  * is no such one exists.
746  */
747 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
748 					      u32 id)
749 {
750 	struct rb_node *r = NULL;
751 	while (n) {
752 		struct htb_class *cl =
753 		    rb_entry(n, struct htb_class, node[prio]);
754 
755 		if (id > cl->common.classid) {
756 			n = n->rb_right;
757 		} else if (id < cl->common.classid) {
758 			r = n;
759 			n = n->rb_left;
760 		} else {
761 			return n;
762 		}
763 	}
764 	return r;
765 }
766 
767 /**
768  * htb_lookup_leaf - returns next leaf class in DRR order
769  *
770  * Find leaf where current feed pointers points to.
771  */
772 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
773 {
774 	int i;
775 	struct {
776 		struct rb_node *root;
777 		struct rb_node **pptr;
778 		u32 *pid;
779 	} stk[TC_HTB_MAXDEPTH], *sp = stk;
780 
781 	BUG_ON(!hprio->row.rb_node);
782 	sp->root = hprio->row.rb_node;
783 	sp->pptr = &hprio->ptr;
784 	sp->pid = &hprio->last_ptr_id;
785 
786 	for (i = 0; i < 65535; i++) {
787 		if (!*sp->pptr && *sp->pid) {
788 			/* ptr was invalidated but id is valid - try to recover
789 			 * the original or next ptr
790 			 */
791 			*sp->pptr =
792 			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
793 		}
794 		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
795 				 * can become out of date quickly
796 				 */
797 		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
798 			*sp->pptr = sp->root;
799 			while ((*sp->pptr)->rb_left)
800 				*sp->pptr = (*sp->pptr)->rb_left;
801 			if (sp > stk) {
802 				sp--;
803 				if (!*sp->pptr) {
804 					WARN_ON(1);
805 					return NULL;
806 				}
807 				htb_next_rb_node(sp->pptr);
808 			}
809 		} else {
810 			struct htb_class *cl;
811 			struct htb_prio *clp;
812 
813 			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
814 			if (!cl->level)
815 				return cl;
816 			clp = &cl->inner.clprio[prio];
817 			(++sp)->root = clp->feed.rb_node;
818 			sp->pptr = &clp->ptr;
819 			sp->pid = &clp->last_ptr_id;
820 		}
821 	}
822 	WARN_ON(1);
823 	return NULL;
824 }
825 
826 /* dequeues packet at given priority and level; call only if
827  * you are sure that there is active class at prio/level
828  */
829 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
830 					const int level)
831 {
832 	struct sk_buff *skb = NULL;
833 	struct htb_class *cl, *start;
834 	struct htb_level *hlevel = &q->hlevel[level];
835 	struct htb_prio *hprio = &hlevel->hprio[prio];
836 
837 	/* look initial class up in the row */
838 	start = cl = htb_lookup_leaf(hprio, prio);
839 
840 	do {
841 next:
842 		if (unlikely(!cl))
843 			return NULL;
844 
845 		/* class can be empty - it is unlikely but can be true if leaf
846 		 * qdisc drops packets in enqueue routine or if someone used
847 		 * graft operation on the leaf since last dequeue;
848 		 * simply deactivate and skip such class
849 		 */
850 		if (unlikely(cl->leaf.q->q.qlen == 0)) {
851 			struct htb_class *next;
852 			htb_deactivate(q, cl);
853 
854 			/* row/level might become empty */
855 			if ((q->row_mask[level] & (1 << prio)) == 0)
856 				return NULL;
857 
858 			next = htb_lookup_leaf(hprio, prio);
859 
860 			if (cl == start)	/* fix start if we just deleted it */
861 				start = next;
862 			cl = next;
863 			goto next;
864 		}
865 
866 		skb = cl->leaf.q->dequeue(cl->leaf.q);
867 		if (likely(skb != NULL))
868 			break;
869 
870 		qdisc_warn_nonwc("htb", cl->leaf.q);
871 		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
872 					 &q->hlevel[0].hprio[prio].ptr);
873 		cl = htb_lookup_leaf(hprio, prio);
874 
875 	} while (cl != start);
876 
877 	if (likely(skb != NULL)) {
878 		bstats_update(&cl->bstats, skb);
879 		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
880 		if (cl->leaf.deficit[level] < 0) {
881 			cl->leaf.deficit[level] += cl->quantum;
882 			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
883 						 &q->hlevel[0].hprio[prio].ptr);
884 		}
885 		/* this used to be after charge_class but this constelation
886 		 * gives us slightly better performance
887 		 */
888 		if (!cl->leaf.q->q.qlen)
889 			htb_deactivate(q, cl);
890 		htb_charge_class(q, cl, level, skb);
891 	}
892 	return skb;
893 }
894 
895 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
896 {
897 	struct sk_buff *skb;
898 	struct htb_sched *q = qdisc_priv(sch);
899 	int level;
900 	s64 next_event;
901 	unsigned long start_at;
902 
903 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
904 	skb = __qdisc_dequeue_head(&q->direct_queue);
905 	if (skb != NULL) {
906 ok:
907 		qdisc_bstats_update(sch, skb);
908 		qdisc_qstats_backlog_dec(sch, skb);
909 		sch->q.qlen--;
910 		return skb;
911 	}
912 
913 	if (!sch->q.qlen)
914 		goto fin;
915 	q->now = ktime_get_ns();
916 	start_at = jiffies;
917 
918 	next_event = q->now + 5LLU * NSEC_PER_SEC;
919 
920 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
921 		/* common case optimization - skip event handler quickly */
922 		int m;
923 		s64 event = q->near_ev_cache[level];
924 
925 		if (q->now >= event) {
926 			event = htb_do_events(q, level, start_at);
927 			if (!event)
928 				event = q->now + NSEC_PER_SEC;
929 			q->near_ev_cache[level] = event;
930 		}
931 
932 		if (next_event > event)
933 			next_event = event;
934 
935 		m = ~q->row_mask[level];
936 		while (m != (int)(-1)) {
937 			int prio = ffz(m);
938 
939 			m |= 1 << prio;
940 			skb = htb_dequeue_tree(q, prio, level);
941 			if (likely(skb != NULL))
942 				goto ok;
943 		}
944 	}
945 	if (likely(next_event > q->now))
946 		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
947 	else
948 		schedule_work(&q->work);
949 fin:
950 	return skb;
951 }
952 
953 /* reset all classes */
954 /* always caled under BH & queue lock */
955 static void htb_reset(struct Qdisc *sch)
956 {
957 	struct htb_sched *q = qdisc_priv(sch);
958 	struct htb_class *cl;
959 	unsigned int i;
960 
961 	for (i = 0; i < q->clhash.hashsize; i++) {
962 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
963 			if (cl->level)
964 				memset(&cl->inner, 0, sizeof(cl->inner));
965 			else {
966 				if (cl->leaf.q && !q->offload)
967 					qdisc_reset(cl->leaf.q);
968 			}
969 			cl->prio_activity = 0;
970 			cl->cmode = HTB_CAN_SEND;
971 		}
972 	}
973 	qdisc_watchdog_cancel(&q->watchdog);
974 	__qdisc_reset_queue(&q->direct_queue);
975 	sch->q.qlen = 0;
976 	sch->qstats.backlog = 0;
977 	memset(q->hlevel, 0, sizeof(q->hlevel));
978 	memset(q->row_mask, 0, sizeof(q->row_mask));
979 }
980 
981 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
982 	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
983 	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
984 	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
985 	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
986 	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
987 	[TCA_HTB_RATE64] = { .type = NLA_U64 },
988 	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
989 	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
990 };
991 
992 static void htb_work_func(struct work_struct *work)
993 {
994 	struct htb_sched *q = container_of(work, struct htb_sched, work);
995 	struct Qdisc *sch = q->watchdog.qdisc;
996 
997 	rcu_read_lock();
998 	__netif_schedule(qdisc_root(sch));
999 	rcu_read_unlock();
1000 }
1001 
1002 static void htb_set_lockdep_class_child(struct Qdisc *q)
1003 {
1004 	static struct lock_class_key child_key;
1005 
1006 	lockdep_set_class(qdisc_lock(q), &child_key);
1007 }
1008 
1009 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1010 {
1011 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1012 }
1013 
1014 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1015 		    struct netlink_ext_ack *extack)
1016 {
1017 	struct net_device *dev = qdisc_dev(sch);
1018 	struct tc_htb_qopt_offload offload_opt;
1019 	struct htb_sched *q = qdisc_priv(sch);
1020 	struct nlattr *tb[TCA_HTB_MAX + 1];
1021 	struct tc_htb_glob *gopt;
1022 	unsigned int ntx;
1023 	bool offload;
1024 	int err;
1025 
1026 	qdisc_watchdog_init(&q->watchdog, sch);
1027 	INIT_WORK(&q->work, htb_work_func);
1028 
1029 	if (!opt)
1030 		return -EINVAL;
1031 
1032 	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1033 	if (err)
1034 		return err;
1035 
1036 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1037 					  NULL);
1038 	if (err < 0)
1039 		return err;
1040 
1041 	if (!tb[TCA_HTB_INIT])
1042 		return -EINVAL;
1043 
1044 	gopt = nla_data(tb[TCA_HTB_INIT]);
1045 	if (gopt->version != HTB_VER >> 16)
1046 		return -EINVAL;
1047 
1048 	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1049 
1050 	if (offload) {
1051 		if (sch->parent != TC_H_ROOT)
1052 			return -EOPNOTSUPP;
1053 
1054 		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1055 			return -EOPNOTSUPP;
1056 
1057 		q->num_direct_qdiscs = dev->real_num_tx_queues;
1058 		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1059 					   sizeof(*q->direct_qdiscs),
1060 					   GFP_KERNEL);
1061 		if (!q->direct_qdiscs)
1062 			return -ENOMEM;
1063 	}
1064 
1065 	err = qdisc_class_hash_init(&q->clhash);
1066 	if (err < 0)
1067 		goto err_free_direct_qdiscs;
1068 
1069 	qdisc_skb_head_init(&q->direct_queue);
1070 
1071 	if (tb[TCA_HTB_DIRECT_QLEN])
1072 		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1073 	else
1074 		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1075 
1076 	if ((q->rate2quantum = gopt->rate2quantum) < 1)
1077 		q->rate2quantum = 1;
1078 	q->defcls = gopt->defcls;
1079 
1080 	if (!offload)
1081 		return 0;
1082 
1083 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1084 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1085 		struct Qdisc *qdisc;
1086 
1087 		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1088 					  TC_H_MAKE(sch->handle, 0), extack);
1089 		if (!qdisc) {
1090 			err = -ENOMEM;
1091 			goto err_free_qdiscs;
1092 		}
1093 
1094 		htb_set_lockdep_class_child(qdisc);
1095 		q->direct_qdiscs[ntx] = qdisc;
1096 		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1097 	}
1098 
1099 	sch->flags |= TCQ_F_MQROOT;
1100 
1101 	offload_opt = (struct tc_htb_qopt_offload) {
1102 		.command = TC_HTB_CREATE,
1103 		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
1104 		.classid = TC_H_MIN(q->defcls),
1105 		.extack = extack,
1106 	};
1107 	err = htb_offload(dev, &offload_opt);
1108 	if (err)
1109 		goto err_free_qdiscs;
1110 
1111 	/* Defer this assignment, so that htb_destroy skips offload-related
1112 	 * parts (especially calling ndo_setup_tc) on errors.
1113 	 */
1114 	q->offload = true;
1115 
1116 	return 0;
1117 
1118 err_free_qdiscs:
1119 	for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1120 	     ntx++)
1121 		qdisc_put(q->direct_qdiscs[ntx]);
1122 
1123 	qdisc_class_hash_destroy(&q->clhash);
1124 	/* Prevent use-after-free and double-free when htb_destroy gets called.
1125 	 */
1126 	q->clhash.hash = NULL;
1127 	q->clhash.hashsize = 0;
1128 
1129 err_free_direct_qdiscs:
1130 	kfree(q->direct_qdiscs);
1131 	q->direct_qdiscs = NULL;
1132 	return err;
1133 }
1134 
1135 static void htb_attach_offload(struct Qdisc *sch)
1136 {
1137 	struct net_device *dev = qdisc_dev(sch);
1138 	struct htb_sched *q = qdisc_priv(sch);
1139 	unsigned int ntx;
1140 
1141 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1142 		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1143 
1144 		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1145 		qdisc_put(old);
1146 		qdisc_hash_add(qdisc, false);
1147 	}
1148 	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1149 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1150 		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1151 
1152 		qdisc_put(old);
1153 	}
1154 
1155 	kfree(q->direct_qdiscs);
1156 	q->direct_qdiscs = NULL;
1157 }
1158 
1159 static void htb_attach_software(struct Qdisc *sch)
1160 {
1161 	struct net_device *dev = qdisc_dev(sch);
1162 	unsigned int ntx;
1163 
1164 	/* Resemble qdisc_graft behavior. */
1165 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1166 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1167 		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1168 
1169 		qdisc_refcount_inc(sch);
1170 
1171 		qdisc_put(old);
1172 	}
1173 }
1174 
1175 static void htb_attach(struct Qdisc *sch)
1176 {
1177 	struct htb_sched *q = qdisc_priv(sch);
1178 
1179 	if (q->offload)
1180 		htb_attach_offload(sch);
1181 	else
1182 		htb_attach_software(sch);
1183 }
1184 
1185 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1186 {
1187 	struct htb_sched *q = qdisc_priv(sch);
1188 	struct nlattr *nest;
1189 	struct tc_htb_glob gopt;
1190 
1191 	if (q->offload)
1192 		sch->flags |= TCQ_F_OFFLOADED;
1193 	else
1194 		sch->flags &= ~TCQ_F_OFFLOADED;
1195 
1196 	sch->qstats.overlimits = q->overlimits;
1197 	/* Its safe to not acquire qdisc lock. As we hold RTNL,
1198 	 * no change can happen on the qdisc parameters.
1199 	 */
1200 
1201 	gopt.direct_pkts = q->direct_pkts;
1202 	gopt.version = HTB_VER;
1203 	gopt.rate2quantum = q->rate2quantum;
1204 	gopt.defcls = q->defcls;
1205 	gopt.debug = 0;
1206 
1207 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1208 	if (nest == NULL)
1209 		goto nla_put_failure;
1210 	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1211 	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1212 		goto nla_put_failure;
1213 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1214 		goto nla_put_failure;
1215 
1216 	return nla_nest_end(skb, nest);
1217 
1218 nla_put_failure:
1219 	nla_nest_cancel(skb, nest);
1220 	return -1;
1221 }
1222 
1223 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1224 			  struct sk_buff *skb, struct tcmsg *tcm)
1225 {
1226 	struct htb_class *cl = (struct htb_class *)arg;
1227 	struct htb_sched *q = qdisc_priv(sch);
1228 	struct nlattr *nest;
1229 	struct tc_htb_opt opt;
1230 
1231 	/* Its safe to not acquire qdisc lock. As we hold RTNL,
1232 	 * no change can happen on the class parameters.
1233 	 */
1234 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1235 	tcm->tcm_handle = cl->common.classid;
1236 	if (!cl->level && cl->leaf.q)
1237 		tcm->tcm_info = cl->leaf.q->handle;
1238 
1239 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1240 	if (nest == NULL)
1241 		goto nla_put_failure;
1242 
1243 	memset(&opt, 0, sizeof(opt));
1244 
1245 	psched_ratecfg_getrate(&opt.rate, &cl->rate);
1246 	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1247 	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1248 	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1249 	opt.quantum = cl->quantum;
1250 	opt.prio = cl->prio;
1251 	opt.level = cl->level;
1252 	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1253 		goto nla_put_failure;
1254 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1255 		goto nla_put_failure;
1256 	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1257 	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1258 			      TCA_HTB_PAD))
1259 		goto nla_put_failure;
1260 	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1261 	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1262 			      TCA_HTB_PAD))
1263 		goto nla_put_failure;
1264 
1265 	return nla_nest_end(skb, nest);
1266 
1267 nla_put_failure:
1268 	nla_nest_cancel(skb, nest);
1269 	return -1;
1270 }
1271 
1272 static void htb_offload_aggregate_stats(struct htb_sched *q,
1273 					struct htb_class *cl)
1274 {
1275 	struct htb_class *c;
1276 	unsigned int i;
1277 
1278 	memset(&cl->bstats, 0, sizeof(cl->bstats));
1279 
1280 	for (i = 0; i < q->clhash.hashsize; i++) {
1281 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1282 			struct htb_class *p = c;
1283 
1284 			while (p && p->level < cl->level)
1285 				p = p->parent;
1286 
1287 			if (p != cl)
1288 				continue;
1289 
1290 			cl->bstats.bytes += c->bstats_bias.bytes;
1291 			cl->bstats.packets += c->bstats_bias.packets;
1292 			if (c->level == 0) {
1293 				cl->bstats.bytes += c->leaf.q->bstats.bytes;
1294 				cl->bstats.packets += c->leaf.q->bstats.packets;
1295 			}
1296 		}
1297 	}
1298 }
1299 
1300 static int
1301 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1302 {
1303 	struct htb_class *cl = (struct htb_class *)arg;
1304 	struct htb_sched *q = qdisc_priv(sch);
1305 	struct gnet_stats_queue qs = {
1306 		.drops = cl->drops,
1307 		.overlimits = cl->overlimits,
1308 	};
1309 	__u32 qlen = 0;
1310 
1311 	if (!cl->level && cl->leaf.q)
1312 		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1313 
1314 	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1315 				    INT_MIN, INT_MAX);
1316 	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1317 				     INT_MIN, INT_MAX);
1318 
1319 	if (q->offload) {
1320 		if (!cl->level) {
1321 			if (cl->leaf.q)
1322 				cl->bstats = cl->leaf.q->bstats;
1323 			else
1324 				memset(&cl->bstats, 0, sizeof(cl->bstats));
1325 			cl->bstats.bytes += cl->bstats_bias.bytes;
1326 			cl->bstats.packets += cl->bstats_bias.packets;
1327 		} else {
1328 			htb_offload_aggregate_stats(q, cl);
1329 		}
1330 	}
1331 
1332 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1333 				  d, NULL, &cl->bstats) < 0 ||
1334 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1335 	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1336 		return -1;
1337 
1338 	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1339 }
1340 
1341 static struct netdev_queue *
1342 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1343 {
1344 	struct net_device *dev = qdisc_dev(sch);
1345 	struct tc_htb_qopt_offload offload_opt;
1346 	struct htb_sched *q = qdisc_priv(sch);
1347 	int err;
1348 
1349 	if (!q->offload)
1350 		return sch->dev_queue;
1351 
1352 	offload_opt = (struct tc_htb_qopt_offload) {
1353 		.command = TC_HTB_LEAF_QUERY_QUEUE,
1354 		.classid = TC_H_MIN(tcm->tcm_parent),
1355 	};
1356 	err = htb_offload(dev, &offload_opt);
1357 	if (err || offload_opt.qid >= dev->num_tx_queues)
1358 		return NULL;
1359 	return netdev_get_tx_queue(dev, offload_opt.qid);
1360 }
1361 
1362 static struct Qdisc *
1363 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1364 {
1365 	struct net_device *dev = dev_queue->dev;
1366 	struct Qdisc *old_q;
1367 
1368 	if (dev->flags & IFF_UP)
1369 		dev_deactivate(dev);
1370 	old_q = dev_graft_qdisc(dev_queue, new_q);
1371 	if (new_q)
1372 		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1373 	if (dev->flags & IFF_UP)
1374 		dev_activate(dev);
1375 
1376 	return old_q;
1377 }
1378 
1379 static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
1380 {
1381 	struct netdev_queue *queue_old, *queue_new;
1382 	struct net_device *dev = qdisc_dev(sch);
1383 	struct Qdisc *qdisc;
1384 
1385 	queue_old = netdev_get_tx_queue(dev, qid_old);
1386 	queue_new = netdev_get_tx_queue(dev, qid_new);
1387 
1388 	if (dev->flags & IFF_UP)
1389 		dev_deactivate(dev);
1390 	qdisc = dev_graft_qdisc(queue_old, NULL);
1391 	qdisc->dev_queue = queue_new;
1392 	qdisc = dev_graft_qdisc(queue_new, qdisc);
1393 	if (dev->flags & IFF_UP)
1394 		dev_activate(dev);
1395 
1396 	WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1397 }
1398 
1399 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1400 		     struct Qdisc **old, struct netlink_ext_ack *extack)
1401 {
1402 	struct netdev_queue *dev_queue = sch->dev_queue;
1403 	struct htb_class *cl = (struct htb_class *)arg;
1404 	struct htb_sched *q = qdisc_priv(sch);
1405 	struct Qdisc *old_q;
1406 
1407 	if (cl->level)
1408 		return -EINVAL;
1409 
1410 	if (q->offload) {
1411 		dev_queue = new->dev_queue;
1412 		WARN_ON(dev_queue != cl->leaf.q->dev_queue);
1413 	}
1414 
1415 	if (!new) {
1416 		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1417 					cl->common.classid, extack);
1418 		if (!new)
1419 			return -ENOBUFS;
1420 	}
1421 
1422 	if (q->offload) {
1423 		htb_set_lockdep_class_child(new);
1424 		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1425 		qdisc_refcount_inc(new);
1426 		old_q = htb_graft_helper(dev_queue, new);
1427 	}
1428 
1429 	*old = qdisc_replace(sch, new, &cl->leaf.q);
1430 
1431 	if (q->offload) {
1432 		WARN_ON(old_q != *old);
1433 		qdisc_put(old_q);
1434 	}
1435 
1436 	return 0;
1437 }
1438 
1439 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1440 {
1441 	struct htb_class *cl = (struct htb_class *)arg;
1442 	return !cl->level ? cl->leaf.q : NULL;
1443 }
1444 
1445 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1446 {
1447 	struct htb_class *cl = (struct htb_class *)arg;
1448 
1449 	htb_deactivate(qdisc_priv(sch), cl);
1450 }
1451 
1452 static inline int htb_parent_last_child(struct htb_class *cl)
1453 {
1454 	if (!cl->parent)
1455 		/* the root class */
1456 		return 0;
1457 	if (cl->parent->children > 1)
1458 		/* not the last child */
1459 		return 0;
1460 	return 1;
1461 }
1462 
1463 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1464 			       struct Qdisc *new_q)
1465 {
1466 	struct htb_sched *q = qdisc_priv(sch);
1467 	struct htb_class *parent = cl->parent;
1468 
1469 	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1470 
1471 	if (parent->cmode != HTB_CAN_SEND)
1472 		htb_safe_rb_erase(&parent->pq_node,
1473 				  &q->hlevel[parent->level].wait_pq);
1474 
1475 	parent->level = 0;
1476 	memset(&parent->inner, 0, sizeof(parent->inner));
1477 	parent->leaf.q = new_q ? new_q : &noop_qdisc;
1478 	parent->tokens = parent->buffer;
1479 	parent->ctokens = parent->cbuffer;
1480 	parent->t_c = ktime_get_ns();
1481 	parent->cmode = HTB_CAN_SEND;
1482 }
1483 
1484 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1485 				       struct netdev_queue *dev_queue,
1486 				       struct Qdisc *new_q)
1487 {
1488 	struct Qdisc *old_q;
1489 
1490 	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1491 	qdisc_refcount_inc(new_q);
1492 	old_q = htb_graft_helper(dev_queue, new_q);
1493 	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1494 }
1495 
1496 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1497 				     bool last_child, bool destroying,
1498 				     struct netlink_ext_ack *extack)
1499 {
1500 	struct tc_htb_qopt_offload offload_opt;
1501 	struct Qdisc *q = cl->leaf.q;
1502 	struct Qdisc *old = NULL;
1503 	int err;
1504 
1505 	if (cl->level)
1506 		return -EINVAL;
1507 
1508 	WARN_ON(!q);
1509 	if (!destroying) {
1510 		/* On destroy of HTB, two cases are possible:
1511 		 * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
1512 		 * 2. q is a noop qdisc (for nodes that were inner),
1513 		 *    q->dev_queue is noop_netdev_queue.
1514 		 */
1515 		old = htb_graft_helper(q->dev_queue, NULL);
1516 		WARN_ON(!old);
1517 		WARN_ON(old != q);
1518 	}
1519 
1520 	if (cl->parent) {
1521 		cl->parent->bstats_bias.bytes += q->bstats.bytes;
1522 		cl->parent->bstats_bias.packets += q->bstats.packets;
1523 	}
1524 
1525 	offload_opt = (struct tc_htb_qopt_offload) {
1526 		.command = !last_child ? TC_HTB_LEAF_DEL :
1527 			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1528 			   TC_HTB_LEAF_DEL_LAST,
1529 		.classid = cl->common.classid,
1530 		.extack = extack,
1531 	};
1532 	err = htb_offload(qdisc_dev(sch), &offload_opt);
1533 
1534 	if (!err || destroying)
1535 		qdisc_put(old);
1536 	else
1537 		htb_graft_helper(q->dev_queue, old);
1538 
1539 	if (last_child)
1540 		return err;
1541 
1542 	if (!err && offload_opt.moved_qid != 0) {
1543 		if (destroying)
1544 			q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
1545 							   offload_opt.qid);
1546 		else
1547 			htb_offload_move_qdisc(sch, offload_opt.moved_qid,
1548 					       offload_opt.qid);
1549 	}
1550 
1551 	return err;
1552 }
1553 
1554 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1555 {
1556 	if (!cl->level) {
1557 		WARN_ON(!cl->leaf.q);
1558 		qdisc_put(cl->leaf.q);
1559 	}
1560 	gen_kill_estimator(&cl->rate_est);
1561 	tcf_block_put(cl->block);
1562 	kfree(cl);
1563 }
1564 
1565 static void htb_destroy(struct Qdisc *sch)
1566 {
1567 	struct net_device *dev = qdisc_dev(sch);
1568 	struct tc_htb_qopt_offload offload_opt;
1569 	struct htb_sched *q = qdisc_priv(sch);
1570 	struct hlist_node *next;
1571 	bool nonempty, changed;
1572 	struct htb_class *cl;
1573 	unsigned int i;
1574 
1575 	cancel_work_sync(&q->work);
1576 	qdisc_watchdog_cancel(&q->watchdog);
1577 	/* This line used to be after htb_destroy_class call below
1578 	 * and surprisingly it worked in 2.4. But it must precede it
1579 	 * because filter need its target class alive to be able to call
1580 	 * unbind_filter on it (without Oops).
1581 	 */
1582 	tcf_block_put(q->block);
1583 
1584 	for (i = 0; i < q->clhash.hashsize; i++) {
1585 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1586 			tcf_block_put(cl->block);
1587 			cl->block = NULL;
1588 		}
1589 	}
1590 
1591 	do {
1592 		nonempty = false;
1593 		changed = false;
1594 		for (i = 0; i < q->clhash.hashsize; i++) {
1595 			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1596 						  common.hnode) {
1597 				bool last_child;
1598 
1599 				if (!q->offload) {
1600 					htb_destroy_class(sch, cl);
1601 					continue;
1602 				}
1603 
1604 				nonempty = true;
1605 
1606 				if (cl->level)
1607 					continue;
1608 
1609 				changed = true;
1610 
1611 				last_child = htb_parent_last_child(cl);
1612 				htb_destroy_class_offload(sch, cl, last_child,
1613 							  true, NULL);
1614 				qdisc_class_hash_remove(&q->clhash,
1615 							&cl->common);
1616 				if (cl->parent)
1617 					cl->parent->children--;
1618 				if (last_child)
1619 					htb_parent_to_leaf(sch, cl, NULL);
1620 				htb_destroy_class(sch, cl);
1621 			}
1622 		}
1623 	} while (changed);
1624 	WARN_ON(nonempty);
1625 
1626 	qdisc_class_hash_destroy(&q->clhash);
1627 	__qdisc_reset_queue(&q->direct_queue);
1628 
1629 	if (!q->offload)
1630 		return;
1631 
1632 	offload_opt = (struct tc_htb_qopt_offload) {
1633 		.command = TC_HTB_DESTROY,
1634 	};
1635 	htb_offload(dev, &offload_opt);
1636 
1637 	if (!q->direct_qdiscs)
1638 		return;
1639 	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1640 		qdisc_put(q->direct_qdiscs[i]);
1641 	kfree(q->direct_qdiscs);
1642 }
1643 
1644 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1645 		      struct netlink_ext_ack *extack)
1646 {
1647 	struct htb_sched *q = qdisc_priv(sch);
1648 	struct htb_class *cl = (struct htb_class *)arg;
1649 	struct Qdisc *new_q = NULL;
1650 	int last_child = 0;
1651 	int err;
1652 
1653 	/* TODO: why don't allow to delete subtree ? references ? does
1654 	 * tc subsys guarantee us that in htb_destroy it holds no class
1655 	 * refs so that we can remove children safely there ?
1656 	 */
1657 	if (cl->children || cl->filter_cnt)
1658 		return -EBUSY;
1659 
1660 	if (!cl->level && htb_parent_last_child(cl))
1661 		last_child = 1;
1662 
1663 	if (q->offload) {
1664 		err = htb_destroy_class_offload(sch, cl, last_child, false,
1665 						extack);
1666 		if (err)
1667 			return err;
1668 	}
1669 
1670 	if (last_child) {
1671 		struct netdev_queue *dev_queue;
1672 
1673 		dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
1674 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1675 					  cl->parent->common.classid,
1676 					  NULL);
1677 		if (q->offload) {
1678 			if (new_q) {
1679 				htb_set_lockdep_class_child(new_q);
1680 				htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1681 			}
1682 		}
1683 	}
1684 
1685 	sch_tree_lock(sch);
1686 
1687 	if (!cl->level)
1688 		qdisc_purge_queue(cl->leaf.q);
1689 
1690 	/* delete from hash and active; remainder in destroy_class */
1691 	qdisc_class_hash_remove(&q->clhash, &cl->common);
1692 	if (cl->parent)
1693 		cl->parent->children--;
1694 
1695 	if (cl->prio_activity)
1696 		htb_deactivate(q, cl);
1697 
1698 	if (cl->cmode != HTB_CAN_SEND)
1699 		htb_safe_rb_erase(&cl->pq_node,
1700 				  &q->hlevel[cl->level].wait_pq);
1701 
1702 	if (last_child)
1703 		htb_parent_to_leaf(sch, cl, new_q);
1704 
1705 	sch_tree_unlock(sch);
1706 
1707 	htb_destroy_class(sch, cl);
1708 	return 0;
1709 }
1710 
1711 static int htb_change_class(struct Qdisc *sch, u32 classid,
1712 			    u32 parentid, struct nlattr **tca,
1713 			    unsigned long *arg, struct netlink_ext_ack *extack)
1714 {
1715 	int err = -EINVAL;
1716 	struct htb_sched *q = qdisc_priv(sch);
1717 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
1718 	struct tc_htb_qopt_offload offload_opt;
1719 	struct nlattr *opt = tca[TCA_OPTIONS];
1720 	struct nlattr *tb[TCA_HTB_MAX + 1];
1721 	struct Qdisc *parent_qdisc = NULL;
1722 	struct netdev_queue *dev_queue;
1723 	struct tc_htb_opt *hopt;
1724 	u64 rate64, ceil64;
1725 	int warn = 0;
1726 
1727 	/* extract all subattrs from opt attr */
1728 	if (!opt)
1729 		goto failure;
1730 
1731 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1732 					  NULL);
1733 	if (err < 0)
1734 		goto failure;
1735 
1736 	err = -EINVAL;
1737 	if (tb[TCA_HTB_PARMS] == NULL)
1738 		goto failure;
1739 
1740 	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1741 
1742 	hopt = nla_data(tb[TCA_HTB_PARMS]);
1743 	if (!hopt->rate.rate || !hopt->ceil.rate)
1744 		goto failure;
1745 
1746 	/* Keeping backward compatible with rate_table based iproute2 tc */
1747 	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1748 		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1749 					      NULL));
1750 
1751 	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1752 		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1753 					      NULL));
1754 
1755 	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1756 	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1757 
1758 	if (!cl) {		/* new class */
1759 		struct net_device *dev = qdisc_dev(sch);
1760 		struct Qdisc *new_q, *old_q;
1761 		int prio;
1762 		struct {
1763 			struct nlattr		nla;
1764 			struct gnet_estimator	opt;
1765 		} est = {
1766 			.nla = {
1767 				.nla_len	= nla_attr_size(sizeof(est.opt)),
1768 				.nla_type	= TCA_RATE,
1769 			},
1770 			.opt = {
1771 				/* 4s interval, 16s averaging constant */
1772 				.interval	= 2,
1773 				.ewma_log	= 2,
1774 			},
1775 		};
1776 
1777 		/* check for valid classid */
1778 		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1779 		    htb_find(classid, sch))
1780 			goto failure;
1781 
1782 		/* check maximal depth */
1783 		if (parent && parent->parent && parent->parent->level < 2) {
1784 			pr_err("htb: tree is too deep\n");
1785 			goto failure;
1786 		}
1787 		err = -ENOBUFS;
1788 		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1789 		if (!cl)
1790 			goto failure;
1791 
1792 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1793 		if (err) {
1794 			kfree(cl);
1795 			goto failure;
1796 		}
1797 		if (htb_rate_est || tca[TCA_RATE]) {
1798 			err = gen_new_estimator(&cl->bstats, NULL,
1799 						&cl->rate_est,
1800 						NULL,
1801 						qdisc_root_sleeping_running(sch),
1802 						tca[TCA_RATE] ? : &est.nla);
1803 			if (err)
1804 				goto err_block_put;
1805 		}
1806 
1807 		cl->children = 0;
1808 		RB_CLEAR_NODE(&cl->pq_node);
1809 
1810 		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1811 			RB_CLEAR_NODE(&cl->node[prio]);
1812 
1813 		cl->common.classid = classid;
1814 
1815 		/* Make sure nothing interrupts us in between of two
1816 		 * ndo_setup_tc calls.
1817 		 */
1818 		ASSERT_RTNL();
1819 
1820 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1821 		 * so that can't be used inside of sch_tree_lock
1822 		 * -- thanks to Karlis Peisenieks
1823 		 */
1824 		if (!q->offload) {
1825 			dev_queue = sch->dev_queue;
1826 		} else if (!(parent && !parent->level)) {
1827 			/* Assign a dev_queue to this classid. */
1828 			offload_opt = (struct tc_htb_qopt_offload) {
1829 				.command = TC_HTB_LEAF_ALLOC_QUEUE,
1830 				.classid = cl->common.classid,
1831 				.parent_classid = parent ?
1832 					TC_H_MIN(parent->common.classid) :
1833 					TC_HTB_CLASSID_ROOT,
1834 				.rate = max_t(u64, hopt->rate.rate, rate64),
1835 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1836 				.extack = extack,
1837 			};
1838 			err = htb_offload(dev, &offload_opt);
1839 			if (err) {
1840 				pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1841 				       err);
1842 				goto err_kill_estimator;
1843 			}
1844 			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1845 		} else { /* First child. */
1846 			dev_queue = parent->leaf.q->dev_queue;
1847 			old_q = htb_graft_helper(dev_queue, NULL);
1848 			WARN_ON(old_q != parent->leaf.q);
1849 			offload_opt = (struct tc_htb_qopt_offload) {
1850 				.command = TC_HTB_LEAF_TO_INNER,
1851 				.classid = cl->common.classid,
1852 				.parent_classid =
1853 					TC_H_MIN(parent->common.classid),
1854 				.rate = max_t(u64, hopt->rate.rate, rate64),
1855 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1856 				.extack = extack,
1857 			};
1858 			err = htb_offload(dev, &offload_opt);
1859 			if (err) {
1860 				pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1861 				       err);
1862 				htb_graft_helper(dev_queue, old_q);
1863 				goto err_kill_estimator;
1864 			}
1865 			parent->bstats_bias.bytes += old_q->bstats.bytes;
1866 			parent->bstats_bias.packets += old_q->bstats.packets;
1867 			qdisc_put(old_q);
1868 		}
1869 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1870 					  classid, NULL);
1871 		if (q->offload) {
1872 			if (new_q) {
1873 				htb_set_lockdep_class_child(new_q);
1874 				/* One ref for cl->leaf.q, the other for
1875 				 * dev_queue->qdisc.
1876 				 */
1877 				qdisc_refcount_inc(new_q);
1878 			}
1879 			old_q = htb_graft_helper(dev_queue, new_q);
1880 			/* No qdisc_put needed. */
1881 			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1882 		}
1883 		sch_tree_lock(sch);
1884 		if (parent && !parent->level) {
1885 			/* turn parent into inner node */
1886 			qdisc_purge_queue(parent->leaf.q);
1887 			parent_qdisc = parent->leaf.q;
1888 			if (parent->prio_activity)
1889 				htb_deactivate(q, parent);
1890 
1891 			/* remove from evt list because of level change */
1892 			if (parent->cmode != HTB_CAN_SEND) {
1893 				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1894 				parent->cmode = HTB_CAN_SEND;
1895 			}
1896 			parent->level = (parent->parent ? parent->parent->level
1897 					 : TC_HTB_MAXDEPTH) - 1;
1898 			memset(&parent->inner, 0, sizeof(parent->inner));
1899 		}
1900 
1901 		/* leaf (we) needs elementary qdisc */
1902 		cl->leaf.q = new_q ? new_q : &noop_qdisc;
1903 
1904 		cl->parent = parent;
1905 
1906 		/* set class to be in HTB_CAN_SEND state */
1907 		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1908 		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1909 		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
1910 		cl->t_c = ktime_get_ns();
1911 		cl->cmode = HTB_CAN_SEND;
1912 
1913 		/* attach to the hash list and parent's family */
1914 		qdisc_class_hash_insert(&q->clhash, &cl->common);
1915 		if (parent)
1916 			parent->children++;
1917 		if (cl->leaf.q != &noop_qdisc)
1918 			qdisc_hash_add(cl->leaf.q, true);
1919 	} else {
1920 		if (tca[TCA_RATE]) {
1921 			err = gen_replace_estimator(&cl->bstats, NULL,
1922 						    &cl->rate_est,
1923 						    NULL,
1924 						    qdisc_root_sleeping_running(sch),
1925 						    tca[TCA_RATE]);
1926 			if (err)
1927 				return err;
1928 		}
1929 
1930 		if (q->offload) {
1931 			struct net_device *dev = qdisc_dev(sch);
1932 
1933 			offload_opt = (struct tc_htb_qopt_offload) {
1934 				.command = TC_HTB_NODE_MODIFY,
1935 				.classid = cl->common.classid,
1936 				.rate = max_t(u64, hopt->rate.rate, rate64),
1937 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1938 				.extack = extack,
1939 			};
1940 			err = htb_offload(dev, &offload_opt);
1941 			if (err)
1942 				/* Estimator was replaced, and rollback may fail
1943 				 * as well, so we don't try to recover it, and
1944 				 * the estimator won't work property with the
1945 				 * offload anyway, because bstats are updated
1946 				 * only when the stats are queried.
1947 				 */
1948 				return err;
1949 		}
1950 
1951 		sch_tree_lock(sch);
1952 	}
1953 
1954 	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1955 	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1956 
1957 	/* it used to be a nasty bug here, we have to check that node
1958 	 * is really leaf before changing cl->leaf !
1959 	 */
1960 	if (!cl->level) {
1961 		u64 quantum = cl->rate.rate_bytes_ps;
1962 
1963 		do_div(quantum, q->rate2quantum);
1964 		cl->quantum = min_t(u64, quantum, INT_MAX);
1965 
1966 		if (!hopt->quantum && cl->quantum < 1000) {
1967 			warn = -1;
1968 			cl->quantum = 1000;
1969 		}
1970 		if (!hopt->quantum && cl->quantum > 200000) {
1971 			warn = 1;
1972 			cl->quantum = 200000;
1973 		}
1974 		if (hopt->quantum)
1975 			cl->quantum = hopt->quantum;
1976 		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1977 			cl->prio = TC_HTB_NUMPRIO - 1;
1978 	}
1979 
1980 	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1981 	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
1982 
1983 	sch_tree_unlock(sch);
1984 	qdisc_put(parent_qdisc);
1985 
1986 	if (warn)
1987 		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
1988 			    cl->common.classid, (warn == -1 ? "small" : "big"));
1989 
1990 	qdisc_class_hash_grow(sch, &q->clhash);
1991 
1992 	*arg = (unsigned long)cl;
1993 	return 0;
1994 
1995 err_kill_estimator:
1996 	gen_kill_estimator(&cl->rate_est);
1997 err_block_put:
1998 	tcf_block_put(cl->block);
1999 	kfree(cl);
2000 failure:
2001 	return err;
2002 }
2003 
2004 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2005 				       struct netlink_ext_ack *extack)
2006 {
2007 	struct htb_sched *q = qdisc_priv(sch);
2008 	struct htb_class *cl = (struct htb_class *)arg;
2009 
2010 	return cl ? cl->block : q->block;
2011 }
2012 
2013 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2014 				     u32 classid)
2015 {
2016 	struct htb_class *cl = htb_find(classid, sch);
2017 
2018 	/*if (cl && !cl->level) return 0;
2019 	 * The line above used to be there to prevent attaching filters to
2020 	 * leaves. But at least tc_index filter uses this just to get class
2021 	 * for other reasons so that we have to allow for it.
2022 	 * ----
2023 	 * 19.6.2002 As Werner explained it is ok - bind filter is just
2024 	 * another way to "lock" the class - unlike "get" this lock can
2025 	 * be broken by class during destroy IIUC.
2026 	 */
2027 	if (cl)
2028 		cl->filter_cnt++;
2029 	return (unsigned long)cl;
2030 }
2031 
2032 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2033 {
2034 	struct htb_class *cl = (struct htb_class *)arg;
2035 
2036 	if (cl)
2037 		cl->filter_cnt--;
2038 }
2039 
2040 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2041 {
2042 	struct htb_sched *q = qdisc_priv(sch);
2043 	struct htb_class *cl;
2044 	unsigned int i;
2045 
2046 	if (arg->stop)
2047 		return;
2048 
2049 	for (i = 0; i < q->clhash.hashsize; i++) {
2050 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2051 			if (arg->count < arg->skip) {
2052 				arg->count++;
2053 				continue;
2054 			}
2055 			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2056 				arg->stop = 1;
2057 				return;
2058 			}
2059 			arg->count++;
2060 		}
2061 	}
2062 }
2063 
2064 static const struct Qdisc_class_ops htb_class_ops = {
2065 	.select_queue	=	htb_select_queue,
2066 	.graft		=	htb_graft,
2067 	.leaf		=	htb_leaf,
2068 	.qlen_notify	=	htb_qlen_notify,
2069 	.find		=	htb_search,
2070 	.change		=	htb_change_class,
2071 	.delete		=	htb_delete,
2072 	.walk		=	htb_walk,
2073 	.tcf_block	=	htb_tcf_block,
2074 	.bind_tcf	=	htb_bind_filter,
2075 	.unbind_tcf	=	htb_unbind_filter,
2076 	.dump		=	htb_dump_class,
2077 	.dump_stats	=	htb_dump_class_stats,
2078 };
2079 
2080 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2081 	.cl_ops		=	&htb_class_ops,
2082 	.id		=	"htb",
2083 	.priv_size	=	sizeof(struct htb_sched),
2084 	.enqueue	=	htb_enqueue,
2085 	.dequeue	=	htb_dequeue,
2086 	.peek		=	qdisc_peek_dequeued,
2087 	.init		=	htb_init,
2088 	.attach		=	htb_attach,
2089 	.reset		=	htb_reset,
2090 	.destroy	=	htb_destroy,
2091 	.dump		=	htb_dump,
2092 	.owner		=	THIS_MODULE,
2093 };
2094 
2095 static int __init htb_module_init(void)
2096 {
2097 	return register_qdisc(&htb_qdisc_ops);
2098 }
2099 static void __exit htb_module_exit(void)
2100 {
2101 	unregister_qdisc(&htb_qdisc_ops);
2102 }
2103 
2104 module_init(htb_module_init)
2105 module_exit(htb_module_exit)
2106 MODULE_LICENSE("GPL");
2107