xref: /openbmc/linux/net/sched/sch_htb.c (revision 6a143a7c)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
4  *
5  * Authors:	Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *			HTB support at LARTC mailing list
10  *		Ondrej Kraus, <krauso@barr.cz>
11  *			found missing INIT_QDISC(htb)
12  *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *			helped a lot to locate nasty class stall bug
14  *		Andi Kleen, Jamal Hadi, Bert Hubert
15  *			code review and helpful comments on shaping
16  *		Tomasz Wrona, <tw@eter.tym.pl>
17  *			created test case so that I was able to fix nasty bug
18  *		Wilfried Weissmann
19  *			spotted bug in dequeue code and helped with fix
20  *		Jiri Fojtasek
21  *			fixed requeue routine
22  *		and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40 
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows to assign priority to each class in hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47 
48     Levels:
49     Each class is assigned level. Leaf has ALWAYS level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes has level
51     one less than their parent.
52 */
53 
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011		/* major must be matched with number suplied by TC as version */
56 
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60 
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64 
65 static int htb_rate_est = 0; /* htb classes have a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
68 
69 /* used internaly to keep status of single class */
70 enum htb_cmode {
71 	HTB_CANT_SEND,		/* class can't send and can't borrow */
72 	HTB_MAY_BORROW,		/* class can't send but may borrow */
73 	HTB_CAN_SEND		/* class can send */
74 };
75 
76 struct htb_prio {
77 	union {
78 		struct rb_root	row;
79 		struct rb_root	feed;
80 	};
81 	struct rb_node	*ptr;
82 	/* When class changes from state 1->2 and disconnects from
83 	 * parent's feed then we lost ptr value and start from the
84 	 * first child again. Here we store classid of the
85 	 * last valid ptr (used when ptr is NULL).
86 	 */
87 	u32		last_ptr_id;
88 };
89 
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95 	struct Qdisc_class_common common;
96 	struct psched_ratecfg	rate;
97 	struct psched_ratecfg	ceil;
98 	s64			buffer, cbuffer;/* token bucket depth/rate */
99 	s64			mbuffer;	/* max wait time */
100 	u32			prio;		/* these two are used only by leaves... */
101 	int			quantum;	/* but stored for parent-to-leaf return */
102 
103 	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
104 	struct tcf_block	*block;
105 	int			filter_cnt;
106 
107 	int			level;		/* our level (see above) */
108 	unsigned int		children;
109 	struct htb_class	*parent;	/* parent class */
110 
111 	struct net_rate_estimator __rcu *rate_est;
112 
113 	/*
114 	 * Written often fields
115 	 */
116 	struct gnet_stats_basic_packed bstats;
117 	struct gnet_stats_basic_packed bstats_bias;
118 	struct tc_htb_xstats	xstats;	/* our special stats */
119 
120 	/* token bucket parameters */
121 	s64			tokens, ctokens;/* current number of tokens */
122 	s64			t_c;		/* checkpoint time */
123 
124 	union {
125 		struct htb_class_leaf {
126 			int		deficit[TC_HTB_MAXDEPTH];
127 			struct Qdisc	*q;
128 		} leaf;
129 		struct htb_class_inner {
130 			struct htb_prio clprio[TC_HTB_NUMPRIO];
131 		} inner;
132 	};
133 	s64			pq_key;
134 
135 	int			prio_activity;	/* for which prios are we active */
136 	enum htb_cmode		cmode;		/* current mode of the class */
137 	struct rb_node		pq_node;	/* node for event queue */
138 	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
139 
140 	unsigned int drops ____cacheline_aligned_in_smp;
141 	unsigned int		overlimits;
142 };
143 
144 struct htb_level {
145 	struct rb_root	wait_pq;
146 	struct htb_prio hprio[TC_HTB_NUMPRIO];
147 };
148 
149 struct htb_sched {
150 	struct Qdisc_class_hash clhash;
151 	int			defcls;		/* class where unclassified flows go to */
152 	int			rate2quantum;	/* quant = rate / rate2quantum */
153 
154 	/* filters for qdisc itself */
155 	struct tcf_proto __rcu	*filter_list;
156 	struct tcf_block	*block;
157 
158 #define HTB_WARN_TOOMANYEVENTS	0x1
159 	unsigned int		warned;	/* only one warning */
160 	int			direct_qlen;
161 	struct work_struct	work;
162 
163 	/* non shaped skbs; let them go directly thru */
164 	struct qdisc_skb_head	direct_queue;
165 	u32			direct_pkts;
166 	u32			overlimits;
167 
168 	struct qdisc_watchdog	watchdog;
169 
170 	s64			now;	/* cached dequeue time */
171 
172 	/* time of nearest event per level (row) */
173 	s64			near_ev_cache[TC_HTB_MAXDEPTH];
174 
175 	int			row_mask[TC_HTB_MAXDEPTH];
176 
177 	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
178 
179 	struct Qdisc		**direct_qdiscs;
180 	unsigned int            num_direct_qdiscs;
181 
182 	bool			offload;
183 };
184 
185 /* find class in global hash table using given handle */
186 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
187 {
188 	struct htb_sched *q = qdisc_priv(sch);
189 	struct Qdisc_class_common *clc;
190 
191 	clc = qdisc_class_find(&q->clhash, handle);
192 	if (clc == NULL)
193 		return NULL;
194 	return container_of(clc, struct htb_class, common);
195 }
196 
197 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
198 {
199 	return (unsigned long)htb_find(handle, sch);
200 }
201 /**
202  * htb_classify - classify a packet into class
203  *
204  * It returns NULL if the packet should be dropped or -1 if the packet
205  * should be passed directly thru. In all other cases leaf class is returned.
206  * We allow direct class selection by classid in priority. The we examine
207  * filters in qdisc and in inner nodes (if higher filter points to the inner
208  * node). If we end up with classid MAJOR:0 we enqueue the skb into special
209  * internal fifo (direct). These packets then go directly thru. If we still
210  * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful
211  * then finish and return direct queue.
212  */
213 #define HTB_DIRECT ((struct htb_class *)-1L)
214 
215 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216 				      int *qerr)
217 {
218 	struct htb_sched *q = qdisc_priv(sch);
219 	struct htb_class *cl;
220 	struct tcf_result res;
221 	struct tcf_proto *tcf;
222 	int result;
223 
224 	/* allow to select class by setting skb->priority to valid classid;
225 	 * note that nfmark can be used too by attaching filter fw with no
226 	 * rules in it
227 	 */
228 	if (skb->priority == sch->handle)
229 		return HTB_DIRECT;	/* X:0 (direct flow) selected */
230 	cl = htb_find(skb->priority, sch);
231 	if (cl) {
232 		if (cl->level == 0)
233 			return cl;
234 		/* Start with inner filter chain if a non-leaf class is selected */
235 		tcf = rcu_dereference_bh(cl->filter_list);
236 	} else {
237 		tcf = rcu_dereference_bh(q->filter_list);
238 	}
239 
240 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
241 	while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
242 #ifdef CONFIG_NET_CLS_ACT
243 		switch (result) {
244 		case TC_ACT_QUEUED:
245 		case TC_ACT_STOLEN:
246 		case TC_ACT_TRAP:
247 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
248 			fallthrough;
249 		case TC_ACT_SHOT:
250 			return NULL;
251 		}
252 #endif
253 		cl = (void *)res.class;
254 		if (!cl) {
255 			if (res.classid == sch->handle)
256 				return HTB_DIRECT;	/* X:0 (direct flow) */
257 			cl = htb_find(res.classid, sch);
258 			if (!cl)
259 				break;	/* filter selected invalid classid */
260 		}
261 		if (!cl->level)
262 			return cl;	/* we hit leaf; return it */
263 
264 		/* we have got inner class; apply inner filter chain */
265 		tcf = rcu_dereference_bh(cl->filter_list);
266 	}
267 	/* classification failed; try to use default class */
268 	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
269 	if (!cl || cl->level)
270 		return HTB_DIRECT;	/* bad default .. this is safe bet */
271 	return cl;
272 }
273 
274 /**
275  * htb_add_to_id_tree - adds class to the round robin list
276  *
277  * Routine adds class to the list (actually tree) sorted by classid.
278  * Make sure that class is not already on such list for given prio.
279  */
280 static void htb_add_to_id_tree(struct rb_root *root,
281 			       struct htb_class *cl, int prio)
282 {
283 	struct rb_node **p = &root->rb_node, *parent = NULL;
284 
285 	while (*p) {
286 		struct htb_class *c;
287 		parent = *p;
288 		c = rb_entry(parent, struct htb_class, node[prio]);
289 
290 		if (cl->common.classid > c->common.classid)
291 			p = &parent->rb_right;
292 		else
293 			p = &parent->rb_left;
294 	}
295 	rb_link_node(&cl->node[prio], parent, p);
296 	rb_insert_color(&cl->node[prio], root);
297 }
298 
299 /**
300  * htb_add_to_wait_tree - adds class to the event queue with delay
301  *
302  * The class is added to priority event queue to indicate that class will
303  * change its mode in cl->pq_key microseconds. Make sure that class is not
304  * already in the queue.
305  */
306 static void htb_add_to_wait_tree(struct htb_sched *q,
307 				 struct htb_class *cl, s64 delay)
308 {
309 	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
310 
311 	cl->pq_key = q->now + delay;
312 	if (cl->pq_key == q->now)
313 		cl->pq_key++;
314 
315 	/* update the nearest event cache */
316 	if (q->near_ev_cache[cl->level] > cl->pq_key)
317 		q->near_ev_cache[cl->level] = cl->pq_key;
318 
319 	while (*p) {
320 		struct htb_class *c;
321 		parent = *p;
322 		c = rb_entry(parent, struct htb_class, pq_node);
323 		if (cl->pq_key >= c->pq_key)
324 			p = &parent->rb_right;
325 		else
326 			p = &parent->rb_left;
327 	}
328 	rb_link_node(&cl->pq_node, parent, p);
329 	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
330 }
331 
332 /**
333  * htb_next_rb_node - finds next node in binary tree
334  *
335  * When we are past last key we return NULL.
336  * Average complexity is 2 steps per call.
337  */
338 static inline void htb_next_rb_node(struct rb_node **n)
339 {
340 	*n = rb_next(*n);
341 }
342 
343 /**
344  * htb_add_class_to_row - add class to its row
345  *
346  * The class is added to row at priorities marked in mask.
347  * It does nothing if mask == 0.
348  */
349 static inline void htb_add_class_to_row(struct htb_sched *q,
350 					struct htb_class *cl, int mask)
351 {
352 	q->row_mask[cl->level] |= mask;
353 	while (mask) {
354 		int prio = ffz(~mask);
355 		mask &= ~(1 << prio);
356 		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
357 	}
358 }
359 
360 /* If this triggers, it is a bug in this code, but it need not be fatal */
361 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
362 {
363 	if (RB_EMPTY_NODE(rb)) {
364 		WARN_ON(1);
365 	} else {
366 		rb_erase(rb, root);
367 		RB_CLEAR_NODE(rb);
368 	}
369 }
370 
371 
372 /**
373  * htb_remove_class_from_row - removes class from its row
374  *
375  * The class is removed from row at priorities marked in mask.
376  * It does nothing if mask == 0.
377  */
378 static inline void htb_remove_class_from_row(struct htb_sched *q,
379 						 struct htb_class *cl, int mask)
380 {
381 	int m = 0;
382 	struct htb_level *hlevel = &q->hlevel[cl->level];
383 
384 	while (mask) {
385 		int prio = ffz(~mask);
386 		struct htb_prio *hprio = &hlevel->hprio[prio];
387 
388 		mask &= ~(1 << prio);
389 		if (hprio->ptr == cl->node + prio)
390 			htb_next_rb_node(&hprio->ptr);
391 
392 		htb_safe_rb_erase(cl->node + prio, &hprio->row);
393 		if (!hprio->row.rb_node)
394 			m |= 1 << prio;
395 	}
396 	q->row_mask[cl->level] &= ~m;
397 }
398 
399 /**
400  * htb_activate_prios - creates active classe's feed chain
401  *
402  * The class is connected to ancestors and/or appropriate rows
403  * for priorities it is participating on. cl->cmode must be new
404  * (activated) mode. It does nothing if cl->prio_activity == 0.
405  */
406 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
407 {
408 	struct htb_class *p = cl->parent;
409 	long m, mask = cl->prio_activity;
410 
411 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
412 		m = mask;
413 		while (m) {
414 			int prio = ffz(~m);
415 			m &= ~(1 << prio);
416 
417 			if (p->inner.clprio[prio].feed.rb_node)
418 				/* parent already has its feed in use so that
419 				 * reset bit in mask as parent is already ok
420 				 */
421 				mask &= ~(1 << prio);
422 
423 			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
424 		}
425 		p->prio_activity |= mask;
426 		cl = p;
427 		p = cl->parent;
428 
429 	}
430 	if (cl->cmode == HTB_CAN_SEND && mask)
431 		htb_add_class_to_row(q, cl, mask);
432 }
433 
434 /**
435  * htb_deactivate_prios - remove class from feed chain
436  *
437  * cl->cmode must represent old mode (before deactivation). It does
438  * nothing if cl->prio_activity == 0. Class is removed from all feed
439  * chains and rows.
440  */
441 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
442 {
443 	struct htb_class *p = cl->parent;
444 	long m, mask = cl->prio_activity;
445 
446 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
447 		m = mask;
448 		mask = 0;
449 		while (m) {
450 			int prio = ffz(~m);
451 			m &= ~(1 << prio);
452 
453 			if (p->inner.clprio[prio].ptr == cl->node + prio) {
454 				/* we are removing child which is pointed to from
455 				 * parent feed - forget the pointer but remember
456 				 * classid
457 				 */
458 				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
459 				p->inner.clprio[prio].ptr = NULL;
460 			}
461 
462 			htb_safe_rb_erase(cl->node + prio,
463 					  &p->inner.clprio[prio].feed);
464 
465 			if (!p->inner.clprio[prio].feed.rb_node)
466 				mask |= 1 << prio;
467 		}
468 
469 		p->prio_activity &= ~mask;
470 		cl = p;
471 		p = cl->parent;
472 
473 	}
474 	if (cl->cmode == HTB_CAN_SEND && mask)
475 		htb_remove_class_from_row(q, cl, mask);
476 }
477 
478 static inline s64 htb_lowater(const struct htb_class *cl)
479 {
480 	if (htb_hysteresis)
481 		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
482 	else
483 		return 0;
484 }
485 static inline s64 htb_hiwater(const struct htb_class *cl)
486 {
487 	if (htb_hysteresis)
488 		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
489 	else
490 		return 0;
491 }
492 
493 
494 /**
495  * htb_class_mode - computes and returns current class mode
496  *
497  * It computes cl's mode at time cl->t_c+diff and returns it. If mode
498  * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
499  * from now to time when cl will change its state.
500  * Also it is worth to note that class mode doesn't change simply
501  * at cl->{c,}tokens == 0 but there can rather be hysteresis of
502  * 0 .. -cl->{c,}buffer range. It is meant to limit number of
503  * mode transitions per time unit. The speed gain is about 1/6.
504  */
505 static inline enum htb_cmode
506 htb_class_mode(struct htb_class *cl, s64 *diff)
507 {
508 	s64 toks;
509 
510 	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
511 		*diff = -toks;
512 		return HTB_CANT_SEND;
513 	}
514 
515 	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
516 		return HTB_CAN_SEND;
517 
518 	*diff = -toks;
519 	return HTB_MAY_BORROW;
520 }
521 
522 /**
523  * htb_change_class_mode - changes classe's mode
524  *
525  * This should be the only way how to change classe's mode under normal
526  * cirsumstances. Routine will update feed lists linkage, change mode
527  * and add class to the wait event queue if appropriate. New mode should
528  * be different from old one and cl->pq_key has to be valid if changing
529  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
530  */
531 static void
532 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
533 {
534 	enum htb_cmode new_mode = htb_class_mode(cl, diff);
535 
536 	if (new_mode == cl->cmode)
537 		return;
538 
539 	if (new_mode == HTB_CANT_SEND) {
540 		cl->overlimits++;
541 		q->overlimits++;
542 	}
543 
544 	if (cl->prio_activity) {	/* not necessary: speed optimization */
545 		if (cl->cmode != HTB_CANT_SEND)
546 			htb_deactivate_prios(q, cl);
547 		cl->cmode = new_mode;
548 		if (new_mode != HTB_CANT_SEND)
549 			htb_activate_prios(q, cl);
550 	} else
551 		cl->cmode = new_mode;
552 }
553 
554 /**
555  * htb_activate - inserts leaf cl into appropriate active feeds
556  *
557  * Routine learns (new) priority of leaf and activates feed chain
558  * for the prio. It can be called on already active leaf safely.
559  * It also adds leaf into droplist.
560  */
561 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
562 {
563 	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
564 
565 	if (!cl->prio_activity) {
566 		cl->prio_activity = 1 << cl->prio;
567 		htb_activate_prios(q, cl);
568 	}
569 }
570 
571 /**
572  * htb_deactivate - remove leaf cl from active feeds
573  *
574  * Make sure that leaf is active. In the other words it can't be called
575  * with non-active leaf. It also removes class from the drop list.
576  */
577 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
578 {
579 	WARN_ON(!cl->prio_activity);
580 
581 	htb_deactivate_prios(q, cl);
582 	cl->prio_activity = 0;
583 }
584 
585 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
586 		       struct sk_buff **to_free)
587 {
588 	int ret;
589 	unsigned int len = qdisc_pkt_len(skb);
590 	struct htb_sched *q = qdisc_priv(sch);
591 	struct htb_class *cl = htb_classify(skb, sch, &ret);
592 
593 	if (cl == HTB_DIRECT) {
594 		/* enqueue to helper queue */
595 		if (q->direct_queue.qlen < q->direct_qlen) {
596 			__qdisc_enqueue_tail(skb, &q->direct_queue);
597 			q->direct_pkts++;
598 		} else {
599 			return qdisc_drop(skb, sch, to_free);
600 		}
601 #ifdef CONFIG_NET_CLS_ACT
602 	} else if (!cl) {
603 		if (ret & __NET_XMIT_BYPASS)
604 			qdisc_qstats_drop(sch);
605 		__qdisc_drop(skb, to_free);
606 		return ret;
607 #endif
608 	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
609 					to_free)) != NET_XMIT_SUCCESS) {
610 		if (net_xmit_drop_count(ret)) {
611 			qdisc_qstats_drop(sch);
612 			cl->drops++;
613 		}
614 		return ret;
615 	} else {
616 		htb_activate(q, cl);
617 	}
618 
619 	sch->qstats.backlog += len;
620 	sch->q.qlen++;
621 	return NET_XMIT_SUCCESS;
622 }
623 
624 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
625 {
626 	s64 toks = diff + cl->tokens;
627 
628 	if (toks > cl->buffer)
629 		toks = cl->buffer;
630 	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
631 	if (toks <= -cl->mbuffer)
632 		toks = 1 - cl->mbuffer;
633 
634 	cl->tokens = toks;
635 }
636 
637 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
638 {
639 	s64 toks = diff + cl->ctokens;
640 
641 	if (toks > cl->cbuffer)
642 		toks = cl->cbuffer;
643 	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
644 	if (toks <= -cl->mbuffer)
645 		toks = 1 - cl->mbuffer;
646 
647 	cl->ctokens = toks;
648 }
649 
650 /**
651  * htb_charge_class - charges amount "bytes" to leaf and ancestors
652  *
653  * Routine assumes that packet "bytes" long was dequeued from leaf cl
654  * borrowing from "level". It accounts bytes to ceil leaky bucket for
655  * leaf and all ancestors and to rate bucket for ancestors at levels
656  * "level" and higher. It also handles possible change of mode resulting
657  * from the update. Note that mode can also increase here (MAY_BORROW to
658  * CAN_SEND) because we can use more precise clock that event queue here.
659  * In such case we remove class from event queue first.
660  */
661 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
662 			     int level, struct sk_buff *skb)
663 {
664 	int bytes = qdisc_pkt_len(skb);
665 	enum htb_cmode old_mode;
666 	s64 diff;
667 
668 	while (cl) {
669 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
670 		if (cl->level >= level) {
671 			if (cl->level == level)
672 				cl->xstats.lends++;
673 			htb_accnt_tokens(cl, bytes, diff);
674 		} else {
675 			cl->xstats.borrows++;
676 			cl->tokens += diff;	/* we moved t_c; update tokens */
677 		}
678 		htb_accnt_ctokens(cl, bytes, diff);
679 		cl->t_c = q->now;
680 
681 		old_mode = cl->cmode;
682 		diff = 0;
683 		htb_change_class_mode(q, cl, &diff);
684 		if (old_mode != cl->cmode) {
685 			if (old_mode != HTB_CAN_SEND)
686 				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
687 			if (cl->cmode != HTB_CAN_SEND)
688 				htb_add_to_wait_tree(q, cl, diff);
689 		}
690 
691 		/* update basic stats except for leaves which are already updated */
692 		if (cl->level)
693 			bstats_update(&cl->bstats, skb);
694 
695 		cl = cl->parent;
696 	}
697 }
698 
699 /**
700  * htb_do_events - make mode changes to classes at the level
701  *
702  * Scans event queue for pending events and applies them. Returns time of
703  * next pending event (0 for no event in pq, q->now for too many events).
704  * Note: Applied are events whose have cl->pq_key <= q->now.
705  */
706 static s64 htb_do_events(struct htb_sched *q, const int level,
707 			 unsigned long start)
708 {
709 	/* don't run for longer than 2 jiffies; 2 is used instead of
710 	 * 1 to simplify things when jiffy is going to be incremented
711 	 * too soon
712 	 */
713 	unsigned long stop_at = start + 2;
714 	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
715 
716 	while (time_before(jiffies, stop_at)) {
717 		struct htb_class *cl;
718 		s64 diff;
719 		struct rb_node *p = rb_first(wait_pq);
720 
721 		if (!p)
722 			return 0;
723 
724 		cl = rb_entry(p, struct htb_class, pq_node);
725 		if (cl->pq_key > q->now)
726 			return cl->pq_key;
727 
728 		htb_safe_rb_erase(p, wait_pq);
729 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
730 		htb_change_class_mode(q, cl, &diff);
731 		if (cl->cmode != HTB_CAN_SEND)
732 			htb_add_to_wait_tree(q, cl, diff);
733 	}
734 
735 	/* too much load - let's continue after a break for scheduling */
736 	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
737 		pr_warn("htb: too many events!\n");
738 		q->warned |= HTB_WARN_TOOMANYEVENTS;
739 	}
740 
741 	return q->now;
742 }
743 
744 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
745  * is no such one exists.
746  */
747 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
748 					      u32 id)
749 {
750 	struct rb_node *r = NULL;
751 	while (n) {
752 		struct htb_class *cl =
753 		    rb_entry(n, struct htb_class, node[prio]);
754 
755 		if (id > cl->common.classid) {
756 			n = n->rb_right;
757 		} else if (id < cl->common.classid) {
758 			r = n;
759 			n = n->rb_left;
760 		} else {
761 			return n;
762 		}
763 	}
764 	return r;
765 }
766 
767 /**
768  * htb_lookup_leaf - returns next leaf class in DRR order
769  *
770  * Find leaf where current feed pointers points to.
771  */
772 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
773 {
774 	int i;
775 	struct {
776 		struct rb_node *root;
777 		struct rb_node **pptr;
778 		u32 *pid;
779 	} stk[TC_HTB_MAXDEPTH], *sp = stk;
780 
781 	BUG_ON(!hprio->row.rb_node);
782 	sp->root = hprio->row.rb_node;
783 	sp->pptr = &hprio->ptr;
784 	sp->pid = &hprio->last_ptr_id;
785 
786 	for (i = 0; i < 65535; i++) {
787 		if (!*sp->pptr && *sp->pid) {
788 			/* ptr was invalidated but id is valid - try to recover
789 			 * the original or next ptr
790 			 */
791 			*sp->pptr =
792 			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
793 		}
794 		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
795 				 * can become out of date quickly
796 				 */
797 		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
798 			*sp->pptr = sp->root;
799 			while ((*sp->pptr)->rb_left)
800 				*sp->pptr = (*sp->pptr)->rb_left;
801 			if (sp > stk) {
802 				sp--;
803 				if (!*sp->pptr) {
804 					WARN_ON(1);
805 					return NULL;
806 				}
807 				htb_next_rb_node(sp->pptr);
808 			}
809 		} else {
810 			struct htb_class *cl;
811 			struct htb_prio *clp;
812 
813 			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
814 			if (!cl->level)
815 				return cl;
816 			clp = &cl->inner.clprio[prio];
817 			(++sp)->root = clp->feed.rb_node;
818 			sp->pptr = &clp->ptr;
819 			sp->pid = &clp->last_ptr_id;
820 		}
821 	}
822 	WARN_ON(1);
823 	return NULL;
824 }
825 
826 /* dequeues packet at given priority and level; call only if
827  * you are sure that there is active class at prio/level
828  */
829 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
830 					const int level)
831 {
832 	struct sk_buff *skb = NULL;
833 	struct htb_class *cl, *start;
834 	struct htb_level *hlevel = &q->hlevel[level];
835 	struct htb_prio *hprio = &hlevel->hprio[prio];
836 
837 	/* look initial class up in the row */
838 	start = cl = htb_lookup_leaf(hprio, prio);
839 
840 	do {
841 next:
842 		if (unlikely(!cl))
843 			return NULL;
844 
845 		/* class can be empty - it is unlikely but can be true if leaf
846 		 * qdisc drops packets in enqueue routine or if someone used
847 		 * graft operation on the leaf since last dequeue;
848 		 * simply deactivate and skip such class
849 		 */
850 		if (unlikely(cl->leaf.q->q.qlen == 0)) {
851 			struct htb_class *next;
852 			htb_deactivate(q, cl);
853 
854 			/* row/level might become empty */
855 			if ((q->row_mask[level] & (1 << prio)) == 0)
856 				return NULL;
857 
858 			next = htb_lookup_leaf(hprio, prio);
859 
860 			if (cl == start)	/* fix start if we just deleted it */
861 				start = next;
862 			cl = next;
863 			goto next;
864 		}
865 
866 		skb = cl->leaf.q->dequeue(cl->leaf.q);
867 		if (likely(skb != NULL))
868 			break;
869 
870 		qdisc_warn_nonwc("htb", cl->leaf.q);
871 		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
872 					 &q->hlevel[0].hprio[prio].ptr);
873 		cl = htb_lookup_leaf(hprio, prio);
874 
875 	} while (cl != start);
876 
877 	if (likely(skb != NULL)) {
878 		bstats_update(&cl->bstats, skb);
879 		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
880 		if (cl->leaf.deficit[level] < 0) {
881 			cl->leaf.deficit[level] += cl->quantum;
882 			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
883 						 &q->hlevel[0].hprio[prio].ptr);
884 		}
885 		/* this used to be after charge_class but this constelation
886 		 * gives us slightly better performance
887 		 */
888 		if (!cl->leaf.q->q.qlen)
889 			htb_deactivate(q, cl);
890 		htb_charge_class(q, cl, level, skb);
891 	}
892 	return skb;
893 }
894 
895 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
896 {
897 	struct sk_buff *skb;
898 	struct htb_sched *q = qdisc_priv(sch);
899 	int level;
900 	s64 next_event;
901 	unsigned long start_at;
902 
903 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
904 	skb = __qdisc_dequeue_head(&q->direct_queue);
905 	if (skb != NULL) {
906 ok:
907 		qdisc_bstats_update(sch, skb);
908 		qdisc_qstats_backlog_dec(sch, skb);
909 		sch->q.qlen--;
910 		return skb;
911 	}
912 
913 	if (!sch->q.qlen)
914 		goto fin;
915 	q->now = ktime_get_ns();
916 	start_at = jiffies;
917 
918 	next_event = q->now + 5LLU * NSEC_PER_SEC;
919 
920 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
921 		/* common case optimization - skip event handler quickly */
922 		int m;
923 		s64 event = q->near_ev_cache[level];
924 
925 		if (q->now >= event) {
926 			event = htb_do_events(q, level, start_at);
927 			if (!event)
928 				event = q->now + NSEC_PER_SEC;
929 			q->near_ev_cache[level] = event;
930 		}
931 
932 		if (next_event > event)
933 			next_event = event;
934 
935 		m = ~q->row_mask[level];
936 		while (m != (int)(-1)) {
937 			int prio = ffz(m);
938 
939 			m |= 1 << prio;
940 			skb = htb_dequeue_tree(q, prio, level);
941 			if (likely(skb != NULL))
942 				goto ok;
943 		}
944 	}
945 	if (likely(next_event > q->now))
946 		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
947 	else
948 		schedule_work(&q->work);
949 fin:
950 	return skb;
951 }
952 
953 /* reset all classes */
954 /* always caled under BH & queue lock */
955 static void htb_reset(struct Qdisc *sch)
956 {
957 	struct htb_sched *q = qdisc_priv(sch);
958 	struct htb_class *cl;
959 	unsigned int i;
960 
961 	for (i = 0; i < q->clhash.hashsize; i++) {
962 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
963 			if (cl->level)
964 				memset(&cl->inner, 0, sizeof(cl->inner));
965 			else {
966 				if (cl->leaf.q && !q->offload)
967 					qdisc_reset(cl->leaf.q);
968 			}
969 			cl->prio_activity = 0;
970 			cl->cmode = HTB_CAN_SEND;
971 		}
972 	}
973 	qdisc_watchdog_cancel(&q->watchdog);
974 	__qdisc_reset_queue(&q->direct_queue);
975 	sch->q.qlen = 0;
976 	sch->qstats.backlog = 0;
977 	memset(q->hlevel, 0, sizeof(q->hlevel));
978 	memset(q->row_mask, 0, sizeof(q->row_mask));
979 }
980 
981 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
982 	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
983 	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
984 	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
985 	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
986 	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
987 	[TCA_HTB_RATE64] = { .type = NLA_U64 },
988 	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
989 	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
990 };
991 
992 static void htb_work_func(struct work_struct *work)
993 {
994 	struct htb_sched *q = container_of(work, struct htb_sched, work);
995 	struct Qdisc *sch = q->watchdog.qdisc;
996 
997 	rcu_read_lock();
998 	__netif_schedule(qdisc_root(sch));
999 	rcu_read_unlock();
1000 }
1001 
1002 static void htb_set_lockdep_class_child(struct Qdisc *q)
1003 {
1004 	static struct lock_class_key child_key;
1005 
1006 	lockdep_set_class(qdisc_lock(q), &child_key);
1007 }
1008 
1009 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1010 {
1011 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1012 }
1013 
1014 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1015 		    struct netlink_ext_ack *extack)
1016 {
1017 	struct net_device *dev = qdisc_dev(sch);
1018 	struct tc_htb_qopt_offload offload_opt;
1019 	struct htb_sched *q = qdisc_priv(sch);
1020 	struct nlattr *tb[TCA_HTB_MAX + 1];
1021 	struct tc_htb_glob *gopt;
1022 	unsigned int ntx;
1023 	int err;
1024 
1025 	qdisc_watchdog_init(&q->watchdog, sch);
1026 	INIT_WORK(&q->work, htb_work_func);
1027 
1028 	if (!opt)
1029 		return -EINVAL;
1030 
1031 	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1032 	if (err)
1033 		return err;
1034 
1035 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1036 					  NULL);
1037 	if (err < 0)
1038 		return err;
1039 
1040 	if (!tb[TCA_HTB_INIT])
1041 		return -EINVAL;
1042 
1043 	gopt = nla_data(tb[TCA_HTB_INIT]);
1044 	if (gopt->version != HTB_VER >> 16)
1045 		return -EINVAL;
1046 
1047 	q->offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1048 
1049 	if (q->offload) {
1050 		if (sch->parent != TC_H_ROOT)
1051 			return -EOPNOTSUPP;
1052 
1053 		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1054 			return -EOPNOTSUPP;
1055 
1056 		q->num_direct_qdiscs = dev->real_num_tx_queues;
1057 		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1058 					   sizeof(*q->direct_qdiscs),
1059 					   GFP_KERNEL);
1060 		if (!q->direct_qdiscs)
1061 			return -ENOMEM;
1062 	}
1063 
1064 	err = qdisc_class_hash_init(&q->clhash);
1065 	if (err < 0)
1066 		goto err_free_direct_qdiscs;
1067 
1068 	qdisc_skb_head_init(&q->direct_queue);
1069 
1070 	if (tb[TCA_HTB_DIRECT_QLEN])
1071 		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1072 	else
1073 		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1074 
1075 	if ((q->rate2quantum = gopt->rate2quantum) < 1)
1076 		q->rate2quantum = 1;
1077 	q->defcls = gopt->defcls;
1078 
1079 	if (!q->offload)
1080 		return 0;
1081 
1082 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1083 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1084 		struct Qdisc *qdisc;
1085 
1086 		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1087 					  TC_H_MAKE(sch->handle, 0), extack);
1088 		if (!qdisc) {
1089 			err = -ENOMEM;
1090 			goto err_free_qdiscs;
1091 		}
1092 
1093 		htb_set_lockdep_class_child(qdisc);
1094 		q->direct_qdiscs[ntx] = qdisc;
1095 		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1096 	}
1097 
1098 	sch->flags |= TCQ_F_MQROOT;
1099 
1100 	offload_opt = (struct tc_htb_qopt_offload) {
1101 		.command = TC_HTB_CREATE,
1102 		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
1103 		.classid = TC_H_MIN(q->defcls),
1104 		.extack = extack,
1105 	};
1106 	err = htb_offload(dev, &offload_opt);
1107 	if (err)
1108 		goto err_free_qdiscs;
1109 
1110 	return 0;
1111 
1112 err_free_qdiscs:
1113 	/* TC_HTB_CREATE call failed, avoid any further calls to the driver. */
1114 	q->offload = false;
1115 
1116 	for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1117 	     ntx++)
1118 		qdisc_put(q->direct_qdiscs[ntx]);
1119 
1120 	qdisc_class_hash_destroy(&q->clhash);
1121 	/* Prevent use-after-free and double-free when htb_destroy gets called.
1122 	 */
1123 	q->clhash.hash = NULL;
1124 	q->clhash.hashsize = 0;
1125 
1126 err_free_direct_qdiscs:
1127 	kfree(q->direct_qdiscs);
1128 	q->direct_qdiscs = NULL;
1129 	return err;
1130 }
1131 
1132 static void htb_attach_offload(struct Qdisc *sch)
1133 {
1134 	struct net_device *dev = qdisc_dev(sch);
1135 	struct htb_sched *q = qdisc_priv(sch);
1136 	unsigned int ntx;
1137 
1138 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1139 		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1140 
1141 		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1142 		qdisc_put(old);
1143 		qdisc_hash_add(qdisc, false);
1144 	}
1145 	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1146 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1147 		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1148 
1149 		qdisc_put(old);
1150 	}
1151 
1152 	kfree(q->direct_qdiscs);
1153 	q->direct_qdiscs = NULL;
1154 }
1155 
1156 static void htb_attach_software(struct Qdisc *sch)
1157 {
1158 	struct net_device *dev = qdisc_dev(sch);
1159 	unsigned int ntx;
1160 
1161 	/* Resemble qdisc_graft behavior. */
1162 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1163 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1164 		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1165 
1166 		qdisc_refcount_inc(sch);
1167 
1168 		qdisc_put(old);
1169 	}
1170 }
1171 
1172 static void htb_attach(struct Qdisc *sch)
1173 {
1174 	struct htb_sched *q = qdisc_priv(sch);
1175 
1176 	if (q->offload)
1177 		htb_attach_offload(sch);
1178 	else
1179 		htb_attach_software(sch);
1180 }
1181 
1182 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1183 {
1184 	struct htb_sched *q = qdisc_priv(sch);
1185 	struct nlattr *nest;
1186 	struct tc_htb_glob gopt;
1187 
1188 	if (q->offload)
1189 		sch->flags |= TCQ_F_OFFLOADED;
1190 	else
1191 		sch->flags &= ~TCQ_F_OFFLOADED;
1192 
1193 	sch->qstats.overlimits = q->overlimits;
1194 	/* Its safe to not acquire qdisc lock. As we hold RTNL,
1195 	 * no change can happen on the qdisc parameters.
1196 	 */
1197 
1198 	gopt.direct_pkts = q->direct_pkts;
1199 	gopt.version = HTB_VER;
1200 	gopt.rate2quantum = q->rate2quantum;
1201 	gopt.defcls = q->defcls;
1202 	gopt.debug = 0;
1203 
1204 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1205 	if (nest == NULL)
1206 		goto nla_put_failure;
1207 	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1208 	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1209 		goto nla_put_failure;
1210 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1211 		goto nla_put_failure;
1212 
1213 	return nla_nest_end(skb, nest);
1214 
1215 nla_put_failure:
1216 	nla_nest_cancel(skb, nest);
1217 	return -1;
1218 }
1219 
1220 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1221 			  struct sk_buff *skb, struct tcmsg *tcm)
1222 {
1223 	struct htb_class *cl = (struct htb_class *)arg;
1224 	struct htb_sched *q = qdisc_priv(sch);
1225 	struct nlattr *nest;
1226 	struct tc_htb_opt opt;
1227 
1228 	/* Its safe to not acquire qdisc lock. As we hold RTNL,
1229 	 * no change can happen on the class parameters.
1230 	 */
1231 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1232 	tcm->tcm_handle = cl->common.classid;
1233 	if (!cl->level && cl->leaf.q)
1234 		tcm->tcm_info = cl->leaf.q->handle;
1235 
1236 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1237 	if (nest == NULL)
1238 		goto nla_put_failure;
1239 
1240 	memset(&opt, 0, sizeof(opt));
1241 
1242 	psched_ratecfg_getrate(&opt.rate, &cl->rate);
1243 	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1244 	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1245 	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1246 	opt.quantum = cl->quantum;
1247 	opt.prio = cl->prio;
1248 	opt.level = cl->level;
1249 	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1250 		goto nla_put_failure;
1251 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1252 		goto nla_put_failure;
1253 	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1254 	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1255 			      TCA_HTB_PAD))
1256 		goto nla_put_failure;
1257 	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1258 	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1259 			      TCA_HTB_PAD))
1260 		goto nla_put_failure;
1261 
1262 	return nla_nest_end(skb, nest);
1263 
1264 nla_put_failure:
1265 	nla_nest_cancel(skb, nest);
1266 	return -1;
1267 }
1268 
1269 static void htb_offload_aggregate_stats(struct htb_sched *q,
1270 					struct htb_class *cl)
1271 {
1272 	struct htb_class *c;
1273 	unsigned int i;
1274 
1275 	memset(&cl->bstats, 0, sizeof(cl->bstats));
1276 
1277 	for (i = 0; i < q->clhash.hashsize; i++) {
1278 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1279 			struct htb_class *p = c;
1280 
1281 			while (p && p->level < cl->level)
1282 				p = p->parent;
1283 
1284 			if (p != cl)
1285 				continue;
1286 
1287 			cl->bstats.bytes += c->bstats_bias.bytes;
1288 			cl->bstats.packets += c->bstats_bias.packets;
1289 			if (c->level == 0) {
1290 				cl->bstats.bytes += c->leaf.q->bstats.bytes;
1291 				cl->bstats.packets += c->leaf.q->bstats.packets;
1292 			}
1293 		}
1294 	}
1295 }
1296 
1297 static int
1298 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1299 {
1300 	struct htb_class *cl = (struct htb_class *)arg;
1301 	struct htb_sched *q = qdisc_priv(sch);
1302 	struct gnet_stats_queue qs = {
1303 		.drops = cl->drops,
1304 		.overlimits = cl->overlimits,
1305 	};
1306 	__u32 qlen = 0;
1307 
1308 	if (!cl->level && cl->leaf.q)
1309 		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1310 
1311 	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1312 				    INT_MIN, INT_MAX);
1313 	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1314 				     INT_MIN, INT_MAX);
1315 
1316 	if (q->offload) {
1317 		if (!cl->level) {
1318 			if (cl->leaf.q)
1319 				cl->bstats = cl->leaf.q->bstats;
1320 			else
1321 				memset(&cl->bstats, 0, sizeof(cl->bstats));
1322 			cl->bstats.bytes += cl->bstats_bias.bytes;
1323 			cl->bstats.packets += cl->bstats_bias.packets;
1324 		} else {
1325 			htb_offload_aggregate_stats(q, cl);
1326 		}
1327 	}
1328 
1329 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1330 				  d, NULL, &cl->bstats) < 0 ||
1331 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1332 	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1333 		return -1;
1334 
1335 	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1336 }
1337 
1338 static struct netdev_queue *
1339 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1340 {
1341 	struct net_device *dev = qdisc_dev(sch);
1342 	struct tc_htb_qopt_offload offload_opt;
1343 	int err;
1344 
1345 	offload_opt = (struct tc_htb_qopt_offload) {
1346 		.command = TC_HTB_LEAF_QUERY_QUEUE,
1347 		.classid = TC_H_MIN(tcm->tcm_parent),
1348 	};
1349 	err = htb_offload(dev, &offload_opt);
1350 	if (err || offload_opt.qid >= dev->num_tx_queues)
1351 		return NULL;
1352 	return netdev_get_tx_queue(dev, offload_opt.qid);
1353 }
1354 
1355 static struct Qdisc *
1356 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1357 {
1358 	struct net_device *dev = dev_queue->dev;
1359 	struct Qdisc *old_q;
1360 
1361 	if (dev->flags & IFF_UP)
1362 		dev_deactivate(dev);
1363 	old_q = dev_graft_qdisc(dev_queue, new_q);
1364 	if (new_q)
1365 		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1366 	if (dev->flags & IFF_UP)
1367 		dev_activate(dev);
1368 
1369 	return old_q;
1370 }
1371 
1372 static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
1373 {
1374 	struct netdev_queue *queue_old, *queue_new;
1375 	struct net_device *dev = qdisc_dev(sch);
1376 	struct Qdisc *qdisc;
1377 
1378 	queue_old = netdev_get_tx_queue(dev, qid_old);
1379 	queue_new = netdev_get_tx_queue(dev, qid_new);
1380 
1381 	if (dev->flags & IFF_UP)
1382 		dev_deactivate(dev);
1383 	qdisc = dev_graft_qdisc(queue_old, NULL);
1384 	qdisc->dev_queue = queue_new;
1385 	qdisc = dev_graft_qdisc(queue_new, qdisc);
1386 	if (dev->flags & IFF_UP)
1387 		dev_activate(dev);
1388 
1389 	WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1390 }
1391 
1392 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1393 		     struct Qdisc **old, struct netlink_ext_ack *extack)
1394 {
1395 	struct netdev_queue *dev_queue = sch->dev_queue;
1396 	struct htb_class *cl = (struct htb_class *)arg;
1397 	struct htb_sched *q = qdisc_priv(sch);
1398 	struct Qdisc *old_q;
1399 
1400 	if (cl->level)
1401 		return -EINVAL;
1402 
1403 	if (q->offload) {
1404 		dev_queue = new->dev_queue;
1405 		WARN_ON(dev_queue != cl->leaf.q->dev_queue);
1406 	}
1407 
1408 	if (!new) {
1409 		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1410 					cl->common.classid, extack);
1411 		if (!new)
1412 			return -ENOBUFS;
1413 	}
1414 
1415 	if (q->offload) {
1416 		htb_set_lockdep_class_child(new);
1417 		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1418 		qdisc_refcount_inc(new);
1419 		old_q = htb_graft_helper(dev_queue, new);
1420 	}
1421 
1422 	*old = qdisc_replace(sch, new, &cl->leaf.q);
1423 
1424 	if (q->offload) {
1425 		WARN_ON(old_q != *old);
1426 		qdisc_put(old_q);
1427 	}
1428 
1429 	return 0;
1430 }
1431 
1432 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1433 {
1434 	struct htb_class *cl = (struct htb_class *)arg;
1435 	return !cl->level ? cl->leaf.q : NULL;
1436 }
1437 
1438 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1439 {
1440 	struct htb_class *cl = (struct htb_class *)arg;
1441 
1442 	htb_deactivate(qdisc_priv(sch), cl);
1443 }
1444 
1445 static inline int htb_parent_last_child(struct htb_class *cl)
1446 {
1447 	if (!cl->parent)
1448 		/* the root class */
1449 		return 0;
1450 	if (cl->parent->children > 1)
1451 		/* not the last child */
1452 		return 0;
1453 	return 1;
1454 }
1455 
1456 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1457 			       struct Qdisc *new_q)
1458 {
1459 	struct htb_sched *q = qdisc_priv(sch);
1460 	struct htb_class *parent = cl->parent;
1461 
1462 	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1463 
1464 	if (parent->cmode != HTB_CAN_SEND)
1465 		htb_safe_rb_erase(&parent->pq_node,
1466 				  &q->hlevel[parent->level].wait_pq);
1467 
1468 	parent->level = 0;
1469 	memset(&parent->inner, 0, sizeof(parent->inner));
1470 	parent->leaf.q = new_q ? new_q : &noop_qdisc;
1471 	parent->tokens = parent->buffer;
1472 	parent->ctokens = parent->cbuffer;
1473 	parent->t_c = ktime_get_ns();
1474 	parent->cmode = HTB_CAN_SEND;
1475 }
1476 
1477 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1478 				       struct netdev_queue *dev_queue,
1479 				       struct Qdisc *new_q)
1480 {
1481 	struct Qdisc *old_q;
1482 
1483 	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1484 	qdisc_refcount_inc(new_q);
1485 	old_q = htb_graft_helper(dev_queue, new_q);
1486 	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1487 }
1488 
1489 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1490 				     bool last_child, bool destroying,
1491 				     struct netlink_ext_ack *extack)
1492 {
1493 	struct tc_htb_qopt_offload offload_opt;
1494 	struct Qdisc *q = cl->leaf.q;
1495 	struct Qdisc *old = NULL;
1496 	int err;
1497 
1498 	if (cl->level)
1499 		return -EINVAL;
1500 
1501 	WARN_ON(!q);
1502 	if (!destroying) {
1503 		/* On destroy of HTB, two cases are possible:
1504 		 * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
1505 		 * 2. q is a noop qdisc (for nodes that were inner),
1506 		 *    q->dev_queue is noop_netdev_queue.
1507 		 */
1508 		old = htb_graft_helper(q->dev_queue, NULL);
1509 		WARN_ON(!old);
1510 		WARN_ON(old != q);
1511 	}
1512 
1513 	if (cl->parent) {
1514 		cl->parent->bstats_bias.bytes += q->bstats.bytes;
1515 		cl->parent->bstats_bias.packets += q->bstats.packets;
1516 	}
1517 
1518 	offload_opt = (struct tc_htb_qopt_offload) {
1519 		.command = !last_child ? TC_HTB_LEAF_DEL :
1520 			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1521 			   TC_HTB_LEAF_DEL_LAST,
1522 		.classid = cl->common.classid,
1523 		.extack = extack,
1524 	};
1525 	err = htb_offload(qdisc_dev(sch), &offload_opt);
1526 
1527 	if (!err || destroying)
1528 		qdisc_put(old);
1529 	else
1530 		htb_graft_helper(q->dev_queue, old);
1531 
1532 	if (last_child)
1533 		return err;
1534 
1535 	if (!err && offload_opt.moved_qid != 0) {
1536 		if (destroying)
1537 			q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
1538 							   offload_opt.qid);
1539 		else
1540 			htb_offload_move_qdisc(sch, offload_opt.moved_qid,
1541 					       offload_opt.qid);
1542 	}
1543 
1544 	return err;
1545 }
1546 
1547 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1548 {
1549 	if (!cl->level) {
1550 		WARN_ON(!cl->leaf.q);
1551 		qdisc_put(cl->leaf.q);
1552 	}
1553 	gen_kill_estimator(&cl->rate_est);
1554 	tcf_block_put(cl->block);
1555 	kfree(cl);
1556 }
1557 
1558 static void htb_destroy(struct Qdisc *sch)
1559 {
1560 	struct net_device *dev = qdisc_dev(sch);
1561 	struct tc_htb_qopt_offload offload_opt;
1562 	struct htb_sched *q = qdisc_priv(sch);
1563 	struct hlist_node *next;
1564 	bool nonempty, changed;
1565 	struct htb_class *cl;
1566 	unsigned int i;
1567 
1568 	cancel_work_sync(&q->work);
1569 	qdisc_watchdog_cancel(&q->watchdog);
1570 	/* This line used to be after htb_destroy_class call below
1571 	 * and surprisingly it worked in 2.4. But it must precede it
1572 	 * because filter need its target class alive to be able to call
1573 	 * unbind_filter on it (without Oops).
1574 	 */
1575 	tcf_block_put(q->block);
1576 
1577 	for (i = 0; i < q->clhash.hashsize; i++) {
1578 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1579 			tcf_block_put(cl->block);
1580 			cl->block = NULL;
1581 		}
1582 	}
1583 
1584 	do {
1585 		nonempty = false;
1586 		changed = false;
1587 		for (i = 0; i < q->clhash.hashsize; i++) {
1588 			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1589 						  common.hnode) {
1590 				bool last_child;
1591 
1592 				if (!q->offload) {
1593 					htb_destroy_class(sch, cl);
1594 					continue;
1595 				}
1596 
1597 				nonempty = true;
1598 
1599 				if (cl->level)
1600 					continue;
1601 
1602 				changed = true;
1603 
1604 				last_child = htb_parent_last_child(cl);
1605 				htb_destroy_class_offload(sch, cl, last_child,
1606 							  true, NULL);
1607 				qdisc_class_hash_remove(&q->clhash,
1608 							&cl->common);
1609 				if (cl->parent)
1610 					cl->parent->children--;
1611 				if (last_child)
1612 					htb_parent_to_leaf(sch, cl, NULL);
1613 				htb_destroy_class(sch, cl);
1614 			}
1615 		}
1616 	} while (changed);
1617 	WARN_ON(nonempty);
1618 
1619 	qdisc_class_hash_destroy(&q->clhash);
1620 	__qdisc_reset_queue(&q->direct_queue);
1621 
1622 	if (!q->offload)
1623 		return;
1624 
1625 	offload_opt = (struct tc_htb_qopt_offload) {
1626 		.command = TC_HTB_DESTROY,
1627 	};
1628 	htb_offload(dev, &offload_opt);
1629 
1630 	if (!q->direct_qdiscs)
1631 		return;
1632 	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1633 		qdisc_put(q->direct_qdiscs[i]);
1634 	kfree(q->direct_qdiscs);
1635 }
1636 
1637 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1638 		      struct netlink_ext_ack *extack)
1639 {
1640 	struct htb_sched *q = qdisc_priv(sch);
1641 	struct htb_class *cl = (struct htb_class *)arg;
1642 	struct Qdisc *new_q = NULL;
1643 	int last_child = 0;
1644 	int err;
1645 
1646 	/* TODO: why don't allow to delete subtree ? references ? does
1647 	 * tc subsys guarantee us that in htb_destroy it holds no class
1648 	 * refs so that we can remove children safely there ?
1649 	 */
1650 	if (cl->children || cl->filter_cnt)
1651 		return -EBUSY;
1652 
1653 	if (!cl->level && htb_parent_last_child(cl))
1654 		last_child = 1;
1655 
1656 	if (q->offload) {
1657 		err = htb_destroy_class_offload(sch, cl, last_child, false,
1658 						extack);
1659 		if (err)
1660 			return err;
1661 	}
1662 
1663 	if (last_child) {
1664 		struct netdev_queue *dev_queue;
1665 
1666 		dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
1667 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1668 					  cl->parent->common.classid,
1669 					  NULL);
1670 		if (q->offload) {
1671 			if (new_q)
1672 				htb_set_lockdep_class_child(new_q);
1673 			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1674 		}
1675 	}
1676 
1677 	sch_tree_lock(sch);
1678 
1679 	if (!cl->level)
1680 		qdisc_purge_queue(cl->leaf.q);
1681 
1682 	/* delete from hash and active; remainder in destroy_class */
1683 	qdisc_class_hash_remove(&q->clhash, &cl->common);
1684 	if (cl->parent)
1685 		cl->parent->children--;
1686 
1687 	if (cl->prio_activity)
1688 		htb_deactivate(q, cl);
1689 
1690 	if (cl->cmode != HTB_CAN_SEND)
1691 		htb_safe_rb_erase(&cl->pq_node,
1692 				  &q->hlevel[cl->level].wait_pq);
1693 
1694 	if (last_child)
1695 		htb_parent_to_leaf(sch, cl, new_q);
1696 
1697 	sch_tree_unlock(sch);
1698 
1699 	htb_destroy_class(sch, cl);
1700 	return 0;
1701 }
1702 
1703 static int htb_change_class(struct Qdisc *sch, u32 classid,
1704 			    u32 parentid, struct nlattr **tca,
1705 			    unsigned long *arg, struct netlink_ext_ack *extack)
1706 {
1707 	int err = -EINVAL;
1708 	struct htb_sched *q = qdisc_priv(sch);
1709 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
1710 	struct tc_htb_qopt_offload offload_opt;
1711 	struct nlattr *opt = tca[TCA_OPTIONS];
1712 	struct nlattr *tb[TCA_HTB_MAX + 1];
1713 	struct Qdisc *parent_qdisc = NULL;
1714 	struct netdev_queue *dev_queue;
1715 	struct tc_htb_opt *hopt;
1716 	u64 rate64, ceil64;
1717 	int warn = 0;
1718 
1719 	/* extract all subattrs from opt attr */
1720 	if (!opt)
1721 		goto failure;
1722 
1723 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1724 					  NULL);
1725 	if (err < 0)
1726 		goto failure;
1727 
1728 	err = -EINVAL;
1729 	if (tb[TCA_HTB_PARMS] == NULL)
1730 		goto failure;
1731 
1732 	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1733 
1734 	hopt = nla_data(tb[TCA_HTB_PARMS]);
1735 	if (!hopt->rate.rate || !hopt->ceil.rate)
1736 		goto failure;
1737 
1738 	/* Keeping backward compatible with rate_table based iproute2 tc */
1739 	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1740 		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1741 					      NULL));
1742 
1743 	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1744 		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1745 					      NULL));
1746 
1747 	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1748 	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1749 
1750 	if (!cl) {		/* new class */
1751 		struct net_device *dev = qdisc_dev(sch);
1752 		struct Qdisc *new_q, *old_q;
1753 		int prio;
1754 		struct {
1755 			struct nlattr		nla;
1756 			struct gnet_estimator	opt;
1757 		} est = {
1758 			.nla = {
1759 				.nla_len	= nla_attr_size(sizeof(est.opt)),
1760 				.nla_type	= TCA_RATE,
1761 			},
1762 			.opt = {
1763 				/* 4s interval, 16s averaging constant */
1764 				.interval	= 2,
1765 				.ewma_log	= 2,
1766 			},
1767 		};
1768 
1769 		/* check for valid classid */
1770 		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1771 		    htb_find(classid, sch))
1772 			goto failure;
1773 
1774 		/* check maximal depth */
1775 		if (parent && parent->parent && parent->parent->level < 2) {
1776 			pr_err("htb: tree is too deep\n");
1777 			goto failure;
1778 		}
1779 		err = -ENOBUFS;
1780 		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1781 		if (!cl)
1782 			goto failure;
1783 
1784 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1785 		if (err) {
1786 			kfree(cl);
1787 			goto failure;
1788 		}
1789 		if (htb_rate_est || tca[TCA_RATE]) {
1790 			err = gen_new_estimator(&cl->bstats, NULL,
1791 						&cl->rate_est,
1792 						NULL,
1793 						qdisc_root_sleeping_running(sch),
1794 						tca[TCA_RATE] ? : &est.nla);
1795 			if (err)
1796 				goto err_block_put;
1797 		}
1798 
1799 		cl->children = 0;
1800 		RB_CLEAR_NODE(&cl->pq_node);
1801 
1802 		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1803 			RB_CLEAR_NODE(&cl->node[prio]);
1804 
1805 		cl->common.classid = classid;
1806 
1807 		/* Make sure nothing interrupts us in between of two
1808 		 * ndo_setup_tc calls.
1809 		 */
1810 		ASSERT_RTNL();
1811 
1812 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1813 		 * so that can't be used inside of sch_tree_lock
1814 		 * -- thanks to Karlis Peisenieks
1815 		 */
1816 		if (!q->offload) {
1817 			dev_queue = sch->dev_queue;
1818 		} else if (!(parent && !parent->level)) {
1819 			/* Assign a dev_queue to this classid. */
1820 			offload_opt = (struct tc_htb_qopt_offload) {
1821 				.command = TC_HTB_LEAF_ALLOC_QUEUE,
1822 				.classid = cl->common.classid,
1823 				.parent_classid = parent ?
1824 					TC_H_MIN(parent->common.classid) :
1825 					TC_HTB_CLASSID_ROOT,
1826 				.rate = max_t(u64, hopt->rate.rate, rate64),
1827 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1828 				.extack = extack,
1829 			};
1830 			err = htb_offload(dev, &offload_opt);
1831 			if (err) {
1832 				pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1833 				       err);
1834 				goto err_kill_estimator;
1835 			}
1836 			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1837 		} else { /* First child. */
1838 			dev_queue = parent->leaf.q->dev_queue;
1839 			old_q = htb_graft_helper(dev_queue, NULL);
1840 			WARN_ON(old_q != parent->leaf.q);
1841 			offload_opt = (struct tc_htb_qopt_offload) {
1842 				.command = TC_HTB_LEAF_TO_INNER,
1843 				.classid = cl->common.classid,
1844 				.parent_classid =
1845 					TC_H_MIN(parent->common.classid),
1846 				.rate = max_t(u64, hopt->rate.rate, rate64),
1847 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1848 				.extack = extack,
1849 			};
1850 			err = htb_offload(dev, &offload_opt);
1851 			if (err) {
1852 				pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1853 				       err);
1854 				htb_graft_helper(dev_queue, old_q);
1855 				goto err_kill_estimator;
1856 			}
1857 			parent->bstats_bias.bytes += old_q->bstats.bytes;
1858 			parent->bstats_bias.packets += old_q->bstats.packets;
1859 			qdisc_put(old_q);
1860 		}
1861 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1862 					  classid, NULL);
1863 		if (q->offload) {
1864 			if (new_q) {
1865 				htb_set_lockdep_class_child(new_q);
1866 				/* One ref for cl->leaf.q, the other for
1867 				 * dev_queue->qdisc.
1868 				 */
1869 				qdisc_refcount_inc(new_q);
1870 			}
1871 			old_q = htb_graft_helper(dev_queue, new_q);
1872 			/* No qdisc_put needed. */
1873 			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1874 		}
1875 		sch_tree_lock(sch);
1876 		if (parent && !parent->level) {
1877 			/* turn parent into inner node */
1878 			qdisc_purge_queue(parent->leaf.q);
1879 			parent_qdisc = parent->leaf.q;
1880 			if (parent->prio_activity)
1881 				htb_deactivate(q, parent);
1882 
1883 			/* remove from evt list because of level change */
1884 			if (parent->cmode != HTB_CAN_SEND) {
1885 				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1886 				parent->cmode = HTB_CAN_SEND;
1887 			}
1888 			parent->level = (parent->parent ? parent->parent->level
1889 					 : TC_HTB_MAXDEPTH) - 1;
1890 			memset(&parent->inner, 0, sizeof(parent->inner));
1891 		}
1892 
1893 		/* leaf (we) needs elementary qdisc */
1894 		cl->leaf.q = new_q ? new_q : &noop_qdisc;
1895 
1896 		cl->parent = parent;
1897 
1898 		/* set class to be in HTB_CAN_SEND state */
1899 		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1900 		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1901 		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
1902 		cl->t_c = ktime_get_ns();
1903 		cl->cmode = HTB_CAN_SEND;
1904 
1905 		/* attach to the hash list and parent's family */
1906 		qdisc_class_hash_insert(&q->clhash, &cl->common);
1907 		if (parent)
1908 			parent->children++;
1909 		if (cl->leaf.q != &noop_qdisc)
1910 			qdisc_hash_add(cl->leaf.q, true);
1911 	} else {
1912 		if (tca[TCA_RATE]) {
1913 			err = gen_replace_estimator(&cl->bstats, NULL,
1914 						    &cl->rate_est,
1915 						    NULL,
1916 						    qdisc_root_sleeping_running(sch),
1917 						    tca[TCA_RATE]);
1918 			if (err)
1919 				return err;
1920 		}
1921 
1922 		if (q->offload) {
1923 			struct net_device *dev = qdisc_dev(sch);
1924 
1925 			offload_opt = (struct tc_htb_qopt_offload) {
1926 				.command = TC_HTB_NODE_MODIFY,
1927 				.classid = cl->common.classid,
1928 				.rate = max_t(u64, hopt->rate.rate, rate64),
1929 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1930 				.extack = extack,
1931 			};
1932 			err = htb_offload(dev, &offload_opt);
1933 			if (err)
1934 				/* Estimator was replaced, and rollback may fail
1935 				 * as well, so we don't try to recover it, and
1936 				 * the estimator won't work property with the
1937 				 * offload anyway, because bstats are updated
1938 				 * only when the stats are queried.
1939 				 */
1940 				return err;
1941 		}
1942 
1943 		sch_tree_lock(sch);
1944 	}
1945 
1946 	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1947 	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1948 
1949 	/* it used to be a nasty bug here, we have to check that node
1950 	 * is really leaf before changing cl->leaf !
1951 	 */
1952 	if (!cl->level) {
1953 		u64 quantum = cl->rate.rate_bytes_ps;
1954 
1955 		do_div(quantum, q->rate2quantum);
1956 		cl->quantum = min_t(u64, quantum, INT_MAX);
1957 
1958 		if (!hopt->quantum && cl->quantum < 1000) {
1959 			warn = -1;
1960 			cl->quantum = 1000;
1961 		}
1962 		if (!hopt->quantum && cl->quantum > 200000) {
1963 			warn = 1;
1964 			cl->quantum = 200000;
1965 		}
1966 		if (hopt->quantum)
1967 			cl->quantum = hopt->quantum;
1968 		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1969 			cl->prio = TC_HTB_NUMPRIO - 1;
1970 	}
1971 
1972 	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1973 	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
1974 
1975 	sch_tree_unlock(sch);
1976 	qdisc_put(parent_qdisc);
1977 
1978 	if (warn)
1979 		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
1980 			    cl->common.classid, (warn == -1 ? "small" : "big"));
1981 
1982 	qdisc_class_hash_grow(sch, &q->clhash);
1983 
1984 	*arg = (unsigned long)cl;
1985 	return 0;
1986 
1987 err_kill_estimator:
1988 	gen_kill_estimator(&cl->rate_est);
1989 err_block_put:
1990 	tcf_block_put(cl->block);
1991 	kfree(cl);
1992 failure:
1993 	return err;
1994 }
1995 
1996 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
1997 				       struct netlink_ext_ack *extack)
1998 {
1999 	struct htb_sched *q = qdisc_priv(sch);
2000 	struct htb_class *cl = (struct htb_class *)arg;
2001 
2002 	return cl ? cl->block : q->block;
2003 }
2004 
2005 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2006 				     u32 classid)
2007 {
2008 	struct htb_class *cl = htb_find(classid, sch);
2009 
2010 	/*if (cl && !cl->level) return 0;
2011 	 * The line above used to be there to prevent attaching filters to
2012 	 * leaves. But at least tc_index filter uses this just to get class
2013 	 * for other reasons so that we have to allow for it.
2014 	 * ----
2015 	 * 19.6.2002 As Werner explained it is ok - bind filter is just
2016 	 * another way to "lock" the class - unlike "get" this lock can
2017 	 * be broken by class during destroy IIUC.
2018 	 */
2019 	if (cl)
2020 		cl->filter_cnt++;
2021 	return (unsigned long)cl;
2022 }
2023 
2024 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2025 {
2026 	struct htb_class *cl = (struct htb_class *)arg;
2027 
2028 	if (cl)
2029 		cl->filter_cnt--;
2030 }
2031 
2032 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2033 {
2034 	struct htb_sched *q = qdisc_priv(sch);
2035 	struct htb_class *cl;
2036 	unsigned int i;
2037 
2038 	if (arg->stop)
2039 		return;
2040 
2041 	for (i = 0; i < q->clhash.hashsize; i++) {
2042 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2043 			if (arg->count < arg->skip) {
2044 				arg->count++;
2045 				continue;
2046 			}
2047 			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2048 				arg->stop = 1;
2049 				return;
2050 			}
2051 			arg->count++;
2052 		}
2053 	}
2054 }
2055 
2056 static const struct Qdisc_class_ops htb_class_ops = {
2057 	.select_queue	=	htb_select_queue,
2058 	.graft		=	htb_graft,
2059 	.leaf		=	htb_leaf,
2060 	.qlen_notify	=	htb_qlen_notify,
2061 	.find		=	htb_search,
2062 	.change		=	htb_change_class,
2063 	.delete		=	htb_delete,
2064 	.walk		=	htb_walk,
2065 	.tcf_block	=	htb_tcf_block,
2066 	.bind_tcf	=	htb_bind_filter,
2067 	.unbind_tcf	=	htb_unbind_filter,
2068 	.dump		=	htb_dump_class,
2069 	.dump_stats	=	htb_dump_class_stats,
2070 };
2071 
2072 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2073 	.cl_ops		=	&htb_class_ops,
2074 	.id		=	"htb",
2075 	.priv_size	=	sizeof(struct htb_sched),
2076 	.enqueue	=	htb_enqueue,
2077 	.dequeue	=	htb_dequeue,
2078 	.peek		=	qdisc_peek_dequeued,
2079 	.init		=	htb_init,
2080 	.attach		=	htb_attach,
2081 	.reset		=	htb_reset,
2082 	.destroy	=	htb_destroy,
2083 	.dump		=	htb_dump,
2084 	.owner		=	THIS_MODULE,
2085 };
2086 
2087 static int __init htb_module_init(void)
2088 {
2089 	return register_qdisc(&htb_qdisc_ops);
2090 }
2091 static void __exit htb_module_exit(void)
2092 {
2093 	unregister_qdisc(&htb_qdisc_ops);
2094 }
2095 
2096 module_init(htb_module_init)
2097 module_exit(htb_module_exit)
2098 MODULE_LICENSE("GPL");
2099