xref: /openbmc/linux/net/sched/sch_htb.c (revision ac4dfccb)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
4  *
5  * Authors:	Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *			HTB support at LARTC mailing list
10  *		Ondrej Kraus, <krauso@barr.cz>
11  *			found missing INIT_QDISC(htb)
12  *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *			helped a lot to locate nasty class stall bug
14  *		Andi Kleen, Jamal Hadi, Bert Hubert
15  *			code review and helpful comments on shaping
16  *		Tomasz Wrona, <tw@eter.tym.pl>
17  *			created test case so that I was able to fix nasty bug
18  *		Wilfried Weissmann
19  *			spotted bug in dequeue code and helped with fix
20  *		Jiri Fojtasek
21  *			fixed requeue routine
22  *		and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40 
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows a priority to be assigned to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47 
48     Levels:
49     Each class is assigned a level. A leaf ALWAYS has level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
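
/* Illustration (not part of the original comment), assuming the usual
 * TC_HTB_MAXDEPTH of 8 and a hypothetical hierarchy 1:1 -> 1:10 -> 1:100
 * where 1:100 is the only leaf: the root class 1:1 sits at level
 * TC_HTB_MAXDEPTH-1 (7), the interior class 1:10 at level 6, and the leaf
 * 1:100 at level 0, exactly as described above.
 */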
53 
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */
56 
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60 
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64 
65 static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
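
/* Usage sketch (an assumption, not taken from the source): as plain module
 * parameters, these knobs can be set at load time when HTB is built as a
 * module, e.g. "modprobe sch_htb htb_hysteresis=1", or changed afterwards
 * via /sys/module/sch_htb/parameters/htb_hysteresis (the 0640 mode above
 * makes the sysfs entry writable only by root).
 */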
68 
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71 	HTB_CANT_SEND,		/* class can't send and can't borrow */
72 	HTB_MAY_BORROW,		/* class can't send but may borrow */
73 	HTB_CAN_SEND		/* class can send */
74 };
75 
76 struct htb_prio {
77 	union {
78 		struct rb_root	row;
79 		struct rb_root	feed;
80 	};
81 	struct rb_node	*ptr;
82 	/* When a class changes from state 1->2 and disconnects from
83 	 * its parent's feed, we lose the ptr value and would start from
84 	 * the first child again. Here we store the classid of the
85 	 * last valid ptr (used when ptr is NULL).
86 	 */
87 	u32		last_ptr_id;
88 };
89 
90 /* interior & leaf nodes; properties specific to leaves are noted as such.
91  * To reduce false sharing, place mostly read fields at the beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95 	struct Qdisc_class_common common;
96 	struct psched_ratecfg	rate;
97 	struct psched_ratecfg	ceil;
98 	s64			buffer, cbuffer;/* token bucket depth/rate */
99 	s64			mbuffer;	/* max wait time */
100 	u32			prio;		/* these two are used only by leaves... */
101 	int			quantum;	/* but stored for parent-to-leaf return */
102 
103 	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
104 	struct tcf_block	*block;
105 	int			filter_cnt;
106 
107 	int			level;		/* our level (see above) */
108 	unsigned int		children;
109 	struct htb_class	*parent;	/* parent class */
110 
111 	struct net_rate_estimator __rcu *rate_est;
112 
113 	/*
114 	 * Written often fields
115 	 */
116 	struct gnet_stats_basic_packed bstats;
117 	struct gnet_stats_basic_packed bstats_bias;
118 	struct tc_htb_xstats	xstats;	/* our special stats */
119 
120 	/* token bucket parameters */
121 	s64			tokens, ctokens;/* current number of tokens */
122 	s64			t_c;		/* checkpoint time */
123 
124 	union {
125 		struct htb_class_leaf {
126 			int		deficit[TC_HTB_MAXDEPTH];
127 			struct Qdisc	*q;
128 		} leaf;
129 		struct htb_class_inner {
130 			struct htb_prio clprio[TC_HTB_NUMPRIO];
131 		} inner;
132 	};
133 	s64			pq_key;
134 
135 	int			prio_activity;	/* for which prios are we active */
136 	enum htb_cmode		cmode;		/* current mode of the class */
137 	struct rb_node		pq_node;	/* node for event queue */
138 	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
139 
140 	unsigned int drops ____cacheline_aligned_in_smp;
141 	unsigned int		overlimits;
142 };
143 
144 struct htb_level {
145 	struct rb_root	wait_pq;
146 	struct htb_prio hprio[TC_HTB_NUMPRIO];
147 };
148 
149 struct htb_sched {
150 	struct Qdisc_class_hash clhash;
151 	int			defcls;		/* class where unclassified flows go to */
152 	int			rate2quantum;	/* quant = rate / rate2quantum */
153 
154 	/* filters for qdisc itself */
155 	struct tcf_proto __rcu	*filter_list;
156 	struct tcf_block	*block;
157 
158 #define HTB_WARN_TOOMANYEVENTS	0x1
159 	unsigned int		warned;	/* only one warning */
160 	int			direct_qlen;
161 	struct work_struct	work;
162 
163 	/* non-shaped skbs; let them go directly through */
164 	struct qdisc_skb_head	direct_queue;
165 	u32			direct_pkts;
166 	u32			overlimits;
167 
168 	struct qdisc_watchdog	watchdog;
169 
170 	s64			now;	/* cached dequeue time */
171 
172 	/* time of nearest event per level (row) */
173 	s64			near_ev_cache[TC_HTB_MAXDEPTH];
174 
175 	int			row_mask[TC_HTB_MAXDEPTH];
176 
177 	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
178 
179 	struct Qdisc		**direct_qdiscs;
180 	unsigned int            num_direct_qdiscs;
181 
182 	bool			offload;
183 };
184 
185 /* find class in global hash table using given handle */
186 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
187 {
188 	struct htb_sched *q = qdisc_priv(sch);
189 	struct Qdisc_class_common *clc;
190 
191 	clc = qdisc_class_find(&q->clhash, handle);
192 	if (clc == NULL)
193 		return NULL;
194 	return container_of(clc, struct htb_class, common);
195 }
196 
197 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
198 {
199 	return (unsigned long)htb_find(handle, sch);
200 }
201 /**
202  * htb_classify - classify a packet into a class
203  *
204  * It returns NULL if the packet should be dropped or -1 if the packet
205  * should be passed directly through. In all other cases a leaf class is
206  * returned. We allow direct class selection by classid in skb->priority.
207  * Then we examine filters in the qdisc and in inner nodes (if a higher
208  * filter points to the inner node). If we end up with classid MAJOR:0 we
209  * enqueue the skb into the special internal fifo (direct); these packets
210  * then go directly through. If we still have no valid leaf we try to use
211  * the MAJOR:default leaf; if still unsuccessful, we return the direct queue.
212  */
213 #define HTB_DIRECT ((struct htb_class *)-1L)
214 
215 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216 				      int *qerr)
217 {
218 	struct htb_sched *q = qdisc_priv(sch);
219 	struct htb_class *cl;
220 	struct tcf_result res;
221 	struct tcf_proto *tcf;
222 	int result;
223 
224 	/* allow selecting a class by setting skb->priority to a valid classid;
225 	 * note that nfmark can be used too, by attaching an fw filter with no
226 	 * rules in it
227 	 */
228 	if (skb->priority == sch->handle)
229 		return HTB_DIRECT;	/* X:0 (direct flow) selected */
230 	cl = htb_find(skb->priority, sch);
231 	if (cl) {
232 		if (cl->level == 0)
233 			return cl;
234 		/* Start with inner filter chain if a non-leaf class is selected */
235 		tcf = rcu_dereference_bh(cl->filter_list);
236 	} else {
237 		tcf = rcu_dereference_bh(q->filter_list);
238 	}
239 
240 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
241 	while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
242 #ifdef CONFIG_NET_CLS_ACT
243 		switch (result) {
244 		case TC_ACT_QUEUED:
245 		case TC_ACT_STOLEN:
246 		case TC_ACT_TRAP:
247 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
248 			fallthrough;
249 		case TC_ACT_SHOT:
250 			return NULL;
251 		}
252 #endif
253 		cl = (void *)res.class;
254 		if (!cl) {
255 			if (res.classid == sch->handle)
256 				return HTB_DIRECT;	/* X:0 (direct flow) */
257 			cl = htb_find(res.classid, sch);
258 			if (!cl)
259 				break;	/* filter selected invalid classid */
260 		}
261 		if (!cl->level)
262 			return cl;	/* we hit leaf; return it */
263 
264 		/* we have got inner class; apply inner filter chain */
265 		tcf = rcu_dereference_bh(cl->filter_list);
266 	}
267 	/* classification failed; try to use default class */
268 	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
269 	if (!cl || cl->level)
270 		return HTB_DIRECT;	/* bad default .. this is safe bet */
271 	return cl;
272 }
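
/* Worked example (illustrative only): assume an HTB root qdisc with handle
 * 1: and default class minor 20. A packet with skb->priority equal to the
 * qdisc handle (1:0) is sent to the direct queue; one with skb->priority set
 * to an existing leaf classid such as 1:10 is returned immediately without
 * consulting filters; anything the filters leave unclassified goes to the
 * default leaf 1:20, or to the direct queue if 1:20 does not exist or is not
 * a leaf.
 */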
273 
274 /**
275  * htb_add_to_id_tree - adds class to the round robin list
276  * @root: the root of the tree
277  * @cl: the class to add
278  * @prio: the given prio in the class
279  *
280  * The routine adds the class to the list (actually a tree) sorted by classid.
281  * Make sure that the class is not already on such a list for the given prio.
282  */
283 static void htb_add_to_id_tree(struct rb_root *root,
284 			       struct htb_class *cl, int prio)
285 {
286 	struct rb_node **p = &root->rb_node, *parent = NULL;
287 
288 	while (*p) {
289 		struct htb_class *c;
290 		parent = *p;
291 		c = rb_entry(parent, struct htb_class, node[prio]);
292 
293 		if (cl->common.classid > c->common.classid)
294 			p = &parent->rb_right;
295 		else
296 			p = &parent->rb_left;
297 	}
298 	rb_link_node(&cl->node[prio], parent, p);
299 	rb_insert_color(&cl->node[prio], root);
300 }
301 
302 /**
303  * htb_add_to_wait_tree - adds class to the event queue with delay
304  * @q: the priority event queue
305  * @cl: the class to add
306  * @delay: delay in nanoseconds
307  *
308  * The class is added to the priority event queue to indicate that the class
309  * will change its mode at time cl->pq_key (nanoseconds). Make sure that the
310  * class is not already in the queue.
311  */
312 static void htb_add_to_wait_tree(struct htb_sched *q,
313 				 struct htb_class *cl, s64 delay)
314 {
315 	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
316 
317 	cl->pq_key = q->now + delay;
318 	if (cl->pq_key == q->now)
319 		cl->pq_key++;
320 
321 	/* update the nearest event cache */
322 	if (q->near_ev_cache[cl->level] > cl->pq_key)
323 		q->near_ev_cache[cl->level] = cl->pq_key;
324 
325 	while (*p) {
326 		struct htb_class *c;
327 		parent = *p;
328 		c = rb_entry(parent, struct htb_class, pq_node);
329 		if (cl->pq_key >= c->pq_key)
330 			p = &parent->rb_right;
331 		else
332 			p = &parent->rb_left;
333 	}
334 	rb_link_node(&cl->pq_node, parent, p);
335 	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
336 }
337 
338 /**
339  * htb_next_rb_node - finds the next node in the rbtree
340  * @n: pointer to the current node in the rbtree
341  *
342  * When we are past the last key, *n is set to NULL.
343  * Average complexity is 2 steps per call.
344  */
345 static inline void htb_next_rb_node(struct rb_node **n)
346 {
347 	*n = rb_next(*n);
348 }
349 
350 /**
351  * htb_add_class_to_row - add class to its row
352  * @q: the priority event queue
353  * @cl: the class to add
354  * @mask: bitmap of priorities at which to add the class
355  *
356  * The class is added to row at priorities marked in mask.
357  * It does nothing if mask == 0.
358  */
359 static inline void htb_add_class_to_row(struct htb_sched *q,
360 					struct htb_class *cl, int mask)
361 {
362 	q->row_mask[cl->level] |= mask;
363 	while (mask) {
364 		int prio = ffz(~mask);
365 		mask &= ~(1 << prio);
366 		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
367 	}
368 }
369 
370 /* If this triggers, it is a bug in this code, but it need not be fatal */
371 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
372 {
373 	if (RB_EMPTY_NODE(rb)) {
374 		WARN_ON(1);
375 	} else {
376 		rb_erase(rb, root);
377 		RB_CLEAR_NODE(rb);
378 	}
379 }
380 
381 
382 /**
383  * htb_remove_class_from_row - removes class from its row
384  * @q: the priority event queue
385  * @cl: the class to remove
386  * @mask: bitmap of priorities at which to remove the class
387  *
388  * The class is removed from row at priorities marked in mask.
389  * It does nothing if mask == 0.
390  */
391 static inline void htb_remove_class_from_row(struct htb_sched *q,
392 						 struct htb_class *cl, int mask)
393 {
394 	int m = 0;
395 	struct htb_level *hlevel = &q->hlevel[cl->level];
396 
397 	while (mask) {
398 		int prio = ffz(~mask);
399 		struct htb_prio *hprio = &hlevel->hprio[prio];
400 
401 		mask &= ~(1 << prio);
402 		if (hprio->ptr == cl->node + prio)
403 			htb_next_rb_node(&hprio->ptr);
404 
405 		htb_safe_rb_erase(cl->node + prio, &hprio->row);
406 		if (!hprio->row.rb_node)
407 			m |= 1 << prio;
408 	}
409 	q->row_mask[cl->level] &= ~m;
410 }
411 
412 /**
413  * htb_activate_prios - creates an active class's feed chain
414  * @q: the priority event queue
415  * @cl: the class to activate
416  *
417  * The class is connected to its ancestors and/or the appropriate rows
418  * for the priorities it participates in. cl->cmode must be the new
419  * (activated) mode. It does nothing if cl->prio_activity == 0.
420  */
421 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
422 {
423 	struct htb_class *p = cl->parent;
424 	long m, mask = cl->prio_activity;
425 
426 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
427 		m = mask;
428 		while (m) {
429 			int prio = ffz(~m);
430 			m &= ~(1 << prio);
431 
432 			if (p->inner.clprio[prio].feed.rb_node)
433 				/* parent already has its feed in use, so
434 				 * reset the bit in mask as the parent is already ok
435 				 */
436 				mask &= ~(1 << prio);
437 
438 			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
439 		}
440 		p->prio_activity |= mask;
441 		cl = p;
442 		p = cl->parent;
443 
444 	}
445 	if (cl->cmode == HTB_CAN_SEND && mask)
446 		htb_add_class_to_row(q, cl, mask);
447 }
448 
449 /**
450  * htb_deactivate_prios - remove class from feed chain
451  * @q: the priority event queue
452  * @cl: the class to deactivate
453  *
454  * cl->cmode must represent the old mode (before deactivation). It does
455  * nothing if cl->prio_activity == 0. The class is removed from all feed
456  * chains and rows.
457  */
458 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
459 {
460 	struct htb_class *p = cl->parent;
461 	long m, mask = cl->prio_activity;
462 
463 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
464 		m = mask;
465 		mask = 0;
466 		while (m) {
467 			int prio = ffz(~m);
468 			m &= ~(1 << prio);
469 
470 			if (p->inner.clprio[prio].ptr == cl->node + prio) {
471 				/* we are removing a child that the parent feed
472 				 * points to - forget the pointer but remember
473 				 * the classid
474 				 */
475 				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
476 				p->inner.clprio[prio].ptr = NULL;
477 			}
478 
479 			htb_safe_rb_erase(cl->node + prio,
480 					  &p->inner.clprio[prio].feed);
481 
482 			if (!p->inner.clprio[prio].feed.rb_node)
483 				mask |= 1 << prio;
484 		}
485 
486 		p->prio_activity &= ~mask;
487 		cl = p;
488 		p = cl->parent;
489 
490 	}
491 	if (cl->cmode == HTB_CAN_SEND && mask)
492 		htb_remove_class_from_row(q, cl, mask);
493 }
494 
495 static inline s64 htb_lowater(const struct htb_class *cl)
496 {
497 	if (htb_hysteresis)
498 		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
499 	else
500 		return 0;
501 }
502 static inline s64 htb_hiwater(const struct htb_class *cl)
503 {
504 	if (htb_hysteresis)
505 		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
506 	else
507 		return 0;
508 }
509 
510 
511 /**
512  * htb_class_mode - computes and returns current class mode
513  * @cl: the target class
514  * @diff: time difference in nanoseconds
515  *
516  * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
517  * is not HTB_CAN_SEND then *diff is updated to the time difference from
518  * now to the time when cl will change its state (later used for cl->pq_key).
519  * It is also worth noting that the class mode doesn't change simply at
520  * cl->{c,}tokens == 0; rather, there can be hysteresis over the
521  * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
522  * mode transitions per time unit. The speed gain is about 1/6.
523  */
524 static inline enum htb_cmode
525 htb_class_mode(struct htb_class *cl, s64 *diff)
526 {
527 	s64 toks;
528 
529 	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
530 		*diff = -toks;
531 		return HTB_CANT_SEND;
532 	}
533 
534 	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
535 		return HTB_CAN_SEND;
536 
537 	*diff = -toks;
538 	return HTB_MAY_BORROW;
539 }
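
/* Worked example (with htb_hysteresis == 0, so lowater == hiwater == 0):
 * if cl->ctokens + *diff is negative, the ceil bucket is exhausted and the
 * mode is HTB_CANT_SEND; otherwise, if cl->tokens + *diff is still negative,
 * the class may only borrow (HTB_MAY_BORROW); only when both buckets are
 * non-negative is HTB_CAN_SEND returned. In the first two cases *diff is
 * overwritten with the time needed to refill the exhausted bucket back to
 * zero, which callers then pass to htb_add_to_wait_tree() as the delay.
 */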
540 
541 /**
542  * htb_change_class_mode - changes a class's mode
543  * @q: the priority event queue
544  * @cl: the target class
545  * @diff: time difference in nanoseconds
546  *
547  * This should be the only way to change a class's mode under normal
548  * circumstances. The routine will update the feed list linkage, change the
549  * mode and add the class to the wait event queue if appropriate. The new
550  * mode should be different from the old one and cl->pq_key has to be valid
551  * if changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
552  */
553 static void
554 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
555 {
556 	enum htb_cmode new_mode = htb_class_mode(cl, diff);
557 
558 	if (new_mode == cl->cmode)
559 		return;
560 
561 	if (new_mode == HTB_CANT_SEND) {
562 		cl->overlimits++;
563 		q->overlimits++;
564 	}
565 
566 	if (cl->prio_activity) {	/* not necessary: speed optimization */
567 		if (cl->cmode != HTB_CANT_SEND)
568 			htb_deactivate_prios(q, cl);
569 		cl->cmode = new_mode;
570 		if (new_mode != HTB_CANT_SEND)
571 			htb_activate_prios(q, cl);
572 	} else
573 		cl->cmode = new_mode;
574 }
575 
576 /**
577  * htb_activate - inserts leaf cl into appropriate active feeds
578  * @q: the priority event queue
579  * @cl: the target class
580  *
581  * The routine learns the (new) priority of the leaf and activates the
582  * feed chain for that prio. It can safely be called on an already
583  * active leaf.
584  */
585 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
586 {
587 	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
588 
589 	if (!cl->prio_activity) {
590 		cl->prio_activity = 1 << cl->prio;
591 		htb_activate_prios(q, cl);
592 	}
593 }
594 
595 /**
596  * htb_deactivate - remove leaf cl from active feeds
597  * @q: the priority event queue
598  * @cl: the target class
599  *
600  * Make sure that the leaf is active. In other words, it can't be called
601  * with a non-active leaf.
602  */
603 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
604 {
605 	WARN_ON(!cl->prio_activity);
606 
607 	htb_deactivate_prios(q, cl);
608 	cl->prio_activity = 0;
609 }
610 
611 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
612 		       struct sk_buff **to_free)
613 {
614 	int ret;
615 	unsigned int len = qdisc_pkt_len(skb);
616 	struct htb_sched *q = qdisc_priv(sch);
617 	struct htb_class *cl = htb_classify(skb, sch, &ret);
618 
619 	if (cl == HTB_DIRECT) {
620 		/* enqueue to helper queue */
621 		if (q->direct_queue.qlen < q->direct_qlen) {
622 			__qdisc_enqueue_tail(skb, &q->direct_queue);
623 			q->direct_pkts++;
624 		} else {
625 			return qdisc_drop(skb, sch, to_free);
626 		}
627 #ifdef CONFIG_NET_CLS_ACT
628 	} else if (!cl) {
629 		if (ret & __NET_XMIT_BYPASS)
630 			qdisc_qstats_drop(sch);
631 		__qdisc_drop(skb, to_free);
632 		return ret;
633 #endif
634 	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
635 					to_free)) != NET_XMIT_SUCCESS) {
636 		if (net_xmit_drop_count(ret)) {
637 			qdisc_qstats_drop(sch);
638 			cl->drops++;
639 		}
640 		return ret;
641 	} else {
642 		htb_activate(q, cl);
643 	}
644 
645 	sch->qstats.backlog += len;
646 	sch->q.qlen++;
647 	return NET_XMIT_SUCCESS;
648 }
649 
650 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
651 {
652 	s64 toks = diff + cl->tokens;
653 
654 	if (toks > cl->buffer)
655 		toks = cl->buffer;
656 	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
657 	if (toks <= -cl->mbuffer)
658 		toks = 1 - cl->mbuffer;
659 
660 	cl->tokens = toks;
661 }
662 
663 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
664 {
665 	s64 toks = diff + cl->ctokens;
666 
667 	if (toks > cl->cbuffer)
668 		toks = cl->cbuffer;
669 	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
670 	if (toks <= -cl->mbuffer)
671 		toks = 1 - cl->mbuffer;
672 
673 	cl->ctokens = toks;
674 }
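
/* Numeric sketch (illustrative): tokens are kept as nanoseconds of
 * transmission time, so psched_l2t_ns() converts a packet length into its
 * cost at the configured rate. At 1 Mbit/s (125000 bytes/s) a 1500-byte
 * packet costs 1500 / 125000 s = 12 ms = 12000000 ns of tokens, and each
 * update clamps the bucket to the [1 - mbuffer, buffer] range.
 */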
675 
676 /**
677  * htb_charge_class - charges amount "bytes" to leaf and ancestors
678  * @q: the priority event queue
679  * @cl: the class to start iterating from
680  * @level: the minimum level to account
681  * @skb: the socket buffer
682  *
683  * The routine assumes that a packet "bytes" long was dequeued from leaf cl
684  * borrowing from "level". It accounts bytes to the ceil leaky bucket for
685  * the leaf and all ancestors and to the rate bucket for ancestors at levels
686  * "level" and higher. It also handles a possible change of mode resulting
687  * from the update. Note that the mode can also increase here (MAY_BORROW to
688  * CAN_SEND) because we can use a more precise clock than the event queue
689  * here. In such a case we remove the class from the event queue first.
690  */
691 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
692 			     int level, struct sk_buff *skb)
693 {
694 	int bytes = qdisc_pkt_len(skb);
695 	enum htb_cmode old_mode;
696 	s64 diff;
697 
698 	while (cl) {
699 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
700 		if (cl->level >= level) {
701 			if (cl->level == level)
702 				cl->xstats.lends++;
703 			htb_accnt_tokens(cl, bytes, diff);
704 		} else {
705 			cl->xstats.borrows++;
706 			cl->tokens += diff;	/* we moved t_c; update tokens */
707 		}
708 		htb_accnt_ctokens(cl, bytes, diff);
709 		cl->t_c = q->now;
710 
711 		old_mode = cl->cmode;
712 		diff = 0;
713 		htb_change_class_mode(q, cl, &diff);
714 		if (old_mode != cl->cmode) {
715 			if (old_mode != HTB_CAN_SEND)
716 				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
717 			if (cl->cmode != HTB_CAN_SEND)
718 				htb_add_to_wait_tree(q, cl, diff);
719 		}
720 
721 		/* update basic stats except for leaves which are already updated */
722 		if (cl->level)
723 			bstats_update(&cl->bstats, skb);
724 
725 		cl = cl->parent;
726 	}
727 }
728 
729 /**
730  * htb_do_events - make mode changes to classes at the level
731  * @q: the priority event queue
732  * @level: which wait_pq in 'q->hlevel'
733  * @start: start jiffies
734  *
735  * Scans event queue for pending events and applies them. Returns time of
736  * Scans the event queue for pending events and applies them. Returns the time
737  * of the next pending event (0 for no event in the pq, q->now for too many
738  * events). Note: only events with cl->pq_key <= q->now are applied.
739 static s64 htb_do_events(struct htb_sched *q, const int level,
740 			 unsigned long start)
741 {
742 	/* don't run for longer than 2 jiffies; 2 is used instead of
743 	 * 1 to simplify things when jiffy is going to be incremented
744 	 * too soon
745 	 */
746 	unsigned long stop_at = start + 2;
747 	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
748 
749 	while (time_before(jiffies, stop_at)) {
750 		struct htb_class *cl;
751 		s64 diff;
752 		struct rb_node *p = rb_first(wait_pq);
753 
754 		if (!p)
755 			return 0;
756 
757 		cl = rb_entry(p, struct htb_class, pq_node);
758 		if (cl->pq_key > q->now)
759 			return cl->pq_key;
760 
761 		htb_safe_rb_erase(p, wait_pq);
762 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
763 		htb_change_class_mode(q, cl, &diff);
764 		if (cl->cmode != HTB_CAN_SEND)
765 			htb_add_to_wait_tree(q, cl, diff);
766 	}
767 
768 	/* too much load - let's continue after a break for scheduling */
769 	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
770 		pr_warn("htb: too many events!\n");
771 		q->warned |= HTB_WARN_TOOMANYEVENTS;
772 	}
773 
774 	return q->now;
775 }
776 
777 /* Returns class->node+prio from the id-tree where the class's id is >= id.
778  * NULL if no such one exists.
779  */
780 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
781 					      u32 id)
782 {
783 	struct rb_node *r = NULL;
784 	while (n) {
785 		struct htb_class *cl =
786 		    rb_entry(n, struct htb_class, node[prio]);
787 
788 		if (id > cl->common.classid) {
789 			n = n->rb_right;
790 		} else if (id < cl->common.classid) {
791 			r = n;
792 			n = n->rb_left;
793 		} else {
794 			return n;
795 		}
796 	}
797 	return r;
798 }
799 
800 /**
801  * htb_lookup_leaf - returns next leaf class in DRR order
802  * @hprio: the current row/feed entry
803  * @prio: which prio in the class
804  *
805  * Find the leaf that the current feed pointer points to.
806  */
807 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
808 {
809 	int i;
810 	struct {
811 		struct rb_node *root;
812 		struct rb_node **pptr;
813 		u32 *pid;
814 	} stk[TC_HTB_MAXDEPTH], *sp = stk;
815 
816 	BUG_ON(!hprio->row.rb_node);
817 	sp->root = hprio->row.rb_node;
818 	sp->pptr = &hprio->ptr;
819 	sp->pid = &hprio->last_ptr_id;
820 
821 	for (i = 0; i < 65535; i++) {
822 		if (!*sp->pptr && *sp->pid) {
823 			/* ptr was invalidated but id is valid - try to recover
824 			 * the original or next ptr
825 			 */
826 			*sp->pptr =
827 			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
828 		}
829 		*sp->pid = 0;	/* ptr is valid now, so remove this hint as it
830 				 * can become out of date quickly
831 				 */
832 		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
833 			*sp->pptr = sp->root;
834 			while ((*sp->pptr)->rb_left)
835 				*sp->pptr = (*sp->pptr)->rb_left;
836 			if (sp > stk) {
837 				sp--;
838 				if (!*sp->pptr) {
839 					WARN_ON(1);
840 					return NULL;
841 				}
842 				htb_next_rb_node(sp->pptr);
843 			}
844 		} else {
845 			struct htb_class *cl;
846 			struct htb_prio *clp;
847 
848 			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
849 			if (!cl->level)
850 				return cl;
851 			clp = &cl->inner.clprio[prio];
852 			(++sp)->root = clp->feed.rb_node;
853 			sp->pptr = &clp->ptr;
854 			sp->pid = &clp->last_ptr_id;
855 		}
856 	}
857 	WARN_ON(1);
858 	return NULL;
859 }
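
/* Descent sketch (illustrative): starting from the row of the requested
 * level/prio, the loop follows the remembered ptr (recovering it from
 * last_ptr_id when it was invalidated) down through the inner classes' feed
 * trees, pushing one stack entry per visited level, until it reaches a class
 * with level == 0. The 65535 iteration cap and the WARN_ON() only guard
 * against a corrupted tree and should never trigger in normal operation.
 */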
860 
861 /* dequeues a packet at the given priority and level; call only if
862  * you are sure that there is an active class at prio/level
863  */
864 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
865 					const int level)
866 {
867 	struct sk_buff *skb = NULL;
868 	struct htb_class *cl, *start;
869 	struct htb_level *hlevel = &q->hlevel[level];
870 	struct htb_prio *hprio = &hlevel->hprio[prio];
871 
872 	/* look initial class up in the row */
873 	start = cl = htb_lookup_leaf(hprio, prio);
874 
875 	do {
876 next:
877 		if (unlikely(!cl))
878 			return NULL;
879 
880 		/* class can be empty - it is unlikely but can be true if leaf
881 		 * qdisc drops packets in enqueue routine or if someone used
882 		 * graft operation on the leaf since last dequeue;
883 		 * simply deactivate and skip such class
884 		 */
885 		if (unlikely(cl->leaf.q->q.qlen == 0)) {
886 			struct htb_class *next;
887 			htb_deactivate(q, cl);
888 
889 			/* row/level might become empty */
890 			if ((q->row_mask[level] & (1 << prio)) == 0)
891 				return NULL;
892 
893 			next = htb_lookup_leaf(hprio, prio);
894 
895 			if (cl == start)	/* fix start if we just deleted it */
896 				start = next;
897 			cl = next;
898 			goto next;
899 		}
900 
901 		skb = cl->leaf.q->dequeue(cl->leaf.q);
902 		if (likely(skb != NULL))
903 			break;
904 
905 		qdisc_warn_nonwc("htb", cl->leaf.q);
906 		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
907 					 &q->hlevel[0].hprio[prio].ptr);
908 		cl = htb_lookup_leaf(hprio, prio);
909 
910 	} while (cl != start);
911 
912 	if (likely(skb != NULL)) {
913 		bstats_update(&cl->bstats, skb);
914 		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
915 		if (cl->leaf.deficit[level] < 0) {
916 			cl->leaf.deficit[level] += cl->quantum;
917 			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
918 						 &q->hlevel[0].hprio[prio].ptr);
919 		}
920 		/* this used to be after charge_class but this constellation
921 		 * gives us slightly better performance
922 		 */
923 		if (!cl->leaf.q->q.qlen)
924 			htb_deactivate(q, cl);
925 		htb_charge_class(q, cl, level, skb);
926 	}
927 	return skb;
928 }
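
/* DRR example (illustrative, with a hypothetical cl->quantum of 1500): a
 * class keeps being chosen until the dequeued bytes drive leaf.deficit[level]
 * below zero; at that point the deficit is topped up by the quantum and the
 * round-robin pointer advances to the next class at the same prio, so each
 * class gets roughly "quantum" bytes per round.
 */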
929 
930 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
931 {
932 	struct sk_buff *skb;
933 	struct htb_sched *q = qdisc_priv(sch);
934 	int level;
935 	s64 next_event;
936 	unsigned long start_at;
937 
938 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
939 	skb = __qdisc_dequeue_head(&q->direct_queue);
940 	if (skb != NULL) {
941 ok:
942 		qdisc_bstats_update(sch, skb);
943 		qdisc_qstats_backlog_dec(sch, skb);
944 		sch->q.qlen--;
945 		return skb;
946 	}
947 
948 	if (!sch->q.qlen)
949 		goto fin;
950 	q->now = ktime_get_ns();
951 	start_at = jiffies;
952 
953 	next_event = q->now + 5LLU * NSEC_PER_SEC;
954 
955 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
956 		/* common case optimization - skip event handler quickly */
957 		int m;
958 		s64 event = q->near_ev_cache[level];
959 
960 		if (q->now >= event) {
961 			event = htb_do_events(q, level, start_at);
962 			if (!event)
963 				event = q->now + NSEC_PER_SEC;
964 			q->near_ev_cache[level] = event;
965 		}
966 
967 		if (next_event > event)
968 			next_event = event;
969 
970 		m = ~q->row_mask[level];
971 		while (m != (int)(-1)) {
972 			int prio = ffz(m);
973 
974 			m |= 1 << prio;
975 			skb = htb_dequeue_tree(q, prio, level);
976 			if (likely(skb != NULL))
977 				goto ok;
978 		}
979 	}
980 	if (likely(next_event > q->now))
981 		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
982 	else
983 		schedule_work(&q->work);
984 fin:
985 	return skb;
986 }
987 
988 /* reset all classes */
989 /* always called under BH & queue lock */
990 static void htb_reset(struct Qdisc *sch)
991 {
992 	struct htb_sched *q = qdisc_priv(sch);
993 	struct htb_class *cl;
994 	unsigned int i;
995 
996 	for (i = 0; i < q->clhash.hashsize; i++) {
997 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
998 			if (cl->level)
999 				memset(&cl->inner, 0, sizeof(cl->inner));
1000 			else {
1001 				if (cl->leaf.q && !q->offload)
1002 					qdisc_reset(cl->leaf.q);
1003 			}
1004 			cl->prio_activity = 0;
1005 			cl->cmode = HTB_CAN_SEND;
1006 		}
1007 	}
1008 	qdisc_watchdog_cancel(&q->watchdog);
1009 	__qdisc_reset_queue(&q->direct_queue);
1010 	sch->q.qlen = 0;
1011 	sch->qstats.backlog = 0;
1012 	memset(q->hlevel, 0, sizeof(q->hlevel));
1013 	memset(q->row_mask, 0, sizeof(q->row_mask));
1014 }
1015 
1016 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1017 	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
1018 	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
1019 	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1020 	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1021 	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1022 	[TCA_HTB_RATE64] = { .type = NLA_U64 },
1023 	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
1024 	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1025 };
1026 
1027 static void htb_work_func(struct work_struct *work)
1028 {
1029 	struct htb_sched *q = container_of(work, struct htb_sched, work);
1030 	struct Qdisc *sch = q->watchdog.qdisc;
1031 
1032 	rcu_read_lock();
1033 	__netif_schedule(qdisc_root(sch));
1034 	rcu_read_unlock();
1035 }
1036 
1037 static void htb_set_lockdep_class_child(struct Qdisc *q)
1038 {
1039 	static struct lock_class_key child_key;
1040 
1041 	lockdep_set_class(qdisc_lock(q), &child_key);
1042 }
1043 
1044 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1045 {
1046 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1047 }
1048 
1049 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1050 		    struct netlink_ext_ack *extack)
1051 {
1052 	struct net_device *dev = qdisc_dev(sch);
1053 	struct tc_htb_qopt_offload offload_opt;
1054 	struct htb_sched *q = qdisc_priv(sch);
1055 	struct nlattr *tb[TCA_HTB_MAX + 1];
1056 	struct tc_htb_glob *gopt;
1057 	unsigned int ntx;
1058 	bool offload;
1059 	int err;
1060 
1061 	qdisc_watchdog_init(&q->watchdog, sch);
1062 	INIT_WORK(&q->work, htb_work_func);
1063 
1064 	if (!opt)
1065 		return -EINVAL;
1066 
1067 	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1068 	if (err)
1069 		return err;
1070 
1071 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1072 					  NULL);
1073 	if (err < 0)
1074 		return err;
1075 
1076 	if (!tb[TCA_HTB_INIT])
1077 		return -EINVAL;
1078 
1079 	gopt = nla_data(tb[TCA_HTB_INIT]);
1080 	if (gopt->version != HTB_VER >> 16)
1081 		return -EINVAL;
1082 
1083 	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1084 
1085 	if (offload) {
1086 		if (sch->parent != TC_H_ROOT)
1087 			return -EOPNOTSUPP;
1088 
1089 		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1090 			return -EOPNOTSUPP;
1091 
1092 		q->num_direct_qdiscs = dev->real_num_tx_queues;
1093 		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1094 					   sizeof(*q->direct_qdiscs),
1095 					   GFP_KERNEL);
1096 		if (!q->direct_qdiscs)
1097 			return -ENOMEM;
1098 	}
1099 
1100 	err = qdisc_class_hash_init(&q->clhash);
1101 	if (err < 0)
1102 		goto err_free_direct_qdiscs;
1103 
1104 	qdisc_skb_head_init(&q->direct_queue);
1105 
1106 	if (tb[TCA_HTB_DIRECT_QLEN])
1107 		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1108 	else
1109 		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1110 
1111 	if ((q->rate2quantum = gopt->rate2quantum) < 1)
1112 		q->rate2quantum = 1;
1113 	q->defcls = gopt->defcls;
1114 
1115 	if (!offload)
1116 		return 0;
1117 
1118 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1119 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1120 		struct Qdisc *qdisc;
1121 
1122 		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1123 					  TC_H_MAKE(sch->handle, 0), extack);
1124 		if (!qdisc) {
1125 			err = -ENOMEM;
1126 			goto err_free_qdiscs;
1127 		}
1128 
1129 		htb_set_lockdep_class_child(qdisc);
1130 		q->direct_qdiscs[ntx] = qdisc;
1131 		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1132 	}
1133 
1134 	sch->flags |= TCQ_F_MQROOT;
1135 
1136 	offload_opt = (struct tc_htb_qopt_offload) {
1137 		.command = TC_HTB_CREATE,
1138 		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
1139 		.classid = TC_H_MIN(q->defcls),
1140 		.extack = extack,
1141 	};
1142 	err = htb_offload(dev, &offload_opt);
1143 	if (err)
1144 		goto err_free_qdiscs;
1145 
1146 	/* Defer this assignment, so that htb_destroy skips offload-related
1147 	 * parts (especially calling ndo_setup_tc) on errors.
1148 	 */
1149 	q->offload = true;
1150 
1151 	return 0;
1152 
1153 err_free_qdiscs:
1154 	for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1155 	     ntx++)
1156 		qdisc_put(q->direct_qdiscs[ntx]);
1157 
1158 	qdisc_class_hash_destroy(&q->clhash);
1159 	/* Prevent use-after-free and double-free when htb_destroy gets called.
1160 	 */
1161 	q->clhash.hash = NULL;
1162 	q->clhash.hashsize = 0;
1163 
1164 err_free_direct_qdiscs:
1165 	kfree(q->direct_qdiscs);
1166 	q->direct_qdiscs = NULL;
1167 	return err;
1168 }
1169 
1170 static void htb_attach_offload(struct Qdisc *sch)
1171 {
1172 	struct net_device *dev = qdisc_dev(sch);
1173 	struct htb_sched *q = qdisc_priv(sch);
1174 	unsigned int ntx;
1175 
1176 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1177 		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1178 
1179 		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1180 		qdisc_put(old);
1181 		qdisc_hash_add(qdisc, false);
1182 	}
1183 	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1184 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1185 		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1186 
1187 		qdisc_put(old);
1188 	}
1189 
1190 	kfree(q->direct_qdiscs);
1191 	q->direct_qdiscs = NULL;
1192 }
1193 
1194 static void htb_attach_software(struct Qdisc *sch)
1195 {
1196 	struct net_device *dev = qdisc_dev(sch);
1197 	unsigned int ntx;
1198 
1199 	/* Resemble qdisc_graft behavior. */
1200 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1201 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1202 		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1203 
1204 		qdisc_refcount_inc(sch);
1205 
1206 		qdisc_put(old);
1207 	}
1208 }
1209 
1210 static void htb_attach(struct Qdisc *sch)
1211 {
1212 	struct htb_sched *q = qdisc_priv(sch);
1213 
1214 	if (q->offload)
1215 		htb_attach_offload(sch);
1216 	else
1217 		htb_attach_software(sch);
1218 }
1219 
1220 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1221 {
1222 	struct htb_sched *q = qdisc_priv(sch);
1223 	struct nlattr *nest;
1224 	struct tc_htb_glob gopt;
1225 
1226 	if (q->offload)
1227 		sch->flags |= TCQ_F_OFFLOADED;
1228 	else
1229 		sch->flags &= ~TCQ_F_OFFLOADED;
1230 
1231 	sch->qstats.overlimits = q->overlimits;
1232 	/* It's safe to not acquire the qdisc lock. As we hold RTNL,
1233 	 * no change can happen to the qdisc parameters.
1234 	 */
1235 
1236 	gopt.direct_pkts = q->direct_pkts;
1237 	gopt.version = HTB_VER;
1238 	gopt.rate2quantum = q->rate2quantum;
1239 	gopt.defcls = q->defcls;
1240 	gopt.debug = 0;
1241 
1242 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1243 	if (nest == NULL)
1244 		goto nla_put_failure;
1245 	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1246 	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1247 		goto nla_put_failure;
1248 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1249 		goto nla_put_failure;
1250 
1251 	return nla_nest_end(skb, nest);
1252 
1253 nla_put_failure:
1254 	nla_nest_cancel(skb, nest);
1255 	return -1;
1256 }
1257 
1258 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1259 			  struct sk_buff *skb, struct tcmsg *tcm)
1260 {
1261 	struct htb_class *cl = (struct htb_class *)arg;
1262 	struct htb_sched *q = qdisc_priv(sch);
1263 	struct nlattr *nest;
1264 	struct tc_htb_opt opt;
1265 
1266 	/* It's safe to not acquire the qdisc lock. As we hold RTNL,
1267 	 * no change can happen to the class parameters.
1268 	 */
1269 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1270 	tcm->tcm_handle = cl->common.classid;
1271 	if (!cl->level && cl->leaf.q)
1272 		tcm->tcm_info = cl->leaf.q->handle;
1273 
1274 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1275 	if (nest == NULL)
1276 		goto nla_put_failure;
1277 
1278 	memset(&opt, 0, sizeof(opt));
1279 
1280 	psched_ratecfg_getrate(&opt.rate, &cl->rate);
1281 	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1282 	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1283 	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1284 	opt.quantum = cl->quantum;
1285 	opt.prio = cl->prio;
1286 	opt.level = cl->level;
1287 	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1288 		goto nla_put_failure;
1289 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1290 		goto nla_put_failure;
1291 	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1292 	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1293 			      TCA_HTB_PAD))
1294 		goto nla_put_failure;
1295 	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1296 	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1297 			      TCA_HTB_PAD))
1298 		goto nla_put_failure;
1299 
1300 	return nla_nest_end(skb, nest);
1301 
1302 nla_put_failure:
1303 	nla_nest_cancel(skb, nest);
1304 	return -1;
1305 }
1306 
1307 static void htb_offload_aggregate_stats(struct htb_sched *q,
1308 					struct htb_class *cl)
1309 {
1310 	struct htb_class *c;
1311 	unsigned int i;
1312 
1313 	memset(&cl->bstats, 0, sizeof(cl->bstats));
1314 
1315 	for (i = 0; i < q->clhash.hashsize; i++) {
1316 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1317 			struct htb_class *p = c;
1318 
1319 			while (p && p->level < cl->level)
1320 				p = p->parent;
1321 
1322 			if (p != cl)
1323 				continue;
1324 
1325 			cl->bstats.bytes += c->bstats_bias.bytes;
1326 			cl->bstats.packets += c->bstats_bias.packets;
1327 			if (c->level == 0) {
1328 				cl->bstats.bytes += c->leaf.q->bstats.bytes;
1329 				cl->bstats.packets += c->leaf.q->bstats.packets;
1330 			}
1331 		}
1332 	}
1333 }
1334 
1335 static int
1336 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1337 {
1338 	struct htb_class *cl = (struct htb_class *)arg;
1339 	struct htb_sched *q = qdisc_priv(sch);
1340 	struct gnet_stats_queue qs = {
1341 		.drops = cl->drops,
1342 		.overlimits = cl->overlimits,
1343 	};
1344 	__u32 qlen = 0;
1345 
1346 	if (!cl->level && cl->leaf.q)
1347 		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1348 
1349 	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1350 				    INT_MIN, INT_MAX);
1351 	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1352 				     INT_MIN, INT_MAX);
1353 
1354 	if (q->offload) {
1355 		if (!cl->level) {
1356 			if (cl->leaf.q)
1357 				cl->bstats = cl->leaf.q->bstats;
1358 			else
1359 				memset(&cl->bstats, 0, sizeof(cl->bstats));
1360 			cl->bstats.bytes += cl->bstats_bias.bytes;
1361 			cl->bstats.packets += cl->bstats_bias.packets;
1362 		} else {
1363 			htb_offload_aggregate_stats(q, cl);
1364 		}
1365 	}
1366 
1367 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1368 				  d, NULL, &cl->bstats) < 0 ||
1369 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1370 	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1371 		return -1;
1372 
1373 	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1374 }
1375 
1376 static struct netdev_queue *
1377 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1378 {
1379 	struct net_device *dev = qdisc_dev(sch);
1380 	struct tc_htb_qopt_offload offload_opt;
1381 	struct htb_sched *q = qdisc_priv(sch);
1382 	int err;
1383 
1384 	if (!q->offload)
1385 		return sch->dev_queue;
1386 
1387 	offload_opt = (struct tc_htb_qopt_offload) {
1388 		.command = TC_HTB_LEAF_QUERY_QUEUE,
1389 		.classid = TC_H_MIN(tcm->tcm_parent),
1390 	};
1391 	err = htb_offload(dev, &offload_opt);
1392 	if (err || offload_opt.qid >= dev->num_tx_queues)
1393 		return NULL;
1394 	return netdev_get_tx_queue(dev, offload_opt.qid);
1395 }
1396 
1397 static struct Qdisc *
1398 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1399 {
1400 	struct net_device *dev = dev_queue->dev;
1401 	struct Qdisc *old_q;
1402 
1403 	if (dev->flags & IFF_UP)
1404 		dev_deactivate(dev);
1405 	old_q = dev_graft_qdisc(dev_queue, new_q);
1406 	if (new_q)
1407 		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1408 	if (dev->flags & IFF_UP)
1409 		dev_activate(dev);
1410 
1411 	return old_q;
1412 }
1413 
1414 static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
1415 {
1416 	struct netdev_queue *queue_old, *queue_new;
1417 	struct net_device *dev = qdisc_dev(sch);
1418 	struct Qdisc *qdisc;
1419 
1420 	queue_old = netdev_get_tx_queue(dev, qid_old);
1421 	queue_new = netdev_get_tx_queue(dev, qid_new);
1422 
1423 	if (dev->flags & IFF_UP)
1424 		dev_deactivate(dev);
1425 	qdisc = dev_graft_qdisc(queue_old, NULL);
1426 	qdisc->dev_queue = queue_new;
1427 	qdisc = dev_graft_qdisc(queue_new, qdisc);
1428 	if (dev->flags & IFF_UP)
1429 		dev_activate(dev);
1430 
1431 	WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1432 }
1433 
1434 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1435 		     struct Qdisc **old, struct netlink_ext_ack *extack)
1436 {
1437 	struct netdev_queue *dev_queue = sch->dev_queue;
1438 	struct htb_class *cl = (struct htb_class *)arg;
1439 	struct htb_sched *q = qdisc_priv(sch);
1440 	struct Qdisc *old_q;
1441 
1442 	if (cl->level)
1443 		return -EINVAL;
1444 
1445 	if (q->offload) {
1446 		dev_queue = new->dev_queue;
1447 		WARN_ON(dev_queue != cl->leaf.q->dev_queue);
1448 	}
1449 
1450 	if (!new) {
1451 		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1452 					cl->common.classid, extack);
1453 		if (!new)
1454 			return -ENOBUFS;
1455 	}
1456 
1457 	if (q->offload) {
1458 		htb_set_lockdep_class_child(new);
1459 		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1460 		qdisc_refcount_inc(new);
1461 		old_q = htb_graft_helper(dev_queue, new);
1462 	}
1463 
1464 	*old = qdisc_replace(sch, new, &cl->leaf.q);
1465 
1466 	if (q->offload) {
1467 		WARN_ON(old_q != *old);
1468 		qdisc_put(old_q);
1469 	}
1470 
1471 	return 0;
1472 }
1473 
1474 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1475 {
1476 	struct htb_class *cl = (struct htb_class *)arg;
1477 	return !cl->level ? cl->leaf.q : NULL;
1478 }
1479 
1480 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1481 {
1482 	struct htb_class *cl = (struct htb_class *)arg;
1483 
1484 	htb_deactivate(qdisc_priv(sch), cl);
1485 }
1486 
1487 static inline int htb_parent_last_child(struct htb_class *cl)
1488 {
1489 	if (!cl->parent)
1490 		/* the root class */
1491 		return 0;
1492 	if (cl->parent->children > 1)
1493 		/* not the last child */
1494 		return 0;
1495 	return 1;
1496 }
1497 
1498 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1499 			       struct Qdisc *new_q)
1500 {
1501 	struct htb_sched *q = qdisc_priv(sch);
1502 	struct htb_class *parent = cl->parent;
1503 
1504 	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1505 
1506 	if (parent->cmode != HTB_CAN_SEND)
1507 		htb_safe_rb_erase(&parent->pq_node,
1508 				  &q->hlevel[parent->level].wait_pq);
1509 
1510 	parent->level = 0;
1511 	memset(&parent->inner, 0, sizeof(parent->inner));
1512 	parent->leaf.q = new_q ? new_q : &noop_qdisc;
1513 	parent->tokens = parent->buffer;
1514 	parent->ctokens = parent->cbuffer;
1515 	parent->t_c = ktime_get_ns();
1516 	parent->cmode = HTB_CAN_SEND;
1517 }
1518 
1519 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1520 				       struct netdev_queue *dev_queue,
1521 				       struct Qdisc *new_q)
1522 {
1523 	struct Qdisc *old_q;
1524 
1525 	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1526 	if (new_q)
1527 		qdisc_refcount_inc(new_q);
1528 	old_q = htb_graft_helper(dev_queue, new_q);
1529 	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1530 }
1531 
1532 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1533 				     bool last_child, bool destroying,
1534 				     struct netlink_ext_ack *extack)
1535 {
1536 	struct tc_htb_qopt_offload offload_opt;
1537 	struct Qdisc *q = cl->leaf.q;
1538 	struct Qdisc *old = NULL;
1539 	int err;
1540 
1541 	if (cl->level)
1542 		return -EINVAL;
1543 
1544 	WARN_ON(!q);
1545 	if (!destroying) {
1546 		/* On destroy of HTB, two cases are possible:
1547 		 * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
1548 		 * 2. q is a noop qdisc (for nodes that were inner),
1549 		 *    q->dev_queue is noop_netdev_queue.
1550 		 */
1551 		old = htb_graft_helper(q->dev_queue, NULL);
1552 		WARN_ON(!old);
1553 		WARN_ON(old != q);
1554 	}
1555 
1556 	if (cl->parent) {
1557 		cl->parent->bstats_bias.bytes += q->bstats.bytes;
1558 		cl->parent->bstats_bias.packets += q->bstats.packets;
1559 	}
1560 
1561 	offload_opt = (struct tc_htb_qopt_offload) {
1562 		.command = !last_child ? TC_HTB_LEAF_DEL :
1563 			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1564 			   TC_HTB_LEAF_DEL_LAST,
1565 		.classid = cl->common.classid,
1566 		.extack = extack,
1567 	};
1568 	err = htb_offload(qdisc_dev(sch), &offload_opt);
1569 
1570 	if (!err || destroying)
1571 		qdisc_put(old);
1572 	else
1573 		htb_graft_helper(q->dev_queue, old);
1574 
1575 	if (last_child)
1576 		return err;
1577 
1578 	if (!err && offload_opt.moved_qid != 0) {
1579 		if (destroying)
1580 			q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
1581 							   offload_opt.qid);
1582 		else
1583 			htb_offload_move_qdisc(sch, offload_opt.moved_qid,
1584 					       offload_opt.qid);
1585 	}
1586 
1587 	return err;
1588 }
1589 
1590 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1591 {
1592 	if (!cl->level) {
1593 		WARN_ON(!cl->leaf.q);
1594 		qdisc_put(cl->leaf.q);
1595 	}
1596 	gen_kill_estimator(&cl->rate_est);
1597 	tcf_block_put(cl->block);
1598 	kfree(cl);
1599 }
1600 
1601 static void htb_destroy(struct Qdisc *sch)
1602 {
1603 	struct net_device *dev = qdisc_dev(sch);
1604 	struct tc_htb_qopt_offload offload_opt;
1605 	struct htb_sched *q = qdisc_priv(sch);
1606 	struct hlist_node *next;
1607 	bool nonempty, changed;
1608 	struct htb_class *cl;
1609 	unsigned int i;
1610 
1611 	cancel_work_sync(&q->work);
1612 	qdisc_watchdog_cancel(&q->watchdog);
1613 	/* This line used to be after htb_destroy_class call below
1614 	 * and surprisingly it worked in 2.4. But it must precede it
1615 	 * because filters need their target class alive to be able to call
1616 	 * unbind_filter on it (without an Oops).
1617 	 */
1618 	tcf_block_put(q->block);
1619 
1620 	for (i = 0; i < q->clhash.hashsize; i++) {
1621 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1622 			tcf_block_put(cl->block);
1623 			cl->block = NULL;
1624 		}
1625 	}
1626 
1627 	do {
1628 		nonempty = false;
1629 		changed = false;
1630 		for (i = 0; i < q->clhash.hashsize; i++) {
1631 			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1632 						  common.hnode) {
1633 				bool last_child;
1634 
1635 				if (!q->offload) {
1636 					htb_destroy_class(sch, cl);
1637 					continue;
1638 				}
1639 
1640 				nonempty = true;
1641 
1642 				if (cl->level)
1643 					continue;
1644 
1645 				changed = true;
1646 
1647 				last_child = htb_parent_last_child(cl);
1648 				htb_destroy_class_offload(sch, cl, last_child,
1649 							  true, NULL);
1650 				qdisc_class_hash_remove(&q->clhash,
1651 							&cl->common);
1652 				if (cl->parent)
1653 					cl->parent->children--;
1654 				if (last_child)
1655 					htb_parent_to_leaf(sch, cl, NULL);
1656 				htb_destroy_class(sch, cl);
1657 			}
1658 		}
1659 	} while (changed);
1660 	WARN_ON(nonempty);
1661 
1662 	qdisc_class_hash_destroy(&q->clhash);
1663 	__qdisc_reset_queue(&q->direct_queue);
1664 
1665 	if (!q->offload)
1666 		return;
1667 
1668 	offload_opt = (struct tc_htb_qopt_offload) {
1669 		.command = TC_HTB_DESTROY,
1670 	};
1671 	htb_offload(dev, &offload_opt);
1672 
1673 	if (!q->direct_qdiscs)
1674 		return;
1675 	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1676 		qdisc_put(q->direct_qdiscs[i]);
1677 	kfree(q->direct_qdiscs);
1678 }
1679 
1680 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1681 		      struct netlink_ext_ack *extack)
1682 {
1683 	struct htb_sched *q = qdisc_priv(sch);
1684 	struct htb_class *cl = (struct htb_class *)arg;
1685 	struct Qdisc *new_q = NULL;
1686 	int last_child = 0;
1687 	int err;
1688 
1689 	/* TODO: why don't we allow deleting a subtree? References? Does the
1690 	 * tc subsys guarantee us that in htb_destroy it holds no class
1691 	 * refs, so that we can remove children safely there?
1692 	 */
1693 	if (cl->children || cl->filter_cnt)
1694 		return -EBUSY;
1695 
1696 	if (!cl->level && htb_parent_last_child(cl))
1697 		last_child = 1;
1698 
1699 	if (q->offload) {
1700 		err = htb_destroy_class_offload(sch, cl, last_child, false,
1701 						extack);
1702 		if (err)
1703 			return err;
1704 	}
1705 
1706 	if (last_child) {
1707 		struct netdev_queue *dev_queue;
1708 
1709 		dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
1710 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1711 					  cl->parent->common.classid,
1712 					  NULL);
1713 		if (q->offload) {
1714 			if (new_q)
1715 				htb_set_lockdep_class_child(new_q);
1716 			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1717 		}
1718 	}
1719 
1720 	sch_tree_lock(sch);
1721 
1722 	if (!cl->level)
1723 		qdisc_purge_queue(cl->leaf.q);
1724 
1725 	/* delete from hash and active; remainder in destroy_class */
1726 	qdisc_class_hash_remove(&q->clhash, &cl->common);
1727 	if (cl->parent)
1728 		cl->parent->children--;
1729 
1730 	if (cl->prio_activity)
1731 		htb_deactivate(q, cl);
1732 
1733 	if (cl->cmode != HTB_CAN_SEND)
1734 		htb_safe_rb_erase(&cl->pq_node,
1735 				  &q->hlevel[cl->level].wait_pq);
1736 
1737 	if (last_child)
1738 		htb_parent_to_leaf(sch, cl, new_q);
1739 
1740 	sch_tree_unlock(sch);
1741 
1742 	htb_destroy_class(sch, cl);
1743 	return 0;
1744 }
1745 
1746 static int htb_change_class(struct Qdisc *sch, u32 classid,
1747 			    u32 parentid, struct nlattr **tca,
1748 			    unsigned long *arg, struct netlink_ext_ack *extack)
1749 {
1750 	int err = -EINVAL;
1751 	struct htb_sched *q = qdisc_priv(sch);
1752 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
1753 	struct tc_htb_qopt_offload offload_opt;
1754 	struct nlattr *opt = tca[TCA_OPTIONS];
1755 	struct nlattr *tb[TCA_HTB_MAX + 1];
1756 	struct Qdisc *parent_qdisc = NULL;
1757 	struct netdev_queue *dev_queue;
1758 	struct tc_htb_opt *hopt;
1759 	u64 rate64, ceil64;
1760 	int warn = 0;
1761 
1762 	/* extract all subattrs from opt attr */
1763 	if (!opt)
1764 		goto failure;
1765 
1766 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1767 					  NULL);
1768 	if (err < 0)
1769 		goto failure;
1770 
1771 	err = -EINVAL;
1772 	if (tb[TCA_HTB_PARMS] == NULL)
1773 		goto failure;
1774 
1775 	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1776 
1777 	hopt = nla_data(tb[TCA_HTB_PARMS]);
1778 	if (!hopt->rate.rate || !hopt->ceil.rate)
1779 		goto failure;
1780 
1781 	/* Keeping backward compatibility with rate_table based iproute2 tc */
1782 	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1783 		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1784 					      NULL));
1785 
1786 	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1787 		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1788 					      NULL));
1789 
1790 	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1791 	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1792 
1793 	if (!cl) {		/* new class */
1794 		struct net_device *dev = qdisc_dev(sch);
1795 		struct Qdisc *new_q, *old_q;
1796 		int prio;
1797 		struct {
1798 			struct nlattr		nla;
1799 			struct gnet_estimator	opt;
1800 		} est = {
1801 			.nla = {
1802 				.nla_len	= nla_attr_size(sizeof(est.opt)),
1803 				.nla_type	= TCA_RATE,
1804 			},
1805 			.opt = {
1806 				/* 4s interval, 16s averaging constant */
1807 				.interval	= 2,
1808 				.ewma_log	= 2,
1809 			},
1810 		};
1811 
1812 		/* check for valid classid */
1813 		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1814 		    htb_find(classid, sch))
1815 			goto failure;
1816 
1817 		/* check maximal depth */
1818 		if (parent && parent->parent && parent->parent->level < 2) {
1819 			pr_err("htb: tree is too deep\n");
1820 			goto failure;
1821 		}
1822 		err = -ENOBUFS;
1823 		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1824 		if (!cl)
1825 			goto failure;
1826 
1827 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1828 		if (err) {
1829 			kfree(cl);
1830 			goto failure;
1831 		}
1832 		if (htb_rate_est || tca[TCA_RATE]) {
1833 			err = gen_new_estimator(&cl->bstats, NULL,
1834 						&cl->rate_est,
1835 						NULL,
1836 						qdisc_root_sleeping_running(sch),
1837 						tca[TCA_RATE] ? : &est.nla);
1838 			if (err)
1839 				goto err_block_put;
1840 		}
1841 
1842 		cl->children = 0;
1843 		RB_CLEAR_NODE(&cl->pq_node);
1844 
1845 		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1846 			RB_CLEAR_NODE(&cl->node[prio]);
1847 
1848 		cl->common.classid = classid;
1849 
1850 		/* Make sure nothing interrupts us in between the two
1851 		 * ndo_setup_tc calls.
1852 		 */
1853 		ASSERT_RTNL();
1854 
1855 		/* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1856 		 * which can't be used inside of sch_tree_lock
1857 		 * -- thanks to Karlis Peisenieks
1858 		 */
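		/* Pick the tx queue backing the new leaf. Three cases:
		 *  - no offload: everything stays on the qdisc's own dev_queue;
		 *  - offload and the parent is the root or already an inner
		 *    node: ask the driver for a fresh queue (LEAF_ALLOC_QUEUE);
		 *  - offload and the parent is currently a leaf: this is its
		 *    first child, so the parent is converted in the driver
		 *    (LEAF_TO_INNER) and its queue is inherited.
		 */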
1859 		if (!q->offload) {
1860 			dev_queue = sch->dev_queue;
1861 		} else if (!(parent && !parent->level)) {
1862 			/* Assign a dev_queue to this classid. */
1863 			offload_opt = (struct tc_htb_qopt_offload) {
1864 				.command = TC_HTB_LEAF_ALLOC_QUEUE,
1865 				.classid = cl->common.classid,
1866 				.parent_classid = parent ?
1867 					TC_H_MIN(parent->common.classid) :
1868 					TC_HTB_CLASSID_ROOT,
1869 				.rate = max_t(u64, hopt->rate.rate, rate64),
1870 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1871 				.extack = extack,
1872 			};
1873 			err = htb_offload(dev, &offload_opt);
1874 			if (err) {
1875 				pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1876 				       err);
1877 				goto err_kill_estimator;
1878 			}
1879 			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1880 		} else { /* First child. */
1881 			dev_queue = parent->leaf.q->dev_queue;
1882 			old_q = htb_graft_helper(dev_queue, NULL);
1883 			WARN_ON(old_q != parent->leaf.q);
1884 			offload_opt = (struct tc_htb_qopt_offload) {
1885 				.command = TC_HTB_LEAF_TO_INNER,
1886 				.classid = cl->common.classid,
1887 				.parent_classid =
1888 					TC_H_MIN(parent->common.classid),
1889 				.rate = max_t(u64, hopt->rate.rate, rate64),
1890 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1891 				.extack = extack,
1892 			};
1893 			err = htb_offload(dev, &offload_opt);
1894 			if (err) {
1895 				pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1896 				       err);
1897 				htb_graft_helper(dev_queue, old_q);
1898 				goto err_kill_estimator;
1899 			}
1900 			parent->bstats_bias.bytes += old_q->bstats.bytes;
1901 			parent->bstats_bias.packets += old_q->bstats.packets;
1902 			qdisc_put(old_q);
1903 		}
1904 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1905 					  classid, NULL);
1906 		if (q->offload) {
1907 			if (new_q) {
1908 				htb_set_lockdep_class_child(new_q);
1909 				/* One ref for cl->leaf.q, the other for
1910 				 * dev_queue->qdisc.
1911 				 */
1912 				qdisc_refcount_inc(new_q);
1913 			}
1914 			old_q = htb_graft_helper(dev_queue, new_q);
1915 			/* No qdisc_put needed. */
1916 			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1917 		}
1918 		sch_tree_lock(sch);
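		/* If the parent was a leaf until now, it becomes an inner node:
		 * drop its queue contents and leaf scheduling state under
		 * sch_tree_lock and recompute its level; the old leaf qdisc
		 * itself is only released after the lock is dropped, via
		 * qdisc_put(parent_qdisc) below.
		 */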
1919 		if (parent && !parent->level) {
1920 			/* turn parent into inner node */
1921 			qdisc_purge_queue(parent->leaf.q);
1922 			parent_qdisc = parent->leaf.q;
1923 			if (parent->prio_activity)
1924 				htb_deactivate(q, parent);
1925 
1926 			/* remove from evt list because of level change */
1927 			if (parent->cmode != HTB_CAN_SEND) {
1928 				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1929 				parent->cmode = HTB_CAN_SEND;
1930 			}
1931 			parent->level = (parent->parent ? parent->parent->level
1932 					 : TC_HTB_MAXDEPTH) - 1;
1933 			memset(&parent->inner, 0, sizeof(parent->inner));
1934 		}
1935 
1936 		/* the leaf (i.e. this new class) needs an elementary qdisc */
1937 		cl->leaf.q = new_q ? new_q : &noop_qdisc;
1938 
1939 		cl->parent = parent;
1940 
1941 		/* set class to be in HTB_CAN_SEND state */
1942 		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1943 		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1944 		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
1945 		cl->t_c = ktime_get_ns();
1946 		cl->cmode = HTB_CAN_SEND;
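		/* Token counts are kept in nanoseconds: buffer/cbuffer arrive
		 * in psched ticks and were converted above, t_c records the
		 * time of the last update, and mbuffer bounds how much idle
		 * time may be turned into accumulated tokens.
		 */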
1947 
1948 		/* attach to the hash list and parent's family */
1949 		qdisc_class_hash_insert(&q->clhash, &cl->common);
1950 		if (parent)
1951 			parent->children++;
1952 		if (cl->leaf.q != &noop_qdisc)
1953 			qdisc_hash_add(cl->leaf.q, true);
1954 	} else {
1955 		if (tca[TCA_RATE]) {
1956 			err = gen_replace_estimator(&cl->bstats, NULL,
1957 						    &cl->rate_est,
1958 						    NULL,
1959 						    qdisc_root_sleeping_running(sch),
1960 						    tca[TCA_RATE]);
1961 			if (err)
1962 				return err;
1963 		}
1964 
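		/* Existing class: only the estimator (above), the offload state
		 * and the rate parameters are updated; the class topology and
		 * its leaf qdisc are left untouched on this path.
		 */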
1965 		if (q->offload) {
1966 			struct net_device *dev = qdisc_dev(sch);
1967 
1968 			offload_opt = (struct tc_htb_qopt_offload) {
1969 				.command = TC_HTB_NODE_MODIFY,
1970 				.classid = cl->common.classid,
1971 				.rate = max_t(u64, hopt->rate.rate, rate64),
1972 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1973 				.extack = extack,
1974 			};
1975 			err = htb_offload(dev, &offload_opt);
1976 			if (err)
1977 				/* Estimator was replaced, and rollback may fail
1978 				 * as well, so we don't try to recover it, and
1979 				 * the estimator won't work property with the
1980 				 * the estimator won't work properly with the
1981 				 * only when the stats are queried.
1982 				 */
1983 				return err;
1984 		}
1985 
1986 		sch_tree_lock(sch);
1987 	}
1988 
1989 	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1990 	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1991 
1992 	/* there used to be a nasty bug here: we have to check that the node
1993 	 * is really a leaf before changing cl->leaf!
1994 	 */
1995 	if (!cl->level) {
1996 		u64 quantum = cl->rate.rate_bytes_ps;
1997 
1998 		do_div(quantum, q->rate2quantum);
1999 		cl->quantum = min_t(u64, quantum, INT_MAX);
2000 
2001 		if (!hopt->quantum && cl->quantum < 1000) {
2002 			warn = -1;
2003 			cl->quantum = 1000;
2004 		}
2005 		if (!hopt->quantum && cl->quantum > 200000) {
2006 			warn = 1;
2007 			cl->quantum = 200000;
2008 		}
2009 		if (hopt->quantum)
2010 			cl->quantum = hopt->quantum;
2011 		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2012 			cl->prio = TC_HTB_NUMPRIO - 1;
2013 	}
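	/* Illustration only, assuming iproute2's usual default of r2q = 10
	 * (userspace chooses r2q, not this code): a 1Mbit class is 125000
	 * bytes/s, giving quantum = 12500, which is accepted as is; a 100Mbit
	 * class is 12500000 bytes/s, giving quantum = 1250000, which is
	 * clamped to 200000 and triggers the r2q warning below unless an
	 * explicit quantum was configured.
	 */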
2014 
2015 	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2016 	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2017 
2018 	sch_tree_unlock(sch);
2019 	qdisc_put(parent_qdisc);
2020 
2021 	if (warn)
2022 		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2023 			    cl->common.classid, (warn == -1 ? "small" : "big"));
2024 
2025 	qdisc_class_hash_grow(sch, &q->clhash);
2026 
2027 	*arg = (unsigned long)cl;
2028 	return 0;
2029 
2030 err_kill_estimator:
2031 	gen_kill_estimator(&cl->rate_est);
2032 err_block_put:
2033 	tcf_block_put(cl->block);
2034 	kfree(cl);
2035 failure:
2036 	return err;
2037 }
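/* Illustrative userspace usage that ends up in htb_change_class() (the
 * interface name is hypothetical):
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 10
 *   tc class add dev eth0 parent 1: classid 1:10 htb rate 100mbit ceil 200mbit
 *
 * The second command takes the "new class" branch above; re-running it as
 * "tc class change" takes the modify branch instead.
 */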
2038 
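/* Filters may be attached either to the qdisc itself or to an individual
 * class; return the tcf block the request refers to.
 */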
2039 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2040 				       struct netlink_ext_ack *extack)
2041 {
2042 	struct htb_sched *q = qdisc_priv(sch);
2043 	struct htb_class *cl = (struct htb_class *)arg;
2044 
2045 	return cl ? cl->block : q->block;
2046 }
2047 
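/* bind/unbind keep filter_cnt up to date so that a class referenced by a
 * classifier is not torn down underneath it; the delete path refuses to
 * remove a class whose filter_cnt is non-zero.
 */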
2048 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2049 				     u32 classid)
2050 {
2051 	struct htb_class *cl = htb_find(classid, sch);
2052 
2053 	/*if (cl && !cl->level) return 0;
2054 	 * The line above used to be there to prevent attaching filters to
2055 	 * leaves. But at least the tc_index filter uses this just to get the
2056 	 * class for other reasons, so we have to allow it.
2057 	 * ----
2058 	 * 19.6.2002 As Werner explained, it is OK - bind_filter is just
2059 	 * another way to "lock" the class - unlike "get", this lock can
2060 	 * be broken by the class during destroy, IIUC.
2061 	 */
2062 	if (cl)
2063 		cl->filter_cnt++;
2064 	return (unsigned long)cl;
2065 }
2066 
2067 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2068 {
2069 	struct htb_class *cl = (struct htb_class *)arg;
2070 
2071 	if (cl)
2072 		cl->filter_cnt--;
2073 }
2074 
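/* Walker used for class dumps such as "tc class show": visit every class in
 * the hash table, honouring the skip/count/stop cursor in struct
 * qdisc_walker.
 */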
2075 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2076 {
2077 	struct htb_sched *q = qdisc_priv(sch);
2078 	struct htb_class *cl;
2079 	unsigned int i;
2080 
2081 	if (arg->stop)
2082 		return;
2083 
2084 	for (i = 0; i < q->clhash.hashsize; i++) {
2085 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2086 			if (arg->count < arg->skip) {
2087 				arg->count++;
2088 				continue;
2089 			}
2090 			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2091 				arg->stop = 1;
2092 				return;
2093 			}
2094 			arg->count++;
2095 		}
2096 	}
2097 }
2098 
2099 static const struct Qdisc_class_ops htb_class_ops = {
2100 	.select_queue	=	htb_select_queue,
2101 	.graft		=	htb_graft,
2102 	.leaf		=	htb_leaf,
2103 	.qlen_notify	=	htb_qlen_notify,
2104 	.find		=	htb_search,
2105 	.change		=	htb_change_class,
2106 	.delete		=	htb_delete,
2107 	.walk		=	htb_walk,
2108 	.tcf_block	=	htb_tcf_block,
2109 	.bind_tcf	=	htb_bind_filter,
2110 	.unbind_tcf	=	htb_unbind_filter,
2111 	.dump		=	htb_dump_class,
2112 	.dump_stats	=	htb_dump_class_stats,
2113 };
2114 
2115 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2116 	.cl_ops		=	&htb_class_ops,
2117 	.id		=	"htb",
2118 	.priv_size	=	sizeof(struct htb_sched),
2119 	.enqueue	=	htb_enqueue,
2120 	.dequeue	=	htb_dequeue,
2121 	.peek		=	qdisc_peek_dequeued,
2122 	.init		=	htb_init,
2123 	.attach		=	htb_attach,
2124 	.reset		=	htb_reset,
2125 	.destroy	=	htb_destroy,
2126 	.dump		=	htb_dump,
2127 	.owner		=	THIS_MODULE,
2128 };
2129 
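/* Registering htb_qdisc_ops makes the "htb" discipline available to tc;
 * when built as a module it is normally auto-loaded via
 * request_module("sch_htb") the first time userspace asks for it.
 */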
2130 static int __init htb_module_init(void)
2131 {
2132 	return register_qdisc(&htb_qdisc_ops);
2133 }
2134 static void __exit htb_module_exit(void)
2135 {
2136 	unregister_qdisc(&htb_qdisc_ops);
2137 }
2138 
2139 module_init(htb_module_init)
2140 module_exit(htb_module_exit)
2141 MODULE_LICENSE("GPL");
2142