xref: /openbmc/linux/net/sched/sch_pie.c (revision e7253313)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (C) 2013 Cisco Systems, Inc, 2013.
3  *
4  * Author: Vijay Subramanian <vijaynsu@cisco.com>
5  * Author: Mythili Prabhu <mysuryan@cisco.com>
6  *
7  * ECN support is added by Naeem Khademi <naeemk@ifi.uio.no>
8  * University of Oslo, Norway.
9  *
10  * References:
11  * RFC 8033: https://tools.ietf.org/html/rfc8033
12  */
13 
14 #include <linux/module.h>
15 #include <linux/slab.h>
16 #include <linux/types.h>
17 #include <linux/kernel.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <net/pkt_sched.h>
21 #include <net/inet_ecn.h>
22 
/* Backlog, in bytes, that must be queued before pie_process_dequeue()
 * starts a drain-rate measurement cycle; also the number of dequeued bytes
 * that completes one cycle.
 */
#define QUEUE_THRESHOLD 16384
/* Stored in pie_vars.dq_count while no measurement cycle is in progress. */
#define DQCOUNT_INVALID -1
/* Stored in pie_vars.dq_tstamp while no dequeue timestamp is available. */
#define DTIME_INVALID 0xffffffffffffffff
/* Drop probability is kept as a u64 fraction of this value (all ones). */
#define MAX_PROB 0xffffffffffffffff
/* Left shift applied to byte counts so that avg_dq_rate keeps precision. */
#define PIE_SCALE 8
28 
/* Tunable parameters of one PIE instance.  Defaults are loaded by
 * pie_params_init() and individual fields are overridden from netlink
 * attributes in pie_change().
 */
struct pie_params {
	psched_time_t target;	/* user specified target delay in pschedtime */
	u32 tupdate;		/* timer period (in jiffies); 0 stops re-arming */
	u32 limit;		/* number of packets that can be enqueued */
	u32 alpha;		/* alpha and beta are between 0 and 32 */
	u32 beta;		/* and are used for shift relative to 1 */
	bool ecn;		/* true if ecn is enabled */
	bool bytemode;		/* to scale drop early prob based on pkt size */
	u8 dq_rate_estimator;	/* non-zero: derive delay from the measured
				 * drain rate (Little's law) instead of from
				 * per-packet enqueue timestamps
				 */
};
40 
/* Run-time state of the PIE controller.  pie_vars_init() resets only the
 * measurement, burst and accumulator fields; prob, qdelay, qdelay_old and
 * qlen_old are left untouched by it.
 */
struct pie_vars {
	u64 prob;		/* probability but scaled by u64 limit. */
	psched_time_t burst_time;	/* remaining burst allowance */
	psched_time_t qdelay;	/* current queue delay estimate */
	psched_time_t qdelay_old;	/* estimate from the previous update */
	u64 dq_count;		/* measured in bytes */
	psched_time_t dq_tstamp;	/* time of last dequeue, or start of the
					 * current drain-rate measurement
					 */
	u64 accu_prob;		/* accumulated drop probability */
	u32 avg_dq_rate;	/* bytes per pschedtime tick,scaled */
	u32 qlen_old;		/* in bytes */
	u8 accu_prob_overflows;	/* overflows of accu_prob */
};
54 
/* Counters maintained on the enqueue path and exported by pie_dump_stats() */
struct pie_stats {
	u32 packets_in;		/* total number of packets enqueued */
	u32 dropped;		/* packets dropped on enqueue; incremented for
				 * early drops and for overlimit drops alike
				 */
	u32 overlimit;		/* dropped due to lack of space in queue */
	u32 maxq;		/* maximum queue size (packets) seen at enqueue */
	u32 ecn_mark;		/* packets marked with ECN */
};
63 
/* private data for the Qdisc */
struct pie_sched_data {
	struct pie_params params;	/* configuration */
	struct pie_vars vars;		/* controller state */
	struct pie_stats stats;		/* exported counters */
	struct timer_list adapt_timer;	/* periodically runs pie_timer() */
	struct Qdisc *sch;		/* back pointer used by pie_timer() */
};
72 
73 static void pie_params_init(struct pie_params *params)
74 {
75 	params->alpha = 2;
76 	params->beta = 20;
77 	params->tupdate = usecs_to_jiffies(15 * USEC_PER_MSEC);	/* 15 ms */
78 	params->limit = 1000;	/* default of 1000 packets */
79 	params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC);	/* 15 ms */
80 	params->ecn = false;
81 	params->bytemode = false;
82 	params->dq_rate_estimator = false;
83 }
84 
/* Per-packet private data kept in the qdisc control block of the skb
 * (see get_pie_cb()).
 */
struct pie_skb_cb {
	psched_time_t enqueue_time;	/* written by pie_set_enqueue_time() */
};
89 
90 static struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
91 {
92 	qdisc_cb_private_validate(skb, sizeof(struct pie_skb_cb));
93 	return (struct pie_skb_cb *)qdisc_skb_cb(skb)->data;
94 }
95 
96 static psched_time_t pie_get_enqueue_time(const struct sk_buff *skb)
97 {
98 	return get_pie_cb(skb)->enqueue_time;
99 }
100 
101 static void pie_set_enqueue_time(struct sk_buff *skb)
102 {
103 	get_pie_cb(skb)->enqueue_time = psched_get_time();
104 }
105 
106 static void pie_vars_init(struct pie_vars *vars)
107 {
108 	vars->dq_count = DQCOUNT_INVALID;
109 	vars->dq_tstamp = DTIME_INVALID;
110 	vars->accu_prob = 0;
111 	vars->avg_dq_rate = 0;
112 	/* default of 150 ms in pschedtime */
113 	vars->burst_time = PSCHED_NS2TICKS(150 * NSEC_PER_MSEC);
114 	vars->accu_prob_overflows = 0;
115 }
116 
117 static bool drop_early(struct Qdisc *sch, u32 packet_size)
118 {
119 	struct pie_sched_data *q = qdisc_priv(sch);
120 	u64 rnd;
121 	u64 local_prob = q->vars.prob;
122 	u32 mtu = psched_mtu(qdisc_dev(sch));
123 
124 	/* If there is still burst allowance left skip random early drop */
125 	if (q->vars.burst_time > 0)
126 		return false;
127 
128 	/* If current delay is less than half of target, and
129 	 * if drop prob is low already, disable early_drop
130 	 */
131 	if ((q->vars.qdelay < q->params.target / 2) &&
132 	    (q->vars.prob < MAX_PROB / 5))
133 		return false;
134 
135 	/* If we have fewer than 2 mtu-sized packets, disable drop_early,
136 	 * similar to min_th in RED
137 	 */
138 	if (sch->qstats.backlog < 2 * mtu)
139 		return false;
140 
141 	/* If bytemode is turned on, use packet size to compute new
142 	 * probablity. Smaller packets will have lower drop prob in this case
143 	 */
144 	if (q->params.bytemode && packet_size <= mtu)
145 		local_prob = (u64)packet_size * div_u64(local_prob, mtu);
146 	else
147 		local_prob = q->vars.prob;
148 
149 	if (local_prob == 0) {
150 		q->vars.accu_prob = 0;
151 		q->vars.accu_prob_overflows = 0;
152 	}
153 
154 	if (local_prob > MAX_PROB - q->vars.accu_prob)
155 		q->vars.accu_prob_overflows++;
156 
157 	q->vars.accu_prob += local_prob;
158 
159 	if (q->vars.accu_prob_overflows == 0 &&
160 	    q->vars.accu_prob < (MAX_PROB / 100) * 85)
161 		return false;
162 	if (q->vars.accu_prob_overflows == 8 &&
163 	    q->vars.accu_prob >= MAX_PROB / 2)
164 		return true;
165 
166 	prandom_bytes(&rnd, 8);
167 	if (rnd < local_prob) {
168 		q->vars.accu_prob = 0;
169 		q->vars.accu_prob_overflows = 0;
170 		return true;
171 	}
172 
173 	return false;
174 }
175 
176 static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
177 			     struct sk_buff **to_free)
178 {
179 	struct pie_sched_data *q = qdisc_priv(sch);
180 	bool enqueue = false;
181 
182 	if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
183 		q->stats.overlimit++;
184 		goto out;
185 	}
186 
187 	if (!drop_early(sch, skb->len)) {
188 		enqueue = true;
189 	} else if (q->params.ecn && (q->vars.prob <= MAX_PROB / 10) &&
190 		   INET_ECN_set_ce(skb)) {
191 		/* If packet is ecn capable, mark it if drop probability
192 		 * is lower than 10%, else drop it.
193 		 */
194 		q->stats.ecn_mark++;
195 		enqueue = true;
196 	}
197 
198 	/* we can enqueue the packet */
199 	if (enqueue) {
200 		/* Set enqueue time only when dq_rate_estimator is disabled. */
201 		if (!q->params.dq_rate_estimator)
202 			pie_set_enqueue_time(skb);
203 
204 		q->stats.packets_in++;
205 		if (qdisc_qlen(sch) > q->stats.maxq)
206 			q->stats.maxq = qdisc_qlen(sch);
207 
208 		return qdisc_enqueue_tail(skb, sch);
209 	}
210 
211 out:
212 	q->stats.dropped++;
213 	q->vars.accu_prob = 0;
214 	q->vars.accu_prob_overflows = 0;
215 	return qdisc_drop(skb, sch, to_free);
216 }
217 
/* Netlink policy for the TCA_PIE_* attributes parsed in pie_change(); every
 * attribute is carried as a u32, including the on/off switches.
 */
static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
	[TCA_PIE_TARGET] = {.type = NLA_U32},
	[TCA_PIE_LIMIT] = {.type = NLA_U32},
	[TCA_PIE_TUPDATE] = {.type = NLA_U32},
	[TCA_PIE_ALPHA] = {.type = NLA_U32},
	[TCA_PIE_BETA] = {.type = NLA_U32},
	[TCA_PIE_ECN] = {.type = NLA_U32},
	[TCA_PIE_BYTEMODE] = {.type = NLA_U32},
	[TCA_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32},
};
228 
229 static int pie_change(struct Qdisc *sch, struct nlattr *opt,
230 		      struct netlink_ext_ack *extack)
231 {
232 	struct pie_sched_data *q = qdisc_priv(sch);
233 	struct nlattr *tb[TCA_PIE_MAX + 1];
234 	unsigned int qlen, dropped = 0;
235 	int err;
236 
237 	if (!opt)
238 		return -EINVAL;
239 
240 	err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy,
241 					  NULL);
242 	if (err < 0)
243 		return err;
244 
245 	sch_tree_lock(sch);
246 
247 	/* convert from microseconds to pschedtime */
248 	if (tb[TCA_PIE_TARGET]) {
249 		/* target is in us */
250 		u32 target = nla_get_u32(tb[TCA_PIE_TARGET]);
251 
252 		/* convert to pschedtime */
253 		q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
254 	}
255 
256 	/* tupdate is in jiffies */
257 	if (tb[TCA_PIE_TUPDATE])
258 		q->params.tupdate =
259 			usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
260 
261 	if (tb[TCA_PIE_LIMIT]) {
262 		u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
263 
264 		q->params.limit = limit;
265 		sch->limit = limit;
266 	}
267 
268 	if (tb[TCA_PIE_ALPHA])
269 		q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]);
270 
271 	if (tb[TCA_PIE_BETA])
272 		q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]);
273 
274 	if (tb[TCA_PIE_ECN])
275 		q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]);
276 
277 	if (tb[TCA_PIE_BYTEMODE])
278 		q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);
279 
280 	if (tb[TCA_PIE_DQ_RATE_ESTIMATOR])
281 		q->params.dq_rate_estimator =
282 				nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]);
283 
284 	/* Drop excess packets if new limit is lower */
285 	qlen = sch->q.qlen;
286 	while (sch->q.qlen > sch->limit) {
287 		struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
288 
289 		dropped += qdisc_pkt_len(skb);
290 		qdisc_qstats_backlog_dec(sch, skb);
291 		rtnl_qdisc_drop(skb, sch);
292 	}
293 	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
294 
295 	sch_tree_unlock(sch);
296 	return 0;
297 }
298 
299 static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
300 {
301 	struct pie_sched_data *q = qdisc_priv(sch);
302 	int qlen = sch->qstats.backlog;	/* current queue size in bytes */
303 	psched_time_t now = psched_get_time();
304 	u32 dtime = 0;
305 
306 	/* If dq_rate_estimator is disabled, calculate qdelay using the
307 	 * packet timestamp.
308 	 */
309 	if (!q->params.dq_rate_estimator) {
310 		q->vars.qdelay = now - pie_get_enqueue_time(skb);
311 
312 		if (q->vars.dq_tstamp != DTIME_INVALID)
313 			dtime = now - q->vars.dq_tstamp;
314 
315 		q->vars.dq_tstamp = now;
316 
317 		if (qlen == 0)
318 			q->vars.qdelay = 0;
319 
320 		if (dtime == 0)
321 			return;
322 
323 		goto burst_allowance_reduction;
324 	}
325 
326 	/* If current queue is about 10 packets or more and dq_count is unset
327 	 * we have enough packets to calculate the drain rate. Save
328 	 * current time as dq_tstamp and start measurement cycle.
329 	 */
330 	if (qlen >= QUEUE_THRESHOLD && q->vars.dq_count == DQCOUNT_INVALID) {
331 		q->vars.dq_tstamp = psched_get_time();
332 		q->vars.dq_count = 0;
333 	}
334 
335 	/* Calculate the average drain rate from this value.  If queue length
336 	 * has receded to a small value viz., <= QUEUE_THRESHOLD bytes,reset
337 	 * the dq_count to -1 as we don't have enough packets to calculate the
338 	 * drain rate anymore The following if block is entered only when we
339 	 * have a substantial queue built up (QUEUE_THRESHOLD bytes or more)
340 	 * and we calculate the drain rate for the threshold here.  dq_count is
341 	 * in bytes, time difference in psched_time, hence rate is in
342 	 * bytes/psched_time.
343 	 */
344 	if (q->vars.dq_count != DQCOUNT_INVALID) {
345 		q->vars.dq_count += skb->len;
346 
347 		if (q->vars.dq_count >= QUEUE_THRESHOLD) {
348 			u32 count = q->vars.dq_count << PIE_SCALE;
349 
350 			dtime = now - q->vars.dq_tstamp;
351 
352 			if (dtime == 0)
353 				return;
354 
355 			count = count / dtime;
356 
357 			if (q->vars.avg_dq_rate == 0)
358 				q->vars.avg_dq_rate = count;
359 			else
360 				q->vars.avg_dq_rate =
361 				    (q->vars.avg_dq_rate -
362 				     (q->vars.avg_dq_rate >> 3)) + (count >> 3);
363 
364 			/* If the queue has receded below the threshold, we hold
365 			 * on to the last drain rate calculated, else we reset
366 			 * dq_count to 0 to re-enter the if block when the next
367 			 * packet is dequeued
368 			 */
369 			if (qlen < QUEUE_THRESHOLD) {
370 				q->vars.dq_count = DQCOUNT_INVALID;
371 			} else {
372 				q->vars.dq_count = 0;
373 				q->vars.dq_tstamp = psched_get_time();
374 			}
375 
376 			goto burst_allowance_reduction;
377 		}
378 	}
379 
380 	return;
381 
382 burst_allowance_reduction:
383 	if (q->vars.burst_time > 0) {
384 		if (q->vars.burst_time > dtime)
385 			q->vars.burst_time -= dtime;
386 		else
387 			q->vars.burst_time = 0;
388 	}
389 }
390 
391 static void calculate_probability(struct Qdisc *sch)
392 {
393 	struct pie_sched_data *q = qdisc_priv(sch);
394 	u32 qlen = sch->qstats.backlog;	/* queue size in bytes */
395 	psched_time_t qdelay = 0;	/* in pschedtime */
396 	psched_time_t qdelay_old = 0;	/* in pschedtime */
397 	s64 delta = 0;		/* determines the change in probability */
398 	u64 oldprob;
399 	u64 alpha, beta;
400 	u32 power;
401 	bool update_prob = true;
402 
403 	if (q->params.dq_rate_estimator) {
404 		qdelay_old = q->vars.qdelay;
405 		q->vars.qdelay_old = q->vars.qdelay;
406 
407 		if (q->vars.avg_dq_rate > 0)
408 			qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
409 		else
410 			qdelay = 0;
411 	} else {
412 		qdelay = q->vars.qdelay;
413 		qdelay_old = q->vars.qdelay_old;
414 	}
415 
416 	/* If qdelay is zero and qlen is not, it means qlen is very small, less
417 	 * than dequeue_rate, so we do not update probabilty in this round
418 	 */
419 	if (qdelay == 0 && qlen != 0)
420 		update_prob = false;
421 
422 	/* In the algorithm, alpha and beta are between 0 and 2 with typical
423 	 * value for alpha as 0.125. In this implementation, we use values 0-32
424 	 * passed from user space to represent this. Also, alpha and beta have
425 	 * unit of HZ and need to be scaled before they can used to update
426 	 * probability. alpha/beta are updated locally below by scaling down
427 	 * by 16 to come to 0-2 range.
428 	 */
429 	alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
430 	beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
431 
432 	/* We scale alpha and beta differently depending on how heavy the
433 	 * congestion is. Please see RFC 8033 for details.
434 	 */
435 	if (q->vars.prob < MAX_PROB / 10) {
436 		alpha >>= 1;
437 		beta >>= 1;
438 
439 		power = 100;
440 		while (q->vars.prob < div_u64(MAX_PROB, power) &&
441 		       power <= 1000000) {
442 			alpha >>= 2;
443 			beta >>= 2;
444 			power *= 10;
445 		}
446 	}
447 
448 	/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
449 	delta += alpha * (u64)(qdelay - q->params.target);
450 	delta += beta * (u64)(qdelay - qdelay_old);
451 
452 	oldprob = q->vars.prob;
453 
454 	/* to ensure we increase probability in steps of no more than 2% */
455 	if (delta > (s64)(MAX_PROB / (100 / 2)) &&
456 	    q->vars.prob >= MAX_PROB / 10)
457 		delta = (MAX_PROB / 100) * 2;
458 
459 	/* Non-linear drop:
460 	 * Tune drop probability to increase quickly for high delays(>= 250ms)
461 	 * 250ms is derived through experiments and provides error protection
462 	 */
463 
464 	if (qdelay > (PSCHED_NS2TICKS(250 * NSEC_PER_MSEC)))
465 		delta += MAX_PROB / (100 / 2);
466 
467 	q->vars.prob += delta;
468 
469 	if (delta > 0) {
470 		/* prevent overflow */
471 		if (q->vars.prob < oldprob) {
472 			q->vars.prob = MAX_PROB;
473 			/* Prevent normalization error. If probability is at
474 			 * maximum value already, we normalize it here, and
475 			 * skip the check to do a non-linear drop in the next
476 			 * section.
477 			 */
478 			update_prob = false;
479 		}
480 	} else {
481 		/* prevent underflow */
482 		if (q->vars.prob > oldprob)
483 			q->vars.prob = 0;
484 	}
485 
486 	/* Non-linear drop in probability: Reduce drop probability quickly if
487 	 * delay is 0 for 2 consecutive Tupdate periods.
488 	 */
489 
490 	if (qdelay == 0 && qdelay_old == 0 && update_prob)
491 		/* Reduce drop probability to 98.4% */
492 		q->vars.prob -= q->vars.prob / 64u;
493 
494 	q->vars.qdelay = qdelay;
495 	q->vars.qlen_old = qlen;
496 
497 	/* We restart the measurement cycle if the following conditions are met
498 	 * 1. If the delay has been low for 2 consecutive Tupdate periods
499 	 * 2. Calculated drop probability is zero
500 	 * 3. If average dq_rate_estimator is enabled, we have atleast one
501 	 *    estimate for the avg_dq_rate ie., is a non-zero value
502 	 */
503 	if ((q->vars.qdelay < q->params.target / 2) &&
504 	    (q->vars.qdelay_old < q->params.target / 2) &&
505 	    q->vars.prob == 0 &&
506 	    (!q->params.dq_rate_estimator || q->vars.avg_dq_rate > 0)) {
507 		pie_vars_init(&q->vars);
508 	}
509 
510 	if (!q->params.dq_rate_estimator)
511 		q->vars.qdelay_old = qdelay;
512 }
513 
514 static void pie_timer(struct timer_list *t)
515 {
516 	struct pie_sched_data *q = from_timer(q, t, adapt_timer);
517 	struct Qdisc *sch = q->sch;
518 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
519 
520 	spin_lock(root_lock);
521 	calculate_probability(sch);
522 
523 	/* reset the timer to fire after 'tupdate'. tupdate is in jiffies. */
524 	if (q->params.tupdate)
525 		mod_timer(&q->adapt_timer, jiffies + q->params.tupdate);
526 	spin_unlock(root_lock);
527 }
528 
529 static int pie_init(struct Qdisc *sch, struct nlattr *opt,
530 		    struct netlink_ext_ack *extack)
531 {
532 	struct pie_sched_data *q = qdisc_priv(sch);
533 
534 	pie_params_init(&q->params);
535 	pie_vars_init(&q->vars);
536 	sch->limit = q->params.limit;
537 
538 	q->sch = sch;
539 	timer_setup(&q->adapt_timer, pie_timer, 0);
540 
541 	if (opt) {
542 		int err = pie_change(sch, opt, extack);
543 
544 		if (err)
545 			return err;
546 	}
547 
548 	mod_timer(&q->adapt_timer, jiffies + HZ / 2);
549 	return 0;
550 }
551 
552 static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
553 {
554 	struct pie_sched_data *q = qdisc_priv(sch);
555 	struct nlattr *opts;
556 
557 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
558 	if (!opts)
559 		goto nla_put_failure;
560 
561 	/* convert target from pschedtime to us */
562 	if (nla_put_u32(skb, TCA_PIE_TARGET,
563 			((u32)PSCHED_TICKS2NS(q->params.target)) /
564 			NSEC_PER_USEC) ||
565 	    nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
566 	    nla_put_u32(skb, TCA_PIE_TUPDATE,
567 			jiffies_to_usecs(q->params.tupdate)) ||
568 	    nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
569 	    nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
570 	    nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
571 	    nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) ||
572 	    nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR,
573 			q->params.dq_rate_estimator))
574 		goto nla_put_failure;
575 
576 	return nla_nest_end(skb, opts);
577 
578 nla_put_failure:
579 	nla_nest_cancel(skb, opts);
580 	return -1;
581 }
582 
583 static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
584 {
585 	struct pie_sched_data *q = qdisc_priv(sch);
586 	struct tc_pie_xstats st = {
587 		.prob		= q->vars.prob,
588 		.delay		= ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
589 				   NSEC_PER_USEC,
590 		.packets_in	= q->stats.packets_in,
591 		.overlimit	= q->stats.overlimit,
592 		.maxq		= q->stats.maxq,
593 		.dropped	= q->stats.dropped,
594 		.ecn_mark	= q->stats.ecn_mark,
595 	};
596 
597 	/* avg_dq_rate is only valid if dq_rate_estimator is enabled */
598 	st.dq_rate_estimating = q->params.dq_rate_estimator;
599 
600 	/* unscale and return dq_rate in bytes per sec */
601 	if (q->params.dq_rate_estimator)
602 		st.avg_dq_rate = q->vars.avg_dq_rate *
603 				 (PSCHED_TICKS_PER_SEC) >> PIE_SCALE;
604 
605 	return gnet_stats_copy_app(d, &st, sizeof(st));
606 }
607 
/* Dequeue handler: take the packet at the head of the queue and, if there
 * was one, feed it to the delay / drain-rate bookkeeping.
 *
 * Returns the dequeued packet, or NULL when the queue is empty.
 */
static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
{
	struct sk_buff *head = qdisc_dequeue_head(sch);

	if (head)
		pie_process_dequeue(sch, head);

	return head;
}
618 
619 static void pie_reset(struct Qdisc *sch)
620 {
621 	struct pie_sched_data *q = qdisc_priv(sch);
622 
623 	qdisc_reset_queue(sch);
624 	pie_vars_init(&q->vars);
625 }
626 
627 static void pie_destroy(struct Qdisc *sch)
628 {
629 	struct pie_sched_data *q = qdisc_priv(sch);
630 
631 	q->params.tupdate = 0;
632 	del_timer_sync(&q->adapt_timer);
633 }
634 
/* Operations table registered under the id "pie"; it wires the handlers
 * defined in this file into the packet scheduler core.
 */
static struct Qdisc_ops pie_qdisc_ops __read_mostly = {
	.id = "pie",
	.priv_size	= sizeof(struct pie_sched_data),
	.enqueue	= pie_qdisc_enqueue,
	.dequeue	= pie_qdisc_dequeue,
	.peek		= qdisc_peek_dequeued,
	.init		= pie_init,
	.destroy	= pie_destroy,
	.reset		= pie_reset,
	.change		= pie_change,
	.dump		= pie_dump,
	.dump_stats	= pie_dump_stats,
	.owner		= THIS_MODULE,
};
649 
650 static int __init pie_module_init(void)
651 {
652 	return register_qdisc(&pie_qdisc_ops);
653 }
654 
/* Module exit point: unregister the "pie" qdisc operations. */
static void __exit pie_module_exit(void)
{
	unregister_qdisc(&pie_qdisc_ops);
}
659 
/* Hook the init/exit functions into module load and unload. */
module_init(pie_module_init);
module_exit(pie_module_exit);

/* Module metadata */
MODULE_DESCRIPTION("Proportional Integral controller Enhanced (PIE) scheduler");
MODULE_AUTHOR("Vijay Subramanian");
MODULE_AUTHOR("Mythili Prabhu");
MODULE_LICENSE("GPL");
667