xref: /openbmc/linux/net/sched/sch_netem.c (revision 16f6ccde74a6f8538c62f127f17207c75f4dba7a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * net/sched/sch_netem.c	Network emulator
4  *
5  *  		Many of the algorithms and ideas for this came from
6  *		NIST Net which is not copyrighted.
7  *
8  * Authors:	Stephen Hemminger <shemminger@osdl.org>
9  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
10  */
11 
12 #include <linux/mm.h>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <linux/vmalloc.h>
20 #include <linux/rtnetlink.h>
21 #include <linux/reciprocal_div.h>
22 #include <linux/rbtree.h>
23 
24 #include <net/gso.h>
25 #include <net/netlink.h>
26 #include <net/pkt_sched.h>
27 #include <net/inet_ecn.h>
28 
29 #define VERSION "1.3"
30 
31 /*	Network Emulation Queuing algorithm.
32 	====================================
33 
34 	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
35 		 Network Emulation Tool
36 		 [2] Luigi Rizzo, DummyNet for FreeBSD
37 
38 	 ----------------------------------------------------------------
39 
40 	 This started out as a simple way to delay outgoing packets to
41 	 test TCP but has grown to include most of the functionality
42 	 of a full blown network emulator like NISTnet. It can delay
43 	 packets and add random jitter (and correlation). The random
44 	 distribution can be loaded from a table as well to provide
45 	 normal, Pareto, or experimental curves. Packet loss,
46 	 duplication, and reordering can also be emulated.
47 
48 	 This qdisc does not do classification that can be handled in
49 	 layering other disciplines.  It does not need to do bandwidth
50 	 control either since that can be handled by using token
51 	 bucket or other rate control.
52 
53      Correlated Loss Generator models
54 
55 	Added generation of correlated loss according to the
56 	"Gilbert-Elliot" model, a 4-state markov model.
57 
58 	References:
59 	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
60 	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
61 	and intuitive loss model for packet networks and its implementation
62 	in the Netem module in the Linux kernel", available in [1]
63 
64 	Authors: Stefano Salsano <stefano.salsano at uniroma2.it
65 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
66 */
67 
68 struct disttable {
69 	u32  size;
70 	s16 table[];
71 };
72 
73 struct netem_sched_data {
74 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
75 	struct rb_root t_root;
76 
77 	/* a linear queue; reduces rbtree rebalancing when jitter is low */
78 	struct sk_buff	*t_head;
79 	struct sk_buff	*t_tail;
80 
81 	u32 t_len;
82 
83 	/* optional qdisc for classful handling (NULL at netem init) */
84 	struct Qdisc	*qdisc;
85 
86 	struct qdisc_watchdog watchdog;
87 
88 	s64 latency;
89 	s64 jitter;
90 
91 	u32 loss;
92 	u32 ecn;
93 	u32 limit;
94 	u32 counter;
95 	u32 gap;
96 	u32 duplicate;
97 	u32 reorder;
98 	u32 corrupt;
99 	u64 rate;
100 	s32 packet_overhead;
101 	u32 cell_size;
102 	struct reciprocal_value cell_size_reciprocal;
103 	s32 cell_overhead;
104 
105 	struct crndstate {
106 		u32 last;
107 		u32 rho;
108 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
109 
110 	struct prng  {
111 		u64 seed;
112 		struct rnd_state prng_state;
113 	} prng;
114 
115 	struct disttable *delay_dist;
116 
117 	enum  {
118 		CLG_RANDOM,
119 		CLG_4_STATES,
120 		CLG_GILB_ELL,
121 	} loss_model;
122 
123 	enum {
124 		TX_IN_GAP_PERIOD = 1,
125 		TX_IN_BURST_PERIOD,
126 		LOST_IN_GAP_PERIOD,
127 		LOST_IN_BURST_PERIOD,
128 	} _4_state_model;
129 
130 	enum {
131 		GOOD_STATE = 1,
132 		BAD_STATE,
133 	} GE_state_model;
134 
135 	/* Correlated Loss Generation models */
136 	struct clgstate {
137 		/* state of the Markov chain */
138 		u8 state;
139 
140 		/* 4-states and Gilbert-Elliot models */
141 		u32 a1;	/* p13 for 4-states or p for GE */
142 		u32 a2;	/* p31 for 4-states or r for GE */
143 		u32 a3;	/* p32 for 4-states or h for GE */
144 		u32 a4;	/* p14 for 4-states or 1-k for GE */
145 		u32 a5; /* p23 used only in 4-states */
146 	} clg;
147 
148 	struct tc_netem_slot slot_config;
149 	struct slotstate {
150 		u64 slot_next;
151 		s32 packets_left;
152 		s32 bytes_left;
153 	} slot;
154 
155 	struct disttable *slot_dist;
156 };
157 
158 /* Time stamp put into socket buffer control block
159  * Only valid when skbs are in our internal t(ime)fifo queue.
160  *
161  * As skb->rbnode uses same storage than skb->next, skb->prev and skb->tstamp,
162  * and skb->next & skb->prev are scratch space for a qdisc,
163  * we save skb->tstamp value in skb->cb[] before destroying it.
164  */
165 struct netem_skb_cb {
166 	u64	        time_to_send;
167 };
168 
netem_skb_cb(struct sk_buff * skb)169 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
170 {
171 	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
172 	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
173 	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
174 }
175 
176 /* init_crandom - initialize correlated random number generator
177  * Use entropy source for initial seed.
178  */
init_crandom(struct crndstate * state,unsigned long rho)179 static void init_crandom(struct crndstate *state, unsigned long rho)
180 {
181 	state->rho = rho;
182 	state->last = get_random_u32();
183 }
184 
185 /* get_crandom - correlated random number generator
186  * Next number depends on last value.
187  * rho is scaled to avoid floating point.
188  */
get_crandom(struct crndstate * state,struct prng * p)189 static u32 get_crandom(struct crndstate *state, struct prng *p)
190 {
191 	u64 value, rho;
192 	unsigned long answer;
193 	struct rnd_state *s = &p->prng_state;
194 
195 	if (!state || state->rho == 0)	/* no correlation */
196 		return prandom_u32_state(s);
197 
198 	value = prandom_u32_state(s);
199 	rho = (u64)state->rho + 1;
200 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
201 	state->last = answer;
202 	return answer;
203 }
204 
205 /* loss_4state - 4-state model loss generator
206  * Generates losses according to the 4-state Markov chain adopted in
207  * the GI (General and Intuitive) loss model.
208  */
loss_4state(struct netem_sched_data * q)209 static bool loss_4state(struct netem_sched_data *q)
210 {
211 	struct clgstate *clg = &q->clg;
212 	u32 rnd = prandom_u32_state(&q->prng.prng_state);
213 
214 	/*
215 	 * Makes a comparison between rnd and the transition
216 	 * probabilities outgoing from the current state, then decides the
217 	 * next state and if the next packet has to be transmitted or lost.
218 	 * The four states correspond to:
219 	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
220 	 *   LOST_IN_GAP_PERIOD => isolated losses within a gap period
221 	 *   LOST_IN_BURST_PERIOD => lost packets within a burst period
222 	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
223 	 */
224 	switch (clg->state) {
225 	case TX_IN_GAP_PERIOD:
226 		if (rnd < clg->a4) {
227 			clg->state = LOST_IN_GAP_PERIOD;
228 			return true;
229 		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
230 			clg->state = LOST_IN_BURST_PERIOD;
231 			return true;
232 		} else if (clg->a1 + clg->a4 < rnd) {
233 			clg->state = TX_IN_GAP_PERIOD;
234 		}
235 
236 		break;
237 	case TX_IN_BURST_PERIOD:
238 		if (rnd < clg->a5) {
239 			clg->state = LOST_IN_BURST_PERIOD;
240 			return true;
241 		} else {
242 			clg->state = TX_IN_BURST_PERIOD;
243 		}
244 
245 		break;
246 	case LOST_IN_BURST_PERIOD:
247 		if (rnd < clg->a3)
248 			clg->state = TX_IN_BURST_PERIOD;
249 		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
250 			clg->state = TX_IN_GAP_PERIOD;
251 		} else if (clg->a2 + clg->a3 < rnd) {
252 			clg->state = LOST_IN_BURST_PERIOD;
253 			return true;
254 		}
255 		break;
256 	case LOST_IN_GAP_PERIOD:
257 		clg->state = TX_IN_GAP_PERIOD;
258 		break;
259 	}
260 
261 	return false;
262 }
263 
264 /* loss_gilb_ell - Gilbert-Elliot model loss generator
265  * Generates losses according to the Gilbert-Elliot loss model or
266  * its special cases  (Gilbert or Simple Gilbert)
267  *
268  * Makes a comparison between random number and the transition
269  * probabilities outgoing from the current state, then decides the
270  * next state. A second random number is extracted and the comparison
271  * with the loss probability of the current state decides if the next
272  * packet will be transmitted or lost.
273  */
loss_gilb_ell(struct netem_sched_data * q)274 static bool loss_gilb_ell(struct netem_sched_data *q)
275 {
276 	struct clgstate *clg = &q->clg;
277 	struct rnd_state *s = &q->prng.prng_state;
278 
279 	switch (clg->state) {
280 	case GOOD_STATE:
281 		if (prandom_u32_state(s) < clg->a1)
282 			clg->state = BAD_STATE;
283 		if (prandom_u32_state(s) < clg->a4)
284 			return true;
285 		break;
286 	case BAD_STATE:
287 		if (prandom_u32_state(s) < clg->a2)
288 			clg->state = GOOD_STATE;
289 		if (prandom_u32_state(s) > clg->a3)
290 			return true;
291 	}
292 
293 	return false;
294 }
295 
loss_event(struct netem_sched_data * q)296 static bool loss_event(struct netem_sched_data *q)
297 {
298 	switch (q->loss_model) {
299 	case CLG_RANDOM:
300 		/* Random packet drop 0 => none, ~0 => all */
301 		return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng);
302 
303 	case CLG_4_STATES:
304 		/* 4state loss model algorithm (used also for GI model)
305 		* Extracts a value from the markov 4 state loss generator,
306 		* if it is 1 drops a packet and if needed writes the event in
307 		* the kernel logs
308 		*/
309 		return loss_4state(q);
310 
311 	case CLG_GILB_ELL:
312 		/* Gilbert-Elliot loss model algorithm
313 		* Extracts a value from the Gilbert-Elliot loss generator,
314 		* if it is 1 drops a packet and if needed writes the event in
315 		* the kernel logs
316 		*/
317 		return loss_gilb_ell(q);
318 	}
319 
320 	return false;	/* not reached */
321 }
322 
323 
324 /* tabledist - return a pseudo-randomly distributed value with mean mu and
325  * std deviation sigma.  Uses table lookup to approximate the desired
326  * distribution, and a uniformly-distributed pseudo-random source.
327  */
tabledist(s64 mu,s32 sigma,struct crndstate * state,struct prng * prng,const struct disttable * dist)328 static s64 tabledist(s64 mu, s32 sigma,
329 		     struct crndstate *state,
330 		     struct prng *prng,
331 		     const struct disttable *dist)
332 {
333 	s64 x;
334 	long t;
335 	u32 rnd;
336 
337 	if (sigma == 0)
338 		return mu;
339 
340 	rnd = get_crandom(state, prng);
341 
342 	/* default uniform distribution */
343 	if (dist == NULL)
344 		return ((rnd % (2 * (u32)sigma)) + mu) - sigma;
345 
346 	t = dist->table[rnd % dist->size];
347 	x = (sigma % NETEM_DIST_SCALE) * t;
348 	if (x >= 0)
349 		x += NETEM_DIST_SCALE/2;
350 	else
351 		x -= NETEM_DIST_SCALE/2;
352 
353 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
354 }
355 
packet_time_ns(u64 len,const struct netem_sched_data * q)356 static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
357 {
358 	len += q->packet_overhead;
359 
360 	if (q->cell_size) {
361 		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
362 
363 		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
364 			cells++;
365 		len = cells * (q->cell_size + q->cell_overhead);
366 	}
367 
368 	return div64_u64(len * NSEC_PER_SEC, q->rate);
369 }
370 
tfifo_reset(struct Qdisc * sch)371 static void tfifo_reset(struct Qdisc *sch)
372 {
373 	struct netem_sched_data *q = qdisc_priv(sch);
374 	struct rb_node *p = rb_first(&q->t_root);
375 
376 	while (p) {
377 		struct sk_buff *skb = rb_to_skb(p);
378 
379 		p = rb_next(p);
380 		rb_erase(&skb->rbnode, &q->t_root);
381 		rtnl_kfree_skbs(skb, skb);
382 	}
383 
384 	rtnl_kfree_skbs(q->t_head, q->t_tail);
385 	q->t_head = NULL;
386 	q->t_tail = NULL;
387 	q->t_len = 0;
388 }
389 
tfifo_enqueue(struct sk_buff * nskb,struct Qdisc * sch)390 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
391 {
392 	struct netem_sched_data *q = qdisc_priv(sch);
393 	u64 tnext = netem_skb_cb(nskb)->time_to_send;
394 
395 	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
396 		if (q->t_tail)
397 			q->t_tail->next = nskb;
398 		else
399 			q->t_head = nskb;
400 		q->t_tail = nskb;
401 	} else {
402 		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
403 
404 		while (*p) {
405 			struct sk_buff *skb;
406 
407 			parent = *p;
408 			skb = rb_to_skb(parent);
409 			if (tnext >= netem_skb_cb(skb)->time_to_send)
410 				p = &parent->rb_right;
411 			else
412 				p = &parent->rb_left;
413 		}
414 		rb_link_node(&nskb->rbnode, parent, p);
415 		rb_insert_color(&nskb->rbnode, &q->t_root);
416 	}
417 	q->t_len++;
418 	sch->q.qlen++;
419 }
420 
421 /* netem can't properly corrupt a megapacket (like we get from GSO), so instead
422  * when we statistically choose to corrupt one, we instead segment it, returning
423  * the first packet to be corrupted, and re-enqueue the remaining frames
424  */
netem_segment(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)425 static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
426 				     struct sk_buff **to_free)
427 {
428 	struct sk_buff *segs;
429 	netdev_features_t features = netif_skb_features(skb);
430 
431 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
432 
433 	if (IS_ERR_OR_NULL(segs)) {
434 		qdisc_drop(skb, sch, to_free);
435 		return NULL;
436 	}
437 	consume_skb(skb);
438 	return segs;
439 }
440 
441 /*
442  * Insert one skb into qdisc.
443  * Note: parent depends on return value to account for queue length.
444  * 	NET_XMIT_DROP: queue length didn't change.
445  *      NET_XMIT_SUCCESS: one skb was queued.
446  */
netem_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)447 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
448 			 struct sk_buff **to_free)
449 {
450 	struct netem_sched_data *q = qdisc_priv(sch);
451 	/* We don't fill cb now as skb_unshare() may invalidate it */
452 	struct netem_skb_cb *cb;
453 	struct sk_buff *skb2 = NULL;
454 	struct sk_buff *segs = NULL;
455 	unsigned int prev_len = qdisc_pkt_len(skb);
456 	int count = 1;
457 
458 	/* Do not fool qdisc_drop_all() */
459 	skb->prev = NULL;
460 
461 	/* Random duplication */
462 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng))
463 		++count;
464 
465 	/* Drop packet? */
466 	if (loss_event(q)) {
467 		if (q->ecn && INET_ECN_set_ce(skb))
468 			qdisc_qstats_drop(sch); /* mark packet */
469 		else
470 			--count;
471 	}
472 	if (count == 0) {
473 		qdisc_qstats_drop(sch);
474 		__qdisc_drop(skb, to_free);
475 		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
476 	}
477 
478 	/* If a delay is expected, orphan the skb. (orphaning usually takes
479 	 * place at TX completion time, so _before_ the link transit delay)
480 	 */
481 	if (q->latency || q->jitter || q->rate)
482 		skb_orphan_partial(skb);
483 
484 	/*
485 	 * If we need to duplicate packet, then clone it before
486 	 * original is modified.
487 	 */
488 	if (count > 1)
489 		skb2 = skb_clone(skb, GFP_ATOMIC);
490 
491 	/*
492 	 * Randomized packet corruption.
493 	 * Make copy if needed since we are modifying
494 	 * If packet is going to be hardware checksummed, then
495 	 * do it now in software before we mangle it.
496 	 */
497 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) {
498 		if (skb_is_gso(skb)) {
499 			skb = netem_segment(skb, sch, to_free);
500 			if (!skb)
501 				goto finish_segs;
502 
503 			segs = skb->next;
504 			skb_mark_not_on_list(skb);
505 			qdisc_skb_cb(skb)->pkt_len = skb->len;
506 		}
507 
508 		skb = skb_unshare(skb, GFP_ATOMIC);
509 		if (unlikely(!skb)) {
510 			qdisc_qstats_drop(sch);
511 			goto finish_segs;
512 		}
513 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
514 		    skb_checksum_help(skb)) {
515 			qdisc_drop(skb, sch, to_free);
516 			skb = NULL;
517 			goto finish_segs;
518 		}
519 
520 		skb->data[get_random_u32_below(skb_headlen(skb))] ^=
521 			1<<get_random_u32_below(8);
522 	}
523 
524 	if (unlikely(q->t_len >= sch->limit)) {
525 		/* re-link segs, so that qdisc_drop_all() frees them all */
526 		skb->next = segs;
527 		qdisc_drop_all(skb, sch, to_free);
528 		if (skb2)
529 			__qdisc_drop(skb2, to_free);
530 		return NET_XMIT_DROP;
531 	}
532 
533 	/*
534 	 * If doing duplication then re-insert at top of the
535 	 * qdisc tree, since parent queuer expects that only one
536 	 * skb will be queued.
537 	 */
538 	if (skb2) {
539 		struct Qdisc *rootq = qdisc_root_bh(sch);
540 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
541 
542 		q->duplicate = 0;
543 		rootq->enqueue(skb2, rootq, to_free);
544 		q->duplicate = dupsave;
545 		skb2 = NULL;
546 	}
547 
548 	qdisc_qstats_backlog_inc(sch, skb);
549 
550 	cb = netem_skb_cb(skb);
551 	if (q->gap == 0 ||		/* not doing reordering */
552 	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
553 	    q->reorder < get_crandom(&q->reorder_cor, &q->prng)) {
554 		u64 now;
555 		s64 delay;
556 
557 		delay = tabledist(q->latency, q->jitter,
558 				  &q->delay_cor, &q->prng, q->delay_dist);
559 
560 		now = ktime_get_ns();
561 
562 		if (q->rate) {
563 			struct netem_skb_cb *last = NULL;
564 
565 			if (sch->q.tail)
566 				last = netem_skb_cb(sch->q.tail);
567 			if (q->t_root.rb_node) {
568 				struct sk_buff *t_skb;
569 				struct netem_skb_cb *t_last;
570 
571 				t_skb = skb_rb_last(&q->t_root);
572 				t_last = netem_skb_cb(t_skb);
573 				if (!last ||
574 				    t_last->time_to_send > last->time_to_send)
575 					last = t_last;
576 			}
577 			if (q->t_tail) {
578 				struct netem_skb_cb *t_last =
579 					netem_skb_cb(q->t_tail);
580 
581 				if (!last ||
582 				    t_last->time_to_send > last->time_to_send)
583 					last = t_last;
584 			}
585 
586 			if (last) {
587 				/*
588 				 * Last packet in queue is reference point (now),
589 				 * calculate this time bonus and subtract
590 				 * from delay.
591 				 */
592 				delay -= last->time_to_send - now;
593 				delay = max_t(s64, 0, delay);
594 				now = last->time_to_send;
595 			}
596 
597 			delay += packet_time_ns(qdisc_pkt_len(skb), q);
598 		}
599 
600 		cb->time_to_send = now + delay;
601 		++q->counter;
602 		tfifo_enqueue(skb, sch);
603 	} else {
604 		/*
605 		 * Do re-ordering by putting one out of N packets at the front
606 		 * of the queue.
607 		 */
608 		cb->time_to_send = ktime_get_ns();
609 		q->counter = 0;
610 
611 		__qdisc_enqueue_head(skb, &sch->q);
612 		sch->qstats.requeues++;
613 	}
614 
615 finish_segs:
616 	if (skb2)
617 		__qdisc_drop(skb2, to_free);
618 
619 	if (segs) {
620 		unsigned int len, last_len;
621 		int rc, nb;
622 
623 		len = skb ? skb->len : 0;
624 		nb = skb ? 1 : 0;
625 
626 		while (segs) {
627 			skb2 = segs->next;
628 			skb_mark_not_on_list(segs);
629 			qdisc_skb_cb(segs)->pkt_len = segs->len;
630 			last_len = segs->len;
631 			rc = qdisc_enqueue(segs, sch, to_free);
632 			if (rc != NET_XMIT_SUCCESS) {
633 				if (net_xmit_drop_count(rc))
634 					qdisc_qstats_drop(sch);
635 			} else {
636 				nb++;
637 				len += last_len;
638 			}
639 			segs = skb2;
640 		}
641 		/* Parent qdiscs accounted for 1 skb of size @prev_len */
642 		qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
643 	} else if (!skb) {
644 		return NET_XMIT_DROP;
645 	}
646 	return NET_XMIT_SUCCESS;
647 }
648 
649 /* Delay the next round with a new future slot with a
650  * correct number of bytes and packets.
651  */
652 
get_slot_next(struct netem_sched_data * q,u64 now)653 static void get_slot_next(struct netem_sched_data *q, u64 now)
654 {
655 	s64 next_delay;
656 
657 	if (!q->slot_dist)
658 		next_delay = q->slot_config.min_delay +
659 				(get_random_u32() *
660 				 (q->slot_config.max_delay -
661 				  q->slot_config.min_delay) >> 32);
662 	else
663 		next_delay = tabledist(q->slot_config.dist_delay,
664 				       (s32)(q->slot_config.dist_jitter),
665 				       NULL, &q->prng, q->slot_dist);
666 
667 	q->slot.slot_next = now + next_delay;
668 	q->slot.packets_left = q->slot_config.max_packets;
669 	q->slot.bytes_left = q->slot_config.max_bytes;
670 }
671 
netem_peek(struct netem_sched_data * q)672 static struct sk_buff *netem_peek(struct netem_sched_data *q)
673 {
674 	struct sk_buff *skb = skb_rb_first(&q->t_root);
675 	u64 t1, t2;
676 
677 	if (!skb)
678 		return q->t_head;
679 	if (!q->t_head)
680 		return skb;
681 
682 	t1 = netem_skb_cb(skb)->time_to_send;
683 	t2 = netem_skb_cb(q->t_head)->time_to_send;
684 	if (t1 < t2)
685 		return skb;
686 	return q->t_head;
687 }
688 
netem_erase_head(struct netem_sched_data * q,struct sk_buff * skb)689 static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
690 {
691 	if (skb == q->t_head) {
692 		q->t_head = skb->next;
693 		if (!q->t_head)
694 			q->t_tail = NULL;
695 	} else {
696 		rb_erase(&skb->rbnode, &q->t_root);
697 	}
698 }
699 
netem_dequeue(struct Qdisc * sch)700 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
701 {
702 	struct netem_sched_data *q = qdisc_priv(sch);
703 	struct sk_buff *skb;
704 
705 tfifo_dequeue:
706 	skb = __qdisc_dequeue_head(&sch->q);
707 	if (skb) {
708 deliver:
709 		qdisc_qstats_backlog_dec(sch, skb);
710 		qdisc_bstats_update(sch, skb);
711 		return skb;
712 	}
713 	skb = netem_peek(q);
714 	if (skb) {
715 		u64 time_to_send;
716 		u64 now = ktime_get_ns();
717 
718 		/* if more time remaining? */
719 		time_to_send = netem_skb_cb(skb)->time_to_send;
720 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
721 			get_slot_next(q, now);
722 
723 		if (time_to_send <= now && q->slot.slot_next <= now) {
724 			netem_erase_head(q, skb);
725 			q->t_len--;
726 			skb->next = NULL;
727 			skb->prev = NULL;
728 			/* skb->dev shares skb->rbnode area,
729 			 * we need to restore its value.
730 			 */
731 			skb->dev = qdisc_dev(sch);
732 
733 			if (q->slot.slot_next) {
734 				q->slot.packets_left--;
735 				q->slot.bytes_left -= qdisc_pkt_len(skb);
736 				if (q->slot.packets_left <= 0 ||
737 				    q->slot.bytes_left <= 0)
738 					get_slot_next(q, now);
739 			}
740 
741 			if (q->qdisc) {
742 				unsigned int pkt_len = qdisc_pkt_len(skb);
743 				struct sk_buff *to_free = NULL;
744 				int err;
745 
746 				err = qdisc_enqueue(skb, q->qdisc, &to_free);
747 				kfree_skb_list(to_free);
748 				if (err != NET_XMIT_SUCCESS) {
749 					if (net_xmit_drop_count(err))
750 						qdisc_qstats_drop(sch);
751 					qdisc_tree_reduce_backlog(sch, 1, pkt_len);
752 					sch->qstats.backlog -= pkt_len;
753 					sch->q.qlen--;
754 				}
755 				goto tfifo_dequeue;
756 			}
757 			sch->q.qlen--;
758 			goto deliver;
759 		}
760 
761 		if (q->qdisc) {
762 			skb = q->qdisc->ops->dequeue(q->qdisc);
763 			if (skb) {
764 				sch->q.qlen--;
765 				goto deliver;
766 			}
767 		}
768 
769 		qdisc_watchdog_schedule_ns(&q->watchdog,
770 					   max(time_to_send,
771 					       q->slot.slot_next));
772 	}
773 
774 	if (q->qdisc) {
775 		skb = q->qdisc->ops->dequeue(q->qdisc);
776 		if (skb) {
777 			sch->q.qlen--;
778 			goto deliver;
779 		}
780 	}
781 	return NULL;
782 }
783 
netem_reset(struct Qdisc * sch)784 static void netem_reset(struct Qdisc *sch)
785 {
786 	struct netem_sched_data *q = qdisc_priv(sch);
787 
788 	qdisc_reset_queue(sch);
789 	tfifo_reset(sch);
790 	if (q->qdisc)
791 		qdisc_reset(q->qdisc);
792 	qdisc_watchdog_cancel(&q->watchdog);
793 }
794 
dist_free(struct disttable * d)795 static void dist_free(struct disttable *d)
796 {
797 	kvfree(d);
798 }
799 
800 /*
801  * Distribution data is a variable size payload containing
802  * signed 16 bit values.
803  */
804 
get_dist_table(struct disttable ** tbl,const struct nlattr * attr)805 static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
806 {
807 	size_t n = nla_len(attr)/sizeof(__s16);
808 	const __s16 *data = nla_data(attr);
809 	struct disttable *d;
810 	int i;
811 
812 	if (!n || n > NETEM_DIST_MAX)
813 		return -EINVAL;
814 
815 	d = kvmalloc(struct_size(d, table, n), GFP_KERNEL);
816 	if (!d)
817 		return -ENOMEM;
818 
819 	d->size = n;
820 	for (i = 0; i < n; i++)
821 		d->table[i] = data[i];
822 
823 	*tbl = d;
824 	return 0;
825 }
826 
get_slot(struct netem_sched_data * q,const struct nlattr * attr)827 static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
828 {
829 	const struct tc_netem_slot *c = nla_data(attr);
830 
831 	q->slot_config = *c;
832 	if (q->slot_config.max_packets == 0)
833 		q->slot_config.max_packets = INT_MAX;
834 	if (q->slot_config.max_bytes == 0)
835 		q->slot_config.max_bytes = INT_MAX;
836 
837 	/* capping dist_jitter to the range acceptable by tabledist() */
838 	q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));
839 
840 	q->slot.packets_left = q->slot_config.max_packets;
841 	q->slot.bytes_left = q->slot_config.max_bytes;
842 	if (q->slot_config.min_delay | q->slot_config.max_delay |
843 	    q->slot_config.dist_jitter)
844 		q->slot.slot_next = ktime_get_ns();
845 	else
846 		q->slot.slot_next = 0;
847 }
848 
get_correlation(struct netem_sched_data * q,const struct nlattr * attr)849 static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
850 {
851 	const struct tc_netem_corr *c = nla_data(attr);
852 
853 	init_crandom(&q->delay_cor, c->delay_corr);
854 	init_crandom(&q->loss_cor, c->loss_corr);
855 	init_crandom(&q->dup_cor, c->dup_corr);
856 }
857 
get_reorder(struct netem_sched_data * q,const struct nlattr * attr)858 static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
859 {
860 	const struct tc_netem_reorder *r = nla_data(attr);
861 
862 	q->reorder = r->probability;
863 	init_crandom(&q->reorder_cor, r->correlation);
864 }
865 
get_corrupt(struct netem_sched_data * q,const struct nlattr * attr)866 static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
867 {
868 	const struct tc_netem_corrupt *r = nla_data(attr);
869 
870 	q->corrupt = r->probability;
871 	init_crandom(&q->corrupt_cor, r->correlation);
872 }
873 
get_rate(struct netem_sched_data * q,const struct nlattr * attr)874 static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
875 {
876 	const struct tc_netem_rate *r = nla_data(attr);
877 
878 	q->rate = r->rate;
879 	q->packet_overhead = r->packet_overhead;
880 	q->cell_size = r->cell_size;
881 	q->cell_overhead = r->cell_overhead;
882 	if (q->cell_size)
883 		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
884 	else
885 		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
886 }
887 
get_loss_clg(struct netem_sched_data * q,const struct nlattr * attr)888 static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
889 {
890 	const struct nlattr *la;
891 	int rem;
892 
893 	nla_for_each_nested(la, attr, rem) {
894 		u16 type = nla_type(la);
895 
896 		switch (type) {
897 		case NETEM_LOSS_GI: {
898 			const struct tc_netem_gimodel *gi = nla_data(la);
899 
900 			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
901 				pr_info("netem: incorrect gi model size\n");
902 				return -EINVAL;
903 			}
904 
905 			q->loss_model = CLG_4_STATES;
906 
907 			q->clg.state = TX_IN_GAP_PERIOD;
908 			q->clg.a1 = gi->p13;
909 			q->clg.a2 = gi->p31;
910 			q->clg.a3 = gi->p32;
911 			q->clg.a4 = gi->p14;
912 			q->clg.a5 = gi->p23;
913 			break;
914 		}
915 
916 		case NETEM_LOSS_GE: {
917 			const struct tc_netem_gemodel *ge = nla_data(la);
918 
919 			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
920 				pr_info("netem: incorrect ge model size\n");
921 				return -EINVAL;
922 			}
923 
924 			q->loss_model = CLG_GILB_ELL;
925 			q->clg.state = GOOD_STATE;
926 			q->clg.a1 = ge->p;
927 			q->clg.a2 = ge->r;
928 			q->clg.a3 = ge->h;
929 			q->clg.a4 = ge->k1;
930 			break;
931 		}
932 
933 		default:
934 			pr_info("netem: unknown loss type %u\n", type);
935 			return -EINVAL;
936 		}
937 	}
938 
939 	return 0;
940 }
941 
942 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
943 	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
944 	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
945 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
946 	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
947 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
948 	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
949 	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
950 	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
951 	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
952 	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
953 	[TCA_NETEM_PRNG_SEED]	= { .type = NLA_U64 },
954 };
955 
parse_attr(struct nlattr * tb[],int maxtype,struct nlattr * nla,const struct nla_policy * policy,int len)956 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
957 		      const struct nla_policy *policy, int len)
958 {
959 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
960 
961 	if (nested_len < 0) {
962 		pr_info("netem: invalid attributes len %d\n", nested_len);
963 		return -EINVAL;
964 	}
965 
966 	if (nested_len >= nla_attr_size(0))
967 		return nla_parse_deprecated(tb, maxtype,
968 					    nla_data(nla) + NLA_ALIGN(len),
969 					    nested_len, policy, NULL);
970 
971 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
972 	return 0;
973 }
974 
975 /* Parse netlink message to set options */
netem_change(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)976 static int netem_change(struct Qdisc *sch, struct nlattr *opt,
977 			struct netlink_ext_ack *extack)
978 {
979 	struct netem_sched_data *q = qdisc_priv(sch);
980 	struct nlattr *tb[TCA_NETEM_MAX + 1];
981 	struct disttable *delay_dist = NULL;
982 	struct disttable *slot_dist = NULL;
983 	struct tc_netem_qopt *qopt;
984 	struct clgstate old_clg;
985 	int old_loss_model = CLG_RANDOM;
986 	int ret;
987 
988 	qopt = nla_data(opt);
989 	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
990 	if (ret < 0)
991 		return ret;
992 
993 	if (tb[TCA_NETEM_DELAY_DIST]) {
994 		ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]);
995 		if (ret)
996 			goto table_free;
997 	}
998 
999 	if (tb[TCA_NETEM_SLOT_DIST]) {
1000 		ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]);
1001 		if (ret)
1002 			goto table_free;
1003 	}
1004 
1005 	sch_tree_lock(sch);
1006 	/* backup q->clg and q->loss_model */
1007 	old_clg = q->clg;
1008 	old_loss_model = q->loss_model;
1009 
1010 	if (tb[TCA_NETEM_LOSS]) {
1011 		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
1012 		if (ret) {
1013 			q->loss_model = old_loss_model;
1014 			q->clg = old_clg;
1015 			goto unlock;
1016 		}
1017 	} else {
1018 		q->loss_model = CLG_RANDOM;
1019 	}
1020 
1021 	if (delay_dist)
1022 		swap(q->delay_dist, delay_dist);
1023 	if (slot_dist)
1024 		swap(q->slot_dist, slot_dist);
1025 	sch->limit = qopt->limit;
1026 
1027 	q->latency = PSCHED_TICKS2NS(qopt->latency);
1028 	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
1029 	q->limit = qopt->limit;
1030 	q->gap = qopt->gap;
1031 	q->counter = 0;
1032 	q->loss = qopt->loss;
1033 	q->duplicate = qopt->duplicate;
1034 
1035 	/* for compatibility with earlier versions.
1036 	 * if gap is set, need to assume 100% probability
1037 	 */
1038 	if (q->gap)
1039 		q->reorder = ~0;
1040 
1041 	if (tb[TCA_NETEM_CORR])
1042 		get_correlation(q, tb[TCA_NETEM_CORR]);
1043 
1044 	if (tb[TCA_NETEM_REORDER])
1045 		get_reorder(q, tb[TCA_NETEM_REORDER]);
1046 
1047 	if (tb[TCA_NETEM_CORRUPT])
1048 		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
1049 
1050 	if (tb[TCA_NETEM_RATE])
1051 		get_rate(q, tb[TCA_NETEM_RATE]);
1052 
1053 	if (tb[TCA_NETEM_RATE64])
1054 		q->rate = max_t(u64, q->rate,
1055 				nla_get_u64(tb[TCA_NETEM_RATE64]));
1056 
1057 	if (tb[TCA_NETEM_LATENCY64])
1058 		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
1059 
1060 	if (tb[TCA_NETEM_JITTER64])
1061 		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
1062 
1063 	if (tb[TCA_NETEM_ECN])
1064 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
1065 
1066 	if (tb[TCA_NETEM_SLOT])
1067 		get_slot(q, tb[TCA_NETEM_SLOT]);
1068 
1069 	/* capping jitter to the range acceptable by tabledist() */
1070 	q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
1071 
1072 	if (tb[TCA_NETEM_PRNG_SEED])
1073 		q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
1074 	else
1075 		q->prng.seed = get_random_u64();
1076 	prandom_seed_state(&q->prng.prng_state, q->prng.seed);
1077 
1078 unlock:
1079 	sch_tree_unlock(sch);
1080 
1081 table_free:
1082 	dist_free(delay_dist);
1083 	dist_free(slot_dist);
1084 	return ret;
1085 }
1086 
netem_init(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)1087 static int netem_init(struct Qdisc *sch, struct nlattr *opt,
1088 		      struct netlink_ext_ack *extack)
1089 {
1090 	struct netem_sched_data *q = qdisc_priv(sch);
1091 	int ret;
1092 
1093 	qdisc_watchdog_init(&q->watchdog, sch);
1094 
1095 	if (!opt)
1096 		return -EINVAL;
1097 
1098 	q->loss_model = CLG_RANDOM;
1099 	ret = netem_change(sch, opt, extack);
1100 	if (ret)
1101 		pr_info("netem: change failed\n");
1102 	return ret;
1103 }
1104 
netem_destroy(struct Qdisc * sch)1105 static void netem_destroy(struct Qdisc *sch)
1106 {
1107 	struct netem_sched_data *q = qdisc_priv(sch);
1108 
1109 	qdisc_watchdog_cancel(&q->watchdog);
1110 	if (q->qdisc)
1111 		qdisc_put(q->qdisc);
1112 	dist_free(q->delay_dist);
1113 	dist_free(q->slot_dist);
1114 }
1115 
dump_loss_model(const struct netem_sched_data * q,struct sk_buff * skb)1116 static int dump_loss_model(const struct netem_sched_data *q,
1117 			   struct sk_buff *skb)
1118 {
1119 	struct nlattr *nest;
1120 
1121 	nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS);
1122 	if (nest == NULL)
1123 		goto nla_put_failure;
1124 
1125 	switch (q->loss_model) {
1126 	case CLG_RANDOM:
1127 		/* legacy loss model */
1128 		nla_nest_cancel(skb, nest);
1129 		return 0;	/* no data */
1130 
1131 	case CLG_4_STATES: {
1132 		struct tc_netem_gimodel gi = {
1133 			.p13 = q->clg.a1,
1134 			.p31 = q->clg.a2,
1135 			.p32 = q->clg.a3,
1136 			.p14 = q->clg.a4,
1137 			.p23 = q->clg.a5,
1138 		};
1139 
1140 		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
1141 			goto nla_put_failure;
1142 		break;
1143 	}
1144 	case CLG_GILB_ELL: {
1145 		struct tc_netem_gemodel ge = {
1146 			.p = q->clg.a1,
1147 			.r = q->clg.a2,
1148 			.h = q->clg.a3,
1149 			.k1 = q->clg.a4,
1150 		};
1151 
1152 		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
1153 			goto nla_put_failure;
1154 		break;
1155 	}
1156 	}
1157 
1158 	nla_nest_end(skb, nest);
1159 	return 0;
1160 
1161 nla_put_failure:
1162 	nla_nest_cancel(skb, nest);
1163 	return -1;
1164 }
1165 
netem_dump(struct Qdisc * sch,struct sk_buff * skb)1166 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
1167 {
1168 	const struct netem_sched_data *q = qdisc_priv(sch);
1169 	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
1170 	struct tc_netem_qopt qopt;
1171 	struct tc_netem_corr cor;
1172 	struct tc_netem_reorder reorder;
1173 	struct tc_netem_corrupt corrupt;
1174 	struct tc_netem_rate rate;
1175 	struct tc_netem_slot slot;
1176 
1177 	qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
1178 			     UINT_MAX);
1179 	qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
1180 			    UINT_MAX);
1181 	qopt.limit = q->limit;
1182 	qopt.loss = q->loss;
1183 	qopt.gap = q->gap;
1184 	qopt.duplicate = q->duplicate;
1185 	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
1186 		goto nla_put_failure;
1187 
1188 	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
1189 		goto nla_put_failure;
1190 
1191 	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
1192 		goto nla_put_failure;
1193 
1194 	cor.delay_corr = q->delay_cor.rho;
1195 	cor.loss_corr = q->loss_cor.rho;
1196 	cor.dup_corr = q->dup_cor.rho;
1197 	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
1198 		goto nla_put_failure;
1199 
1200 	reorder.probability = q->reorder;
1201 	reorder.correlation = q->reorder_cor.rho;
1202 	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
1203 		goto nla_put_failure;
1204 
1205 	corrupt.probability = q->corrupt;
1206 	corrupt.correlation = q->corrupt_cor.rho;
1207 	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
1208 		goto nla_put_failure;
1209 
1210 	if (q->rate >= (1ULL << 32)) {
1211 		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
1212 				      TCA_NETEM_PAD))
1213 			goto nla_put_failure;
1214 		rate.rate = ~0U;
1215 	} else {
1216 		rate.rate = q->rate;
1217 	}
1218 	rate.packet_overhead = q->packet_overhead;
1219 	rate.cell_size = q->cell_size;
1220 	rate.cell_overhead = q->cell_overhead;
1221 	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
1222 		goto nla_put_failure;
1223 
1224 	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
1225 		goto nla_put_failure;
1226 
1227 	if (dump_loss_model(q, skb) != 0)
1228 		goto nla_put_failure;
1229 
1230 	if (q->slot_config.min_delay | q->slot_config.max_delay |
1231 	    q->slot_config.dist_jitter) {
1232 		slot = q->slot_config;
1233 		if (slot.max_packets == INT_MAX)
1234 			slot.max_packets = 0;
1235 		if (slot.max_bytes == INT_MAX)
1236 			slot.max_bytes = 0;
1237 		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
1238 			goto nla_put_failure;
1239 	}
1240 
1241 	if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed,
1242 			      TCA_NETEM_PAD))
1243 		goto nla_put_failure;
1244 
1245 	return nla_nest_end(skb, nla);
1246 
1247 nla_put_failure:
1248 	nlmsg_trim(skb, nla);
1249 	return -1;
1250 }
1251 
netem_dump_class(struct Qdisc * sch,unsigned long cl,struct sk_buff * skb,struct tcmsg * tcm)1252 static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
1253 			  struct sk_buff *skb, struct tcmsg *tcm)
1254 {
1255 	struct netem_sched_data *q = qdisc_priv(sch);
1256 
1257 	if (cl != 1 || !q->qdisc) 	/* only one class */
1258 		return -ENOENT;
1259 
1260 	tcm->tcm_handle |= TC_H_MIN(1);
1261 	tcm->tcm_info = q->qdisc->handle;
1262 
1263 	return 0;
1264 }
1265 
netem_graft(struct Qdisc * sch,unsigned long arg,struct Qdisc * new,struct Qdisc ** old,struct netlink_ext_ack * extack)1266 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1267 		     struct Qdisc **old, struct netlink_ext_ack *extack)
1268 {
1269 	struct netem_sched_data *q = qdisc_priv(sch);
1270 
1271 	*old = qdisc_replace(sch, new, &q->qdisc);
1272 	return 0;
1273 }
1274 
netem_leaf(struct Qdisc * sch,unsigned long arg)1275 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
1276 {
1277 	struct netem_sched_data *q = qdisc_priv(sch);
1278 	return q->qdisc;
1279 }
1280 
netem_find(struct Qdisc * sch,u32 classid)1281 static unsigned long netem_find(struct Qdisc *sch, u32 classid)
1282 {
1283 	return 1;
1284 }
1285 
netem_walk(struct Qdisc * sch,struct qdisc_walker * walker)1286 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
1287 {
1288 	if (!walker->stop) {
1289 		if (!tc_qdisc_stats_dump(sch, 1, walker))
1290 			return;
1291 	}
1292 }
1293 
1294 static const struct Qdisc_class_ops netem_class_ops = {
1295 	.graft		=	netem_graft,
1296 	.leaf		=	netem_leaf,
1297 	.find		=	netem_find,
1298 	.walk		=	netem_walk,
1299 	.dump		=	netem_dump_class,
1300 };
1301 
1302 static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
1303 	.id		=	"netem",
1304 	.cl_ops		=	&netem_class_ops,
1305 	.priv_size	=	sizeof(struct netem_sched_data),
1306 	.enqueue	=	netem_enqueue,
1307 	.dequeue	=	netem_dequeue,
1308 	.peek		=	qdisc_peek_dequeued,
1309 	.init		=	netem_init,
1310 	.reset		=	netem_reset,
1311 	.destroy	=	netem_destroy,
1312 	.change		=	netem_change,
1313 	.dump		=	netem_dump,
1314 	.owner		=	THIS_MODULE,
1315 };
1316 
1317 
netem_module_init(void)1318 static int __init netem_module_init(void)
1319 {
1320 	pr_info("netem: version " VERSION "\n");
1321 	return register_qdisc(&netem_qdisc_ops);
1322 }
netem_module_exit(void)1323 static void __exit netem_module_exit(void)
1324 {
1325 	unregister_qdisc(&netem_qdisc_ops);
1326 }
1327 module_init(netem_module_init)
1328 module_exit(netem_module_exit)
1329 MODULE_LICENSE("GPL");
1330