xref: /openbmc/linux/net/sched/sch_netem.c (revision e0ad032e)
184a14ae8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * net/sched/sch_netem.c	Network emulator
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  *  		Many of the algorithms and ideas for this came from
61da177e4SLinus Torvalds  *		NIST Net which is not copyrighted.
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * Authors:	Stephen Hemminger <shemminger@osdl.org>
91da177e4SLinus Torvalds  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
101da177e4SLinus Torvalds  */
111da177e4SLinus Torvalds 
12b7f080cfSAlexey Dobriyan #include <linux/mm.h>
131da177e4SLinus Torvalds #include <linux/module.h>
145a0e3ad6STejun Heo #include <linux/slab.h>
151da177e4SLinus Torvalds #include <linux/types.h>
161da177e4SLinus Torvalds #include <linux/kernel.h>
171da177e4SLinus Torvalds #include <linux/errno.h>
181da177e4SLinus Torvalds #include <linux/skbuff.h>
1978776d3fSDavid S. Miller #include <linux/vmalloc.h>
201da177e4SLinus Torvalds #include <linux/rtnetlink.h>
2190b41a1cSHagen Paul Pfeifer #include <linux/reciprocal_div.h>
22aec0a40aSEric Dumazet #include <linux/rbtree.h>
231da177e4SLinus Torvalds 
24dc5fc579SArnaldo Carvalho de Melo #include <net/netlink.h>
251da177e4SLinus Torvalds #include <net/pkt_sched.h>
26e4ae004bSEric Dumazet #include <net/inet_ecn.h>
271da177e4SLinus Torvalds 
28250a65f7Sstephen hemminger #define VERSION "1.3"
29eb229c4cSStephen Hemminger 
301da177e4SLinus Torvalds /*	Network Emulation Queuing algorithm.
311da177e4SLinus Torvalds 	====================================
321da177e4SLinus Torvalds 
331da177e4SLinus Torvalds 	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
341da177e4SLinus Torvalds 		 Network Emulation Tool"
351da177e4SLinus Torvalds 		 [2] Luigi Rizzo, DummyNet for FreeBSD
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds 	 ----------------------------------------------------------------
381da177e4SLinus Torvalds 
391da177e4SLinus Torvalds 	 This started out as a simple way to delay outgoing packets to
401da177e4SLinus Torvalds 	 test TCP but has grown to include most of the functionality
411da177e4SLinus Torvalds 	 of a full blown network emulator like NISTnet. It can delay
421da177e4SLinus Torvalds 	 packets and add random jitter (and correlation). The random
431da177e4SLinus Torvalds 	 distribution can be loaded from a table as well to provide
441da177e4SLinus Torvalds 	 normal, Pareto, or experimental curves. Packet loss,
451da177e4SLinus Torvalds 	 duplication, and reordering can also be emulated.
461da177e4SLinus Torvalds 
471da177e4SLinus Torvalds 	 This qdisc does not do classification; that can be handled by
481da177e4SLinus Torvalds 	 layering other disciplines.  It does not need to do bandwidth
491da177e4SLinus Torvalds 	 control either, since that can be handled by using a token
501da177e4SLinus Torvalds 	 bucket or other rate control.
51661b7972Sstephen hemminger 
52661b7972Sstephen hemminger      Correlated Loss Generator models
53661b7972Sstephen hemminger 
54661b7972Sstephen hemminger 	Added generation of correlated losses according to a 4-state
55661b7972Sstephen hemminger 	Markov chain (the GI model) and the "Gilbert-Elliot" model.
56661b7972Sstephen hemminger 
57661b7972Sstephen hemminger 	References:
58661b7972Sstephen hemminger 	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
59661b7972Sstephen hemminger 	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
60661b7972Sstephen hemminger 	and intuitive loss model for packet networks and its implementation
61661b7972Sstephen hemminger 	in the Netem module in the Linux kernel", available in [1]
62661b7972Sstephen hemminger 
63661b7972Sstephen hemminger 	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
64661b7972Sstephen hemminger 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
651da177e4SLinus Torvalds */
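
/* Example usage (illustrative): the behaviour described above is normally
 * configured from user space with iproute2's tc, for instance:
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *	tc qdisc change dev eth0 root netem loss 0.3% 25%
 *	tc qdisc change dev eth0 root netem duplicate 1% corrupt 0.1%
 *
 * Exact option syntax depends on the installed iproute2 version.
 */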
661da177e4SLinus Torvalds 
670a9fe5c3SYousuk Seung struct disttable {
680a9fe5c3SYousuk Seung 	u32  size;
690a9fe5c3SYousuk Seung 	s16 table[0];
700a9fe5c3SYousuk Seung };
710a9fe5c3SYousuk Seung 
721da177e4SLinus Torvalds struct netem_sched_data {
73aec0a40aSEric Dumazet 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
74aec0a40aSEric Dumazet 	struct rb_root t_root;
7550612537SEric Dumazet 
76d66280b1SPeter Oskolkov 	/* a linear queue; reduces rbtree rebalancing when jitter is low */
77d66280b1SPeter Oskolkov 	struct sk_buff	*t_head;
78d66280b1SPeter Oskolkov 	struct sk_buff	*t_tail;
79d66280b1SPeter Oskolkov 
8050612537SEric Dumazet 	/* optional qdisc for classful handling (NULL at netem init) */
811da177e4SLinus Torvalds 	struct Qdisc	*qdisc;
8250612537SEric Dumazet 
8359cb5c67SPatrick McHardy 	struct qdisc_watchdog watchdog;
841da177e4SLinus Torvalds 
85112f9cb6SDave Taht 	s64 latency;
86112f9cb6SDave Taht 	s64 jitter;
87b407621cSStephen Hemminger 
881da177e4SLinus Torvalds 	u32 loss;
89e4ae004bSEric Dumazet 	u32 ecn;
901da177e4SLinus Torvalds 	u32 limit;
911da177e4SLinus Torvalds 	u32 counter;
921da177e4SLinus Torvalds 	u32 gap;
931da177e4SLinus Torvalds 	u32 duplicate;
940dca51d3SStephen Hemminger 	u32 reorder;
95c865e5d9SStephen Hemminger 	u32 corrupt;
966a031f67SYang Yingliang 	u64 rate;
9790b41a1cSHagen Paul Pfeifer 	s32 packet_overhead;
9890b41a1cSHagen Paul Pfeifer 	u32 cell_size;
99809fa972SHannes Frederic Sowa 	struct reciprocal_value cell_size_reciprocal;
10090b41a1cSHagen Paul Pfeifer 	s32 cell_overhead;
1011da177e4SLinus Torvalds 
1021da177e4SLinus Torvalds 	struct crndstate {
103b407621cSStephen Hemminger 		u32 last;
104b407621cSStephen Hemminger 		u32 rho;
105c865e5d9SStephen Hemminger 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
1061da177e4SLinus Torvalds 
1070a9fe5c3SYousuk Seung 	struct disttable *delay_dist;
108661b7972Sstephen hemminger 
109661b7972Sstephen hemminger 	enum  {
110661b7972Sstephen hemminger 		CLG_RANDOM,
111661b7972Sstephen hemminger 		CLG_4_STATES,
112661b7972Sstephen hemminger 		CLG_GILB_ELL,
113661b7972Sstephen hemminger 	} loss_model;
114661b7972Sstephen hemminger 
115a6e2fe17SYang Yingliang 	enum {
116a6e2fe17SYang Yingliang 		TX_IN_GAP_PERIOD = 1,
117a6e2fe17SYang Yingliang 		TX_IN_BURST_PERIOD,
118a6e2fe17SYang Yingliang 		LOST_IN_GAP_PERIOD,
119a6e2fe17SYang Yingliang 		LOST_IN_BURST_PERIOD,
120a6e2fe17SYang Yingliang 	} _4_state_model;
121a6e2fe17SYang Yingliang 
122c045a734SYang Yingliang 	enum {
123c045a734SYang Yingliang 		GOOD_STATE = 1,
124c045a734SYang Yingliang 		BAD_STATE,
125c045a734SYang Yingliang 	} GE_state_model;
126c045a734SYang Yingliang 
127661b7972Sstephen hemminger 	/* Correlated Loss Generation models */
128661b7972Sstephen hemminger 	struct clgstate {
129661b7972Sstephen hemminger 		/* state of the Markov chain */
130661b7972Sstephen hemminger 		u8 state;
131661b7972Sstephen hemminger 
132661b7972Sstephen hemminger 		/* 4-state and Gilbert-Elliot models */
133661b7972Sstephen hemminger 		u32 a1;	/* p13 for 4-states or p for GE */
134661b7972Sstephen hemminger 		u32 a2;	/* p31 for 4-states or r for GE */
135661b7972Sstephen hemminger 		u32 a3;	/* p32 for 4-states or h for GE */
136661b7972Sstephen hemminger 		u32 a4;	/* p14 for 4-states or 1-k for GE */
137661b7972Sstephen hemminger 		u32 a5; /* p23 used only in 4-states */
138661b7972Sstephen hemminger 	} clg;
139661b7972Sstephen hemminger 
140836af83bSDave Taht 	struct tc_netem_slot slot_config;
141836af83bSDave Taht 	struct slotstate {
142836af83bSDave Taht 		u64 slot_next;
143836af83bSDave Taht 		s32 packets_left;
144836af83bSDave Taht 		s32 bytes_left;
145836af83bSDave Taht 	} slot;
146836af83bSDave Taht 
1470a9fe5c3SYousuk Seung 	struct disttable *slot_dist;
1481da177e4SLinus Torvalds };
1491da177e4SLinus Torvalds 
15050612537SEric Dumazet /* Time stamp put into socket buffer control block
15150612537SEric Dumazet  * Only valid when skbs are in our internal t(ime)fifo queue.
15256b17425SEric Dumazet  *
15356b17425SEric Dumazet  * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
15456b17425SEric Dumazet  * and skb->next & skb->prev are scratch space for a qdisc,
15556b17425SEric Dumazet  * we save skb->tstamp value in skb->cb[] before destroying it.
15650612537SEric Dumazet  */
1571da177e4SLinus Torvalds struct netem_skb_cb {
158112f9cb6SDave Taht 	u64	        time_to_send;
1591da177e4SLinus Torvalds };
1601da177e4SLinus Torvalds 
1615f86173bSJussi Kivilinna static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
1625f86173bSJussi Kivilinna {
163aec0a40aSEric Dumazet 	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
16416bda13dSDavid S. Miller 	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
165175f9c1bSJussi Kivilinna 	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
1665f86173bSJussi Kivilinna }
1675f86173bSJussi Kivilinna 
1681da177e4SLinus Torvalds /* init_crandom - initialize correlated random number generator
1691da177e4SLinus Torvalds  * Use entropy source for initial seed.
1701da177e4SLinus Torvalds  */
1711da177e4SLinus Torvalds static void init_crandom(struct crndstate *state, unsigned long rho)
1721da177e4SLinus Torvalds {
1731da177e4SLinus Torvalds 	state->rho = rho;
17463862b5bSAruna-Hewapathirane 	state->last = prandom_u32();
1751da177e4SLinus Torvalds }
1761da177e4SLinus Torvalds 
1771da177e4SLinus Torvalds /* get_crandom - correlated random number generator
1781da177e4SLinus Torvalds  * Next number depends on last value.
1791da177e4SLinus Torvalds  * rho is scaled to avoid floating point.
1801da177e4SLinus Torvalds  */
181b407621cSStephen Hemminger static u32 get_crandom(struct crndstate *state)
1821da177e4SLinus Torvalds {
1831da177e4SLinus Torvalds 	u64 value, rho;
1841da177e4SLinus Torvalds 	unsigned long answer;
1851da177e4SLinus Torvalds 
1860a9fe5c3SYousuk Seung 	if (!state || state->rho == 0)	/* no correlation */
18763862b5bSAruna-Hewapathirane 		return prandom_u32();
1881da177e4SLinus Torvalds 
18963862b5bSAruna-Hewapathirane 	value = prandom_u32();
1901da177e4SLinus Torvalds 	rho = (u64)state->rho + 1;
1911da177e4SLinus Torvalds 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
1921da177e4SLinus Torvalds 	state->last = answer;
1931da177e4SLinus Torvalds 	return answer;
1941da177e4SLinus Torvalds }
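
/* Worked example (illustrative): the expression above is a fixed-point
 * weighted average, answer ~= ((2^32 - rho) * value + rho * last) / 2^32.
 * With state->rho == 0x80000000 (a correlation of about 0.5) each output
 * lands roughly halfway between a fresh random value and the previous
 * output; with rho == 0 the function degenerates to plain prandom_u32().
 */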
1951da177e4SLinus Torvalds 
196661b7972Sstephen hemminger /* loss_4state - 4-state model loss generator
197661b7972Sstephen hemminger  * Generates losses according to the 4-state Markov chain adopted in
198661b7972Sstephen hemminger  * the GI (General and Intuitive) loss model.
199661b7972Sstephen hemminger  */
200661b7972Sstephen hemminger static bool loss_4state(struct netem_sched_data *q)
201661b7972Sstephen hemminger {
202661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
20363862b5bSAruna-Hewapathirane 	u32 rnd = prandom_u32();
204661b7972Sstephen hemminger 
205661b7972Sstephen hemminger 	/*
20625985edcSLucas De Marchi 	 * Makes a comparison between rnd and the transition
207661b7972Sstephen hemminger 	 * probabilities outgoing from the current state, then decides the
208661b7972Sstephen hemminger 	 * next state and if the next packet has to be transmitted or lost.
209661b7972Sstephen hemminger 	 * The four states correspond to:
210a6e2fe17SYang Yingliang 	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
211a6e2fe17SYang Yingliang 	 *   LOST_IN_BURST_PERIOD => isolated losses within a gap period
212a6e2fe17SYang Yingliang 	 *   LOST_IN_GAP_PERIOD => lost packets within a burst period
213a6e2fe17SYang Yingliang 	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
214661b7972Sstephen hemminger 	 */
215661b7972Sstephen hemminger 	switch (clg->state) {
216a6e2fe17SYang Yingliang 	case TX_IN_GAP_PERIOD:
217661b7972Sstephen hemminger 		if (rnd < clg->a4) {
218a6e2fe17SYang Yingliang 			clg->state = LOST_IN_BURST_PERIOD;
219661b7972Sstephen hemminger 			return true;
220ab6c27beSstephen hemminger 		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
221a6e2fe17SYang Yingliang 			clg->state = LOST_IN_GAP_PERIOD;
222661b7972Sstephen hemminger 			return true;
223a6e2fe17SYang Yingliang 		} else if (clg->a1 + clg->a4 < rnd) {
224a6e2fe17SYang Yingliang 			clg->state = TX_IN_GAP_PERIOD;
225a6e2fe17SYang Yingliang 		}
226661b7972Sstephen hemminger 
227661b7972Sstephen hemminger 		break;
228a6e2fe17SYang Yingliang 	case TX_IN_BURST_PERIOD:
229661b7972Sstephen hemminger 		if (rnd < clg->a5) {
230a6e2fe17SYang Yingliang 			clg->state = LOST_IN_GAP_PERIOD;
231661b7972Sstephen hemminger 			return true;
232a6e2fe17SYang Yingliang 		} else {
233a6e2fe17SYang Yingliang 			clg->state = TX_IN_BURST_PERIOD;
234a6e2fe17SYang Yingliang 		}
235661b7972Sstephen hemminger 
236661b7972Sstephen hemminger 		break;
237a6e2fe17SYang Yingliang 	case LOST_IN_GAP_PERIOD:
238661b7972Sstephen hemminger 		if (rnd < clg->a3)
239a6e2fe17SYang Yingliang 			clg->state = TX_IN_BURST_PERIOD;
240661b7972Sstephen hemminger 		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
241a6e2fe17SYang Yingliang 			clg->state = TX_IN_GAP_PERIOD;
242661b7972Sstephen hemminger 		} else if (clg->a2 + clg->a3 < rnd) {
243a6e2fe17SYang Yingliang 			clg->state = LOST_IN_GAP_PERIOD;
244661b7972Sstephen hemminger 			return true;
245661b7972Sstephen hemminger 		}
246661b7972Sstephen hemminger 		break;
247a6e2fe17SYang Yingliang 	case LOST_IN_BURST_PERIOD:
248a6e2fe17SYang Yingliang 		clg->state = TX_IN_GAP_PERIOD;
249661b7972Sstephen hemminger 		break;
250661b7972Sstephen hemminger 	}
251661b7972Sstephen hemminger 
252661b7972Sstephen hemminger 	return false;
253661b7972Sstephen hemminger }
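
/* Note (derived from the comparisons above): a1..a5 are probabilities in
 * u32 fixed point, so e.g. the chance of moving from TX_IN_GAP_PERIOD to
 * LOST_IN_BURST_PERIOD is a4 / 2^32, with ~0U meaning "always".
 */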
254661b7972Sstephen hemminger 
255661b7972Sstephen hemminger /* loss_gilb_ell - Gilbert-Elliot model loss generator
256661b7972Sstephen hemminger  * Generates losses according to the Gilbert-Elliot loss model or
257661b7972Sstephen hemminger  * its special cases  (Gilbert or Simple Gilbert)
258661b7972Sstephen hemminger  *
25925985edcSLucas De Marchi  * Makes a comparison between a random number and the transition
260661b7972Sstephen hemminger  * probabilities outgoing from the current state, then decides the
26125985edcSLucas De Marchi  * next state. A second random number is extracted and the comparison
262661b7972Sstephen hemminger  * with the loss probability of the current state decides if the next
263661b7972Sstephen hemminger  * packet will be transmitted or lost.
264661b7972Sstephen hemminger  */
265661b7972Sstephen hemminger static bool loss_gilb_ell(struct netem_sched_data *q)
266661b7972Sstephen hemminger {
267661b7972Sstephen hemminger 	struct clgstate *clg = &q->clg;
268661b7972Sstephen hemminger 
269661b7972Sstephen hemminger 	switch (clg->state) {
270c045a734SYang Yingliang 	case GOOD_STATE:
27163862b5bSAruna-Hewapathirane 		if (prandom_u32() < clg->a1)
272c045a734SYang Yingliang 			clg->state = BAD_STATE;
27363862b5bSAruna-Hewapathirane 		if (prandom_u32() < clg->a4)
274661b7972Sstephen hemminger 			return true;
2757c2781faSstephen hemminger 		break;
276c045a734SYang Yingliang 	case BAD_STATE:
27763862b5bSAruna-Hewapathirane 		if (prandom_u32() < clg->a2)
278c045a734SYang Yingliang 			clg->state = GOOD_STATE;
27963862b5bSAruna-Hewapathirane 		if (prandom_u32() > clg->a3)
280661b7972Sstephen hemminger 			return true;
281661b7972Sstephen hemminger 	}
282661b7972Sstephen hemminger 
283661b7972Sstephen hemminger 	return false;
284661b7972Sstephen hemminger }
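
/* Reading the generator above (derived from the code and the clgstate
 * field comments): in GOOD_STATE a packet is lost with probability
 * a4/2^32 (1-k) and the chain moves to BAD_STATE with probability
 * a1/2^32 (p); in BAD_STATE a packet is lost with probability
 * 1 - a3/2^32 (1-h) and the chain returns to GOOD_STATE with
 * probability a2/2^32 (r).
 */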
285661b7972Sstephen hemminger 
286661b7972Sstephen hemminger static bool loss_event(struct netem_sched_data *q)
287661b7972Sstephen hemminger {
288661b7972Sstephen hemminger 	switch (q->loss_model) {
289661b7972Sstephen hemminger 	case CLG_RANDOM:
290661b7972Sstephen hemminger 		/* Random packet drop 0 => none, ~0 => all */
291661b7972Sstephen hemminger 		return q->loss && q->loss >= get_crandom(&q->loss_cor);
292661b7972Sstephen hemminger 
293661b7972Sstephen hemminger 	case CLG_4_STATES:
294661b7972Sstephen hemminger 		/* 4-state loss model algorithm (used also for the GI model):
295661b7972Sstephen hemminger 		* asks the 4-state Markov loss generator whether the next
296661b7972Sstephen hemminger 		* packet should be dropped.
298661b7972Sstephen hemminger 		*/
299661b7972Sstephen hemminger 		return loss_4state(q);
300661b7972Sstephen hemminger 
301661b7972Sstephen hemminger 	case CLG_GILB_ELL:
302661b7972Sstephen hemminger 		/* Gilbert-Elliot loss model algorithm:
303661b7972Sstephen hemminger 		* asks the Gilbert-Elliot loss generator whether the next
304661b7972Sstephen hemminger 		* packet should be dropped.
306661b7972Sstephen hemminger 		*/
307661b7972Sstephen hemminger 		return loss_gilb_ell(q);
308661b7972Sstephen hemminger 	}
309661b7972Sstephen hemminger 
310661b7972Sstephen hemminger 	return false;	/* not reached */
311661b7972Sstephen hemminger }
312661b7972Sstephen hemminger 
313661b7972Sstephen hemminger 
3141da177e4SLinus Torvalds /* tabledist - return a pseudo-randomly distributed value with mean mu and
3151da177e4SLinus Torvalds  * std deviation sigma.  Uses table lookup to approximate the desired
3161da177e4SLinus Torvalds  * distribution, and a uniformly-distributed pseudo-random source.
3171da177e4SLinus Torvalds  */
3189b0ed891SStephen Hemminger static s64 tabledist(s64 mu, s32 sigma,
319b407621cSStephen Hemminger 		     struct crndstate *state,
320b407621cSStephen Hemminger 		     const struct disttable *dist)
3211da177e4SLinus Torvalds {
322112f9cb6SDave Taht 	s64 x;
323b407621cSStephen Hemminger 	long t;
324b407621cSStephen Hemminger 	u32 rnd;
3251da177e4SLinus Torvalds 
3261da177e4SLinus Torvalds 	if (sigma == 0)
3271da177e4SLinus Torvalds 		return mu;
3281da177e4SLinus Torvalds 
3291da177e4SLinus Torvalds 	rnd = get_crandom(state);
3301da177e4SLinus Torvalds 
3311da177e4SLinus Torvalds 	/* default uniform distribution */
3321da177e4SLinus Torvalds 	if (dist == NULL)
333043e337fSMd. Islam 		return ((rnd % (2 * sigma)) + mu) - sigma;
3341da177e4SLinus Torvalds 
3351da177e4SLinus Torvalds 	t = dist->table[rnd % dist->size];
3361da177e4SLinus Torvalds 	x = (sigma % NETEM_DIST_SCALE) * t;
3371da177e4SLinus Torvalds 	if (x >= 0)
3381da177e4SLinus Torvalds 		x += NETEM_DIST_SCALE/2;
3391da177e4SLinus Torvalds 	else
3401da177e4SLinus Torvalds 		x -= NETEM_DIST_SCALE/2;
3411da177e4SLinus Torvalds 
3421da177e4SLinus Torvalds 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
3431da177e4SLinus Torvalds }
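
/* Worked example (illustrative, assuming NETEM_DIST_SCALE == 8192 as in
 * pkt_sched.h): the value returned above is roughly mu + t * sigma / 8192,
 * so a table entry t of 8192 adds about one sigma to the mean delay and
 * an entry of -16384 subtracts about two.
 */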
3441da177e4SLinus Torvalds 
345bce552fdSStephen Hemminger static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
3467bc0f28cSHagen Paul Pfeifer {
34790b41a1cSHagen Paul Pfeifer 	len += q->packet_overhead;
34890b41a1cSHagen Paul Pfeifer 
34990b41a1cSHagen Paul Pfeifer 	if (q->cell_size) {
35090b41a1cSHagen Paul Pfeifer 		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
35190b41a1cSHagen Paul Pfeifer 
35290b41a1cSHagen Paul Pfeifer 		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
35390b41a1cSHagen Paul Pfeifer 			cells++;
35490b41a1cSHagen Paul Pfeifer 		len = cells * (q->cell_size + q->cell_overhead);
35590b41a1cSHagen Paul Pfeifer 	}
356bce552fdSStephen Hemminger 
357bce552fdSStephen Hemminger 	return div64_u64(len * NSEC_PER_SEC, q->rate);
3587bc0f28cSHagen Paul Pfeifer }
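
/* Worked example (illustrative; q->rate is in bytes per second, as set via
 * TCA_NETEM_RATE/RATE64): with rate == 125000 (1 Mbit/s), no cell framing
 * and a 1500 byte packet, the division above yields
 * 1500 * 1e9 / 125000 = 12,000,000 ns, i.e. a 12 ms serialization delay.
 */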
3597bc0f28cSHagen Paul Pfeifer 
360ff704050Sstephen hemminger static void tfifo_reset(struct Qdisc *sch)
361ff704050Sstephen hemminger {
362ff704050Sstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
3633aa605f2SEric Dumazet 	struct rb_node *p = rb_first(&q->t_root);
364ff704050Sstephen hemminger 
3653aa605f2SEric Dumazet 	while (p) {
36618a4c0eaSEric Dumazet 		struct sk_buff *skb = rb_to_skb(p);
367ff704050Sstephen hemminger 
3683aa605f2SEric Dumazet 		p = rb_next(p);
3693aa605f2SEric Dumazet 		rb_erase(&skb->rbnode, &q->t_root);
3702f08a9a1SEric Dumazet 		rtnl_kfree_skbs(skb, skb);
371ff704050Sstephen hemminger 	}
372d66280b1SPeter Oskolkov 
373d66280b1SPeter Oskolkov 	rtnl_kfree_skbs(q->t_head, q->t_tail);
374d66280b1SPeter Oskolkov 	q->t_head = NULL;
375d66280b1SPeter Oskolkov 	q->t_tail = NULL;
376ff704050Sstephen hemminger }
377ff704050Sstephen hemminger 
378960fb66eSEric Dumazet static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
37950612537SEric Dumazet {
380aec0a40aSEric Dumazet 	struct netem_sched_data *q = qdisc_priv(sch);
381112f9cb6SDave Taht 	u64 tnext = netem_skb_cb(nskb)->time_to_send;
382d66280b1SPeter Oskolkov 
383d66280b1SPeter Oskolkov 	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
384d66280b1SPeter Oskolkov 		if (q->t_tail)
385d66280b1SPeter Oskolkov 			q->t_tail->next = nskb;
386d66280b1SPeter Oskolkov 		else
387d66280b1SPeter Oskolkov 			q->t_head = nskb;
388d66280b1SPeter Oskolkov 		q->t_tail = nskb;
389d66280b1SPeter Oskolkov 	} else {
390aec0a40aSEric Dumazet 		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
39150612537SEric Dumazet 
392aec0a40aSEric Dumazet 		while (*p) {
393aec0a40aSEric Dumazet 			struct sk_buff *skb;
39450612537SEric Dumazet 
395aec0a40aSEric Dumazet 			parent = *p;
39618a4c0eaSEric Dumazet 			skb = rb_to_skb(parent);
39750612537SEric Dumazet 			if (tnext >= netem_skb_cb(skb)->time_to_send)
398aec0a40aSEric Dumazet 				p = &parent->rb_right;
399aec0a40aSEric Dumazet 			else
400aec0a40aSEric Dumazet 				p = &parent->rb_left;
40150612537SEric Dumazet 		}
40256b17425SEric Dumazet 		rb_link_node(&nskb->rbnode, parent, p);
40356b17425SEric Dumazet 		rb_insert_color(&nskb->rbnode, &q->t_root);
404d66280b1SPeter Oskolkov 	}
405aec0a40aSEric Dumazet 	sch->q.qlen++;
40650612537SEric Dumazet }
40750612537SEric Dumazet 
4086071bd1aSNeil Horman /* netem can't properly corrupt a megapacket (like we get from GSO), so
4096071bd1aSNeil Horman  * when we statistically choose to corrupt one, we instead segment it,
4106071bd1aSNeil Horman  * returning the first packet to be corrupted and re-enqueueing the remaining frames
4116071bd1aSNeil Horman  */
412520ac30fSEric Dumazet static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
413520ac30fSEric Dumazet 				     struct sk_buff **to_free)
4146071bd1aSNeil Horman {
4156071bd1aSNeil Horman 	struct sk_buff *segs;
4166071bd1aSNeil Horman 	netdev_features_t features = netif_skb_features(skb);
4176071bd1aSNeil Horman 
4186071bd1aSNeil Horman 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
4196071bd1aSNeil Horman 
4206071bd1aSNeil Horman 	if (IS_ERR_OR_NULL(segs)) {
421520ac30fSEric Dumazet 		qdisc_drop(skb, sch, to_free);
4226071bd1aSNeil Horman 		return NULL;
4236071bd1aSNeil Horman 	}
4246071bd1aSNeil Horman 	consume_skb(skb);
4256071bd1aSNeil Horman 	return segs;
4266071bd1aSNeil Horman }
4276071bd1aSNeil Horman 
4280afb51e7SStephen Hemminger /*
4290afb51e7SStephen Hemminger  * Insert one skb into qdisc.
4300afb51e7SStephen Hemminger  * Note: parent depends on return value to account for queue length.
4310afb51e7SStephen Hemminger  * 	NET_XMIT_DROP: queue length didn't change.
4320afb51e7SStephen Hemminger  *      NET_XMIT_SUCCESS: one skb was queued.
4330afb51e7SStephen Hemminger  */
434520ac30fSEric Dumazet static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
435520ac30fSEric Dumazet 			 struct sk_buff **to_free)
4361da177e4SLinus Torvalds {
4371da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
43889e1df74SGuillaume Chazarain 	/* We don't fill cb now as skb_unshare() may invalidate it */
43989e1df74SGuillaume Chazarain 	struct netem_skb_cb *cb;
4400afb51e7SStephen Hemminger 	struct sk_buff *skb2;
4416071bd1aSNeil Horman 	struct sk_buff *segs = NULL;
442177b8007SJakub Kicinski 	unsigned int prev_len = qdisc_pkt_len(skb);
4430afb51e7SStephen Hemminger 	int count = 1;
4446071bd1aSNeil Horman 	int rc = NET_XMIT_SUCCESS;
4455845f706SSheng Lan 	int rc_drop = NET_XMIT_DROP;
4461da177e4SLinus Torvalds 
4479410d386SChristoph Paasch 	/* Do not fool qdisc_drop_all() */
4489410d386SChristoph Paasch 	skb->prev = NULL;
4499410d386SChristoph Paasch 
4500afb51e7SStephen Hemminger 	/* Random duplication */
4510afb51e7SStephen Hemminger 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
4520afb51e7SStephen Hemminger 		++count;
4530afb51e7SStephen Hemminger 
454661b7972Sstephen hemminger 	/* Drop packet? */
455e4ae004bSEric Dumazet 	if (loss_event(q)) {
456e4ae004bSEric Dumazet 		if (q->ecn && INET_ECN_set_ce(skb))
45725331d6cSJohn Fastabend 			qdisc_qstats_drop(sch); /* mark packet */
458e4ae004bSEric Dumazet 		else
4590afb51e7SStephen Hemminger 			--count;
460e4ae004bSEric Dumazet 	}
4610afb51e7SStephen Hemminger 	if (count == 0) {
46225331d6cSJohn Fastabend 		qdisc_qstats_drop(sch);
463520ac30fSEric Dumazet 		__qdisc_drop(skb, to_free);
464c27f339aSJarek Poplawski 		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
4651da177e4SLinus Torvalds 	}
4661da177e4SLinus Torvalds 
4675a308f40SEric Dumazet 	/* If a delay is expected, orphan the skb. (orphaning usually takes
4685a308f40SEric Dumazet 	 * place at TX completion time, so _before_ the link transit delay)
4695a308f40SEric Dumazet 	 */
4705080f39eSNik Unger 	if (q->latency || q->jitter || q->rate)
471f2f872f9SEric Dumazet 		skb_orphan_partial(skb);
4724e8a5201SDavid S. Miller 
4730afb51e7SStephen Hemminger 	/*
4740afb51e7SStephen Hemminger 	 * If we need to duplicate the packet, then re-insert it at the top
4750afb51e7SStephen Hemminger 	 * of the qdisc tree, since the parent queuer expects that only one
4760afb51e7SStephen Hemminger 	 * skb will be queued.
477d5d75cd6SStephen Hemminger 	 */
4780afb51e7SStephen Hemminger 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
479159d2c7dSEric Dumazet 		struct Qdisc *rootq = qdisc_root_bh(sch);
4800afb51e7SStephen Hemminger 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
481d5d75cd6SStephen Hemminger 
482b396cca6SEric Dumazet 		q->duplicate = 0;
483520ac30fSEric Dumazet 		rootq->enqueue(skb2, rootq, to_free);
4840afb51e7SStephen Hemminger 		q->duplicate = dupsave;
4855845f706SSheng Lan 		rc_drop = NET_XMIT_SUCCESS;
4861da177e4SLinus Torvalds 	}
4871da177e4SLinus Torvalds 
488c865e5d9SStephen Hemminger 	/*
489c865e5d9SStephen Hemminger 	 * Randomized packet corruption.
490c865e5d9SStephen Hemminger 	 * Make a copy if needed, since we are modifying the packet.
491c865e5d9SStephen Hemminger 	 * If the packet is going to be hardware checksummed, then
492c865e5d9SStephen Hemminger 	 * do the checksum now in software before we mangle it.
493c865e5d9SStephen Hemminger 	 */
494c865e5d9SStephen Hemminger 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
4956071bd1aSNeil Horman 		if (skb_is_gso(skb)) {
4963e14c383SJakub Kicinski 			skb = netem_segment(skb, sch, to_free);
4973e14c383SJakub Kicinski 			if (!skb)
4985845f706SSheng Lan 				return rc_drop;
4993e14c383SJakub Kicinski 			segs = skb->next;
5003e14c383SJakub Kicinski 			skb_mark_not_on_list(skb);
5013e14c383SJakub Kicinski 			qdisc_skb_cb(skb)->pkt_len = skb->len;
5026071bd1aSNeil Horman 		}
5036071bd1aSNeil Horman 
5048a6e9c67SEric Dumazet 		skb = skb_unshare(skb, GFP_ATOMIC);
5058a6e9c67SEric Dumazet 		if (unlikely(!skb)) {
5068a6e9c67SEric Dumazet 			qdisc_qstats_drop(sch);
5078a6e9c67SEric Dumazet 			goto finish_segs;
5088a6e9c67SEric Dumazet 		}
5098a6e9c67SEric Dumazet 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
5108a6e9c67SEric Dumazet 		    skb_checksum_help(skb)) {
5118a6e9c67SEric Dumazet 			qdisc_drop(skb, sch, to_free);
512a7fa12d1SJakub Kicinski 			skb = NULL;
5136071bd1aSNeil Horman 			goto finish_segs;
5146071bd1aSNeil Horman 		}
515c865e5d9SStephen Hemminger 
51663862b5bSAruna-Hewapathirane 		skb->data[prandom_u32() % skb_headlen(skb)] ^=
51763862b5bSAruna-Hewapathirane 			1<<(prandom_u32() % 8);
518c865e5d9SStephen Hemminger 	}
519c865e5d9SStephen Hemminger 
5205845f706SSheng Lan 	if (unlikely(sch->q.qlen >= sch->limit)) {
5213e14c383SJakub Kicinski 		/* re-link segs, so that qdisc_drop_all() frees them all */
5223e14c383SJakub Kicinski 		skb->next = segs;
5235845f706SSheng Lan 		qdisc_drop_all(skb, sch, to_free);
5245845f706SSheng Lan 		return rc_drop;
5255845f706SSheng Lan 	}
526960fb66eSEric Dumazet 
52725331d6cSJohn Fastabend 	qdisc_qstats_backlog_inc(sch, skb);
528960fb66eSEric Dumazet 
5295f86173bSJussi Kivilinna 	cb = netem_skb_cb(skb);
530f64f9e71SJoe Perches 	if (q->gap == 0 ||		/* not doing reordering */
531a42b4799SVijay Subramanian 	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
532f64f9e71SJoe Perches 	    q->reorder < get_crandom(&q->reorder_cor)) {
533112f9cb6SDave Taht 		u64 now;
534112f9cb6SDave Taht 		s64 delay;
53507aaa115SStephen Hemminger 
53607aaa115SStephen Hemminger 		delay = tabledist(q->latency, q->jitter,
53707aaa115SStephen Hemminger 				  &q->delay_cor, q->delay_dist);
53807aaa115SStephen Hemminger 
539112f9cb6SDave Taht 		now = ktime_get_ns();
5407bc0f28cSHagen Paul Pfeifer 
5417bc0f28cSHagen Paul Pfeifer 		if (q->rate) {
5425080f39eSNik Unger 			struct netem_skb_cb *last = NULL;
5437bc0f28cSHagen Paul Pfeifer 
5445080f39eSNik Unger 			if (sch->q.tail)
5455080f39eSNik Unger 				last = netem_skb_cb(sch->q.tail);
5465080f39eSNik Unger 			if (q->t_root.rb_node) {
5475080f39eSNik Unger 				struct sk_buff *t_skb;
5485080f39eSNik Unger 				struct netem_skb_cb *t_last;
5495080f39eSNik Unger 
55018a4c0eaSEric Dumazet 				t_skb = skb_rb_last(&q->t_root);
5515080f39eSNik Unger 				t_last = netem_skb_cb(t_skb);
5525080f39eSNik Unger 				if (!last ||
553d66280b1SPeter Oskolkov 				    t_last->time_to_send > last->time_to_send)
5545080f39eSNik Unger 					last = t_last;
5555080f39eSNik Unger 			}
556d66280b1SPeter Oskolkov 			if (q->t_tail) {
557d66280b1SPeter Oskolkov 				struct netem_skb_cb *t_last =
558d66280b1SPeter Oskolkov 					netem_skb_cb(q->t_tail);
559d66280b1SPeter Oskolkov 
560d66280b1SPeter Oskolkov 				if (!last ||
561d66280b1SPeter Oskolkov 				    t_last->time_to_send > last->time_to_send)
562d66280b1SPeter Oskolkov 					last = t_last;
5635080f39eSNik Unger 			}
5645080f39eSNik Unger 
565aec0a40aSEric Dumazet 			if (last) {
5667bc0f28cSHagen Paul Pfeifer 				/*
567a13d3104SJohannes Naab 				 * The last packet in the queue is the reference
568a13d3104SJohannes Naab 				 * point (now); calculate that time bonus and
5697bc0f28cSHagen Paul Pfeifer 				 * subtract it from the delay.
5707bc0f28cSHagen Paul Pfeifer 				 */
5715080f39eSNik Unger 				delay -= last->time_to_send - now;
572112f9cb6SDave Taht 				delay = max_t(s64, 0, delay);
5735080f39eSNik Unger 				now = last->time_to_send;
5747bc0f28cSHagen Paul Pfeifer 			}
575a13d3104SJohannes Naab 
576bce552fdSStephen Hemminger 			delay += packet_time_ns(qdisc_pkt_len(skb), q);
5777bc0f28cSHagen Paul Pfeifer 		}
5787bc0f28cSHagen Paul Pfeifer 
5797c59e25fSPatrick McHardy 		cb->time_to_send = now + delay;
5801da177e4SLinus Torvalds 		++q->counter;
581960fb66eSEric Dumazet 		tfifo_enqueue(skb, sch);
5821da177e4SLinus Torvalds 	} else {
5830dca51d3SStephen Hemminger 		/*
5840dca51d3SStephen Hemminger 		 * Do re-ordering by putting one out of N packets at the front
5850dca51d3SStephen Hemminger 		 * of the queue.
5860dca51d3SStephen Hemminger 		 */
587112f9cb6SDave Taht 		cb->time_to_send = ktime_get_ns();
5880dca51d3SStephen Hemminger 		q->counter = 0;
5898ba25dadSJarek Poplawski 
59059697730SDavid S. Miller 		__qdisc_enqueue_head(skb, &sch->q);
591eb101924SHagen Paul Pfeifer 		sch->qstats.requeues++;
592378a2f09SJarek Poplawski 	}
5931da177e4SLinus Torvalds 
5946071bd1aSNeil Horman finish_segs:
5956071bd1aSNeil Horman 	if (segs) {
596177b8007SJakub Kicinski 		unsigned int len, last_len;
597a7fa12d1SJakub Kicinski 		int nb;
598177b8007SJakub Kicinski 
599a7fa12d1SJakub Kicinski 		len = skb ? skb->len : 0;
600a7fa12d1SJakub Kicinski 		nb = skb ? 1 : 0;
601177b8007SJakub Kicinski 
6026071bd1aSNeil Horman 		while (segs) {
6036071bd1aSNeil Horman 			skb2 = segs->next;
604a8305bffSDavid S. Miller 			skb_mark_not_on_list(segs);
6056071bd1aSNeil Horman 			qdisc_skb_cb(segs)->pkt_len = segs->len;
6066071bd1aSNeil Horman 			last_len = segs->len;
607520ac30fSEric Dumazet 			rc = qdisc_enqueue(segs, sch, to_free);
6086071bd1aSNeil Horman 			if (rc != NET_XMIT_SUCCESS) {
6096071bd1aSNeil Horman 				if (net_xmit_drop_count(rc))
6106071bd1aSNeil Horman 					qdisc_qstats_drop(sch);
6116071bd1aSNeil Horman 			} else {
6126071bd1aSNeil Horman 				nb++;
6136071bd1aSNeil Horman 				len += last_len;
6146071bd1aSNeil Horman 			}
6156071bd1aSNeil Horman 			segs = skb2;
6166071bd1aSNeil Horman 		}
617a7fa12d1SJakub Kicinski 		/* Parent qdiscs accounted for 1 skb of size @prev_len */
618a7fa12d1SJakub Kicinski 		qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
619e0ad032eSJakub Kicinski 	} else if (!skb) {
620e0ad032eSJakub Kicinski 		return NET_XMIT_DROP;
6216071bd1aSNeil Horman 	}
62210f6dfcfSstephen hemminger 	return NET_XMIT_SUCCESS;
6231da177e4SLinus Torvalds }
6241da177e4SLinus Torvalds 
625836af83bSDave Taht /* Start the next delivery slot: pick a new slot time in the future and
626836af83bSDave Taht  * reset the per-slot packet and byte budgets.
627836af83bSDave Taht  */
628836af83bSDave Taht 
629836af83bSDave Taht static void get_slot_next(struct netem_sched_data *q, u64 now)
630836af83bSDave Taht {
6310a9fe5c3SYousuk Seung 	s64 next_delay;
6320a9fe5c3SYousuk Seung 
6330a9fe5c3SYousuk Seung 	if (!q->slot_dist)
6340a9fe5c3SYousuk Seung 		next_delay = q->slot_config.min_delay +
635836af83bSDave Taht 				(prandom_u32() *
636836af83bSDave Taht 				 (q->slot_config.max_delay -
637836af83bSDave Taht 				  q->slot_config.min_delay) >> 32);
6380a9fe5c3SYousuk Seung 	else
6390a9fe5c3SYousuk Seung 		next_delay = tabledist(q->slot_config.dist_delay,
6400a9fe5c3SYousuk Seung 				       (s32)(q->slot_config.dist_jitter),
6410a9fe5c3SYousuk Seung 				       NULL, q->slot_dist);
6420a9fe5c3SYousuk Seung 
6430a9fe5c3SYousuk Seung 	q->slot.slot_next = now + next_delay;
644836af83bSDave Taht 	q->slot.packets_left = q->slot_config.max_packets;
645836af83bSDave Taht 	q->slot.bytes_left = q->slot_config.max_bytes;
646836af83bSDave Taht }
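
/* Note (derived from the expression above): when no slot distribution table
 * is loaded, next_delay is drawn uniformly from [min_delay, max_delay) by
 * scaling a 32-bit random value into the configured range via the >> 32.
 */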
647836af83bSDave Taht 
648d66280b1SPeter Oskolkov static struct sk_buff *netem_peek(struct netem_sched_data *q)
649d66280b1SPeter Oskolkov {
650d66280b1SPeter Oskolkov 	struct sk_buff *skb = skb_rb_first(&q->t_root);
651d66280b1SPeter Oskolkov 	u64 t1, t2;
652d66280b1SPeter Oskolkov 
653d66280b1SPeter Oskolkov 	if (!skb)
654d66280b1SPeter Oskolkov 		return q->t_head;
655d66280b1SPeter Oskolkov 	if (!q->t_head)
656d66280b1SPeter Oskolkov 		return skb;
657d66280b1SPeter Oskolkov 
658d66280b1SPeter Oskolkov 	t1 = netem_skb_cb(skb)->time_to_send;
659d66280b1SPeter Oskolkov 	t2 = netem_skb_cb(q->t_head)->time_to_send;
660d66280b1SPeter Oskolkov 	if (t1 < t2)
661d66280b1SPeter Oskolkov 		return skb;
662d66280b1SPeter Oskolkov 	return q->t_head;
663d66280b1SPeter Oskolkov }
664d66280b1SPeter Oskolkov 
665d66280b1SPeter Oskolkov static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
666d66280b1SPeter Oskolkov {
667d66280b1SPeter Oskolkov 	if (skb == q->t_head) {
668d66280b1SPeter Oskolkov 		q->t_head = skb->next;
669d66280b1SPeter Oskolkov 		if (!q->t_head)
670d66280b1SPeter Oskolkov 			q->t_tail = NULL;
671d66280b1SPeter Oskolkov 	} else {
672d66280b1SPeter Oskolkov 		rb_erase(&skb->rbnode, &q->t_root);
673d66280b1SPeter Oskolkov 	}
674d66280b1SPeter Oskolkov }
675d66280b1SPeter Oskolkov 
6761da177e4SLinus Torvalds static struct sk_buff *netem_dequeue(struct Qdisc *sch)
6771da177e4SLinus Torvalds {
6781da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
6791da177e4SLinus Torvalds 	struct sk_buff *skb;
6801da177e4SLinus Torvalds 
68150612537SEric Dumazet tfifo_dequeue:
682ed760cb8SFlorian Westphal 	skb = __qdisc_dequeue_head(&sch->q);
683771018e7SStephen Hemminger 	if (skb) {
68425331d6cSJohn Fastabend 		qdisc_qstats_backlog_dec(sch, skb);
6850ad2a836SBeshay, Joseph deliver:
686aec0a40aSEric Dumazet 		qdisc_bstats_update(sch, skb);
687aec0a40aSEric Dumazet 		return skb;
688aec0a40aSEric Dumazet 	}
689d66280b1SPeter Oskolkov 	skb = netem_peek(q);
690d66280b1SPeter Oskolkov 	if (skb) {
691112f9cb6SDave Taht 		u64 time_to_send;
692836af83bSDave Taht 		u64 now = ktime_get_ns();
69336b7bfe0SEric Dumazet 
6940f9f32acSStephen Hemminger 		/* is there more time remaining before it should be sent? */
69536b7bfe0SEric Dumazet 		time_to_send = netem_skb_cb(skb)->time_to_send;
696836af83bSDave Taht 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
697836af83bSDave Taht 			get_slot_next(q, now);
698aec0a40aSEric Dumazet 
699836af83bSDave Taht 		if (time_to_send <= now && q->slot.slot_next <= now) {
700d66280b1SPeter Oskolkov 			netem_erase_head(q, skb);
701aec0a40aSEric Dumazet 			sch->q.qlen--;
7020ad2a836SBeshay, Joseph 			qdisc_qstats_backlog_dec(sch, skb);
703aec0a40aSEric Dumazet 			skb->next = NULL;
704aec0a40aSEric Dumazet 			skb->prev = NULL;
705bffa72cfSEric Dumazet 			/* skb->dev shares skb->rbnode area,
706bffa72cfSEric Dumazet 			 * we need to restore its value.
707bffa72cfSEric Dumazet 			 */
708bffa72cfSEric Dumazet 			skb->dev = qdisc_dev(sch);
70903c05f0dSJarek Poplawski 
710836af83bSDave Taht 			if (q->slot.slot_next) {
711836af83bSDave Taht 				q->slot.packets_left--;
712836af83bSDave Taht 				q->slot.bytes_left -= qdisc_pkt_len(skb);
713836af83bSDave Taht 				if (q->slot.packets_left <= 0 ||
714836af83bSDave Taht 				    q->slot.bytes_left <= 0)
715836af83bSDave Taht 					get_slot_next(q, now);
716836af83bSDave Taht 			}
717836af83bSDave Taht 
71850612537SEric Dumazet 			if (q->qdisc) {
71921de12eeSEric Dumazet 				unsigned int pkt_len = qdisc_pkt_len(skb);
720520ac30fSEric Dumazet 				struct sk_buff *to_free = NULL;
721520ac30fSEric Dumazet 				int err;
72250612537SEric Dumazet 
723520ac30fSEric Dumazet 				err = qdisc_enqueue(skb, q->qdisc, &to_free);
724520ac30fSEric Dumazet 				kfree_skb_list(to_free);
72521de12eeSEric Dumazet 				if (err != NET_XMIT_SUCCESS &&
72621de12eeSEric Dumazet 				    net_xmit_drop_count(err)) {
72725331d6cSJohn Fastabend 					qdisc_qstats_drop(sch);
7282ccccf5fSWANG Cong 					qdisc_tree_reduce_backlog(sch, 1,
72921de12eeSEric Dumazet 								  pkt_len);
73050612537SEric Dumazet 				}
73150612537SEric Dumazet 				goto tfifo_dequeue;
73250612537SEric Dumazet 			}
733aec0a40aSEric Dumazet 			goto deliver;
73411274e5aSStephen Hemminger 		}
73507aaa115SStephen Hemminger 
73650612537SEric Dumazet 		if (q->qdisc) {
73750612537SEric Dumazet 			skb = q->qdisc->ops->dequeue(q->qdisc);
73850612537SEric Dumazet 			if (skb)
73950612537SEric Dumazet 				goto deliver;
74050612537SEric Dumazet 		}
741836af83bSDave Taht 
742836af83bSDave Taht 		qdisc_watchdog_schedule_ns(&q->watchdog,
743836af83bSDave Taht 					   max(time_to_send,
744836af83bSDave Taht 					       q->slot.slot_next));
7450f9f32acSStephen Hemminger 	}
7460f9f32acSStephen Hemminger 
74750612537SEric Dumazet 	if (q->qdisc) {
74850612537SEric Dumazet 		skb = q->qdisc->ops->dequeue(q->qdisc);
74950612537SEric Dumazet 		if (skb)
75050612537SEric Dumazet 			goto deliver;
75150612537SEric Dumazet 	}
7520f9f32acSStephen Hemminger 	return NULL;
7531da177e4SLinus Torvalds }
7541da177e4SLinus Torvalds 
7551da177e4SLinus Torvalds static void netem_reset(struct Qdisc *sch)
7561da177e4SLinus Torvalds {
7571da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
7581da177e4SLinus Torvalds 
75950612537SEric Dumazet 	qdisc_reset_queue(sch);
760ff704050Sstephen hemminger 	tfifo_reset(sch);
76150612537SEric Dumazet 	if (q->qdisc)
7621da177e4SLinus Torvalds 		qdisc_reset(q->qdisc);
76359cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
7641da177e4SLinus Torvalds }
7651da177e4SLinus Torvalds 
7666373a9a2Sstephen hemminger static void dist_free(struct disttable *d)
7676373a9a2Sstephen hemminger {
7684cb28970SWANG Cong 	kvfree(d);
7696373a9a2Sstephen hemminger }
7706373a9a2Sstephen hemminger 
7711da177e4SLinus Torvalds /*
7721da177e4SLinus Torvalds  * Distribution data is a variable size payload containing
7731da177e4SLinus Torvalds  * signed 16 bit values.
7741da177e4SLinus Torvalds  */
775836af83bSDave Taht 
7760a9fe5c3SYousuk Seung static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
7770a9fe5c3SYousuk Seung 			  const struct nlattr *attr)
7781da177e4SLinus Torvalds {
7796373a9a2Sstephen hemminger 	size_t n = nla_len(attr)/sizeof(__s16);
7801e90474cSPatrick McHardy 	const __s16 *data = nla_data(attr);
7817698b4fcSDavid S. Miller 	spinlock_t *root_lock;
7821da177e4SLinus Torvalds 	struct disttable *d;
7831da177e4SLinus Torvalds 	int i;
7841da177e4SLinus Torvalds 
785b41d936bSEric Dumazet 	if (!n || n > NETEM_DIST_MAX)
7861da177e4SLinus Torvalds 		return -EINVAL;
7871da177e4SLinus Torvalds 
788752ade68SMichal Hocko 	d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
7891da177e4SLinus Torvalds 	if (!d)
7901da177e4SLinus Torvalds 		return -ENOMEM;
7911da177e4SLinus Torvalds 
7921da177e4SLinus Torvalds 	d->size = n;
7931da177e4SLinus Torvalds 	for (i = 0; i < n; i++)
7941da177e4SLinus Torvalds 		d->table[i] = data[i];
7951da177e4SLinus Torvalds 
796102396aeSJarek Poplawski 	root_lock = qdisc_root_sleeping_lock(sch);
7977698b4fcSDavid S. Miller 
7987698b4fcSDavid S. Miller 	spin_lock_bh(root_lock);
7990a9fe5c3SYousuk Seung 	swap(*tbl, d);
8007698b4fcSDavid S. Miller 	spin_unlock_bh(root_lock);
801bb52c7acSEric Dumazet 
802bb52c7acSEric Dumazet 	dist_free(d);
8031da177e4SLinus Torvalds 	return 0;
8041da177e4SLinus Torvalds }
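
/* The table itself normally comes from user space: iproute2 ships
 * precomputed distribution files (e.g. normal, pareto, paretonormal) that
 * tc passes down as TCA_NETEM_DELAY_DIST, and custom tables can be built
 * with iproute2's maketable tool. (Illustrative note; available files
 * depend on the iproute2 installation.)
 */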
8051da177e4SLinus Torvalds 
806836af83bSDave Taht static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
807836af83bSDave Taht {
808836af83bSDave Taht 	const struct tc_netem_slot *c = nla_data(attr);
809836af83bSDave Taht 
810836af83bSDave Taht 	q->slot_config = *c;
811836af83bSDave Taht 	if (q->slot_config.max_packets == 0)
812836af83bSDave Taht 		q->slot_config.max_packets = INT_MAX;
813836af83bSDave Taht 	if (q->slot_config.max_bytes == 0)
814836af83bSDave Taht 		q->slot_config.max_bytes = INT_MAX;
815836af83bSDave Taht 	q->slot.packets_left = q->slot_config.max_packets;
816836af83bSDave Taht 	q->slot.bytes_left = q->slot_config.max_bytes;
8170a9fe5c3SYousuk Seung 	if (q->slot_config.min_delay | q->slot_config.max_delay |
8180a9fe5c3SYousuk Seung 	    q->slot_config.dist_jitter)
819836af83bSDave Taht 		q->slot.slot_next = ktime_get_ns();
820836af83bSDave Taht 	else
821836af83bSDave Taht 		q->slot.slot_next = 0;
822836af83bSDave Taht }
823836af83bSDave Taht 
82449545a77SYang Yingliang static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
8251da177e4SLinus Torvalds {
8261e90474cSPatrick McHardy 	const struct tc_netem_corr *c = nla_data(attr);
8271da177e4SLinus Torvalds 
8281da177e4SLinus Torvalds 	init_crandom(&q->delay_cor, c->delay_corr);
8291da177e4SLinus Torvalds 	init_crandom(&q->loss_cor, c->loss_corr);
8301da177e4SLinus Torvalds 	init_crandom(&q->dup_cor, c->dup_corr);
8311da177e4SLinus Torvalds }
8321da177e4SLinus Torvalds 
83349545a77SYang Yingliang static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
8340dca51d3SStephen Hemminger {
8351e90474cSPatrick McHardy 	const struct tc_netem_reorder *r = nla_data(attr);
8360dca51d3SStephen Hemminger 
8370dca51d3SStephen Hemminger 	q->reorder = r->probability;
8380dca51d3SStephen Hemminger 	init_crandom(&q->reorder_cor, r->correlation);
8390dca51d3SStephen Hemminger }
8400dca51d3SStephen Hemminger 
84149545a77SYang Yingliang static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
842c865e5d9SStephen Hemminger {
8431e90474cSPatrick McHardy 	const struct tc_netem_corrupt *r = nla_data(attr);
844c865e5d9SStephen Hemminger 
845c865e5d9SStephen Hemminger 	q->corrupt = r->probability;
846c865e5d9SStephen Hemminger 	init_crandom(&q->corrupt_cor, r->correlation);
847c865e5d9SStephen Hemminger }
848c865e5d9SStephen Hemminger 
84949545a77SYang Yingliang static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
8507bc0f28cSHagen Paul Pfeifer {
8517bc0f28cSHagen Paul Pfeifer 	const struct tc_netem_rate *r = nla_data(attr);
8527bc0f28cSHagen Paul Pfeifer 
8537bc0f28cSHagen Paul Pfeifer 	q->rate = r->rate;
85490b41a1cSHagen Paul Pfeifer 	q->packet_overhead = r->packet_overhead;
85590b41a1cSHagen Paul Pfeifer 	q->cell_size = r->cell_size;
856809fa972SHannes Frederic Sowa 	q->cell_overhead = r->cell_overhead;
85790b41a1cSHagen Paul Pfeifer 	if (q->cell_size)
85890b41a1cSHagen Paul Pfeifer 		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
859809fa972SHannes Frederic Sowa 	else
860809fa972SHannes Frederic Sowa 		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
8617bc0f28cSHagen Paul Pfeifer }
8627bc0f28cSHagen Paul Pfeifer 
86349545a77SYang Yingliang static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
864661b7972Sstephen hemminger {
865661b7972Sstephen hemminger 	const struct nlattr *la;
866661b7972Sstephen hemminger 	int rem;
867661b7972Sstephen hemminger 
868661b7972Sstephen hemminger 	nla_for_each_nested(la, attr, rem) {
869661b7972Sstephen hemminger 		u16 type = nla_type(la);
870661b7972Sstephen hemminger 
871661b7972Sstephen hemminger 		switch (type) {
872661b7972Sstephen hemminger 		case NETEM_LOSS_GI: {
873661b7972Sstephen hemminger 			const struct tc_netem_gimodel *gi = nla_data(la);
874661b7972Sstephen hemminger 
8752494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
876661b7972Sstephen hemminger 				pr_info("netem: incorrect gi model size\n");
877661b7972Sstephen hemminger 				return -EINVAL;
878661b7972Sstephen hemminger 			}
879661b7972Sstephen hemminger 
880661b7972Sstephen hemminger 			q->loss_model = CLG_4_STATES;
881661b7972Sstephen hemminger 
8823fbac2a8SYang Yingliang 			q->clg.state = TX_IN_GAP_PERIOD;
883661b7972Sstephen hemminger 			q->clg.a1 = gi->p13;
884661b7972Sstephen hemminger 			q->clg.a2 = gi->p31;
885661b7972Sstephen hemminger 			q->clg.a3 = gi->p32;
886661b7972Sstephen hemminger 			q->clg.a4 = gi->p14;
887661b7972Sstephen hemminger 			q->clg.a5 = gi->p23;
888661b7972Sstephen hemminger 			break;
889661b7972Sstephen hemminger 		}
890661b7972Sstephen hemminger 
891661b7972Sstephen hemminger 		case NETEM_LOSS_GE: {
892661b7972Sstephen hemminger 			const struct tc_netem_gemodel *ge = nla_data(la);
893661b7972Sstephen hemminger 
8942494654dSstephen hemminger 			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
8952494654dSstephen hemminger 				pr_info("netem: incorrect ge model size\n");
896661b7972Sstephen hemminger 				return -EINVAL;
897661b7972Sstephen hemminger 			}
898661b7972Sstephen hemminger 
899661b7972Sstephen hemminger 			q->loss_model = CLG_GILB_ELL;
9003fbac2a8SYang Yingliang 			q->clg.state = GOOD_STATE;
901661b7972Sstephen hemminger 			q->clg.a1 = ge->p;
902661b7972Sstephen hemminger 			q->clg.a2 = ge->r;
903661b7972Sstephen hemminger 			q->clg.a3 = ge->h;
904661b7972Sstephen hemminger 			q->clg.a4 = ge->k1;
905661b7972Sstephen hemminger 			break;
906661b7972Sstephen hemminger 		}
907661b7972Sstephen hemminger 
908661b7972Sstephen hemminger 		default:
909661b7972Sstephen hemminger 			pr_info("netem: unknown loss type %u\n", type);
910661b7972Sstephen hemminger 			return -EINVAL;
911661b7972Sstephen hemminger 		}
912661b7972Sstephen hemminger 	}
913661b7972Sstephen hemminger 
914661b7972Sstephen hemminger 	return 0;
915661b7972Sstephen hemminger }
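
/* Example (illustrative, per tc-netem(8)): the nested NETEM_LOSS_GI and
 * NETEM_LOSS_GE attributes parsed above are typically produced by
 *
 *	tc qdisc ... netem loss state P13 [P31 [P32 [P23 P14]]]
 *	tc qdisc ... netem loss gemodel p [r [1-h [1-k]]]
 *
 * Exact syntax depends on the installed iproute2 version.
 */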
916661b7972Sstephen hemminger 
91727a3421eSPatrick McHardy static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
91827a3421eSPatrick McHardy 	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
91927a3421eSPatrick McHardy 	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
92027a3421eSPatrick McHardy 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
9217bc0f28cSHagen Paul Pfeifer 	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
922661b7972Sstephen hemminger 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
923e4ae004bSEric Dumazet 	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
9246a031f67SYang Yingliang 	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
92599803171SDave Taht 	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
92699803171SDave Taht 	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
927836af83bSDave Taht 	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
92827a3421eSPatrick McHardy };
92927a3421eSPatrick McHardy 
9302c10b32bSThomas Graf static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
9312c10b32bSThomas Graf 		      const struct nla_policy *policy, int len)
9322c10b32bSThomas Graf {
9332c10b32bSThomas Graf 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
9342c10b32bSThomas Graf 
935661b7972Sstephen hemminger 	if (nested_len < 0) {
936661b7972Sstephen hemminger 		pr_info("netem: invalid attributes len %d\n", nested_len);
9372c10b32bSThomas Graf 		return -EINVAL;
938661b7972Sstephen hemminger 	}
939661b7972Sstephen hemminger 
9402c10b32bSThomas Graf 	if (nested_len >= nla_attr_size(0))
9418cb08174SJohannes Berg 		return nla_parse_deprecated(tb, maxtype,
9428cb08174SJohannes Berg 					    nla_data(nla) + NLA_ALIGN(len),
943fceb6435SJohannes Berg 					    nested_len, policy, NULL);
944661b7972Sstephen hemminger 
9452c10b32bSThomas Graf 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
9462c10b32bSThomas Graf 	return 0;
9472c10b32bSThomas Graf }
9482c10b32bSThomas Graf 
949c865e5d9SStephen Hemminger /* Parse netlink message to set options */
9502030721cSAlexander Aring static int netem_change(struct Qdisc *sch, struct nlattr *opt,
9512030721cSAlexander Aring 			struct netlink_ext_ack *extack)
9521da177e4SLinus Torvalds {
9531da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
954b03f4672SPatrick McHardy 	struct nlattr *tb[TCA_NETEM_MAX + 1];
9551da177e4SLinus Torvalds 	struct tc_netem_qopt *qopt;
95654a4b05cSYang Yingliang 	struct clgstate old_clg;
95754a4b05cSYang Yingliang 	int old_loss_model = CLG_RANDOM;
9581da177e4SLinus Torvalds 	int ret;
9591da177e4SLinus Torvalds 
960b03f4672SPatrick McHardy 	if (opt == NULL)
9611da177e4SLinus Torvalds 		return -EINVAL;
9621da177e4SLinus Torvalds 
9632c10b32bSThomas Graf 	qopt = nla_data(opt);
9642c10b32bSThomas Graf 	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
965b03f4672SPatrick McHardy 	if (ret < 0)
966b03f4672SPatrick McHardy 		return ret;
967b03f4672SPatrick McHardy 
96854a4b05cSYang Yingliang 	/* backup q->clg and q->loss_model */
96954a4b05cSYang Yingliang 	old_clg = q->clg;
97054a4b05cSYang Yingliang 	old_loss_model = q->loss_model;
97154a4b05cSYang Yingliang 
97254a4b05cSYang Yingliang 	if (tb[TCA_NETEM_LOSS]) {
97349545a77SYang Yingliang 		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
97454a4b05cSYang Yingliang 		if (ret) {
97554a4b05cSYang Yingliang 			q->loss_model = old_loss_model;
97654a4b05cSYang Yingliang 			return ret;
97754a4b05cSYang Yingliang 		}
97854a4b05cSYang Yingliang 	} else {
97954a4b05cSYang Yingliang 		q->loss_model = CLG_RANDOM;
98054a4b05cSYang Yingliang 	}
98154a4b05cSYang Yingliang 
98254a4b05cSYang Yingliang 	if (tb[TCA_NETEM_DELAY_DIST]) {
9830a9fe5c3SYousuk Seung 		ret = get_dist_table(sch, &q->delay_dist,
9840a9fe5c3SYousuk Seung 				     tb[TCA_NETEM_DELAY_DIST]);
9850a9fe5c3SYousuk Seung 		if (ret)
9860a9fe5c3SYousuk Seung 			goto get_table_failure;
98754a4b05cSYang Yingliang 	}
9880a9fe5c3SYousuk Seung 
9890a9fe5c3SYousuk Seung 	if (tb[TCA_NETEM_SLOT_DIST]) {
9900a9fe5c3SYousuk Seung 		ret = get_dist_table(sch, &q->slot_dist,
9910a9fe5c3SYousuk Seung 				     tb[TCA_NETEM_SLOT_DIST]);
9920a9fe5c3SYousuk Seung 		if (ret)
9930a9fe5c3SYousuk Seung 			goto get_table_failure;
99454a4b05cSYang Yingliang 	}
99554a4b05cSYang Yingliang 
99650612537SEric Dumazet 	sch->limit = qopt->limit;
9971da177e4SLinus Torvalds 
998112f9cb6SDave Taht 	q->latency = PSCHED_TICKS2NS(qopt->latency);
999112f9cb6SDave Taht 	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
10001da177e4SLinus Torvalds 	q->limit = qopt->limit;
10011da177e4SLinus Torvalds 	q->gap = qopt->gap;
10020dca51d3SStephen Hemminger 	q->counter = 0;
10031da177e4SLinus Torvalds 	q->loss = qopt->loss;
10041da177e4SLinus Torvalds 	q->duplicate = qopt->duplicate;
10051da177e4SLinus Torvalds 
1006bb2f8cc0SStephen Hemminger 	/* For compatibility with earlier versions:
1007bb2f8cc0SStephen Hemminger 	 * if gap is set, we need to assume 100% reorder probability.
10080dca51d3SStephen Hemminger 	 */
1009a362e0a7SStephen Hemminger 	if (q->gap)
10100dca51d3SStephen Hemminger 		q->reorder = ~0;
10110dca51d3SStephen Hemminger 
1012265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORR])
101349545a77SYang Yingliang 		get_correlation(q, tb[TCA_NETEM_CORR]);
10141da177e4SLinus Torvalds 
1015265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_REORDER])
101649545a77SYang Yingliang 		get_reorder(q, tb[TCA_NETEM_REORDER]);
10171da177e4SLinus Torvalds 
1018265eb67fSStephen Hemminger 	if (tb[TCA_NETEM_CORRUPT])
101949545a77SYang Yingliang 		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
10201da177e4SLinus Torvalds 
10217bc0f28cSHagen Paul Pfeifer 	if (tb[TCA_NETEM_RATE])
102249545a77SYang Yingliang 		get_rate(q, tb[TCA_NETEM_RATE]);
10237bc0f28cSHagen Paul Pfeifer 
10246a031f67SYang Yingliang 	if (tb[TCA_NETEM_RATE64])
10256a031f67SYang Yingliang 		q->rate = max_t(u64, q->rate,
10266a031f67SYang Yingliang 				nla_get_u64(tb[TCA_NETEM_RATE64]));
10276a031f67SYang Yingliang 
102899803171SDave Taht 	if (tb[TCA_NETEM_LATENCY64])
102999803171SDave Taht 		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
103099803171SDave Taht 
103199803171SDave Taht 	if (tb[TCA_NETEM_JITTER64])
103299803171SDave Taht 		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
103399803171SDave Taht 
1034e4ae004bSEric Dumazet 	if (tb[TCA_NETEM_ECN])
1035e4ae004bSEric Dumazet 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
1036e4ae004bSEric Dumazet 
1037836af83bSDave Taht 	if (tb[TCA_NETEM_SLOT])
1038836af83bSDave Taht 		get_slot(q, tb[TCA_NETEM_SLOT]);
1039836af83bSDave Taht 
1040661b7972Sstephen hemminger 	return ret;
10410a9fe5c3SYousuk Seung 
10420a9fe5c3SYousuk Seung get_table_failure:
10430a9fe5c3SYousuk Seung 	/* Recover clg and loss_model, in case
10440a9fe5c3SYousuk Seung 	 * q->clg and q->loss_model were modified
10450a9fe5c3SYousuk Seung 	 * in get_loss_clg().
10460a9fe5c3SYousuk Seung 	 */
10470a9fe5c3SYousuk Seung 	q->clg = old_clg;
10480a9fe5c3SYousuk Seung 	q->loss_model = old_loss_model;
10490a9fe5c3SYousuk Seung 	return ret;
10501da177e4SLinus Torvalds }
10511da177e4SLinus Torvalds 
1052e63d7dfdSAlexander Aring static int netem_init(struct Qdisc *sch, struct nlattr *opt,
1053e63d7dfdSAlexander Aring 		      struct netlink_ext_ack *extack)
10541da177e4SLinus Torvalds {
10551da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
10561da177e4SLinus Torvalds 	int ret;
10571da177e4SLinus Torvalds 
1058634576a1SNikolay Aleksandrov 	qdisc_watchdog_init(&q->watchdog, sch);
1059634576a1SNikolay Aleksandrov 
10601da177e4SLinus Torvalds 	if (!opt)
10611da177e4SLinus Torvalds 		return -EINVAL;
10621da177e4SLinus Torvalds 
1063661b7972Sstephen hemminger 	q->loss_model = CLG_RANDOM;
10642030721cSAlexander Aring 	ret = netem_change(sch, opt, extack);
106550612537SEric Dumazet 	if (ret)
1066250a65f7Sstephen hemminger 		pr_info("netem: change failed\n");
10671da177e4SLinus Torvalds 	return ret;
10681da177e4SLinus Torvalds }
10691da177e4SLinus Torvalds 
10701da177e4SLinus Torvalds static void netem_destroy(struct Qdisc *sch)
10711da177e4SLinus Torvalds {
10721da177e4SLinus Torvalds 	struct netem_sched_data *q = qdisc_priv(sch);
10731da177e4SLinus Torvalds 
107459cb5c67SPatrick McHardy 	qdisc_watchdog_cancel(&q->watchdog);
107550612537SEric Dumazet 	if (q->qdisc)
107686bd446bSVlad Buslov 		qdisc_put(q->qdisc);
10776373a9a2Sstephen hemminger 	dist_free(q->delay_dist);
10780a9fe5c3SYousuk Seung 	dist_free(q->slot_dist);
10791da177e4SLinus Torvalds }
10801da177e4SLinus Torvalds 
1081661b7972Sstephen hemminger static int dump_loss_model(const struct netem_sched_data *q,
1082661b7972Sstephen hemminger 			   struct sk_buff *skb)
1083661b7972Sstephen hemminger {
1084661b7972Sstephen hemminger 	struct nlattr *nest;
1085661b7972Sstephen hemminger 
1086ae0be8deSMichal Kubecek 	nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS);
1087661b7972Sstephen hemminger 	if (nest == NULL)
1088661b7972Sstephen hemminger 		goto nla_put_failure;
1089661b7972Sstephen hemminger 
1090661b7972Sstephen hemminger 	switch (q->loss_model) {
1091661b7972Sstephen hemminger 	case CLG_RANDOM:
1092661b7972Sstephen hemminger 		/* legacy loss model */
1093661b7972Sstephen hemminger 		nla_nest_cancel(skb, nest);
1094661b7972Sstephen hemminger 		return 0;	/* no data */
1095661b7972Sstephen hemminger 
1096661b7972Sstephen hemminger 	case CLG_4_STATES: {
1097661b7972Sstephen hemminger 		struct tc_netem_gimodel gi = {
1098661b7972Sstephen hemminger 			.p13 = q->clg.a1,
1099661b7972Sstephen hemminger 			.p31 = q->clg.a2,
1100661b7972Sstephen hemminger 			.p32 = q->clg.a3,
1101661b7972Sstephen hemminger 			.p14 = q->clg.a4,
1102661b7972Sstephen hemminger 			.p23 = q->clg.a5,
1103661b7972Sstephen hemminger 		};
1104661b7972Sstephen hemminger 
11051b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
11061b34ec43SDavid S. Miller 			goto nla_put_failure;
1107661b7972Sstephen hemminger 		break;
1108661b7972Sstephen hemminger 	}
1109661b7972Sstephen hemminger 	case CLG_GILB_ELL: {
1110661b7972Sstephen hemminger 		struct tc_netem_gemodel ge = {
1111661b7972Sstephen hemminger 			.p = q->clg.a1,
1112661b7972Sstephen hemminger 			.r = q->clg.a2,
1113661b7972Sstephen hemminger 			.h = q->clg.a3,
1114661b7972Sstephen hemminger 			.k1 = q->clg.a4,
1115661b7972Sstephen hemminger 		};
1116661b7972Sstephen hemminger 
11171b34ec43SDavid S. Miller 		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
11181b34ec43SDavid S. Miller 			goto nla_put_failure;
1119661b7972Sstephen hemminger 		break;
1120661b7972Sstephen hemminger 	}
1121661b7972Sstephen hemminger 	}
1122661b7972Sstephen hemminger 
1123661b7972Sstephen hemminger 	nla_nest_end(skb, nest);
1124661b7972Sstephen hemminger 	return 0;
1125661b7972Sstephen hemminger 
1126661b7972Sstephen hemminger nla_put_failure:
1127661b7972Sstephen hemminger 	nla_nest_cancel(skb, nest);
1128661b7972Sstephen hemminger 	return -1;
1129661b7972Sstephen hemminger }
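
/*
 * The nests emitted above mirror the two correlated-loss generators:
 * NETEM_LOSS_GI carries the five transition probabilities of the 4-state
 * Markov model (p13, p31, p32, p14, p23) and NETEM_LOSS_GE the
 * Gilbert-Elliott parameters (p, r, h, k1), copied from q->clg.a1..a5 as
 * shown.  Like other netem probabilities they are encoded as u32
 * fractions of UINT_MAX (0 = never, ~0U = always).  A hedged sketch of
 * reading them back, assuming "loss" is a parsed TCA_NETEM_LOSS nest
 * (the name and parsing step are illustrative only):
 *
 *	const struct tc_netem_gemodel *ge = nla_data(loss[NETEM_LOSS_GE]);
 *	double p_good_to_bad = ge->p / (double)UINT_MAX;
 *	double p_bad_to_good = ge->r / (double)UINT_MAX;
 */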
1130661b7972Sstephen hemminger 
11311da177e4SLinus Torvalds static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
11321da177e4SLinus Torvalds {
11331da177e4SLinus Torvalds 	const struct netem_sched_data *q = qdisc_priv(sch);
1134861d7f74Sstephen hemminger 	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
11351da177e4SLinus Torvalds 	struct tc_netem_qopt qopt;
11361da177e4SLinus Torvalds 	struct tc_netem_corr cor;
11370dca51d3SStephen Hemminger 	struct tc_netem_reorder reorder;
1138c865e5d9SStephen Hemminger 	struct tc_netem_corrupt corrupt;
11397bc0f28cSHagen Paul Pfeifer 	struct tc_netem_rate rate;
1140836af83bSDave Taht 	struct tc_netem_slot slot;
11411da177e4SLinus Torvalds 
1142112f9cb6SDave Taht 	qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
1143112f9cb6SDave Taht 			     UINT_MAX);
1144112f9cb6SDave Taht 	qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
1145112f9cb6SDave Taht 			    UINT_MAX);
11461da177e4SLinus Torvalds 	qopt.limit = q->limit;
11471da177e4SLinus Torvalds 	qopt.loss = q->loss;
11481da177e4SLinus Torvalds 	qopt.gap = q->gap;
11491da177e4SLinus Torvalds 	qopt.duplicate = q->duplicate;
11501b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
11511b34ec43SDavid S. Miller 		goto nla_put_failure;
11521da177e4SLinus Torvalds 
115399803171SDave Taht 	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
115499803171SDave Taht 		goto nla_put_failure;
115599803171SDave Taht 
115699803171SDave Taht 	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
115799803171SDave Taht 		goto nla_put_failure;
115899803171SDave Taht 
11591da177e4SLinus Torvalds 	cor.delay_corr = q->delay_cor.rho;
11601da177e4SLinus Torvalds 	cor.loss_corr = q->loss_cor.rho;
11611da177e4SLinus Torvalds 	cor.dup_corr = q->dup_cor.rho;
11621b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
11631b34ec43SDavid S. Miller 		goto nla_put_failure;
11640dca51d3SStephen Hemminger 
11650dca51d3SStephen Hemminger 	reorder.probability = q->reorder;
11660dca51d3SStephen Hemminger 	reorder.correlation = q->reorder_cor.rho;
11671b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
11681b34ec43SDavid S. Miller 		goto nla_put_failure;
11690dca51d3SStephen Hemminger 
1170c865e5d9SStephen Hemminger 	corrupt.probability = q->corrupt;
1171c865e5d9SStephen Hemminger 	corrupt.correlation = q->corrupt_cor.rho;
11721b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
11731b34ec43SDavid S. Miller 		goto nla_put_failure;
1174c865e5d9SStephen Hemminger 
11756a031f67SYang Yingliang 	if (q->rate >= (1ULL << 32)) {
11762a51c1e8SNicolas Dichtel 		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
11772a51c1e8SNicolas Dichtel 				      TCA_NETEM_PAD))
11786a031f67SYang Yingliang 			goto nla_put_failure;
11796a031f67SYang Yingliang 		rate.rate = ~0U;
11806a031f67SYang Yingliang 	} else {
11817bc0f28cSHagen Paul Pfeifer 		rate.rate = q->rate;
11826a031f67SYang Yingliang 	}
118390b41a1cSHagen Paul Pfeifer 	rate.packet_overhead = q->packet_overhead;
118490b41a1cSHagen Paul Pfeifer 	rate.cell_size = q->cell_size;
118590b41a1cSHagen Paul Pfeifer 	rate.cell_overhead = q->cell_overhead;
11861b34ec43SDavid S. Miller 	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
11871b34ec43SDavid S. Miller 		goto nla_put_failure;
11887bc0f28cSHagen Paul Pfeifer 
1189e4ae004bSEric Dumazet 	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
1190e4ae004bSEric Dumazet 		goto nla_put_failure;
1191e4ae004bSEric Dumazet 
1192661b7972Sstephen hemminger 	if (dump_loss_model(q, skb) != 0)
1193661b7972Sstephen hemminger 		goto nla_put_failure;
1194661b7972Sstephen hemminger 
11950a9fe5c3SYousuk Seung 	if (q->slot_config.min_delay | q->slot_config.max_delay |
11960a9fe5c3SYousuk Seung 	    q->slot_config.dist_jitter) {
1197836af83bSDave Taht 		slot = q->slot_config;
1198836af83bSDave Taht 		if (slot.max_packets == INT_MAX)
1199836af83bSDave Taht 			slot.max_packets = 0;
1200836af83bSDave Taht 		if (slot.max_bytes == INT_MAX)
1201836af83bSDave Taht 			slot.max_bytes = 0;
1202836af83bSDave Taht 		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
1203836af83bSDave Taht 			goto nla_put_failure;
1204836af83bSDave Taht 	}
1205836af83bSDave Taht 
1206861d7f74Sstephen hemminger 	return nla_nest_end(skb, nla);
12071da177e4SLinus Torvalds 
12081e90474cSPatrick McHardy nla_put_failure:
1209861d7f74Sstephen hemminger 	nlmsg_trim(skb, nla);
12101da177e4SLinus Torvalds 	return -1;
12111da177e4SLinus Torvalds }
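
/*
 * The attribute set dumped above is what "tc qdisc show" reports back.
 * For reference, a configuration that exercises most of it might look
 * like this (eth0 and the numbers are only examples):
 *
 *	tc qdisc add dev eth0 root handle 1: netem \
 *		delay 100ms 10ms 25% loss 0.3% 25% duplicate 1% \
 *		corrupt 0.1% reorder 5% 50% rate 1mbit
 *	tc qdisc show dev eth0
 */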
12121da177e4SLinus Torvalds 
121310f6dfcfSstephen hemminger static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
121410f6dfcfSstephen hemminger 			  struct sk_buff *skb, struct tcmsg *tcm)
121510f6dfcfSstephen hemminger {
121610f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
121710f6dfcfSstephen hemminger 
121850612537SEric Dumazet 	if (cl != 1 || !q->qdisc) 	/* only one class */
121910f6dfcfSstephen hemminger 		return -ENOENT;
122010f6dfcfSstephen hemminger 
122110f6dfcfSstephen hemminger 	tcm->tcm_handle |= TC_H_MIN(1);
122210f6dfcfSstephen hemminger 	tcm->tcm_info = q->qdisc->handle;
122310f6dfcfSstephen hemminger 
122410f6dfcfSstephen hemminger 	return 0;
122510f6dfcfSstephen hemminger }
122610f6dfcfSstephen hemminger 
122710f6dfcfSstephen hemminger static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1228653d6fd6SAlexander Aring 		     struct Qdisc **old, struct netlink_ext_ack *extack)
122910f6dfcfSstephen hemminger {
123010f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
123110f6dfcfSstephen hemminger 
123286a7996cSWANG Cong 	*old = qdisc_replace(sch, new, &q->qdisc);
123310f6dfcfSstephen hemminger 	return 0;
123410f6dfcfSstephen hemminger }
123510f6dfcfSstephen hemminger 
123610f6dfcfSstephen hemminger static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
123710f6dfcfSstephen hemminger {
123810f6dfcfSstephen hemminger 	struct netem_sched_data *q = qdisc_priv(sch);
123910f6dfcfSstephen hemminger 	return q->qdisc;
124010f6dfcfSstephen hemminger }
124110f6dfcfSstephen hemminger 
1242143976ceSWANG Cong static unsigned long netem_find(struct Qdisc *sch, u32 classid)
124310f6dfcfSstephen hemminger {
124410f6dfcfSstephen hemminger 	return 1;
124510f6dfcfSstephen hemminger }
124610f6dfcfSstephen hemminger 
124710f6dfcfSstephen hemminger static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
124810f6dfcfSstephen hemminger {
124910f6dfcfSstephen hemminger 	if (!walker->stop) {
125010f6dfcfSstephen hemminger 		if (walker->count >= walker->skip)
125110f6dfcfSstephen hemminger 			if (walker->fn(sch, 1, walker) < 0) {
125210f6dfcfSstephen hemminger 				walker->stop = 1;
125310f6dfcfSstephen hemminger 				return;
125410f6dfcfSstephen hemminger 			}
125510f6dfcfSstephen hemminger 		walker->count++;
125610f6dfcfSstephen hemminger 	}
125710f6dfcfSstephen hemminger }
125810f6dfcfSstephen hemminger 
125910f6dfcfSstephen hemminger static const struct Qdisc_class_ops netem_class_ops = {
126010f6dfcfSstephen hemminger 	.graft		=	netem_graft,
126110f6dfcfSstephen hemminger 	.leaf		=	netem_leaf,
1262143976ceSWANG Cong 	.find		=	netem_find,
126310f6dfcfSstephen hemminger 	.walk		=	netem_walk,
126410f6dfcfSstephen hemminger 	.dump		=	netem_dump_class,
126510f6dfcfSstephen hemminger };
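
/*
 * netem exposes exactly one pseudo-class (minor 1, see netem_find() and
 * netem_dump_class() above) so that a child qdisc can be grafted under
 * it, e.g. to combine emulation with rate control.  A commonly cited
 * example (device name and numbers are illustrative):
 *
 *	tc qdisc add dev eth0 root handle 1: netem delay 100ms
 *	tc qdisc add dev eth0 parent 1:1 handle 10: tbf rate 256kbit \
 *		buffer 1600 limit 3000
 */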
126610f6dfcfSstephen hemminger 
126720fea08bSEric Dumazet static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
12681da177e4SLinus Torvalds 	.id		=	"netem",
126910f6dfcfSstephen hemminger 	.cl_ops		=	&netem_class_ops,
12701da177e4SLinus Torvalds 	.priv_size	=	sizeof(struct netem_sched_data),
12711da177e4SLinus Torvalds 	.enqueue	=	netem_enqueue,
12721da177e4SLinus Torvalds 	.dequeue	=	netem_dequeue,
127377be155cSJarek Poplawski 	.peek		=	qdisc_peek_dequeued,
12741da177e4SLinus Torvalds 	.init		=	netem_init,
12751da177e4SLinus Torvalds 	.reset		=	netem_reset,
12761da177e4SLinus Torvalds 	.destroy	=	netem_destroy,
12771da177e4SLinus Torvalds 	.change		=	netem_change,
12781da177e4SLinus Torvalds 	.dump		=	netem_dump,
12791da177e4SLinus Torvalds 	.owner		=	THIS_MODULE,
12801da177e4SLinus Torvalds };
12811da177e4SLinus Torvalds 
12821da177e4SLinus Torvalds 
12831da177e4SLinus Torvalds static int __init netem_module_init(void)
12841da177e4SLinus Torvalds {
1285eb229c4cSStephen Hemminger 	pr_info("netem: version " VERSION "\n");
12861da177e4SLinus Torvalds 	return register_qdisc(&netem_qdisc_ops);
12871da177e4SLinus Torvalds }
12881da177e4SLinus Torvalds static void __exit netem_module_exit(void)
12891da177e4SLinus Torvalds {
12901da177e4SLinus Torvalds 	unregister_qdisc(&netem_qdisc_ops);
12911da177e4SLinus Torvalds }
12921da177e4SLinus Torvalds module_init(netem_module_init)
12931da177e4SLinus Torvalds module_exit(netem_module_exit)
12941da177e4SLinus Torvalds MODULE_LICENSE("GPL");
1295